├── .gitignore
├── rustfmt.toml
├── COPYING
├── .github
    └── workflows
    │   └── ci.yml
├── README.md
├── Cargo.toml
├── LICENSE-MIT
├── UNLICENSE
├── benches
    └── bench.rs
└── src
    ├── pathutil.rs
    ├── lib.rs
    └── glob.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | tags
3 | target
4 | /Cargo.lock
5 | 


--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 79
2 | use_small_heuristics = "max"
3 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | This project is dual-licensed under the Unlicense and MIT licenses.
2 | 
3 | You may use this code under the terms of either license.
4 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: ci
 2 | on:
 3 |   pull_request:
 4 |   push:
 5 |     branches:
 6 |     - master
 7 | jobs:
 8 |   test:
 9 |     name: test
10 |     runs-on: ${{ matrix.os }}
11 |     strategy:
12 |       matrix:
13 |         build: [pinned]
14 |         include:
15 |         - build: pinned
16 |           os: ubuntu-18.04
17 |           rust: 1.28.0
18 |     steps:
19 |     - name: noop
20 |       run: echo noop
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | **This repository is not maintained. [globset is maintained in ripgrep](https://github.com/BurntSushi/ripgrep/tree/master/crates/globset).**
 2 | 
 3 | globset
 4 | =======
 5 | Cross platform single glob and glob set matching. Glob set matching is the
 6 | process of matching one or more glob patterns against a single candidate path
 7 | simultaneously, and returning all of the globs that matched.
 8 | 
 9 | [![Build status](https://github.com/BurntSushi/globset/workflows/ci/badge.svg)](https://github.com/BurntSushi/globset/actions)
10 | [![](https://img.shields.io/crates/v/globset.svg)](https://crates.io/crates/globset)
11 | 
12 | Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
13 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "globset"
 3 | version = "0.4.4"  #:version
 4 | authors = ["Andrew Gallant <jamslam@gmail.com>"]
 5 | description = """
 6 | Cross platform single glob and glob set matching. Glob set matching is the
 7 | process of matching one or more glob patterns against a single candidate path
 8 | simultaneously, and returning all of the globs that matched.
 9 | """
10 | documentation = "https://docs.rs/globset"
11 | homepage = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
12 | repository = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
13 | readme = "README.md"
14 | keywords = ["regex", "glob", "multiple", "set", "pattern"]
15 | license = "Unlicense/MIT"
16 | 
17 | [lib]
18 | name = "globset"
19 | bench = false
20 | 
21 | [dependencies]
22 | aho-corasick = "0.7.3"
23 | bstr = { version = "0.2.0", default-features = false, features = ["std"] }
24 | fnv = "1.0.6"
25 | log = "0.4.5"
26 | regex = "1.1.5"
27 | 
28 | [dev-dependencies]
29 | glob = "0.3.0"
30 | 
31 | [features]
32 | simd-accel = []
33 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Andrew Gallant
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/UNLICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <http://unlicense.org/>
25 | 


--------------------------------------------------------------------------------
/benches/bench.rs:
--------------------------------------------------------------------------------
  1 | /*!
  2 | This module benchmarks the glob implementation. For benchmarks on the ripgrep
  3 | tool itself, see the benchsuite directory.
  4 | */
  5 | #![feature(test)]
  6 | 
  7 | extern crate glob;
  8 | extern crate globset;
  9 | #[macro_use]
 10 | extern crate lazy_static;
 11 | extern crate regex;
 12 | extern crate test;
 13 | 
 14 | use std::ffi::OsStr;
 15 | use std::path::Path;
 16 | 
 17 | use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
 18 | 
 19 | const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
 20 | const EXT_PAT: &'static str = "*.txt";
 21 | 
 22 | const SHORT: &'static str = "some/needle.txt";
 23 | const SHORT_PAT: &'static str = "some/**/needle.txt";
 24 | 
 25 | const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
 26 | const LONG_PAT: &'static str = "some/**/needle.txt";
 27 | 
 28 | fn new_glob(pat: &str) -> glob::Pattern {
 29 |     glob::Pattern::new(pat).unwrap()
 30 | }
 31 | 
 32 | fn new_reglob(pat: &str) -> GlobMatcher {
 33 |     Glob::new(pat).unwrap().compile_matcher()
 34 | }
 35 | 
 36 | fn new_reglob_many(pats: &[&str]) -> GlobSet {
 37 |     let mut builder = GlobSetBuilder::new();
 38 |     for pat in pats {
 39 |         builder.add(Glob::new(pat).unwrap());
 40 |     }
 41 |     builder.build().unwrap()
 42 | }
 43 | 
 44 | #[bench]
 45 | fn ext_glob(b: &mut test::Bencher) {
 46 |     let pat = new_glob(EXT_PAT);
 47 |     b.iter(|| assert!(pat.matches(EXT)));
 48 | }
 49 | 
 50 | #[bench]
 51 | fn ext_regex(b: &mut test::Bencher) {
 52 |     let set = new_reglob(EXT_PAT);
 53 |     let cand = Candidate::new(EXT);
 54 |     b.iter(|| assert!(set.is_match_candidate(&cand)));
 55 | }
 56 | 
 57 | #[bench]
 58 | fn short_glob(b: &mut test::Bencher) {
 59 |     let pat = new_glob(SHORT_PAT);
 60 |     b.iter(|| assert!(pat.matches(SHORT)));
 61 | }
 62 | 
 63 | #[bench]
 64 | fn short_regex(b: &mut test::Bencher) {
 65 |     let set = new_reglob(SHORT_PAT);
 66 |     let cand = Candidate::new(SHORT);
 67 |     b.iter(|| assert!(set.is_match_candidate(&cand)));
 68 | }
 69 | 
 70 | #[bench]
 71 | fn long_glob(b: &mut test::Bencher) {
 72 |     let pat = new_glob(LONG_PAT);
 73 |     b.iter(|| assert!(pat.matches(LONG)));
 74 | }
 75 | 
 76 | #[bench]
 77 | fn long_regex(b: &mut test::Bencher) {
 78 |     let set = new_reglob(LONG_PAT);
 79 |     let cand = Candidate::new(LONG);
 80 |     b.iter(|| assert!(set.is_match_candidate(&cand)));
 81 | }
 82 | 
 83 | const MANY_SHORT_GLOBS: &'static [&'static str] = &[
 84 |     // Taken from a random .gitignore on my system.
 85 |     ".*.swp",
 86 |     "tags",
 87 |     "target",
 88 |     "*.lock",
 89 |     "tmp",
 90 |     "*.csv",
 91 |     "*.fst",
 92 |     "*-got",
 93 |     "*.csv.idx",
 94 |     "words",
 95 |     "98m*",
 96 |     "dict",
 97 |     "test",
 98 |     "months",
 99 | ];
100 | 
101 | const MANY_SHORT_SEARCH: &'static str = "98m-blah.csv.idx";
102 | 
103 | #[bench]
104 | fn many_short_glob(b: &mut test::Bencher) {
105 |     let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| new_glob(s)).collect();
106 |     b.iter(|| {
107 |         let mut count = 0;
108 |         for pat in &pats {
109 |             if pat.matches(MANY_SHORT_SEARCH) {
110 |                 count += 1;
111 |             }
112 |         }
113 |         assert_eq!(2, count);
114 |     })
115 | }
116 | 
117 | #[bench]
118 | fn many_short_regex_set(b: &mut test::Bencher) {
119 |     let set = new_reglob_many(MANY_SHORT_GLOBS);
120 |     b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count()));
121 | }
122 | 


--------------------------------------------------------------------------------
/src/pathutil.rs:
--------------------------------------------------------------------------------
  1 | use std::borrow::Cow;
  2 | 
  3 | use bstr::{ByteSlice, ByteVec};
  4 | 
  5 | /// The final component of the path, if it is a normal file.
  6 | ///
  7 | /// If the path terminates in ., .., or consists solely of a root of prefix,
  8 | /// file_name will return None.
  9 | pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
 10 |     if path.is_empty() {
 11 |         return None;
 12 |     } else if path.last_byte() == Some(b'.') {
 13 |         return None;
 14 |     }
 15 |     let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
 16 |     Some(match *path {
 17 |         Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
 18 |         Cow::Owned(ref path) => {
 19 |             let mut path = path.clone();
 20 |             path.drain_bytes(..last_slash);
 21 |             Cow::Owned(path)
 22 |         }
 23 |     })
 24 | }
 25 | 
 26 | /// Return a file extension given a path's file name.
 27 | ///
 28 | /// Note that this does NOT match the semantics of std::path::Path::extension.
 29 | /// Namely, the extension includes the `.` and matching is otherwise more
 30 | /// liberal. Specifically, the extenion is:
 31 | ///
 32 | /// * None, if the file name given is empty;
 33 | /// * None, if there is no embedded `.`;
 34 | /// * Otherwise, the portion of the file name starting with the final `.`.
 35 | ///
 36 | /// e.g., A file name of `.rs` has an extension `.rs`.
 37 | ///
 38 | /// N.B. This is done to make certain glob match optimizations easier. Namely,
 39 | /// a pattern like `*.rs` is obviously trying to match files with a `rs`
 40 | /// extension, but it also matches files like `.rs`, which doesn't have an
 41 | /// extension according to std::path::Path::extension.
 42 | pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
 43 |     if name.is_empty() {
 44 |         return None;
 45 |     }
 46 |     let last_dot_at = match name.rfind_byte(b'.') {
 47 |         None => return None,
 48 |         Some(i) => i,
 49 |     };
 50 |     Some(match *name {
 51 |         Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]),
 52 |         Cow::Owned(ref name) => {
 53 |             let mut name = name.clone();
 54 |             name.drain_bytes(..last_dot_at);
 55 |             Cow::Owned(name)
 56 |         }
 57 |     })
 58 | }
 59 | 
 60 | /// Normalizes a path to use `/` as a separator everywhere, even on platforms
 61 | /// that recognize other characters as separators.
 62 | #[cfg(unix)]
 63 | pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
 64 |     // UNIX only uses /, so we're good.
 65 |     path
 66 | }
 67 | 
 68 | /// Normalizes a path to use `/` as a separator everywhere, even on platforms
 69 | /// that recognize other characters as separators.
 70 | #[cfg(not(unix))]
 71 | pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
 72 |     use std::path::is_separator;
 73 | 
 74 |     for i in 0..path.len() {
 75 |         if path[i] == b'/' || !is_separator(path[i] as char) {
 76 |             continue;
 77 |         }
 78 |         path.to_mut()[i] = b'/';
 79 |     }
 80 |     path
 81 | }
 82 | 
 83 | #[cfg(test)]
 84 | mod tests {
 85 |     use std::borrow::Cow;
 86 | 
 87 |     use bstr::{ByteVec, B};
 88 | 
 89 |     use super::{file_name_ext, normalize_path};
 90 | 
 91 |     macro_rules! ext {
 92 |         ($name:ident, $file_name:expr, $ext:expr) => {
 93 |             #[test]
 94 |             fn $name() {
 95 |                 let bs = Vec::from($file_name);
 96 |                 let got = file_name_ext(&Cow::Owned(bs));
 97 |                 assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
 98 |             }
 99 |         };
100 |     }
101 | 
102 |     ext!(ext1, "foo.rs", Some(".rs"));
103 |     ext!(ext2, ".rs", Some(".rs"));
104 |     ext!(ext3, "..rs", Some(".rs"));
105 |     ext!(ext4, "", None::<&str>);
106 |     ext!(ext5, "foo", None::<&str>);
107 | 
108 |     macro_rules! normalize {
109 |         ($name:ident, $path:expr, $expected:expr) => {
110 |             #[test]
111 |             fn $name() {
112 |                 let bs = Vec::from_slice($path);
113 |                 let got = normalize_path(Cow::Owned(bs));
114 |                 assert_eq!($expected.to_vec(), got.into_owned());
115 |             }
116 |         };
117 |     }
118 | 
119 |     normalize!(normal1, b"foo", b"foo");
120 |     normalize!(normal2, b"foo/bar", b"foo/bar");
121 |     #[cfg(unix)]
122 |     normalize!(normal3, b"foo\\bar", b"foo\\bar");
123 |     #[cfg(not(unix))]
124 |     normalize!(normal3, b"foo\\bar", b"foo/bar");
125 |     #[cfg(unix)]
126 |     normalize!(normal4, b"foo\\bar/baz", b"foo\\bar/baz");
127 |     #[cfg(not(unix))]
128 |     normalize!(normal4, b"foo\\bar/baz", b"foo/bar/baz");
129 | }
130 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | /*!
  2 | The globset crate provides cross platform single glob and glob set matching.
  3 | 
  4 | Glob set matching is the process of matching one or more glob patterns against
  5 | a single candidate path simultaneously, and returning all of the globs that
  6 | matched. For example, given this set of globs:
  7 | 
  8 | ```ignore
  9 | *.rs
 10 | src/lib.rs
 11 | src/**/foo.rs
 12 | ```
 13 | 
 14 | and a path `src/bar/baz/foo.rs`, then the set would report the first and third
 15 | globs as matching.
 16 | 
 17 | # Example: one glob
 18 | 
 19 | This example shows how to match a single glob against a single file path.
 20 | 
 21 | ```
 22 | # fn example() -> Result<(), globset::Error> {
 23 | use globset::Glob;
 24 | 
 25 | let glob = Glob::new("*.rs")?.compile_matcher();
 26 | 
 27 | assert!(glob.is_match("foo.rs"));
 28 | assert!(glob.is_match("foo/bar.rs"));
 29 | assert!(!glob.is_match("Cargo.toml"));
 30 | # Ok(()) } example().unwrap();
 31 | ```
 32 | 
 33 | # Example: configuring a glob matcher
 34 | 
 35 | This example shows how to use a `GlobBuilder` to configure aspects of match
 36 | semantics. In this example, we prevent wildcards from matching path separators.
 37 | 
 38 | ```
 39 | # fn example() -> Result<(), globset::Error> {
 40 | use globset::GlobBuilder;
 41 | 
 42 | let glob = GlobBuilder::new("*.rs")
 43 |     .literal_separator(true).build()?.compile_matcher();
 44 | 
 45 | assert!(glob.is_match("foo.rs"));
 46 | assert!(!glob.is_match("foo/bar.rs")); // no longer matches
 47 | assert!(!glob.is_match("Cargo.toml"));
 48 | # Ok(()) } example().unwrap();
 49 | ```
 50 | 
 51 | # Example: match multiple globs at once
 52 | 
 53 | This example shows how to match multiple glob patterns at once.
 54 | 
 55 | ```
 56 | # fn example() -> Result<(), globset::Error> {
 57 | use globset::{Glob, GlobSetBuilder};
 58 | 
 59 | let mut builder = GlobSetBuilder::new();
 60 | // A GlobBuilder can be used to configure each glob's match semantics
 61 | // independently.
 62 | builder.add(Glob::new("*.rs")?);
 63 | builder.add(Glob::new("src/lib.rs")?);
 64 | builder.add(Glob::new("src/**/foo.rs")?);
 65 | let set = builder.build()?;
 66 | 
 67 | assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
 68 | # Ok(()) } example().unwrap();
 69 | ```
 70 | 
 71 | # Syntax
 72 | 
 73 | Standard Unix-style glob syntax is supported:
 74 | 
 75 | * `?` matches any single character. (If the `literal_separator` option is
 76 |   enabled, then `?` can never match a path separator.)
 77 | * `*` matches zero or more characters. (If the `literal_separator` option is
 78 |   enabled, then `*` can never match a path separator.)
 79 | * `**` recursively matches directories but are only legal in three situations.
 80 |   First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
 81 |   all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
 82 |   and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
 83 |   <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
 84 |   <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
 85 |   Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
 86 |   the pattern, then it matches zero or more directories. Using `**` anywhere
 87 |   else is illegal (N.B. the glob `**` is allowed and means "match everything").
 88 | * `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
 89 |   (N.B. Nesting `{...}` is not currently allowed.)
 90 | * `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
 91 |   `[!ab]` to match any character except for `a` and `b`.
 92 | * Metacharacters such as `*` and `?` can be escaped with character class
 93 |   notation. e.g., `[*]` matches `*`.
 94 | * When backslash escapes are enabled, a backslash (`\`) will escape all meta
 95 |   characters in a glob. If it precedes a non-meta character, then the slash is
 96 |   ignored. A `\\` will match a literal `\\`. Note that this mode is only
 97 |   enabled on Unix platforms by default, but can be enabled on any platform
 98 |   via the `backslash_escape` setting on `Glob`.
 99 | 
100 | A `GlobBuilder` can be used to prevent wildcards from matching path separators,
101 | or to enable case insensitive matching.
102 | */
103 | 
104 | #![deny(missing_docs)]
105 | 
106 | extern crate aho_corasick;
107 | extern crate bstr;
108 | extern crate fnv;
109 | #[macro_use]
110 | extern crate log;
111 | extern crate regex;
112 | 
113 | use std::borrow::Cow;
114 | use std::collections::{BTreeMap, HashMap};
115 | use std::error::Error as StdError;
116 | use std::fmt;
117 | use std::hash;
118 | use std::path::Path;
119 | use std::str;
120 | 
121 | use aho_corasick::AhoCorasick;
122 | use bstr::{ByteSlice, ByteVec, B};
123 | use regex::bytes::{Regex, RegexBuilder, RegexSet};
124 | 
125 | use glob::MatchStrategy;
126 | pub use glob::{Glob, GlobBuilder, GlobMatcher};
127 | use pathutil::{file_name, file_name_ext, normalize_path};
128 | 
129 | mod glob;
130 | mod pathutil;
131 | 
132 | /// Represents an error that can occur when parsing a glob pattern.
133 | #[derive(Clone, Debug, Eq, PartialEq)]
134 | pub struct Error {
135 |     /// The original glob provided by the caller.
136 |     glob: Option<String>,
137 |     /// The kind of error.
138 |     kind: ErrorKind,
139 | }
140 | 
141 | /// The kind of error that can occur when parsing a glob pattern.
142 | #[derive(Clone, Debug, Eq, PartialEq)]
143 | pub enum ErrorKind {
144 |     /// **DEPRECATED**.
145 |     ///
146 |     /// This error used to occur for consistency with git's glob specification,
147 |     /// but the specification now accepts all uses of `**`. When `**` does not
148 |     /// appear adjacent to a path separator or at the beginning/end of a glob,
149 |     /// it is now treated as two consecutive `*` patterns. As such, this error
150 |     /// is no longer used.
151 |     InvalidRecursive,
152 |     /// Occurs when a character class (e.g., `[abc]`) is not closed.
153 |     UnclosedClass,
154 |     /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For
155 |     /// example, if the range starts with a lexicographically larger character
156 |     /// than it ends with.
157 |     InvalidRange(char, char),
158 |     /// Occurs when a `}` is found without a matching `{`.
159 |     UnopenedAlternates,
160 |     /// Occurs when a `{` is found without a matching `}`.
161 |     UnclosedAlternates,
162 |     /// Occurs when an alternating group is nested inside another alternating
163 |     /// group, e.g., `{{a,b},{c,d}}`.
164 |     NestedAlternates,
165 |     /// Occurs when an unescaped '\' is found at the end of a glob.
166 |     DanglingEscape,
167 |     /// An error associated with parsing or compiling a regex.
168 |     Regex(String),
169 |     /// Hints that destructuring should not be exhaustive.
170 |     ///
171 |     /// This enum may grow additional variants, so this makes sure clients
172 |     /// don't count on exhaustive matching. (Otherwise, adding a new variant
173 |     /// could break existing code.)
174 |     #[doc(hidden)]
175 |     __Nonexhaustive,
176 | }
177 | 
178 | impl StdError for Error {
179 |     fn description(&self) -> &str {
180 |         self.kind.description()
181 |     }
182 | }
183 | 
184 | impl Error {
185 |     /// Return the glob that caused this error, if one exists.
186 |     pub fn glob(&self) -> Option<&str> {
187 |         self.glob.as_ref().map(|s| &**s)
188 |     }
189 | 
190 |     /// Return the kind of this error.
191 |     pub fn kind(&self) -> &ErrorKind {
192 |         &self.kind
193 |     }
194 | }
195 | 
196 | impl ErrorKind {
197 |     fn description(&self) -> &str {
198 |         match *self {
199 |             ErrorKind::InvalidRecursive => {
200 |                 "invalid use of **; must be one path component"
201 |             }
202 |             ErrorKind::UnclosedClass => {
203 |                 "unclosed character class; missing ']'"
204 |             }
205 |             ErrorKind::InvalidRange(_, _) => "invalid character range",
206 |             ErrorKind::UnopenedAlternates => {
207 |                 "unopened alternate group; missing '{' \
208 |                  (maybe escape '}' with '[}]'?)"
209 |             }
210 |             ErrorKind::UnclosedAlternates => {
211 |                 "unclosed alternate group; missing '}' \
212 |                  (maybe escape '{' with '[{]'?)"
213 |             }
214 |             ErrorKind::NestedAlternates => {
215 |                 "nested alternate groups are not allowed"
216 |             }
217 |             ErrorKind::DanglingEscape => "dangling '\\'",
218 |             ErrorKind::Regex(ref err) => err,
219 |             ErrorKind::__Nonexhaustive => unreachable!(),
220 |         }
221 |     }
222 | }
223 | 
224 | impl fmt::Display for Error {
225 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
226 |         match self.glob {
227 |             None => self.kind.fmt(f),
228 |             Some(ref glob) => {
229 |                 write!(f, "error parsing glob '{}': {}", glob, self.kind)
230 |             }
231 |         }
232 |     }
233 | }
234 | 
235 | impl fmt::Display for ErrorKind {
236 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
237 |         match *self {
238 |             ErrorKind::InvalidRecursive
239 |             | ErrorKind::UnclosedClass
240 |             | ErrorKind::UnopenedAlternates
241 |             | ErrorKind::UnclosedAlternates
242 |             | ErrorKind::NestedAlternates
243 |             | ErrorKind::DanglingEscape
244 |             | ErrorKind::Regex(_) => write!(f, "{}", self.description()),
245 |             ErrorKind::InvalidRange(s, e) => {
246 |                 write!(f, "invalid range; '{}' > '{}'", s, e)
247 |             }
248 |             ErrorKind::__Nonexhaustive => unreachable!(),
249 |         }
250 |     }
251 | }
252 | 
253 | fn new_regex(pat: &str) -> Result<Regex, Error> {
254 |     RegexBuilder::new(pat)
255 |         .dot_matches_new_line(true)
256 |         .size_limit(10 * (1 << 20))
257 |         .dfa_size_limit(10 * (1 << 20))
258 |         .build()
259 |         .map_err(|err| Error {
260 |             glob: Some(pat.to_string()),
261 |             kind: ErrorKind::Regex(err.to_string()),
262 |         })
263 | }
264 | 
265 | fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
266 | where
267 |     S: AsRef<str>,
268 |     I: IntoIterator<Item = S>,
269 | {
270 |     RegexSet::new(pats).map_err(|err| Error {
271 |         glob: None,
272 |         kind: ErrorKind::Regex(err.to_string()),
273 |     })
274 | }
275 | 
276 | type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
277 | 
278 | /// GlobSet represents a group of globs that can be matched together in a
279 | /// single pass.
280 | #[derive(Clone, Debug)]
281 | pub struct GlobSet {
282 |     len: usize,
283 |     strats: Vec<GlobSetMatchStrategy>,
284 | }
285 | 
286 | impl GlobSet {
287 |     /// Create an empty `GlobSet`. An empty set matches nothing.
288 |     #[inline]
289 |     pub fn empty() -> GlobSet {
290 |         GlobSet { len: 0, strats: vec![] }
291 |     }
292 | 
293 |     /// Returns true if this set is empty, and therefore matches nothing.
294 |     #[inline]
295 |     pub fn is_empty(&self) -> bool {
296 |         self.len == 0
297 |     }
298 | 
299 |     /// Returns the number of globs in this set.
300 |     #[inline]
301 |     pub fn len(&self) -> usize {
302 |         self.len
303 |     }
304 | 
305 |     /// Returns true if any glob in this set matches the path given.
306 |     pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
307 |         self.is_match_candidate(&Candidate::new(path.as_ref()))
308 |     }
309 | 
310 |     /// Returns true if any glob in this set matches the path given.
311 |     ///
312 |     /// This takes a Candidate as input, which can be used to amortize the
313 |     /// cost of preparing a path for matching.
314 |     pub fn is_match_candidate(&self, path: &Candidate) -> bool {
315 |         if self.is_empty() {
316 |             return false;
317 |         }
318 |         for strat in &self.strats {
319 |             if strat.is_match(path) {
320 |                 return true;
321 |             }
322 |         }
323 |         false
324 |     }
325 | 
326 |     /// Returns the sequence number of every glob pattern that matches the
327 |     /// given path.
328 |     pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
329 |         self.matches_candidate(&Candidate::new(path.as_ref()))
330 |     }
331 | 
332 |     /// Returns the sequence number of every glob pattern that matches the
333 |     /// given path.
334 |     ///
335 |     /// This takes a Candidate as input, which can be used to amortize the
336 |     /// cost of preparing a path for matching.
337 |     pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
338 |         let mut into = vec![];
339 |         if self.is_empty() {
340 |             return into;
341 |         }
342 |         self.matches_candidate_into(path, &mut into);
343 |         into
344 |     }
345 | 
346 |     /// Adds the sequence number of every glob pattern that matches the given
347 |     /// path to the vec given.
348 |     ///
349 |     /// `into` is is cleared before matching begins, and contains the set of
350 |     /// sequence numbers (in ascending order) after matching ends. If no globs
351 |     /// were matched, then `into` will be empty.
352 |     pub fn matches_into<P: AsRef<Path>>(
353 |         &self,
354 |         path: P,
355 |         into: &mut Vec<usize>,
356 |     ) {
357 |         self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
358 |     }
359 | 
360 |     /// Adds the sequence number of every glob pattern that matches the given
361 |     /// path to the vec given.
362 |     ///
363 |     /// `into` is is cleared before matching begins, and contains the set of
364 |     /// sequence numbers (in ascending order) after matching ends. If no globs
365 |     /// were matched, then `into` will be empty.
366 |     ///
367 |     /// This takes a Candidate as input, which can be used to amortize the
368 |     /// cost of preparing a path for matching.
369 |     pub fn matches_candidate_into(
370 |         &self,
371 |         path: &Candidate,
372 |         into: &mut Vec<usize>,
373 |     ) {
374 |         into.clear();
375 |         if self.is_empty() {
376 |             return;
377 |         }
378 |         for strat in &self.strats {
379 |             strat.matches_into(path, into);
380 |         }
381 |         into.sort();
382 |         into.dedup();
383 |     }
384 | 
385 |     fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
386 |         if pats.is_empty() {
387 |             return Ok(GlobSet { len: 0, strats: vec![] });
388 |         }
389 |         let mut lits = LiteralStrategy::new();
390 |         let mut base_lits = BasenameLiteralStrategy::new();
391 |         let mut exts = ExtensionStrategy::new();
392 |         let mut prefixes = MultiStrategyBuilder::new();
393 |         let mut suffixes = MultiStrategyBuilder::new();
394 |         let mut required_exts = RequiredExtensionStrategyBuilder::new();
395 |         let mut regexes = MultiStrategyBuilder::new();
396 |         for (i, p) in pats.iter().enumerate() {
397 |             match MatchStrategy::new(p) {
398 |                 MatchStrategy::Literal(lit) => {
399 |                     lits.add(i, lit);
400 |                 }
401 |                 MatchStrategy::BasenameLiteral(lit) => {
402 |                     base_lits.add(i, lit);
403 |                 }
404 |                 MatchStrategy::Extension(ext) => {
405 |                     exts.add(i, ext);
406 |                 }
407 |                 MatchStrategy::Prefix(prefix) => {
408 |                     prefixes.add(i, prefix);
409 |                 }
410 |                 MatchStrategy::Suffix { suffix, component } => {
411 |                     if component {
412 |                         lits.add(i, suffix[1..].to_string());
413 |                     }
414 |                     suffixes.add(i, suffix);
415 |                 }
416 |                 MatchStrategy::RequiredExtension(ext) => {
417 |                     required_exts.add(i, ext, p.regex().to_owned());
418 |                 }
419 |                 MatchStrategy::Regex => {
420 |                     debug!("glob converted to regex: {:?}", p);
421 |                     regexes.add(i, p.regex().to_owned());
422 |                 }
423 |             }
424 |         }
425 |         debug!(
426 |             "built glob set; {} literals, {} basenames, {} extensions, \
427 |              {} prefixes, {} suffixes, {} required extensions, {} regexes",
428 |             lits.0.len(),
429 |             base_lits.0.len(),
430 |             exts.0.len(),
431 |             prefixes.literals.len(),
432 |             suffixes.literals.len(),
433 |             required_exts.0.len(),
434 |             regexes.literals.len()
435 |         );
436 |         Ok(GlobSet {
437 |             len: pats.len(),
438 |             strats: vec![
439 |                 GlobSetMatchStrategy::Extension(exts),
440 |                 GlobSetMatchStrategy::BasenameLiteral(base_lits),
441 |                 GlobSetMatchStrategy::Literal(lits),
442 |                 GlobSetMatchStrategy::Suffix(suffixes.suffix()),
443 |                 GlobSetMatchStrategy::Prefix(prefixes.prefix()),
444 |                 GlobSetMatchStrategy::RequiredExtension(
445 |                     required_exts.build()?,
446 |                 ),
447 |                 GlobSetMatchStrategy::Regex(regexes.regex_set()?),
448 |             ],
449 |         })
450 |     }
451 | }
452 | 
453 | /// GlobSetBuilder builds a group of patterns that can be used to
454 | /// simultaneously match a file path.
455 | #[derive(Clone, Debug)]
456 | pub struct GlobSetBuilder {
457 |     pats: Vec<Glob>,
458 | }
459 | 
460 | impl GlobSetBuilder {
461 |     /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
462 |     /// patterns. Once all patterns have been added, `build` should be called
463 |     /// to produce a `GlobSet`, which can then be used for matching.
464 |     pub fn new() -> GlobSetBuilder {
465 |         GlobSetBuilder { pats: vec![] }
466 |     }
467 | 
468 |     /// Builds a new matcher from all of the glob patterns added so far.
469 |     ///
470 |     /// Once a matcher is built, no new patterns can be added to it.
471 |     pub fn build(&self) -> Result<GlobSet, Error> {
472 |         GlobSet::new(&self.pats)
473 |     }
474 | 
475 |     /// Add a new pattern to this set.
476 |     pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
477 |         self.pats.push(pat);
478 |         self
479 |     }
480 | }
481 | 
482 | /// A candidate path for matching.
483 | ///
484 | /// All glob matching in this crate operates on `Candidate` values.
485 | /// Constructing candidates has a very small cost associated with it, so
486 | /// callers may find it beneficial to amortize that cost when matching a single
487 | /// path against multiple globs or sets of globs.
488 | #[derive(Clone, Debug)]
489 | pub struct Candidate<'a> {
490 |     path: Cow<'a, [u8]>,
491 |     basename: Cow<'a, [u8]>,
492 |     ext: Cow<'a, [u8]>,
493 | }
494 | 
495 | impl<'a> Candidate<'a> {
496 |     /// Create a new candidate for matching from the given path.
497 |     pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
498 |         let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
499 |         let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
500 |         let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
501 |         Candidate { path: path, basename: basename, ext: ext }
502 |     }
503 | 
504 |     fn path_prefix(&self, max: usize) -> &[u8] {
505 |         if self.path.len() <= max {
506 |             &*self.path
507 |         } else {
508 |             &self.path[..max]
509 |         }
510 |     }
511 | 
512 |     fn path_suffix(&self, max: usize) -> &[u8] {
513 |         if self.path.len() <= max {
514 |             &*self.path
515 |         } else {
516 |             &self.path[self.path.len() - max..]
517 |         }
518 |     }
519 | }
520 | 
521 | #[derive(Clone, Debug)]
522 | enum GlobSetMatchStrategy {
523 |     Literal(LiteralStrategy),
524 |     BasenameLiteral(BasenameLiteralStrategy),
525 |     Extension(ExtensionStrategy),
526 |     Prefix(PrefixStrategy),
527 |     Suffix(SuffixStrategy),
528 |     RequiredExtension(RequiredExtensionStrategy),
529 |     Regex(RegexSetStrategy),
530 | }
531 | 
532 | impl GlobSetMatchStrategy {
533 |     fn is_match(&self, candidate: &Candidate) -> bool {
534 |         use self::GlobSetMatchStrategy::*;
535 |         match *self {
536 |             Literal(ref s) => s.is_match(candidate),
537 |             BasenameLiteral(ref s) => s.is_match(candidate),
538 |             Extension(ref s) => s.is_match(candidate),
539 |             Prefix(ref s) => s.is_match(candidate),
540 |             Suffix(ref s) => s.is_match(candidate),
541 |             RequiredExtension(ref s) => s.is_match(candidate),
542 |             Regex(ref s) => s.is_match(candidate),
543 |         }
544 |     }
545 | 
546 |     fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
547 |         use self::GlobSetMatchStrategy::*;
548 |         match *self {
549 |             Literal(ref s) => s.matches_into(candidate, matches),
550 |             BasenameLiteral(ref s) => s.matches_into(candidate, matches),
551 |             Extension(ref s) => s.matches_into(candidate, matches),
552 |             Prefix(ref s) => s.matches_into(candidate, matches),
553 |             Suffix(ref s) => s.matches_into(candidate, matches),
554 |             RequiredExtension(ref s) => s.matches_into(candidate, matches),
555 |             Regex(ref s) => s.matches_into(candidate, matches),
556 |         }
557 |     }
558 | }
559 | 
560 | #[derive(Clone, Debug)]
561 | struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
562 | 
563 | impl LiteralStrategy {
564 |     fn new() -> LiteralStrategy {
565 |         LiteralStrategy(BTreeMap::new())
566 |     }
567 | 
568 |     fn add(&mut self, global_index: usize, lit: String) {
569 |         self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
570 |     }
571 | 
572 |     fn is_match(&self, candidate: &Candidate) -> bool {
573 |         self.0.contains_key(candidate.path.as_bytes())
574 |     }
575 | 
576 |     #[inline(never)]
577 |     fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
578 |         if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
579 |             matches.extend(hits);
580 |         }
581 |     }
582 | }
583 | 
584 | #[derive(Clone, Debug)]
585 | struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
586 | 
587 | impl BasenameLiteralStrategy {
588 |     fn new() -> BasenameLiteralStrategy {
589 |         BasenameLiteralStrategy(BTreeMap::new())
590 |     }
591 | 
592 |     fn add(&mut self, global_index: usize, lit: String) {
593 |         self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
594 |     }
595 | 
596 |     fn is_match(&self, candidate: &Candidate) -> bool {
597 |         if candidate.basename.is_empty() {
598 |             return false;
599 |         }
600 |         self.0.contains_key(candidate.basename.as_bytes())
601 |     }
602 | 
603 |     #[inline(never)]
604 |     fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
605 |         if candidate.basename.is_empty() {
606 |             return;
607 |         }
608 |         if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
609 |             matches.extend(hits);
610 |         }
611 |     }
612 | }
613 | 
614 | #[derive(Clone, Debug)]
615 | struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
616 | 
617 | impl ExtensionStrategy {
618 |     fn new() -> ExtensionStrategy {
619 |         ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
620 |     }
621 | 
622 |     fn add(&mut self, global_index: usize, ext: String) {
623 |         self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
624 |     }
625 | 
626 |     fn is_match(&self, candidate: &Candidate) -> bool {
627 |         if candidate.ext.is_empty() {
628 |             return false;
629 |         }
630 |         self.0.contains_key(candidate.ext.as_bytes())
631 |     }
632 | 
633 |     #[inline(never)]
634 |     fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
635 |         if candidate.ext.is_empty() {
636 |             return;
637 |         }
638 |         if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
639 |             matches.extend(hits);
640 |         }
641 |     }
642 | }
643 | 
644 | #[derive(Clone, Debug)]
645 | struct PrefixStrategy {
646 |     matcher: AhoCorasick,
647 |     map: Vec<usize>,
648 |     longest: usize,
649 | }
650 | 
651 | impl PrefixStrategy {
652 |     fn is_match(&self, candidate: &Candidate) -> bool {
653 |         let path = candidate.path_prefix(self.longest);
654 |         for m in self.matcher.find_overlapping_iter(path) {
655 |             if m.start() == 0 {
656 |                 return true;
657 |             }
658 |         }
659 |         false
660 |     }
661 | 
662 |     fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
663 |         let path = candidate.path_prefix(self.longest);
664 |         for m in self.matcher.find_overlapping_iter(path) {
665 |             if m.start() == 0 {
666 |                 matches.push(self.map[m.pattern()]);
667 |             }
668 |         }
669 |     }
670 | }
671 | 
672 | #[derive(Clone, Debug)]
673 | struct SuffixStrategy {
674 |     matcher: AhoCorasick,
675 |     map: Vec<usize>,
676 |     longest: usize,
677 | }
678 | 
679 | impl SuffixStrategy {
680 |     fn is_match(&self, candidate: &Candidate) -> bool {
681 |         let path = candidate.path_suffix(self.longest);
682 |         for m in self.matcher.find_overlapping_iter(path) {
683 |             if m.end() == path.len() {
684 |                 return true;
685 |             }
686 |         }
687 |         false
688 |     }
689 | 
690 |     fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
691 |         let path = candidate.path_suffix(self.longest);
692 |         for m in self.matcher.find_overlapping_iter(path) {
693 |             if m.end() == path.len() {
694 |                 matches.push(self.map[m.pattern()]);
695 |             }
696 |         }
697 |     }
698 | }
699 | 
700 | #[derive(Clone, Debug)]
701 | struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
702 | 
703 | impl RequiredExtensionStrategy {
704 |     fn is_match(&self, candidate: &Candidate) -> bool {
705 |         if candidate.ext.is_empty() {
706 |             return false;
707 |         }
708 |         match self.0.get(candidate.ext.as_bytes()) {
709 |             None => false,
710 |             Some(regexes) => {
711 |                 for &(_, ref re) in regexes {
712 |                     if re.is_match(candidate.path.as_bytes()) {
713 |                         return true;
714 |                     }
715 |                 }
716 |                 false
717 |             }
718 |         }
719 |     }
720 | 
721 |     #[inline(never)]
722 |     fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
723 |         if candidate.ext.is_empty() {
724 |             return;
725 |         }
726 |         if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
727 |             for &(global_index, ref re) in regexes {
728 |                 if re.is_match(candidate.path.as_bytes()) {
729 |                     matches.push(global_index);
730 |                 }
731 |             }
732 |         }
733 |     }
734 | }
735 | 
736 | #[derive(Clone, Debug)]
737 | struct RegexSetStrategy {
738 |     matcher: RegexSet,
739 |     map: Vec<usize>,
740 | }
741 | 
742 | impl RegexSetStrategy {
743 |     fn is_match(&self, candidate: &Candidate) -> bool {
744 |         self.matcher.is_match(candidate.path.as_bytes())
745 |     }
746 | 
747 |     fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
748 |         for i in self.matcher.matches(candidate.path.as_bytes()) {
749 |             matches.push(self.map[i]);
750 |         }
751 |     }
752 | }
753 | 
754 | #[derive(Clone, Debug)]
755 | struct MultiStrategyBuilder {
756 |     literals: Vec<String>,
757 |     map: Vec<usize>,
758 |     longest: usize,
759 | }
760 | 
761 | impl MultiStrategyBuilder {
762 |     fn new() -> MultiStrategyBuilder {
763 |         MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
764 |     }
765 | 
766 |     fn add(&mut self, global_index: usize, literal: String) {
767 |         if literal.len() > self.longest {
768 |             self.longest = literal.len();
769 |         }
770 |         self.map.push(global_index);
771 |         self.literals.push(literal);
772 |     }
773 | 
774 |     fn prefix(self) -> PrefixStrategy {
775 |         PrefixStrategy {
776 |             matcher: AhoCorasick::new_auto_configured(&self.literals),
777 |             map: self.map,
778 |             longest: self.longest,
779 |         }
780 |     }
781 | 
782 |     fn suffix(self) -> SuffixStrategy {
783 |         SuffixStrategy {
784 |             matcher: AhoCorasick::new_auto_configured(&self.literals),
785 |             map: self.map,
786 |             longest: self.longest,
787 |         }
788 |     }
789 | 
790 |     fn regex_set(self) -> Result<RegexSetStrategy, Error> {
791 |         Ok(RegexSetStrategy {
792 |             matcher: new_regex_set(self.literals)?,
793 |             map: self.map,
794 |         })
795 |     }
796 | }
797 | 
798 | #[derive(Clone, Debug)]
799 | struct RequiredExtensionStrategyBuilder(
800 |     HashMap<Vec<u8>, Vec<(usize, String)>>,
801 | );
802 | 
803 | impl RequiredExtensionStrategyBuilder {
804 |     fn new() -> RequiredExtensionStrategyBuilder {
805 |         RequiredExtensionStrategyBuilder(HashMap::new())
806 |     }
807 | 
808 |     fn add(&mut self, global_index: usize, ext: String, regex: String) {
809 |         self.0
810 |             .entry(ext.into_bytes())
811 |             .or_insert(vec![])
812 |             .push((global_index, regex));
813 |     }
814 | 
815 |     fn build(self) -> Result<RequiredExtensionStrategy, Error> {
816 |         let mut exts = HashMap::with_hasher(Fnv::default());
817 |         for (ext, regexes) in self.0.into_iter() {
818 |             exts.insert(ext.clone(), vec![]);
819 |             for (global_index, regex) in regexes {
820 |                 let compiled = new_regex(&regex)?;
821 |                 exts.get_mut(&ext).unwrap().push((global_index, compiled));
822 |             }
823 |         }
824 |         Ok(RequiredExtensionStrategy(exts))
825 |     }
826 | }
827 | 
828 | #[cfg(test)]
829 | mod tests {
830 |     use super::GlobSetBuilder;
831 |     use glob::Glob;
832 | 
833 |     #[test]
834 |     fn set_works() {
835 |         let mut builder = GlobSetBuilder::new();
836 |         builder.add(Glob::new("src/**/*.rs").unwrap());
837 |         builder.add(Glob::new("*.c").unwrap());
838 |         builder.add(Glob::new("src/lib.rs").unwrap());
839 |         let set = builder.build().unwrap();
840 | 
841 |         assert!(set.is_match("foo.c"));
842 |         assert!(set.is_match("src/foo.c"));
843 |         assert!(!set.is_match("foo.rs"));
844 |         assert!(!set.is_match("tests/foo.rs"));
845 |         assert!(set.is_match("src/foo.rs"));
846 |         assert!(set.is_match("src/grep/src/main.rs"));
847 | 
848 |         let matches = set.matches("src/lib.rs");
849 |         assert_eq!(2, matches.len());
850 |         assert_eq!(0, matches[0]);
851 |         assert_eq!(2, matches[1]);
852 |     }
853 | 
854 |     #[test]
855 |     fn empty_set_works() {
856 |         let set = GlobSetBuilder::new().build().unwrap();
857 |         assert!(!set.is_match(""));
858 |         assert!(!set.is_match("a"));
859 |     }
860 | }
861 | 


--------------------------------------------------------------------------------
/src/glob.rs:
--------------------------------------------------------------------------------
   1 | use std::fmt;
   2 | use std::hash;
   3 | use std::iter;
   4 | use std::ops::{Deref, DerefMut};
   5 | use std::path::{is_separator, Path};
   6 | use std::str;
   7 | 
   8 | use regex;
   9 | use regex::bytes::Regex;
  10 | 
  11 | use {new_regex, Candidate, Error, ErrorKind};
  12 | 
  13 | /// Describes a matching strategy for a particular pattern.
  14 | ///
  15 | /// This provides a way to more quickly determine whether a pattern matches
  16 | /// a particular file path in a way that scales with a large number of
  17 | /// patterns. For example, if many patterns are of the form `*.ext`, then it's
  18 | /// possible to test whether any of those patterns matches by looking up a
  19 | /// file path's extension in a hash table.
  20 | #[derive(Clone, Debug, Eq, PartialEq)]
  21 | pub enum MatchStrategy {
  22 |     /// A pattern matches if and only if the entire file path matches this
  23 |     /// literal string.
  24 |     Literal(String),
  25 |     /// A pattern matches if and only if the file path's basename matches this
  26 |     /// literal string.
  27 |     BasenameLiteral(String),
  28 |     /// A pattern matches if and only if the file path's extension matches this
  29 |     /// literal string.
  30 |     Extension(String),
  31 |     /// A pattern matches if and only if this prefix literal is a prefix of the
  32 |     /// candidate file path.
  33 |     Prefix(String),
  34 |     /// A pattern matches if and only if this prefix literal is a prefix of the
  35 |     /// candidate file path.
  36 |     ///
  37 |     /// An exception: if `component` is true, then `suffix` must appear at the
  38 |     /// beginning of a file path or immediately following a `/`.
  39 |     Suffix {
  40 |         /// The actual suffix.
  41 |         suffix: String,
  42 |         /// Whether this must start at the beginning of a path component.
  43 |         component: bool,
  44 |     },
  45 |     /// A pattern matches only if the given extension matches the file path's
  46 |     /// extension. Note that this is a necessary but NOT sufficient criterion.
  47 |     /// Namely, if the extension matches, then a full regex search is still
  48 |     /// required.
  49 |     RequiredExtension(String),
  50 |     /// A regex needs to be used for matching.
  51 |     Regex,
  52 | }
  53 | 
  54 | impl MatchStrategy {
  55 |     /// Returns a matching strategy for the given pattern.
  56 |     pub fn new(pat: &Glob) -> MatchStrategy {
  57 |         if let Some(lit) = pat.basename_literal() {
  58 |             MatchStrategy::BasenameLiteral(lit)
  59 |         } else if let Some(lit) = pat.literal() {
  60 |             MatchStrategy::Literal(lit)
  61 |         } else if let Some(ext) = pat.ext() {
  62 |             MatchStrategy::Extension(ext)
  63 |         } else if let Some(prefix) = pat.prefix() {
  64 |             MatchStrategy::Prefix(prefix)
  65 |         } else if let Some((suffix, component)) = pat.suffix() {
  66 |             MatchStrategy::Suffix { suffix: suffix, component: component }
  67 |         } else if let Some(ext) = pat.required_ext() {
  68 |             MatchStrategy::RequiredExtension(ext)
  69 |         } else {
  70 |             MatchStrategy::Regex
  71 |         }
  72 |     }
  73 | }
  74 | 
  75 | /// Glob represents a successfully parsed shell glob pattern.
  76 | ///
  77 | /// It cannot be used directly to match file paths, but it can be converted
  78 | /// to a regular expression string or a matcher.
  79 | #[derive(Clone, Debug, Eq)]
  80 | pub struct Glob {
  81 |     glob: String,
  82 |     re: String,
  83 |     opts: GlobOptions,
  84 |     tokens: Tokens,
  85 | }
  86 | 
  87 | impl PartialEq for Glob {
  88 |     fn eq(&self, other: &Glob) -> bool {
  89 |         self.glob == other.glob && self.opts == other.opts
  90 |     }
  91 | }
  92 | 
  93 | impl hash::Hash for Glob {
  94 |     fn hash<H: hash::Hasher>(&self, state: &mut H) {
  95 |         self.glob.hash(state);
  96 |         self.opts.hash(state);
  97 |     }
  98 | }
  99 | 
 100 | impl fmt::Display for Glob {
 101 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 102 |         self.glob.fmt(f)
 103 |     }
 104 | }
 105 | 
 106 | /// A matcher for a single pattern.
 107 | #[derive(Clone, Debug)]
 108 | pub struct GlobMatcher {
 109 |     /// The underlying pattern.
 110 |     pat: Glob,
 111 |     /// The pattern, as a compiled regex.
 112 |     re: Regex,
 113 | }
 114 | 
 115 | impl GlobMatcher {
 116 |     /// Tests whether the given path matches this pattern or not.
 117 |     pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
 118 |         self.is_match_candidate(&Candidate::new(path.as_ref()))
 119 |     }
 120 | 
 121 |     /// Tests whether the given path matches this pattern or not.
 122 |     pub fn is_match_candidate(&self, path: &Candidate) -> bool {
 123 |         self.re.is_match(&path.path)
 124 |     }
 125 | }
 126 | 
 127 | /// A strategic matcher for a single pattern.
 128 | #[cfg(test)]
 129 | #[derive(Clone, Debug)]
 130 | struct GlobStrategic {
 131 |     /// The match strategy to use.
 132 |     strategy: MatchStrategy,
 133 |     /// The underlying pattern.
 134 |     pat: Glob,
 135 |     /// The pattern, as a compiled regex.
 136 |     re: Regex,
 137 | }
 138 | 
 139 | #[cfg(test)]
 140 | impl GlobStrategic {
 141 |     /// Tests whether the given path matches this pattern or not.
 142 |     fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
 143 |         self.is_match_candidate(&Candidate::new(path.as_ref()))
 144 |     }
 145 | 
 146 |     /// Tests whether the given path matches this pattern or not.
 147 |     fn is_match_candidate(&self, candidate: &Candidate) -> bool {
 148 |         let byte_path = &*candidate.path;
 149 | 
 150 |         match self.strategy {
 151 |             MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
 152 |             MatchStrategy::BasenameLiteral(ref lit) => {
 153 |                 lit.as_bytes() == &*candidate.basename
 154 |             }
 155 |             MatchStrategy::Extension(ref ext) => {
 156 |                 ext.as_bytes() == &*candidate.ext
 157 |             }
 158 |             MatchStrategy::Prefix(ref pre) => {
 159 |                 starts_with(pre.as_bytes(), byte_path)
 160 |             }
 161 |             MatchStrategy::Suffix { ref suffix, component } => {
 162 |                 if component && byte_path == &suffix.as_bytes()[1..] {
 163 |                     return true;
 164 |                 }
 165 |                 ends_with(suffix.as_bytes(), byte_path)
 166 |             }
 167 |             MatchStrategy::RequiredExtension(ref ext) => {
 168 |                 let ext = ext.as_bytes();
 169 |                 &*candidate.ext == ext && self.re.is_match(byte_path)
 170 |             }
 171 |             MatchStrategy::Regex => self.re.is_match(byte_path),
 172 |         }
 173 |     }
 174 | }
 175 | 
 176 | /// A builder for a pattern.
 177 | ///
 178 | /// This builder enables configuring the match semantics of a pattern. For
 179 | /// example, one can make matching case insensitive.
 180 | ///
 181 | /// The lifetime `'a` refers to the lifetime of the pattern string.
 182 | #[derive(Clone, Debug)]
 183 | pub struct GlobBuilder<'a> {
 184 |     /// The glob pattern to compile.
 185 |     glob: &'a str,
 186 |     /// Options for the pattern.
 187 |     opts: GlobOptions,
 188 | }
 189 | 
 190 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
 191 | struct GlobOptions {
 192 |     /// Whether to match case insensitively.
 193 |     case_insensitive: bool,
 194 |     /// Whether to require a literal separator to match a separator in a file
 195 |     /// path. e.g., when enabled, `*` won't match `/`.
 196 |     literal_separator: bool,
 197 |     /// Whether or not to use `\` to escape special characters.
 198 |     /// e.g., when enabled, `\*` will match a literal `*`.
 199 |     backslash_escape: bool,
 200 | }
 201 | 
 202 | impl GlobOptions {
 203 |     fn default() -> GlobOptions {
 204 |         GlobOptions {
 205 |             case_insensitive: false,
 206 |             literal_separator: false,
 207 |             backslash_escape: !is_separator('\\'),
 208 |         }
 209 |     }
 210 | }
 211 | 
 212 | #[derive(Clone, Debug, Default, Eq, PartialEq)]
 213 | struct Tokens(Vec<Token>);
 214 | 
 215 | impl Deref for Tokens {
 216 |     type Target = Vec<Token>;
 217 |     fn deref(&self) -> &Vec<Token> {
 218 |         &self.0
 219 |     }
 220 | }
 221 | 
 222 | impl DerefMut for Tokens {
 223 |     fn deref_mut(&mut self) -> &mut Vec<Token> {
 224 |         &mut self.0
 225 |     }
 226 | }
 227 | 
 228 | #[derive(Clone, Debug, Eq, PartialEq)]
 229 | enum Token {
 230 |     Literal(char),
 231 |     Any,
 232 |     ZeroOrMore,
 233 |     RecursivePrefix,
 234 |     RecursiveSuffix,
 235 |     RecursiveZeroOrMore,
 236 |     Class { negated: bool, ranges: Vec<(char, char)> },
 237 |     Alternates(Vec<Tokens>),
 238 | }
 239 | 
 240 | impl Glob {
 241 |     /// Builds a new pattern with default options.
 242 |     pub fn new(glob: &str) -> Result<Glob, Error> {
 243 |         GlobBuilder::new(glob).build()
 244 |     }
 245 | 
 246 |     /// Returns a matcher for this pattern.
 247 |     pub fn compile_matcher(&self) -> GlobMatcher {
 248 |         let re =
 249 |             new_regex(&self.re).expect("regex compilation shouldn't fail");
 250 |         GlobMatcher { pat: self.clone(), re: re }
 251 |     }
 252 | 
 253 |     /// Returns a strategic matcher.
 254 |     ///
 255 |     /// This isn't exposed because it's not clear whether it's actually
 256 |     /// faster than just running a regex for a *single* pattern. If it
 257 |     /// is faster, then GlobMatcher should do it automatically.
 258 |     #[cfg(test)]
 259 |     fn compile_strategic_matcher(&self) -> GlobStrategic {
 260 |         let strategy = MatchStrategy::new(self);
 261 |         let re =
 262 |             new_regex(&self.re).expect("regex compilation shouldn't fail");
 263 |         GlobStrategic { strategy: strategy, pat: self.clone(), re: re }
 264 |     }
 265 | 
 266 |     /// Returns the original glob pattern used to build this pattern.
 267 |     pub fn glob(&self) -> &str {
 268 |         &self.glob
 269 |     }
 270 | 
 271 |     /// Returns the regular expression string for this glob.
 272 |     ///
 273 |     /// Note that regular expressions for globs are intended to be matched on
 274 |     /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
 275 |     /// particular, globs are frequently used on file paths, where there is no
 276 |     /// general guarantee that file paths are themselves valid UTF-8. As a
 277 |     /// result, callers will need to ensure that they are using a regex API
 278 |     /// that can match on arbitrary bytes. For example, the
 279 |     /// [`regex`](https://crates.io/regex)
 280 |     /// crate's
 281 |     /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
 282 |     /// API is not suitable for this since it matches on `&str`, but its
 283 |     /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
 284 |     /// API is suitable for this.
 285 |     pub fn regex(&self) -> &str {
 286 |         &self.re
 287 |     }
 288 | 
 289 |     /// Returns the pattern as a literal if and only if the pattern must match
 290 |     /// an entire path exactly.
 291 |     ///
 292 |     /// The basic format of these patterns is `{literal}`.
 293 |     fn literal(&self) -> Option<String> {
 294 |         if self.opts.case_insensitive {
 295 |             return None;
 296 |         }
 297 |         let mut lit = String::new();
 298 |         for t in &*self.tokens {
 299 |             match *t {
 300 |                 Token::Literal(c) => lit.push(c),
 301 |                 _ => return None,
 302 |             }
 303 |         }
 304 |         if lit.is_empty() {
 305 |             None
 306 |         } else {
 307 |             Some(lit)
 308 |         }
 309 |     }
 310 | 
 311 |     /// Returns an extension if this pattern matches a file path if and only
 312 |     /// if the file path has the extension returned.
 313 |     ///
 314 |     /// Note that this extension returned differs from the extension that
 315 |     /// std::path::Path::extension returns. Namely, this extension includes
 316 |     /// the '.'. Also, paths like `.rs` are considered to have an extension
 317 |     /// of `.rs`.
 318 |     fn ext(&self) -> Option<String> {
 319 |         if self.opts.case_insensitive {
 320 |             return None;
 321 |         }
 322 |         let start = match self.tokens.get(0) {
 323 |             Some(&Token::RecursivePrefix) => 1,
 324 |             Some(_) => 0,
 325 |             _ => return None,
 326 |         };
 327 |         match self.tokens.get(start) {
 328 |             Some(&Token::ZeroOrMore) => {
 329 |                 // If there was no recursive prefix, then we only permit
 330 |                 // `*` if `*` can match a `/`. For example, if `*` can't
 331 |                 // match `/`, then `*.c` doesn't match `foo/bar.c`.
 332 |                 if start == 0 && self.opts.literal_separator {
 333 |                     return None;
 334 |                 }
 335 |             }
 336 |             _ => return None,
 337 |         }
 338 |         match self.tokens.get(start + 1) {
 339 |             Some(&Token::Literal('.')) => {}
 340 |             _ => return None,
 341 |         }
 342 |         let mut lit = ".".to_string();
 343 |         for t in self.tokens[start + 2..].iter() {
 344 |             match *t {
 345 |                 Token::Literal('.') | Token::Literal('/') => return None,
 346 |                 Token::Literal(c) => lit.push(c),
 347 |                 _ => return None,
 348 |             }
 349 |         }
 350 |         if lit.is_empty() {
 351 |             None
 352 |         } else {
 353 |             Some(lit)
 354 |         }
 355 |     }
 356 | 
 357 |     /// This is like `ext`, but returns an extension even if it isn't sufficent
 358 |     /// to imply a match. Namely, if an extension is returned, then it is
 359 |     /// necessary but not sufficient for a match.
 360 |     fn required_ext(&self) -> Option<String> {
 361 |         if self.opts.case_insensitive {
 362 |             return None;
 363 |         }
 364 |         // We don't care at all about the beginning of this pattern. All we
 365 |         // need to check for is if it ends with a literal of the form `.ext`.
 366 |         let mut ext: Vec<char> = vec![]; // built in reverse
 367 |         for t in self.tokens.iter().rev() {
 368 |             match *t {
 369 |                 Token::Literal('/') => return None,
 370 |                 Token::Literal(c) => {
 371 |                     ext.push(c);
 372 |                     if c == '.' {
 373 |                         break;
 374 |                     }
 375 |                 }
 376 |                 _ => return None,
 377 |             }
 378 |         }
 379 |         if ext.last() != Some(&'.') {
 380 |             None
 381 |         } else {
 382 |             ext.reverse();
 383 |             Some(ext.into_iter().collect())
 384 |         }
 385 |     }
 386 | 
 387 |     /// Returns a literal prefix of this pattern if the entire pattern matches
 388 |     /// if the literal prefix matches.
 389 |     fn prefix(&self) -> Option<String> {
 390 |         if self.opts.case_insensitive {
 391 |             return None;
 392 |         }
 393 |         let end = match self.tokens.last() {
 394 |             Some(&Token::ZeroOrMore) => {
 395 |                 if self.opts.literal_separator {
 396 |                     // If a trailing `*` can't match a `/`, then we can't
 397 |                     // assume a match of the prefix corresponds to a match
 398 |                     // of the overall pattern. e.g., `foo/*` with
 399 |                     // `literal_separator` enabled matches `foo/bar` but not
 400 |                     // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
 401 |                     // literal prefix.
 402 |                     return None;
 403 |                 }
 404 |                 self.tokens.len() - 1
 405 |             }
 406 |             _ => self.tokens.len(),
 407 |         };
 408 |         let mut lit = String::new();
 409 |         for t in &self.tokens[0..end] {
 410 |             match *t {
 411 |                 Token::Literal(c) => lit.push(c),
 412 |                 _ => return None,
 413 |             }
 414 |         }
 415 |         if lit.is_empty() {
 416 |             None
 417 |         } else {
 418 |             Some(lit)
 419 |         }
 420 |     }
 421 | 
 422 |     /// Returns a literal suffix of this pattern if the entire pattern matches
 423 |     /// if the literal suffix matches.
 424 |     ///
 425 |     /// If a literal suffix is returned and it must match either the entire
 426 |     /// file path or be preceded by a `/`, then also return true. This happens
 427 |     /// with a pattern like `**/foo/bar`. Namely, this pattern matches
 428 |     /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
 429 |     /// suffix returned is `/foo/bar` (but should match the entire path
 430 |     /// `foo/bar`).
 431 |     ///
 432 |     /// When this returns true, the suffix literal is guaranteed to start with
 433 |     /// a `/`.
 434 |     fn suffix(&self) -> Option<(String, bool)> {
 435 |         if self.opts.case_insensitive {
 436 |             return None;
 437 |         }
 438 |         let mut lit = String::new();
 439 |         let (start, entire) = match self.tokens.get(0) {
 440 |             Some(&Token::RecursivePrefix) => {
 441 |                 // We only care if this follows a path component if the next
 442 |                 // token is a literal.
 443 |                 if let Some(&Token::Literal(_)) = self.tokens.get(1) {
 444 |                     lit.push('/');
 445 |                     (1, true)
 446 |                 } else {
 447 |                     (1, false)
 448 |                 }
 449 |             }
 450 |             _ => (0, false),
 451 |         };
 452 |         let start = match self.tokens.get(start) {
 453 |             Some(&Token::ZeroOrMore) => {
 454 |                 // If literal_separator is enabled, then a `*` can't
 455 |                 // necessarily match everything, so reporting a suffix match
 456 |                 // as a match of the pattern would be a false positive.
 457 |                 if self.opts.literal_separator {
 458 |                     return None;
 459 |                 }
 460 |                 start + 1
 461 |             }
 462 |             _ => start,
 463 |         };
 464 |         for t in &self.tokens[start..] {
 465 |             match *t {
 466 |                 Token::Literal(c) => lit.push(c),
 467 |                 _ => return None,
 468 |             }
 469 |         }
 470 |         if lit.is_empty() || lit == "/" {
 471 |             None
 472 |         } else {
 473 |             Some((lit, entire))
 474 |         }
 475 |     }
 476 | 
 477 |     /// If this pattern only needs to inspect the basename of a file path,
 478 |     /// then the tokens corresponding to only the basename match are returned.
 479 |     ///
 480 |     /// For example, given a pattern of `**/*.foo`, only the tokens
 481 |     /// corresponding to `*.foo` are returned.
 482 |     ///
 483 |     /// Note that this will return None if any match of the basename tokens
 484 |     /// doesn't correspond to a match of the entire pattern. For example, the
 485 |     /// glob `foo` only matches when a file path has a basename of `foo`, but
 486 |     /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
 487 |     /// `foo` doesn't match `abc/foo`.
 488 |     fn basename_tokens(&self) -> Option<&[Token]> {
 489 |         if self.opts.case_insensitive {
 490 |             return None;
 491 |         }
 492 |         let start = match self.tokens.get(0) {
 493 |             Some(&Token::RecursivePrefix) => 1,
 494 |             _ => {
 495 |                 // With nothing to gobble up the parent portion of a path,
 496 |                 // we can't assume that matching on only the basename is
 497 |                 // correct.
 498 |                 return None;
 499 |             }
 500 |         };
 501 |         if self.tokens[start..].is_empty() {
 502 |             return None;
 503 |         }
 504 |         for t in &self.tokens[start..] {
 505 |             match *t {
 506 |                 Token::Literal('/') => return None,
 507 |                 Token::Literal(_) => {} // OK
 508 |                 Token::Any | Token::ZeroOrMore => {
 509 |                     if !self.opts.literal_separator {
 510 |                         // In this case, `*` and `?` can match a path
 511 |                         // separator, which means this could reach outside
 512 |                         // the basename.
 513 |                         return None;
 514 |                     }
 515 |                 }
 516 |                 Token::RecursivePrefix
 517 |                 | Token::RecursiveSuffix
 518 |                 | Token::RecursiveZeroOrMore => {
 519 |                     return None;
 520 |                 }
 521 |                 Token::Class { .. } | Token::Alternates(..) => {
 522 |                     // We *could* be a little smarter here, but either one
 523 |                     // of these is going to prevent our literal optimizations
 524 |                     // anyway, so give up.
 525 |                     return None;
 526 |                 }
 527 |             }
 528 |         }
 529 |         Some(&self.tokens[start..])
 530 |     }
 531 | 
 532 |     /// Returns the pattern as a literal if and only if the pattern exclusively
 533 |     /// matches the basename of a file path *and* is a literal.
 534 |     ///
 535 |     /// The basic format of these patterns is `**/{literal}`, where `{literal}`
 536 |     /// does not contain a path separator.
 537 |     fn basename_literal(&self) -> Option<String> {
 538 |         let tokens = match self.basename_tokens() {
 539 |             None => return None,
 540 |             Some(tokens) => tokens,
 541 |         };
 542 |         let mut lit = String::new();
 543 |         for t in tokens {
 544 |             match *t {
 545 |                 Token::Literal(c) => lit.push(c),
 546 |                 _ => return None,
 547 |             }
 548 |         }
 549 |         Some(lit)
 550 |     }
 551 | }
 552 | 
 553 | impl<'a> GlobBuilder<'a> {
 554 |     /// Create a new builder for the pattern given.
 555 |     ///
 556 |     /// The pattern is not compiled until `build` is called.
 557 |     pub fn new(glob: &'a str) -> GlobBuilder<'a> {
 558 |         GlobBuilder { glob: glob, opts: GlobOptions::default() }
 559 |     }
 560 | 
 561 |     /// Parses and builds the pattern.
 562 |     pub fn build(&self) -> Result<Glob, Error> {
 563 |         let mut p = Parser {
 564 |             glob: &self.glob,
 565 |             stack: vec![Tokens::default()],
 566 |             chars: self.glob.chars().peekable(),
 567 |             prev: None,
 568 |             cur: None,
 569 |             opts: &self.opts,
 570 |         };
 571 |         p.parse()?;
 572 |         if p.stack.is_empty() {
 573 |             Err(Error {
 574 |                 glob: Some(self.glob.to_string()),
 575 |                 kind: ErrorKind::UnopenedAlternates,
 576 |             })
 577 |         } else if p.stack.len() > 1 {
 578 |             Err(Error {
 579 |                 glob: Some(self.glob.to_string()),
 580 |                 kind: ErrorKind::UnclosedAlternates,
 581 |             })
 582 |         } else {
 583 |             let tokens = p.stack.pop().unwrap();
 584 |             Ok(Glob {
 585 |                 glob: self.glob.to_string(),
 586 |                 re: tokens.to_regex_with(&self.opts),
 587 |                 opts: self.opts,
 588 |                 tokens: tokens,
 589 |             })
 590 |         }
 591 |     }
 592 | 
 593 |     /// Toggle whether the pattern matches case insensitively or not.
 594 |     ///
 595 |     /// This is disabled by default.
 596 |     pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 597 |         self.opts.case_insensitive = yes;
 598 |         self
 599 |     }
 600 | 
 601 |     /// Toggle whether a literal `/` is required to match a path separator.
 602 |     pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 603 |         self.opts.literal_separator = yes;
 604 |         self
 605 |     }
 606 | 
 607 |     /// When enabled, a back slash (`\`) may be used to escape
 608 |     /// special characters in a glob pattern. Additionally, this will
 609 |     /// prevent `\` from being interpreted as a path separator on all
 610 |     /// platforms.
 611 |     ///
 612 |     /// This is enabled by default on platforms where `\` is not a
 613 |     /// path separator and disabled by default on platforms where `\`
 614 |     /// is a path separator.
 615 |     pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 616 |         self.opts.backslash_escape = yes;
 617 |         self
 618 |     }
 619 | }
 620 | 
 621 | impl Tokens {
 622 |     /// Convert this pattern to a string that is guaranteed to be a valid
 623 |     /// regular expression and will represent the matching semantics of this
 624 |     /// glob pattern and the options given.
 625 |     fn to_regex_with(&self, options: &GlobOptions) -> String {
 626 |         let mut re = String::new();
 627 |         re.push_str("(?-u)");
 628 |         if options.case_insensitive {
 629 |             re.push_str("(?i)");
 630 |         }
 631 |         re.push('^');
 632 |         // Special case. If the entire glob is just `**`, then it should match
 633 |         // everything.
 634 |         if self.len() == 1 && self[0] == Token::RecursivePrefix {
 635 |             re.push_str(".*");
 636 |             re.push('$');
 637 |             return re;
 638 |         }
 639 |         self.tokens_to_regex(options, &self, &mut re);
 640 |         re.push('$');
 641 |         re
 642 |     }
 643 | 
 644 |     fn tokens_to_regex(
 645 |         &self,
 646 |         options: &GlobOptions,
 647 |         tokens: &[Token],
 648 |         re: &mut String,
 649 |     ) {
 650 |         for tok in tokens {
 651 |             match *tok {
 652 |                 Token::Literal(c) => {
 653 |                     re.push_str(&char_to_escaped_literal(c));
 654 |                 }
 655 |                 Token::Any => {
 656 |                     if options.literal_separator {
 657 |                         re.push_str("[^/]");
 658 |                     } else {
 659 |                         re.push_str(".");
 660 |                     }
 661 |                 }
 662 |                 Token::ZeroOrMore => {
 663 |                     if options.literal_separator {
 664 |                         re.push_str("[^/]*");
 665 |                     } else {
 666 |                         re.push_str(".*");
 667 |                     }
 668 |                 }
 669 |                 Token::RecursivePrefix => {
 670 |                     re.push_str("(?:/?|.*/)");
 671 |                 }
 672 |                 Token::RecursiveSuffix => {
 673 |                     re.push_str("(?:/?|/.*)");
 674 |                 }
 675 |                 Token::RecursiveZeroOrMore => {
 676 |                     re.push_str("(?:/|/.*/)");
 677 |                 }
 678 |                 Token::Class { negated, ref ranges } => {
 679 |                     re.push('[');
 680 |                     if negated {
 681 |                         re.push('^');
 682 |                     }
 683 |                     for r in ranges {
 684 |                         if r.0 == r.1 {
 685 |                             // Not strictly necessary, but nicer to look at.
 686 |                             re.push_str(&char_to_escaped_literal(r.0));
 687 |                         } else {
 688 |                             re.push_str(&char_to_escaped_literal(r.0));
 689 |                             re.push('-');
 690 |                             re.push_str(&char_to_escaped_literal(r.1));
 691 |                         }
 692 |                     }
 693 |                     re.push(']');
 694 |                 }
 695 |                 Token::Alternates(ref patterns) => {
 696 |                     let mut parts = vec![];
 697 |                     for pat in patterns {
 698 |                         let mut altre = String::new();
 699 |                         self.tokens_to_regex(options, &pat, &mut altre);
 700 |                         if !altre.is_empty() {
 701 |                             parts.push(altre);
 702 |                         }
 703 |                     }
 704 | 
 705 |                     // It is possible to have an empty set in which case the
 706 |                     // resulting alternation '()' would be an error.
 707 |                     if !parts.is_empty() {
 708 |                         re.push('(');
 709 |                         re.push_str(&parts.join("|"));
 710 |                         re.push(')');
 711 |                     }
 712 |                 }
 713 |             }
 714 |         }
 715 |     }
 716 | }
 717 | 
 718 | /// Convert a Unicode scalar value to an escaped string suitable for use as
 719 | /// a literal in a non-Unicode regex.
 720 | fn char_to_escaped_literal(c: char) -> String {
 721 |     bytes_to_escaped_literal(&c.to_string().into_bytes())
 722 | }
 723 | 
 724 | /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
 725 | /// code units are converted to their escaped form.
 726 | fn bytes_to_escaped_literal(bs: &[u8]) -> String {
 727 |     let mut s = String::with_capacity(bs.len());
 728 |     for &b in bs {
 729 |         if b <= 0x7F {
 730 |             s.push_str(&regex::escape(&(b as char).to_string()));
 731 |         } else {
 732 |             s.push_str(&format!("\\x{:02x}", b));
 733 |         }
 734 |     }
 735 |     s
 736 | }
 737 | 
 738 | struct Parser<'a> {
 739 |     glob: &'a str,
 740 |     stack: Vec<Tokens>,
 741 |     chars: iter::Peekable<str::Chars<'a>>,
 742 |     prev: Option<char>,
 743 |     cur: Option<char>,
 744 |     opts: &'a GlobOptions,
 745 | }
 746 | 
 747 | impl<'a> Parser<'a> {
 748 |     fn error(&self, kind: ErrorKind) -> Error {
 749 |         Error { glob: Some(self.glob.to_string()), kind: kind }
 750 |     }
 751 | 
 752 |     fn parse(&mut self) -> Result<(), Error> {
 753 |         while let Some(c) = self.bump() {
 754 |             match c {
 755 |                 '?' => self.push_token(Token::Any)?,
 756 |                 '*' => self.parse_star()?,
 757 |                 '[' => self.parse_class()?,
 758 |                 '{' => self.push_alternate()?,
 759 |                 '}' => self.pop_alternate()?,
 760 |                 ',' => self.parse_comma()?,
 761 |                 '\\' => self.parse_backslash()?,
 762 |                 c => self.push_token(Token::Literal(c))?,
 763 |             }
 764 |         }
 765 |         Ok(())
 766 |     }
 767 | 
 768 |     fn push_alternate(&mut self) -> Result<(), Error> {
 769 |         if self.stack.len() > 1 {
 770 |             return Err(self.error(ErrorKind::NestedAlternates));
 771 |         }
 772 |         Ok(self.stack.push(Tokens::default()))
 773 |     }
 774 | 
 775 |     fn pop_alternate(&mut self) -> Result<(), Error> {
 776 |         let mut alts = vec![];
 777 |         while self.stack.len() >= 2 {
 778 |             alts.push(self.stack.pop().unwrap());
 779 |         }
 780 |         self.push_token(Token::Alternates(alts))
 781 |     }
 782 | 
 783 |     fn push_token(&mut self, tok: Token) -> Result<(), Error> {
 784 |         if let Some(ref mut pat) = self.stack.last_mut() {
 785 |             return Ok(pat.push(tok));
 786 |         }
 787 |         Err(self.error(ErrorKind::UnopenedAlternates))
 788 |     }
 789 | 
 790 |     fn pop_token(&mut self) -> Result<Token, Error> {
 791 |         if let Some(ref mut pat) = self.stack.last_mut() {
 792 |             return Ok(pat.pop().unwrap());
 793 |         }
 794 |         Err(self.error(ErrorKind::UnopenedAlternates))
 795 |     }
 796 | 
 797 |     fn have_tokens(&self) -> Result<bool, Error> {
 798 |         match self.stack.last() {
 799 |             None => Err(self.error(ErrorKind::UnopenedAlternates)),
 800 |             Some(ref pat) => Ok(!pat.is_empty()),
 801 |         }
 802 |     }
 803 | 
 804 |     fn parse_comma(&mut self) -> Result<(), Error> {
 805 |         // If we aren't inside a group alternation, then don't
 806 |         // treat commas specially. Otherwise, we need to start
 807 |         // a new alternate.
 808 |         if self.stack.len() <= 1 {
 809 |             self.push_token(Token::Literal(','))
 810 |         } else {
 811 |             Ok(self.stack.push(Tokens::default()))
 812 |         }
 813 |     }
 814 | 
 815 |     fn parse_backslash(&mut self) -> Result<(), Error> {
 816 |         if self.opts.backslash_escape {
 817 |             match self.bump() {
 818 |                 None => Err(self.error(ErrorKind::DanglingEscape)),
 819 |                 Some(c) => self.push_token(Token::Literal(c)),
 820 |             }
 821 |         } else if is_separator('\\') {
 822 |             // Normalize all patterns to use / as a separator.
 823 |             self.push_token(Token::Literal('/'))
 824 |         } else {
 825 |             self.push_token(Token::Literal('\\'))
 826 |         }
 827 |     }
 828 | 
 829 |     fn parse_star(&mut self) -> Result<(), Error> {
 830 |         let prev = self.prev;
 831 |         if self.peek() != Some('*') {
 832 |             self.push_token(Token::ZeroOrMore)?;
 833 |             return Ok(());
 834 |         }
 835 |         assert!(self.bump() == Some('*'));
 836 |         if !self.have_tokens()? {
 837 |             if !self.peek().map_or(true, is_separator) {
 838 |                 self.push_token(Token::ZeroOrMore)?;
 839 |                 self.push_token(Token::ZeroOrMore)?;
 840 |             } else {
 841 |                 self.push_token(Token::RecursivePrefix)?;
 842 |                 assert!(self.bump().map_or(true, is_separator));
 843 |             }
 844 |             return Ok(());
 845 |         }
 846 | 
 847 |         if !prev.map(is_separator).unwrap_or(false) {
 848 |             if self.stack.len() <= 1
 849 |                 || (prev != Some(',') && prev != Some('{'))
 850 |             {
 851 |                 self.push_token(Token::ZeroOrMore)?;
 852 |                 self.push_token(Token::ZeroOrMore)?;
 853 |                 return Ok(());
 854 |             }
 855 |         }
 856 |         let is_suffix = match self.peek() {
 857 |             None => {
 858 |                 assert!(self.bump().is_none());
 859 |                 true
 860 |             }
 861 |             Some(',') | Some('}') if self.stack.len() >= 2 => true,
 862 |             Some(c) if is_separator(c) => {
 863 |                 assert!(self.bump().map(is_separator).unwrap_or(false));
 864 |                 false
 865 |             }
 866 |             _ => {
 867 |                 self.push_token(Token::ZeroOrMore)?;
 868 |                 self.push_token(Token::ZeroOrMore)?;
 869 |                 return Ok(());
 870 |             }
 871 |         };
 872 |         match self.pop_token()? {
 873 |             Token::RecursivePrefix => {
 874 |                 self.push_token(Token::RecursivePrefix)?;
 875 |             }
 876 |             Token::RecursiveSuffix => {
 877 |                 self.push_token(Token::RecursiveSuffix)?;
 878 |             }
 879 |             _ => {
 880 |                 if is_suffix {
 881 |                     self.push_token(Token::RecursiveSuffix)?;
 882 |                 } else {
 883 |                     self.push_token(Token::RecursiveZeroOrMore)?;
 884 |                 }
 885 |             }
 886 |         }
 887 |         Ok(())
 888 |     }
 889 | 
 890 |     fn parse_class(&mut self) -> Result<(), Error> {
 891 |         fn add_to_last_range(
 892 |             glob: &str,
 893 |             r: &mut (char, char),
 894 |             add: char,
 895 |         ) -> Result<(), Error> {
 896 |             r.1 = add;
 897 |             if r.1 < r.0 {
 898 |                 Err(Error {
 899 |                     glob: Some(glob.to_string()),
 900 |                     kind: ErrorKind::InvalidRange(r.0, r.1),
 901 |                 })
 902 |             } else {
 903 |                 Ok(())
 904 |             }
 905 |         }
 906 |         let mut ranges = vec![];
 907 |         let negated = match self.chars.peek() {
 908 |             Some(&'!') | Some(&'^') => {
 909 |                 let bump = self.bump();
 910 |                 assert!(bump == Some('!') || bump == Some('^'));
 911 |                 true
 912 |             }
 913 |             _ => false,
 914 |         };
 915 |         let mut first = true;
 916 |         let mut in_range = false;
 917 |         loop {
 918 |             let c = match self.bump() {
 919 |                 Some(c) => c,
 920 |                 // The only way to successfully break this loop is to observe
 921 |                 // a ']'.
 922 |                 None => return Err(self.error(ErrorKind::UnclosedClass)),
 923 |             };
 924 |             match c {
 925 |                 ']' => {
 926 |                     if first {
 927 |                         ranges.push((']', ']'));
 928 |                     } else {
 929 |                         break;
 930 |                     }
 931 |                 }
 932 |                 '-' => {
 933 |                     if first {
 934 |                         ranges.push(('-', '-'));
 935 |                     } else if in_range {
 936 |                         // invariant: in_range is only set when there is
 937 |                         // already at least one character seen.
 938 |                         let r = ranges.last_mut().unwrap();
 939 |                         add_to_last_range(&self.glob, r, '-')?;
 940 |                         in_range = false;
 941 |                     } else {
 942 |                         assert!(!ranges.is_empty());
 943 |                         in_range = true;
 944 |                     }
 945 |                 }
 946 |                 c => {
 947 |                     if in_range {
 948 |                         // invariant: in_range is only set when there is
 949 |                         // already at least one character seen.
 950 |                         add_to_last_range(
 951 |                             &self.glob,
 952 |                             ranges.last_mut().unwrap(),
 953 |                             c,
 954 |                         )?;
 955 |                     } else {
 956 |                         ranges.push((c, c));
 957 |                     }
 958 |                     in_range = false;
 959 |                 }
 960 |             }
 961 |             first = false;
 962 |         }
 963 |         if in_range {
 964 |             // Means that the last character in the class was a '-', so add
 965 |             // it as a literal.
 966 |             ranges.push(('-', '-'));
 967 |         }
 968 |         self.push_token(Token::Class { negated: negated, ranges: ranges })
 969 |     }
 970 | 
 971 |     fn bump(&mut self) -> Option<char> {
 972 |         self.prev = self.cur;
 973 |         self.cur = self.chars.next();
 974 |         self.cur
 975 |     }
 976 | 
 977 |     fn peek(&mut self) -> Option<char> {
 978 |         self.chars.peek().map(|&ch| ch)
 979 |     }
 980 | }
 981 | 
 982 | #[cfg(test)]
 983 | fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
 984 |     needle.len() <= haystack.len() && needle == &haystack[..needle.len()]
 985 | }
 986 | 
 987 | #[cfg(test)]
 988 | fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
 989 |     if needle.len() > haystack.len() {
 990 |         return false;
 991 |     }
 992 |     needle == &haystack[haystack.len() - needle.len()..]
 993 | }
 994 | 
 995 | #[cfg(test)]
 996 | mod tests {
 997 |     use super::Token::*;
 998 |     use super::{Glob, GlobBuilder, Token};
 999 |     use {ErrorKind, GlobSetBuilder};
1000 | 
1001 |     #[derive(Clone, Copy, Debug, Default)]
1002 |     struct Options {
1003 |         casei: Option<bool>,
1004 |         litsep: Option<bool>,
1005 |         bsesc: Option<bool>,
1006 |     }
1007 | 
1008 |     macro_rules! syntax {
1009 |         ($name:ident, $pat:expr, $tokens:expr) => {
1010 |             #[test]
1011 |             fn $name() {
1012 |                 let pat = Glob::new($pat).unwrap();
1013 |                 assert_eq!($tokens, pat.tokens.0);
1014 |             }
1015 |         };
1016 |     }
1017 | 
1018 |     macro_rules! syntaxerr {
1019 |         ($name:ident, $pat:expr, $err:expr) => {
1020 |             #[test]
1021 |             fn $name() {
1022 |                 let err = Glob::new($pat).unwrap_err();
1023 |                 assert_eq!(&$err, err.kind());
1024 |             }
1025 |         };
1026 |     }
1027 | 
1028 |     macro_rules! toregex {
1029 |         ($name:ident, $pat:expr, $re:expr) => {
1030 |             toregex!($name, $pat, $re, Options::default());
1031 |         };
1032 |         ($name:ident, $pat:expr, $re:expr, $options:expr) => {
1033 |             #[test]
1034 |             fn $name() {
1035 |                 let mut builder = GlobBuilder::new($pat);
1036 |                 if let Some(casei) = $options.casei {
1037 |                     builder.case_insensitive(casei);
1038 |                 }
1039 |                 if let Some(litsep) = $options.litsep {
1040 |                     builder.literal_separator(litsep);
1041 |                 }
1042 |                 if let Some(bsesc) = $options.bsesc {
1043 |                     builder.backslash_escape(bsesc);
1044 |                 }
1045 |                 let pat = builder.build().unwrap();
1046 |                 assert_eq!(format!("(?-u){}", $re), pat.regex());
1047 |             }
1048 |         };
1049 |     }
1050 | 
1051 |     macro_rules! matches {
1052 |         ($name:ident, $pat:expr, $path:expr) => {
1053 |             matches!($name, $pat, $path, Options::default());
1054 |         };
1055 |         ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1056 |             #[test]
1057 |             fn $name() {
1058 |                 let mut builder = GlobBuilder::new($pat);
1059 |                 if let Some(casei) = $options.casei {
1060 |                     builder.case_insensitive(casei);
1061 |                 }
1062 |                 if let Some(litsep) = $options.litsep {
1063 |                     builder.literal_separator(litsep);
1064 |                 }
1065 |                 if let Some(bsesc) = $options.bsesc {
1066 |                     builder.backslash_escape(bsesc);
1067 |                 }
1068 |                 let pat = builder.build().unwrap();
1069 |                 let matcher = pat.compile_matcher();
1070 |                 let strategic = pat.compile_strategic_matcher();
1071 |                 let set = GlobSetBuilder::new().add(pat).build().unwrap();
1072 |                 assert!(matcher.is_match($path));
1073 |                 assert!(strategic.is_match($path));
1074 |                 assert!(set.is_match($path));
1075 |             }
1076 |         };
1077 |     }
1078 | 
1079 |     macro_rules! nmatches {
1080 |         ($name:ident, $pat:expr, $path:expr) => {
1081 |             nmatches!($name, $pat, $path, Options::default());
1082 |         };
1083 |         ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1084 |             #[test]
1085 |             fn $name() {
1086 |                 let mut builder = GlobBuilder::new($pat);
1087 |                 if let Some(casei) = $options.casei {
1088 |                     builder.case_insensitive(casei);
1089 |                 }
1090 |                 if let Some(litsep) = $options.litsep {
1091 |                     builder.literal_separator(litsep);
1092 |                 }
1093 |                 if let Some(bsesc) = $options.bsesc {
1094 |                     builder.backslash_escape(bsesc);
1095 |                 }
1096 |                 let pat = builder.build().unwrap();
1097 |                 let matcher = pat.compile_matcher();
1098 |                 let strategic = pat.compile_strategic_matcher();
1099 |                 let set = GlobSetBuilder::new().add(pat).build().unwrap();
1100 |                 assert!(!matcher.is_match($path));
1101 |                 assert!(!strategic.is_match($path));
1102 |                 assert!(!set.is_match($path));
1103 |             }
1104 |         };
1105 |     }
1106 | 
1107 |     fn s(string: &str) -> String {
1108 |         string.to_string()
1109 |     }
1110 | 
1111 |     fn class(s: char, e: char) -> Token {
1112 |         Class { negated: false, ranges: vec![(s, e)] }
1113 |     }
1114 | 
1115 |     fn classn(s: char, e: char) -> Token {
1116 |         Class { negated: true, ranges: vec![(s, e)] }
1117 |     }
1118 | 
1119 |     fn rclass(ranges: &[(char, char)]) -> Token {
1120 |         Class { negated: false, ranges: ranges.to_vec() }
1121 |     }
1122 | 
1123 |     fn rclassn(ranges: &[(char, char)]) -> Token {
1124 |         Class { negated: true, ranges: ranges.to_vec() }
1125 |     }
1126 | 
1127 |     syntax!(literal1, "a", vec![Literal('a')]);
1128 |     syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
1129 |     syntax!(any1, "?", vec![Any]);
1130 |     syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
1131 |     syntax!(seq1, "*", vec![ZeroOrMore]);
1132 |     syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
1133 |     syntax!(
1134 |         seq3,
1135 |         "*a*b*",
1136 |         vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
1137 |     );
1138 |     syntax!(rseq1, "**", vec![RecursivePrefix]);
1139 |     syntax!(rseq2, "**/", vec![RecursivePrefix]);
1140 |     syntax!(rseq3, "/**", vec![RecursiveSuffix]);
1141 |     syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
1142 |     syntax!(
1143 |         rseq5,
1144 |         "a/**/b",
1145 |         vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
1146 |     );
1147 |     syntax!(cls1, "[a]", vec![class('a', 'a')]);
1148 |     syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
1149 |     syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
1150 |     syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
1151 |     syntax!(cls5, "[-]", vec![class('-', '-')]);
1152 |     syntax!(cls6, "[]]", vec![class(']', ']')]);
1153 |     syntax!(cls7, "[*]", vec![class('*', '*')]);
1154 |     syntax!(cls8, "[!!]", vec![classn('!', '!')]);
1155 |     syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
1156 |     syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
1157 |     syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
1158 |     syntax!(
1159 |         cls12,
1160 |         "[-a-z-]",
1161 |         vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
1162 |     );
1163 |     syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
1164 |     syntax!(cls14, "[--z]", vec![class('-', 'z')]);
1165 |     syntax!(cls15, "[ --]", vec![class(' ', '-')]);
1166 |     syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
1167 |     syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
1168 |     syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
1169 |     syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
1170 |     syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
1171 |     syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1172 | 
1173 |     syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
1174 |     syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
1175 |     syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
1176 |     syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
1177 |     syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
1178 |     syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1179 | 
1180 |     const CASEI: Options =
1181 |         Options { casei: Some(true), litsep: None, bsesc: None };
1182 |     const SLASHLIT: Options =
1183 |         Options { casei: None, litsep: Some(true), bsesc: None };
1184 |     const NOBSESC: Options =
1185 |         Options { casei: None, litsep: None, bsesc: Some(false) };
1186 |     const BSESC: Options =
1187 |         Options { casei: None, litsep: None, bsesc: Some(true) };
1188 | 
1189 |     toregex!(re_casei, "a", "(?i)^a$", &CASEI);
1190 | 
1191 |     toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
1192 |     toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);
1193 | 
1194 |     toregex!(re1, "a", "^a$");
1195 |     toregex!(re2, "?", "^.$");
1196 |     toregex!(re3, "*", "^.*$");
1197 |     toregex!(re4, "a?", "^a.$");
1198 |     toregex!(re5, "?a", "^.a$");
1199 |     toregex!(re6, "a*", "^a.*$");
1200 |     toregex!(re7, "*a", "^.*a$");
1201 |     toregex!(re8, "[*]", r"^[\*]$");
1202 |     toregex!(re9, "[+]", r"^[\+]$");
1203 |     toregex!(re10, "+", r"^\+$");
1204 |     toregex!(re11, "☃", r"^\xe2\x98\x83$");
1205 |     toregex!(re12, "**", r"^.*$");
1206 |     toregex!(re13, "**/", r"^.*$");
1207 |     toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
1208 |     toregex!(re15, "**/**", r"^.*$");
1209 |     toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
1210 |     toregex!(re17, "**/**/**", r"^.*$");
1211 |     toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
1212 |     toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
1213 |     toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
1214 |     toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
1215 |     toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
1216 |     toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
1217 |     toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
1218 |     toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
1219 |     toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
1220 |     toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
1221 |     toregex!(re28, "a**", r"^a.*.*$");
1222 |     toregex!(re29, "**a", r"^.*.*a$");
1223 |     toregex!(re30, "a**b", r"^a.*.*b$");
1224 |     toregex!(re31, "***", r"^.*.*.*$");
1225 |     toregex!(re32, "/a**", r"^/a.*.*$");
1226 |     toregex!(re33, "/**a", r"^/.*.*a$");
1227 |     toregex!(re34, "/a**b", r"^/a.*.*b$");
1228 | 
1229 |     matches!(match1, "a", "a");
1230 |     matches!(match2, "a*b", "a_b");
1231 |     matches!(match3, "a*b*c", "abc");
1232 |     matches!(match4, "a*b*c", "a_b_c");
1233 |     matches!(match5, "a*b*c", "a___b___c");
1234 |     matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
1235 |     matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1236 |     matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
1237 |     matches!(match9, "*.rs", ".rs");
1238 |     matches!(match10, "☃", "☃");
1239 | 
1240 |     matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
1241 |     matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
1242 |     matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
1243 |     matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
1244 |     matches!(matchrec5, "**", "abcde");
1245 |     matches!(matchrec6, "**", "");
1246 |     matches!(matchrec7, "**", ".asdf");
1247 |     matches!(matchrec8, "**", "/x/.asdf");
1248 |     matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
1249 |     matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
1250 |     matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
1251 |     matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
1252 |     matches!(matchrec13, "**/test", "one/two/test");
1253 |     matches!(matchrec14, "**/test", "one/test");
1254 |     matches!(matchrec15, "**/test", "test");
1255 |     matches!(matchrec16, "/**/test", "/one/two/test");
1256 |     matches!(matchrec17, "/**/test", "/one/test");
1257 |     matches!(matchrec18, "/**/test", "/test");
1258 |     matches!(matchrec19, "**/.*", ".abc");
1259 |     matches!(matchrec20, "**/.*", "abc/.abc");
1260 |     matches!(matchrec21, ".*/**", ".abc");
1261 |     matches!(matchrec22, ".*/**", ".abc/abc");
1262 |     matches!(matchrec23, "foo/**", "foo");
1263 |     matches!(matchrec24, "**/foo/bar", "foo/bar");
1264 |     matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt");
1265 | 
1266 |     matches!(matchrange1, "a[0-9]b", "a0b");
1267 |     matches!(matchrange2, "a[0-9]b", "a9b");
1268 |     matches!(matchrange3, "a[!0-9]b", "a_b");
1269 |     matches!(matchrange4, "[a-z123]", "1");
1270 |     matches!(matchrange5, "[1a-z23]", "1");
1271 |     matches!(matchrange6, "[123a-z]", "1");
1272 |     matches!(matchrange7, "[abc-]", "-");
1273 |     matches!(matchrange8, "[-abc]", "-");
1274 |     matches!(matchrange9, "[-a-c]", "b");
1275 |     matches!(matchrange10, "[a-c-]", "b");
1276 |     matches!(matchrange11, "[-]", "-");
1277 |     matches!(matchrange12, "a[^0-9]b", "a_b");
1278 | 
1279 |     matches!(matchpat1, "*hello.txt", "hello.txt");
1280 |     matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
1281 |     matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
1282 |     matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
1283 |     matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
1284 |     matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
1285 |     matches!(
1286 |         matchpat7,
1287 |         "*some/path/to/hello.txt",
1288 |         "a/bigger/some/path/to/hello.txt"
1289 |     );
1290 | 
1291 |     matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
1292 | 
1293 |     matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
1294 |     matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
1295 |     matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
1296 |     matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1297 | 
1298 |     matches!(matchalt1, "a,b", "a,b");
1299 |     matches!(matchalt2, ",", ",");
1300 |     matches!(matchalt3, "{a,b}", "a");
1301 |     matches!(matchalt4, "{a,b}", "b");
1302 |     matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
1303 |     matches!(matchalt6, "{**/src/**,foo}", "foo");
1304 |     matches!(matchalt7, "{[}],foo}", "}");
1305 |     matches!(matchalt8, "{foo}", "foo");
1306 |     matches!(matchalt9, "{}", "");
1307 |     matches!(matchalt10, "{,}", "");
1308 |     matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
1309 |     matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
1310 |     matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
1311 | 
1312 |     matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
1313 |     #[cfg(unix)]
1314 |     nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
1315 |     #[cfg(not(unix))]
1316 |     nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
1317 |     nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
1318 |     matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
1319 |     #[cfg(unix)]
1320 |     nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1321 |     #[cfg(not(unix))]
1322 |     matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1323 | 
1324 |     matches!(matchbackslash1, "\\[", "[", BSESC);
1325 |     matches!(matchbackslash2, "\\?", "?", BSESC);
1326 |     matches!(matchbackslash3, "\\*", "*", BSESC);
1327 |     matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
1328 |     matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
1329 |     matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
1330 |     #[cfg(unix)]
1331 |     matches!(matchbackslash7, "\\a", "a");
1332 |     #[cfg(not(unix))]
1333 |     matches!(matchbackslash8, "\\a", "/a");
1334 | 
1335 |     nmatches!(matchnot1, "a*b*c", "abcd");
1336 |     nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
1337 |     nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
1338 |     nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
1339 |     nmatches!(matchnot5, "/**/test", "test");
1340 |     nmatches!(matchnot6, "/**/test", "/one/notthis");
1341 |     nmatches!(matchnot7, "/**/test", "/notthis");
1342 |     nmatches!(matchnot8, "**/.*", "ab.c");
1343 |     nmatches!(matchnot9, "**/.*", "abc/ab.c");
1344 |     nmatches!(matchnot10, ".*/**", "a.bc");
1345 |     nmatches!(matchnot11, ".*/**", "abc/a.bc");
1346 |     nmatches!(matchnot12, "a[0-9]b", "a_b");
1347 |     nmatches!(matchnot13, "a[!0-9]b", "a0b");
1348 |     nmatches!(matchnot14, "a[!0-9]b", "a9b");
1349 |     nmatches!(matchnot15, "[!-]", "-");
1350 |     nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
1351 |     nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
1352 |     nmatches!(
1353 |         matchnot18,
1354 |         "*some/path/to/hello.txt",
1355 |         "some/path/to/hello.txt-and-then-some"
1356 |     );
1357 |     nmatches!(
1358 |         matchnot19,
1359 |         "*some/path/to/hello.txt",
1360 |         "some/other/path/to/hello.txt"
1361 |     );
1362 |     nmatches!(matchnot20, "a", "foo/a");
1363 |     nmatches!(matchnot21, "./foo", "foo");
1364 |     nmatches!(matchnot22, "**/foo", "foofoo");
1365 |     nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
1366 |     nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
1367 |     nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
1368 |     nmatches!(
1369 |         matchnot26,
1370 |         "**/m4/ltoptions.m4",
1371 |         "csharp/src/packages/repositories.config",
1372 |         SLASHLIT
1373 |     );
1374 |     nmatches!(matchnot27, "a[^0-9]b", "a0b");
1375 |     nmatches!(matchnot28, "a[^0-9]b", "a9b");
1376 |     nmatches!(matchnot29, "[^-]", "-");
1377 |     nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
1378 |     nmatches!(
1379 |         matchrec31,
1380 |         "some/*/needle.txt",
1381 |         "some/one/two/needle.txt",
1382 |         SLASHLIT
1383 |     );
1384 |     nmatches!(
1385 |         matchrec32,
1386 |         "some/*/needle.txt",
1387 |         "some/one/two/three/needle.txt",
1388 |         SLASHLIT
1389 |     );
1390 | 
1391 |     macro_rules! extract {
1392 |         ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
1393 |             extract!($which, $name, $pat, $expect, Options::default());
1394 |         };
1395 |         ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
1396 |             #[test]
1397 |             fn $name() {
1398 |                 let mut builder = GlobBuilder::new($pat);
1399 |                 if let Some(casei) = $options.casei {
1400 |                     builder.case_insensitive(casei);
1401 |                 }
1402 |                 if let Some(litsep) = $options.litsep {
1403 |                     builder.literal_separator(litsep);
1404 |                 }
1405 |                 if let Some(bsesc) = $options.bsesc {
1406 |                     builder.backslash_escape(bsesc);
1407 |                 }
1408 |                 let pat = builder.build().unwrap();
1409 |                 assert_eq!($expect, pat.$which());
1410 |             }
1411 |         };
1412 |     }
1413 | 
1414 |     macro_rules! literal {
1415 |         ($($tt:tt)*) => { extract!(literal, $($tt)*); }
1416 |     }
1417 | 
1418 |     macro_rules! basetokens {
1419 |         ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
1420 |     }
1421 | 
1422 |     macro_rules! ext {
1423 |         ($($tt:tt)*) => { extract!(ext, $($tt)*); }
1424 |     }
1425 | 
1426 |     macro_rules! required_ext {
1427 |         ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
1428 |     }
1429 | 
1430 |     macro_rules! prefix {
1431 |         ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
1432 |     }
1433 | 
1434 |     macro_rules! suffix {
1435 |         ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
1436 |     }
1437 | 
1438 |     macro_rules! baseliteral {
1439 |         ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
1440 |     }
1441 | 
1442 |     literal!(extract_lit1, "foo", Some(s("foo")));
1443 |     literal!(extract_lit2, "foo", None, CASEI);
1444 |     literal!(extract_lit3, "/foo", Some(s("/foo")));
1445 |     literal!(extract_lit4, "/foo/", Some(s("/foo/")));
1446 |     literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
1447 |     literal!(extract_lit6, "*.foo", None);
1448 |     literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
1449 |     literal!(extract_lit8, "**/foo/bar", None);
1450 | 
1451 |     basetokens!(
1452 |         extract_basetoks1,
1453 |         "**/foo",
1454 |         Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
1455 |     );
1456 |     basetokens!(extract_basetoks2, "**/foo", None, CASEI);
1457 |     basetokens!(
1458 |         extract_basetoks3,
1459 |         "**/foo",
1460 |         Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
1461 |         SLASHLIT
1462 |     );
1463 |     basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
1464 |     basetokens!(extract_basetoks5, "*foo", None);
1465 |     basetokens!(extract_basetoks6, "**/fo*o", None);
1466 |     basetokens!(
1467 |         extract_basetoks7,
1468 |         "**/fo*o",
1469 |         Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
1470 |         SLASHLIT
1471 |     );
1472 | 
1473 |     ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
1474 |     ext!(extract_ext2, "**/*.rs.bak", None);
1475 |     ext!(extract_ext3, "*.rs", Some(s(".rs")));
1476 |     ext!(extract_ext4, "a*.rs", None);
1477 |     ext!(extract_ext5, "/*.c", None);
1478 |     ext!(extract_ext6, "*.c", None, SLASHLIT);
1479 |     ext!(extract_ext7, "*.c", Some(s(".c")));
1480 | 
1481 |     required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
1482 |     required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
1483 |     required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
1484 |     required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
1485 |     required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
1486 |     required_ext!(extract_req_ext6, "./rs", None);
1487 |     required_ext!(extract_req_ext7, "foo", None);
1488 |     required_ext!(extract_req_ext8, ".foo/", None);
1489 |     required_ext!(extract_req_ext9, "foo/", None);
1490 | 
1491 |     prefix!(extract_prefix1, "/foo", Some(s("/foo")));
1492 |     prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
1493 |     prefix!(extract_prefix3, "**/foo", None);
1494 |     prefix!(extract_prefix4, "foo/**", None);
1495 | 
1496 |     suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
1497 |     suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
1498 |     suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
1499 |     suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
1500 |     suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
1501 |     suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
1502 |     suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));
1503 | 
1504 |     baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
1505 |     baseliteral!(extract_baselit2, "foo", None);
1506 |     baseliteral!(extract_baselit3, "*foo", None);
1507 |     baseliteral!(extract_baselit4, "*/foo", None);
1508 | }
1509 | 


--------------------------------------------------------------------------------