├── .gitignore ├── rustfmt.toml ├── COPYING ├── .github └── workflows │ └── ci.yml ├── README.md ├── Cargo.toml ├── LICENSE-MIT ├── UNLICENSE ├── benches └── bench.rs └── src ├── pathutil.rs ├── lib.rs └── glob.rs /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | tags 3 | target 4 | /Cargo.lock 5 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 79 2 | use_small_heuristics = "max" 3 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | This project is dual-licensed under the Unlicense and MIT licenses. 2 | 3 | You may use this code under the terms of either license. 4 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - master 7 | jobs: 8 | test: 9 | name: test 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | build: [pinned] 14 | include: 15 | - build: pinned 16 | os: ubuntu-18.04 17 | rust: 1.28.0 18 | steps: 19 | - name: noop 20 | run: echo noop 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **This repository is not maintained. [globset is maintained in ripgrep](https://github.com/BurntSushi/ripgrep/tree/master/crates/globset).** 2 | 3 | globset 4 | ======= 5 | Cross platform single glob and glob set matching. Glob set matching is the 6 | process of matching one or more glob patterns against a single candidate path 7 | simultaneously, and returning all of the globs that matched. 8 | 9 | [![Build status](https://github.com/BurntSushi/globset/workflows/ci/badge.svg)](https://github.com/BurntSushi/globset/actions) 10 | [![](https://img.shields.io/crates/v/globset.svg)](https://crates.io/crates/globset) 11 | 12 | Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 13 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "globset" 3 | version = "0.4.4" #:version 4 | authors = ["Andrew Gallant "] 5 | description = """ 6 | Cross platform single glob and glob set matching. Glob set matching is the 7 | process of matching one or more glob patterns against a single candidate path 8 | simultaneously, and returning all of the globs that matched. 
9 | """ 10 | documentation = "https://docs.rs/globset" 11 | homepage = "https://github.com/BurntSushi/ripgrep/tree/master/globset" 12 | repository = "https://github.com/BurntSushi/ripgrep/tree/master/globset" 13 | readme = "README.md" 14 | keywords = ["regex", "glob", "multiple", "set", "pattern"] 15 | license = "Unlicense/MIT" 16 | 17 | [lib] 18 | name = "globset" 19 | bench = false 20 | 21 | [dependencies] 22 | aho-corasick = "0.7.3" 23 | bstr = { version = "0.2.0", default-features = false, features = ["std"] } 24 | fnv = "1.0.6" 25 | log = "0.4.5" 26 | regex = "1.1.5" 27 | 28 | [dev-dependencies] 29 | glob = "0.3.0" 30 | 31 | [features] 32 | simd-accel = [] 33 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Andrew Gallant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | This module benchmarks the glob implementation. For benchmarks on the ripgrep 3 | tool itself, see the benchsuite directory. 4 | */ 5 | #![feature(test)] 6 | 7 | extern crate glob; 8 | extern crate globset; 9 | #[macro_use] 10 | extern crate lazy_static; 11 | extern crate regex; 12 | extern crate test; 13 | 14 | use std::ffi::OsStr; 15 | use std::path::Path; 16 | 17 | use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder}; 18 | 19 | const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt"; 20 | const EXT_PAT: &'static str = "*.txt"; 21 | 22 | const SHORT: &'static str = "some/needle.txt"; 23 | const SHORT_PAT: &'static str = "some/**/needle.txt"; 24 | 25 | const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt"; 26 | const LONG_PAT: &'static str = "some/**/needle.txt"; 27 | 28 | fn new_glob(pat: &str) -> glob::Pattern { 29 | glob::Pattern::new(pat).unwrap() 30 | } 31 | 32 | fn new_reglob(pat: &str) -> GlobMatcher { 33 | Glob::new(pat).unwrap().compile_matcher() 34 | } 35 | 36 | fn new_reglob_many(pats: &[&str]) -> GlobSet { 37 | let mut builder = GlobSetBuilder::new(); 38 | for pat in pats { 39 | builder.add(Glob::new(pat).unwrap()); 40 | } 41 | builder.build().unwrap() 42 | } 43 | 44 | #[bench] 45 | fn ext_glob(b: &mut test::Bencher) { 46 | let pat = new_glob(EXT_PAT); 47 | b.iter(|| assert!(pat.matches(EXT))); 48 | } 49 | 50 | #[bench] 51 | fn ext_regex(b: &mut test::Bencher) { 52 | let set = new_reglob(EXT_PAT); 53 | let cand = Candidate::new(EXT); 54 | b.iter(|| assert!(set.is_match_candidate(&cand))); 55 | } 56 | 57 | #[bench] 58 | fn short_glob(b: &mut test::Bencher) { 59 | let pat = new_glob(SHORT_PAT); 60 | b.iter(|| assert!(pat.matches(SHORT))); 61 | } 62 | 63 | #[bench] 64 | fn short_regex(b: &mut test::Bencher) { 65 | let set = new_reglob(SHORT_PAT); 66 | let cand = Candidate::new(SHORT); 67 | b.iter(|| assert!(set.is_match_candidate(&cand))); 68 | } 69 | 70 | #[bench] 71 | fn long_glob(b: &mut test::Bencher) { 72 | let pat = new_glob(LONG_PAT); 73 | b.iter(|| assert!(pat.matches(LONG))); 74 | } 75 | 76 | #[bench] 77 | fn long_regex(b: &mut test::Bencher) { 78 | let set = new_reglob(LONG_PAT); 79 | let cand = Candidate::new(LONG); 80 | b.iter(|| assert!(set.is_match_candidate(&cand))); 81 | } 82 | 83 | const MANY_SHORT_GLOBS: &'static [&'static str] = &[ 84 | // Taken from a random .gitignore on my system. 
85 | ".*.swp", 86 | "tags", 87 | "target", 88 | "*.lock", 89 | "tmp", 90 | "*.csv", 91 | "*.fst", 92 | "*-got", 93 | "*.csv.idx", 94 | "words", 95 | "98m*", 96 | "dict", 97 | "test", 98 | "months", 99 | ]; 100 | 101 | const MANY_SHORT_SEARCH: &'static str = "98m-blah.csv.idx"; 102 | 103 | #[bench] 104 | fn many_short_glob(b: &mut test::Bencher) { 105 | let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| new_glob(s)).collect(); 106 | b.iter(|| { 107 | let mut count = 0; 108 | for pat in &pats { 109 | if pat.matches(MANY_SHORT_SEARCH) { 110 | count += 1; 111 | } 112 | } 113 | assert_eq!(2, count); 114 | }) 115 | } 116 | 117 | #[bench] 118 | fn many_short_regex_set(b: &mut test::Bencher) { 119 | let set = new_reglob_many(MANY_SHORT_GLOBS); 120 | b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count())); 121 | } 122 | -------------------------------------------------------------------------------- /src/pathutil.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | use bstr::{ByteSlice, ByteVec}; 4 | 5 | /// The final component of the path, if it is a normal file. 6 | /// 7 | /// If the path terminates in ., .., or consists solely of a root or prefix, 8 | /// file_name will return None. 9 | pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> { 10 | if path.is_empty() { 11 | return None; 12 | } else if path.last_byte() == Some(b'.') { 13 | return None; 14 | } 15 | let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0); 16 | Some(match *path { 17 | Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]), 18 | Cow::Owned(ref path) => { 19 | let mut path = path.clone(); 20 | path.drain_bytes(..last_slash); 21 | Cow::Owned(path) 22 | } 23 | }) 24 | } 25 | 26 | /// Return a file extension given a path's file name. 27 | /// 28 | /// Note that this does NOT match the semantics of std::path::Path::extension. 29 | /// Namely, the extension includes the `.` and matching is otherwise more 30 | /// liberal. Specifically, the extension is: 31 | /// 32 | /// * None, if the file name given is empty; 33 | /// * None, if there is no embedded `.`; 34 | /// * Otherwise, the portion of the file name starting with the final `.`. 35 | /// 36 | /// e.g., A file name of `.rs` has an extension `.rs`. 37 | /// 38 | /// N.B. This is done to make certain glob match optimizations easier. Namely, 39 | /// a pattern like `*.rs` is obviously trying to match files with a `rs` 40 | /// extension, but it also matches files like `.rs`, which doesn't have an 41 | /// extension according to std::path::Path::extension. 42 | pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> { 43 | if name.is_empty() { 44 | return None; 45 | } 46 | let last_dot_at = match name.rfind_byte(b'.') { 47 | None => return None, 48 | Some(i) => i, 49 | }; 50 | Some(match *name { 51 | Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]), 52 | Cow::Owned(ref name) => { 53 | let mut name = name.clone(); 54 | name.drain_bytes(..last_dot_at); 55 | Cow::Owned(name) 56 | } 57 | }) 58 | } 59 | 60 | /// Normalizes a path to use `/` as a separator everywhere, even on platforms 61 | /// that recognize other characters as separators. 62 | #[cfg(unix)] 63 | pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> { 64 | // UNIX only uses /, so we're good. 65 | path 66 | } 67 | 68 | /// Normalizes a path to use `/` as a separator everywhere, even on platforms 69 | /// that recognize other characters as separators.
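The two helpers above feed the public matcher: `file_name_ext` deliberately treats a bare `.rs` as having the extension `.rs`, and `normalize_path` rewrites platform separators to `/`. A small illustrative sketch (not part of the crate's sources) of how that shows up through the public API:

```rust
use globset::Glob;

fn main() -> Result<(), globset::Error> {
    let m = Glob::new("*.rs")?.compile_matcher();
    // Per file_name_ext above, `.rs` counts as having the `.rs` extension,
    // so `*.rs` matches it even though std::path::Path says it has none.
    assert!(m.is_match("foo.rs"));
    assert!(m.is_match(".rs"));

    // Per normalize_path above, candidate paths are rewritten to use `/` on
    // platforms with other separators, so native Windows paths still match.
    let m = Glob::new("foo/*.rs")?.compile_matcher();
    assert!(m.is_match("foo/bar.rs"));
    #[cfg(windows)]
    assert!(m.is_match(r"foo\bar.rs"));
    Ok(())
}
```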
70 | #[cfg(not(unix))] 71 | pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> { 72 | use std::path::is_separator; 73 | 74 | for i in 0..path.len() { 75 | if path[i] == b'/' || !is_separator(path[i] as char) { 76 | continue; 77 | } 78 | path.to_mut()[i] = b'/'; 79 | } 80 | path 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use std::borrow::Cow; 86 | 87 | use bstr::{ByteVec, B}; 88 | 89 | use super::{file_name_ext, normalize_path}; 90 | 91 | macro_rules! ext { 92 | ($name:ident, $file_name:expr, $ext:expr) => { 93 | #[test] 94 | fn $name() { 95 | let bs = Vec::from($file_name); 96 | let got = file_name_ext(&Cow::Owned(bs)); 97 | assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got); 98 | } 99 | }; 100 | } 101 | 102 | ext!(ext1, "foo.rs", Some(".rs")); 103 | ext!(ext2, ".rs", Some(".rs")); 104 | ext!(ext3, "..rs", Some(".rs")); 105 | ext!(ext4, "", None::<&str>); 106 | ext!(ext5, "foo", None::<&str>); 107 | 108 | macro_rules! normalize { 109 | ($name:ident, $path:expr, $expected:expr) => { 110 | #[test] 111 | fn $name() { 112 | let bs = Vec::from_slice($path); 113 | let got = normalize_path(Cow::Owned(bs)); 114 | assert_eq!($expected.to_vec(), got.into_owned()); 115 | } 116 | }; 117 | } 118 | 119 | normalize!(normal1, b"foo", b"foo"); 120 | normalize!(normal2, b"foo/bar", b"foo/bar"); 121 | #[cfg(unix)] 122 | normalize!(normal3, b"foo\\bar", b"foo\\bar"); 123 | #[cfg(not(unix))] 124 | normalize!(normal3, b"foo\\bar", b"foo/bar"); 125 | #[cfg(unix)] 126 | normalize!(normal4, b"foo\\bar/baz", b"foo\\bar/baz"); 127 | #[cfg(not(unix))] 128 | normalize!(normal4, b"foo\\bar/baz", b"foo/bar/baz"); 129 | } 130 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | The globset crate provides cross platform single glob and glob set matching. 3 | 4 | Glob set matching is the process of matching one or more glob patterns against 5 | a single candidate path simultaneously, and returning all of the globs that 6 | matched. For example, given this set of globs: 7 | 8 | ```ignore 9 | *.rs 10 | src/lib.rs 11 | src/**/foo.rs 12 | ``` 13 | 14 | and a path `src/bar/baz/foo.rs`, then the set would report the first and third 15 | globs as matching. 16 | 17 | # Example: one glob 18 | 19 | This example shows how to match a single glob against a single file path. 20 | 21 | ``` 22 | # fn example() -> Result<(), globset::Error> { 23 | use globset::Glob; 24 | 25 | let glob = Glob::new("*.rs")?.compile_matcher(); 26 | 27 | assert!(glob.is_match("foo.rs")); 28 | assert!(glob.is_match("foo/bar.rs")); 29 | assert!(!glob.is_match("Cargo.toml")); 30 | # Ok(()) } example().unwrap(); 31 | ``` 32 | 33 | # Example: configuring a glob matcher 34 | 35 | This example shows how to use a `GlobBuilder` to configure aspects of match 36 | semantics. In this example, we prevent wildcards from matching path separators. 37 | 38 | ``` 39 | # fn example() -> Result<(), globset::Error> { 40 | use globset::GlobBuilder; 41 | 42 | let glob = GlobBuilder::new("*.rs") 43 | .literal_separator(true).build()?.compile_matcher(); 44 | 45 | assert!(glob.is_match("foo.rs")); 46 | assert!(!glob.is_match("foo/bar.rs")); // no longer matches 47 | assert!(!glob.is_match("Cargo.toml")); 48 | # Ok(()) } example().unwrap(); 49 | ``` 50 | 51 | # Example: match multiple globs at once 52 | 53 | This example shows how to match multiple glob patterns at once. 
54 | 55 | ``` 56 | # fn example() -> Result<(), globset::Error> { 57 | use globset::{Glob, GlobSetBuilder}; 58 | 59 | let mut builder = GlobSetBuilder::new(); 60 | // A GlobBuilder can be used to configure each glob's match semantics 61 | // independently. 62 | builder.add(Glob::new("*.rs")?); 63 | builder.add(Glob::new("src/lib.rs")?); 64 | builder.add(Glob::new("src/**/foo.rs")?); 65 | let set = builder.build()?; 66 | 67 | assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]); 68 | # Ok(()) } example().unwrap(); 69 | ``` 70 | 71 | # Syntax 72 | 73 | Standard Unix-style glob syntax is supported: 74 | 75 | * `?` matches any single character. (If the `literal_separator` option is 76 | enabled, then `?` can never match a path separator.) 77 | * `*` matches zero or more characters. (If the `literal_separator` option is 78 | enabled, then `*` can never match a path separator.) 79 | * `**` recursively matches directories but are only legal in three situations. 80 | First, if the glob starts with \*\*/, then it matches 81 | all directories. For example, \*\*/foo matches `foo` 82 | and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with 83 | /\*\*, then it matches all sub-entries. For example, 84 | foo/\*\* matches `foo/a` and `foo/a/b`, but not `foo`. 85 | Thirdly, if the glob contains /\*\*/ anywhere within 86 | the pattern, then it matches zero or more directories. Using `**` anywhere 87 | else is illegal (N.B. the glob `**` is allowed and means "match everything"). 88 | * `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns. 89 | (N.B. Nesting `{...}` is not currently allowed.) 90 | * `[ab]` matches `a` or `b` where `a` and `b` are characters. Use 91 | `[!ab]` to match any character except for `a` and `b`. 92 | * Metacharacters such as `*` and `?` can be escaped with character class 93 | notation. e.g., `[*]` matches `*`. 94 | * When backslash escapes are enabled, a backslash (`\`) will escape all meta 95 | characters in a glob. If it precedes a non-meta character, then the slash is 96 | ignored. A `\\` will match a literal `\\`. Note that this mode is only 97 | enabled on Unix platforms by default, but can be enabled on any platform 98 | via the `backslash_escape` setting on `Glob`. 99 | 100 | A `GlobBuilder` can be used to prevent wildcards from matching path separators, 101 | or to enable case insensitive matching. 102 | */ 103 | 104 | #![deny(missing_docs)] 105 | 106 | extern crate aho_corasick; 107 | extern crate bstr; 108 | extern crate fnv; 109 | #[macro_use] 110 | extern crate log; 111 | extern crate regex; 112 | 113 | use std::borrow::Cow; 114 | use std::collections::{BTreeMap, HashMap}; 115 | use std::error::Error as StdError; 116 | use std::fmt; 117 | use std::hash; 118 | use std::path::Path; 119 | use std::str; 120 | 121 | use aho_corasick::AhoCorasick; 122 | use bstr::{ByteSlice, ByteVec, B}; 123 | use regex::bytes::{Regex, RegexBuilder, RegexSet}; 124 | 125 | use glob::MatchStrategy; 126 | pub use glob::{Glob, GlobBuilder, GlobMatcher}; 127 | use pathutil::{file_name, file_name_ext, normalize_path}; 128 | 129 | mod glob; 130 | mod pathutil; 131 | 132 | /// Represents an error that can occur when parsing a glob pattern. 133 | #[derive(Clone, Debug, Eq, PartialEq)] 134 | pub struct Error { 135 | /// The original glob provided by the caller. 136 | glob: Option, 137 | /// The kind of error. 138 | kind: ErrorKind, 139 | } 140 | 141 | /// The kind of error that can occur when parsing a glob pattern. 
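An editorial aside on the Syntax list above: alternation, character classes, and class-notation escapes can all be exercised through `Glob` directly. A brief sketch, not part of the crate's sources:

```rust
use globset::Glob;

fn main() -> Result<(), globset::Error> {
    // `{a,b}` alternation.
    let m = Glob::new("*.{rs,toml}")?.compile_matcher();
    assert!(m.is_match("lib.rs"));
    assert!(m.is_match("Cargo.toml"));
    assert!(!m.is_match("main.c"));

    // `[!ab]` negated character class.
    let m = Glob::new("[!.]*")?.compile_matcher();
    assert!(m.is_match("visible"));
    assert!(!m.is_match(".hidden"));

    // Escaping a metacharacter with class notation: `[*]` matches a literal `*`.
    let m = Glob::new("[*].rs")?.compile_matcher();
    assert!(m.is_match("*.rs"));
    assert!(!m.is_match("a.rs"));
    Ok(())
}
```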
142 | #[derive(Clone, Debug, Eq, PartialEq)] 143 | pub enum ErrorKind { 144 | /// **DEPRECATED**. 145 | /// 146 | /// This error used to occur for consistency with git's glob specification, 147 | /// but the specification now accepts all uses of `**`. When `**` does not 148 | /// appear adjacent to a path separator or at the beginning/end of a glob, 149 | /// it is now treated as two consecutive `*` patterns. As such, this error 150 | /// is no longer used. 151 | InvalidRecursive, 152 | /// Occurs when a character class (e.g., `[abc]`) is not closed. 153 | UnclosedClass, 154 | /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For 155 | /// example, if the range starts with a lexicographically larger character 156 | /// than it ends with. 157 | InvalidRange(char, char), 158 | /// Occurs when a `}` is found without a matching `{`. 159 | UnopenedAlternates, 160 | /// Occurs when a `{` is found without a matching `}`. 161 | UnclosedAlternates, 162 | /// Occurs when an alternating group is nested inside another alternating 163 | /// group, e.g., `{{a,b},{c,d}}`. 164 | NestedAlternates, 165 | /// Occurs when an unescaped '\' is found at the end of a glob. 166 | DanglingEscape, 167 | /// An error associated with parsing or compiling a regex. 168 | Regex(String), 169 | /// Hints that destructuring should not be exhaustive. 170 | /// 171 | /// This enum may grow additional variants, so this makes sure clients 172 | /// don't count on exhaustive matching. (Otherwise, adding a new variant 173 | /// could break existing code.) 174 | #[doc(hidden)] 175 | __Nonexhaustive, 176 | } 177 | 178 | impl StdError for Error { 179 | fn description(&self) -> &str { 180 | self.kind.description() 181 | } 182 | } 183 | 184 | impl Error { 185 | /// Return the glob that caused this error, if one exists. 186 | pub fn glob(&self) -> Option<&str> { 187 | self.glob.as_ref().map(|s| &**s) 188 | } 189 | 190 | /// Return the kind of this error. 
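Because `ErrorKind` reserves a hidden variant for future growth, callers should match it with a catch-all arm. A short, hedged sketch of inspecting a parse failure (not taken from the crate's own tests):

```rust
use globset::{ErrorKind, Glob};

fn main() {
    // An unclosed character class fails to parse.
    let err = Glob::new("src/[ab").unwrap_err();
    assert_eq!(err.glob(), Some("src/[ab"));
    match err.kind() {
        ErrorKind::UnclosedClass => println!("unclosed class, as expected"),
        kind => panic!("unexpected error kind: {:?}", kind),
    }
}
```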
191 | pub fn kind(&self) -> &ErrorKind { 192 | &self.kind 193 | } 194 | } 195 | 196 | impl ErrorKind { 197 | fn description(&self) -> &str { 198 | match *self { 199 | ErrorKind::InvalidRecursive => { 200 | "invalid use of **; must be one path component" 201 | } 202 | ErrorKind::UnclosedClass => { 203 | "unclosed character class; missing ']'" 204 | } 205 | ErrorKind::InvalidRange(_, _) => "invalid character range", 206 | ErrorKind::UnopenedAlternates => { 207 | "unopened alternate group; missing '{' \ 208 | (maybe escape '}' with '[}]'?)" 209 | } 210 | ErrorKind::UnclosedAlternates => { 211 | "unclosed alternate group; missing '}' \ 212 | (maybe escape '{' with '[{]'?)" 213 | } 214 | ErrorKind::NestedAlternates => { 215 | "nested alternate groups are not allowed" 216 | } 217 | ErrorKind::DanglingEscape => "dangling '\\'", 218 | ErrorKind::Regex(ref err) => err, 219 | ErrorKind::__Nonexhaustive => unreachable!(), 220 | } 221 | } 222 | } 223 | 224 | impl fmt::Display for Error { 225 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 226 | match self.glob { 227 | None => self.kind.fmt(f), 228 | Some(ref glob) => { 229 | write!(f, "error parsing glob '{}': {}", glob, self.kind) 230 | } 231 | } 232 | } 233 | } 234 | 235 | impl fmt::Display for ErrorKind { 236 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 237 | match *self { 238 | ErrorKind::InvalidRecursive 239 | | ErrorKind::UnclosedClass 240 | | ErrorKind::UnopenedAlternates 241 | | ErrorKind::UnclosedAlternates 242 | | ErrorKind::NestedAlternates 243 | | ErrorKind::DanglingEscape 244 | | ErrorKind::Regex(_) => write!(f, "{}", self.description()), 245 | ErrorKind::InvalidRange(s, e) => { 246 | write!(f, "invalid range; '{}' > '{}'", s, e) 247 | } 248 | ErrorKind::__Nonexhaustive => unreachable!(), 249 | } 250 | } 251 | } 252 | 253 | fn new_regex(pat: &str) -> Result { 254 | RegexBuilder::new(pat) 255 | .dot_matches_new_line(true) 256 | .size_limit(10 * (1 << 20)) 257 | .dfa_size_limit(10 * (1 << 20)) 258 | .build() 259 | .map_err(|err| Error { 260 | glob: Some(pat.to_string()), 261 | kind: ErrorKind::Regex(err.to_string()), 262 | }) 263 | } 264 | 265 | fn new_regex_set(pats: I) -> Result 266 | where 267 | S: AsRef, 268 | I: IntoIterator, 269 | { 270 | RegexSet::new(pats).map_err(|err| Error { 271 | glob: None, 272 | kind: ErrorKind::Regex(err.to_string()), 273 | }) 274 | } 275 | 276 | type Fnv = hash::BuildHasherDefault; 277 | 278 | /// GlobSet represents a group of globs that can be matched together in a 279 | /// single pass. 280 | #[derive(Clone, Debug)] 281 | pub struct GlobSet { 282 | len: usize, 283 | strats: Vec, 284 | } 285 | 286 | impl GlobSet { 287 | /// Create an empty `GlobSet`. An empty set matches nothing. 288 | #[inline] 289 | pub fn empty() -> GlobSet { 290 | GlobSet { len: 0, strats: vec![] } 291 | } 292 | 293 | /// Returns true if this set is empty, and therefore matches nothing. 294 | #[inline] 295 | pub fn is_empty(&self) -> bool { 296 | self.len == 0 297 | } 298 | 299 | /// Returns the number of globs in this set. 300 | #[inline] 301 | pub fn len(&self) -> usize { 302 | self.len 303 | } 304 | 305 | /// Returns true if any glob in this set matches the path given. 306 | pub fn is_match>(&self, path: P) -> bool { 307 | self.is_match_candidate(&Candidate::new(path.as_ref())) 308 | } 309 | 310 | /// Returns true if any glob in this set matches the path given. 311 | /// 312 | /// This takes a Candidate as input, which can be used to amortize the 313 | /// cost of preparing a path for matching. 
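As the documentation above notes, building a `Candidate` once amortizes path preparation (normalization, basename and extension extraction) across several matchers. A sketch of that pattern, using only the public API defined in this crate:

```rust
use globset::{Candidate, Glob, GlobSetBuilder};

fn main() -> Result<(), globset::Error> {
    let mut builder = GlobSetBuilder::new();
    builder.add(Glob::new("*.rs")?);
    builder.add(Glob::new("src/**")?);
    let set = builder.build()?;
    let single = Glob::new("**/*.toml")?.compile_matcher();

    // Prepare the path once, then hand the same candidate to every matcher.
    let cand = Candidate::new("src/lib.rs");
    assert!(set.is_match_candidate(&cand));
    assert!(!single.is_match_candidate(&cand));
    Ok(())
}
```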
314 | pub fn is_match_candidate(&self, path: &Candidate) -> bool { 315 | if self.is_empty() { 316 | return false; 317 | } 318 | for strat in &self.strats { 319 | if strat.is_match(path) { 320 | return true; 321 | } 322 | } 323 | false 324 | } 325 | 326 | /// Returns the sequence number of every glob pattern that matches the 327 | /// given path. 328 | pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> { 329 | self.matches_candidate(&Candidate::new(path.as_ref())) 330 | } 331 | 332 | /// Returns the sequence number of every glob pattern that matches the 333 | /// given path. 334 | /// 335 | /// This takes a Candidate as input, which can be used to amortize the 336 | /// cost of preparing a path for matching. 337 | pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> { 338 | let mut into = vec![]; 339 | if self.is_empty() { 340 | return into; 341 | } 342 | self.matches_candidate_into(path, &mut into); 343 | into 344 | } 345 | 346 | /// Adds the sequence number of every glob pattern that matches the given 347 | /// path to the vec given. 348 | /// 349 | /// `into` is cleared before matching begins, and contains the set of 350 | /// sequence numbers (in ascending order) after matching ends. If no globs 351 | /// were matched, then `into` will be empty. 352 | pub fn matches_into<P: AsRef<Path>>( 353 | &self, 354 | path: P, 355 | into: &mut Vec<usize>, 356 | ) { 357 | self.matches_candidate_into(&Candidate::new(path.as_ref()), into); 358 | } 359 | 360 | /// Adds the sequence number of every glob pattern that matches the given 361 | /// path to the vec given. 362 | /// 363 | /// `into` is cleared before matching begins, and contains the set of 364 | /// sequence numbers (in ascending order) after matching ends. If no globs 365 | /// were matched, then `into` will be empty. 366 | /// 367 | /// This takes a Candidate as input, which can be used to amortize the 368 | /// cost of preparing a path for matching.
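Because `matches_into` clears the output vector before writing the (sorted, deduplicated) indices, a single buffer can be reused across many paths to avoid repeated allocation. A usage sketch, not from the crate's sources:

```rust
use globset::{Glob, GlobSetBuilder};

fn main() -> Result<(), globset::Error> {
    let mut builder = GlobSetBuilder::new();
    builder.add(Glob::new("*.rs")?);
    builder.add(Glob::new("src/**/*.rs")?);
    let set = builder.build()?;

    // Reuse one buffer; matches_into clears it on every call.
    let mut hits = Vec::new();
    for path in &["src/lib.rs", "build.rs", "README.md"] {
        set.matches_into(path, &mut hits);
        println!("{}: matched glob indices {:?}", path, hits);
    }
    Ok(())
}
```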
369 | pub fn matches_candidate_into( 370 | &self, 371 | path: &Candidate, 372 | into: &mut Vec, 373 | ) { 374 | into.clear(); 375 | if self.is_empty() { 376 | return; 377 | } 378 | for strat in &self.strats { 379 | strat.matches_into(path, into); 380 | } 381 | into.sort(); 382 | into.dedup(); 383 | } 384 | 385 | fn new(pats: &[Glob]) -> Result { 386 | if pats.is_empty() { 387 | return Ok(GlobSet { len: 0, strats: vec![] }); 388 | } 389 | let mut lits = LiteralStrategy::new(); 390 | let mut base_lits = BasenameLiteralStrategy::new(); 391 | let mut exts = ExtensionStrategy::new(); 392 | let mut prefixes = MultiStrategyBuilder::new(); 393 | let mut suffixes = MultiStrategyBuilder::new(); 394 | let mut required_exts = RequiredExtensionStrategyBuilder::new(); 395 | let mut regexes = MultiStrategyBuilder::new(); 396 | for (i, p) in pats.iter().enumerate() { 397 | match MatchStrategy::new(p) { 398 | MatchStrategy::Literal(lit) => { 399 | lits.add(i, lit); 400 | } 401 | MatchStrategy::BasenameLiteral(lit) => { 402 | base_lits.add(i, lit); 403 | } 404 | MatchStrategy::Extension(ext) => { 405 | exts.add(i, ext); 406 | } 407 | MatchStrategy::Prefix(prefix) => { 408 | prefixes.add(i, prefix); 409 | } 410 | MatchStrategy::Suffix { suffix, component } => { 411 | if component { 412 | lits.add(i, suffix[1..].to_string()); 413 | } 414 | suffixes.add(i, suffix); 415 | } 416 | MatchStrategy::RequiredExtension(ext) => { 417 | required_exts.add(i, ext, p.regex().to_owned()); 418 | } 419 | MatchStrategy::Regex => { 420 | debug!("glob converted to regex: {:?}", p); 421 | regexes.add(i, p.regex().to_owned()); 422 | } 423 | } 424 | } 425 | debug!( 426 | "built glob set; {} literals, {} basenames, {} extensions, \ 427 | {} prefixes, {} suffixes, {} required extensions, {} regexes", 428 | lits.0.len(), 429 | base_lits.0.len(), 430 | exts.0.len(), 431 | prefixes.literals.len(), 432 | suffixes.literals.len(), 433 | required_exts.0.len(), 434 | regexes.literals.len() 435 | ); 436 | Ok(GlobSet { 437 | len: pats.len(), 438 | strats: vec![ 439 | GlobSetMatchStrategy::Extension(exts), 440 | GlobSetMatchStrategy::BasenameLiteral(base_lits), 441 | GlobSetMatchStrategy::Literal(lits), 442 | GlobSetMatchStrategy::Suffix(suffixes.suffix()), 443 | GlobSetMatchStrategy::Prefix(prefixes.prefix()), 444 | GlobSetMatchStrategy::RequiredExtension( 445 | required_exts.build()?, 446 | ), 447 | GlobSetMatchStrategy::Regex(regexes.regex_set()?), 448 | ], 449 | }) 450 | } 451 | } 452 | 453 | /// GlobSetBuilder builds a group of patterns that can be used to 454 | /// simultaneously match a file path. 455 | #[derive(Clone, Debug)] 456 | pub struct GlobSetBuilder { 457 | pats: Vec, 458 | } 459 | 460 | impl GlobSetBuilder { 461 | /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new 462 | /// patterns. Once all patterns have been added, `build` should be called 463 | /// to produce a `GlobSet`, which can then be used for matching. 464 | pub fn new() -> GlobSetBuilder { 465 | GlobSetBuilder { pats: vec![] } 466 | } 467 | 468 | /// Builds a new matcher from all of the glob patterns added so far. 469 | /// 470 | /// Once a matcher is built, no new patterns can be added to it. 471 | pub fn build(&self) -> Result { 472 | GlobSet::new(&self.pats) 473 | } 474 | 475 | /// Add a new pattern to this set. 476 | pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder { 477 | self.pats.push(pat); 478 | self 479 | } 480 | } 481 | 482 | /// A candidate path for matching. 
483 | /// 484 | /// All glob matching in this crate operates on `Candidate` values. 485 | /// Constructing candidates has a very small cost associated with it, so 486 | /// callers may find it beneficial to amortize that cost when matching a single 487 | /// path against multiple globs or sets of globs. 488 | #[derive(Clone, Debug)] 489 | pub struct Candidate<'a> { 490 | path: Cow<'a, [u8]>, 491 | basename: Cow<'a, [u8]>, 492 | ext: Cow<'a, [u8]>, 493 | } 494 | 495 | impl<'a> Candidate<'a> { 496 | /// Create a new candidate for matching from the given path. 497 | pub fn new + ?Sized>(path: &'a P) -> Candidate<'a> { 498 | let path = normalize_path(Vec::from_path_lossy(path.as_ref())); 499 | let basename = file_name(&path).unwrap_or(Cow::Borrowed(B(""))); 500 | let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B(""))); 501 | Candidate { path: path, basename: basename, ext: ext } 502 | } 503 | 504 | fn path_prefix(&self, max: usize) -> &[u8] { 505 | if self.path.len() <= max { 506 | &*self.path 507 | } else { 508 | &self.path[..max] 509 | } 510 | } 511 | 512 | fn path_suffix(&self, max: usize) -> &[u8] { 513 | if self.path.len() <= max { 514 | &*self.path 515 | } else { 516 | &self.path[self.path.len() - max..] 517 | } 518 | } 519 | } 520 | 521 | #[derive(Clone, Debug)] 522 | enum GlobSetMatchStrategy { 523 | Literal(LiteralStrategy), 524 | BasenameLiteral(BasenameLiteralStrategy), 525 | Extension(ExtensionStrategy), 526 | Prefix(PrefixStrategy), 527 | Suffix(SuffixStrategy), 528 | RequiredExtension(RequiredExtensionStrategy), 529 | Regex(RegexSetStrategy), 530 | } 531 | 532 | impl GlobSetMatchStrategy { 533 | fn is_match(&self, candidate: &Candidate) -> bool { 534 | use self::GlobSetMatchStrategy::*; 535 | match *self { 536 | Literal(ref s) => s.is_match(candidate), 537 | BasenameLiteral(ref s) => s.is_match(candidate), 538 | Extension(ref s) => s.is_match(candidate), 539 | Prefix(ref s) => s.is_match(candidate), 540 | Suffix(ref s) => s.is_match(candidate), 541 | RequiredExtension(ref s) => s.is_match(candidate), 542 | Regex(ref s) => s.is_match(candidate), 543 | } 544 | } 545 | 546 | fn matches_into(&self, candidate: &Candidate, matches: &mut Vec) { 547 | use self::GlobSetMatchStrategy::*; 548 | match *self { 549 | Literal(ref s) => s.matches_into(candidate, matches), 550 | BasenameLiteral(ref s) => s.matches_into(candidate, matches), 551 | Extension(ref s) => s.matches_into(candidate, matches), 552 | Prefix(ref s) => s.matches_into(candidate, matches), 553 | Suffix(ref s) => s.matches_into(candidate, matches), 554 | RequiredExtension(ref s) => s.matches_into(candidate, matches), 555 | Regex(ref s) => s.matches_into(candidate, matches), 556 | } 557 | } 558 | } 559 | 560 | #[derive(Clone, Debug)] 561 | struct LiteralStrategy(BTreeMap, Vec>); 562 | 563 | impl LiteralStrategy { 564 | fn new() -> LiteralStrategy { 565 | LiteralStrategy(BTreeMap::new()) 566 | } 567 | 568 | fn add(&mut self, global_index: usize, lit: String) { 569 | self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); 570 | } 571 | 572 | fn is_match(&self, candidate: &Candidate) -> bool { 573 | self.0.contains_key(candidate.path.as_bytes()) 574 | } 575 | 576 | #[inline(never)] 577 | fn matches_into(&self, candidate: &Candidate, matches: &mut Vec) { 578 | if let Some(hits) = self.0.get(candidate.path.as_bytes()) { 579 | matches.extend(hits); 580 | } 581 | } 582 | } 583 | 584 | #[derive(Clone, Debug)] 585 | struct BasenameLiteralStrategy(BTreeMap, Vec>); 586 | 587 | impl BasenameLiteralStrategy { 588 | fn 
new() -> BasenameLiteralStrategy { 589 | BasenameLiteralStrategy(BTreeMap::new()) 590 | } 591 | 592 | fn add(&mut self, global_index: usize, lit: String) { 593 | self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); 594 | } 595 | 596 | fn is_match(&self, candidate: &Candidate) -> bool { 597 | if candidate.basename.is_empty() { 598 | return false; 599 | } 600 | self.0.contains_key(candidate.basename.as_bytes()) 601 | } 602 | 603 | #[inline(never)] 604 | fn matches_into(&self, candidate: &Candidate, matches: &mut Vec) { 605 | if candidate.basename.is_empty() { 606 | return; 607 | } 608 | if let Some(hits) = self.0.get(candidate.basename.as_bytes()) { 609 | matches.extend(hits); 610 | } 611 | } 612 | } 613 | 614 | #[derive(Clone, Debug)] 615 | struct ExtensionStrategy(HashMap, Vec, Fnv>); 616 | 617 | impl ExtensionStrategy { 618 | fn new() -> ExtensionStrategy { 619 | ExtensionStrategy(HashMap::with_hasher(Fnv::default())) 620 | } 621 | 622 | fn add(&mut self, global_index: usize, ext: String) { 623 | self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index); 624 | } 625 | 626 | fn is_match(&self, candidate: &Candidate) -> bool { 627 | if candidate.ext.is_empty() { 628 | return false; 629 | } 630 | self.0.contains_key(candidate.ext.as_bytes()) 631 | } 632 | 633 | #[inline(never)] 634 | fn matches_into(&self, candidate: &Candidate, matches: &mut Vec) { 635 | if candidate.ext.is_empty() { 636 | return; 637 | } 638 | if let Some(hits) = self.0.get(candidate.ext.as_bytes()) { 639 | matches.extend(hits); 640 | } 641 | } 642 | } 643 | 644 | #[derive(Clone, Debug)] 645 | struct PrefixStrategy { 646 | matcher: AhoCorasick, 647 | map: Vec, 648 | longest: usize, 649 | } 650 | 651 | impl PrefixStrategy { 652 | fn is_match(&self, candidate: &Candidate) -> bool { 653 | let path = candidate.path_prefix(self.longest); 654 | for m in self.matcher.find_overlapping_iter(path) { 655 | if m.start() == 0 { 656 | return true; 657 | } 658 | } 659 | false 660 | } 661 | 662 | fn matches_into(&self, candidate: &Candidate, matches: &mut Vec) { 663 | let path = candidate.path_prefix(self.longest); 664 | for m in self.matcher.find_overlapping_iter(path) { 665 | if m.start() == 0 { 666 | matches.push(self.map[m.pattern()]); 667 | } 668 | } 669 | } 670 | } 671 | 672 | #[derive(Clone, Debug)] 673 | struct SuffixStrategy { 674 | matcher: AhoCorasick, 675 | map: Vec, 676 | longest: usize, 677 | } 678 | 679 | impl SuffixStrategy { 680 | fn is_match(&self, candidate: &Candidate) -> bool { 681 | let path = candidate.path_suffix(self.longest); 682 | for m in self.matcher.find_overlapping_iter(path) { 683 | if m.end() == path.len() { 684 | return true; 685 | } 686 | } 687 | false 688 | } 689 | 690 | fn matches_into(&self, candidate: &Candidate, matches: &mut Vec) { 691 | let path = candidate.path_suffix(self.longest); 692 | for m in self.matcher.find_overlapping_iter(path) { 693 | if m.end() == path.len() { 694 | matches.push(self.map[m.pattern()]); 695 | } 696 | } 697 | } 698 | } 699 | 700 | #[derive(Clone, Debug)] 701 | struct RequiredExtensionStrategy(HashMap, Vec<(usize, Regex)>, Fnv>); 702 | 703 | impl RequiredExtensionStrategy { 704 | fn is_match(&self, candidate: &Candidate) -> bool { 705 | if candidate.ext.is_empty() { 706 | return false; 707 | } 708 | match self.0.get(candidate.ext.as_bytes()) { 709 | None => false, 710 | Some(regexes) => { 711 | for &(_, ref re) in regexes { 712 | if re.is_match(candidate.path.as_bytes()) { 713 | return true; 714 | } 715 | } 716 | false 717 | } 718 | } 719 | } 
720 | 721 | #[inline(never)] 722 | fn matches_into(&self, candidate: &Candidate, matches: &mut Vec) { 723 | if candidate.ext.is_empty() { 724 | return; 725 | } 726 | if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) { 727 | for &(global_index, ref re) in regexes { 728 | if re.is_match(candidate.path.as_bytes()) { 729 | matches.push(global_index); 730 | } 731 | } 732 | } 733 | } 734 | } 735 | 736 | #[derive(Clone, Debug)] 737 | struct RegexSetStrategy { 738 | matcher: RegexSet, 739 | map: Vec, 740 | } 741 | 742 | impl RegexSetStrategy { 743 | fn is_match(&self, candidate: &Candidate) -> bool { 744 | self.matcher.is_match(candidate.path.as_bytes()) 745 | } 746 | 747 | fn matches_into(&self, candidate: &Candidate, matches: &mut Vec) { 748 | for i in self.matcher.matches(candidate.path.as_bytes()) { 749 | matches.push(self.map[i]); 750 | } 751 | } 752 | } 753 | 754 | #[derive(Clone, Debug)] 755 | struct MultiStrategyBuilder { 756 | literals: Vec, 757 | map: Vec, 758 | longest: usize, 759 | } 760 | 761 | impl MultiStrategyBuilder { 762 | fn new() -> MultiStrategyBuilder { 763 | MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 } 764 | } 765 | 766 | fn add(&mut self, global_index: usize, literal: String) { 767 | if literal.len() > self.longest { 768 | self.longest = literal.len(); 769 | } 770 | self.map.push(global_index); 771 | self.literals.push(literal); 772 | } 773 | 774 | fn prefix(self) -> PrefixStrategy { 775 | PrefixStrategy { 776 | matcher: AhoCorasick::new_auto_configured(&self.literals), 777 | map: self.map, 778 | longest: self.longest, 779 | } 780 | } 781 | 782 | fn suffix(self) -> SuffixStrategy { 783 | SuffixStrategy { 784 | matcher: AhoCorasick::new_auto_configured(&self.literals), 785 | map: self.map, 786 | longest: self.longest, 787 | } 788 | } 789 | 790 | fn regex_set(self) -> Result { 791 | Ok(RegexSetStrategy { 792 | matcher: new_regex_set(self.literals)?, 793 | map: self.map, 794 | }) 795 | } 796 | } 797 | 798 | #[derive(Clone, Debug)] 799 | struct RequiredExtensionStrategyBuilder( 800 | HashMap, Vec<(usize, String)>>, 801 | ); 802 | 803 | impl RequiredExtensionStrategyBuilder { 804 | fn new() -> RequiredExtensionStrategyBuilder { 805 | RequiredExtensionStrategyBuilder(HashMap::new()) 806 | } 807 | 808 | fn add(&mut self, global_index: usize, ext: String, regex: String) { 809 | self.0 810 | .entry(ext.into_bytes()) 811 | .or_insert(vec![]) 812 | .push((global_index, regex)); 813 | } 814 | 815 | fn build(self) -> Result { 816 | let mut exts = HashMap::with_hasher(Fnv::default()); 817 | for (ext, regexes) in self.0.into_iter() { 818 | exts.insert(ext.clone(), vec![]); 819 | for (global_index, regex) in regexes { 820 | let compiled = new_regex(®ex)?; 821 | exts.get_mut(&ext).unwrap().push((global_index, compiled)); 822 | } 823 | } 824 | Ok(RequiredExtensionStrategy(exts)) 825 | } 826 | } 827 | 828 | #[cfg(test)] 829 | mod tests { 830 | use super::GlobSetBuilder; 831 | use glob::Glob; 832 | 833 | #[test] 834 | fn set_works() { 835 | let mut builder = GlobSetBuilder::new(); 836 | builder.add(Glob::new("src/**/*.rs").unwrap()); 837 | builder.add(Glob::new("*.c").unwrap()); 838 | builder.add(Glob::new("src/lib.rs").unwrap()); 839 | let set = builder.build().unwrap(); 840 | 841 | assert!(set.is_match("foo.c")); 842 | assert!(set.is_match("src/foo.c")); 843 | assert!(!set.is_match("foo.rs")); 844 | assert!(!set.is_match("tests/foo.rs")); 845 | assert!(set.is_match("src/foo.rs")); 846 | assert!(set.is_match("src/grep/src/main.rs")); 847 | 848 | let matches = 
set.matches("src/lib.rs"); 849 | assert_eq!(2, matches.len()); 850 | assert_eq!(0, matches[0]); 851 | assert_eq!(2, matches[1]); 852 | } 853 | 854 | #[test] 855 | fn empty_set_works() { 856 | let set = GlobSetBuilder::new().build().unwrap(); 857 | assert!(!set.is_match("")); 858 | assert!(!set.is_match("a")); 859 | } 860 | } 861 | -------------------------------------------------------------------------------- /src/glob.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::hash; 3 | use std::iter; 4 | use std::ops::{Deref, DerefMut}; 5 | use std::path::{is_separator, Path}; 6 | use std::str; 7 | 8 | use regex; 9 | use regex::bytes::Regex; 10 | 11 | use {new_regex, Candidate, Error, ErrorKind}; 12 | 13 | /// Describes a matching strategy for a particular pattern. 14 | /// 15 | /// This provides a way to more quickly determine whether a pattern matches 16 | /// a particular file path in a way that scales with a large number of 17 | /// patterns. For example, if many patterns are of the form `*.ext`, then it's 18 | /// possible to test whether any of those patterns matches by looking up a 19 | /// file path's extension in a hash table. 20 | #[derive(Clone, Debug, Eq, PartialEq)] 21 | pub enum MatchStrategy { 22 | /// A pattern matches if and only if the entire file path matches this 23 | /// literal string. 24 | Literal(String), 25 | /// A pattern matches if and only if the file path's basename matches this 26 | /// literal string. 27 | BasenameLiteral(String), 28 | /// A pattern matches if and only if the file path's extension matches this 29 | /// literal string. 30 | Extension(String), 31 | /// A pattern matches if and only if this prefix literal is a prefix of the 32 | /// candidate file path. 33 | Prefix(String), 34 | /// A pattern matches if and only if this prefix literal is a prefix of the 35 | /// candidate file path. 36 | /// 37 | /// An exception: if `component` is true, then `suffix` must appear at the 38 | /// beginning of a file path or immediately following a `/`. 39 | Suffix { 40 | /// The actual suffix. 41 | suffix: String, 42 | /// Whether this must start at the beginning of a path component. 43 | component: bool, 44 | }, 45 | /// A pattern matches only if the given extension matches the file path's 46 | /// extension. Note that this is a necessary but NOT sufficient criterion. 47 | /// Namely, if the extension matches, then a full regex search is still 48 | /// required. 49 | RequiredExtension(String), 50 | /// A regex needs to be used for matching. 51 | Regex, 52 | } 53 | 54 | impl MatchStrategy { 55 | /// Returns a matching strategy for the given pattern. 56 | pub fn new(pat: &Glob) -> MatchStrategy { 57 | if let Some(lit) = pat.basename_literal() { 58 | MatchStrategy::BasenameLiteral(lit) 59 | } else if let Some(lit) = pat.literal() { 60 | MatchStrategy::Literal(lit) 61 | } else if let Some(ext) = pat.ext() { 62 | MatchStrategy::Extension(ext) 63 | } else if let Some(prefix) = pat.prefix() { 64 | MatchStrategy::Prefix(prefix) 65 | } else if let Some((suffix, component)) = pat.suffix() { 66 | MatchStrategy::Suffix { suffix: suffix, component: component } 67 | } else if let Some(ext) = pat.required_ext() { 68 | MatchStrategy::RequiredExtension(ext) 69 | } else { 70 | MatchStrategy::Regex 71 | } 72 | } 73 | } 74 | 75 | /// Glob represents a successfully parsed shell glob pattern. 
76 | /// 77 | /// It cannot be used directly to match file paths, but it can be converted 78 | /// to a regular expression string or a matcher. 79 | #[derive(Clone, Debug, Eq)] 80 | pub struct Glob { 81 | glob: String, 82 | re: String, 83 | opts: GlobOptions, 84 | tokens: Tokens, 85 | } 86 | 87 | impl PartialEq for Glob { 88 | fn eq(&self, other: &Glob) -> bool { 89 | self.glob == other.glob && self.opts == other.opts 90 | } 91 | } 92 | 93 | impl hash::Hash for Glob { 94 | fn hash(&self, state: &mut H) { 95 | self.glob.hash(state); 96 | self.opts.hash(state); 97 | } 98 | } 99 | 100 | impl fmt::Display for Glob { 101 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 102 | self.glob.fmt(f) 103 | } 104 | } 105 | 106 | /// A matcher for a single pattern. 107 | #[derive(Clone, Debug)] 108 | pub struct GlobMatcher { 109 | /// The underlying pattern. 110 | pat: Glob, 111 | /// The pattern, as a compiled regex. 112 | re: Regex, 113 | } 114 | 115 | impl GlobMatcher { 116 | /// Tests whether the given path matches this pattern or not. 117 | pub fn is_match>(&self, path: P) -> bool { 118 | self.is_match_candidate(&Candidate::new(path.as_ref())) 119 | } 120 | 121 | /// Tests whether the given path matches this pattern or not. 122 | pub fn is_match_candidate(&self, path: &Candidate) -> bool { 123 | self.re.is_match(&path.path) 124 | } 125 | } 126 | 127 | /// A strategic matcher for a single pattern. 128 | #[cfg(test)] 129 | #[derive(Clone, Debug)] 130 | struct GlobStrategic { 131 | /// The match strategy to use. 132 | strategy: MatchStrategy, 133 | /// The underlying pattern. 134 | pat: Glob, 135 | /// The pattern, as a compiled regex. 136 | re: Regex, 137 | } 138 | 139 | #[cfg(test)] 140 | impl GlobStrategic { 141 | /// Tests whether the given path matches this pattern or not. 142 | fn is_match>(&self, path: P) -> bool { 143 | self.is_match_candidate(&Candidate::new(path.as_ref())) 144 | } 145 | 146 | /// Tests whether the given path matches this pattern or not. 147 | fn is_match_candidate(&self, candidate: &Candidate) -> bool { 148 | let byte_path = &*candidate.path; 149 | 150 | match self.strategy { 151 | MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path, 152 | MatchStrategy::BasenameLiteral(ref lit) => { 153 | lit.as_bytes() == &*candidate.basename 154 | } 155 | MatchStrategy::Extension(ref ext) => { 156 | ext.as_bytes() == &*candidate.ext 157 | } 158 | MatchStrategy::Prefix(ref pre) => { 159 | starts_with(pre.as_bytes(), byte_path) 160 | } 161 | MatchStrategy::Suffix { ref suffix, component } => { 162 | if component && byte_path == &suffix.as_bytes()[1..] { 163 | return true; 164 | } 165 | ends_with(suffix.as_bytes(), byte_path) 166 | } 167 | MatchStrategy::RequiredExtension(ref ext) => { 168 | let ext = ext.as_bytes(); 169 | &*candidate.ext == ext && self.re.is_match(byte_path) 170 | } 171 | MatchStrategy::Regex => self.re.is_match(byte_path), 172 | } 173 | } 174 | } 175 | 176 | /// A builder for a pattern. 177 | /// 178 | /// This builder enables configuring the match semantics of a pattern. For 179 | /// example, one can make matching case insensitive. 180 | /// 181 | /// The lifetime `'a` refers to the lifetime of the pattern string. 182 | #[derive(Clone, Debug)] 183 | pub struct GlobBuilder<'a> { 184 | /// The glob pattern to compile. 185 | glob: &'a str, 186 | /// Options for the pattern. 187 | opts: GlobOptions, 188 | } 189 | 190 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] 191 | struct GlobOptions { 192 | /// Whether to match case insensitively. 
193 | case_insensitive: bool, 194 | /// Whether to require a literal separator to match a separator in a file 195 | /// path. e.g., when enabled, `*` won't match `/`. 196 | literal_separator: bool, 197 | /// Whether or not to use `\` to escape special characters. 198 | /// e.g., when enabled, `\*` will match a literal `*`. 199 | backslash_escape: bool, 200 | } 201 | 202 | impl GlobOptions { 203 | fn default() -> GlobOptions { 204 | GlobOptions { 205 | case_insensitive: false, 206 | literal_separator: false, 207 | backslash_escape: !is_separator('\\'), 208 | } 209 | } 210 | } 211 | 212 | #[derive(Clone, Debug, Default, Eq, PartialEq)] 213 | struct Tokens(Vec); 214 | 215 | impl Deref for Tokens { 216 | type Target = Vec; 217 | fn deref(&self) -> &Vec { 218 | &self.0 219 | } 220 | } 221 | 222 | impl DerefMut for Tokens { 223 | fn deref_mut(&mut self) -> &mut Vec { 224 | &mut self.0 225 | } 226 | } 227 | 228 | #[derive(Clone, Debug, Eq, PartialEq)] 229 | enum Token { 230 | Literal(char), 231 | Any, 232 | ZeroOrMore, 233 | RecursivePrefix, 234 | RecursiveSuffix, 235 | RecursiveZeroOrMore, 236 | Class { negated: bool, ranges: Vec<(char, char)> }, 237 | Alternates(Vec), 238 | } 239 | 240 | impl Glob { 241 | /// Builds a new pattern with default options. 242 | pub fn new(glob: &str) -> Result { 243 | GlobBuilder::new(glob).build() 244 | } 245 | 246 | /// Returns a matcher for this pattern. 247 | pub fn compile_matcher(&self) -> GlobMatcher { 248 | let re = 249 | new_regex(&self.re).expect("regex compilation shouldn't fail"); 250 | GlobMatcher { pat: self.clone(), re: re } 251 | } 252 | 253 | /// Returns a strategic matcher. 254 | /// 255 | /// This isn't exposed because it's not clear whether it's actually 256 | /// faster than just running a regex for a *single* pattern. If it 257 | /// is faster, then GlobMatcher should do it automatically. 258 | #[cfg(test)] 259 | fn compile_strategic_matcher(&self) -> GlobStrategic { 260 | let strategy = MatchStrategy::new(self); 261 | let re = 262 | new_regex(&self.re).expect("regex compilation shouldn't fail"); 263 | GlobStrategic { strategy: strategy, pat: self.clone(), re: re } 264 | } 265 | 266 | /// Returns the original glob pattern used to build this pattern. 267 | pub fn glob(&self) -> &str { 268 | &self.glob 269 | } 270 | 271 | /// Returns the regular expression string for this glob. 272 | /// 273 | /// Note that regular expressions for globs are intended to be matched on 274 | /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In 275 | /// particular, globs are frequently used on file paths, where there is no 276 | /// general guarantee that file paths are themselves valid UTF-8. As a 277 | /// result, callers will need to ensure that they are using a regex API 278 | /// that can match on arbitrary bytes. For example, the 279 | /// [`regex`](https://crates.io/regex) 280 | /// crate's 281 | /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html) 282 | /// API is not suitable for this since it matches on `&str`, but its 283 | /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html) 284 | /// API is suitable for this. 285 | pub fn regex(&self) -> &str { 286 | &self.re 287 | } 288 | 289 | /// Returns the pattern as a literal if and only if the pattern must match 290 | /// an entire path exactly. 291 | /// 292 | /// The basic format of these patterns is `{literal}`. 
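The `regex` method documented above returns a pattern meant for byte-oriented matching, so the `regex::bytes` API is the right consumer. A minimal sketch, assuming the `regex` crate is available as a dependency (it already is for this crate):

```rust
use globset::Glob;
use regex::bytes::Regex;

fn main() -> Result<(), globset::Error> {
    let glob = Glob::new("src/**/*.rs")?;
    // The generated pattern starts with `(?-u)` and is anchored, so it can be
    // compiled and run against raw bytes such as non-UTF-8 file paths.
    let re = Regex::new(glob.regex()).expect("glob regexes are valid regex syntax");
    assert!(re.is_match(b"src/lib.rs"));
    assert!(!re.is_match(b"src/lib.c"));
    Ok(())
}
```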
293 | fn literal(&self) -> Option { 294 | if self.opts.case_insensitive { 295 | return None; 296 | } 297 | let mut lit = String::new(); 298 | for t in &*self.tokens { 299 | match *t { 300 | Token::Literal(c) => lit.push(c), 301 | _ => return None, 302 | } 303 | } 304 | if lit.is_empty() { 305 | None 306 | } else { 307 | Some(lit) 308 | } 309 | } 310 | 311 | /// Returns an extension if this pattern matches a file path if and only 312 | /// if the file path has the extension returned. 313 | /// 314 | /// Note that this extension returned differs from the extension that 315 | /// std::path::Path::extension returns. Namely, this extension includes 316 | /// the '.'. Also, paths like `.rs` are considered to have an extension 317 | /// of `.rs`. 318 | fn ext(&self) -> Option { 319 | if self.opts.case_insensitive { 320 | return None; 321 | } 322 | let start = match self.tokens.get(0) { 323 | Some(&Token::RecursivePrefix) => 1, 324 | Some(_) => 0, 325 | _ => return None, 326 | }; 327 | match self.tokens.get(start) { 328 | Some(&Token::ZeroOrMore) => { 329 | // If there was no recursive prefix, then we only permit 330 | // `*` if `*` can match a `/`. For example, if `*` can't 331 | // match `/`, then `*.c` doesn't match `foo/bar.c`. 332 | if start == 0 && self.opts.literal_separator { 333 | return None; 334 | } 335 | } 336 | _ => return None, 337 | } 338 | match self.tokens.get(start + 1) { 339 | Some(&Token::Literal('.')) => {} 340 | _ => return None, 341 | } 342 | let mut lit = ".".to_string(); 343 | for t in self.tokens[start + 2..].iter() { 344 | match *t { 345 | Token::Literal('.') | Token::Literal('/') => return None, 346 | Token::Literal(c) => lit.push(c), 347 | _ => return None, 348 | } 349 | } 350 | if lit.is_empty() { 351 | None 352 | } else { 353 | Some(lit) 354 | } 355 | } 356 | 357 | /// This is like `ext`, but returns an extension even if it isn't sufficent 358 | /// to imply a match. Namely, if an extension is returned, then it is 359 | /// necessary but not sufficient for a match. 360 | fn required_ext(&self) -> Option { 361 | if self.opts.case_insensitive { 362 | return None; 363 | } 364 | // We don't care at all about the beginning of this pattern. All we 365 | // need to check for is if it ends with a literal of the form `.ext`. 366 | let mut ext: Vec = vec![]; // built in reverse 367 | for t in self.tokens.iter().rev() { 368 | match *t { 369 | Token::Literal('/') => return None, 370 | Token::Literal(c) => { 371 | ext.push(c); 372 | if c == '.' { 373 | break; 374 | } 375 | } 376 | _ => return None, 377 | } 378 | } 379 | if ext.last() != Some(&'.') { 380 | None 381 | } else { 382 | ext.reverse(); 383 | Some(ext.into_iter().collect()) 384 | } 385 | } 386 | 387 | /// Returns a literal prefix of this pattern if the entire pattern matches 388 | /// if the literal prefix matches. 389 | fn prefix(&self) -> Option { 390 | if self.opts.case_insensitive { 391 | return None; 392 | } 393 | let end = match self.tokens.last() { 394 | Some(&Token::ZeroOrMore) => { 395 | if self.opts.literal_separator { 396 | // If a trailing `*` can't match a `/`, then we can't 397 | // assume a match of the prefix corresponds to a match 398 | // of the overall pattern. e.g., `foo/*` with 399 | // `literal_separator` enabled matches `foo/bar` but not 400 | // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/` 401 | // literal prefix. 
402 | return None; 403 | } 404 | self.tokens.len() - 1 405 | } 406 | _ => self.tokens.len(), 407 | }; 408 | let mut lit = String::new(); 409 | for t in &self.tokens[0..end] { 410 | match *t { 411 | Token::Literal(c) => lit.push(c), 412 | _ => return None, 413 | } 414 | } 415 | if lit.is_empty() { 416 | None 417 | } else { 418 | Some(lit) 419 | } 420 | } 421 | 422 | /// Returns a literal suffix of this pattern if the entire pattern matches 423 | /// if the literal suffix matches. 424 | /// 425 | /// If a literal suffix is returned and it must match either the entire 426 | /// file path or be preceded by a `/`, then also return true. This happens 427 | /// with a pattern like `**/foo/bar`. Namely, this pattern matches 428 | /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the 429 | /// suffix returned is `/foo/bar` (but should match the entire path 430 | /// `foo/bar`). 431 | /// 432 | /// When this returns true, the suffix literal is guaranteed to start with 433 | /// a `/`. 434 | fn suffix(&self) -> Option<(String, bool)> { 435 | if self.opts.case_insensitive { 436 | return None; 437 | } 438 | let mut lit = String::new(); 439 | let (start, entire) = match self.tokens.get(0) { 440 | Some(&Token::RecursivePrefix) => { 441 | // We only care if this follows a path component if the next 442 | // token is a literal. 443 | if let Some(&Token::Literal(_)) = self.tokens.get(1) { 444 | lit.push('/'); 445 | (1, true) 446 | } else { 447 | (1, false) 448 | } 449 | } 450 | _ => (0, false), 451 | }; 452 | let start = match self.tokens.get(start) { 453 | Some(&Token::ZeroOrMore) => { 454 | // If literal_separator is enabled, then a `*` can't 455 | // necessarily match everything, so reporting a suffix match 456 | // as a match of the pattern would be a false positive. 457 | if self.opts.literal_separator { 458 | return None; 459 | } 460 | start + 1 461 | } 462 | _ => start, 463 | }; 464 | for t in &self.tokens[start..] { 465 | match *t { 466 | Token::Literal(c) => lit.push(c), 467 | _ => return None, 468 | } 469 | } 470 | if lit.is_empty() || lit == "/" { 471 | None 472 | } else { 473 | Some((lit, entire)) 474 | } 475 | } 476 | 477 | /// If this pattern only needs to inspect the basename of a file path, 478 | /// then the tokens corresponding to only the basename match are returned. 479 | /// 480 | /// For example, given a pattern of `**/*.foo`, only the tokens 481 | /// corresponding to `*.foo` are returned. 482 | /// 483 | /// Note that this will return None if any match of the basename tokens 484 | /// doesn't correspond to a match of the entire pattern. For example, the 485 | /// glob `foo` only matches when a file path has a basename of `foo`, but 486 | /// doesn't *always* match when a file path has a basename of `foo`. e.g., 487 | /// `foo` doesn't match `abc/foo`. 488 | fn basename_tokens(&self) -> Option<&[Token]> { 489 | if self.opts.case_insensitive { 490 | return None; 491 | } 492 | let start = match self.tokens.get(0) { 493 | Some(&Token::RecursivePrefix) => 1, 494 | _ => { 495 | // With nothing to gobble up the parent portion of a path, 496 | // we can't assume that matching on only the basename is 497 | // correct. 498 | return None; 499 | } 500 | }; 501 | if self.tokens[start..].is_empty() { 502 | return None; 503 | } 504 | for t in &self.tokens[start..] 
{ 505 | match *t { 506 | Token::Literal('/') => return None, 507 | Token::Literal(_) => {} // OK 508 | Token::Any | Token::ZeroOrMore => { 509 | if !self.opts.literal_separator { 510 | // In this case, `*` and `?` can match a path 511 | // separator, which means this could reach outside 512 | // the basename. 513 | return None; 514 | } 515 | } 516 | Token::RecursivePrefix 517 | | Token::RecursiveSuffix 518 | | Token::RecursiveZeroOrMore => { 519 | return None; 520 | } 521 | Token::Class { .. } | Token::Alternates(..) => { 522 | // We *could* be a little smarter here, but either one 523 | // of these is going to prevent our literal optimizations 524 | // anyway, so give up. 525 | return None; 526 | } 527 | } 528 | } 529 | Some(&self.tokens[start..]) 530 | } 531 | 532 | /// Returns the pattern as a literal if and only if the pattern exclusively 533 | /// matches the basename of a file path *and* is a literal. 534 | /// 535 | /// The basic format of these patterns is `**/{literal}`, where `{literal}` 536 | /// does not contain a path separator. 537 | fn basename_literal(&self) -> Option<String> { 538 | let tokens = match self.basename_tokens() { 539 | None => return None, 540 | Some(tokens) => tokens, 541 | }; 542 | let mut lit = String::new(); 543 | for t in tokens { 544 | match *t { 545 | Token::Literal(c) => lit.push(c), 546 | _ => return None, 547 | } 548 | } 549 | Some(lit) 550 | } 551 | } 552 | 553 | impl<'a> GlobBuilder<'a> { 554 | /// Create a new builder for the pattern given. 555 | /// 556 | /// The pattern is not compiled until `build` is called. 557 | pub fn new(glob: &'a str) -> GlobBuilder<'a> { 558 | GlobBuilder { glob: glob, opts: GlobOptions::default() } 559 | } 560 | 561 | /// Parses and builds the pattern. 562 | pub fn build(&self) -> Result<Glob, Error> { 563 | let mut p = Parser { 564 | glob: &self.glob, 565 | stack: vec![Tokens::default()], 566 | chars: self.glob.chars().peekable(), 567 | prev: None, 568 | cur: None, 569 | opts: &self.opts, 570 | }; 571 | p.parse()?; 572 | if p.stack.is_empty() { 573 | Err(Error { 574 | glob: Some(self.glob.to_string()), 575 | kind: ErrorKind::UnopenedAlternates, 576 | }) 577 | } else if p.stack.len() > 1 { 578 | Err(Error { 579 | glob: Some(self.glob.to_string()), 580 | kind: ErrorKind::UnclosedAlternates, 581 | }) 582 | } else { 583 | let tokens = p.stack.pop().unwrap(); 584 | Ok(Glob { 585 | glob: self.glob.to_string(), 586 | re: tokens.to_regex_with(&self.opts), 587 | opts: self.opts, 588 | tokens: tokens, 589 | }) 590 | } 591 | } 592 | 593 | /// Toggle whether the pattern matches case insensitively or not. 594 | /// 595 | /// This is disabled by default. 596 | pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> { 597 | self.opts.case_insensitive = yes; 598 | self 599 | } 600 | 601 | /// Toggle whether a literal `/` is required to match a path separator. 602 | pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> { 603 | self.opts.literal_separator = yes; 604 | self 605 | } 606 | 607 | /// When enabled, a back slash (`\`) may be used to escape 608 | /// special characters in a glob pattern. Additionally, this will 609 | /// prevent `\` from being interpreted as a path separator on all 610 | /// platforms. 611 | /// 612 | /// This is enabled by default on platforms where `\` is not a 613 | /// path separator and disabled by default on platforms where `\` 614 | /// is a path separator.
615 | pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> { 616 | self.opts.backslash_escape = yes; 617 | self 618 | } 619 | } 620 | 621 | impl Tokens { 622 | /// Convert this pattern to a string that is guaranteed to be a valid 623 | /// regular expression and will represent the matching semantics of this 624 | /// glob pattern and the options given. 625 | fn to_regex_with(&self, options: &GlobOptions) -> String { 626 | let mut re = String::new(); 627 | re.push_str("(?-u)"); 628 | if options.case_insensitive { 629 | re.push_str("(?i)"); 630 | } 631 | re.push('^'); 632 | // Special case. If the entire glob is just `**`, then it should match 633 | // everything. 634 | if self.len() == 1 && self[0] == Token::RecursivePrefix { 635 | re.push_str(".*"); 636 | re.push('$'); 637 | return re; 638 | } 639 | self.tokens_to_regex(options, &self, &mut re); 640 | re.push('$'); 641 | re 642 | } 643 | 644 | fn tokens_to_regex( 645 | &self, 646 | options: &GlobOptions, 647 | tokens: &[Token], 648 | re: &mut String, 649 | ) { 650 | for tok in tokens { 651 | match *tok { 652 | Token::Literal(c) => { 653 | re.push_str(&char_to_escaped_literal(c)); 654 | } 655 | Token::Any => { 656 | if options.literal_separator { 657 | re.push_str("[^/]"); 658 | } else { 659 | re.push_str("."); 660 | } 661 | } 662 | Token::ZeroOrMore => { 663 | if options.literal_separator { 664 | re.push_str("[^/]*"); 665 | } else { 666 | re.push_str(".*"); 667 | } 668 | } 669 | Token::RecursivePrefix => { 670 | re.push_str("(?:/?|.*/)"); 671 | } 672 | Token::RecursiveSuffix => { 673 | re.push_str("(?:/?|/.*)"); 674 | } 675 | Token::RecursiveZeroOrMore => { 676 | re.push_str("(?:/|/.*/)"); 677 | } 678 | Token::Class { negated, ref ranges } => { 679 | re.push('['); 680 | if negated { 681 | re.push('^'); 682 | } 683 | for r in ranges { 684 | if r.0 == r.1 { 685 | // Not strictly necessary, but nicer to look at. 686 | re.push_str(&char_to_escaped_literal(r.0)); 687 | } else { 688 | re.push_str(&char_to_escaped_literal(r.0)); 689 | re.push('-'); 690 | re.push_str(&char_to_escaped_literal(r.1)); 691 | } 692 | } 693 | re.push(']'); 694 | } 695 | Token::Alternates(ref patterns) => { 696 | let mut parts = vec![]; 697 | for pat in patterns { 698 | let mut altre = String::new(); 699 | self.tokens_to_regex(options, &pat, &mut altre); 700 | if !altre.is_empty() { 701 | parts.push(altre); 702 | } 703 | } 704 | 705 | // It is possible to have an empty set in which case the 706 | // resulting alternation '()' would be an error. 707 | if !parts.is_empty() { 708 | re.push('('); 709 | re.push_str(&parts.join("|")); 710 | re.push(')'); 711 | } 712 | } 713 | } 714 | } 715 | } 716 | } 717 | 718 | /// Convert a Unicode scalar value to an escaped string suitable for use as 719 | /// a literal in a non-Unicode regex. 720 | fn char_to_escaped_literal(c: char) -> String { 721 | bytes_to_escaped_literal(&c.to_string().into_bytes()) 722 | } 723 | 724 | /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII 725 | /// code units are converted to their escaped form. 
726 | fn bytes_to_escaped_literal(bs: &[u8]) -> String { 727 | let mut s = String::with_capacity(bs.len()); 728 | for &b in bs { 729 | if b <= 0x7F { 730 | s.push_str(&regex::escape(&(b as char).to_string())); 731 | } else { 732 | s.push_str(&format!("\\x{:02x}", b)); 733 | } 734 | } 735 | s 736 | } 737 | 738 | struct Parser<'a> { 739 | glob: &'a str, 740 | stack: Vec<Tokens>, 741 | chars: iter::Peekable<str::Chars<'a>>, 742 | prev: Option<char>, 743 | cur: Option<char>, 744 | opts: &'a GlobOptions, 745 | } 746 | 747 | impl<'a> Parser<'a> { 748 | fn error(&self, kind: ErrorKind) -> Error { 749 | Error { glob: Some(self.glob.to_string()), kind: kind } 750 | } 751 | 752 | fn parse(&mut self) -> Result<(), Error> { 753 | while let Some(c) = self.bump() { 754 | match c { 755 | '?' => self.push_token(Token::Any)?, 756 | '*' => self.parse_star()?, 757 | '[' => self.parse_class()?, 758 | '{' => self.push_alternate()?, 759 | '}' => self.pop_alternate()?, 760 | ',' => self.parse_comma()?, 761 | '\\' => self.parse_backslash()?, 762 | c => self.push_token(Token::Literal(c))?, 763 | } 764 | } 765 | Ok(()) 766 | } 767 | 768 | fn push_alternate(&mut self) -> Result<(), Error> { 769 | if self.stack.len() > 1 { 770 | return Err(self.error(ErrorKind::NestedAlternates)); 771 | } 772 | Ok(self.stack.push(Tokens::default())) 773 | } 774 | 775 | fn pop_alternate(&mut self) -> Result<(), Error> { 776 | let mut alts = vec![]; 777 | while self.stack.len() >= 2 { 778 | alts.push(self.stack.pop().unwrap()); 779 | } 780 | self.push_token(Token::Alternates(alts)) 781 | } 782 | 783 | fn push_token(&mut self, tok: Token) -> Result<(), Error> { 784 | if let Some(ref mut pat) = self.stack.last_mut() { 785 | return Ok(pat.push(tok)); 786 | } 787 | Err(self.error(ErrorKind::UnopenedAlternates)) 788 | } 789 | 790 | fn pop_token(&mut self) -> Result<Token, Error> { 791 | if let Some(ref mut pat) = self.stack.last_mut() { 792 | return Ok(pat.pop().unwrap()); 793 | } 794 | Err(self.error(ErrorKind::UnopenedAlternates)) 795 | } 796 | 797 | fn have_tokens(&self) -> Result<bool, Error> { 798 | match self.stack.last() { 799 | None => Err(self.error(ErrorKind::UnopenedAlternates)), 800 | Some(ref pat) => Ok(!pat.is_empty()), 801 | } 802 | } 803 | 804 | fn parse_comma(&mut self) -> Result<(), Error> { 805 | // If we aren't inside a group alternation, then don't 806 | // treat commas specially. Otherwise, we need to start 807 | // a new alternate. 808 | if self.stack.len() <= 1 { 809 | self.push_token(Token::Literal(',')) 810 | } else { 811 | Ok(self.stack.push(Tokens::default())) 812 | } 813 | } 814 | 815 | fn parse_backslash(&mut self) -> Result<(), Error> { 816 | if self.opts.backslash_escape { 817 | match self.bump() { 818 | None => Err(self.error(ErrorKind::DanglingEscape)), 819 | Some(c) => self.push_token(Token::Literal(c)), 820 | } 821 | } else if is_separator('\\') { 822 | // Normalize all patterns to use / as a separator. 823 | self.push_token(Token::Literal('/')) 824 | } else { 825 | self.push_token(Token::Literal('\\')) 826 | } 827 | } 828 | 829 | fn parse_star(&mut self) -> Result<(), Error> { 830 | let prev = self.prev; 831 | if self.peek() != Some('*') { 832 | self.push_token(Token::ZeroOrMore)?; 833 | return Ok(()); 834 | } 835 | assert!(self.bump() == Some('*')); 836 | if !self.have_tokens()?
{ 837 | if !self.peek().map_or(true, is_separator) { 838 | self.push_token(Token::ZeroOrMore)?; 839 | self.push_token(Token::ZeroOrMore)?; 840 | } else { 841 | self.push_token(Token::RecursivePrefix)?; 842 | assert!(self.bump().map_or(true, is_separator)); 843 | } 844 | return Ok(()); 845 | } 846 | 847 | if !prev.map(is_separator).unwrap_or(false) { 848 | if self.stack.len() <= 1 849 | || (prev != Some(',') && prev != Some('{')) 850 | { 851 | self.push_token(Token::ZeroOrMore)?; 852 | self.push_token(Token::ZeroOrMore)?; 853 | return Ok(()); 854 | } 855 | } 856 | let is_suffix = match self.peek() { 857 | None => { 858 | assert!(self.bump().is_none()); 859 | true 860 | } 861 | Some(',') | Some('}') if self.stack.len() >= 2 => true, 862 | Some(c) if is_separator(c) => { 863 | assert!(self.bump().map(is_separator).unwrap_or(false)); 864 | false 865 | } 866 | _ => { 867 | self.push_token(Token::ZeroOrMore)?; 868 | self.push_token(Token::ZeroOrMore)?; 869 | return Ok(()); 870 | } 871 | }; 872 | match self.pop_token()? { 873 | Token::RecursivePrefix => { 874 | self.push_token(Token::RecursivePrefix)?; 875 | } 876 | Token::RecursiveSuffix => { 877 | self.push_token(Token::RecursiveSuffix)?; 878 | } 879 | _ => { 880 | if is_suffix { 881 | self.push_token(Token::RecursiveSuffix)?; 882 | } else { 883 | self.push_token(Token::RecursiveZeroOrMore)?; 884 | } 885 | } 886 | } 887 | Ok(()) 888 | } 889 | 890 | fn parse_class(&mut self) -> Result<(), Error> { 891 | fn add_to_last_range( 892 | glob: &str, 893 | r: &mut (char, char), 894 | add: char, 895 | ) -> Result<(), Error> { 896 | r.1 = add; 897 | if r.1 < r.0 { 898 | Err(Error { 899 | glob: Some(glob.to_string()), 900 | kind: ErrorKind::InvalidRange(r.0, r.1), 901 | }) 902 | } else { 903 | Ok(()) 904 | } 905 | } 906 | let mut ranges = vec![]; 907 | let negated = match self.chars.peek() { 908 | Some(&'!') | Some(&'^') => { 909 | let bump = self.bump(); 910 | assert!(bump == Some('!') || bump == Some('^')); 911 | true 912 | } 913 | _ => false, 914 | }; 915 | let mut first = true; 916 | let mut in_range = false; 917 | loop { 918 | let c = match self.bump() { 919 | Some(c) => c, 920 | // The only way to successfully break this loop is to observe 921 | // a ']'. 922 | None => return Err(self.error(ErrorKind::UnclosedClass)), 923 | }; 924 | match c { 925 | ']' => { 926 | if first { 927 | ranges.push((']', ']')); 928 | } else { 929 | break; 930 | } 931 | } 932 | '-' => { 933 | if first { 934 | ranges.push(('-', '-')); 935 | } else if in_range { 936 | // invariant: in_range is only set when there is 937 | // already at least one character seen. 938 | let r = ranges.last_mut().unwrap(); 939 | add_to_last_range(&self.glob, r, '-')?; 940 | in_range = false; 941 | } else { 942 | assert!(!ranges.is_empty()); 943 | in_range = true; 944 | } 945 | } 946 | c => { 947 | if in_range { 948 | // invariant: in_range is only set when there is 949 | // already at least one character seen. 950 | add_to_last_range( 951 | &self.glob, 952 | ranges.last_mut().unwrap(), 953 | c, 954 | )?; 955 | } else { 956 | ranges.push((c, c)); 957 | } 958 | in_range = false; 959 | } 960 | } 961 | first = false; 962 | } 963 | if in_range { 964 | // Means that the last character in the class was a '-', so add 965 | // it as a literal. 
966 | ranges.push(('-', '-')); 967 | } 968 | self.push_token(Token::Class { negated: negated, ranges: ranges }) 969 | } 970 | 971 | fn bump(&mut self) -> Option<char> { 972 | self.prev = self.cur; 973 | self.cur = self.chars.next(); 974 | self.cur 975 | } 976 | 977 | fn peek(&mut self) -> Option<char> { 978 | self.chars.peek().map(|&ch| ch) 979 | } 980 | } 981 | 982 | #[cfg(test)] 983 | fn starts_with(needle: &[u8], haystack: &[u8]) -> bool { 984 | needle.len() <= haystack.len() && needle == &haystack[..needle.len()] 985 | } 986 | 987 | #[cfg(test)] 988 | fn ends_with(needle: &[u8], haystack: &[u8]) -> bool { 989 | if needle.len() > haystack.len() { 990 | return false; 991 | } 992 | needle == &haystack[haystack.len() - needle.len()..] 993 | } 994 | 995 | #[cfg(test)] 996 | mod tests { 997 | use super::Token::*; 998 | use super::{Glob, GlobBuilder, Token}; 999 | use {ErrorKind, GlobSetBuilder}; 1000 | 1001 | #[derive(Clone, Copy, Debug, Default)] 1002 | struct Options { 1003 | casei: Option<bool>, 1004 | litsep: Option<bool>, 1005 | bsesc: Option<bool>, 1006 | } 1007 | 1008 | macro_rules! syntax { 1009 | ($name:ident, $pat:expr, $tokens:expr) => { 1010 | #[test] 1011 | fn $name() { 1012 | let pat = Glob::new($pat).unwrap(); 1013 | assert_eq!($tokens, pat.tokens.0); 1014 | } 1015 | }; 1016 | } 1017 | 1018 | macro_rules! syntaxerr { 1019 | ($name:ident, $pat:expr, $err:expr) => { 1020 | #[test] 1021 | fn $name() { 1022 | let err = Glob::new($pat).unwrap_err(); 1023 | assert_eq!(&$err, err.kind()); 1024 | } 1025 | }; 1026 | } 1027 | 1028 | macro_rules! toregex { 1029 | ($name:ident, $pat:expr, $re:expr) => { 1030 | toregex!($name, $pat, $re, Options::default()); 1031 | }; 1032 | ($name:ident, $pat:expr, $re:expr, $options:expr) => { 1033 | #[test] 1034 | fn $name() { 1035 | let mut builder = GlobBuilder::new($pat); 1036 | if let Some(casei) = $options.casei { 1037 | builder.case_insensitive(casei); 1038 | } 1039 | if let Some(litsep) = $options.litsep { 1040 | builder.literal_separator(litsep); 1041 | } 1042 | if let Some(bsesc) = $options.bsesc { 1043 | builder.backslash_escape(bsesc); 1044 | } 1045 | let pat = builder.build().unwrap(); 1046 | assert_eq!(format!("(?-u){}", $re), pat.regex()); 1047 | } 1048 | }; 1049 | } 1050 | 1051 | macro_rules! matches { 1052 | ($name:ident, $pat:expr, $path:expr) => { 1053 | matches!($name, $pat, $path, Options::default()); 1054 | }; 1055 | ($name:ident, $pat:expr, $path:expr, $options:expr) => { 1056 | #[test] 1057 | fn $name() { 1058 | let mut builder = GlobBuilder::new($pat); 1059 | if let Some(casei) = $options.casei { 1060 | builder.case_insensitive(casei); 1061 | } 1062 | if let Some(litsep) = $options.litsep { 1063 | builder.literal_separator(litsep); 1064 | } 1065 | if let Some(bsesc) = $options.bsesc { 1066 | builder.backslash_escape(bsesc); 1067 | } 1068 | let pat = builder.build().unwrap(); 1069 | let matcher = pat.compile_matcher(); 1070 | let strategic = pat.compile_strategic_matcher(); 1071 | let set = GlobSetBuilder::new().add(pat).build().unwrap(); 1072 | assert!(matcher.is_match($path)); 1073 | assert!(strategic.is_match($path)); 1074 | assert!(set.is_match($path)); 1075 | } 1076 | }; 1077 | } 1078 | 1079 | macro_rules!
nmatches { 1080 | ($name:ident, $pat:expr, $path:expr) => { 1081 | nmatches!($name, $pat, $path, Options::default()); 1082 | }; 1083 | ($name:ident, $pat:expr, $path:expr, $options:expr) => { 1084 | #[test] 1085 | fn $name() { 1086 | let mut builder = GlobBuilder::new($pat); 1087 | if let Some(casei) = $options.casei { 1088 | builder.case_insensitive(casei); 1089 | } 1090 | if let Some(litsep) = $options.litsep { 1091 | builder.literal_separator(litsep); 1092 | } 1093 | if let Some(bsesc) = $options.bsesc { 1094 | builder.backslash_escape(bsesc); 1095 | } 1096 | let pat = builder.build().unwrap(); 1097 | let matcher = pat.compile_matcher(); 1098 | let strategic = pat.compile_strategic_matcher(); 1099 | let set = GlobSetBuilder::new().add(pat).build().unwrap(); 1100 | assert!(!matcher.is_match($path)); 1101 | assert!(!strategic.is_match($path)); 1102 | assert!(!set.is_match($path)); 1103 | } 1104 | }; 1105 | } 1106 | 1107 | fn s(string: &str) -> String { 1108 | string.to_string() 1109 | } 1110 | 1111 | fn class(s: char, e: char) -> Token { 1112 | Class { negated: false, ranges: vec![(s, e)] } 1113 | } 1114 | 1115 | fn classn(s: char, e: char) -> Token { 1116 | Class { negated: true, ranges: vec![(s, e)] } 1117 | } 1118 | 1119 | fn rclass(ranges: &[(char, char)]) -> Token { 1120 | Class { negated: false, ranges: ranges.to_vec() } 1121 | } 1122 | 1123 | fn rclassn(ranges: &[(char, char)]) -> Token { 1124 | Class { negated: true, ranges: ranges.to_vec() } 1125 | } 1126 | 1127 | syntax!(literal1, "a", vec![Literal('a')]); 1128 | syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]); 1129 | syntax!(any1, "?", vec![Any]); 1130 | syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]); 1131 | syntax!(seq1, "*", vec![ZeroOrMore]); 1132 | syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]); 1133 | syntax!( 1134 | seq3, 1135 | "*a*b*", 1136 | vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,] 1137 | ); 1138 | syntax!(rseq1, "**", vec![RecursivePrefix]); 1139 | syntax!(rseq2, "**/", vec![RecursivePrefix]); 1140 | syntax!(rseq3, "/**", vec![RecursiveSuffix]); 1141 | syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]); 1142 | syntax!( 1143 | rseq5, 1144 | "a/**/b", 1145 | vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),] 1146 | ); 1147 | syntax!(cls1, "[a]", vec![class('a', 'a')]); 1148 | syntax!(cls2, "[!a]", vec![classn('a', 'a')]); 1149 | syntax!(cls3, "[a-z]", vec![class('a', 'z')]); 1150 | syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]); 1151 | syntax!(cls5, "[-]", vec![class('-', '-')]); 1152 | syntax!(cls6, "[]]", vec![class(']', ']')]); 1153 | syntax!(cls7, "[*]", vec![class('*', '*')]); 1154 | syntax!(cls8, "[!!]", vec![classn('!', '!')]); 1155 | syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]); 1156 | syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]); 1157 | syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]); 1158 | syntax!( 1159 | cls12, 1160 | "[-a-z-]", 1161 | vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),] 1162 | ); 1163 | syntax!(cls13, "[]-z]", vec![class(']', 'z')]); 1164 | syntax!(cls14, "[--z]", vec![class('-', 'z')]); 1165 | syntax!(cls15, "[ --]", vec![class(' ', '-')]); 1166 | syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]); 1167 | syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]); 1168 | syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]); 1169 | syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]); 1170 | 
syntax!(cls20, "[^a]", vec![classn('a', 'a')]); 1171 | syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]); 1172 | 1173 | syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass); 1174 | syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass); 1175 | syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass); 1176 | syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass); 1177 | syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a')); 1178 | syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-')); 1179 | 1180 | const CASEI: Options = 1181 | Options { casei: Some(true), litsep: None, bsesc: None }; 1182 | const SLASHLIT: Options = 1183 | Options { casei: None, litsep: Some(true), bsesc: None }; 1184 | const NOBSESC: Options = 1185 | Options { casei: None, litsep: None, bsesc: Some(false) }; 1186 | const BSESC: Options = 1187 | Options { casei: None, litsep: None, bsesc: Some(true) }; 1188 | 1189 | toregex!(re_casei, "a", "(?i)^a$", &CASEI); 1190 | 1191 | toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT); 1192 | toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT); 1193 | 1194 | toregex!(re1, "a", "^a$"); 1195 | toregex!(re2, "?", "^.$"); 1196 | toregex!(re3, "*", "^.*$"); 1197 | toregex!(re4, "a?", "^a.$"); 1198 | toregex!(re5, "?a", "^.a$"); 1199 | toregex!(re6, "a*", "^a.*$"); 1200 | toregex!(re7, "*a", "^.*a$"); 1201 | toregex!(re8, "[*]", r"^[\*]$"); 1202 | toregex!(re9, "[+]", r"^[\+]$"); 1203 | toregex!(re10, "+", r"^\+$"); 1204 | toregex!(re11, "☃", r"^\xe2\x98\x83$"); 1205 | toregex!(re12, "**", r"^.*$"); 1206 | toregex!(re13, "**/", r"^.*$"); 1207 | toregex!(re14, "**/*", r"^(?:/?|.*/).*$"); 1208 | toregex!(re15, "**/**", r"^.*$"); 1209 | toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$"); 1210 | toregex!(re17, "**/**/**", r"^.*$"); 1211 | toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$"); 1212 | toregex!(re19, "a/**", r"^a(?:/?|/.*)$"); 1213 | toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$"); 1214 | toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$"); 1215 | toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$"); 1216 | toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$"); 1217 | toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$"); 1218 | toregex!(re25, "**/b", r"^(?:/?|.*/)b$"); 1219 | toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$"); 1220 | toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$"); 1221 | toregex!(re28, "a**", r"^a.*.*$"); 1222 | toregex!(re29, "**a", r"^.*.*a$"); 1223 | toregex!(re30, "a**b", r"^a.*.*b$"); 1224 | toregex!(re31, "***", r"^.*.*.*$"); 1225 | toregex!(re32, "/a**", r"^/a.*.*$"); 1226 | toregex!(re33, "/**a", r"^/.*.*a$"); 1227 | toregex!(re34, "/a**b", r"^/a.*.*b$"); 1228 | 1229 | matches!(match1, "a", "a"); 1230 | matches!(match2, "a*b", "a_b"); 1231 | matches!(match3, "a*b*c", "abc"); 1232 | matches!(match4, "a*b*c", "a_b_c"); 1233 | matches!(match5, "a*b*c", "a___b___c"); 1234 | matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc"); 1235 | matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); 1236 | matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd"); 1237 | matches!(match9, "*.rs", ".rs"); 1238 | matches!(match10, "☃", "☃"); 1239 | 1240 | matches!(matchrec1, "some/**/needle.txt", "some/needle.txt"); 1241 | matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt"); 1242 | matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt"); 1243 | matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt"); 1244 | matches!(matchrec5, "**", "abcde"); 1245 | matches!(matchrec6, "**", ""); 1246 | matches!(matchrec7, "**", ".asdf"); 1247 | 
matches!(matchrec8, "**", "/x/.asdf"); 1248 | matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt"); 1249 | matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt"); 1250 | matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt"); 1251 | matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt"); 1252 | matches!(matchrec13, "**/test", "one/two/test"); 1253 | matches!(matchrec14, "**/test", "one/test"); 1254 | matches!(matchrec15, "**/test", "test"); 1255 | matches!(matchrec16, "/**/test", "/one/two/test"); 1256 | matches!(matchrec17, "/**/test", "/one/test"); 1257 | matches!(matchrec18, "/**/test", "/test"); 1258 | matches!(matchrec19, "**/.*", ".abc"); 1259 | matches!(matchrec20, "**/.*", "abc/.abc"); 1260 | matches!(matchrec21, ".*/**", ".abc"); 1261 | matches!(matchrec22, ".*/**", ".abc/abc"); 1262 | matches!(matchrec23, "foo/**", "foo"); 1263 | matches!(matchrec24, "**/foo/bar", "foo/bar"); 1264 | matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt"); 1265 | 1266 | matches!(matchrange1, "a[0-9]b", "a0b"); 1267 | matches!(matchrange2, "a[0-9]b", "a9b"); 1268 | matches!(matchrange3, "a[!0-9]b", "a_b"); 1269 | matches!(matchrange4, "[a-z123]", "1"); 1270 | matches!(matchrange5, "[1a-z23]", "1"); 1271 | matches!(matchrange6, "[123a-z]", "1"); 1272 | matches!(matchrange7, "[abc-]", "-"); 1273 | matches!(matchrange8, "[-abc]", "-"); 1274 | matches!(matchrange9, "[-a-c]", "b"); 1275 | matches!(matchrange10, "[a-c-]", "b"); 1276 | matches!(matchrange11, "[-]", "-"); 1277 | matches!(matchrange12, "a[^0-9]b", "a_b"); 1278 | 1279 | matches!(matchpat1, "*hello.txt", "hello.txt"); 1280 | matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt"); 1281 | matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt"); 1282 | matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt"); 1283 | matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt"); 1284 | matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt"); 1285 | matches!( 1286 | matchpat7, 1287 | "*some/path/to/hello.txt", 1288 | "a/bigger/some/path/to/hello.txt" 1289 | ); 1290 | 1291 | matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_"); 1292 | 1293 | matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI); 1294 | matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI); 1295 | matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI); 1296 | matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI); 1297 | 1298 | matches!(matchalt1, "a,b", "a,b"); 1299 | matches!(matchalt2, ",", ","); 1300 | matches!(matchalt3, "{a,b}", "a"); 1301 | matches!(matchalt4, "{a,b}", "b"); 1302 | matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar"); 1303 | matches!(matchalt6, "{**/src/**,foo}", "foo"); 1304 | matches!(matchalt7, "{[}],foo}", "}"); 1305 | matches!(matchalt8, "{foo}", "foo"); 1306 | matches!(matchalt9, "{}", ""); 1307 | matches!(matchalt10, "{,}", ""); 1308 | matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo"); 1309 | matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar"); 1310 | matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat"); 1311 | 1312 | matches!(matchslash1, "abc/def", "abc/def", SLASHLIT); 1313 | #[cfg(unix)] 1314 | nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT); 1315 | #[cfg(not(unix))] 1316 | nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT); 1317 | nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT); 1318 | matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs 1319 | #[cfg(unix)] 1320 | nmatches!(matchslash5, "abc\\def", 
"abc/def", SLASHLIT); 1321 | #[cfg(not(unix))] 1322 | matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT); 1323 | 1324 | matches!(matchbackslash1, "\\[", "[", BSESC); 1325 | matches!(matchbackslash2, "\\?", "?", BSESC); 1326 | matches!(matchbackslash3, "\\*", "*", BSESC); 1327 | matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC); 1328 | matches!(matchbackslash5, "\\?", "\\a", NOBSESC); 1329 | matches!(matchbackslash6, "\\*", "\\\\", NOBSESC); 1330 | #[cfg(unix)] 1331 | matches!(matchbackslash7, "\\a", "a"); 1332 | #[cfg(not(unix))] 1333 | matches!(matchbackslash8, "\\a", "/a"); 1334 | 1335 | nmatches!(matchnot1, "a*b*c", "abcd"); 1336 | nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca"); 1337 | nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt"); 1338 | nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt"); 1339 | nmatches!(matchnot5, "/**/test", "test"); 1340 | nmatches!(matchnot6, "/**/test", "/one/notthis"); 1341 | nmatches!(matchnot7, "/**/test", "/notthis"); 1342 | nmatches!(matchnot8, "**/.*", "ab.c"); 1343 | nmatches!(matchnot9, "**/.*", "abc/ab.c"); 1344 | nmatches!(matchnot10, ".*/**", "a.bc"); 1345 | nmatches!(matchnot11, ".*/**", "abc/a.bc"); 1346 | nmatches!(matchnot12, "a[0-9]b", "a_b"); 1347 | nmatches!(matchnot13, "a[!0-9]b", "a0b"); 1348 | nmatches!(matchnot14, "a[!0-9]b", "a9b"); 1349 | nmatches!(matchnot15, "[!-]", "-"); 1350 | nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some"); 1351 | nmatches!(matchnot17, "*hello.txt", "goodbye.txt"); 1352 | nmatches!( 1353 | matchnot18, 1354 | "*some/path/to/hello.txt", 1355 | "some/path/to/hello.txt-and-then-some" 1356 | ); 1357 | nmatches!( 1358 | matchnot19, 1359 | "*some/path/to/hello.txt", 1360 | "some/other/path/to/hello.txt" 1361 | ); 1362 | nmatches!(matchnot20, "a", "foo/a"); 1363 | nmatches!(matchnot21, "./foo", "foo"); 1364 | nmatches!(matchnot22, "**/foo", "foofoo"); 1365 | nmatches!(matchnot23, "**/foo/bar", "foofoo/bar"); 1366 | nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c"); 1367 | nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT); 1368 | nmatches!( 1369 | matchnot26, 1370 | "**/m4/ltoptions.m4", 1371 | "csharp/src/packages/repositories.config", 1372 | SLASHLIT 1373 | ); 1374 | nmatches!(matchnot27, "a[^0-9]b", "a0b"); 1375 | nmatches!(matchnot28, "a[^0-9]b", "a9b"); 1376 | nmatches!(matchnot29, "[^-]", "-"); 1377 | nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt"); 1378 | nmatches!( 1379 | matchrec31, 1380 | "some/*/needle.txt", 1381 | "some/one/two/needle.txt", 1382 | SLASHLIT 1383 | ); 1384 | nmatches!( 1385 | matchrec32, 1386 | "some/*/needle.txt", 1387 | "some/one/two/three/needle.txt", 1388 | SLASHLIT 1389 | ); 1390 | 1391 | macro_rules! extract { 1392 | ($which:ident, $name:ident, $pat:expr, $expect:expr) => { 1393 | extract!($which, $name, $pat, $expect, Options::default()); 1394 | }; 1395 | ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => { 1396 | #[test] 1397 | fn $name() { 1398 | let mut builder = GlobBuilder::new($pat); 1399 | if let Some(casei) = $options.casei { 1400 | builder.case_insensitive(casei); 1401 | } 1402 | if let Some(litsep) = $options.litsep { 1403 | builder.literal_separator(litsep); 1404 | } 1405 | if let Some(bsesc) = $options.bsesc { 1406 | builder.backslash_escape(bsesc); 1407 | } 1408 | let pat = builder.build().unwrap(); 1409 | assert_eq!($expect, pat.$which()); 1410 | } 1411 | }; 1412 | } 1413 | 1414 | macro_rules! 
literal { 1415 | ($($tt:tt)*) => { extract!(literal, $($tt)*); } 1416 | } 1417 | 1418 | macro_rules! basetokens { 1419 | ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); } 1420 | } 1421 | 1422 | macro_rules! ext { 1423 | ($($tt:tt)*) => { extract!(ext, $($tt)*); } 1424 | } 1425 | 1426 | macro_rules! required_ext { 1427 | ($($tt:tt)*) => { extract!(required_ext, $($tt)*); } 1428 | } 1429 | 1430 | macro_rules! prefix { 1431 | ($($tt:tt)*) => { extract!(prefix, $($tt)*); } 1432 | } 1433 | 1434 | macro_rules! suffix { 1435 | ($($tt:tt)*) => { extract!(suffix, $($tt)*); } 1436 | } 1437 | 1438 | macro_rules! baseliteral { 1439 | ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); } 1440 | } 1441 | 1442 | literal!(extract_lit1, "foo", Some(s("foo"))); 1443 | literal!(extract_lit2, "foo", None, CASEI); 1444 | literal!(extract_lit3, "/foo", Some(s("/foo"))); 1445 | literal!(extract_lit4, "/foo/", Some(s("/foo/"))); 1446 | literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar"))); 1447 | literal!(extract_lit6, "*.foo", None); 1448 | literal!(extract_lit7, "foo/bar", Some(s("foo/bar"))); 1449 | literal!(extract_lit8, "**/foo/bar", None); 1450 | 1451 | basetokens!( 1452 | extract_basetoks1, 1453 | "**/foo", 1454 | Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]) 1455 | ); 1456 | basetokens!(extract_basetoks2, "**/foo", None, CASEI); 1457 | basetokens!( 1458 | extract_basetoks3, 1459 | "**/foo", 1460 | Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]), 1461 | SLASHLIT 1462 | ); 1463 | basetokens!(extract_basetoks4, "*foo", None, SLASHLIT); 1464 | basetokens!(extract_basetoks5, "*foo", None); 1465 | basetokens!(extract_basetoks6, "**/fo*o", None); 1466 | basetokens!( 1467 | extract_basetoks7, 1468 | "**/fo*o", 1469 | Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]), 1470 | SLASHLIT 1471 | ); 1472 | 1473 | ext!(extract_ext1, "**/*.rs", Some(s(".rs"))); 1474 | ext!(extract_ext2, "**/*.rs.bak", None); 1475 | ext!(extract_ext3, "*.rs", Some(s(".rs"))); 1476 | ext!(extract_ext4, "a*.rs", None); 1477 | ext!(extract_ext5, "/*.c", None); 1478 | ext!(extract_ext6, "*.c", None, SLASHLIT); 1479 | ext!(extract_ext7, "*.c", Some(s(".c"))); 1480 | 1481 | required_ext!(extract_req_ext1, "*.rs", Some(s(".rs"))); 1482 | required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs"))); 1483 | required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs"))); 1484 | required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs"))); 1485 | required_ext!(extract_req_ext5, ".rs", Some(s(".rs"))); 1486 | required_ext!(extract_req_ext6, "./rs", None); 1487 | required_ext!(extract_req_ext7, "foo", None); 1488 | required_ext!(extract_req_ext8, ".foo/", None); 1489 | required_ext!(extract_req_ext9, "foo/", None); 1490 | 1491 | prefix!(extract_prefix1, "/foo", Some(s("/foo"))); 1492 | prefix!(extract_prefix2, "/foo/*", Some(s("/foo/"))); 1493 | prefix!(extract_prefix3, "**/foo", None); 1494 | prefix!(extract_prefix4, "foo/**", None); 1495 | 1496 | suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true))); 1497 | suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false))); 1498 | suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT); 1499 | suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false))); 1500 | suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false))); 1501 | suffix!(extract_suffix6, "*.foo", None, SLASHLIT); 1502 | suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false))); 1503 | 1504 | baseliteral!(extract_baselit1, "**/foo", Some(s("foo"))); 1505 | 
baseliteral!(extract_baselit2, "foo", None); 1506 | baseliteral!(extract_baselit3, "*foo", None); 1507 | baseliteral!(extract_baselit4, "*/foo", None); 1508 | } 1509 | --------------------------------------------------------------------------------
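Usage note (not part of the repository dump above): a minimal sketch of how the `GlobBuilder` and `GlobSetBuilder` APIs exercised by the tests in src/glob.rs fit together. The patterns and paths are made-up examples; the asserted behavior mirrors the SLASHLIT test cases (e.g. `matchnot25`).

    // Sketch only. Assumes the `globset` crate is available as a dependency.
    use globset::{GlobBuilder, GlobSetBuilder};

    fn main() {
        // With `literal_separator(true)`, `*` and `?` cannot match `/`,
        // so `*.c` matches `sha1.c` but not `mozilla-sha1/sha1.c`.
        let glob = GlobBuilder::new("*.c")
            .literal_separator(true)
            .build()
            .unwrap();
        let matcher = glob.compile_matcher();
        assert!(matcher.is_match("sha1.c"));
        assert!(!matcher.is_match("mozilla-sha1/sha1.c"));

        // A glob set answers "does any pattern match?" for a candidate path.
        let set = GlobSetBuilder::new()
            .add(GlobBuilder::new("**/*.rs").build().unwrap())
            .add(GlobBuilder::new("**/Cargo.toml").build().unwrap())
            .build()
            .unwrap();
        assert!(set.is_match("src/glob.rs"));
    }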