├── .rustfmt.toml ├── .gitignore ├── format-all.sh ├── README.md ├── Cargo.toml ├── publish-all.sh ├── src ├── error.rs ├── variable.rs ├── explain.rs ├── lib.rs ├── checker.rs └── pattern.rs ├── .travis.yml ├── test-all.sh ├── tests └── basic.rs └── LICENSE /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.bk 2 | *.swp 3 | *.swo 4 | *.swx 5 | tags 6 | target 7 | Cargo.lock 8 | .*.rustfmt 9 | rusty-tags.* 10 | -------------------------------------------------------------------------------- /format-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | # Format all sources using rustfmt. 5 | 6 | topdir=$(dirname "$0") 7 | cd "$topdir" 8 | 9 | # Make sure we can find rustfmt. 10 | export PATH="$PATH:$HOME/.cargo/bin" 11 | 12 | exec cargo +stable fmt --all -- "$@" 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is a library for writing tests for utilities that read text files and 2 | produce text output. 3 | 4 | [![Build Status](https://travis-ci.org/CraneStation/filecheck.svg?branch=main)](https://travis-ci.org/CraneStation/filecheck) 5 | 6 | It is inspired by and similar to 7 | [LLVM Filecheck](https://llvm.org/docs/CommandGuide/FileCheck.html), but 8 | it is not directly compatible. 
9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["The Cranelift Project Developers"] 3 | name = "filecheck" 4 | version = "0.5.0" 5 | description = "Library for writing tests for utilities that read text files and produce text output" 6 | license = "Apache-2.0 WITH LLVM-exception" 7 | repository = "https://github.com/CraneStation/filecheck" 8 | documentation = "https://docs.rs/filecheck" 9 | readme = "README.md" 10 | keywords = ["test"] 11 | edition = "2018" 12 | 13 | [lib] 14 | name = "filecheck" 15 | 16 | [dependencies] 17 | regex = "1" 18 | thiserror = "1" 19 | 20 | [badges] 21 | maintenance = { status = "passively-maintained" } 22 | travis-ci = { repository = "Cranelift/filecheck" } 23 | -------------------------------------------------------------------------------- /publish-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | topdir=$(dirname "$0") 5 | cd "$topdir" 6 | 7 | version="0.4.0" 8 | 9 | # Update all of the Cargo.toml files. 10 | # 11 | echo "Updating crate versions to $version" 12 | # Update the version number of this crate to $version. 13 | sed -i.bk -e "s/^version = .*/version = \"$version\"/" Cargo.toml 14 | 15 | # Update our local Cargo.lock (not checked in). 16 | cargo update 17 | ./test-all.sh 18 | 19 | # Commands needed to publish. 20 | # 21 | # Note that libraries need to be published in topological order. 
22 | 23 | echo git commit -a -m "\"Bump version to $version"\" 24 | echo git push 25 | echo cargo publish --manifest-path Cargo.toml 26 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use std::result; 2 | use thiserror::Error; 3 | 4 | /// A result from the filecheck library. 5 | pub type Result = result::Result; 6 | 7 | /// A filecheck error. 8 | #[derive(Error, Debug)] 9 | pub enum Error { 10 | /// A syntax error in a check line. 11 | #[error("{0}")] 12 | Syntax(String), 13 | /// A check refers to an undefined variable. 14 | /// 15 | /// The pattern contains `$foo` where the `foo` variable has not yet been defined. 16 | /// Use `$$` to match a literal dollar sign. 17 | #[error("{0}")] 18 | UndefVariable(String), 19 | /// A pattern contains a back-reference to a variable that was defined in the same pattern. 20 | /// 21 | /// For example, `check: Hello $(world=.*) $world`. Backreferences are not supported. Often the 22 | /// desired effect can be achieved with the `sameln` check: 23 | /// 24 | /// ```text 25 | /// check: Hello $(world=[^ ]*) 26 | /// sameln: $world 27 | /// ``` 28 | #[error("{0}")] 29 | Backref(String), 30 | /// A pattern contains multiple definitions of the same variable. 31 | #[error("{0}")] 32 | DuplicateDef(String), 33 | /// An error in a regular expression. 34 | /// 35 | /// Use `source()` to get the underlying `regex` library error. 36 | #[error("{0}")] 37 | Regex(#[from] regex::Error), 38 | } 39 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Travis CI script. See https://travis-ci.org/ for more info.
2 | 3 | language: rust 4 | rust: 5 | - stable 6 | - beta 7 | - nightly 8 | matrix: 9 | allow_failures: 10 | # We try to be compatible with beta and nightly, but they occasionally 11 | # fail, so we don't allow them to hold up people using stable. 12 | - rust: beta 13 | - rust: nightly 14 | # Similarly, we don't need to hold up people using stable while we wait 15 | # for the results which may fail. 16 | fast_finish: true 17 | dist: trusty 18 | sudo: false 19 | before_script: 20 | # If an old version of rustfmt from cargo is already installed, uninstall 21 | # it, since it can prevent the installation of the new version from rustup. 22 | - cargo uninstall rustfmt || true 23 | - cargo install --list 24 | # If we're testing beta or nightly, we still need to install the stable 25 | # toolchain so that we can run the stable version of rustfmt. 26 | - rustup toolchain install stable 27 | # Install the stable version of rustfmt. 28 | - rustup component add --toolchain=stable rustfmt-preview 29 | - rustup component list --toolchain=stable 30 | - rustup show 31 | - rustfmt +stable --version || echo fail 32 | # Sometimes the component isn't actually ready after being installed, and 33 | # rustup update makes it ready. 34 | - rustup update 35 | - rustfmt +stable --version 36 | script: ./test-all.sh 37 | cache: 38 | cargo: true 39 | -------------------------------------------------------------------------------- /test-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | # This is the top-level test script: 5 | # 6 | # - Check code formatting. 7 | # - Make a debug build. 8 | # - Make a release build. 9 | # - Run unit tests for all Rust crates 10 | # - Build API documentation. 11 | # 12 | # All tests run by this script should be passing at all times. 13 | 14 | # Repository top-level directory. 
15 | topdir=$(dirname "$0") 16 | cd "$topdir" 17 | 18 | function banner { 19 | echo "====== $* ======" 20 | } 21 | 22 | # Run rustfmt if we have it. We already cd'ed into $topdir, so the helper is invoked relative to the current directory; "$topdir/..." would not resolve when $topdir is a relative path. 23 | banner "Rust formatting" 24 | if type rustfmt > /dev/null; then 25 | if ! ./format-all.sh --check ; then 26 | echo "Formatting diffs detected! Run \"cargo fmt --all\" to correct." 27 | exit 1 28 | fi 29 | else 30 | echo "rustfmt not available; formatting not checked!" 31 | echo 32 | echo "If you are using rustup, rustfmt can be installed via" 33 | echo "\"rustup component add --toolchain=stable rustfmt-preview\", or see" 34 | echo "https://github.com/rust-lang-nursery/rustfmt for more information." 35 | fi 36 | 37 | # Make sure the code builds in release mode. 38 | banner "Rust release build" 39 | cargo build --release 40 | 41 | # Make sure the code builds in debug mode. 42 | banner "Rust debug build" 43 | cargo build 44 | 45 | # Run the tests. We run these in debug mode so that assertions are enabled. 46 | banner "Rust unit tests" 47 | cargo test --all 48 | 49 | # Make sure the documentation builds. 50 | banner "Rust documentation: $topdir/target/doc/filecheck/index.html" 51 | cargo doc 52 | 53 | banner "OK" 54 | -------------------------------------------------------------------------------- /src/variable.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | /// A variable name is one or more ASCII alphanumerical characters, including underscore. 4 | /// Note that numerical variable names like `$45` are allowed too. 5 | /// 6 | /// Try to parse a variable name from the beginning of `s`. 7 | /// Return the index of the character following the varname. 8 | /// This returns 0 if `s` doesn't have a prefix that is a variable name.
9 | pub fn varname_prefix(s: &str) -> usize { 10 | for (idx, ch) in s.char_indices() { 11 | match ch { 12 | 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {} 13 | _ => return idx, 14 | } 15 | } 16 | s.len() 17 | } 18 | 19 | /// A variable can contain either a regular expression or plain text. 20 | #[derive(Debug, Clone, PartialEq, Eq)] 21 | pub enum Value<'a> { 22 | /// Verbatim text. 23 | Text(Cow<'a, str>), 24 | /// Regular expression. 25 | Regex(Cow<'a, str>), 26 | } 27 | 28 | /// Resolve variables by name. 29 | pub trait VariableMap { 30 | /// Get the value of the variable `varname`, or return `None` for an unknown variable name. 31 | fn lookup(&self, varname: &str) -> Option; 32 | } 33 | 34 | impl VariableMap for () { 35 | fn lookup(&self, _: &str) -> Option { 36 | None 37 | } 38 | } 39 | 40 | /// An empty variable map. 41 | pub const NO_VARIABLES: &'static dyn VariableMap = &(); 42 | 43 | #[cfg(test)] 44 | mod tests { 45 | #[test] 46 | fn varname() { 47 | use super::varname_prefix; 48 | 49 | assert_eq!(varname_prefix(""), 0); 50 | assert_eq!(varname_prefix("\0"), 0); 51 | assert_eq!(varname_prefix("_"), 1); 52 | assert_eq!(varname_prefix("0"), 1); 53 | assert_eq!(varname_prefix("01"), 2); 54 | assert_eq!(varname_prefix("b"), 1); 55 | assert_eq!(varname_prefix("C"), 1); 56 | assert_eq!(varname_prefix("."), 0); 57 | assert_eq!(varname_prefix(".s"), 0); 58 | assert_eq!(varname_prefix("0."), 1); 59 | assert_eq!(varname_prefix("01="), 2); 60 | assert_eq!(varname_prefix("0a)"), 2); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/explain.rs: -------------------------------------------------------------------------------- 1 | //! Explaining how *filecheck* matched or failed to match a file. 2 | 3 | use crate::MatchRange; 4 | use std::cmp::min; 5 | use std::fmt::{self, Display, Formatter}; 6 | 7 | /// Record events during matching. 8 | pub trait Recorder { 9 | /// Set the directive we're talking about now. 
10 | fn directive(&mut self, dct: usize); 11 | 12 | /// Matched a positive check directive (check/sameln/nextln/unordered). 13 | fn matched_check(&mut self, regex: &str, matched: MatchRange); 14 | 15 | /// Matched a `not:` directive. This means the match will fail. 16 | fn matched_not(&mut self, regex: &str, matched: MatchRange); 17 | 18 | /// Missed a positive check directive. The range given is the range searched for a match. 19 | fn missed_check(&mut self, regex: &str, searched: MatchRange); 20 | 21 | /// Missed `not:` directive (as intended). 22 | fn missed_not(&mut self, regex: &str, searched: MatchRange); 23 | 24 | /// The directive defined a variable. 25 | fn defined_var(&mut self, varname: &str, value: &str); 26 | } 27 | 28 | /// The null recorder just doesn't listen to anything you say. 29 | impl Recorder for () { 30 | fn directive(&mut self, _: usize) {} 31 | fn matched_check(&mut self, _: &str, _: MatchRange) {} 32 | fn matched_not(&mut self, _: &str, _: MatchRange) {} 33 | fn defined_var(&mut self, _: &str, _: &str) {} 34 | fn missed_check(&mut self, _: &str, _: MatchRange) {} 35 | fn missed_not(&mut self, _: &str, _: MatchRange) {} 36 | } 37 | 38 | struct Match { 39 | directive: usize, 40 | is_match: bool, 41 | is_not: bool, 42 | regex: String, 43 | range: MatchRange, 44 | } 45 | 46 | struct VarDef { 47 | directive: usize, 48 | varname: String, 49 | value: String, 50 | } 51 | 52 | /// Record an explanation for the matching process, success or failure. 53 | pub struct Explainer<'a> { 54 | text: &'a str, 55 | directive: usize, 56 | matches: Vec, 57 | vardefs: Vec, 58 | } 59 | 60 | impl<'a> Explainer<'a> { 61 | pub fn new(text: &'a str) -> Explainer { 62 | Explainer { 63 | text, 64 | directive: 0, 65 | matches: Vec::new(), 66 | vardefs: Vec::new(), 67 | } 68 | } 69 | 70 | /// Finish up after recording all events in a match. 
71 | pub fn finish(&mut self) { 72 | self.matches.sort_by_key(|m| (m.range, m.directive)); 73 | self.vardefs.sort_by_key(|v| v.directive); 74 | } 75 | } 76 | 77 | impl<'a> Display for Explainer<'a> { 78 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 79 | // Offset of beginning of the last line printed. 80 | let mut curln = 0; 81 | // Offset of beginning of the next line to be printed. 82 | let mut nextln = 0; 83 | 84 | for m in &self.matches { 85 | // Emit lines until m.range.0 is visible. 86 | while nextln <= m.range.0 && nextln < self.text.len() { 87 | let newln = self.text[nextln..] 88 | .find('\n') 89 | .map(|d| nextln + d + 1) 90 | .unwrap_or(self.text.len()); 91 | assert!(newln > nextln); 92 | writeln!(f, "> {}", self.text[nextln..newln].trim_end_matches('\n'))?; // Strip the '\n' only if present: the final line of the text may not end with one. 93 | curln = nextln; 94 | nextln = newln; 95 | } 96 | 97 | // Emit ^~~ under the part of the match that lies on the line starting at curln. 98 | if m.is_match { 99 | write!(f, " ")?; 100 | let mend = min(m.range.1, curln + self.text[curln..nextln].trim_end_matches('\n').len()); // End of the line's content; cannot underflow when no line has been printed yet. 101 | for pos in curln..mend { 102 | if pos < m.range.0 { 103 | write!(f, " ") 104 | } else if pos == m.range.0 { 105 | write!(f, "^") 106 | } else { 107 | write!(f, "~") 108 | }?; 109 | } 110 | writeln!(f)?; 111 | } 112 | 113 | // Emit the match message itself. 114 | writeln!( 115 | f, 116 | "{} #{}{}: {}", 117 | if m.is_match { "Matched" } else { "Missed" }, 118 | m.directive, 119 | if m.is_not { " not" } else { "" }, 120 | m.regex 121 | )?; 122 | 123 | // Emit any variable definitions. 124 | if let Ok(found) = self 125 | .vardefs 126 | .binary_search_by_key(&m.directive, |v| v.directive) 127 | { 128 | let mut first = found; 129 | while first > 0 && self.vardefs[first - 1].directive == m.directive { 130 | first -= 1; 131 | } 132 | for d in &self.vardefs[first..] { 133 | if d.directive != m.directive { 134 | break; 135 | } 136 | writeln!(f, "Define {}={}", d.varname, d.value)?; 137 | } 138 | } 139 | } 140 | 141 | // Emit trailing lines.
142 | for line in self.text[nextln..].lines() { 143 | writeln!(f, "> {}", line)?; 144 | } 145 | Ok(()) 146 | } 147 | } 148 | 149 | impl<'a> Recorder for Explainer<'a> { 150 | fn directive(&mut self, dct: usize) { 151 | self.directive = dct; 152 | } 153 | 154 | fn matched_check(&mut self, regex: &str, matched: MatchRange) { 155 | self.matches.push(Match { 156 | directive: self.directive, 157 | is_match: true, 158 | is_not: false, 159 | regex: regex.to_owned(), 160 | range: matched, 161 | }); 162 | } 163 | 164 | fn matched_not(&mut self, regex: &str, matched: MatchRange) { 165 | self.matches.push(Match { 166 | directive: self.directive, 167 | is_match: true, 168 | is_not: true, 169 | regex: regex.to_owned(), 170 | range: matched, 171 | }); 172 | } 173 | 174 | fn missed_check(&mut self, regex: &str, searched: MatchRange) { 175 | self.matches.push(Match { 176 | directive: self.directive, 177 | is_match: false, 178 | is_not: false, 179 | regex: regex.to_owned(), 180 | range: searched, 181 | }); 182 | } 183 | 184 | fn missed_not(&mut self, regex: &str, searched: MatchRange) { 185 | self.matches.push(Match { 186 | directive: self.directive, 187 | is_match: false, 188 | is_not: true, 189 | regex: regex.to_owned(), 190 | range: searched, 191 | }); 192 | } 193 | 194 | fn defined_var(&mut self, varname: &str, value: &str) { 195 | self.vardefs.push(VarDef { 196 | directive: self.directive, 197 | varname: varname.to_owned(), 198 | value: value.to_owned(), 199 | }); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate provides a text pattern matching library with functionality similar to the LLVM 2 | //! project's [FileCheck command](https://llvm.org/docs/CommandGuide/FileCheck.html). 3 | //! 4 | //! A list of directives is typically extracted from a file containing a test case. The test case 5 | //! 
is then run through the program under test, and its output matched against the directives. 6 | //! 7 | //! See the [`CheckerBuilder`](struct.CheckerBuilder.html) and [`Checker`](struct.Checker.html) 8 | //! types for the main library API. 9 | //! 10 | //! # Directives 11 | //! 12 | //! These are the directives recognized by *filecheck*: 13 | //! 14 | //!
 15 | //! check: <pattern>
 16 | //! sameln: <pattern>
 17 | //! nextln: <pattern>
 18 | //! unordered: <pattern>
 19 | //! not: <pattern>
 20 | //! regex: <variable>=<regex>
 21 | //! 
22 | //! 23 | //! Each directive is described in more detail below. 24 | //! 25 | //! ## Example 26 | //! 27 | //! The Rust program below prints the primes less than 100. It has *filecheck* directives embedded 28 | //! in comments: 29 | //! 30 | //! ```rust 31 | //! fn is_prime(x: u32) -> bool { 32 | //! (2..x).all(|d| x % d != 0) 33 | //! } 34 | //! 35 | //! // Check that we get the primes and nothing else: 36 | //! // regex: NUM=\d+ 37 | //! // not: $NUM 38 | //! // check: 2 39 | //! // nextln: 3 40 | //! // check: 89 41 | //! // nextln: 97 42 | //! // not: $NUM 43 | //! fn main() { 44 | //! for p in (2..100).filter(|&x| is_prime(x)) { 45 | //! println!("{}", p); 46 | //! } 47 | //! } 48 | //! ``` 49 | //! 50 | //! A test driver compiles and runs the program, then pipes the output through *filecheck*: 51 | //! 52 | //! ```sh 53 | //! $ rustc primes.rs 54 | //! $ ./primes | clif-util filecheck -v 55 | //! #0 regex: NUM=\d+ 56 | //! #1 not: $NUM 57 | //! #2 check: 2 58 | //! #3 nextln: 3 59 | //! #4 check: 89 60 | //! #5 nextln: 97 61 | //! #6 not: $NUM 62 | //! no match #1: \d+ 63 | //! > 2 64 | //! ~ 65 | //! match #2: \b2\b 66 | //! > 3 67 | //! ~ 68 | //! match #3: \b3\b 69 | //! > 5 70 | //! > 7 71 | //! ... 72 | //! > 79 73 | //! > 83 74 | //! > 89 75 | //! ~~ 76 | //! match #4: \b89\b 77 | //! > 97 78 | //! ~~ 79 | //! match #5: \b97\b 80 | //! no match #6: \d+ 81 | //! OK 82 | //! ``` 83 | //! 84 | //! ## The `check:` directive 85 | //! 86 | //! Match patterns non-overlapping and in order: 87 | //! 88 | //! ```sh 89 | //! #0 check: one 90 | //! #1 check: two 91 | //! ``` 92 | //! 93 | //! These directives will match the string `"one two"`, but not `"two one"`. The second directive 94 | //! must match after the first one, and it can't overlap. 95 | //! 96 | //! ## The `sameln:` directive 97 | //! 98 | //! Match a pattern in the same line as the previous match. 99 | //! 100 | //! ```sh 101 | //! #0 check: one 102 | //! #1 sameln: two 103 | //! ``` 104 | //!
105 | //! These directives will match the string `"one two"`, but not `"one\ntwo"`. The second match must 106 | //! be in the same line as the first. Like the `check:` directive, the match must also follow the 107 | //! first match, so `"two one"` would not be matched. 108 | //! 109 | //! If there is no previous match, `sameln:` matches on the first line of the input. 110 | //! 111 | //! ## The `nextln:` directive 112 | //! 113 | //! Match a pattern in the next line after the previous match. 114 | //! 115 | //! ```sh 116 | //! #0 check: one 117 | //! #1 nextln: two 118 | //! ``` 119 | //! 120 | //! These directives will match the string `"one\ntwo"`, but not `"one two"` or `"one\n\ntwo"`. 121 | //! 122 | //! If there is no previous match, `nextln:` matches on the second line of the input as if there 123 | //! were a previous match on the first line. 124 | //! 125 | //! ## The `unordered:` directive 126 | //! 127 | //! Match patterns in any order, and possibly overlapping each other. 128 | //! 129 | //! ```sh 130 | //! #0 unordered: one 131 | //! #1 unordered: two 132 | //! ``` 133 | //! 134 | //! These directives will match the string `"one two"` *and* the string `"two one"`. 135 | //! 136 | //! When a normal ordered match is inserted into a sequence of `unordered:` directives, it acts as 137 | //! a barrier: 138 | //! 139 | //! ```sh 140 | //! #0 unordered: one 141 | //! #1 unordered: two 142 | //! #2 check: three 143 | //! #3 unordered: four 144 | //! #4 unordered: five 145 | //! ``` 146 | //! 147 | //! These directives will match `"two one three four five"`, but not `"two three one four five"`. 148 | //! The `unordered:` matches are not allowed to cross the ordered `check:` directive. 149 | //! 150 | //! When `unordered:` matches define and use variables, a topological order is enforced. This means 151 | //! that a match referencing a variable must follow the match where the variable was defined: 152 | //! 153 | //! ```sh 154 | //! #0 regex: V=\bv\d+\b 155 | //!
#1 unordered: $(va=$V) = load 156 | //! #2 unordered: $(vb=$V) = iadd $va 157 | //! #3 unordered: $(vc=$V) = load 158 | //! #4 unordered: iadd $va, $vc 159 | //! ``` 160 | //! 161 | //! In the above directives, #2 must match after #1, and #4 must match after both #1 and #3, but 162 | //! otherwise they can match in any order. 163 | //! 164 | //! ## The `not:` directive 165 | //! 166 | //! Check that a pattern *does not* appear between matches. 167 | //! 168 | //! ```sh 169 | //! #0 check: one 170 | //! #1 not: two 171 | //! #2 check: three 172 | //! ``` 173 | //! 174 | //! The directives above will match `"one five three"`, but not `"one two three"`. 175 | //! 176 | //! The pattern in a `not:` directive can't define any variables. Since it never matches anything, 177 | //! the variables would not get a value. 178 | //! 179 | //! ## The `regex:` directive 180 | //! 181 | //! Define a shorthand name for a regular expression. 182 | //! 183 | //! ```sh 184 | //! #0 regex: ID=\b[_a-zA-Z][_0-9a-zA-Z]*\b 185 | //! #1 check: $ID + $ID 186 | //! ``` 187 | //! 188 | //! The `regex:` directive gives a name to a regular expression which can then be used as part of a 189 | //! pattern to match. Patterns are otherwise just plain text strings to match, so this is not 190 | //! simple macro expansion. 191 | //! 192 | //! See [the Rust regex crate](../regex/index.html#syntax) for the regular expression syntax. 193 | //! 194 | //! # Patterns and variables 195 | //! 196 | //! Patterns are plain text strings to be matched in the input file. The dollar sign is used as an 197 | //! escape character to expand variables. The following escape sequences are recognized: 198 | //! 199 | //!
200 | //! $$                Match single dollar sign.
201 | //! $()               Match the empty string.
202 | //! $(=<regex>)       Match regular expression <regex>.
203 | //! $<var>            Match contents of variable <var>.
204 | //! $(<var>)          Match contents of variable <var>.
205 | //! $(<var>=<regex>)  Match <regex>, then
206 | //!                   define <var> as the matched text.
207 | //! $(<var>=$<rxvar>) Match regex in <rxvar>, then
208 | //!                   define <var> as the matched text.
209 | //! 
210 | //! 211 | //! Variables can contain either plain text or regular expressions. Plain text variables are 212 | //! defined with the `$(var=...)` syntax in a previous directive. They match the same text again. 213 | //! Backreferences within the same pattern are not allowed. When a variable is defined in a 214 | //! pattern, it can't be referenced again in the same pattern. 215 | //! 216 | //! Regular expression variables are defined with the `regex:` directive. They match the regular 217 | //! expression each time they are used, so the matches don't need to be identical. 218 | //! 219 | //! ## Word boundaries 220 | //! 221 | //! If a pattern begins or ends with a (plain text) letter or number, it will only match on a word 222 | //! boundary. Use the `$()` empty string match to prevent this: 223 | //! 224 | //! ```sh 225 | //! check: one$() 226 | //! ``` 227 | //! 228 | //! This will match `"one"` and `"onetwo"`, but not `"zeroone"`. 229 | //! 230 | //! The empty match syntax can also be used to require leading or trailing whitespace: 231 | //! 232 | //! ```sh 233 | //! check: one, $() 234 | //! ``` 235 | //! 236 | //! This will match `"one, two"` , but not `"one,two"`. Without the `$()`, trailing whitespace 237 | //! would be trimmed from the pattern. 238 | 239 | #![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)] 240 | 241 | pub use checker::{Checker, CheckerBuilder}; 242 | pub use error::{Error, Result}; 243 | pub use variable::{Value, VariableMap, NO_VARIABLES}; 244 | 245 | mod checker; 246 | mod error; 247 | mod explain; 248 | mod pattern; 249 | mod variable; 250 | 251 | /// The range of a match in the input text. 
252 | pub type MatchRange = (usize, usize); 253 | -------------------------------------------------------------------------------- /tests/basic.rs: -------------------------------------------------------------------------------- 1 | extern crate filecheck; 2 | 3 | use filecheck::{CheckerBuilder, Error as FcError, NO_VARIABLES}; 4 | 5 | fn e2s(e: FcError) -> String { 6 | e.to_string() 7 | } 8 | 9 | #[test] 10 | fn empty() { 11 | let c = CheckerBuilder::new().finish(); 12 | assert!(c.is_empty()); 13 | 14 | // An empty checker matches anything. 15 | assert_eq!(c.check("", NO_VARIABLES).map_err(e2s), Ok(true)); 16 | assert_eq!(c.check("hello", NO_VARIABLES).map_err(e2s), Ok(true)); 17 | } 18 | 19 | #[test] 20 | fn no_directives() { 21 | let c = CheckerBuilder::new().text("nothing here").unwrap().finish(); 22 | assert!(c.is_empty()); 23 | 24 | // An empty checker matches anything. 25 | assert_eq!(c.check("", NO_VARIABLES).map_err(e2s), Ok(true)); 26 | assert_eq!(c.check("hello", NO_VARIABLES).map_err(e2s), Ok(true)); 27 | } 28 | 29 | #[test] 30 | fn no_matches() { 31 | let c = CheckerBuilder::new() 32 | .text("regex: FOO=bar") 33 | .unwrap() 34 | .finish(); 35 | assert!(!c.is_empty()); 36 | 37 | // A checker holding only `regex:` definitions has nothing to match, so it accepts anything.
38 | assert_eq!(c.check("", NO_VARIABLES).map_err(e2s), Ok(true)); 39 | assert_eq!(c.check("hello", NO_VARIABLES).map_err(e2s), Ok(true)); 40 | } 41 | 42 | #[test] 43 | fn simple() { 44 | let c = CheckerBuilder::new() 45 | .text( 46 | " 47 | check: one 48 | check: two 49 | ", 50 | ) 51 | .unwrap() 52 | .finish(); 53 | 54 | let t = " 55 | zero 56 | one 57 | and a half 58 | two 59 | three 60 | "; 61 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 62 | 63 | let t = " 64 | zero 65 | and a half 66 | two 67 | one 68 | three 69 | "; 70 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 71 | } 72 | 73 | #[test] 74 | fn sameln() { 75 | let c = CheckerBuilder::new() 76 | .text( 77 | " 78 | check: one 79 | sameln: two 80 | ", 81 | ) 82 | .unwrap() 83 | .finish(); 84 | 85 | let t = " 86 | zero 87 | one 88 | and a half 89 | two 90 | three 91 | "; 92 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 93 | 94 | let t = " 95 | zero 96 | one 97 | two 98 | three 99 | "; 100 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 101 | 102 | let t = " 103 | zero 104 | one two 105 | three 106 | "; 107 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 108 | } 109 | 110 | #[test] 111 | fn nextln() { 112 | let c = CheckerBuilder::new() 113 | .text( 114 | " 115 | check: one 116 | nextln: two 117 | ", 118 | ) 119 | .unwrap() 120 | .finish(); 121 | 122 | let t = " 123 | zero 124 | one 125 | and a half 126 | two 127 | three 128 | "; 129 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 130 | 131 | let t = " 132 | zero 133 | one 134 | two 135 | three 136 | "; 137 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 138 | 139 | let t = " 140 | zero 141 | one two 142 | three 143 | "; 144 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 145 | 146 | let t = " 147 | zero 148 | one 149 | two"; 150 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 151 | } 152 | 153 | #[test] 154 | fn 
leading_nextln() { 155 | // A leading nextln directive should match from line 2. 156 | // This is somewhat arbitrary, but consistent with a preceding 'check: $()' directive. 157 | let c = CheckerBuilder::new() 158 | .text( 159 | " 160 | nextln: one 161 | nextln: two 162 | ", 163 | ) 164 | .unwrap() 165 | .finish(); 166 | 167 | let t = "zero 168 | one 169 | two 170 | three 171 | "; 172 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 173 | 174 | let t = "one 175 | two 176 | three 177 | "; 178 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 179 | } 180 | 181 | #[test] 182 | fn leading_sameln() { 183 | // A leading sameln directive should match from line 1. 184 | let c = CheckerBuilder::new() 185 | .text( 186 | " 187 | sameln: one 188 | sameln: two 189 | ", 190 | ) 191 | .unwrap() 192 | .finish(); 193 | 194 | let t = "zero 195 | one two three 196 | "; 197 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 198 | 199 | let t = "zero one two three"; 200 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 201 | 202 | let t = "zero one 203 | two three"; 204 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 205 | } 206 | 207 | #[test] 208 | fn syntax_error() { 209 | let mut b = CheckerBuilder::new(); 210 | let c = b.text( 211 | " 212 | check: $( 213 | ", 214 | ); 215 | 216 | assert_eq!( 217 | c.map(|_c| ()).map_err(e2s), 218 | Err("unterminated $(...".into()) 219 | ); 220 | } 221 | 222 | #[test] 223 | fn not() { 224 | let c = CheckerBuilder::new() 225 | .text( 226 | " 227 | check: one$() 228 | not: $()eat$() 229 | check: $()two 230 | ", 231 | ) 232 | .unwrap() 233 | .finish(); 234 | 235 | let t = "onetwo"; 236 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 237 | 238 | let t = "one eat two"; 239 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 240 | 241 | let t = "oneeattwo"; 242 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 243 | 244 | let t = "oneatwo"; 245 
| assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 246 | } 247 | 248 | #[test] 249 | fn notnot() { 250 | let c = CheckerBuilder::new() 251 | .text( 252 | " 253 | check: one$() 254 | not: $()eat$() 255 | not: half 256 | check: $()two 257 | ", 258 | ) 259 | .unwrap() 260 | .finish(); 261 | 262 | let t = "onetwo"; 263 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 264 | 265 | let t = "one eat two"; 266 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 267 | 268 | let t = "one half two"; 269 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 270 | 271 | let t = "oneeattwo"; 272 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 273 | 274 | // The `not: half` pattern only matches whole words, but the bracketing matches are considered 275 | // word boundaries, so it does match in this case. 276 | let t = "onehalftwo"; 277 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); 278 | 279 | let t = "oneatwo"; 280 | assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); 281 | } 282 | 283 | #[test] 284 | fn unordered() { 285 | let c = CheckerBuilder::new() 286 | .text( 287 | " 288 | check: one 289 | unordered: two 290 | unordered: three 291 | check: four 292 | ", 293 | ) 294 | .unwrap() 295 | .finish(); 296 | 297 | assert_eq!( 298 | c.check("one two three four", NO_VARIABLES).map_err(e2s), 299 | Ok(true) 300 | ); 301 | assert_eq!( 302 | c.check("one three two four", NO_VARIABLES).map_err(e2s), 303 | Ok(true) 304 | ); 305 | 306 | assert_eq!( 307 | c.check("one two four three four", NO_VARIABLES,) 308 | .map_err(e2s,), 309 | Ok(true) 310 | ); 311 | assert_eq!( 312 | c.check("one three four two four", NO_VARIABLES,) 313 | .map_err(e2s,), 314 | Ok(true) 315 | ); 316 | 317 | assert_eq!( 318 | c.check("one two four three", NO_VARIABLES).map_err(e2s), 319 | Ok(false) 320 | ); 321 | assert_eq!( 322 | c.check("one three four two", NO_VARIABLES).map_err(e2s), 323 | Ok(false) 324 | ); 325 | } 326 | 327 | 
#[test] 328 | fn leading_unordered() { 329 | let c = CheckerBuilder::new() 330 | .text( 331 | " 332 | unordered: two 333 | unordered: three 334 | check: four 335 | ", 336 | ) 337 | .unwrap() 338 | .finish(); 339 | 340 | assert_eq!( 341 | c.check("one two three four", NO_VARIABLES).map_err(e2s), 342 | Ok(true) 343 | ); 344 | assert_eq!( 345 | c.check("one three two four", NO_VARIABLES).map_err(e2s), 346 | Ok(true) 347 | ); 348 | 349 | assert_eq!( 350 | c.check("one two four three four", NO_VARIABLES,) 351 | .map_err(e2s,), 352 | Ok(true) 353 | ); 354 | assert_eq!( 355 | c.check("one three four two four", NO_VARIABLES,) 356 | .map_err(e2s,), 357 | Ok(true) 358 | ); 359 | 360 | assert_eq!( 361 | c.check("one two four three", NO_VARIABLES).map_err(e2s), 362 | Ok(false) 363 | ); 364 | assert_eq!( 365 | c.check("one three four two", NO_VARIABLES).map_err(e2s), 366 | Ok(false) 367 | ); 368 | } 369 | 370 | #[test] 371 | fn trailing_unordered() { 372 | let c = CheckerBuilder::new() 373 | .text( 374 | " 375 | check: one 376 | unordered: two 377 | unordered: three 378 | ", 379 | ) 380 | .unwrap() 381 | .finish(); 382 | 383 | assert_eq!( 384 | c.check("one two three four", NO_VARIABLES).map_err(e2s), 385 | Ok(true) 386 | ); 387 | assert_eq!( 388 | c.check("one three two four", NO_VARIABLES).map_err(e2s), 389 | Ok(true) 390 | ); 391 | 392 | assert_eq!( 393 | c.check("one two four three four", NO_VARIABLES,) 394 | .map_err(e2s,), 395 | Ok(true) 396 | ); 397 | assert_eq!( 398 | c.check("one three four two four", NO_VARIABLES,) 399 | .map_err(e2s,), 400 | Ok(true) 401 | ); 402 | 403 | assert_eq!( 404 | c.check("one two four three", NO_VARIABLES).map_err(e2s), 405 | Ok(true) 406 | ); 407 | assert_eq!( 408 | c.check("one three four two", NO_VARIABLES).map_err(e2s), 409 | Ok(true) 410 | ); 411 | } 412 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | 
Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | 205 | --- LLVM Exceptions to the Apache 2.0 License ---- 206 | 207 | As an exception, if, as a result of your compiling your source code, portions 208 | of this Software are embedded into an Object form of such source code, you 209 | may redistribute such embedded portions in such Object form without complying 210 | with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 211 | 212 | In addition, if you combine or link compiled forms of this Software with 213 | software that is licensed under the GPLv2 ("Combined Software") and if a 214 | court of competent jurisdiction determines that the patent provision (Section 215 | 3), the indemnity provision (Section 9) or other Section of the License 216 | conflicts with the conditions of the GPLv2, you may retroactively and 217 | prospectively choose to deem waived or otherwise exclude such Section(s) of 218 | the License, but only in their entirety and only with respect to the Combined 219 | Software. 
220 | 221 | -------------------------------------------------------------------------------- /src/checker.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{Error, Result}; 2 | use crate::explain::{Explainer, Recorder}; 3 | use crate::pattern::Pattern; 4 | use crate::variable::{varname_prefix, Value, VariableMap}; 5 | use crate::MatchRange; 6 | use regex::{Captures, Regex}; 7 | use std::borrow::Cow; 8 | use std::cmp::max; 9 | use std::collections::HashMap; 10 | use std::fmt::{self, Display, Formatter}; 11 | use std::mem; 12 | 13 | // The different kinds of directives we support. 14 | enum Directive { 15 | Check(Pattern), 16 | SameLn(Pattern), 17 | NextLn(Pattern), 18 | Unordered(Pattern), 19 | Not(Pattern), 20 | Regex(String, String), 21 | } 22 | 23 | // Regular expression matching a directive. 24 | // The match groups are: 25 | // 26 | // 1. Keyword. 27 | // 2. Rest of line / pattern. 28 | // 29 | const DIRECTIVE_RX: &str = r"\b(check|sameln|nextln|unordered|not|regex):\s+(.*)"; 30 | 31 | impl Directive { 32 | /// Create a new directive from a `DIRECTIVE_RX` match. 33 | fn new(caps: Captures) -> Result { 34 | let cmd = caps.get(1).map(|m| m.as_str()).expect("group 1 must match"); 35 | let rest = caps.get(2).map(|m| m.as_str()).expect("group 2 must match"); 36 | 37 | if cmd == "regex" { 38 | return Directive::regex(rest); 39 | } 40 | 41 | // All other commands are followed by a pattern. 42 | let pat = rest.parse()?; 43 | 44 | match cmd { 45 | "check" => Ok(Directive::Check(pat)), 46 | "sameln" => Ok(Directive::SameLn(pat)), 47 | "nextln" => Ok(Directive::NextLn(pat)), 48 | "unordered" => Ok(Directive::Unordered(pat)), 49 | "not" => { 50 | if !pat.defs().is_empty() { 51 | let msg = format!( 52 | "can't define variables '$({}=...' 
in not: {}", 53 | pat.defs()[0], 54 | rest 55 | ); 56 | Err(Error::DuplicateDef(msg)) 57 | } else { 58 | Ok(Directive::Not(pat)) 59 | } 60 | } 61 | _ => panic!("unexpected command {} in regex match", cmd), 62 | } 63 | } 64 | 65 | /// Create a `regex:` directive from a `VAR=...` string. 66 | fn regex(rest: &str) -> Result { 67 | let varlen = varname_prefix(rest); 68 | if varlen == 0 { 69 | return Err(Error::Syntax(format!( 70 | "invalid variable name in regex: {}", 71 | rest 72 | ))); 73 | } 74 | let var = rest[0..varlen].to_string(); 75 | if !rest[varlen..].starts_with('=') { 76 | return Err(Error::Syntax(format!( 77 | "expected '=' after variable '{}' in regex: {}", 78 | var, rest 79 | ))); 80 | } 81 | // Ignore trailing white space in the regex, including CR. 82 | Ok(Directive::Regex( 83 | var, 84 | rest[varlen + 1..].trim_end().to_string(), 85 | )) 86 | } 87 | } 88 | 89 | /// Builder for constructing a `Checker` instance. 90 | pub struct CheckerBuilder { 91 | directives: Vec, 92 | linerx: Regex, 93 | } 94 | 95 | impl CheckerBuilder { 96 | /// Create a new, blank `CheckerBuilder`. 97 | pub fn new() -> Self { 98 | Self { 99 | directives: Vec::new(), 100 | linerx: Regex::new(DIRECTIVE_RX).unwrap(), 101 | } 102 | } 103 | 104 | /// Add a potential directive line. 105 | /// 106 | /// Returns true if this is a directive with one of the known prefixes. 107 | /// Returns false if no known directive was found. 108 | /// Returns an error if there is a problem with the directive. 109 | pub fn directive(&mut self, l: &str) -> Result { 110 | match self.linerx.captures(l) { 111 | Some(caps) => { 112 | self.directives.push(Directive::new(caps)?); 113 | Ok(true) 114 | } 115 | None => Ok(false), 116 | } 117 | } 118 | 119 | /// Add multiple directives. 120 | /// 121 | /// The text is split into lines that are added individually as potential directives. 122 | /// This method can be used to parse a whole test file containing multiple directives. 
123 | pub fn text(&mut self, t: &str) -> Result<&mut Self> { 124 | for caps in self.linerx.captures_iter(t) { 125 | self.directives.push(Directive::new(caps)?); 126 | } 127 | Ok(self) 128 | } 129 | 130 | /// Get the finished `Checker`. 131 | pub fn finish(&mut self) -> Checker { 132 | // Move directives into the new checker, leaving `self.directives` empty and ready for 133 | // building a new checker. 134 | let new_directives = mem::take(&mut self.directives); 135 | Checker::new(new_directives) 136 | } 137 | } 138 | 139 | /// Verify a list of directives against a test input. 140 | /// 141 | /// Use a `CheckerBuilder` to construct a `Checker`. Then use the `check` method to verify the list 142 | /// of directives against a test input, or `explain` to also get a report of how they matched. 143 | pub struct Checker { 144 | directives: Vec<Directive>, 145 | } 146 | 147 | impl Checker { 148 | fn new(directives: Vec<Directive>) -> Self { 149 | Self { directives } 150 | } 151 | 152 | /// An empty checker contains no directives, and will match any input string. 153 | pub fn is_empty(&self) -> bool { 154 | self.directives.is_empty() 155 | } 156 | 157 | /// Verify directives against the input text. 158 | /// 159 | /// This returns `true` if the text matches all the directives, `false` if it doesn't. 160 | /// An error is only returned if there is a problem with the directives. 161 | pub fn check(&self, text: &str, vars: &dyn VariableMap) -> Result<bool> { 162 | self.run(text, vars, &mut ()) 163 | } 164 | 165 | /// Explain how directives are matched against the input text.
166 | pub fn explain(&self, text: &str, vars: &dyn VariableMap) -> Result<(bool, String)> { 167 | let mut expl = Explainer::new(text); 168 | let success = self.run(text, vars, &mut expl)?; 169 | expl.finish(); 170 | Ok((success, expl.to_string())) 171 | } 172 | 173 | fn run(&self, text: &str, vars: &dyn VariableMap, recorder: &mut dyn Recorder) -> Result<bool> { 174 | let mut state = State::new(text, vars, recorder); 175 | 176 | // For each pending `not:` check, store (directive-index, begin-offset, regex). 177 | let mut nots = Vec::new(); 178 | 179 | for (dct_idx, dct) in self.directives.iter().enumerate() { 180 | let (pat, range) = match *dct { 181 | Directive::Check(ref pat) => (pat, state.check()), 182 | Directive::SameLn(ref pat) => (pat, state.sameln()), 183 | Directive::NextLn(ref pat) => (pat, state.nextln()), 184 | Directive::Unordered(ref pat) => (pat, state.unordered(pat)), 185 | Directive::Not(ref pat) => { 186 | // Resolve `not:` directives immediately to get the right variable values, but 187 | // don't match it until we know the end of the range. 188 | // 189 | // The `not:` directives test the same range as `unordered:` directives. In 190 | // particular, if they refer to defined variables, their range is restricted to 191 | // the text following the match that defined the variable. 192 | nots.push((dct_idx, state.unordered_begin(pat), pat.resolve(&state)?)); 193 | continue; 194 | } 195 | Directive::Regex(ref var, ref rx) => { 196 | state.vars.insert( 197 | var.clone(), 198 | VarDef { 199 | value: Value::Regex(Cow::Borrowed(rx)), 200 | offset: 0, 201 | }, 202 | ); 203 | continue; 204 | } 205 | }; 206 | // Check if `pat` matches in `range`. 207 | state.recorder.directive(dct_idx); 208 | if let Some((match_begin, match_end)) = state.match_positive(pat, range)? { 209 | if let Directive::Unordered(_) = *dct { 210 | // This was an unordered match. 211 | // Keep track of the largest matched position, but leave `last_ordered` alone.
212 | state.max_match = max(state.max_match, match_end); 213 | } else { 214 | // Ordered match. 215 | state.last_ordered = match_end; 216 | state.max_match = match_end; 217 | 218 | // Verify any pending `not:` directives now that we know their range. 219 | for (not_idx, not_begin, rx) in nots.drain(..) { 220 | state.recorder.directive(not_idx); 221 | if let Some(mat) = rx.find(&text[not_begin..match_begin]) { 222 | // Matched `not:` pattern. 223 | state.recorder.matched_not( 224 | rx.as_str(), 225 | (not_begin + mat.start(), not_begin + mat.end()), 226 | ); 227 | return Ok(false); 228 | } else { 229 | state 230 | .recorder 231 | .missed_not(rx.as_str(), (not_begin, match_begin)); 232 | } 233 | } 234 | } 235 | } else { 236 | // No match! 237 | return Ok(false); 238 | } 239 | } 240 | 241 | // Verify any pending `not:` directives after the last ordered directive; their range runs to the end of the text. 242 | for (not_idx, not_begin, rx) in nots.drain(..) { 243 | state.recorder.directive(not_idx); 244 | if let Some(mat) = rx.find(&text[not_begin..]) { 245 | // Matched `not:` pattern. Report the matched range, as for the bounded ranges above. 246 | state.recorder.matched_not(rx.as_str(), (not_begin + mat.start(), not_begin + mat.end())); 247 | return Ok(false); 248 | } 249 | state.recorder.missed_not(rx.as_str(), (not_begin, text.len())); 250 | } 251 | Ok(true) 252 | } 253 | } 254 | 255 | /// A local definition of a variable. 256 | pub struct VarDef<'a> { 257 | /// The value given to the variable. 258 | value: Value<'a>, 259 | /// Offset in input text from where the variable is available. 260 | offset: usize, 261 | } 262 | 263 | struct State<'a> { 264 | text: &'a str, 265 | env_vars: &'a dyn VariableMap, 266 | recorder: &'a mut dyn Recorder, 267 | 268 | vars: HashMap<String, VarDef<'a>>, 269 | // Offset after the last ordered match. This does not include recent unordered matches. 270 | last_ordered: usize, 271 | // Largest offset following a positive match, including unordered matches.
272 | max_match: usize, 273 | } 274 | 275 | impl<'a> State<'a> { 276 | fn new( 277 | text: &'a str, 278 | env_vars: &'a dyn VariableMap, 279 | recorder: &'a mut dyn Recorder, 280 | ) -> State<'a> { 281 | State { 282 | text, 283 | env_vars, 284 | recorder, 285 | vars: HashMap::new(), 286 | last_ordered: 0, 287 | max_match: 0, 288 | } 289 | } 290 | 291 | // Get the offset following the match that defined `var`, or 0 if var is an environment 292 | // variable or unknown. 293 | fn def_offset(&self, var: &str) -> usize { 294 | self.vars 295 | .get(var) 296 | .map(|&VarDef { offset, .. }| offset) 297 | .unwrap_or(0) 298 | } 299 | 300 | // Get the offset of the beginning of the next line after `pos`, or the end of the text if no newline follows. 301 | fn bol(&self, pos: usize) -> usize { 302 | if let Some(offset) = self.text[pos..].find('\n') { 303 | pos + offset + 1 304 | } else { 305 | self.text.len() 306 | } 307 | } 308 | 309 | // Get the range in text to be matched by a `check:`: from the largest match so far to the end of the text. 310 | fn check(&self) -> MatchRange { 311 | (self.max_match, self.text.len()) 312 | } 313 | 314 | // Get the range in text to be matched by a `sameln:`: from the largest match so far to the start of the next line. 315 | fn sameln(&self) -> MatchRange { 316 | let b = self.max_match; 317 | let e = self.bol(b); 318 | (b, e) 319 | } 320 | 321 | // Get the range in text to be matched by a `nextln:`: the line that begins after the largest match so far. 322 | fn nextln(&self) -> MatchRange { 323 | let b = self.bol(self.max_match); 324 | let e = self.bol(b); 325 | (b, e) 326 | } 327 | 328 | // Get the beginning of the range in text to be matched by an `unordered:` or `not:` directive. 329 | // The directive must match after the last ordered match and after the directives that define the variables used. 330 | fn unordered_begin(&self, pat: &Pattern) -> usize { 331 | pat.parts() 332 | .iter() 333 | .filter_map(|part| part.ref_var()) 334 | .map(|var| self.def_offset(var)) 335 | .fold(self.last_ordered, max) 336 | } 337 | 338 | // Get the range in text to be matched by an `unordered:` directive.
339 | fn unordered(&self, pat: &Pattern) -> MatchRange { 340 | (self.unordered_begin(pat), self.text.len()) 341 | } 342 | 343 | // Search for `pat` in `range`, return the range matched (as offsets into the whole text). 344 | // After a positive match, update variable definitions, if any. 345 | fn match_positive(&mut self, pat: &Pattern, range: MatchRange) -> Result<Option<MatchRange>> { 346 | let rx = pat.resolve(self)?; 347 | let txt = &self.text[range.0..range.1]; 348 | let defs = pat.defs(); 349 | let matched_range = if defs.is_empty() { 350 | // Pattern defines no variables. Fastest search is `find`. 351 | rx.find(txt) 352 | } else { 353 | // We need the captures to define variables. 354 | rx.captures(txt).map(|caps| { 355 | let matched_range = caps.get(0).expect("whole expression must match"); 356 | for var in defs { 357 | let txtval = caps.name(var).map(|mat| mat.as_str()).unwrap_or(""); 358 | self.recorder.defined_var(var, txtval); 359 | let vardef = VarDef { 360 | value: Value::Text(Cow::Borrowed(txtval)), 361 | // This offset is the end of the whole matched pattern, not just the text 362 | // defining the variable. 363 | offset: range.0 + matched_range.end(), 364 | }; 365 | self.vars.insert(var.clone(), vardef); 366 | } 367 | matched_range 368 | }) 369 | }; 370 | Ok(if let Some(mat) = matched_range { 371 | let r = (range.0 + mat.start(), range.0 + mat.end()); 372 | self.recorder.matched_check(rx.as_str(), r); 373 | Some(r) 374 | } else { 375 | self.recorder.missed_check(rx.as_str(), range); 376 | None 377 | }) 378 | } 379 | } 380 | 381 | impl<'a> VariableMap for State<'a> { 382 | fn lookup(&self, varname: &str) -> Option<Value> { 383 | // First look for a local define. 384 | if let Some(&VarDef { ref value, .. }) = self.vars.get(varname) { 385 | Some(value.clone()) 386 | } else { 387 | // No local, maybe an environment variable?
388 | self.env_vars.lookup(varname) 389 | } 390 | } 391 | } 392 | 393 | impl Display for Directive { 394 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 395 | use self::Directive::*; 396 | match *self { 397 | Check(ref pat) => writeln!(f, "check: {}", pat), 398 | SameLn(ref pat) => writeln!(f, "sameln: {}", pat), 399 | NextLn(ref pat) => writeln!(f, "nextln: {}", pat), 400 | Unordered(ref pat) => writeln!(f, "unordered: {}", pat), 401 | Not(ref pat) => writeln!(f, "not: {}", pat), 402 | Regex(ref var, ref rx) => writeln!(f, "regex: {}={}", var, rx), 403 | } 404 | } 405 | } 406 | 407 | impl Display for Checker { 408 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 409 | for (idx, dir) in self.directives.iter().enumerate() { 410 | write!(f, "#{} {}", idx, dir)?; 411 | } 412 | Ok(()) 413 | } 414 | } 415 | 416 | #[cfg(test)] 417 | mod tests { 418 | use super::CheckerBuilder; 419 | use crate::error::Error; 420 | 421 | fn e2s(e: Error) -> String { 422 | e.to_string() 423 | } 424 | 425 | #[test] 426 | fn directive() { 427 | let mut b = CheckerBuilder::new(); 428 | 429 | assert_eq!(b.directive("not here: more text").map_err(e2s), Ok(false)); 430 | assert_eq!( 431 | b.directive("not here: regex: X=more text").map_err(e2s), 432 | Ok(true) 433 | ); 434 | assert_eq!( 435 | b.directive("regex: X = tommy").map_err(e2s), 436 | Err("expected '=' after variable 'X' in regex: X = tommy".to_string(),) 437 | ); 438 | assert_eq!( 439 | b.directive("[arm]not: patt $x $(y) here").map_err(e2s), 440 | Ok(true) 441 | ); 442 | assert_eq!( 443 | b.directive("[x86]sameln: $x $(y=[^]]*) there").map_err(e2s), 444 | Ok(true) 445 | ); 446 | // Windows line ending sneaking in. 
447 | assert_eq!(b.directive("regex: Y=foo\r").map_err(e2s), Ok(true)); 448 | 449 | let c = b.finish(); 450 | assert_eq!( 451 | c.to_string(), 452 | "#0 regex: X=more text\n#1 not: patt $(x) $(y) here\n#2 sameln: $(x) \ 453 | $(y=[^]]*) there\n#3 regex: Y=foo\n" 454 | ); 455 | } 456 | } 457 | -------------------------------------------------------------------------------- /src/pattern.rs: -------------------------------------------------------------------------------- 1 | //! Pattern matching for a single directive. 2 | 3 | use crate::error::{Error, Result}; 4 | use crate::variable::{varname_prefix, Value, VariableMap}; 5 | use regex::{escape, Regex, RegexBuilder}; 6 | use std::fmt::{self, Display, Formatter, Write}; 7 | use std::str::FromStr; 8 | 9 | /// A pattern to match as specified in a directive. 10 | /// 11 | /// Each pattern is broken into a sequence of parts that must match in order. The kinds of parts 12 | /// are: 13 | /// 14 | /// 1. Plain text match. 15 | /// 2. Variable match, `$FOO` or `$(FOO)`. The variable `FOO` may expand to plain text or a regex. 16 | /// 3. Variable definition from literal regex, `$(foo=.*)`. Match the regex and assign matching text 17 | /// to variable `foo`. 18 | /// 4. Variable definition from regex variable, `$(foo=$RX)`. Lookup variable `RX` which should 19 | /// expand to a regex, match the regex, and assign matching text to variable `foo`. 20 | /// 21 | pub struct Pattern { 22 | parts: Vec, 23 | // Variables defined by this pattern. 24 | defs: Vec, 25 | } 26 | 27 | /// One atomic part of a pattern. 28 | #[derive(Debug, PartialEq, Eq)] 29 | pub enum Part { 30 | /// Match a plain string. 31 | Text(String), 32 | /// Match a regular expression. The regex has already been wrapped in a non-capturing group if 33 | /// necessary, so it is safe to concatenate. 34 | Regex(String), 35 | /// Match the contents of a variable, which can be plain text or regex. 
36 | Var(String), 37 | /// Match literal regex, then assign match to variable. 38 | /// The regex has already been wrapped in a named capture group. 39 | DefLit { def: usize, regex: String }, 40 | /// Lookup variable `var`, match resulting regex, assign matching text to variable `defs[def]`. 41 | DefVar { def: usize, var: String }, 42 | } 43 | 44 | impl Part { 45 | /// Get the variable referenced by this part, if any. 46 | pub fn ref_var(&self) -> Option<&str> { 47 | match *self { 48 | Part::Var(ref var) | Part::DefVar { ref var, .. } => Some(var), 49 | _ => None, 50 | } 51 | } 52 | } 53 | 54 | impl Pattern { 55 | /// Create a new blank pattern. Use the `FromStr` trait to generate Patterns with content. 56 | fn new() -> Self { 57 | Self { 58 | parts: Vec::new(), 59 | defs: Vec::new(), 60 | } 61 | } 62 | 63 | /// Check if the variable `v` is defined by this pattern. 64 | pub fn defines_var(&self, v: &str) -> bool { 65 | self.defs.iter().any(|d| d == v) 66 | } 67 | 68 | /// Add a definition of a new variable. 69 | /// Return the allocated def number. 70 | fn add_def(&mut self, v: &str) -> Result { 71 | if self.defines_var(v) { 72 | Err(Error::DuplicateDef(format!( 73 | "duplicate definition of ${} in same pattern", 74 | v 75 | ))) 76 | } else { 77 | let idx = self.defs.len(); 78 | self.defs.push(v.to_string()); 79 | Ok(idx) 80 | } 81 | } 82 | 83 | /// Parse a `Part` from a prefix of `s`. 84 | /// Return the part and the number of bytes consumed from `s`. 85 | /// Adds defined variables to `self.defs`. 86 | fn parse_part(&mut self, s: &str) -> Result<(Part, usize)> { 87 | let dollar = s.find('$'); 88 | if dollar != Some(0) { 89 | // String doesn't begin with a dollar sign, so match plain text up to the dollar sign. 90 | let end = dollar.unwrap_or(s.len()); 91 | return Ok((Part::Text(s[0..end].to_string()), end)); 92 | } 93 | 94 | // String starts with a dollar sign. Look for these possibilities: 95 | // 96 | // 1. `$$`. 97 | // 2. `$var`. 98 | // 3. `$(var)`. 
99 | // 4. `$(var=regex)`. Where `regex` is a regular expression possibly containing matching 100 | // braces. 101 | // 5. `$(var=$VAR)`. 102 | 103 | // A doubled dollar sign matches a single dollar sign. 104 | if s.starts_with("$$") { 105 | return Ok((Part::Text("$".to_string()), 2)); 106 | } 107 | 108 | // Look for `$var`. 109 | let varname_end = 1 + varname_prefix(&s[1..]); 110 | if varname_end != 1 { 111 | return Ok((Part::Var(s[1..varname_end].to_string()), varname_end)); 112 | } 113 | 114 | // All remaining possibilities start with `$(`. 115 | if s.len() < 2 || !s.starts_with("$(") { 116 | return Err(Error::Syntax( 117 | "pattern syntax error, use $$ to match a single $".to_string(), 118 | )); 119 | } 120 | 121 | // Match the variable name, allowing for an empty varname in `$()`, or `$(=...)`. 122 | let varname_end = 2 + varname_prefix(&s[2..]); 123 | let varname = s[2..varname_end].to_string(); 124 | 125 | match s[varname_end..].chars().next() { 126 | None => { 127 | return Err(Error::Syntax(format!("unterminated $({}...", varname))); 128 | } 129 | Some(')') => { 130 | let part = if varname.is_empty() { 131 | // Match `$()`, turn it into an empty text match. 132 | Part::Text(varname) 133 | } else { 134 | // Match `$(var)`. 135 | Part::Var(varname) 136 | }; 137 | return Ok((part, varname_end + 1)); 138 | } 139 | Some('=') => { 140 | // Variable definition. Fall through. 141 | } 142 | Some(ch) => { 143 | return Err(Error::Syntax(format!( 144 | "syntax error in $({}... '{}'", 145 | varname, ch 146 | ))); 147 | } 148 | } 149 | 150 | // This is a variable definition of the form `$(var=...`. 151 | 152 | // Allocate a definition index. 153 | let def = if varname.is_empty() { 154 | None 155 | } else { 156 | Some(self.add_def(&varname)?) 157 | }; 158 | 159 | // Match `$(var=$PAT)`. 
160 | if s[varname_end + 1..].starts_with('$') { 161 | let refname_begin = varname_end + 2; 162 | let refname_end = refname_begin + varname_prefix(&s[refname_begin..]); 163 | if refname_begin == refname_end { 164 | return Err(Error::Syntax(format!( 165 | "expected variable name in $({}=$...", 166 | varname 167 | ))); 168 | } 169 | if !s[refname_end..].starts_with(')') { 170 | return Err(Error::Syntax(format!( 171 | "expected ')' after $({}=${}...", 172 | varname, 173 | &s[refname_begin..refname_end] 174 | ))); 175 | } 176 | let refname = s[refname_begin..refname_end].to_string(); 177 | return if let Some(defidx) = def { 178 | Ok(( 179 | Part::DefVar { 180 | def: defidx, 181 | var: refname, 182 | }, 183 | refname_end + 1, 184 | )) 185 | } else { 186 | Err(Error::Syntax(format!( 187 | "expected variable name in $(=${})", 188 | refname 189 | ))) 190 | }; 191 | } 192 | 193 | // Last case: `$(var=...)` where `...` is a regular expression, possibly containing matched 194 | // parentheses. 195 | let rx_begin = varname_end + 1; 196 | let rx_end = rx_begin + regex_prefix(&s[rx_begin..]); 197 | if s[rx_end..].starts_with(')') { 198 | let part = if let Some(defidx) = def { 199 | // Wrap the regex in a named capture group. 200 | Part::DefLit { 201 | def: defidx, 202 | regex: format!("(?P<{}>{})", varname, &s[rx_begin..rx_end]), 203 | } 204 | } else { 205 | // When the varname is empty just match the regex, don't capture any variables. 206 | // This is `$(=[a-z])`. 207 | // Wrap the regex in a non-capturing group to make it concatenation-safe. 208 | Part::Regex(format!("(?:{})", &s[rx_begin..rx_end])) 209 | }; 210 | Ok((part, rx_end + 1)) 211 | } else { 212 | Err(Error::Syntax(format!( 213 | "missing ')' after regex in $({}={}", 214 | varname, 215 | &s[rx_begin..rx_end] 216 | ))) 217 | } 218 | } 219 | } 220 | 221 | /// Compute the length of a regular expression terminated by `)` or `}`. 222 | /// Handle nested and escaped parentheses in the rx, but don't actually parse it. 
/// Compute the length of a regular expression terminated by `)` or `}`.
/// Handle nested and escaped parentheses in the rx, but don't actually parse it.
/// Return the position of the terminating brace or the length of the string.
///
/// The scan tracks just enough regex syntax to avoid stopping early:
///
/// * A backslash escapes the following character, which is never treated as a delimiter.
/// * Inside a character set `[...]`, parentheses and braces are ignored. A `]` that is the very
///   first member of the set (`[]...]` or `[^]...]`) is a literal, not the end of the set.
/// * Inside a `{...}` repetition, everything up to the closing `}` is ignored.
/// * A `)` only terminates the regex when it does not close a `(` opened within the regex.
fn regex_prefix(s: &str) -> usize {
    // State around parsing charsets.
    enum State {
        Normal,  // Outside any charset.
        Curly,   // Inside curly braces.
        CSFirst, // Immediately after opening `[`.
        CSNeg,   // Immediately after `[^`.
        CSBody,  // Inside `[...`.
    }

    // The previous char was a backslash.
    let mut escape = false;
    let mut state = State::Normal;
    // Current nesting level of parens.
    let mut nest = 0usize;

    for (idx, ch) in s.char_indices() {
        if escape {
            escape = false;
            // An escaped character directly after `[` or `[^` is the first member of the
            // charset. Without this transition the `]` that follows (as in `[\d]`) would be
            // mistaken for a literal first member and the charset would never be closed.
            match state {
                State::CSFirst | State::CSNeg => state = State::CSBody,
                State::Normal | State::Curly | State::CSBody => {}
            }
            continue;
        }
        if ch == '\\' {
            escape = true;
            continue;
        }

        match state {
            State::Normal => match ch {
                '[' => state = State::CSFirst,
                '{' => state = State::Curly,
                '(' => nest += 1,
                ')' if nest > 0 => nest -= 1,
                ')' | '}' => return idx,
                _ => {}
            },
            State::Curly => {
                if ch == '}' {
                    state = State::Normal;
                }
            }
            State::CSFirst => {
                // Whatever follows `[` is part of the set, even `]`. Only `^` is special.
                state = match ch {
                    '^' => State::CSNeg,
                    _ => State::CSBody,
                }
            }
            // Whatever follows `[^` is part of the set, even `]`.
            State::CSNeg => state = State::CSBody,
            State::CSBody => {
                if ch == ']' {
                    state = State::Normal;
                }
            }
        }
    }
    s.len()
}
285 | let s = s.trim(); 286 | let mut pat = Pattern::new(); 287 | let mut pos = 0; 288 | while pos < s.len() { 289 | let (part, len) = pat.parse_part(&s[pos..])?; 290 | if let Some(v) = part.ref_var() { 291 | if pat.defines_var(v) { 292 | return Err(Error::Backref(format!( 293 | "unsupported back-reference to '${}' \ 294 | defined in same pattern", 295 | v 296 | ))); 297 | } 298 | } 299 | pat.parts.push(part); 300 | pos += len; 301 | } 302 | Ok(pat) 303 | } 304 | } 305 | 306 | impl Pattern { 307 | /// Get a list of parts in this pattern. 308 | pub fn parts(&self) -> &[Part] { 309 | &self.parts 310 | } 311 | 312 | /// Get a list of variable names defined when this pattern matches. 313 | pub fn defs(&self) -> &[String] { 314 | &self.defs 315 | } 316 | 317 | /// Resolve all variable references in this pattern, turning it into a regular expression. 318 | pub fn resolve(&self, vmap: &dyn VariableMap) -> Result { 319 | let mut out = String::new(); 320 | 321 | // Add a word boundary check `\b` to the beginning of the regex, but only if the first part 322 | // is a plain text match that starts with a word character. 323 | // 324 | // This behavior can be disabled by starting the pattern with `$()`. 325 | if let Some(&Part::Text(ref s)) = self.parts.first() { 326 | if s.starts_with(char::is_alphanumeric) { 327 | out.push_str(r"\b"); 328 | } 329 | } 330 | 331 | for part in &self.parts { 332 | match *part { 333 | Part::Text(ref s) => { 334 | out.push_str(&escape(s)); 335 | } 336 | Part::Regex(ref rx) => out.push_str(rx), 337 | Part::Var(ref var) => { 338 | // Resolve the variable. We can handle a plain text expansion. 339 | match vmap.lookup(var) { 340 | None => { 341 | return Err(Error::UndefVariable(format!( 342 | "undefined variable ${}", 343 | var 344 | ))); 345 | } 346 | Some(Value::Text(s)) => out.push_str(&escape(&s)), 347 | // Wrap regex in non-capturing group for safe concatenation. 
348 | Some(Value::Regex(rx)) => write!(out, "(?:{})", rx).unwrap(), 349 | } 350 | } 351 | Part::DefLit { ref regex, .. } => out.push_str(regex), 352 | Part::DefVar { def, ref var } => { 353 | // Wrap regex in a named capture group. 354 | write!(out, "(?P<{}>", self.defs[def]).unwrap(); 355 | match vmap.lookup(var) { 356 | None => { 357 | return Err(Error::UndefVariable(format!( 358 | "undefined variable ${}", 359 | var 360 | ))); 361 | } 362 | Some(Value::Text(s)) => write!(out, "{})", escape(&s[..])).unwrap(), 363 | Some(Value::Regex(rx)) => write!(out, "{})", rx).unwrap(), 364 | } 365 | } 366 | } 367 | } 368 | 369 | // Add a word boundary check `\b` to the end of the regex, but only if the final part 370 | // is a plain text match that ends with a word character. 371 | // 372 | // This behavior can be disabled by ending the pattern with `$()`. 373 | if let Some(&Part::Text(ref s)) = self.parts.last() { 374 | if s.ends_with(char::is_alphanumeric) { 375 | out.push_str(r"\b"); 376 | } 377 | } 378 | 379 | Ok(RegexBuilder::new(&out).multi_line(true).build()?) 380 | } 381 | } 382 | 383 | impl Display for Pattern { 384 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 385 | for part in &self.parts { 386 | use self::Part::*; 387 | match *part { 388 | Text(ref txt) if txt == "" => write!(f, "$()"), 389 | Text(ref txt) if txt == "$" => write!(f, "$$"), 390 | Text(ref txt) => write!(f, "{}", txt), 391 | Regex(ref rx) => write!(f, "$(={})", rx), 392 | Var(ref var) => write!(f, "$({})", var), 393 | DefLit { def, ref regex } => { 394 | let defvar = &self.defs[def]; 395 | // (?P...). 
396 | let litrx = ®ex[5 + defvar.len()..regex.len() - 1]; 397 | write!(f, "$({}={})", defvar, litrx) 398 | } 399 | DefVar { def, ref var } => write!(f, "$({}=${})", self.defs[def], var), 400 | }?; 401 | } 402 | Ok(()) 403 | } 404 | } 405 | 406 | #[cfg(test)] 407 | mod tests { 408 | #[test] 409 | fn regex() { 410 | use super::regex_prefix; 411 | 412 | assert_eq!(regex_prefix(""), 0); 413 | assert_eq!(regex_prefix(")"), 0); 414 | assert_eq!(regex_prefix(")c"), 0); 415 | assert_eq!(regex_prefix("x"), 1); 416 | assert_eq!(regex_prefix("x)x"), 1); 417 | 418 | assert_eq!(regex_prefix("x(c))x"), 4); 419 | assert_eq!(regex_prefix("()x(c))x"), 6); 420 | assert_eq!(regex_prefix("()x(c)"), 6); 421 | 422 | assert_eq!(regex_prefix("x([)]))x"), 6); 423 | assert_eq!(regex_prefix("x[)])x"), 4); 424 | assert_eq!(regex_prefix("x[^)])x"), 5); 425 | assert_eq!(regex_prefix("x[^])x"), 6); 426 | } 427 | 428 | #[test] 429 | fn part() { 430 | use super::{Part, Pattern}; 431 | let mut pat = Pattern::new(); 432 | 433 | // This is dubious, should we panic instead? 
#[cfg(test)]
mod tests {
    /// `regex_prefix` stops at the first unbalanced `)` outside of a character set.
    #[test]
    fn regex() {
        use super::regex_prefix;

        assert_eq!(regex_prefix(""), 0);
        assert_eq!(regex_prefix(")"), 0);
        assert_eq!(regex_prefix(")c"), 0);
        assert_eq!(regex_prefix("x"), 1);
        assert_eq!(regex_prefix("x)x"), 1);

        assert_eq!(regex_prefix("x(c))x"), 4);
        assert_eq!(regex_prefix("()x(c))x"), 6);
        assert_eq!(regex_prefix("()x(c)"), 6);

        assert_eq!(regex_prefix("x([)]))x"), 6);
        assert_eq!(regex_prefix("x[)])x"), 4);
        assert_eq!(regex_prefix("x[^)])x"), 5);
        assert_eq!(regex_prefix("x[^])x"), 6);
    }

    /// Parsing of single parts that do not define variables, plus syntax errors.
    ///
    /// The failing `$(name=...` cases use distinct names because a definition is recorded even
    /// when the rest of the part fails to parse.
    #[test]
    fn part() {
        use super::{Part, Pattern};
        let mut pat = Pattern::new();

        // This is dubious, should we panic instead?
        assert_eq!(pat.parse_part("").unwrap(), (Part::Text("".to_string()), 0));

        assert_eq!(
            pat.parse_part("x").unwrap(),
            (Part::Text("x".to_string()), 1)
        );
        assert_eq!(
            pat.parse_part("x2").unwrap(),
            (Part::Text("x2".to_string()), 2)
        );
        assert_eq!(
            pat.parse_part("x$").unwrap(),
            (Part::Text("x".to_string()), 1)
        );
        assert_eq!(
            pat.parse_part("x$$").unwrap(),
            (Part::Text("x".to_string()), 1)
        );

        assert_eq!(
            pat.parse_part("$").unwrap_err().to_string(),
            "pattern syntax error, use $$ to match a single $"
        );

        assert_eq!(
            pat.parse_part("$$").unwrap(),
            (Part::Text("$".to_string()), 2)
        );
        assert_eq!(
            pat.parse_part("$$ ").unwrap(),
            (Part::Text("$".to_string()), 2)
        );

        assert_eq!(
            pat.parse_part("$0").unwrap(),
            (Part::Var("0".to_string()), 2)
        );
        assert_eq!(
            pat.parse_part("$xx=").unwrap(),
            (Part::Var("xx".to_string()), 3)
        );
        assert_eq!(
            pat.parse_part("$xx$").unwrap(),
            (Part::Var("xx".to_string()), 3)
        );

        assert_eq!(
            pat.parse_part("$(0)").unwrap(),
            (Part::Var("0".to_string()), 4)
        );
        assert_eq!(
            pat.parse_part("$()").unwrap(),
            (Part::Text("".to_string()), 3)
        );

        assert_eq!(
            pat.parse_part("$(0").unwrap_err().to_string(),
            "unterminated $(0..."
        );
        assert_eq!(
            pat.parse_part("$(foo:").unwrap_err().to_string(),
            "syntax error in $(foo... ':'"
        );
        assert_eq!(
            pat.parse_part("$(foo =").unwrap_err().to_string(),
            "syntax error in $(foo... ' '"
        );
        assert_eq!(
            pat.parse_part("$(eo0=$bar").unwrap_err().to_string(),
            "expected ')' after $(eo0=$bar..."
        );
        assert_eq!(
            pat.parse_part("$(eo1=$bar}").unwrap_err().to_string(),
            "expected ')' after $(eo1=$bar..."
        );
        assert_eq!(
            pat.parse_part("$(eo2=$)").unwrap_err().to_string(),
            "expected variable name in $(eo2=$..."
        );
        assert_eq!(
            pat.parse_part("$(eo3=$-)").unwrap_err().to_string(),
            "expected variable name in $(eo3=$..."
        );
    }

    /// Parsing of parts that define variables, and of anonymous regex parts.
    #[test]
    fn partdefs() {
        use super::{Part, Pattern};
        let mut pat = Pattern::new();

        assert_eq!(
            pat.parse_part("$(foo=$bar)").unwrap(),
            (
                Part::DefVar {
                    def: 0,
                    var: "bar".to_string(),
                },
                11,
            )
        );
        assert_eq!(
            pat.parse_part("$(foo=$bar)").unwrap_err().to_string(),
            "duplicate definition of $foo in same pattern"
        );

        assert_eq!(
            pat.parse_part("$(fxo=$bar)x").unwrap(),
            (
                Part::DefVar {
                    def: 1,
                    var: "bar".to_string(),
                },
                11,
            )
        );

        assert_eq!(
            pat.parse_part("$(fo2=[a-z])").unwrap(),
            (
                Part::DefLit {
                    def: 2,
                    regex: "(?P<fo2>[a-z])".to_string(),
                },
                12,
            )
        );
        assert_eq!(
            pat.parse_part("$(fo3=[a-)])").unwrap(),
            (
                Part::DefLit {
                    def: 3,
                    regex: "(?P<fo3>[a-)])".to_string(),
                },
                12,
            )
        );
        assert_eq!(
            pat.parse_part("$(fo4=)").unwrap(),
            (
                Part::DefLit {
                    def: 4,
                    regex: "(?P<fo4>)".to_string(),
                },
                7,
            )
        );

        assert_eq!(
            pat.parse_part("$(=.*)").unwrap(),
            (Part::Regex("(?:.*)".to_string()), 6)
        );

        assert_eq!(
            pat.parse_part("$(=)").unwrap(),
            (Part::Regex("(?:)".to_string()), 4)
        );
        assert_eq!(
            pat.parse_part("$()").unwrap(),
            (Part::Text("".to_string()), 3)
        );
    }

    /// Whole-pattern parsing through `FromStr`, including whitespace trimming.
    #[test]
    fn pattern() {
        use super::Pattern;

        let p: Pattern = "  Hello world!  ".parse().unwrap();
        assert_eq!(format!("{:?}", p.parts), "[Text(\"Hello world!\")]");

        let p: Pattern = "  $foo=$(bar)  ".parse().unwrap();
        assert_eq!(
            format!("{:?}", p.parts),
            "[Var(\"foo\"), Text(\"=\"), Var(\"bar\")]"
        );
    }
}