├── tests ├── fixtures │ └── structured │ │ ├── empty.json │ │ ├── empty.toml │ │ ├── empty.yaml │ │ ├── invalid.json │ │ ├── invalid.toml │ │ ├── floats.yaml │ │ ├── binary.yaml │ │ ├── floats.toml │ │ ├── empty_structures.yaml │ │ ├── invalid.yaml │ │ ├── deep_nesting.yaml │ │ ├── type_mismatch_coercion.toml │ │ ├── type_coercion.yaml │ │ ├── multi_doc.yaml │ │ ├── datetime.toml │ │ ├── null_boolean_edge_cases.yaml │ │ ├── numeric_edge_cases.json │ │ ├── large_config.yaml │ │ ├── config.yaml │ │ ├── Cargo.toml │ │ ├── nested_arrays.json │ │ ├── package.json │ │ └── unicode_strings.json ├── parser_errors.rs ├── parser_basic.rs ├── temporal_tests.rs ├── aliases.rs └── navigation_structured.rs ├── .gitignore ├── src ├── parser │ ├── mod.rs │ ├── structured_path.pest │ ├── grammar.pest │ ├── aliases.rs │ ├── ast.rs │ ├── raw.rs │ ├── structured_path.rs │ └── error.rs ├── expr │ ├── short_circuit.rs │ └── frame.rs ├── predicate_error.rs ├── util.rs ├── predicate │ └── enum_matcher.rs ├── eval.rs ├── lib.rs ├── expr.rs ├── main.rs └── eval │ └── fs.rs ├── LICENSE-MIT ├── Cargo.toml ├── CHANGELOG.md ├── docs ├── examples.md ├── operators.md └── predicates.md ├── README.md └── LICENSE-APACHE /tests/fixtures/structured/empty.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/structured/empty.toml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/structured/empty.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /tmp 3 | /test_* 4 | NEXT*.md 5 | .claude/* 6 | CLAUDE.md 7 | -------------------------------------------------------------------------------- /tests/fixtures/structured/invalid.json: -------------------------------------------------------------------------------- 1 | {"invalid": "missing closing brace", "unclosed": [1, 2, 3 -------------------------------------------------------------------------------- /tests/fixtures/structured/invalid.toml: -------------------------------------------------------------------------------- 1 | [missing 2 | closing = "bracket" 3 | bad_key with space = "value" 4 | -------------------------------------------------------------------------------- /tests/fixtures/structured/floats.yaml: -------------------------------------------------------------------------------- 1 | value: 1.5 2 | negative: -2.7 3 | zero: 0.0 4 | large: 999.99 5 | scientific: 1.23e-4 6 | -------------------------------------------------------------------------------- /tests/fixtures/structured/binary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inanna-malick/detect/HEAD/tests/fixtures/structured/binary.yaml -------------------------------------------------------------------------------- /tests/fixtures/structured/floats.toml: -------------------------------------------------------------------------------- 1 | value = 2.5 2 | negative = -3.8 3 | zero = 0.0 4 | large = 1234.56 5 | scientific = 6.022e23 6 | -------------------------------------------------------------------------------- 
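Note: the `invalid.*` fixtures above are intentionally malformed; per the limitations listed in `docs/predicates.md`, files whose structured data fails to parse simply do not match (no error is raised). A minimal illustrative check of that parse failure, assuming `serde_json` as declared in `Cargo.toml` (a sketch, not part of the actual test suite):

```rust
#[test]
fn invalid_json_fixture_fails_to_parse() {
    // tests/fixtures/structured/invalid.json is deliberately truncated JSON.
    let text = std::fs::read_to_string("tests/fixtures/structured/invalid.json").unwrap();
    assert!(serde_json::from_str::<serde_json::Value>(&text).is_err());
}
```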
/tests/fixtures/structured/empty_structures.yaml: -------------------------------------------------------------------------------- 1 | empty_array: [] 2 | empty_object: {} 3 | nested_empty: 4 | inner_array: [] 5 | inner_object: {} 6 | -------------------------------------------------------------------------------- /tests/fixtures/structured/invalid.yaml: -------------------------------------------------------------------------------- 1 | invalid: [unclosed array 2 | missing_quote: "unterminated string 3 | bad_indent: 4 | - item1 5 | - item2 6 | -------------------------------------------------------------------------------- /tests/fixtures/structured/deep_nesting.yaml: -------------------------------------------------------------------------------- 1 | a: 2 | b: 3 | c: 4 | d: 5 | e: 6 | f: "deeply_nested_value" 7 | g: 123 8 | -------------------------------------------------------------------------------- /tests/fixtures/structured/type_mismatch_coercion.toml: -------------------------------------------------------------------------------- 1 | port = 8080 2 | version_string = "1.2.3" 3 | version_int = 123 4 | enabled = true 5 | disabled = false 6 | count = 42 7 | -------------------------------------------------------------------------------- /tests/fixtures/structured/type_coercion.yaml: -------------------------------------------------------------------------------- 1 | port: 8080 2 | version: "1.2.3" 3 | count: "42" 4 | flag: true 5 | items: 6 | - id: 1 7 | name: "first" 8 | - id: 2 9 | name: "second" 10 | -------------------------------------------------------------------------------- /tests/fixtures/structured/multi_doc.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | service: web 3 | port: 8080 4 | enabled: true 5 | --- 6 | service: api 7 | port: 3000 8 | enabled: false 9 | --- 10 | service: cache 11 | port: 6379 12 | enabled: true 13 | -------------------------------------------------------------------------------- /tests/fixtures/structured/datetime.toml: -------------------------------------------------------------------------------- 1 | timestamp = 2024-01-15T10:30:00Z 2 | created_date = 2024-01-01T00:00:00Z 3 | future_date = 2025-12-31T23:59:59Z 4 | 5 | [event] 6 | start = 2024-06-15T09:00:00Z 7 | end = 2024-06-15T17:00:00Z 8 | -------------------------------------------------------------------------------- /tests/fixtures/structured/null_boolean_edge_cases.yaml: -------------------------------------------------------------------------------- 1 | null_value: null 2 | null_string: "null" 3 | bool_true: true 4 | bool_false: false 5 | string_true: "true" 6 | string_false: "false" 7 | empty_string: "" 8 | missing_is_not_present: yes 9 | -------------------------------------------------------------------------------- /tests/fixtures/structured/numeric_edge_cases.json: -------------------------------------------------------------------------------- 1 | { 2 | "int_value": 42, 3 | "float_value": 1.5, 4 | "negative_int": -10, 5 | "negative_float": -2.7, 6 | "zero": 0, 7 | "large_int": 9223372036854775806, 8 | "mixed_array": [1, 2.5, -3, 0, 100.99] 9 | } 10 | -------------------------------------------------------------------------------- /tests/fixtures/structured/large_config.yaml: -------------------------------------------------------------------------------- 1 | # This file is intentionally large to test size limits 2 | # Adding comments to increase file size beyond the test threshold 3 | # More comments here to make sure we exceed 
100 bytes easily 4 | # Yet another comment line for padding 5 | server: 6 | port: 9999 7 | host: testhost 8 | debug: false 9 | -------------------------------------------------------------------------------- /tests/fixtures/structured/config.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 8080 3 | host: localhost 4 | debug: true 5 | 6 | database: 7 | host: db.example.com 8 | port: 5432 9 | name: myapp 10 | credentials: 11 | username: admin 12 | password: secret123 13 | 14 | features: 15 | - name: auth 16 | enabled: true 17 | - name: logging 18 | enabled: false 19 | - name: metrics 20 | enabled: true 21 | -------------------------------------------------------------------------------- /tests/fixtures/structured/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "detect" 3 | version = "0.3.0" 4 | edition = "2021" 5 | authors = ["Test Author "] 6 | 7 | [dependencies] 8 | serde = { version = "1.0", features = ["derive"] } 9 | tokio = { version = "1.35", features = ["full"] } 10 | regex = "1.10" 11 | 12 | [dev-dependencies] 13 | tempfile = "3.8" 14 | proptest = "1.4" 15 | 16 | [features] 17 | default = ["mcp"] 18 | mcp = [] 19 | -------------------------------------------------------------------------------- /tests/fixtures/structured/nested_arrays.json: -------------------------------------------------------------------------------- 1 | { 2 | "services": [ 3 | { 4 | "name": "web", 5 | "ports": [8080, 8443], 6 | "enabled": true 7 | }, 8 | { 9 | "name": "api", 10 | "ports": [3000], 11 | "enabled": true 12 | }, 13 | { 14 | "name": "cache", 15 | "ports": [6379], 16 | "enabled": false 17 | } 18 | ], 19 | "metadata": { 20 | "version": "2.0", 21 | "author": "test" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /tests/fixtures/structured/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "my-app", 3 | "version": "1.2.3", 4 | "description": "A sample application", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "jest", 8 | "build": "webpack", 9 | "start": "node index.js" 10 | }, 11 | "dependencies": { 12 | "express": "^4.18.0", 13 | "lodash": "^4.17.21" 14 | }, 15 | "devDependencies": { 16 | "jest": "^29.0.0", 17 | "webpack": "^5.75.0" 18 | }, 19 | "engines": { 20 | "node": ">=16.0.0" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod aliases; 2 | pub mod ast; 3 | pub mod error; 4 | pub mod raw; 5 | pub mod structured_path; 6 | pub mod time; 7 | pub mod typechecker; 8 | pub mod typed; 9 | 10 | // Re-exports 11 | pub use aliases::{resolve_alias, suggest_aliases}; 12 | pub use ast::{test_utils, RawExpr, RawPredicate, RawValue}; 13 | pub use error::DetectError; 14 | pub use raw::RawParser; 15 | pub use structured_path::{parse_path, PathComponent, PathParseError}; 16 | pub use time::parse_time_value; 17 | pub use typechecker::Typechecker; 18 | -------------------------------------------------------------------------------- /src/expr/short_circuit.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | pub enum ShortCircuit { 4 | Known(bool), 5 | Unknown(X), 6 | } 7 | 8 | impl From for ShortCircuit { 9 | fn from(value: bool) -> Self { 10 | 
ShortCircuit::Known(value) 11 | } 12 | } 13 | 14 | impl Display for ShortCircuit { 15 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 16 | match self { 17 | ShortCircuit::Known(x) => write!(f, "known: {x}"), 18 | ShortCircuit::Unknown(x) => write!(f, "unknown: {x}"), 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/predicate_error.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | /// Error type for predicate parsing operations 4 | #[derive(Debug, Error)] 5 | pub enum PredicateParseError { 6 | #[error("Invalid regex pattern")] 7 | Regex(#[from] regex::Error), 8 | 9 | #[error("Invalid number")] 10 | Numeric(#[from] std::num::ParseIntError), 11 | 12 | #[error("Invalid time: {0}")] 13 | Temporal(String), 14 | 15 | #[error("DFA compilation failed: {0}")] 16 | Dfa(String), 17 | 18 | #[error("Incompatible: {0}")] 19 | Incompatible(String), 20 | 21 | #[error("Unknown selector: {0}")] 22 | UnknownSelector(String), 23 | } 24 | -------------------------------------------------------------------------------- /src/parser/structured_path.pest: -------------------------------------------------------------------------------- 1 | // Grammar for structured data path expressions 2 | // Parses paths like: .spec.replicas, [0].name, .items[*].id 3 | 4 | WHITESPACE = _{ " " | "\t" } 5 | 6 | path = { SOI ~ component+ ~ EOI } 7 | 8 | component = _{ recursive_key | key_access | index_access | wildcard_access } 9 | 10 | // Recursive descent: ..fieldname 11 | recursive_key = { ".." ~ identifier } 12 | 13 | // Key access: .fieldname 14 | key_access = { "." ~ identifier } 15 | identifier = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | "-" | "/")* } 16 | 17 | // Array index: [42] 18 | index_access = { "[" ~ number ~ "]" } 19 | number = @{ ASCII_DIGIT+ } 20 | 21 | // Wildcard array access: [*] 22 | wildcard_access = { "[" ~ "*" ~ "]" } 23 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | 2 | 3 | Copyright 2022 Inanna Malick 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "detect" 3 | description = "Expression-based file search combining name, content, metadata, and structured data predicates" 4 | license = "MIT OR Apache-2.0" 5 | version = "0.3.0" 6 | edition = "2021" 7 | rust-version = "1.70" 8 | repository = "https://github.com/inanna-malick/detect/" 9 | homepage = "https://github.com/inanna-malick/detect/" 10 | keywords = ["egrep", "grep", "pattern", "regex", "search"] 11 | categories = ["command-line-utilities", "filesystem"] 12 | 13 | [[bin]] 14 | name = "detect" 15 | path = "src/main.rs" 16 | 17 | [dependencies] 18 | clap = {version = "4.5", features = ["derive"]} 19 | futures = "0.3.31" 20 | ignore = "0.4" 21 | pest = "2.7.15" 22 | pest_derive = "2.7.15" 23 | recursion = {version = "0.5", features = ["experimental"]} 24 | regex = "1.12" 25 | regex-automata = "0.4.13" 26 | slog-term = "2.9" 27 | slog = "2.7" 28 | tokio = {version = "1.48", features = ["rt", "rt-multi-thread", "fs", "macros"]} 29 | tokio-util = {version ="0.7.17", features = ["io"] } 30 | serde_json = "1.0" 31 | chrono = "0.4" 32 | thiserror = "2.0" 33 | miette = { version = "7.6.0", features = ["fancy"] } 34 | toml = "0.9.8" 35 | yaml-rust2 = "0.10.4" 36 | 37 | [dev-dependencies] 38 | tempfile = "3" 39 | 40 | [profile.release] 41 | opt-level = 3 42 | lto = true 43 | codegen-units = 1 44 | strip = true 45 | -------------------------------------------------------------------------------- /tests/fixtures/structured/unicode_strings.json: -------------------------------------------------------------------------------- 1 | { 2 | "emoji_field": "🚀", 3 | "emoji_value": "test 🎉 value", 4 | "unicode_chars": "Ñoño αβγ 中文", 5 | "multiline": "line1\nline2\nline3", 6 | "long_string": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?" 
7 | } 8 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | /// uninhabited type, used to signify that something does not exist 2 | /// provided typeclass instances never invoked but provided for 3 | /// convenience 4 | #[derive(Debug, Clone)] 5 | pub enum Done {} 6 | 7 | impl std::fmt::Display for Done { 8 | fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 9 | unreachable!() 10 | } 11 | } 12 | 13 | /// Parse size values like "1mb", "100kb", etc. into bytes 14 | /// 15 | /// Supports units: b, kb, mb, gb, tb (case-insensitive) 16 | /// 17 | /// # Examples 18 | /// ``` 19 | /// use detect::util::parse_size; 20 | /// assert_eq!(parse_size("10mb").unwrap(), 10 * 1024 * 1024); 21 | /// assert_eq!(parse_size("500KB").unwrap(), 500 * 1024); 22 | /// ``` 23 | pub fn parse_size(s: &str) -> Result { 24 | let s = s.trim().to_lowercase(); 25 | 26 | let mut unit_start = 0; 27 | for (i, ch) in s.char_indices() { 28 | if !ch.is_ascii_digit() && ch != '.' { 29 | unit_start = i; 30 | break; 31 | } 32 | } 33 | 34 | if unit_start == 0 { 35 | return Err(format!( 36 | "Invalid size '{s}': expected format like '10mb', '500kb'" 37 | )); 38 | } 39 | 40 | let number_str = &s[..unit_start]; 41 | let unit_str = &s[unit_start..]; 42 | 43 | let number: f64 = number_str 44 | .parse() 45 | .map_err(|_| format!("Invalid size '{s}': cannot parse numeric value '{number_str}'"))?; 46 | 47 | let multiplier = match unit_str { 48 | "b" | "byte" | "bytes" => 1.0, 49 | "k" | "kb" | "kilobyte" | "kilobytes" => 1024.0, 50 | "m" | "mb" | "megabyte" | "megabytes" => 1024.0 * 1024.0, 51 | "g" | "gb" | "gigabyte" | "gigabytes" => 1024.0 * 1024.0 * 1024.0, 52 | "t" | "tb" | "terabyte" | "terabytes" => 1024.0 * 1024.0 * 1024.0 * 1024.0, 53 | _ => { 54 | return Err(format!( 55 | "Invalid size '{s}': unknown unit '{unit_str}' (expected: b, kb, mb, gb, tb)" 56 | )) 57 | } 58 | }; 59 | 60 | Ok((number * multiplier) as u64) 61 | } 62 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | ## [0.3.0] - 2025-01-22 6 | 7 | ### Added 8 | 9 | - **Structured data selectors** for querying YAML, JSON, and TOML file contents 10 | - Dot notation for nested field access: `yaml:.server.port` 11 | - Array indexing: `json:.dependencies[0]` 12 | - Wildcards with OR semantics: `yaml:.features[*].enabled` 13 | - Recursive descent: `yaml:..field` finds field at any depth 14 | - Comparison operators: `==`, `!=`, `>`, `<`, `>=`, `<=` 15 | - String matchers: `contains`, `~=` (regex) 16 | - Automatic type coercion between numbers and strings 17 | - Fully composable with other predicates: `size < 50kb AND yaml:.server.port > 8000` 18 | - `--max-structured-size` CLI flag to configure maximum file size for structured parsing (default: 10MB) 19 | - Support for multi-document YAML with OR semantics (matches if ANY document matches) 20 | - **Single-word file type aliases**: Use `file`, `dir`, `symlink`, etc. as shorthand for `type == file`, `type == dir`, etc. Enables natural queries like `dir && depth > 0` or `file && size > 1mb`. All file type values work as aliases (case-insensitive). 
21 | - MCP (Model Context Protocol) server support for AI assistant integration 22 | - Better error messages with source location tracking and helpful suggestions 23 | - Unquoted regex pattern support - `content ~= [0-9]+` works without quotes 24 | - Parse-time validation for `type` selector values (breaking change - see below) 25 | - Relative path display in search results 26 | - Dual MIT/Apache-2.0 licensing 27 | - Greater than or equal (`>=`) and less than or equal (`<=`) operators for temporal selectors 28 | 29 | ### Changed 30 | 31 | - Two-phase parser architecture (raw parsing → type checking) for better errors 32 | - Relative time formats now support full aliases (`-7days`, `-2hours`, etc.) 33 | 34 | ### Breaking Changes 35 | 36 | - `type` selector now validates file type values at parse time. Invalid types like `type == dirq` produce parse errors instead of matching nothing. Valid types: `file`, `dir`/`directory`, `symlink`/`link`, `socket`/`sock`, `fifo`/`pipe`, `block`/`blockdev`, `char`/`chardev` (case-insensitive) 37 | -------------------------------------------------------------------------------- /src/predicate/enum_matcher.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use std::fmt::{self, Debug, Display}; 3 | use std::hash::Hash; 4 | 5 | /// Generic matcher for enum-valued predicates with parse-time validation 6 | #[derive(Clone, Debug, PartialEq, Eq)] 7 | pub enum EnumMatcher { 8 | Equals(E), 9 | NotEquals(E), 10 | In(HashSet), 11 | } 12 | 13 | /// Trait for enums usable as predicate values 14 | /// 15 | /// Implementors provide parsing from string aliases, validation, 16 | /// and display logic for enum-based selectors like `type`. 17 | pub trait EnumPredicate: Sized + Eq + Hash + Clone + Debug { 18 | /// Parse from string, checking all aliases. 19 | /// 20 | /// Returns error message on failure (not a structured error type, 21 | /// since it gets wrapped in `DetectError::InvalidValue` immediately). 
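    ///
    /// Illustrative example (aliases per `docs/operators.md`): for the `type`
    /// selector, `from_str("dir")` and `from_str("directory")` resolve to the
    /// same variant, while `from_str("dirq")` returns an error string.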
22 | fn from_str(s: &str) -> Result; 23 | 24 | /// All valid string representations (for error messages) 25 | fn all_valid_strings() -> &'static [&'static str]; 26 | 27 | /// Canonical string representation for this variant 28 | fn as_str(&self) -> &'static str; 29 | 30 | /// All aliases that map to this variant 31 | fn aliases(&self) -> &'static [&'static str]; 32 | } 33 | 34 | impl EnumMatcher { 35 | /// Check if a value matches this enum matcher 36 | pub fn is_match(&self, value: &E) -> bool { 37 | match self { 38 | EnumMatcher::Equals(v) => value == v, 39 | EnumMatcher::NotEquals(v) => value != v, 40 | EnumMatcher::In(set) => set.contains(value), 41 | } 42 | } 43 | } 44 | 45 | impl Display for EnumMatcher { 46 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 47 | match self { 48 | EnumMatcher::Equals(v) => write!(f, "== {}", v.as_str()), 49 | EnumMatcher::NotEquals(v) => write!(f, "!= {}", v.as_str()), 50 | EnumMatcher::In(set) => { 51 | write!(f, "in [")?; 52 | let mut items: Vec<_> = set.iter().map(EnumPredicate::as_str).collect(); 53 | items.sort_unstable(); // Deterministic display order 54 | write!(f, "{}", items.join(", "))?; 55 | write!(f, "]") 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/eval.rs: -------------------------------------------------------------------------------- 1 | use crate::expr::short_circuit::ShortCircuit; 2 | use crate::expr::Expr; 3 | use crate::predicate::{Predicate, StreamingCompiledContentPredicateRef}; 4 | use crate::util::Done; 5 | use futures::{Stream, StreamExt}; 6 | use regex_automata::dfa::Automaton; 7 | use tokio::io::{self}; 8 | 9 | pub mod fs; 10 | pub mod structured; 11 | 12 | pub async fn run_contents_predicate_stream( 13 | e: Expr>>, 14 | mut s: impl Stream>> + std::marker::Unpin, 15 | ) -> io::Result>> { 16 | let config = regex_automata::util::start::Config::new(); 17 | 18 | // Initialize state for DFA patterns 19 | let mut e: Expr> = e.map_predicate(|p| match p { 20 | Predicate::Content(pred) => { 21 | let dfa = pred.inner; 22 | let s = dfa 23 | .start_state(&config) 24 | .expect("DFA start_state failed: invalid regex configuration"); 25 | Predicate::Content((dfa, s)) 26 | } 27 | _ => unreachable!(), 28 | }); 29 | 30 | while let Some(next) = s.next().await { 31 | // read the next buffered chunk of bytes 32 | let bytes = next?; 33 | 34 | // advance each pattern appropriately 35 | e = e.reduce_predicate_and_short_circuit(move |p| match p { 36 | Predicate::Content((dfa, state)) => { 37 | // DFA streaming processing 38 | let mut next_state = state; 39 | let mut iter = bytes.iter(); 40 | 41 | loop { 42 | if let Some(byte) = iter.next() { 43 | next_state = dfa.next_state(next_state, *byte); 44 | 45 | if dfa.is_match_state(next_state) { 46 | break ShortCircuit::Known(true); 47 | } 48 | 49 | if dfa.is_dead_state(next_state) { 50 | break ShortCircuit::Known(false); 51 | } 52 | } else { 53 | break ShortCircuit::Unknown(Predicate::Content((dfa, next_state))); 54 | } 55 | } 56 | } 57 | _ => unreachable!(), 58 | }); 59 | } 60 | 61 | // Final evaluation 62 | let e = e.reduce_predicate_and_short_circuit(|p| match p { 63 | Predicate::Content((dfa, state)) => { 64 | let next_state = dfa.next_eoi_state(state); 65 | let matched = dfa.is_match_state(next_state); 66 | ShortCircuit::Known(matched) 67 | } 68 | _ => unreachable!(), 69 | }); 70 | 71 | Ok(e) 72 | } 73 | -------------------------------------------------------------------------------- /docs/examples.md: 
-------------------------------------------------------------------------------- 1 | # detect Examples 2 | 3 | **Quick tips:** 4 | - Start with cheap filters (`ext`, `size`, `type`) before expensive ones (`content`, structured) 5 | - Quote expressions with spaces/special chars: `'ext == rs AND content ~= "async "'` 6 | - Use `-i` to include gitignored files 7 | 8 | ## Progressive Examples 9 | 10 | Each line adds complexity - shows how to combine features: 11 | 12 | ```bash 13 | # Start simple 14 | detect 'ext == rs' # selector + operator 15 | 16 | # Combine with AND 17 | detect 'ext in [rs,toml] AND size > 1mb' # set membership, numeric 18 | 19 | # Add temporal predicates 20 | detect 'ext == rs AND size > 1mb AND modified > -7d' # relative time 21 | 22 | # Content matching with regex 23 | detect 'ext == ts AND content ~= "class.*Service"' # regex operator 24 | 25 | # Boolean logic: grouping, NOT 26 | detect '(ext == rs OR ext == toml) AND NOT path ~= test' # precedence, path filter 27 | 28 | # Structured data + file metadata 29 | detect 'yaml:.server.port > 8000 AND size < 100kb' # structured selector 30 | ``` 31 | 32 | ## Structured Data Patterns 33 | 34 | Navigate YAML/JSON/TOML with path syntax: 35 | 36 | ```bash 37 | # Nested field access: .field.field 38 | yaml:.server.port == 8080 39 | 40 | # Array indexing + field access: [0].field 41 | json:.items[0].name == "first" 42 | 43 | # Wildcard - matches if ANY element matches: [*] 44 | yaml:.features[*].enabled == true 45 | 46 | # Recursive descent - finds field at any depth: ..field 47 | toml:..database contains prod 48 | 49 | # Combine with file predicates 50 | yaml:.replicas > 3 AND size < 100kb AND NOT path ~= test 51 | 52 | # Multi-format queries with OR 53 | json:.version ~= "^1\\." OR toml:.package.version ~= "^1\\." 54 | ``` 55 | 56 | ## Common Patterns 57 | 58 | Real-world multi-feature queries: 59 | 60 | ```bash 61 | # Large recent files with TODOs, excluding tests 62 | detect 'size > 10kb AND modified > -7d AND content contains TODO AND NOT path ~= test' 63 | 64 | # Security: env files with secrets outside node_modules 65 | detect 'name ~= "^\.env" AND NOT path ~= node_modules AND content ~= "(password|secret|key)"' 66 | 67 | # Recent config changes 68 | detect 'ext in [json,yaml,toml] AND modified > -3d' 69 | 70 | # Kubernetes manifests with high replicas 71 | detect 'yaml:.kind == Deployment AND yaml:.spec.replicas > 3' 72 | 73 | # Find TypeScript async functions in source directories 74 | detect 'path ~= "^\./(src|lib)/" AND ext == ts AND content ~= "async\s+function"' 75 | ``` 76 | 77 | ## Migration from find/grep 78 | 79 | ```bash 80 | # find . -name "*.js" -size +1M 81 | detect 'ext == js AND size > 1mb' 82 | 83 | # find . -type f -exec grep -l "TODO" {} \; 84 | detect 'type == file AND content contains TODO' 85 | 86 | # grep -r "class.*Service" --include="*.ts" . 87 | detect 'ext == ts AND content ~= "class.*Service"' 88 | ``` 89 | -------------------------------------------------------------------------------- /docs/operators.md: -------------------------------------------------------------------------------- 1 | # detect Operators Reference 2 | 3 | All operators organized by selector type. 
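Size literals in the numeric tables below are normalized by `detect::util::parse_size` (see `src/util.rs`). A minimal sketch of that helper's behaviour, for library users; the helper itself is case-insensitive, while expressions in this reference stick to the lowercase units noted under Common Mistakes:

```rust
use detect::util::parse_size;

fn main() {
    // Unit suffixes multiply by powers of 1024 (b, kb, mb, gb, tb).
    assert_eq!(parse_size("1kb").unwrap(), 1024);
    assert_eq!(parse_size("10mb").unwrap(), 10 * 1024 * 1024);
    // Fractional sizes are accepted and truncated to whole bytes.
    assert_eq!(parse_size("2.5mb").unwrap(), 2_621_440);
    // The helper trims and lowercases its input.
    assert_eq!(parse_size("500KB").unwrap(), 500 * 1024);
}
```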
4 | 5 | ## String Operators 6 | 7 | For: `name`, `ext`, `path`, `dir`, `content` 8 | 9 | | Operator | Description | Example | 10 | |-------------|-------------|---------| 11 | | `==` | Exact match (case-sensitive) | `name == "README.md"` | 12 | | `!=` | Not equal | `ext != md` | 13 | | `contains` | Substring search (literal) | `content contains TODO` | 14 | | `~=` | Regex pattern matching | `name ~= "test.*\.rs$"` | 15 | | `in [a,b,c]` | Match any item in set | `ext in [js,ts,jsx,tsx]` | 16 | 17 | Regex uses Rust regex syntax. Set membership allows optional spaces: `ext in [rs, toml]`. 18 | 19 | ## Numeric Operators 20 | 21 | For: `size`, `depth` 22 | 23 | | Operator | Description | Example | 24 | |----------|-------------|---------| 25 | | `==` | Exact value | `size == 1024` | 26 | | `!=` | Not equal | `depth != 0` | 27 | | `>` | Greater than | `size > 1mb` | 28 | | `<` | Less than | `depth < 5` | 29 | | `>=` | Greater or equal | `size >= 100kb` | 30 | | `<=` | Less or equal | `depth <= 2` | 31 | 32 | Size units: `kb`, `mb`, `gb`, `tb` (lowercase only, e.g. `1kb`, `2.5mb`) 33 | 34 | ## Temporal Operators 35 | 36 | For: `modified`, `created`, `accessed` 37 | 38 | | Operator | Description | Example | 39 | |----------|-------------|---------| 40 | | `>` | After (newer than) | `modified > -7d` | 41 | | `<` | Before (older than) | `created < 2024-01-01` | 42 | | `>=` | At or after | `modified >= -1w` | 43 | | `<=` | At or before | `accessed <= -1d` | 44 | | `==` | Exact time | `modified == 2024-01-15` | 45 | | `!=` | Not at time | `created != 2024-01-01` | 46 | 47 | **Formats:** Relative `-7d`, `-2h`, `-30m`, `-1w` (units: `s`, `m`/`min`, `h`/`hr`, `d`/`day`, `w`/`week`, with plurals). Absolute `2024-01-15`, `2024-01-15T10:30:00`. 48 | 49 | ## Enum Operators 50 | 51 | For: `type` 52 | 53 | | Operator | Description | Example | 54 | |----------|-------------|---------| 55 | | `==` | Exact match (validated at parse-time) | `type == file` | 56 | | `!=` | Not equal | `type != dir` | 57 | | `in [a,b,c]` | Match any type in set | `type in [file,dir,symlink]` | 58 | 59 | **Valid types (case-insensitive):** `file`, `dir`/`directory`, `symlink`/`link`, `socket`/`sock`, `fifo`/`pipe`, `block`/`blockdev`, `char`/`chardev`. Invalid values caught at parse-time with suggestions. 60 | 61 | ## Boolean Operators 62 | 63 | | Operator | Description | Example | 64 | |----------|-------------|---------| 65 | | `AND` / `&&` | Both conditions true | `ext == rs AND size > 1kb` | 66 | | `OR` / `\|\|` | Either condition true | `file OR dir` | 67 | | `NOT` / `!` | Negate condition | `NOT symlink` | 68 | | `( )` | Group expressions | `(file OR dir) AND size > 1kb` | 69 | 70 | **Precedence:** `NOT` > `AND` > `OR`. Use parentheses for clarity: `(a OR b) AND c`. 71 | 72 | ## Common Mistakes 73 | 74 | **Units:** Lowercase only - `1mb` not `1MB` 75 | **Regex quotes:** Quote patterns with spaces - `content ~= "class.*"` not `content ~= class.*` 76 | **Wildcards:** Use `ext == rs` not `*.rs` (or `name ~= ".*\.rs$"` for regex) 77 | -------------------------------------------------------------------------------- /docs/predicates.md: -------------------------------------------------------------------------------- 1 | # detect Predicates Reference 2 | 3 | All selectors and their types. Aliases shown as `primary` / `alias`. 
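The single-word aliases listed below resolve through `resolve_alias` in `src/parser/aliases.rs`; a minimal sketch using the helpers re-exported from `src/parser/mod.rs`, intended as an illustration for library users rather than a documented API guarantee:

```rust
use detect::parser::{resolve_alias, suggest_aliases};

fn main() {
    // `dir` on its own is equivalent to `type == dir`.
    assert!(resolve_alias("dir").is_ok());
    // Unknown words are rejected rather than silently matching nothing.
    assert!(resolve_alias("dirq").is_err());
    // Close misspellings (edit distance <= 2) get suggestions.
    assert!(suggest_aliases("fil").contains(&"file".to_string()));
}
```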
4 | 5 | ## File Identity 6 | 7 | | Selector | Type | Description | Example | 8 | |----------|--------|-------------|---------| 9 | | `name` / `filename` | String | Full filename with extension | `name == "README.md"` | 10 | | `basename` / `stem` | String | Filename without extension | `basename == README` | 11 | | `ext` / `extension` | String | File extension without dot | `ext == rs` | 12 | | `path` | String | Full absolute path | `path ~= "/src/"` | 13 | | `dir` / `parent` / `directory` | String | Parent directory path | `dir == "/usr/bin"` | 14 | 15 | ## File Properties 16 | 17 | | Selector | Type | Description | Example | 18 | |----------|---------|-------------|---------| 19 | | `size` / `filesize` / `bytes` | Numeric | File size in bytes | `size > 1mb` | 20 | | `type` / `filetype` | Enum | File type (validated at parse-time) | `type == file` | 21 | | `depth` | Numeric | Directory depth from root | `depth <= 3` | 22 | 23 | **Size units:** `kb`, `mb`, `gb`, `tb` (e.g. `45kb`, `1.5mb`) 24 | 25 | **Valid types (case-insensitive):** `file`, `dir`/`directory`, `symlink`/`link`, `socket`/`sock`, `fifo`/`pipe`, `block`/`blockdev`, `char`/`chardev` 26 | 27 | ## Timestamps 28 | 29 | | Selector | Type | Description | Example | 30 | |----------|------|-------------|---------| 31 | | `modified` / `mtime` | Temporal | Last modification time | `modified > -7d` | 32 | | `created` / `ctime` | Temporal | File creation time | `created > 2024-01-01` | 33 | | `accessed` / `atime` | Temporal | Last access time | `accessed < -1h` | 34 | 35 | **Formats:** Relative `-7d`/`-7days`, `-2h`/`-2hours` (units: `s`, `m`/`min`, `h`/`hr`, `d`/`day`, `w`/`week` + plurals). Absolute `2024-01-15`, `2024-01-15T10:30:00`. 36 | 37 | ## Content 38 | 39 | | Selector | Type | Description | Example | 40 | |----------|------|-------------|---------| 41 | | `content` / `text` / `contents` | String | File text contents | `content contains TODO` | 42 | 43 | ## Structured Data 44 | 45 | Query YAML, JSON, TOML by navigating structure: 46 | 47 | | Selector | Description | Example | 48 | |----------|-------------|---------| 49 | | `yaml:.path` | YAML navigation | `yaml:.server.port == 8080` | 50 | | `json:.path` | JSON navigation | `json:.items[0].name == "test"` | 51 | | `toml:.path` | TOML navigation | `toml:.package.edition == "2021"` | 52 | 53 | **Navigation syntax:** 54 | 55 | | Pattern | Meaning | Example | 56 | |---------|---------|---------| 57 | | `.field` | Object field access | `yaml:.server` | 58 | | `.nested.field` | Nested fields | `json:.meta.author` | 59 | | `[0]` | Array index | `yaml:.items[0]` | 60 | | `[*]` | Wildcard - any element | `yaml:.features[*].enabled` | 61 | | `..field` | Recursive - any depth | `toml:..password` | 62 | 63 | **Operators:** `==`, `!=`, `>`, `<`, `>=`, `<=`, `contains`, `~=` (same as other selectors) 64 | 65 | **Type coercion:** Numbers/booleans convert to strings - `yaml:.port == 8080` matches both `8080` and `"8080"` 66 | 67 | **Existence check:** Use selector alone without operator - `yaml:.server` checks if field exists 68 | 69 | **Limitations:** 70 | - Files > 10MB skipped (configurable: `--max-structured-size`) 71 | - Non-UTF8 files skip structured evaluation 72 | - Invalid YAML/JSON/TOML returns false (no error) 73 | - Multi-document YAML: matches if ANY document matches 74 | -------------------------------------------------------------------------------- /src/parser/grammar.pest: -------------------------------------------------------------------------------- 1 | WHITESPACE = _{ " " 
| "\t" | NEWLINE } 2 | 3 | program = { SOI ~ expr ~ EOI } 4 | expr = { prefix* ~ primary ~ (infix ~ prefix* ~ primary )* } 5 | infix = _{ and | or } 6 | and = { "&&" | ^"and" } 7 | or = { "||" | ^"or" } 8 | prefix = _{ neg } 9 | neg = { "!" | "\\!" | ^"not" } 10 | primary = _{ predicate | single_word | "(" ~ expr ~ ")" } 11 | 12 | predicate = { selector ~ operator ~ value } 13 | selector = @{ (ASCII_ALPHANUMERIC | "." | "_" | "-" | "/" | "[" | "]" | "*" | ":")+ } 14 | // Parse operators flexibly - validate in typechecker 15 | // Start with symbols or letters, but not mix arbitrarily 16 | operator = @{ 17 | // Symbol-based operators (can combine symbols) 18 | ("=" | "!" | ">" | "<" | "~")+ | 19 | // Word-based operators (alphanumeric with underscores) 20 | // But NOT the reserved infix/prefix operators 21 | !(^"and" | ^"or" | ^"not") ~ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* 22 | } 23 | 24 | value = { value_content ~ trailing_quote? } 25 | 26 | value_content = _{ quoted_string | unterminated_string | raw_token } 27 | 28 | // Error detection: unterminated string literals 29 | // Matches opening quote without closing quote before EOI/newline 30 | unterminated_string = @{ 31 | ("\"" ~ (!"\"" ~ (escaped | ANY))* ~ (EOI | &NEWLINE)) | 32 | ("'" ~ (!"'" ~ (escaped | ANY))* ~ (EOI | &NEWLINE)) 33 | } 34 | 35 | // Error detection: trailing quote after value content 36 | trailing_quote = @{ "\"" | "'" } 37 | 38 | // Raw tokens with recursive balanced delimiter matching 39 | // Supports nested structures like ((pub|async)\s+)* or [[a-z]] 40 | raw_token = @{ raw_char+ } 41 | 42 | raw_char = _{ 43 | "\\" ~ ANY | // Escaped character 44 | balanced_paren | // Recursive paren matching 45 | balanced_bracket | // Recursive bracket matching 46 | balanced_curly | // Recursive curly matching 47 | !WHITESPACE ~ !"&&" ~ !"||" ~ !")" ~ !"\"" ~ !"'" ~ ANY // Regular character 48 | } 49 | 50 | balanced_paren = { "(" ~ ( "\\" ~ ANY | balanced_paren | balanced_bracket | balanced_curly | !")" ~ ANY )* ~ ")" } 51 | balanced_bracket = { "[" ~ ( "\\" ~ ANY | balanced_paren | balanced_bracket | balanced_curly | !"]" ~ ANY )* ~ "]" } 52 | balanced_curly = { "{" ~ ( "\\" ~ ANY | balanced_paren | balanced_bracket | balanced_curly | !"}" ~ ANY )* ~ "}" } 53 | 54 | quoted_string = ${ "\"" ~ inner_double ~ "\"" | "'" ~ inner_single ~ "'" } 55 | inner_double = @{ (!"\"" ~ (escaped | ANY))* } 56 | inner_single = @{ (!"'" ~ (escaped | ANY))* } 57 | 58 | escaped = { "\\" ~ ("\"" | "'" | "\\" | "n" | "t" | "r") } 59 | 60 | single_word = @{ 61 | // Structured data selectors: word:.path (validated at typecheck) 62 | (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* ~ ":" ~ (ASCII_ALPHANUMERIC | "." | "_" | "-" | "/" | "[" | "]" | "*")+ | 63 | // Regular word aliases 64 | (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* 65 | } 66 | 67 | // Separate entry point for parsing set contents 68 | // Used by typechecker when operator is 'in' 69 | set_contents = { SOI ~ set_items? ~ EOI } 70 | set_items = { set_item? 
~ ("," ~ set_item?)* } 71 | set_item = { quoted_string | bare_set_item } 72 | // Bare items in sets: stop at comma, whitespace, or quotes 73 | bare_set_item = @{ (!("," | "\"" | "'" | WHITESPACE) ~ ANY)+ } -------------------------------------------------------------------------------- /src/expr/frame.rs: -------------------------------------------------------------------------------- 1 | use super::Expr; 2 | use futures::FutureExt; 3 | use recursion::{ 4 | experimental::frame::AsyncMappableFrame, Collapsible, MappableFrame, PartiallyApplied, 5 | }; 6 | use std::fmt::Display; 7 | use tokio::try_join; 8 | 9 | /// short-lived single layer of a filesystem entity matcher expression, used for 10 | /// expressing recursive algorithms over a single layer of a borrowed Expr 11 | pub enum ExprFrame { 12 | // borrowed predicate 13 | Predicate(P), 14 | // boolean operators 15 | Not(X), 16 | And(X, X), 17 | Or(X, X), 18 | // literal values 19 | Literal(bool), 20 | } 21 | 22 | impl
<P>
MappableFrame for ExprFrame { 23 | type Frame = ExprFrame; 24 | 25 | fn map_frame(input: Self::Frame, mut f: impl FnMut(A) -> B) -> Self::Frame { 26 | use ExprFrame::{And, Literal, Not, Or, Predicate}; 27 | match input { 28 | Not(a) => Not(f(a)), 29 | And(a, b) => And(f(a), f(b)), 30 | Or(a, b) => Or(f(a), f(b)), 31 | Predicate(p) => Predicate(p), 32 | Literal(bool) => Literal(bool), 33 | } 34 | } 35 | } 36 | 37 | async fn map_frame_async<'a, A, B, E, P>( 38 | input: ExprFrame, 39 | f: impl Fn(A) -> futures::future::BoxFuture<'a, Result> + Send + Sync + 'a, 40 | ) -> Result, E> 41 | where 42 | E: Send + 'a, 43 | A: Send + 'a, 44 | B: Send + 'a, 45 | { 46 | use ExprFrame::{And, Literal, Not, Or, Predicate}; 47 | match input { 48 | Not(a) => Ok(Not(f(a).await?)), 49 | And(a, b) => { 50 | let (a, b) = try_join!(f(a), f(b))?; 51 | Ok(And(a, b)) 52 | } 53 | Or(a, b) => { 54 | let (a, b) = try_join!(f(a), f(b))?; 55 | Ok(Or(a, b)) 56 | } 57 | Predicate(p) => Ok(Predicate(p)), 58 | Literal(bool) => Ok(Literal(bool)), 59 | } 60 | } 61 | 62 | impl AsyncMappableFrame for ExprFrame { 63 | fn map_frame_async<'a, A, B, E>( 64 | input: Self::Frame, 65 | f: impl Fn(A) -> futures::future::BoxFuture<'a, Result> + Send + Sync + 'a, 66 | ) -> futures::future::BoxFuture<'a, Result, E>> 67 | where 68 | E: Send + 'a, 69 | A: Send + 'a, 70 | B: Send + 'a, 71 | { 72 | map_frame_async(input, f).boxed() 73 | } 74 | } 75 | 76 | pub struct MapPredicateRef<'a, P>(pub &'a Expr
<P>
); 77 | 78 | impl Collapsible for &Expr
<P>
{ 79 | type FrameToken = ExprFrame; 80 | 81 | fn into_frame(self) -> ExprFrame { 82 | match self { 83 | Expr::Not(x) => ExprFrame::Not(x), 84 | Expr::And(a, b) => ExprFrame::And(a, b), 85 | Expr::Or(a, b) => ExprFrame::Or(a, b), 86 | Expr::Predicate(p) => ExprFrame::Predicate(p.clone()), 87 | Expr::Literal(b) => ExprFrame::Literal(*b), 88 | } 89 | } 90 | } 91 | 92 | impl<'a, P> Collapsible for MapPredicateRef<'a, P> { 93 | type FrameToken = ExprFrame; 94 | 95 | fn into_frame(self) -> ExprFrame { 96 | match self.0 { 97 | Expr::Not(x) => ExprFrame::Not(MapPredicateRef(x)), 98 | Expr::And(a, b) => ExprFrame::And(MapPredicateRef(a), MapPredicateRef(b)), 99 | Expr::Or(a, b) => ExprFrame::Or(MapPredicateRef(a), MapPredicateRef(b)), 100 | Expr::Predicate(p) => ExprFrame::Predicate(p), 101 | Expr::Literal(b) => ExprFrame::Literal(*b), 102 | } 103 | } 104 | } 105 | 106 | impl Display for ExprFrame<(), P> { 107 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 108 | match self { 109 | Self::Not(()) => write!(f, "NOT"), 110 | Self::And((), ()) => { 111 | write!(f, "AND") 112 | } 113 | Self::Or((), ()) => { 114 | write!(f, "OR") 115 | } 116 | Self::Predicate(arg0) => write!(f, "{arg0}"), 117 | Self::Literal(arg0) => write!(f, "{arg0}"), 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![doc = include_str!("../README.md")] 2 | #![warn(clippy::all)] 3 | #![warn(clippy::cargo)] 4 | 5 | pub mod eval; 6 | pub mod expr; 7 | pub mod parser; 8 | pub mod predicate; 9 | mod predicate_error; 10 | pub mod util; 11 | 12 | use std::{path::Path, sync::Arc, time::Instant}; 13 | 14 | use ignore::WalkBuilder; 15 | use parser::{error::DetectError, RawParser, Typechecker}; 16 | use predicate::Predicate; 17 | use slog::{debug, info, warn, Logger}; 18 | 19 | /// Runtime configuration for detect operations 20 | #[derive(Debug, Clone)] 21 | pub struct RuntimeConfig { 22 | /// Maximum file size (in bytes) for structured data parsing (YAML/JSON/TOML) 23 | /// Files larger than this will skip structured data evaluation 24 | pub max_structured_size: u64, 25 | } 26 | 27 | impl Default for RuntimeConfig { 28 | fn default() -> Self { 29 | Self { 30 | max_structured_size: 10 * 1024 * 1024, // 10MB default 31 | } 32 | } 33 | } 34 | 35 | pub async fn parse_and_run_fs( 36 | logger: Logger, 37 | root: &Path, 38 | respect_gitignore: bool, 39 | expr: String, 40 | config: RuntimeConfig, 41 | mut on_match: F, 42 | ) -> Result { 43 | let original_query = expr.clone(); 44 | let parse_result = RawParser::parse_raw_expr(&expr) 45 | .and_then(|raw_expr| Typechecker::typecheck(raw_expr, &expr, &config)); 46 | 47 | match parse_result { 48 | Ok(parsed_expr) => { 49 | if !root.exists() { 50 | return Err(DetectError::DirectoryNotFound { 51 | path: root.display().to_string(), 52 | }); 53 | } 54 | if !root.is_dir() { 55 | return Err(DetectError::NotADirectory { 56 | path: root.display().to_string(), 57 | }); 58 | } 59 | 60 | let walker = WalkBuilder::new(root) 61 | .hidden(false) 62 | .git_ignore(respect_gitignore) 63 | .filter_entry(|entry| { 64 | // Always exclude VCS directories, regardless of gitignore settings 65 | // This matches ripgrep's behavior 66 | !entry 67 | .file_name() 68 | .to_str() 69 | .is_some_and(|s| s == ".git" || s == ".hg" || s == ".svn") 70 | }) 71 | .build(); 72 | 73 | let expr = parsed_expr.map_predicate_ref(|p| match p { 74 | Predicate::Name(n) => 
Predicate::Name(Arc::clone(n)), 75 | Predicate::Metadata(m) => Predicate::Metadata(Arc::clone(m)), 76 | Predicate::Content(c) => Predicate::Content(c.as_ref()), 77 | Predicate::Structured(s) => Predicate::Structured(s.clone()), 78 | }); 79 | 80 | info!(logger, "parsed expression"; "expr" => %expr); 81 | 82 | let mut match_count = 0; 83 | for entry in walker { 84 | let entry = match entry { 85 | Ok(e) => e, 86 | Err(e) => { 87 | // Skip entries we can't access (permission denied, etc.) 88 | warn!(logger, "skipping entry due to walker error"; "error" => %e); 89 | continue; 90 | } 91 | }; 92 | let path = entry.path(); 93 | 94 | if path == root { 95 | continue; 96 | } 97 | 98 | let start = Instant::now(); 99 | 100 | let is_match = match eval::fs::eval(&logger, &expr, path, Some(root)).await { 101 | Ok(result) => result, 102 | Err(e) => { 103 | // Handle I/O errors gracefully - skip files we can't access 104 | if e.kind() == std::io::ErrorKind::PermissionDenied { 105 | debug!(logger, "skipping file due to permission denied"; "path" => #?path); 106 | continue; 107 | } 108 | // For other I/O errors, also skip but log at warning level 109 | warn!(logger, "skipping file due to I/O error"; "path" => #?path, "error" => %e); 110 | continue; 111 | } 112 | }; 113 | 114 | let duration = start.elapsed(); 115 | 116 | debug!(logger, "visited entity"; "path" => #?path, "duration" => #?duration, "result" => is_match); 117 | 118 | if is_match { 119 | match_count += 1; 120 | on_match(path); 121 | } 122 | } 123 | 124 | if match_count == 0 { 125 | eprintln!("No files matched the query: {original_query}"); 126 | eprintln!("Searched in: {}", root.display()); 127 | if respect_gitignore { 128 | eprintln!("Hint: Use -i flag to include gitignored files, or try broadening your search criteria"); 129 | } else { 130 | eprintln!("Hint: Try broadening your search criteria or check the path/expression syntax"); 131 | } 132 | } 133 | 134 | Ok(match_count) 135 | } 136 | Err(err) => Err(err), 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/parser/aliases.rs: -------------------------------------------------------------------------------- 1 | //! Single-word predicate aliases 2 | //! 3 | //! Provides shorthand syntax like `dir` instead of `type == dir`, 4 | //! enabling more natural queries: `dir && depth > 0` 5 | //! 6 | //! Also handles structured data selectors like `yaml:.field` as existence predicates. 7 | 8 | use std::sync::Arc; 9 | 10 | use super::typed::{parse_structured_selector, AliasError, DataFormat}; 11 | use crate::predicate::{ 12 | DetectFileType, EnumMatcher, EnumPredicate, MetadataPredicate, Predicate, 13 | StructuredDataPredicate, 14 | }; 15 | 16 | /// Resolve a single-word alias to a predicate 17 | /// 18 | /// Supports: 19 | /// - File type aliases: `file`, `dir`, `symlink`, etc. 
20 | /// - Structured data selectors: `yaml:.field`, `json:.path`, `toml:.key` (existence check) 21 | /// 22 | /// Example: `resolve_alias("dir")` is equivalent to `type == dir` 23 | /// Example: `resolve_alias("yaml:.spec")` checks if `.spec` exists in YAML file 24 | pub fn resolve_alias(word: &str) -> Result { 25 | // Check if it's a structured selector 26 | match parse_structured_selector(word) { 27 | Ok(Some((format, components))) => { 28 | // Create existence predicate based on format 29 | let predicate = match format { 30 | DataFormat::Yaml => StructuredDataPredicate::YamlExists { path: components }, 31 | DataFormat::Json => StructuredDataPredicate::JsonExists { path: components }, 32 | DataFormat::Toml => StructuredDataPredicate::TomlExists { path: components }, 33 | }; 34 | return Ok(Predicate::Structured(predicate)); 35 | } 36 | Ok(None) => { 37 | // Not a structured selector, try file type alias 38 | } 39 | Err(e) => { 40 | return Err(AliasError::Structured(e)); 41 | } 42 | } 43 | 44 | // Try to resolve as file type alias 45 | match DetectFileType::from_str(word) { 46 | Ok(file_type) => Ok(Predicate::Metadata(Arc::new(MetadataPredicate::Type( 47 | EnumMatcher::Equals(file_type), 48 | )))), 49 | Err(_) => Err(AliasError::UnknownAlias(word.to_string())), 50 | } 51 | } 52 | 53 | /// Suggest similar aliases for an unknown word 54 | /// 55 | /// Uses simple edit distance to find close matches 56 | pub fn suggest_aliases(word: &str) -> Vec { 57 | let all_aliases = DetectFileType::all_valid_strings(); 58 | 59 | all_aliases 60 | .iter() 61 | .filter(|&&alias| levenshtein_distance(word, alias) <= 2) 62 | .map(|&s| s.to_string()) 63 | .collect() 64 | } 65 | 66 | /// Simple Levenshtein distance implementation for fuzzy matching 67 | fn levenshtein_distance(a: &str, b: &str) -> usize { 68 | let a_chars: Vec = a.chars().collect(); 69 | let b_chars: Vec = b.chars().collect(); 70 | let a_len = a_chars.len(); 71 | let b_len = b_chars.len(); 72 | 73 | if a_len == 0 { 74 | return b_len; 75 | } 76 | if b_len == 0 { 77 | return a_len; 78 | } 79 | 80 | let mut prev_row: Vec = (0..=b_len).collect(); 81 | let mut curr_row = vec![0; b_len + 1]; 82 | 83 | for (i, a_char) in a_chars.iter().enumerate() { 84 | curr_row[0] = i + 1; 85 | 86 | for (j, b_char) in b_chars.iter().enumerate() { 87 | let cost = usize::from(a_char != b_char); 88 | curr_row[j + 1] = (curr_row[j] + 1) // insertion 89 | .min(prev_row[j + 1] + 1) // deletion 90 | .min(prev_row[j] + cost); // substitution 91 | } 92 | 93 | std::mem::swap(&mut prev_row, &mut curr_row); 94 | } 95 | 96 | prev_row[b_len] 97 | } 98 | 99 | #[cfg(test)] 100 | mod tests { 101 | use super::*; 102 | 103 | #[test] 104 | fn test_file_type_aliases() { 105 | // All file type aliases should resolve 106 | assert!(resolve_alias("file").is_ok()); 107 | assert!(resolve_alias("dir").is_ok()); 108 | assert!(resolve_alias("directory").is_ok()); 109 | assert!(resolve_alias("symlink").is_ok()); 110 | assert!(resolve_alias("link").is_ok()); 111 | assert!(resolve_alias("socket").is_ok()); 112 | assert!(resolve_alias("sock").is_ok()); 113 | assert!(resolve_alias("fifo").is_ok()); 114 | assert!(resolve_alias("pipe").is_ok()); 115 | assert!(resolve_alias("block").is_ok()); 116 | assert!(resolve_alias("blockdev").is_ok()); 117 | assert!(resolve_alias("char").is_ok()); 118 | assert!(resolve_alias("chardev").is_ok()); 119 | } 120 | 121 | #[test] 122 | fn test_unknown_alias() { 123 | let result = resolve_alias("unknown"); 124 | assert!(matches!(result, Err(AliasError::UnknownAlias(_)))); 
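        // Near-miss spellings are not resolved here; suggest_aliases (exercised in test_suggestions below) covers those.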
125 | } 126 | 127 | #[test] 128 | fn test_case_insensitive() { 129 | // DetectFileType::from_str is case-insensitive 130 | assert!(resolve_alias("FILE").is_ok()); 131 | assert!(resolve_alias("Dir").is_ok()); 132 | assert!(resolve_alias("SYMLINK").is_ok()); 133 | } 134 | 135 | #[test] 136 | fn test_suggestions() { 137 | // Close matches should be suggested 138 | let suggestions = suggest_aliases("fil"); 139 | assert!(suggestions.contains(&"file".to_string())); 140 | 141 | let suggestions = suggest_aliases("direktory"); 142 | assert!(suggestions.contains(&"directory".to_string())); 143 | } 144 | 145 | #[test] 146 | fn test_levenshtein_distance() { 147 | assert_eq!(levenshtein_distance("file", "file"), 0); 148 | assert_eq!(levenshtein_distance("file", "fil"), 1); 149 | assert_eq!(levenshtein_distance("directory", "dir"), 6); 150 | assert_eq!(levenshtein_distance("", "test"), 4); 151 | assert_eq!(levenshtein_distance("test", ""), 4); 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/parser/ast.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, PartialEq)] 2 | pub struct RawPredicate<'a> { 3 | pub selector: &'a str, 4 | pub operator: &'a str, 5 | pub value: RawValue<'a>, 6 | pub span: pest::Span<'a>, 7 | // Subcomponent spans for precise error reporting 8 | pub selector_span: pest::Span<'a>, 9 | pub operator_span: pest::Span<'a>, 10 | pub value_span: pest::Span<'a>, 11 | } 12 | 13 | #[derive(Debug, Clone, PartialEq)] 14 | pub enum RawValue<'a> { 15 | Quoted(&'a str), // Explicitly quoted by user (quotes stripped, escapes preserved) 16 | Raw(&'a str), // Raw token - could be bare word, [set], (group), {curly}, etc. 17 | // Typechecker interprets based on operator context 18 | } 19 | 20 | #[derive(Debug, Clone, PartialEq)] 21 | pub enum RawExpr<'a> { 22 | Not(Box>), 23 | And(Box>, Box>), 24 | Or(Box>, Box>), 25 | Predicate(RawPredicate<'a>), 26 | SingleWord(pest::Span<'a>), 27 | } 28 | 29 | impl<'a> RawExpr<'a> { 30 | /// Convert to test-friendly expression without spans 31 | pub fn to_test_expr(&self) -> test_utils::RawTestExpr<'a> { 32 | match self { 33 | RawExpr::Not(expr) => test_utils::RawTestExpr::Not(Box::new(expr.to_test_expr())), 34 | RawExpr::And(left, right) => test_utils::RawTestExpr::And( 35 | Box::new(left.to_test_expr()), 36 | Box::new(right.to_test_expr()), 37 | ), 38 | RawExpr::Or(left, right) => test_utils::RawTestExpr::Or( 39 | Box::new(left.to_test_expr()), 40 | Box::new(right.to_test_expr()), 41 | ), 42 | RawExpr::Predicate(pred) => { 43 | test_utils::RawTestExpr::Predicate(pred.to_test_predicate()) 44 | } 45 | RawExpr::SingleWord(span) => test_utils::RawTestExpr::SingleWord(span.as_str()), 46 | } 47 | } 48 | } 49 | 50 | impl<'a> RawPredicate<'a> { 51 | /// Convert to test-friendly predicate without spans 52 | pub fn to_test_predicate(&self) -> test_utils::RawTestPredicate<'a> { 53 | test_utils::RawTestPredicate { 54 | selector: self.selector, 55 | operator: self.operator, 56 | value: self.value.to_test_value(), 57 | } 58 | } 59 | } 60 | 61 | impl<'a> RawValue<'a> { 62 | /// Get the string value (works for both Quoted and Raw) 63 | pub fn as_string(&self) -> &'a str { 64 | match self { 65 | RawValue::Quoted(s) | RawValue::Raw(s) => s, 66 | } 67 | } 68 | 69 | /// Check if this is a quoted value (user explicitly quoted it) 70 | pub fn is_quoted(&self) -> bool { 71 | matches!(self, RawValue::Quoted(_)) 72 | } 73 | 74 | /// Convert to test-friendly value without 
spans 75 | pub fn to_test_value(&self) -> test_utils::RawTestValue<'a> { 76 | match self { 77 | RawValue::Quoted(s) => test_utils::RawTestValue::Quoted(s), 78 | RawValue::Raw(s) => test_utils::RawTestValue::Raw(s), 79 | } 80 | } 81 | } 82 | 83 | pub mod test_utils { 84 | #[derive(Debug, Clone, PartialEq)] 85 | pub struct RawTestPredicate<'a> { 86 | pub selector: &'a str, 87 | pub operator: &'a str, 88 | pub value: RawTestValue<'a>, 89 | } 90 | 91 | #[derive(Debug, Clone, PartialEq)] 92 | pub enum RawTestValue<'a> { 93 | Quoted(&'a str), // Explicitly quoted by user 94 | Raw(&'a str), // Raw token (bare word, [brackets], (parens), {curlies}) 95 | } 96 | 97 | impl std::fmt::Display for RawTestValue<'_> { 98 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 99 | match self { 100 | RawTestValue::Quoted(s) | RawTestValue::Raw(s) => write!(f, "{s}"), 101 | } 102 | } 103 | } 104 | 105 | #[derive(Debug, Clone, PartialEq)] 106 | pub enum RawTestExpr<'a> { 107 | Not(Box>), 108 | And(Box>, Box>), 109 | Or(Box>, Box>), 110 | Predicate(RawTestPredicate<'a>), 111 | SingleWord(&'a str), 112 | } 113 | 114 | impl<'a> RawTestExpr<'a> { 115 | pub fn string_predicate(selector: &'a str, operator: &'a str, value: &'a str) -> Self { 116 | RawTestExpr::Predicate(RawTestPredicate { 117 | selector, 118 | operator, 119 | value: RawTestValue::Raw(value), 120 | }) 121 | } 122 | 123 | pub fn quoted_predicate(selector: &'a str, operator: &'a str, value: &'a str) -> Self { 124 | RawTestExpr::Predicate(RawTestPredicate { 125 | selector, 126 | operator, 127 | value: RawTestValue::Quoted(value), 128 | }) 129 | } 130 | 131 | pub fn set_predicate(selector: &'a str, operator: &'a str, values: Vec<&'a str>) -> Self { 132 | let value = format!("[{}]", values.join(",")); 133 | RawTestExpr::Predicate(RawTestPredicate { 134 | selector, 135 | operator, 136 | value: RawTestValue::Raw(Box::leak(value.into_boxed_str())), 137 | }) 138 | } 139 | 140 | pub fn single_word(word: &'a str) -> Self { 141 | RawTestExpr::SingleWord(word) 142 | } 143 | 144 | pub fn and(left: RawTestExpr<'a>, right: RawTestExpr<'a>) -> Self { 145 | RawTestExpr::And(Box::new(left), Box::new(right)) 146 | } 147 | 148 | pub fn or(left: RawTestExpr<'a>, right: RawTestExpr<'a>) -> Self { 149 | RawTestExpr::Or(Box::new(left), Box::new(right)) 150 | } 151 | 152 | #[allow(clippy::should_implement_trait)] 153 | pub fn not(expr: RawTestExpr<'a>) -> Self { 154 | RawTestExpr::Not(Box::new(expr)) 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/expr.rs: -------------------------------------------------------------------------------- 1 | pub mod frame; 2 | pub mod short_circuit; 3 | 4 | use std::{fmt::Display, sync::Arc}; 5 | 6 | use crate::{ 7 | expr::frame::ExprFrame, 8 | predicate::{self, Predicate}, 9 | }; 10 | use frame::MapPredicateRef; 11 | use recursion::CollapsibleExt; 12 | 13 | use self::short_circuit::ShortCircuit; 14 | 15 | /// Filesystem entity matcher expression with boolean logic and predicates 16 | #[derive(Debug, PartialEq, Eq)] 17 | pub enum Expr { 18 | Not(Box), 19 | And(Box, Box), 20 | Or(Box, Box), 21 | Predicate(Predicate), 22 | Literal(bool), 23 | } 24 | 25 | impl Display for Expr
<P>
{ 26 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 27 | match self { 28 | Expr::Not(e) => f.write_str(&format!("!{e}")), 29 | Expr::And(a, b) => f.write_str(&format!("{a} && {b}")), 30 | Expr::Or(a, b) => f.write_str(&format!("{a} || {b}")), 31 | Expr::Predicate(p) => write!(f, "{:?}", &p), 32 | Expr::Literal(x) => f.write_str(&x.to_string()), 33 | } 34 | } 35 | } 36 | 37 | impl Expr> { 38 | pub fn name_predicate(x: A) -> Self { 39 | Self::Predicate(Predicate::Name(Arc::new(x))) 40 | } 41 | pub fn meta_predicate(x: B) -> Self { 42 | Self::Predicate(Predicate::Metadata(Arc::new(x))) 43 | } 44 | pub fn content_predicate(x: C) -> Self { 45 | Self::Predicate(Predicate::Content(x)) 46 | } 47 | } 48 | 49 | impl Expr> { 50 | /// Check if expression contains any Structured predicates 51 | pub fn contains_structured_predicates(&self) -> bool { 52 | MapPredicateRef(self).collapse_frames(|e| match e { 53 | ExprFrame::Predicate(Predicate::Structured(_)) => true, 54 | ExprFrame::And(a, b) | ExprFrame::Or(a, b) => a || b, 55 | ExprFrame::Not(a) => a, 56 | ExprFrame::Predicate(_) | ExprFrame::Literal(_) => false, 57 | }) 58 | } 59 | 60 | /// Check if expression contains any Content predicates 61 | pub fn contains_content_predicates(&self) -> bool { 62 | MapPredicateRef(self).collapse_frames(|e| match e { 63 | ExprFrame::Predicate(Predicate::Content(_)) => true, 64 | ExprFrame::And(a, b) | ExprFrame::Or(a, b) => a || b, 65 | ExprFrame::Not(a) => a, 66 | ExprFrame::Predicate(_) | ExprFrame::Literal(_) => false, 67 | }) 68 | } 69 | } 70 | 71 | impl
<P> Expr<P>
{ 72 | pub fn map_predicate_ref<'a, B>(&'a self, f: impl Fn(&'a P) -> B) -> Expr { 73 | MapPredicateRef(self).collapse_frames(|e| match e { 74 | // apply 'f' to Predicate expressions 75 | ExprFrame::Predicate(p) => Expr::Predicate(f(p)), 76 | ExprFrame::And(a, b) => Expr::and(a, b), 77 | ExprFrame::Or(a, b) => Expr::or(a, b), 78 | ExprFrame::Not(a) => Expr::negate(a), 79 | ExprFrame::Literal(x) => Expr::Literal(x), 80 | }) 81 | } 82 | 83 | pub fn and(a: Self, b: Self) -> Self { 84 | Self::And(Box::new(a), Box::new(b)) 85 | } 86 | pub fn or(a: Self, b: Self) -> Self { 87 | Self::Or(Box::new(a), Box::new(b)) 88 | } 89 | pub fn negate(a: Self) -> Self { 90 | Self::Not(Box::new(a)) 91 | } 92 | } 93 | 94 | impl Expr
<P>
{ 95 | pub fn map_predicate(self, f: impl Fn(P) -> B) -> Expr { 96 | self.collapse_frames(|e| match e { 97 | // apply 'f' to Predicate expressions 98 | ExprFrame::Predicate(p) => Expr::Predicate(f(p)), 99 | ExprFrame::And(a, b) => Expr::and(a, b), 100 | ExprFrame::Or(a, b) => Expr::or(a, b), 101 | ExprFrame::Not(a) => Expr::negate(a), 102 | ExprFrame::Literal(x) => Expr::Literal(x), 103 | }) 104 | } 105 | 106 | pub fn map_predicate_err(self, f: impl Fn(P) -> Result) -> Result, E> { 107 | self.collapse_frames(|e| match e { 108 | // apply 'f' to Predicate expressions 109 | ExprFrame::Predicate(p) => Ok(Expr::Predicate(f(p)?)), 110 | ExprFrame::And(a, b) => Ok(Expr::and(a?, b?)), 111 | ExprFrame::Or(a, b) => Ok(Expr::or(a?, b?)), 112 | ExprFrame::Not(a) => Ok(Expr::negate(a?)), 113 | ExprFrame::Literal(x) => Ok(Expr::Literal(x)), 114 | }) 115 | } 116 | } 117 | 118 | impl Expr
<P>
{ 119 | pub fn reduce_predicate_and_short_circuit>>( 120 | &self, 121 | mut f: impl FnMut(P) -> X, 122 | ) -> Expr { 123 | self.collapse_frames(|e| match e { 124 | // apply 'f' to Predicate expressions 125 | ExprFrame::Predicate(p) => match f(p).into() { 126 | ShortCircuit::Known(b) => Expr::Literal(b), 127 | ShortCircuit::Unknown(p) => Expr::Predicate(p), 128 | }, 129 | // reduce And expressions 130 | ExprFrame::And(Expr::Literal(false), _) => Expr::Literal(false), 131 | ExprFrame::And(_, Expr::Literal(false)) => Expr::Literal(false), 132 | ExprFrame::And(x, Expr::Literal(true)) => x, 133 | ExprFrame::And(Expr::Literal(true), x) => x, 134 | ExprFrame::And(a, b) => Expr::and(a, b), 135 | // reduce Or expressions 136 | ExprFrame::Or(Expr::Literal(true), _) => Expr::Literal(true), 137 | ExprFrame::Or(_, Expr::Literal(true)) => Expr::Literal(true), 138 | ExprFrame::Or(x, Expr::Literal(false)) => x, 139 | ExprFrame::Or(Expr::Literal(false), x) => x, 140 | ExprFrame::Or(a, b) => Expr::or(a, b), 141 | // reduce Not expressions 142 | ExprFrame::Not(Expr::Literal(k)) => Expr::Literal(!k), 143 | ExprFrame::Not(x) => Expr::negate(x), 144 | // Literal expressions are unchanged 145 | ExprFrame::Literal(x) => Expr::Literal(x), 146 | }) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{env::current_dir, io::Write, path::PathBuf, str::FromStr}; 2 | 3 | use clap::Parser; 4 | use detect::{parse_and_run_fs, RuntimeConfig}; 5 | use slog::{o, Drain, Level, Logger}; 6 | 7 | const EXAMPLES: &str = include_str!("../docs/examples.md"); 8 | const PREDICATES: &str = include_str!("../docs/predicates.md"); 9 | const OPERATORS: &str = include_str!("../docs/operators.md"); 10 | 11 | #[derive(Parser, Debug)] 12 | #[command( 13 | name = "detect", 14 | author, 15 | version, 16 | about = "Find filesystem entities using expressions", 17 | long_about = "Find filesystem entities using expressions 18 | 19 | EXIT CODES: 20 | 0 Matches found 21 | 1 No matches found 22 | 2 Error (parse error, directory not found, etc.)" 23 | )] 24 | struct Args { 25 | /// Show help on specific topics: examples, predicates, operators 26 | /// 27 | /// Without argument, lists available topics 28 | #[arg( 29 | long = "explain", 30 | value_name = "TOPIC", 31 | num_args = 0..=1, 32 | default_missing_value = "list", 33 | require_equals = false, 34 | )] 35 | explain: Option, 36 | 37 | /// filtering expr 38 | #[clap(index = 1, required_unless_present = "explain")] 39 | expr: Option, 40 | 41 | /// target dir 42 | #[clap(index = 2)] 43 | path: Option, 44 | /// include gitignored files 45 | #[arg(short = 'i')] 46 | visit_gitignored: bool, 47 | /// log level (trace/debug/info/warning/error/critical) 48 | #[arg(short = 'l', default_value = "warning")] 49 | log_level: String, 50 | /// Maximum file size for structured data parsing (yaml/json/toml) 51 | /// Supports units: kb, mb, gb (e.g., "10mb", "500kb") 52 | #[arg(long = "max-structured-size", default_value = "10mb")] 53 | max_structured_size: String, 54 | } 55 | 56 | #[tokio::main] 57 | pub async fn main() -> Result<(), Box> { 58 | let args = Args::parse(); 59 | 60 | // Handle --explain flag 61 | if let Some(topic) = args.explain { 62 | match topic.to_lowercase().as_str() { 63 | "list" => { 64 | println!("Available help topics:\n"); 65 | println!(" examples - Practical usage examples for common tasks"); 66 | println!( 67 | " predicates - Reference 
of all selectors (name, size, content, yaml, etc.)" 68 | ); 69 | println!(" operators - Reference of all operators (==, contains, ~=, etc.)"); 70 | println!("\nUsage: detect --explain "); 71 | println!(" or: detect --explain (shows this list)"); 72 | } 73 | "examples" => println!("{}", EXAMPLES), 74 | "predicates" | "selectors" => println!("{}", PREDICATES), 75 | "operators" | "ops" => println!("{}", OPERATORS), 76 | _ => { 77 | eprintln!("Error: Unknown topic '{}'\n", topic); 78 | eprintln!("Available topics: examples, predicates, operators"); 79 | eprintln!("Run 'detect --explain' to see all topics"); 80 | std::process::exit(2); 81 | } 82 | } 83 | return Ok(()); 84 | } 85 | 86 | let expr = args 87 | .expr 88 | .expect("Expression required when --explain isn't used, should be present"); 89 | 90 | let max_structured_size = 91 | detect::util::parse_size(&args.max_structured_size).unwrap_or_else(|e| { 92 | eprintln!("Error: {e}"); 93 | std::process::exit(1); 94 | }); 95 | 96 | let config = RuntimeConfig { 97 | max_structured_size, 98 | }; 99 | 100 | let log_level = Level::from_str(&args.log_level).unwrap_or_else(|_| { 101 | eprintln!( 102 | "Error: Invalid log level '{}'\nValid options: trace, debug, info, warning, error, critical", 103 | args.log_level 104 | ); 105 | std::process::exit(1); 106 | }); 107 | 108 | let plain = slog_term::PlainSyncDecorator::new(std::io::stdout()); 109 | let logger = Logger::root( 110 | RuntimeLevelFilter { 111 | drain: slog_term::FullFormat::new(plain).build(), 112 | level: log_level, 113 | } 114 | .fuse(), 115 | o!(), 116 | ); 117 | 118 | let root_path = match args.path { 119 | Some(path) => path, 120 | None => current_dir()?, 121 | }; 122 | 123 | // Canonicalize root path for relative path computation 124 | let canonical_root = root_path 125 | .canonicalize() 126 | .unwrap_or_else(|_| root_path.clone()); 127 | 128 | let mut output = std::io::stdout(); 129 | 130 | let result = parse_and_run_fs( 131 | logger, 132 | &root_path, 133 | !args.visit_gitignored, 134 | expr, 135 | config, 136 | |s| { 137 | let display_path = s 138 | .strip_prefix(&canonical_root) 139 | .unwrap_or(s) 140 | .to_string_lossy(); 141 | 142 | if let Err(e) = writeln!(output, "./{}", display_path) { 143 | if e.kind() == std::io::ErrorKind::BrokenPipe { 144 | // Unix convention: exit 0 on SIGPIPE/BrokenPipe 145 | std::process::exit(0); 146 | } else { 147 | eprintln!("Output error: {}", e); 148 | std::process::exit(1); 149 | } 150 | } 151 | }, 152 | ) 153 | .await; 154 | 155 | match result { 156 | Ok(match_count) => { 157 | if match_count > 0 { 158 | std::process::exit(0); // Matches found 159 | } else { 160 | std::process::exit(1); // No matches 161 | } 162 | } 163 | Err(e) => { 164 | eprintln!("{:?}", miette::Report::new(e)); 165 | std::process::exit(2); // Error 166 | } 167 | } 168 | } 169 | 170 | struct RuntimeLevelFilter { 171 | drain: D, 172 | level: Level, 173 | } 174 | 175 | impl Drain for RuntimeLevelFilter 176 | where 177 | D: Drain, 178 | { 179 | type Ok = Option; 180 | type Err = Option; 181 | 182 | fn log( 183 | &self, 184 | record: &slog::Record, 185 | values: &slog::OwnedKVList, 186 | ) -> Result { 187 | if record.level().is_at_least(self.level) { 188 | self.drain.log(record, values).map(Some).map_err(Some) 189 | } else { 190 | Ok(None) 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 
[![Crates.io](https://img.shields.io/crates/v/detect.svg)](https://crates.io/crates/detect) 2 | 3 | # detect 4 | 5 | A modern replacement for find/grep using an intuitive expression language. 6 | 7 | - **Readable syntax**: `ext == ts AND size > 50kb` instead of `find . -name "*.ts" -size +50k` 8 | - **Unified queries**: Combine filename + content + metadata instead of chaining multiple processes 9 | - **Lazy evaluation**: Detect checks cheap predicates first (filename, metadata) and short circuits whenever possible 10 | 11 | [Quick start](#quick-start) • [Installation](#installation) • [Query language](#query-language) • [Examples](#examples) 12 | 13 | Traditional Unix tools require chaining multiple commands with cryptic syntax: 14 | 15 | ```bash 16 | # Find Rust files importing BOTH tokio and serde 17 | detect 'ext == rs 18 | AND content contains "use tokio" 19 | AND content contains "use serde"' 20 | 21 | # Traditional approach - scan all .rs files, then scan matches a second time 22 | grep -rl 'use tokio' --include="*.rs" | xargs grep -l 'use serde' 23 | ``` 24 | 25 | Detect also supports searches inspecting structured data in YAML, TOML, and JSON files: 26 | 27 | ```bash 28 | # Find Cargo.toml files with package edition 2018 29 | detect 'name == "Cargo.toml" AND toml:.package.edition == 2018' 30 | 31 | # using regexes (may result in false positives) 32 | find . -name "Cargo.toml" -exec grep -q 'edition.*"2018"' {} \; -print 33 | 34 | # using cryptaliagy's tomlq crate 35 | find . -name "Cargo.toml" -exec sh -c ' 36 | tq -f "$1" -r ".package.edition" 2>/dev/null | grep -q "2018" 37 | ' _ {} \; -print 38 | ``` 39 | 40 | 41 | ## Installation 42 | 43 | ### From crates.io 44 | 45 | ```bash 46 | cargo install detect 47 | ``` 48 | 49 | ### Building from source 50 | 51 | **Prerequisites:** Rust toolchain (1.70+) 52 | 53 | ```bash 54 | git clone https://github.com/inanna-malick/detect.git 55 | cd detect 56 | cargo build --release 57 | 58 | # Binary will be at ./target/release/detect 59 | # Optionally install globally: 60 | cargo install --path . 
61 | ``` 62 | 63 | ## Quick start 64 | 65 | ```bash 66 | detect 'ext == rs' # selector + operator 67 | detect 'ext in [rs,toml] AND size > 1mb' # sets, AND, numeric 68 | detect 'ext == ts AND modified > -7d' # temporal predicates 69 | detect 'ext == ts AND content ~= "class.*Service"' # content, regex 70 | detect '(file OR dir) AND NOT path ~= test' # aliases, grouping, NOT 71 | detect 'yaml:.server.port > 8000 AND size < 0.5mb' # structured data 72 | ``` 73 | 74 | ## Query language 75 | 76 | ### Selectors 77 | 78 | #### File Identity 79 | | Selector | Type | Description | Example | 80 | |----------|------|-------------|---------| 81 | | `name` / `filename` | String | Full filename with extension | `name == "README.md"` | 82 | | `basename` / `stem` | String | Filename without extension | `basename == README` | 83 | | `ext` / `extension` | String | File extension (no dot) | `ext == rs` | 84 | | `path` | String | Full absolute path | `path contains /src/` | 85 | | `dir` / `parent` / `directory` | String | Parent directory path | `dir contains lib` | 86 | 87 | #### File Properties 88 | | Selector | Type | Description | Example | 89 | |----------|------|-------------|---------| 90 | | `size` | Numeric | File size in bytes | `size > 1mb` | 91 | | `type` | Enum | File type (parse-time validated) | `type == file` | 92 | | `depth` | Numeric | Directory depth from search root | `depth <= 3` | 93 | 94 | **Size units:** `kb`, `mb`, `gb`, `tb` (e.g., `1.5mb`, `500kb`) 95 | 96 | **File types** (case-insensitive): `file`, `dir`/`directory`, `symlink`/`link`, `socket`/`sock`, `fifo`/`pipe`, `block`/`blockdev`, `char`/`chardev` 97 | 98 | #### Timestamps 99 | | Selector | Type | Description | Example | 100 | |----------|------|-------------|---------| 101 | | `modified` / `mtime` | Temporal | Last modification time | `modified > -7d` | 102 | | `created` / `ctime` | Temporal | File creation time | `created > 2024-01-01` | 103 | | `accessed` / `atime` | Temporal | Last access time | `accessed < -1h` | 104 | 105 | **Time formats:** Relative `-7d`/`-7days`, `-2h`/`-2hours`, `-1w`/`-1week` (units: `s`, `m`/`min`, `h`/`hr`, `d`/`day`, `w`/`week` + plurals). Absolute `2024-01-15`, `2024-01-15T10:30:00`. 106 | 107 | #### Content 108 | | Selector | Type | Description | Example | 109 | |----------|------|-------------|---------| 110 | | `content` / `text` / `contents` | String | File text contents | `content contains TODO` | 111 | 112 | #### Structured Data 113 | 114 | Query YAML, JSON, and TOML: 115 | 116 | ```bash 117 | yaml:.server # existence check (no operator needed) 118 | yaml:.server.port == 8080 # nested field value 119 | toml:.package.edition == "2021" # value match 120 | yaml:.features[*].enabled == true # wildcard - any array element 121 | json:..password contains prod # recursive - any depth 122 | ``` 123 | 124 | Navigate with `.field`, `.nested.field`, `[0]`, `[*]`, `..field`. Auto-converts between numbers and strings (`yaml:.port == 8080` matches both `8080` and `"8080"`). Default max file size: 10MB (configurable with `--max-structured-size`). 
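
A quick sketch of the number/string coercion described above, run against a hypothetical `config.yaml` (the file contents below are illustrative, not part of this repository):

```bash
# Hypothetical config.yaml:
#   server:
#     port: "8080"        # quoted, so YAML stores it as a string
#     features:
#       - { name: tls, enabled: true }
#       - { name: http2, enabled: false }

# Numeric comparison still matches the quoted "8080" via auto-conversion
detect 'yaml:.server.port == 8080'

# Wildcard over array elements: matches if any feature has enabled == true
detect 'yaml:.server.features[*].enabled == true'
```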
125 | 126 | ### Operators 127 | 128 | | Type | Operators | Example | 129 | |------|-----------|---------| 130 | | String | `==`, `!=`, `contains`, `~=`, `in [a,b]` | `content contains TODO` | 131 | | Numeric | `==`, `!=`, `>`, `<`, `>=`, `<=` | `size > 1mb` | 132 | | Temporal | `>`, `<`, `>=`, `<=`, `==`, `!=` | `modified > -7d` | 133 | | Enum | `==`, `!=`, `in [a,b]` | `type == file` | 134 | | Boolean | `AND`/`&&`, `OR`/`||`, `NOT`/`!`, `()` | `a AND (b OR c)` | 135 | 136 | **Precedence:** `NOT` > `AND` > `OR` 137 | 138 | Full reference: `detect --operators` 139 | 140 | ## Examples 141 | 142 | ```bash 143 | # File metadata combinations 144 | detect 'ext == rs AND size > 1mb AND modified > -7d' 145 | 146 | # Content matching with regex 147 | detect 'ext == ts AND content ~= "class.*Service"' 148 | 149 | # Structured data navigation 150 | detect 'yaml:.server.port == 8080' 151 | detect 'toml:.package.edition == "2021"' 152 | 153 | # Multi-feature real-world queries 154 | detect 'size > 10kb AND modified > -7d AND content contains TODO AND NOT path ~= test' 155 | detect 'yaml:.spec.replicas > 3 AND size < 100kb' 156 | 157 | # Security scanning 158 | detect 'name ~= "^\.env" AND content ~= "(password|secret|key)" AND NOT path ~= node_modules' 159 | 160 | # Migration from find/grep 161 | find . -name "*.ts" -size +1M -mtime -7 → detect 'ext == ts AND size > 1mb AND modified > -7d' 162 | 163 | # CLI options 164 | detect 'ext == rs' ./src # search specific directory 165 | detect -i 'content contains SECRET' # include gitignored files 166 | detect --max-structured-size 50mb 'yaml:.config' # configure size limit for structured files 167 | ``` 168 | 169 | **More examples:** `detect --examples` 170 | 171 | ## Exit codes 172 | 173 | Compatible with scripting and CI/CD pipelines (same as `grep`/`ripgrep`): 174 | 175 | - **0** - Matches found 176 | - **1** - No matches 177 | - **2** - Error (parse error, directory not found, etc.) 178 | 179 | ```bash 180 | # Use in conditionals 181 | if detect 'size > 100mb'; then 182 | echo "Found large files" 183 | fi 184 | 185 | # CI: fail build if TODOs found 186 | detect 'path contains src AND content contains TODO' && exit 1 187 | ``` 188 | 189 | ## Performance 190 | 191 | Queries are evaluated in four phases: name → metadata → structured → content. Each phase can eliminate files before more expensive operations. Content is never read unless the file passes all earlier checks. 192 | 193 | Respects `.gitignore` by default. Traverses directories in parallel. Structured data parsing is limited to 10MB files (configurable). 194 | 195 | ## Contributing 196 | 197 | Contributions welcome. File an issue before major changes. 198 | 199 | ## License 200 | 201 | Licensed under either of: 202 | 203 | - Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or ) 204 | - MIT license ([LICENSE-MIT](LICENSE-MIT) or ) 205 | 206 | at your option. 
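
As a final illustration of the `NOT > AND > OR` precedence listed in the Operators section, an unparenthesized query groups as shown below (selectors and values are arbitrary examples using only syntax documented above):

```bash
# AND binds tighter than OR, so these two queries are equivalent
detect 'ext == md OR ext == rs AND size > 1mb'
detect 'ext == md OR (ext == rs AND size > 1mb)'

# NOT binds tightest: only the path predicate is negated here
detect 'NOT path ~= test AND content contains TODO'
detect '(NOT path ~= test) AND content contains TODO'
```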
207 | -------------------------------------------------------------------------------- /src/parser/raw.rs: -------------------------------------------------------------------------------- 1 | use pest::{ 2 | iterators::Pair, 3 | pratt_parser::{Assoc::Left, Op, PrattParser}, 4 | Parser, 5 | }; 6 | use pest_derive::Parser; 7 | 8 | use super::{ 9 | ast::{RawExpr, RawPredicate, RawValue}, 10 | error::{DetectError, SpanExt}, 11 | }; 12 | 13 | #[derive(Parser)] 14 | #[grammar = "parser/grammar.pest"] 15 | pub struct RawParser; 16 | 17 | impl RawParser { 18 | /// Parse an expression from input string into a Raw AST 19 | pub fn parse_raw_expr(input: &str) -> Result, DetectError> { 20 | let mut pairs = Self::parse(Rule::program, input) 21 | .map_err(|e| DetectError::from_pest(Box::new(e), input.to_string()))?; 22 | 23 | let program_pair = pairs 24 | .next() 25 | .ok_or_else(|| DetectError::internal("Grammar guarantees program exists"))?; 26 | 27 | let expr_pair = program_pair 28 | .into_inner() 29 | .next() 30 | .ok_or_else(|| DetectError::internal("Grammar guarantees program contains expr"))?; 31 | 32 | Self::parse_expr(expr_pair).map_err(|e| e.with_source(input.to_string())) 33 | } 34 | 35 | /// Parse set contents from a string like "rs, js, ts" or "foo, \"bar, baz\", qux" 36 | /// Used by typechecker for 'in' operator 37 | /// 38 | /// Properly handles: 39 | /// - Quoted items with commas: `"foo, bar", baz` 40 | /// - Bare items: `rs, js, ts` 41 | /// - Mixed: `foo, "bar baz", qux` 42 | /// - Trailing commas: `rs, js,` 43 | /// - Empty sets: `` 44 | pub fn parse_set_contents(input: &str) -> Result, DetectError> { 45 | let pairs = Self::parse(Rule::set_contents, input) 46 | .map_err(|e| DetectError::from_pest(Box::new(e), input.to_string()))?; 47 | 48 | let items: Vec = pairs 49 | .flat_map(pest::iterators::Pair::into_inner) // set_contents -> set_items or EOI 50 | .filter(|pair| pair.as_rule() == Rule::set_items) 51 | .flat_map(pest::iterators::Pair::into_inner) // set_items -> set_item* 52 | .filter_map(|item_pair| { 53 | // set_item -> quoted_string | bare_set_item 54 | item_pair.into_inner().next() 55 | }) 56 | .map(|inner| { 57 | match inner.as_rule() { 58 | Rule::quoted_string => { 59 | // quoted_string -> inner_double | inner_single (quotes stripped) 60 | // Preserve all whitespace inside quotes 61 | inner 62 | .into_inner() 63 | .next() 64 | .map(|s| s.as_str().to_string()) 65 | .unwrap_or_default() 66 | } 67 | Rule::bare_set_item => { 68 | // Trim whitespace from bare items 69 | inner.as_str().trim().to_string() 70 | } 71 | _ => String::new(), // Should never happen 72 | } 73 | }) 74 | .filter(|s| !s.is_empty()) 75 | .collect(); 76 | 77 | Ok(items) 78 | } 79 | 80 | fn parse_expr(pair: Pair<'_, Rule>) -> Result, DetectError> { 81 | let pratt = PrattParser::new() 82 | .op(Op::infix(Rule::or, Left)) 83 | .op(Op::infix(Rule::and, Left)) 84 | .op(Op::prefix(Rule::neg)); 85 | 86 | pratt 87 | .map_primary(Self::parse_primary) 88 | .map_infix(Self::parse_infix) 89 | .map_prefix(Self::parse_prefix) 90 | .parse(pair.into_inner()) 91 | } 92 | 93 | fn parse_primary(pair: Pair<'_, Rule>) -> Result, DetectError> { 94 | match pair.as_rule() { 95 | Rule::predicate => Self::parse_predicate(pair), 96 | Rule::single_word => Ok(RawExpr::SingleWord(pair.as_span())), 97 | Rule::expr => Self::parse_expr(pair), 98 | rule => Err(DetectError::internal(format!( 99 | "Unexpected primary rule: {rule:?}" 100 | ))), 101 | } 102 | } 103 | 104 | fn parse_infix<'a>( 105 | lhs: Result, DetectError>, 106 | _pair: Pair<'a, 
Rule>, 107 | rhs: Result, DetectError>, 108 | ) -> Result, DetectError> { 109 | match _pair.as_rule() { 110 | Rule::and => Ok(RawExpr::And(Box::new(lhs?), Box::new(rhs?))), 111 | Rule::or => Ok(RawExpr::Or(Box::new(lhs?), Box::new(rhs?))), 112 | rule => Err(DetectError::internal(format!( 113 | "Unexpected infix rule: {rule:?}" 114 | ))), 115 | } 116 | } 117 | 118 | fn parse_prefix<'a>( 119 | _pair: Pair<'a, Rule>, 120 | rhs: Result, DetectError>, 121 | ) -> Result, DetectError> { 122 | match _pair.as_rule() { 123 | Rule::neg => Ok(RawExpr::Not(Box::new(rhs?))), 124 | rule => Err(DetectError::internal(format!( 125 | "Unexpected prefix rule: {rule:?}" 126 | ))), 127 | } 128 | } 129 | 130 | fn parse_predicate(pair: Pair<'_, Rule>) -> Result, DetectError> { 131 | let span = pair.as_span(); 132 | let mut inner = pair.into_inner(); 133 | 134 | let selector_pair = inner 135 | .next() 136 | .ok_or_else(|| DetectError::internal("Grammar guarantees predicate has selector"))?; 137 | let selector = selector_pair.as_str(); 138 | let selector_span = selector_pair.as_span(); 139 | 140 | let operator_pair = inner 141 | .next() 142 | .ok_or_else(|| DetectError::internal("Grammar guarantees predicate has operator"))?; 143 | let operator = operator_pair.as_str(); 144 | let operator_span = operator_pair.as_span(); 145 | 146 | let value_pair = inner 147 | .next() 148 | .ok_or_else(|| DetectError::internal("Grammar guarantees predicate has value"))?; 149 | let value_span = value_pair.as_span(); 150 | let value = Self::parse_value(value_pair)?; 151 | 152 | Ok(RawExpr::Predicate(RawPredicate { 153 | selector, 154 | operator, 155 | value, 156 | span, 157 | selector_span, 158 | operator_span, 159 | value_span, 160 | })) 161 | } 162 | 163 | fn parse_value(pair: Pair<'_, Rule>) -> Result, DetectError> { 164 | match pair.as_rule() { 165 | Rule::value => { 166 | // value = { value_content ~ trailing_quote? 
} 167 | // Check if there's a trailing quote error 168 | let mut inner = pair.into_inner(); 169 | let value_content = inner 170 | .next() 171 | .ok_or_else(|| DetectError::internal("Grammar guarantees value has content"))?; 172 | 173 | // Check for trailing quote 174 | if let Some(trailing) = inner.next() { 175 | if trailing.as_rule() == Rule::trailing_quote { 176 | let span = trailing.as_span(); 177 | let quote = span.as_str().chars().next().unwrap_or('"'); 178 | return Err(DetectError::StrayQuote { 179 | span: span.to_source_span(), 180 | quote, 181 | src: String::new(), // Will be filled by with_source() 182 | }); 183 | } 184 | } 185 | 186 | // No trailing quote, parse the value content 187 | Self::parse_value(value_content) 188 | } 189 | Rule::quoted_string => { 190 | // Grammar already parsed inner content without quotes 191 | let inner = pair.into_inner().next().ok_or_else(|| { 192 | DetectError::internal("Grammar guarantees quoted_string has inner content") 193 | })?; 194 | Ok(RawValue::Quoted(inner.as_str())) 195 | } 196 | Rule::unterminated_string => { 197 | // Matched an unterminated string literal - return error with proper span 198 | let span = pair.as_span(); 199 | let text = span.as_str(); 200 | let quote = text.chars().next().unwrap_or('"'); 201 | 202 | // Point span at just the opening quote and a few chars (not extending to EOI) 203 | let start = span.start(); 204 | let length = text.len().min(10); // Show first 10 chars max 205 | let error_span = (start, length).into(); 206 | 207 | Err(DetectError::UnterminatedString { 208 | span: error_span, 209 | quote, 210 | src: String::new(), // Will be filled by with_source() 211 | }) 212 | } 213 | Rule::raw_token => { 214 | // All raw tokens stored as-is, typechecker decides meaning based on operator 215 | Ok(RawValue::Raw(pair.as_str())) 216 | } 217 | rule => Err(DetectError::internal(format!( 218 | "Unexpected value rule: {rule:?}" 219 | ))), 220 | } 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /tests/parser_errors.rs: -------------------------------------------------------------------------------- 1 | use detect::parser::test_utils::RawTestExpr; 2 | use detect::parser::*; 3 | 4 | // ============================================================================== 5 | 6 | #[test] 7 | fn test_unterminated_double_quote() { 8 | // Unterminated double quote at various positions 9 | let result = RawParser::parse_raw_expr(r#"contents ~= "a"#); 10 | assert!(result.is_err(), "Unterminated double quote should fail"); 11 | 12 | let result = RawParser::parse_raw_expr(r#"name == "test"#); 13 | assert!(result.is_err(), "Unterminated double quote should fail"); 14 | 15 | let result = RawParser::parse_raw_expr(r#"ext == "some long string"#); 16 | assert!(result.is_err(), "Unterminated double quote should fail"); 17 | } 18 | 19 | #[test] 20 | fn test_unterminated_single_quote() { 21 | // Unterminated single quote at various positions 22 | let result = RawParser::parse_raw_expr("contents ~= 'a"); 23 | assert!(result.is_err(), "Unterminated single quote should fail"); 24 | 25 | let result = RawParser::parse_raw_expr("name == 'test"); 26 | assert!(result.is_err(), "Unterminated single quote should fail"); 27 | 28 | let result = RawParser::parse_raw_expr("ext == 'foo bar baz"); 29 | assert!(result.is_err(), "Unterminated single quote should fail"); 30 | } 31 | 32 | #[test] 33 | fn test_stray_double_quote_after_value() { 34 | // Stray double quote immediately after valid bare token 35 | let 
result = RawParser::parse_raw_expr(r#"contents ~= a""#); 36 | assert!(result.is_err(), "Stray double quote should fail"); 37 | 38 | let result = RawParser::parse_raw_expr(r#"name == foo""#); 39 | assert!(result.is_err(), "Stray double quote should fail"); 40 | 41 | let result = RawParser::parse_raw_expr(r#"ext == test.rs""#); 42 | assert!(result.is_err(), "Stray double quote should fail"); 43 | } 44 | 45 | #[test] 46 | fn test_stray_single_quote_after_value() { 47 | // Stray single quote immediately after valid bare token 48 | let result = RawParser::parse_raw_expr("contents ~= a'"); 49 | assert!(result.is_err(), "Stray single quote should fail"); 50 | 51 | let result = RawParser::parse_raw_expr("name == foo'"); 52 | assert!(result.is_err(), "Stray single quote should fail"); 53 | 54 | let result = RawParser::parse_raw_expr("ext == test.rs'"); 55 | assert!(result.is_err(), "Stray single quote should fail"); 56 | } 57 | 58 | #[test] 59 | fn test_quote_errors_in_complex_expressions() { 60 | // Unterminated quote in boolean expressions 61 | let result = RawParser::parse_raw_expr(r#"ext == rs AND name == "unterminated"#); 62 | assert!( 63 | result.is_err(), 64 | "Unterminated quote in AND expression should fail" 65 | ); 66 | 67 | let result = RawParser::parse_raw_expr(r#"ext == rs OR name == 'foo"#); 68 | assert!( 69 | result.is_err(), 70 | "Unterminated quote in OR expression should fail" 71 | ); 72 | 73 | // Stray quote in boolean expressions 74 | let result = RawParser::parse_raw_expr(r#"ext == rs AND name == foo""#); 75 | assert!(result.is_err(), "Stray quote in AND expression should fail"); 76 | 77 | let result = RawParser::parse_raw_expr(r#"ext == rs OR name == bar'"#); 78 | assert!(result.is_err(), "Stray quote in OR expression should fail"); 79 | } 80 | 81 | #[test] 82 | fn test_lone_quote_as_value() { 83 | // Single quote character alone should be unterminated 84 | let result = RawParser::parse_raw_expr(r#"contents ~= ""#); 85 | assert!(result.is_err(), "Lone double quote should fail"); 86 | 87 | let result = RawParser::parse_raw_expr("contents ~= '"); 88 | assert!(result.is_err(), "Lone single quote should fail"); 89 | } 90 | 91 | #[test] 92 | fn test_properly_quoted_strings_still_work() { 93 | // Verify that proper quotes continue to work after adding error detection 94 | let result = RawParser::parse_raw_expr(r#"name == "properly quoted""#); 95 | assert!(result.is_ok(), "Properly quoted double quotes should work"); 96 | let expected = RawTestExpr::quoted_predicate("name", "==", "properly quoted"); 97 | assert_eq!(result.unwrap().to_test_expr(), expected); 98 | 99 | let result = RawParser::parse_raw_expr("name == 'properly quoted'"); 100 | assert!(result.is_ok(), "Properly quoted single quotes should work"); 101 | let expected = RawTestExpr::quoted_predicate("name", "==", "properly quoted"); 102 | assert_eq!(result.unwrap().to_test_expr(), expected); 103 | 104 | // With spaces 105 | let result = RawParser::parse_raw_expr(r#"content ~= "test string with spaces""#); 106 | assert!(result.is_ok(), "Quoted string with spaces should work"); 107 | let expected = RawTestExpr::quoted_predicate("content", "~=", "test string with spaces"); 108 | assert_eq!(result.unwrap().to_test_expr(), expected); 109 | } 110 | 111 | #[test] 112 | fn test_edge_case_empty_inputs() { 113 | // Empty string 114 | let result = RawParser::parse_raw_expr(""); 115 | assert!(result.is_err(), "Empty string should fail"); 116 | 117 | // Just whitespace 118 | let result = RawParser::parse_raw_expr(" "); 119 | 
assert!(result.is_err(), "Whitespace only should fail"); 120 | 121 | // Just operators 122 | let result = RawParser::parse_raw_expr("=="); 123 | assert!(result.is_err(), "Operator only should fail"); 124 | } 125 | 126 | #[test] 127 | fn test_malformed_sets() { 128 | // Set with no closing bracket 129 | let result = RawParser::parse_raw_expr("name in [foo, bar"); 130 | assert!(result.is_err(), "Unclosed set should fail"); 131 | 132 | // Set with no opening bracket - actually just parses as "foo" bare token 133 | let result = RawParser::parse_raw_expr("name in foo, bar]"); 134 | assert!(result.is_err(), "Malformed syntax should fail"); 135 | 136 | // Nested sets - simplified grammar now allows this to parse (will fail at typecheck) 137 | let result = RawParser::parse_raw_expr("name in [foo, [bar]]"); 138 | assert!( 139 | result.is_ok(), 140 | "Simplified grammar allows nested brackets (typecheck will handle validity)" 141 | ); 142 | 143 | // Set with trailing comma - now allowed, typechecker filters empty items 144 | let result = RawParser::parse_raw_expr("name in [foo, bar,]"); 145 | assert!(result.is_ok(), "Trailing comma is now allowed"); 146 | 147 | // Set with only commas - parses as empty set after filtering 148 | let result = RawParser::parse_raw_expr("name in [,,,]"); 149 | assert!(result.is_ok(), "Only commas parses as empty set"); 150 | } 151 | 152 | #[test] 153 | fn test_malformed_quotes() { 154 | // Mismatched quotes 155 | let result = RawParser::parse_raw_expr(r#"name == "foo'"#); 156 | assert!(result.is_err(), "Mismatched quotes should fail"); 157 | 158 | let result = RawParser::parse_raw_expr(r#"name == 'foo""#); 159 | assert!(result.is_err(), "Mismatched quotes should fail"); 160 | 161 | // Escaped quote at end without closing 162 | let result = RawParser::parse_raw_expr(r#"name == "foo\""#); 163 | assert!(result.is_err(), "Escaped quote at end should fail"); 164 | 165 | // Multiple quotes 166 | let result = RawParser::parse_raw_expr(r#"name == ""foo""#); 167 | assert!(result.is_err(), "Double quotes should fail"); 168 | 169 | // Quote in the middle of bare value 170 | let result = RawParser::parse_raw_expr(r#"name == fo"o"#); 171 | assert!(result.is_err(), "Quote in middle should fail"); 172 | } 173 | 174 | #[test] 175 | fn test_boolean_logic_edge_cases() { 176 | // Incomplete boolean expressions 177 | let result = RawParser::parse_raw_expr("name == foo AND"); 178 | assert!(result.is_err(), "Incomplete AND should fail"); 179 | 180 | let result = RawParser::parse_raw_expr("OR name == foo"); 181 | assert!(result.is_err(), "Leading OR should fail"); 182 | 183 | let result = RawParser::parse_raw_expr("NOT"); 184 | assert!(result.is_err(), "Standalone NOT should fail"); 185 | 186 | // Multiple consecutive operators 187 | let result = RawParser::parse_raw_expr("name == foo AND OR bar == baz"); 188 | assert!(result.is_err(), "AND OR should fail"); 189 | 190 | let result = RawParser::parse_raw_expr("name == foo NOT AND bar == baz"); 191 | assert!(result.is_err(), "NOT AND should fail"); 192 | 193 | // Multiple NOT 194 | let result = RawParser::parse_raw_expr("NOT NOT name == foo"); 195 | let expected = RawTestExpr::not(RawTestExpr::not(RawTestExpr::string_predicate( 196 | "name", "==", "foo", 197 | ))); 198 | assert_eq!(result.unwrap().to_test_expr(), expected); 199 | 200 | // Mixed NOT usage - prefix operator with NOT as value 201 | let result = RawParser::parse_raw_expr("NOT filename == NOT"); 202 | let expected = RawTestExpr::not(RawTestExpr::string_predicate("filename", "==", 
"NOT")); 203 | assert_eq!(result.unwrap().to_test_expr(), expected); 204 | } 205 | 206 | #[test] 207 | fn test_parentheses_edge_cases() { 208 | // Unmatched parentheses 209 | let result = RawParser::parse_raw_expr("((name == foo)"); 210 | assert!(result.is_err(), "Unmatched opening parens should fail"); 211 | 212 | let result = RawParser::parse_raw_expr("(name == foo))"); 213 | assert!(result.is_err(), "Unmatched closing parens should fail"); 214 | 215 | // Empty parentheses 216 | let result = RawParser::parse_raw_expr("()"); 217 | assert!(result.is_err(), "Empty parentheses should fail"); 218 | 219 | // Parentheses around operators 220 | let result = RawParser::parse_raw_expr("name (==) foo"); 221 | assert!(result.is_err(), "Parentheses around operators should fail"); 222 | 223 | // Basic nested parentheses (deep nesting tested in test_extreme_nesting_limits) 224 | let result = RawParser::parse_raw_expr("((name == foo))"); 225 | let expected = RawTestExpr::string_predicate("name", "==", "foo"); 226 | assert_eq!(result.unwrap().to_test_expr(), expected); 227 | } 228 | -------------------------------------------------------------------------------- /src/eval/fs.rs: -------------------------------------------------------------------------------- 1 | use crate::expr::short_circuit::ShortCircuit; 2 | use crate::expr::Expr; 3 | use crate::predicate::{ 4 | MetadataPredicate, NamePredicate, Predicate, StreamingCompiledContentPredicateRef, 5 | }; 6 | use crate::util::Done; 7 | use futures::{stream, TryStreamExt}; 8 | use slog::{debug, o, Logger}; 9 | use std::path::Path; 10 | use tokio::fs::File; 11 | use tokio::io::BufStream; 12 | use tokio_util::io::ReaderStream; 13 | 14 | use crate::eval::run_contents_predicate_stream; 15 | use crate::eval::structured::{eval_structured_predicate, ParsedDocuments}; 16 | 17 | /// multipass evaluation with short circuiting, runs, in order: 18 | /// - file name matchers 19 | /// - metadata matchers 20 | /// - file content matchers 21 | pub async fn eval<'dfa>( 22 | logger: &Logger, 23 | e: &'dfa Expr< 24 | Predicate>, 25 | >, 26 | path: &Path, 27 | base_path: Option<&Path>, 28 | ) -> std::io::Result { 29 | let logger = logger.new(o!("path" => format!("{:?}", path))); 30 | 31 | debug!(logger, "visit entity"; "expr" => %e); 32 | 33 | let e: Expr>> = 34 | e.reduce_predicate_and_short_circuit(|p| p.eval_name_predicate(path, base_path)); 35 | 36 | if let Expr::Literal(b) = e { 37 | debug!(logger, "short circuit after path predicate eval"; "expr" => %e, "result" => %b); 38 | return Ok(b); 39 | } 40 | 41 | debug!(logger, "reduced expr after path predicate eval"; "expr" => %e); 42 | 43 | let file = File::open(path).await?; 44 | let metadata = file.metadata().await?; 45 | 46 | let e: Expr>> = 47 | e.reduce_predicate_and_short_circuit(|p| p.eval_metadata_predicate(&metadata)); 48 | 49 | if let Expr::Literal(b) = e { 50 | debug!(logger, "short circuit after metadata predicate eval"; "expr" => %e, "result" => %b); 51 | return Ok(b); 52 | } 53 | 54 | debug!(logger, "reduced expr after metadata predicate eval"; "expr" => %e); 55 | 56 | // Determine which predicates remain for optimized file reading 57 | let has_structured = e.contains_structured_predicates(); 58 | let has_content = e.contains_content_predicates(); 59 | 60 | if !metadata.is_file() { 61 | debug!( 62 | logger, 63 | "not a file, all structured/content predicates eval to false" 64 | ); 65 | let e: Expr> = 66 | e.reduce_predicate_and_short_circuit(|p| match p { 67 | Predicate::Content(_) => ShortCircuit::Known(false), 
68 | Predicate::Structured(_) => ShortCircuit::Known(false), 69 | _ => unreachable!( 70 | "only Content and Structured predicates should remain after metadata phase" 71 | ), 72 | }); 73 | 74 | if let Expr::Literal(b) = e { 75 | debug!(logger, "evaluation finished"; "result" => b); 76 | return Ok(b); 77 | } 78 | unreachable!("all predicates should be reduced to literals after evaluation") 79 | } 80 | 81 | match (has_structured, has_content) { 82 | (true, true) => { 83 | debug!( 84 | logger, 85 | "evaluating both structured and content predicates - single file read" 86 | ); 87 | let bytes = tokio::fs::read(path).await?; 88 | 89 | if let Ok(contents) = std::str::from_utf8(&bytes) { 90 | // UTF-8: evaluate structured predicates first 91 | let mut cache = ParsedDocuments::new(); 92 | let e = e.reduce_predicate_and_short_circuit(|p| match p { 93 | Predicate::Structured(s) => { 94 | match eval_structured_predicate(&s, contents, &mut cache) { 95 | Ok(result) => ShortCircuit::Known(result), 96 | Err(_) => ShortCircuit::Known(false), 97 | } 98 | } 99 | Predicate::Content(c) => ShortCircuit::Unknown(Predicate::Content(c)), 100 | _ => unreachable!("only Structured and Content predicates should remain"), 101 | }); 102 | 103 | if let Expr::Literal(b) = e { 104 | debug!(logger, "short circuit after structured predicates"; "result" => b); 105 | return Ok(b); 106 | } 107 | 108 | // Evaluate content predicates using in-memory stream (8KB chunks) 109 | const CHUNK_SIZE: usize = 8192; 110 | let chunks: Vec, std::io::Error>> = bytes 111 | .chunks(CHUNK_SIZE) 112 | .map(|chunk| Ok(chunk.to_vec())) 113 | .collect(); 114 | 115 | let e = run_contents_predicate_stream(e, stream::iter(chunks)).await?; 116 | 117 | if let Expr::Literal(b) = e { 118 | debug!(logger, "evaluation finished"; "result" => b); 119 | Ok(b) 120 | } else { 121 | unreachable!( 122 | "all content predicates should be reduced to literals after streaming" 123 | ) 124 | } 125 | } else { 126 | debug!( 127 | logger, 128 | "file is not UTF-8, structured predicates = false, using streaming content" 129 | ); 130 | // Non-UTF-8: structured predicates fail, stream content 131 | let e = e.reduce_predicate_and_short_circuit(|p| match p { 132 | Predicate::Structured(_) => ShortCircuit::Known(false), 133 | Predicate::Content(c) => ShortCircuit::Unknown(Predicate::Content(c)), 134 | _ => unreachable!("only Structured and Content predicates should remain"), 135 | }); 136 | 137 | if let Expr::Literal(b) = e { 138 | debug!(logger, "short circuit after structured=false"; "result" => b); 139 | return Ok(b); 140 | } 141 | 142 | const CHUNK_SIZE: usize = 8192; 143 | let chunks: Vec, std::io::Error>> = bytes 144 | .chunks(CHUNK_SIZE) 145 | .map(|chunk| Ok(chunk.to_vec())) 146 | .collect(); 147 | 148 | let e = run_contents_predicate_stream(e, stream::iter(chunks)).await?; 149 | 150 | if let Expr::Literal(b) = e { 151 | debug!(logger, "evaluation finished"; "result" => b); 152 | Ok(b) 153 | } else { 154 | unreachable!( 155 | "all content predicates should be reduced to literals after streaming" 156 | ) 157 | } 158 | } 159 | } 160 | (true, false) => { 161 | debug!(logger, "evaluating structured predicates only"); 162 | let e = match tokio::fs::read_to_string(path).await { 163 | Ok(contents) => { 164 | let mut cache = ParsedDocuments::new(); 165 | e.reduce_predicate_and_short_circuit(|p| match p { 166 | Predicate::Structured(s) => { 167 | match eval_structured_predicate(&s, &contents, &mut cache) { 168 | Ok(result) => { 169 | ShortCircuit::>::Known(result) 170 | } 171 | 
Err(_) => ShortCircuit::>::Known(false), 172 | } 173 | } 174 | _ => unreachable!( 175 | "only Structured predicates should remain when has_content is false" 176 | ), 177 | }) 178 | } 179 | Err(_) => { 180 | // Non-UTF-8 or read error: all structured predicates = false 181 | e.reduce_predicate_and_short_circuit(|p| match p { 182 | Predicate::Structured(_) => { 183 | ShortCircuit::>::Known(false) 184 | } 185 | _ => unreachable!( 186 | "only Structured predicates should remain when has_content is false" 187 | ), 188 | }) 189 | } 190 | }; 191 | 192 | if let Expr::Literal(b) = e { 193 | debug!(logger, "evaluation finished"; "result" => b); 194 | Ok(b) 195 | } else { 196 | unreachable!( 197 | "all structured predicates should be reduced to literals after evaluation" 198 | ) 199 | } 200 | } 201 | (false, true) => { 202 | debug!(logger, "evaluating content predicates only - streaming"); 203 | let e = run_contents_predicate_stream( 204 | e, 205 | ReaderStream::new(BufStream::new(file)).map_ok(|b| b.to_vec()), 206 | ) 207 | .await?; 208 | 209 | if let Expr::Literal(b) = e { 210 | debug!(logger, "evaluation finished"; "result" => b); 211 | Ok(b) 212 | } else { 213 | unreachable!("all content predicates should be reduced to literals after streaming") 214 | } 215 | } 216 | (false, false) => { 217 | // No structured or content predicates remain (already short-circuited) 218 | unreachable!( 219 | "both has_structured and has_content are false - should have short-circuited" 220 | ) 221 | } 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /src/parser/structured_path.rs: -------------------------------------------------------------------------------- 1 | //! Parser for structured data path expressions 2 | //! 3 | //! Handles paths like: 4 | //! - `.spec.replicas` → [Key("spec"), Key("replicas")] 5 | //! - `[0].name` → [Index(0), Key("name")] 6 | //! 
- `.items[*].id` → [Key("items"), `WildcardIndex`, Key("id")] 7 | 8 | use pest::{iterators::Pair, Parser}; 9 | use pest_derive::Parser; 10 | use thiserror::Error; 11 | 12 | #[derive(Parser)] 13 | #[grammar = "parser/structured_path.pest"] 14 | pub struct PathParser; 15 | 16 | /// A single component in a path expression 17 | #[derive(Debug, Clone, PartialEq, Eq)] 18 | pub enum PathComponent { 19 | /// Object field access: .fieldname 20 | Key(String), 21 | /// Recursive descent: ..fieldname (matches key at any depth) 22 | RecursiveKey(String), 23 | /// Array index access: [42] 24 | Index(usize), 25 | /// Array wildcard access: [*] 26 | WildcardIndex, 27 | } 28 | 29 | /// Errors that can occur during path parsing 30 | #[derive(Debug, Error, Clone, PartialEq, Eq)] 31 | pub enum PathParseError { 32 | /// Syntax error from Pest parser 33 | #[error("Path syntax error: {0}")] 34 | Syntax(String), 35 | 36 | /// Invalid numeric index value 37 | #[error("Invalid array index '{value}': {reason}")] 38 | InvalidIndex { value: String, reason: String }, 39 | 40 | /// Empty path (no components) 41 | #[error("Path cannot be empty")] 42 | EmptyPath, 43 | } 44 | 45 | /// Parse a path expression into a vector of components 46 | /// 47 | /// # Examples 48 | /// ``` 49 | /// use detect::parser::structured_path::{parse_path, PathComponent}; 50 | /// 51 | /// let components = parse_path(".spec.replicas").unwrap(); 52 | /// assert_eq!(components, vec![ 53 | /// PathComponent::Key("spec".to_string()), 54 | /// PathComponent::Key("replicas".to_string()), 55 | /// ]); 56 | /// 57 | /// let components = parse_path("[0].name").unwrap(); 58 | /// assert_eq!(components, vec![ 59 | /// PathComponent::Index(0), 60 | /// PathComponent::Key("name".to_string()), 61 | /// ]); 62 | /// ``` 63 | pub fn parse_path(input: &str) -> Result, PathParseError> { 64 | if input.is_empty() { 65 | return Err(PathParseError::EmptyPath); 66 | } 67 | 68 | let pairs = PathParser::parse(Rule::path, input) 69 | .map_err(|e| PathParseError::Syntax(format!("Failed to parse path '{input}': {e}")))?; 70 | 71 | let mut components = Vec::new(); 72 | 73 | for pair in pairs { 74 | match pair.as_rule() { 75 | Rule::path => { 76 | // Recurse into path components 77 | for component_pair in pair.into_inner() { 78 | if let Some(component) = parse_component(component_pair)? 
{ 79 | components.push(component); 80 | } 81 | } 82 | } 83 | Rule::EOI => {} // End of input, ignore 84 | _ => { 85 | return Err(PathParseError::Syntax(format!( 86 | "Unexpected rule: {:?}", 87 | pair.as_rule() 88 | ))) 89 | } 90 | } 91 | } 92 | 93 | if components.is_empty() { 94 | return Err(PathParseError::EmptyPath); 95 | } 96 | 97 | Ok(components) 98 | } 99 | 100 | fn parse_component(pair: Pair<'_, Rule>) -> Result, PathParseError> { 101 | match pair.as_rule() { 102 | Rule::recursive_key => { 103 | // recursive_key -> identifier 104 | let identifier = pair 105 | .into_inner() 106 | .next() 107 | .ok_or_else(|| PathParseError::Syntax("Missing identifier".to_string()))?; 108 | Ok(Some(PathComponent::RecursiveKey( 109 | identifier.as_str().to_string(), 110 | ))) 111 | } 112 | Rule::key_access => { 113 | // key_access -> identifier 114 | let identifier = pair 115 | .into_inner() 116 | .next() 117 | .ok_or_else(|| PathParseError::Syntax("Missing identifier".to_string()))?; 118 | Ok(Some(PathComponent::Key(identifier.as_str().to_string()))) 119 | } 120 | Rule::index_access => { 121 | // index_access -> number 122 | let number_pair = pair 123 | .into_inner() 124 | .next() 125 | .ok_or_else(|| PathParseError::Syntax("Missing number".to_string()))?; 126 | let number_str = number_pair.as_str(); 127 | 128 | let index = number_str 129 | .parse::() 130 | .map_err(|e| PathParseError::InvalidIndex { 131 | value: number_str.to_string(), 132 | reason: e.to_string(), 133 | })?; 134 | 135 | Ok(Some(PathComponent::Index(index))) 136 | } 137 | Rule::wildcard_access => Ok(Some(PathComponent::WildcardIndex)), 138 | _ => Ok(None), // Skip unknown rules 139 | } 140 | } 141 | 142 | #[cfg(test)] 143 | mod tests { 144 | use super::*; 145 | 146 | #[test] 147 | fn test_simple_key() { 148 | let result = parse_path(".name").unwrap(); 149 | assert_eq!(result, vec![PathComponent::Key("name".to_string())]); 150 | } 151 | 152 | #[test] 153 | fn test_nested_keys() { 154 | let result = parse_path(".spec.replicas").unwrap(); 155 | assert_eq!( 156 | result, 157 | vec![ 158 | PathComponent::Key("spec".to_string()), 159 | PathComponent::Key("replicas".to_string()), 160 | ] 161 | ); 162 | } 163 | 164 | #[test] 165 | fn test_deep_nesting() { 166 | let result = parse_path(".a.b.c.d").unwrap(); 167 | assert_eq!( 168 | result, 169 | vec![ 170 | PathComponent::Key("a".to_string()), 171 | PathComponent::Key("b".to_string()), 172 | PathComponent::Key("c".to_string()), 173 | PathComponent::Key("d".to_string()), 174 | ] 175 | ); 176 | } 177 | 178 | #[test] 179 | fn test_single_index() { 180 | let result = parse_path("[0]").unwrap(); 181 | assert_eq!(result, vec![PathComponent::Index(0)]); 182 | } 183 | 184 | #[test] 185 | fn test_index_then_key() { 186 | let result = parse_path("[0].name").unwrap(); 187 | assert_eq!( 188 | result, 189 | vec![ 190 | PathComponent::Index(0), 191 | PathComponent::Key("name".to_string()), 192 | ] 193 | ); 194 | } 195 | 196 | #[test] 197 | fn test_key_then_index() { 198 | let result = parse_path(".items[0]").unwrap(); 199 | assert_eq!( 200 | result, 201 | vec![ 202 | PathComponent::Key("items".to_string()), 203 | PathComponent::Index(0), 204 | ] 205 | ); 206 | } 207 | 208 | #[test] 209 | fn test_wildcard() { 210 | let result = parse_path("[*]").unwrap(); 211 | assert_eq!(result, vec![PathComponent::WildcardIndex]); 212 | } 213 | 214 | #[test] 215 | fn test_wildcard_with_keys() { 216 | let result = parse_path(".items[*].id").unwrap(); 217 | assert_eq!( 218 | result, 219 | vec![ 220 | 
PathComponent::Key("items".to_string()), 221 | PathComponent::WildcardIndex, 222 | PathComponent::Key("id".to_string()), 223 | ] 224 | ); 225 | } 226 | 227 | #[test] 228 | fn test_multiple_indices() { 229 | let result = parse_path("[0][1][2]").unwrap(); 230 | assert_eq!( 231 | result, 232 | vec![ 233 | PathComponent::Index(0), 234 | PathComponent::Index(1), 235 | PathComponent::Index(2), 236 | ] 237 | ); 238 | } 239 | 240 | #[test] 241 | fn test_complex_path() { 242 | let result = parse_path(".spec.containers[0].image").unwrap(); 243 | assert_eq!( 244 | result, 245 | vec![ 246 | PathComponent::Key("spec".to_string()), 247 | PathComponent::Key("containers".to_string()), 248 | PathComponent::Index(0), 249 | PathComponent::Key("image".to_string()), 250 | ] 251 | ); 252 | } 253 | 254 | #[test] 255 | fn test_underscore_in_key() { 256 | let result = parse_path(".my_field").unwrap(); 257 | assert_eq!(result, vec![PathComponent::Key("my_field".to_string())]); 258 | } 259 | 260 | #[test] 261 | fn test_mixed_case_key() { 262 | let result = parse_path(".camelCase").unwrap(); 263 | assert_eq!(result, vec![PathComponent::Key("camelCase".to_string())]); 264 | } 265 | 266 | #[test] 267 | fn test_large_index() { 268 | let result = parse_path("[999]").unwrap(); 269 | assert_eq!(result, vec![PathComponent::Index(999)]); 270 | } 271 | 272 | #[test] 273 | fn test_error_empty_path() { 274 | let result = parse_path(""); 275 | assert!(matches!(result, Err(PathParseError::EmptyPath))); 276 | } 277 | 278 | #[test] 279 | fn test_error_no_dot_before_key() { 280 | let result = parse_path("name"); 281 | assert!(matches!(result, Err(PathParseError::Syntax(_)))); 282 | } 283 | 284 | #[test] 285 | fn test_error_missing_bracket() { 286 | let result = parse_path("[0"); 287 | assert!(matches!(result, Err(PathParseError::Syntax(_)))); 288 | } 289 | 290 | #[test] 291 | fn test_error_missing_closing_bracket() { 292 | let result = parse_path(".items[0"); 293 | assert!(matches!(result, Err(PathParseError::Syntax(_)))); 294 | } 295 | 296 | #[test] 297 | fn test_error_empty_brackets() { 298 | let result = parse_path("[]"); 299 | assert!(matches!(result, Err(PathParseError::Syntax(_)))); 300 | } 301 | 302 | #[test] 303 | fn test_hyphen_in_key() { 304 | let result = parse_path(".field-name").unwrap(); 305 | assert_eq!(result, vec![PathComponent::Key("field-name".to_string())]); 306 | } 307 | 308 | #[test] 309 | fn test_error_triple_dot() { 310 | // Triple dots are invalid (recursive descent is only double dots) 311 | let result = parse_path("...field"); 312 | assert!(matches!(result, Err(PathParseError::Syntax(_)))); 313 | } 314 | 315 | #[test] 316 | fn test_error_space_in_key() { 317 | let result = parse_path(".my field"); 318 | assert!(matches!(result, Err(PathParseError::Syntax(_)))); 319 | } 320 | } 321 | -------------------------------------------------------------------------------- /tests/parser_basic.rs: -------------------------------------------------------------------------------- 1 | use detect::parser::test_utils::RawTestExpr; 2 | use detect::parser::*; 3 | 4 | // ============================================================================== 5 | // Basic Syntax Tests - Predicates, Values, Quotes 6 | // ============================================================================== 7 | 8 | #[test] 9 | fn test_simple_predicate() { 10 | let result = RawParser::parse_raw_expr("name == foo").unwrap(); 11 | let expected = RawTestExpr::string_predicate("name", "==", "foo"); 12 | assert_eq!(result.to_test_expr(), expected); 13 | 
} 14 | 15 | #[test] 16 | fn test_quoted_values() { 17 | let result = RawParser::parse_raw_expr(r#"filename == "my file.txt""#).unwrap(); 18 | let expected = RawTestExpr::quoted_predicate("filename", "==", "my file.txt"); 19 | assert_eq!(result.to_test_expr(), expected); 20 | } 21 | 22 | #[test] 23 | fn test_single_quoted_values() { 24 | let result = RawParser::parse_raw_expr("filename == 'my file.txt'").unwrap(); 25 | let expected = RawTestExpr::quoted_predicate("filename", "==", "my file.txt"); 26 | assert_eq!(result.to_test_expr(), expected); 27 | } 28 | 29 | #[test] 30 | fn test_escape_sequences() { 31 | // Test double quote escapes 32 | let result = RawParser::parse_raw_expr(r#"name == "file\"with\"quotes""#).unwrap(); 33 | let expected = RawTestExpr::quoted_predicate("name", "==", r#"file\"with\"quotes"#); 34 | assert_eq!(result.to_test_expr(), expected); 35 | 36 | // Test various escape sequences 37 | let result = RawParser::parse_raw_expr(r#"content == "line1\nline2\ttab\\backslash""#).unwrap(); 38 | let expected = 39 | RawTestExpr::quoted_predicate("content", "==", r#"line1\nline2\ttab\\backslash"#); 40 | assert_eq!(result.to_test_expr(), expected); 41 | 42 | // Test single quote escapes 43 | let result = RawParser::parse_raw_expr(r"name == 'file\'with\'quotes'").unwrap(); 44 | let expected = RawTestExpr::quoted_predicate("name", "==", r"file\'with\'quotes"); 45 | assert_eq!(result.to_test_expr(), expected); 46 | } 47 | 48 | #[test] 49 | fn test_set_values() { 50 | let result = RawParser::parse_raw_expr("ext in [rs, js, ts]").unwrap(); 51 | // With new parser, sets are raw tokens - spaces preserved 52 | let expected = RawTestExpr::string_predicate("ext", "in", "[rs, js, ts]"); 53 | assert_eq!(result.to_test_expr(), expected); 54 | } 55 | 56 | #[test] 57 | fn test_mixed_set() { 58 | let result = RawParser::parse_raw_expr(r#"name in [README, "my file", config]"#).unwrap(); 59 | let expected = RawTestExpr::string_predicate("name", "in", r#"[README, "my file", config]"#); 60 | assert_eq!(result.to_test_expr(), expected); 61 | } 62 | 63 | #[test] 64 | fn test_set_with_quotes_and_escapes() { 65 | let result = RawParser::parse_raw_expr(r#"name in ["file\"1", 'file\'2', plain]"#).unwrap(); 66 | let expected = RawTestExpr::string_predicate("name", "in", r#"["file\"1", 'file\'2', plain]"#); 67 | assert_eq!(result.to_test_expr(), expected); 68 | } 69 | 70 | #[test] 71 | fn test_empty_set() { 72 | let result = RawParser::parse_raw_expr("ext in []").unwrap(); 73 | let expected = RawTestExpr::string_predicate("ext", "in", "[]"); 74 | assert_eq!(result.to_test_expr(), expected); 75 | } 76 | 77 | // ============================================================================== 78 | // Boolean Logic Tests - AND, OR, NOT, Precedence 79 | // ============================================================================== 80 | 81 | #[test] 82 | fn test_boolean_logic() { 83 | let result = RawParser::parse_raw_expr("name == foo AND size > 1000").unwrap(); 84 | let expected = RawTestExpr::and( 85 | RawTestExpr::string_predicate("name", "==", "foo"), 86 | RawTestExpr::string_predicate("size", ">", "1000"), 87 | ); 88 | assert_eq!(result.to_test_expr(), expected); 89 | } 90 | 91 | #[test] 92 | fn test_or_logic() { 93 | let result = RawParser::parse_raw_expr("name == foo OR name == bar").unwrap(); 94 | let expected = RawTestExpr::or( 95 | RawTestExpr::string_predicate("name", "==", "foo"), 96 | RawTestExpr::string_predicate("name", "==", "bar"), 97 | ); 98 | assert_eq!(result.to_test_expr(), expected); 99 | } 
100 | 101 | #[test] 102 | fn test_negation_variants() { 103 | // Test NOT keyword 104 | let result = RawParser::parse_raw_expr("NOT name == foo").unwrap(); 105 | let expected = RawTestExpr::not(RawTestExpr::string_predicate("name", "==", "foo")); 106 | assert_eq!(result.to_test_expr(), expected); 107 | 108 | // Test ! symbol 109 | let result = RawParser::parse_raw_expr("! name == foo").unwrap(); 110 | let expected = RawTestExpr::not(RawTestExpr::string_predicate("name", "==", "foo")); 111 | assert_eq!(result.to_test_expr(), expected); 112 | 113 | // Test escaped ! symbol 114 | let result = RawParser::parse_raw_expr("\\! name == foo").unwrap(); 115 | let expected = RawTestExpr::not(RawTestExpr::string_predicate("name", "==", "foo")); 116 | assert_eq!(result.to_test_expr(), expected); 117 | } 118 | 119 | #[test] 120 | fn test_operator_precedence() { 121 | // AND should bind tighter than OR 122 | let result = RawParser::parse_raw_expr("a == b OR c == d AND e == f").unwrap(); 123 | let expected = RawTestExpr::or( 124 | RawTestExpr::string_predicate("a", "==", "b"), 125 | RawTestExpr::and( 126 | RawTestExpr::string_predicate("c", "==", "d"), 127 | RawTestExpr::string_predicate("e", "==", "f"), 128 | ), 129 | ); 130 | assert_eq!(result.to_test_expr(), expected); 131 | } 132 | 133 | #[test] 134 | fn test_parentheses() { 135 | let result = RawParser::parse_raw_expr("(a == b OR c == d) AND e == f").unwrap(); 136 | let expected = RawTestExpr::and( 137 | RawTestExpr::or( 138 | RawTestExpr::string_predicate("a", "==", "b"), 139 | RawTestExpr::string_predicate("c", "==", "d"), 140 | ), 141 | RawTestExpr::string_predicate("e", "==", "f"), 142 | ); 143 | assert_eq!(result.to_test_expr(), expected); 144 | } 145 | 146 | #[test] 147 | fn test_complex_expression() { 148 | let result = 149 | RawParser::parse_raw_expr(r#"(name == "test.rs" OR ext in [js, ts]) AND NOT size > 1mb"#) 150 | .unwrap(); 151 | 152 | let expected = RawTestExpr::and( 153 | RawTestExpr::or( 154 | RawTestExpr::quoted_predicate("name", "==", "test.rs"), 155 | RawTestExpr::string_predicate("ext", "in", "[js, ts]"), 156 | ), 157 | RawTestExpr::not(RawTestExpr::string_predicate("size", ">", "1mb")), 158 | ); 159 | assert_eq!(result.to_test_expr(), expected); 160 | } 161 | 162 | #[test] 163 | fn test_all_operators() { 164 | let test_cases = vec![ 165 | ("name == foo", "=="), 166 | ("name != foo", "!="), 167 | ("name ~= foo", "~="), 168 | ("name > foo", ">"), 169 | ("name < foo", "<"), 170 | ("name >= foo", ">="), 171 | ("name <= foo", "<="), 172 | ("name contains foo", "contains"), 173 | ("name in [foo]", "in"), 174 | ]; 175 | 176 | for (input, expected_op) in test_cases { 177 | let result = RawParser::parse_raw_expr(input).unwrap(); 178 | match result.to_test_expr() { 179 | RawTestExpr::Predicate(pred) => { 180 | assert_eq!(pred.operator, expected_op, "Failed for input: {}", input); 181 | } 182 | _ => panic!("Expected predicate for input: {}", input), 183 | } 184 | } 185 | } 186 | 187 | #[test] 188 | fn test_complex_selectors() { 189 | let result = RawParser::parse_raw_expr("name == test.rs").unwrap(); 190 | let expected = RawTestExpr::string_predicate("name", "==", "test.rs"); 191 | assert_eq!(result.to_test_expr(), expected); 192 | 193 | let result = RawParser::parse_raw_expr("meta.size > 1000").unwrap(); 194 | let expected = RawTestExpr::string_predicate("meta.size", ">", "1000"); 195 | assert_eq!(result.to_test_expr(), expected); 196 | } 197 | 198 | #[test] 199 | fn test_case_insensitive_keywords() { 200 | let result = 
RawParser::parse_raw_expr("name == foo and name == bar").unwrap(); 201 | let result_upper = RawParser::parse_raw_expr("name == foo AND name == bar").unwrap(); 202 | assert_eq!(result.to_test_expr(), result_upper.to_test_expr()); 203 | 204 | let result = RawParser::parse_raw_expr("NOT name == foo").unwrap(); 205 | let result_lower = RawParser::parse_raw_expr("not name == foo").unwrap(); 206 | assert_eq!(result.to_test_expr(), result_lower.to_test_expr()); 207 | } 208 | 209 | #[test] 210 | fn test_syntax_errors() { 211 | // Missing value 212 | let result = RawParser::parse_raw_expr("name =="); 213 | assert!(result.is_err()); 214 | 215 | // Missing operator 216 | let result = RawParser::parse_raw_expr("name foo"); 217 | assert!(result.is_err()); 218 | 219 | // Unclosed parentheses 220 | let result = RawParser::parse_raw_expr("(name == foo"); 221 | assert!(result.is_err()); 222 | 223 | // Unclosed bracket - now parses as bare token (grammar is permissive) 224 | // This is valid: searches for files named "[foo" 225 | let result = RawParser::parse_raw_expr("name in [foo"); 226 | assert!( 227 | result.is_ok(), 228 | "Permissive grammar allows [foo as bare token" 229 | ); 230 | 231 | // Unclosed quote 232 | let result = RawParser::parse_raw_expr(r#"name == "unclosed"#); 233 | assert!(result.is_err()); 234 | } 235 | 236 | #[test] 237 | fn test_invalid_escape_sequences() { 238 | // Since we're a syntax-only parser, we preserve escape sequences without validating them 239 | // This previously "invalid" escape sequence is now just preserved as-is 240 | let result = RawParser::parse_raw_expr(r#"name == "invalid\x""#); 241 | let expected = RawTestExpr::quoted_predicate("name", "==", r"invalid\x"); 242 | assert_eq!(result.unwrap().to_test_expr(), expected); 243 | 244 | // Unterminated string (this is actually a syntax error, not escape error) 245 | let result = RawParser::parse_raw_expr("name == \"unterminated"); 246 | assert!(result.is_err()); 247 | } 248 | 249 | // ============================================================================== 250 | #[test] 251 | fn test_whitespace_handling() { 252 | // Basic predicate whitespace tolerance 253 | let result1 = RawParser::parse_raw_expr("name==foo").unwrap(); 254 | let result2 = RawParser::parse_raw_expr("name == foo").unwrap(); 255 | let result3 = RawParser::parse_raw_expr(" name == foo ").unwrap(); 256 | 257 | assert_eq!(result1.to_test_expr(), result2.to_test_expr()); 258 | assert_eq!(result2.to_test_expr(), result3.to_test_expr()); 259 | 260 | // Whitespace in sets (preserved as raw token) 261 | let result = RawParser::parse_raw_expr("ext in [ rs , js , ts ]").unwrap(); 262 | let expected = RawTestExpr::string_predicate("ext", "in", "[ rs , js , ts ]"); 263 | assert_eq!(result.to_test_expr(), expected); 264 | } 265 | 266 | #[test] 267 | fn test_edge_cases() { 268 | // Empty string value 269 | let result = RawParser::parse_raw_expr(r#"name == """#).unwrap(); 270 | let expected = RawTestExpr::quoted_predicate("name", "==", ""); 271 | assert_eq!(result.to_test_expr(), expected); 272 | 273 | // Value with special characters 274 | let result = RawParser::parse_raw_expr("name == foo-bar_baz.txt").unwrap(); 275 | let expected = RawTestExpr::string_predicate("name", "==", "foo-bar_baz.txt"); 276 | assert_eq!(result.to_test_expr(), expected); 277 | 278 | // Selector with dots and underscores 279 | let result = RawParser::parse_raw_expr("path.name_with_underscores == foo").unwrap(); 280 | let expected = 
RawTestExpr::string_predicate("path.name_with_underscores", "==", "foo"); 281 | assert_eq!(result.to_test_expr(), expected); 282 | } 283 | 284 | // Bug: Reserved word substrings in bare values 285 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /src/parser/error.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused_assignments)] // Fields are used by miette's derive macros 2 | 3 | use miette::{Diagnostic, SourceSpan}; 4 | use thiserror::Error; 5 | 6 | use super::raw::Rule; 7 | 8 | /// Main error type for detect expressions, using miette for diagnostics 9 | #[derive(Debug, Clone, Diagnostic, Error)] 10 | pub enum DetectError { 11 | // Syntax errors from pest 12 | #[error("Syntax error at line {line}, column {col}")] 13 | #[diagnostic(code(detect::syntax))] 14 | Syntax { 15 | #[source_code] 16 | src: String, 17 | #[label("{expected_msg}")] 18 | span: SourceSpan, 19 | #[help] 20 | help: Option<String>, 21 | expected_msg: String, 22 | line: usize, 23 | col: usize, 24 | }, 25 | 26 | // Typechecker errors with spans 27 | #[error("Unknown selector: {selector}")] 28 | #[diagnostic(code(detect::unknown_selector), help("Valid selectors: name, basename, ext, path, dir, size, type, depth, modified, created, accessed, content"))] 29 | UnknownSelector { 30 | selector: String, 31 | #[label("unknown selector")] 32 | span: SourceSpan, 33 | #[source_code] 34 | src: String, 35 | }, 36 | 37 | #[error("Invalid {format} selector path: {path}")] 38 | #[diagnostic( 39 | code(detect::invalid_structured_path), 40 | help("Structured selectors use format: {format}:.path.to.field") 41 | )] 42 | InvalidStructuredPath { 43 | format: String, 44 | path: String, 45 | #[label("invalid path: {reason}")] 46 | span: SourceSpan, 47 | reason: String, 48 | #[source_code] 49 | src: String, 50 | }, 51 | 52 | #[error("Unknown structured data format: '{format}'")] 53 | #[diagnostic(code(detect::unknown_structured_format))] 54 | UnknownStructuredFormat { 55 | format: String, 56 | #[label("unknown format")] 57 | span: SourceSpan, 58 | #[source_code] 59 | src: String, 60 | #[help] 61 | suggestions: Option<String>, 62 | }, 63 | 64 | #[error("Unknown operator: {operator}")] 65 | #[diagnostic( 66 | code(detect::unknown_operator), 67 | help("Valid operators include: ==, !=, >, <, contains, matches, etc.") 68 | )] 69 | UnknownOperator { 70 | operator: String, 71 | #[label("unknown operator")] 72 | span: SourceSpan, 73 | #[source_code] 74 | src: String, 75 | }, 76 | 77 | #[error("Unknown alias: '{word}'")] 78 | #[diagnostic(code(detect::unknown_alias))] 79 | UnknownAlias { 80 | word: String, 81 | #[label("unknown alias")] 82 | span: SourceSpan, 83 | #[source_code] 84 | src: String, 85 | #[help] 86 | suggestions: Option<String>, 87 | }, 88 | 89 | #[error("Operator '{operator}' is not compatible with selector '{selector}'")] 90 | #[diagnostic( 91 | code(detect::incompatible_operator), 92 | help("This selector requires a different type of operator") 93 | )] 94 | IncompatibleOperator { 95 | selector: String, 96 | operator: String, 97 | #[label("incompatible operator")] 98 | operator_span: SourceSpan, 99 | #[label("for this selector")] 100 | selector_span: SourceSpan, 101 | #[source_code] 102 | src: String, 103 | }, 104 | 105 | #[error("Expected {expected} value, found: {found}")] 106 | #[diagnostic( 107 | code(detect::invalid_value), 108 | help("Check the value type for this selector") 109 | )] 110 | InvalidValue { 111 | expected: String, 112 | found: String, 113 | #[label("invalid value")] 114 | span: SourceSpan, 115 | #[source_code] 116 | src: String, 117 | }, 118 | 119 | // Escape errors 120 | #[error("Invalid escape sequence '\\{char}'")] 121 | #[diagnostic( 122 | 
code(detect::invalid_escape), 123 | help("Valid escape sequences: \\n, \\t, \\\\, \\\", \\'") 124 | )] 125 | InvalidEscape { 126 | char: char, 127 | #[label("invalid escape")] 128 | span: SourceSpan, 129 | #[source_code] 130 | src: String, 131 | }, 132 | 133 | #[error("Unterminated escape sequence")] 134 | #[diagnostic(code(detect::unterminated_escape))] 135 | UnterminatedEscape { 136 | #[label("escape sequence not completed")] 137 | span: SourceSpan, 138 | #[source_code] 139 | src: String, 140 | }, 141 | 142 | // Quote errors 143 | #[error("Unterminated string literal")] 144 | #[diagnostic(code(detect::unterminated_string))] 145 | UnterminatedString { 146 | #[label("missing closing {quote} quote")] 147 | span: SourceSpan, 148 | quote: char, 149 | #[source_code] 150 | src: String, 151 | }, 152 | 153 | #[error("Stray {quote} quote")] 154 | #[diagnostic( 155 | code(detect::stray_quote), 156 | help("Remove the quote or add matching opening quote") 157 | )] 158 | StrayQuote { 159 | #[label("unexpected quote")] 160 | span: SourceSpan, 161 | quote: char, 162 | #[source_code] 163 | src: String, 164 | }, 165 | 166 | // Filesystem errors 167 | #[error("Directory not found: {path}")] 168 | #[diagnostic( 169 | code(detect::directory_not_found), 170 | help("Check that the directory path exists and is accessible") 171 | )] 172 | DirectoryNotFound { path: String }, 173 | 174 | #[error("Path is not a directory: {path}")] 175 | #[diagnostic( 176 | code(detect::not_a_directory), 177 | help("The path must be a directory, not a file") 178 | )] 179 | NotADirectory { path: String }, 180 | 181 | // I/O errors 182 | #[error("I/O error: {message}")] 183 | #[diagnostic(code(detect::io_error))] 184 | IoError { message: String }, 185 | 186 | // Internal errors 187 | #[error("Internal parser error: {message}")] 188 | #[diagnostic(code(detect::internal))] 189 | Internal { 190 | message: String, 191 | #[source_code] 192 | src: String, 193 | }, 194 | } 195 | 196 | // Extension trait for span location extraction 197 | pub trait SpanExt { 198 | fn to_location(&self) -> (usize, usize); 199 | fn to_source_span(&self) -> SourceSpan; 200 | } 201 | 202 | impl SpanExt for pest::Span<'_> { 203 | #[inline] 204 | fn to_location(&self) -> (usize, usize) { 205 | self.start_pos().line_col() 206 | } 207 | 208 | #[inline] 209 | fn to_source_span(&self) -> SourceSpan { 210 | (self.start(), self.end() - self.start()).into() 211 | } 212 | } 213 | 214 | /// Convert pest Rule enum to user-friendly names 215 | fn rule_to_friendly_name(rule: &Rule) -> &'static str { 216 | match rule { 217 | Rule::program => "program", 218 | Rule::expr => "expression", 219 | Rule::infix => "operator (AND/OR)", 220 | Rule::and => "AND", 221 | Rule::or => "OR", 222 | Rule::prefix => "prefix operator (NOT)", 223 | Rule::neg => "NOT", 224 | Rule::primary => "predicate or expression", 225 | Rule::predicate => "predicate", 226 | Rule::selector => "selector", 227 | Rule::operator => "operator", 228 | Rule::value => "value", 229 | Rule::value_content => "value", 230 | Rule::raw_token => "value", 231 | Rule::quoted_string => "quoted string", 232 | Rule::unterminated_string => "unterminated string", 233 | Rule::trailing_quote => "trailing quote", 234 | Rule::single_word => "single-word alias", 235 | Rule::set_contents => "set contents", 236 | Rule::set_items => "set items", 237 | Rule::set_item => "set item", 238 | Rule::bare_set_item => "item", 239 | Rule::inner_double => "string content", 240 | Rule::inner_single => "string content", 241 | Rule::escaped => "escape 
sequence", 242 | Rule::raw_char => "character", 243 | Rule::balanced_paren => "balanced parentheses", 244 | Rule::balanced_bracket => "balanced brackets", 245 | Rule::balanced_curly => "balanced braces", 246 | Rule::WHITESPACE => "whitespace", 247 | Rule::EOI => "end of input", 248 | } 249 | } 250 | 251 | /// Generate contextual help text based on error patterns 252 | fn generate_help_text(positives: &[Rule], found_eoi: bool) -> Option { 253 | if positives.is_empty() { 254 | return None; 255 | } 256 | 257 | // Check for common patterns 258 | if positives.contains(&Rule::value) { 259 | if found_eoi { 260 | return Some("Try adding a value after the operator, like: ext == rs".to_string()); 261 | } 262 | return Some("Expected a value here (e.g., a string, number, or [set])".to_string()); 263 | } 264 | 265 | if (positives.contains(&Rule::expr) || positives.contains(&Rule::predicate)) && found_eoi { 266 | return Some("Expression is incomplete. Add a predicate after the operator.".to_string()); 267 | } 268 | 269 | if positives.contains(&Rule::EOI) { 270 | return Some("Unexpected input. Check for unbalanced parentheses or quotes.".to_string()); 271 | } 272 | 273 | None 274 | } 275 | 276 | impl DetectError { 277 | /// Create a syntax error from pest error with diagnostic information 278 | pub fn from_pest(pest_err: Box>, src: String) -> Self { 279 | use pest::error::{ErrorVariant, InputLocation}; 280 | 281 | // Extract position information with non-zero width for miette arrow rendering 282 | let (span, _pos) = match pest_err.location { 283 | InputLocation::Pos(pos) => { 284 | // For point locations, ensure non-zero width for miette arrow 285 | // If at/past EOI, point backwards at last char; otherwise point at current position 286 | if pos >= src.len() && pos > 0 { 287 | ((pos - 1, 1).into(), pos) 288 | } else if pos < src.len() { 289 | ((pos, 1).into(), pos) 290 | } else { 291 | // Empty input 292 | ((0, 0).into(), pos) 293 | } 294 | } 295 | InputLocation::Span((start, end)) => { 296 | let width = end.saturating_sub(start).max(1); // Ensure at least width 1 297 | ((start, width).into(), start) 298 | } 299 | }; 300 | 301 | // Get line and column 302 | let (line, col) = match pest_err.line_col { 303 | pest::error::LineColLocation::Pos((line, col)) => (line, col), 304 | pest::error::LineColLocation::Span((line, col), _) => (line, col), 305 | }; 306 | 307 | // Extract expected tokens and generate user-friendly message 308 | let (expected_msg, help) = match &pest_err.variant { 309 | ErrorVariant::ParsingError { 310 | positives, 311 | negatives: _, 312 | } => { 313 | let found_eoi = match pest_err.location { 314 | InputLocation::Pos(p) => p >= src.len(), 315 | InputLocation::Span((_, end)) => end >= src.len(), 316 | }; 317 | 318 | let expected_msg = if positives.is_empty() { 319 | "Unexpected input".to_string() 320 | } else if positives.len() == 1 { 321 | format!("Expected {}", rule_to_friendly_name(&positives[0])) 322 | } else { 323 | let names: Vec<&str> = positives.iter().map(rule_to_friendly_name).collect(); 324 | if names.len() <= 3 { 325 | format!("Expected one of: {}", names.join(", ")) 326 | } else { 327 | format!("Expected one of: {}, ...", names[..3].join(", ")) 328 | } 329 | }; 330 | 331 | let help = generate_help_text(positives, found_eoi); 332 | (expected_msg, help) 333 | } 334 | ErrorVariant::CustomError { message } => (message.clone(), None), 335 | }; 336 | 337 | DetectError::Syntax { 338 | src, 339 | span, 340 | help, 341 | expected_msg, 342 | line, 343 | col, 344 | } 345 | } 346 | 347 | 
/// Create an internal error 348 | pub fn internal(msg: impl Into<String>) -> Self { 349 | DetectError::Internal { 350 | message: msg.into(), 351 | src: String::new(), 352 | } 353 | } 354 | 355 | /// Add source code to the error 356 | pub fn with_source(mut self, src: String) -> Self { 357 | match &mut self { 358 | DetectError::Syntax { src: s, .. } 359 | | DetectError::UnknownSelector { src: s, .. } 360 | | DetectError::InvalidStructuredPath { src: s, .. } 361 | | DetectError::UnknownStructuredFormat { src: s, .. } 362 | | DetectError::UnknownOperator { src: s, .. } 363 | | DetectError::UnknownAlias { src: s, .. } 364 | | DetectError::IncompatibleOperator { src: s, .. } 365 | | DetectError::InvalidValue { src: s, .. } 366 | | DetectError::InvalidEscape { src: s, .. } 367 | | DetectError::UnterminatedEscape { src: s, .. } 368 | | DetectError::UnterminatedString { src: s, .. } 369 | | DetectError::StrayQuote { src: s, .. } 370 | | DetectError::Internal { src: s, .. } => { 371 | *s = src; 372 | } 373 | // Filesystem and I/O errors don't have source code 374 | DetectError::DirectoryNotFound { .. } 375 | | DetectError::NotADirectory { .. } 376 | | DetectError::IoError { .. } => {} 377 | } 378 | self 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /tests/temporal_tests.rs: -------------------------------------------------------------------------------- 1 | use slog::{o, Discard, Logger}; 2 | use std::{fs, time::SystemTime}; 3 | use tempfile::TempDir; 4 | 5 | // Shared helper to run temporal test cases 6 | async fn run_temporal_test( 7 | tmp_dir: &TempDir, 8 | expr: &str, 9 | expected_files: Vec<&str>, 10 | not_expected: Vec<&str>, 11 | ) { 12 | let mut found = Vec::new(); 13 | detect::parse_and_run_fs( 14 | Logger::root(Discard, o!()), 15 | tmp_dir.path(), 16 | false, 17 | expr.to_owned(), 18 | detect::RuntimeConfig::default(), 19 | |p| found.push(p.file_name().unwrap().to_string_lossy().to_string()), 20 | ) 21 | .await 22 | .unwrap(); 23 | 24 | for file in expected_files { 25 | assert!( 26 | found.contains(&file.to_string()), 27 | "Expression '{}' should find '{}', but found: {:?}", 28 | expr, 29 | file, 30 | found 31 | ); 32 | } 33 | 34 | for file in not_expected { 35 | assert!( 36 | !found.contains(&file.to_string()), 37 | "Expression '{}' should not find '{}', but found: {:?}", 38 | expr, 39 | file, 40 | found 41 | ); 42 | } 43 | } 44 | 45 | #[tokio::test] 46 | async fn test_relative_time_operations() { 47 | let tmp_dir = tempfile::Builder::new() 48 | .prefix("detect-temporal-relative") 49 | .tempdir() 50 | .unwrap(); 51 | 52 | // Create files with different ages 53 | let files = vec![ 54 | ("1sec.txt", 1), 55 | ("10secs.txt", 10), 56 | ("5mins.txt", 5 * 60), 57 | ("2hours.txt", 2 * 60 * 60), 58 | ("3days.txt", 3 * 24 * 60 * 60), 59 | ("1week.txt", 7 * 24 * 60 * 60 - 1), // Just under 7 days to pass > -7.days test 60 | ("30days.txt", 30 * 24 * 60 * 60), 61 | ]; 62 | 63 | for (name, age_secs) in &files { 64 | let path = tmp_dir.path().join(name); 65 | std::fs::write(&path, "content").unwrap(); 66 | let mtime = SystemTime::now() - std::time::Duration::from_secs(*age_secs); 67 | fs::File::open(&path).unwrap().set_modified(mtime).unwrap(); 68 | } 69 | 70 | // Test various relative time expressions 71 | let test_cases = vec![ 72 | // Seconds 73 | ( 74 | "modified > \"-2.seconds\"", 75 | vec!["1sec.txt"], 76 | vec!["10secs.txt"], 77 | ), 78 | ( 79 | "modified > \"-30.seconds\"", 80 | vec!["1sec.txt", "10secs.txt"], 81 | vec!["5mins.txt"], 82 | ), 83 
| // Minutes 84 | ( 85 | "modified > \"-10.minutes\"", 86 | vec!["1sec.txt", "10secs.txt", "5mins.txt"], 87 | vec!["2hours.txt"], 88 | ), 89 | ( 90 | "modified > \"-1.minute\"", 91 | vec!["1sec.txt", "10secs.txt"], 92 | vec!["5mins.txt"], 93 | ), 94 | // Hours 95 | ( 96 | "modified > \"-3.hours\"", 97 | vec!["1sec.txt", "10secs.txt", "5mins.txt", "2hours.txt"], 98 | vec!["3days.txt"], 99 | ), 100 | ( 101 | "modified > \"-1.hour\"", 102 | vec!["1sec.txt", "10secs.txt", "5mins.txt"], 103 | vec!["2hours.txt"], 104 | ), 105 | // Days 106 | ( 107 | "modified > \"-5.days\"", 108 | vec![ 109 | "1sec.txt", 110 | "10secs.txt", 111 | "5mins.txt", 112 | "2hours.txt", 113 | "3days.txt", 114 | ], 115 | vec!["1week.txt"], 116 | ), 117 | ( 118 | "modified > \"-7.days\"", 119 | vec![ 120 | "1sec.txt", 121 | "10secs.txt", 122 | "5mins.txt", 123 | "2hours.txt", 124 | "3days.txt", 125 | "1week.txt", 126 | ], 127 | vec!["30days.txt"], 128 | ), 129 | // Weeks 130 | ( 131 | "modified > \"-2.weeks\"", 132 | vec![ 133 | "1sec.txt", 134 | "10secs.txt", 135 | "5mins.txt", 136 | "2hours.txt", 137 | "3days.txt", 138 | "1week.txt", 139 | ], 140 | vec!["30days.txt"], 141 | ), 142 | // Test with different units abbreviations 143 | ( 144 | "modified > -30s", 145 | vec!["1sec.txt", "10secs.txt"], 146 | vec!["5mins.txt"], 147 | ), 148 | ( 149 | "modified > -10m", 150 | vec!["1sec.txt", "10secs.txt", "5mins.txt"], 151 | vec!["2hours.txt"], 152 | ), 153 | ( 154 | "modified > -3h", 155 | vec!["1sec.txt", "10secs.txt", "5mins.txt", "2hours.txt"], 156 | vec!["3days.txt"], 157 | ), 158 | ( 159 | "modified > -5d", 160 | vec![ 161 | "1sec.txt", 162 | "10secs.txt", 163 | "5mins.txt", 164 | "2hours.txt", 165 | "3days.txt", 166 | ], 167 | vec!["1week.txt"], 168 | ), 169 | ( 170 | "modified > -2w", 171 | vec![ 172 | "1sec.txt", 173 | "10secs.txt", 174 | "5mins.txt", 175 | "2hours.txt", 176 | "3days.txt", 177 | "1week.txt", 178 | ], 179 | vec!["30days.txt"], 180 | ), 181 | ]; 182 | 183 | for (expr, expected, not_expected) in test_cases { 184 | run_temporal_test(&tmp_dir, expr, expected, not_expected).await; 185 | } 186 | } 187 | 188 | #[tokio::test] 189 | async fn test_absolute_dates() { 190 | let tmp_dir = tempfile::Builder::new() 191 | .prefix("detect-temporal-absolute") 192 | .tempdir() 193 | .unwrap(); 194 | 195 | // Create files with specific dates 196 | let today_file = tmp_dir.path().join("today.txt"); 197 | let yesterday_file = tmp_dir.path().join("yesterday.txt"); 198 | let week_old_file = tmp_dir.path().join("week_old.txt"); 199 | let year_2020_file = tmp_dir.path().join("year_2020.txt"); 200 | let year_2023_file = tmp_dir.path().join("year_2023.txt"); 201 | 202 | // Create files 203 | std::fs::write(&today_file, "today").unwrap(); 204 | std::fs::write(&yesterday_file, "yesterday").unwrap(); 205 | std::fs::write(&week_old_file, "week").unwrap(); 206 | std::fs::write(&year_2020_file, "2020").unwrap(); 207 | std::fs::write(&year_2023_file, "2023").unwrap(); 208 | 209 | // Set modification times 210 | let now = SystemTime::now(); 211 | let yesterday = now - std::time::Duration::from_secs(24 * 60 * 60); 212 | let week_ago = now - std::time::Duration::from_secs(7 * 24 * 60 * 60); 213 | let year_2020 = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(1577836800); // 2020-01-01 214 | let year_2023 = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(1672531200); // 2023-01-01 215 | 216 | fs::File::open(&today_file) 217 | .unwrap() 218 | .set_modified(now) 219 | .unwrap(); 220 | fs::File::open(&yesterday_file) 221 | 
.unwrap() 222 | .set_modified(yesterday) 223 | .unwrap(); 224 | fs::File::open(&week_old_file) 225 | .unwrap() 226 | .set_modified(week_ago) 227 | .unwrap(); 228 | fs::File::open(&year_2020_file) 229 | .unwrap() 230 | .set_modified(year_2020) 231 | .unwrap(); 232 | fs::File::open(&year_2023_file) 233 | .unwrap() 234 | .set_modified(year_2023) 235 | .unwrap(); 236 | 237 | // Test absolute date queries (quoted and unquoted) 238 | run_temporal_test( 239 | &tmp_dir, 240 | "modified > \"2021-01-01\"", 241 | vec![ 242 | "today.txt", 243 | "yesterday.txt", 244 | "week_old.txt", 245 | "year_2023.txt", 246 | ], 247 | vec!["year_2020.txt"], 248 | ) 249 | .await; 250 | run_temporal_test( 251 | &tmp_dir, 252 | "modified > 2021-01-01", 253 | vec![ 254 | "today.txt", 255 | "yesterday.txt", 256 | "week_old.txt", 257 | "year_2023.txt", 258 | ], 259 | vec!["year_2020.txt"], 260 | ) 261 | .await; 262 | run_temporal_test( 263 | &tmp_dir, 264 | "modified < 2022-01-01", 265 | vec!["year_2020.txt"], 266 | vec!["year_2023.txt", "today.txt"], 267 | ) 268 | .await; 269 | } 270 | 271 | #[tokio::test] 272 | async fn test_time_selectors() { 273 | let tmp_dir = tempfile::Builder::new() 274 | .prefix("detect-temporal-selectors") 275 | .tempdir() 276 | .unwrap(); 277 | 278 | // Create test files 279 | let test_file = tmp_dir.path().join("test.txt"); 280 | let old_file = tmp_dir.path().join("old.txt"); 281 | 282 | std::fs::write(&test_file, "content").unwrap(); 283 | std::fs::write(&old_file, "old").unwrap(); 284 | 285 | // Set old file to be old 286 | let week_ago = SystemTime::now() - std::time::Duration::from_secs(7 * 24 * 60 * 60); 287 | fs::File::open(&old_file) 288 | .unwrap() 289 | .set_modified(week_ago) 290 | .unwrap(); 291 | 292 | // Test modified selector (already tested above, but verify syntax variants) 293 | run_temporal_test( 294 | &tmp_dir, 295 | "modified > -1hour", 296 | vec!["test.txt"], 297 | vec!["old.txt"], 298 | ) 299 | .await; 300 | run_temporal_test( 301 | &tmp_dir, 302 | "modified > -1hour", 303 | vec!["test.txt"], 304 | vec!["old.txt"], 305 | ) 306 | .await; 307 | 308 | // Test created selector (created - creation time is OS-specific, just verify it runs) 309 | let mut created_files = Vec::new(); 310 | detect::parse_and_run_fs( 311 | Logger::root(Discard, o!()), 312 | tmp_dir.path(), 313 | false, 314 | "created > -1hour".to_owned(), 315 | detect::RuntimeConfig::default(), 316 | |p| created_files.push(p.file_name().unwrap().to_string_lossy().to_string()), 317 | ) 318 | .await 319 | .unwrap(); 320 | // Just verify it doesn't crash - actual results are OS-dependent 321 | 322 | // Test accessed selector 323 | // Read the test file to update access time 324 | let _ = std::fs::read_to_string(&test_file).unwrap(); 325 | 326 | run_temporal_test(&tmp_dir, "accessed > -1minute", vec!["test.txt"], vec![]).await; 327 | run_temporal_test(&tmp_dir, "accessed > -1minute", vec!["test.txt"], vec![]).await; 328 | 329 | // Test created time variants 330 | let mut created_files = Vec::new(); 331 | detect::parse_and_run_fs( 332 | Logger::root(Discard, o!()), 333 | tmp_dir.path(), 334 | false, 335 | "created > -1hour".to_owned(), 336 | detect::RuntimeConfig::default(), 337 | |p| created_files.push(p.file_name().unwrap().to_string_lossy().to_string()), 338 | ) 339 | .await 340 | .unwrap(); 341 | // Just verify syntax works 342 | } 343 | 344 | #[tokio::test] 345 | async fn test_temporal_combined_queries() { 346 | let tmp_dir = tempfile::Builder::new() 347 | .prefix("detect-temporal-combined") 348 | .tempdir() 349 | 
.unwrap(); 350 | 351 | // Create various files 352 | let files = vec![ 353 | ("old.rs", "rust code", 10 * 24 * 60 * 60), // 10 days old 354 | ("new.rs", "rust code", 60), // 1 minute old 355 | ("old.txt", "text content", 10 * 24 * 60 * 60), // 10 days old 356 | ("new.txt", "text content", 60), // 1 minute old 357 | ("old_todo.rs", "// TODO: fix", 10 * 24 * 60 * 60), // 10 days old 358 | ("new_todo.rs", "// TODO: implement", 60), // 1 minute old 359 | ]; 360 | 361 | for (name, content, age_secs) in &files { 362 | let path = tmp_dir.path().join(name); 363 | std::fs::write(&path, content).unwrap(); 364 | let mtime = SystemTime::now() - std::time::Duration::from_secs(*age_secs); 365 | fs::File::open(&path).unwrap().set_modified(mtime).unwrap(); 366 | } 367 | 368 | // Test temporal + extension 369 | run_temporal_test( 370 | &tmp_dir, 371 | "ext == rs && modified > -1day", 372 | vec!["new.rs", "new_todo.rs"], 373 | vec!["old.rs", "new.txt", "old.txt", "old_todo.rs"], 374 | ) 375 | .await; 376 | 377 | // Test temporal + content 378 | run_temporal_test( 379 | &tmp_dir, 380 | r#"content contains "TODO" && modified > -1day"#, 381 | vec!["new_todo.rs"], 382 | vec!["old_todo.rs", "new.rs", "new.txt"], 383 | ) 384 | .await; 385 | 386 | // Test multiple temporal selectors 387 | run_temporal_test( 388 | &tmp_dir, 389 | "modified > -1hour && accessed > -1hour", 390 | vec!["new.rs", "new.txt", "new_todo.rs"], 391 | vec!["old.rs", "old.txt", "old_todo.rs"], 392 | ) 393 | .await; 394 | 395 | // Test temporal with size (all our test files are small) 396 | run_temporal_test( 397 | &tmp_dir, 398 | "size < 100 && modified > -1day", 399 | vec!["new.rs", "new.txt", "new_todo.rs"], 400 | vec!["old.rs", "old.txt", "old_todo.rs"], 401 | ) 402 | .await; 403 | 404 | // Test temporal with negation 405 | run_temporal_test( 406 | &tmp_dir, 407 | r#"!(basename contains "old") && modified > -1day"#, 408 | vec!["new.rs", "new.txt", "new_todo.rs"], 409 | vec!["old.rs", "old.txt", "old_todo.rs"], 410 | ) 411 | .await; 412 | } 413 | 414 | #[tokio::test] 415 | async fn test_greater_less_or_equal_operators() { 416 | let tmp_dir = tempfile::Builder::new() 417 | .prefix("detect-temporal-gte-lte") 418 | .tempdir() 419 | .unwrap(); 420 | 421 | // Create files with precise timestamps 422 | // Using slightly offset times to avoid boundary issues 423 | let now = SystemTime::now(); 424 | let thirty_mins_ago = now - std::time::Duration::from_secs(30 * 60); 425 | let ninety_mins_ago = now - std::time::Duration::from_secs(90 * 60); 426 | let two_and_half_hours_ago = now - std::time::Duration::from_secs(150 * 60); 427 | let four_hours_ago = now - std::time::Duration::from_secs(4 * 60 * 60); 428 | 429 | let recent = tmp_dir.path().join("recent.txt"); 430 | let thirty_mins = tmp_dir.path().join("thirty_mins.txt"); 431 | let ninety_mins = tmp_dir.path().join("ninety_mins.txt"); 432 | let two_half_hours = tmp_dir.path().join("two_half_hours.txt"); 433 | let four_hours = tmp_dir.path().join("four_hours.txt"); 434 | 435 | std::fs::write(&recent, "recent").unwrap(); 436 | std::fs::write(&thirty_mins, "thirty_mins").unwrap(); 437 | std::fs::write(&ninety_mins, "ninety_mins").unwrap(); 438 | std::fs::write(&two_half_hours, "two_half_hours").unwrap(); 439 | std::fs::write(&four_hours, "four_hours").unwrap(); 440 | 441 | fs::File::open(&recent).unwrap().set_modified(now).unwrap(); 442 | fs::File::open(&thirty_mins) 443 | .unwrap() 444 | .set_modified(thirty_mins_ago) 445 | .unwrap(); 446 | fs::File::open(&ninety_mins) 447 | .unwrap() 448 | 
.set_modified(ninety_mins_ago) 449 | .unwrap(); 450 | fs::File::open(&two_half_hours) 451 | .unwrap() 452 | .set_modified(two_and_half_hours_ago) 453 | .unwrap(); 454 | fs::File::open(&four_hours) 455 | .unwrap() 456 | .set_modified(four_hours_ago) 457 | .unwrap(); 458 | 459 | // Test >= operator (after or equal) - should include files at or after the threshold 460 | run_temporal_test( 461 | &tmp_dir, 462 | "modified >= -1hour", 463 | vec!["recent.txt", "thirty_mins.txt"], 464 | vec!["ninety_mins.txt", "two_half_hours.txt", "four_hours.txt"], 465 | ) 466 | .await; 467 | 468 | run_temporal_test( 469 | &tmp_dir, 470 | "modified >= -2hours", 471 | vec!["recent.txt", "thirty_mins.txt", "ninety_mins.txt"], 472 | vec!["two_half_hours.txt", "four_hours.txt"], 473 | ) 474 | .await; 475 | 476 | // Test <= operator (before or equal) - should include files at or before the threshold 477 | run_temporal_test( 478 | &tmp_dir, 479 | "modified <= -1hour", 480 | vec!["ninety_mins.txt", "two_half_hours.txt", "four_hours.txt"], 481 | vec!["recent.txt", "thirty_mins.txt"], 482 | ) 483 | .await; 484 | 485 | run_temporal_test( 486 | &tmp_dir, 487 | "modified <= -3hours", 488 | vec!["four_hours.txt"], 489 | vec![ 490 | "recent.txt", 491 | "thirty_mins.txt", 492 | "ninety_mins.txt", 493 | "two_half_hours.txt", 494 | ], 495 | ) 496 | .await; 497 | 498 | // Test combining >= and <= to create a time range 499 | run_temporal_test( 500 | &tmp_dir, 501 | "modified >= -3hours AND modified <= -1hour", 502 | vec!["ninety_mins.txt", "two_half_hours.txt"], 503 | vec!["recent.txt", "thirty_mins.txt", "four_hours.txt"], 504 | ) 505 | .await; 506 | } 507 | -------------------------------------------------------------------------------- /tests/aliases.rs: -------------------------------------------------------------------------------- 1 | //! Tests for single-word predicate aliases 2 | //! 3 | //! Verifies that file type aliases like `dir`, `file`, `symlink` work correctly 4 | //! in parsing and typechecking. 
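// --- editorial sketch, not part of the original test file ---
// A few expression strings of the shape exercised by the tests below: bare file-type
// aliases combined with depth, size, and structured selectors. Purely illustrative;
// the constant is not referenced by any test, and its name is made up.
#[allow(dead_code)]
const EXAMPLE_ALIAS_QUERIES: &[&str] = &[
    "dir && depth > 0",                  // alias plus a numeric predicate
    "file AND size > 10kb",              // word-form AND with a size comparison
    "symlink || socket",                 // two aliases joined with the symbolic OR
    "yaml:.server.port AND ext == yaml", // structured selector mixed with an ordinary predicate
];
// --- end editorial sketch ---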
5 | 6 | use detect::{ 7 | expr::Expr, 8 | parser::{RawParser, Typechecker}, 9 | predicate::{DetectFileType, EnumMatcher, MetadataPredicate, Predicate}, 10 | }; 11 | 12 | /// Helper to parse and typecheck an expression 13 | fn parse_and_typecheck(input: &str) -> Result, detect::parser::error::DetectError> { 14 | let raw = RawParser::parse_raw_expr(input)?; 15 | Typechecker::typecheck(raw, input, &detect::RuntimeConfig::default()) 16 | } 17 | 18 | #[test] 19 | fn test_all_file_type_aliases_parse() { 20 | // All file type aliases should parse and typecheck successfully 21 | let aliases = [ 22 | "file", 23 | "dir", 24 | "directory", 25 | "symlink", 26 | "link", 27 | "socket", 28 | "sock", 29 | "fifo", 30 | "pipe", 31 | "block", 32 | "blockdev", 33 | "char", 34 | "chardev", 35 | ]; 36 | 37 | for alias in &aliases { 38 | let result = parse_and_typecheck(alias); 39 | assert!( 40 | result.is_ok(), 41 | "Alias '{}' should parse successfully, got: {:?}", 42 | alias, 43 | result.err() 44 | ); 45 | } 46 | } 47 | 48 | #[test] 49 | fn test_alias_case_insensitive() { 50 | // Aliases should be case-insensitive 51 | assert!(parse_and_typecheck("FILE").is_ok()); 52 | assert!(parse_and_typecheck("Dir").is_ok()); 53 | assert!(parse_and_typecheck("DIRECTORY").is_ok()); 54 | assert!(parse_and_typecheck("SyMlInK").is_ok()); 55 | } 56 | 57 | #[test] 58 | fn test_alias_equivalence_to_explicit_predicate() { 59 | // `dir` should be equivalent to `type == dir` 60 | let alias_result = parse_and_typecheck("dir").unwrap(); 61 | let explicit_result = parse_and_typecheck("type == dir").unwrap(); 62 | 63 | // Both should produce MetadataPredicate::Type with Equals matcher 64 | match (&alias_result, &explicit_result) { 65 | (Expr::Predicate(Predicate::Metadata(a)), Expr::Predicate(Predicate::Metadata(e))) => { 66 | assert_eq!(a, e, "Alias and explicit predicate should be equal"); 67 | } 68 | _ => panic!("Both should be Predicate::Metadata"), 69 | } 70 | } 71 | 72 | #[test] 73 | fn test_alias_in_boolean_expression() { 74 | // Aliases should work in boolean expressions 75 | let result = parse_and_typecheck("dir && depth > 0"); 76 | assert!(result.is_ok(), "Boolean expression with alias should parse"); 77 | 78 | let result = parse_and_typecheck("file || symlink"); 79 | assert!(result.is_ok(), "OR with aliases should parse"); 80 | 81 | let result = parse_and_typecheck("NOT dir"); 82 | assert!(result.is_ok(), "NOT with alias should parse"); 83 | } 84 | 85 | #[test] 86 | fn test_alias_with_word_form_operators() { 87 | // Test word-form AND operator 88 | let result = parse_and_typecheck("file AND size > 10kb"); 89 | assert!( 90 | result.is_ok(), 91 | "Alias with word-form AND should parse, got: {:?}", 92 | result.err() 93 | ); 94 | 95 | // Test word-form OR operator 96 | let result = parse_and_typecheck("dir OR file"); 97 | assert!( 98 | result.is_ok(), 99 | "Alias with word-form OR should parse, got: {:?}", 100 | result.err() 101 | ); 102 | 103 | // Test case-insensitive word operators 104 | let result = parse_and_typecheck("file and size > 1mb"); 105 | assert!( 106 | result.is_ok(), 107 | "Alias with lowercase 'and' should parse, got: {:?}", 108 | result.err() 109 | ); 110 | 111 | let result = parse_and_typecheck("file or dir"); 112 | assert!( 113 | result.is_ok(), 114 | "Alias with lowercase 'or' should parse, got: {:?}", 115 | result.err() 116 | ); 117 | 118 | // Mixed case 119 | let result = parse_and_typecheck("file And size > 1kb"); 120 | assert!( 121 | result.is_ok(), 122 | "Alias with mixed-case 'And' should parse, got: 
{:?}", 123 | result.err() 124 | ); 125 | 126 | // Complex expressions with multiple word operators 127 | let result = parse_and_typecheck("file AND size > 1mb OR dir AND depth < 3"); 128 | assert!( 129 | result.is_ok(), 130 | "Complex expression with multiple word operators should parse, got: {:?}", 131 | result.err() 132 | ); 133 | 134 | // Parenthesized expressions 135 | let result = parse_and_typecheck("(file OR dir) AND size > 100kb"); 136 | assert!( 137 | result.is_ok(), 138 | "Parenthesized expression with word operators should parse, got: {:?}", 139 | result.err() 140 | ); 141 | 142 | // With NOT 143 | let result = parse_and_typecheck("NOT file AND size > 1kb"); 144 | assert!( 145 | result.is_ok(), 146 | "NOT with word-form AND should parse, got: {:?}", 147 | result.err() 148 | ); 149 | } 150 | 151 | #[test] 152 | fn test_unknown_alias_error() { 153 | // Unknown aliases should produce helpful errors 154 | let result = parse_and_typecheck("unknownalias"); 155 | assert!(result.is_err()); 156 | 157 | if let Err(err) = result { 158 | assert!( 159 | matches!(err, detect::parser::error::DetectError::UnknownAlias { .. }), 160 | "Should produce UnknownAlias error" 161 | ); 162 | } 163 | } 164 | 165 | #[test] 166 | fn test_alias_suggestions() { 167 | // Close typos should get suggestions 168 | let result = parse_and_typecheck("fil"); 169 | assert!(result.is_err()); 170 | 171 | if let Err(detect::parser::error::DetectError::UnknownAlias { suggestions, .. }) = result { 172 | assert!( 173 | suggestions.is_some(), 174 | "Should provide suggestions for typos" 175 | ); 176 | let sugg = suggestions.unwrap(); 177 | assert!( 178 | sugg.contains("file"), 179 | "Should suggest 'file' for 'fil', got: {}", 180 | sugg 181 | ); 182 | } 183 | } 184 | 185 | #[test] 186 | fn test_wildcard_rejected() { 187 | // Wildcards should no longer parse as single words 188 | let result = RawParser::parse_raw_expr("*.rs"); 189 | assert!( 190 | result.is_err(), 191 | "Wildcards should be rejected by new grammar" 192 | ); 193 | 194 | let result = RawParser::parse_raw_expr("**/*.js"); 195 | assert!(result.is_err(), "Complex glob patterns should be rejected"); 196 | } 197 | 198 | // Filesystem evaluation is tested in integration.rs 199 | // This file focuses on parsing and typechecking of aliases 200 | 201 | #[test] 202 | fn test_complex_alias_expressions() { 203 | // Test complex boolean logic with aliases 204 | let result = parse_and_typecheck("(file || dir) && depth < 5"); 205 | assert!(result.is_ok(), "Complex alias expression should parse"); 206 | 207 | let result = parse_and_typecheck("NOT (symlink || socket) && file"); 208 | assert!(result.is_ok(), "Complex negation with aliases should parse"); 209 | } 210 | 211 | #[test] 212 | fn test_alias_constructed_predicates() { 213 | // Verify that aliases construct the correct predicate internally 214 | let typed = parse_and_typecheck("file").unwrap(); 215 | 216 | match typed { 217 | Expr::Predicate(Predicate::Metadata(meta)) => match meta.as_ref() { 218 | MetadataPredicate::Type(EnumMatcher::Equals(file_type)) => { 219 | assert_eq!( 220 | file_type, 221 | &DetectFileType::File, 222 | "Alias 'file' should construct DetectFileType::File" 223 | ); 224 | } 225 | _ => panic!("Expected Type predicate with Equals matcher"), 226 | }, 227 | _ => panic!("Expected Predicate::Metadata"), 228 | } 229 | } 230 | 231 | // ============================================================================ 232 | // Structured Selector Alias Tests 233 | // 
============================================================================ 234 | 235 | #[test] 236 | fn test_structured_selector_alias_parsing() { 237 | // YAML structured selectors should parse 238 | assert!(parse_and_typecheck("yaml:.field").is_ok()); 239 | assert!(parse_and_typecheck("yaml:.server.port").is_ok()); 240 | assert!(parse_and_typecheck("yaml:.items[0]").is_ok()); 241 | assert!(parse_and_typecheck("yaml:.items[*].name").is_ok()); 242 | assert!(parse_and_typecheck("yaml:..recursive").is_ok()); 243 | 244 | // JSON structured selectors should parse 245 | assert!(parse_and_typecheck("json:.version").is_ok()); 246 | assert!(parse_and_typecheck("json:.dependencies.lodash").is_ok()); 247 | 248 | // TOML structured selectors should parse 249 | assert!(parse_and_typecheck("toml:.package.name").is_ok()); 250 | assert!(parse_and_typecheck("toml:.dependencies.serde").is_ok()); 251 | } 252 | 253 | #[test] 254 | fn test_structured_selector_case_insensitive() { 255 | // Format prefix should be case-insensitive 256 | assert!(parse_and_typecheck("YAML:.field").is_ok()); 257 | assert!(parse_and_typecheck("Json:.field").is_ok()); 258 | assert!(parse_and_typecheck("toml:.field").is_ok()); 259 | assert!(parse_and_typecheck("YaML:.field").is_ok()); 260 | } 261 | 262 | #[test] 263 | fn test_structured_selector_with_boolean_logic() { 264 | // Structured selectors should work in boolean expressions 265 | assert!(parse_and_typecheck("yaml:.field AND size > 1kb").is_ok()); 266 | assert!(parse_and_typecheck("json:.version OR toml:.version").is_ok()); 267 | assert!(parse_and_typecheck("NOT yaml:.field").is_ok()); 268 | assert!(parse_and_typecheck("(yaml:.a OR json:.b) && file").is_ok()); 269 | } 270 | 271 | #[test] 272 | fn test_structured_selector_complex_and_logic() { 273 | // Complex AND expressions with structured selectors 274 | assert!(parse_and_typecheck("yaml:.field AND json:.field").is_ok()); 275 | assert!(parse_and_typecheck("yaml:.a AND yaml:.b AND yaml:.c").is_ok()); 276 | assert!(parse_and_typecheck("yaml:.field && json:.field && toml:.field").is_ok()); 277 | 278 | // Mixed word-form and symbol operators 279 | assert!(parse_and_typecheck("yaml:.field AND json:.field && toml:.field").is_ok()); 280 | assert!(parse_and_typecheck("yaml:.a && yaml:.b AND yaml:.c").is_ok()); 281 | } 282 | 283 | #[test] 284 | fn test_structured_selector_complex_or_logic() { 285 | // Complex OR expressions with structured selectors 286 | assert!(parse_and_typecheck("yaml:.field OR json:.field").is_ok()); 287 | assert!(parse_and_typecheck("yaml:.a OR yaml:.b OR yaml:.c").is_ok()); 288 | assert!(parse_and_typecheck("yaml:.field || json:.field || toml:.field").is_ok()); 289 | 290 | // Mixed word-form and symbol operators 291 | assert!(parse_and_typecheck("yaml:.field OR json:.field || toml:.field").is_ok()); 292 | assert!(parse_and_typecheck("yaml:.a || yaml:.b OR yaml:.c").is_ok()); 293 | } 294 | 295 | #[test] 296 | fn test_structured_selector_negation_variants() { 297 | // All negation forms should work 298 | assert!(parse_and_typecheck("NOT yaml:.field").is_ok()); 299 | assert!(parse_and_typecheck("not yaml:.field").is_ok()); 300 | assert!(parse_and_typecheck("! yaml:.field").is_ok()); 301 | assert!(parse_and_typecheck("\\! yaml:.field").is_ok()); 302 | 303 | // Double negation 304 | assert!(parse_and_typecheck("NOT NOT yaml:.field").is_ok()); 305 | assert!(parse_and_typecheck("!! 
yaml:.field").is_ok()); 306 | 307 | // Negation with multiple selectors 308 | assert!(parse_and_typecheck("NOT (yaml:.a OR yaml:.b)").is_ok()); 309 | assert!(parse_and_typecheck("!(yaml:.a AND yaml:.b)").is_ok()); 310 | } 311 | 312 | #[test] 313 | fn test_structured_selector_precedence_and_grouping() { 314 | // Test operator precedence with parentheses 315 | assert!(parse_and_typecheck("yaml:.a AND (yaml:.b OR yaml:.c)").is_ok()); 316 | assert!(parse_and_typecheck("(yaml:.a OR yaml:.b) AND yaml:.c").is_ok()); 317 | assert!(parse_and_typecheck("yaml:.a OR yaml:.b AND yaml:.c").is_ok()); 318 | 319 | // Complex nested grouping 320 | assert!(parse_and_typecheck("((yaml:.a OR yaml:.b) AND yaml:.c) OR yaml:.d").is_ok()); 321 | assert!(parse_and_typecheck("yaml:.a AND (yaml:.b OR (yaml:.c AND yaml:.d))").is_ok()); 322 | 323 | // Negation with grouping 324 | assert!(parse_and_typecheck("NOT (yaml:.a AND yaml:.b)").is_ok()); 325 | assert!(parse_and_typecheck("!(yaml:.a || yaml:.b) AND yaml:.c").is_ok()); 326 | } 327 | 328 | #[test] 329 | fn test_structured_selector_mixed_with_aliases() { 330 | // Structured selectors combined with file type aliases 331 | assert!(parse_and_typecheck("yaml:.field AND file").is_ok()); 332 | assert!(parse_and_typecheck("file AND yaml:.field").is_ok()); 333 | assert!(parse_and_typecheck("dir OR yaml:.config").is_ok()); 334 | assert!(parse_and_typecheck("yaml:.field AND NOT symlink").is_ok()); 335 | 336 | // Complex combinations 337 | assert!(parse_and_typecheck("(file OR dir) AND yaml:.field").is_ok()); 338 | assert!(parse_and_typecheck("yaml:.a AND (file OR symlink) AND json:.b").is_ok()); 339 | assert!(parse_and_typecheck("NOT file OR yaml:.config").is_ok()); 340 | } 341 | 342 | #[test] 343 | fn test_structured_selector_mixed_with_predicates() { 344 | // Structured selectors with other predicate types 345 | assert!(parse_and_typecheck("yaml:.field AND size > 10kb").is_ok()); 346 | assert!(parse_and_typecheck("yaml:.field AND name == test.yaml").is_ok()); 347 | assert!(parse_and_typecheck("yaml:.field AND ext == yaml").is_ok()); 348 | assert!(parse_and_typecheck("yaml:.field AND modified > -7d").is_ok()); 349 | 350 | // Complex mixed predicates 351 | assert!(parse_and_typecheck("yaml:.field AND size > 1mb AND ext == yaml").is_ok()); 352 | assert!(parse_and_typecheck("(yaml:.field OR json:.field) AND size < 10mb").is_ok()); 353 | assert!(parse_and_typecheck("yaml:.a AND yaml:.b AND name ~= \"config.*\"").is_ok()); 354 | 355 | // With negation 356 | assert!(parse_and_typecheck("yaml:.field AND NOT (size > 1gb)").is_ok()); 357 | assert!(parse_and_typecheck("NOT yaml:.field AND ext == yaml").is_ok()); 358 | } 359 | 360 | #[test] 361 | fn test_structured_selector_de_morgan_laws() { 362 | // Test De Morgan's law equivalences parse correctly 363 | // NOT (A AND B) is equivalent to (NOT A) OR (NOT B) 364 | assert!(parse_and_typecheck("NOT (yaml:.a AND yaml:.b)").is_ok()); 365 | assert!(parse_and_typecheck("NOT yaml:.a OR NOT yaml:.b").is_ok()); 366 | 367 | // NOT (A OR B) is equivalent to (NOT A) AND (NOT B) 368 | assert!(parse_and_typecheck("NOT (yaml:.a OR yaml:.b)").is_ok()); 369 | assert!(parse_and_typecheck("NOT yaml:.a AND NOT yaml:.b").is_ok()); 370 | } 371 | 372 | #[test] 373 | fn test_structured_selector_all_formats_combined() { 374 | // All three formats in complex expressions 375 | assert!(parse_and_typecheck("yaml:.a AND json:.b AND toml:.c").is_ok()); 376 | assert!(parse_and_typecheck("yaml:.a OR json:.b OR toml:.c").is_ok()); 377 | 
assert!(parse_and_typecheck("(yaml:.a AND json:.b) OR toml:.c").is_ok()); 378 | assert!(parse_and_typecheck("yaml:.a AND (json:.b OR toml:.c)").is_ok()); 379 | 380 | // With negation 381 | assert!(parse_and_typecheck("yaml:.a AND NOT json:.b AND toml:.c").is_ok()); 382 | assert!(parse_and_typecheck("NOT (yaml:.a OR json:.b OR toml:.c)").is_ok()); 383 | 384 | // Mixed with other predicates 385 | assert!(parse_and_typecheck("yaml:.a AND json:.b AND toml:.c AND file").is_ok()); 386 | assert!(parse_and_typecheck("(yaml:.a OR json:.b OR toml:.c) AND size > 1kb").is_ok()); 387 | } 388 | 389 | #[test] 390 | fn test_structured_selector_invalid_format() { 391 | // Invalid format prefixes should produce UnknownStructuredFormat error 392 | let result = parse_and_typecheck("xml:.field"); 393 | assert!(result.is_err()); 394 | 395 | if let Err(err) = result { 396 | let err_str = format!("{:?}", err); 397 | assert!( 398 | err_str.contains("UnknownStructuredFormat") || err_str.contains("xml"), 399 | "Expected UnknownStructuredFormat error, got: {:?}", 400 | err 401 | ); 402 | } 403 | 404 | // Other invalid formats 405 | assert!(parse_and_typecheck("csv:.column").is_err()); 406 | assert!(parse_and_typecheck("ini:.section").is_err()); 407 | } 408 | 409 | #[test] 410 | fn test_structured_selector_invalid_path() { 411 | // Empty path should produce error 412 | let result = parse_and_typecheck("yaml:"); 413 | assert!(result.is_err(), "Empty path should fail"); 414 | 415 | // Invalid path syntax should produce error 416 | let result = parse_and_typecheck("yaml:["); 417 | assert!(result.is_err(), "Unclosed bracket should fail"); 418 | } 419 | 420 | #[test] 421 | fn test_structured_selector_constructs_exists_predicate() { 422 | // Verify that structured selectors construct StructuredDataPredicate::*Exists 423 | use detect::predicate::StructuredDataPredicate; 424 | 425 | let typed = parse_and_typecheck("yaml:.field").unwrap(); 426 | 427 | match typed { 428 | Expr::Predicate(Predicate::Structured(predicate)) => match predicate { 429 | StructuredDataPredicate::YamlExists { path } => { 430 | assert_eq!(path.len(), 1, "Should have one path component"); 431 | } 432 | _ => panic!("Expected YamlExists predicate, got: {:?}", predicate), 433 | }, 434 | _ => panic!("Expected Predicate::Structured, got: {:?}", typed), 435 | } 436 | 437 | let typed = parse_and_typecheck("json:.version").unwrap(); 438 | 439 | match typed { 440 | Expr::Predicate(Predicate::Structured(StructuredDataPredicate::JsonExists { .. })) => { 441 | // Success 442 | } 443 | _ => panic!("Expected JsonExists predicate"), 444 | } 445 | 446 | let typed = parse_and_typecheck("toml:.package").unwrap(); 447 | 448 | match typed { 449 | Expr::Predicate(Predicate::Structured(StructuredDataPredicate::TomlExists { .. })) => { 450 | // Success 451 | } 452 | _ => panic!("Expected TomlExists predicate"), 453 | } 454 | } 455 | -------------------------------------------------------------------------------- /tests/navigation_structured.rs: -------------------------------------------------------------------------------- 1 | //! Comprehensive tests for structured data navigation (YAML/JSON/TOML) 2 | //! 3 | //! Tests the iterative, zero-clone traversal of parsed documents. 
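// --- editorial sketch, not part of the original test file ---
// End-to-end shape of the API exercised below: parse_path turns a selector path into
// navigation steps, and navigate_yaml walks a parsed document, returning references to
// every matching node. Assumes yaml_rust2's YamlLoader::load_from_str and that the
// navigation functions return a collection of references, as the comparisons in this
// file imply; the test name and YAML snippet are made up.
#[test]
fn sketch_navigate_yaml_scalar() {
    let docs = yaml_rust2::YamlLoader::load_from_str("server:\n  port: 8080\n").unwrap();
    let path = detect::parser::structured_path::parse_path(".server.port").unwrap();
    let results = detect::eval::structured::navigate_yaml(&docs[0], &path);
    assert!(results.iter().any(|v| **v == yaml_rust2::Yaml::Integer(8080)));
}
// --- end editorial sketch ---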
4 | 5 | use detect::eval::structured::{navigate_json, navigate_toml, navigate_yaml}; 6 | use detect::parser::structured_path::parse_path; 7 | 8 | // ============================================================================ 9 | // YAML Test Helpers 10 | // ============================================================================ 11 | 12 | /// Helper to construct YAML integer 13 | fn yaml_int(i: i64) -> yaml_rust2::Yaml { 14 | yaml_rust2::Yaml::Integer(i) 15 | } 16 | 17 | /// Helper to construct YAML string 18 | fn yaml_str(s: &str) -> yaml_rust2::Yaml { 19 | yaml_rust2::Yaml::String(s.to_string()) 20 | } 21 | 22 | /// Helper to construct YAML boolean 23 | fn yaml_bool(b: bool) -> yaml_rust2::Yaml { 24 | yaml_rust2::Yaml::Boolean(b) 25 | } 26 | 27 | /// Helper to construct YAML array 28 | fn yaml_array(items: Vec<yaml_rust2::Yaml>) -> yaml_rust2::Yaml { 29 | yaml_rust2::Yaml::Array(items) 30 | } 31 | 32 | /// Helper to construct YAML hash/object 33 | fn yaml_hash(pairs: Vec<(&str, yaml_rust2::Yaml)>) -> yaml_rust2::Yaml { 34 | use yaml_rust2::yaml::Hash; 35 | let mut map = Hash::new(); 36 | for (k, v) in pairs { 37 | map.insert(yaml_rust2::Yaml::String(k.to_string()), v); 38 | } 39 | yaml_rust2::Yaml::Hash(map) 40 | } 41 | 42 | /// YAML navigation test case 43 | struct YamlNavCase { 44 | name: &'static str, 45 | document: yaml_rust2::Yaml, 46 | path: &'static str, 47 | expected: Vec<yaml_rust2::Yaml>, 48 | } 49 | 50 | /// Check if two slices contain the same elements (order-insensitive) 51 | fn yaml_sets_equal(a: &[&yaml_rust2::Yaml], b: &[yaml_rust2::Yaml]) -> bool { 52 | if a.len() != b.len() { 53 | return false; 54 | } 55 | 56 | // Check every element in 'a' exists in 'b' 57 | for item_a in a { 58 | if !b.iter().any(|item_b| item_a == &item_b) { 59 | return false; 60 | } 61 | } 62 | 63 | // Check every element in 'b' exists in 'a' 64 | for item_b in b { 65 | if !a.contains(&item_b) { 66 | return false; 67 | } 68 | } 69 | 70 | true 71 | } 72 | 73 | /// Run a batch of YAML navigation test cases 74 | fn run_yaml_tests(test_cases: &[YamlNavCase]) { 75 | for case in test_cases { 76 | let path = parse_path(case.path).unwrap_or_else(|e| { 77 | panic!( 78 | "Test '{}': Failed to parse path '{}': {:?}", 79 | case.name, case.path, e 80 | ) 81 | }); 82 | 83 | let results = navigate_yaml(&case.document, &path); 84 | 85 | // Order-insensitive comparison 86 | if !yaml_sets_equal(&results, &case.expected) { 87 | panic!( 88 | "Test '{}' failed:\n Path: {}\n Expected {} results: {:?}\n Got {} results: {:?}", 89 | case.name, 90 | case.path, 91 | case.expected.len(), 92 | case.expected, 93 | results.len(), 94 | results 95 | ); 96 | } 97 | } 98 | } 99 | 100 | // ============================================================================ 101 | // JSON Test Helpers 102 | // ============================================================================ 103 | 104 | /// Helper to construct JSON integer 105 | fn json_int(i: i64) -> serde_json::Value { 106 | serde_json::Value::Number(serde_json::Number::from(i)) 107 | } 108 | 109 | /// Helper to construct JSON string 110 | fn json_str(s: &str) -> serde_json::Value { 111 | serde_json::Value::String(s.to_string()) 112 | } 113 | 114 | /// Helper to construct JSON array 115 | fn json_array(items: Vec<serde_json::Value>) -> serde_json::Value { 116 | serde_json::Value::Array(items) 117 | } 118 | 119 | /// Helper to construct JSON object 120 | fn json_object(pairs: Vec<(&str, serde_json::Value)>) -> serde_json::Value { 121 | let map: serde_json::Map<String, serde_json::Value> = 122 | pairs.into_iter().map(|(k, v)| (k.to_string(), v)).collect();
123 | serde_json::Value::Object(map) 124 | } 125 | 126 | /// JSON navigation test case 127 | struct JsonNavCase { 128 | name: &'static str, 129 | document: serde_json::Value, 130 | path: &'static str, 131 | expected: Vec<serde_json::Value>, 132 | } 133 | 134 | /// Check if two slices contain the same JSON elements (order-insensitive) 135 | fn json_sets_equal(a: &[&serde_json::Value], b: &[serde_json::Value]) -> bool { 136 | if a.len() != b.len() { 137 | return false; 138 | } 139 | 140 | for item_a in a { 141 | if !b.iter().any(|item_b| item_a == &item_b) { 142 | return false; 143 | } 144 | } 145 | 146 | for item_b in b { 147 | if !a.contains(&item_b) { 148 | return false; 149 | } 150 | } 151 | 152 | true 153 | } 154 | 155 | /// Run a batch of JSON navigation test cases 156 | fn run_json_tests(test_cases: &[JsonNavCase]) { 157 | for case in test_cases { 158 | let path = parse_path(case.path).unwrap_or_else(|e| { 159 | panic!( 160 | "Test '{}': Failed to parse path '{}': {:?}", 161 | case.name, case.path, e 162 | ) 163 | }); 164 | 165 | let results = navigate_json(&case.document, &path); 166 | 167 | if !json_sets_equal(&results, &case.expected) { 168 | panic!( 169 | "Test '{}' failed:\n Path: {}\n Expected {} results: {:?}\n Got {} results: {:?}", 170 | case.name, 171 | case.path, 172 | case.expected.len(), 173 | case.expected, 174 | results.len(), 175 | results 176 | ); 177 | } 178 | } 179 | } 180 | 181 | // ============================================================================ 182 | // TOML Test Helpers 183 | // ============================================================================ 184 | 185 | /// Helper to construct TOML integer 186 | fn toml_int(i: i64) -> toml::Value { 187 | toml::Value::Integer(i) 188 | } 189 | 190 | /// Helper to construct TOML string 191 | fn toml_str(s: &str) -> toml::Value { 192 | toml::Value::String(s.to_string()) 193 | } 194 | 195 | /// Helper to construct TOML array 196 | fn toml_array(items: Vec<toml::Value>) -> toml::Value { 197 | toml::Value::Array(items) 198 | } 199 | 200 | /// Helper to construct TOML table 201 | fn toml_table(pairs: Vec<(&str, toml::Value)>) -> toml::Value { 202 | let map: toml::map::Map<String, toml::Value> = 203 | pairs.into_iter().map(|(k, v)| (k.to_string(), v)).collect(); 204 | toml::Value::Table(map) 205 | } 206 | 207 | /// TOML navigation test case 208 | struct TomlNavCase { 209 | name: &'static str, 210 | document: toml::Value, 211 | path: &'static str, 212 | expected: Vec<toml::Value>, 213 | } 214 | 215 | /// Check if two slices contain the same TOML elements (order-insensitive) 216 | fn toml_sets_equal(a: &[&toml::Value], b: &[toml::Value]) -> bool { 217 | if a.len() != b.len() { 218 | return false; 219 | } 220 | 221 | for item_a in a { 222 | if !b.iter().any(|item_b| item_a == &item_b) { 223 | return false; 224 | } 225 | } 226 | 227 | for item_b in b { 228 | if !a.contains(&item_b) { 229 | return false; 230 | } 231 | } 232 | 233 | true 234 | } 235 | 236 | /// Run a batch of TOML navigation test cases 237 | fn run_toml_tests(test_cases: &[TomlNavCase]) { 238 | for case in test_cases { 239 | let path = parse_path(case.path).unwrap_or_else(|e| { 240 | panic!( 241 | "Test '{}': Failed to parse path '{}': {:?}", 242 | case.name, case.path, e 243 | ) 244 | }); 245 | 246 | let results = navigate_toml(&case.document, &path); 247 | 248 | if !toml_sets_equal(&results, &case.expected) { 249 | panic!( 250 | "Test '{}' failed:\n Path: {}\n Expected {} results: {:?}\n Got {} results: {:?}", 251 | case.name, 252 | case.path, 253 | case.expected.len(), 254 | case.expected, 255 | results.len(), 256
| results 257 | ); 258 | } 259 | } 260 | } 261 | 262 | // ============================================================================ 263 | // YAML Navigation Tests 264 | // ============================================================================ 265 | 266 | #[test] 267 | fn test_yaml_basic_navigation() { 268 | let test_cases = vec![ 269 | YamlNavCase { 270 | name: "simple key", 271 | document: yaml_hash(vec![("port", yaml_int(8080))]), 272 | path: ".port", 273 | expected: vec![yaml_int(8080)], 274 | }, 275 | YamlNavCase { 276 | name: "nested keys", 277 | document: yaml_hash(vec![("server", yaml_hash(vec![("port", yaml_int(8080))]))]), 278 | path: ".server.port", 279 | expected: vec![yaml_int(8080)], 280 | }, 281 | YamlNavCase { 282 | name: "deep nesting", 283 | document: yaml_hash(vec![( 284 | "a", 285 | yaml_hash(vec![( 286 | "b", 287 | yaml_hash(vec![("c", yaml_hash(vec![("d", yaml_str("deep"))]))]), 288 | )]), 289 | )]), 290 | path: ".a.b.c.d", 291 | expected: vec![yaml_str("deep")], 292 | }, 293 | YamlNavCase { 294 | name: "missing key returns empty", 295 | document: yaml_hash(vec![("port", yaml_int(8080))]), 296 | path: ".missing", 297 | expected: vec![], 298 | }, 299 | YamlNavCase { 300 | name: "missing nested key returns empty", 301 | document: yaml_hash(vec![("server", yaml_hash(vec![("port", yaml_int(8080))]))]), 302 | path: ".server.missing", 303 | expected: vec![], 304 | }, 305 | ]; 306 | 307 | run_yaml_tests(&test_cases); 308 | } 309 | 310 | #[test] 311 | fn test_yaml_array_navigation() { 312 | let test_cases = vec![ 313 | YamlNavCase { 314 | name: "array index 0", 315 | document: yaml_hash(vec![( 316 | "items", 317 | yaml_array(vec![ 318 | yaml_str("first"), 319 | yaml_str("second"), 320 | yaml_str("third"), 321 | ]), 322 | )]), 323 | path: ".items[0]", 324 | expected: vec![yaml_str("first")], 325 | }, 326 | YamlNavCase { 327 | name: "array index middle", 328 | document: yaml_hash(vec![( 329 | "items", 330 | yaml_array(vec![ 331 | yaml_str("first"), 332 | yaml_str("second"), 333 | yaml_str("third"), 334 | ]), 335 | )]), 336 | path: ".items[1]", 337 | expected: vec![yaml_str("second")], 338 | }, 339 | YamlNavCase { 340 | name: "array index last", 341 | document: yaml_hash(vec![( 342 | "items", 343 | yaml_array(vec![ 344 | yaml_str("first"), 345 | yaml_str("second"), 346 | yaml_str("third"), 347 | ]), 348 | )]), 349 | path: ".items[2]", 350 | expected: vec![yaml_str("third")], 351 | }, 352 | YamlNavCase { 353 | name: "array out of bounds returns empty", 354 | document: yaml_hash(vec![("items", yaml_array(vec![yaml_str("first")]))]), 355 | path: ".items[999]", 356 | expected: vec![], 357 | }, 358 | YamlNavCase { 359 | name: "chained array access", 360 | document: yaml_hash(vec![( 361 | "matrix", 362 | yaml_array(vec![ 363 | yaml_array(vec![yaml_int(1), yaml_int(2)]), 364 | yaml_array(vec![yaml_int(3), yaml_int(4)]), 365 | ]), 366 | )]), 367 | path: ".matrix[1][0]", 368 | expected: vec![yaml_int(3)], 369 | }, 370 | YamlNavCase { 371 | name: "array then key", 372 | document: yaml_hash(vec![( 373 | "users", 374 | yaml_array(vec![ 375 | yaml_hash(vec![("name", yaml_str("alice"))]), 376 | yaml_hash(vec![("name", yaml_str("bob"))]), 377 | ]), 378 | )]), 379 | path: ".users[1].name", 380 | expected: vec![yaml_str("bob")], 381 | }, 382 | ]; 383 | 384 | run_yaml_tests(&test_cases); 385 | } 386 | 387 | #[test] 388 | fn test_yaml_wildcard_navigation() { 389 | let test_cases = vec![ 390 | YamlNavCase { 391 | name: "wildcard all array elements", 392 | document: yaml_hash(vec![( 393 | 
"items", 394 | yaml_array(vec![yaml_int(1), yaml_int(2), yaml_int(3)]), 395 | )]), 396 | path: ".items[*]", 397 | expected: vec![yaml_int(1), yaml_int(2), yaml_int(3)], 398 | }, 399 | YamlNavCase { 400 | name: "wildcard with mixed types", 401 | document: yaml_hash(vec![( 402 | "mixed", 403 | yaml_array(vec![yaml_int(42), yaml_str("hello"), yaml_bool(true)]), 404 | )]), 405 | path: ".mixed[*]", 406 | expected: vec![yaml_int(42), yaml_str("hello"), yaml_bool(true)], 407 | }, 408 | YamlNavCase { 409 | name: "wildcard then field", 410 | document: yaml_hash(vec![( 411 | "users", 412 | yaml_array(vec![ 413 | yaml_hash(vec![("id", yaml_int(1))]), 414 | yaml_hash(vec![("id", yaml_int(2))]), 415 | yaml_hash(vec![("id", yaml_int(3))]), 416 | ]), 417 | )]), 418 | path: ".users[*].id", 419 | expected: vec![yaml_int(1), yaml_int(2), yaml_int(3)], 420 | }, 421 | YamlNavCase { 422 | name: "wildcard on empty array", 423 | document: yaml_hash(vec![("empty", yaml_array(vec![]))]), 424 | path: ".empty[*]", 425 | expected: vec![], 426 | }, 427 | ]; 428 | 429 | run_yaml_tests(&test_cases); 430 | } 431 | 432 | #[test] 433 | fn test_yaml_recursive_descent() { 434 | let test_cases = vec![ 435 | YamlNavCase { 436 | name: "recursive finds single occurrence", 437 | document: yaml_hash(vec![( 438 | "config", 439 | yaml_hash(vec![( 440 | "database", 441 | yaml_hash(vec![("host", yaml_str("localhost"))]), 442 | )]), 443 | )]), 444 | path: "..host", 445 | expected: vec![yaml_str("localhost")], 446 | }, 447 | YamlNavCase { 448 | name: "recursive finds multiple occurrences", 449 | document: yaml_hash(vec![ 450 | ("db1", yaml_hash(vec![("host", yaml_str("server1"))])), 451 | ("db2", yaml_hash(vec![("host", yaml_str("server2"))])), 452 | ]), 453 | path: "..host", 454 | expected: vec![yaml_str("server1"), yaml_str("server2")], 455 | }, 456 | YamlNavCase { 457 | name: "recursive descent then field", 458 | document: yaml_hash(vec![ 459 | ( 460 | "app1", 461 | yaml_hash(vec![( 462 | "database", 463 | yaml_hash(vec![("host", yaml_str("db1"))]), 464 | )]), 465 | ), 466 | ( 467 | "app2", 468 | yaml_hash(vec![( 469 | "database", 470 | yaml_hash(vec![("host", yaml_str("db2"))]), 471 | )]), 472 | ), 473 | ]), 474 | path: "..database.host", 475 | expected: vec![yaml_str("db1"), yaml_str("db2")], 476 | }, 477 | YamlNavCase { 478 | name: "recursive in nested arrays", 479 | document: yaml_hash(vec![( 480 | "items", 481 | yaml_array(vec![ 482 | yaml_hash(vec![("id", yaml_int(1))]), 483 | yaml_hash(vec![("id", yaml_int(2))]), 484 | ]), 485 | )]), 486 | path: "..id", 487 | expected: vec![yaml_int(1), yaml_int(2)], 488 | }, 489 | YamlNavCase { 490 | name: "recursive descent with wildcard", 491 | document: yaml_hash(vec![( 492 | "services", 493 | yaml_array(vec![ 494 | yaml_hash(vec![( 495 | "ports", 496 | yaml_array(vec![yaml_int(8080), yaml_int(8081)]), 497 | )]), 498 | yaml_hash(vec![("ports", yaml_array(vec![yaml_int(9090)]))]), 499 | ]), 500 | )]), 501 | path: "..ports[*]", 502 | expected: vec![yaml_int(8080), yaml_int(8081), yaml_int(9090)], 503 | }, 504 | ]; 505 | 506 | run_yaml_tests(&test_cases); 507 | } 508 | 509 | // ============================================================================ 510 | // JSON Navigation Tests 511 | // ============================================================================ 512 | 513 | #[test] 514 | fn test_json_basic_navigation() { 515 | let test_cases = vec![ 516 | JsonNavCase { 517 | name: "simple key", 518 | document: json_object(vec![("port", json_int(8080))]), 519 | path: ".port", 520 | 
expected: vec![json_int(8080)], 521 | }, 522 | JsonNavCase { 523 | name: "nested keys", 524 | document: json_object(vec![( 525 | "server", 526 | json_object(vec![("port", json_int(8080))]), 527 | )]), 528 | path: ".server.port", 529 | expected: vec![json_int(8080)], 530 | }, 531 | JsonNavCase { 532 | name: "array index", 533 | document: json_object(vec![( 534 | "items", 535 | json_array(vec![json_str("a"), json_str("b")]), 536 | )]), 537 | path: ".items[1]", 538 | expected: vec![json_str("b")], 539 | }, 540 | JsonNavCase { 541 | name: "wildcard", 542 | document: json_object(vec![("nums", json_array(vec![json_int(1), json_int(2)]))]), 543 | path: ".nums[*]", 544 | expected: vec![json_int(1), json_int(2)], 545 | }, 546 | ]; 547 | 548 | run_json_tests(&test_cases); 549 | } 550 | 551 | // ============================================================================ 552 | // TOML Navigation Tests 553 | // ============================================================================ 554 | 555 | #[test] 556 | fn test_toml_basic_navigation() { 557 | let test_cases = vec![ 558 | TomlNavCase { 559 | name: "simple key", 560 | document: toml_table(vec![("port", toml_int(8080))]), 561 | path: ".port", 562 | expected: vec![toml_int(8080)], 563 | }, 564 | TomlNavCase { 565 | name: "nested table", 566 | document: toml_table(vec![("server", toml_table(vec![("port", toml_int(8080))]))]), 567 | path: ".server.port", 568 | expected: vec![toml_int(8080)], 569 | }, 570 | TomlNavCase { 571 | name: "array index", 572 | document: toml_table(vec![( 573 | "items", 574 | toml_array(vec![toml_str("a"), toml_str("b")]), 575 | )]), 576 | path: ".items[1]", 577 | expected: vec![toml_str("b")], 578 | }, 579 | TomlNavCase { 580 | name: "wildcard", 581 | document: toml_table(vec![("nums", toml_array(vec![toml_int(1), toml_int(2)]))]), 582 | path: ".nums[*]", 583 | expected: vec![toml_int(1), toml_int(2)], 584 | }, 585 | ]; 586 | 587 | run_toml_tests(&test_cases); 588 | } 589 | --------------------------------------------------------------------------------
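For orientation, here is a minimal end-to-end sketch of the pattern the table-driven tests above exercise, using only the calls the test file itself makes (parse_path and navigate_yaml). The standalone-program form and the exact return type of navigate_yaml (a Vec of borrowed references) are inferred from those call sites, not taken from the crate's public documentation:

use detect::eval::structured::navigate_yaml;
use detect::parser::structured_path::parse_path;
use yaml_rust2::{yaml::Hash, Yaml};

fn main() {
    // Build the document { server: { port: 8080 } }, the same way the yaml_hash helper does.
    let mut server = Hash::new();
    server.insert(Yaml::String("port".to_string()), Yaml::Integer(8080));
    let mut root = Hash::new();
    root.insert(Yaml::String("server".to_string()), Yaml::Hash(server));
    let doc = Yaml::Hash(root);

    // ".server.port" walks two keys; matches are references into `doc`, never clones.
    let path = parse_path(".server.port").expect("path should parse");
    let hits = navigate_yaml(&doc, &path);
    assert_eq!(hits, vec![&Yaml::Integer(8080)]);
}

This mirrors the "nested keys" case in test_yaml_basic_navigation; the JSON and TOML variants follow the same shape with serde_json::Value and toml::Value documents.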