├── .gitignore ├── .github └── workflows │ └── tests.yml ├── src ├── cookbook.rs ├── lib.rs ├── flags.rs ├── capturing.rs ├── direct.rs ├── emptymatches.rs ├── repetitions.rs ├── humanregex.rs ├── logical.rs ├── shorthand.rs └── ascii.rs ├── Cargo.toml ├── examples ├── match_date.rs ├── match_html_tags.rs ├── remove_stop_words_with_regex.rs ├── foreword.txt └── mla_citation.rs ├── tests └── regex_crate_examples.rs └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | Cargo.lock 3 | .idea/ 4 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | os: [ubuntu-latest, windows-latest, macos-latest] 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Build 21 | run: cargo build --verbose 22 | - name: Run tests 23 | run: cargo test --verbose 24 | -------------------------------------------------------------------------------- /src/cookbook.rs: -------------------------------------------------------------------------------- 1 | //! # A Cookbook of Common Tasks 2 | //! ## Matching a date 3 | //! ```rust 4 | #![doc = include_str ! ("../examples/match_date.rs")] 5 | //! ``` 6 | //! ## Matching and Capturing HTML Tags 7 | //! ```rust 8 | #![doc = include_str ! ("../examples/match_html_tags.rs")] 9 | //! ``` 10 | //! ## Removing stop words from a passage 11 | //! ```rust 12 | #![doc = include_str ! ("../examples/remove_stop_words_with_regex.rs")] 13 | //! ``` 14 | //! ## Matching and Capturing an MLA-Formatted Citation 15 | //! ```rust 16 | #![doc = include_str ! ("../examples/mla_citation.rs")] 17 | //! 
``` 18 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "human_regex" 3 | version = "0.3.0" 4 | authors = ["Chris McComb "] 5 | description = "A regex library for humans" 6 | edition = "2021" 7 | readme = "README.md" 8 | repository = "https://github.com/cmccomb/human_regex" 9 | homepage = "https://github.com/cmccomb/human_regex" 10 | documentation = "https://docs.rs/human_regex" 11 | license = "MIT OR Apache-2.0" 12 | keywords = ["regex", "human-readable"] 13 | categories = ["text-processing", "parser-implementations"] 14 | 15 | [dependencies] 16 | regex = "1.7.1" 17 | 18 | [dev-dependencies] 19 | stop-words = "0.7.0" 20 | -------------------------------------------------------------------------------- /examples/match_date.rs: -------------------------------------------------------------------------------- 1 | use human_regex::{beginning, digit, end, exactly, text}; 2 | 3 | fn main() { 4 | // Build the first match pattern 5 | let regex_string_1 = beginning() 6 | + exactly(4, digit()) 7 | + text("-") 8 | + exactly(2, digit()) 9 | + text("-") 10 | + exactly(2, digit()) 11 | + end(); 12 | 13 | // Build the second match pattern 14 | let regex_string_2 = 15 | beginning() + exactly(4, digit()) + exactly(2, text("-") + exactly(2, digit())) + end(); 16 | 17 | // Check the match 18 | println!("{}", regex_string_1.to_regex().is_match("2014-01-01")); 19 | 20 | // Check the match 21 | println!("{}", regex_string_2.to_regex().is_match("2014-01-01")); 22 | } 23 | -------------------------------------------------------------------------------- /examples/match_html_tags.rs: -------------------------------------------------------------------------------- 1 | use human_regex::{any, one_or_more, text}; 2 | 3 | fn main() { 4 | // Define a string to match against 5 | let matching_string = "

Wow, so cool!

"; 6 | 7 | // Try it with a greedy match 8 | let greedy_regex_string = text("<") + one_or_more(any()) + text(">"); 9 | for capture in greedy_regex_string 10 | .to_regex() 11 | .captures_iter(matching_string) 12 | { 13 | println!("Greedy: {}", &capture[0]); 14 | } 15 | 16 | // Try it with a lazy match 17 | let greedy_regex_string = text("<") + one_or_more(any()).lazy() + text(">"); 18 | for capture in greedy_regex_string 19 | .to_regex() 20 | .captures_iter(matching_string) 21 | { 22 | println!("Lazy: {}", &capture[0]); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::all)] 2 | #![warn(missing_docs)] 3 | #![warn(clippy::missing_docs_in_private_items)] 4 | #![doc = include_str!("../README.md")] 5 | 6 | pub mod cookbook; 7 | 8 | mod humanregex; 9 | #[doc(inline)] 10 | pub use humanregex::HumanRegex; 11 | 12 | pub mod shorthand; 13 | #[doc(inline)] 14 | pub use shorthand::*; 15 | 16 | pub mod repetitions; 17 | #[doc(inline)] 18 | pub use repetitions::*; 19 | 20 | pub mod logical; 21 | #[doc(inline)] 22 | pub use logical::*; 23 | 24 | pub mod direct; 25 | #[doc(inline)] 26 | pub use direct::*; 27 | 28 | pub mod capturing; 29 | #[doc(inline)] 30 | pub use capturing::*; 31 | 32 | pub mod emptymatches; 33 | #[doc(inline)] 34 | pub use emptymatches::*; 35 | 36 | pub mod ascii; 37 | #[doc(inline)] 38 | pub use ascii::*; 39 | 40 | pub mod flags; 41 | #[doc(inline)] 42 | pub use flags::*; 43 | -------------------------------------------------------------------------------- /examples/remove_stop_words_with_regex.rs: -------------------------------------------------------------------------------- 1 | use human_regex::{escape_all, exactly, one_or_more, or, punctuation, whitespace, word_boundary}; 2 | use stop_words::{get, LANGUAGE}; 3 | 4 | fn main() { 5 | // Read in a file 6 | let document = 
std::fs::read_to_string("examples/foreword.txt").expect("Cannot read file"); 7 | 8 | // Print the contents 9 | println!("Original text:\n{}", document); 10 | 11 | // Get the stopwords 12 | let words = get(LANGUAGE::English); 13 | 14 | // Remove punctuation and lowercase the text to make parsing easier 15 | let lowercase_doc = document.to_ascii_lowercase(); 16 | let regex_for_punctuation = one_or_more(punctuation()); 17 | let text_without_punctuation = regex_for_punctuation 18 | .to_regex() 19 | .replace_all(&*lowercase_doc, ""); 20 | 21 | // Make a regex to match stopwords with trailing spaces and punctuation 22 | let regex_for_stop_words = word_boundary() 23 | + exactly(1, or(&escape_all(&words))) 24 | + word_boundary() 25 | + one_or_more(whitespace()); 26 | 27 | // Remove stop words 28 | let clean_text = regex_for_stop_words 29 | .to_regex() 30 | .replace_all(&*text_without_punctuation, ""); 31 | println!("\nClean text:\n{}", clean_text); 32 | } 33 | -------------------------------------------------------------------------------- /src/flags.rs: -------------------------------------------------------------------------------- 1 | //! Functions for adding flags 2 | // i case-insensitive: letters match both upper and lower case 3 | // m multi-line mode: ^ and $ match begin/end of line 4 | // s allow . to match \n 5 | // u Unicode support (enabled by default) 6 | 7 | use super::humanregex::*; 8 | use std::marker::PhantomData as pd; 9 | 10 | /// Makes all matches case insensitive, matching both upper and lowercase letters. 
11 | /// ``` 12 | /// use human_regex::{case_insensitive, text}; 13 | /// let regex_string = case_insensitive(text("spongebob")); 14 | /// assert!(regex_string.to_regex().is_match("SpOnGeBoB")); 15 | /// assert!(regex_string.to_regex().is_match("spongebob")); 16 | /// assert!(!regex_string.to_regex().is_match("PaTrIcK")); 17 | /// ``` 18 | pub fn case_insensitive(target: HumanRegex) -> HumanRegex { 19 | HumanRegex(format!("(?i:{})", target), pd::) 20 | } 21 | 22 | /// Enables multiline mode, which will allow `beginning()` and `end()` to match the beginning and end of lines 23 | pub fn multi_line_mode(target: HumanRegex) -> HumanRegex { 24 | HumanRegex(format!("(?m:{})", target), pd::) 25 | } 26 | 27 | /// A function that will allow `.` to match newlines (`\n`) 28 | pub fn dot_matches_newline_too(target: HumanRegex) -> HumanRegex { 29 | HumanRegex(format!("(?s:{})", target), pd::) 30 | } 31 | 32 | /// A function to disable unicode support 33 | pub fn disable_unicode(target: HumanRegex) -> HumanRegex { 34 | HumanRegex(format!("(?-u:{})", target), pd::) 35 | } 36 | -------------------------------------------------------------------------------- /src/capturing.rs: -------------------------------------------------------------------------------- 1 | //! 
Functions for capturing matches 2 | 3 | use super::humanregex::*; 4 | use std::marker::PhantomData as pd; 5 | 6 | /// Add a numbered capturing group around an expression 7 | /// ``` 8 | /// use human_regex::{capture, digit, exactly, text}; 9 | /// let regex_string = capture(exactly(4, digit())) 10 | /// + text("-") 11 | /// + capture(exactly(2, digit())) 12 | /// + text("-") 13 | /// + capture(exactly(2, digit())); 14 | /// 15 | /// let caps = regex_string.to_regex().captures("2010-03-14").unwrap(); 16 | /// 17 | /// assert_eq!("2010", caps.get(1).unwrap().as_str()); 18 | /// assert_eq!("03", caps.get(2).unwrap().as_str()); 19 | /// assert_eq!("14", caps.get(3).unwrap().as_str()); 20 | /// ``` 21 | 22 | pub fn capture(target: HumanRegex) -> HumanRegex { 23 | HumanRegex(format!("({})", target), pd::) 24 | } 25 | 26 | /// Add a named capturing group around an expression 27 | /// ``` 28 | /// use human_regex::{named_capture, digit, exactly, text}; 29 | /// let regex_string = named_capture(exactly(4, digit()), "year") 30 | /// + text("-") 31 | /// + named_capture(exactly(2, digit()), "month") 32 | /// + text("-") 33 | /// + named_capture(exactly(2, digit()), "day"); 34 | /// 35 | /// let caps = regex_string.to_regex().captures("2010-03-14").unwrap(); 36 | /// assert_eq!("2010", &caps["year"]); 37 | /// assert_eq!("03", &caps["month"]); 38 | /// assert_eq!("14", &caps["day"]); 39 | /// ``` 40 | pub fn named_capture(target: HumanRegex, name: &str) -> HumanRegex { 41 | HumanRegex(format!("(?P<{}>{})", name, target), pd::) 42 | } 43 | -------------------------------------------------------------------------------- /tests/regex_crate_examples.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | use human_regex as hr; 4 | 5 | #[test] 6 | fn find_a_date() { 7 | let regex_string = hr::beginning() 8 | + hr::exactly(4, hr::digit()) 9 | + hr::text("-") 10 | + hr::exactly(2, hr::digit()) 11 | + hr::text("-") 12 | 
+ hr::exactly(2, hr::digit()) 13 | + hr::end(); 14 | assert!(regex_string.to_regex().is_match("2014-01-01")) 15 | } 16 | 17 | #[test] 18 | fn iterating_over_capture_groups() { 19 | let regex_string = hr::beginning() 20 | + hr::capture(hr::exactly(4, hr::digit())) 21 | + hr::text("-") 22 | + hr::capture(hr::exactly(2, hr::digit())) 23 | + hr::text("-") 24 | + hr::capture(hr::exactly(2, hr::digit())) 25 | + hr::end(); 26 | let text = "2012-03-14, 2013-01-01 and 2014-07-05"; 27 | let match_text = ["M03D14Y2012", "M01D01Y2013", "M07D05Y2014"]; 28 | for (i, cap) in regex_string.to_regex().captures_iter(text).enumerate() { 29 | assert_eq!( 30 | format!("M{}D{}Y{}", &cap[2], &cap[3], &cap[1]), 31 | match_text[i] 32 | ); 33 | } 34 | } 35 | 36 | #[test] 37 | fn replacement_with_named_capture_groups() { 38 | let regex_string = hr::named_capture(hr::exactly(4, hr::digit()), "y") 39 | + hr::text("-") 40 | + hr::named_capture(hr::exactly(2, hr::digit()), "m") 41 | + hr::text("-") 42 | + hr::named_capture(hr::exactly(2, hr::digit()), "d"); 43 | let before = "2012-03-14, 2013-01-01 and 2014-07-05"; 44 | let after = regex_string.to_regex().replace_all(before, "$m/$d/$y"); 45 | assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014"); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/direct.rs: -------------------------------------------------------------------------------- 1 | //! Functions for directly matching text or adding known regex strings 2 | 3 | use super::humanregex::*; 4 | use regex::escape; 5 | use std::marker::PhantomData as pd; 6 | 7 | /// Add matching text to the regex string. Text that is added through this function is automatically escaped. 
8 | /// ``` 9 | /// let regex_string = human_regex::text("asdf"); 10 | /// assert!(regex_string.to_regex().is_match("asdf")); 11 | /// assert!(!regex_string.to_regex().is_match("asddf")); 12 | /// ``` 13 | pub fn text(text: T) -> HumanRegex 14 | where 15 | T: Into + fmt::Display, 16 | { 17 | HumanRegex( 18 | format!("(?:{})", escape(&*text.to_string())), 19 | pd::, 20 | ) 21 | } 22 | 23 | /// Escapes an entire list for use in something like an [or] or an [and] expression. 24 | /// 25 | /// See the [cookbook] stop words example for an example of the utility of this function. 26 | /// ``` 27 | /// use human_regex::direct::escape_all; 28 | /// let escaped_vec = escape_all(&vec!["et-al", "short-term", "full-scale"]); 29 | /// assert_eq!(escaped_vec, vec![r"et\-al", r"short\-term", r"full\-scale"]); 30 | ///``` 31 | pub fn escape_all(options: &[T]) -> Vec 32 | where 33 | T: Into + fmt::Display, 34 | { 35 | options 36 | .iter() 37 | .map(|string| escape(&string.to_string())) 38 | .collect() 39 | } 40 | 41 | /// This text is not escaped. You can use it, for instance, to add a regex string directly to the object. 42 | /// ``` 43 | /// let regex_string = human_regex::nonescaped_text(r"^\d{2}$"); 44 | /// println!("{}", regex_string.to_string()); 45 | /// assert!(regex_string.to_regex().is_match("21")); 46 | /// assert!(!regex_string.to_regex().is_match("007")); 47 | /// ``` 48 | pub fn nonescaped_text(text: &str) -> HumanRegex { 49 | HumanRegex(format!("(?:{})", text.to_string()), pd::) 50 | } 51 | -------------------------------------------------------------------------------- /examples/foreword.txt: -------------------------------------------------------------------------------- 1 | It wasn't always so clear, but the Rust programming language is fundamentally about empowerment: no matter what kind of code you are writing now, Rust empowers you to reach farther, to program with confidence in a wider variety of domains than you did before. 
2 | Take, for example, "systems-level" work that deals with low-level details of memory management, data representation, and concurrency. Traditionally, this realm of programming is seen as arcane, accessible only to a select few who have devoted the necessary years learning to avoid its infamous pitfalls. And even those who practice it do so with caution, lest their code be open to exploits, crashes, or corruption. 3 | Rust breaks down these barriers by eliminating the old pitfalls and providing a friendly, polished set of tools to help you along the way. Programmers who need to "dip down" into lower-level control can do so with Rust, without taking on the customary risk of crashes or security holes, and without having to learn the fine points of a fickle toolchain. Better yet, the language is designed to guide you naturally towards reliable code that is efficient in terms of speed and memory usage. 4 | Programmers who are already working with low-level code can use Rust to raise their ambitions. For example, introducing parallelism in Rust is a relatively low-risk operation: the compiler will catch the classical mistakes for you. And you can tackle more aggressive optimizations in your code with the confidence that you won't accidentally introduce crashes or vulnerabilities. 5 | But Rust isn't limited to low-level systems programming. It's expressive and ergonomic enough to make CLI apps, web servers, and many other kinds of code quite pleasant to write - you'll find simple examples of both later in the book. Working with Rust allows you to build skills that transfer from one domain to another; you can learn Rust by writing a web app, then apply those same skills to target your Raspberry Pi. 6 | This book fully embraces the potential of Rust to empower its users. It's a friendly and approachable text intended to help you level up not just your knowledge of Rust, but also your reach and confidence as a programmer in general. 
So dive in, get ready to learn - and welcome to the Rust community! -------------------------------------------------------------------------------- /src/emptymatches.rs: -------------------------------------------------------------------------------- 1 | //! Functions for the empty matches 2 | 3 | use super::humanregex::*; 4 | use std::marker::PhantomData as pd; 5 | 6 | /// A function to match a word boundary 7 | pub fn word_boundary() -> HumanRegex> { 8 | HumanRegex(r"\b".to_string(), pd::>) 9 | } 10 | 11 | /// A function to match anything BUT a word boundary 12 | pub fn non_word_boundary() -> HumanRegex> { 13 | HumanRegex(r"\B".to_string(), pd::>) 14 | } 15 | 16 | /// A function to match the beginning of text (or start-of-line with multi-line mode) 17 | /// ``` 18 | /// use human_regex::{beginning, text}; 19 | /// let regex_string = beginning() + text("hex"); 20 | /// assert!(regex_string.to_regex().is_match("hexagon")); 21 | /// assert!(!regex_string.to_regex().is_match("chlorhexadine")); 22 | /// ``` 23 | pub fn beginning() -> HumanRegex> { 24 | HumanRegex(r"^".to_string(), pd::>) 25 | } 26 | 27 | /// A function to match the end of text (or end-of-line with multi-line mode) 28 | /// ``` 29 | /// use human_regex::{end, text}; 30 | /// let regex_string = text("end") + end(); 31 | /// assert!(regex_string.to_regex().is_match("mend")); 32 | /// assert!(!regex_string.to_regex().is_match("endocrinologist")); 33 | /// ``` 34 | pub fn end() -> HumanRegex> { 35 | HumanRegex(r"$".to_string(), pd::>) 36 | } 37 | 38 | /// A function to match the beginning of text (even with multi-line mode enabled) 39 | /// ``` 40 | /// use human_regex::{beginning_of_text, text}; 41 | /// let regex_string = beginning_of_text() + text("hex"); 42 | /// assert!(regex_string.to_regex().is_match("hexagon")); 43 | /// assert!(!regex_string.to_regex().is_match("chlorhexadine")); 44 | /// ``` 45 | pub fn beginning_of_text() -> HumanRegex> { 46 | HumanRegex(r"\A".to_string(), pd::>) 47 | } 48 | 49 
| /// A function to match the end of text (even with multi-line mode enabled) 50 | /// ``` 51 | /// use human_regex::{end_of_text, text}; 52 | /// let regex_string = text("end") + end_of_text(); 53 | /// assert!(regex_string.to_regex().is_match("mend")); 54 | /// assert!(!regex_string.to_regex().is_match("endocrinologist")); 55 | /// ``` 56 | pub fn end_of_text() -> HumanRegex> { 57 | HumanRegex(r"\z".to_string(), pd::>) 58 | } 59 | -------------------------------------------------------------------------------- /src/repetitions.rs: -------------------------------------------------------------------------------- 1 | //! Functions for matching repetitions 2 | 3 | use super::humanregex::*; 4 | use std::marker::PhantomData as pd; 5 | 6 | /// Match at least _n_ of a certain target 7 | /// ``` 8 | /// use human_regex::{at_least, text}; 9 | /// let regex_string = at_least(3, text("a")); 10 | /// assert!(regex_string.to_regex().is_match("aaaa")); 11 | /// assert!(!regex_string.to_regex().is_match("aa")); 12 | /// ``` 13 | pub fn at_least(n: u8, target: HumanRegex) -> HumanRegex { 14 | HumanRegex(format!("(?:{}){{{},}}", target, n), pd::) 15 | } 16 | 17 | /// Match at least _n_ and at most _m_ of a certain target 18 | /// ``` 19 | /// use human_regex::{between, text}; 20 | /// let regex_string = between(3, 5, text("a")); 21 | /// assert!(regex_string.to_regex().is_match("aaaa")); 22 | /// assert!(!regex_string.to_regex().is_match("aa")); 23 | /// ``` 24 | pub fn between(n: u8, m: u8, target: HumanRegex) -> HumanRegex { 25 | HumanRegex(format!("(?:{}){{{},{}}}", target, n, m), pd::) 26 | } 27 | 28 | /// Match one or more of a certain target 29 | /// ``` 30 | /// use human_regex::{one_or_more, text}; 31 | /// let regex_string = one_or_more(text("a")); 32 | /// assert!(regex_string.to_regex().is_match("aaaa")); 33 | /// assert!(!regex_string.to_regex().is_match("bb")); 34 | /// ``` 35 | pub fn one_or_more(target: HumanRegex) -> HumanRegex { 36 | HumanRegex(format!("(?:{})+", 
target), pd::) 37 | } 38 | 39 | /// Match zero or more of a certain target 40 | /// ``` 41 | /// use human_regex::{zero_or_more, text}; 42 | /// let regex_string = zero_or_more(text("a")); 43 | /// assert!(regex_string.to_regex().is_match("a")); 44 | /// assert!(regex_string.to_regex().is_match("aaaaa")); 45 | /// assert!(regex_string.to_regex().is_match("bb")); 46 | /// ``` 47 | pub fn zero_or_more(target: HumanRegex) -> HumanRegex { 48 | HumanRegex(format!("(?:{})*", target), pd::) 49 | } 50 | 51 | /// Match zero or one of a certain target 52 | /// ``` 53 | /// use human_regex::{zero_or_one, text}; 54 | /// let regex_string = zero_or_one(text("a")); 55 | /// assert!(regex_string.to_regex().is_match("a")); 56 | /// assert!(regex_string.to_regex().is_match("bb")); 57 | /// ``` 58 | pub fn zero_or_one(target: HumanRegex) -> HumanRegex { 59 | HumanRegex(format!("(?:{})?", target), pd::) 60 | } 61 | 62 | /// Match exactly _n_ of a certain target 63 | /// ``` 64 | /// use human_regex::{exactly, text}; 65 | /// let regex_string = exactly(5, text("a")); 66 | /// assert!(regex_string.to_regex().is_match("aaaaa")); 67 | /// assert!(!regex_string.to_regex().is_match("aaa")); 68 | /// ``` 69 | pub fn exactly(n: u8, target: HumanRegex) -> HumanRegex { 70 | HumanRegex(format!("(?:{}){{{}}}", target, n), pd::) 71 | } 72 | -------------------------------------------------------------------------------- /examples/mla_citation.rs: -------------------------------------------------------------------------------- 1 | use human_regex::{ 2 | digit, exactly, named_capture, nonescaped_text, one_or_more, text, whitespace, word, 3 | zero_or_more, zero_or_one, 4 | }; 5 | 6 | fn main() { 7 | // Define a citation to play with 8 | let citations_in_mla = "\ 9 | McComb, Christopher, Jonathan Cagan, and Kenneth Kotovsky. 
\"Lifting the Veil: Drawing insights \ 10 | about design teams from a cognitively-inspired computational model.\" Design Studies 40 (2015): \ 11 | 119-142.\ 12 | McComb, Christopher, Jonathan Cagan, and Kenneth Kotovsky. \"Rolling with the punches: An \ 13 | examination of team performance in a design task subject to drastic changes.\" Design Studies \ 14 | 36 (2015): 99-121.\ 15 | Raina, Ayush, Christopher McComb, and Jonathan Cagan. \"Learning to design from humans: \ 16 | Imitating human designers through deep learning.\" Journal of Mechanical Design 141 (2019).\ 17 | "; 18 | 19 | // The authors are a bit challenging 20 | let first_author = exactly(1, one_or_more(word()) + text(", ") + one_or_more(word())).lazy(); 21 | let middle_authors = 22 | zero_or_more(text(", ") + one_or_more(word()) + text(" ") + one_or_more(word())); 23 | let last_author = 24 | zero_or_more(text(", and ") + one_or_more(word()) + text(" ") + one_or_more(word())).lazy(); 25 | 26 | let mla_authors = named_capture( 27 | first_author + middle_authors + last_author + text(". 
"), 28 | "authors", 29 | ); 30 | 31 | // The rest is easy 32 | let mla_title = 33 | text("\"") + named_capture(one_or_more(nonescaped_text("[^\"]")), "title") + text(".\" "); 34 | let mla_journal = named_capture(one_or_more(one_or_more(word()) + text(" ")), "journal"); 35 | let mla_volume = zero_or_one(named_capture(one_or_more(digit()), "volume")); 36 | let mla_year = zero_or_one(text(" (") + named_capture(exactly(4, digit()), "year") + text(")")); 37 | let mla_pp = zero_or_one( 38 | text(": ") 39 | + named_capture(one_or_more(digit()), "lower_page") 40 | + text("-") 41 | + named_capture(one_or_more(digit()), "upper_page"), 42 | ); 43 | 44 | // Combine independent pieces 45 | let mla_regex = one_or_more( 46 | mla_authors 47 | + mla_title 48 | + mla_journal 49 | + mla_volume 50 | + mla_year 51 | + mla_pp 52 | + text(".") 53 | + zero_or_more(whitespace()), 54 | ) 55 | .lazy(); 56 | 57 | // Return matches 58 | for capture in mla_regex.to_regex().captures_iter(citations_in_mla) { 59 | println!("Full citation: {}", &capture[0]); 60 | println!("\t- authors: {}", &capture[1]); 61 | println!("\t- title: {}", &capture[2]); 62 | println!("\t- journal: {}", &capture[3]); 63 | println!( 64 | "\t- volume: {}", 65 | &capture.get(4).map_or("N/A", |x| { x.as_str() }), 66 | ); 67 | println!( 68 | "\t- year: {}", 69 | &capture.get(5).map_or("N/A", |x| { x.as_str() }), 70 | ); 71 | println!( 72 | "\t- pages: from {} to {}", 73 | &capture.get(6).map_or("N/A", |x| { x.as_str() }), 74 | &capture.get(7).map_or("N/A", |x| { x.as_str() }), 75 | ); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/humanregex.rs: -------------------------------------------------------------------------------- 1 | use regex::Regex; 2 | 3 | pub(crate) use std::fmt; 4 | use std::marker::PhantomData as pd; 5 | use std::ops::Add; 6 | 7 | /// Represents the state when [HumanRegex] is a wrapper for a standard single-character class (the kind that starts with a 
backslash followed by a letter) 8 | pub struct Standard; 9 | 10 | /// Represents the state when [HumanRegex] is a wrapper for a custom single-character class (the kind surrounded by one layer of square brackets) 11 | pub struct Custom; 12 | 13 | /// Represents the state when [HumanRegex] is a wrapper for a single-character ASCII class (the kind surrounded by colons and two layers of square brackets) 14 | pub struct Ascii; 15 | 16 | /// Represents the state when [HumanRegex] is a wrapper for any type of single-character class 17 | pub struct SymbolClass(std::marker::PhantomData); 18 | 19 | /// Represents the state when [HumanRegex] is a wrapper for a literal string of characters 20 | pub struct LiteralSymbolChain; 21 | 22 | /// Represents the state when [HumanRegex] is a wrapper for any arbitrary regular expression 23 | pub struct SymbolChain; 24 | 25 | /// Represents the state when [HumanRegex] is a wrapper for a quantifier (e.g., an expression that 26 | /// matches a given number of a target). Importantly, these expressions are greedy by default and 27 | /// can be converted to a lazy match with the [`HumanRegex::lazy`] method. 28 | pub struct Quantifier; 29 | 30 | /// The HumanRegex struct which maintains and updates the regex string. For most use cases it will 31 | /// never be necessary to instantiate this directly. 32 | #[derive(Debug)] 33 | pub struct HumanRegex(pub String, pub std::marker::PhantomData); 34 | 35 | impl HumanRegex { 36 | /// Compile the accumulated pattern string into a [`Regex`]. Panics if the pattern is not a valid regular expression. 37 | pub fn to_regex(&self) -> Regex { 38 | Regex::new(&*self.0).unwrap() 39 | } 40 | } 41 | 42 | impl HumanRegex { 43 | /// Add a lazy modifier to quantifier match. 44 | /// ``` 45 | /// let lazy_regex = human_regex::at_least(2, human_regex::text("asdf")).lazy(); 46 | /// ``` 47 | /// However, some things cannot be made lazy! 
The following will not compile: 48 | /// ```ignore 49 | /// let lazy_regex = human_regex::text("asdf").lazy(); 50 | /// ``` 51 | pub fn lazy(&self) -> HumanRegex { 52 | HumanRegex(format!("{}?", &*self.0), pd::) 53 | } 54 | } 55 | 56 | /// One of the three fundamental operations on regular languages: concatenation! 57 | /// ``` 58 | /// use human_regex::{zero_or_one, text}; 59 | /// let regex_string = zero_or_one(text("chris")) + text("mccomb"); 60 | /// assert!(regex_string.to_regex().is_match("mccomb")); 61 | /// assert!(regex_string.to_regex().is_match("chrismccomb")); 62 | /// ``` 63 | impl Add> for HumanRegex { 64 | type Output = HumanRegex; 65 | 66 | fn add(self, rhs: HumanRegex) -> Self::Output { 67 | HumanRegex( 68 | format!("{}{}", self.to_string(), rhs.to_string()), 69 | pd::, 70 | ) 71 | } 72 | } 73 | 74 | // Implement the Display trait for HumanRegex: writes the raw pattern string 75 | impl fmt::Display for HumanRegex { 76 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 77 | write!(f, "{}", self.0) 78 | } 79 | } 80 | 81 | // Make it possible to create strings from HumanRegex (goes through the Display impl) 82 | impl From> for String { 83 | fn from(hr: HumanRegex) -> Self { 84 | hr.to_string() 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/logical.rs: -------------------------------------------------------------------------------- 1 | //! 
Functions for performing logical operations 2 | 3 | use super::humanregex::*; 4 | use std::marker::PhantomData as pd; 5 | 6 | /// A function for establishing an OR relationship between two or more possible matches. The alternatives are joined with `|` inside a non-capturing group, so `or` adds no capture group of its own. Panics if `options` is empty. 7 | /// ``` 8 | /// use human_regex::{text, logical::or}; 9 | /// let regex_string = text("gr") + or(&[text("a"), text("e")]) + text("y"); 10 | /// println!("{}", regex_string.to_string()); 11 | /// assert!(regex_string.to_regex().is_match("grey")); 12 | /// assert!(regex_string.to_regex().is_match("gray")); 13 | /// assert!(!regex_string.to_regex().is_match("graey")); 14 | /// ``` 15 | pub fn or(options: &[T]) -> HumanRegex 16 | where 17 | T: Into + fmt::Display, 18 | { 19 | let mut regex_string = options[0].to_string(); 20 | for option in &options[1..] { 21 | regex_string = format!("{}|{}", regex_string, option) 22 | } 23 | HumanRegex(format!("(?:{})", regex_string), pd::) 24 | } 25 | 26 | /// Xor on two [SymbolClass]es, also known as symmetric difference. 
27 | /// 28 | /// ``` 29 | /// use human_regex::{xor, within_range}; 30 | /// let regex_string = xor(within_range('a'..='g'), within_range('b'..='h')); 31 | /// println!("{}", regex_string); 32 | /// assert!(regex_string.to_regex().is_match("a")); 33 | /// assert!(regex_string.to_regex().is_match("h")); 34 | /// assert!(!regex_string.to_regex().is_match("d")); 35 | /// ``` 36 | pub fn xor( 37 | lhs: HumanRegex>, 38 | rhs: HumanRegex>, 39 | ) -> HumanRegex> { 40 | HumanRegex( 41 | format!("[{}~~{}]", lhs.to_string(), rhs.to_string()), 42 | pd::>, 43 | ) 44 | } 45 | 46 | /// A function for establishing an AND relationship between two or more possible matches 47 | /// ``` 48 | /// use human_regex::{and, within_range, within_set}; 49 | /// let regex_string = and(within_range('a'..='y'),within_set(&['x','y','z'])); 50 | /// println!("{}", regex_string); 51 | /// assert!(regex_string.to_regex().is_match("x")); 52 | /// assert!(regex_string.to_regex().is_match("y")); 53 | /// assert!(!regex_string.to_regex().is_match("z")); 54 | /// ``` 55 | pub fn and( 56 | lhs: HumanRegex>, 57 | rhs: HumanRegex>, 58 | ) -> HumanRegex> { 59 | lhs & rhs 60 | } 61 | 62 | /// See [and] 63 | /// ``` 64 | /// use human_regex::{and, within_range, within_set}; 65 | /// let regex_string = (within_range('a'..='y') & within_set(&['x','y','z'])); 66 | /// println!("{}", regex_string); 67 | /// assert!(regex_string.to_regex().is_match("x")); 68 | /// assert!(regex_string.to_regex().is_match("y")); 69 | /// assert!(!regex_string.to_regex().is_match("z")); 70 | /// ``` 71 | impl std::ops::BitAnd>> for HumanRegex> { 72 | type Output = HumanRegex>; 73 | 74 | fn bitand(self, rhs: HumanRegex>) -> Self::Output { 75 | HumanRegex( 76 | format!("[{}&&{}]", self.to_string(), rhs.to_string()), 77 | pd::>, 78 | ) 79 | } 80 | } 81 | 82 | /// Removes the characters in the second character class from the characters in the first 83 | /// ``` 84 | /// use human_regex::{subtract, within_range, within_set}; 85 | /// 
let regex_string = subtract(within_range('0'..='9'), within_set(&['4'])); 86 | /// println!("{}", regex_string); 87 | /// assert!(regex_string.to_regex().is_match("3")); 88 | /// assert!(regex_string.to_regex().is_match("9")); 89 | /// assert!(!regex_string.to_regex().is_match("4")); 90 | /// ``` 91 | pub fn subtract( 92 | from: HumanRegex>, 93 | subtract: HumanRegex>, 94 | ) -> HumanRegex> { 95 | HumanRegex( 96 | format!("[{}--{}]", from.to_string(), subtract.to_string()), 97 | pd::>, 98 | ) 99 | } 100 | 101 | /// Negation for standard symbol classes. 102 | /// ``` 103 | /// use human_regex::{digit}; 104 | /// assert_eq!(digit().to_string().replace(r"\d",r"\D"), r"\D"); 105 | /// ``` 106 | impl std::ops::Not for HumanRegex> { 107 | type Output = Self; 108 | 109 | fn not(self) -> Self::Output { 110 | if self.to_string().len() < 2 { 111 | return self; 112 | } 113 | if self 114 | .to_string() 115 | .chars() 116 | .nth(1) 117 | .expect("All classes shorter than 2 characters filtered above") 118 | .is_lowercase() 119 | { 120 | HumanRegex( 121 | self.to_string() 122 | .replace(r"\d", r"\D") 123 | .replace(r"\p", r"\P") 124 | .replace(r"\w", r"\W") 125 | .replace(r"\s", r"\S") 126 | .replace(r"\b", r"\B"), 127 | pd::>, 128 | ) 129 | } else { 130 | HumanRegex( 131 | self.to_string() 132 | .replace(r"\D", r"\d") 133 | .replace(r"\P", r"\p") 134 | .replace(r"\W", r"\w") 135 | .replace(r"\S", r"\s") 136 | .replace(r"\B", r"\b"), 137 | pd::>, 138 | ) 139 | } 140 | } 141 | } 142 | 143 | impl std::ops::Not for HumanRegex> { 144 | type Output = Self; 145 | 146 | fn not(self) -> Self::Output { 147 | if self 148 | .to_string() 149 | .chars() 150 | .nth(1) 151 | .expect("Should always be at least 2 characters in SymbolClass") 152 | != '^' 153 | { 154 | HumanRegex( 155 | self.to_string().replacen("[", "[^", 1), // negate only the outermost class; nested or escaped brackets keep their meaning 156 | pd::>, 157 | ) 158 | } else { 159 | HumanRegex( 160 | self.to_string().replacen("[^", "[", 1), // strip only the leading negation, not negations of nested classes 161 | pd::>, 162 | ) 163 | } 164 | } 165 | } 166 | 167 | impl std::ops::Not for
HumanRegex> { 168 | type Output = Self; 169 | 170 | fn not(self) -> Self::Output { 171 | if self 172 | .to_string() 173 | .chars() 174 | .nth(3) 175 | .expect("Should always be at least 4 characters in SymbolClass") 176 | != '^' 177 | { 178 | HumanRegex( 179 | self.to_string().replace("[[:", "[[:^"), 180 | pd::>, 181 | ) 182 | } else { 183 | HumanRegex( 184 | self.to_string().replace("[[:^", "[[:"), 185 | pd::>, 186 | ) 187 | } 188 | } 189 | } 190 | 191 | impl std::ops::Not for HumanRegex { 192 | type Output = HumanRegex; 193 | 194 | fn not(self) -> Self::Output { 195 | HumanRegex( 196 | self.to_string() 197 | .chars() 198 | .into_iter() 199 | .map(|chr| format!("[^{}]", chr)) 200 | .collect::(), 201 | pd::, 202 | ) 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Github CI](https://github.com/cmccomb/human_regex/actions/workflows/tests.yml/badge.svg)](https://github.com/cmccomb/human_regex/actions) 2 | [![Crates.io](https://img.shields.io/crates/v/human_regex.svg)](https://crates.io/crates/human_regex) 3 | [![docs.rs](https://img.shields.io/docsrs/human_regex/latest?logo=rust)](https://docs.rs/human_regex) 4 | 5 | # Regex for Humans 6 | The goal of this crate is simple: give everybody the power of regular expressions without having 7 | to learn the complicated syntax. It is inspired by [ReadableRegex.jl](https://github.com/jkrumbiegel/ReadableRegex.jl). 8 | This crate is a wrapper around the [core Rust regex library](https://crates.io/crates/regex). 
9 | 10 | # Example usage 11 | If you want to match a date of the format `2021-10-30`, you could use the following code to generate a regex: 12 | ```rust 13 | use human_regex::{beginning, digit, exactly, text, end}; 14 | let regex_string = beginning() 15 | + exactly(4, digit()) 16 | + text("-") 17 | + exactly(2, digit()) 18 | + text("-") 19 | + exactly(2, digit()) 20 | + end(); 21 | assert!(regex_string.to_regex().is_match("2014-01-01")); 22 | ``` 23 | The `to_regex()` method returns a [standard Rust regex](https://docs.rs/regex/1.5.4/regex/struct.Regex.html). We can do this another way with slightly less repetition though! 24 | ```rust 25 | use human_regex::{beginning, digit, exactly, text, end}; 26 | let first_regex_string = text("-") + exactly(2, digit()); 27 | let second_regex_string = beginning() 28 | + exactly(4, digit()) 29 | + exactly(2, first_regex_string) 30 | + end(); 31 | assert!(second_regex_string.to_regex().is_match("2014-01-01")); 32 | ``` 33 | For a more extensive set of examples, please see [The Cookbook](crate::cookbook). 34 | 35 | # Features 36 | This crate currently supports the vast majority of syntax available in the [core Rust regex library](https://crates.io/crates/regex) through a human-readable API. 37 | The type model that the API is built upon reflects the underlying syntax rules of regular languages/expressions, meaning you get the same instant compiler feedback you're used to in Rust while writing regex. 38 | No more runtime regex panics or unexplained behavior: if it compiles, what you can plainly read is what you get. 39 | ## Single Character 40 | 41 | | Implemented?
| Expression | Description | 42 | |:-------------------------------------------:|:-------------------:|:--------------------------------------------------------------| 43 | | `any()` | `.` | any character except new line (includes new line with s flag) | 44 | | `digit()` | `\d` | digit (`\p{Nd}`) | 45 | | `non_digit()` | `\D` | not digit | 46 | | `unicode_category(UnicodeCategory)` | `\p{L}` | Unicode non-script category | 47 | | `unicode_script(UnicodeScript)` | `\p{Greek}` | Unicode script category | 48 | | `non_unicode_category(UnicodeCategory)` | `\P{L}` | Negated one-letter name Unicode character class | 49 | | `non_unicode_script(UnicodeScript)` | `\P{Greek}` | negated Unicode character class (general category or script) | 50 | 51 | ## Character Classes 52 | 53 | | Implemented? | Expression | Description | 54 | |:----------------------------:|:--------------:|:------------------------------------------------------------------------------------| 55 | |`within_set(&['x', 'y', 'z'])`| `[xyz]` | A character class matching either x, y or z. | 56 | |`without_set(&['x', 'y', 'z'])`| `[^xyz]` | A character class matching any character except x, y and z. | 57 | |`within_range('a'..='z')` | `[a-z]` | A character class matching any character in range a-z. | 58 | |`without_range('a'..='z')` | `[^a-z]` | A character class matching any character outside range a-z. | 59 | | See below | `[[:alpha:]]` | ASCII character class (`[A-Za-z]`) | 60 | | `non_alphabetic()` | `[[:^alpha:]]` | Negated ASCII character class (`[^A-Za-z]`) | 61 | |`within_set()` | `[x[^xyz]]` | Nested/grouping character class (matching any character except y and z) | 62 | | `and(lhs, rhs)`/`lhs & rhs` | `[a-y&&xyz]` | Intersection (a-y AND xyz = xy) | 63 | |`within_range()&without_set()`| `[0-9&&[^4]]` | Subtraction using intersection and negation (matching 0-9 except 4) | 64 | | `subtract(lhs, rhs)` | `[0-9--4]` | Direct subtraction (matching 0-9 except 4).
| 65 | | `xor(lhs, rhs)` | `[a-g~~b-h]` | Symmetric difference (matching `a` and `h` only). | 66 | |`within_set(&escape_all())` | `[\[\]]` | Escaping in character classes (matching `[` or `]`) | 67 | 68 | ## Perl Character Classes 69 | 70 | | Implemented? | Expression | Description | 71 | |:------------------:| :--------: |:---------------------------------------------------------------------------| 72 | | `digit()` | `\d` | digit (`\p{Nd}`) | 73 | | `non_digit()` | `\D` | not digit | 74 | | `whitespace()` | `\s` | whitespace (`\p{White_Space}`) | 75 | | `non_whitespace()` | `\S` | not whitespace | 76 | | `word()` | `\w` | word character (`\p{Alphabetic} + \p{M} + \d + \p{Pc} + \p{Join_Control}`) | 77 | | `non_word()` | `\W` | not word character | 78 | 79 | ## ASCII Character Classes 80 | 81 | | Implemented? | Expression | Description | 82 | |:----------------:|:--------------:|:----------------------------------| 83 | | `alphanumeric()` | `[[:alnum:]]` | alphanumeric (`[0-9A-Za-z]`) | 84 | | `alphabetic()` | `[[:alpha:]]` | alphabetic (`[A-Za-z]`) | 85 | | `ascii()` | `[[:ascii:]]` | ASCII (`[\x00-\x7F]`) | 86 | | `blank()` | `[[:blank:]]` | blank (`[\t ]`) | 87 | | `control()` | `[[:cntrl:]]` | control (`[\x00-\x1F\x7F]`) | 88 | | `digit()` | `[[:digit:]]` | digits (`[0-9]`) | 89 | | `graphical()` | `[[:graph:]]` | graphical (`[!-~]`) | 90 | | `lowercase()` | `[[:lower:]]` | lower case (`[a-z]`) | 91 | | `printable()` | `[[:print:]]` | printable (`[ -~]`) | 92 | | `punctuation()` | `[[:punct:]]` | punctuation (``[!-/:-@\[-`{-~]``) | 93 | | `whitespace()` | `[[:space:]]` | whitespace (`[\t\n\v\f\r ]`) | 94 | | `uppercase()` | `[[:upper:]]` | upper case (`[A-Z]`) | 95 | | `word()` | `[[:word:]]` | word characters (`[0-9A-Za-z_]`) | 96 | | `hexdigit()` | `[[:xdigit:]]` | hex digit (`[0-9A-Fa-f]`) | 97 | 98 | ## Repetitions 99 | 100 | | Implemented?
| Expression | Description | 101 | |:-------------------------:|:----------:|:---------------------------------------------| 102 | | `zero_or_more(x)` | `x*` | zero or more of x (greedy) | 103 | | `one_or_more(x)` | `x+` | one or more of x (greedy) | 104 | | `zero_or_one(x)` | `x?` | zero or one of x (greedy) | 105 | | `zero_or_more(x).lazy()` | `x*?` | zero or more of x (ungreedy/lazy) | 106 | | `one_or_more(x).lazy()` | `x+?` | one or more of x (ungreedy/lazy) | 107 | | `zero_or_one(x).lazy()` | `x??` | zero or one of x (ungreedy/lazy) | 108 | | `between(n, m, x)` | `x{n,m}` | at least n x and at most m x (greedy) | 109 | | `at_least(n, x)` | `x{n,}` | at least n x (greedy) | 110 | | `exactly(n, x)` | `x{n}` | exactly n x | 111 | | `between(n, m, x).lazy()` | `x{n,m}?` | at least n x and at most m x (ungreedy/lazy) | 112 | | `at_least(n, x).lazy()` | `x{n,}?` | at least n x (ungreedy/lazy) | 113 | 114 | ## General Operations 115 | 116 | | Implemented? | Expression | Description | 117 | |:------------:|:----------------------------:|:--------------------------------------------------------------------| 118 | | `+` | `xy` | concatenation (x followed by y) | 119 | | `or()` | `x\|y` | alternation (x or y, prefer x) | 120 | | `!` |`\d->\D`, `[xy]->[^xy]`, etc. | negation (works on any character class, or literal strings of text).| 121 | 122 | ## Empty matches 123 | 124 | | Implemented?
| Expression | Description | 125 | |:---------------------:|:----------:|:--------------------------------------------------------------------| 126 | | `beginning()` | `^` | the beginning of text (or start-of-line with multi-line mode) | 127 | | `end()` | `$` | the end of text (or end-of-line with multi-line mode) | 128 | | `beginning_of_text()` | `\A` | only the beginning of text (even with multi-line mode enabled) | 129 | | `end_of_text()` | `\z` | only the end of text (even with multi-line mode enabled) | 130 | | `word_boundary()` | `\b` | a Unicode word boundary (\w on one side and \W, \A, or \z on other) | 131 | | `non_word_boundary()` | `\B` | not a Unicode word boundary | 132 | 133 | ## Groupings 134 | 135 | | Implemented? | Expression | Description | 136 | |:-------------------------------------------------:|:---------------:|:--------------------------------------------------------| 137 | | `capture(exp)` | `(exp)` | numbered capture group (indexed by opening parenthesis) | 138 | | `named_capture(exp, name)` | `(?P<name>exp)` | named (also numbered) capture group | 139 | | Handled implicitly through functional composition | `(?:exp)` | non-capturing group | 140 | | See below | `(?flags)` | set flags within current group | 141 | | See below | `(?flags:exp)` | set flags for exp (non-capturing) | 142 | 143 | ## Flags 144 | 145 | | Implemented?
| Expression | Description | 146 | |:-----------------------------------:|:----------:|:--------------------------------------------------------------| 147 | | `case_insensitive(exp)` | `i` | case-insensitive: letters match both upper and lower case | 148 | | `multi_line_mode(exp)` | `m` | multi-line mode: `^` and `$` match begin/end of line | 149 | | `dot_matches_newline_too(exp)` | `s` | allow `.` to match `\n` | 150 | | will not be implemented1 | `U` | swap the meaning of `x*` and `x*?` | 151 | | `disable_unicode(exp)` | `u` | Unicode support (enabled by default) | 152 | | will not be implemented2 | `x` | ignore whitespace and allow line comments (starting with `#`) | 153 | 154 | 1. With the declarative nature of this library, use of this flag would just obfuscate meaning. 155 | 2. When using `human_regex`, comments should be added in source code rather than in the regex string. 156 | -------------------------------------------------------------------------------- /src/shorthand.rs: -------------------------------------------------------------------------------- 1 | //! 
Functions for general purpose matches 2 | 3 | use super::humanregex::*; 4 | use std::marker::PhantomData as pd; 5 | 6 | /// A function for matching any character (except for \n) 7 | /// ``` 8 | /// use human_regex::{text, any, exactly}; 9 | /// let regex_string = text("h") + exactly(2, any()) + text("l"); 10 | /// assert!(regex_string.to_regex().is_match("hurl")); 11 | /// assert!(regex_string.to_regex().is_match("heal")); 12 | /// ``` 13 | pub fn any() -> HumanRegex> { 14 | HumanRegex(r".".to_string(), pd::>) 15 | } 16 | 17 | /// A function for the digit character class (i.e., the digits 0 through 9) 18 | /// ``` 19 | /// use human_regex::{beginning, end, one_or_more, digit}; 20 | /// let regex_string = beginning() + one_or_more(digit()) + end(); 21 | /// assert!(regex_string.to_regex().is_match("010101010100100100100101")); 22 | /// assert!(!regex_string.to_regex().is_match("a string that is not composed of digits will fail")); 23 | /// ``` 24 | pub fn digit() -> HumanRegex> { 25 | HumanRegex(r"\d".to_string(), pd::>) 26 | } 27 | 28 | /// A function for the non-digit character class (i.e., everything BUT the digits 0-9) 29 | /// ``` 30 | /// use human_regex::{beginning, end, one_or_more, non_digit}; 31 | /// let regex_string = beginning() + one_or_more(non_digit()) + end(); 32 | /// assert!(regex_string.to_regex().is_match("a string without digits will pass")); 33 | /// assert!(!regex_string.to_regex().is_match("a string with digits like 99 will fail")); 34 | /// ``` 35 | pub fn non_digit() -> HumanRegex> { 36 | HumanRegex(r"\D".to_string(), pd::>) 37 | } 38 | 39 | /// A function for the word character class (i.e., all alphanumeric characters plus underscore) 40 | pub fn word() -> HumanRegex> { 41 | HumanRegex(r"\w".to_string(), pd::>) 42 | } 43 | 44 | /// A function for the non-word character class (i.e., everything BUT the alphanumeric characters plus underscore) 45 | pub fn non_word() -> HumanRegex> { 46 | HumanRegex(r"\W".to_string(), pd::>) 47 | } 48 | 49 | 
/// A function for the whitespace character class `\s` (i.e., any whitespace character, not only space and tab) 50 | /// ``` 51 | /// use human_regex::{one_or_more, text, whitespace}; 52 | /// let regex_string = text("at") + one_or_more(whitespace()) + text("least"); 53 | /// assert!(!regex_string.to_regex().is_match("atleast")); 54 | /// assert!(regex_string.to_regex().is_match("at least")); 55 | /// assert!(regex_string.to_regex().is_match("at least")); 56 | /// ``` 57 | pub fn whitespace() -> HumanRegex> { 58 | HumanRegex(r"\s".to_string(), pd::>) 59 | } 60 | 61 | /// A function for the non-whitespace character class `\S` (i.e., everything BUT whitespace characters) 62 | /// ``` 63 | /// use human_regex::{beginning, end, one_or_more, non_whitespace}; 64 | /// let regex_string = beginning() + one_or_more(non_whitespace()) + end(); 65 | /// assert!(regex_string.to_regex().is_match("supercalifragilisticexpialidocious")); 66 | /// assert!(regex_string.to_regex().is_match("a-sluggified-thingamajig")); 67 | /// assert!(!regex_string.to_regex().is_match("something with spaces won't pass")); 68 | /// ``` 69 | pub fn non_whitespace() -> HumanRegex> { 70 | HumanRegex(r"\S".to_string(), pd::>) 71 | } 72 | 73 | /// Matches any single character within an inclusive range of characters, emitted as `[start-end]` 74 | ///``` 75 | /// use human_regex::{within_range}; 76 | /// let regex_string = within_range('a'..='d'); 77 | /// assert!(regex_string.to_regex().is_match("c")); 78 | /// assert!(!regex_string.to_regex().is_match("h")); 79 | ///``` 80 | pub fn within_range(range: std::ops::RangeInclusive) -> HumanRegex> { 81 | HumanRegex( 82 | format!("[{}-{}]", range.start(), range.end()), 83 | pd::>, 84 | ) 85 | } 86 | /// Matches any single character outside of an inclusive range of characters, emitted as `[^start-end]` 87 | ///``` 88 | /// use human_regex::{without_range}; 89 | /// let regex_string = without_range('a'..='d'); 90 | /// assert!(regex_string.to_regex().is_match("h")); 91 | /// assert!(!regex_string.to_regex().is_match("c")); 92 | ///``` 93 | pub fn without_range(range: std::ops::RangeInclusive) -> HumanRegex> { 94 |
HumanRegex( 95 | format!("[^{}-{}]", range.start(), range.end()), 96 | pd::>, 97 | ) 98 | } 99 | 100 | /// Matches anything within a specified set of characters 101 | /// ``` 102 | /// use human_regex::{text,within_set}; 103 | /// let regex_string = text("gr") + within_set(&['a','e']) + text("y"); 104 | /// assert!(regex_string.to_regex().is_match("gray")); 105 | /// assert!(regex_string.to_regex().is_match("grey")); 106 | /// assert!(!regex_string.to_regex().is_match("groy")); 107 | /// ``` 108 | pub fn within_set(set: &[T]) -> HumanRegex> 109 | where 110 | T: Into + fmt::Display, 111 | { 112 | HumanRegex( 113 | format!( 114 | "[{}]", 115 | set.into_iter().map(|c| c.to_string()).collect::() 116 | ), 117 | pd::>, 118 | ) 119 | } 120 | 121 | /// Matches anything outside a specified set of characters 122 | /// ``` 123 | /// use human_regex::{text,without_set}; 124 | /// let regex_string = text("gr") + without_set(&['a','e']) + text("y"); 125 | /// assert!(regex_string.to_regex().is_match("groy")); 126 | /// assert!(!regex_string.to_regex().is_match("gray")); 127 | /// assert!(!regex_string.to_regex().is_match("grey")); 128 | /// ``` 129 | pub fn without_set(set: &[T]) -> HumanRegex> 130 | where 131 | T: Into + fmt::Display, 132 | { 133 | HumanRegex( 134 | format!( 135 | "[^{}]", 136 | set.into_iter().map(|c| c.to_string()).collect::() 137 | ), 138 | pd::>, 139 | ) 140 | } 141 | 142 | /// An enum covering all Unicode character categories 143 | /// 144 | /// Used in the [unicode_category] function. 
145 | #[allow(missing_docs)] // variants are self documenting 146 | pub enum UnicodeCategory { 147 | Letter, 148 | LowercaseLetter, 149 | UppercaseLetter, 150 | TitlecaseLetter, 151 | CasedLetter, 152 | ModifierLetter, 153 | OtherLetter, 154 | Mark, 155 | NonSpacingMark, 156 | SpaceCombiningMark, 157 | EnclosingMark, 158 | Separator, 159 | SpaceSeparator, 160 | LineSeparator, 161 | ParagraphSeparator, 162 | Symbol, 163 | MathSymbol, 164 | CurrencySymbol, 165 | ModifierSymbol, 166 | OtherSymbol, 167 | Number, 168 | DecimalDigitNumber, 169 | LetterNumber, 170 | OtherNumber, 171 | Punctuation, 172 | DashPunctuation, 173 | OpenPunctuation, 174 | ClosePunctuation, 175 | InitialPunctuation, 176 | FinalPunctuation, 177 | ConnectorPunctuation, 178 | OtherPunctuation, 179 | Other, 180 | Control, 181 | Format, 182 | PrivateUse, 183 | Surrogate, 184 | Unassigned, 185 | } 186 | 187 | /// A function for matching Unicode character categories. For matching script categories see [unicode_script]. 188 | /// ``` 189 | /// use human_regex::{beginning, end, one_or_more, unicode_category, UnicodeCategory}; 190 | /// let regex_string = beginning() 191 | /// + one_or_more(unicode_category(UnicodeCategory::CurrencySymbol)) 192 | /// + end(); 193 | /// assert!(regex_string.to_regex().is_match("$¥₹")); 194 | /// assert!(!regex_string.to_regex().is_match("normal words")); 195 | /// ``` 196 | pub fn unicode_category(category: UnicodeCategory) -> HumanRegex> { 197 | HumanRegex( 198 | match category { 199 | UnicodeCategory::Letter => r"\p{Letter}".to_string(), 200 | UnicodeCategory::LowercaseLetter => r"\p{Lowercase_Letter}".to_string(), 201 | UnicodeCategory::UppercaseLetter => r"\p{Uppercase_Letter}".to_string(), 202 | UnicodeCategory::TitlecaseLetter => r"\p{Titlecase_Letter}".to_string(), 203 | UnicodeCategory::CasedLetter => r"\p{Cased_Letter}".to_string(), 204 | UnicodeCategory::ModifierLetter => r"\p{Modifier_Letter}".to_string(), 205 | UnicodeCategory::OtherLetter => 
r"\p{Other_Letter}".to_string(), 206 | UnicodeCategory::Mark => r"\p{Mark}".to_string(), 207 | UnicodeCategory::NonSpacingMark => r"\p{NonSpacing_Mark}".to_string(), 208 | UnicodeCategory::SpaceCombiningMark => r"\p{Spacing_Mark}".to_string(), // Unicode's long name for category Mc is Spacing_Mark 209 | UnicodeCategory::EnclosingMark => r"\p{Enclosing_Mark}".to_string(), 210 | UnicodeCategory::Separator => r"\p{Separator}".to_string(), 211 | UnicodeCategory::SpaceSeparator => r"\p{Space_Separator}".to_string(), 212 | UnicodeCategory::LineSeparator => r"\p{Line_Separator}".to_string(), 213 | UnicodeCategory::ParagraphSeparator => r"\p{Paragraph_Separator}".to_string(), 214 | UnicodeCategory::Symbol => r"\p{Symbol}".to_string(), 215 | UnicodeCategory::MathSymbol => r"\p{Math_Symbol}".to_string(), 216 | UnicodeCategory::CurrencySymbol => r"\p{Currency_Symbol}".to_string(), 217 | UnicodeCategory::ModifierSymbol => r"\p{Modifier_Symbol}".to_string(), 218 | UnicodeCategory::OtherSymbol => r"\p{Other_Symbol}".to_string(), 219 | UnicodeCategory::Number => r"\p{Number}".to_string(), 220 | UnicodeCategory::DecimalDigitNumber => r"\p{Decimal_Number}".to_string(), // Unicode's long name for category Nd is Decimal_Number 221 | UnicodeCategory::LetterNumber => r"\p{Letter_Number}".to_string(), 222 | UnicodeCategory::OtherNumber => r"\p{Other_Number}".to_string(), 223 | UnicodeCategory::Punctuation => r"\p{Punctuation}".to_string(), 224 | UnicodeCategory::DashPunctuation => r"\p{Dash_Punctuation}".to_string(), 225 | UnicodeCategory::OpenPunctuation => r"\p{Open_Punctuation}".to_string(), 226 | UnicodeCategory::ClosePunctuation => r"\p{Close_Punctuation}".to_string(), 227 | UnicodeCategory::InitialPunctuation => r"\p{Initial_Punctuation}".to_string(), 228 | UnicodeCategory::FinalPunctuation => r"\p{Final_Punctuation}".to_string(), 229 | UnicodeCategory::ConnectorPunctuation => r"\p{Connector_Punctuation}".to_string(), 230 | UnicodeCategory::OtherPunctuation => r"\p{Other_Punctuation}".to_string(), 231 | UnicodeCategory::Other => r"\p{Other}".to_string(), 232 |
UnicodeCategory::Control => r"\p{Control}".to_string(), 233 | UnicodeCategory::Format => r"\p{Format}".to_string(), 234 | UnicodeCategory::PrivateUse => r"\p{Private_Use}".to_string(), 235 | UnicodeCategory::Surrogate => r"\p{Surrogate}".to_string(), 236 | UnicodeCategory::Unassigned => r"\p{Unassigned}".to_string(), 237 | }, 238 | pd::>, 239 | ) 240 | } 241 | 242 | /// A function for not matching Unicode character categories. For matching script categories see [non_unicode_script]. 243 | /// ``` 244 | /// use human_regex::{one_or_more, non_unicode_category, UnicodeCategory}; 245 | /// let regex_string =one_or_more(non_unicode_category(UnicodeCategory::CurrencySymbol)); 246 | /// assert!(regex_string.to_regex().is_match("normal words")); 247 | /// assert!(!regex_string.to_regex().is_match("$¥₹")); 248 | /// ``` 249 | pub fn non_unicode_category(category: UnicodeCategory) -> HumanRegex> { 250 | !unicode_category(category) 251 | } 252 | 253 | /// An enum for covering all Unicode script categories 254 | /// 255 | /// Used in the [unicode_script] function 256 | #[allow(missing_docs)] // variants are self documenting 257 | pub enum UnicodeScript { 258 | Common, 259 | Arabic, 260 | Armenian, 261 | Bengali, 262 | Bopomofo, 263 | Braille, 264 | Buhid, 265 | CandianAboriginal, 266 | Cherokee, 267 | Cyrillic, 268 | Devanagari, 269 | Ethiopic, 270 | Georgian, 271 | Greek, 272 | Gujarati, 273 | Gurkmukhi, 274 | Han, 275 | Hangul, 276 | Hanunoo, 277 | Hebrew, 278 | Hirigana, 279 | Inherited, 280 | Kannada, 281 | Katakana, 282 | Khmer, 283 | Lao, 284 | Latin, 285 | Limbu, 286 | Malayalam, 287 | Mongolian, 288 | Myanmar, 289 | Ogham, 290 | Oriya, 291 | Runic, 292 | Sinhala, 293 | Syriac, 294 | Tagalog, 295 | Tagbanwa, 296 | TaiLe, 297 | Tamil, 298 | Telugu, 299 | Thaana, 300 | Thai, 301 | Tibetan, 302 | Yi, 303 | } 304 | 305 | /// A function for matching Unicode characters belonging to a certain script category. For matching other categories see [unicode_category]. 
306 | /// ``` 307 | /// use human_regex::{beginning, end, one_or_more, unicode_script, UnicodeScript}; 308 | /// let regex_string = beginning() 309 | /// + one_or_more(unicode_script(UnicodeScript::Han)) 310 | /// + end(); 311 | /// assert!(regex_string.to_regex().is_match("蟹")); 312 | /// assert!(!regex_string.to_regex().is_match("latin text")); 313 | /// ``` 314 | pub fn unicode_script(category: UnicodeScript) -> HumanRegex> { 315 | HumanRegex( 316 | match category { 317 | UnicodeScript::Common => r"\p{Common}".to_string(), 318 | UnicodeScript::Arabic => r"\p{Arabic}".to_string(), 319 | UnicodeScript::Armenian => r"\p{Armenian}".to_string(), 320 | UnicodeScript::Bengali => r"\p{Bengali}".to_string(), 321 | UnicodeScript::Bopomofo => r"\p{Bopomofo}".to_string(), 322 | UnicodeScript::Braille => r"\p{Braille}".to_string(), 323 | UnicodeScript::Buhid => r"\p{Buhid}".to_string(), 324 | UnicodeScript::CandianAboriginal => r"\p{Canadian_Aboriginal}".to_string(), // variant name is misspelled; the pattern must use the real script name 325 | UnicodeScript::Cherokee => r"\p{Cherokee}".to_string(), 326 | UnicodeScript::Cyrillic => r"\p{Cyrillic}".to_string(), 327 | UnicodeScript::Devanagari => r"\p{Devanagari}".to_string(), 328 | UnicodeScript::Ethiopic => r"\p{Ethiopic}".to_string(), 329 | UnicodeScript::Georgian => r"\p{Georgian}".to_string(), 330 | UnicodeScript::Greek => r"\p{Greek}".to_string(), 331 | UnicodeScript::Gujarati => r"\p{Gujarati}".to_string(), 332 | UnicodeScript::Gurkmukhi => r"\p{Gurmukhi}".to_string(), // variant name is misspelled; the pattern must use the real script name 333 | UnicodeScript::Han => r"\p{Han}".to_string(), 334 | UnicodeScript::Hangul => r"\p{Hangul}".to_string(), 335 | UnicodeScript::Hanunoo => r"\p{Hanunoo}".to_string(), 336 | UnicodeScript::Hebrew => r"\p{Hebrew}".to_string(), 337 | UnicodeScript::Hirigana => r"\p{Hiragana}".to_string(), // variant name is misspelled; the pattern must use the real script name 338 | UnicodeScript::Inherited => r"\p{Inherited}".to_string(), 339 | UnicodeScript::Kannada => r"\p{Kannada}".to_string(), 340 | UnicodeScript::Katakana => r"\p{Katakana}".to_string(), 341 | UnicodeScript::Khmer => r"\p{Khmer}".to_string(),
342 | UnicodeScript::Lao => r"\p{Lao}".to_string(), 343 | UnicodeScript::Latin => r"\p{Latin}".to_string(), 344 | UnicodeScript::Limbu => r"\p{Limbu}".to_string(), 345 | UnicodeScript::Malayalam => r"\p{Malayalam}".to_string(), 346 | UnicodeScript::Mongolian => r"\p{Mongolian}".to_string(), 347 | UnicodeScript::Myanmar => r"\p{Myanmar}".to_string(), 348 | UnicodeScript::Ogham => r"\p{Ogham}".to_string(), 349 | UnicodeScript::Oriya => r"\p{Oriya}".to_string(), 350 | UnicodeScript::Runic => r"\p{Runic}".to_string(), 351 | UnicodeScript::Sinhala => r"\p{Sinhala}".to_string(), 352 | UnicodeScript::Syriac => r"\p{Syriac}".to_string(), 353 | UnicodeScript::Tagalog => r"\p{Tagalog}".to_string(), 354 | UnicodeScript::Tagbanwa => r"\p{Tagbanwa}".to_string(), 355 | UnicodeScript::TaiLe => r"\p{TaiLe}".to_string(), 356 | UnicodeScript::Tamil => r"\p{Tamil}".to_string(), 357 | UnicodeScript::Telugu => r"\p{Telugu}".to_string(), 358 | UnicodeScript::Thaana => r"\p{Thaana}".to_string(), 359 | UnicodeScript::Thai => r"\p{Thai}".to_string(), 360 | UnicodeScript::Tibetan => r"\p{Tibetan}".to_string(), 361 | UnicodeScript::Yi => r"\p{Yi}".to_string(), 362 | }, 363 | pd::>, 364 | ) 365 | } 366 | 367 | /// A function for matching Unicode characters not belonging to a certain script category. For matching other categories see [non_unicode_category]. 368 | /// ``` 369 | /// use human_regex::{beginning, end, one_or_more, non_unicode_script, UnicodeScript}; 370 | /// let regex_string =one_or_more(non_unicode_script(UnicodeScript::Han)); 371 | /// assert!(regex_string.to_regex().is_match("latin text")); 372 | /// assert!(!regex_string.to_regex().is_match("蟹")); 373 | /// ``` 374 | pub fn non_unicode_script(category: UnicodeScript) -> HumanRegex> { 375 | !unicode_script(category) 376 | } 377 | -------------------------------------------------------------------------------- /src/ascii.rs: -------------------------------------------------------------------------------- 1 | //! 
Functions for ASCII character classes 2 | 3 | use super::humanregex::*; 4 | use std::marker::PhantomData as pd; 5 | 6 | /// A function to match any alphanumeric character (`[0-9A-Za-z]`) 7 | /// ``` 8 | /// use std::ops::Not; 9 | /// let regex_string = human_regex::alphanumeric(); 10 | /// assert!(regex_string.to_regex().is_match("[").not()); 11 | /// assert!(regex_string.to_regex().is_match("a")); 12 | /// assert!(regex_string.to_regex().is_match("A")); 13 | /// assert!(regex_string.to_regex().is_match("1")); 14 | /// assert!(regex_string.to_regex().is_match("¡").not()); 15 | /// assert!(regex_string.to_regex().is_match("!").not()); 16 | /// assert!(regex_string.to_regex().is_match(" ").not()); 17 | /// assert!(regex_string.to_regex().is_match("\n").not()); 18 | /// ``` 19 | pub fn alphanumeric() -> HumanRegex> { 20 | HumanRegex(r"[[:alnum:]]".to_string(), pd::>) 21 | } 22 | 23 | /// A function to match any non-alphanumeric character (`[^0-9A-Za-z]`) 24 | /// ``` 25 | /// use std::ops::Not; 26 | /// let regex_string = human_regex::non_alphanumeric(); 27 | /// assert!(regex_string.to_regex().is_match("[")); 28 | /// assert!(regex_string.to_regex().is_match("a").not()); 29 | /// assert!(regex_string.to_regex().is_match("A").not()); 30 | /// assert!(regex_string.to_regex().is_match("1").not()); 31 | /// assert!(regex_string.to_regex().is_match("¡")); 32 | /// assert!(regex_string.to_regex().is_match("!")); 33 | /// assert!(regex_string.to_regex().is_match(" ")); 34 | /// assert!(regex_string.to_regex().is_match("\n")); 35 | /// ``` 36 | pub fn non_alphanumeric() -> HumanRegex> { 37 | HumanRegex(r"[[:^alnum:]]".to_string(), pd::>) 38 | } 39 | 40 | /// A function to match any alphabetic character (`[A-Za-z]`) 41 | /// ``` 42 | /// use std::ops::Not; 43 | /// let regex_string = human_regex::alphabetic(); 44 | /// assert!(regex_string.to_regex().is_match("[").not()); 45 | /// assert!(regex_string.to_regex().is_match("a")); 46 | /// 
assert!(regex_string.to_regex().is_match("A")); 47 | /// assert!(regex_string.to_regex().is_match("1").not()); 48 | /// assert!(regex_string.to_regex().is_match("¡").not()); 49 | /// assert!(regex_string.to_regex().is_match("!").not()); 50 | /// assert!(regex_string.to_regex().is_match(" ").not()); 51 | /// assert!(regex_string.to_regex().is_match("\n").not()); 52 | /// ``` 53 | pub fn alphabetic() -> HumanRegex> { 54 | HumanRegex(r"[[:alpha:]]".to_string(), pd::>) 55 | } 56 | 57 | /// A function to match any non-alphabetic character (`[^A-Za-z]`) 58 | /// ``` 59 | /// use std::ops::Not; 60 | /// let regex_string = human_regex::non_alphabetic(); 61 | /// assert!(regex_string.to_regex().is_match("[")); 62 | /// assert!(regex_string.to_regex().is_match("a").not()); 63 | /// assert!(regex_string.to_regex().is_match("A").not()); 64 | /// assert!(regex_string.to_regex().is_match("1")); 65 | /// assert!(regex_string.to_regex().is_match("¡")); 66 | /// assert!(regex_string.to_regex().is_match("!")); 67 | /// assert!(regex_string.to_regex().is_match(" ")); 68 | /// assert!(regex_string.to_regex().is_match("\n")); 69 | /// ``` 70 | pub fn non_alphabetic() -> HumanRegex> { 71 | HumanRegex(r"[[:^alpha:]]".to_string(), pd::>) 72 | } 73 | 74 | /// A function to match any lowercase character (`[a-z]`) 75 | /// ``` 76 | /// use std::ops::Not; 77 | /// let regex_string = human_regex::lowercase(); 78 | /// assert!(regex_string.to_regex().is_match("[").not()); 79 | /// assert!(regex_string.to_regex().is_match("a")); 80 | /// assert!(regex_string.to_regex().is_match("A").not()); 81 | /// assert!(regex_string.to_regex().is_match("1").not()); 82 | /// assert!(regex_string.to_regex().is_match("¡").not()); 83 | /// assert!(regex_string.to_regex().is_match("!").not()); 84 | /// assert!(regex_string.to_regex().is_match(" ").not()); 85 | /// assert!(regex_string.to_regex().is_match("\n").not()); 86 | /// ``` 87 | pub fn lowercase() -> HumanRegex> { 88 | 
HumanRegex(r"[[:lower:]]".to_string(), pd::>) 89 | } 90 | 91 | /// A function to match any character that is not an ASCII lowercase character (`[^a-z]`) 92 | /// ``` 93 | /// use std::ops::Not; 94 | /// let regex_string = human_regex::non_lowercase(); 95 | /// assert!(regex_string.to_regex().is_match("[")); 96 | /// assert!(regex_string.to_regex().is_match("a").not()); 97 | /// assert!(regex_string.to_regex().is_match("A")); 98 | /// assert!(regex_string.to_regex().is_match("1")); 99 | /// assert!(regex_string.to_regex().is_match("¡")); 100 | /// assert!(regex_string.to_regex().is_match("!")); 101 | /// assert!(regex_string.to_regex().is_match(" ")); 102 | /// assert!(regex_string.to_regex().is_match("\n")); 103 | /// ``` 104 | pub fn non_lowercase() -> HumanRegex> { 105 | HumanRegex(r"[[:^lower:]]".to_string(), pd::>) 106 | } 107 | 108 | /// A function to match any ASCII uppercase character (`[A-Z]`) 109 | /// ``` 110 | /// use std::ops::Not; 111 | /// let regex_string = human_regex::uppercase(); 112 | /// assert!(regex_string.to_regex().is_match("[").not()); 113 | /// assert!(regex_string.to_regex().is_match("a").not()); 114 | /// assert!(regex_string.to_regex().is_match("A")); 115 | /// assert!(regex_string.to_regex().is_match("1").not()); 116 | /// assert!(regex_string.to_regex().is_match("¡").not()); 117 | /// assert!(regex_string.to_regex().is_match("!").not()); 118 | /// assert!(regex_string.to_regex().is_match(" ").not()); 119 | /// assert!(regex_string.to_regex().is_match("\n").not()); 120 | /// ``` 121 | pub fn uppercase() -> HumanRegex> { 122 | HumanRegex(r"[[:upper:]]".to_string(), pd::>) 123 | } 124 | 125 | /// A function to match any character that is not an ASCII uppercase character (`[^A-Z]`) 126 | /// ``` 127 | /// use std::ops::Not; 128 | /// let regex_string = human_regex::non_uppercase(); 129 | /// assert!(regex_string.to_regex().is_match("[")); 130 | /// assert!(regex_string.to_regex().is_match("a")); 131 | /// assert!(regex_string.to_regex().is_match("A").not()); 132 | /// 
assert!(regex_string.to_regex().is_match("1")); 133 | /// assert!(regex_string.to_regex().is_match("¡")); 134 | /// assert!(regex_string.to_regex().is_match("!")); 135 | /// assert!(regex_string.to_regex().is_match(" ")); 136 | /// assert!(regex_string.to_regex().is_match("\n")); 137 | /// ``` 138 | pub fn non_uppercase() -> HumanRegex> { 139 | HumanRegex(r"[[:^upper:]]".to_string(), pd::>) 140 | } 141 | 142 | /// A function to match any hexadecimal digit (`[A-Fa-f0-9]`) 143 | /// ``` 144 | /// use std::ops::Not; 145 | /// let regex_string = human_regex::hexdigit(); 146 | /// assert!(regex_string.to_regex().is_match("[").not()); 147 | /// assert!(regex_string.to_regex().is_match("a")); 148 | /// assert!(regex_string.to_regex().is_match("A")); 149 | /// assert!(regex_string.to_regex().is_match("g").not()); 150 | /// assert!(regex_string.to_regex().is_match("G").not()); 151 | /// assert!(regex_string.to_regex().is_match("1")); 152 | /// assert!(regex_string.to_regex().is_match("¡").not()); 153 | /// assert!(regex_string.to_regex().is_match("!").not()); 154 | /// assert!(regex_string.to_regex().is_match(" ").not()); 155 | /// assert!(regex_string.to_regex().is_match("\n").not()); 156 | /// ``` 157 | pub fn hexdigit() -> HumanRegex> { 158 | HumanRegex(r"[[:xdigit:]]".to_string(), pd::>) 159 | } 160 | 161 | /// A function to match any character that is not a hexadecimal digit (`[^A-Fa-f0-9]`) 162 | /// ``` 163 | /// use std::ops::Not; 164 | /// let regex_string = human_regex::non_hexdigit(); 165 | /// assert!(regex_string.to_regex().is_match("[")); 166 | /// assert!(regex_string.to_regex().is_match("a").not()); 167 | /// assert!(regex_string.to_regex().is_match("A").not()); 168 | /// assert!(regex_string.to_regex().is_match("g")); 169 | /// assert!(regex_string.to_regex().is_match("G")); 170 | /// assert!(regex_string.to_regex().is_match("1").not()); 171 | /// assert!(regex_string.to_regex().is_match("¡")); 172 | /// 
assert!(regex_string.to_regex().is_match("!")); 173 | /// assert!(regex_string.to_regex().is_match(" ")); 174 | /// assert!(regex_string.to_regex().is_match("\n")); 175 | /// ``` 176 | pub fn non_hexdigit() -> HumanRegex> { 177 | HumanRegex(r"[[:^xdigit:]]".to_string(), pd::>) 178 | } 179 | 180 | /// A function to match any ASCII character (`[\x00-\x7F]`) 181 | /// ``` 182 | /// use std::ops::Not; 183 | /// let regex_string = human_regex::ascii(); 184 | /// assert!(regex_string.to_regex().is_match("[")); 185 | /// assert!(regex_string.to_regex().is_match("a")); 186 | /// assert!(regex_string.to_regex().is_match("A")); 187 | /// assert!(regex_string.to_regex().is_match("1")); 188 | /// assert!(regex_string.to_regex().is_match("¡").not()); 189 | /// assert!(regex_string.to_regex().is_match("!")); 190 | /// assert!(regex_string.to_regex().is_match(" ")); 191 | /// assert!(regex_string.to_regex().is_match("\n")); 192 | /// ``` 193 | pub fn ascii() -> HumanRegex> { 194 | HumanRegex(r"[[:ascii:]]".to_string(), pd::>) 195 | } 196 | 197 | /// A function to match any non-ASCII character (`[^\x00-\x7F]`) 198 | /// ``` 199 | /// use std::ops::Not; 200 | /// let regex_string = human_regex::non_ascii(); 201 | /// assert!(regex_string.to_regex().is_match("[").not()); 202 | /// assert!(regex_string.to_regex().is_match("a").not()); 203 | /// assert!(regex_string.to_regex().is_match("G").not()); 204 | /// assert!(regex_string.to_regex().is_match("1").not()); 205 | /// assert!(regex_string.to_regex().is_match("¡")); 206 | /// assert!(regex_string.to_regex().is_match("!").not()); 207 | /// assert!(regex_string.to_regex().is_match(" ").not()); 208 | /// assert!(regex_string.to_regex().is_match("\n").not()); 209 | /// ``` 210 | pub fn non_ascii() -> HumanRegex> { 211 | HumanRegex(r"[[:^ascii:]]".to_string(), pd::>) 212 | } 213 | 214 | /// A function to match blank characters, i.e. tab or space (`[\t ]`) 215 | /// ``` 216 | /// use std::ops::Not; 217 | /// let regex_string = human_regex::blank(); 218 | /// 
assert!(regex_string.to_regex().is_match("[").not()); 219 | /// assert!(regex_string.to_regex().is_match("a").not()); 220 | /// assert!(regex_string.to_regex().is_match("A").not()); 221 | /// assert!(regex_string.to_regex().is_match("1").not()); 222 | /// assert!(regex_string.to_regex().is_match("¡").not()); 223 | /// assert!(regex_string.to_regex().is_match("!").not()); 224 | /// assert!(regex_string.to_regex().is_match(" ")); 225 | /// assert!(regex_string.to_regex().is_match("\n").not()); 226 | /// ``` 227 | pub fn blank() -> HumanRegex> { 228 | HumanRegex(r"[[:blank:]]".to_string(), pd::>) 229 | } 230 | 231 | /// A function to match any character other than tab or space (`[^\t ]`) 232 | /// ``` 233 | /// use std::ops::Not; 234 | /// let regex_string = human_regex::non_blank(); 235 | /// assert!(regex_string.to_regex().is_match("[")); 236 | /// assert!(regex_string.to_regex().is_match("a")); 237 | /// assert!(regex_string.to_regex().is_match("A")); 238 | /// assert!(regex_string.to_regex().is_match("1")); 239 | /// assert!(regex_string.to_regex().is_match("¡")); 240 | /// assert!(regex_string.to_regex().is_match("!")); 241 | /// assert!(regex_string.to_regex().is_match(" ").not()); 242 | /// assert!(regex_string.to_regex().is_match("\n")); 243 | /// ``` 244 | pub fn non_blank() -> HumanRegex> { 245 | HumanRegex(r"[[:^blank:]]".to_string(), pd::>) 246 | } 247 | 248 | /// A function to match ASCII control characters (`[\x00-\x1F\x7F]`) 249 | /// ``` 250 | /// use std::ops::Not; 251 | /// let regex_string = human_regex::control(); 252 | /// assert!(regex_string.to_regex().is_match("[").not()); 253 | /// assert!(regex_string.to_regex().is_match("a").not()); 254 | /// assert!(regex_string.to_regex().is_match("A").not()); 255 | /// assert!(regex_string.to_regex().is_match("1").not()); 256 | /// assert!(regex_string.to_regex().is_match("¡").not()); 257 | /// assert!(regex_string.to_regex().is_match("!").not()); 258 | /// assert!(regex_string.to_regex().is_match(" ").not()); 259 | /// 
assert!(regex_string.to_regex().is_match("\n")); 260 | /// ``` 261 | pub fn control() -> HumanRegex> { 262 | HumanRegex(r"[[:cntrl:]]".to_string(), pd::>) 263 | } 264 | 265 | /// A function to match any character that is not an ASCII control character (`[^\x00-\x1F\x7F]`) 266 | /// ``` 267 | /// use std::ops::Not; 268 | /// let regex_string = human_regex::non_control(); 269 | /// assert!(regex_string.to_regex().is_match("[")); 270 | /// assert!(regex_string.to_regex().is_match("a")); 271 | /// assert!(regex_string.to_regex().is_match("A")); 272 | /// assert!(regex_string.to_regex().is_match("1")); 273 | /// assert!(regex_string.to_regex().is_match("¡")); 274 | /// assert!(regex_string.to_regex().is_match("!")); 275 | /// assert!(regex_string.to_regex().is_match(" ")); 276 | /// assert!(regex_string.to_regex().is_match("\n").not()); 277 | /// ``` 278 | pub fn non_control() -> HumanRegex> { 279 | HumanRegex(r"[[:^cntrl:]]".to_string(), pd::>) 280 | } 281 | 282 | /// A function to match ASCII graphical characters, i.e. visible characters excluding space (`[!-~]`) 283 | /// ``` 284 | /// use std::ops::Not; 285 | /// let regex_string = human_regex::graphical(); 286 | /// assert!(regex_string.to_regex().is_match("[")); 287 | /// assert!(regex_string.to_regex().is_match("a")); 288 | /// assert!(regex_string.to_regex().is_match("A")); 289 | /// assert!(regex_string.to_regex().is_match("1")); 290 | /// assert!(regex_string.to_regex().is_match("¡").not()); 291 | /// assert!(regex_string.to_regex().is_match("!")); 292 | /// assert!(regex_string.to_regex().is_match(" ").not()); 293 | /// assert!(regex_string.to_regex().is_match("\n").not()); 294 | /// ``` 295 | pub fn graphical() -> HumanRegex> { 296 | HumanRegex(r"[[:graph:]]".to_string(), pd::>) 297 | } 298 | 299 | /// A function to match any character that is not an ASCII graphical character (`[^!-~]`) 300 | /// ``` 301 | /// use std::ops::Not; 302 | /// let regex_string = human_regex::non_graphical(); 303 | /// assert!(regex_string.to_regex().is_match("[").not()); 304 | /// assert!(regex_string.to_regex().is_match("a").not());
305 | /// assert!(regex_string.to_regex().is_match("A").not()); 306 | /// assert!(regex_string.to_regex().is_match("1").not()); 307 | /// assert!(regex_string.to_regex().is_match("¡")); 308 | /// assert!(regex_string.to_regex().is_match("!").not()); 309 | /// assert!(regex_string.to_regex().is_match(" ")); 310 | /// assert!(regex_string.to_regex().is_match("\n")); 311 | /// ``` 312 | pub fn non_graphical() -> HumanRegex> { 313 | HumanRegex(r"[[:^graph:]]".to_string(), pd::>) 314 | } 315 | 316 | /// A function to match ASCII printable characters, including space (`[ -~]`) 317 | /// ``` 318 | /// use std::ops::Not; 319 | /// let regex_string = human_regex::printable(); 320 | /// assert!(regex_string.to_regex().is_match("[")); 321 | /// assert!(regex_string.to_regex().is_match("a")); 322 | /// assert!(regex_string.to_regex().is_match("A")); 323 | /// assert!(regex_string.to_regex().is_match("1")); 324 | /// assert!(regex_string.to_regex().is_match("¡").not()); 325 | /// assert!(regex_string.to_regex().is_match("!")); 326 | /// assert!(regex_string.to_regex().is_match(" ")); 327 | /// assert!(regex_string.to_regex().is_match("\n").not()); 328 | /// ``` 329 | pub fn printable() -> HumanRegex> { 330 | HumanRegex(r"[[:print:]]".to_string(), pd::>) 331 | } 332 | 333 | /// A function to match any character that is not an ASCII printable character (`[^ -~]`) 334 | /// ``` 335 | /// use std::ops::Not; 336 | /// let regex_string = human_regex::non_printable(); 337 | /// assert!(regex_string.to_regex().is_match("[").not()); 338 | /// assert!(regex_string.to_regex().is_match("a").not()); 339 | /// assert!(regex_string.to_regex().is_match("A").not()); 340 | /// assert!(regex_string.to_regex().is_match("1").not()); 341 | /// assert!(regex_string.to_regex().is_match("¡")); 342 | /// assert!(regex_string.to_regex().is_match("!").not()); 343 | /// assert!(regex_string.to_regex().is_match(" ").not()); 344 | /// assert!(regex_string.to_regex().is_match("\n")); 345 | /// ``` 346 | pub fn non_printable() -> HumanRegex> { 347 | 
HumanRegex(r"[[:^print:]]".to_string(), pd::>) 348 | } 349 | 350 | /// A function to match ASCII punctuation (``[!-/:-@\[-`{-~]``) 351 | /// ``` 352 | /// use std::ops::Not; 353 | /// let regex_string = human_regex::punctuation(); 354 | /// assert!(regex_string.to_regex().is_match("[")); 355 | /// assert!(regex_string.to_regex().is_match("a").not()); 356 | /// assert!(regex_string.to_regex().is_match("A").not()); 357 | /// assert!(regex_string.to_regex().is_match("1").not()); 358 | /// assert!(regex_string.to_regex().is_match("¡").not()); 359 | /// assert!(regex_string.to_regex().is_match("!")); 360 | /// assert!(regex_string.to_regex().is_match(" ").not()); 361 | /// assert!(regex_string.to_regex().is_match("\n").not()); 362 | /// ``` 363 | pub fn punctuation() -> HumanRegex> { 364 | HumanRegex(r"[[:punct:]]".to_string(), pd::>) 365 | } 366 | 367 | /// A function to match any character that is not ASCII punctuation (``[^!-/:-@\[-`{-~]``) 368 | /// ``` 369 | /// use std::ops::Not; 370 | /// let regex_string = human_regex::non_punctuation(); 371 | /// assert!(regex_string.to_regex().is_match("[").not()); 372 | /// assert!(regex_string.to_regex().is_match("a")); 373 | /// assert!(regex_string.to_regex().is_match("A")); 374 | /// assert!(regex_string.to_regex().is_match("1")); 375 | /// assert!(regex_string.to_regex().is_match("¡")); 376 | /// assert!(regex_string.to_regex().is_match("!").not()); 377 | /// assert!(regex_string.to_regex().is_match(" ")); 378 | /// assert!(regex_string.to_regex().is_match("\n")); 379 | /// ``` 380 | pub fn non_punctuation() -> HumanRegex> { 381 | HumanRegex(r"[[:^punct:]]".to_string(), pd::>) 382 | } 383 | --------------------------------------------------------------------------------