├── .clang-tidy ├── .github └── workflows │ └── build.yml ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── example ├── CMakeLists.txt ├── basic.cpp ├── extracting_captures.cpp ├── forward_list.cpp ├── optional.cpp ├── regex_range.cpp ├── regex_searcher.cpp ├── structured_bindings.cpp ├── url_parsing.cpp └── using_expressions.cpp ├── include ├── mregex.hpp └── mregex │ ├── ast │ ├── ast.hpp │ ├── astfwd.hpp │ ├── build.hpp │ ├── indexing.hpp │ ├── inversion.hpp │ ├── match_result.hpp │ ├── nodes │ │ ├── alternation.hpp │ │ ├── assertion.hpp │ │ ├── capture.hpp │ │ ├── lookaround.hpp │ │ ├── negated.hpp │ │ ├── quantifiers │ │ │ ├── fixed_repetition.hpp │ │ │ └── repetition.hpp │ │ ├── sequence.hpp │ │ ├── set.hpp │ │ ├── terminals │ │ │ ├── anchors.hpp │ │ │ ├── backref.hpp │ │ │ ├── empty.hpp │ │ │ ├── literal.hpp │ │ │ ├── range.hpp │ │ │ └── wildcard.hpp │ │ └── trivially_matchable.hpp │ ├── predicates │ │ ├── anchor_predicates.hpp │ │ ├── lookaround_predicate.hpp │ │ └── negate.hpp │ ├── traits.hpp │ └── transform.hpp │ ├── grammar │ ├── actions.hpp │ ├── backrefs.hpp │ ├── escapes.hpp │ ├── grammar.hpp │ ├── hexadecimal.hpp │ ├── names.hpp │ └── quantifiers.hpp │ ├── match_result.hpp │ ├── match_result_generator.hpp │ ├── parser │ ├── lexer.hpp │ ├── parser.hpp │ └── state.hpp │ ├── regex.hpp │ ├── regex_capture.hpp │ ├── regex_capture_storage.hpp │ ├── regex_flags.hpp │ ├── regex_match_context.hpp │ ├── regex_methods.hpp │ ├── regex_traits.hpp │ ├── symbols │ ├── actions.hpp │ ├── core.hpp │ ├── names.hpp │ └── quantifiers.hpp │ ├── utility │ ├── char_traits.hpp │ ├── concepts.hpp │ ├── continuations.hpp │ ├── distance.hpp │ ├── enums.hpp │ ├── input_range_adapter.hpp │ ├── reversed_range_view.hpp │ ├── static_string.hpp │ ├── tuple.hpp │ ├── type_sequence.hpp │ └── type_traits.hpp │ └── xpr │ ├── adapters.hpp │ ├── components.hpp │ ├── flags.hpp │ ├── operators.hpp │ └── providers.hpp └── test ├── .clang-tidy ├── CMakeLists.txt 
├── ast_indexing_tests.cpp ├── ast_inversion_tests.cpp ├── ast_traits_tests.cpp ├── iterator_api_tests.cpp ├── match_result_tests.cpp ├── parser_accept_tests.cpp ├── parser_ast_tests.cpp ├── parser_error_tests.cpp ├── regex_iteration_tests.cpp ├── regex_match_tests.cpp ├── regex_search_tests.cpp ├── regex_traits_tests.cpp ├── static_string_tests.cpp ├── symbol_tests.cpp ├── tests.cpp ├── type_sequence_tests.cpp ├── type_traits_tests.cpp └── xpr_ast_tests.cpp /.clang-tidy: -------------------------------------------------------------------------------- 1 | Checks: 2 | abseil-no-namespace, 3 | bugprone-*, 4 | clang-analyzer-cplusplus.*, 5 | clang-analyzer-deadcode.*, 6 | clang-analyzer-nullability.*, 7 | -cert-dcl21-cpp, 8 | google-default-arguments, 9 | google-runtime-operator, 10 | misc-*, 11 | -misc-forwarding-reference-overload, 12 | -misc-non-private-member-variables-in-classes, 13 | modernize-*, 14 | -modernize-avoid-c-arrays, 15 | -modernize-use-trailing-return-type, 16 | -modernize-use-nodiscard, 17 | performance-*, 18 | readability-*, 19 | -readability-braces-around-statements, 20 | -readability-identifier-length, 21 | -readability-named-parameter, 22 | -readability-uppercase-literal-suffix 23 | FormatStyle: file 24 | CheckOptions: 25 | - { key: readability-identifier-naming.NamespaceCase, value: lower_case } 26 | - { key: readability-identifier-naming.ClassCase, value: lower_case } 27 | - { key: readability-identifier-naming.StructCase, value: lower_case } 28 | - { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase } 29 | - { key: readability-identifier-naming.FunctionCase, value: lower_case } 30 | - { key: readability-identifier-naming.VariableCase, value: lower_case } 31 | - { key: readability-identifier-naming.PrivateMemberPrefix, value: _ } 32 | - { key: readability-identifier-naming.ProtectedMemberPrefix, value: _ } 33 | - { key: readability-identifier-naming.MacroDefinitionCase, value: UPPER_CASE } 34 | - { key: 
readability-identifier-naming.EnumConstantCase, value: lower_case } 35 | - { key: readability-identifier-naming.ConstexprVariableCase, value: lower_case } 36 | - { key: readability-identifier-naming.GlobalConstantCase, value: lower_case } 37 | - { key: readability-identifier-naming.MemberConstantCase, value: lower_case } 38 | - { key: readability-identifier-naming.StaticConstantCase, value: lower_case } 39 | - { key: readability-magic-numbers.IgnoredIntegerValues, value: 0;1;10 } 40 | - { key: readability-magic-numbers.IgnorePowersOf2IntegerValues, value: 1 } -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | on: [ push, pull_request ] 2 | name: build 3 | jobs: 4 | build: 5 | strategy: 6 | matrix: 7 | compiler: [ "gcc", "clang" ] 8 | include: 9 | - compiler: "gcc" 10 | CXX: "g++-11" 11 | - compiler: "clang" 12 | CXX: "clang++-13" 13 | runs-on: ubuntu-latest 14 | env: 15 | CXX: ${{ matrix.CXX }} 16 | 17 | steps: 18 | - name: Checkout pushed code 19 | uses: actions/checkout@v2 20 | 21 | - name: Install Clang 22 | if: ${{ matrix.compiler == 'clang' }} 23 | run: | 24 | printf "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee /etc/apt/sources.list.d/llvm-toolchain-focal-13.list 25 | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - 26 | sudo apt update 27 | sudo apt install llvm-13 28 | $CXX --version 29 | 30 | - name: Install GCC 31 | if: ${{ matrix.compiler == 'gcc' }} 32 | run: | 33 | sudo apt-get update 34 | sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test 35 | sudo apt install -y g++-11 36 | $CXX --version 37 | 38 | - name: Install CMake 39 | run: | 40 | sudo apt install cmake 41 | cmake --version 42 | 43 | - name: Run CMake 44 | run: cmake -S . 
-B build -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release 45 | 46 | - name: Build project 47 | run: cmake --build build --target all --config Release -- -j4 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .idea -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | dist: focal 3 | os: linux 4 | jobs: 5 | include: 6 | - compiler: gcc 7 | env: CXX_COMPILER=g++-11 8 | addons: 9 | apt: 10 | packages: 11 | - g++-11 12 | 13 | - compiler: clang 14 | env: CXX_COMPILER=clang++-12 15 | addons: 16 | apt: 17 | packages: 18 | - clang-12 19 | - g++-11 20 | sources: 21 | - sourceline: 'deb https://apt.llvm.org/focal/ llvm-toolchain-focal-12 main' 22 | key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' 23 | 24 | before_install: 25 | - sudo apt-get update 26 | - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test 27 | 28 | install: 29 | - sudo apt install -y g++-11 30 | - sudo apt install cmake 31 | - cmake --version 32 | - export CXX=${CXX_COMPILER} 33 | - $CXX --version 34 | 35 | script: 36 | - cmake -S . 
-B build -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release 37 | - cmake --build build --target all --config Release -- -j4 -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | project(mregex VERSION 2.0 LANGUAGES CXX) 3 | 4 | set(PROJECT_DESCRIPTION "Metaprogramming-based, compile-time implementation of regular expressions in C++20.") 5 | set(CMAKE_CXX_STANDARD 20) 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 7 | 8 | # Compiler options 9 | if(MSVC) 10 | add_compile_options(/W4) 11 | if(CMAKE_BUILD_TYPE MATCHES Debug) 12 | add_compile_options(/Od) 13 | else() 14 | add_compile_options(/O2) 15 | endif() 16 | else() 17 | add_compile_options(-Wall -Wextra -Wpedantic -Wuninitialized -Wshadow -Wunused) 18 | if(CMAKE_BUILD_TYPE MATCHES Debug) 19 | add_compile_options(-Og) 20 | else() 21 | add_compile_options(-O3) 22 | endif() 23 | endif() 24 | 25 | # Main library target 26 | add_library(mregex INTERFACE) 27 | target_include_directories(mregex INTERFACE include/) 28 | 29 | # Building tests 30 | option(MREGEX_BUILD_TESTS "build tests" ON) 31 | if(${MREGEX_BUILD_TESTS}) 32 | add_subdirectory(test/) 33 | endif() 34 | 35 | # Building examples 36 | option(MREGEX_BUILD_EXAMPLES "build examples" ON) 37 | if(${MREGEX_BUILD_EXAMPLES}) 38 | add_subdirectory(example/) 39 | endif() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2024 Iulian Rusu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(mregex_add_example filename) 2 | set(target_name example_${filename}) 3 | add_executable(${target_name} ${filename}.cpp) 4 | target_link_libraries(${target_name} PUBLIC mregex) 5 | set_property(TARGET ${target_name} PROPERTY CXX_STANDARD 20) 6 | endfunction() 7 | 8 | mregex_add_example(basic) 9 | mregex_add_example(extracting_captures) 10 | mregex_add_example(forward_list) 11 | mregex_add_example(optional) 12 | mregex_add_example(regex_range) 13 | mregex_add_example(regex_searcher) 14 | mregex_add_example(structured_bindings) 15 | mregex_add_example(url_parsing) 16 | mregex_add_example(using_expressions) -------------------------------------------------------------------------------- /example/basic.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | // Every regex is mapped to a C++ type 7 | using my_regex = meta::regex<"ab+">; 8 | 9 | auto match = my_regex::match("abbbb"); 10 | std::cout << (match ? "Matched!" 
: "Not matched") << '\n'; 11 | } -------------------------------------------------------------------------------- /example/extracting_captures.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | // This regular expression has 3 capturing groups: 1 and 2 are unnamed, while 3 is named "country_name" 7 | using my_regex = meta::regex\w+);)">; 8 | 9 | std::string_view str = "Name: John, age: 42, country: Australia;"; 10 | if (auto result = my_regex::match(str)) 11 | { 12 | // The group<>() method returns a special object that contains the captured content 13 | auto name = result.group<1>(); 14 | // Regex captures are also implicitly convertible to std::string_view (when using contiguous iterators) 15 | std::string_view age = result.group<2>(); 16 | // Named capturing groups can be accessed either by index or by their name 17 | auto country = result.group<"country_name">(); 18 | std::cout << name << ", " << age << ", lives in " << country << '\n'; 19 | } 20 | } -------------------------------------------------------------------------------- /example/forward_list.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main() 6 | { 7 | using my_regex = meta::regex[A-Z]++)\]: (?.++))">; 8 | 9 | // Matching can be done on any forward range that contains char-like elements 10 | // In this example, matching is done on a linked list 11 | std::string_view message = "[ERROR]: Segmentation fault (core dumped)"; 12 | std::forward_list list{message.begin(), message.end()}; 13 | 14 | // The result type of matching can be named explicitly using this type alias 15 | using result_type = meta::match_result_view::const_iterator>; 16 | 17 | result_type result = my_regex::match(list); 18 | std::cout << "Log level: " << result.group<"level">() << '\n'; 19 | std::cout << "Message: '" << result.group<"msg">() << "'\n"; 20 | } 
-------------------------------------------------------------------------------- /example/optional.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace detail 6 | { 7 | // Until we get std::optional::transform in C++23, we have to implement this manually 8 | template 9 | constexpr auto transform(Opt &&opt, Func &&func) 10 | { 11 | using result_type = std::remove_reference_t(func), *std::forward(opt)))>; 12 | 13 | if (!opt.has_value()) 14 | return std::optional{std::nullopt}; 15 | return std::optional{std::invoke(std::forward(func), *std::forward(opt))}; 16 | } 17 | } 18 | 19 | using my_regex = meta::regex[A-Z][a-z]+))">; 20 | 21 | constexpr auto try_parse_day(std::string_view text) -> std::optional 22 | { 23 | // Regex results can be converted to std::optional 24 | // This makes it easier to use standard interface methods like transform, and_then, or_else etc. 25 | return detail::transform( 26 | my_regex::match(text).as_optional(), 27 | meta::get_group_named<"day"> 28 | ); 29 | } 30 | 31 | int main() 32 | { 33 | if (auto opt_match = try_parse_day("Today is Monday")) 34 | std::cout << "Matched day: " << *opt_match << '\n'; 35 | else 36 | std::cout << "Not matched :(\n"; 37 | } -------------------------------------------------------------------------------- /example/regex_range.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | using word_regex = meta::regex; 7 | 8 | // The find_all() method returns an input range that lazily generates matches 9 | std::cout << "With find_all():\n"; 10 | for (auto &&word : word_regex::find_all("Find all word-like sequences in this string!")) 11 | std::cout << word << '\n'; 12 | 13 | // Tokenizing stops at the first non-match position (in this case, after matching "Find") 14 | std::cout << "\nWith tokenize():\n"; 15 | for (auto &&token : 
word_regex::tokenize("Find all word-like sequences in this string!")) 16 | std::cout << token << '\n'; 17 | } -------------------------------------------------------------------------------- /example/regex_searcher.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main() 7 | { 8 | // This regex matches words with 4 or 5 letters 9 | using my_regex = meta::regex; 10 | 11 | std::string_view text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."; 12 | constexpr auto word_count = 5; 13 | std::vector words(word_count); 14 | // Fills the vector with the generated matches 15 | // When using contiguous iterators, captures can be implicitly converted to std::string_view 16 | std::generate(words.begin(), words.end(), my_regex::searcher(text)); 17 | for (auto &&word : words) 18 | std::cout << word << '\n'; 19 | } -------------------------------------------------------------------------------- /example/structured_bindings.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | // This regex has 4 capturing groups (counting the implicit group 0) 7 | // Its match result can be decomposed into 4 elements 8 | using date_regex = meta::regex; 9 | 10 | auto const get_date_info = []() -> meta::match_result { 11 | std::string date = "Today is 07/08/2021"; 12 | // By default, all methods return results that have views into the original input string 13 | // In this case, the lifetime of the original std::string is too short 14 | // By calling as_memory_owner(), a deep copy of all captures is performed (with heap allocations) 15 | return date_regex::search(date).as_memory_owner(); 16 | }; 17 | // Each variable binds to one capturing group 18 | auto [_, day, month, year] = get_date_info(); 19 | std::cout << "Day: " << day << "\nMonth: " << month << "\nYear: " << year << '\n'; 20 | } 
-------------------------------------------------------------------------------- /example/url_parsing.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | using url_regex = meta::regex; 7 | 8 | // Matching can also be done at compile-time 9 | constexpr std::string_view url = "https://username:password@hostname.com:8080/path/to/resource?id=12345"; 10 | constexpr auto result = url_regex::match(url); 11 | // The matched() method can be used to check if the regex has matched 12 | std::cout << "Matched: " << result.matched() << '\n'; 13 | // Captured content can be accessed with the group<>() method 14 | // Group 0 is the entire regex, other captures are indexed from 1 15 | std::cout << "Schema: " << result.group<1>() << '\n'; 16 | std::cout << "User: " << result.group<2>() << '\n'; 17 | std::cout << "Pass: " << result.group<3>() << '\n'; 18 | std::cout << "Host: " << result.group<4>() << '\n'; 19 | std::cout << "Port: " << result.group<5>() << '\n'; 20 | std::cout << "Path: " << result.group<6>() << '\n'; 21 | std::cout << "Query: " << result.group<7>() << "\n"; 22 | } -------------------------------------------------------------------------------- /example/using_expressions.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Alias for convenience 5 | namespace xpr = meta::xpr; 6 | 7 | int main() 8 | { 9 | /** 10 | * Instead of defining the entire regex as a string, we can build it from smaller components, like a C++ expression. 11 | * 12 | * This example will use overloaded operators to replace standard regex operators. 13 | * You can also use functions like xpr::concat() or xpr::either() to achieve the same effect. 14 | * 15 | * Concatenation is done with the >> operator. 16 | * Alternation is done with the | operator. 17 | * The Kleene star and plus quantifiers are achieved with unary * and + operators. 
18 | * Negation of character classes and lookarounds is done with the ! operator. 19 | */ 20 | 21 | // Opting in for overloaded operators is done by importing the namespace xpr::operators 22 | using namespace xpr::operators; 23 | 24 | // Constructing a regex equivalent to '(?:ftp|ssh|https?)://(?:[-.a-z])+.com(?:/\S*)?' 25 | auto schema = xpr::str<"ftp"> | xpr::str<"ssh"> | xpr::regex<"https?">; 26 | auto domain = +xpr::regex<"[-.a-z]"> >> xpr::str<".com">; 27 | auto path = xpr::chr<'/'> >> *!xpr::whitespace; 28 | auto url = schema >> xpr::str<"://"> >> xpr::capture<1, "domain">(domain) >> xpr::maybe(path); 29 | 30 | if (auto match = url.match("https://google.com")) 31 | std::cout << "Matched domain: " << match.group<"domain">() << '\n'; 32 | else 33 | std::cout << "Not matched :(\n"; 34 | } -------------------------------------------------------------------------------- /include/mregex.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_HPP 2 | #define MREGEX_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #endif //MREGEX_HPP -------------------------------------------------------------------------------- /include/mregex/ast/ast.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_AST_HPP 2 | #define MREGEX_AST_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #endif //MREGEX_AST_HPP -------------------------------------------------------------------------------- /include/mregex/ast/astfwd.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_AST_ASTFWD_HPP 2 | #define MREGEX_AST_ASTFWD_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::ast 9 | { 10 | template 11 | struct sequence; 12 | 13 | template 14 
| struct alternation; 15 | 16 | template 17 | struct capture; 18 | 19 | template 20 | using named_capture = capture, Inner>; 21 | 22 | template 23 | using unnamed_capture = capture; 24 | 25 | // Repetition 26 | template 27 | struct basic_repetition; 28 | 29 | template 30 | using repetition = basic_repetition; 31 | 32 | template 33 | using lazy_repetition = basic_repetition; 34 | 35 | template 36 | using possessive_repetition = basic_repetition; 37 | 38 | // Fixed repetition 39 | template 40 | using basic_fixed_repetition = basic_repetition, symbol::quantifier_value, Inner>; 41 | 42 | template 43 | using fixed_repetition = basic_fixed_repetition; 44 | 45 | template 46 | using lazy_fixed_repetition = basic_fixed_repetition; 47 | 48 | template 49 | using possessive_fixed_repetition = basic_fixed_repetition; 50 | 51 | template 52 | using atomic = possessive_fixed_repetition<1, Inner>; 53 | 54 | // Kleene star 55 | template 56 | using basic_star = basic_repetition, symbol::infinity, Inner>; 57 | 58 | template 59 | using star = basic_star; 60 | 61 | template 62 | using lazy_star = basic_star; 63 | 64 | template 65 | using possessive_star = basic_star; 66 | 67 | // Plus 68 | template 69 | using basic_plus = basic_repetition, symbol::infinity, Inner>; 70 | 71 | template 72 | using plus = basic_plus; 73 | 74 | template 75 | using lazy_plus = basic_plus; 76 | 77 | template 78 | using possessive_plus = basic_plus; 79 | 80 | // Optional 81 | template 82 | using basic_optional = basic_repetition, symbol::quantifier_value<1>, Inner>; 83 | 84 | template 85 | using optional = basic_optional; 86 | 87 | template 88 | using lazy_optional = basic_optional; 89 | 90 | template 91 | using possessive_optional = basic_optional; 92 | 93 | template 94 | struct set; 95 | 96 | template 97 | struct negated; 98 | 99 | // Terminals 100 | struct empty; 101 | struct beginning_of_line; 102 | struct beginning_of_input; 103 | struct end_of_line; 104 | struct end_of_input; 105 | struct word_boundary; 
106 | struct wildcard; 107 | 108 | template 109 | struct literal; 110 | 111 | template 112 | struct range; 113 | 114 | using lower = range<'a', 'z'>; 115 | using upper = range<'A', 'Z'>; 116 | using digit = range<'0', '9'>; 117 | 118 | using nothing = set<>; 119 | using word = set>; 120 | using linebreak = set, literal<'\r'>>; 121 | using whitespace = set, literal<'\t'>, literal<'\v'>, literal<'\f'>>; 122 | 123 | template 124 | struct backref; 125 | 126 | template 127 | struct named_backref; 128 | 129 | // Lookarounds 130 | template 131 | struct lookaround; 132 | 133 | template 134 | using positive_lookahead = lookaround; 135 | 136 | template 137 | using negative_lookahead = negated>; 138 | 139 | template 140 | using positive_lookbehind = lookaround; 141 | 142 | template 143 | using negative_lookbehind = negated>; 144 | } 145 | #endif //MREGEX_AST_ASTFWD_HPP -------------------------------------------------------------------------------- /include/mregex/ast/build.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_AST_BUILD_HPP 2 | #define MREGEX_AST_BUILD_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace meta::ast 11 | { 12 | /** 13 | * Metafunction used to build the Abstract Syntax Tree. 
14 | * 15 | * @tparam Symbol The current symbol on the parsing stack 16 | * @tparam Token The current token being parsed 17 | * @tparam Nodes The stack with the AST nodes 18 | */ 19 | template 20 | struct build; 21 | 22 | template 23 | using build_t = typename build::type; 24 | 25 | template 26 | struct build, Nodes> 27 | { 28 | using type = push_t>; 29 | }; 30 | 31 | template 32 | struct build, Token, Nodes> 33 | { 34 | using type = push_t>; 35 | }; 36 | 37 | template 38 | struct build 39 | { 40 | using type = push_t; 41 | }; 42 | 43 | template 44 | struct build 45 | { 46 | using type = push_t; 47 | }; 48 | 49 | template 50 | struct build 51 | { 52 | using type = push_t; 53 | }; 54 | 55 | template 56 | struct build 57 | { 58 | using type = push_t; 59 | }; 60 | 61 | template 62 | struct build 63 | { 64 | using type = push_t; 65 | }; 66 | 67 | template 68 | struct build 69 | { 70 | using type = push_t; 71 | }; 72 | 73 | template 74 | struct build 75 | { 76 | using type = push_t; 77 | }; 78 | 79 | template 80 | struct build 81 | { 82 | using type = push_t; 83 | }; 84 | 85 | template 86 | struct build 87 | { 88 | using type = push_t; 89 | }; 90 | 91 | template 92 | struct build 93 | { 94 | using type = push_t; 95 | }; 96 | 97 | template 98 | struct build 99 | { 100 | using type = push_t; 101 | }; 102 | 103 | template 104 | struct build, Token, type_sequence> 105 | { 106 | using type = type_sequence, Rest ...>; 107 | }; 108 | 109 | template 110 | struct build> 111 | { 112 | using type = type_sequence, Rest ...>; 113 | }; 114 | 115 | // Combine any two non-sequence symbols into a sequence 116 | template 117 | struct build> 118 | { 119 | using type = type_sequence, Rest ...>; 120 | }; 121 | 122 | // Add current symbol to previous sequence 123 | template 124 | struct build, Rest ...>> 125 | { 126 | using type = type_sequence, Rest ...>; 127 | }; 128 | 129 | // Add previous symbol to current sequence 130 | template 131 | struct build, Second, Rest ...>> 132 | { 133 | using 
type = type_sequence, Rest ...>; 134 | }; 135 | 136 | // Concatenate two sequences 137 | template 138 | struct build, sequence, Rest ...>> 139 | { 140 | using type = type_sequence, Rest ...>; 141 | }; 142 | 143 | template 144 | struct build> 145 | { 146 | using type = type_sequence, Rest ...>; 147 | }; 148 | 149 | template 150 | struct build, Rest ...>> 151 | { 152 | using type = type_sequence, Rest ...>; 153 | }; 154 | 155 | template 156 | struct build, Token, type_sequence> 157 | { 158 | static constexpr std::size_t group_id = (capture_count + ... + capture_count) + 1; 159 | 160 | using type = type_sequence, Rest ...>; 161 | }; 162 | 163 | template 164 | struct build, Token, type_sequence> 165 | { 166 | using type = type_sequence, Nodes ...>; 167 | }; 168 | 169 | template 170 | struct build, Token, type_sequence> 171 | { 172 | using type = type_sequence, Nodes ...>; 173 | }; 174 | 175 | template 176 | struct build, Token, type_sequence> 177 | { 178 | using type = type_sequence, Rest ...>; 179 | }; 180 | 181 | // Set building rules 182 | template 183 | struct build> 184 | { 185 | using type = type_sequence, Nodes ...>; 186 | }; 187 | 188 | // Create set from current character 189 | template 190 | struct build, type_sequence> 191 | { 192 | using type = type_sequence>, Nodes ...>; 193 | }; 194 | 195 | template 196 | struct build, type_sequence, Rest ...>> 197 | { 198 | using type = type_sequence, First ...>, Rest ...>; 199 | }; 200 | 201 | // Create set from the last generated AST node 202 | template 203 | struct build> 204 | { 205 | using type = type_sequence, Rest ...>; 206 | }; 207 | 208 | template 209 | struct build, Rest ...>> 210 | { 211 | using type = type_sequence, Rest ...>; 212 | }; 213 | 214 | template 215 | struct build, set, Rest ...>> 216 | { 217 | using type = type_sequence, Rest ...>; 218 | }; 219 | 220 | // Create a range inside the set 221 | template 222 | struct build, type_sequence, Second ...>, Rest ...>> 223 | { 224 | using type = 
type_sequence, Second ...>, Rest ...>; 225 | }; 226 | 227 | template 228 | struct build, type_sequence, Rest ...>> 229 | { 230 | using type = type_sequence, literal<'-'>, First, Second ...>, Rest ...>; 231 | }; 232 | 233 | // Create a range from two last generated AST nodes in the set 234 | template 235 | struct build, literal, Second ...>, Rest ...>> 236 | { 237 | using type = type_sequence, Second ...>, Rest ...>; 238 | }; 239 | 240 | template 241 | struct build, First, Second ...>, Rest ...>> 242 | { 243 | using type = type_sequence, literal<'-'>, First, Second ...>, Rest ...>; 244 | }; 245 | } 246 | #endif //MREGEX_AST_BUILD_HPP -------------------------------------------------------------------------------- /include/mregex/ast/indexing.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_AST_INDEXING_HPP 2 | #define MREGEX_AST_INDEXING_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | namespace detail 12 | { 13 | template 14 | struct capture_index_offsets; 15 | 16 | template 17 | using capture_index_offsets_t = typename capture_index_offsets::type; 18 | 19 | template 20 | struct capture_index_offsets 21 | { 22 | using type = std::index_sequence<>; 23 | }; 24 | 25 | template 26 | struct capture_index_offsets 27 | { 28 | using rest_offsets = capture_index_offsets_t; 29 | using type = decltype([](std::index_sequence) { 30 | return std::index_sequence{}; 31 | }(rest_offsets{})); 32 | }; 33 | 34 | template 35 | struct capture_indexer 36 | { 37 | template 38 | struct index : transform {}; 39 | 40 | template 41 | requires (capture_count == 0) 42 | struct index 43 | { 44 | using type = Node; 45 | }; 46 | 47 | template 48 | struct index> 49 | { 50 | using indexed_inner_type = typename capture_indexer::template index_t; 51 | using type = capture; 52 | }; 53 | 54 | template 55 | struct index> 56 | { 57 | using inner_offsets = capture_index_offsets_t ...>; 58 | using type 
= decltype([](std::index_sequence) { 59 | return sequence::template index::type ...>{}; 60 | }(inner_offsets{})); 61 | }; 62 | 63 | template 64 | using index_t = typename index::type; 65 | }; 66 | } 67 | 68 | /** 69 | * Metafunction that indexes capturing groups according to Preorder AST traversal. 70 | * During expression parsing and initial AST generation, capturing groups are 71 | * assigned IDs according to Postorder traversal. This is due to the stack-like 72 | * behavior of the parsing algorithm. 73 | * 74 | * @tparam Node The AST node type being indexed 75 | */ 76 | template 77 | using index_captures_t = typename detail::capture_indexer<1>::template index_t; 78 | } 79 | #endif //MREGEX_AST_INDEXING_HPP -------------------------------------------------------------------------------- /include/mregex/ast/inversion.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_AST_INVERSION_HPP 2 | #define MREGEX_AST_INVERSION_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::ast 9 | { 10 | namespace detail 11 | { 12 | template 13 | constexpr auto to_sequence(type_sequence) noexcept -> sequence { return {}; } 14 | } 15 | 16 | /** 17 | * Metafunction that inverts (mirrors) the AST at compile-time. This is required to implement 18 | * lookbehind assertions which need to match a regular expression backwards. 
19 | * 20 | * @tparam Node The AST node type being inverted 21 | */ 22 | template 23 | struct invert : transform {}; 24 | 25 | template 26 | using invert_t = typename invert::type; 27 | 28 | template 29 | struct invert> 30 | { 31 | using type = decltype(detail::to_sequence(reverse_t ...>>{})); 32 | }; 33 | 34 | template 35 | struct invert> 36 | { 37 | using type = positive_lookbehind; 38 | }; 39 | 40 | template 41 | struct invert> 42 | { 43 | using type = negative_lookbehind; 44 | }; 45 | 46 | template 47 | struct invert> 48 | { 49 | using type = positive_lookahead; 50 | }; 51 | 52 | template 53 | struct invert> 54 | { 55 | using type = negative_lookahead; 56 | }; 57 | } 58 | #endif //MREGEX_AST_INVERSION_HPP -------------------------------------------------------------------------------- /include/mregex/ast/match_result.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_AST_MATCH_RESULT_HPP 2 | #define MREGEX_AST_MATCH_RESULT_HPP 3 | 4 | #include 5 | 6 | namespace meta::ast 7 | { 8 | /** 9 | * Data structure returned by functions that match AST nodes. 
10 | */ 11 | template 12 | struct match_result 13 | { 14 | Iter end{}; 15 | bool matched{}; 16 | 17 | constexpr explicit operator bool() const noexcept 18 | { 19 | return matched; 20 | } 21 | 22 | constexpr bool operator==(bool value) const noexcept 23 | { 24 | return matched == value; 25 | } 26 | }; 27 | 28 | template 29 | static constexpr auto non_match(Iter current) noexcept -> match_result 30 | { 31 | return {current, false}; 32 | } 33 | } 34 | #endif //MREGEX_AST_MATCH_RESULT_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/alternation.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_ALTERNATION_HPP 2 | #define MREGEX_NODES_ALTERNATION_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | template 12 | struct alternation 13 | { 14 | template Cont> 15 | static constexpr auto match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 16 | -> match_result 17 | { 18 | if (auto first_match = First::match(begin, end, current, ctx, cont)) 19 | return first_match; 20 | return alternation::match(begin, end, current, ctx, cont); 21 | } 22 | }; 23 | 24 | template 25 | requires are_trivially_matchable 26 | struct alternation : set {}; 27 | 28 | template 29 | struct alternation : Inner {}; 30 | } 31 | #endif //MREGEX_NODES_ALTERNATION_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/assertion.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_ASSERTION_HPP 2 | #define MREGEX_NODES_ASSERTION_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::ast 9 | { 10 | /** 11 | * Base type for AST nodes which indicate assertions. 12 | * Assertions are zero-length matchers that test the ability to match a pattern 13 | * without consuming any input characters. 
14 | * 15 | * @tparam Predicate A predicate type which is used to test for a match at a given position 16 | */ 17 | template 18 | struct assertion : zero_length_matcher 19 | { 20 | using predicate_type = Predicate; 21 | 22 | template Cont> 23 | static constexpr auto match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 24 | -> match_result 25 | { 26 | if (Predicate::is_match(begin, end, current, ctx)) 27 | return cont(current); 28 | return non_match(current); 29 | } 30 | }; 31 | } 32 | #endif //MREGEX_NODES_ASSERTION_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/capture.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_CAPTURE_HPP 2 | #define MREGEX_NODES_CAPTURE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | /** 12 | * @note Since matching captures implies side effects like storing and clearing matched 13 | * content, they cannot be considered trivially matchable even when the inner node is. 
14 | */ 15 | template 16 | struct capture 17 | { 18 | template Cont> 19 | static constexpr auto match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 20 | -> match_result 21 | { 22 | auto continuation = [=, &ctx, &cont](Iter next) noexcept -> match_result { 23 | capture_matched_range(current, next, ctx); 24 | return cont(next); 25 | }; 26 | if (auto inner_match = Inner::match(begin, end, current, ctx, continuation)) 27 | return inner_match; 28 | std::get(ctx.captures).clear(); 29 | return non_match(current); 30 | } 31 | 32 | private: 33 | template 34 | static constexpr auto capture_matched_range(Iter begin, Iter end, Context &ctx) noexcept 35 | { 36 | using base_iterator = typename Context::iterator; 37 | using capture_view_type = regex_capture_view; 38 | 39 | // Iterator types might be different if matching was done inside a lookbehind 40 | if constexpr (std::is_same_v) 41 | std::get(ctx.captures) = capture_view_type{begin, end}; 42 | else 43 | std::get(ctx.captures) = capture_view_type{end.base(), begin.base()}; 44 | } 45 | }; 46 | } 47 | #endif //MREGEX_NODES_CAPTURE_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/lookaround.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_LOOKAROUND_HPP 2 | #define MREGEX_NODES_LOOKAROUND_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | template 12 | struct lookaround: assertion> {}; 13 | } 14 | #endif //MREGEX_NODES_LOOKAROUND_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/negated.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_NEGATED_HPP 2 | #define MREGEX_NODES_NEGATED_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | template 12 | struct negated : 
trivially_matchable> 13 | { 14 | static_assert(is_trivially_matchable, "to negate a generic AST node, it must be trivially matchable"); 15 | 16 | template 17 | static constexpr bool match_one(char input, Context &ctx) noexcept 18 | { 19 | return !Inner::match_one(input, ctx); 20 | } 21 | }; 22 | 23 | template 24 | requires is_assertion 25 | struct negated : assertion> {}; 26 | } 27 | #endif //MREGEX_NODES_NEGATED_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/quantifiers/fixed_repetition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_FIXED_REPETITION_HPP 2 | #define MREGEX_NODES_FIXED_REPETITION_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace meta::ast 11 | { 12 | template 13 | struct basic_repetition, symbol::quantifier_value, Inner> 14 | { 15 | static_assert(N > 1, "this specialization does not handle trivial repetitions"); 16 | 17 | template Cont> 18 | static constexpr auto match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 19 | -> match_result 20 | { 21 | if constexpr (Mode == match_mode::possessive) 22 | return possessive_match(begin, end, current, ctx, cont); 23 | else 24 | return backtracking_match(begin, end, current, ctx, cont); 25 | } 26 | 27 | private: 28 | template Cont> 29 | static constexpr auto possessive_match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 30 | -> match_result 31 | { 32 | if (auto result = backtracking_match(begin, end, current, ctx, continuations::success)) 33 | return cont(result.end); 34 | return non_match(current); 35 | } 36 | 37 | template Cont> 38 | static constexpr auto backtracking_match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 39 | -> match_result 40 | { 41 | if constexpr (Context::flags::unroll) 42 | return unrolled_backtracking_match(begin, end, current, ctx, cont); 43 | 
else 44 | return non_unrolled_backtracking_match(begin, end, current, ctx, cont); 45 | } 46 | 47 | template Cont> 48 | static constexpr auto backtracking_match(Iter /*begin*/, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 49 | -> match_result 50 | requires is_trivially_matchable 51 | { 52 | if (distance_less_than(current, end)) 53 | return non_match(current); 54 | if constexpr (Context::flags::unroll) 55 | return unrolled_trivial_match(current, ctx, cont, std::make_index_sequence{}); 56 | else 57 | return non_unrolled_trivial_match(current, ctx, cont); 58 | } 59 | 60 | template Cont> 61 | static constexpr auto unrolled_backtracking_match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 62 | -> match_result 63 | { 64 | auto continuation = [=, &ctx, &cont](Iter next) noexcept -> match_result { 65 | return fixed_repetition::match(begin, end, next, ctx, cont); 66 | }; 67 | return Inner::match(begin, end, current, ctx, continuation); 68 | } 69 | 70 | template Cont> 71 | static constexpr auto non_unrolled_backtracking_match( 72 | Iter begin, Iter end, Iter current, 73 | Context &ctx, Cont &&cont, 74 | std::size_t repeats = N 75 | ) noexcept -> match_result 76 | { 77 | if (repeats == 1) 78 | return Inner::match(begin, end, current, ctx, cont); 79 | auto continuation = [=, &ctx, &cont](Iter next) noexcept -> match_result { 80 | return non_unrolled_backtracking_match(begin, end, next, ctx, cont, repeats - 1); 81 | }; 82 | return Inner::match(begin, end, current, ctx, continuation); 83 | } 84 | 85 | template Cont, std::size_t... Indices> 86 | static constexpr auto unrolled_trivial_match( 87 | Iter current, Context &ctx, Cont &&cont, 88 | std::index_sequence 89 | ) noexcept -> match_result 90 | { 91 | if ((Inner::match_one(*std::next(current, Indices), ctx) && ...)) 92 | return cont(std::next(current, N)); 93 | return non_match(current); 94 | } 95 | 96 | template Cont, std::size_t... 
Indices> 97 | static constexpr auto unrolled_trivial_match( 98 | Iter current, Context &ctx, Cont &&cont, 99 | std::index_sequence 100 | ) noexcept -> match_result 101 | { 102 | if (((Indices, Inner::match_one(*current++, ctx)) && ...)) 103 | return cont(current); 104 | return non_match(current); 105 | } 106 | 107 | template Cont> 108 | static constexpr auto non_unrolled_trivial_match(Iter current, Context &ctx, Cont &&cont) noexcept 109 | -> match_result 110 | { 111 | for (std::size_t offset = 0; offset != N; ++offset) 112 | { 113 | if (!Inner::match_one(*std::next(current, offset), ctx)) 114 | return non_match(current); 115 | } 116 | return cont(std::next(current, N)); 117 | } 118 | 119 | template Cont> 120 | static constexpr auto non_unrolled_trivial_match(Iter current, Context &ctx, Cont &&cont) noexcept 121 | -> match_result 122 | { 123 | for (std::size_t match_count = 0; match_count != N; ++match_count) 124 | { 125 | if (!Inner::match_one(*current++, ctx)) 126 | return non_match(current); 127 | } 128 | return cont(current); 129 | } 130 | }; 131 | 132 | /** 133 | * @note A possessive quantifier with exactly 1 repetition is essentially an atomic group. 134 | * This specialization is needed to implement atomic matching for non-trivial AST nodes. 135 | * For trivial nodes, atomic groups have no effect and can be ignored. 
136 | */ 137 | template 138 | requires (!is_trivially_matchable) 139 | struct basic_repetition, symbol::quantifier_value<1>, Inner> 140 | { 141 | template Cont> 142 | static constexpr auto match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 143 | -> match_result 144 | { 145 | if (auto inner_match = Inner::match(begin, end, current, ctx, continuations::success)) 146 | return cont(inner_match.end); 147 | return non_match(current); 148 | } 149 | }; 150 | 151 | template 152 | struct basic_repetition, symbol::quantifier_value<1>, Inner> : Inner {}; 153 | 154 | template 155 | struct basic_repetition, symbol::quantifier_value<0>, Inner> : empty {}; 156 | } 157 | #endif //MREGEX_NODES_FIXED_REPETITION_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/quantifiers/repetition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_REPETITION_HPP 2 | #define MREGEX_NODES_REPETITION_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | template 12 | struct basic_repetition 13 | { 14 | static_assert(symbol::is_valid_range, "invalid range bounds"); 15 | static_assert(!std::is_same_v, "this implementation does not handle fixed repetitions"); 16 | 17 | template Cont> 18 | static constexpr auto match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 19 | -> match_result 20 | { 21 | auto continuation = [=, &ctx, &cont](Iter next) noexcept -> match_result { 22 | return match_between_bounds(begin, end, next, ctx, cont); 23 | }; 24 | return basic_fixed_repetition, Inner>::match(begin, end, current, ctx, continuation); 25 | } 26 | 27 | template Cont> 28 | static constexpr auto match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 29 | -> match_result 30 | requires symbol::is_zero 31 | { 32 | return match_between_bounds(begin, end, current, ctx, cont); 33 | } 
34 | 35 | private: 36 | template Cont> 37 | static constexpr auto match_between_bounds(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 38 | -> match_result 39 | { 40 | using range_size = symbol::subtract_t; 41 | 42 | if constexpr (Mode == match_mode::possessive) 43 | return bounded_possessive_match(begin, end, current, ctx, cont); 44 | else if constexpr ((Mode == match_mode::greedy) ^ Context::flags::ungreedy) 45 | return bounded_greedy_match(begin, end, current, ctx, cont); 46 | else 47 | return bounded_lazy_match(begin, end, current, ctx, cont); 48 | } 49 | 50 | template Cont> 51 | static constexpr auto bounded_possessive_match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 52 | -> match_result 53 | { 54 | for (std::size_t match_count = 0; !symbol::equals(match_count); ++match_count) 55 | { 56 | auto inner_match = Inner::match(begin, end, current, ctx, continuations::success); 57 | if (!inner_match || will_loop_forever(current, inner_match.end)) 58 | break; 59 | current = inner_match.end; 60 | } 61 | return cont(current); 62 | } 63 | 64 | template Cont> 65 | static constexpr auto bounded_possessive_match(Iter /*begin*/, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 66 | -> match_result 67 | requires is_trivially_matchable 68 | { 69 | for (std::size_t match_count = 0; !symbol::equals(match_count); ++match_count) 70 | { 71 | if (current == end || !Inner::match_one(*current, ctx)) 72 | break; 73 | ++current; 74 | } 75 | return cont(current); 76 | } 77 | 78 | template Cont> 79 | static constexpr auto bounded_greedy_match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 80 | -> match_result 81 | { 82 | if constexpr (!symbol::is_zero) 83 | { 84 | auto continuation = [=, &ctx, &cont](Iter next) noexcept -> match_result { 85 | return bounded_greedy_match>(begin, end, next, ctx, cont); 86 | }; 87 | if (auto inner_match = Inner::match(begin, end, current, ctx, 
continue_unless_infinite_loop(current, continuation))) 88 | return inner_match; 89 | } 90 | return cont(current); 91 | } 92 | 93 | template Cont> 94 | static constexpr auto bounded_greedy_match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 95 | -> match_result 96 | requires is_trivially_matchable 97 | { 98 | auto continuation = [initial = current, &cont](Iter next) noexcept -> match_result { 99 | while (true) 100 | { 101 | if (auto rest_match = cont(next)) 102 | return rest_match; 103 | if (next == initial) 104 | break; 105 | --next; 106 | } 107 | return non_match(initial); 108 | }; 109 | return bounded_possessive_match(begin, end, current, ctx, continuation); 110 | } 111 | 112 | template Cont> 113 | static constexpr auto bounded_lazy_match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 114 | -> match_result 115 | { 116 | if (auto rest_match = cont(current)) 117 | return rest_match; 118 | if constexpr (!symbol::is_zero) 119 | { 120 | auto continuation = [=, &ctx, &cont](Iter next) noexcept -> match_result { 121 | return bounded_lazy_match>(begin, end, next, ctx, cont); 122 | }; 123 | return Inner::match(begin, end, current, ctx, continue_unless_infinite_loop(current, continuation)); 124 | } 125 | return non_match(current); 126 | } 127 | 128 | template Cont> 129 | static constexpr auto bounded_lazy_match(Iter /*begin*/, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 130 | -> match_result 131 | requires is_trivially_matchable 132 | { 133 | for (std::size_t match_count = 0;; ++match_count, ++current) 134 | { 135 | if (auto rest_match = cont(current)) 136 | return rest_match; 137 | if (symbol::equals(match_count) || current == end || !Inner::match_one(*current, ctx)) 138 | break; 139 | } 140 | return non_match(current); 141 | } 142 | 143 | template Cont> 144 | static constexpr auto continue_unless_infinite_loop(Iter current, Cont &&cont) noexcept 145 | { 146 | return [=, &cont](Iter next) noexcept -> 
match_result { 147 | if (will_loop_forever(current, next)) 148 | return non_match(current); 149 | return cont(next); 150 | }; 151 | } 152 | 153 | template 154 | static constexpr bool will_loop_forever(Iter /*current*/, Iter /*next*/) noexcept 155 | { 156 | return false; 157 | } 158 | 159 | template 160 | static constexpr bool will_loop_forever(Iter current, Iter next) noexcept 161 | requires (symbol::is_infinity && is_zero_length_matcher) 162 | { 163 | return current == next; 164 | } 165 | }; 166 | } 167 | #endif //MREGEX_NODES_REPETITION_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/sequence.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_SEQUENCE_HPP 2 | #define MREGEX_NODES_SEQUENCE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace meta::ast 11 | { 12 | template 13 | struct sequence 14 | { 15 | static constexpr std::size_t size = 1 + sizeof...(Rest); 16 | 17 | template Cont> 18 | static constexpr auto match(Iter begin, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 19 | -> match_result 20 | { 21 | auto continuation = [=, &ctx, &cont](Iter next) noexcept -> match_result { 22 | return sequence::match(begin, end, next, ctx, cont); 23 | }; 24 | return First::match(begin, end, current, ctx, continuation); 25 | } 26 | 27 | template Cont> 28 | static constexpr auto match(Iter /*begin*/, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 29 | -> match_result 30 | requires are_trivially_matchable 31 | { 32 | if (distance_less_than(current, end)) 33 | return non_match(current); 34 | if constexpr (std::random_access_iterator) 35 | return unrolled_trivial_match(current, ctx, cont, std::make_index_sequence{}); 36 | else 37 | return unrolled_trivial_match(current, ctx, cont); 38 | } 39 | 40 | private: 41 | template Cont, std::size_t... 
Indices> 42 | static constexpr auto unrolled_trivial_match( 43 | Iter current, Context &ctx, Cont &&cont, 44 | std::index_sequence<0, Indices ...> 45 | ) noexcept -> match_result 46 | { 47 | if (First::match_one(*current, ctx) && (Rest::match_one(*std::next(current, Indices), ctx) && ...)) 48 | return cont(std::next(current, size)); 49 | return non_match(current); 50 | } 51 | 52 | template Cont> 53 | static constexpr auto unrolled_trivial_match(Iter current, Context &ctx, Cont &&cont) noexcept 54 | -> match_result 55 | { 56 | if (First::match_one(*current, ctx) && (Rest::match_one(*++current, ctx) && ...)) 57 | return cont(std::next(current)); 58 | return non_match(current); 59 | } 60 | }; 61 | 62 | template 63 | struct sequence : Inner {}; 64 | } 65 | #endif //MREGEX_NODES_SEQUENCE_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/set.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_SET_HPP 2 | #define MREGEX_NODES_SET_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta::ast 8 | { 9 | template 10 | struct set : trivially_matchable> 11 | { 12 | static_assert(are_trivially_matchable, "only trivially matchable AST nodes can form a set"); 13 | 14 | template 15 | static constexpr bool match_one(char input, Context &ctx) noexcept 16 | { 17 | return (Nodes::match_one(input, ctx) || ...); 18 | } 19 | }; 20 | } 21 | #endif //MREGEX_NODES_SET_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/terminals/anchors.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_ANCHORS_HPP 2 | #define MREGEX_NODES_ANCHORS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::ast 9 | { 10 | struct beginning_of_line : assertion> {}; 11 | struct beginning_of_input : assertion> {}; 12 | struct end_of_line : assertion> {}; 13 | struct 
end_of_input : assertion> {}; 14 | struct word_boundary : assertion {}; 15 | } 16 | #endif //MREGEX_NODES_ANCHORS_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/terminals/backref.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_BACKREF_HPP 2 | #define MREGEX_NODES_BACKREF_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace meta::ast 13 | { 14 | namespace detail 15 | { 16 | template 17 | struct id_lookup_method 18 | { 19 | template 20 | static constexpr decltype(auto) get_capture(CaptureStorage &captures) noexcept 21 | { 22 | return std::get(captures); 23 | } 24 | }; 25 | 26 | template 27 | struct name_lookup_method 28 | { 29 | template 30 | static constexpr decltype(auto) get_capture(CaptureStorage &captures) noexcept 31 | { 32 | using capture_type = rename_capture_t, symbol::name>; 33 | return std::get(captures); 34 | } 35 | }; 36 | } 37 | 38 | template 39 | struct backref_base : zero_length_matcher 40 | { 41 | template Cont> 42 | static constexpr auto match(Iter /*begin*/, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 43 | -> match_result 44 | { 45 | auto const captured = LookupMethod::get_capture(ctx.captures); 46 | std::size_t const length_to_match = captured.length(); 47 | if (distance_less_than(length_to_match, current, end)) 48 | return non_match(current); 49 | 50 | // Lookbehinds use reverse iterators. 
To check for equality, iterator directions must be the same 51 | constexpr bool different_iterators = !std::is_same_v; 52 | for (auto target : reverse_if(captured)) 53 | { 54 | if (!equals(target, *current)) 55 | return non_match(current); 56 | ++current; 57 | } 58 | return cont(current); 59 | } 60 | 61 | private: 62 | template 63 | static constexpr auto reverse_if(Range &range) noexcept 64 | { 65 | if constexpr (Condition) 66 | return reversed_range_view{range}; 67 | else 68 | return range; 69 | } 70 | 71 | template 72 | static constexpr bool equals(char a, char b) noexcept 73 | { 74 | return a == b || (IgnoreCase && invert_case(a) == b); 75 | } 76 | }; 77 | 78 | template 79 | struct backref : backref_base> {}; 80 | 81 | template 82 | struct named_backref : backref_base> {}; 83 | } 84 | #endif //MREGEX_NODES_BACKREF_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/terminals/empty.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_EMPTY_HPP 2 | #define MREGEX_NODES_EMPTY_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | struct empty : zero_length_matcher 12 | { 13 | template Cont> 14 | static constexpr auto match(Iter /*begin*/, Iter /*end*/, Iter current, Context &, Cont &&cont) noexcept 15 | -> match_result 16 | { 17 | return cont(current); 18 | } 19 | }; 20 | } 21 | #endif //MREGEX_NODES_EMPTY_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/terminals/literal.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_LITERAL_HPP 2 | #define MREGEX_NODES_LITERAL_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta::ast 8 | { 9 | template 10 | struct literal : trivially_matchable> 11 | { 12 | template 13 | static constexpr bool match_one(char input, Context &) noexcept 14 | { 15 
| bool result = C == input; 16 | if constexpr (Context::flags::icase && is_alpha(C)) 17 | result = result || flip_lowercase_bit(C) == input; 18 | return result; 19 | } 20 | }; 21 | } 22 | #endif //MREGEX_NODES_LITERAL_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/terminals/range.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_RANGE_HPP 2 | #define MREGEX_NODES_RANGE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | namespace detail 12 | { 13 | template 14 | struct range_intersection 15 | { 16 | static constexpr std::uint8_t start = std::max(R1::start, R2::start); 17 | static constexpr std::uint8_t stop = std::min(R1::stop, R2::stop); 18 | static constexpr bool is_empty = start > stop; 19 | 20 | static constexpr bool contains(char input) noexcept 21 | { 22 | return is_in_ascii_range(input); 23 | } 24 | 25 | static constexpr bool contains(char) noexcept 26 | requires is_empty 27 | { 28 | return false; 29 | } 30 | }; 31 | } 32 | 33 | template 34 | struct range : trivially_matchable> 35 | { 36 | static constexpr std::uint8_t start = A; 37 | static constexpr std::uint8_t stop = B; 38 | 39 | static_assert(start < stop, "invalid range bounds"); 40 | 41 | template 42 | static constexpr bool match_one(char input, Context &) noexcept 43 | { 44 | bool result = is_in_ascii_range(input); 45 | if constexpr (Context::flags::icase) 46 | result = result || is_in_alpha_subrange(flip_lowercase_bit(input)); 47 | return result; 48 | } 49 | 50 | private: 51 | static constexpr bool is_in_alpha_subrange(char input) noexcept 52 | { 53 | using this_range = range; 54 | using lower_in_this_range = detail::range_intersection; 55 | using upper_in_this_range = detail::range_intersection; 56 | 57 | return lower_in_this_range::contains(input) || upper_in_this_range::contains(input); 58 | } 59 | }; 60 | 61 | template 62 | 
struct range : literal {}; 63 | } 64 | #endif //MREGEX_NODES_RANGE_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/terminals/wildcard.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_WILDCARD_HPP 2 | #define MREGEX_NODES_WILDCARD_HPP 3 | 4 | #include 5 | 6 | namespace meta::ast 7 | { 8 | struct wildcard : trivially_matchable 9 | { 10 | template 11 | static constexpr bool match_one(char input, Context &ctx) noexcept 12 | { 13 | if constexpr (Context::flags::dotall) 14 | return true; 15 | return !linebreak::match_one(input, ctx); 16 | } 17 | }; 18 | } 19 | #endif //MREGEX_NODES_WILDCARD_HPP -------------------------------------------------------------------------------- /include/mregex/ast/nodes/trivially_matchable.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_NODES_TRIVIALLY_MATCHABLE_HPP 2 | #define MREGEX_NODES_TRIVIALLY_MATCHABLE_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta::ast 8 | { 9 | /** 10 | * Base type that offers a generic matching interface for trivially matchable AST nodes. 
11 | * 12 | * @tparam Node The trivially matchable AST node derived from this type 13 | */ 14 | template 15 | struct trivially_matchable 16 | { 17 | template Cont> 18 | static constexpr auto match(Iter /*begin*/, Iter end, Iter current, Context &ctx, Cont &&cont) noexcept 19 | -> match_result 20 | { 21 | if (current != end && Node::match_one(*current, ctx)) 22 | return cont(std::next(current)); 23 | return non_match(current); 24 | } 25 | }; 26 | } 27 | #endif //MREGEX_NODES_TRIVIALLY_MATCHABLE_HPP -------------------------------------------------------------------------------- /include/mregex/ast/predicates/anchor_predicates.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_PREDICATES_ANCHOR_PREDICATES_HPP 2 | #define MREGEX_PREDICATES_ANCHOR_PREDICATES_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta::ast::predicates 8 | { 9 | template 10 | struct beginning_predicate 11 | { 12 | template 13 | static constexpr bool is_match(Iter begin, Iter /*end*/, Iter current, Context &ctx) noexcept 14 | { 15 | if (current == begin) 16 | return true; 17 | if constexpr (Mode == line_mode::multiline && Context::flags::multiline) 18 | { 19 | if (linebreak::match_one(*std::prev(current), ctx)) 20 | return true; 21 | } 22 | return false; 23 | } 24 | }; 25 | 26 | template 27 | struct end_predicate 28 | { 29 | template 30 | static constexpr bool is_match(Iter /*begin*/, Iter end, Iter current, Context &ctx) noexcept 31 | { 32 | if (current == end) 33 | return true; 34 | if constexpr (Mode == line_mode::multiline && Context::flags::multiline) 35 | { 36 | if (linebreak::match_one(*current, ctx)) 37 | return true; 38 | } 39 | return false; 40 | } 41 | }; 42 | 43 | struct word_boundary_predicate 44 | { 45 | template 46 | static constexpr bool is_match(Iter begin, Iter end, Iter current, Context &ctx) noexcept 47 | { 48 | bool is_word_behind = current != begin && word::match_one(*std::prev(current), ctx); 49 | bool is_word_ahead 
= current != end && word::match_one(*current, ctx); 50 | return is_word_behind ^ is_word_ahead; 51 | } 52 | }; 53 | } 54 | #endif //MREGEX_PREDICATES_ANCHOR_PREDICATES_HPP -------------------------------------------------------------------------------- /include/mregex/ast/predicates/lookaround_predicate.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_PREDICATES_LOOKAROUND_PREDICATE_HPP 2 | #define MREGEX_PREDICATES_LOOKAROUND_PREDICATE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::ast::predicates 9 | { 10 | template 11 | struct lookahead_predicate 12 | { 13 | template 14 | static constexpr bool is_match(Iter begin, Iter end, Iter current, Context &ctx) noexcept 15 | { 16 | auto inner_match = Inner::match(begin, end, current, ctx, continuations::success); 17 | return inner_match.matched; 18 | } 19 | 20 | template 21 | static constexpr bool is_match(Iter /*begin*/, Iter end, Iter current, Context &ctx) noexcept 22 | requires is_trivially_matchable 23 | { 24 | return current != end && Inner::match_one(*current, ctx); 25 | } 26 | }; 27 | 28 | template 29 | struct lookbehind_predicate 30 | { 31 | template 32 | static constexpr bool is_match(Iter begin, Iter end, Iter current, Context &ctx) noexcept 33 | { 34 | // For non-trivial nodes, the AST is inverted to match the regex backwards 35 | using ast_type = invert_t; 36 | using iterator = std::reverse_iterator; 37 | 38 | auto rbegin = std::make_reverse_iterator(end); // Reversed end becomes new begin 39 | auto rend = std::make_reverse_iterator(begin); // Reversed begin becomes new end 40 | auto rcurrent = std::make_reverse_iterator(current); 41 | auto result = ast_type::match(rbegin, rend, rcurrent, ctx, continuations::success); 42 | return result.matched; 43 | } 44 | 45 | template 46 | static constexpr bool is_match(Iter begin, Iter /*end*/, Iter current, Context &ctx) noexcept 47 | requires is_trivially_matchable 48 | { 49 | // For 
trivially matchable nodes, a single step backwards is enough 50 | return current != begin && Inner::match_one(*std::prev(current), ctx); 51 | } 52 | }; 53 | 54 | template 55 | struct lookaround_predicate; 56 | 57 | template 58 | struct lookaround_predicate : lookahead_predicate {}; 59 | 60 | template 61 | struct lookaround_predicate : lookbehind_predicate {}; 62 | } 63 | #endif //MREGEX_PREDICATES_LOOKAROUND_PREDICATE_HPP -------------------------------------------------------------------------------- /include/mregex/ast/predicates/negate.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_PREDICATES_NEGATE_HPP 2 | #define MREGEX_PREDICATES_NEGATE_HPP 3 | 4 | #include 5 | 6 | namespace meta::ast::predicates 7 | { 8 | template 9 | struct negate 10 | { 11 | template 12 | static constexpr bool is_match(Iter begin, Iter end, Iter current, Context &ctx) noexcept 13 | { 14 | return !Predicate::is_match(begin, end, current, ctx); 15 | } 16 | }; 17 | } 18 | #endif //MREGEX_PREDICATES_NEGATE_HPP -------------------------------------------------------------------------------- /include/mregex/ast/traits.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_AST_TRAITS_HPP 2 | #define MREGEX_AST_TRAITS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace meta::ast 10 | { 11 | /** 12 | * Type trait used to detect trivially matchable AST nodes. 13 | * 14 | * @note Semantically, a node is considered trivially matchable if it can be matched 15 | * just by looking at one input character. Additionally, in case of a successful match, 16 | * the given input character will always be consumed. 17 | * 18 | * @note Syntactically, a node is detected as trivially matchable by checking if it 19 | * contains a static member function with a specific signature. 
20 | * This does not enforce any semantic guarantees on the node and should be treated as 21 | * a marker trait. 22 | * 23 | * @tparam Node The AST node 24 | */ 25 | template 26 | inline constexpr bool is_trivially_matchable = requires (char input, type_sequence<> ctx) 27 | { 28 | { Node::match_one(input, ctx) } -> std::same_as; 29 | }; 30 | 31 | template 32 | inline constexpr bool are_trivially_matchable = (is_trivially_matchable && ...); 33 | 34 | /** 35 | * Metafunction used to count the number of capturing groups in the regex AST. 36 | * 37 | * @tparam Node The AST node 38 | */ 39 | template 40 | inline constexpr std::size_t capture_count = 0; 41 | 42 | template typename Wrapper, typename... Nodes> 43 | inline constexpr std::size_t capture_count> = (capture_count + ... + 0); 44 | 45 | template 46 | inline constexpr std::size_t capture_count> = capture_count; 47 | 48 | template 49 | inline constexpr std::size_t capture_count> = capture_count; 50 | 51 | template 52 | inline constexpr std::size_t capture_count> = capture_count + 1; 53 | 54 | /** 55 | * Metafunction that builds a specification for the names of all capturing groups in the AST. 56 | * The specification lists all capture names according to Preorder AST traversal. 57 | * 58 | * @tparam Node The AST node 59 | */ 60 | template 61 | struct capture_name_spec 62 | { 63 | using type = type_sequence<>; 64 | }; 65 | 66 | template 67 | using capture_name_spec_t = typename capture_name_spec::type; 68 | 69 | template typename Wrapper, typename... 
Nodes> 70 | struct capture_name_spec> 71 | { 72 | using type = concat_t ...>; 73 | }; 74 | 75 | template 76 | struct capture_name_spec> 77 | { 78 | using type = capture_name_spec_t; 79 | }; 80 | 81 | template 82 | struct capture_name_spec> 83 | { 84 | using type = capture_name_spec_t; 85 | }; 86 | 87 | template 88 | struct capture_name_spec> 89 | { 90 | using type = push_t, Name>; 91 | }; 92 | 93 | /** 94 | * Base type used to tag all AST nodes which are zero-length matchers. 95 | * A node is a zero-length matcher if it may match inputs of length zero. 96 | */ 97 | struct zero_length_matcher {}; 98 | 99 | /** 100 | * Type trait used to identify zero-length matchers. 101 | * 102 | * @tparam Node The AST node 103 | */ 104 | template 105 | inline constexpr bool is_zero_length_matcher = std::is_base_of_v; 106 | 107 | template 108 | inline constexpr bool is_zero_length_matcher> = is_zero_length_matcher; 109 | 110 | template 111 | inline constexpr bool is_zero_length_matcher> = symbol::is_zero || is_zero_length_matcher; 112 | 113 | template 114 | inline constexpr bool is_zero_length_matcher> = (is_zero_length_matcher && ...); 115 | 116 | template 117 | inline constexpr bool is_zero_length_matcher> = (is_zero_length_matcher || ...); 118 | 119 | /** 120 | * Type trait used to identify AST nodes which are assertions. 
121 | * 122 | * @tparam Node The AST node 123 | */ 124 | template 125 | inline constexpr bool is_assertion = is_zero_length_matcher && requires (char *iter, type_sequence<> ctx) 126 | { 127 | typename Node::predicate_type; 128 | { Node::predicate_type::is_match(iter, iter, iter, ctx) } -> std::same_as; 129 | }; 130 | } 131 | #endif //MREGEX_AST_TRAITS_HPP -------------------------------------------------------------------------------- /include/mregex/ast/transform.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_AST_TRANSFORM_HPP 2 | #define MREGEX_AST_TRANSFORM_HPP 3 | 4 | #include 5 | 6 | namespace meta::ast 7 | { 8 | /** 9 | * Generic metafunction that implements boilerplate code for transforming the AST. 10 | * The result is a new AST generated by applying a given metafunction on each node. 11 | * 12 | * @tparam Node The AST node type being transformed 13 | * @tparam Func Metafunction that provides a mapping between AST nodes 14 | */ 15 | template typename Func> 16 | struct transform 17 | { 18 | using type = Node; 19 | }; 20 | 21 | template typename Wrapper, typename... 
Nodes, template typename Func> 22 | struct transform, Func> 23 | { 24 | using type = Wrapper::type ...>; 25 | }; 26 | 27 | template typename Func> 28 | struct transform, Func> 29 | { 30 | using type = basic_repetition::type>; 31 | }; 32 | 33 | template typename Func> 34 | struct transform, Func> 35 | { 36 | using type = lookaround::type>; 37 | }; 38 | 39 | template typename Func> 40 | struct transform, Func> 41 | { 42 | using type = capture::type>; 43 | }; 44 | } 45 | #endif //MREGEX_AST_TRANSFORM_HPP -------------------------------------------------------------------------------- /include/mregex/grammar/actions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_GRAMMAR_ACTIONS_HPP 2 | #define MREGEX_GRAMMAR_ACTIONS_HPP 3 | 4 | #include 5 | 6 | namespace meta::grammar 7 | { 8 | // Ignore the current symbol at the front of the stack and continue 9 | struct ignore {}; 10 | 11 | // Move to the next token in the input 12 | struct advance {}; 13 | 14 | // Accept the input 15 | struct accept : std::true_type {}; 16 | 17 | // Reject the input 18 | struct reject : std::false_type {}; 19 | } 20 | #endif //MREGEX_GRAMMAR_ACTIONS_HPP -------------------------------------------------------------------------------- /include/mregex/grammar/backrefs.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_GRAMMAR_BACKREFS_HPP 2 | #define MREGEX_GRAMMAR_BACKREFS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::grammar 9 | { 10 | /** 11 | * Metafunction that defines symbols used to begin parsing a backreference ID. 12 | * 13 | * @tparam C The current character in the input pattern 14 | */ 15 | template 16 | struct begin_backref_id 17 | { 18 | using type = 19 | type_sequence 20 | < 21 | advance, 22 | symbol::backref_id 23 | >; 24 | }; 25 | 26 | /** 27 | * Metafunction that defines symbols used to begin parsing a backreference name. 
28 | */ 29 | struct begin_named_backref 30 | { 31 | using type = 32 | type_sequence 33 | < 34 | advance, 35 | symbol::expect<'<'>, 36 | symbol::backref_name_begin 37 | >; 38 | }; 39 | 40 | /** 41 | * Metafunction that defines symbols used to continue parsing a backreference ID. 42 | * 43 | * @tparam ID The backreference ID being parsed 44 | * @tparam C The current character in the input pattern 45 | */ 46 | template 47 | struct continue_backref_id 48 | { 49 | using type = symbol::make_backref; 50 | }; 51 | 52 | template 53 | requires (is_digit(C)) 54 | struct continue_backref_id 55 | { 56 | using type = 57 | type_sequence 58 | < 59 | advance, 60 | symbol::backref_id<10 * ID + C - '0'> 61 | >; 62 | }; 63 | 64 | template 65 | using continue_backref_id_t = typename continue_backref_id::type; 66 | } 67 | #endif //MREGEX_GRAMMAR_BACKREFS_HPP -------------------------------------------------------------------------------- /include/mregex/grammar/hexadecimal.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_GRAMMAR_HEXADECIMAL_HPP 2 | #define MREGEX_GRAMMAR_HEXADECIMAL_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::grammar 9 | { 10 | namespace detail 11 | { 12 | template 13 | struct continue_hex_escape_sequence 14 | { 15 | using type = reject; 16 | }; 17 | 18 | template 19 | requires (is_hexdigit(C)) 20 | struct continue_hex_escape_sequence, C> 21 | { 22 | static constexpr std::uint8_t digit = is_digit(C) ? C - '0' : 10 + set_lowercase_bit(C) - 'a'; 23 | 24 | using type = 25 | type_sequence 26 | < 27 | advance, 28 | symbol::hex_esc_seq 29 | >; 30 | }; 31 | 32 | template 33 | using continue_hex_escape_sequence_t = typename continue_hex_escape_sequence::type; 34 | } 35 | 36 | /** 37 | * Metafunction that defines symbols used to begin parsing a hexadecimal escape sequence. 
38 | */ 39 | struct begin_hex_escape_sequence 40 | { 41 | using type = 42 | type_sequence 43 | < 44 | advance, 45 | symbol::hex_esc_seq<> 46 | >; 47 | }; 48 | 49 | /** 50 | * Metafunction that defines symbols used to continue parsing a hexadecimal escape sequence. 51 | * 52 | * @tparam Sequence The hexadecimal escape sequence being parsed 53 | * @tparam Token The current token being parsed 54 | */ 55 | template 56 | struct continue_hex_escape_sequence 57 | { 58 | using type = reject; 59 | }; 60 | 61 | template 62 | requires (sizeof...(Hexdigits) < 2) 63 | struct continue_hex_escape_sequence, symbol::character> 64 | { 65 | using type = detail::continue_hex_escape_sequence_t, C>; 66 | }; 67 | 68 | // The escape sequence is finished when it contains 2 hex digits 69 | template 70 | struct continue_hex_escape_sequence, Token> 71 | { 72 | using type = symbol::push_literal(First * 16 + Second)>; 73 | }; 74 | 75 | template 76 | using continue_hex_escape_sequence_t = typename continue_hex_escape_sequence::type; 77 | } 78 | #endif //MREGEX_GRAMMAR_HEXADECIMAL_HPP 79 | -------------------------------------------------------------------------------- /include/mregex/grammar/names.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_GRAMMAR_NAMES_HPP 2 | #define MREGEX_GRAMMAR_NAMES_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::grammar 9 | { 10 | /** 11 | * Metafunction that defines symbols used to begin parsing a name sequence. 
12 | * 13 | * @tparam Name The metacontainer for the name sequence 14 | * @tparam C The current character being parsed 15 | */ 16 | template typename Name, char C> 17 | struct begin_name 18 | { 19 | using type = reject; 20 | }; 21 | 22 | template typename Name, char C> 23 | requires (is_word(C) && !is_digit(C)) 24 | struct begin_name 25 | { 26 | using type = 27 | type_sequence 28 | < 29 | advance, 30 | Name 31 | >; 32 | }; 33 | 34 | template typename Name, char C> 35 | using begin_name_t = typename begin_name::type; 36 | 37 | /** 38 | * Metafunction that defines symbols used to continue parsing a name sequence. 39 | * 40 | * @tparam Name The name sequence being parsed 41 | * @tparam C The current character being parsed 42 | */ 43 | template 44 | struct continue_name 45 | { 46 | using type = reject; 47 | }; 48 | 49 | template typename Name, char... Chars, char C> 50 | requires (is_word(C)) 51 | struct continue_name, C> 52 | { 53 | using type = 54 | type_sequence 55 | < 56 | advance, 57 | Name 58 | >; 59 | }; 60 | 61 | template 62 | using continue_name_t = typename continue_name::type; 63 | } 64 | #endif //MREGEX_GRAMMAR_NAMES_HPP -------------------------------------------------------------------------------- /include/mregex/grammar/quantifiers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_GRAMMAR_QUANTIFIERS_HPP 2 | #define MREGEX_GRAMMAR_QUANTIFIERS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::grammar 9 | { 10 | /** 11 | * Metafunction that defines symbols used to abort parsing a quantifier. 12 | * In this case, the parser has to recover the skipped metacharacter used to 13 | * mark the start of a quantifier, like '{'. 
14 | * 15 | * @tparam C The character that is parsed as replacement for the failed quantifier 16 | */ 17 | template 18 | struct abort_quantifier_parsing 19 | { 20 | using type = 21 | type_sequence 22 | < 23 | symbol::push_literal, 24 | symbol::mod, 25 | symbol::make_sequence, 26 | symbol::seq, 27 | symbol::alt 28 | >; 29 | }; 30 | 31 | template 32 | using abort_quantifier_parsing_t = typename abort_quantifier_parsing::type; 33 | 34 | /** 35 | * Metafunction that decides if the parser will begin parsing a quantifier value 36 | * or a normal character sequence. 37 | * 38 | * @tparam C The current character being parsed 39 | */ 40 | template 41 | struct begin_quantifier_value 42 | { 43 | using type = abort_quantifier_parsing_t<'{'>; 44 | }; 45 | 46 | template 47 | requires (is_digit(C)) 48 | struct begin_quantifier_value 49 | { 50 | using type = 51 | type_sequence 52 | < 53 | advance, 54 | symbol::quantifier_value 55 | >; 56 | }; 57 | 58 | template 59 | using begin_quantifier_value_t = typename begin_quantifier_value::type; 60 | 61 | /** 62 | * Metafunction that defines symbols used to continue parsing quantifier values. 
63 | * 64 | * @tparam Symbol The symbolic quantifier being parsed 65 | * @tparam C The current character being parsed 66 | */ 67 | template 68 | struct continue_quantifier_value 69 | { 70 | using type = reject; 71 | }; 72 | 73 | template 74 | requires (is_digit(C)) 75 | struct continue_quantifier_value, C> 76 | { 77 | using type = 78 | type_sequence 79 | < 80 | advance, 81 | symbol::quantifier_value<10 * N + C - '0'> 82 | >; 83 | }; 84 | 85 | template 86 | requires (is_digit(C)) 87 | struct continue_quantifier_value>, C> 88 | { 89 | using type = 90 | type_sequence 91 | < 92 | advance, 93 | symbol::quantifier_range> 94 | >; 95 | }; 96 | 97 | template 98 | requires (is_digit(C)) 99 | struct continue_quantifier_value, C> 100 | { 101 | using type = 102 | type_sequence 103 | < 104 | advance, 105 | symbol::quantifier_range> 106 | >; 107 | }; 108 | 109 | template 110 | using continue_quantifier_value_t = typename continue_quantifier_value::type; 111 | } 112 | #endif //MREGEX_GRAMMAR_QUANTIFIERS_HPP -------------------------------------------------------------------------------- /include/mregex/match_result_generator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_MATCH_RESULT_GENERATOR_HPP 2 | #define MREGEX_MATCH_RESULT_GENERATOR_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta 8 | { 9 | /** 10 | * A functor that generates match results by iterating a forward range. 11 | * At most one empty result will be generated. 
12 | * 13 | * @tparam Method The regex method used to generate results 14 | * @tparam Iter The forward iterator type used to access the input 15 | */ 16 | template 17 | struct match_result_generator 18 | { 19 | using method = Method; 20 | using iterator = Iter; 21 | using regex_type = typename method::regex_type; 22 | using ast_type = regex_ast_t; 23 | 24 | constexpr match_result_generator(iterator begin, iterator end) 25 | : _begin{begin}, _end{end}, _current{begin} 26 | {} 27 | 28 | [[nodiscard]] constexpr auto operator()() noexcept 29 | { 30 | return next(); 31 | } 32 | 33 | [[nodiscard]] constexpr auto next() noexcept 34 | { 35 | regex_match_context ctx{}; 36 | if (!_active) 37 | return match_result_view{std::move(ctx.captures), false}; 38 | 39 | auto result = method::invoke(_begin, _end, _current, ctx); 40 | _active = result.matched && !std::get<0>(ctx.captures).is_empty(); 41 | _current = result.end; 42 | return match_result_view{std::move(ctx.captures), result.matched}; 43 | } 44 | 45 | constexpr bool active() const noexcept 46 | { 47 | return _active; 48 | } 49 | 50 | private: 51 | iterator _begin; 52 | iterator _end; 53 | iterator _current; 54 | bool _active{true}; 55 | }; 56 | } 57 | #endif //MREGEX_MATCH_RESULT_GENERATOR_HPP -------------------------------------------------------------------------------- /include/mregex/parser/lexer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_PARSER_LEXER_HPP 2 | #define MREGEX_PARSER_LEXER_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta 8 | { 9 | /** 10 | * Compile-time lexer that extracts tokens from the input pattern. 
11 | * 12 | * @tparam Pattern The input string to be tokenized 13 | */ 14 | template 15 | struct lexer 16 | { 17 | template 18 | struct token 19 | { 20 | using type = symbol::empty; 21 | }; 22 | 23 | template 24 | requires (I < Pattern.length()) 25 | struct token 26 | { 27 | using type = symbol::character; 28 | }; 29 | 30 | template 31 | using token_t = typename token::type; 32 | }; 33 | } 34 | #endif //MREGEX_PARSER_LEXER_HPP -------------------------------------------------------------------------------- /include/mregex/parser/parser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_PARSER_HPP 2 | #define MREGEX_PARSER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace meta 11 | { 12 | /** 13 | * Compile-time parser using the LL(1) algorithm. 14 | * 15 | * @tparam Pattern The input string to be parsed 16 | */ 17 | template 18 | struct parser 19 | { 20 | /** 21 | * Metacontainer used to capture the current state of the parser. 22 | */ 23 | template 24 | using state = parser_state, I, Nodes, Symbols>; 25 | 26 | /** 27 | * Metafunction used to extract tokens (characters or empty tokens) from the input pattern. 28 | * 29 | * @tparam I The current position in the input 30 | */ 31 | template 32 | using token_t = typename lexer::template token_t; 33 | 34 | /** 35 | * Metafunction that models the transition of the parser automaton. 36 | * 37 | * @tparam I The current position in the input 38 | * @tparam NewSymbols The new parsing symbols generated by the grammar 39 | * @tparam Nodes The stack with the AST nodes 40 | * @tparam Symbols The stack with the current parsing symbols 41 | */ 42 | template 43 | struct transition; 44 | 45 | template 46 | using transition_t = typename transition::type; 47 | 48 | /** 49 | * Main metafunction used to parse the pattern. 
50 | * 51 | * @tparam I The current position in the input 52 | * @tparam Nodes The stack with the AST nodes 53 | * @tparam Symbols The stack with the current parsing symbols 54 | */ 55 | template 56 | struct parse 57 | { 58 | using next_symbols = grammar::rule_t, token_t>; 59 | using type = transition_t>; 60 | }; 61 | 62 | template 63 | using parse_t = typename parse::type; 64 | 65 | template 66 | requires symbol::is_semantic_action> 67 | struct parse 68 | { 69 | using next_nodes = ast::build_t, token_t, Nodes>; 70 | using type = parse_t>; 71 | }; 72 | 73 | // Base case - push the symbols on the stack 74 | template 75 | struct transition 76 | { 77 | using type = parse_t>; 78 | }; 79 | 80 | // Don't push anything 81 | template 82 | struct transition 83 | { 84 | using type = parse_t; 85 | }; 86 | 87 | // Advance to the next token 88 | template 89 | struct transition 90 | { 91 | using type = state; 92 | }; 93 | 94 | // Advance and also push the remaining symbols on the stack 95 | template 96 | struct transition, Nodes, type_sequence> 97 | { 98 | using type = state>; 99 | }; 100 | 101 | // Reject the input pattern 102 | template 103 | struct transition 104 | { 105 | using type = parser_result>; 106 | }; 107 | 108 | // Accept the input pattern 109 | template 110 | struct transition 111 | { 112 | using type = parser_result>, parsing::success>; 113 | }; 114 | 115 | using initial_state = state<0, type_sequence<>, type_sequence>; 116 | using result = final_parser_state; 117 | using ast_type = typename result::ast_type; 118 | using verdict = typename result::verdict; 119 | 120 | static constexpr bool accepted = std::is_same_v; 121 | }; 122 | 123 | template 124 | using ast_of = typename parser::ast_type; 125 | 126 | template 127 | using parser_verdict = typename parser::verdict; 128 | 129 | /** 130 | * Causes a compilation failure if the pattern contains a syntax error. 
131 | * 132 | * @tparam Pattern The regex pattern to be parsed 133 | * @return Always true, to allow usage in a requires clause 134 | */ 135 | template 136 | constexpr bool fail_on_syntax_error() noexcept 137 | { 138 | static_assert(std::is_same_v, parsing::success>, "syntax error in regular expression"); 139 | return true; 140 | } 141 | } 142 | #endif //MREGEX_PARSER_HPP -------------------------------------------------------------------------------- /include/mregex/parser/state.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_PARSER_STATE_HPP 2 | #define MREGEX_PARSER_STATE_HPP 3 | 4 | #include 5 | 6 | namespace meta 7 | { 8 | /** 9 | * Types that define the possible verdicts after parsing is finished. 10 | */ 11 | namespace parsing 12 | { 13 | // Parsing finished with success 14 | struct success : std::true_type {}; 15 | 16 | // Parsing finished with syntax error 17 | template 18 | struct syntax_error : std::false_type {}; 19 | } 20 | 21 | /** 22 | * Data type returned by the compile-time parser. 23 | * 24 | * @tparam AST The generated Abstract Syntax Tree 25 | * @tparam Verdict Type that holds information about the verdict of the parser 26 | */ 27 | template 28 | struct parser_result 29 | { 30 | using ast_type = AST; 31 | using verdict = Verdict; 32 | }; 33 | 34 | template 35 | struct parser_result 36 | { 37 | using ast_type = ast::empty; 38 | using verdict = Verdict; 39 | }; 40 | 41 | /** 42 | * Metacontainer that captures the state of the parser at a given parsing step. 43 | * 44 | * @tparam Parser The parser metafunction type 45 | * @tparam I The current position in the input 46 | * @tparam Nodes The stack with the AST nodes 47 | * @tparam Symbols The stack with the current parsing symbols 48 | */ 49 | template 50 | struct parser_state {}; 51 | 52 | namespace detail 53 | { 54 | /** 55 | * To minimize template recursion depth, parsing is done by advancing 56 | * the parser state using a fold expression. 
57 | */ 58 | 59 | template 60 | constexpr auto operator<<(parser_state, std::size_t) noexcept 61 | { 62 | return typename Parser::template parse_t{}; 63 | } 64 | 65 | template 66 | constexpr auto operator<<(parser_result, std::size_t) noexcept 67 | { 68 | return parser_result{}; 69 | } 70 | 71 | template 72 | constexpr auto advance_parser_state(State initial_state, std::index_sequence) noexcept 73 | { 74 | return (initial_state << ... << Indices); 75 | } 76 | } 77 | 78 | /** 79 | * Metafunction that computes the final parser state starting from an initial state 80 | * and advancing it N times. 81 | * 82 | * @tparam State The initial parser state 83 | * @tparam N The number of times to advance the parser state 84 | */ 85 | template 86 | using final_parser_state = decltype(detail::advance_parser_state(State{}, std::make_index_sequence{})); 87 | } 88 | #endif //MREGEX_PARSER_STATE_HPP -------------------------------------------------------------------------------- /include/mregex/regex_capture.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_REGEX_CAPTURE_HPP 2 | #define MREGEX_REGEX_CAPTURE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace meta 11 | { 12 | /** 13 | * Concept used to constrain a type that saves the content captured by regex groups. 
14 | */ 15 | template 16 | concept captured_content = std::ranges::forward_range && requires (Capture capture) 17 | { 18 | { capture.content() } -> char_range; 19 | { capture.length() } -> std::convertible_to; 20 | { capture.is_empty() } -> std::same_as; 21 | static_cast(capture); 22 | }; 23 | 24 | template 25 | struct regex_capture_base 26 | { 27 | static constexpr bool has_name() noexcept 28 | { 29 | return !std::is_same_v; 30 | } 31 | 32 | static constexpr auto name() noexcept -> std::string_view 33 | { 34 | if constexpr (has_name()) 35 | return Name::value; 36 | return ""; 37 | } 38 | }; 39 | 40 | /** 41 | * Class that holds a view into the content captured by a regex group. 42 | * 43 | * @tparam Iter The forward iterator type used to access the input 44 | * @tparam Name The name of the capturing group (optional) 45 | */ 46 | template 47 | struct regex_capture_view : regex_capture_base 48 | { 49 | static constexpr bool is_contiguous_view = std::contiguous_iterator; 50 | 51 | constexpr regex_capture_view() noexcept = default; 52 | 53 | constexpr explicit regex_capture_view(Iter begin, Iter end) noexcept 54 | : _begin{begin}, _end{end} 55 | {} 56 | 57 | constexpr void clear() noexcept 58 | { 59 | _end = _begin; 60 | } 61 | 62 | constexpr bool is_empty() const noexcept 63 | { 64 | return _begin == _end; 65 | } 66 | 67 | constexpr auto length() const noexcept -> std::size_t 68 | { 69 | return std::distance(_begin, _end); 70 | } 71 | 72 | constexpr auto begin() const noexcept 73 | { 74 | return _begin; 75 | } 76 | 77 | constexpr auto end() const noexcept 78 | { 79 | return _end; 80 | } 81 | 82 | [[nodiscard]] constexpr auto content() const noexcept(is_contiguous_view) 83 | { 84 | if constexpr (is_contiguous_view) 85 | return std::string_view{_begin, _end}; 86 | else 87 | return std::string{_begin, _end}; 88 | } 89 | 90 | constexpr auto &operator[](std::size_t index) noexcept 91 | requires std::random_access_iterator 92 | { 93 | return _begin[index]; 94 | } 95 | 96 | 
constexpr auto const &operator[](std::size_t index) const noexcept 97 | requires std::random_access_iterator 98 | { 99 | return _begin[index]; 100 | } 101 | 102 | constexpr explicit operator bool() const noexcept 103 | { 104 | return is_empty(); 105 | } 106 | 107 | constexpr explicit(false) operator std::string_view() const noexcept 108 | requires std::contiguous_iterator 109 | { 110 | return {_begin, _end}; 111 | } 112 | 113 | private: 114 | Iter _begin{}; 115 | Iter _end{}; 116 | }; 117 | 118 | /** 119 | * Class that holds ownership on the content captured by a regex group. 120 | * 121 | * @tparam Name The name of the capturing group (optional) 122 | */ 123 | template 124 | struct regex_capture : regex_capture_base 125 | { 126 | regex_capture() noexcept = default; 127 | 128 | template 129 | explicit regex_capture(regex_capture_view const &capture_view) 130 | : _capture{capture_view.begin(), capture_view.end()} 131 | {} 132 | 133 | bool is_empty() const noexcept 134 | { 135 | return _capture.empty(); 136 | } 137 | 138 | auto length() const noexcept -> std::size_t 139 | { 140 | return _capture.length(); 141 | } 142 | 143 | auto begin() noexcept 144 | { 145 | return _capture.begin(); 146 | } 147 | 148 | auto begin() const noexcept 149 | { 150 | return _capture.cbegin(); 151 | } 152 | 153 | auto end() noexcept 154 | { 155 | return _capture.end(); 156 | } 157 | 158 | auto end() const noexcept 159 | { 160 | return _capture.cend(); 161 | } 162 | 163 | auto &content() & noexcept 164 | { 165 | return _capture; 166 | } 167 | 168 | auto const &content() const & noexcept 169 | { 170 | return _capture; 171 | } 172 | 173 | auto &&content() && noexcept 174 | { 175 | return _capture; 176 | } 177 | 178 | auto const &&content() const && noexcept 179 | { 180 | return _capture; 181 | } 182 | 183 | auto &operator[](std::size_t index) noexcept 184 | { 185 | return _capture[index]; 186 | } 187 | 188 | auto const &operator[](std::size_t index) const noexcept 189 | { 190 | return 
_capture[index]; 191 | } 192 | 193 | explicit operator bool() const noexcept 194 | { 195 | return is_empty(); 196 | } 197 | 198 | explicit(false) operator std::string_view() const noexcept 199 | { 200 | return _capture; 201 | } 202 | 203 | private: 204 | std::string _capture; 205 | }; 206 | 207 | /** 208 | * Type trait used to check if a given capture type is an instance of 209 | * the regex_capture_view template. 210 | * 211 | * @tparam Capture The regex capture type 212 | */ 213 | template 214 | inline constexpr bool is_capture_view = false; 215 | 216 | template 217 | inline constexpr bool is_capture_view> = true; 218 | 219 | /** 220 | * Metafunction used to rename a given regex capture type using pattern matching. 221 | * 222 | * @tparam Capture The capture type to be renamed 223 | * @tparam Name The new name for the capture type 224 | */ 225 | template 226 | struct rename_capture; 227 | 228 | template 229 | struct rename_capture, Name> 230 | { 231 | using type = regex_capture_view; 232 | }; 233 | 234 | template 235 | struct rename_capture, Name> 236 | { 237 | using type = regex_capture; 238 | }; 239 | 240 | template 241 | using rename_capture_t = typename rename_capture::type; 242 | } 243 | 244 | template 245 | std::ostream &operator<<(std::ostream &os, Capture const &capture) 246 | { 247 | std::copy(capture.begin(), capture.end(), std::ostream_iterator{os}); 248 | return os; 249 | } 250 | #endif //MREGEX_REGEX_CAPTURE_HPP -------------------------------------------------------------------------------- /include/mregex/regex_capture_storage.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_REGEX_CAPTURE_STORAGE_HPP 2 | #define MREGEX_REGEX_CAPTURE_STORAGE_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta 8 | { 9 | namespace detail 10 | { 11 | template 12 | constexpr auto make_regex_capture_view_storage(type_sequence) noexcept 13 | { 14 | return std::tuple ...>{}; 15 | } 16 | 17 | template 18 | 
constexpr auto make_regex_capture_storage(type_sequence) noexcept 19 | { 20 | return std::tuple ...>{}; 21 | } 22 | } 23 | 24 | /** 25 | * Concept used to constrain a type that stores regex captures. 26 | */ 27 | template 28 | concept capture_storage = requires(CaptureStorage captures) 29 | { 30 | { std::tuple_size_v> } -> std::convertible_to; 31 | { std::get<0>(captures) } -> captured_content; 32 | }; 33 | 34 | /** 35 | * Storage type that holds views into regex captures. 36 | * 37 | * @tparam Regex The regex type used for matching 38 | * @tparam Iter The forward iterator type used to view the captured content 39 | */ 40 | template 41 | using regex_capture_view_storage = decltype(detail::make_regex_capture_view_storage(regex_capture_name_spec_t{})); 42 | 43 | /** 44 | * Storage type that holds memory-owning regex captures. 45 | * 46 | * @tparam Regex The regex type used for matching 47 | */ 48 | template 49 | using regex_capture_storage = decltype(detail::make_regex_capture_storage(regex_capture_name_spec_t{})); 50 | } 51 | #endif //MREGEX_REGEX_CAPTURE_STORAGE_HPP 52 | -------------------------------------------------------------------------------- /include/mregex/regex_flags.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_REGEX_FLAGS_HPP 2 | #define MREGEX_REGEX_FLAGS_HPP 3 | 4 | #include 5 | 6 | namespace meta 7 | { 8 | enum struct regex_flag 9 | { 10 | // Makes matching case-insensitive 11 | icase, 12 | // Makes wildcards (.) match line break characters 13 | dotall, 14 | // Makes anchors ($ and ^) also match the beginning/end of lines 15 | multiline, 16 | // Swaps the behavior of lazy and greedy quantifiers 17 | ungreedy, 18 | // Enables the compiler to be more aggressive when unrolling repetition loops 19 | unroll 20 | }; 21 | 22 | /** 23 | * Metacontainer for a sequence of flags. 
24 | */ 25 | template 26 | struct regex_flag_sequence 27 | { 28 | template 29 | static constexpr bool test = ((Flag == Flags) || ...); 30 | 31 | static constexpr bool icase = test; 32 | static constexpr bool dotall = test; 33 | static constexpr bool multiline = test; 34 | static constexpr bool ungreedy = test; 35 | static constexpr bool unroll = test; 36 | }; 37 | } 38 | #endif //MREGEX_REGEX_FLAGS_HPP -------------------------------------------------------------------------------- /include/mregex/regex_match_context.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_REGEX_MATCH_CONTEXT_HPP 2 | #define MREGEX_REGEX_MATCH_CONTEXT_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta 8 | { 9 | /** 10 | * Data structure that holds static information about the regex as well as 11 | * dynamic information required for matching. 12 | * 13 | * @tparam Regex The regex type used for matching 14 | * @tparam Iter The forward iterator type used to access the input 15 | */ 16 | template 17 | struct regex_match_context 18 | { 19 | using regex_type = Regex; 20 | using iterator = Iter; 21 | using ast_type = regex_ast_t; 22 | using flags = regex_flags; 23 | 24 | regex_capture_view_storage captures{}; 25 | 26 | constexpr void clear() noexcept 27 | { 28 | iterate_tuple(captures, [](auto &capture) noexcept { 29 | capture.clear(); 30 | }); 31 | } 32 | }; 33 | } 34 | #endif //MREGEX_REGEX_MATCH_CONTEXT_HPP -------------------------------------------------------------------------------- /include/mregex/regex_methods.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_REGEX_METHODS_HPP 2 | #define MREGEX_REGEX_METHODS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta 9 | { 10 | /** 11 | * Method used for exact matching of a range. 
12 | */ 13 | template 14 | struct regex_match_method 15 | { 16 | using regex_type = Regex; 17 | using ast_type = regex_ast_t; 18 | 19 | template 20 | static constexpr auto invoke(Iter begin, Iter end, Iter current, Context &ctx) noexcept -> ast::match_result 21 | { 22 | auto result = ast_type::match(begin, end, current, ctx, continuations::equals(end)); 23 | std::get<0>(ctx.captures) = regex_capture_view{current, result.end}; 24 | return result; 25 | } 26 | }; 27 | 28 | /** 29 | * Method used for matching the prefix of a range. 30 | */ 31 | template 32 | struct regex_match_prefix_method 33 | { 34 | using regex_type = Regex; 35 | using ast_type = regex_ast_t; 36 | 37 | template 38 | static constexpr auto invoke(Iter begin, Iter end, Iter current, Context &ctx) noexcept -> ast::match_result 39 | { 40 | auto result = ast_type::match(begin, end, current, ctx, continuations::success); 41 | std::get<0>(ctx.captures) = regex_capture_view{current, result.end}; 42 | return result; 43 | } 44 | }; 45 | 46 | /** 47 | * Method used for searching the first match in a range. 
48 | */ 49 | template 50 | struct regex_search_method 51 | { 52 | using regex_type = Regex; 53 | using ast_type = regex_ast_t; 54 | 55 | template 56 | static constexpr auto invoke(Iter begin, Iter end, Iter current, Context &ctx) noexcept -> ast::match_result 57 | { 58 | for (;; ++current) 59 | { 60 | if (auto result = ast_type::match(begin, end, current, ctx, continuations::success)) 61 | { 62 | std::get<0>(ctx.captures) = regex_capture_view{current, result.end}; 63 | return result; 64 | } 65 | if (current == end) 66 | break; 67 | } 68 | return ast::non_match(current); 69 | } 70 | }; 71 | } 72 | #endif //MREGEX_REGEX_METHODS_HPP -------------------------------------------------------------------------------- /include/mregex/regex_traits.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_REGEX_TRAITS_HPP 2 | #define MREGEX_REGEX_TRAITS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta 9 | { 10 | template 11 | using regex_ast_t = typename Regex::ast_type; 12 | 13 | template 14 | using regex_flags = typename Regex::flags; 15 | 16 | template 17 | using regex_tokenizer_t = typename Regex::template tokenizer_type; 18 | 19 | template 20 | using regex_searcher_t = typename Regex::template searcher_type; 21 | 22 | template 23 | using regex_token_range_t = typename Regex::template token_range_type; 24 | 25 | template 26 | using regex_match_range_t = typename Regex::template match_range_type; 27 | 28 | // Builds a capture name specification for the entire regex, including the implicit capturing group 29 | template 30 | using regex_capture_name_spec_t = push_t>, symbol::unnamed>; 31 | } 32 | #endif //MREGEX_REGEX_TRAITS_HPP -------------------------------------------------------------------------------- /include/mregex/symbols/actions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_SYMBOLS_ACTIONS_HPP 2 | #define MREGEX_SYMBOLS_ACTIONS_HPP 3 | 4 | 
#include 5 | #include 6 | #include 7 | 8 | namespace meta::symbol 9 | { 10 | /** 11 | * Symbols which indicate a semantic action for the parser. 12 | * Typically they are used to update the state of the AST during parsing. 13 | */ 14 | 15 | // Base type for symbols which require a semantic action 16 | struct semantic_action {}; 17 | 18 | struct make_sequence : semantic_action {}; 19 | struct make_alternation : semantic_action {}; 20 | struct make_set : semantic_action {}; 21 | struct make_set_from_current_char : semantic_action {}; 22 | struct make_set_from_stack : semantic_action {}; 23 | struct make_range : semantic_action {}; 24 | struct make_range_from_stack : semantic_action {}; 25 | struct make_digit : semantic_action {}; 26 | struct make_word : semantic_action {}; 27 | struct make_linebreak : semantic_action {}; 28 | struct make_negated : semantic_action {}; 29 | struct make_empty : semantic_action {}; 30 | struct make_beginning_of_line : semantic_action {}; 31 | struct make_beginning_of_input : semantic_action {}; 32 | struct make_end_of_line : semantic_action {}; 33 | struct make_end_of_input : semantic_action {}; 34 | struct make_word_boundary : semantic_action {}; 35 | struct make_whitespace : semantic_action {}; 36 | struct make_wildcard : semantic_action {}; 37 | struct make_literal : semantic_action {}; 38 | 39 | template 40 | struct push_literal : semantic_action {}; 41 | 42 | template 43 | struct make_capture : semantic_action {}; 44 | 45 | template 46 | struct make_repetition : semantic_action {}; 47 | 48 | template 49 | using make_star = make_repetition, infinity>; 50 | 51 | template 52 | using make_plus = make_repetition, infinity>; 53 | 54 | template 55 | using make_optional = make_repetition, quantifier_value<1>>; 56 | 57 | using make_atomic = make_repetition, quantifier_value<1>>; 58 | 59 | template 60 | struct make_backref : semantic_action {}; 61 | 62 | template 63 | struct make_named_backref : semantic_action {}; 64 | 65 | template 66 | 
struct make_lookaround : semantic_action {}; 67 | 68 | /** 69 | * Type trait used to distinguish semantic action symbols from other symbols. 70 | * 71 | * @tparam Symbol The symbolic type 72 | */ 73 | template 74 | inline constexpr bool is_semantic_action = std::is_base_of_v; 75 | } 76 | #endif //MREGEX_SYMBOLS_ACTIONS_HPP -------------------------------------------------------------------------------- /include/mregex/symbols/core.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_SYMBOLS_CORE_HPP 2 | #define MREGEX_SYMBOLS_CORE_HPP 3 | 4 | #include 5 | 6 | namespace meta::symbol 7 | { 8 | /** 9 | * Core symbols used in grammar definitions, parsing, AST and matching. 10 | */ 11 | 12 | // Symbols for parsing tokens 13 | template 14 | struct character {}; 15 | 16 | struct empty {}; 17 | 18 | template 19 | struct expect {}; 20 | 21 | // Symbols for parsing basic operators 22 | struct begin {}; 23 | struct esc {}; 24 | struct seq {}; 25 | struct alt {}; 26 | struct alt_seq {}; 27 | struct mod {}; 28 | 29 | // Symbols for parsing groups 30 | struct group_begin_or_mod {}; 31 | struct group_mod {}; 32 | struct group_mod_less_than {}; 33 | struct group_begin {}; 34 | 35 | // Symbols for parsing sets 36 | struct set_begin_or_neg {}; 37 | struct set_begin {}; 38 | struct set_seq {}; 39 | struct set_esc {}; 40 | struct set_range_begin {}; 41 | struct set_range_seq {}; 42 | struct set_range_esc {}; 43 | 44 | // Symbol for parsing backreferences 45 | struct backref_name_begin {}; 46 | 47 | template 48 | struct backref_id {}; 49 | 50 | template 51 | struct backref_name_seq {}; 52 | 53 | // Symbols for parsing named captures 54 | template 55 | struct capture_name_seq {}; 56 | 57 | // Symbols for parsing hexadecimal escape sequences 58 | template 59 | struct hex_esc_seq {}; 60 | } 61 | #endif //MREGEX_SYMBOLS_CORE_HPP -------------------------------------------------------------------------------- 
/include/mregex/symbols/names.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_SYMBOLS_NAMES_HPP 2 | #define MREGEX_SYMBOLS_NAMES_HPP 3 | 4 | #include 5 | 6 | namespace meta::symbol 7 | { 8 | /** 9 | * Symbols used to parse and build named AST nodes. 10 | */ 11 | 12 | template 13 | struct name 14 | { 15 | static constexpr auto value = static_cast(Name); 16 | }; 17 | 18 | struct unnamed {}; 19 | 20 | /** 21 | * Metafunction that converts a pack of characters to a symbolic name type. 22 | */ 23 | template 24 | using make_name = name()>; 25 | } 26 | #endif //MREGEX_SYMBOLS_NAMES_HPP -------------------------------------------------------------------------------- /include/mregex/symbols/quantifiers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_SYMBOLS_QUANTIFIERS_HPP 2 | #define MREGEX_SYMBOLS_QUANTIFIERS_HPP 3 | 4 | #include 5 | 6 | namespace meta::symbol 7 | { 8 | /** 9 | * Symbols for parsing and working with quantifiers. 10 | */ 11 | 12 | struct quantifier_begin {}; 13 | 14 | template 15 | struct quantifier_mod {}; 16 | 17 | struct unquantifiable {}; 18 | struct infinity {}; 19 | 20 | template 21 | struct quantifier_value {}; 22 | 23 | /** 24 | * Type trait used to check if a type is a symbolic quantifier. 25 | * 26 | * @tparam Symbol The symbolic type 27 | */ 28 | template 29 | inline constexpr bool is_quantifier = false; 30 | 31 | template 32 | inline constexpr bool is_quantifier> = true; 33 | 34 | template<> 35 | inline constexpr bool is_quantifier = true; 36 | 37 | template 38 | concept quantifier = is_quantifier; 39 | 40 | /** 41 | * Type trait that checks if a symbolic quantifier is equivalent to infinity. 
42 | * 43 | * @tparam Symbol The symbolic quantifier 44 | */ 45 | template 46 | inline constexpr bool is_infinity = false; 47 | 48 | template<> 49 | inline constexpr bool is_infinity = true; 50 | 51 | template 52 | concept finite_quantifier = quantifier && !is_infinity; 53 | 54 | /** 55 | * Type trait that checks if a symbolic quantifier is equivalent to 0. 56 | * 57 | * @tparam Symbol The symbolic quantifier 58 | */ 59 | template 60 | inline constexpr bool is_zero = false; 61 | 62 | template<> 63 | inline constexpr bool is_zero> = true; 64 | 65 | /** 66 | * Type trait that checks if a pair of quantifiers forms a valid range (interval). 67 | * 68 | * @tparam A The beginning of the range 69 | * @tparam B The end (inclusive) of the range 70 | */ 71 | template 72 | inline constexpr bool is_valid_range = false; 73 | 74 | template 75 | inline constexpr bool is_valid_range, quantifier_value> = A <= B; 76 | 77 | template 78 | inline constexpr bool is_valid_range, infinity> = true; 79 | 80 | /** 81 | * Metafunction that projects a finite symbolic quantifier to its numerical value. 82 | */ 83 | template 84 | inline constexpr std::size_t get_value = {}; 85 | 86 | template 87 | inline constexpr std::size_t get_value> = N; 88 | 89 | /** 90 | * Predicate that checks if a symbolic quantifier is equal to some integral value. 91 | */ 92 | template 93 | inline constexpr auto equals = [](std::size_t) noexcept -> bool { return false; }; 94 | 95 | template 96 | inline constexpr auto equals> = [](std::size_t value) noexcept -> bool { return value == N; }; 97 | 98 | /** 99 | * Metafunction used to decrement symbolic quantifiers. 
100 | */ 101 | template 102 | struct decrement; 103 | 104 | template 105 | requires (N > 0) 106 | struct decrement> 107 | { 108 | using type = quantifier_value; 109 | }; 110 | 111 | template<> 112 | struct decrement 113 | { 114 | using type = infinity; 115 | }; 116 | 117 | template 118 | using decrement_t = typename decrement::type; 119 | 120 | /** 121 | * Metafunction that handles subtraction of symbolic quantifiers. 122 | */ 123 | template 124 | struct subtract; 125 | 126 | template 127 | requires (A >= B) 128 | struct subtract, quantifier_value> 129 | { 130 | using type = quantifier_value; 131 | }; 132 | 133 | template 134 | struct subtract> 135 | { 136 | using type = infinity; 137 | }; 138 | 139 | template 140 | using subtract_t = typename subtract::type; 141 | 142 | /** 143 | * Metacontainer for the upper and lower bound of a range. 144 | */ 145 | template 146 | struct quantifier_range {}; 147 | } 148 | #endif //MREGEX_SYMBOLS_QUANTIFIERS_HPP -------------------------------------------------------------------------------- /include/mregex/utility/char_traits.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_CHAR_TRAITS_HPP 2 | #define MREGEX_UTILITY_CHAR_TRAITS_HPP 3 | 4 | namespace meta 5 | { 6 | namespace detail 7 | { 8 | /** 9 | * In ASCII encoding, each lowercase letter is offset by 32 from its uppercase equivalent. 10 | * Setting bit 5 (mask 0x20) of an alphabetical character will make it lowercase. 11 | */ 12 | inline constexpr std::uint8_t lowercase_bit_mask = 0x20; 13 | } 14 | 15 | /** 16 | * Checks if the given ASCII code is in the specified range (inclusive). 17 | * This should be used instead of directly comparing chars since char may be a signed type (its signedness is implementation-defined). 
18 | */ 19 | template 20 | constexpr bool is_in_ascii_range(std::uint8_t input) noexcept 21 | { 22 | return A <= input && input <= B; 23 | } 24 | 25 | constexpr char flip_lowercase_bit(char input) noexcept 26 | { 27 | return static_cast(input ^ detail::lowercase_bit_mask); 28 | } 29 | 30 | constexpr char set_lowercase_bit(char input) noexcept 31 | { 32 | return static_cast(input | detail::lowercase_bit_mask); 33 | } 34 | 35 | constexpr bool is_digit(char input) noexcept 36 | { 37 | return is_in_ascii_range<'0', '9'>(input); 38 | } 39 | 40 | constexpr bool is_hexdigit(char input) noexcept 41 | { 42 | return is_digit(input) || is_in_ascii_range<'a', 'f'>(set_lowercase_bit(input)); 43 | } 44 | 45 | constexpr bool is_lower(char input) noexcept 46 | { 47 | return is_in_ascii_range<'a', 'z'>(input); 48 | } 49 | 50 | constexpr bool is_upper(char input) noexcept 51 | { 52 | return is_in_ascii_range<'A', 'Z'>(input); 53 | } 54 | 55 | constexpr bool is_alpha(char input) noexcept 56 | { 57 | return is_lower(set_lowercase_bit(input)); 58 | } 59 | 60 | constexpr bool is_word(char input) noexcept 61 | { 62 | return is_alpha(input) || is_digit(input) || input == '_'; 63 | } 64 | 65 | constexpr char to_lower(char input) noexcept 66 | { 67 | return is_upper(input) ? flip_lowercase_bit(input) : input; 68 | } 69 | 70 | constexpr char to_upper(char input) noexcept 71 | { 72 | return is_lower(input) ? flip_lowercase_bit(input) : input; 73 | } 74 | 75 | constexpr char invert_case(char input) noexcept 76 | { 77 | return is_alpha(input) ? 
flip_lowercase_bit(input) : input; 78 | } 79 | } 80 | #endif //MREGEX_UTILITY_CHAR_TRAITS_HPP -------------------------------------------------------------------------------- /include/mregex/utility/concepts.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_CONCEPTS_HPP 2 | #define MREGEX_UTILITY_CONCEPTS_HPP 3 | 4 | #include 5 | 6 | namespace meta 7 | { 8 | /** 9 | * Concept used to constrain the generic type accepted by matching/searching functions. 10 | */ 11 | template 12 | concept char_range = std::ranges::forward_range && requires (Range range) 13 | { 14 | { *std::begin(range) } -> std::convertible_to; 15 | }; 16 | 17 | /** 18 | * Concept used to constrain a functor type that can be called to generate 19 | * values convertible to bool. 20 | */ 21 | template 22 | concept bool_testable_generator = requires (Generator generator) 23 | { 24 | static_cast(generator()); 25 | }; 26 | } 27 | #endif //MREGEX_UTILITY_CONCEPTS_HPP -------------------------------------------------------------------------------- /include/mregex/utility/continuations.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_CONTINUATIONS_HPP 2 | #define MREGEX_UTILITY_CONTINUATIONS_HPP 3 | 4 | #include 5 | 6 | namespace meta 7 | { 8 | template 9 | concept match_continuation = std::forward_iterator && requires (Cont cont, Iter current) 10 | { 11 | { cont(current) } -> std::same_as>; 12 | }; 13 | 14 | /** 15 | * Struct containing static lambda expressions used as continuations for regex matching. 16 | * Calling a continuation is equivalent to evaluating the rest of the regex from 17 | * the calling point onwards. 
18 | */ 19 | template 20 | struct continuations 21 | { 22 | static constexpr auto success = [](Iter next) noexcept -> ast::match_result { 23 | return {next, true}; 24 | }; 25 | 26 | static constexpr auto equals = [](Iter target) noexcept { 27 | return [=](Iter next) noexcept -> ast::match_result { 28 | return {next, next == target}; 29 | }; 30 | }; 31 | }; 32 | } 33 | #endif //MREGEX_UTILITY_CONTINUATIONS_HPP -------------------------------------------------------------------------------- /include/mregex/utility/distance.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_DISTANCE_HPP 2 | #define MREGEX_UTILITY_DISTANCE_HPP 3 | 4 | #include 5 | 6 | namespace meta 7 | { 8 | template 9 | constexpr bool distance_less_than(Iter begin, Iter end) noexcept 10 | { 11 | std::ptrdiff_t offset = 0; 12 | while (offset != N && begin != end) 13 | { 14 | ++offset; 15 | ++begin; 16 | } 17 | return offset != N; 18 | } 19 | 20 | template 21 | constexpr bool distance_less_than(Iter begin, Iter end) noexcept 22 | { 23 | return std::distance(begin, end) < N; 24 | } 25 | 26 | template 27 | constexpr bool distance_less_than(std::ptrdiff_t required_dist, Iter begin, Iter end) noexcept 28 | { 29 | std::ptrdiff_t offset = 0; 30 | while (offset != required_dist && begin != end) 31 | { 32 | ++offset; 33 | ++begin; 34 | } 35 | return offset != required_dist; 36 | } 37 | 38 | template 39 | constexpr bool distance_less_than(std::ptrdiff_t required_dist, Iter begin, Iter end) noexcept 40 | { 41 | return std::distance(begin, end) < required_dist; 42 | } 43 | } 44 | #endif //MREGEX_UTILITY_DISTANCE_HPP -------------------------------------------------------------------------------- /include/mregex/utility/enums.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_ENUMS_HPP 2 | #define MREGEX_UTILITY_ENUMS_HPP 3 | 4 | namespace meta 5 | { 6 | /** 7 | * Enumerator which lists the 
possible match modes 8 | * for various regex quantifiers. 9 | */ 10 | enum struct match_mode 11 | { 12 | greedy, 13 | lazy, 14 | possessive 15 | }; 16 | 17 | /** 18 | * Enumerator which lists line sensitivity modes 19 | * for matching regex patterns. 20 | */ 21 | enum struct line_mode 22 | { 23 | singleline, 24 | multiline 25 | }; 26 | 27 | /** 28 | * Enumerator which lists the possible directions in which 29 | * lookaround assertions can match. 30 | */ 31 | enum struct lookaround_direction 32 | { 33 | ahead, 34 | behind 35 | }; 36 | } 37 | #endif //MREGEX_UTILITY_ENUMS_HPP -------------------------------------------------------------------------------- /include/mregex/utility/input_range_adapter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_INPUT_RANGE_ADAPTER_HPP 2 | #define MREGEX_UTILITY_INPUT_RANGE_ADAPTER_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta 8 | { 9 | /** 10 | * Adapter that allows iterating through the results returned by a generating function. 11 | * The generator is required to return a boolean-convertible value to signal the end of iteration. 12 | * The generator may be called once when constructing this object. 13 | * 14 | * @tparam Generator The callable type used to generate the data 15 | */ 16 | template 17 | struct input_range_adapter : private Generator 18 | { 19 | // All top-level qualifiers are removed to avoid storing references and const members inside the struct. 
20 | using value_type = std::remove_cvref_t>; 21 | 22 | static constexpr bool is_nothrow_invocable = std::is_nothrow_invocable_v; 23 | 24 | template 25 | constexpr explicit input_range_adapter(Gen &&generator) 26 | noexcept(std::is_nothrow_constructible_v && is_nothrow_invocable) 27 | : Generator{std::forward(generator)}, _current_result{Generator::operator()()} 28 | {} 29 | 30 | struct iterator 31 | { 32 | using value_type = typename input_range_adapter::value_type; 33 | using pointer = std::add_pointer_t; 34 | using reference = std::add_lvalue_reference_t; 35 | using difference_type = std::ptrdiff_t; 36 | using iterator_category = std::input_iterator_tag; 37 | 38 | /** 39 | * Note: this default constructor is technically not required for the iterator 40 | * to satisfy std::input_iterator. However, due to a wrong implementation in some older versions of 41 | * the standard library (https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p2325r3.html), 42 | * a default constructor is required to satisfy the std::weakly_incrementable concept. 
43 | */ 44 | constexpr iterator() noexcept = default; 45 | 46 | constexpr iterator(input_range_adapter &generator) noexcept 47 | : _target{&generator} 48 | {} 49 | 50 | constexpr reference operator*() const noexcept 51 | { 52 | return _target->current_result(); 53 | } 54 | 55 | constexpr pointer operator->() const noexcept 56 | { 57 | return &_target->current_result(); 58 | } 59 | 60 | constexpr iterator &operator++() noexcept(is_nothrow_invocable) 61 | { 62 | _target->compute_next(); 63 | return *this; 64 | } 65 | 66 | constexpr iterator operator++(int) noexcept(is_nothrow_invocable) 67 | { 68 | iterator old_iter{_target}; 69 | this->operator++(); 70 | return old_iter; 71 | } 72 | 73 | constexpr explicit operator bool() const noexcept 74 | { 75 | return _target->active(); 76 | } 77 | 78 | constexpr bool operator==(std::default_sentinel_t) const noexcept 79 | { 80 | return !_target->active(); 81 | } 82 | 83 | private: 84 | input_range_adapter *_target{nullptr}; 85 | }; 86 | 87 | /** 88 | * Returns an input iterator to the current element of the generator. 89 | * Calling this multiple times will usually result in different values 90 | * since the range provides input iterators only. 91 | * 92 | * @return An input iterator pointing to the first element of the range 93 | */ 94 | constexpr auto begin() noexcept 95 | { 96 | return iterator{*this}; 97 | } 98 | 99 | /** 100 | * Returns a sentinel object for the iterator of the range. 101 | * This sentinel can only be used to compare for equality with an iterator. 
102 | * 103 | * @return A sentinel object suitable for the iterator of the range 104 | */ 105 | constexpr auto end() const noexcept 106 | { 107 | return std::default_sentinel; 108 | } 109 | 110 | constexpr bool active() const noexcept 111 | { 112 | return static_cast(_current_result); 113 | } 114 | 115 | constexpr auto &current_result() & noexcept 116 | { 117 | return _current_result; 118 | } 119 | 120 | constexpr auto const &current_result() const & noexcept 121 | { 122 | return _current_result; 123 | } 124 | 125 | constexpr auto &&current_result() && noexcept 126 | { 127 | return std::move(_current_result); 128 | } 129 | 130 | constexpr auto const &&current_result() const && noexcept 131 | { 132 | return std::move(_current_result); 133 | } 134 | 135 | constexpr void compute_next() noexcept(is_nothrow_invocable) 136 | { 137 | _current_result = Generator::operator()(); 138 | } 139 | 140 | private: 141 | value_type _current_result; 142 | }; 143 | 144 | template 145 | input_range_adapter(Gen &&) -> input_range_adapter>; 146 | } 147 | #endif //MREGEX_UTILITY_INPUT_RANGE_ADAPTER_HPP -------------------------------------------------------------------------------- /include/mregex/utility/reversed_range_view.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_REVERSED_RANGE_VIEW_HPP 2 | #define MREGEX_UTILITY_REVERSED_RANGE_VIEW_HPP 3 | 4 | #include 5 | 6 | namespace meta 7 | { 8 | /** 9 | * Minimalist view that reverses the normal iteration direction on a given bidirectional range. 10 | * This adapter is used as a workaround since std::views::reverse isn't working in libstdc++. 
11 | * 12 | * @tparam Range The bidirectional range type to be reversed 13 | */ 14 | template 15 | struct reversed_range_view 16 | { 17 | using value_type = std::ranges::range_value_t; 18 | using iterator = std::reverse_iterator>; 19 | 20 | constexpr explicit reversed_range_view(Range &range) noexcept 21 | : _begin{std::make_reverse_iterator(std::end(range))}, 22 | _end{std::make_reverse_iterator(std::begin(range))} 23 | {} 24 | 25 | constexpr auto begin() const noexcept 26 | { 27 | return _begin; 28 | } 29 | 30 | constexpr auto end() const noexcept 31 | { 32 | return _end; 33 | } 34 | 35 | private: 36 | iterator _begin; 37 | iterator _end; 38 | }; 39 | } 40 | #endif //MREGEX_UTILITY_REVERSED_RANGE_VIEW_HPP -------------------------------------------------------------------------------- /include/mregex/utility/static_string.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_STATIC_STRING_HPP 2 | #define MREGEX_UTILITY_STATIC_STRING_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta 9 | { 10 | /** 11 | * Helper data structure that implements a constexpr string as a literal type. 12 | * 13 | * @note The string is not null-terminated. 14 | * 15 | * @tparam N The length of the string 16 | */ 17 | template 18 | struct static_string 19 | { 20 | std::array data{}; 21 | 22 | /** 23 | * Constructs a static string from a null-terminated array of characters. 24 | * 25 | * @note The null terminator is not copied into the string. 
26 | */ 27 | constexpr static_string(char const (&str)[N + 1]) noexcept 28 | { 29 | if constexpr (N != 0) 30 | std::copy(str, str + N, std::begin(data)); 31 | } 32 | 33 | constexpr auto length() const noexcept -> std::size_t 34 | { 35 | return N; 36 | } 37 | 38 | constexpr auto begin() noexcept 39 | { 40 | return std::begin(data); 41 | } 42 | 43 | constexpr auto begin() const noexcept 44 | { 45 | return std::cbegin(data); 46 | } 47 | 48 | constexpr auto end() noexcept 49 | { 50 | return std::end(data); 51 | } 52 | 53 | constexpr auto end() const noexcept 54 | { 55 | return std::cend(data); 56 | } 57 | 58 | constexpr explicit operator std::string_view() const noexcept 59 | { 60 | return std::string_view(std::cbegin(data), N); 61 | } 62 | 63 | constexpr char operator[](std::size_t i) const noexcept 64 | { 65 | return data[i]; 66 | } 67 | }; 68 | 69 | template 70 | static_string(char const (&)[N]) -> static_string; 71 | 72 | /** 73 | * Constructs a static string from a pack of characters. 74 | * 75 | * @note No null-termination is assumed, all characters become part of the string. 
76 | */ 77 | template 78 | constexpr auto make_static_string() noexcept -> static_string 79 | { 80 | return {{Chars ...}}; 81 | } 82 | } 83 | 84 | template 85 | std::ostream &operator<<(std::ostream &os, meta::static_string const &str) 86 | { 87 | return os << static_cast(str); 88 | } 89 | #endif //MREGEX_UTILITY_STATIC_STRING_HPP -------------------------------------------------------------------------------- /include/mregex/utility/tuple.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_TUPLE_HPP 2 | #define MREGEX_UTILITY_TUPLE_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta 8 | { 9 | namespace detail 10 | { 11 | template 12 | constexpr auto transform_tuple(Tuple const &tuple, Func &&func, std::index_sequence) 13 | { 14 | return std::tuple{std::invoke(func, std::get(tuple)) ...}; 15 | } 16 | 17 | template 18 | constexpr void iterate_tuple(Tuple &&tuple, Func &&func, std::index_sequence) 19 | { 20 | (std::invoke(func, std::get(std::forward(tuple))), ...); 21 | } 22 | } 23 | 24 | /** 25 | * Creates a new tuple by applying an invocable object on each element of 26 | * another tuple. 27 | * 28 | * @param tuple The original tuple 29 | * @param func The function invoked to transform each tuple element 30 | * @return A new std::tuple that contains the generated elements 31 | */ 32 | template 33 | [[nodiscard]] constexpr auto transform_tuple(Tuple const &tuple, Func &&func) 34 | { 35 | return detail::transform_tuple( 36 | tuple, 37 | std::forward(func), 38 | std::make_index_sequence>>{} 39 | ); 40 | } 41 | 42 | /** 43 | * Iterates a tuple-like container and invokes a function on each element. 
44 | * 45 | * @param tuple The tuple to be iterated 46 | * @param func The function invoked on each tuple element 47 | */ 48 | template 49 | constexpr void iterate_tuple(Tuple &&tuple, Func &&func) 50 | { 51 | detail::iterate_tuple( 52 | std::forward(tuple), 53 | std::forward(func), 54 | std::make_index_sequence>>{} 55 | ); 56 | } 57 | } 58 | #endif //MREGEX_UTILITY_TUPLE_HPP -------------------------------------------------------------------------------- /include/mregex/utility/type_sequence.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_TYPE_SEQUENCE_HPP 2 | #define MREGEX_UTILITY_TYPE_SEQUENCE_HPP 3 | 4 | #include 5 | 6 | namespace meta 7 | { 8 | /** 9 | * Metacontainer for a sequence of types. 10 | */ 11 | template 12 | struct type_sequence {}; 13 | 14 | /** 15 | * Checks if a given type sequence has no elements. 16 | */ 17 | template 18 | inline constexpr bool is_empty = false; 19 | 20 | template<> 21 | inline constexpr bool is_empty> = true; 22 | 23 | template 24 | struct front; 25 | 26 | template 27 | struct front> 28 | { 29 | using type = First; 30 | }; 31 | 32 | template<> 33 | struct front> 34 | { 35 | using type = symbol::empty; 36 | }; 37 | 38 | /** 39 | * Queries the type at the front of the sequence. 40 | * Returns symbol::empty for empty sequences. 41 | */ 42 | template 43 | using front_t = typename front::type; 44 | 45 | template 46 | struct push; 47 | 48 | template 49 | struct push, T> 50 | { 51 | using type = type_sequence; 52 | }; 53 | 54 | template 55 | struct push, type_sequence> 56 | { 57 | using type = type_sequence; 58 | }; 59 | 60 | /** 61 | * Pushes a type to the front of the type sequence. 62 | * If the type is another type sequence, its elements are pushed instead. 
63 | */ 64 | template 65 | using push_t = typename push::type; 66 | 67 | template 68 | struct pop; 69 | 70 | template 71 | struct pop> 72 | { 73 | using type = type_sequence; 74 | }; 75 | 76 | template<> 77 | struct pop> 78 | { 79 | using type = type_sequence<>; 80 | }; 81 | 82 | /** 83 | * Removes a type from the front of the type sequence. 84 | * Has no effect on empty sequences. 85 | */ 86 | template 87 | using pop_t = typename pop::type; 88 | 89 | template 90 | struct concat; 91 | 92 | template<> 93 | struct concat<> 94 | { 95 | using type = type_sequence<>; 96 | }; 97 | 98 | template 99 | struct concat 100 | { 101 | using type = Sequence; 102 | }; 103 | 104 | template 105 | struct concat, type_sequence> 106 | { 107 | using type = type_sequence; 108 | }; 109 | 110 | template 111 | struct concat, type_sequence, type_sequence> 112 | { 113 | using type = type_sequence; 114 | }; 115 | 116 | template 117 | struct concat, type_sequence, type_sequence, type_sequence, Sequences ...> 118 | { 119 | using type = typename concat, Sequences ...>::type; 120 | }; 121 | 122 | /** 123 | * Concatenates multiple type sequences. 124 | */ 125 | template 126 | using concat_t = typename concat::type; 127 | 128 | template 129 | struct reverse_type_pack 130 | { 131 | using type = type_sequence; 132 | }; 133 | 134 | template 135 | struct reverse_type_pack 136 | { 137 | using type = type_sequence; 138 | }; 139 | 140 | template 141 | struct reverse_type_pack 142 | { 143 | using type = type_sequence; 144 | }; 145 | 146 | template 147 | struct reverse_type_pack 148 | { 149 | using type = concat_t::type, type_sequence>; 150 | }; 151 | 152 | /** 153 | * Reverses the order of elements in a template type pack. 
154 | */ 155 | template 156 | using reverse_type_pack_t = typename reverse_type_pack::type; 157 | 158 | template 159 | struct reverse; 160 | 161 | template 162 | struct reverse> 163 | { 164 | using type = reverse_type_pack_t; 165 | }; 166 | 167 | /** 168 | * Reverses the order of elements in a type sequence. 169 | */ 170 | template 171 | using reverse_t = typename reverse::type; 172 | } 173 | #endif //MREGEX_UTILITY_TYPE_SEQUENCE_HPP -------------------------------------------------------------------------------- /include/mregex/utility/type_traits.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_UTILITY_TYPE_TRAITS_HPP 2 | #define MREGEX_UTILITY_TYPE_TRAITS_HPP 3 | 4 | #include 5 | 6 | namespace meta 7 | { 8 | /** 9 | * Type trait that checks if T is an rvalue reference to an object that is non-trivially destructible. 10 | * This usually indicates a temporary object which needs to free memory upon destruction. 11 | * Although this does not guarantee that T has dynamically allocated memory, it is a good heuristic. 12 | * 13 | * @tparam T The (possibly CV-qualified) type 14 | */ 15 | template 16 | inline constexpr bool is_expiring_memory_owner = std::is_rvalue_reference_v && 17 | !std::is_trivially_destructible_v>; 18 | } 19 | #endif //MREGEX_UTILITY_TYPE_TRAITS_HPP -------------------------------------------------------------------------------- /include/mregex/xpr/adapters.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_XPR_ADAPTERS_HPP 2 | #define MREGEX_XPR_ADAPTERS_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace meta::xpr 8 | { 9 | namespace detail 10 | { 11 | template 12 | constexpr auto to_sequence(std::index_sequence) noexcept 13 | { 14 | return ast::sequence ...>{}; 15 | } 16 | 17 | template 18 | constexpr auto flatten(Wrapper) noexcept -> Wrapper { return {}; } 19 | 20 | template typename Wrapper, typename First, typename... 
Second> 21 | constexpr auto flatten(Wrapper>) noexcept -> Wrapper { return {}; } 22 | 23 | template typename Wrapper, typename... First, typename Second> 24 | constexpr auto flatten(Wrapper, Second>) noexcept -> Wrapper { return {}; } 25 | 26 | template typename Wrapper, typename... First, typename... Second> 27 | constexpr auto flatten(Wrapper, Wrapper>) noexcept -> Wrapper { return {}; } 28 | } 29 | 30 | /** 31 | * Converts a static string into an instance of ast::sequence. 32 | * 33 | * @tparam String The static string to be converted 34 | * @return An instance of ast::sequence equivalent to the original string 35 | */ 36 | template 37 | constexpr auto to_sequence() noexcept 38 | { 39 | return detail::to_sequence(std::make_index_sequence{}); 40 | } 41 | 42 | /** 43 | * Wraps an Abstract Syntax Tree in a type which provides a high-level regex API. 44 | * 45 | * @tparam AST The Abstract Syntax Tree of the regular expression 46 | * @return A wrapper object which provides regex methods 47 | */ 48 | template 49 | constexpr auto to_regex(AST) noexcept -> regex_adapter { return {}; } 50 | 51 | /** 52 | * Packs a sequence of elements into a wrapper that holds a single type. 53 | * If the list contains more than one type, it is wrapped into an ast::sequence. 54 | * 55 | * @tparam Wrapper The type of the wrapper 56 | * @tparam Nodes The list of elements to be wrapped 57 | * @return An instance of the wrapper type 58 | */ 59 | template typename Wrapper, typename... Nodes> 60 | constexpr auto pack_sequence(Nodes...) noexcept -> Wrapper> { return {}; } 61 | 62 | template typename Wrapper, typename Node> 63 | constexpr auto pack_sequence(Node) noexcept -> Wrapper { return {}; } 64 | 65 | /** 66 | * Wraps a sequence of elements into a wrapper type and flattens the result. 67 | * Flattening will remove any redundant wrappers from the resulting type. 
68 | * 69 | * @tparam Wrapper The type of the wrapper 70 | * @tparam First The first element in the sequence 71 | * @tparam Second The second element in the sequence 72 | * @tparam Rest The rest of the sequence 73 | * @return A flattened instance of the wrapper type 74 | */ 75 | template typename Wrapper, typename First, typename Second, typename... Rest> 76 | constexpr auto flat_wrap_sequence(First, Second, Rest... rest) noexcept 77 | { 78 | constexpr auto wrapped = detail::flatten(Wrapper{}); 79 | if constexpr(sizeof...(Rest) == 0) 80 | return wrapped; 81 | else 82 | return flat_wrap_sequence(wrapped, rest ...); 83 | } 84 | 85 | template typename Wrapper, typename Node> 86 | constexpr auto flat_wrap_sequence(Node) noexcept -> Node { return {}; } 87 | 88 | template typename Wrapper> 89 | constexpr auto flat_wrap_sequence() noexcept -> Wrapper<> { return {}; } 90 | } 91 | #endif //MREGEX_XPR_ADAPTERS_HPP -------------------------------------------------------------------------------- /include/mregex/xpr/components.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_XPR_COMPONENTS_HPP 2 | #define MREGEX_XPR_COMPONENTS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace meta::xpr 9 | { 10 | /** 11 | * Expression-based API for constructing a regular expression. 12 | */ 13 | 14 | // Sequences 15 | template 16 | constexpr auto concat(regex_adapter...) noexcept 17 | { 18 | return to_regex(flat_wrap_sequence(Nodes{} ...)); 19 | } 20 | 21 | // Alternations 22 | template 23 | constexpr auto either(regex_adapter...) noexcept 24 | { 25 | return to_regex(flat_wrap_sequence(Nodes{} ...)); 26 | } 27 | 28 | // Atomics 29 | template 30 | constexpr auto atomic(regex_adapter...) noexcept 31 | { 32 | return to_regex(pack_sequence(Nodes{} ...)); 33 | } 34 | 35 | // Captures 36 | template 37 | constexpr auto capture(regex_adapter...) 
noexcept 38 | { 39 | using wrapper_provider = capture_provider>; 40 | return to_regex(pack_sequence(Nodes{} ...)); 41 | } 42 | 43 | template 44 | constexpr auto capture(regex_adapter...) noexcept 45 | { 46 | using wrapper_provider = capture_provider; 47 | return to_regex(pack_sequence(Nodes{} ...)); 48 | } 49 | 50 | // Repetition 51 | template 52 | constexpr auto between(regex_adapter...) noexcept 53 | { 54 | using wrapper_provider = repetition_provider, symbol::quantifier_value>; 55 | return to_regex(pack_sequence(Nodes{} ...)); 56 | } 57 | 58 | template 59 | constexpr auto between(regex_adapter... expressions) noexcept 60 | { 61 | return between(expressions ...); 62 | } 63 | 64 | template 65 | constexpr auto at_least(regex_adapter...) noexcept 66 | { 67 | using wrapper_provider = repetition_provider, symbol::infinity>; 68 | return to_regex(pack_sequence(Nodes{} ...)); 69 | } 70 | 71 | template 72 | constexpr auto at_least(regex_adapter... expressions) noexcept 73 | { 74 | return at_least(expressions ...); 75 | } 76 | 77 | // Fixed repetition 78 | template 79 | constexpr auto exactly(regex_adapter... expressions) noexcept 80 | { 81 | return between(expressions ...); 82 | } 83 | 84 | template 85 | constexpr auto exactly(regex_adapter... expressions) noexcept 86 | { 87 | return between(expressions ...); 88 | } 89 | 90 | // Kleene star 91 | template 92 | constexpr auto zero_or_more(regex_adapter... expressions) noexcept 93 | { 94 | return at_least<0, Mode>(expressions ...); 95 | } 96 | 97 | template 98 | constexpr auto zero_or_more(regex_adapter... expressions) noexcept 99 | { 100 | return at_least<0>(expressions ...); 101 | } 102 | 103 | // Plus 104 | template 105 | constexpr auto one_or_more(regex_adapter... expressions) noexcept 106 | { 107 | return at_least<1, Mode>(expressions ...); 108 | } 109 | 110 | template 111 | constexpr auto one_or_more(regex_adapter... 
expressions) noexcept 112 | { 113 | return at_least<1>(expressions ...); 114 | } 115 | 116 | // Optional 117 | template 118 | constexpr auto maybe(regex_adapter... expressions) noexcept 119 | { 120 | return between<0, 1, Mode>(expressions ...); 121 | } 122 | 123 | template 124 | constexpr auto maybe(regex_adapter... expressions) noexcept 125 | { 126 | return between<0, 1>(expressions ...); 127 | } 128 | 129 | // Terminals 130 | inline constexpr auto empty = regex_adapter{}; 131 | inline constexpr auto nothing = regex_adapter{}; 132 | inline constexpr auto begin_line = regex_adapter{}; 133 | inline constexpr auto end_line = regex_adapter{}; 134 | inline constexpr auto begin_input = regex_adapter{}; 135 | inline constexpr auto end_input = regex_adapter{}; 136 | inline constexpr auto word_boundary = regex_adapter{}; 137 | inline constexpr auto whitespace = regex_adapter{}; 138 | inline constexpr auto wildcard = regex_adapter{}; 139 | inline constexpr auto lower = regex_adapter{}; 140 | inline constexpr auto upper = regex_adapter{}; 141 | inline constexpr auto digit = regex_adapter{}; 142 | inline constexpr auto word = regex_adapter{}; 143 | inline constexpr auto linebreak = regex_adapter{}; 144 | 145 | template 146 | inline constexpr auto chr = regex_adapter>{}; 147 | 148 | template 149 | inline constexpr auto str = to_regex(to_sequence()); 150 | 151 | template 152 | inline constexpr auto range = regex_adapter>{}; 153 | 154 | template 155 | inline constexpr auto group = regex_adapter>{}; 156 | 157 | template 158 | inline constexpr auto group_named = regex_adapter>{}; 159 | 160 | template 161 | constexpr auto negate(regex_adapter) noexcept -> regex_adapter> { return {}; } 162 | 163 | // Lookarounds 164 | template 165 | constexpr auto ahead(regex_adapter...) noexcept 166 | { 167 | return to_regex(pack_sequence(Nodes{} ...)); 168 | } 169 | 170 | template 171 | constexpr auto behind(regex_adapter...) 
noexcept 172 | { 173 | return to_regex(pack_sequence(Nodes{} ...)); 174 | } 175 | 176 | template 177 | inline constexpr auto regex = meta::regex{}; 178 | } 179 | #endif //MREGEX_XPR_COMPONENTS_HPP -------------------------------------------------------------------------------- /include/mregex/xpr/flags.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_XPR_FLAGS_HPP 2 | #define MREGEX_XPR_FLAGS_HPP 3 | 4 | #include 5 | 6 | namespace meta::xpr 7 | { 8 | /** 9 | * Adds a sequence of flags to a given regular expression. 10 | */ 11 | template 12 | constexpr auto add_flags(Regex) noexcept -> typename Regex::template add_flags { return {}; } 13 | 14 | /** 15 | * Clears all flags from a given regular expression. 16 | */ 17 | template 18 | constexpr auto clear_flags(Regex) noexcept -> typename Regex::clear_flags { return {}; } 19 | } 20 | #endif //MREGEX_XPR_FLAGS_HPP -------------------------------------------------------------------------------- /include/mregex/xpr/operators.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_XPR_OPERATORS_HPP 2 | #define MREGEX_XPR_OPERATORS_HPP 3 | 4 | #include 5 | 6 | namespace meta::xpr::operators 7 | { 8 | template 9 | constexpr auto operator>>(regex_adapter lhs, regex_adapter rhs) noexcept 10 | { 11 | return concat(lhs, rhs); 12 | } 13 | 14 | template 15 | constexpr auto operator|(regex_adapter lhs, regex_adapter rhs) noexcept 16 | { 17 | return either(lhs, rhs); 18 | } 19 | 20 | template 21 | constexpr auto operator!(regex_adapter expression) noexcept 22 | { 23 | return negate(expression); 24 | } 25 | 26 | template 27 | constexpr auto operator*(regex_adapter expression) noexcept 28 | { 29 | return zero_or_more(expression); 30 | } 31 | 32 | template 33 | constexpr auto operator+(regex_adapter expression) noexcept 34 | { 35 | return one_or_more(expression); 36 | } 37 | } 38 | #endif //MREGEX_XPR_OPERATORS_HPP 
-------------------------------------------------------------------------------- /include/mregex/xpr/providers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MREGEX_XPR_PROVIDERS_HPP 2 | #define MREGEX_XPR_PROVIDERS_HPP 3 | 4 | #include 5 | 6 | namespace meta::xpr 7 | { 8 | /** 9 | * Provider metafunctions used to partially evaluate the declaration of AST nodes. 10 | */ 11 | 12 | template 13 | struct capture_provider 14 | { 15 | template 16 | using type = ast::capture; 17 | }; 18 | 19 | template 20 | struct repetition_provider 21 | { 22 | template 23 | using type = ast::basic_repetition; 24 | }; 25 | } 26 | #endif //MREGEX_XPR_PROVIDERS_HPP -------------------------------------------------------------------------------- /test/.clang-tidy: -------------------------------------------------------------------------------- 1 | Checks: 2 | -readability-simplify-boolean-expr, 3 | -readability-magic-numbers 4 | InheritParentConfig: true -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Test source files 2 | set( 3 | test_sources 4 | tests.cpp 5 | ast_indexing_tests.cpp 6 | ast_inversion_tests.cpp 7 | ast_traits_tests.cpp 8 | iterator_api_tests.cpp 9 | match_result_tests.cpp 10 | parser_accept_tests.cpp 11 | parser_ast_tests.cpp 12 | parser_error_tests.cpp 13 | regex_iteration_tests.cpp 14 | regex_match_tests.cpp 15 | regex_search_tests.cpp 16 | regex_traits_tests.cpp 17 | static_string_tests.cpp 18 | symbol_tests.cpp 19 | type_traits_tests.cpp 20 | type_sequence_tests.cpp 21 | xpr_ast_tests.cpp 22 | ) 23 | 24 | # Add target to run all tests 25 | add_executable(tests ${test_sources}) 26 | set_target_properties(tests PROPERTIES LINKER_LANGUAGE CXX) 27 | target_link_libraries(tests PUBLIC mregex) 28 | set_property(TARGET tests PROPERTY CXX_STANDARD 20) 
-------------------------------------------------------------------------------- /test/ast_indexing_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | using namespace ast; 6 | 7 | namespace 8 | { 9 | template 10 | inline constexpr bool is_same_after_indexing = std::is_same_v, AST>; 11 | 12 | template 13 | inline constexpr bool is_indexing_of = std::is_same_v, Expected>; 14 | 15 | template 16 | struct make_capture 17 | { 18 | using type = unnamed_capture>; 19 | }; 20 | 21 | template 22 | struct make_capture 23 | { 24 | using type = unnamed_capture; 25 | }; 26 | 27 | template 28 | struct make_capture 29 | { 30 | using type = unnamed_capture; 31 | }; 32 | 33 | template 34 | using id = typename make_capture::type; 35 | 36 | template 37 | using group = typename make_capture<0, Nodes ...>::type; 38 | } 39 | 40 | static_assert(is_same_after_indexing); 41 | static_assert(is_same_after_indexing); 42 | static_assert(is_same_after_indexing>); 43 | static_assert(is_same_after_indexing); 44 | static_assert(is_same_after_indexing); 45 | static_assert(is_same_after_indexing>); 46 | static_assert(is_same_after_indexing>); 47 | static_assert(is_same_after_indexing>>); 48 | static_assert(is_same_after_indexing>>); 49 | static_assert(is_same_after_indexing>>); 50 | static_assert(is_same_after_indexing, digit>>); 51 | static_assert(is_same_after_indexing>, digit>>); 52 | 53 | static_assert( 54 | is_indexing_of 55 | < 56 | group>, 57 | id<1, id<2>> 58 | > 59 | ); 60 | static_assert( 61 | is_indexing_of 62 | < 63 | sequence, group<>>, 64 | sequence, id<2>> 65 | > 66 | ); 67 | static_assert( 68 | is_indexing_of 69 | < 70 | group>>>, 71 | id<1, id<2, id<3, id<4>>>> 72 | > 73 | ); 74 | static_assert( 75 | is_indexing_of 76 | < 77 | sequence>>>, group>>>>, 78 | sequence>>>, id<5, id<6, id<7, id<8>>>>> 79 | > 80 | ); 81 | static_assert( 82 | is_indexing_of 83 | < 84 | group, group<>, group<>>, 85 | 
id<1, id<2>, id<3>, id<4>> 86 | > 87 | ); 88 | static_assert( 89 | is_indexing_of 90 | < 91 | group, group<>>, group<>, group<>, group, group>, group<>>>, 92 | id<1, id<2, id<3>, id<4>>, id<5>, id<6>, id<7, id<8>, id<9, id<10>>, id<11>>> 93 | > 94 | ); 95 | } -------------------------------------------------------------------------------- /test/ast_inversion_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | using namespace ast; 6 | 7 | namespace 8 | { 9 | template 10 | inline constexpr bool is_inversion = std::is_same_v, Expected>; 11 | } 12 | 13 | static_assert(is_inversion); 14 | static_assert(is_inversion, literal<'a'>>); 15 | static_assert( 16 | is_inversion 17 | < 18 | plus>, 19 | plus> 20 | > 21 | ); 22 | static_assert( 23 | is_inversion 24 | < 25 | unnamed_capture<1, literal<'a'>>, 26 | unnamed_capture<1, literal<'a'>> 27 | > 28 | ); 29 | static_assert( 30 | is_inversion 31 | < 32 | alternation>, 33 | alternation> 34 | > 35 | ); 36 | static_assert( 37 | is_inversion 38 | < 39 | repetition, symbol::quantifier_value<5>, literal<'a'>>, 40 | repetition, symbol::quantifier_value<5>, literal<'a'>> 41 | > 42 | ); 43 | static_assert( 44 | is_inversion 45 | < 46 | sequence>, 47 | sequence, wildcard> 48 | > 49 | ); 50 | static_assert( 51 | is_inversion 52 | < 53 | sequence, star>, literal<'c'>>, 54 | sequence, star>, literal<'a'>> 55 | > 56 | ); 57 | static_assert( 58 | is_inversion 59 | < 60 | sequence, literal<'b'>, literal<'c'>, literal<'d'>>, 61 | sequence, literal<'c'>, literal<'b'>, literal<'a'>> 62 | > 63 | ); 64 | static_assert( 65 | is_inversion 66 | < 67 | sequence, literal<'b'>, sequence>>, 68 | sequence, empty>, literal<'b'>, literal<'a'>> 69 | > 70 | ); 71 | static_assert( 72 | is_inversion 73 | < 74 | alternation, literal<'b'>, sequence>>, 75 | alternation, literal<'b'>, sequence, empty>> 76 | > 77 | ); 78 | static_assert( 79 | is_inversion 80 | < 81 | 
alternation, literal<'b'>, positive_lookahead>>>, 82 | alternation, literal<'b'>, positive_lookbehind>>> 83 | > 84 | ); 85 | static_assert( 86 | is_inversion 87 | < 88 | sequence 89 | < 90 | atomic>, 91 | optional 92 | < 93 | sequence 94 | < 95 | literal<'a'>, 96 | negative_lookbehind, 97 | literal<'b'> 98 | > 99 | >, 100 | literal<'y'> 101 | >, 102 | sequence 103 | < 104 | literal<'y'>, 105 | optional 106 | < 107 | sequence 108 | < 109 | literal<'b'>, 110 | negative_lookahead, 111 | literal<'a'> 112 | > 113 | >, 114 | atomic> 115 | > 116 | > 117 | ); 118 | } -------------------------------------------------------------------------------- /test/ast_traits_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | using namespace ast; 6 | using symbol::name; 7 | using symbol::unnamed; 8 | 9 | namespace 10 | { 11 | template 12 | inline constexpr bool is_name_spec_of = std::is_same_v>, Expected>; 13 | } 14 | 15 | // Nodes which are trivially matchable by definition 16 | static_assert(is_trivially_matchable); 17 | static_assert(is_trivially_matchable>); 18 | static_assert(is_trivially_matchable>); 19 | static_assert(is_trivially_matchable); 20 | static_assert(is_trivially_matchable); 21 | static_assert(is_trivially_matchable>); 22 | static_assert(is_trivially_matchable, whitespace>>); 23 | // Nodes which should be deduced as trivially matchable 24 | static_assert(is_trivially_matchable>); 25 | static_assert(is_trivially_matchable>); 26 | static_assert(is_trivially_matchable>); 27 | static_assert(is_trivially_matchable>); 28 | static_assert(is_trivially_matchable, literal<'b'>>>); 29 | // Nodes which should not be detected as trivially matchable 30 | static_assert(is_trivially_matchable == false); 31 | static_assert(is_trivially_matchable == false); 32 | static_assert(is_trivially_matchable> == false); 33 | static_assert(is_trivially_matchable> == false); 34 | 
static_assert(is_trivially_matchable>> == false); 35 | static_assert(is_trivially_matchable> == false); 36 | static_assert(is_trivially_matchable>> == false); 37 | static_assert(is_trivially_matchable> == false); 38 | static_assert(is_trivially_matchable> == false); 39 | static_assert(is_trivially_matchable>> == false); 40 | static_assert(is_trivially_matchable, literal<'a'>, literal<'b'>>> == false); 41 | 42 | static_assert(is_name_spec_of<"", type_sequence<>>); 43 | static_assert(is_name_spec_of<"abc", type_sequence<>>); 44 | static_assert(is_name_spec_of<"a?b+|c*d", type_sequence<>>); 45 | static_assert(is_name_spec_of<"(a)", type_sequence>); 46 | static_assert(is_name_spec_of<"(a){5}", type_sequence>); 47 | static_assert(is_name_spec_of<"(a)+", type_sequence>); 48 | static_assert(is_name_spec_of<"a?(b+)c*", type_sequence>); 49 | static_assert(is_name_spec_of<"(a?)(b+(c{1,2}))c*", type_sequence>); 50 | static_assert(is_name_spec_of<"(?a)", type_sequence>>); 51 | static_assert(is_name_spec_of<"(?a)+", type_sequence>>); 52 | static_assert(is_name_spec_of<"(?a){5,10}", type_sequence>>); 53 | static_assert(is_name_spec_of<"(a?)(?b+)c*", type_sequence>>); 54 | static_assert(is_name_spec_of<"(a?)(?b+b(b))c*", type_sequence, unnamed>>); 55 | static_assert(is_name_spec_of<"(?=(?a)+)(?a)+)", type_sequence, name<"behind">>>); 56 | static_assert(is_name_spec_of<"(?a?)(?b+b(b))c*", type_sequence, name<"a2">, unnamed>>); 57 | static_assert( 58 | is_name_spec_of 59 | < 60 | "((?.)(?(.))((?.(?(?.).))(.)))", 61 | type_sequence, name<"a2">, unnamed, unnamed, name<"a3">, name<"a4">, name<"a5">, unnamed> 62 | > 63 | ); 64 | static_assert( 65 | is_name_spec_of 66 | < 67 | "(?.){5}(?.){5,10}?(?.){5,}+", 68 | type_sequence, name<"a2">, name<"a3">> 69 | > 70 | ); 71 | 72 | // Nodes which are zero length matchers by definition 73 | static_assert(is_zero_length_matcher); 74 | static_assert(is_zero_length_matcher>); 75 | static_assert(is_zero_length_matcher>); 76 | 
static_assert(is_zero_length_matcher); 77 | static_assert(is_zero_length_matcher); 78 | static_assert(is_zero_length_matcher); 79 | static_assert(is_zero_length_matcher); 80 | static_assert(is_zero_length_matcher); 81 | static_assert(is_zero_length_matcher>); 82 | static_assert(is_zero_length_matcher>); 83 | static_assert(is_zero_length_matcher>); 84 | static_assert(is_zero_length_matcher>); 85 | static_assert(is_zero_length_matcher>); 86 | // Nodes which should be deduced as zero length matchers 87 | static_assert(is_zero_length_matcher>); 88 | static_assert(is_zero_length_matcher>>); 89 | static_assert(is_zero_length_matcher>>>); 90 | static_assert(is_zero_length_matcher, empty>>); 91 | static_assert(is_zero_length_matcher>>); 92 | static_assert(is_zero_length_matcher, word_boundary>>); 93 | static_assert(is_zero_length_matcher>>); 94 | static_assert(is_zero_length_matcher>, star>>); 95 | static_assert(is_zero_length_matcher>, star>>>); 96 | // Nodes which should not be detected as zero length matchers 97 | static_assert(is_zero_length_matcher> == false); 98 | static_assert(is_zero_length_matcher> == false); 99 | static_assert(is_zero_length_matcher> == false); 100 | static_assert(is_zero_length_matcher> == false); 101 | static_assert(is_zero_length_matcher>> == false); 102 | static_assert(is_zero_length_matcher> == false); 103 | static_assert(is_zero_length_matcher>, plus>> == false); 104 | static_assert(is_zero_length_matcher>> == false); 105 | 106 | // Nodes which should be detected as assertions 107 | static_assert(is_assertion); 108 | static_assert(is_assertion); 109 | static_assert(is_assertion); 110 | static_assert(is_assertion); 111 | static_assert(is_assertion); 112 | static_assert(is_assertion>); 113 | static_assert(is_assertion>); 114 | static_assert(is_assertion>); 115 | static_assert(is_assertion>); 116 | static_assert(is_assertion>); 117 | static_assert(is_assertion>); 118 | // Nodes which should not be detected as assertions 119 | 
static_assert(is_assertion == false); 120 | static_assert(is_assertion> == false); 121 | static_assert(is_assertion> == false); 122 | static_assert(is_assertion>> == false); 123 | } -------------------------------------------------------------------------------- /test/iterator_api_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | namespace 6 | { 7 | template 8 | constexpr bool match_reverse(std::string_view sv) 9 | { 10 | using pattern = regex; 11 | return pattern::match(sv.crbegin(), sv.crend()) == true; 12 | } 13 | } 14 | 15 | static_assert(match_reverse("")); 16 | static_assert(match_reverse("a")); 17 | static_assert(match_reverse("")); 18 | static_assert(match_reverse("a")); 19 | static_assert(match_reverse("ba")); 20 | static_assert(match_reverse("cba")); 21 | static_assert(match_reverse("hello")); 22 | static_assert(match_reverse("hello")); 23 | static_assert(match_reverse("hello")); 24 | static_assert(match_reverse("bcccaa")); 25 | static_assert(match_reverse("bccaa")); 26 | static_assert(match_reverse("bcaa")); 27 | static_assert(match_reverse("[xyz ]ba[xyz ]")); 28 | 29 | static_assert(match_reverse("a") == false); 30 | static_assert(match_reverse("") == false); 31 | static_assert(match_reverse("aa") == false); 32 | static_assert(match_reverse("ab") == false); 33 | static_assert(match_reverse("abc") == false); 34 | static_assert(match_reverse("bccccaa") == false); 35 | static_assert(match_reverse("baa") == false); 36 | static_assert(match_reverse("bccca") == false); 37 | static_assert(match_reverse("[ xyz]ba[xyz ]") == false); 38 | } -------------------------------------------------------------------------------- /test/match_result_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | using test_regex = regex<"test">; 6 | using result_type = match_result_view; 7 | 8 | 
namespace 9 | { 10 | inline constexpr std::string_view expected_match_content = "test"; 11 | 12 | constexpr bool is_expected_match(result_type const &res) noexcept 13 | { 14 | return res.matched() && 15 | res.length() == expected_match_content.length() && 16 | !res.group<0>().is_empty() && 17 | res == expected_match_content; 18 | } 19 | 20 | constexpr bool is_expected_non_match(result_type const &res) noexcept 21 | { 22 | return !res.matched() && 23 | res.length() == 0 && 24 | res.group<0>().is_empty(); 25 | } 26 | } 27 | 28 | static_assert(result_type::is_view); 29 | static_assert(result_type::group_count == 1); 30 | 31 | static_assert(is_expected_match(test_regex::match("test"))); 32 | static_assert(is_expected_non_match(test_regex::match("xxx"))); 33 | 34 | static_assert(is_expected_match(test_regex::match_prefix("test___"))); 35 | static_assert(is_expected_non_match(test_regex::match_prefix("_test"))); 36 | 37 | static_assert(is_expected_match(test_regex::search("___test___"))); 38 | static_assert(is_expected_non_match(test_regex::search("xxx"))); 39 | } -------------------------------------------------------------------------------- /test/parser_error_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta 4 | { 5 | namespace 6 | { 7 | template 8 | struct has_error 9 | { 10 | template< std::size_t I> 11 | static constexpr bool at_position = std::is_same_v, parsing::syntax_error>; 12 | }; 13 | } 14 | 15 | static_assert(has_error::at_position<1>); 16 | static_assert(has_error::at_position<0>); 17 | static_assert(has_error::at_position<1>); 18 | static_assert(has_error::at_position<0>); 19 | static_assert(has_error::at_position<0>); 20 | static_assert(has_error::at_position<0>); 21 | static_assert(has_error::at_position<1>); 22 | static_assert(has_error::at_position<2>); 23 | static_assert(has_error::at_position<2>); 24 | static_assert(has_error::at_position<3>); 25 | 
static_assert(has_error::at_position<3>); 26 | static_assert(has_error::at_position<5>); 27 | static_assert(has_error::at_position<2>); 28 | static_assert(has_error::at_position<3>); 29 | static_assert(has_error::at_position<5>); 30 | static_assert(has_error::at_position<4>); 31 | static_assert(has_error::at_position<5>); 32 | static_assert(has_error::at_position<2>); 33 | static_assert(has_error::at_position<2>); 34 | static_assert(has_error::at_position<3>); 35 | static_assert(has_error::at_position<3>); 36 | static_assert(has_error::at_position<12>); 37 | static_assert(has_errorx))">::at_position<3>); 38 | static_assert(has_errorx))">::at_position<7>); 39 | static_assert(has_error::at_position<8>); 40 | static_assert(has_error::at_position<3>); 41 | static_assert(has_error::at_position<7>); 42 | static_assert(has_error)">::at_position<2>); 43 | } -------------------------------------------------------------------------------- /test/regex_iteration_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | namespace 6 | { 7 | template 8 | constexpr auto match_count(std::string_view sv) 9 | { 10 | using pattern = regex; 11 | return std::ranges::distance(pattern::find_all(sv)); 12 | } 13 | 14 | template 15 | constexpr auto token_count(std::string_view sv) 16 | { 17 | using pattern = regex; 18 | return std::ranges::distance(pattern::tokenize(sv)); 19 | } 20 | } 21 | 22 | static_assert(match_count("") == 1); 23 | static_assert(match_count("a") == 1); 24 | static_assert(match_count("a") == 1); 25 | static_assert(match_count("") == 0); 26 | static_assert(match_count("a a a") == 3); 27 | static_assert(match_count("aaaaa") == 5); 28 | static_assert(match_count("b") == 0); 29 | static_assert(match_count("") == 1); 30 | static_assert(match_count("aa") == 2); 31 | static_assert(match_count("") == 0); 32 | static_assert(match_count("aa") == 1); 33 | static_assert(match_count("a aa aaa") == 
3); 34 | static_assert(match_count("bbb ccc ddd") == 0); 35 | static_assert(match_count("a aa aaa") == 6); 36 | static_assert(match_count("a aa aaa") == 3); 37 | static_assert(match_count("aaaa aaa aa a aa aaa aaaa") == 7); 38 | static_assert(match_count("a") == 0); 39 | static_assert(match_count("abcd abcd") == 2); 40 | static_assert(match_count("abcd abcd") == 1); 41 | static_assert(match_count("abcd abcd") == 0); 42 | static_assert(match_count("abcd abc") == 1); 43 | static_assert(match_count("abcd abc") == 0); 44 | static_assert(match_count("abc") == 1); 45 | static_assert(match_count("abc") == 0); 46 | static_assert(match_count("abc def") == 1); 47 | static_assert(match_count("abcd abcd") == 1); 48 | static_assert(match_count("a aa aaa aaaa a a aaaaaa") == 4); 49 | static_assert(match_count("aab ab aa bb aaaaaabb aaaaaab") == 3); 50 | static_assert(match_count("a b c d\te\rf\ng") == 6); 51 | static_assert(match_count("123456") == 6); 52 | static_assert(match_count("") == 0); 53 | static_assert(match_count("a") == 1); 54 | static_assert(match_count("123456") == 1); 55 | static_assert(match_count("123456") == 6); 56 | static_assert(match_count("123456") == 1); 57 | 58 | static_assert(token_count("") == 1); 59 | static_assert(token_count("a") == 1); 60 | static_assert(token_count("a") == 1); 61 | static_assert(token_count("") == 0); 62 | static_assert(token_count("b") == 0); 63 | static_assert(token_count("ababab") == 1); 64 | static_assert(token_count("") == 1); 65 | static_assert(token_count("aa") == 2); 66 | static_assert(token_count("") == 0); 67 | static_assert(token_count("aa") == 1); 68 | static_assert(token_count("a aa aaa") == 1); 69 | static_assert(token_count("ababab") == 6); 70 | static_assert(token_count("ababab") == 1); 71 | static_assert(token_count("ababab") == 0); 72 | static_assert(token_count("a11 b22 c33") == 8); 73 | static_assert(token_count("a11 b2_ c33") == 4); 74 | } 
-------------------------------------------------------------------------------- /test/regex_search_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | using uri_regex = regex; 6 | using email_regex = regex; 7 | using non_empty_regex = regex; 8 | 9 | static_assert(uri_regex::search("https://google.com")); 10 | static_assert(uri_regex::search(" https://google.com")); 11 | static_assert(uri_regex::search("https://google.com ")); 12 | static_assert(uri_regex::search("... https://google.com ...")); 13 | static_assert(uri_regex::search("... https://google.com\n ... https://youtube.com ...")); 14 | static_assert(email_regex::search("sample@gmail.com")); 15 | static_assert(email_regex::search(" sample@gmail.com")); 16 | static_assert(email_regex::search("sample@gmail.com ")); 17 | static_assert(email_regex::search("... sample@gmail.com ...")); 18 | static_assert(email_regex::search("... sample@gmail.com\n ... name.user@service.com ...")); 19 | static_assert(non_empty_regex::search(" ")); 20 | static_assert(non_empty_regex::search("a")); 21 | static_assert(non_empty_regex::search("0")); 22 | static_assert(non_empty_regex::search("abcd0123@#$%")); 23 | static_assert(regex<"^abc">::search("abcdefg")); 24 | static_assert(regex<"abc$">::search("... abc")); 25 | static_assert(regex<"^">::search("abcdefg")); 26 | static_assert(regex<"$">::search("... abc")); 27 | static_assert(regex<"^">::search("")); 28 | static_assert(regex<"$">::search("")); 29 | static_assert(regex<"^$">::search("")); 30 | static_assert(regex<"^$">::search("")); 31 | 32 | static_assert(uri_regex::search("https:// google.com") == false); 33 | static_assert(uri_regex::search("") == false); 34 | static_assert(uri_regex::search(" google.com ") == false); 35 | static_assert(uri_regex::search("... 
") == false); 36 | static_assert(email_regex::search("samplegmail.com") == false); 37 | static_assert(email_regex::search("") == false); 38 | static_assert(email_regex::search(" samp @gmail.com") == false); 39 | static_assert(email_regex::search("...") == false); 40 | static_assert(non_empty_regex::search("") == false); 41 | static_assert(regex<"^abc">::search(" abcdefg") == false); 42 | static_assert(regex<"abc$">::search("... abc ") == false); 43 | static_assert(regex<" ^">::search("") == false); 44 | static_assert(regex<"$ ">::search("") == false); 45 | } -------------------------------------------------------------------------------- /test/regex_traits_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | using symbol::name; 6 | using symbol::unnamed; 7 | 8 | using empty_regex = regex<"">; 9 | using uri_regex = regex; 10 | using email_regex = regex[^ @]+)@([^ @]+))">; 11 | 12 | using iterator = std::string_view::iterator; 13 | 14 | using uri_tokenizer_t = regex_tokenizer_t; 15 | using uri_searcher_t = regex_searcher_t; 16 | using uri_token_range_t = regex_token_range_t; 17 | using uri_match_range_t = regex_match_range_t; 18 | 19 | using empty_regex_with_flags = empty_regex::add_flags; 20 | static_assert(std::is_same_v, empty_regex_with_flags>); 21 | static_assert(regex_flags::dotall); 22 | static_assert(regex_flags::icase); 23 | static_assert(regex_flags::multiline == false); 24 | static_assert(regex_flags::ungreedy == false); 25 | 26 | static_assert(std::is_same_v, ast::empty>); 27 | static_assert(std::is_same_v, uri_regex::ast_type>); 28 | 29 | static_assert(std::is_same_v, type_sequence>); 30 | static_assert(std::is_same_v, type_sequence>); 31 | static_assert(std::is_same_v, type_sequence, unnamed>>); 32 | 33 | static_assert(std::is_same_v); 34 | static_assert(std::is_same_v == false); 35 | static_assert(std::is_same_v == false); 36 | static_assert(std::is_same_v == 
false); 37 | 38 | static_assert(std::is_same_v); 39 | static_assert(std::is_same_v == false); 40 | static_assert(std::is_same_v == false); 41 | static_assert(std::is_same_v == false); 42 | 43 | static_assert(std::is_same_v); 44 | static_assert(std::is_same_v == false); 45 | static_assert(std::is_same_v == false); 46 | static_assert(std::is_same_v == false); 47 | 48 | static_assert(std::is_same_v); 49 | static_assert(std::is_same_v == false); 50 | static_assert(std::is_same_v == false); 51 | static_assert(std::is_same_v == false); 52 | } -------------------------------------------------------------------------------- /test/static_string_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace meta::tests 5 | { 6 | namespace 7 | { 8 | template 9 | constexpr auto string_equals(std::string_view expected) noexcept 10 | { 11 | return static_cast(String) == expected; 12 | } 13 | 14 | template 15 | constexpr auto range_equals(std::initializer_list expected) noexcept 16 | { 17 | return std::equal(String.begin(), String.end(), expected.begin(), expected.end()); 18 | } 19 | } 20 | 21 | static_assert(static_string{""}.length() == 0); 22 | static_assert(static_string{"a"}.length() == 1); 23 | static_assert(static_string{"abc"}.length() == 3); 24 | static_assert(static_string{"abc\0"}.length() == 4); 25 | static_assert(static_string{"abc\0d"}.length() == 5); 26 | 27 | static_assert(string_equals<"">("")); 28 | static_assert(string_equals<"a">("a")); 29 | static_assert(string_equals<"abc">("abc")); 30 | static_assert(string_equals<"abc\0">({"abc\0", 4})); 31 | static_assert(string_equals<"abc\0d">({"abc\0d", 5})); 32 | 33 | static_assert(range_equals<"">({})); 34 | static_assert(range_equals<"a">({'a'})); 35 | static_assert(range_equals<"abc">({'a', 'b', 'c'})); 36 | static_assert(range_equals<"abc\0">({'a', 'b', 'c', '\0'})); 37 | static_assert(range_equals<"abc\0d">({'a', 'b', 'c', '\0', 'd'})); 38 | 
39 | static_assert(range_equals({})); 40 | static_assert(range_equals()>({'a'})); 41 | static_assert(range_equals()>({'a', 'b', 'c'})); 42 | static_assert(range_equals()>({'a', 'b', 'c', '\0'})); 43 | static_assert(range_equals()>({'a', 'b', 'c', '\0', 'd'})); 44 | } -------------------------------------------------------------------------------- /test/symbol_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | using zero = symbol::quantifier_value<0>; 6 | using one = symbol::quantifier_value<1>; 7 | using two = symbol::quantifier_value<2>; 8 | 9 | static_assert(symbol::is_quantifier); 10 | static_assert(symbol::is_quantifier); 11 | static_assert(symbol::is_quantifier == false); 12 | 13 | static_assert(symbol::is_zero); 14 | static_assert(symbol::is_zero == false); 15 | static_assert(symbol::is_zero == false); 16 | 17 | static_assert(symbol::is_infinity); 18 | static_assert(symbol::is_infinity == false); 19 | static_assert(symbol::is_infinity == false); 20 | 21 | static_assert(symbol::get_value == 0); 22 | static_assert(symbol::get_value == 1); 23 | static_assert(symbol::get_value == 2); 24 | 25 | static_assert(symbol::equals(1)); 26 | static_assert(symbol::equals(2) == false); 27 | static_assert(symbol::equals(0) == false); 28 | 29 | static_assert(std::is_same_v, one>); 30 | static_assert(std::is_same_v, zero>); 31 | static_assert(std::is_same_v, symbol::infinity>); 32 | 33 | static_assert(std::is_same_v, zero>); 34 | static_assert(std::is_same_v, one>); 35 | static_assert(std::is_same_v, symbol::infinity>); 36 | 37 | static_assert(std::is_same_v, symbol::name<"">>); 38 | static_assert(std::is_same_v, symbol::name<"a">>); 39 | static_assert(std::is_same_v, symbol::name<"ab">>); 40 | } 41 | -------------------------------------------------------------------------------- /test/tests.cpp: -------------------------------------------------------------------------------- 1 | 
#include 2 | 3 | int main() 4 | { 5 | std::cout << "All compile-time tests were successful.\n"; 6 | } -------------------------------------------------------------------------------- /test/type_sequence_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | static_assert(std::is_same_v>, symbol::empty>); 6 | static_assert(std::is_same_v>, char>); 7 | static_assert(std::is_same_v>, type_sequence<>>); 8 | static_assert(std::is_same_v, int>>, type_sequence<>>); 9 | static_assert(std::is_same_v, type_sequence<>>, type_sequence<>>); 10 | static_assert(std::is_same_v, int>, type_sequence>); 11 | static_assert( 12 | std::is_same_v 13 | < 14 | push_t, double>, int>, char>, 15 | type_sequence 16 | > 17 | ); 18 | static_assert( 19 | std::is_same_v 20 | < 21 | pop_t, double>, int>, char>>>, 22 | type_sequence 23 | > 24 | ); 25 | static_assert( 26 | std::is_same_v 27 | < 28 | push_t, type_sequence>, 29 | type_sequence 30 | > 31 | ); 32 | static_assert( 33 | std::is_same_v 34 | < 35 | concat_t 36 | < 37 | type_sequence, 38 | type_sequence, 39 | type_sequence, 40 | type_sequence, 41 | type_sequence 42 | >, 43 | type_sequence 44 | > 45 | ); 46 | static_assert( 47 | std::is_same_v 48 | < 49 | reverse_t>, 50 | type_sequence<> 51 | > 52 | ); 53 | static_assert( 54 | std::is_same_v 55 | < 56 | reverse_t>, 57 | type_sequence 58 | > 59 | ); 60 | static_assert( 61 | std::is_same_v 62 | < 63 | reverse_t>, 64 | type_sequence 65 | > 66 | ); 67 | static_assert( 68 | std::is_same_v 69 | < 70 | reverse_t>, 71 | type_sequence 72 | > 73 | ); 74 | static_assert( 75 | std::is_same_v 76 | < 77 | reverse_t>, 78 | type_sequence 79 | > 80 | ); 81 | static_assert( 82 | std::is_same_v 83 | < 84 | reverse_t>, 85 | type_sequence 86 | > 87 | ); 88 | } -------------------------------------------------------------------------------- /test/type_traits_tests.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | static_assert(is_expiring_memory_owner == false); 6 | static_assert(is_expiring_memory_owner == false); 7 | static_assert(is_expiring_memory_owner == false); 8 | static_assert(is_expiring_memory_owner == false); 9 | static_assert(is_expiring_memory_owner); 10 | static_assert(is_expiring_memory_owner); 11 | static_assert(is_expiring_memory_owner == false); 12 | static_assert(is_expiring_memory_owner == false); 13 | static_assert(is_expiring_memory_owner == false); 14 | static_assert(is_expiring_memory_owner == false); 15 | static_assert(is_expiring_memory_owner == false); 16 | static_assert(is_expiring_memory_owner == false); 17 | } -------------------------------------------------------------------------------- /test/xpr_ast_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace meta::tests 4 | { 5 | using namespace ast; 6 | using namespace xpr::operators; 7 | 8 | namespace 9 | { 10 | template 11 | inline constexpr bool is_ast_of = std::is_same_v, Expected>; 12 | } 13 | 14 | static_assert(is_ast_of); 15 | static_assert(is_ast_of), literal<'a'>>); 16 | static_assert(is_ast_of)), literal<'a'>>); 17 | static_assert( 18 | is_ast_of 19 | < 20 | decltype(xpr::chr<'a'> >> xpr::chr<'b'>), 21 | sequence 22 | < 23 | literal<'a'>, 24 | literal<'b'> 25 | > 26 | > 27 | ); 28 | static_assert( 29 | is_ast_of 30 | < 31 | decltype(xpr::chr<'a'> >> xpr::chr<'b'> >> xpr::chr<'c'>), 32 | sequence 33 | < 34 | literal<'a'>, 35 | literal<'b'>, 36 | literal<'c'> 37 | > 38 | > 39 | ); 40 | static_assert( 41 | is_ast_of 42 | < 43 | decltype(xpr::concat(xpr::chr<'a'>,xpr::chr<'b'>,xpr::chr<'c'>)), 44 | sequence 45 | < 46 | literal<'a'>, 47 | literal<'b'>, 48 | literal<'c'> 49 | > 50 | > 51 | ); 52 | static_assert( 53 | is_ast_of 54 | < 55 | decltype( 56 | xpr::concat( 57 | xpr::chr<'a'>, 58 | 
xpr::str<"bcd">, 59 | xpr::chr<'e'>, 60 | xpr::wildcard >> xpr::word, 61 | xpr::lower | xpr::upper, 62 | xpr::concat( 63 | xpr::chr<'f'>, 64 | xpr::chr<'g'> 65 | ), 66 | xpr::regex<"hi"> 67 | ) 68 | ), 69 | sequence 70 | < 71 | literal<'a'>, 72 | literal<'b'>, 73 | literal<'c'>, 74 | literal<'d'>, 75 | literal<'e'>, 76 | wildcard, 77 | word, 78 | alternation, 79 | literal<'f'>, 80 | literal<'g'>, 81 | literal<'h'>, 82 | literal<'i'> 83 | > 84 | > 85 | ); 86 | static_assert( 87 | is_ast_of 88 | < 89 | decltype(xpr::str<"abc">), 90 | sequence 91 | < 92 | literal<'a'>, 93 | literal<'b'>, 94 | literal<'c'> 95 | > 96 | > 97 | ); 98 | static_assert( 99 | is_ast_of 100 | < 101 | decltype(not xpr::word >> xpr::str<"abc">), 102 | sequence 103 | < 104 | negated, 105 | literal<'a'>, 106 | literal<'b'>, 107 | literal<'c'> 108 | > 109 | > 110 | ); 111 | static_assert( 112 | is_ast_of 113 | < 114 | decltype(xpr::str<"ab"> >> xpr::zero_or_more(!xpr::digit) >> xpr::str<"xy">), 115 | sequence 116 | < 117 | literal<'a'>, 118 | literal<'b'>, 119 | star>, 120 | literal<'x'>, 121 | literal<'y'> 122 | > 123 | > 124 | ); 125 | static_assert( 126 | is_ast_of 127 | < 128 | decltype(xpr::regex<"ab?c+"> >> xpr::regex<".*">), 129 | sequence 130 | < 131 | literal<'a'>, 132 | optional>, 133 | plus>, 134 | star 135 | > 136 | > 137 | ); 138 | static_assert( 139 | is_ast_of 140 | < 141 | decltype(xpr::chr<'a'> | xpr::chr<'b'>), 142 | alternation 143 | < 144 | literal<'a'>, 145 | literal<'b'> 146 | > 147 | > 148 | ); 149 | static_assert( 150 | is_ast_of 151 | < 152 | decltype(xpr::chr<'a'> | xpr::chr<'b'> | xpr::chr<'c'>), 153 | alternation 154 | < 155 | literal<'a'>, 156 | literal<'b'>, 157 | literal<'c'> 158 | > 159 | > 160 | ); 161 | static_assert( 162 | is_ast_of 163 | < 164 | decltype(xpr::either(xpr::chr<'a'>, xpr::chr<'b'>, xpr::chr<'c'>)), 165 | alternation 166 | < 167 | literal<'a'>, 168 | literal<'b'>, 169 | literal<'c'> 170 | > 171 | > 172 | ); 173 | static_assert( 174 | is_ast_of 175 | 
< 176 | decltype(xpr::atomic(xpr::chr<'a'>, xpr::chr<'b'>, xpr::chr<'c'>)), 177 | atomic 178 | < 179 | sequence 180 | < 181 | literal<'a'>, 182 | literal<'b'>, 183 | literal<'c'> 184 | > 185 | > 186 | > 187 | ); 188 | static_assert( 189 | is_ast_of 190 | < 191 | decltype(xpr::str<"ab"> >> (xpr::chr<'c'> | xpr::chr<'d'>)), 192 | sequence 193 | < 194 | literal<'a'>, 195 | literal<'b'>, 196 | alternation 197 | < 198 | literal<'c'>, 199 | literal<'d'> 200 | > 201 | > 202 | > 203 | ); 204 | static_assert( 205 | is_ast_of 206 | < 207 | decltype(xpr::begin_line >> xpr::str<"ab"> >> +(xpr::chr<'c'> | xpr::chr<'d'>) >> xpr::end_line), 208 | sequence 209 | < 210 | beginning_of_line, 211 | literal<'a'>, 212 | literal<'b'>, 213 | plus 214 | < 215 | alternation 216 | < 217 | literal<'c'>, 218 | literal<'d'> 219 | > 220 | >, 221 | end_of_line 222 | > 223 | > 224 | ); 225 | static_assert( 226 | is_ast_of 227 | < 228 | decltype( 229 | xpr::exactly<5>( 230 | xpr::either( 231 | xpr::begin_line, 232 | xpr::chr<'a'> >> xpr::maybe(xpr::chr<'b'>), 233 | xpr::atomic(*xpr::str<"cd">), 234 | xpr::end_line 235 | ) 236 | ) 237 | ), 238 | fixed_repetition 239 | < 240 | 5, 241 | alternation 242 | < 243 | beginning_of_line, 244 | sequence 245 | < 246 | literal<'a'>, 247 | optional> 248 | >, 249 | atomic 250 | < 251 | star 252 | < 253 | sequence 254 | < 255 | literal<'c'>, 256 | literal<'d'> 257 | > 258 | > 259 | >, 260 | end_of_line 261 | > 262 | > 263 | > 264 | ); 265 | static_assert( 266 | is_ast_of 267 | < 268 | decltype( 269 | xpr::concat( 270 | xpr::ahead(xpr::one_or_more(xpr::word)), 271 | xpr::capture<1, "test">(xpr::wildcard), 272 | xpr::capture<2>(xpr::word), 273 | xpr::negate(xpr::behind(xpr::regex<"ab{2}c">)) 274 | ) 275 | ), 276 | sequence 277 | < 278 | positive_lookahead>, 279 | named_capture<1, "test", wildcard>, 280 | unnamed_capture<2, word>, 281 | negative_lookbehind 282 | < 283 | sequence 284 | < 285 | literal<'a'>, 286 | fixed_repetition<2, literal<'b'>>, 287 | literal<'c'> 
288 | > 289 | > 290 | > 291 | > 292 | ); 293 | } --------------------------------------------------------------------------------