├── docs ├── .gitignore ├── requirements.txt ├── index.rst ├── Makefile ├── make.bat ├── conf.py ├── debugging.rst └── installation.rst ├── .gitignore ├── examples ├── CMakeLists.txt ├── ini │ ├── CMakeLists.txt │ ├── input.ini │ └── ini.cpp └── calculator │ ├── CMakeLists.txt │ └── calculator.cpp ├── include └── pog │ ├── pog.h │ ├── action.h │ ├── types │ ├── state_and_rule.h │ └── state_and_symbol.h │ ├── precedence.h │ ├── operations │ ├── operation.h │ ├── follow.h │ ├── read.h │ └── lookahead.h │ ├── relations │ ├── relation.h │ └── lookback.h │ ├── symbol.h │ ├── errors.h │ ├── utils.h │ ├── filter_view.h │ ├── token.h │ ├── parser_report.h │ ├── digraph_algo.h │ ├── rule.h │ ├── token_builder.h │ ├── item.h │ ├── rule_builder.h │ ├── state.h │ └── automaton.h ├── deps ├── CMakeLists.txt ├── fmt │ ├── fmt │ │ ├── support │ │ │ └── cmake │ │ │ │ ├── fmt-config.cmake.in │ │ │ │ ├── fmt.pc.in │ │ │ │ ├── FindSetEnv.cmake │ │ │ │ └── cxx14.cmake │ │ ├── LICENSE.rst │ │ ├── src │ │ │ └── format.cc │ │ └── include │ │ │ └── fmt │ │ │ ├── locale.h │ │ │ └── ostream.h │ └── CMakeLists.txt └── re2 │ ├── re2 │ ├── util │ │ ├── strutil.h │ │ ├── malloc_counter.h │ │ ├── fuzz.cc │ │ ├── flags.h │ │ ├── test.cc │ │ ├── mix.h │ │ ├── util.h │ │ ├── test.h │ │ ├── utf.h │ │ ├── logging.h │ │ ├── benchmark.cc │ │ ├── benchmark.h │ │ ├── mutex.h │ │ └── strutil.cc │ ├── re2 │ │ ├── pod_array.h │ │ ├── unicode_groups.h │ │ ├── stringpiece.cc │ │ ├── set.h │ │ ├── unicode_casefold.h │ │ ├── bitmap256.h │ │ ├── perl_groups.cc │ │ ├── prefilter.h │ │ ├── filtered_re2.cc │ │ ├── filtered_re2.h │ │ ├── set.cc │ │ └── prefilter_tree.h │ ├── LICENSE │ └── CMakeLists.txt │ └── CMakeLists.txt ├── tests ├── pog_tests.cpp ├── googletest │ └── CMakeLists.txt ├── test_parsing_table.cpp ├── CMakeLists.txt ├── test_symbol.cpp ├── test_utils.cpp ├── test_filter_view.cpp ├── test_precedence.cpp ├── test_token.cpp ├── test_rule.cpp ├── test_token_builder.cpp └── test_rule_builder.cpp ├── 
coverage.sh ├── share ├── pog.pc.in └── pog-config.cmake.in ├── cmake ├── Findre2.cmake └── fmt │ └── Findfmt.cmake ├── .travis.yml ├── LICENSE ├── .appveyor.yml ├── CHANGELOG.md └── README.md /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx==2.1.2 2 | sphinx_rtd_theme==0.4.3 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | install/ 3 | 4 | coverage.info 5 | 6 | .ycm* 7 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(calculator) 2 | add_subdirectory(ini) 3 | -------------------------------------------------------------------------------- /examples/ini/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(example-ini ini.cpp) 2 | target_link_libraries(example-ini pog) 3 | -------------------------------------------------------------------------------- /examples/calculator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(example-calculator calculator.cpp) 2 | target_link_libraries(example-calculator pog) 3 | -------------------------------------------------------------------------------- /include/pog/pog.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define POG_VERSION "0.5.3" 4 | 5 | #include 6 | #include 7 | -------------------------------------------------------------------------------- /deps/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | if(POG_BUNDLED_RE2) 2 | add_subdirectory(re2) 3 | endif() 4 | if(POG_BUNDLED_FMT) 5 | add_subdirectory(fmt) 6 | endif() 7 | -------------------------------------------------------------------------------- /deps/fmt/fmt/support/cmake/fmt-config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include(${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake) 4 | check_required_components(fmt) 5 | -------------------------------------------------------------------------------- /tests/pog_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) 4 | { 5 | ::testing::InitGoogleTest(&argc, argv); 6 | return RUN_ALL_TESTS(); 7 | } 8 | -------------------------------------------------------------------------------- /tests/googletest/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(googletest STATIC gmock-gtest-all.cc) 2 | target_include_directories(googletest PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 3 | target_link_libraries(googletest Threads::Threads) 4 | -------------------------------------------------------------------------------- /coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./build/tests/pog_tests && \ 4 | lcov --directory . 
--capture --output-file coverage.info && \ 5 | lcov --remove coverage.info '/usr/*' '*/tests/*' --output-file coverage.info && \ 6 | lcov --list coverage.info 7 | -------------------------------------------------------------------------------- /examples/ini/input.ini: -------------------------------------------------------------------------------- 1 | abc=42 2 | def=3.14 3 | ghi=hello 4 | boolt=true 5 | boolf=false 6 | 7 | [sect1] 8 | abc=42 9 | def=3.14 10 | ghi=hello 11 | boolt=true 12 | boolf=false 13 | 14 | [sect2] 15 | abc=42 16 | def=3.14 17 | ghi=hello 18 | boolt=true 19 | boolf=false 20 | -------------------------------------------------------------------------------- /share/pog.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | libdir=@POG_INSTALL_LIB_DIR@ 3 | includedir=@POG_INSTALL_INCLUDE_DIR@ 4 | 5 | Name: pog 6 | Description: Parser generator library. 7 | Version: @PROJECT_VERSION@ 8 | @POG_PC_REQUIREMENT@ 9 | Cflags: -I${includedir} 10 | -------------------------------------------------------------------------------- /tests/test_parsing_table.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | using namespace pog; 6 | 7 | class TestParsingTable : public ::testing::Test {}; 8 | 9 | TEST_F(TestParsingTable, 10 | AddAccept) { 11 | //ParsingTable pt; 12 | } 13 | -------------------------------------------------------------------------------- /deps/fmt/fmt/support/cmake/fmt.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=@CMAKE_INSTALL_PREFIX@ 3 | libdir=@CMAKE_INSTALL_FULL_LIBDIR@ 4 | includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ 5 | 6 | Name: fmt 7 | Description: A modern formatting library 8 | Version: @FMT_VERSION@ 9 | Libs: -L${libdir} -lfmt 10 | Cflags: -I${includedir} 11 | 12 | 
-------------------------------------------------------------------------------- /deps/fmt/fmt/support/cmake/FindSetEnv.cmake: -------------------------------------------------------------------------------- 1 | # A CMake script to find SetEnv.cmd. 2 | 3 | find_program(WINSDK_SETENV NAMES SetEnv.cmd 4 | PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows;CurrentInstallFolder]/bin") 5 | if (WINSDK_SETENV AND PRINT_PATH) 6 | execute_process(COMMAND ${CMAKE_COMMAND} -E echo "${WINSDK_SETENV}") 7 | endif () 8 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. pog documentation master file, created by 2 | sphinx-quickstart on Sun Aug 11 17:50:11 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to pog's documentation! 7 | =============================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | installation 13 | writing_parser 14 | advanced 15 | debugging 16 | -------------------------------------------------------------------------------- /include/pog/action.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace pog { 10 | 11 | template 12 | struct Shift 13 | { 14 | const State* state; 15 | }; 16 | 17 | template 18 | struct Reduce 19 | { 20 | const Rule* rule; 21 | }; 22 | 23 | struct Accept {}; 24 | 25 | template 26 | using Action = std::variant, Reduce, Accept>; 27 | 28 | } // namespace pog 29 | -------------------------------------------------------------------------------- /deps/re2/re2/util/strutil.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The RE2 Authors. All Rights Reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_STRUTIL_H_ 6 | #define UTIL_STRUTIL_H_ 7 | 8 | #include 9 | 10 | #include "re2/stringpiece.h" 11 | #include "util/util.h" 12 | 13 | namespace re2 { 14 | 15 | std::string CEscape(const StringPiece& src); 16 | void PrefixSuccessor(std::string* prefix); 17 | std::string StringPrintf(const char* format, ...); 18 | 19 | } // namespace re2 20 | 21 | #endif // UTIL_STRUTIL_H_ 22 | -------------------------------------------------------------------------------- /deps/re2/re2/util/malloc_counter.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_MALLOC_COUNTER_H_ 6 | #define UTIL_MALLOC_COUNTER_H_ 7 | 8 | namespace testing { 9 | class MallocCounter { 10 | public: 11 | MallocCounter(int x) {} 12 | static const int THIS_THREAD_ONLY = 0; 13 | long long HeapGrowth() { return 0; } 14 | long long PeakHeapGrowth() { return 0; } 15 | void Reset() {} 16 | }; 17 | } // namespace testing 18 | 19 | #endif // UTIL_MALLOC_COUNTER_H_ 20 | -------------------------------------------------------------------------------- /deps/re2/re2/util/fuzz.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | // Entry point for libFuzzer. 
10 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); 11 | 12 | int main(int argc, char** argv) { 13 | uint8_t data[32]; 14 | for (int i = 0; i < 32; i++) { 15 | for (int j = 0; j < 32; j++) { 16 | data[j] = random() & 0xFF; 17 | } 18 | LLVMFuzzerTestOneInput(data, 32); 19 | } 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(googletest) 2 | 3 | set(TEST_FILES 4 | pog_tests.cpp 5 | test_automaton.cpp 6 | test_filter_view.cpp 7 | test_grammar.cpp 8 | test_item.cpp 9 | test_parser.cpp 10 | test_parsing_table.cpp 11 | test_precedence.cpp 12 | test_rule.cpp 13 | test_rule_builder.cpp 14 | test_state.cpp 15 | test_symbol.cpp 16 | test_token.cpp 17 | test_tokenizer.cpp 18 | test_token_builder.cpp 19 | test_utils.cpp 20 | ) 21 | 22 | add_executable(pog_tests ${TEST_FILES}) 23 | if (MSVC) 24 | # This flag is needed for MSVC because test_parser.cpp exceeds limit of number of section in Debug x64 build 25 | set_source_files_properties(test_parser.cpp PROPERTIES COMPILE_FLAGS /bigobj) 26 | endif() 27 | target_link_libraries(pog_tests pog googletest) 28 | -------------------------------------------------------------------------------- /deps/re2/re2/util/flags.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_FLAGS_H_ 6 | #define UTIL_FLAGS_H_ 7 | 8 | // Simplified version of Google's command line flags. 9 | // Does not support parsing the command line. 
10 | // If you want to do that, see 11 | // https://gflags.github.io/gflags/ 12 | 13 | #define DEFINE_FLAG(type, name, deflt, desc) \ 14 | namespace re2 { type FLAGS_##name = deflt; } 15 | 16 | #define DECLARE_FLAG(type, name) \ 17 | namespace re2 { extern type FLAGS_##name; } 18 | 19 | namespace re2 { 20 | template 21 | T GetFlag(const T& flag) { 22 | return flag; 23 | } 24 | } // namespace re2 25 | 26 | #endif // UTIL_FLAGS_H_ 27 | -------------------------------------------------------------------------------- /deps/re2/re2/util/test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #include 6 | #include 7 | 8 | #include "util/test.h" 9 | 10 | namespace testing { 11 | std::string TempDir() { return "/tmp/"; } 12 | } // namespace testing 13 | 14 | struct Test { 15 | void (*fn)(void); 16 | const char *name; 17 | }; 18 | 19 | static Test tests[10000]; 20 | static int ntests; 21 | 22 | void RegisterTest(void (*fn)(void), const char *name) { 23 | tests[ntests].fn = fn; 24 | tests[ntests++].name = name; 25 | } 26 | 27 | int main(int argc, char** argv) { 28 | for (int i = 0; i < ntests; i++) { 29 | printf("%s\n", tests[i].name); 30 | tests[i].fn(); 31 | } 32 | printf("PASS\n"); 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /include/pog/types/state_and_rule.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace pog { 7 | 8 | template 9 | struct StateAndRule 10 | { 11 | const State* state; 12 | const Rule* rule; 13 | 14 | bool operator==(const StateAndRule& rhs) const 15 | { 16 | return state->get_index() == rhs.state->get_index() && rule->get_index() == rhs.rule->get_index(); 17 | } 18 | 19 | bool 
operator!=(const StateAndRule& rhs) const 20 | { 21 | return !(*this == rhs); 22 | } 23 | }; 24 | 25 | } // namespace pog 26 | 27 | namespace std { 28 | 29 | template 30 | struct hash> 31 | { 32 | std::size_t operator()(const pog::StateAndRule& sr) const 33 | { 34 | return pog::hash_combine(sr.state->get_index(), sr.rule->get_index()); 35 | } 36 | }; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /include/pog/types/state_and_symbol.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace pog { 7 | 8 | template 9 | struct StateAndSymbol 10 | { 11 | const State* state; 12 | const Symbol* symbol; 13 | 14 | bool operator==(const StateAndSymbol& rhs) const 15 | { 16 | return state->get_index() == rhs.state->get_index() && symbol->get_index() == rhs.symbol->get_index(); 17 | } 18 | 19 | bool operator!=(const StateAndSymbol& rhs) const 20 | { 21 | return !(*this == rhs); 22 | } 23 | }; 24 | 25 | } // namespace pog 26 | 27 | namespace std { 28 | 29 | template 30 | struct hash> 31 | { 32 | std::size_t operator()(const pog::StateAndSymbol& ss) const 33 | { 34 | return pog::hash_combine(ss.state->get_index(), ss.symbol->get_index()); 35 | } 36 | }; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cmake/Findre2.cmake: -------------------------------------------------------------------------------- 1 | if(UNIX) 2 | find_package(PkgConfig QUIET) 3 | pkg_check_modules(PKG_RE2 QUIET re2) 4 | endif() 5 | 6 | find_path( 7 | RE2_INCLUDE_DIR 8 | NAMES 9 | re2.h 10 | HINTS 11 | ${PKG_RE2_INCLUDEDIR} 12 | PATH_SUFFIXES 13 | re2 14 | ) 15 | 16 | find_library( 17 | RE2_LIBRARY 18 | NAMES 19 | re2 20 | HINTS 21 | ${PKG_RE2_LIBDIR} 22 | PATH_SUFFIXES 23 | lib 24 | ) 25 | 26 | mark_as_advanced(RE2_INCLUDE_DIR RE2_LIBRARY) 27 | 28 | if(RE2_INCLUDE_DIR AND RE2_LIBRARY) 29 | set(RE2_FOUND 1) 30 | 31 | 
if(NOT TARGET re2::re2) 32 | add_library(re2::re2 UNKNOWN IMPORTED) 33 | set_target_properties(re2::re2 PROPERTIES 34 | INTERFACE_INCLUDE_DIRECTORIES "${RE2_INCLUDE_DIR}" 35 | IMPORTED_LOCATION "${RE2_LIBRARY}" 36 | ) 37 | endif() 38 | endif() 39 | 40 | find_package_handle_standard_args( 41 | RE2 42 | REQUIRED_VARS 43 | RE2_INCLUDE_DIR 44 | RE2_LIBRARY 45 | ) 46 | -------------------------------------------------------------------------------- /cmake/fmt/Findfmt.cmake: -------------------------------------------------------------------------------- 1 | if(UNIX) 2 | find_package(PkgConfig QUIET) 3 | pkg_check_modules(PKG_FMT QUIET fmt) 4 | endif() 5 | 6 | find_path( 7 | FMT_INCLUDE_DIR 8 | NAMES 9 | format.h 10 | HINTS 11 | ${PKG_FMT_INCLUDEDIR} 12 | PATH_SUFFIXES 13 | fmt 14 | ) 15 | 16 | find_library( 17 | FMT_LIBRARY 18 | NAMES 19 | fmt 20 | HINTS 21 | ${PKG_FMT_LIBDIR} 22 | PATH_SUFFIXES 23 | lib 24 | lib64 25 | ) 26 | 27 | mark_as_advanced(FMT_INCLUDE_DIR FMT_LIBRARY) 28 | 29 | if(FMT_INCLUDE_DIR AND FMT_LIBRARY) 30 | set(FMT_FOUND 1) 31 | 32 | if(NOT TARGET fmt::fmt) 33 | add_library(fmt::fmt UNKNOWN IMPORTED) 34 | set_target_properties(fmt::fmt PROPERTIES 35 | INTERFACE_INCLUDE_DIRECTORIES "${FMT_INCLUDE_DIR}" 36 | IMPORTED_LOCATION "${FMT_LIBRARY}" 37 | ) 38 | endif() 39 | endif() 40 | 41 | find_package_handle_standard_args( 42 | FMT 43 | REQUIRED_VARS 44 | FMT_INCLUDE_DIR 45 | FMT_LIBRARY 46 | ) 47 | 48 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /include/pog/precedence.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace pog { 6 | 7 | enum class Associativity 8 | { 9 | Left, 10 | Right 11 | }; 12 | 13 | struct Precedence 14 | { 15 | std::uint32_t level; 16 | Associativity assoc; 17 | 18 | bool operator==(const Precedence& rhs) const { return level == rhs.level && assoc == rhs.assoc; } 19 | bool operator!=(const Precedence& rhs) const { return !(*this == rhs); } 20 | 21 | bool operator<(const Precedence& rhs) const 22 | { 23 | if (level < rhs.level) 24 | return true; 25 | else if (level == rhs.level) 26 | { 27 | if (assoc == Associativity::Right) 28 | return true; 29 | } 30 | 31 | return false; 32 | } 33 | 34 | bool operator>(const Precedence& rhs) const 35 | { 36 | if (level > rhs.level) 37 | return true; 38 | else if (level == rhs.level) 39 | { 40 | if (assoc == Associativity::Left) 41 | return true; 42 | } 43 | 44 | return false; 45 | } 46 | }; 47 | 48 | } // namespace pog 49 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | 3 | matrix: 4 | include: 5 | - os: linux 6 | dist: bionic 7 | install: sudo apt-get install -y lcov libfmt-dev libre2-dev tree 8 | 
after_success: 9 | - cd build && rm -rf * 10 | - cmake -DCMAKE_BUILD_TYPE=Debug -DPOG_TESTS=ON -DPOG_COVERAGE=ON .. 11 | - cmake --build . -- -j 12 | - cd .. && ./coverage.sh 13 | - bash <(curl -s https://codecov.io/bash) -f coverage.info || echo "Codecov did not collect coverage reports" 14 | - os: osx 15 | osx_image: xcode11 16 | install: brew install fmt re2 tree 17 | 18 | script: 19 | - mkdir build && cd build 20 | - cmake -DCMAKE_BUILD_TYPE=Release -DPOG_TESTS=ON -DPOG_EXAMPLES=ON -DCMAKE_INSTALL_PREFIX=../install .. 21 | - cmake --build . --target install -- -j 22 | - ./tests/pog_tests 23 | - tree ../install 24 | 25 | branches: 26 | only: 27 | - master 28 | - /^v?\d+\.\d+.*$/ 29 | 30 | notifications: 31 | email: 32 | on_success: never 33 | -------------------------------------------------------------------------------- /share/pog-config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | set(POG_BUNDLED_RE2 @POG_BUNDLED_RE2@) 4 | set(POG_BUNDLED_FMT @POG_BUNDLED_FMT@) 5 | 6 | if(POG_BUNDLED_RE2) 7 | find_package(Threads REQUIRED) 8 | add_library(re2::re2 STATIC IMPORTED) 9 | set_target_properties(re2::re2 PROPERTIES 10 | INTERFACE_INCLUDE_DIRECTORIES @PACKAGE_RE2_INCLUDE_DIR@ 11 | IMPORTED_LOCATION @PACKAGE_RE2_LIBRARY@ 12 | ) 13 | target_link_libraries(re2::re2 INTERFACE Threads::Threads) 14 | else() 15 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") 16 | find_package(re2 REQUIRED) 17 | endif() 18 | 19 | if(POG_BUNDLED_FMT) 20 | add_library(fmt::fmt STATIC IMPORTED) 21 | set_target_properties(fmt::fmt PROPERTIES 22 | INTERFACE_INCLUDE_DIRECTORIES @PACKAGE_FMT_INCLUDE_DIR@ 23 | IMPORTED_LOCATION @PACKAGE_FMT_LIBRARY@ 24 | ) 25 | else() 26 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") 27 | find_package(fmt REQUIRED) 28 | endif() 29 | 30 | include(${CMAKE_CURRENT_LIST_DIR}/pog-targets.cmake) 31 | check_required_components(pog) 32 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Milkovic Marek 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /.appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{build}' 2 | 3 | configuration: 4 | - Debug 5 | - Release 6 | 7 | environment: 8 | matrix: 9 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 10 | CMAKE_GENERATOR: Visual Studio 16 2019 11 | CMAKE_ARGS: -A x64 12 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 13 | CMAKE_GENERATOR: Visual Studio 15 2017 Win64 14 | 15 | before_build: 16 | - cmd: mkdir build 17 | - cmd: cd build 18 | - cmd: cmake -G "%CMAKE_GENERATOR%" -DCMAKE_INSTALL_PREFIX=..\install -DPOG_TESTS=ON -DPOG_EXAMPLES=ON -DPOG_BUNDLED_RE2=ON -DPOG_BUNDLED_FMT=ON %CMAKE_ARGS% .. 
19 | 20 | build_script: 21 | - cmd: cmake --build . --config "%CONFIGURATION%" --target install -- -m 22 | 23 | after_build: 24 | - cmd: tree ..\install /F 25 | 26 | test_script: 27 | - cmd: tests\%CONFIGURATION%\pog_tests.exe 28 | 29 | branches: 30 | only: 31 | - master 32 | - /^v?\d+\.\d+.*$/ 33 | 34 | notifications: 35 | - provider: Email 36 | to: 37 | - '{{commitAuthorEmail}}' 38 | on_build_success: false 39 | on_build_failure: true 40 | on_build_status_changed: true 41 | -------------------------------------------------------------------------------- /deps/re2/re2/util/mix.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_MIX_H_ 6 | #define UTIL_MIX_H_ 7 | 8 | #include 9 | #include 10 | 11 | namespace re2 { 12 | 13 | // Silence "truncation of constant value" warning for kMul in 32-bit mode. 14 | // Since this is a header file, push and then pop to limit the scope. 15 | #ifdef _MSC_VER 16 | #pragma warning(push) 17 | #pragma warning(disable: 4309) 18 | #endif 19 | 20 | class HashMix { 21 | public: 22 | HashMix() : hash_(1) {} 23 | explicit HashMix(size_t val) : hash_(val + 83) {} 24 | void Mix(size_t val) { 25 | static const size_t kMul = static_cast(0xdc3eb94af8ab4c93ULL); 26 | hash_ *= kMul; 27 | hash_ = ((hash_ << 19) | 28 | (hash_ >> (std::numeric_limits::digits - 19))) + val; 29 | } 30 | size_t get() const { return hash_; } 31 | private: 32 | size_t hash_; 33 | }; 34 | 35 | #ifdef _MSC_VER 36 | #pragma warning(pop) 37 | #endif 38 | 39 | } // namespace re2 40 | 41 | #endif // UTIL_MIX_H_ 42 | -------------------------------------------------------------------------------- /deps/re2/re2/util/util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. 
All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_UTIL_H_ 6 | #define UTIL_UTIL_H_ 7 | 8 | #define arraysize(array) (sizeof(array)/sizeof((array)[0])) 9 | 10 | #ifndef ATTRIBUTE_NORETURN 11 | #if defined(__GNUC__) 12 | #define ATTRIBUTE_NORETURN __attribute__((noreturn)) 13 | #elif defined(_MSC_VER) 14 | #define ATTRIBUTE_NORETURN __declspec(noreturn) 15 | #else 16 | #define ATTRIBUTE_NORETURN 17 | #endif 18 | #endif 19 | 20 | #ifndef ATTRIBUTE_UNUSED 21 | #if defined(__GNUC__) 22 | #define ATTRIBUTE_UNUSED __attribute__((unused)) 23 | #else 24 | #define ATTRIBUTE_UNUSED 25 | #endif 26 | #endif 27 | 28 | #ifndef FALLTHROUGH_INTENDED 29 | #if defined(__clang__) 30 | #define FALLTHROUGH_INTENDED [[clang::fallthrough]] 31 | #elif defined(__GNUC__) && __GNUC__ >= 7 32 | #define FALLTHROUGH_INTENDED [[gnu::fallthrough]] 33 | #else 34 | #define FALLTHROUGH_INTENDED do {} while (0) 35 | #endif 36 | #endif 37 | 38 | #ifndef NO_THREAD_SAFETY_ANALYSIS 39 | #define NO_THREAD_SAFETY_ANALYSIS 40 | #endif 41 | 42 | #endif // UTIL_UTIL_H_ 43 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/pod_array.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | #ifndef RE2_POD_ARRAY_H_ 6 | #define RE2_POD_ARRAY_H_ 7 | 8 | #include 9 | #include 10 | 11 | namespace re2 { 12 | 13 | template 14 | class PODArray { 15 | public: 16 | static_assert(std::is_pod::value, 17 | "T must be POD"); 18 | 19 | PODArray() 20 | : ptr_() {} 21 | explicit PODArray(int len) 22 | : ptr_(std::allocator().allocate(len), Deleter(len)) {} 23 | 24 | T* data() const { 25 | return ptr_.get(); 26 | } 27 | 28 | int size() const { 29 | return ptr_.get_deleter().len_; 30 | } 31 | 32 | T& operator[](int pos) const { 33 | return ptr_[pos]; 34 | } 35 | 36 | private: 37 | struct Deleter { 38 | Deleter() 39 | : len_(0) {} 40 | explicit Deleter(int len) 41 | : len_(len) {} 42 | 43 | void operator()(T* ptr) const { 44 | std::allocator().deallocate(ptr, len_); 45 | } 46 | 47 | int len_; 48 | }; 49 | 50 | std::unique_ptr ptr_; 51 | }; 52 | 53 | } // namespace re2 54 | 55 | #endif // RE2_POD_ARRAY_H_ 56 | -------------------------------------------------------------------------------- /deps/re2/re2/util/test.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_TEST_H_ 6 | #define UTIL_TEST_H_ 7 | 8 | #include "util/util.h" 9 | #include "util/logging.h" 10 | 11 | namespace testing { 12 | std::string TempDir(); 13 | } // namespace testing 14 | 15 | #define TEST(x, y) \ 16 | void x##y(void); \ 17 | TestRegisterer r##x##y(x##y, # x "." 
# y); \ 18 | void x##y(void) 19 | 20 | void RegisterTest(void (*)(void), const char*); 21 | 22 | class TestRegisterer { 23 | public: 24 | TestRegisterer(void (*fn)(void), const char *s) { 25 | RegisterTest(fn, s); 26 | } 27 | }; 28 | 29 | // fatal assertions 30 | #define ASSERT_TRUE CHECK 31 | #define ASSERT_FALSE(x) CHECK(!(x)) 32 | #define ASSERT_EQ CHECK_EQ 33 | #define ASSERT_NE CHECK_NE 34 | #define ASSERT_LT CHECK_LT 35 | #define ASSERT_LE CHECK_LE 36 | #define ASSERT_GT CHECK_GT 37 | #define ASSERT_GE CHECK_GE 38 | 39 | // nonfatal assertions 40 | // TODO(rsc): Do a better job? 41 | #define EXPECT_TRUE CHECK 42 | #define EXPECT_FALSE(x) CHECK(!(x)) 43 | #define EXPECT_EQ CHECK_EQ 44 | #define EXPECT_NE CHECK_NE 45 | #define EXPECT_LT CHECK_LT 46 | #define EXPECT_LE CHECK_LE 47 | #define EXPECT_GT CHECK_GT 48 | #define EXPECT_GE CHECK_GE 49 | 50 | #endif // UTIL_TEST_H_ 51 | -------------------------------------------------------------------------------- /include/pog/operations/operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace pog { 10 | 11 | template 12 | class Operation 13 | { 14 | public: 15 | using AutomatonType = Automaton; 16 | using GrammarType = Grammar; 17 | 18 | Operation(const AutomatonType* automaton, const GrammarType* grammar) : _automaton(automaton), _grammar(grammar) {} 19 | Operation(const Operation&) = delete; 20 | Operation(Operation&&) noexcept = default; 21 | virtual ~Operation() = default; 22 | 23 | virtual void calculate() = 0; 24 | 25 | auto& operator[](const ArgT& key) { return _operation[key]; } 26 | auto& operator[](ArgT& key) { return _operation[key]; } 27 | 28 | template 29 | std::unordered_set* find(const T& key) 30 | { 31 | auto itr = _operation.find(key); 32 | if (itr == _operation.end()) 33 | return nullptr; 34 | 35 | return &itr->second; 36 | } 37 | 38 | template 39 | const 
std::unordered_set* find(const T& key) const 40 | { 41 | auto itr = _operation.find(key); 42 | if (itr == _operation.end()) 43 | return nullptr; 44 | 45 | return &itr->second; 46 | } 47 | 48 | protected: 49 | const AutomatonType* _automaton; 50 | const GrammarType* _grammar; 51 | std::unordered_map> _operation; 52 | }; 53 | 54 | } // namespace pog 55 | -------------------------------------------------------------------------------- /deps/fmt/fmt/LICENSE.rst: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 - present, Victor Zverovich 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | 22 | --- Optional exception to the license --- 23 | 24 | As an exception, if, as a result of your compiling your source code, portions 25 | of this Software are embedded into a machine-executable object form of such 26 | source code, you may redistribute such embedded portions in such object form 27 | without including the above copyright and permission notices. 28 | -------------------------------------------------------------------------------- /examples/calculator/calculator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | using namespace pog; 8 | 9 | int main() 10 | { 11 | Parser p; 12 | 13 | p.token(R"(\s+)"); 14 | p.token(R"(\+)").symbol("+").precedence(1, Associativity::Left); 15 | p.token(R"(\*)").symbol("*").precedence(2, Associativity::Left); 16 | p.token(R"(-)").symbol("-").precedence(1, Associativity::Left); 17 | p.token("\\(").symbol("("); 18 | p.token("\\)").symbol(")"); 19 | p.token("[0-9]+").symbol("num").action([](std::string_view str) { 20 | return std::stoi(std::string{str}); 21 | }); 22 | 23 | p.set_start_symbol("E"); 24 | p.rule("E") // E -> 25 | .production("E", "+", "E", [](auto&& args) { // E + E 26 | return args[0] + args[2]; 27 | }) 28 | .production("E", "-", "E", [](auto&& args) { // E - E 29 | return args[0] - args[2]; 30 | }) 31 | .production("E", "*", "E", [](auto&& args) { // E * E 32 | return args[0] * args[2]; 33 | }) 34 | .production("(", "E", ")", [](auto&& args) { // ( E ) 35 | return args[1]; 36 | }) 37 | .production("num", [](auto&& args) { // num 38 | return args[0]; 39 | }) 40 | .production("-", "E", [](auto&& args) { // - E 41 | return -args[1]; 42 | }).precedence(3, Associativity::Right); 43 | 44 | auto report = p.prepare(); 45 | if (!report) 46 | { 47 | fmt::print("{}\n", report.to_string()); 48 | return 1; 49 | } 50 | 51 | std::stringstream input("11 + 4 * 3 + 2"); 52 | auto result = p.parse(input); 53 | 
fmt::print("Result: {}\n", result.value()); 54 | } 55 | 56 | -------------------------------------------------------------------------------- /deps/re2/re2/LICENSE: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009 The RE2 Authors. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /include/pog/relations/relation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace pog { 10 | 11 | template 12 | struct BacktrackingInfo 13 | { 14 | const State* state; 15 | Item item; 16 | }; 17 | 18 | template 19 | class Relation 20 | { 21 | public: 22 | using AutomatonType = Automaton; 23 | using GrammarType = Grammar; 24 | 25 | Relation(const AutomatonType* automaton, const GrammarType* grammar) : _automaton(automaton), _grammar(grammar) {} 26 | Relation(const Relation&) = delete; 27 | Relation(Relation&&) noexcept = default; 28 | virtual ~Relation() = default; 29 | 30 | virtual void calculate() = 0; 31 | 32 | auto begin() { return _relation.begin(); } 33 | auto end() { return _relation.end(); } 34 | 35 | auto begin() const { return _relation.begin(); } 36 | auto end() const { return _relation.end(); } 37 | 38 | template 39 | std::unordered_set* find(const T& key) 40 | { 41 | auto itr = _relation.find(key); 42 | if (itr == _relation.end()) 43 | return nullptr; 44 | 45 | return &itr->second; 46 | } 47 | 48 | template 49 | const std::unordered_set* find(const T& key) const 50 | { 51 | auto itr = _relation.find(key); 52 | if (itr == _relation.end()) 53 | return nullptr; 54 | 55 | return &itr->second; 56 | } 57 | 58 | protected: 59 | const AutomatonType* _automaton; 60 | const GrammarType* _grammar; 61 | std::unordered_map> _relation; 62 | }; 63 | 64 | } // namespace pog 65 | -------------------------------------------------------------------------------- /deps/re2/re2/util/utf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * The authors of this software are Rob Pike and Ken Thompson. 3 | * Copyright (c) 2002 by Lucent Technologies. 
4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose without fee is hereby granted, provided that this entire notice 6 | * is included in all copies of any software which is or includes a copy 7 | * or modification of this software and in all copies of the supporting 8 | * documentation for such software. 9 | * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED 10 | * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY 11 | * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY 12 | * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 13 | * 14 | * This file and rune.cc have been converted to compile as C++ code 15 | * in name space re2. 16 | */ 17 | 18 | #ifndef UTIL_UTF_H_ 19 | #define UTIL_UTF_H_ 20 | 21 | #include 22 | 23 | namespace re2 { 24 | 25 | typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ 26 | 27 | enum 28 | { 29 | UTFmax = 4, /* maximum bytes per rune */ 30 | Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ 31 | Runeself = 0x80, /* rune and UTF sequences are the same (<) */ 32 | Runeerror = 0xFFFD, /* decoding error in UTF */ 33 | Runemax = 0x10FFFF, /* maximum rune value */ 34 | }; 35 | 36 | int runetochar(char* s, const Rune* r); 37 | int chartorune(Rune* r, const char* s); 38 | int fullrune(const char* s, int n); 39 | int utflen(const char* s); 40 | char* utfrune(const char*, Rune); 41 | 42 | } // namespace re2 43 | 44 | #endif // UTIL_UTF_H_ 45 | -------------------------------------------------------------------------------- /include/pog/symbol.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace pog { 11 | 12 | enum class SymbolKind 13 | { 14 | End, 15 | Nonterminal, 16 | Terminal 17 | }; 18 | 19 | template 20 | class Symbol 21 | { 22 | public: 23 | 
Symbol(std::uint32_t index, SymbolKind kind, const std::string& name) : _index(index), _kind(kind), _name(name) {} 24 | 25 | std::uint32_t get_index() const { return _index; } 26 | const Precedence& get_precedence() const { return _precedence.value(); } 27 | const std::string& get_name() const { return _name; } 28 | const std::string& get_description() const { return _description.has_value() ? *_description : _name; } 29 | 30 | bool has_precedence() const { return static_cast(_precedence); } 31 | bool is_end() const { return _kind == SymbolKind::End; } 32 | bool is_nonterminal() const { return _kind == SymbolKind::Nonterminal; } 33 | bool is_terminal() const { return _kind == SymbolKind::Terminal; } 34 | 35 | void set_precedence(std::uint32_t level, Associativity assoc) { _precedence = Precedence{level, assoc}; } 36 | void set_description(const std::string& description) { _description = description; } 37 | 38 | private: 39 | std::uint32_t _index; 40 | SymbolKind _kind; 41 | std::string _name; 42 | std::optional _description; 43 | std::optional _precedence; 44 | }; 45 | 46 | 47 | template 48 | struct SymbolLess 49 | { 50 | bool operator()(const Symbol* lhs, const Symbol* rhs) const 51 | { 52 | return lhs->get_index() < rhs->get_index(); 53 | } 54 | }; 55 | 56 | } // namespace pog 57 | -------------------------------------------------------------------------------- /tests/test_symbol.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | class TestSymbol : public ::testing::Test {}; 6 | 7 | using namespace pog; 8 | 9 | TEST_F(TestSymbol, 10 | Nonterminal) { 11 | Symbol symbol(42, SymbolKind::Nonterminal, "testing_nonterminal"); 12 | 13 | EXPECT_EQ(symbol.get_index(), 42u); 14 | EXPECT_EQ(symbol.get_name(), "testing_nonterminal"); 15 | EXPECT_FALSE(symbol.is_end()); 16 | EXPECT_TRUE(symbol.is_nonterminal()); 17 | EXPECT_FALSE(symbol.is_terminal()); 18 | EXPECT_FALSE(symbol.has_precedence()); 19 | } 
20 | 21 | TEST_F(TestSymbol, 22 | Terminal) { 23 | Symbol symbol(42, SymbolKind::Terminal, "testing_terminal"); 24 | 25 | EXPECT_EQ(symbol.get_index(), 42u); 26 | EXPECT_EQ(symbol.get_name(), "testing_terminal"); 27 | EXPECT_FALSE(symbol.is_end()); 28 | EXPECT_FALSE(symbol.is_nonterminal()); 29 | EXPECT_TRUE(symbol.is_terminal()); 30 | EXPECT_FALSE(symbol.has_precedence()); 31 | } 32 | 33 | TEST_F(TestSymbol, 34 | End) { 35 | Symbol symbol(42, SymbolKind::End, "testing_end"); 36 | 37 | EXPECT_EQ(symbol.get_index(), 42u); 38 | EXPECT_EQ(symbol.get_name(), "testing_end"); 39 | EXPECT_TRUE(symbol.is_end()); 40 | EXPECT_FALSE(symbol.is_nonterminal()); 41 | EXPECT_FALSE(symbol.is_terminal()); 42 | EXPECT_FALSE(symbol.has_precedence()); 43 | } 44 | 45 | TEST_F(TestSymbol, 46 | Precedence) { 47 | Symbol symbol(42, SymbolKind::Terminal, "testing_terminal"); 48 | symbol.set_precedence(1, Associativity::Right); 49 | 50 | EXPECT_EQ(symbol.get_index(), 42u); 51 | EXPECT_EQ(symbol.get_name(), "testing_terminal"); 52 | EXPECT_FALSE(symbol.is_end()); 53 | EXPECT_FALSE(symbol.is_nonterminal()); 54 | EXPECT_TRUE(symbol.is_terminal()); 55 | EXPECT_TRUE(symbol.has_precedence()); 56 | EXPECT_EQ(symbol.get_precedence(), (Precedence{1, Associativity::Right})); 57 | } 58 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/unicode_groups.h: -------------------------------------------------------------------------------- 1 | // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef RE2_UNICODE_GROUPS_H_ 6 | #define RE2_UNICODE_GROUPS_H_ 7 | 8 | // Unicode character groups. 9 | 10 | // The codes get split into ranges of 16-bit codes 11 | // and ranges of 32-bit codes. It would be simpler 12 | // to use only 32-bit ranges, but these tables are large 13 | // enough to warrant extra care. 
14 | // 15 | // Using just 32-bit ranges gives 27 kB of data. 16 | // Adding 16-bit ranges gives 18 kB of data. 17 | // Adding an extra table of 16-bit singletons would reduce 18 | // to 16.5 kB of data but make the data harder to use; 19 | // we don't bother. 20 | 21 | #include 22 | 23 | #include "util/util.h" 24 | #include "util/utf.h" 25 | 26 | namespace re2 { 27 | 28 | struct URange16 29 | { 30 | uint16_t lo; 31 | uint16_t hi; 32 | }; 33 | 34 | struct URange32 35 | { 36 | Rune lo; 37 | Rune hi; 38 | }; 39 | 40 | struct UGroup 41 | { 42 | const char *name; 43 | int sign; // +1 for [abc], -1 for [^abc] 44 | const URange16 *r16; 45 | int nr16; 46 | const URange32 *r32; 47 | int nr32; 48 | }; 49 | 50 | // Named by property or script name (e.g., "Nd", "N", "Han"). 51 | // Negated groups are not included. 52 | extern const UGroup unicode_groups[]; 53 | extern const int num_unicode_groups; 54 | 55 | // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). 56 | // Negated groups are included. 57 | extern const UGroup posix_groups[]; 58 | extern const int num_posix_groups; 59 | 60 | // Named by Perl name (e.g., "\\d", "\\D"). 61 | // Negated groups are included. 
62 | extern const UGroup perl_groups[]; 63 | extern const int num_perl_groups; 64 | 65 | } // namespace re2 66 | 67 | #endif // RE2_UNICODE_GROUPS_H_ 68 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # v0.5.3 (2020-02-06) 2 | 3 | * Reusing parser after it has ended unsuccessfully no longer causes crash 4 | 5 | # v0.5.2 (2019-11-29) 6 | 7 | * Options to download RE2 and fmt are now replaced with options to use bundled RE2 and fmt 8 | 9 | # v0.5.1 (2019-11-29) 10 | 11 | * Added option to specify where to download fmt and re2 from using environment variables `(FMT|RE2)_DOWNLOAD_URL` and `(FMT|RE2)_DOWNLOAD_URL_SHA256` 12 | 13 | # v0.5.0 (2019-11-10) 14 | 15 | * Added support for global tokenizer actions 16 | * Added option to specify symbol description which can provide more human friendly output for the symbol in case of a syntax error 17 | * Improved performance of constructing parser (construction of LR automaton to be precise) 18 | * Debugging traces of parser and tokenizer now have no effect when debugging is not turned on 19 | 20 | # v0.4.0 (2019-09-28) 21 | 22 | * Fixed calculation of includes and lookback relations when there are more instances of the same symbol inspected 23 | * Added option to define `POG_DEBUG` to print debugging messages from parser and tokenizer 24 | 25 | # v0.3.0 (2019-09-22) 26 | 27 | * Midrule actions and all preceding symbols are now accessible from later actions in that rule 28 | * Explicit switching of tokenizer state with `enter_tokenizer_state` method 29 | * Implicit end of input token now has modifiable states in which it is active 30 | 31 | # v0.2.1 (2019-09-13) 32 | 33 | * Added option `POG_PIC` to build position-independent code 34 | 35 | # v0.2.0 (2019-08-12) 36 | 37 | * Added support for tokenizer action when end of input stream is reached 38 | * Token actions are now always called exactly
once for each individual token 39 | * Tokenizer now supports states 40 | * Generation of HTML report for parsers 41 | * Added support for build on Windows (using MSVC) and macOS 42 | * Added support for mid-rule actions 43 | 44 | # v0.1.0 (2019-07-27) 45 | 46 | * Initial release 47 | -------------------------------------------------------------------------------- /include/pog/errors.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace pog { 13 | 14 | class Error : public std::exception 15 | { 16 | public: 17 | Error() : _msg() {} 18 | template 19 | Error(T&& msg) noexcept : _msg(std::forward(msg)) {} 20 | Error(const Error& o) noexcept : _msg(o._msg) {} 21 | virtual ~Error() noexcept {} 22 | 23 | virtual const char* what() const noexcept override { return _msg.c_str(); } 24 | 25 | protected: 26 | std::string _msg; 27 | }; 28 | 29 | class SyntaxError : public Error 30 | { 31 | public: 32 | template 33 | SyntaxError(const Symbol* unexpected_symbol, const std::vector*>& expected_symbols) : Error() 34 | { 35 | std::vector expected_symbols_str(expected_symbols.size()); 36 | std::transform(expected_symbols.begin(), expected_symbols.end(), expected_symbols_str.begin(), [](const auto& sym) { 37 | return sym->get_description(); 38 | }); 39 | 40 | _msg = fmt::format( 41 | "Syntax error: Unexpected {}, expected one of {}", 42 | unexpected_symbol->get_description(), 43 | fmt::join(expected_symbols_str.begin(), expected_symbols_str.end(), ", ") 44 | ); 45 | } 46 | 47 | template 48 | SyntaxError(const std::vector*>& expected_symbols) : Error() 49 | { 50 | std::vector expected_symbols_str(expected_symbols.size()); 51 | std::transform(expected_symbols.begin(), expected_symbols.end(), expected_symbols_str.begin(), [](const auto& sym) { 52 | return sym->get_description(); 53 | }); 54 | 55 | _msg = fmt::format( 56 | "Syntax 
error: Unknown symbol on input, expected one of {}", 57 | fmt::join(expected_symbols_str.begin(), expected_symbols_str.end(), ", ") 58 | ); 59 | } 60 | }; 61 | 62 | } // namespace pog 63 | -------------------------------------------------------------------------------- /tests/test_utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | class TestUtils : public ::testing::Test {}; 6 | 7 | TEST_F(TestUtils, 8 | transform_if) { 9 | std::vector v{0, 1, 2, 3, 4, 5, 6}; 10 | 11 | std::vector result; 12 | pog::transform_if(v.begin(), v.end(), std::back_inserter(result), 13 | [](auto i) { return i % 2 == 0; }, 14 | [](auto i) { return i + 10; } 15 | ); 16 | EXPECT_EQ(result, (std::vector{10, 12, 14, 16})); 17 | 18 | result.clear(); 19 | pog::transform_if(v.begin(), v.end(), std::back_inserter(result), 20 | [](auto i) { return i < 100; }, 21 | [](auto i) { return i + 10; } 22 | ); 23 | EXPECT_EQ(result, (std::vector{10, 11, 12, 13, 14, 15, 16})); 24 | 25 | result.clear(); 26 | pog::transform_if(v.begin(), v.end(), std::back_inserter(result), 27 | [](auto i) { return i > 100; }, 28 | [](auto i) { return i + 10; } 29 | ); 30 | EXPECT_EQ(result, (std::vector{})); 31 | } 32 | 33 | TEST_F(TestUtils, 34 | accumulate_if) { 35 | std::vector v{1, 2, 3, 4, 5, 6}; 36 | 37 | auto result = pog::accumulate_if(v.begin(), v.end(), 0, 38 | [](auto i) { return i % 2 == 0; }, 39 | [](auto res, auto i) { return res + i; } 40 | ); 41 | EXPECT_EQ(result, 12); 42 | 43 | result = pog::accumulate_if(v.begin(), v.end(), 0, 44 | [](auto i) { return i < 100; }, 45 | [](auto res, auto i) { return res + i; } 46 | ); 47 | EXPECT_EQ(result, 21); 48 | 49 | result = pog::accumulate_if(v.begin(), v.end(), 0, 50 | [](auto i) { return i > 100; }, 51 | [](auto res, auto i) { return res + i; } 52 | ); 53 | EXPECT_EQ(result, 0); 54 | } 55 | 56 | TEST_F(TestUtils, 57 | hash_combine) { 58 | EXPECT_EQ(pog::hash_combine(1, 2), 
pog::hash_combine(1, 2)); 59 | EXPECT_NE(pog::hash_combine(1, 2), pog::hash_combine(1, 3)); 60 | EXPECT_NE(pog::hash_combine(1, 2), pog::hash_combine(2, 1)); 61 | EXPECT_NE(pog::hash_combine(1, 2), pog::hash_combine(1, 2, 3)); 62 | } 63 | -------------------------------------------------------------------------------- /deps/re2/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(ExternalProject) 2 | 3 | include(GNUInstallDirs) 4 | 5 | set(RE2_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/re2) 6 | set(RE2_INCLUDE_DIR ${RE2_INSTALL_DIR}/include) 7 | set(RE2_INSTALLED_LIBRARY ${RE2_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}re2${CMAKE_STATIC_LIBRARY_SUFFIX}) 8 | set(RE2_LIBRARY ${RE2_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}pog_re2${CMAKE_STATIC_LIBRARY_SUFFIX}) 9 | 10 | if(MSVC) 11 | set(RE2_BUILD_COMMAND ${CMAKE_COMMAND} --build . --config $ -- -m) 12 | set(RE2_INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config $) 13 | else() 14 | set(RE2_BUILD_COMMAND ${CMAKE_COMMAND} --build . -- -j) 15 | set(RE2_INSTALL_COMMAND ${CMAKE_COMMAND} --build . 
--target install) 16 | endif() 17 | 18 | ExternalProject_Add( 19 | re2-dep 20 | PREFIX "re2" 21 | SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/re2" 22 | CMAKE_ARGS 23 | -DCMAKE_INSTALL_PREFIX=${RE2_INSTALL_DIR} 24 | -DCMAKE_POSITION_INDEPENDENT_CODE=${POG_PIC} 25 | -DCMAKE_BUILD_TYPE=Release 26 | -DRE2_BUILD_TESTING=OFF 27 | BUILD_COMMAND 28 | ${RE2_BUILD_COMMAND} 29 | INSTALL_COMMAND 30 | ${RE2_INSTALL_COMMAND} 31 | ) 32 | 33 | ExternalProject_Add_Step( 34 | re2-dep rename 35 | DEPENDEES install 36 | COMMAND ${CMAKE_COMMAND} -E rename ${RE2_INSTALLED_LIBRARY} ${RE2_LIBRARY} 37 | ) 38 | 39 | add_library(re2::re2 STATIC IMPORTED GLOBAL) 40 | set_target_properties(re2::re2 PROPERTIES 41 | INTERFACE_INCLUDE_DIRECTORIES "${RE2_INCLUDE_DIR}" 42 | IMPORTED_LOCATION "${RE2_LIBRARY}" 43 | ) 44 | target_link_libraries(re2::re2 INTERFACE Threads::Threads) 45 | add_dependencies(re2::re2 re2-dep) 46 | 47 | install( 48 | FILES ${RE2_LIBRARY} 49 | DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR} 50 | ) 51 | install( 52 | DIRECTORY ${RE2_INCLUDE_DIR}/re2/ 53 | DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/pog/re2 54 | ) 55 | -------------------------------------------------------------------------------- /tests/test_filter_view.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | class TestFilterView : public ::testing::Test {}; 6 | 7 | TEST_F(TestFilterView, 8 | EmptyContainer) { 9 | std::vector v; 10 | 11 | FilterView fv(v.begin(), v.end(), [](int x) { return x & 1; }); 12 | 13 | std::vector actual; 14 | for (auto x : fv) 15 | actual.push_back(x); 16 | 17 | EXPECT_EQ(actual, (std::vector{})); 18 | } 19 | 20 | TEST_F(TestFilterView, 21 | BasicFilter) { 22 | std::vector v = {0, 1, 2, 3, 4, 5, 6}; 23 | 24 | FilterView fv(v.begin(), v.end(), [](int x) { return x & 1; }); 25 | 26 | std::vector actual; 27 | for (auto x : fv) 28 | actual.push_back(x); 29 | 30 | EXPECT_EQ(actual, 
(std::vector{1, 3, 5})); 31 | } 32 | 33 | TEST_F(TestFilterView, 34 | SharedEnd) { 35 | std::vector v = {0, 1, 2, 3, 4, 5}; 36 | 37 | FilterView fv(v.begin(), v.end(), [](int x) { return x & 1; }); 38 | 39 | std::vector actual; 40 | for (auto x : fv) 41 | actual.push_back(x); 42 | 43 | EXPECT_EQ(actual, (std::vector{1, 3, 5})); 44 | } 45 | 46 | TEST_F(TestFilterView, 47 | SharedBegin) { 48 | std::vector v = {1, 2, 3, 4, 5, 6}; 49 | 50 | FilterView fv(v.begin(), v.end(), [](int x) { return x & 1; }); 51 | 52 | std::vector actual; 53 | for (auto x : fv) 54 | actual.push_back(x); 55 | 56 | EXPECT_EQ(actual, (std::vector{1, 3, 5})); 57 | } 58 | 59 | TEST_F(TestFilterView, 60 | SharedBeginAndEnd) { 61 | std::vector v = {1, 2, 3, 4, 5}; 62 | 63 | FilterView fv(v.begin(), v.end(), [](int x) { return x & 1; }); 64 | 65 | std::vector actual; 66 | for (auto x : fv) 67 | actual.push_back(x); 68 | 69 | EXPECT_EQ(actual, (std::vector{1, 3, 5})); 70 | } 71 | 72 | TEST_F(TestFilterView, 73 | NoElements) { 74 | std::vector v = {0, 1, 2, 3, 4, 5, 6}; 75 | 76 | FilterView fv(v.begin(), v.end(), [](int x) { return x > 10; }); 77 | 78 | std::vector actual; 79 | for (auto x : fv) 80 | actual.push_back(x); 81 | 82 | EXPECT_EQ(actual, (std::vector{})); 83 | } 84 | -------------------------------------------------------------------------------- /include/pog/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace pog { 11 | 12 | template 13 | OutputIterator transform_if(InputIterator first1, InputIterator last1, OutputIterator result, Pred pred, UnaryOperator op) 14 | { 15 | while (first1 != last1) 16 | { 17 | if (pred(*first1)) { 18 | *result = op(*first1); 19 | ++result; 20 | } 21 | ++first1; 22 | } 23 | return result; 24 | } 25 | 26 | template 27 | T accumulate_if(InputIterator first, InputIterator last, T init, Pred pred, 
BinaryOperation op) 28 | { 29 | for (; first != last; ++first) 30 | { 31 | if (pred(*first)) 32 | init = op(std::move(init), *first); 33 | } 34 | return init; 35 | } 36 | 37 | inline void hash_combine(std::size_t&) { } 38 | 39 | template 40 | inline void hash_combine(std::size_t& seed, const T& v, const Rest&... rest) { 41 | seed ^= std::hash{}(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); 42 | hash_combine(seed, rest...); 43 | } 44 | 45 | template 46 | inline std::size_t hash_combine(const Rest&... rest) 47 | { 48 | std::size_t seed = 0; 49 | hash_combine(seed, rest...); 50 | return seed; 51 | } 52 | 53 | template struct overloaded : Ts... { using Ts::operator()...; }; 54 | template overloaded(Ts...) -> overloaded; 55 | 56 | template 57 | auto visit_with(Variant& v, Fs&&... fs) 58 | { 59 | return std::visit(overloaded{ 60 | std::forward(fs)... 61 | }, v); 62 | } 63 | 64 | template 65 | inline std::string current_time(FormatT&& format) 66 | { 67 | auto now = std::time(nullptr); 68 | auto tm = std::localtime(&now); 69 | 70 | std::ostringstream ss; 71 | ss << std::put_time(tm, std::forward(format)); 72 | return ss.str(); 73 | } 74 | 75 | } // namespace pog 76 | -------------------------------------------------------------------------------- /deps/fmt/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(GNUInstallDirs) 2 | include(ExternalProject) 3 | 4 | set(FMT_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/fmt) 5 | set(FMT_INCLUDE_DIR ${FMT_INSTALL_DIR}/include) 6 | set(FMT_INSTALLED_LIBRARY ${FMT_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}fmt${CMAKE_STATIC_LIBRARY_SUFFIX}) 7 | set(FMT_INSTALLED_LIBRARY_DEBUG ${FMT_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}fmtd${CMAKE_STATIC_LIBRARY_SUFFIX}) 8 | set(FMT_LIBRARY ${FMT_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}pog_fmt${CMAKE_STATIC_LIBRARY_SUFFIX}) 9 | 10 | if(MSVC) 11 | set(FMT_BUILD_COMMAND 
${CMAKE_COMMAND} --build . --config $ -- -m) 12 | set(FMT_INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config $) 13 | else() 14 | set(FMT_BUILD_COMMAND ${CMAKE_COMMAND} --build . -- -j) 15 | set(FMT_INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install) 16 | endif() 17 | 18 | ExternalProject_Add( 19 | fmt-dep 20 | PREFIX "fmt" 21 | SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/fmt" 22 | CMAKE_ARGS 23 | -DCMAKE_INSTALL_PREFIX=${FMT_INSTALL_DIR} 24 | -DCMAKE_BUILD_TYPE=Release 25 | -DCMAKE_POSITION_INDEPENDENT_CODE=${POG_PIC} 26 | -DFMT_TEST=OFF 27 | -DFMT_DOC=OFF 28 | BUILD_COMMAND 29 | ${FMT_BUILD_COMMAND} 30 | INSTALL_COMMAND 31 | ${FMT_INSTALL_COMMAND} 32 | ) 33 | 34 | ExternalProject_Add_Step( 35 | fmt-dep rename 36 | DEPENDEES install 37 | COMMAND ${CMAKE_COMMAND} -E rename $,${FMT_INSTALLED_LIBRARY_DEBUG},${FMT_INSTALLED_LIBRARY}> ${FMT_LIBRARY} 38 | ) 39 | 40 | add_library(fmt::fmt STATIC IMPORTED GLOBAL) 41 | set_target_properties(fmt::fmt PROPERTIES 42 | INTERFACE_INCLUDE_DIRECTORIES "${FMT_INCLUDE_DIR}" 43 | IMPORTED_LOCATION "${FMT_LIBRARY}" 44 | ) 45 | add_dependencies(fmt::fmt fmt-dep) 46 | 47 | install( 48 | FILES ${FMT_LIBRARY} 49 | DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR} 50 | ) 51 | install( 52 | DIRECTORY ${FMT_INCLUDE_DIR}/fmt/ 53 | DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/pog/fmt 54 | ) 55 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'pog' 21 | copyright = '2019, Marek Milkovic' 22 | author = 'Marek Milkovic' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '0.2.0' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx_rtd_theme' 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # List of patterns, relative to source directory, that match files and 41 | # directories to ignore when looking for source files. 42 | # This pattern also affects html_static_path and html_extra_path. 43 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 44 | 45 | 46 | # -- Options for HTML output ------------------------------------------------- 47 | 48 | # The theme to use for HTML and HTML Help pages. See the documentation for 49 | # a list of builtin themes. 50 | # 51 | html_theme = 'sphinx_rtd_theme' 52 | 53 | # Add any paths that contain custom static files (such as style sheets) here, 54 | # relative to this directory. 
They are copied after the builtin static files, 55 | # so a file named "default.css" will overwrite the builtin "default.css". 56 | html_static_path = ['_static'] 57 | -------------------------------------------------------------------------------- /docs/debugging.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Debugging 3 | ========= 4 | 5 | Debugging errors in a parser can be a hard process because of its overall complexity and the amount of data and code you need to go through to even reach the source of your problems. This section describes 6 | some options you have to debug the problems in your parser. 7 | 8 | HTML report 9 | =========== 10 | 11 | You are able to generate an HTML report out of your parser. The output is an HTML file which contains the full parsing table and information about the LR automaton, and it even includes a Graphviz 12 | representation of the LR automaton. In order to generate the HTML report, initialize it with your ``Parser`` after you've prepared it. 13 | 14 | .. code-block:: cpp 15 | 16 | Parser parser; 17 | 18 | // tokens & rules 19 | 20 | // Don't forget to first call prepare() 21 | parser.prepare(); 22 | 23 | HtmlReport html(parser); 24 | html.save("parser.html"); 25 | 26 | You can now open ``parser.html`` in your current working directory and inspect the inner structure of your parser. 27 | 28 | LR automaton 29 | ============ 30 | 31 | The generated HTML report will contain a `Graphviz `_ representation of the LR automaton at the very bottom. Copy it over to some other file and run the following command to turn it 32 | into a PNG image. 33 | 34 | .. code-block:: bash 35 | 36 | dot -Tpng -o automaton.png 37 | 38 | Runtime debugging 39 | ================= 40 | 41 | Since parsers are rather complex, it is hard to debug them line by line because there is too much going on and we are mostly interested only in what the parser is doing and not in what the whole framework is doing.
42 | Therefore we provide an option to compile your project so that *pog* prints out additional debug messages on standard debug output. In order to use them, define preprocessor macro ``POG_DEBUG`` 43 | before including the very first *pog* related header file. That will cause parser and tokenizer to print out debugging messages. If you want only parser or only tokenizer messages, define 44 | ``POG_DEBUG_PARSER`` or ``POG_DEBUG_TOKENIZER`` instead of ``POG_DEBUG``. These messages have no overhead if they are not requested to be printed so you don't have to worry about that. 45 | -------------------------------------------------------------------------------- /tests/test_precedence.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | class TestPrecedence : public ::testing::Test {}; 6 | 7 | using namespace pog; 8 | 9 | TEST_F(TestPrecedence, 10 | Equality) { 11 | Precedence p1{1, Associativity::Left}; 12 | Precedence p2{1, Associativity::Left}; 13 | Precedence p3{1, Associativity::Right}; 14 | Precedence p4{0, Associativity::Left}; 15 | Precedence p5{2, Associativity::Left}; 16 | 17 | EXPECT_EQ(p1, p2); 18 | EXPECT_NE(p1, p3); 19 | EXPECT_NE(p1, p4); 20 | EXPECT_NE(p1, p5); 21 | } 22 | 23 | TEST_F(TestPrecedence, 24 | SameLevelLeftAssociative) { 25 | EXPECT_FALSE( 26 | (Precedence{1, Associativity::Left}) < (Precedence{1, Associativity::Left}) 27 | ); 28 | EXPECT_TRUE( 29 | (Precedence{1, Associativity::Left}) > (Precedence{1, Associativity::Left}) 30 | ); 31 | } 32 | 33 | TEST_F(TestPrecedence, 34 | SameLevelRightAssociative) { 35 | EXPECT_TRUE( 36 | (Precedence{1, Associativity::Right}) < (Precedence{1, Associativity::Right}) 37 | ); 38 | EXPECT_FALSE( 39 | (Precedence{1, Associativity::Right}) > (Precedence{1, Associativity::Right}) 40 | ); 41 | } 42 | 43 | TEST_F(TestPrecedence, 44 | LowerLevelLeftAssociative) { 45 | EXPECT_TRUE( 46 | (Precedence{0, Associativity::Left}) < (Precedence{1, 
Associativity::Left}) 47 | ); 48 | EXPECT_FALSE( 49 | (Precedence{0, Associativity::Left}) > (Precedence{1, Associativity::Left}) 50 | ); 51 | } 52 | 53 | TEST_F(TestPrecedence, 54 | LowerLevelRightAssociative) { 55 | EXPECT_TRUE( 56 | (Precedence{0, Associativity::Right}) < (Precedence{1, Associativity::Right}) 57 | ); 58 | EXPECT_FALSE( 59 | (Precedence{0, Associativity::Right}) > (Precedence{1, Associativity::Right}) 60 | ); 61 | } 62 | 63 | TEST_F(TestPrecedence, 64 | HigherLevelLeftAssociative) { 65 | EXPECT_FALSE( 66 | (Precedence{2, Associativity::Left}) < (Precedence{1, Associativity::Left}) 67 | ); 68 | EXPECT_TRUE( 69 | (Precedence{2, Associativity::Left}) > (Precedence{1, Associativity::Left}) 70 | ); 71 | } 72 | 73 | TEST_F(TestPrecedence, 74 | HigherLevelRightAssociative) { 75 | EXPECT_FALSE( 76 | (Precedence{2, Associativity::Right}) < (Precedence{1, Associativity::Right}) 77 | ); 78 | EXPECT_TRUE( 79 | (Precedence{2, Associativity::Right}) > (Precedence{1, Associativity::Right}) 80 | ); 81 | } 82 | -------------------------------------------------------------------------------- /include/pog/operations/follow.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace pog { 10 | 11 | /** 12 | * Follow operations maps (q,x) where q is state and x is symbol to set of symbols. 13 | * 14 | * Formal definition for what Follow() represents is 15 | * Follow(q, A) = Read(q, A) union (union { Follow(p, B) | (q, A) includes (p, B) }) 16 | * So Follow(q, A) represents Read(q, A) with union of all Follow sets of (p, B) such that 17 | * (q, A) is in include relation with (p, B). 18 | * 19 | * To put it simply by individual parts: 20 | * 1. Read(q, A) means what symbols can directly follow A in state q. See Read() operation for 21 | * more information. 22 | * 2. 
Includes relation of (q, A) with (p, B) means that we are about to read A in state q 23 | * and everything after A can be an empty string, so whatever can follow B in state p can 24 | * also follow A in state q. 25 | * 26 | * So Follow(q, A) represents what symbols can follow A while in state q. It is constructed from 27 | * generated follow using Read(q, A) and propagated follow using the includes relation. 28 | */ 29 | template 30 | class Follow : public Operation, const Symbol*> 31 | { 32 | public: 33 | using Parent = Operation, const Symbol*>; 34 | 35 | using AutomatonType = Automaton; 36 | using GrammarType = Grammar; 37 | 38 | using StateAndSymbolType = StateAndSymbol; 39 | 40 | Follow(const AutomatonType* automaton, const GrammarType* grammar, const Includes& includes, Read& read_op) 41 | : Parent(automaton, grammar), _includes(includes), _read_op(read_op) {} 42 | Follow(const Follow&) = delete; 43 | Follow(Follow&&) noexcept = default; 44 | 45 | virtual void calculate() override 46 | { 47 | // We use digraph algorithm which calculates Follow() for us. See digraph_algo() for more information. 48 | digraph_algo(_includes, _read_op, Parent::_operation); 49 | } 50 | 51 | private: 52 | const Includes& _includes; 53 | Read& _read_op; 54 | }; 55 | 56 | } // namespace pog 57 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/stringpiece.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2004 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file.
4 | 5 | #include "re2/stringpiece.h" 6 | 7 | #include 8 | 9 | #include "util/util.h" 10 | 11 | namespace re2 { 12 | 13 | const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h 14 | 15 | StringPiece::size_type StringPiece::copy(char* buf, size_type n, 16 | size_type pos) const { 17 | size_type ret = std::min(size_ - pos, n); 18 | memcpy(buf, data_ + pos, ret); 19 | return ret; 20 | } 21 | 22 | StringPiece StringPiece::substr(size_type pos, size_type n) const { 23 | if (pos > size_) pos = size_; 24 | if (n > size_ - pos) n = size_ - pos; 25 | return StringPiece(data_ + pos, n); 26 | } 27 | 28 | StringPiece::size_type StringPiece::find(const StringPiece& s, 29 | size_type pos) const { 30 | if (pos > size_) return npos; 31 | const_pointer result = std::search(data_ + pos, data_ + size_, 32 | s.data_, s.data_ + s.size_); 33 | size_type xpos = result - data_; 34 | return xpos + s.size_ <= size_ ? xpos : npos; 35 | } 36 | 37 | StringPiece::size_type StringPiece::find(char c, size_type pos) const { 38 | if (size_ <= 0 || pos >= size_) return npos; 39 | const_pointer result = std::find(data_ + pos, data_ + size_, c); 40 | return result != data_ + size_ ? result - data_ : npos; 41 | } 42 | 43 | StringPiece::size_type StringPiece::rfind(const StringPiece& s, 44 | size_type pos) const { 45 | if (size_ < s.size_) return npos; 46 | if (s.size_ == 0) return std::min(size_, pos); 47 | const_pointer last = data_ + std::min(size_ - s.size_, pos) + s.size_; 48 | const_pointer result = std::find_end(data_, last, s.data_, s.data_ + s.size_); 49 | return result != last ? 
result - data_ : npos; 50 | } 51 | 52 | StringPiece::size_type StringPiece::rfind(char c, size_type pos) const { 53 | if (size_ <= 0) return npos; 54 | for (size_t i = std::min(pos + 1, size_); i != 0;) { 55 | if (data_[--i] == c) return i; 56 | } 57 | return npos; 58 | } 59 | 60 | std::ostream& operator<<(std::ostream& o, const StringPiece& p) { 61 | o.write(p.data(), p.size()); 62 | return o; 63 | } 64 | 65 | } // namespace re2 66 | -------------------------------------------------------------------------------- /deps/fmt/fmt/src/format.cc: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ 2 | // 3 | // Copyright (c) 2012 - 2016, Victor Zverovich 4 | // All rights reserved. 5 | // 6 | // For the license information refer to format.h. 7 | 8 | #include "fmt/format-inl.h" 9 | 10 | FMT_BEGIN_NAMESPACE 11 | template struct FMT_API internal::basic_data; 12 | 13 | // Workaround a bug in MSVC2013 that prevents instantiation of grisu_format. 14 | bool (*instantiate_grisu_format)(double, internal::buffer&, int, unsigned, 15 | int&) = internal::grisu_format; 16 | 17 | #ifndef FMT_STATIC_THOUSANDS_SEPARATOR 18 | template FMT_API internal::locale_ref::locale_ref(const std::locale& loc); 19 | template FMT_API std::locale internal::locale_ref::get() const; 20 | #endif 21 | 22 | // Explicit instantiations for char. 
23 | 24 | template FMT_API char internal::thousands_sep_impl(locale_ref); 25 | template FMT_API char internal::decimal_point_impl(locale_ref); 26 | 27 | template FMT_API void internal::buffer::append(const char*, const char*); 28 | 29 | template FMT_API void internal::arg_map::init( 30 | const basic_format_args& args); 31 | 32 | template FMT_API std::string internal::vformat( 33 | string_view, basic_format_args); 34 | 35 | template FMT_API format_context::iterator internal::vformat_to( 36 | internal::buffer&, string_view, basic_format_args); 37 | 38 | template FMT_API char* internal::sprintf_format(double, internal::buffer&, 39 | sprintf_specs); 40 | template FMT_API char* internal::sprintf_format(long double, 41 | internal::buffer&, 42 | sprintf_specs); 43 | 44 | // Explicit instantiations for wchar_t. 45 | 46 | template FMT_API wchar_t internal::thousands_sep_impl(locale_ref); 47 | template FMT_API wchar_t internal::decimal_point_impl(locale_ref); 48 | 49 | template FMT_API void internal::buffer::append(const wchar_t*, 50 | const wchar_t*); 51 | 52 | template FMT_API void internal::arg_map::init( 53 | const basic_format_args&); 54 | 55 | template FMT_API std::wstring internal::vformat( 56 | wstring_view, basic_format_args); 57 | FMT_END_NAMESPACE 58 | -------------------------------------------------------------------------------- /include/pog/filter_view.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | template 8 | class FilterView 9 | { 10 | public: 11 | using ValueType = typename std::iterator_traits::value_type; 12 | 13 | class iterator 14 | { 15 | public: 16 | using difference_type = typename std::iterator_traits::difference_type; 17 | using value_type = typename std::iterator_traits::value_type; 18 | using reference = typename std::iterator_traits::reference; 19 | using pointer = typename std::iterator_traits::pointer; 20 | using iterator_category = 
std::forward_iterator_tag; 21 | 22 | iterator(const FilterView* parent) : _parent(parent), _itr(_parent->_begin) { _find_next(); } 23 | iterator(const FilterView* parent, const It& itr) : _parent(parent), _itr(itr) {} 24 | iterator(const iterator&) = default; 25 | iterator(iterator&&) noexcept = default; 26 | 27 | iterator& operator=(const iterator&) = default; 28 | iterator& operator=(iterator&&) noexcept = default; 29 | 30 | reference operator*() const { return *_itr; } 31 | pointer operator->() const { return &*_itr; } 32 | 33 | iterator& operator++() 34 | { 35 | ++_itr; 36 | _find_next(); 37 | return *this; 38 | } 39 | 40 | iterator operator++(int) 41 | { 42 | auto tmp = *this; 43 | ++_itr; 44 | _find_next(); 45 | return tmp; 46 | } 47 | 48 | bool operator==(const iterator& rhs) const { return _itr == rhs._itr; } 49 | bool operator!=(const iterator& rhs) const { return !(*this == rhs); } 50 | 51 | private: 52 | void _find_next() 53 | { 54 | while (_itr != _parent->_end && !_parent->_filter(*_itr)) 55 | ++_itr; 56 | } 57 | 58 | const FilterView* _parent; 59 | It _itr; 60 | }; 61 | 62 | template 63 | FilterView(I&& begin, I&& end, F&& filter) 64 | : _begin(std::forward(begin)), _end(std::forward(end)), _filter(std::forward(filter)) {} 65 | 66 | FilterView(const FilterView&) = default; 67 | FilterView(FilterView&&) noexcept = default; 68 | 69 | FilterView& operator=(const FilterView&) = default; 70 | FilterView& operator=(FilterView&&) noexcept = default; 71 | 72 | auto begin() const { return iterator{this}; } 73 | auto end() const { return iterator{this, _end}; } 74 | 75 | private: 76 | It _begin, _end; 77 | std::function _filter; 78 | }; 79 | 80 | template 81 | FilterView(It&&, It&&, Filter&&) -> FilterView; 82 | -------------------------------------------------------------------------------- /include/pog/token.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 
#include 7 | 8 | #include 9 | 10 | #include 11 | 12 | namespace pog { 13 | 14 | template 15 | class Token 16 | { 17 | public: 18 | using SymbolType = Symbol; 19 | using CallbackType = std::function; 20 | 21 | template 22 | Token(std::uint32_t index, const std::string& pattern, StatesT&& active_in_states) : Token(index, pattern, std::forward(active_in_states), nullptr) {} 23 | 24 | template 25 | Token(std::uint32_t index, const std::string& pattern, StatesT&& active_in_states, const SymbolType* symbol) 26 | : _index(index), _pattern(pattern), _symbol(symbol), _regexp(std::make_unique(_pattern)), _action(), 27 | _enter_state(), _active_in_states(std::forward(active_in_states)) {} 28 | 29 | std::uint32_t get_index() const { return _index; } 30 | const std::string& get_pattern() const { return _pattern; } 31 | const SymbolType* get_symbol() const { return _symbol; } 32 | const re2::RE2* get_regexp() const { return _regexp.get(); } 33 | 34 | bool has_symbol() const { return _symbol != nullptr; } 35 | bool has_action() const { return static_cast(_action); } 36 | bool has_transition_to_state() const { return static_cast(_enter_state); } 37 | 38 | template 39 | void set_action(CallbackT&& action) 40 | { 41 | _action = std::forward(action); 42 | } 43 | 44 | template 45 | ValueT perform_action(Args&&... 
args) const 46 | { 47 | return _action(std::forward(args)...); 48 | } 49 | 50 | void set_transition_to_state(const std::string& state) 51 | { 52 | _enter_state = state; 53 | } 54 | 55 | const std::string& get_transition_to_state() const 56 | { 57 | return _enter_state.value(); 58 | } 59 | 60 | template 61 | void add_active_in_state(StrT&& state) 62 | { 63 | _active_in_states.push_back(std::forward(state)); 64 | } 65 | 66 | const std::vector& get_active_in_states() const 67 | { 68 | return _active_in_states; 69 | } 70 | 71 | private: 72 | std::uint32_t _index; 73 | std::string _pattern; 74 | const SymbolType* _symbol; 75 | std::unique_ptr _regexp; 76 | CallbackType _action; 77 | std::optional _enter_state; 78 | std::vector _active_in_states; 79 | }; 80 | 81 | } // namespace pog 82 | -------------------------------------------------------------------------------- /tests/test_token.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | using namespace pog; 6 | using namespace ::testing; 7 | 8 | class TestToken : public ::testing::Test {}; 9 | 10 | TEST_F(TestToken, 11 | SimpleTokenWithoutSymbol) { 12 | Token t(1, "abc", std::vector{"s1", "s2"}); 13 | 14 | EXPECT_EQ(t.get_index(), 1u); 15 | EXPECT_EQ(t.get_pattern(), "abc"); 16 | EXPECT_EQ(t.get_active_in_states(), (std::vector{"s1", "s2"})); 17 | EXPECT_EQ(t.get_symbol(), nullptr); 18 | EXPECT_THAT(t.get_regexp(), A()); 19 | 20 | EXPECT_FALSE(t.has_symbol()); 21 | EXPECT_FALSE(t.has_action()); 22 | EXPECT_FALSE(t.has_transition_to_state()); 23 | } 24 | 25 | TEST_F(TestToken, 26 | SimpleTokenWithSymbol) { 27 | Symbol s(1, SymbolKind::Nonterminal, "a"); 28 | Token t(1, "abc", std::vector{"s1", "s2"}, &s); 29 | 30 | EXPECT_EQ(t.get_index(), 1u); 31 | EXPECT_EQ(t.get_pattern(), "abc"); 32 | EXPECT_EQ(t.get_active_in_states(), (std::vector{"s1", "s2"})); 33 | EXPECT_EQ(t.get_symbol(), &s); 34 | EXPECT_THAT(t.get_regexp(), A()); 35 | 36 | 
EXPECT_TRUE(t.has_symbol()); 37 | EXPECT_FALSE(t.has_action()); 38 | EXPECT_FALSE(t.has_transition_to_state()); 39 | } 40 | 41 | TEST_F(TestToken, 42 | TransitionToState) { 43 | Token t(1, "abc", std::vector{"s1", "s2"}); 44 | t.set_transition_to_state("dest_state"); 45 | 46 | EXPECT_EQ(t.get_index(), 1u); 47 | EXPECT_EQ(t.get_pattern(), "abc"); 48 | EXPECT_EQ(t.get_active_in_states(), (std::vector{"s1", "s2"})); 49 | EXPECT_EQ(t.get_symbol(), nullptr); 50 | EXPECT_THAT(t.get_regexp(), A()); 51 | 52 | EXPECT_FALSE(t.has_symbol()); 53 | EXPECT_FALSE(t.has_action()); 54 | EXPECT_TRUE(t.has_transition_to_state()); 55 | EXPECT_EQ(t.get_transition_to_state(), "dest_state"); 56 | } 57 | 58 | TEST_F(TestToken, 59 | AddActiveInState) { 60 | Token t(1, "abc", std::vector{"s1", "s2"}); 61 | 62 | t.add_active_in_state("s3"); 63 | 64 | EXPECT_EQ(t.get_active_in_states(), (std::vector{"s1", "s2", "s3"})); 65 | } 66 | 67 | TEST_F(TestToken, 68 | Action) { 69 | bool called = false; 70 | 71 | Token t(1, "abc", std::vector{"s1", "s2"}); 72 | t.set_action([&](std::string_view str) -> int { 73 | called = true; 74 | return static_cast(str.length()); 75 | }); 76 | 77 | EXPECT_EQ(t.get_index(), 1u); 78 | EXPECT_TRUE(t.has_action()); 79 | EXPECT_EQ(t.perform_action("abcdef"), 6); 80 | EXPECT_TRUE(called); 81 | } 82 | -------------------------------------------------------------------------------- /include/pog/operations/read.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace pog { 7 | 8 | /** 9 | * Read operations maps (q,x) where q is state and x is symbol into set of symbols. 10 | * 11 | * Let's take state Q and non-final item A -> a <*> B b. Read(Q, B) represents set 12 | * of symbols we can possibly read after reading B while in state Q. 
Originally, 13 | * in all papers you see this proposed to be calculated using DirectRead() function 14 | * and reads relation but in the end, it's the same as if we do First(b). First(b) 15 | * incorporates situation if some symbol in sequence b can be reduced to empty string. 16 | * 17 | * So to put it shortly, for state Q and item A -> a <*> B b, Read(Q, B) = First(b). 18 | */ 19 | template 20 | class Read : public Operation, const Symbol*> 21 | { 22 | public: 23 | using Parent = Operation, const Symbol*>; 24 | 25 | using AutomatonType = Automaton; 26 | using GrammarType = Grammar; 27 | using SymbolType = Symbol; 28 | 29 | using StateAndSymbolType = StateAndSymbol; 30 | 31 | Read(const AutomatonType* automaton, const GrammarType* grammar) : Parent(automaton, grammar) {} 32 | Read(const Read&) = delete; 33 | Read(Read&&) noexcept = default; 34 | 35 | virtual void calculate() override 36 | { 37 | // Iterate over all states of LR automaton 38 | for (const auto& state : Parent::_automaton->get_states()) 39 | { 40 | for (const auto& item : *state.get()) 41 | { 42 | // We don't care about final items, only those in form A -> a <*> B b 43 | if (item->is_final()) 44 | continue; 45 | 46 | // Symbol right to <*> needs to be nonterminal 47 | auto next_symbol = item->get_read_symbol(); 48 | if (!next_symbol->is_nonterminal()) 49 | continue; 50 | 51 | // Observe everything right of B, so in this case 'b' and calculate First() 52 | auto right_rest = item->get_right_side_without_read_symbol(); 53 | auto symbols = Parent::_grammar->first(right_rest); 54 | 55 | // Insert operation result 56 | auto ss = StateAndSymbolType{state.get(), next_symbol}; 57 | auto itr = Parent::_operation.find(ss); 58 | if (itr == Parent::_operation.end()) 59 | Parent::_operation.emplace(std::move(ss), std::move(symbols)); 60 | else 61 | { 62 | // TODO: std::vector + std::set_union or std::unordered_set::extract 63 | std::copy(symbols.begin(), symbols.end(), std::inserter(itr->second, 
itr->second.begin())); 64 | } 65 | } 66 | } 67 | } 68 | }; 69 | 70 | } // namespace pog 71 | -------------------------------------------------------------------------------- /deps/fmt/fmt/support/cmake/cxx14.cmake: -------------------------------------------------------------------------------- 1 | # C++14 feature support detection 2 | 3 | include(CheckCXXSourceCompiles) 4 | include(CheckCXXCompilerFlag) 5 | 6 | if (NOT CMAKE_CXX_STANDARD) 7 | set(CMAKE_CXX_STANDARD 11) 8 | endif() 9 | message(STATUS "CXX_STANDARD: ${CMAKE_CXX_STANDARD}") 10 | 11 | if (CMAKE_CXX_STANDARD EQUAL 20) 12 | check_cxx_compiler_flag(-std=c++20 has_std_20_flag) 13 | check_cxx_compiler_flag(-std=c++2a has_std_2a_flag) 14 | 15 | if (has_std_20_flag) 16 | set(CXX_STANDARD_FLAG -std=c++20) 17 | elseif (has_std_2a_flag) 18 | set(CXX_STANDARD_FLAG -std=c++2a) 19 | endif () 20 | elseif (CMAKE_CXX_STANDARD EQUAL 17) 21 | check_cxx_compiler_flag(-std=c++17 has_std_17_flag) 22 | check_cxx_compiler_flag(-std=c++1z has_std_1z_flag) 23 | 24 | if (has_std_17_flag) 25 | set(CXX_STANDARD_FLAG -std=c++17) 26 | elseif (has_std_1z_flag) 27 | set(CXX_STANDARD_FLAG -std=c++1z) 28 | endif () 29 | elseif (CMAKE_CXX_STANDARD EQUAL 14) 30 | check_cxx_compiler_flag(-std=c++14 has_std_14_flag) 31 | check_cxx_compiler_flag(-std=c++1y has_std_1y_flag) 32 | 33 | if (has_std_14_flag) 34 | set(CXX_STANDARD_FLAG -std=c++14) 35 | elseif (has_std_1y_flag) 36 | set(CXX_STANDARD_FLAG -std=c++1y) 37 | endif () 38 | elseif (CMAKE_CXX_STANDARD EQUAL 11) 39 | check_cxx_compiler_flag(-std=c++11 has_std_11_flag) 40 | check_cxx_compiler_flag(-std=c++0x has_std_0x_flag) 41 | 42 | if (has_std_11_flag) 43 | set(CXX_STANDARD_FLAG -std=c++11) 44 | elseif (has_std_0x_flag) 45 | set(CXX_STANDARD_FLAG -std=c++0x) 46 | endif () 47 | endif () 48 | 49 | set(CMAKE_REQUIRED_FLAGS ${CXX_STANDARD_FLAG}) 50 | 51 | # Check if variadic templates are working and not affected by GCC bug 39653: 52 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39653 
53 | # Can be removed once gcc 4.4 support is dropped. 54 | check_cxx_source_compiles(" 55 | template 56 | struct S { typedef typename S::type type; }; 57 | int main() {}" SUPPORTS_VARIADIC_TEMPLATES) 58 | if (NOT SUPPORTS_VARIADIC_TEMPLATES) 59 | set (SUPPORTS_VARIADIC_TEMPLATES OFF) 60 | endif () 61 | 62 | # Check if user-defined literals are available 63 | check_cxx_source_compiles(" 64 | void operator\"\" _udl(long double); 65 | int main() {}" 66 | SUPPORTS_USER_DEFINED_LITERALS) 67 | if (NOT SUPPORTS_USER_DEFINED_LITERALS) 68 | set (SUPPORTS_USER_DEFINED_LITERALS OFF) 69 | endif () 70 | 71 | # Check if is available 72 | set(CMAKE_REQUIRED_FLAGS -std=c++1z) 73 | check_cxx_source_compiles(" 74 | #include 75 | int main() {}" 76 | FMT_HAS_VARIANT) 77 | if (NOT FMT_HAS_VARIANT) 78 | set (FMT_HAS_VARIANT OFF) 79 | endif () 80 | 81 | set(CMAKE_REQUIRED_FLAGS ) 82 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/set.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef RE2_SET_H_ 6 | #define RE2_SET_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "re2/re2.h" 13 | 14 | namespace re2 { 15 | class Prog; 16 | class Regexp; 17 | } // namespace re2 18 | 19 | namespace re2 { 20 | 21 | // An RE2::Set represents a collection of regexps that can 22 | // be searched for simultaneously. 23 | class RE2::Set { 24 | public: 25 | enum ErrorKind { 26 | kNoError = 0, 27 | kNotCompiled, // The set is not compiled. 28 | kOutOfMemory, // The DFA ran out of memory. 29 | kInconsistent, // The result is inconsistent. This should never happen. 
30 | }; 31 | 32 | struct ErrorInfo { 33 | ErrorKind kind; 34 | }; 35 | 36 | Set(const RE2::Options& options, RE2::Anchor anchor); 37 | ~Set(); 38 | 39 | // Adds pattern to the set using the options passed to the constructor. 40 | // Returns the index that will identify the regexp in the output of Match(), 41 | // or -1 if the regexp cannot be parsed. 42 | // Indices are assigned in sequential order starting from 0. 43 | // Errors do not increment the index; if error is not NULL, *error will hold 44 | // the error message from the parser. 45 | int Add(const StringPiece& pattern, std::string* error); 46 | 47 | // Compiles the set in preparation for matching. 48 | // Returns false if the compiler runs out of memory. 49 | // Add() must not be called again after Compile(). 50 | // Compile() must be called before Match(). 51 | bool Compile(); 52 | 53 | // Returns true if text matches at least one of the regexps in the set. 54 | // Fills v (if not NULL) with the indices of the matching regexps. 55 | // Callers must not expect v to be sorted. 56 | bool Match(const StringPiece& text, std::vector* v) const; 57 | 58 | // As above, but populates error_info (if not NULL) when none of the regexps 59 | // in the set matched. This can inform callers when DFA execution fails, for 60 | // example, because they might wish to handle that case differently. 
61 | bool Match(const StringPiece& text, std::vector* v, 62 | ErrorInfo* error_info) const; 63 | 64 | private: 65 | typedef std::pair Elem; 66 | 67 | RE2::Options options_; 68 | RE2::Anchor anchor_; 69 | std::vector elem_; 70 | re2::Prog* prog_; 71 | bool compiled_; 72 | int size_; 73 | 74 | Set(const Set&) = delete; 75 | Set& operator=(const Set&) = delete; 76 | }; 77 | 78 | } // namespace re2 79 | 80 | #endif // RE2_SET_H_ 81 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/unicode_casefold.h: -------------------------------------------------------------------------------- 1 | // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef RE2_UNICODE_CASEFOLD_H_ 6 | #define RE2_UNICODE_CASEFOLD_H_ 7 | 8 | // Unicode case folding tables. 9 | 10 | // The Unicode case folding tables encode the mapping from one Unicode point 11 | // to the next largest Unicode point with equivalent folding. The largest 12 | // point wraps back to the first. For example, the tables map: 13 | // 14 | // 'A' -> 'a' 15 | // 'a' -> 'A' 16 | // 17 | // 'K' -> 'k' 18 | // 'k' -> 'K' (Kelvin symbol) 19 | // 'K' -> 'K' 20 | // 21 | // Like everything Unicode, these tables are big. If we represent the table 22 | // as a sorted list of uint32_t pairs, it has 2049 entries and is 16 kB. 23 | // Most table entries look like the ones around them: 24 | // 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc. 25 | // Instead of listing all the pairs explicitly, we make a list of ranges 26 | // and deltas, so that the table entries for 'A' through 'Z' can be represented 27 | // as a single entry { 'A', 'Z', +32 }. 28 | // 29 | // In addition to blocks that map to each other (A-Z mapping to a-z) 30 | // there are blocks of pairs that individually map to each other 31 | // (for example, 0100<->0101, 0102<->0103, 0104<->0105, ...). 
32 | // For those, the special delta value EvenOdd marks even/odd pairs 33 | // (if even, add 1; if odd, subtract 1), and OddEven marks odd/even pairs. 34 | // 35 | // In this form, the table has 274 entries, about 3kB. If we were to split 36 | // the table into one for 16-bit codes and an overflow table for larger ones, 37 | // we could get it down to about 1.5kB, but that's not worth the complexity. 38 | // 39 | // The grouped form also allows for efficient fold range calculations 40 | // rather than looping one character at a time. 41 | 42 | #include 43 | 44 | #include "util/util.h" 45 | #include "util/utf.h" 46 | 47 | namespace re2 { 48 | 49 | enum { 50 | EvenOdd = 1, 51 | OddEven = -1, 52 | EvenOddSkip = 1<<30, 53 | OddEvenSkip, 54 | }; 55 | 56 | struct CaseFold { 57 | Rune lo; 58 | Rune hi; 59 | int32_t delta; 60 | }; 61 | 62 | extern const CaseFold unicode_casefold[]; 63 | extern const int num_unicode_casefold; 64 | 65 | extern const CaseFold unicode_tolower[]; 66 | extern const int num_unicode_tolower; 67 | 68 | // Returns the CaseFold* in the tables that contains rune. 69 | // If rune is not in the tables, returns the first CaseFold* after rune. 70 | // If rune is larger than any value in the tables, returns NULL. 71 | extern const CaseFold* LookupCaseFold(const CaseFold*, int, Rune rune); 72 | 73 | // Returns the result of applying the fold f to the rune r. 
74 | extern Rune ApplyFold(const CaseFold *f, Rune r); 75 | 76 | } // namespace re2 77 | 78 | #endif // RE2_UNICODE_CASEFOLD_H_ 79 | -------------------------------------------------------------------------------- /include/pog/parser_report.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace pog { 10 | 11 | template 12 | struct ShiftReduceConflict 13 | { 14 | const State* state; 15 | const Symbol* symbol; 16 | const Rule* rule; 17 | 18 | std::string to_string(std::string_view arrow = "->", std::string_view eps = "") const 19 | { 20 | return fmt::format("Shift-reduce conflict of symbol \'{}\' and rule \'{}\' in state {}", symbol->get_name(), rule->to_string(arrow, eps), state->get_index()); 21 | } 22 | }; 23 | 24 | template 25 | struct ReduceReduceConflict 26 | { 27 | const State* state; 28 | const Rule* rule1; 29 | const Rule* rule2; 30 | 31 | std::string to_string(std::string_view arrow = "->", std::string_view eps = "") const 32 | { 33 | return fmt::format("Reduce-reduce conflict of rule \'{}\' and rule \'{}\' in state {}", rule1->to_string(arrow, eps), rule2->to_string(arrow, eps), state->get_index()); 34 | } 35 | }; 36 | 37 | template 38 | using Issue = std::variant, ReduceReduceConflict>; 39 | 40 | template 41 | class ParserReport 42 | { 43 | public: 44 | using IssueType = Issue; 45 | using RuleType = Rule; 46 | using StateType = State; 47 | using SymbolType = Symbol; 48 | 49 | using ReduceReduceConflictType = ReduceReduceConflict; 50 | using ShiftReduceConflictType = ShiftReduceConflict; 51 | 52 | bool ok() const { return _issues.empty(); } 53 | operator bool() const { return ok(); } 54 | 55 | std::size_t number_of_issues() const { return _issues.size(); } 56 | auto begin() { return _issues.begin(); } 57 | auto end() { return _issues.end(); } 58 | auto begin() const { return _issues.begin(); } 59 | auto end() const { return 
_issues.end(); } 60 | 61 | void add_shift_reduce_conflict(const StateType* state, const SymbolType* symbol, const RuleType* rule) 62 | { 63 | _issues.push_back(ShiftReduceConflictType{state, symbol, rule}); 64 | } 65 | 66 | void add_reduce_reduce_conflict(const StateType* state, const RuleType* rule1, const RuleType* rule2) 67 | { 68 | _issues.push_back(ReduceReduceConflictType{state, rule1, rule2}); 69 | } 70 | 71 | std::string to_string(std::string_view arrow = "->", std::string_view eps = "") const 72 | { 73 | std::vector issues_str(_issues.size()); 74 | std::transform(_issues.begin(), _issues.end(), issues_str.begin(), [&](const auto& issue) { 75 | return visit_with(issue, 76 | [&](const ShiftReduceConflictType& sr) { return sr.to_string(arrow, eps); }, 77 | [&](const ReduceReduceConflictType& rr) { return rr.to_string(arrow, eps); } 78 | ); 79 | }); 80 | return fmt::format("{}", fmt::join(issues_str.begin(), issues_str.end(), "\n")); 81 | } 82 | 83 | private: 84 | std::vector _issues; 85 | }; 86 | 87 | } // namespace pog 88 | -------------------------------------------------------------------------------- /include/pog/digraph_algo.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace pog { 7 | 8 | namespace detail { 9 | 10 | template 11 | void digraph_traverse(const NodeT& x, std::deque& stack, std::unordered_map& depths, const R& rel, BaseF& base_f, F& f) 12 | { 13 | stack.push_back(x); // push x 14 | std::size_t current_depth = stack.size(); // d <- depth of stack 15 | depths.insert_or_assign(x, current_depth); // N[x] <- d 16 | f.insert_or_assign(x, base_f[x]); // F(x) <- F'(x) 17 | 18 | auto rel_with = rel.find(x); 19 | if (rel_with) 20 | { 21 | for (const auto& y : *rel_with) // for each y such that xRy 22 | { 23 | auto include_itr = depths.find(y); 24 | if (include_itr == depths.end()) // if N[y] == 0 25 | digraph_traverse(y, stack, depths, rel, base_f, f); // 
recursive call Traverse(y) 26 | 27 | include_itr = depths.find(y); // possible iterator invalidation 28 | depths[x] = std::min(depths[x], include_itr->second); // N[x] <- min(N[x], N[y]); lowering N[x] (not N[y]) keeps x on the stack until the root of its SCC is reached 29 | auto& fx = f[x]; 30 | auto& fy = f[y]; 31 | std::copy(fy.begin(), fy.end(), std::inserter(fx, fx.begin())); // F(x) <- F(x) union F(y) 32 | } 33 | } 34 | 35 | if (depths[x] == current_depth) // if N[x] == d 36 | { 37 | auto top_x = std::move(stack.back()); 38 | stack.pop_back(); 39 | depths[top_x] = std::numeric_limits::max(); // N(top of stack) <- Infinity 40 | if (top_x != x) 41 | f[top_x] = f[x]; // F(top of stack) <- F(x) 42 | 43 | while (top_x != x) // while top of stack != x 44 | { 45 | top_x = std::move(stack.back()); 46 | stack.pop_back(); 47 | depths[top_x] = std::numeric_limits::max(); // N(top of stack) <- Infinity 48 | if (top_x != x) 49 | f[top_x] = f[x]; // F(top of stack) <- F(x) 50 | } 51 | } 52 | } 53 | 54 | } // namespace detail 55 | 56 | /** 57 | * Digraph algorithm for finding SCCs (Strongly Connected Components). It is used for 58 | * computation of function F(x) using base function F'(x) over directed graph. It first 59 | * computes F'(x) as F(x) for each node x and then performs unions of F(x) over edges 60 | * of directed graph. Finding SCC is a crucial part to not get into infinite loops and properly 61 | * propagate F(x) in looped relations. 62 | * 63 | * You can specify custom relation R which specifies edges of the directed graph, base function 64 | * F'(x) which needs to be already precomputed. The output is operation F(x) which will
66 | * 67 | * TODO: base_f should not be non-const but we require operator[] right now 68 | */ 69 | template 70 | void digraph_algo(const R& rel, BaseF& base_f, F& f) 71 | { 72 | std::unordered_map depths; 73 | std::deque stack; 74 | for (const auto& x : rel) 75 | { 76 | detail::digraph_traverse(x.first, stack, depths, rel, base_f, f); 77 | } 78 | } 79 | 80 | } // namespace pog 81 | -------------------------------------------------------------------------------- /include/pog/operations/lookahead.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace pog { 9 | 10 | /** 11 | * Lookahead operation maps (q, R), where q is state and R is rule from grammar, to set of symbols. 12 | * 13 | * Formal definition for Lookahead(q, A -> x) is 14 | * Lookahead(q, A -> x) = union { Follow(p, B) | (q, A -> x) lookback (p, B) } 15 | * So it's union of all Follow sets of state p and symbol B such that (q, A -> x) is in lookback 16 | * relation with (p, B). 17 | * 18 | * To put it simply: 19 | * 1. Follow set of (p, B) represents what symbols can follow symbol B when we are in the state B. 20 | * 2. Lookback relation represents that in order to preform some reduction A -> x in state q, we first 21 | * had to go through some other state p and use what follows A in B -> a A b to know when to perform 22 | * reduction. 23 | * So we'll take all rules A -> x and find in which state they can be reduced (there is an item A -> x <*>). 24 | * We'll then union all Follow() sets according to lookback relation and for each state and rule, we now 25 | * know what symbols need to follow in order to perform reductions by such rule in that particular state. 
26 | */ 27 | template 28 | class Lookahead : public Operation, const Symbol*> 29 | { 30 | public: 31 | using Parent = Operation, const Symbol*>; 32 | 33 | using AutomatonType = Automaton; 34 | using GrammarType = Grammar; 35 | 36 | using StateAndRuleType = StateAndRule; 37 | 38 | // TODO: Follow<> should not be non-const but we need it for operator[] 39 | Lookahead(const AutomatonType* automaton, const GrammarType* grammar, const Lookback& lookback, Follow& follow_op) 40 | : Parent(automaton, grammar), _lookback(lookback), _follow_op(follow_op) {} 41 | Lookahead(const Lookahead&) = delete; 42 | Lookahead(Lookahead&&) noexcept = default; 43 | 44 | virtual void calculate() override 45 | { 46 | // Iterate over all rules in grammar 47 | for (const auto& rule : Parent::_grammar->get_rules()) 48 | { 49 | for (const auto& state : Parent::_automaton->get_states()) 50 | { 51 | // Find lookback of the current state and rule 52 | auto sr = StateAndRuleType{state.get(), rule.get()}; 53 | auto lookback_with = _lookback.find(sr); 54 | if (!lookback_with) 55 | continue; 56 | 57 | // Union all Follow() sets of the current state and rule to compute Lookahead() 58 | for (const auto& ss : *lookback_with) 59 | { 60 | if (auto itr = Parent::_operation.find(sr); itr == Parent::_operation.end()) 61 | Parent::_operation.emplace(std::move(sr), _follow_op[ss]); 62 | else if (auto follow_res = _follow_op.find(ss); follow_res) 63 | std::copy(follow_res->begin(), follow_res->end(), std::inserter(itr->second, itr->second.begin())); 64 | } 65 | } 66 | } 67 | 68 | } 69 | 70 | private: 71 | const Lookback& _lookback; 72 | Follow& _follow_op; 73 | }; 74 | 75 | } // namespace pog 76 | -------------------------------------------------------------------------------- /deps/fmt/fmt/include/fmt/locale.h: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ - std::locale support 2 | // 3 | // Copyright (c) 2012 - present, Victor Zverovich 4 | 
// All rights reserved. 5 | // 6 | // For the license information refer to format.h. 7 | 8 | #ifndef FMT_LOCALE_H_ 9 | #define FMT_LOCALE_H_ 10 | 11 | #include 12 | #include "format.h" 13 | 14 | FMT_BEGIN_NAMESPACE 15 | 16 | namespace internal { 17 | template 18 | typename buffer_context::iterator vformat_to( 19 | const std::locale& loc, buffer& buf, 20 | basic_string_view format_str, 21 | basic_format_args> args) { 22 | using range = buffer_range; 23 | return vformat_to>(buf, to_string_view(format_str), args, 24 | internal::locale_ref(loc)); 25 | } 26 | 27 | template 28 | std::basic_string vformat(const std::locale& loc, 29 | basic_string_view format_str, 30 | basic_format_args> args) { 31 | basic_memory_buffer buffer; 32 | internal::vformat_to(loc, buffer, format_str, args); 33 | return fmt::to_string(buffer); 34 | } 35 | } // namespace internal 36 | 37 | template > 38 | inline std::basic_string vformat( 39 | const std::locale& loc, const S& format_str, 40 | basic_format_args> args) { 41 | return internal::vformat(loc, to_string_view(format_str), args); 42 | } 43 | 44 | template > 45 | inline std::basic_string format(const std::locale& loc, 46 | const S& format_str, Args&&... args) { 47 | return internal::vformat( 48 | loc, to_string_view(format_str), 49 | {internal::make_args_checked(format_str, args...)}); 50 | } 51 | 52 | template ::value, char_t>> 55 | inline OutputIt vformat_to(OutputIt out, const std::locale& loc, 56 | const S& format_str, 57 | format_args_t args) { 58 | using range = internal::output_range; 59 | return vformat_to>( 60 | range(out), to_string_view(format_str), args, internal::locale_ref(loc)); 61 | } 62 | 63 | template ::value&& 65 | internal::is_string::value)> 66 | inline OutputIt format_to(OutputIt out, const std::locale& loc, 67 | const S& format_str, Args&&... 
args) { 68 | internal::check_format_string(format_str); 69 | using context = format_context_t>; 70 | format_arg_store as{args...}; 71 | return vformat_to(out, loc, to_string_view(format_str), 72 | basic_format_args(as)); 73 | } 74 | 75 | FMT_END_NAMESPACE 76 | 77 | #endif // FMT_LOCALE_H_ 78 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/bitmap256.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef RE2_BITMAP256_H_ 6 | #define RE2_BITMAP256_H_ 7 | 8 | #ifdef _MSC_VER 9 | #include 10 | #endif 11 | #include 12 | #include 13 | 14 | #include "util/util.h" 15 | #include "util/logging.h" 16 | 17 | namespace re2 { 18 | 19 | class Bitmap256 { 20 | public: 21 | Bitmap256() { 22 | Clear(); 23 | } 24 | 25 | // Clears all of the bits. 26 | void Clear() { 27 | memset(words_, 0, sizeof words_); 28 | } 29 | 30 | // Tests the bit with index c. 31 | bool Test(int c) const { 32 | DCHECK_GE(c, 0); 33 | DCHECK_LE(c, 255); 34 | 35 | return (words_[c / 64] & (1ULL << (c % 64))) != 0; 36 | } 37 | 38 | // Sets the bit with index c. 39 | void Set(int c) { 40 | DCHECK_GE(c, 0); 41 | DCHECK_LE(c, 255); 42 | 43 | words_[c / 64] |= (1ULL << (c % 64)); 44 | } 45 | 46 | // Finds the next non-zero bit with index >= c. 47 | // Returns -1 if no such bit exists. 48 | int FindNextSetBit(int c) const; 49 | 50 | private: 51 | // Finds the least significant non-zero bit in n. 
52 | static int FindLSBSet(uint64_t n) { 53 | DCHECK_NE(n, 0); 54 | 55 | #if defined(__GNUC__) 56 | return __builtin_ctzll(n); 57 | #elif defined(_MSC_VER) && defined(_M_X64) 58 | unsigned long c; 59 | _BitScanForward64(&c, n); 60 | return static_cast(c); 61 | #elif defined(_MSC_VER) && defined(_M_IX86) 62 | unsigned long c; 63 | if (static_cast(n) != 0) { 64 | _BitScanForward(&c, static_cast(n)); 65 | return static_cast(c); 66 | } else { 67 | _BitScanForward(&c, static_cast(n >> 32)); 68 | return static_cast(c) + 32; 69 | } 70 | #else 71 | int c = 63; 72 | for (int shift = 1 << 5; shift != 0; shift >>= 1) { 73 | uint64_t word = n << shift; 74 | if (word != 0) { 75 | n = word; 76 | c -= shift; 77 | } 78 | } 79 | return c; 80 | #endif 81 | } 82 | 83 | uint64_t words_[4]; 84 | }; 85 | 86 | int Bitmap256::FindNextSetBit(int c) const { 87 | DCHECK_GE(c, 0); 88 | DCHECK_LE(c, 255); 89 | 90 | // Check the word that contains the bit. Mask out any lower bits. 91 | int i = c / 64; 92 | uint64_t word = words_[i] & (~0ULL << (c % 64)); 93 | if (word != 0) 94 | return (i * 64) + FindLSBSet(word); 95 | 96 | // Check any following words. 
97 | i++; 98 | switch (i) { 99 | case 1: 100 | if (words_[1] != 0) 101 | return (1 * 64) + FindLSBSet(words_[1]); 102 | FALLTHROUGH_INTENDED; 103 | case 2: 104 | if (words_[2] != 0) 105 | return (2 * 64) + FindLSBSet(words_[2]); 106 | FALLTHROUGH_INTENDED; 107 | case 3: 108 | if (words_[3] != 0) 109 | return (3 * 64) + FindLSBSet(words_[3]); 110 | FALLTHROUGH_INTENDED; 111 | default: 112 | return -1; 113 | } 114 | } 115 | 116 | } // namespace re2 117 | 118 | #endif // RE2_BITMAP256_H_ 119 | -------------------------------------------------------------------------------- /include/pog/rule.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace pog { 15 | 16 | template 17 | class Rule 18 | { 19 | public: 20 | using SymbolType = Symbol; 21 | using CallbackType = std::function&&)>; 22 | 23 | Rule(std::uint32_t index, const SymbolType* lhs, const std::vector& rhs) 24 | : _index(index), _lhs(lhs), _rhs(rhs), _action(), _midrule_size(std::nullopt), _start(false) {} 25 | 26 | template 27 | Rule(std::uint32_t index, const SymbolType* lhs, const std::vector& rhs, CallbackT&& action) 28 | : _index(index), _lhs(lhs), _rhs(rhs), _action(std::forward(action)), _midrule_size(std::nullopt), _start(false) {} 29 | 30 | std::uint32_t get_index() const { return _index; } 31 | const SymbolType* get_lhs() const { return _lhs; } 32 | const std::vector& get_rhs() const { return _rhs; } 33 | 34 | bool has_precedence() const { return static_cast(_precedence); } 35 | const Precedence& get_precedence() const { return _precedence.value(); } 36 | void set_precedence(std::uint32_t level, Associativity assoc) { _precedence = Precedence{level, assoc}; } 37 | 38 | std::size_t get_number_of_required_arguments_for_action() const 39 | { 40 | return is_midrule() ? 
get_midrule_size() : get_rhs().size(); 41 | } 42 | 43 | const SymbolType* get_rightmost_terminal() const 44 | { 45 | auto itr = std::find_if(_rhs.rbegin(), _rhs.rend(), [](const auto& symbol) { 46 | return symbol->is_terminal(); 47 | }); 48 | 49 | return itr != _rhs.rend() ? *itr : nullptr; 50 | } 51 | 52 | std::string to_string(std::string_view arrow = "->", std::string_view eps = "") const 53 | { 54 | std::vector rhs_strings(_rhs.size()); 55 | std::transform(_rhs.begin(), _rhs.end(), rhs_strings.begin(), [](const SymbolType* s) { 56 | return s->get_name(); 57 | }); 58 | 59 | if (rhs_strings.empty()) 60 | rhs_strings.push_back(std::string{eps}); 61 | 62 | return fmt::format("{} {} {}", _lhs->get_name(), arrow, fmt::join(rhs_strings.begin(), rhs_strings.end(), " ")); 63 | } 64 | 65 | bool has_action() const { return static_cast(_action); } 66 | bool is_start_rule() const { return _start; } 67 | 68 | void set_start_rule(bool set) { _start = set; } 69 | void set_midrule(std::size_t size) { _midrule_size = size; } 70 | bool is_midrule() const { return static_cast(_midrule_size); } 71 | std::size_t get_midrule_size() const { return _midrule_size.value(); } 72 | 73 | template 74 | ValueT perform_action(Args&&... 
args) const { return _action(std::forward(args)...); } 75 | 76 | bool operator==(const Rule& rhs) const { return _index == rhs._index; } 77 | bool operator!=(const Rule& rhs) const { return !(*this == rhs); } 78 | 79 | private: 80 | std::uint32_t _index; 81 | const SymbolType* _lhs; 82 | std::vector _rhs; 83 | CallbackType _action; 84 | std::optional _precedence; 85 | std::optional _midrule_size; 86 | bool _start; 87 | }; 88 | 89 | } // namespace pog 90 | -------------------------------------------------------------------------------- /include/pog/token_builder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace pog { 8 | 9 | template 10 | class TokenBuilder 11 | { 12 | public: 13 | using GrammarType = Grammar; 14 | using SymbolType = Symbol; 15 | using TokenType = Token; 16 | using TokenizerType = Tokenizer; 17 | 18 | TokenBuilder(GrammarType* grammar, TokenizerType* tokenizer) : _grammar(grammar), _tokenizer(tokenizer), _pattern("$"), 19 | _symbol_name(), _precedence(), _action(), _fullword(false), _end_token(true), _in_states{std::string{TokenizerType::DefaultState}}, _enter_state() {} 20 | 21 | TokenBuilder(GrammarType* grammar, TokenizerType* tokenizer, const std::string& pattern) : _grammar(grammar), _tokenizer(tokenizer), _pattern(pattern), 22 | _symbol_name(), _precedence(), _action(), _fullword(false), _end_token(false), _in_states{std::string{TokenizerType::DefaultState}}, _enter_state() {} 23 | 24 | void done() 25 | { 26 | TokenType* token; 27 | if (!_end_token) 28 | { 29 | auto* symbol = !_symbol_name.empty() ? _grammar->add_symbol(SymbolKind::Terminal, _symbol_name) : nullptr; 30 | token = _tokenizer->add_token(_fullword ? 
fmt::format("{}(\\b|$)", _pattern) : _pattern, symbol, std::move(_in_states)); 31 | if (symbol && _precedence) 32 | { 33 | const auto& prec = _precedence.value(); 34 | symbol->set_precedence(prec.level, prec.assoc); 35 | } 36 | 37 | if(symbol && _description.size() != 0) 38 | symbol->set_description(_description); 39 | 40 | if (_enter_state) 41 | token->set_transition_to_state(_enter_state.value()); 42 | } 43 | else 44 | { 45 | token = _tokenizer->get_end_token(); 46 | for (auto&& state : _in_states) 47 | token->add_active_in_state(std::move(state)); 48 | } 49 | 50 | if (_action) 51 | token->set_action(std::move(_action)); 52 | } 53 | 54 | TokenBuilder& symbol(const std::string& symbol_name) 55 | { 56 | _symbol_name = symbol_name; 57 | return *this; 58 | } 59 | 60 | TokenBuilder& precedence(std::uint32_t level, Associativity assoc) 61 | { 62 | _precedence = Precedence{level, assoc}; 63 | return *this; 64 | } 65 | 66 | TokenBuilder& description(const std::string& text) 67 | { 68 | _description = text; 69 | return *this; 70 | } 71 | 72 | template 73 | TokenBuilder& action(CallbackT&& action) 74 | { 75 | _action = std::forward(action); 76 | return *this; 77 | } 78 | 79 | TokenBuilder& fullword() 80 | { 81 | _fullword = true; 82 | return *this; 83 | } 84 | 85 | template 86 | TokenBuilder& states(Args&&... 
args) 87 | { 88 | _in_states = {std::forward(args)...}; 89 | return *this; 90 | } 91 | 92 | TokenBuilder& enter_state(const std::string& state) 93 | { 94 | _enter_state = state; 95 | return *this; 96 | } 97 | 98 | private: 99 | GrammarType* _grammar; 100 | TokenizerType* _tokenizer; 101 | std::string _description; 102 | std::string _pattern; 103 | std::string _symbol_name; 104 | std::optional _precedence; 105 | typename TokenType::CallbackType _action; 106 | bool _fullword; 107 | bool _end_token; 108 | std::vector _in_states; 109 | std::optional _enter_state; 110 | }; 111 | 112 | } // namespace pog 113 | -------------------------------------------------------------------------------- /deps/re2/re2/util/logging.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_LOGGING_H_ 6 | #define UTIL_LOGGING_H_ 7 | 8 | // Simplified version of Google's logging. 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "util/util.h" 17 | 18 | // Debug-only checking. 
19 | #define DCHECK(condition) assert(condition) 20 | #define DCHECK_EQ(val1, val2) assert((val1) == (val2)) 21 | #define DCHECK_NE(val1, val2) assert((val1) != (val2)) 22 | #define DCHECK_LE(val1, val2) assert((val1) <= (val2)) 23 | #define DCHECK_LT(val1, val2) assert((val1) < (val2)) 24 | #define DCHECK_GE(val1, val2) assert((val1) >= (val2)) 25 | #define DCHECK_GT(val1, val2) assert((val1) > (val2)) 26 | 27 | // Always-on checking 28 | #define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x 29 | #define CHECK_LT(x, y) CHECK((x) < (y)) 30 | #define CHECK_GT(x, y) CHECK((x) > (y)) 31 | #define CHECK_LE(x, y) CHECK((x) <= (y)) 32 | #define CHECK_GE(x, y) CHECK((x) >= (y)) 33 | #define CHECK_EQ(x, y) CHECK((x) == (y)) 34 | #define CHECK_NE(x, y) CHECK((x) != (y)) 35 | 36 | #define LOG_INFO LogMessage(__FILE__, __LINE__) 37 | #define LOG_WARNING LogMessage(__FILE__, __LINE__) 38 | #define LOG_ERROR LogMessage(__FILE__, __LINE__) 39 | #define LOG_FATAL LogMessageFatal(__FILE__, __LINE__) 40 | #define LOG_QFATAL LOG_FATAL 41 | 42 | // It seems that one of the Windows header files defines ERROR as 0. 
43 | #ifdef _WIN32 44 | #define LOG_0 LOG_INFO 45 | #endif 46 | 47 | #ifdef NDEBUG 48 | #define LOG_DFATAL LOG_ERROR 49 | #else 50 | #define LOG_DFATAL LOG_FATAL 51 | #endif 52 | 53 | #define LOG(severity) LOG_ ## severity.stream() 54 | 55 | #define VLOG(x) if((x)>0){}else LOG_INFO.stream() 56 | 57 | class LogMessage { 58 | public: 59 | LogMessage(const char* file, int line) 60 | : flushed_(false) { 61 | stream() << file << ":" << line << ": "; 62 | } 63 | void Flush() { 64 | stream() << "\n"; 65 | std::string s = str_.str(); 66 | size_t n = s.size(); 67 | if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc 68 | flushed_ = true; 69 | } 70 | ~LogMessage() { 71 | if (!flushed_) { 72 | Flush(); 73 | } 74 | } 75 | std::ostream& stream() { return str_; } 76 | 77 | private: 78 | bool flushed_; 79 | std::ostringstream str_; 80 | 81 | LogMessage(const LogMessage&) = delete; 82 | LogMessage& operator=(const LogMessage&) = delete; 83 | }; 84 | 85 | // Silence "destructor never returns" warning for ~LogMessageFatal(). 86 | // Since this is a header file, push and then pop to limit the scope. 87 | #ifdef _MSC_VER 88 | #pragma warning(push) 89 | #pragma warning(disable: 4722) 90 | #endif 91 | 92 | class LogMessageFatal : public LogMessage { 93 | public: 94 | LogMessageFatal(const char* file, int line) 95 | : LogMessage(file, line) {} 96 | ATTRIBUTE_NORETURN ~LogMessageFatal() { 97 | Flush(); 98 | abort(); 99 | } 100 | private: 101 | LogMessageFatal(const LogMessageFatal&) = delete; 102 | LogMessageFatal& operator=(const LogMessageFatal&) = delete; 103 | }; 104 | 105 | #ifdef _MSC_VER 106 | #pragma warning(pop) 107 | #endif 108 | 109 | #endif // UTIL_LOGGING_H_ 110 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/perl_groups.cc: -------------------------------------------------------------------------------- 1 | // GENERATED BY make_perl_groups.pl; DO NOT EDIT. 
2 | // make_perl_groups.pl >perl_groups.cc 3 | 4 | #include "re2/unicode_groups.h" 5 | 6 | namespace re2 { 7 | 8 | static const URange16 code1[] = { /* \d */ 9 | { 0x30, 0x39 }, 10 | }; 11 | static const URange16 code2[] = { /* \s */ 12 | { 0x9, 0xa }, 13 | { 0xc, 0xd }, 14 | { 0x20, 0x20 }, 15 | }; 16 | static const URange16 code3[] = { /* \w */ 17 | { 0x30, 0x39 }, 18 | { 0x41, 0x5a }, 19 | { 0x5f, 0x5f }, 20 | { 0x61, 0x7a }, 21 | }; 22 | const UGroup perl_groups[] = { 23 | { "\\d", +1, code1, 1 }, 24 | { "\\D", -1, code1, 1 }, 25 | { "\\s", +1, code2, 3 }, 26 | { "\\S", -1, code2, 3 }, 27 | { "\\w", +1, code3, 4 }, 28 | { "\\W", -1, code3, 4 }, 29 | }; 30 | const int num_perl_groups = 6; 31 | static const URange16 code4[] = { /* [:alnum:] */ 32 | { 0x30, 0x39 }, 33 | { 0x41, 0x5a }, 34 | { 0x61, 0x7a }, 35 | }; 36 | static const URange16 code5[] = { /* [:alpha:] */ 37 | { 0x41, 0x5a }, 38 | { 0x61, 0x7a }, 39 | }; 40 | static const URange16 code6[] = { /* [:ascii:] */ 41 | { 0x0, 0x7f }, 42 | }; 43 | static const URange16 code7[] = { /* [:blank:] */ 44 | { 0x9, 0x9 }, 45 | { 0x20, 0x20 }, 46 | }; 47 | static const URange16 code8[] = { /* [:cntrl:] */ 48 | { 0x0, 0x1f }, 49 | { 0x7f, 0x7f }, 50 | }; 51 | static const URange16 code9[] = { /* [:digit:] */ 52 | { 0x30, 0x39 }, 53 | }; 54 | static const URange16 code10[] = { /* [:graph:] */ 55 | { 0x21, 0x7e }, 56 | }; 57 | static const URange16 code11[] = { /* [:lower:] */ 58 | { 0x61, 0x7a }, 59 | }; 60 | static const URange16 code12[] = { /* [:print:] */ 61 | { 0x20, 0x7e }, 62 | }; 63 | static const URange16 code13[] = { /* [:punct:] */ 64 | { 0x21, 0x2f }, 65 | { 0x3a, 0x40 }, 66 | { 0x5b, 0x60 }, 67 | { 0x7b, 0x7e }, 68 | }; 69 | static const URange16 code14[] = { /* [:space:] */ 70 | { 0x9, 0xd }, 71 | { 0x20, 0x20 }, 72 | }; 73 | static const URange16 code15[] = { /* [:upper:] */ 74 | { 0x41, 0x5a }, 75 | }; 76 | static const URange16 code16[] = { /* [:word:] */ 77 | { 0x30, 0x39 }, 78 | { 0x41, 0x5a }, 79 | 
{ 0x5f, 0x5f }, 80 | { 0x61, 0x7a }, 81 | }; 82 | static const URange16 code17[] = { /* [:xdigit:] */ 83 | { 0x30, 0x39 }, 84 | { 0x41, 0x46 }, 85 | { 0x61, 0x66 }, 86 | }; 87 | const UGroup posix_groups[] = { 88 | { "[:alnum:]", +1, code4, 3 }, 89 | { "[:^alnum:]", -1, code4, 3 }, 90 | { "[:alpha:]", +1, code5, 2 }, 91 | { "[:^alpha:]", -1, code5, 2 }, 92 | { "[:ascii:]", +1, code6, 1 }, 93 | { "[:^ascii:]", -1, code6, 1 }, 94 | { "[:blank:]", +1, code7, 2 }, 95 | { "[:^blank:]", -1, code7, 2 }, 96 | { "[:cntrl:]", +1, code8, 2 }, 97 | { "[:^cntrl:]", -1, code8, 2 }, 98 | { "[:digit:]", +1, code9, 1 }, 99 | { "[:^digit:]", -1, code9, 1 }, 100 | { "[:graph:]", +1, code10, 1 }, 101 | { "[:^graph:]", -1, code10, 1 }, 102 | { "[:lower:]", +1, code11, 1 }, 103 | { "[:^lower:]", -1, code11, 1 }, 104 | { "[:print:]", +1, code12, 1 }, 105 | { "[:^print:]", -1, code12, 1 }, 106 | { "[:punct:]", +1, code13, 4 }, 107 | { "[:^punct:]", -1, code13, 4 }, 108 | { "[:space:]", +1, code14, 2 }, 109 | { "[:^space:]", -1, code14, 2 }, 110 | { "[:upper:]", +1, code15, 1 }, 111 | { "[:^upper:]", -1, code15, 1 }, 112 | { "[:word:]", +1, code16, 4 }, 113 | { "[:^word:]", -1, code16, 4 }, 114 | { "[:xdigit:]", +1, code17, 3 }, 115 | { "[:^xdigit:]", -1, code17, 3 }, 116 | }; 117 | const int num_posix_groups = 28; 118 | 119 | } // namespace re2 120 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/prefilter.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef RE2_PREFILTER_H_ 6 | #define RE2_PREFILTER_H_ 7 | 8 | // Prefilter is the class used to extract string guards from regexps. 9 | // Rather than using Prefilter class directly, use FilteredRE2. 
10 | // See filtered_re2.h 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "util/util.h" 17 | #include "util/logging.h" 18 | 19 | namespace re2 { 20 | 21 | class RE2; 22 | 23 | class Regexp; 24 | 25 | class Prefilter { 26 | // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h 27 | public: 28 | enum Op { 29 | ALL = 0, // Everything matches 30 | NONE, // Nothing matches 31 | ATOM, // The string atom() must match 32 | AND, // All in subs() must match 33 | OR, // One of subs() must match 34 | }; 35 | 36 | explicit Prefilter(Op op); 37 | ~Prefilter(); 38 | 39 | Op op() { return op_; } 40 | const std::string& atom() const { return atom_; } 41 | void set_unique_id(int id) { unique_id_ = id; } 42 | int unique_id() const { return unique_id_; } 43 | 44 | // The children of the Prefilter node. 45 | std::vector* subs() { 46 | DCHECK(op_ == AND || op_ == OR); 47 | return subs_; 48 | } 49 | 50 | // Set the children vector. Prefilter takes ownership of subs and 51 | // subs_ will be deleted when Prefilter is deleted. 52 | void set_subs(std::vector* subs) { subs_ = subs; } 53 | 54 | // Given a RE2, return a Prefilter. The caller takes ownership of 55 | // the Prefilter and should deallocate it. Returns NULL if Prefilter 56 | // cannot be formed. 57 | static Prefilter* FromRE2(const RE2* re2); 58 | 59 | // Returns a readable debug string of the prefilter. 60 | std::string DebugString() const; 61 | 62 | private: 63 | class Info; 64 | 65 | // Combines two prefilters together to create an AND. The passed 66 | // Prefilters will be part of the returned Prefilter or deleted. 67 | static Prefilter* And(Prefilter* a, Prefilter* b); 68 | 69 | // Combines two prefilters together to create an OR. The passed 70 | // Prefilters will be part of the returned Prefilter or deleted. 
71 | static Prefilter* Or(Prefilter* a, Prefilter* b); 72 | 73 | // Generalized And/Or 74 | static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b); 75 | 76 | static Prefilter* FromRegexp(Regexp* a); 77 | 78 | static Prefilter* FromString(const std::string& str); 79 | 80 | static Prefilter* OrStrings(std::set* ss); 81 | 82 | static Info* BuildInfo(Regexp* re); 83 | 84 | Prefilter* Simplify(); 85 | 86 | // Kind of Prefilter. 87 | Op op_; 88 | 89 | // Sub-matches for AND or OR Prefilter. 90 | std::vector* subs_; 91 | 92 | // Actual string to match in leaf node. 93 | std::string atom_; 94 | 95 | // If different prefilters have the same string atom, or if they are 96 | // structurally the same (e.g., OR of same atom strings) they are 97 | // considered the same unique nodes. This is the id for each unique 98 | // node. This field is populated with a unique id for every node, 99 | // and -1 for duplicate nodes. 100 | int unique_id_; 101 | 102 | Prefilter(const Prefilter&) = delete; 103 | Prefilter& operator=(const Prefilter&) = delete; 104 | }; 105 | 106 | } // namespace re2 107 | 108 | #endif // RE2_PREFILTER_H_ 109 | -------------------------------------------------------------------------------- /deps/re2/re2/util/benchmark.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "util/benchmark.h" 12 | #include "util/flags.h" 13 | #include "re2/re2.h" 14 | 15 | #ifdef _WIN32 16 | #define snprintf _snprintf 17 | #endif 18 | 19 | using ::testing::Benchmark; 20 | 21 | static Benchmark* benchmarks[10000]; 22 | static int nbenchmarks; 23 | 24 | void Benchmark::Register() { 25 | lo_ = std::max(1, lo_); 26 | hi_ = std::max(lo_, hi_); 27 | benchmarks[nbenchmarks++] = this; 28 | } 29 | 30 | static int64_t nsec() { 31 | return std::chrono::duration_cast( 32 | std::chrono::steady_clock::now().time_since_epoch()) 33 | .count(); 34 | } 35 | 36 | static int64_t t0; 37 | static int64_t ns; 38 | static int64_t bytes; 39 | static int64_t items; 40 | 41 | void StartBenchmarkTiming() { 42 | if (t0 == 0) { 43 | t0 = nsec(); 44 | } 45 | } 46 | 47 | void StopBenchmarkTiming() { 48 | if (t0 != 0) { 49 | ns += nsec() - t0; 50 | t0 = 0; 51 | } 52 | } 53 | 54 | void SetBenchmarkBytesProcessed(int64_t b) { bytes = b; } 55 | 56 | void SetBenchmarkItemsProcessed(int64_t i) { items = i; } 57 | 58 | static void RunFunc(Benchmark* b, int iters, int arg) { 59 | t0 = nsec(); 60 | ns = 0; 61 | bytes = 0; 62 | items = 0; 63 | b->func()(iters, arg); 64 | StopBenchmarkTiming(); 65 | } 66 | 67 | static int round(int n) { 68 | int base = 1; 69 | while (base * 10 < n) base *= 10; 70 | if (n < 2 * base) return 2 * base; 71 | if (n < 5 * base) return 5 * base; 72 | return 10 * base; 73 | } 74 | 75 | static void RunBench(Benchmark* b, int arg) { 76 | int iters, last; 77 | 78 | // Run once just in case it's expensive. 
79 | iters = 1; 80 | RunFunc(b, iters, arg); 81 | while (ns < (int)1e9 && iters < (int)1e9) { 82 | last = iters; 83 | if (ns / iters == 0) { 84 | iters = (int)1e9; 85 | } else { 86 | iters = (int)1e9 / static_cast(ns / iters); 87 | } 88 | iters = std::max(last + 1, std::min(iters + iters / 2, 100 * last)); 89 | iters = round(iters); 90 | RunFunc(b, iters, arg); 91 | } 92 | 93 | char mb[100]; 94 | char suf[100]; 95 | mb[0] = '\0'; 96 | suf[0] = '\0'; 97 | if (ns > 0 && bytes > 0) 98 | snprintf(mb, sizeof mb, "\t%7.2f MB/s", 99 | ((double)bytes / 1e6) / ((double)ns / 1e9)); 100 | if (b->has_arg()) { 101 | if (arg >= (1 << 20)) { 102 | snprintf(suf, sizeof suf, "/%dM", arg / (1 << 20)); 103 | } else if (arg >= (1 << 10)) { 104 | snprintf(suf, sizeof suf, "/%dK", arg / (1 << 10)); 105 | } else { 106 | snprintf(suf, sizeof suf, "/%d", arg); 107 | } 108 | } 109 | printf("%s%s\t%8d\t%10lld ns/op%s\n", b->name(), suf, iters, 110 | (long long)ns / iters, mb); 111 | fflush(stdout); 112 | } 113 | 114 | static bool WantBench(const char* name, int argc, const char** argv) { 115 | if (argc == 1) return true; 116 | for (int i = 1; i < argc; i++) { 117 | if (RE2::PartialMatch(name, argv[i])) 118 | return true; 119 | } 120 | return false; 121 | } 122 | 123 | int main(int argc, const char** argv) { 124 | for (int i = 0; i < nbenchmarks; i++) { 125 | Benchmark* b = benchmarks[i]; 126 | if (!WantBench(b->name(), argc, argv)) 127 | continue; 128 | for (int arg = b->lo(); arg <= b->hi(); arg <<= 1) 129 | RunBench(b, arg); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | This page describes installation of `pog` and how you can integrate it into your own project. 
8 | 9 | Getting source code 10 | =================== 11 | 12 | First, you'll need to obtain the source code. If you want the latest released version, which should always be stable, you can download it from the `GitHub releases page <https://github.com/metthal/pog/releases>`_. 13 | If you want to get the latest development version or you are interested in the development of the library, you can also get the source code using ``git``. To clone the repository run: 14 | 15 | .. code-block:: bash 16 | 17 | git clone https://github.com/metthal/pog.git 18 | 19 | 20 | Requirements 21 | ============ 22 | 23 | In order to use `pog`, you will need: 24 | 25 | * C++ compiler with C++17 support 26 | * CMake 3.8+ 27 | * `re2 <https://github.com/google/re2>`_ 28 | * `fmt <https://github.com/fmtlib/fmt>`_ 29 | 30 | You can install them from your distribution repositories. For Ubuntu based distributions use: 31 | 32 | .. code-block:: bash 33 | 34 | apt-get install libfmt-dev libre2-dev 35 | 36 | For Red Hat based distributions use: 37 | 38 | .. code-block:: bash 39 | 40 | dnf install fmt-devel re2-devel 41 | 42 | For macOS use: 43 | 44 | .. code-block:: bash 45 | 46 | brew install fmt re2 47 | 48 | There is also an option to download ``re2`` or ``fmt`` while building the project. See :ref:`compilation` for more information regarding this. 49 | 50 | .. _compilation: 51 | 52 | Compilation 53 | =========== 54 | 55 | `pog` itself is a header-only library, but it has dependencies which are not header-only. To compile it run: 56 | 57 | .. code-block:: bash 58 | 59 | cmake -DCMAKE_BUILD_TYPE=Release [OPTIONS] .. 60 | cmake --build . --target install 61 | 62 | Other options you can use: 63 | 64 | * ``POG_BUNDLED_RE2`` - Bundled ``re2`` will be used. It will be compiled and installed as ``libpog_re2.a`` (or ``pog_re2.lib`` on Windows) together with the library. (Default: ``OFF``) 65 | * ``POG_BUNDLED_FMT`` - Bundled ``fmt`` will be used. It will be compiled and installed as ``libpog_fmt.a`` (or ``pog_fmt.lib`` on Windows) together with the library.
(Default: ``OFF``) 66 | * ``POG_TESTS`` - Build tests located in ``tests/`` folder. (Default: ``OFF``) 67 | * ``POG_EXAMPLES`` - Build examples located in ``examples/`` folder. (Default: ``OFF``) 68 | * ``POG_PIC`` - Build position independent code. (Default: ``OFF``) 69 | 70 | .. attention:: 71 | 72 | Option ``POG_PIC`` does not have any effect if you use neither ``POG_BUNDLED_RE2`` nor ``POG_BUNDLED_FMT``. Libraries installed in your system are not guaranteed to be position independent. 73 | 74 | Usage 75 | ===== 76 | 77 | ``pog`` will be installed together with CMake configuration files which make integration into other CMake projects much easier. If you use CMake in your project, put the following lines in your ``CMakeLists.txt`` file and that should be it. 78 | 79 | .. code-block:: cmake 80 | 81 | find_package(pog REQUIRED) 82 | target_link_libraries( pog::pog) 83 | 84 | For projects which use other build systems, you can use `pkgconfig `_ files which are installed too. To find out which compilation flags are needed, run the following commands in your shell or integrate them directly into your build system. 85 | 86 | .. code-block:: bash 87 | 88 | pkg-config --cflags pog 89 | pkg-config --libs pog 90 | 91 | To use `pog` from your source code, include file ``<pog/pog.h>``. Everything in `pog` is located inside ``pog`` namespace. Example: 92 | 93 | .. 
code-block:: cpp 94 | 95 | #include 96 | 97 | int main() 98 | { 99 | pog::Parser parser; 100 | 101 | // your parser definition 102 | } 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pog 2 | 3 | [![Build Status](https://travis-ci.org/metthal/pog.svg?branch=master)](https://travis-ci.org/metthal/pog) 4 | [![Build Status](https://ci.appveyor.com/api/projects/status/84heo43wj4mqoo5y/branch/master?svg=true)](https://ci.appveyor.com/project/metthal/pog/branch/master) 5 | [![Documentation Status](https://readthedocs.org/projects/pog/badge/?version=latest)](https://pog.readthedocs.io/en/latest/?badge=latest) 6 | [![codecov](https://codecov.io/gh/metthal/pog/branch/master/graph/badge.svg)](https://codecov.io/gh/metthal/pog) 7 | 8 | Pog is C++17 library for generating LALR(1) parsers. It splits definitions of parser into: 9 | 10 | 1. Declaration of tokens (regular expressions describing how the input should be tokenized) 11 | 2. Grammar rules over tokens from tokenization phase 12 | 13 | If you are familiar with tools like yacc + lex or bison + flex then this should be already known concept for you. This library is header-only itself but requires RE2 library which does not come with header-only version. The plan is to be completely header-only in the future. 14 | 15 | See [documentation](https://pog.readthedocs.io/en/stable/) for more information about the installation and usage of the library. 16 | 17 | ## Why make another parser generator? 18 | 19 | I had idea for project like this for a few years already, back when I used bison + flex for a school project. The advantage of bison + flex is that it generates LALR(1) parser. These parsers are very good for parsing usual programming languages constructs without any transformations of the grammar (such as removal of left-recursion, making sure that no 2 rules have same prefix). 
Their approach of splitting the process into tokenization and parsing makes it much easier to write the actual grammar without having it cluttered with things like whitespaces, comments and other things that can be ignored and don't have to be present in the grammar itself. The disadvantage of bison + flex is that you have to have these installed on your system because they are standalone tools which generate C/C++ code for you. Maintaining a build system which uses them and works on Linux, Windows and macOS is not an easy task. For a long time, bison was also not able to work with modern C++ features such as move semantics. It should be supported as of now (3.4) but a lot of Linux distributions still don't have this version and some stable distros won't have it for a very long time. There are also other options than bison + flex in the C++ world such as Boost.Spirit or PEGTL which are all amazing but they all have some drawbacks (LL parsers, cluttered and hard to maintain grammars, inability to specify operator precedence, ...). This library aims to provide what was missing out there -- taking the philosophy of bison + flex and putting it into pure C++ while still generating an LALR(1) parser. 20 | 21 | The implemented parser generator is based on algorithms from the papers _Efficient computation of LALR(1) look-ahead sets_, _Development of an LALR(1) Parser Generator_, _Simple computation of LALR(1) lookahead sets_ and the book _Compilers: Principles, Techniques, and Tools (2nd Edition)_. 22 | 23 | ## Roadmap 24 | 25 | Things to do before 1.0.0 26 | 27 | - [x] Tokenizer action on end of input 28 | - [x] Support for states in tokenizer (`BEGIN`, `END` like behavior in flex) 29 | - [x] Generate debugging report (text file parsing table, states, lookahead sets, graphviz LALR automaton, ...) 30 | - [x] Windows & macOS Build 31 | - [x] Tests 32 | - [x] Code Coverage 33 | - [x] CI pipeline (Travis CI, AppVeyor) 34 | - [ ] Packaging (ZIP, RPM, DEB, ... 
+ getting package into repositories) 35 | - [x] Sphinx Docs (+ Read The Docs integration) 36 | 37 | Things to do after 1.0.0 38 | 39 | - [ ] Error Recovery 40 | - [ ] Code Cleanup :) 41 | - [ ] Own implementation for tokenizer to be header-only (DFA) 42 | - [ ] Lightweight iterator ranges 43 | 44 | ## Requirements 45 | 46 | * fmt (5.3.0 or newer) 47 | * re2 (2019-09-01 or newer) 48 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/filtered_re2.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #include "re2/filtered_re2.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "util/util.h" 11 | #include "util/logging.h" 12 | #include "re2/prefilter.h" 13 | #include "re2/prefilter_tree.h" 14 | 15 | namespace re2 { 16 | 17 | FilteredRE2::FilteredRE2() 18 | : compiled_(false), 19 | prefilter_tree_(new PrefilterTree()) { 20 | } 21 | 22 | FilteredRE2::FilteredRE2(int min_atom_len) 23 | : compiled_(false), 24 | prefilter_tree_(new PrefilterTree(min_atom_len)) { 25 | } 26 | 27 | FilteredRE2::~FilteredRE2() { 28 | for (size_t i = 0; i < re2_vec_.size(); i++) 29 | delete re2_vec_[i]; 30 | delete prefilter_tree_; 31 | } 32 | 33 | RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, 34 | const RE2::Options& options, int* id) { 35 | RE2* re = new RE2(pattern, options); 36 | RE2::ErrorCode code = re->error_code(); 37 | 38 | if (!re->ok()) { 39 | if (options.log_errors()) { 40 | LOG(ERROR) << "Couldn't compile regular expression, skipping: " 41 | << re << " due to error " << re->error(); 42 | } 43 | delete re; 44 | } else { 45 | *id = static_cast(re2_vec_.size()); 46 | re2_vec_.push_back(re); 47 | } 48 | 49 | return code; 50 | } 51 | 52 | void FilteredRE2::Compile(std::vector* atoms) { 53 | if (compiled_) { 54 | 
LOG(ERROR) << "Compile called already."; 55 | return; 56 | } 57 | 58 | if (re2_vec_.empty()) { 59 | LOG(ERROR) << "Compile called before Add."; 60 | return; 61 | } 62 | 63 | for (size_t i = 0; i < re2_vec_.size(); i++) { 64 | Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]); 65 | prefilter_tree_->Add(prefilter); 66 | } 67 | atoms->clear(); 68 | prefilter_tree_->Compile(atoms); 69 | compiled_ = true; 70 | } 71 | 72 | int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { 73 | for (size_t i = 0; i < re2_vec_.size(); i++) 74 | if (RE2::PartialMatch(text, *re2_vec_[i])) 75 | return static_cast(i); 76 | return -1; 77 | } 78 | 79 | int FilteredRE2::FirstMatch(const StringPiece& text, 80 | const std::vector& atoms) const { 81 | if (!compiled_) { 82 | LOG(DFATAL) << "FirstMatch called before Compile."; 83 | return -1; 84 | } 85 | std::vector regexps; 86 | prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); 87 | for (size_t i = 0; i < regexps.size(); i++) 88 | if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) 89 | return regexps[i]; 90 | return -1; 91 | } 92 | 93 | bool FilteredRE2::AllMatches( 94 | const StringPiece& text, 95 | const std::vector& atoms, 96 | std::vector* matching_regexps) const { 97 | matching_regexps->clear(); 98 | std::vector regexps; 99 | prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); 100 | for (size_t i = 0; i < regexps.size(); i++) 101 | if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) 102 | matching_regexps->push_back(regexps[i]); 103 | return !matching_regexps->empty(); 104 | } 105 | 106 | void FilteredRE2::AllPotentials( 107 | const std::vector& atoms, 108 | std::vector* potential_regexps) const { 109 | prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps); 110 | } 111 | 112 | void FilteredRE2::RegexpsGivenStrings(const std::vector& matched_atoms, 113 | std::vector* passed_regexps) { 114 | prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps); 115 | } 116 | 117 | void FilteredRE2::PrintPrefilter(int 
regexpid) { 118 | prefilter_tree_->PrintPrefilter(regexpid); 119 | } 120 | 121 | } // namespace re2 122 | -------------------------------------------------------------------------------- /tests/test_rule.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | class TestRule : public ::testing::Test {}; 6 | 7 | using namespace pog; 8 | 9 | TEST_F(TestRule, 10 | SimpleRule) { 11 | Symbol s1(1, SymbolKind::Nonterminal, "1"); 12 | Symbol s2(2, SymbolKind::Nonterminal, "2"); 13 | Symbol s3(3, SymbolKind::Nonterminal, "3"); 14 | Rule rule(42, &s1, std::vector*>{&s2, &s3}, [](std::vector&&) -> int { return 0; }); 15 | 16 | EXPECT_EQ(rule.get_index(), 42u); 17 | EXPECT_EQ(rule.get_lhs(), &s1); 18 | EXPECT_EQ(rule.get_rhs(), (std::vector*>{&s2, &s3})); 19 | EXPECT_FALSE(rule.has_precedence()); 20 | } 21 | 22 | TEST_F(TestRule, 23 | Precedence) { 24 | Symbol s1(1, SymbolKind::Nonterminal, "1"); 25 | Symbol s2(2, SymbolKind::Nonterminal, "2"); 26 | Symbol s3(3, SymbolKind::Nonterminal, "3"); 27 | Rule rule(42, &s1, std::vector*>{&s2, &s3}, [](std::vector&&) -> int { return 0; }); 28 | rule.set_precedence(1, Associativity::Right); 29 | 30 | EXPECT_EQ(rule.get_index(), 42u); 31 | EXPECT_EQ(rule.get_lhs(), &s1); 32 | EXPECT_EQ(rule.get_rhs(), (std::vector*>{&s2, &s3})); 33 | EXPECT_TRUE(rule.has_precedence()); 34 | EXPECT_EQ(rule.get_precedence(), (Precedence{1, Associativity::Right})); 35 | } 36 | 37 | TEST_F(TestRule, 38 | RightmostTerminalWhileThereIsNone) { 39 | Symbol s1(1, SymbolKind::Nonterminal, "1"); 40 | Symbol s2(2, SymbolKind::Nonterminal, "2"); 41 | Symbol s3(3, SymbolKind::Nonterminal, "3"); 42 | Rule rule(42, &s1, std::vector*>{&s2, &s3}, [](std::vector&&) -> int { return 0; }); 43 | 44 | EXPECT_EQ(rule.get_rightmost_terminal(), nullptr); 45 | } 46 | 47 | TEST_F(TestRule, 48 | RightmostTerminal) { 49 | Symbol s1(1, SymbolKind::Nonterminal, "1"); 50 | Symbol s2(2, SymbolKind::Terminal, "2"); 51 | 
Symbol s3(3, SymbolKind::Terminal, "3"); 52 | Symbol s4(4, SymbolKind::Nonterminal, "4"); 53 | Rule rule(42, &s1, std::vector*>{&s2, &s3, &s4}, [](std::vector&&) -> int { return 0; }); 54 | 55 | EXPECT_EQ(rule.get_rightmost_terminal(), &s3); 56 | } 57 | 58 | TEST_F(TestRule, 59 | ToString) { 60 | Symbol s1(1, SymbolKind::Nonterminal, "1"); 61 | Symbol s2(2, SymbolKind::Terminal, "2"); 62 | Symbol s3(3, SymbolKind::Terminal, "3"); 63 | Symbol s4(4, SymbolKind::Nonterminal, "4"); 64 | Rule rule(42, &s1, std::vector*>{&s2, &s3, &s4}, [](std::vector&&) -> int { return 0; }); 65 | 66 | EXPECT_EQ(rule.to_string(), "1 -> 2 3 4"); 67 | } 68 | 69 | TEST_F(TestRule, 70 | EpsilonToString) { 71 | Symbol s1(1, SymbolKind::Nonterminal, "1"); 72 | Rule rule(42, &s1, std::vector*>{}, [](std::vector&&) -> int { return 0; }); 73 | 74 | EXPECT_EQ(rule.to_string(), "1 -> "); 75 | } 76 | 77 | TEST_F(TestRule, 78 | PerformAction) { 79 | bool called = false; 80 | 81 | Symbol s1(1, SymbolKind::Nonterminal, "1"); 82 | Rule rule(42, &s1, std::vector*>{}, [&](std::vector&& args) -> int { 83 | called = true; 84 | return static_cast(args.size()); 85 | }); 86 | 87 | EXPECT_EQ(rule.perform_action(std::vector{1, 2, 3, 4}), 4); 88 | EXPECT_TRUE(called); 89 | } 90 | 91 | TEST_F(TestRule, 92 | Equality) { 93 | Symbol s1(1, SymbolKind::Nonterminal, "1"); 94 | Symbol s2(2, SymbolKind::Terminal, "2"); 95 | Symbol s3(3, SymbolKind::Terminal, "3"); 96 | Symbol s4(4, SymbolKind::Nonterminal, "4"); 97 | Rule rule1(42, &s1, std::vector*>{&s2, &s3, &s4}, [](std::vector&&) -> int { return 0; }); 98 | Rule rule2(42, &s1, std::vector*>{}, [](std::vector&&) -> int { return 0; }); 99 | Rule rule3(43, &s1, std::vector*>{&s2, &s3, &s4}, [](std::vector&&) -> int { return 0; }); 100 | 101 | EXPECT_TRUE(rule1 == rule2); 102 | EXPECT_FALSE(rule1 == rule3); 103 | 104 | EXPECT_FALSE(rule1 != rule2); 105 | EXPECT_TRUE(rule1 != rule3); 106 | } 107 | 
-------------------------------------------------------------------------------- /include/pog/item.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace pog { 10 | 11 | template 12 | class Item 13 | { 14 | public: 15 | using RuleType = Rule; 16 | using SymbolType = Symbol; 17 | 18 | Item(const RuleType* rule, std::size_t read_pos = 0) 19 | : _rule(rule), _read_pos(read_pos) {} 20 | Item(const Item&) = default; 21 | Item(Item&&) noexcept = default; 22 | 23 | const RuleType* get_rule() const { return _rule; } 24 | std::size_t get_read_pos() const { return _read_pos; } 25 | 26 | const SymbolType* get_previous_symbol() const 27 | { 28 | return _read_pos == 0 ? nullptr : _rule->get_rhs()[_read_pos - 1]; 29 | } 30 | 31 | const SymbolType* get_read_symbol() const 32 | { 33 | return is_final() ? nullptr : _rule->get_rhs()[_read_pos]; 34 | } 35 | 36 | std::vector get_left_side_without_read_symbol() 37 | { 38 | if (_read_pos == 0) 39 | return {}; 40 | 41 | // TODO: return just iterator range 42 | std::vector result(_read_pos); 43 | std::copy(_rule->get_rhs().begin(), _rule->get_rhs().begin() + _read_pos, result.begin()); 44 | return result; 45 | } 46 | 47 | std::vector get_right_side_without_read_symbol() 48 | { 49 | if (is_final()) 50 | { 51 | assert(false && "Shouldn't call get_right_side_without_read_symbol() on final item"); 52 | return {}; 53 | } 54 | 55 | auto rest_size = _rule->get_rhs().size() - _read_pos - 1; 56 | if (rest_size == 0) 57 | return {}; 58 | 59 | // TODO: possibly just return iterator range? 
60 | std::vector result(rest_size); 61 | std::copy(_rule->get_rhs().begin() + _read_pos + 1, _rule->get_rhs().end(), result.begin()); 62 | return result; 63 | } 64 | 65 | void step() 66 | { 67 | if (!is_final()) 68 | _read_pos++; 69 | } 70 | 71 | void step_back() 72 | { 73 | if (_read_pos > 0) 74 | _read_pos--; 75 | } 76 | 77 | bool is_kernel() const 78 | { 79 | return _read_pos > 0 || _rule->is_start_rule(); 80 | } 81 | 82 | bool is_final() const 83 | { 84 | return _read_pos == _rule->get_rhs().size(); 85 | } 86 | 87 | bool is_accepting() const 88 | { 89 | return !is_final() && get_read_symbol()->is_end(); 90 | } 91 | 92 | std::string to_string(std::string_view arrow = "->", std::string_view eps = "", std::string_view sep = "<*>") const 93 | { 94 | const auto& rhs = _rule->get_rhs(); 95 | std::vector left_of_read_pos(_read_pos); 96 | std::vector right_of_read_pos(rhs.size() - _read_pos); 97 | std::transform(rhs.begin(), rhs.begin() + _read_pos, left_of_read_pos.begin(), [](const auto* sym) { 98 | return sym->get_name(); 99 | }); 100 | std::transform(rhs.begin() + _read_pos, rhs.end(), right_of_read_pos.begin(), [](const auto* sym) { 101 | return sym->get_name(); 102 | }); 103 | 104 | std::vector parts; 105 | if (!left_of_read_pos.empty()) 106 | parts.push_back(fmt::format("{}", fmt::join(left_of_read_pos.begin(), left_of_read_pos.end(), " "))); 107 | parts.push_back(std::string{sep}); 108 | if (!right_of_read_pos.empty()) 109 | parts.push_back(fmt::format("{}", fmt::join(right_of_read_pos.begin(), right_of_read_pos.end(), " "))); 110 | 111 | if (parts.size() == 1) 112 | parts.push_back(std::string{eps}); 113 | 114 | return fmt::format("{} {} {}", _rule->get_lhs()->get_name(), arrow, fmt::join(parts.begin(), parts.end(), " ")); 115 | } 116 | 117 | bool operator==(const Item& rhs) const 118 | { 119 | return get_rule()->get_index() == rhs.get_rule()->get_index() && get_read_pos() == rhs.get_read_pos(); 120 | } 121 | 122 | bool operator!=(const Item& rhs) const 123 | 
{ 124 | return !(*this == rhs); 125 | } 126 | 127 | bool operator<(const Item& rhs) const 128 | { 129 | return std::tuple{is_kernel() ? 0 : 1, _rule->get_index(), _read_pos} < std::tuple{rhs.is_kernel() ? 0 : 1, rhs._rule->get_index(), rhs._read_pos}; 130 | } 131 | 132 | private: 133 | const RuleType* _rule; 134 | std::size_t _read_pos; 135 | }; 136 | 137 | } // namespace pog 138 | -------------------------------------------------------------------------------- /deps/re2/re2/util/benchmark.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_BENCHMARK_H_ 6 | #define UTIL_BENCHMARK_H_ 7 | 8 | #include 9 | #include 10 | 11 | #include "util/logging.h" 12 | #include "util/util.h" 13 | 14 | // Globals for the old benchmark API. 15 | void StartBenchmarkTiming(); 16 | void StopBenchmarkTiming(); 17 | void SetBenchmarkBytesProcessed(int64_t b); 18 | void SetBenchmarkItemsProcessed(int64_t i); 19 | 20 | namespace benchmark { 21 | 22 | // The new benchmark API implemented as a layer over the old benchmark API. 23 | // (Please refer to https://github.com/google/benchmark for documentation.) 24 | class State { 25 | private: 26 | class Iterator { 27 | public: 28 | // Benchmark code looks like this: 29 | // 30 | // for (auto _ : state) { 31 | // // ... 32 | // } 33 | // 34 | // We try to avoid compiler warnings about such variables being unused. 35 | struct ATTRIBUTE_UNUSED Value {}; 36 | 37 | explicit Iterator(int64_t iters) : iters_(iters) {} 38 | 39 | bool operator!=(const Iterator& that) const { 40 | if (iters_ != that.iters_) { 41 | return true; 42 | } else { 43 | // We are about to stop the loop, so stop timing. 
44 | StopBenchmarkTiming(); 45 | return false; 46 | } 47 | } 48 | 49 | Value operator*() const { 50 | return Value(); 51 | } 52 | 53 | Iterator& operator++() { 54 | --iters_; 55 | return *this; 56 | } 57 | 58 | private: 59 | int64_t iters_; 60 | }; 61 | 62 | public: 63 | explicit State(int64_t iters) 64 | : iters_(iters), arg_(0), has_arg_(false) {} 65 | 66 | State(int64_t iters, int64_t arg) 67 | : iters_(iters), arg_(arg), has_arg_(true) {} 68 | 69 | Iterator begin() { 70 | // We are about to start the loop, so start timing. 71 | StartBenchmarkTiming(); 72 | return Iterator(iters_); 73 | } 74 | 75 | Iterator end() { 76 | return Iterator(0); 77 | } 78 | 79 | void SetBytesProcessed(int64_t b) { SetBenchmarkBytesProcessed(b); } 80 | void SetItemsProcessed(int64_t i) { SetBenchmarkItemsProcessed(i); } 81 | int64_t iterations() const { return iters_; } 82 | // Pretend to support multiple arguments. 83 | int64_t range(int pos) const { CHECK(has_arg_); return arg_; } 84 | 85 | private: 86 | int64_t iters_; 87 | int64_t arg_; 88 | bool has_arg_; 89 | 90 | State(const State&) = delete; 91 | State& operator=(const State&) = delete; 92 | }; 93 | 94 | } // namespace benchmark 95 | 96 | namespace testing { 97 | 98 | class Benchmark { 99 | public: 100 | Benchmark(const char* name, void (*func)(benchmark::State&)) 101 | : name_(name), 102 | func_([func](int iters, int arg) { 103 | benchmark::State state(iters); 104 | func(state); 105 | }), 106 | lo_(0), 107 | hi_(0), 108 | has_arg_(false) { 109 | Register(); 110 | } 111 | 112 | Benchmark(const char* name, void (*func)(benchmark::State&), int lo, int hi) 113 | : name_(name), 114 | func_([func](int iters, int arg) { 115 | benchmark::State state(iters, arg); 116 | func(state); 117 | }), 118 | lo_(lo), 119 | hi_(hi), 120 | has_arg_(true) { 121 | Register(); 122 | } 123 | 124 | // Pretend to support multiple threads. 
125 | Benchmark* ThreadRange(int lo, int hi) { return this; } 126 | 127 | const char* name() const { return name_; } 128 | const std::function& func() const { return func_; } 129 | int lo() const { return lo_; } 130 | int hi() const { return hi_; } 131 | bool has_arg() const { return has_arg_; } 132 | 133 | private: 134 | void Register(); 135 | 136 | const char* name_; 137 | std::function func_; 138 | int lo_; 139 | int hi_; 140 | bool has_arg_; 141 | 142 | Benchmark(const Benchmark&) = delete; 143 | Benchmark& operator=(const Benchmark&) = delete; 144 | }; 145 | 146 | } // namespace testing 147 | 148 | #define BENCHMARK(f) \ 149 | ::testing::Benchmark* _benchmark_##f = \ 150 | (new ::testing::Benchmark(#f, f)) 151 | 152 | #define BENCHMARK_RANGE(f, lo, hi) \ 153 | ::testing::Benchmark* _benchmark_##f = \ 154 | (new ::testing::Benchmark(#f, f, lo, hi)) 155 | 156 | #endif // UTIL_BENCHMARK_H_ 157 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/filtered_re2.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef RE2_FILTERED_RE2_H_ 6 | #define RE2_FILTERED_RE2_H_ 7 | 8 | // The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. 9 | // It provides a prefilter mechanism that helps in cutting down the 10 | // number of regexps that need to be actually searched. 11 | // 12 | // By design, it does not include a string matching engine. This is to 13 | // allow the user of the class to use their favorite string match 14 | // engine. The overall flow is: Add all the regexps using Add, then 15 | // Compile the FilteredRE2. The compile returns strings that need to 16 | // be matched. Note that all returned strings are lowercase. 
For 17 | // applying regexps to a search text, the caller does the string 18 | // matching using the strings returned. When doing the string match, 19 | // note that the caller has to do that on lower cased version of the 20 | // search text. Then call FirstMatch or AllMatches with a vector of 21 | // indices of strings that were found in the text to get the actual 22 | // regexp matches. 23 | 24 | #include 25 | #include 26 | 27 | #include "re2/re2.h" 28 | 29 | namespace re2 { 30 | 31 | class PrefilterTree; 32 | 33 | class FilteredRE2 { 34 | public: 35 | FilteredRE2(); 36 | explicit FilteredRE2(int min_atom_len); 37 | ~FilteredRE2(); 38 | 39 | // Uses RE2 constructor to create a RE2 object (re). Returns 40 | // re->error_code(). If error_code is other than NoError, then re is 41 | // deleted and not added to re2_vec_. 42 | RE2::ErrorCode Add(const StringPiece& pattern, 43 | const RE2::Options& options, 44 | int *id); 45 | 46 | // Prepares the regexps added by Add for filtering. Returns a set 47 | // of strings that the caller should check for in candidate texts. 48 | // The returned strings are lowercased. When doing string matching, 49 | // the search text should be lowercased first to find matching 50 | // strings from the set of strings returned by Compile. Call after 51 | // all Add calls are done. 52 | void Compile(std::vector* strings_to_match); 53 | 54 | // Returns the index of the first matching regexp. 55 | // Returns -1 on no match. Can be called prior to Compile. 56 | // Does not do any filtering: simply tries to Match the 57 | // regexps in a loop. 58 | int SlowFirstMatch(const StringPiece& text) const; 59 | 60 | // Returns the index of the first matching regexp. 61 | // Returns -1 on no match. Compile has to be called before 62 | // calling this. 63 | int FirstMatch(const StringPiece& text, 64 | const std::vector& atoms) const; 65 | 66 | // Returns the indices of all matching regexps, after first clearing 67 | // matched_regexps. 
68 | bool AllMatches(const StringPiece& text, 69 | const std::vector& atoms, 70 | std::vector* matching_regexps) const; 71 | 72 | // Returns the indices of all potentially matching regexps after first 73 | // clearing potential_regexps. 74 | // A regexp is potentially matching if it passes the filter. 75 | // If a regexp passes the filter it may still not match. 76 | // A regexp that does not pass the filter is guaranteed to not match. 77 | void AllPotentials(const std::vector& atoms, 78 | std::vector* potential_regexps) const; 79 | 80 | // The number of regexps added. 81 | int NumRegexps() const { return static_cast(re2_vec_.size()); } 82 | 83 | // Get the individual RE2 objects. 84 | const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; } 85 | 86 | private: 87 | // Print prefilter. 88 | void PrintPrefilter(int regexpid); 89 | 90 | // Useful for testing and debugging. 91 | void RegexpsGivenStrings(const std::vector& matched_atoms, 92 | std::vector* passed_regexps); 93 | 94 | // All the regexps in the FilteredRE2. 95 | std::vector re2_vec_; 96 | 97 | // Has the FilteredRE2 been compiled using Compile() 98 | bool compiled_; 99 | 100 | // An AND-OR tree of string atoms used for filtering regexps. 
101 | PrefilterTree* prefilter_tree_; 102 | 103 | FilteredRE2(const FilteredRE2&) = delete; 104 | FilteredRE2& operator=(const FilteredRE2&) = delete; 105 | }; 106 | 107 | } // namespace re2 108 | 109 | #endif // RE2_FILTERED_RE2_H_ 110 | -------------------------------------------------------------------------------- /tests/test_token_builder.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | using namespace pog; 6 | 7 | class TestTokenBuilder : public ::testing::Test 8 | { 9 | public: 10 | TestTokenBuilder() : grammar(), tokenizer(&grammar) {} 11 | 12 | Grammar grammar; 13 | Tokenizer tokenizer; 14 | }; 15 | 16 | TEST_F(TestTokenBuilder, 17 | Initialization) { 18 | TokenBuilder tb(&grammar, &tokenizer); 19 | 20 | EXPECT_EQ(grammar.get_symbols().size(), 2u); 21 | EXPECT_EQ(tokenizer.get_tokens().size(), 1u); 22 | } 23 | 24 | TEST_F(TestTokenBuilder, 25 | NoTokens) { 26 | TokenBuilder tb(&grammar, &tokenizer); 27 | tb.done(); 28 | 29 | EXPECT_EQ(grammar.get_symbols().size(), 2u); 30 | EXPECT_EQ(tokenizer.get_tokens().size(), 1u); 31 | } 32 | 33 | TEST_F(TestTokenBuilder, 34 | SingleTokenWithoutAnything) { 35 | TokenBuilder tb(&grammar, &tokenizer, "abc"); 36 | tb.done(); 37 | 38 | EXPECT_EQ(grammar.get_symbols().size(), 2u); 39 | EXPECT_EQ(tokenizer.get_tokens().size(), 2u); 40 | 41 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_pattern(), "abc"); 42 | } 43 | 44 | TEST_F(TestTokenBuilder, 45 | SingleTokenWithSymbol) { 46 | TokenBuilder tb(&grammar, &tokenizer, "abc"); 47 | tb.symbol("ABC"); 48 | tb.done(); 49 | 50 | EXPECT_EQ(grammar.get_symbols().size(), 3u); 51 | EXPECT_EQ(tokenizer.get_tokens().size(), 2u); 52 | 53 | EXPECT_EQ(grammar.get_symbols()[2]->get_name(), "ABC"); 54 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_pattern(), "abc"); 55 | } 56 | 57 | TEST_F(TestTokenBuilder, 58 | SingleTokenWithAction) { 59 | TokenBuilder tb(&grammar, &tokenizer, "abc"); 60 | tb.action([](std::string_view) { 
return 42; }); 61 | tb.done(); 62 | 63 | EXPECT_EQ(grammar.get_symbols().size(), 2u); 64 | EXPECT_EQ(tokenizer.get_tokens().size(), 2u); 65 | 66 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_pattern(), "abc"); 67 | EXPECT_TRUE(tokenizer.get_tokens()[1]->has_action()); 68 | EXPECT_EQ(tokenizer.get_tokens()[1]->perform_action("xyz"), 42); 69 | } 70 | 71 | TEST_F(TestTokenBuilder, 72 | SingleTokenWithFullwordSpecifier) { 73 | TokenBuilder tb(&grammar, &tokenizer, "abc"); 74 | tb.fullword(); 75 | tb.done(); 76 | 77 | EXPECT_EQ(grammar.get_symbols().size(), 2u); 78 | EXPECT_EQ(tokenizer.get_tokens().size(), 2u); 79 | 80 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_pattern(), "abc(\\b|$)"); 81 | } 82 | 83 | TEST_F(TestTokenBuilder, 84 | SingleTokenWithStates) { 85 | TokenBuilder tb(&grammar, &tokenizer, "abc"); 86 | tb.states("state1", "state2"); 87 | tb.done(); 88 | 89 | EXPECT_EQ(grammar.get_symbols().size(), 2u); 90 | EXPECT_EQ(tokenizer.get_tokens().size(), 2u); 91 | 92 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_pattern(), "abc"); 93 | } 94 | 95 | TEST_F(TestTokenBuilder, 96 | SingleTokenWithTransitionToState) { 97 | TokenBuilder tb(&grammar, &tokenizer, "abc"); 98 | tb.enter_state("state1"); 99 | tb.done(); 100 | 101 | EXPECT_EQ(grammar.get_symbols().size(), 2u); 102 | EXPECT_EQ(tokenizer.get_tokens().size(), 2u); 103 | 104 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_pattern(), "abc"); 105 | EXPECT_TRUE(tokenizer.get_tokens()[1]->has_transition_to_state()); 106 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_transition_to_state(), "state1"); 107 | } 108 | 109 | TEST_F(TestTokenBuilder, 110 | SingleTokenWithPrecedence) { 111 | TokenBuilder tb(&grammar, &tokenizer, "abc"); 112 | tb.symbol("ABC"); 113 | tb.precedence(1, Associativity::Left); 114 | tb.done(); 115 | 116 | EXPECT_EQ(grammar.get_symbols().size(), 3u); 117 | EXPECT_EQ(tokenizer.get_tokens().size(), 2u); 118 | 119 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_pattern(), "abc"); 120 | 
EXPECT_TRUE(grammar.get_symbols()[2]->has_precedence()); 121 | EXPECT_EQ(grammar.get_symbols()[2]->get_precedence(), (Precedence{1, Associativity::Left})); 122 | } 123 | 124 | TEST_F(TestTokenBuilder, 125 | MultipleTokensDescription) { 126 | TokenBuilder tb1(&grammar, &tokenizer, "abc"); 127 | tb1.symbol("ABC"); 128 | tb1.description("abc token"); 129 | tb1.done(); 130 | 131 | TokenBuilder tb2(&grammar, &tokenizer, "def"); 132 | tb2.symbol("DEF"); 133 | tb2.done(); 134 | 135 | 136 | EXPECT_EQ(grammar.get_symbols().size(), 4u); 137 | EXPECT_EQ(tokenizer.get_tokens().size(), 3u); 138 | 139 | EXPECT_EQ(grammar.get_symbols()[2]->get_name(), "ABC"); 140 | EXPECT_EQ(grammar.get_symbols()[2]->get_description(), "abc token"); 141 | EXPECT_EQ(grammar.get_symbols()[3]->get_name(), "DEF"); 142 | EXPECT_EQ(grammar.get_symbols()[3]->get_description(), "DEF"); 143 | EXPECT_EQ(tokenizer.get_tokens()[1]->get_pattern(), "abc"); 144 | EXPECT_EQ(tokenizer.get_tokens()[2]->get_pattern(), "def"); 145 | } 146 | -------------------------------------------------------------------------------- /deps/re2/re2/util/mutex.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef UTIL_MUTEX_H_ 6 | #define UTIL_MUTEX_H_ 7 | 8 | /* 9 | * A simple mutex wrapper, supporting locks and read-write locks. 10 | * You should assume the locks are *not* re-entrant. 
11 | */ 12 | 13 | #if !defined(_WIN32) 14 | #ifndef _POSIX_C_SOURCE 15 | #define _POSIX_C_SOURCE 200809L 16 | #endif 17 | #include 18 | #if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0 19 | #define MUTEX_IS_PTHREAD_RWLOCK 20 | #endif 21 | #endif 22 | 23 | #if defined(MUTEX_IS_PTHREAD_RWLOCK) 24 | #include 25 | #include 26 | typedef pthread_rwlock_t MutexType; 27 | #else 28 | #include 29 | typedef std::mutex MutexType; 30 | #endif 31 | 32 | namespace re2 { 33 | 34 | class Mutex { 35 | public: 36 | inline Mutex(); 37 | inline ~Mutex(); 38 | inline void Lock(); // Block if needed until free then acquire exclusively 39 | inline void Unlock(); // Release a lock acquired via Lock() 40 | // Note that on systems that don't support read-write locks, these may 41 | // be implemented as synonyms to Lock() and Unlock(). So you can use 42 | // these for efficiency, but don't use them anyplace where being able 43 | // to do shared reads is necessary to avoid deadlock. 44 | inline void ReaderLock(); // Block until free or shared then acquire a share 45 | inline void ReaderUnlock(); // Release a read share of this Mutex 46 | inline void WriterLock() { Lock(); } // Acquire an exclusive lock 47 | inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock() 48 | 49 | private: 50 | MutexType mutex_; 51 | 52 | // Catch the error of writing Mutex when intending MutexLock. 
53 | Mutex(Mutex *ignored); 54 | 55 | Mutex(const Mutex&) = delete; 56 | Mutex& operator=(const Mutex&) = delete; 57 | }; 58 | 59 | #if defined(MUTEX_IS_PTHREAD_RWLOCK) 60 | 61 | #define SAFE_PTHREAD(fncall) \ 62 | do { \ 63 | if ((fncall) != 0) abort(); \ 64 | } while (0) 65 | 66 | Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); } 67 | Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); } 68 | void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); } 69 | void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } 70 | void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); } 71 | void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } 72 | 73 | #undef SAFE_PTHREAD 74 | 75 | #else 76 | 77 | Mutex::Mutex() { } 78 | Mutex::~Mutex() { } 79 | void Mutex::Lock() { mutex_.lock(); } 80 | void Mutex::Unlock() { mutex_.unlock(); } 81 | void Mutex::ReaderLock() { Lock(); } // C++11 doesn't have std::shared_mutex. 82 | void Mutex::ReaderUnlock() { Unlock(); } 83 | 84 | #endif 85 | 86 | // -------------------------------------------------------------------------- 87 | // Some helper classes 88 | 89 | // MutexLock(mu) acquires mu when constructed and releases it when destroyed. 
90 | class MutexLock { 91 | public: 92 | explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } 93 | ~MutexLock() { mu_->Unlock(); } 94 | private: 95 | Mutex * const mu_; 96 | 97 | MutexLock(const MutexLock&) = delete; 98 | MutexLock& operator=(const MutexLock&) = delete; 99 | }; 100 | 101 | // ReaderMutexLock and WriterMutexLock do the same, for rwlocks 102 | class ReaderMutexLock { 103 | public: 104 | explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } 105 | ~ReaderMutexLock() { mu_->ReaderUnlock(); } 106 | private: 107 | Mutex * const mu_; 108 | 109 | ReaderMutexLock(const ReaderMutexLock&) = delete; 110 | ReaderMutexLock& operator=(const ReaderMutexLock&) = delete; 111 | }; 112 | 113 | class WriterMutexLock { 114 | public: 115 | explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } 116 | ~WriterMutexLock() { mu_->WriterUnlock(); } 117 | private: 118 | Mutex * const mu_; 119 | 120 | WriterMutexLock(const WriterMutexLock&) = delete; 121 | WriterMutexLock& operator=(const WriterMutexLock&) = delete; 122 | }; 123 | 124 | // Catch bug where variable name is omitted, e.g. 
MutexLock (&mu); 125 | #define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name") 126 | #define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name") 127 | #define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name") 128 | 129 | } // namespace re2 130 | 131 | #endif // UTIL_MUTEX_H_ 132 | -------------------------------------------------------------------------------- /include/pog/rule_builder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace pog { 7 | 8 | template 9 | class RuleBuilder 10 | { 11 | public: 12 | using GrammarType = Grammar; 13 | using RuleType = Rule; 14 | using SymbolType = Symbol; 15 | 16 | struct SymbolsAndAction 17 | { 18 | std::vector symbols; 19 | typename RuleType::CallbackType action; 20 | }; 21 | 22 | struct RightHandSide 23 | { 24 | std::vector symbols_and_action; 25 | std::optional precedence; 26 | }; 27 | 28 | RuleBuilder(GrammarType* grammar, const std::string& lhs) : _grammar(grammar), _lhs(lhs), _rhss() {} 29 | 30 | void done() 31 | { 32 | if (_rhss.empty()) 33 | return; 34 | 35 | const auto* lhs_symbol = _grammar->add_symbol(SymbolKind::Nonterminal, _lhs); 36 | 37 | std::size_t rhs_counter = 0; 38 | for (auto&& rhs : _rhss) 39 | { 40 | assert(!rhs.symbols_and_action.empty() && "No symbols and action associated to right-hand side of the rule. 
This shouldn't happen"); 41 | 42 | std::vector rhs_symbols; 43 | for (std::size_t i = 0; i < rhs.symbols_and_action.size(); ++i) 44 | { 45 | auto&& symbols_and_action = rhs.symbols_and_action[i]; 46 | 47 | std::transform(symbols_and_action.symbols.begin(), symbols_and_action.symbols.end(), std::back_inserter(rhs_symbols), [this](const auto& sym_name) { 48 | return _grammar->add_symbol(SymbolKind::Nonterminal, sym_name); 49 | }); 50 | 51 | // There are multple actions (mid-rule actions) so we need to create new symbol and epsilon rule 52 | // for each midrule action. Midrule symbols will be inserted into the original rule. 53 | // 54 | // If you have rule A -> B C D and you want to perform action after B, then we'll create rules 55 | // A -> B X C D 56 | // X -> 57 | // where X -> will have assigned the midrule action. 58 | if (i < rhs.symbols_and_action.size() - 1) 59 | { 60 | // Create unique nonterminal for midrule action 61 | auto midsymbol = _grammar->add_symbol( 62 | SymbolKind::Nonterminal, 63 | fmt::format("_{}#{}.{}", _lhs, rhs_counter, i) 64 | ); 65 | 66 | // Create rule to which midrule action can be assigned and set midrule size. 67 | // Midrule size is number of symbols preceding the midrule symbol. It represents how many 68 | // items from stack we need to borrow for action arguments. 69 | auto rule = _grammar->add_rule(midsymbol, std::vector{}, std::move(symbols_and_action.action)); 70 | rule->set_midrule(rhs_symbols.size()); 71 | rhs_symbols.push_back(midsymbol); 72 | } 73 | // This is the last action so do not mark it as midrule 74 | else 75 | { 76 | auto rule = _grammar->add_rule(lhs_symbol, rhs_symbols, std::move(symbols_and_action.action)); 77 | if (rule && rhs.precedence) 78 | { 79 | const auto& prec = rhs.precedence.value(); 80 | rule->set_precedence(prec.level, prec.assoc); 81 | } 82 | } 83 | } 84 | 85 | rhs_counter++; 86 | } 87 | } 88 | 89 | template 90 | RuleBuilder& production(Args&&... 
args) 91 | { 92 | _rhss.push_back(RightHandSide{ 93 | std::vector{ 94 | SymbolsAndAction{ 95 | std::vector{}, 96 | {} 97 | } 98 | }, 99 | std::nullopt 100 | }); 101 | _production(_rhss.back().symbols_and_action, std::forward(args)...); 102 | return *this; 103 | } 104 | 105 | RuleBuilder& precedence(std::uint32_t level, Associativity assoc) 106 | { 107 | _rhss.back().precedence = Precedence{level, assoc}; 108 | return *this; 109 | } 110 | 111 | private: 112 | void _production(std::vector&) {} 113 | 114 | template 115 | void _production(std::vector& sa, const std::string& symbol, Args&&... args) 116 | { 117 | sa.back().symbols.push_back(symbol); 118 | _production(sa, std::forward(args)...); 119 | } 120 | 121 | template 122 | void _production(std::vector& sa, typename RuleType::CallbackType&& action, Args&&... args) 123 | { 124 | sa.back().action = std::move(action); 125 | // We have ran into action so create new record in symbols and actions vector 126 | // but only if it isn't the very last thing in the production 127 | if constexpr (sizeof...(args) > 0) 128 | sa.push_back(SymbolsAndAction{ 129 | std::vector{}, 130 | {} 131 | }); 132 | _production(sa, std::forward(args)...); 133 | } 134 | 135 | GrammarType* _grammar; 136 | std::string _lhs; 137 | std::vector _rhss; 138 | }; 139 | 140 | } // namespace pog 141 | -------------------------------------------------------------------------------- /deps/fmt/fmt/include/fmt/ostream.h: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ - std::ostream support 2 | // 3 | // Copyright (c) 2012 - present, Victor Zverovich 4 | // All rights reserved. 5 | // 6 | // For the license information refer to format.h. 
7 | 8 | #ifndef FMT_OSTREAM_H_ 9 | #define FMT_OSTREAM_H_ 10 | 11 | #include 12 | #include "format.h" 13 | 14 | FMT_BEGIN_NAMESPACE 15 | namespace internal { 16 | 17 | template class formatbuf : public std::basic_streambuf { 18 | private: 19 | using int_type = typename std::basic_streambuf::int_type; 20 | using traits_type = typename std::basic_streambuf::traits_type; 21 | 22 | buffer& buffer_; 23 | 24 | public: 25 | formatbuf(buffer& buf) : buffer_(buf) {} 26 | 27 | protected: 28 | // The put-area is actually always empty. This makes the implementation 29 | // simpler and has the advantage that the streambuf and the buffer are always 30 | // in sync and sputc never writes into uninitialized memory. The obvious 31 | // disadvantage is that each call to sputc always results in a (virtual) call 32 | // to overflow. There is no disadvantage here for sputn since this always 33 | // results in a call to xsputn. 34 | 35 | int_type overflow(int_type ch = traits_type::eof()) FMT_OVERRIDE { 36 | if (!traits_type::eq_int_type(ch, traits_type::eof())) 37 | buffer_.push_back(static_cast(ch)); 38 | return ch; 39 | } 40 | 41 | std::streamsize xsputn(const Char* s, std::streamsize count) FMT_OVERRIDE { 42 | buffer_.append(s, s + count); 43 | return count; 44 | } 45 | }; 46 | 47 | template struct test_stream : std::basic_ostream { 48 | private: 49 | struct null; 50 | // Hide all operator<< from std::basic_ostream. 51 | void operator<<(null); 52 | }; 53 | 54 | // Checks if T has a user-defined operator<< (e.g. not a member of 55 | // std::ostream). 56 | template class is_streamable { 57 | private: 58 | template 59 | static decltype((void)(std::declval&>() 60 | << std::declval()), 61 | std::true_type()) 62 | test(int); 63 | 64 | template static std::false_type test(...); 65 | 66 | using result = decltype(test(0)); 67 | 68 | public: 69 | static const bool value = result::value; 70 | }; 71 | 72 | // Write the content of buf to os. 
73 | template 74 | void write(std::basic_ostream& os, buffer& buf) { 75 | const Char* buf_data = buf.data(); 76 | using unsigned_streamsize = std::make_unsigned::type; 77 | unsigned_streamsize size = buf.size(); 78 | unsigned_streamsize max_size = 79 | to_unsigned((std::numeric_limits::max)()); 80 | do { 81 | unsigned_streamsize n = size <= max_size ? size : max_size; 82 | os.write(buf_data, static_cast(n)); 83 | buf_data += n; 84 | size -= n; 85 | } while (size != 0); 86 | } 87 | 88 | template 89 | void format_value(buffer& buf, const T& value) { 90 | formatbuf format_buf(buf); 91 | std::basic_ostream output(&format_buf); 92 | output.exceptions(std::ios_base::failbit | std::ios_base::badbit); 93 | output << value; 94 | buf.resize(buf.size()); 95 | } 96 | 97 | // Formats an object of type T that has an overloaded ostream operator<<. 98 | template 99 | struct fallback_formatter::value>> 100 | : formatter, Char> { 101 | template 102 | auto format(const T& value, Context& ctx) -> decltype(ctx.out()) { 103 | basic_memory_buffer buffer; 104 | format_value(buffer, value); 105 | basic_string_view str(buffer.data(), buffer.size()); 106 | return formatter, Char>::format(str, ctx); 107 | } 108 | }; 109 | } // namespace internal 110 | 111 | template 112 | void vprint(std::basic_ostream& os, basic_string_view format_str, 113 | basic_format_args> args) { 114 | basic_memory_buffer buffer; 115 | internal::vformat_to(buffer, format_str, args); 116 | internal::write(os, buffer); 117 | } 118 | 119 | /** 120 | \rst 121 | Prints formatted data to the stream *os*. 122 | 123 | **Example**:: 124 | 125 | fmt::print(cerr, "Don't {}!", "panic"); 126 | \endrst 127 | */ 128 | template ::value, char_t>> 130 | void print(std::basic_ostream& os, const S& format_str, Args&&... 
args) { 131 | vprint(os, to_string_view(format_str), 132 | {internal::make_args_checked(format_str, args...)}); 133 | } 134 | FMT_END_NAMESPACE 135 | 136 | #endif // FMT_OSTREAM_H_ 137 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/set.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #include "re2/set.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "util/util.h" 12 | #include "util/logging.h" 13 | #include "re2/pod_array.h" 14 | #include "re2/prog.h" 15 | #include "re2/re2.h" 16 | #include "re2/regexp.h" 17 | #include "re2/stringpiece.h" 18 | 19 | namespace re2 { 20 | 21 | RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) { 22 | options_.Copy(options); 23 | options_.set_never_capture(true); // might unblock some optimisations 24 | anchor_ = anchor; 25 | prog_ = NULL; 26 | compiled_ = false; 27 | size_ = 0; 28 | } 29 | 30 | RE2::Set::~Set() { 31 | for (size_t i = 0; i < elem_.size(); i++) 32 | elem_[i].second->Decref(); 33 | delete prog_; 34 | } 35 | 36 | int RE2::Set::Add(const StringPiece& pattern, std::string* error) { 37 | if (compiled_) { 38 | LOG(DFATAL) << "RE2::Set::Add() called after compiling"; 39 | return -1; 40 | } 41 | 42 | Regexp::ParseFlags pf = static_cast( 43 | options_.ParseFlags()); 44 | RegexpStatus status; 45 | re2::Regexp* re = Regexp::Parse(pattern, pf, &status); 46 | if (re == NULL) { 47 | if (error != NULL) 48 | *error = status.Text(); 49 | if (options_.log_errors()) 50 | LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); 51 | return -1; 52 | } 53 | 54 | // Concatenate with match index and push on vector. 
55 | int n = static_cast(elem_.size()); 56 | re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); 57 | if (re->op() == kRegexpConcat) { 58 | int nsub = re->nsub(); 59 | PODArray sub(nsub + 1); 60 | for (int i = 0; i < nsub; i++) 61 | sub[i] = re->sub()[i]->Incref(); 62 | sub[nsub] = m; 63 | re->Decref(); 64 | re = re2::Regexp::Concat(sub.data(), nsub + 1, pf); 65 | } else { 66 | re2::Regexp* sub[2]; 67 | sub[0] = re; 68 | sub[1] = m; 69 | re = re2::Regexp::Concat(sub, 2, pf); 70 | } 71 | elem_.emplace_back(std::string(pattern), re); 72 | return n; 73 | } 74 | 75 | bool RE2::Set::Compile() { 76 | if (compiled_) { 77 | LOG(DFATAL) << "RE2::Set::Compile() called more than once"; 78 | return false; 79 | } 80 | compiled_ = true; 81 | size_ = static_cast(elem_.size()); 82 | 83 | // Sort the elements by their patterns. This is good enough for now 84 | // until we have a Regexp comparison function. (Maybe someday...) 85 | std::sort(elem_.begin(), elem_.end(), 86 | [](const Elem& a, const Elem& b) -> bool { 87 | return a.first < b.first; 88 | }); 89 | 90 | PODArray sub(size_); 91 | for (int i = 0; i < size_; i++) 92 | sub[i] = elem_[i].second; 93 | elem_.clear(); 94 | elem_.shrink_to_fit(); 95 | 96 | Regexp::ParseFlags pf = static_cast( 97 | options_.ParseFlags()); 98 | re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf); 99 | 100 | prog_ = Prog::CompileSet(re, anchor_, options_.max_mem()); 101 | re->Decref(); 102 | return prog_ != NULL; 103 | } 104 | 105 | bool RE2::Set::Match(const StringPiece& text, std::vector* v) const { 106 | return Match(text, v, NULL); 107 | } 108 | 109 | bool RE2::Set::Match(const StringPiece& text, std::vector* v, 110 | ErrorInfo* error_info) const { 111 | if (!compiled_) { 112 | LOG(DFATAL) << "RE2::Set::Match() called before compiling"; 113 | if (error_info != NULL) 114 | error_info->kind = kNotCompiled; 115 | return false; 116 | } 117 | bool dfa_failed = false; 118 | std::unique_ptr matches; 119 | if (v != NULL) { 120 | matches.reset(new 
SparseSet(size_)); 121 | v->clear(); 122 | } 123 | bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch, 124 | NULL, &dfa_failed, matches.get()); 125 | if (dfa_failed) { 126 | if (options_.log_errors()) 127 | LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " 128 | << "bytemap range " << prog_->bytemap_range() << ", " 129 | << "list count " << prog_->list_count(); 130 | if (error_info != NULL) 131 | error_info->kind = kOutOfMemory; 132 | return false; 133 | } 134 | if (ret == false) { 135 | if (error_info != NULL) 136 | error_info->kind = kNoError; 137 | return false; 138 | } 139 | if (v != NULL) { 140 | if (matches->empty()) { 141 | LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!"; 142 | if (error_info != NULL) 143 | error_info->kind = kInconsistent; 144 | return false; 145 | } 146 | v->assign(matches->begin(), matches->end()); 147 | } 148 | if (error_info != NULL) 149 | error_info->kind = kNoError; 150 | return true; 151 | } 152 | 153 | } // namespace re2 154 | -------------------------------------------------------------------------------- /deps/re2/re2/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The RE2 Authors. All Rights Reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | # Old enough to support Ubuntu Xenial. 6 | cmake_minimum_required(VERSION 3.5.1) 7 | 8 | if(POLICY CMP0048) 9 | cmake_policy(SET CMP0048 NEW) 10 | endif() 11 | 12 | project(RE2 CXX) 13 | include(CTest) 14 | include(GNUInstallDirs) 15 | 16 | option(BUILD_SHARED_LIBS "build shared libraries" OFF) 17 | option(USEPCRE "use PCRE in tests and benchmarks" OFF) 18 | 19 | # CMake seems to have no way to enable/disable testing per subproject, 20 | # so we provide an option similar to BUILD_TESTING, but just for RE2. 
21 | option(RE2_BUILD_TESTING "enable testing for RE2" ON) 22 | 23 | set(EXTRA_TARGET_LINK_LIBRARIES) 24 | 25 | if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") 26 | if(MSVC_VERSION LESS 1900) 27 | message(FATAL_ERROR "you need Visual Studio 2015 or later") 28 | endif() 29 | if(BUILD_SHARED_LIBS) 30 | # See http://www.kitware.com/blog/home/post/939 for details. 31 | set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) 32 | endif() 33 | # CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX, 34 | # so we disable various warnings that aren't particularly helpful. 35 | add_compile_options(/wd4100 /wd4201 /wd4456 /wd4457 /wd4702 /wd4815) 36 | # Without a byte order mark (BOM), Visual Studio assumes that the source 37 | # file is encoded using the current user code page, so we specify UTF-8. 38 | add_compile_options(/utf-8) 39 | elseif(CYGWIN OR MINGW) 40 | # See https://stackoverflow.com/questions/38139631 for details. 41 | add_compile_options(-std=gnu++11) 42 | elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") 43 | add_compile_options(-std=c++11) 44 | endif() 45 | 46 | if(WIN32) 47 | add_definitions(-DUNICODE -D_UNICODE -DSTRICT -DNOMINMAX) 48 | add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS) 49 | elseif(UNIX) 50 | add_compile_options(-pthread) 51 | list(APPEND EXTRA_TARGET_LINK_LIBRARIES -pthread) 52 | endif() 53 | 54 | if(USEPCRE) 55 | add_definitions(-DUSEPCRE) 56 | list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre) 57 | endif() 58 | 59 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 60 | 61 | set(RE2_SOURCES 62 | re2/bitstate.cc 63 | re2/compile.cc 64 | re2/dfa.cc 65 | re2/filtered_re2.cc 66 | re2/mimics_pcre.cc 67 | re2/nfa.cc 68 | re2/onepass.cc 69 | re2/parse.cc 70 | re2/perl_groups.cc 71 | re2/prefilter.cc 72 | re2/prefilter_tree.cc 73 | re2/prog.cc 74 | re2/re2.cc 75 | re2/regexp.cc 76 | re2/set.cc 77 | re2/simplify.cc 78 | re2/stringpiece.cc 79 | re2/tostring.cc 80 | re2/unicode_casefold.cc 81 | re2/unicode_groups.cc 82 | util/rune.cc 83 | 
util/strutil.cc 84 | ) 85 | 86 | add_library(re2 ${RE2_SOURCES}) 87 | add_library(re2::re2 ALIAS re2) 88 | 89 | if(RE2_BUILD_TESTING) 90 | set(TESTING_SOURCES 91 | re2/testing/backtrack.cc 92 | re2/testing/dump.cc 93 | re2/testing/exhaustive_tester.cc 94 | re2/testing/null_walker.cc 95 | re2/testing/regexp_generator.cc 96 | re2/testing/string_generator.cc 97 | re2/testing/tester.cc 98 | util/pcre.cc 99 | ) 100 | 101 | add_library(testing STATIC ${TESTING_SOURCES}) 102 | 103 | set(TEST_TARGETS 104 | charclass_test 105 | compile_test 106 | filtered_re2_test 107 | mimics_pcre_test 108 | parse_test 109 | possible_match_test 110 | re2_test 111 | re2_arg_test 112 | regexp_test 113 | required_prefix_test 114 | search_test 115 | set_test 116 | simplify_test 117 | string_generator_test 118 | 119 | dfa_test 120 | exhaustive1_test 121 | exhaustive2_test 122 | exhaustive3_test 123 | exhaustive_test 124 | random_test 125 | ) 126 | 127 | set(BENCHMARK_TARGETS 128 | regexp_benchmark 129 | ) 130 | 131 | foreach(target ${TEST_TARGETS}) 132 | add_executable(${target} re2/testing/${target}.cc util/test.cc) 133 | target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES}) 134 | add_test(NAME ${target} COMMAND ${target}) 135 | endforeach(target) 136 | 137 | foreach(target ${BENCHMARK_TARGETS}) 138 | add_executable(${target} re2/testing/${target}.cc util/benchmark.cc) 139 | target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES}) 140 | endforeach(target) 141 | endif() 142 | 143 | set(RE2_HEADERS 144 | re2/filtered_re2.h 145 | re2/re2.h 146 | re2/set.h 147 | re2/stringpiece.h 148 | ) 149 | 150 | install(FILES ${RE2_HEADERS} 151 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/re2) 152 | install(TARGETS re2 EXPORT re2Config 153 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 154 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 155 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 156 | INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 157 | install(EXPORT re2Config 158 
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/re2 NAMESPACE re2::)
159 |
--------------------------------------------------------------------------------
/include/pog/state.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 |
6 | #include
7 | #include
8 | #include
9 |
10 | namespace pog {
11 |
12 | template
13 | class State // an indexed, ordered collection of items plus its outgoing and incoming transitions
14 | {
15 | public:
16 |     using ItemType = Item;
17 |     using SymbolType = Symbol;
18 |
19 |     State() : _index(std::numeric_limits::max()) {} // index defaults to the numeric_limits maximum - presumably a "not assigned yet" marker
20 |     State(std::uint32_t index) : _index(index) {}
21 |
22 |     std::uint32_t get_index() const { return _index; }
23 |     void set_index(std::uint32_t index) { _index = index; }
24 |
25 |     std::size_t size() const { return _items.size(); }
26 |     auto begin() const { return _items.begin(); }
27 |     auto end() const { return _items.end(); }
28 |
29 |     template
30 |     std::pair add_item(T&& item) // returns {stored item, true} when inserted, {already present item, false} otherwise
31 |     {
32 |         auto itr = std::lower_bound(_items.begin(), _items.end(), item, [](const auto& left, const auto& needle) {
33 |             return *left.get() < needle;
34 |         });
35 |
36 |         if (itr == _items.end() || *itr->get() != item)
37 |         {
38 |             auto new_itr = _items.insert(itr, std::make_unique(std::forward(item))); // inserting at the lower_bound position keeps _items ordered
39 |             return {new_itr->get(), true};
40 |         }
41 |         else
42 |             return {itr->get(), false};
43 |     }
44 |
45 |     void add_transition(const SymbolType* symbol, const State* state)
46 |     {
47 |         _transitions.emplace(symbol, state); // map emplace: an entry already stored for this symbol is left as it is
48 |     }
49 |
50 |     void add_back_transition(const SymbolType* symbol, const State* state) // predecessors per symbol are kept ordered by state index, without duplicates
51 |     {
52 |         auto itr = _back_transitions.find(symbol);
53 |         if (itr == _back_transitions.end())
54 |         {
55 |             _back_transitions.emplace(symbol, std::vector{state});
56 |             return;
57 |         }
58 |
59 |         auto state_itr = std::lower_bound(itr->second.begin(), itr->second.end(), state->get_index(), [](const auto& left, const auto& needle) {
60 |             return left->get_index() < needle;
61 |         });
62 |
63 |         if (state_itr == itr->second.end() || (*state_itr)->get_index() != state->get_index())
64 |             itr->second.insert(state_itr, state);
65 |     }
66 |
67 |     bool is_accepting() const // true only when exactly one item reports is_accepting()
68 |     {
69 |         return std::count_if(_items.begin(), _items.end(), [](const auto& item) {
70 |             return item->is_accepting();
71 |         }) == 1;
72 |     }
73 |
74 |     std::string to_string(std::string_view arrow = "->", std::string_view eps = "", std::string_view sep = "<*>", const std::string& newline = "\n") const
75 |     {
76 |         std::vector item_strings(_items.size());
77 |         std::transform(_items.begin(), _items.end(), item_strings.begin(), [&](const auto& item) {
78 |             return item->to_string(arrow, eps, sep);
79 |         });
80 |         return fmt::format("{}", fmt::join(item_strings.begin(), item_strings.end(), newline)); // one item per entry, joined by `newline`
81 |     }
82 |
83 |     std::vector get_production_items() const // non-owning pointers to the items for which is_final() holds
84 |     {
85 |         std::vector result;
86 |         transform_if(_items.begin(), _items.end(), std::back_inserter(result),
87 |             [](const auto& item) {
88 |                 return item->is_final();
89 |             },
90 |             [](const auto& item) {
91 |                 return item.get();
92 |             }
93 |         );
94 |         return result;
95 |     }
96 |
97 |     auto get_kernel() const // view over the items for which is_kernel() holds
98 |     {
99 |         return FilterView{_items.begin(), _items.end(), [](const auto& item) {
100 |             return item->is_kernel();
101 |         }};
102 |     }
103 |
104 |     bool contains(const ItemType& item) const // binary search, relies on the ordering maintained by add_item
105 |     {
106 |         auto itr = std::lower_bound(_items.begin(), _items.end(), item, [](const auto& left, const auto& needle) {
107 |             return *left.get() < needle;
108 |         });
109 |         return itr != _items.end() && *itr->get() == item;
110 |     }
111 |
112 |     bool operator==(const State& rhs) const // equality looks at kernel items only
113 |     {
114 |         auto lhs_kernel = get_kernel();
115 |         auto rhs_kernel = rhs.get_kernel();
116 |         return std::equal(lhs_kernel.begin(), lhs_kernel.end(), rhs_kernel.begin(), rhs_kernel.end(), [](const auto& left, const auto& right) {
117 |             return *left.get() == *right.get();
118 |         });
119 |     }
120 |
121 |     bool operator !=(const State& rhs) const
122 |     {
123 |         return !(*this == rhs);
124 |     }
125 |
126 |     const std::map>& get_transitions() const { return _transitions; }
127 |     const std::map, SymbolLess>& get_back_transitions() const { return _back_transitions; }
128 |
129 | private:
130 |     std::uint32_t _index;
131 |     std::vector> _items;
132 |     std::map> _transitions;
133 |     std::map, SymbolLess> _back_transitions;
134 | };
135 |
136 | template
137 | struct StateKernelHash // hash built from (rule index, read position) of every kernel item
138 | {
139 |     std::size_t operator()(const State* state) const
140 |     {
141 |         std::size_t kernel_hash = 0;
142 |         for (const auto& item : state->get_kernel())
143 |             hash_combine(kernel_hash, item->get_rule()->get_index(), item->get_read_pos());
144 |         return kernel_hash;
145 |     }
146 | };
147 |
148 | template
149 | struct StateKernelEquals // delegates to State::operator==, i.e. to the kernel comparison
150 | {
151 |     bool operator()(const State* state1, const State* state2) const
152 |     {
153 |         return *state1 == *state2;
154 |     }
155 | };
156 |
157 | } // namespace pog
158 |
--------------------------------------------------------------------------------
/include/pog/automaton.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include
6 |
7 | #include
8 | #include
9 | #include
10 |
11 | namespace pog {
12 |
13 | template
14 | class Automaton // owns the states and builds them from the grammar in construct_states()
15 | {
16 | public:
17 |     using GrammarType = Grammar;
18 |     using ItemType = Item;
19 |     using StateType = State;
20 |     using SymbolType = Symbol;
21 |
22 |     using StateAndSymbolType = StateAndSymbol;
23 |
24 |     Automaton(const GrammarType* grammar) : _grammar(grammar), _states(), _state_to_index() {}
25 |
26 |     const std::vector>& get_states() const { return _states; }
27 |
28 |     const StateType* get_state(std::size_t index) const
29 |     {
30 |         assert(index < _states.size() && "Accessing state index out of bounds");
31 |         return _states[index].get();
32 |     }
33 |
34 |     template
35 |     std::pair add_state(StateT&& state) // returns {existing state, false} if an equal state is registered, else stores it and returns {new state, true}
36 |     {
37 |         auto itr = _state_to_index.find(&state);
38 |         if (itr != _state_to_index.end())
39 |             return {_states[itr->second].get(), false};
40 |
41 |         _states.push_back(std::make_unique(std::forward(state)));
42 |         _state_to_index.emplace(_states.back().get(), _states.size() - 1);
43 |         return {_states.back().get(), true};
44 |     }
45 |
46 |     void closure(StateType& state) // worklist: for every item, add fresh items for all rules of the symbol it reads next
47 |     {
48 |         std::deque to_process;
49 |         for (const auto& item : state)
50 |             to_process.push_back(item.get());
51 |
52 |         while (!to_process.empty())
53 |         {
54 |             const auto* current_item = to_process.front();
55 |             to_process.pop_front();
56 |
57 |             const auto* next_symbol = current_item->get_read_symbol();
58 |             auto rules = _grammar->get_rules_of_symbol(next_symbol);
59 |             for (const auto* rule : rules)
60 |             {
61 |                 auto new_item = Item{rule};
62 |                 auto result = state.add_item(std::move(new_item));
63 |                 if (result.second) // only items which were really inserted need to be expanded further
64 |                     to_process.push_back(result.first);
65 |             }
66 |         }
67 |     }
68 |
69 |     void construct_states() // builds every state reachable from the start rule and wires transitions in both directions
70 |     {
71 |         StateType initial_state;
72 |         initial_state.add_item(ItemType{_grammar->get_start_rule()});
73 |         initial_state.set_index(0);
74 |         closure(initial_state);
75 |         auto result = add_state(std::move(initial_state));
76 |
77 |         std::deque to_process{result.first};
78 |         while (!to_process.empty())
79 |         {
80 |             auto* state = to_process.front();
81 |             to_process.pop_front();
82 |
83 |             std::map> prepared_states; // successor candidates of `state`, one per symbol that can be read
84 |             for (const auto& item : *state)
85 |             {
86 |                 if (item->is_final())
87 |                     continue;
88 |
89 |                 auto next_sym = item->get_read_symbol();
90 |                 if (next_sym->is_end()) // no successor state is prepared for the end symbol
91 |                     continue;
92 |
93 |                 auto new_item = Item{*item};
94 |                 new_item.step();
95 |
96 |                 auto itr = prepared_states.find(next_sym);
97 |                 if (itr == prepared_states.end())
98 |                     std::tie(itr, std::ignore) = prepared_states.emplace(next_sym, StateType{});
99 |                 itr->second.add_item(std::move(new_item));
100 |             }
101 |
102 |             for (auto&& [symbol, prepared_state] : prepared_states)
103 |             {
104 |                 prepared_state.set_index(static_cast(_states.size())); // index the state would get if it turns out to be new
105 |                 auto result = add_state(std::move(prepared_state));
106 |                 auto* target_state = result.first;
107 |                 if (result.second)
108 |                 {
109 |                     // We calculate closure only if it's new state introduced in the automaton.
110 |                     // States can be compared only with their kernel items so it's better to just do it
111 |                     // once for each state.
112 |                     closure(*target_state);
113 |                     to_process.push_back(target_state);
114 |                 }
115 |                 state->add_transition(symbol, target_state);
116 |                 target_state->add_back_transition(symbol, state);
117 |             }
118 |         }
119 |     }
120 |
121 |     std::string generate_graph() const // renders states and transitions as Graphviz "digraph" text
122 |     {
123 |         std::vector states_str(_states.size());
124 |         std::transform(_states.begin(), _states.end(), states_str.begin(), [](const auto& state) {
125 |             std::vector items_str(state->size());
126 |             std::transform(state->begin(), state->end(), items_str.begin(), [](const auto& item) {
127 |                 return item->to_string("→", "ε", "•");
128 |             });
129 |             return fmt::format("{} [label=\"{}\\l\", xlabel=\"{}\"]", state->get_index(), fmt::join(items_str.begin(), items_str.end(), "\\l"), state->get_index());
130 |         });
131 |         std::vector edges_str;
132 |         for (const auto& state : _states)
133 |         {
134 |             for (const auto& [sym, dest] : state->get_transitions())
135 |             {
136 |                 edges_str.push_back(fmt::format("{} -> {} [label=\"{}\"]", state->get_index(), dest->get_index(), sym->get_name()));
137 |             }
138 |         }
139 |         return fmt::format(R"(digraph Automaton {{
140 | node [shape=rect];
141 |
142 | {}
143 |
144 | {}
145 | }})",
146 |             fmt::join(states_str.begin(), states_str.end(), "\n"),
147 |             fmt::join(edges_str.begin(), edges_str.end(), "\n")
148 |         );
149 |     }
150 |
151 | private:
152 |     const GrammarType* _grammar;
153 |     std::vector> _states;
154 |     std::unordered_map, StateKernelEquals> _state_to_index;
155 | };
156 |
157 | } // namespace pog
158 |
--------------------------------------------------------------------------------
/examples/ini/ini.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | #include
7 |
8 | using namespace pog;
9 |
10 | struct NoData{};
11 | using Value = std::variant;
12 |
13 | struct
Attribute 14 | { 15 | std::string key; 16 | Value value; 17 | }; 18 | 19 | struct Section 20 | { 21 | std::string name; 22 | std::vector attributes; 23 | }; 24 | 25 | struct Document 26 | { 27 | Section global; 28 | std::vector
sections; 29 | }; 30 | 31 | void print_attributes(const Section& section) 32 | { 33 | for (const auto& attr : section.attributes) 34 | { 35 | fmt::print("{}::{}", section.name, attr.key); 36 | std::visit(overloaded { 37 | [](const NoData&) { fmt::print(" (no data)\n"); }, 38 | [](bool b) { fmt::print(" (bool) = {}\n", b); }, 39 | [](int i) { fmt::print(" (int) = {}\n", i); }, 40 | [](double d) { fmt::print(" (double) = {}\n", d); }, 41 | [](const std::string& s) { fmt::print(" (string) = {}\n", s); } 42 | }, attr.value); 43 | } 44 | } 45 | 46 | int main(int argc, char* argv[]) 47 | { 48 | if (argc < 2) 49 | { 50 | fmt::print("Usage: {} INPUT_FILE\n", argv[0]); 51 | return 1; 52 | } 53 | 54 | using ParserType = std::variant< 55 | Value, 56 | Attribute, 57 | Section, 58 | Document, 59 | std::vector, 60 | std::vector
61 | >; 62 | 63 | Parser p; 64 | 65 | p.token(R"(\s+)"); 66 | p.token("\\[").symbol("["); 67 | p.token("\\]").symbol("]"); 68 | p.token("=").symbol("="); 69 | p.token(R"([0-9]+\.[0-9]+)").symbol("double").action([](std::string_view str) -> ParserType { 70 | return std::stod(std::string{str}); 71 | }); 72 | p.token(R"([0-9]+)").symbol("int").action([](std::string_view str) -> ParserType { 73 | return std::stoi(std::string{str}); 74 | }); 75 | p.token("(true|false)").symbol("bool").action([](std::string_view str) -> ParserType { 76 | return str == "true"; 77 | }); 78 | p.token("[a-zA-Z0-9]+").symbol("id").fullword().action([](std::string_view str) -> ParserType { 79 | return std::string{str}; 80 | }); 81 | 82 | p.set_start_symbol("root"); 83 | p.rule("root") 84 | .production("attrs", "sections", [](auto&& args) -> ParserType { 85 | return Document{ 86 | Section{std::string{}, std::get>(args[0])}, 87 | std::get>(args[1]) 88 | }; 89 | }) 90 | .production("attrs", [](auto&& args) -> ParserType { 91 | return Document{ 92 | Section{std::string{}, std::get>(args[0])}, 93 | std::vector
{} 94 | }; 95 | }) 96 | .production("sections", [](auto&& args) -> ParserType { 97 | return Document{ 98 | Section{std::string{}, std::vector{}}, 99 | std::get>(args[0]) 100 | }; 101 | }) 102 | .production([](auto&&) -> ParserType { 103 | return Document{ 104 | Section{std::string{}, std::vector{}}, 105 | std::vector
{} 106 | }; 107 | }); 108 | 109 | p.rule("sections") 110 | .production("sections", "section", [](auto&& args) -> ParserType { 111 | std::get>(args[0]).push_back(std::get
(args[1])); 112 | return std::move(args[0]); 113 | }) 114 | .production("section", [](auto&& args) -> ParserType { 115 | return std::vector
{std::get
(args[0])}; 116 | }); 117 | 118 | p.rule("section") 119 | .production("[", "id", "]", "attrs", [](auto&& args) -> ParserType { 120 | return Section{ 121 | std::get(std::get(args[1])), 122 | std::get>(args[3]) 123 | }; 124 | }); 125 | 126 | p.rule("attrs") 127 | .production("attrs", "attr", [](auto&& args) -> ParserType { 128 | std::get>(args[0]).push_back(std::get(args[1])); 129 | return std::move(args[0]); 130 | }) 131 | .production("attr", [](auto&& args) -> ParserType { 132 | return std::vector{std::get(args[0])}; 133 | }); 134 | 135 | p.rule("attr").production("id", "=", "value", [](auto&& args) -> ParserType { 136 | return Attribute{ 137 | std::get(std::get(args[0])), 138 | std::get(args[2]) 139 | }; 140 | }); 141 | 142 | p.rule("value") 143 | .production("double", [](auto&& args) -> ParserType { 144 | return std::move(args[0]); 145 | }) 146 | .production("int", [](auto&& args) -> ParserType { 147 | return std::move(args[0]); 148 | }) 149 | .production("bool", [](auto&& args) -> ParserType { 150 | return std::move(args[0]); 151 | }) 152 | .production("id", [](auto&& args) -> ParserType { 153 | return std::move(args[0]); 154 | }); 155 | 156 | p.prepare(); 157 | 158 | std::ifstream input_file(argv[1]); 159 | auto document = p.parse(input_file); 160 | 161 | if (document) 162 | { 163 | const auto& d = document.value(); 164 | if (std::holds_alternative(d)) 165 | { 166 | const auto& dd = std::get(d); 167 | print_attributes(dd.global); 168 | for (const auto& sect : dd.sections) 169 | print_attributes(sect); 170 | } 171 | else 172 | { 173 | fmt::print("Parser error\n"); 174 | return 1; 175 | } 176 | } 177 | else 178 | { 179 | fmt::print("Parser error\n"); 180 | return 1; 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /include/pog/relations/lookback.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace pog { 
8 | 9 | /** 10 | * Lookback is a relation of tuples (q,R) where q is a state and R is a rule from the grammar and 11 | * tuples of (p,x) where p is a state and x is a symbol. Basically it's a relation on state and rule 12 | * with state and symbol. 13 | * 14 | * Let's imagine that state Q contains final item A -> x <*> where x is a sequence of terminals and 15 | * nonterminals. That means we need to perform production of rule A -> x and reduce x on the stack 16 | * into A. That also means that there is some state P with item B -> a <*> A b through which we had to 17 | * go into state Q. If that happens then (Q, A -> x) looks back to (P, A). 18 | * 19 | * To put it simply, in order to get to a state with final item A -> x <*>, we first had to go through 20 | * a state with item B -> a <*> A b. We simply put the state with item A -> x <*> and rule A -> x into relation 21 | * with the origin state with item B -> a <*> A b and symbol A. 22 | * 23 | * This is useful for so-called propagation of lookaheads. If we know that rule A -> x is being used 24 | * and it all originated in a certain state where rule B -> a A b is being processed, we can use what 25 | * can possibly follow A in B -> a A b to know whether to use production of A -> x. 
26 | */ 27 | template 28 | class Lookback : public Relation, StateAndSymbol> 29 | { 30 | public: 31 | using Parent = Relation, StateAndSymbol>; 32 | 33 | using AutomatonType = Automaton; 34 | using BacktrackingInfoType = BacktrackingInfo; 35 | using GrammarType = Grammar; 36 | using StateType = State; 37 | using SymbolType = Symbol; 38 | 39 | using StateAndSymbolType = StateAndSymbol; 40 | using StateAndRuleType = StateAndRule; 41 | 42 | Lookback(const AutomatonType* automaton, const GrammarType* grammar) : Parent(automaton, grammar) {} 43 | Lookback(const Lookback&) = delete; 44 | Lookback(Lookback&&) noexcept = default; 45 | 46 | virtual void calculate() override 47 | { 48 | // Iterate over all states of LR automaton 49 | for (const auto& state : Parent::_automaton->get_states()) 50 | { 51 | for (const auto& item : *state.get()) 52 | { 53 | // We are not interested in items other than in form A -> x <*> 54 | if (!item->is_final()) 55 | continue; 56 | 57 | // Get left-hand side symbol of a rule 58 | auto prod_symbol = item->get_rule()->get_lhs(); 59 | 60 | // Now we'll start backtracking through LR automaton using backtransitions. 61 | // We'll basically just go in the different direction of arrows in the automata. 62 | // We know that we have item A -> x <*> so we know which backtransitions to take (those contained in sequence x). 63 | // There can be multiple transitions through the same symbol 64 | // going into current state so we'll put them into queue and process until queue is empty. 
65 | std::unordered_set visited_states; 66 | std::deque to_process; 67 | // Let's insert the current state and item A -> x <*> into the queue as a starting point 68 | to_process.push_back(BacktrackingInfoType{state.get(), *item.get()}); 69 | while (!to_process.empty()) 70 | { 71 | auto backtracking_info = std::move(to_process.front()); 72 | to_process.pop_front(); 73 | 74 | // If the state has transition over the symbol A, that means there is an item B -> a <*> A b 75 | if (backtracking_info.state->get_transitions().find(prod_symbol) != backtracking_info.state->get_transitions().end()) 76 | { 77 | // Insert relation 78 | StateAndRuleType src_sr{state.get(), item->get_rule()}; 79 | StateAndSymbolType dest_ss{backtracking_info.state, prod_symbol}; 80 | auto itr = Parent::_relation.find(src_sr); 81 | if (itr == Parent::_relation.end()) 82 | Parent::_relation.emplace(std::move(src_sr), std::unordered_set{std::move(dest_ss)}); 83 | else 84 | itr->second.insert(std::move(dest_ss)); 85 | } 86 | 87 | // We've reached item with <*> at the start so we are no longer interested in it 88 | if (backtracking_info.item.get_read_pos() == 0) 89 | continue; 90 | 91 | // Observe backtransitions over the symbol left to the <*> in an item 92 | const auto& back_trans = backtracking_info.state->get_back_transitions(); 93 | auto itr = back_trans.find(backtracking_info.item.get_previous_symbol()); 94 | if (itr == back_trans.end()) 95 | assert(false && "This shouldn't happen"); 96 | 97 | // Perform step back of an item so that <*> in an item is moved one symbol to the left 98 | backtracking_info.item.step_back(); 99 | for (const auto& dest_state : itr->second) 100 | { 101 | if (visited_states.find(dest_state) == visited_states.end()) 102 | { 103 | // Put non-visited states from backtransitions into the queue 104 | to_process.push_back(BacktrackingInfoType{dest_state, backtracking_info.item}); 105 | visited_states.emplace(dest_state); 106 | } 107 | } 108 | } 109 | } 110 | } 111 | } 112 | }; 
113 | 114 | } // namespace pog 115 | -------------------------------------------------------------------------------- /deps/re2/re2/util/strutil.cc: -------------------------------------------------------------------------------- 1 | // Copyright 1999-2005 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #include 6 | #include 7 | 8 | #include "util/strutil.h" 9 | 10 | #ifdef _WIN32 11 | #define snprintf _snprintf 12 | #define vsnprintf _vsnprintf 13 | #endif 14 | 15 | namespace re2 { 16 | 17 | // ---------------------------------------------------------------------- 18 | // CEscapeString() 19 | // Copies 'src' to 'dest', escaping dangerous characters using 20 | // C-style escape sequences. 'src' and 'dest' should not overlap. 21 | // Returns the number of bytes written to 'dest' (not including the \0) 22 | // or (size_t)-1 if there was insufficient space. 23 | // ---------------------------------------------------------------------- 24 | static size_t CEscapeString(const char* src, size_t src_len, 25 | char* dest, size_t dest_len) { 26 | const char* src_end = src + src_len; 27 | size_t used = 0; 28 | 29 | for (; src < src_end; src++) { 30 | if (dest_len - used < 2) // space for two-character escape 31 | return (size_t)-1; 32 | 33 | unsigned char c = *src; 34 | switch (c) { 35 | case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break; 36 | case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break; 37 | case '\t': dest[used++] = '\\'; dest[used++] = 't'; break; 38 | case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break; 39 | case '\'': dest[used++] = '\\'; dest[used++] = '\''; break; 40 | case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break; 41 | default: 42 | // Note that if we emit \xNN and the src character after that is a hex 43 | // digit then that digit must be escaped too to prevent it being 44 | // interpreted as part of the character code 
by C. 45 | if (c < ' ' || c > '~') { 46 | if (dest_len - used < 5) // space for four-character escape + \0 47 | return (size_t)-1; 48 | snprintf(dest + used, 5, "\\%03o", c); 49 | used += 4; 50 | } else { 51 | dest[used++] = c; break; 52 | } 53 | } 54 | } 55 | 56 | if (dest_len - used < 1) // make sure that there is room for \0 57 | return (size_t)-1; 58 | 59 | dest[used] = '\0'; // doesn't count towards return value though 60 | return used; 61 | } 62 | 63 | // ---------------------------------------------------------------------- 64 | // CEscape() 65 | // Copies 'src' to result, escaping dangerous characters using 66 | // C-style escape sequences. 'src' and 'dest' should not overlap. 67 | // ---------------------------------------------------------------------- 68 | std::string CEscape(const StringPiece& src) { 69 | const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion 70 | char* dest = new char[dest_len]; 71 | const size_t used = CEscapeString(src.data(), src.size(), 72 | dest, dest_len); 73 | std::string s = std::string(dest, used); 74 | delete[] dest; 75 | return s; 76 | } 77 | 78 | void PrefixSuccessor(std::string* prefix) { 79 | // We can increment the last character in the string and be done 80 | // unless that character is 255, in which case we have to erase the 81 | // last character and increment the previous character, unless that 82 | // is 255, etc. If the string is empty or consists entirely of 83 | // 255's, we just return the empty string. 84 | while (!prefix->empty()) { 85 | char& c = prefix->back(); 86 | if (c == '\xff') { // char literal avoids signed/unsigned. 87 | prefix->pop_back(); 88 | } else { 89 | ++c; 90 | break; 91 | } 92 | } 93 | } 94 | 95 | static void StringAppendV(std::string* dst, const char* format, va_list ap) { 96 | // First try with a small fixed size buffer 97 | char space[1024]; 98 | 99 | // It's possible for methods that use a va_list to invalidate 100 | // the data in it upon use. 
The fix is to make a copy 101 | // of the structure before using it and use that copy instead. 102 | va_list backup_ap; 103 | va_copy(backup_ap, ap); 104 | int result = vsnprintf(space, sizeof(space), format, backup_ap); 105 | va_end(backup_ap); 106 | 107 | if ((result >= 0) && (static_cast(result) < sizeof(space))) { 108 | // It fit 109 | dst->append(space, result); 110 | return; 111 | } 112 | 113 | // Repeatedly increase buffer size until it fits 114 | int length = sizeof(space); 115 | while (true) { 116 | if (result < 0) { 117 | // Older behavior: just try doubling the buffer size 118 | length *= 2; 119 | } else { 120 | // We need exactly "result+1" characters 121 | length = result+1; 122 | } 123 | char* buf = new char[length]; 124 | 125 | // Restore the va_list before we use it again 126 | va_copy(backup_ap, ap); 127 | result = vsnprintf(buf, length, format, backup_ap); 128 | va_end(backup_ap); 129 | 130 | if ((result >= 0) && (result < length)) { 131 | // It fit 132 | dst->append(buf, result); 133 | delete[] buf; 134 | return; 135 | } 136 | delete[] buf; 137 | } 138 | } 139 | 140 | std::string StringPrintf(const char* format, ...) { 141 | va_list ap; 142 | va_start(ap, format); 143 | std::string result; 144 | StringAppendV(&result, format, ap); 145 | va_end(ap); 146 | return result; 147 | } 148 | 149 | } // namespace re2 150 | -------------------------------------------------------------------------------- /deps/re2/re2/re2/prefilter_tree.h: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | #ifndef RE2_PREFILTER_TREE_H_ 6 | #define RE2_PREFILTER_TREE_H_ 7 | 8 | // The PrefilterTree class is used to form an AND-OR tree of strings 9 | // that would trigger each regexp. 
The 'prefilter' of each regexp is 10 | // added to PrefilterTree, and then PrefilterTree is used to find all 11 | // the unique strings across the prefilters. During search, by using 12 | // matches from a string matching engine, PrefilterTree deduces the 13 | // set of regexps that are to be triggered. The 'string matching 14 | // engine' itself is outside of this class, and the caller can use any 15 | // favorite engine. PrefilterTree provides a set of strings (called 16 | // atoms) that the user of this class should use to do the string 17 | // matching. 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include "util/util.h" 24 | #include "re2/prefilter.h" 25 | #include "re2/sparse_array.h" 26 | 27 | namespace re2 { 28 | 29 | class PrefilterTree { 30 | public: 31 | PrefilterTree(); 32 | explicit PrefilterTree(int min_atom_len); 33 | ~PrefilterTree(); 34 | 35 | // Adds the prefilter for the next regexp. Note that we assume that 36 | // Add called sequentially for all regexps. All Add calls 37 | // must precede Compile. 38 | void Add(Prefilter* prefilter); 39 | 40 | // The Compile returns a vector of string in atom_vec. 41 | // Call this after all the prefilters are added through Add. 42 | // No calls to Add after Compile are allowed. 43 | // The caller should use the returned set of strings to do string matching. 44 | // Each time a string matches, the corresponding index then has to be 45 | // and passed to RegexpsGivenStrings below. 46 | void Compile(std::vector* atom_vec); 47 | 48 | // Given the indices of the atoms that matched, returns the indexes 49 | // of regexps that should be searched. The matched_atoms should 50 | // contain all the ids of string atoms that were found to match the 51 | // content. The caller can use any string match engine to perform 52 | // this function. This function is thread safe. 53 | void RegexpsGivenStrings(const std::vector& matched_atoms, 54 | std::vector* regexps) const; 55 | 56 | // Print debug prefilter. 
Also prints unique ids associated with 57 | // nodes of the prefilter of the regexp. 58 | void PrintPrefilter(int regexpid); 59 | 60 | private: 61 | typedef SparseArray IntMap; 62 | typedef std::map StdIntMap; 63 | typedef std::map NodeMap; 64 | 65 | // Each unique node has a corresponding Entry that helps in 66 | // passing the matching trigger information along the tree. 67 | struct Entry { 68 | public: 69 | // How many children should match before this node triggers the 70 | // parent. For an atom and an OR node, this is 1 and for an AND 71 | // node, it is the number of unique children. 72 | int propagate_up_at_count; 73 | 74 | // When this node is ready to trigger the parent, what are the indices 75 | // of the parent nodes to trigger. The reason there may be more than 76 | // one is because of sharing. For example (abc | def) and (xyz | def) 77 | // are two different nodes, but they share the atom 'def'. So when 78 | // 'def' matches, it triggers two parents, corresponding to the two 79 | // different OR nodes. 80 | StdIntMap* parents; 81 | 82 | // When this node is ready to trigger the parent, what are the 83 | // regexps that are triggered. 84 | std::vector regexps; 85 | }; 86 | 87 | // Returns true if the prefilter node should be kept. 88 | bool KeepNode(Prefilter* node) const; 89 | 90 | // This function assigns unique ids to various parts of the 91 | // prefilter, by looking at if these nodes are already in the 92 | // PrefilterTree. 93 | void AssignUniqueIds(NodeMap* nodes, std::vector* atom_vec); 94 | 95 | // Given the matching atoms, find the regexps to be triggered. 96 | void PropagateMatch(const std::vector& atom_ids, 97 | IntMap* regexps) const; 98 | 99 | // Returns the prefilter node that has the same NodeString as this 100 | // node. For the canonical node, returns node. 101 | Prefilter* CanonicalNode(NodeMap* nodes, Prefilter* node); 102 | 103 | // A string that uniquely identifies the node. 
Assumes that the 104 | // children of node has already been assigned unique ids. 105 | std::string NodeString(Prefilter* node) const; 106 | 107 | // Recursively constructs a readable prefilter string. 108 | std::string DebugNodeString(Prefilter* node) const; 109 | 110 | // Used for debugging. 111 | void PrintDebugInfo(NodeMap* nodes); 112 | 113 | // These are all the nodes formed by Compile. Essentially, there is 114 | // one node for each unique atom and each unique AND/OR node. 115 | std::vector entries_; 116 | 117 | // indices of regexps that always pass through the filter (since we 118 | // found no required literals in these regexps). 119 | std::vector unfiltered_; 120 | 121 | // vector of Prefilter for all regexps. 122 | std::vector prefilter_vec_; 123 | 124 | // Atom index in returned strings to entry id mapping. 125 | std::vector atom_index_to_id_; 126 | 127 | // Has the prefilter tree been compiled. 128 | bool compiled_; 129 | 130 | // Strings less than this length are not stored as atoms. 
131 | const int min_atom_len_; 132 | 133 | PrefilterTree(const PrefilterTree&) = delete; 134 | PrefilterTree& operator=(const PrefilterTree&) = delete; 135 | }; 136 | 137 | } // namespace 138 | 139 | #endif // RE2_PREFILTER_TREE_H_ 140 | -------------------------------------------------------------------------------- /tests/test_rule_builder.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | using namespace pog; 6 | 7 | class TestRuleBuilder : public ::testing::Test 8 | { 9 | public: 10 | Grammar grammar; 11 | }; 12 | 13 | TEST_F(TestRuleBuilder, 14 | Initialization) { 15 | RuleBuilder rb(&grammar, "A"); 16 | 17 | EXPECT_EQ(grammar.get_symbols().size(), 2u); // start and end symbol 18 | EXPECT_TRUE(grammar.get_rules().empty()); 19 | } 20 | 21 | TEST_F(TestRuleBuilder, 22 | NoProductions) { 23 | RuleBuilder rb(&grammar, "A"); 24 | rb.done(); 25 | 26 | EXPECT_EQ(grammar.get_symbols().size(), 2u); 27 | EXPECT_TRUE(grammar.get_rules().empty()); 28 | } 29 | 30 | TEST_F(TestRuleBuilder, 31 | SingleProductionWithoutAction) { 32 | RuleBuilder rb(&grammar, "A"); 33 | rb.production("a"); 34 | rb.done(); 35 | 36 | EXPECT_EQ(grammar.get_symbols().size(), 4u); 37 | EXPECT_EQ(grammar.get_rules().size(), 1u); 38 | EXPECT_EQ(grammar.get_rules()[0]->to_string(), "A -> a"); 39 | EXPECT_FALSE(grammar.get_rules()[0]->has_action()); 40 | } 41 | 42 | TEST_F(TestRuleBuilder, 43 | SingleProductionWithAction) { 44 | RuleBuilder rb(&grammar, "A"); 45 | rb.production("a", [](auto&&) { return 42; }); 46 | rb.done(); 47 | 48 | EXPECT_EQ(grammar.get_symbols().size(), 4u); 49 | EXPECT_EQ(grammar.get_rules().size(), 1u); 50 | EXPECT_EQ(grammar.get_rules()[0]->to_string(), "A -> a"); 51 | EXPECT_TRUE(grammar.get_rules()[0]->has_action()); 52 | } 53 | 54 | TEST_F(TestRuleBuilder, 55 | MultipleProductionsWithActions) { 56 | RuleBuilder rb(&grammar, "A"); 57 | rb.production("A", "a", [](auto&&) { return 42; }) 58 | 
.production("a", [](auto&&) { return 42; }); 59 | rb.done(); 60 | 61 | EXPECT_EQ(grammar.get_symbols().size(), 4u); 62 | EXPECT_EQ(grammar.get_rules().size(), 2u); 63 | EXPECT_EQ(grammar.get_rules()[0]->to_string(), "A -> A a"); 64 | EXPECT_EQ(grammar.get_rules()[1]->to_string(), "A -> a"); 65 | EXPECT_TRUE(grammar.get_rules()[0]->has_action()); 66 | EXPECT_TRUE(grammar.get_rules()[1]->has_action()); 67 | } 68 | 69 | TEST_F(TestRuleBuilder, 70 | SingleProductionWithMidruleActions) { 71 | RuleBuilder rb(&grammar, "func"); 72 | rb.production( 73 | "func", "id", [](auto&&) { return 42; }, 74 | "{", "body", "}", [](auto&&) { return 43; } 75 | ); 76 | rb.done(); 77 | 78 | EXPECT_EQ(grammar.get_symbols().size(), 8u); 79 | EXPECT_EQ(grammar.get_rules().size(), 2u); 80 | EXPECT_EQ(grammar.get_rules()[0]->to_string(), "_func#0.0 -> "); 81 | EXPECT_EQ(grammar.get_rules()[1]->to_string(), "func -> func id _func#0.0 { body }"); 82 | EXPECT_TRUE(grammar.get_rules()[0]->has_action()); 83 | EXPECT_TRUE(grammar.get_rules()[1]->has_action()); 84 | 85 | EXPECT_EQ(grammar.get_rules()[0]->perform_action(std::vector{}), 42); 86 | EXPECT_EQ(grammar.get_rules()[0]->perform_action(std::vector{1, 2, 3}), 42); 87 | EXPECT_EQ(grammar.get_rules()[1]->perform_action(std::vector{}), 43); 88 | EXPECT_EQ(grammar.get_rules()[1]->perform_action(std::vector{1, 2, 3}), 43); 89 | } 90 | 91 | TEST_F(TestRuleBuilder, 92 | MultipleProductionsWithMidruleActions) { 93 | RuleBuilder rb(&grammar, "def"); 94 | rb.production( 95 | "func", "id", [](auto&&) { return 42; }, 96 | "(", "args", ")", [](auto&&) { return 43; }, 97 | "{", "body", "}", [](auto&&) { return 44; } 98 | ) 99 | .production( 100 | "var", "id", "=", [](auto&&) { return 142; }, 101 | "expr", [](auto&&) { return 143; } 102 | ); 103 | rb.done(); 104 | 105 | EXPECT_EQ(grammar.get_symbols().size(), 17u); 106 | EXPECT_EQ(grammar.get_rules().size(), 5u); 107 | EXPECT_EQ(grammar.get_rules()[0]->to_string(), "_def#0.0 -> "); 108 | 
EXPECT_EQ(grammar.get_rules()[1]->to_string(), "_def#0.1 -> "); 109 | EXPECT_EQ(grammar.get_rules()[2]->to_string(), "def -> func id _def#0.0 ( args ) _def#0.1 { body }"); 110 | EXPECT_EQ(grammar.get_rules()[3]->to_string(), "_def#1.0 -> "); 111 | EXPECT_EQ(grammar.get_rules()[4]->to_string(), "def -> var id = _def#1.0 expr"); 112 | EXPECT_TRUE(grammar.get_rules()[0]->has_action()); 113 | EXPECT_TRUE(grammar.get_rules()[1]->has_action()); 114 | EXPECT_TRUE(grammar.get_rules()[2]->has_action()); 115 | EXPECT_TRUE(grammar.get_rules()[3]->has_action()); 116 | EXPECT_TRUE(grammar.get_rules()[4]->has_action()); 117 | 118 | EXPECT_EQ(grammar.get_rules()[0]->perform_action(std::vector{}), 42); 119 | EXPECT_EQ(grammar.get_rules()[0]->perform_action(std::vector{1, 2, 3}), 42); 120 | EXPECT_EQ(grammar.get_rules()[1]->perform_action(std::vector{}), 43); 121 | EXPECT_EQ(grammar.get_rules()[1]->perform_action(std::vector{1, 2, 3}), 43); 122 | EXPECT_EQ(grammar.get_rules()[2]->perform_action(std::vector{}), 44); 123 | EXPECT_EQ(grammar.get_rules()[2]->perform_action(std::vector{1, 2, 3}), 44); 124 | EXPECT_EQ(grammar.get_rules()[3]->perform_action(std::vector{}), 142); 125 | EXPECT_EQ(grammar.get_rules()[3]->perform_action(std::vector{1, 2, 3}), 142); 126 | EXPECT_EQ(grammar.get_rules()[4]->perform_action(std::vector{}), 143); 127 | EXPECT_EQ(grammar.get_rules()[4]->perform_action(std::vector{1, 2, 3}), 143); 128 | } 129 | 130 | TEST_F(TestRuleBuilder, 131 | EpsilonRuleWithAction) { 132 | RuleBuilder rb(&grammar, "A"); 133 | rb.production("A", "a", [](auto&&) { return 42; }) 134 | .production([](auto&&) { return 43; }); 135 | rb.done(); 136 | 137 | EXPECT_EQ(grammar.get_symbols().size(), 4u); 138 | EXPECT_EQ(grammar.get_rules().size(), 2u); 139 | EXPECT_EQ(grammar.get_rules()[0]->to_string(), "A -> A a"); 140 | EXPECT_EQ(grammar.get_rules()[1]->to_string(), "A -> "); 141 | EXPECT_TRUE(grammar.get_rules()[0]->has_action()); 142 | EXPECT_TRUE(grammar.get_rules()[1]->has_action()); 
143 | 144 | EXPECT_EQ(grammar.get_rules()[0]->perform_action(std::vector{}), 42); 145 | EXPECT_EQ(grammar.get_rules()[0]->perform_action(std::vector{1, 2, 3}), 42); 146 | EXPECT_EQ(grammar.get_rules()[1]->perform_action(std::vector{}), 43); 147 | EXPECT_EQ(grammar.get_rules()[1]->perform_action(std::vector{1, 2, 3}), 43); 148 | } 149 | --------------------------------------------------------------------------------