├── .gitignore ├── project └── main.cpp ├── include ├── parserlib │ ├── parse_node_id_type.hpp │ ├── left_recursion_state_type.hpp │ ├── default_source_position.hpp │ ├── default_symbol_comparator.hpp │ ├── any_parse_node.hpp │ ├── end_parse_node.hpp │ ├── case_sensitive_symbol_comparator.hpp │ ├── parent_parse_node.hpp │ ├── left_recursion_exception.hpp │ ├── symbol_range_parse_node.hpp │ ├── bool_parse_node.hpp │ ├── loop_parse_node.hpp │ ├── optional_parse_node.hpp │ ├── parse_error.hpp │ ├── text_source_position.hpp │ ├── debug_parse_node.hpp │ ├── logical_and_parse_node.hpp │ ├── logical_not_parse_node.hpp │ ├── parse_node_wrapper.hpp │ ├── tuple_for_each.hpp │ ├── newline_parse_node.hpp │ ├── symbol_parse_node.hpp │ ├── container_parse_context.hpp │ ├── istream_parse_context.hpp │ ├── match.hpp │ ├── choice_parse_node.hpp │ ├── parse_with_parse_state.hpp │ ├── symbol_set_parse_node.hpp │ ├── string_parse_node.hpp │ ├── parse_node_ptr.hpp │ ├── match_parse_node.hpp │ ├── sequence_parse_node.hpp │ ├── parse_node.hpp │ ├── source_range.hpp │ ├── parse_position.hpp │ ├── rule.hpp │ ├── error_parse_node.hpp │ ├── parse_left_recursion_algorithm.hpp │ ├── parse_context.hpp │ └── source_range_parse_context.hpp └── parserlib.hpp ├── README.md ├── tests └── tests.cpp └── LICENSE.txt /.gitignore: -------------------------------------------------------------------------------- 1 | temp/ 2 | VS*/ 3 | CB/ 4 | *.backup 5 | -------------------------------------------------------------------------------- /project/main.cpp: -------------------------------------------------------------------------------- 1 | extern void run_tests(); 2 | 3 | 4 | int main() { 5 | run_tests(); 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /include/parserlib/parse_node_id_type.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARSE_NODE_ID_TYPE_HPP 2 | #define 
PARSERLIB_PARSE_NODE_ID_TYPE_HPP 3 | 4 | 5 | #include 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * Parse node id type. 13 | * It can also hold a pointer value. 14 | */ 15 | using parse_node_id_type = uintptr_t; 16 | 17 | 18 | } //namespace parserlib 19 | 20 | 21 | #endif //PARSERLIB_PARSE_NODE_ID_TYPE_HPP 22 | -------------------------------------------------------------------------------- /include/parserlib/left_recursion_state_type.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_LEFT_RECURSION_STATE_TYPE_HPP 2 | #define PARSERLIB_LEFT_RECURSION_STATE_TYPE_HPP 3 | 4 | 5 | namespace parserlib { 6 | 7 | 8 | /** 9 | * State types for left recursion. 10 | */ 11 | enum class left_recursion_state_type { 12 | /** No left recursion detected. */ 13 | no_left_recursion, 14 | 15 | /** Left recursion must be rejected. */ 16 | reject_left_recursion, 17 | 18 | /** Left recursion must be accepted. */ 19 | accept_left_recursion, 20 | 21 | /** Left recursion accepted. */ 22 | accepted_left_recursion 23 | }; 24 | 25 | 26 | } //namespace parserlib 27 | 28 | 29 | #endif //PARSERLIB_LEFT_RECURSION_STATE_TYPE_HPP 30 | -------------------------------------------------------------------------------- /include/parserlib/default_source_position.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_DEFAULT_SOURCE_POSITION_HPP 2 | #define PARSERLIB_DEFAULT_SOURCE_POSITION_HPP 3 | 4 | 5 | #include 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * An empty source position. 13 | */ 14 | class default_source_position { 15 | public: 16 | /** 17 | * Does nothing. 18 | */ 19 | void increment() { 20 | } 21 | 22 | /** 23 | * Does nothing. 24 | */ 25 | void increment(std::size_t) { 26 | } 27 | 28 | /** 29 | * Does nothing. 
30 | */ 31 | void increment_line() { 32 | } 33 | }; 34 | 35 | 36 | } //namespace parserlib 37 | 38 | 39 | #endif //PARSERLIB_DEFAULT_SOURCE_POSITION_HPP 40 | -------------------------------------------------------------------------------- /include/parserlib/default_symbol_comparator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_DEFAULT_SYMBOL_COMPARATOR_HPP 2 | #define PARSERLIB_DEFAULT_SYMBOL_COMPARATOR_HPP 3 | 4 | 5 | namespace parserlib { 6 | 7 | 8 | /** 9 | * The default symbol comparator. 10 | * It compares values 'as-is', i.e. without a conversion. 11 | */ 12 | class default_symbol_comparator { 13 | public: 14 | /** 15 | * Returns the difference of the two objects, after casting them to 'int'. 16 | * @param a the first symbol. 17 | * @param b the second symbol. 18 | * @return the difference 'a - b' as an integer. 19 | */ 20 | template 21 | int operator ()(const A& a, const B& b) const { 22 | return static_cast(a) - static_cast(b); 23 | } 24 | }; 25 | 26 | 27 | } //namespace parserlib 28 | 29 | 30 | #endif //PARSERLIB_DEFAULT_SYMBOL_COMPARATOR_HPP 31 | -------------------------------------------------------------------------------- /include/parserlib/any_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_ANY_PARSE_NODE_HPP 2 | #define PARSERLIB_ANY_PARSE_NODE_HPP 3 | 4 | 5 | #include "parse_node.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse node that parses any symbol. 13 | */ 14 | class any_parse_node : public parse_node { 15 | public: 16 | /** 17 | * Parses the next available symbol. 18 | * @param pc the parse context to use. 19 | * @return true if the end has not been reached, false otherwise. 20 | */ 21 | template 22 | bool parse(ParseContext& pc) const { 23 | return pc.parse_any_symbol(); 24 | } 25 | }; 26 | 27 | 28 | /** 29 | * Creates an any parse node instance. 
30 | * @return an any parse node instance. 31 | */ 32 | inline any_parse_node any() { 33 | return any_parse_node(); 34 | } 35 | 36 | 37 | } //namespace parserlib 38 | 39 | 40 | #endif //PARSERLIB_ANY_PARSE_NODE_HPP 41 | -------------------------------------------------------------------------------- /include/parserlib/end_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_END_PARSE_NODE_HPP 2 | #define PARSERLIB_END_PARSE_NODE_HPP 3 | 4 | 5 | #include "parse_node.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse node that checks if the end of input has been reached. 13 | */ 14 | class end_parse_node : public parse_node { 15 | public: 16 | /** 17 | * Returns true if the end has been reached. 18 | * @param pc the parse context to use. 19 | * @return true if the end has been reached, false otherwise. 20 | */ 21 | template 22 | bool parse(ParseContext& pc) const { 23 | return !pc.is_valid_parse_position(); 24 | } 25 | }; 26 | 27 | 28 | /** 29 | * Creates an end parse node instance. 30 | * @return an end parse node instance. 31 | */ 32 | inline end_parse_node end() { 33 | return end_parse_node(); 34 | } 35 | 36 | 37 | } //namespace parserlib 38 | 39 | 40 | #endif //PARSERLIB_END_PARSE_NODE_HPP 41 | -------------------------------------------------------------------------------- /include/parserlib/case_sensitive_symbol_comparator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_CASE_SENSITIVE_SYMBOL_COMPARATOR_HPP 2 | #define PARSERLIB_CASE_SENSITIVE_SYMBOL_COMPARATOR_HPP 3 | 4 | 5 | #include 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A case-insensitive symbol comparator (despite the 'case_sensitive' in its name). 13 | * It compares values by converting both of them to lowercase, using the function 'std::tolower'.
14 | */ 15 | class case_sensitive_symbol_comparator { 16 | public: 17 | /** 18 | * Returns the difference of the two objects, after casting them to 'int' and lowering their case using 'std::tolower'. 19 | * @param a the first symbol. 20 | * @param b the second symbol. 21 | * @return the difference of the lowercased values, 'tolower(a) - tolower(b)', as an integer. 22 | */ 23 | template 24 | int operator ()(const A& a, const B& b) const { 25 | return std::tolower(static_cast(a)) - std::tolower(static_cast(b)); 26 | } 27 | }; 28 | 29 | 30 | } //namespace parserlib 31 | 32 | 33 | #endif //PARSERLIB_CASE_SENSITIVE_SYMBOL_COMPARATOR_HPP 34 | -------------------------------------------------------------------------------- /include/parserlib/parent_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARENT_PARSE_NODE_HPP 2 | #define PARSERLIB_PARENT_PARSE_NODE_HPP 3 | 4 | 5 | #include "parse_node.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * Base class for parse nodes with children. 13 | * @param Impl type of class that is derived from this class and represents the implementation type. 14 | * @param Children type of children. 15 | */ 16 | template 17 | class parent_parse_node : public parse_node { 18 | public: 19 | /** 20 | * The constructor. 21 | * @param children the children. 22 | */ 23 | parent_parse_node(const Children& children) 24 | : m_children(children) 25 | { 26 | } 27 | 28 | /** 29 | * Returns the children. 30 | * @return the children.
31 | */ 32 | const Children& get_children() const { 33 | return m_children; 34 | } 35 | 36 | private: 37 | Children m_children; 38 | }; 39 | 40 | 41 | } //namespace parserlib 42 | 43 | 44 | #endif //PARSERLIB_PARENT_PARSE_NODE_HPP 45 | -------------------------------------------------------------------------------- /include/parserlib/left_recursion_exception.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_LEFT_RECURSION_EXCEPTION_HPP 2 | #define PARSERLIB_LEFT_RECURSION_EXCEPTION_HPP 3 | 4 | 5 | #include "parse_node_id_type.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * Exception thrown when there is a left recursion. 13 | */ 14 | class left_recursion_exception { 15 | public: 16 | /** 17 | * The constructor. 18 | * @param parse_node_id id of the parse node that caused the left recursion. 19 | */ 20 | left_recursion_exception(parse_node_id_type parse_node_id) 21 | : m_parse_node_id(parse_node_id) 22 | { 23 | } 24 | 25 | /** 26 | * Returns the id of the parse node that caused the left recursion. 27 | * @return the id of the parse node that caused the left recursion. 28 | */ 29 | parse_node_id_type get_parse_node_id() const { 30 | return m_parse_node_id; 31 | } 32 | 33 | private: 34 | const parse_node_id_type m_parse_node_id; 35 | }; 36 | 37 | 38 | } //namespace parserlib 39 | 40 | 41 | #endif //PARSERLIB_LEFT_RECURSION_EXCEPTION_HPP 42 | -------------------------------------------------------------------------------- /include/parserlib/symbol_range_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_SYMBOL_RANGE_PARSE_NODE_HPP 2 | #define PARSERLIB_SYMBOL_RANGE_PARSE_NODE_HPP 3 | 4 | 5 | #include 6 | #include "parse_node.hpp" 7 | 8 | 9 | namespace parserlib { 10 | 11 | 12 | /** 13 | * A parse node that parses a symbol within a range. 
14 | */ 15 | template 16 | class symbol_range_parse_node : public parse_node> { 17 | public: 18 | symbol_range_parse_node(const Symbol& min, const Symbol& max) 19 | : m_min(min) 20 | , m_max(max) 21 | { 22 | assert(min <= max); 23 | } 24 | 25 | template 26 | bool parse(ParseContext& pc) const { 27 | return pc.parse_symbol_range(static_cast(m_min), static_cast(m_max)); 28 | } 29 | 30 | private: 31 | Symbol m_min; 32 | Symbol m_max; 33 | }; 34 | 35 | 36 | template 37 | symbol_range_parse_node range(const Symbol& min, const Symbol& max) { 38 | return { min, max }; 39 | } 40 | 41 | 42 | } //namespace parserlib 43 | 44 | 45 | #endif //PARSERLIB_SYMBOL_RANGE_PARSE_NODE_HPP 46 | -------------------------------------------------------------------------------- /include/parserlib.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_HPP 2 | #define PARSERLIB_HPP 3 | 4 | 5 | #include "parserlib/default_source_position.hpp" 6 | #include "parserlib/text_source_position.hpp" 7 | #include "parserlib/default_symbol_comparator.hpp" 8 | #include "parserlib/case_sensitive_symbol_comparator.hpp" 9 | #include "parserlib/container_parse_context.hpp" 10 | #include "parserlib/istream_parse_context.hpp" 11 | #include "parserlib/symbol_parse_node.hpp" 12 | #include "parserlib/string_parse_node.hpp" 13 | #include "parserlib/symbol_set_parse_node.hpp" 14 | #include "parserlib/symbol_range_parse_node.hpp" 15 | #include "parserlib/any_parse_node.hpp" 16 | #include "parserlib/end_parse_node.hpp" 17 | #include "parserlib/bool_parse_node.hpp" 18 | #include "parserlib/debug_parse_node.hpp" 19 | #include "parserlib/newline_parse_node.hpp" 20 | #include "parserlib/error_parse_node.hpp" 21 | #include "parserlib/loop_parse_node.hpp" 22 | #include "parserlib/optional_parse_node.hpp" 23 | #include "parserlib/logical_and_parse_node.hpp" 24 | #include "parserlib/logical_not_parse_node.hpp" 25 | #include "parserlib/sequence_parse_node.hpp" 26 | #include 
"parserlib/choice_parse_node.hpp" 27 | #include "parserlib/match_parse_node.hpp" 28 | #include "parserlib/parse_node_ptr.hpp" 29 | #include "parserlib/rule.hpp" 30 | 31 | 32 | #endif // PARSERLIB_HPP 33 | -------------------------------------------------------------------------------- /include/parserlib/bool_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_BOOL_PARSE_NODE_HPP 2 | #define PARSERLIB_BOOL_PARSE_NODE_HPP 3 | 4 | 5 | #include 6 | #include "parse_node.hpp" 7 | 8 | 9 | namespace parserlib { 10 | 11 | 12 | /** 13 | * A parse node that returns a boolean value. 14 | */ 15 | class bool_parse_node : public parse_node { 16 | public: 17 | /** 18 | * The constructor. 19 | * @param value the boolean value. 20 | */ 21 | bool_parse_node(bool value) 22 | : m_value(value) 23 | { 24 | } 25 | 26 | /** 27 | * Returns the boolean value. 28 | * @param pc the parse context to use. 29 | * @return the boolean value. 30 | */ 31 | template 32 | bool parse(ParseContext& pc) const { 33 | return m_value; 34 | } 35 | 36 | private: 37 | bool m_value; 38 | }; 39 | 40 | 41 | /** 42 | * Allows the creation of a boolean parse node. 43 | * It is declared as a templated function in order to avoid accidental conversions. 44 | * @param value the boolean value to create a parse node for. 45 | * @return a boolean parse node. 
46 | */ 47 | template , bool> = true> 48 | bool_parse_node make_parse_node(T value) { 49 | return value; 50 | } 51 | 52 | 53 | } //namespace parserlib 54 | 55 | 56 | #endif //PARSERLIB_BOOL_PARSE_NODE_HPP 57 | -------------------------------------------------------------------------------- /include/parserlib/loop_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_LOOP_PARSE_NODE_HPP 2 | #define PARSERLIB_LOOP_PARSE_NODE_HPP 3 | 4 | 5 | #include "parent_parse_node.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse node that makes a loop out of another parse node. 13 | * @param Child Type of the parse node to create a loop for. 14 | */ 15 | template 16 | class loop_parse_node : public parent_parse_node, Child> { 17 | public: 18 | /** The parent parse node type. */ 19 | using parent_type = parent_parse_node, Child>; 20 | 21 | /** 22 | * The constructor. 23 | * @param child the child. 24 | */ 25 | loop_parse_node(const Child& child) 26 | : parent_type(child) 27 | { 28 | } 29 | 30 | /** 31 | * Continues parsing while the child returns 'true' from its parse function. 32 | * @param pc the context to pass to the child. 33 | * @return true. 
34 | */ 35 | template 36 | bool parse(ParseContext& pc) const { 37 | while (parent_type::get_children().parse(pc)) { 38 | } 39 | return true; 40 | } 41 | }; 42 | 43 | 44 | template 45 | loop_parse_node parse_node::operator *() const { 46 | return *get_impl(); 47 | } 48 | 49 | 50 | } //namespace parserlib 51 | 52 | 53 | #endif //PARSERLIB_LOOP_PARSE_NODE_HPP 54 | -------------------------------------------------------------------------------- /include/parserlib/optional_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_OPTIONAL_PARSE_NODE_HPP 2 | #define PARSERLIB_OPTIONAL_PARSE_NODE_HPP 3 | 4 | 5 | #include "parent_parse_node.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse node that makes another parse node optional. 13 | * @param Child Type of the parse node to make optional. 14 | */ 15 | template 16 | class optional_parse_node : public parent_parse_node, Child> { 17 | public: 18 | /** The parent parse node type. */ 19 | using parent_type = parent_parse_node, Child>; 20 | 21 | /** 22 | * The constructor. 23 | * @param child the child. 24 | */ 25 | optional_parse_node(const Child& child) 26 | : parent_type(child) 27 | { 28 | } 29 | 30 | /** 31 | * Invokes the child's parse function exactly once and ignores its result. 32 | * @param pc the context to pass to the child. 33 | * @return always true, whether or not the child returned true.
34 | */ 35 | template 36 | bool parse(ParseContext& pc) const { 37 | parent_type::get_children().parse(pc); 38 | return true; 39 | } 40 | }; 41 | 42 | 43 | template 44 | optional_parse_node parse_node::operator -() const { 45 | return *get_impl(); 46 | } 47 | 48 | 49 | } //namespace parserlib 50 | 51 | 52 | #endif //PARSERLIB_OPTIONAL_PARSE_NODE_HPP 53 | -------------------------------------------------------------------------------- /include/parserlib/parse_error.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARSE_ERROR_HPP 2 | #define PARSERLIB_PARSE_ERROR_HPP 3 | 4 | 5 | #include "source_range.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A class that represents a source range that is a parse error. 13 | * @param Id id type of the error. 14 | * @param Iterator type of iterator for the parse position. 15 | * @param SourcePosition type of source position for the parse position. 16 | */ 17 | template 18 | class parse_error : public source_range { 19 | public: 20 | /** The source range type. */ 21 | using source_range_type = source_range; 22 | 23 | using typename source_range_type::id_type; 24 | using typename source_range_type::parse_position_type; 25 | 26 | /** 27 | * The default constructor. 28 | */ 29 | parse_error() { 30 | } 31 | 32 | /** 33 | * Constructor from arguments. 34 | * @param id id of the range. 35 | * @param begin_parse_position the position where the range starts from. 36 | * @param end_parse_position the position where the range ends at.
37 | */ 38 | parse_error(const id_type& id, const parse_position_type& begin_parse_position, const parse_position_type& end_parse_position) 39 | : source_range_type(id, begin_parse_position, end_parse_position) 40 | { 41 | } 42 | }; 43 | 44 | 45 | } //namespace parserlib 46 | 47 | 48 | #endif //PARSERLIB_PARSE_ERROR_HPP 49 | -------------------------------------------------------------------------------- /include/parserlib/text_source_position.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_TEXT_SOURCE_POSITION_HPP 2 | #define PARSERLIB_TEXT_SOURCE_POSITION_HPP 3 | 4 | 5 | #include 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A text source position. 13 | * It maintains a line and a column. 14 | * Both start counting from 1. 15 | */ 16 | class text_source_position { 17 | public: 18 | /** 19 | * Returns the current line. 20 | * @return the current line. 21 | */ 22 | std::size_t get_line() const { 23 | return m_line; 24 | } 25 | 26 | /** 27 | * Returns the current column. 28 | * @return the current column. 29 | */ 30 | std::size_t get_column() const { 31 | return m_column; 32 | } 33 | 34 | /** 35 | * Increments the current column. 36 | */ 37 | void increment() { 38 | ++m_column; 39 | } 40 | 41 | /** 42 | * Increments the current column by the given count. 43 | * @param count number of columns to add. 44 | */ 45 | void increment(std::size_t count) { 46 | m_column += count; 47 | } 48 | 49 | /** 50 | * Increments the line and sets the column to 1.
51 | */ 52 | void increment_line() { 53 | ++m_line; 54 | m_column = 1; 55 | } 56 | 57 | private: 58 | std::size_t m_line{ 1 }; 59 | std::size_t m_column{ 1 }; 60 | }; 61 | 62 | 63 | } //namespace parserlib 64 | 65 | 66 | #endif //PARSERLIB_TEXT_SOURCE_POSITION_HPP 67 | -------------------------------------------------------------------------------- /include/parserlib/debug_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_DEBUG_PARSE_NODE_HPP 2 | #define PARSERLIB_DEBUG_PARSE_NODE_HPP 3 | 4 | 5 | #include "parent_parse_node.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse node that can be used to debug another parse node. 13 | * @param Child Type of the parse node to debug. 14 | */ 15 | template 16 | class debug_parse_node : public parent_parse_node, Child> { 17 | public: 18 | /** The parent parse node type. */ 19 | using parent_type = parent_parse_node, Child>; 20 | 21 | /** 22 | * The constructor. 23 | * @param child the child. 24 | */ 25 | debug_parse_node(const Child& child) 26 | : parent_type(child) 27 | { 28 | } 29 | 30 | /** 31 | * It invokes the child node to parse. 32 | * A breakpoint can be put here for debugging. 33 | * @param pc the context to pass to the child. 34 | * @return the result of the child parse node. 35 | */ 36 | template 37 | bool parse(ParseContext& pc) const { 38 | const bool result = parent_type::get_children().parse(pc); 39 | return result; 40 | } 41 | }; 42 | 43 | 44 | /** 45 | * Creates a debug parse node for another parse node. 46 | * @param child the child parse node to debug. 47 | * @return a debug parse node.
48 | */ 49 | template 50 | debug_parse_node debug(const parse_node& child) { 51 | return *child.get_impl(); 52 | } 53 | 54 | 55 | } //namespace parserlib 56 | 57 | 58 | #endif //PARSERLIB_DEBUG_PARSE_NODE_HPP 59 | -------------------------------------------------------------------------------- /include/parserlib/logical_and_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_LOGICAL_AND_PARSE_NODE_HPP 2 | #define PARSERLIB_LOGICAL_AND_PARSE_NODE_HPP 3 | 4 | 5 | #include "parent_parse_node.hpp" 6 | #include "parse_with_parse_state.hpp" 7 | 8 | 9 | namespace parserlib { 10 | 11 | 12 | /** 13 | * A parse node that acts as a logical AND predicate, using another node. 14 | * @param Child Type of the parse node to make a logical AND parse node out of. 15 | */ 16 | template 17 | class logical_and_parse_node : public parent_parse_node, Child> { 18 | public: 19 | /** The parent parse node type. */ 20 | using parent_type = parent_parse_node, Child>; 21 | 22 | /** 23 | * The constructor. 24 | * @param child the child. 25 | */ 26 | logical_and_parse_node(const Child& child) 27 | : parent_type(child) 28 | { 29 | } 30 | 31 | /** 32 | * Parses the node; in the end, it restores the parse state to the one 33 | * before the function is entered. 34 | * @param pc the context to pass to the child. 35 | * @return true if the parsing succeeded, false otherwise.
36 | */ 37 | template 38 | bool parse(ParseContext& pc) const { 39 | return parse_without_parse_state(pc, [&](ParseContext& pc) { 40 | return parent_type::get_children().parse(pc); 41 | }); 42 | } 43 | }; 44 | 45 | 46 | template 47 | logical_and_parse_node parse_node::operator &() const { 48 | return *get_impl(); 49 | } 50 | 51 | 52 | } //namespace parserlib 53 | 54 | 55 | #endif //PARSERLIB_LOGICAL_AND_PARSE_NODE_HPP 56 | -------------------------------------------------------------------------------- /include/parserlib/logical_not_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_LOGICAL_NOT_PARSE_NODE_HPP 2 | #define PARSERLIB_LOGICAL_NOT_PARSE_NODE_HPP 3 | 4 | 5 | #include "parent_parse_node.hpp" 6 | #include "parse_with_parse_state.hpp" 7 | 8 | 9 | namespace parserlib { 10 | 11 | 12 | /** 13 | * A parse node that acts as a logical NOT predicate, using another node. 14 | * @param Child Type of the parse node to make a logical NOT parse node out of. 15 | */ 16 | template 17 | class logical_not_parse_node : public parent_parse_node, Child> { 18 | public: 19 | /** The parent parse node type. */ 20 | using parent_type = parent_parse_node, Child>; 21 | 22 | /** 23 | * The constructor. 24 | * @param child the child. 25 | */ 26 | logical_not_parse_node(const Child& child) 27 | : parent_type(child) 28 | { 29 | } 30 | 31 | /** 32 | * Parses the node; in the end, it restores the parse state to the one 33 | * before the function is entered. 34 | * @param pc the context to pass to the child. 35 | * @return the opposite of what the child parse node returned.
36 | */ 37 | template 38 | bool parse(ParseContext& pc) const { 39 | return parse_without_parse_state(pc, [&](ParseContext& pc) { 40 | return !parent_type::get_children().parse(pc); 41 | }); 42 | } 43 | }; 44 | 45 | 46 | template 47 | logical_not_parse_node parse_node::operator !() const { 48 | return *get_impl(); 49 | } 50 | 51 | 52 | } //namespace parserlib 53 | 54 | 55 | #endif //PARSERLIB_LOGICAL_NOT_PARSE_NODE_HPP 56 | -------------------------------------------------------------------------------- /include/parserlib/parse_node_wrapper.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARSE_NODE_WRAPPER_HPP 2 | #define PARSERLIB_PARSE_NODE_WRAPPER_HPP 3 | 4 | 5 | namespace parserlib { 6 | 7 | 8 | /** 9 | * Base class for parse node wrapper implementations. 10 | * @param ParseContext the parse context type to use for parsing. 11 | */ 12 | template 13 | class parse_node_wrapper { 14 | public: 15 | /** 16 | * The destructor. 17 | * Virtual due to polymorphism. 18 | */ 19 | virtual ~parse_node_wrapper() { 20 | } 21 | 22 | /** 23 | * Interface for parsing. 24 | * @param pc the parse context to use. 25 | * @return true on success, false on failure. 26 | */ 27 | virtual bool parse(ParseContext& pc) const = 0; 28 | }; 29 | 30 | 31 | /** 32 | * Implementation for parse node wrapping. 33 | * @param ParseContext the parse context type to use for parsing. 34 | * @param ParseNode the parse node type to wrap. 35 | */ 36 | template 37 | class parse_node_wrapper_impl : public parse_node_wrapper { 38 | public: 39 | /** 40 | * The constructor. 41 | * @param parse_node the parse node to parse. 42 | */ 43 | parse_node_wrapper_impl(const ParseNode& parse_node) 44 | : m_impl(parse_node) 45 | { 46 | } 47 | 48 | /** 49 | * Invokes the parse function of the wrapped parse node. 50 | * @param pc the parse context to use. 51 | * @return true on success, false on failure. 
52 | */ 53 | bool parse(ParseContext& pc) const final { 54 | return m_impl.parse(pc); 55 | } 56 | 57 | private: 58 | ParseNode m_impl; 59 | }; 60 | 61 | 62 | } //namespace parserlib 63 | 64 | 65 | #endif //PARSERLIB_PARSE_NODE_WRAPPER_HPP 66 | -------------------------------------------------------------------------------- /include/parserlib/tuple_for_each.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_TUPLE_FOR_EACH_HPP 2 | #define PARSERLIB_TUPLE_FOR_EACH_HPP 3 | 4 | 5 | #include 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | template class parse_node; 12 | 13 | 14 | /** 15 | * Iterates the given tuple and calls the given function for each member. 16 | * @param Index index to start the iteration from. 17 | * @param tpl the tuple. 18 | * @param fn the function. 19 | */ 20 | template 21 | void tuple_for_each(const Tpl& tpl, const F& fn) { 22 | if constexpr (Index < std::tuple_size_v) { 23 | fn(std::get(tpl)); 24 | tuple_for_each(tpl, fn); 25 | } 26 | } 27 | 28 | 29 | /** 30 | * Iterates the given tuple and calls the given function for each member. 31 | * If the function returns the same value as the result, then 32 | * the function is invoked for the next tuple members; 33 | * otherwise, the opposite of the given result is returned. 34 | * If all members of the tuple are processed, 35 | * then the given result is returned. 36 | * @param Index index to start the iteration from. 37 | * @param Result the result to return. 38 | * @param tpl the tuple. 39 | * @param fn the function. 
40 | */ 41 | template 42 | bool tuple_for_each_cond(const Tpl& tpl, const F& fn) { 43 | if constexpr (Index < std::tuple_size_v) { 44 | if (fn(std::get(tpl)) == Result) { 45 | return tuple_for_each_cond(tpl, fn); 46 | } 47 | else { 48 | return !Result; 49 | } 50 | } 51 | else { 52 | return Result; 53 | } 54 | } 55 | 56 | 57 | } //namespace parserlib 58 | 59 | 60 | #endif //PARSERLIB_TUPLE_FOR_EACH_HPP 61 | -------------------------------------------------------------------------------- /include/parserlib/newline_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_NEWLINE_PARSE_NODE_HPP 2 | #define PARSERLIB_NEWLINE_PARSE_NODE_HPP 3 | 4 | 5 | #include "parent_parse_node.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse node that can be used to increment a parse context's current line, 13 | * when another parser parses successfully. 14 | * @param Child Type of the parse node to newline. 15 | */ 16 | template 17 | class newline_parse_node : public parent_parse_node, Child> { 18 | public: 19 | /** The parent parse node type. */ 20 | using parent_type = parent_parse_node, Child>; 21 | 22 | /** 23 | * The constructor. 24 | * @param child the child. 25 | */ 26 | newline_parse_node(const Child& child) 27 | : parent_type(child) 28 | { 29 | } 30 | 31 | /** 32 | * It invokes the child node to parse. 33 | * If the child node parses successfully, then the parse position line is incremented. 34 | * @param pc the context to pass to the child. 35 | * @return the result of the child parse node. 36 | */ 37 | template 38 | bool parse(ParseContext& pc) const { 39 | if (parent_type::get_children().parse(pc)) { 40 | pc.increment_parse_position_line(); 41 | return true; 42 | } 43 | return false; 44 | } 45 | }; 46 | 47 | 48 | /** 49 | * Creates a newline parse node for another parse node. 50 | * @param value value or parse node to create a newline parser for. 51 | * @return a newline parse node. 
52 | */ 53 | template 54 | auto newline(const T& value) { 55 | return newline_parse_node(make_parse_node(value)); 56 | } 57 | 58 | 59 | } //namespace parserlib 60 | 61 | 62 | #endif //PARSERLIB_NEWLINE_PARSE_NODE_HPP 63 | -------------------------------------------------------------------------------- /include/parserlib/symbol_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_SYMBOL_PARSE_NODE_HPP 2 | #define PARSERLIB_SYMBOL_PARSE_NODE_HPP 3 | 4 | 5 | #include 6 | #include "parse_node.hpp" 7 | 8 | 9 | namespace parserlib { 10 | 11 | 12 | /** 13 | * A parse node class that parses a single symbol. 14 | * @param Symbol Type of symbol to parse; must be convertible to 'int'. 15 | */ 16 | template 17 | class symbol_parse_node : public parse_node> { 18 | public: 19 | /** 20 | * The constructor. 21 | * @param symbol the symbol to parse. 22 | */ 23 | symbol_parse_node(const Symbol& symbol) 24 | : m_symbol(symbol) 25 | { 26 | } 27 | 28 | /** 29 | * Parses a symbol. 30 | * @param pc the context to use for parsing. 31 | * @return true on success, false on failure. 32 | */ 33 | template 34 | bool parse(ParseContext& pc) const { 35 | return pc.parse_symbol(static_cast(m_symbol)); 36 | } 37 | 38 | private: 39 | Symbol m_symbol; 40 | }; 41 | 42 | 43 | /** 44 | * Creates a symbol parse node. 45 | * @param symbol the symbol to parse. 46 | * @return a symbol parse node. 47 | */ 48 | template 49 | symbol_parse_node terminal(const Symbol& symbol) { 50 | return symbol; 51 | } 52 | 53 | 54 | /** 55 | * Creates a symbol parse node. 56 | * @param symbol the symbol to parse; must not be a boolean. 57 | * @return a symbol parse node. 
58 | */ 59 | template && !std::is_base_of_v, bool> = true> 60 | symbol_parse_node make_parse_node(const Symbol& symbol) { 61 | return symbol; 62 | } 63 | 64 | 65 | } //namespace parserlib 66 | 67 | 68 | #endif //PARSERLIB_SYMBOL_PARSE_NODE_HPP 69 | -------------------------------------------------------------------------------- /include/parserlib/container_parse_context.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CONTAINER_PARSE_CONTEXT_HPP 2 | #define CONTAINER_PARSE_CONTEXT_HPP 3 | 4 | 5 | #include "source_range_parse_context.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse context class used for parsing source in a container. 13 | * @param Source container type. 14 | * @param MatchId match id type. 15 | * @param ErrorId error id type. 16 | * @param SourcePosition source position type. 17 | * @param SymbolComparator symbol comparator type. 18 | */ 19 | template < 20 | class Source = std::string, 21 | class MatchId = int, 22 | class ErrorId = int, 23 | class SourcePosition = default_source_position, 24 | class SymbolComparator = default_symbol_comparator 25 | > 26 | class container_parse_context 27 | : public source_range_parse_context 28 | { 29 | public: 30 | /** Iterator type. */ 31 | using iterator_type = typename Source::const_iterator; 32 | 33 | /** Base class type. */ 34 | using range_parse_context_type = source_range_parse_context; 35 | 36 | /** 37 | * Constructor from iterator range. 38 | * @param begin start of range to parse. 39 | * @param end end of range to parse. 40 | */ 41 | container_parse_context(const iterator_type& begin, const iterator_type& end) 42 | : range_parse_context_type(begin, end) 43 | { 44 | } 45 | 46 | /** 47 | * Constructor from container. 48 | * @param source the container to parse. 
49 | */ 50 | container_parse_context(Source& source) 51 | : container_parse_context(source.begin(), source.end()) 52 | { 53 | } 54 | }; 55 | 56 | 57 | } //namespace parserlib 58 | 59 | 60 | #endif //CONTAINER_PARSE_CONTEXT_HPP 61 | -------------------------------------------------------------------------------- /include/parserlib/istream_parse_context.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_ISTREAM_PARSE_CONTEXT_HPP 2 | #define PARSERLIB_ISTREAM_PARSE_CONTEXT_HPP 3 | 4 | 5 | #include 6 | #include 7 | #include "source_range_parse_context.hpp" 8 | 9 | 10 | namespace parserlib { 11 | 12 | 13 | /** 14 | * A parse context class used for parsing source in an input stream. 15 | * @param Source stream type. 16 | * @param MatchId match id type. 17 | * @param ErrorId error id type. 18 | * @param SourcePosition source position type. 19 | * @param SymbolComparator symbol comparator type. 20 | */ 21 | template < 22 | class Source = std::stringstream, 23 | class MatchId = int, 24 | class ErrorId = int, 25 | class SourcePosition = default_source_position, 26 | class SymbolComparator = default_symbol_comparator 27 | > 28 | class istream_parse_context 29 | : public source_range_parse_context, MatchId, ErrorId, SourcePosition, SymbolComparator> 30 | { 31 | public: 32 | /** Iterator type. */ 33 | using iterator_type = std::istreambuf_iterator; 34 | 35 | /** Base class type. */ 36 | using range_parse_context_type = source_range_parse_context; 37 | 38 | /** 39 | * Constructor from iterator range. 40 | * @param begin start of range to parse. 41 | * @param end end of range to parse; defaults to a default-constructed (end-of-stream) iterator. 42 | */ 43 | istream_parse_context(const iterator_type& begin, const iterator_type& end = iterator_type()) 44 | : range_parse_context_type(begin, end) 45 | { 46 | } 47 | 48 | /** 49 | * Constructor from stream. 50 | * @param source the stream to parse.
51 | */ 52 | istream_parse_context(Source& source) 53 | : istream_parse_context(iterator_type(source)) 54 | { 55 | } 56 | }; 57 | 58 | 59 | } //namespace parserlib 60 | 61 | 62 | #endif //PARSERLIB_ISTREAM_PARSE_CONTEXT_HPP 63 | -------------------------------------------------------------------------------- /include/parserlib/match.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_MATCH_HPP 2 | #define PARSERLIB_MATCH_HPP 3 | 4 | 5 | #include 6 | #include 7 | #include "source_range.hpp" 8 | 9 | 10 | namespace parserlib { 11 | 12 | 13 | /** 14 | * A class that represents a source range that is a match. 15 | * @param Id id type of the match. 16 | * @param Iterator type of iterator for the parse position. 17 | * @param SourcePosition type of source position for the parse position. 18 | */ 19 | template 20 | class match : public source_range { 21 | public: 22 | /** The source range type. */ 23 | using source_range_type = source_range; 24 | 25 | using typename source_range_type::id_type; 26 | using typename source_range_type::parse_position_type; 27 | 28 | /** The match type. */ 29 | using match_type = match; 30 | 31 | /** The match container type. */ 32 | using match_container_type = std::vector; 33 | 34 | /** 35 | * The default constructor. 36 | */ 37 | match() { 38 | } 39 | 40 | /** 41 | * Constructor from arguments. 42 | * @param id id of the range. 43 | * @param begin_parse_position the position where the range starts from. 44 | * @param end_parse_position the position where the range ends at. 45 | * @param children the children matches. 46 | */ 47 | match(const id_type& id, const parse_position_type& begin_parse_position, const parse_position_type& end_parse_position, match_container_type&& children) 48 | : source_range_type(id, begin_parse_position, end_parse_position) 49 | , m_children(std::move(children)) 50 | { 51 | } 52 | 53 | /** 54 | * Returns the children matches. 
55 | * @return the children matches. 56 | */ 57 | const match_container_type& get_children() const { 58 | return m_children; 59 | } 60 | 61 | private: 62 | match_container_type m_children; 63 | }; 64 | 65 | 66 | } //namespace parserlib 67 | 68 | 69 | #endif //PARSERLIB_MATCH_HPP 70 | -------------------------------------------------------------------------------- /include/parserlib/choice_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_CHOICE_PARSE_NODE_HPP 2 | #define PARSERLIB_CHOICE_PARSE_NODE_HPP 3 | 4 | 5 | #include 6 | #include "tuple_for_each.hpp" 7 | #include "loop_parse_node.hpp" 8 | #include "parse_with_parse_state.hpp" 9 | 10 | 11 | namespace parserlib { 12 | 13 | 14 | struct choice_parse_node_tag {}; 15 | 16 | 17 | template 18 | class choice_parse_node 19 | : public parent_parse_node, std::tuple> 20 | , public choice_parse_node_tag 21 | { 22 | public: 23 | using tuple_type = std::tuple; 24 | 25 | using parent_type = parent_parse_node, tuple_type>; 26 | 27 | choice_parse_node(const tuple_type& children) 28 | : parent_type(children) 29 | { 30 | } 31 | 32 | template 33 | bool parse(ParseContext& pc) const { 34 | return parse_with_parse_state(pc, [&](ParseContext& pc) { 35 | return tuple_for_each_cond<0, false>(parent_type::get_children(), [&](const auto& child) { 36 | return child.parse(pc); 37 | }); 38 | }); 39 | } 40 | }; 41 | 42 | 43 | template || std::is_base_of_v, bool> = true> 44 | auto operator | (const L& left, const R& right) { 45 | if constexpr (std::is_base_of_v && std::is_base_of_v) { 46 | return choice_parse_node(std::tuple_cat(left.get_children(), right.get_children())); 47 | } 48 | else if constexpr (std::is_base_of_v) { 49 | return choice_parse_node(std::tuple_cat(left.get_children(), std::make_tuple(make_parse_node(right)))); 50 | } 51 | else if constexpr (std::is_base_of_v) { 52 | return choice_parse_node(std::tuple_cat(std::make_tuple(make_parse_node(left)), 
right.get_children())); 53 | } 54 | else { 55 | return choice_parse_node(std::make_tuple(make_parse_node(left), make_parse_node(right))); 56 | } 57 | } 58 | 59 | 60 | } //namespace parserlib 61 | 62 | 63 | #endif //PARSERLIB_CHOICE_PARSE_NODE_HPP 64 | -------------------------------------------------------------------------------- /include/parserlib/parse_with_parse_state.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARSE_WITH_PARSE_STATE_HPP 2 | #define PARSERLIB_PARSE_WITH_PARSE_STATE_HPP 3 | 4 | 5 | namespace parserlib { 6 | 7 | 8 | /** 9 | * Helper function used for invoking a parse function 10 | * by saving the parse state of the given parse context, 11 | * invoking the function, 12 | * and then restoring the parse state of the given parse context, 13 | * either accepting the new parse state, if the parse was successful, 14 | * or rejecting the new parse state, if the parse was unsuccessful. 15 | * If an exception is thrown during parsing, it rejects the new parse state 16 | * and rethrows the exception to the caller. 17 | * @param pc the parse context to use. 18 | * @param fn the function to invoke. 19 | * @return true on success, false on failure. 20 | */ 21 | template 22 | bool parse_with_parse_state(ParseContext& pc, const F& fn) { 23 | pc.save_parse_state(); 24 | try { 25 | if (fn(pc)) { 26 | pc.accept_parse_state(); 27 | return true; 28 | } 29 | pc.reject_parse_state(); 30 | return false; 31 | } 32 | catch (...) { 33 | pc.reject_parse_state(); 34 | throw; 35 | } 36 | } 37 | 38 | 39 | /** 40 | * Helper function used for invoking a parse function 41 | * by saving the parse state of the given parse context, 42 | * invoking the function, 43 | * and then always rejecting the new parse state, whatever the function returned. 44 | * If an exception is thrown during parsing, it also rejects the new parse state 45 | * and rethrows the exception to the caller. 46 | * @param pc the parse context to use. 47 | * @param fn the function to invoke.
48 | * @return true on success, false on failure. 49 | */ 50 | template 51 | bool parse_without_parse_state(ParseContext& pc, const F& fn) { 52 | pc.save_parse_state(); 53 | try { 54 | const bool result = fn(pc); 55 | pc.reject_parse_state(); 56 | return result; 57 | } 58 | catch (...) { 59 | pc.reject_parse_state(); 60 | throw; 61 | } 62 | } 63 | 64 | 65 | } //namespace parserlib 66 | 67 | 68 | #endif //PARSERLIB_PARSE_WITH_PARSE_STATE_HPP 69 | -------------------------------------------------------------------------------- /include/parserlib/symbol_set_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_SYMBOL_SET_PARSE_NODE_HPP 2 | #define PARSERLIB_SYMBOL_SET_PARSE_NODE_HPP 3 | 4 | 5 | #include 6 | #include 7 | #include "parse_node.hpp" 8 | 9 | 10 | namespace parserlib { 11 | 12 | 13 | /** 14 | * A parse node class that parses a symbol out of a set. 15 | * @param Symbol type of symbol; it must be convertible to 'int'. 16 | */ 17 | template 18 | class symbol_set_parse_node : public parse_node> { 19 | public: 20 | /** 21 | * The constructor. 22 | * @param set the set to parse. 23 | */ 24 | symbol_set_parse_node(const std::vector& set) 25 | : m_set(set) 26 | , m_symbol_set(get_symbol_set(set)) 27 | { 28 | } 29 | 30 | /** 31 | * Parses a set. 32 | * @param pc the context to use for parsing. 33 | * @return true on success, false on failure. 34 | */ 35 | template 36 | bool parse(ParseContext& pc) const { 37 | return pc.parse_symbol_set(m_symbol_set); 38 | } 39 | 40 | private: 41 | std::vector m_set; 42 | std::vector m_symbol_set; 43 | 44 | static std::vector get_symbol_set(const std::vector& set) { 45 | std::vector result; 46 | for (const Symbol& symbol : set) { 47 | result.push_back(static_cast(symbol)); 48 | } 49 | return result; 50 | } 51 | }; 52 | 53 | 54 | /** 55 | * Creates a parse node for a set. 56 | * @param set set to create a set parse node of. 57 | * @return a set parse node. 
58 | */ 59 | template 60 | symbol_set_parse_node set(const Symbol* set) { 61 | std::basic_string_view str_view(set); 62 | return std::vector(str_view.begin(), str_view.end()); 63 | } 64 | 65 | 66 | /** 67 | * Creates a parse node for a set. 68 | * @param set set to create a set parse node of. 69 | * @return a set parse node. 70 | */ 71 | template 72 | symbol_set_parse_node set(const std::vector& set) { 73 | return set; 74 | } 75 | 76 | 77 | } //namespace parserlib 78 | 79 | 80 | #endif //PARSERLIB_SYMBOL_SET_PARSE_NODE_HPP 81 | -------------------------------------------------------------------------------- /include/parserlib/string_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_STRING_PARSE_NODE_HPP 2 | #define PARSERLIB_STRING_PARSE_NODE_HPP 3 | 4 | 5 | #include 6 | #include 7 | #include "parse_node.hpp" 8 | 9 | 10 | namespace parserlib { 11 | 12 | 13 | /** 14 | * A parse node class that parses a string. 15 | * @param Char type of character; it must be convertible to 'int'. 16 | */ 17 | template 18 | class string_parse_node : public parse_node> { 19 | public: 20 | /** 21 | * The constructor. 22 | * @param string the string to parse. 23 | */ 24 | string_parse_node(const std::basic_string_view& string) 25 | : m_string(string) 26 | , m_symbol_sequence(get_symbol_sequence(string)) 27 | { 28 | } 29 | 30 | /** 31 | * Parses a string. 32 | * @param pc the context to use for parsing. 33 | * @return true on success, false on failure. 
34 | */ 35 | template 36 | bool parse(ParseContext& pc) const { 37 | return pc.parse_symbol_sequence(m_symbol_sequence); 38 | } 39 | 40 | private: 41 | std::basic_string_view m_string; 42 | std::vector m_symbol_sequence; 43 | 44 | static std::vector get_symbol_sequence(const std::basic_string_view& string) { 45 | std::vector result; 46 | for (const Char& ch : string) { 47 | result.push_back(static_cast(ch)); 48 | } 49 | return result; 50 | } 51 | }; 52 | 53 | 54 | /** 55 | * Creates a parse node for a string. 56 | * @param string string to create a string parse node of. 57 | * @return a string parse node. 58 | */ 59 | template 60 | string_parse_node terminal(const Char* string) { 61 | return std::basic_string_view(string); 62 | } 63 | 64 | 65 | /** 66 | * Creates a parse node for a string. 67 | * @param string string to create a string parse node of. 68 | * @return a string parse node. 69 | */ 70 | template 71 | string_parse_node make_parse_node(const Char* string) { 72 | return std::basic_string_view(string); 73 | } 74 | 75 | 76 | } //namespace parserlib 77 | 78 | 79 | #endif //PARSERLIB_STRING_PARSE_NODE_HPP 80 | -------------------------------------------------------------------------------- /include/parserlib/parse_node_ptr.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARSE_NODE_PTR_HPP 2 | #define PARSERLIB_PARSE_NODE_PTR_HPP 3 | 4 | 5 | #include 6 | #include 7 | #include 8 | #include "parse_node.hpp" 9 | #include "parse_context.hpp" 10 | #include "parse_node_wrapper.hpp" 11 | 12 | 13 | namespace parserlib { 14 | 15 | 16 | template 17 | class parse_node_ptr : public parse_node> { 18 | public: 19 | parse_node_ptr() { 20 | } 21 | 22 | parse_node_ptr(const parse_node_ptr& src) 23 | : m_parse_node(src.m_parse_node) 24 | { 25 | } 26 | 27 | parse_node_ptr(parse_node_ptr&& src) 28 | : m_parse_node(std::move(src.m_parse_node)) 29 | { 30 | } 31 | 32 | template 33 | parse_node_ptr(const T& value) 34 | : 
m_parse_node(make_wrapper(make_parse_node(value))) 35 | { 36 | } 37 | 38 | parse_node_ptr& operator = (const parse_node_ptr& src) { 39 | m_parse_node = src.m_parse_node; 40 | return *this; 41 | } 42 | 43 | parse_node_ptr& operator = (parse_node_ptr&& src) { 44 | m_parse_node = std::move(src.m_parse_node); 45 | return *this; 46 | } 47 | 48 | template 49 | parse_node_ptr& operator = (const T& value) { 50 | m_parse_node = make_wrapper(make_parse_node(value)); 51 | return *this; 52 | } 53 | /** Tests whether a parse node is set. The explicit cast is required because std::shared_ptr's bool conversion is explicit and is not applied implicitly in a return statement. @return true if this holds a parse node, false otherwise. */ 54 | operator bool() const { 55 | return static_cast<bool>(m_parse_node); 56 | } 57 | 58 | parse_node_wrapper* get() const { 59 | return m_parse_node.get(); 60 | } 61 | 62 | parse_node_wrapper* operator ->() const { 63 | assert(get()); 64 | return get(); 65 | } 66 | 67 | bool parse(ParseContext& pc) const { 68 | return m_parse_node->parse(pc); 69 | } 70 | 71 | private: 72 | std::shared_ptr> m_parse_node; 73 | 74 | template 75 | std::shared_ptr> make_wrapper(const parse_node& parse_node) { 76 | return std::make_shared>(*parse_node.get_impl()); 77 | } 78 | }; 79 | 80 | 81 | template 82 | auto parse_node::operator ~() const { 83 | return parse_node_ptr<>(*get_impl()); 84 | } 85 | 86 | 87 | } //namespace parserlib 88 | 89 | 90 | #endif //PARSERLIB_PARSE_NODE_PTR_HPP 91 | -------------------------------------------------------------------------------- /include/parserlib/match_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_MATCH_PARSE_NODE_HPP 2 | #define PARSERLIB_MATCH_PARSE_NODE_HPP 3 | 4 | 5 | #include "parent_parse_node.hpp" 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse node that adds a match to a parse context when its child node parses successfully. 13 | * @param MatchId type of match id. 14 | * @param Child type of parse node to use as a child. 15 | */ 16 | template 17 | class match_parse_node : public parent_parse_node, Child> { 18 | public: 19 | /** The parent type.
*/ 20 | using parent_type = parent_parse_node, Child>; 21 | 22 | /** 23 | * The constructor. 24 | * @param id the id to put to the parse context as a match id. 25 | * @param child the parse node to use as a child. 26 | */ 27 | match_parse_node(const MatchId& id, const Child& child) 28 | : parent_type(child) 29 | , m_id(id) 30 | { 31 | } 32 | 33 | /** 34 | * Invokes the child parse node to parse. 35 | * If the child parses successfully, then it adds a match 36 | * to the given parse context. 37 | * @param pc the parse context to add a match to. 38 | * @return true on success, false on failure. 39 | */ 40 | template 41 | bool parse(ParseContext& pc) const { 42 | pc.save_match_start_state(); 43 | bool result; 44 | try { 45 | result = parent_type::get_children().parse(pc); 46 | } 47 | catch (...) { 48 | pc.restore_match_start_state(); 49 | throw; 50 | } 51 | pc.restore_match_start_state(); 52 | if (result) { 53 | pc.add_match(static_cast(m_id)); 54 | return true; 55 | } 56 | return false; 57 | } 58 | 59 | private: 60 | MatchId m_id; 61 | }; 62 | 63 | 64 | /** 65 | * The operator that is used for creating a match parse node. 66 | * @param child the child parse node. 67 | * @param id id of the match. 68 | * @return a match parse node for the given id and child. 
69 | */ 70 | template 71 | match_parse_node operator ->* (const parse_node& child, const MatchId& id) { 72 | return match_parse_node(id, *child.get_impl()); 73 | } 74 | 75 | 76 | } //namespace parserlib 77 | 78 | 79 | #endif //PARSERLIB_MATCH_PARSE_NODE_HPP 80 | -------------------------------------------------------------------------------- /include/parserlib/sequence_parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_SEQUENCE_PARSE_NODE_HPP 2 | #define PARSERLIB_SEQUENCE_PARSE_NODE_HPP 3 | 4 | 5 | #include 6 | #include "tuple_for_each.hpp" 7 | #include "loop_parse_node.hpp" 8 | #include "parse_with_parse_state.hpp" 9 | 10 | 11 | namespace parserlib { 12 | 13 | 14 | struct sequence_parse_node_tag { 15 | }; 16 | 17 | 18 | template 19 | class sequence_parse_node 20 | : public parent_parse_node, std::tuple> 21 | , public sequence_parse_node_tag 22 | { 23 | public: 24 | using tuple_type = std::tuple; 25 | 26 | using parent_type = parent_parse_node, tuple_type>; 27 | 28 | sequence_parse_node(const tuple_type& children) 29 | : parent_type(children) 30 | { 31 | } 32 | 33 | template 34 | bool parse(ParseContext& pc) const { 35 | return parse_with_parse_state(pc, [&](ParseContext& pc) { 36 | return tuple_for_each_cond<0, true>(parent_type::get_children(), [&](const auto& child) { 37 | return child.parse(pc); 38 | }); 39 | }); 40 | } 41 | }; 42 | 43 | 44 | template 45 | sequence_parse_node> parse_node::operator +() const { 46 | return std::make_tuple(*get_impl(), *(*get_impl())); 47 | } 48 | 49 | 50 | template || std::is_base_of_v, bool> = true> 51 | auto operator >> (const L& left, const R& right) { 52 | if constexpr (std::is_base_of_v && std::is_base_of_v) { 53 | return sequence_parse_node(std::tuple_cat(left.get_children(), right.get_children())); 54 | } 55 | else if constexpr (std::is_base_of_v) { 56 | return sequence_parse_node(std::tuple_cat(left.get_children(), 
std::make_tuple(make_parse_node(right)))); 57 | } 58 | else if constexpr (std::is_base_of_v) { 59 | return sequence_parse_node(std::tuple_cat(std::make_tuple(make_parse_node(left)), right.get_children())); 60 | } 61 | else { 62 | return sequence_parse_node(std::make_tuple(make_parse_node(left), make_parse_node(right))); 63 | } 64 | } 65 | 66 | 67 | template || std::is_base_of_v, bool> = true> 68 | auto operator - (const L& left, const R& right) { 69 | return !make_parse_node(right) >> make_parse_node(left); 70 | } 71 | 72 | 73 | } //namespace parserlib 74 | 75 | 76 | #endif //PARSERLIB_SEQUENCE_PARSE_NODE_HPP 77 | -------------------------------------------------------------------------------- /include/parserlib/parse_node.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARSE_NODE_HPP 2 | #define PARSERLIB_PARSE_NODE_HPP 3 | 4 | 5 | namespace parserlib { 6 | 7 | 8 | template class loop_parse_node; 9 | template class optional_parse_node; 10 | template class logical_and_parse_node; 11 | template class logical_not_parse_node; 12 | template class sequence_parse_node; 13 | template class parse_node_ptr; 14 | 15 | 16 | struct parse_node_tag { 17 | }; 18 | 19 | 20 | /** 21 | * Base class for parse nodes. 22 | * Also provides the unary operators for parse nodes. 23 | * @param Impl type of class that is derived from this class and represents the implementation type. 24 | */ 25 | template 26 | class parse_node : public parse_node_tag { 27 | public: 28 | /** 29 | * Returns pointer to implementation for this parse node. 30 | * @return pointer to implementation for this parse node. 31 | */ 32 | const Impl* get_impl() const { 33 | return static_cast(this); 34 | } 35 | 36 | /** 37 | * Returns pointer to implementation for this parse node. 38 | * @return pointer to implementation for this parse node. 
39 | */ 40 | Impl* get_impl() { 41 | return static_cast(this); 42 | } 43 | 44 | /** 45 | * Operator that converts a parse node into a loop. 46 | * @return a loop parse node. 47 | */ 48 | loop_parse_node operator *() const; 49 | 50 | /** 51 | * Operator that converts a parse node into a loop 52 | * that must parse successfully at least once. 53 | * @return a sequence of this and a loop of this. 54 | */ 55 | sequence_parse_node> operator +() const; 56 | 57 | /** 58 | * Makes this parse node optional. 59 | * @return an optional parse node. 60 | */ 61 | optional_parse_node operator -() const; 62 | 63 | /** 64 | * Uses this parse node as a logical AND predicate. 65 | * @return a logical AND parse node. 66 | */ 67 | logical_and_parse_node operator &() const; 68 | 69 | /** 70 | * Uses this parse node as a logical NOT predicate. 71 | * @return a logical NOT parse node. 72 | */ 73 | logical_not_parse_node operator !() const; 74 | 75 | /** 76 | * Creates a parse_node_ptr instance out of this parse node. 77 | * @return a generic parse_node_ptr instance out of this parse node. 78 | */ 79 | auto operator ~() const; 80 | }; 81 | 82 | 83 | /** 84 | * Function that returns the implementation of a parse node. 85 | * @return the implementation of a parse node. 86 | */ 87 | template 88 | const Impl& make_parse_node(const parse_node& parse_node) { 89 | return *parse_node.get_impl(); 90 | } 91 | 92 | 93 | } //namespace parserlib 94 | 95 | 96 | #endif //PARSERLIB_PARSE_NODE_HPP 97 | -------------------------------------------------------------------------------- /include/parserlib/source_range.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_SOURCE_RANGE_HPP 2 | #define PARSERLIB_SOURCE_RANGE_HPP 3 | 4 | 5 | #include 6 | #include "parse_position.hpp" 7 | 8 | 9 | namespace parserlib { 10 | 11 | 12 | /** 13 | * A class that represents a source range. 14 | * @param Id id type of the range. 
15 | * @param Iterator type of iterator for the parse position. 16 | * @param SourcePosition type of source position for the parse position. 17 | */ 18 | template 19 | class source_range { 20 | public: 21 | /** Type of id. */ 22 | using id_type = Id; 23 | 24 | /** Type of parse position. */ 25 | using parse_position_type = parse_position; 26 | 27 | /** Type of iterator. */ 28 | using iterator_type = Iterator; 29 | 30 | /** 31 | * The default constructor. 32 | */ 33 | source_range() { 34 | } 35 | 36 | /** 37 | * Constructor from arguments. 38 | * @param id id of the range. 39 | * @param begin_parse_position the position where the range starts from. 40 | * @param end_parse_position the position where the range ends at. 41 | */ 42 | source_range(const id_type& id, const parse_position_type& begin_parse_position, const parse_position_type& end_parse_position) 43 | : m_id(id) 44 | , m_begin_parse_position(begin_parse_position) 45 | , m_end_parse_position(end_parse_position) 46 | { 47 | assert(m_begin_parse_position <= m_end_parse_position); 48 | } 49 | 50 | /** 51 | * Returns the id of the range. 52 | * @return the id of the range. 53 | */ 54 | const id_type& get_id() const { 55 | return m_id; 56 | } 57 | 58 | /** 59 | * Returns the parse position from where the range starts from in the source. 60 | * @return the parse position from where the range starts from in the source. 61 | */ 62 | const parse_position_type& get_begin_parse_position() const { 63 | return m_begin_parse_position; 64 | } 65 | 66 | /** 67 | * Returns the parse position from where the range ends at in the source. 68 | * @return the parse position from where the range ends at in the source. 69 | */ 70 | const parse_position_type& get_end_parse_position() const { 71 | return m_end_parse_position; 72 | } 73 | 74 | /** 75 | * Returns the begin parse position iterator. 76 | * @return the begin parse position iterator. 
77 | */ 78 | const iterator_type& begin() const { 79 | return m_begin_parse_position.get_iterator(); 80 | } 81 | 82 | /** 83 | * Returns the end parse position iterator. 84 | * @return the end parse position iterator. 85 | */ 86 | const iterator_type& end() const { 87 | return m_end_parse_position.get_iterator(); 88 | } 89 | 90 | /** 91 | * Returns a container that corresponds to the source. 92 | * @return a container that corresponds to the source. 93 | */ 94 | template 95 | Container get_source() const { 96 | return Container(m_begin_parse_position.get_iterator(), m_end_parse_position.get_iterator()); 97 | } 98 | 99 | private: 100 | id_type m_id; 101 | parse_position_type m_begin_parse_position; 102 | parse_position_type m_end_parse_position; 103 | }; 104 | 105 | 106 | } //namespace parserlib 107 | 108 | 109 | #endif //PARSERLIB_SOURCE_RANGE_HPP 110 | -------------------------------------------------------------------------------- /include/parserlib/parse_position.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARSE_POSITION_HPP 2 | #define PARSERLIB_PARSE_POSITION_HPP 3 | 4 | 5 | #include 6 | 7 | 8 | namespace parserlib { 9 | 10 | 11 | /** 12 | * A parse position. 13 | * It combines an iterator and a source position. 14 | * @param Iterator type of iterator. 15 | * @param SourcePosition type of source position. 16 | */ 17 | template 18 | class parse_position { 19 | public: 20 | /** Iterator type. */ 21 | using iterator_type = Iterator; 22 | 23 | /** Source position type. */ 24 | using source_position_type = SourcePosition; 25 | 26 | /** 27 | * The default constructor. 28 | */ 29 | parse_position() { 30 | } 31 | 32 | /** 33 | * Constructor from arguments. 34 | * @param iterator the iterator. 35 | * @param source_position the source position. 
36 | */ 37 | parse_position(const iterator_type& iterator, const source_position_type& source_position = source_position_type()) 38 | : m_iterator(iterator) 39 | , m_source_position(source_position) 40 | { 41 | } 42 | 43 | /** 44 | * Returns the iterator. 45 | * @return the iterator. 46 | */ 47 | const iterator_type& get_iterator() const { 48 | return m_iterator; 49 | } 50 | 51 | /** 52 | * Returns the index of this parse position. 53 | * @return the index of this parse position. 54 | */ 55 | std::size_t get_index() const { 56 | return m_index; 57 | } 58 | 59 | /** 60 | * Returns the source position. 61 | * @returns the source position. 62 | */ 63 | const source_position_type& get_source_position() const { 64 | return m_source_position; 65 | } 66 | 67 | /** 68 | * Tests if this and the given parse position are equal. 69 | * @param other the other parse position to compare to this. 70 | * @return true if they are equal, false otherwise. 71 | */ 72 | bool operator == (const parse_position& other) const { 73 | return m_index == other.m_index; 74 | } 75 | 76 | /** 77 | * Tests if this and the given parse position are different. 78 | * @param other the other parse position to compare to this. 79 | * @return true if they are different, false otherwise. 80 | */ 81 | bool operator != (const parse_position& other) const { 82 | return m_index != other.m_index; 83 | } 84 | 85 | /** 86 | * Tests if this parse position comes before the given parse position. 87 | * @param other the other parse position to compare to this. 88 | * @return true if this parse position comes before the given parse position, false otherwise. 89 | */ 90 | bool operator < (const parse_position& other) const { 91 | return m_index < other.m_index; 92 | } 93 | 94 | /** 95 | * Tests if this parse position comes before the given parse position or if they are equal. 96 | * @param other the other parse position to compare to this. 
97 | * @return true if this parse position comes before the given parse position or they are equal, false otherwise. 98 | */ 99 | bool operator <= (const parse_position& other) const { 100 | return m_index <= other.m_index; 101 | } 102 | 103 | /** 104 | * Tests if this parse position comes after the given parse position. 105 | * @param other the other parse position to compare to this. 106 | * @return true if this parse position comes after the given parse position, false otherwise. 107 | */ 108 | bool operator > (const parse_position& other) const { 109 | return m_index > other.m_index; 110 | } 111 | 112 | /** 113 | * Tests if this parse position comes after the given parse position or if they are equal. 114 | * @param other the other parse position to compare to this. 115 | * @return true if this parse position comes after the given parse position or they are equal, false otherwise. 116 | */ 117 | bool operator >= (const parse_position& other) const { 118 | return m_index >= other.m_index; 119 | } 120 | 121 | /** 122 | * Increments this parse position by one. 123 | */ 124 | void increment() { 125 | ++m_iterator; 126 | ++m_index; 127 | m_source_position.increment(); 128 | } 129 | 130 | /** 131 | * Increments this parse position by a number. 132 | * @param count number of positions to advance. 133 | */ 134 | void increment(std::size_t count) { 135 | m_iterator = std::next(m_iterator, count); 136 | m_index += count; 137 | m_source_position.increment(count); 138 | } 139 | 140 | /** 141 | * Increments the line of the source position.
/**
 * Special parse node that allows the creation of recursive grammars.
 *
 * A rule does not own its grammar directly: it holds a shared `forward_reference`
 * object, and a thread-local map associates rule addresses with those objects.
 * Copies of a rule share the same forward reference, so assigning a grammar
 * through one of them makes it visible through all of them.
 *
 * NOTE(review): in this listing the template parameter lists and template
 * arguments appear to have been stripped (e.g. `template`, `std::shared_ptr`
 * without arguments); the code tokens below are kept exactly as found.
 *
 * NOTE(review): no destructor is visible in this class, so map entries keyed by
 * a rule's address appear to outlive the rule - confirm whether that is intended.
 *
 * @param ParseContext the parse context to use for parsing.
 */
template
class rule : public parse_node> {
public:
    /**
     * The default constructor.
     * Obtains the forward reference registered for this object's address,
     * creating a new empty one if none is registered yet.
     */
    rule()
        : m_forward_reference(get_forward_reference(this))
    {
    }

    /**
     * The copy constructor.
     * Looks up (or creates) the forward reference registered for the address of `src`,
     * shares it, and registers it under this object's address as well.
     * NOTE(review): the lookup goes through the map by address rather than reading
     * `src.m_forward_reference`; presumably so that a rule can be referenced before
     * its own constructor has run - confirm.
     * @param src the source object.
     */
    rule(const rule& src)
        : m_forward_reference(get_forward_reference(std::addressof(src)))
    {
        register_forward_reference();
    }

    /**
     * The move constructor.
     * Takes over the forward reference pointer of `src` and registers it
     * under this object's address.
     * @param src the source object.
     */
    rule(rule&& src)
        : m_forward_reference(std::move(src.m_forward_reference))
    {
        register_forward_reference();
    }

    /**
     * Constructor from value/parse node.
     * Obtains the forward reference for this object's address and stores
     * a wrapper around the parse node made from `src` into it.
     * @param src source value/parse node.
     */
    template
    rule(const T& src)
        : m_forward_reference(get_forward_reference(this))
    {
        m_forward_reference->parse_node = make_unique_wrapper(src);
    }

    /**
     * The copy assignment operator.
     * Re-points this rule to the forward reference registered for the address of `src`
     * and updates the map entry of this object accordingly.
     * @param src the source object.
     * @return reference to this.
     */
    rule& operator = (const rule& src) {
        m_forward_reference = get_forward_reference(std::addressof(src));
        register_forward_reference();
        return *this;
    }

    /**
     * The move assignment operator.
     * Takes over the forward reference pointer of `src`
     * and updates the map entry of this object accordingly.
     * @param src the source object.
     * @return reference to this.
     */
    rule& operator = (rule&& src) {
        m_forward_reference = std::move(src.m_forward_reference);
        register_forward_reference();
        return *this;
    }

    /**
     * Assignment from value/parse node.
     * The parse node is stored in the shared forward reference,
     * so all rule instances sharing it receive the new parse node.
     * @param src the source object.
     * @return reference to this.
     */
    template
    rule& operator = (const T& src) {
        m_forward_reference->parse_node = make_unique_wrapper(src);
        return *this;
    }

    /**
     * Parses this rule using left recursion parsing.
     * The pointer value of the wrapped parse node is passed as the parse node id.
     * NOTE(review): the wrapped parse node is dereferenced without a null check;
     * a rule that never received a grammar would fail here.
     * @param pc the parse context to use.
     * @return true on success, false on failure.
     */
    bool parse(ParseContext& pc) const {
        return parse_left_recursion_algorithm::parse(
            pc,
            reinterpret_cast(m_forward_reference->parse_node.get()),
            *m_forward_reference->parse_node.get()
        );
    }

private:
    //each rule points to this shared object;
    //it owns the wrapper of the actual grammar
    struct forward_reference {
        std::unique_ptr> parse_node;
    };

    //the shared object
    std::shared_ptr m_forward_reference;

    //type of map for storing forward references
    using forward_reference_map = std::map>;

    //returns the forward references map;
    //the map is a function-local static, one instance per thread (thread_local)
    static forward_reference_map& get_forward_reference_map() {
        static thread_local forward_reference_map forward_references;
        return forward_references;
    }

    //get a forward reference for the given rule address;
    //if the forward reference struct does not exist,
    //then a new (empty) one is created and registered under that address.
    static std::shared_ptr get_forward_reference(const rule* r) {
        forward_reference_map& forward_references = get_forward_reference_map();
        auto it = forward_references.find(r);
        if (it != forward_references.end()) {
            return it->second;
        }
        auto [it1, ok] = forward_references.insert(std::make_pair(r, std::make_shared()));
        return it1->second;
    }

    //creates a unique wrapper for type T;
    //the value is first converted to a parse node via make_parse_node
    template
    static std::unique_ptr> make_unique_wrapper(const T& src) {
        using parse_node_type = decltype(make_parse_node(src));
        using wrapper_type = parse_node_wrapper_impl;
        return std::make_unique(make_parse_node(src));
    }

    //registers this with its current forward reference,
    //replacing any entry previously stored for this address
    void register_forward_reference() {
        get_forward_reference_map()[this] = m_forward_reference;
    }
};
24 | * parsing of left-recursive grammars. 25 | * EBNF-like syntax. 26 | * character/custom type parsing. 27 | * extensible via templates. 28 | * multiple error handling. 29 | * ASTs (Abstract Syntax Trees). 30 | * debugging via annotations. 31 | * debugging via inspecting grammar as text. 32 | 33 | **Version** 34 | 35 | 1.0.0.9 36 | 37 | **Quick example** 38 | 39 | The following program implements and uses a calculator parser on strings: 40 | 41 | ```cpp 42 | #include "parserlib.hpp" 43 | using namespace parserlib; 44 | 45 | extern rule<> expr; 46 | 47 | const auto digit = range('0', '9'); 48 | 49 | const auto number = +digit >> -('.' >> +digit); 50 | 51 | rule<> val = number 52 | | '(' >> expr >> ')'; 53 | 54 | rule<> mul = mul >> '*' >> val 55 | | mul >> '/' >> val 56 | | val; 57 | 58 | rule<> add = add >> '+' >> mul 59 | | add >> '-' >> mul 60 | | mul; 61 | 62 | rule<> expr = add; 63 | 64 | int main() { 65 | std::string input = "1+2/(3*4)"; 66 | parse_context<> pc(input); 67 | const bool result = expr.parse(pc); 68 | return 0; 69 | } 70 | ``` 71 | 72 | ## Examples 73 | 74 | * [JSON parser](./examples/json.hpp) 75 | * [XML parser](./examples/xml.hpp) 76 | 77 | ## Changes 78 | 79 | - 1.0.0.9 80 | - Rewritten again from scratch in order to improve the quality. 81 | - changes: 82 | - renamed some functions in order to make more sense when the code is read. 83 | - improved support for catching errors and continuing. 84 | - more analytical documentation. 85 | - new features: 86 | - better debugging support. 87 | - grammar annotations. 88 | - extensible parse context. 89 | - infinite recursion exception. 90 | - loop breaks. 91 | - compile-time rule optimizations. 92 | 93 | - 1.0.0.8 94 | - Rewritten again from scratch, in order to deal with error handling in a much better way, 95 | to make everything `noexcept` for increased performance, 96 | to add new capabilities. 97 | 98 | - 1.0.0.7 99 | - Rewritten from scratch, to improve quality of the API. 

- 1.0.0.6
    - Added function-based parsing.

- 1.0.0.5
    - Added custom match functions in order to allow the resolution of ambiguities while parsing.
    - allowed terminal values to be of different type than the value of the source container, in order to allow the result of a parse (the ast nodes created by a parse) to be fed to another parse function.
    - added terminal parsing via functions.
    - added parsing via standalone functions.
    - added multiple error handling.

- 1.0.0.4
    - Rewrote the library:
        - all parser grammar classes are now inside a single template class `class parser_engine`, for the following reasons:
            - compiler performance (MSVC 32-bit regularly crashed with out of memory error from the many template instantiations of previous versions).
            - library code organization; writing a grammar usually requires including all the grammar constructs, so it is redundant to have separate files for each grammar-related class.
            - user code organization; whole grammars need to be specialized on source type.
        - coding style is closer to the standard: all identifiers are lower case, words are separated by underscores, idiomatic c++ is used whenever possible.
    - Rewrote the documentation, due to more functionality to be added in the future.

- 1.0.0.3
    - Reorganized the library in order to support compiler front ends into a separate namespace. The main library is now in `namespace parserlib::core`.
    - Added `namespace parserlib::cfe` which now contains the compiler-front-end functionality.
    - separated tokenization and parsing phases for compiler-front-ends.
    - Added relevant documentation and unit tests.

- 1.0.0.2
    - Rewrote the library from scratch in order to provide a better interface. Changes:
        - All the getter methods now start with 'get', in order to play better with Intellisense.
/**
 * A parse node that adds an error to a parse context when its child node parses successfully.
 * NOTE(review): template parameter lists/arguments appear to have been stripped
 * from this listing; the code tokens are kept exactly as found.
 * @param ErrorId type of error id.
 * @param Child type of parse node to use as a child.
 */
template
class error_parse_node : public parent_parse_node, Child> {
public:
    /** The parent type. */
    using parent_type = parent_parse_node, Child>;

    /**
     * The constructor.
     * @param id the id to put to the parse context as an error id.
     * @param child the parse node to use as a child.
     */
    error_parse_node(const ErrorId& id, const Child& child)
        : parent_type(child)
        , m_id(id)
    {
    }

    /**
     * Invokes the child parse node to parse.
     * The error start state is saved before the child parses and restored afterwards,
     * both on normal return and when the child throws.
     * If the child parses successfully, then an error with the stored id
     * is added to the given parse context.
     * @param pc the parse context to add an error to.
     * @return true if the child parsed successfully (an error was added), false otherwise.
     */
    template
    bool parse(ParseContext& pc) const {
        pc.save_error_start_state();
        bool result;
        try {
            result = parent_type::get_children().parse(pc);
        }
        catch (...) {
            //keep the save/restore calls balanced before propagating the exception
            pc.restore_error_start_state();
            throw;
        }
        pc.restore_error_start_state();
        if (result) {
            pc.add_error(static_cast(m_id));
            return true;
        }
        return false;
    }

private:
    //the error id passed to add_error on success
    ErrorId m_id;
};


/**
 * Empty tag type; the skip parse nodes below derive from it.
 */
struct skip_error_parse_node_tag {
};


/**
 * A parse node that skips input until the child parse node parses successfully.
 * The parse position after this node parses is set to the last parse position
 * before the child parse node parsed successfully.
 * @param Child Type of the child parse node.
 */
template
class skip_before_parse_node : public parent_parse_node, Child>, public skip_error_parse_node_tag {
public:
    /** The parent parse node type. */
    using parent_type = parent_parse_node, Child>;

    /**
     * The constructor.
     * @param child the child.
     */
    skip_before_parse_node(const Child& child)
        : parent_type(child)
    {
    }

    /**
     * It invokes the child node to parse in a loop,
     * advancing the parse position by one symbol after each failed attempt,
     * until the child parse node parses successfully,
     * or the end of input is reached.
     * Every attempt is rejected (success included), so on return
     * the parse position is the one before the successful parsing by the child parse node.
     * @param pc the context to pass to the child.
     * @return always true.
     */
    template
    bool parse(ParseContext& pc) const {
        while (pc.is_valid_parse_position()) {
            pc.save_parse_state();
            try {
                const bool result = parent_type::get_children().parse(pc);
                //rejected even on success, to rewind to the position before the child
                pc.reject_parse_state();
                if (result) {
                    return true;
                }
            }
            catch (...) {
                pc.reject_parse_state();
                throw;
            }
            pc.increment_parse_position();
        }
        return true;
    }
};


/**
 * A parse node that skips input until the child parse node parses successfully.
 * The parse position after this node parses is set to the parse position
 * after the child parse node parsed successfully.
 * @param Child Type of the child parse node.
 */
template
class skip_after_parse_node : public parent_parse_node, Child>, public skip_error_parse_node_tag {
public:
    /** The parent parse node type. */
    using parent_type = parent_parse_node, Child>;

    /**
     * The constructor.
     * @param child the child.
     */
    skip_after_parse_node(const Child& child)
        : parent_type(child)
    {
    }

    /**
     * It invokes the child node to parse in a loop,
     * advancing the parse position by one symbol after each failed attempt,
     * until the child parse node parses successfully,
     * or the end of input is reached.
     * A successful attempt is accepted, so on return
     * the parse position is the one after the successful parsing by the child parse node.
     * @param pc the context to pass to the child.
     * @return always true.
     */
    template
    bool parse(ParseContext& pc) const {
        while (pc.is_valid_parse_position()) {
            pc.save_parse_state();
            try {
                const bool result = parent_type::get_children().parse(pc);
                if (result) {
                    //keep the position reached by the child
                    pc.accept_parse_state();
                    return true;
                }
                pc.reject_parse_state();
            }
            catch (...) {
                pc.reject_parse_state();
                throw;
            }
            pc.increment_parse_position();
        }
        return true;
    }
};


/**
 * Creates an error parse node for the specific error id and child.
 * @param id id of the error.
 * @param child the child parse node; it must be a skip error parse node.
 * @return an error parse node for the given id and child.
 */
template , bool> = true>
error_parse_node error(const ErrorId& id, const parse_node& child) {
    return error_parse_node(id, *child.get_impl());
}


/**
 * Creates a skip before parse node.
 * The value is converted to a parse node via make_parse_node.
 * @param value value or parse node to create a skip before parse node.
 * @return a skip before parse node.
 */
template
auto skip_before(const T& value) {
    return skip_before_parse_node(make_parse_node(value));
}


/**
 * Creates a skip after parse node.
 * The value is converted to a parse node via make_parse_node.
 * @param value value or parse node to create a skip after parse node.
 * @return a skip after parse node.
 */
template
auto skip_after(const T& value) {
    return skip_after_parse_node(make_parse_node(value));
}
16 | */ 17 | class parse_left_recursion_algorithm { 18 | public: 19 | /** 20 | * Parses using the left recursion parsing algorithm. 21 | * @param context the parse context to use. 22 | * @param parse_node_id id of the parse node. 23 | * @param node the node to invoke for parsing. 24 | * @return true if the source was parsed successfully, false otherwise. 25 | * @exception left_recursion_exception thrown if the left recursion could not be resolved. 26 | */ 27 | template 28 | static bool parse(ParseContext& context, parse_node_id_type parse_node_id, const ParseNode& node) { 29 | //get the left recursion state of the parse node 30 | const auto [state_type, is_left_recursive] = context.get_left_recursion_state(parse_node_id); 31 | 32 | //parse non-left recursion 33 | if (!is_left_recursive) { 34 | return parse_non_left_recursion_state(context, parse_node_id, node); 35 | } 36 | 37 | //parse left recursion 38 | return parse_left_recursion_state(context, parse_node_id, state_type); 39 | } 40 | 41 | private: 42 | //parses no left recursion 43 | template 44 | static bool parse_non_left_recursion_state(ParseContext& context, parse_node_id_type parse_node_id, const ParseNode& node) { 45 | //save the current left recursion state of the node and set it to no left recursion 46 | context.begin_no_left_recursion_state(parse_node_id); 47 | 48 | //parse 49 | try { 50 | //invoke the node to parse 51 | const bool result = node.parse(context); 52 | 53 | //restore the parse node left recursion state 54 | context.restore_left_recursion_state(parse_node_id); 55 | 56 | return result; 57 | } 58 | 59 | //handle left recursion 60 | catch (left_recursion_exception ex) { 61 | context.restore_left_recursion_state(parse_node_id); 62 | 63 | //if the left recursion was for the current node, then handle it 64 | if (ex.get_parse_node_id() == parse_node_id) { 65 | return do_left_recursion(context, parse_node_id, node); 66 | } 67 | 68 | //else propagate the left recursion to outter execution contexts 
69 | throw ex; 70 | } 71 | 72 | //for other exception, restore state and rethrow it 73 | catch (...) { 74 | context.restore_left_recursion_state(parse_node_id); 75 | throw; 76 | } 77 | } 78 | 79 | //parses left recursion 80 | template 81 | static bool parse_left_recursion_state(ParseContext& context, parse_node_id_type parse_node_id, left_recursion_state_type state_type) { 82 | switch (state_type) { 83 | //start left recursion parsing 84 | case left_recursion_state_type::no_left_recursion: 85 | throw left_recursion_exception(parse_node_id); 86 | 87 | //reject left recursion to allow non-left recursive branches to parse 88 | case left_recursion_state_type::reject_left_recursion: 89 | return false; 90 | 91 | //accept left recursion; status set to accepted 92 | case left_recursion_state_type::accept_left_recursion: 93 | context.accept_left_recursion_state(parse_node_id); 94 | return true; 95 | 96 | //already accepted left recursion 97 | case left_recursion_state_type::accepted_left_recursion: 98 | return true; 99 | } 100 | 101 | //invalid state; perhaps data corruption 102 | throw std::runtime_error("parse_left_recursion_algorithm::parse_left_recursion_state: invalid parse node state type."); 103 | } 104 | 105 | //do left recursion for the given parse node; first the reject state, to allow non-left recursive branches to parse, 106 | //then the accept phase, to allow parts after the left-recursive branches to be parsed 107 | template 108 | static bool do_left_recursion(ParseContext& context, parse_node_id_type parse_node_id, const ParseNode& node) { 109 | if (do_reject_left_recursion(context, parse_node_id, node)) { 110 | return do_accept_left_recursion(context, parse_node_id, node); 111 | } 112 | return false; 113 | } 114 | 115 | //do reject left recursion for the given parse node 116 | template 117 | static bool do_reject_left_recursion(ParseContext& context, parse_node_id_type parse_node_id, const ParseNode& node) { 118 | //save the match start state so as that the 
accept phase starts from the correct match start state 119 | context.save_match_start_state(); 120 | 121 | //enter the reject left recursion state 122 | context.begin_reject_left_recursion_state(parse_node_id); 123 | 124 | //try to parse 125 | try { 126 | const bool result = node.parse(context); 127 | context.restore_left_recursion_state(parse_node_id); 128 | context.restore_match_start_state(); 129 | return result; 130 | } 131 | 132 | //in case of exception 133 | catch (...) { 134 | context.restore_left_recursion_state(parse_node_id); 135 | context.restore_match_start_state(); 136 | throw; 137 | } 138 | } 139 | 140 | //do accept left recursion for the given parse node 141 | template 142 | static bool do_accept_left_recursion(ParseContext& context, parse_node_id_type parse_node_id, const ParseNode& node) { 143 | //parse continuously the repeating part until no more possible parsing 144 | for (;;) { 145 | //try to parse 146 | try { 147 | //save the current match start state in order to later restore it for the next loop 148 | context.begin_accept_left_recursion_state(parse_node_id); 149 | 150 | //parse 151 | const bool result = node.parse(context); 152 | 153 | //restore the state 154 | context.restore_left_recursion_state(parse_node_id); 155 | 156 | if (!result) { 157 | break; 158 | } 159 | } 160 | 161 | //on exception, restore state and rethrow 162 | catch (...) 
{ 163 | context.restore_left_recursion_state(parse_node_id); 164 | context.restore_match_start_state(); 165 | throw; 166 | } 167 | } 168 | 169 | //success 170 | return true; 171 | } 172 | 173 | }; 174 | 175 | 176 | } //namespace parserlib 177 | 178 | 179 | #endif //PARSERLIB_PARSE_LEFT_RECURSION_ALGORITHM_HPP 180 | -------------------------------------------------------------------------------- /include/parserlib/parse_context.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_PARSE_CONTEXT_HPP 2 | #define PARSERLIB_PARSE_CONTEXT_HPP 3 | 4 | 5 | #include 6 | #include 7 | #include 8 | #include "parse_node_id_type.hpp" 9 | #include "left_recursion_state_type.hpp" 10 | 11 | 12 | namespace parserlib { 13 | 14 | 15 | /** 16 | * Base class for parse contexts. 17 | * It provides the interfaces needed for parsing. 18 | * 19 | * The APIs provided are about: 20 | * 21 | * - managing the parse position. 22 | * - doing symbol comparisons. 23 | * - providing the basic parse operations for performance (parsing symbols, sequences, sets, ranges, any, etc...). 24 | * - managing the parse state. 25 | * - managing matches. 26 | * - managing errors. 27 | * - managing left recursion. 28 | */ 29 | class parse_context { 30 | public: 31 | /** 32 | * Destructor. 33 | * Virtual due to polymorphism. 34 | */ 35 | virtual ~parse_context() { 36 | } 37 | 38 | /** 39 | * Tests whether the end position is reached or not. 40 | * @return true if the end position is not reached, false otherwise. 41 | */ 42 | virtual bool is_valid_parse_position() const = 0; 43 | 44 | /** 45 | * Tests whether the end position is reached or not. 46 | * @return true if the end position is reached, false otherwise. 47 | */ 48 | bool is_end_parse_position() const { 49 | return !is_valid_parse_position(); 50 | } 51 | 52 | /** 53 | * Increments the current parse position by one symbol. 
54 | */ 55 | virtual void increment_parse_position() = 0; 56 | 57 | /** 58 | * Increments the current parse position by a count. 59 | * @param count number of symbols to increment the parse position. 60 | */ 61 | virtual void increment_parse_position(std::size_t count) = 0; 62 | 63 | /** 64 | * Increments the line of the current parse position. 65 | */ 66 | virtual void increment_parse_position_line() = 0; 67 | 68 | /** 69 | * Retrieves the current symbol, i.e. the symbol at the current parse position. 70 | * @return the current symbol. 71 | */ 72 | virtual int get_current_symbol() const = 0; 73 | 74 | /** 75 | * Compares two symbols. 76 | * @param left the left symbol to compare. 77 | * @param right the right symbol to compare. 78 | * @return less than 0 if left < right, 0 if left == right, greater than 0 if left > right. 79 | */ 80 | virtual int compare_symbols(int left, int right) const = 0; 81 | 82 | /** 83 | * Parses a single symbol. 84 | * @param symbol symbol to compare. 85 | * @return true on success, false on failure. 86 | */ 87 | virtual bool parse_symbol(int symbol) = 0; 88 | 89 | /** 90 | * Parses the given symbol sequence. 91 | * @param sequence the sequence of symbols to compare. 92 | * @return true on success, false on failure. 93 | */ 94 | virtual bool parse_symbol_sequence(const std::vector& sequence) = 0; 95 | 96 | /** 97 | * Parses the symbol at the current parse position out of a set of symbols. 98 | * @param set the set of symbols to compare. 99 | * @return true if the current symbol is within the given set, false otherwise. 100 | */ 101 | virtual bool parse_symbol_set(const std::vector& set) = 0; 102 | 103 | /** 104 | * Parses the symbol at the current parse position within a symbol range. 105 | * @param min min symbol value. 106 | * @param max max symbol value. 107 | * @return true if the current symbol is within the given range, false otherwise. 
108 | */ 109 | virtual bool parse_symbol_range(int min, int max) = 0; 110 | 111 | /** 112 | * Parses any symbol, except if the end of input has been reached. 113 | * @return true if the end of input has not been reached, false otherwise. 114 | */ 115 | virtual bool parse_any_symbol() = 0; 116 | 117 | /** 118 | * Saves the current parse state into an internal stack. 119 | */ 120 | virtual void save_parse_state() = 0; 121 | 122 | /** 123 | * Rejects the current parse state. 124 | * The parse state is restored from the last entry 125 | * in the internal parse state stack. 126 | */ 127 | virtual void reject_parse_state() = 0; 128 | 129 | /** 130 | * Accepts the current parse state by removing the last entry 131 | * saved in the internal parse state stack. 132 | */ 133 | virtual void accept_parse_state() = 0; 134 | 135 | /** 136 | * Saves the current match start state to an internal stack. 137 | * A match start state is not always the same as the current parse state, 138 | * due to left recursion. 139 | */ 140 | virtual void save_match_start_state() = 0; 141 | 142 | /** 143 | * Restores the current match start state from the internal stack. 144 | */ 145 | virtual void restore_match_start_state() = 0; 146 | 147 | /** 148 | * Adds a match with the given id. 149 | * The source range for the match is from the current match start state to the current parse state. 150 | * @param id the id of the match. 151 | */ 152 | virtual void add_match(int id) = 0; 153 | 154 | /** 155 | * Saves the current error start parse state into an internal stack. 156 | */ 157 | virtual void save_error_start_state() = 0; 158 | 159 | /** 160 | * Restores the error start state from the internal stack. 161 | */ 162 | virtual void restore_error_start_state() = 0; 163 | 164 | /** 165 | * Adds an error with the given id. 166 | * The source range for the error is from the current error start state to the current parse state. 167 | * @param id the id of the error. 
168 | */ 169 | virtual void add_error(int id) = 0; 170 | 171 | /** 172 | * Returns the current left recursion state for a parse node. 173 | * @param parse_node_id id of the parse node to get the left recursion state of. 174 | * @return a pair of: 175 | * - the left recursion state of the parse node. 176 | * - a flag which indicates if a parse node is left-recursive at the current parse state. 177 | */ 178 | virtual std::pair get_left_recursion_state(parse_node_id_type parse_node_id) const = 0; 179 | 180 | /** 181 | * Saves the current left recursion state for the parse node with the given id 182 | * and sets its left recursion state type to 'no_left_recursion'. 183 | * @param parse_node_id id of the parse node to set the left recursion state of. 184 | */ 185 | virtual void begin_no_left_recursion_state(uintptr_t parse_node_id) = 0; 186 | 187 | /** 188 | * Saves the current left recursion state for the parse node with the given id 189 | * and sets its left recursion state type to 'reject_left_recursion'. 190 | * @param parse_node_id id of the parse node to set the left recursion state of. 191 | */ 192 | virtual void begin_reject_left_recursion_state(uintptr_t parse_node_id) = 0; 193 | 194 | /** 195 | * Saves the current left recursion state for the parse node with the given id 196 | * and sets its left recursion state type to 'accept_left_recursion'. 197 | * @param parse_node_id id of the parse node to set the left recursion state of. 198 | */ 199 | virtual void begin_accept_left_recursion_state(uintptr_t parse_node_id) = 0; 200 | 201 | /** 202 | * Sets the left recursion state type for a parse node to 'accepted_left_recursion'. 203 | * @param parse_node_id id of the parse node to set the left recursion state of. 204 | */ 205 | virtual void accept_left_recursion_state(parse_node_id_type parse_node_id) = 0; 206 | 207 | /** 208 | * Restores the current parse node state for left recursion, from the internal stack, 209 | * for the parse node with the given id. 
210 | * @param parse_node_id id of the parse node to restore the left recursion state of. 211 | */ 212 | virtual void restore_left_recursion_state(uintptr_t parse_node_id) = 0; 213 | }; 214 | 215 | 216 | } //namespace parserlib 217 | 218 | 219 | #endif //PARSERLIB_PARSE_CONTEXT_HPP 220 | -------------------------------------------------------------------------------- /tests/tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "parserlib.hpp" 4 | 5 | 6 | using namespace parserlib; 7 | 8 | 9 | #define ASSERT(FILE, LINE, COND) {\ 10 | if ((COND) == false) {\ 11 | std::cout << "Assertion failed: file " << FILE << ", line " << LINE << ": " << #COND << std::endl;\ 12 | }\ 13 | } 14 | 15 | 16 | #define DO_TEST(...) do_test(__FILE__, __LINE__, __VA_ARGS__) 17 | 18 | 19 | struct test_match { 20 | size_t begin; 21 | size_t end; 22 | const char* text; 23 | int id; 24 | std::vector children; 25 | }; 26 | 27 | 28 | struct test_error { 29 | size_t begin; 30 | size_t end; 31 | int id; 32 | }; 33 | 34 | 35 | using text_parse_context = container_parse_context; 36 | 37 | 38 | struct any_value { 39 | template 40 | operator T() const { 41 | return T(); 42 | } 43 | }; 44 | 45 | 46 | #define _ any_value() 47 | 48 | 49 | template 50 | static void do_test_matches(const char* file, const int line, const std::string& source, const MatchContainer& matches, const std::vector& test_matches) { 51 | ASSERT(file, line, matches.size() == test_matches.size()); 52 | for (size_t index = 0; index < test_matches.size(); ++index) { 53 | const auto& context_match = matches[index]; 54 | const struct test_match& test_match = test_matches[index]; 55 | ASSERT(file, line, context_match.get_begin_parse_position().get_iterator() == std::next(source.begin(), test_match.begin)); 56 | ASSERT(file, line, context_match.get_end_parse_position().get_iterator() == std::next(source.begin(), test_match.end)); 57 | ASSERT(file, line, 
context_match.get_source() == test_match.text); 58 | ASSERT(file, line, static_cast(context_match.get_id()) == test_match.id); 59 | do_test_matches(file, line, source, context_match.get_children(), test_match.children); 60 | } 61 | } 62 | 63 | 64 | template 65 | static void do_test( 66 | const char* file, 67 | const int line, 68 | const Grammar& grammar, 69 | const std::string& input, 70 | bool test_result, 71 | const std::vector& test_matches = {}, 72 | const std::vector& test_errors = {}, 73 | const std::function& other_test = {}) 74 | { 75 | std::string source = input; 76 | text_parse_context context(source); 77 | const bool result = grammar.parse(context); 78 | 79 | //test the parse result 80 | ASSERT(file, line, result == test_result); 81 | 82 | //test the matches 83 | do_test_matches(file, line, source, context.get_matches(), test_matches); 84 | 85 | //test the errors 86 | ASSERT(file, line, context.get_errors().size() == test_errors.size()); 87 | for (size_t index = 0; index < test_errors.size(); ++index) { 88 | const auto& context_error = context.get_errors()[index]; 89 | const struct test_error& test_error = test_errors[index]; 90 | ASSERT(file, line, context_error.get_begin_parse_position().get_iterator() == std::next(source.begin(), test_error.begin)); 91 | ASSERT(file, line, context_error.get_end_parse_position().get_iterator() == std::next(source.begin(), test_error.end)); 92 | ASSERT(file, line, context_error.get_id() == test_error.id); 93 | } 94 | 95 | //other test 96 | if (other_test) { 97 | ASSERT(file, line, other_test(context)); 98 | } 99 | } 100 | 101 | 102 | static void test_parse_symbol() { 103 | const auto grammar = terminal('a'); 104 | DO_TEST(grammar, "a", true); 105 | DO_TEST(grammar, "b", false); 106 | } 107 | 108 | 109 | static void test_parse_string() { 110 | const auto grammar = terminal("abc"); 111 | DO_TEST(grammar, "abc", true); 112 | DO_TEST(grammar, "abd", false); 113 | } 114 | 115 | 116 | static void test_parse_set() { 117 | 
const auto grammar = set("abc"); 118 | DO_TEST(grammar, "a", true); 119 | DO_TEST(grammar, "b", true); 120 | DO_TEST(grammar, "c", true); 121 | DO_TEST(grammar, "A", false); 122 | DO_TEST(grammar, "d", false); 123 | } 124 | 125 | 126 | static void test_parse_range() { 127 | const auto grammar = range('0', '9'); 128 | DO_TEST(grammar, "0", true); 129 | DO_TEST(grammar, "5", true); 130 | DO_TEST(grammar, "9", true); 131 | DO_TEST(grammar, "a", false); 132 | DO_TEST(grammar, "b", false); 133 | } 134 | 135 | 136 | static void test_parse_any() { 137 | const auto grammar = any(); 138 | DO_TEST(grammar, "a", true); 139 | DO_TEST(grammar, "b", true); 140 | DO_TEST(grammar, "", false); 141 | } 142 | 143 | 144 | static void test_parse_end() { 145 | const auto grammar = end(); 146 | DO_TEST(grammar, "", true); 147 | DO_TEST(grammar, "b", false); 148 | } 149 | 150 | 151 | static void test_parse_bool() { 152 | DO_TEST(terminal('a') >> true, "a", true); 153 | DO_TEST(terminal('a') >> false, "a", false); 154 | } 155 | 156 | 157 | static void test_parse_newline() { 158 | const auto grammar = newline('\n'); 159 | DO_TEST(grammar, "\n", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_source_position().get_line() == 2; }); 160 | DO_TEST(grammar, "a", false, _, _, [](const auto& pc) { return pc.get_parse_position().get_source_position().get_line() == 1; }); 161 | } 162 | 163 | 164 | static void test_parse_error() { 165 | { 166 | const auto grammar = terminal('a') >> ';' | error(1, skip_before(';')); 167 | DO_TEST(grammar, "a;", true); 168 | DO_TEST(grammar, "b;", true, _, { {0, 1, 1} }, [](const auto& pc) { return pc.get_parse_position().get_index() == 1; }); 169 | } 170 | { 171 | const auto grammar = terminal('a') >> ';' | error(1, skip_after(';')); 172 | DO_TEST(grammar, "a;", true); 173 | DO_TEST(grammar, "b;", true, _, { {0, 2, 1} }, [](const auto& pc) { return pc.get_parse_position().get_index() == 2; }); 174 | } 175 | } 176 | 177 | 178 | static void 
test_parse_loop_0() { 179 | const auto grammar = *terminal('a'); 180 | DO_TEST(grammar, "a", true, _, _, [](const auto& pc) { return pc.is_end_parse_position(); }); 181 | DO_TEST(grammar, "aa", true, _, _, [](const auto& pc) { return pc.is_end_parse_position(); }); 182 | DO_TEST(grammar, "aaa", true, _, _, [](const auto& pc) { return pc.is_end_parse_position(); }); 183 | DO_TEST(grammar, "", true, _, _, [](const auto& pc) { return pc.is_end_parse_position(); }); 184 | DO_TEST(grammar, "b", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 185 | } 186 | 187 | 188 | static void test_parse_loop_1() { 189 | const auto grammar = +terminal('a'); 190 | DO_TEST(grammar, "a", true, _, _, [](const auto& pc) { return pc.is_end_parse_position(); }); 191 | DO_TEST(grammar, "aa", true, _, _, [](const auto& pc) { return pc.is_end_parse_position(); }); 192 | DO_TEST(grammar, "aaa", true, _, _, [](const auto& pc) { return pc.is_end_parse_position(); }); 193 | DO_TEST(grammar, "ab", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 1; }); 194 | DO_TEST(grammar, "aab", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 2; }); 195 | DO_TEST(grammar, "", false, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 196 | DO_TEST(grammar, "b", false, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 197 | } 198 | 199 | 200 | static void test_parse_optional() { 201 | const auto grammar = -terminal('a'); 202 | DO_TEST(grammar, "a", true, _, _, [](const auto& pc) { return pc.is_end_parse_position(); }); 203 | DO_TEST(grammar, "b", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 204 | } 205 | 206 | 207 | static void test_parse_logical_and() { 208 | const auto grammar = &terminal('a'); 209 | DO_TEST(grammar, "a", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 210 | 
DO_TEST(grammar, "b", false, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 211 | } 212 | 213 | 214 | static void test_parse_logical_not() { 215 | const auto grammar = !terminal('a'); 216 | DO_TEST(grammar, "a", false, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 217 | DO_TEST(grammar, "b", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 218 | } 219 | 220 | 221 | static void test_parse_sequence() { 222 | const auto grammar = terminal('a') >> 'b'; 223 | DO_TEST(grammar, "ab", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 2; }); 224 | DO_TEST(grammar, "ac", false, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 225 | } 226 | 227 | 228 | static void test_parse_choice() { 229 | const auto grammar = terminal('a') | 'b'; 230 | DO_TEST(grammar, "a", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 1; }); 231 | DO_TEST(grammar, "b", true, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 1; }); 232 | DO_TEST(grammar, "c", false, _, _, [](const auto& pc) { return pc.get_parse_position().get_index() == 0; }); 233 | } 234 | 235 | 236 | static void test_parse_match() { 237 | const auto grammar = +((terminal('a')->*1 >> terminal('b')->*2)->*4 | terminal('c')->*3); 238 | DO_TEST(grammar, "ab", true, { {0, 2, "ab", 4, {{0, 1, "a", 1}, {1, 2, "b", 2}}} }); 239 | DO_TEST(grammar, "c", true, { {0, 1, "c", 3} }); 240 | DO_TEST(grammar, "abc", true, { {0, 2, "ab", 4, {{0, 1, "a", 1}, {1, 2, "b", 2}}}, {2, 3, "c", 3} }); 241 | DO_TEST(grammar, "d", false); 242 | } 243 | 244 | 245 | static void test_parse_node_ptr() { 246 | const parse_node_ptr grammar = terminal('a'); 247 | DO_TEST(grammar, "a", true); 248 | DO_TEST(grammar, "b", false); 249 | } 250 | 251 | 252 | static void test_parse_rule() { 253 | const rule grammar = terminal('a'); 254 | DO_TEST(grammar, "a", 
true); 255 | DO_TEST(grammar, "b", false); 256 | } 257 | 258 | 259 | void run_tests() { 260 | test_parse_symbol(); 261 | test_parse_string(); 262 | test_parse_set(); 263 | test_parse_range(); 264 | test_parse_any(); 265 | test_parse_end(); 266 | test_parse_bool(); 267 | test_parse_newline(); 268 | test_parse_error(); 269 | test_parse_loop_0(); 270 | test_parse_loop_1(); 271 | test_parse_optional(); 272 | test_parse_logical_and(); 273 | test_parse_logical_not(); 274 | test_parse_sequence(); 275 | test_parse_choice(); 276 | test_parse_match(); 277 | test_parse_node_ptr(); 278 | test_parse_rule(); 279 | } 280 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /include/parserlib/source_range_parse_context.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSERLIB_SOURCE_RANGE_PARSE_CONTEXT_HPP 2 | #define PARSERLIB_SOURCE_RANGE_PARSE_CONTEXT_HPP 3 | 4 | 5 | #include 6 | #include 7 | #include 8 | #include "parse_context.hpp" 9 | #include "match.hpp" 10 | #include "parse_error.hpp" 11 | #include "default_source_position.hpp" 12 | #include "default_symbol_comparator.hpp" 13 | 14 | 15 | namespace parserlib { 16 | 17 | 18 | /** 19 | * A parse context class used for parsing a source range. 20 | * @param Iterator iterator type. 21 | * @param MatchId match id type. 22 | * @param ErrorId error id type. 23 | * @param SourcePosition source position type. 24 | * @param SymbolComparator symbol comparator type. 
25 | */ 26 | template < 27 | class Iterator = typename std::string::const_iterator, 28 | class MatchId = int, 29 | class ErrorId = int, 30 | class SourcePosition = default_source_position, 31 | class SymbolComparator = default_symbol_comparator 32 | > 33 | class source_range_parse_context : public parse_context { 34 | public: 35 | /** Iterator type. */ 36 | using iterator_type = Iterator; 37 | 38 | /** Match id type. */ 39 | using match_id_type = MatchId; 40 | 41 | /** Error id type. */ 42 | using error_id_type = ErrorId; 43 | 44 | /** Source position type. */ 45 | using source_position_type = SourcePosition; 46 | 47 | /** Symbol comparator type. */ 48 | using symbol_comparator_type = SymbolComparator; 49 | 50 | /** Parse position type. */ 51 | using parse_position_type = parse_position; 52 | 53 | /** Match type. */ 54 | using match_type = match; 55 | 56 | /** Match container type. */ 57 | using match_container_type = std::vector; 58 | 59 | /** Parse error type. */ 60 | using parse_error_type = parse_error; 61 | 62 | /** Parse error container type. */ 63 | using parse_error_container_type = std::vector; 64 | 65 | source_range_parse_context(const iterator_type& begin, const iterator_type& end) 66 | : m_state{ parse_position_type(begin), 0, end } 67 | , m_match_start_state{ parse_position_type(begin), 0 } 68 | , m_error_start_state{ parse_position_type(begin) } 69 | , m_initial_parse_node_state{ left_recursion_state{ end, left_recursion_state_type::no_left_recursion}, {}} 70 | , m_end_iterator(end) 71 | { 72 | } 73 | 74 | /** 75 | * Tests whether the end position is reached or not. 76 | * @return true if the end position is not reached, false otherwise. 77 | */ 78 | bool is_valid_parse_position() const final { 79 | return m_state.parse_position.get_iterator() != m_state.end; 80 | } 81 | 82 | /** 83 | * Increments the current parse position by one symbol. 
84 | */ 85 | void increment_parse_position() final { 86 | m_state.parse_position.increment(); 87 | m_match_start_state.parse_position = m_state.parse_position; 88 | m_error_start_state.parse_position = m_state.parse_position; 89 | } 90 | 91 | /** 92 | * Increments the current parse position by a count. 93 | * @param count number of symbols to increment the parse position. 94 | */ 95 | void increment_parse_position(std::size_t count) final { 96 | m_state.parse_position.increment(count); 97 | m_match_start_state.parse_position = m_state.parse_position; 98 | m_error_start_state.parse_position = m_state.parse_position; 99 | } 100 | 101 | /** 102 | * Increments the line of the current parse position. 103 | */ 104 | void increment_parse_position_line() final { 105 | m_state.parse_position.increment_line(); 106 | } 107 | 108 | /** 109 | * Retrieves the current symbol, i.e. the symbol at the current parse position. 110 | * @return the current symbol. 111 | */ 112 | int get_current_symbol() const final { 113 | return static_cast(*m_state.parse_position.get_iterator()); 114 | } 115 | 116 | /** 117 | * Compares two symbols. 118 | * @param left the left symbol to compare. 119 | * @param right the right symbol to compare. 120 | * @return less than 0 if left < right, 0 if left == right, greater than 0 if left > right. 121 | */ 122 | int compare_symbols(int left, int right) const final { 123 | return symbol_comparator_type()(left, right); 124 | } 125 | 126 | /** 127 | * Parses a single symbol. 128 | * @param symbol symbol to compare. 129 | * @return true on success, false on failure. 130 | */ 131 | bool parse_symbol(int symbol) final { 132 | if (is_valid_parse_position()) { 133 | if (compare_symbols(get_current_symbol(), symbol) == 0) { 134 | increment_parse_position(); 135 | return true; 136 | } 137 | } 138 | return false; 139 | } 140 | 141 | /** 142 | * Parses the given symbol sequence. 143 | * @param sequence the sequence of symbols to compare. 
144 | * @return true on success, false on failure. 145 | */ 146 | bool parse_symbol_sequence(const std::vector& sequence) final { 147 | if (is_valid_parse_position()) { 148 | auto itSequence = sequence.begin(); 149 | auto itSource = m_state.parse_position.get_iterator(); 150 | for (;;) { 151 | if (itSequence == sequence.end()) { 152 | increment_parse_position(sequence.size()); 153 | return true; 154 | } 155 | if (itSource == m_state.end || symbol_comparator_type()(*itSource, *itSequence)) { 156 | break; 157 | } 158 | ++itSequence; 159 | ++itSource; 160 | } 161 | } 162 | return false; 163 | } 164 | 165 | /** 166 | * Parses the symbol at the current parse position out of a set of symbols. 167 | * @param set the set of symbols to compare. 168 | * @return true if the current symbol is within the given set, false otherwise. 169 | */ 170 | bool parse_symbol_set(const std::vector& set) final { 171 | if (is_valid_parse_position()) { 172 | const int current_symbol = get_current_symbol(); 173 | for (const int symbol : set) { 174 | const int result = compare_symbols(current_symbol, symbol); 175 | if (!result) { 176 | increment_parse_position(); 177 | return true; 178 | } 179 | } 180 | } 181 | return false; 182 | } 183 | 184 | /** 185 | * Parses the symbol at the current parse position within a symbol range. 186 | * @param min min symbol value. 187 | * @param max max symbol value. 188 | * @return true if the current symbol is within the given range, false otherwise. 189 | */ 190 | bool parse_symbol_range(int min, int max) final { 191 | assert(min <= max); 192 | if (is_valid_parse_position()) { 193 | const int current_symbol = get_current_symbol(); 194 | if (compare_symbols(current_symbol, min) >= 0 && compare_symbols(current_symbol, max) <= 0) { 195 | increment_parse_position(); 196 | return true; 197 | } 198 | } 199 | return false; 200 | } 201 | 202 | /** 203 | * Parses any symbol, except if the end of input has been reached. 
204 | * @return true if the end of input has not been reached, false otherwise. 205 | */ 206 | bool parse_any_symbol() final { 207 | if (is_valid_parse_position()) { 208 | increment_parse_position(); 209 | return true; 210 | } 211 | return false; 212 | } 213 | 214 | /** 215 | * Saves the current parse state into an internal stack. 216 | */ 217 | void save_parse_state() final { 218 | m_state_stack.push_back(m_state); 219 | } 220 | 221 | /** 222 | * Rejects the current parse state. 223 | * The parse state is restored from the last entry 224 | * in the internal parse state stack. 225 | */ 226 | void reject_parse_state() final { 227 | assert(!m_state_stack.empty()); 228 | m_state = m_state_stack.back(); 229 | m_state_stack.pop_back(); 230 | m_matches.resize(m_state.match_count); 231 | } 232 | 233 | /** 234 | * Accepts the current parse state by removing the last entry 235 | * saved in the internal parse state stack. 236 | */ 237 | void accept_parse_state() final { 238 | assert(!m_state_stack.empty()); 239 | m_state_stack.pop_back(); 240 | } 241 | 242 | /** 243 | * Saves the current match start state to an internal stack. 244 | * A match start state is not always the same as the current parse state, 245 | * due to left recursion. 246 | */ 247 | void save_match_start_state() final { 248 | m_match_start_state_stack.push_back(m_match_start_state); 249 | } 250 | 251 | /** 252 | * Restores the current match start state from the internal stack. 253 | */ 254 | void restore_match_start_state() final { 255 | assert(!m_match_start_state_stack.empty()); 256 | m_match_start_state = m_match_start_state_stack.back(); 257 | m_match_start_state_stack.pop_back(); 258 | } 259 | 260 | /** 261 | * Adds a match with the given id. 262 | * The source range for the match is from the current match start state to the current parse state. 263 | * @param id the id of the match. 
264 | */ 265 | void add_match(int id) final { 266 | match_container_type children(m_matches.begin() + m_match_start_state.match_count, m_matches.end()); 267 | m_matches.resize(m_match_start_state.match_count); 268 | m_matches.push_back(match_type(static_cast(id), m_match_start_state.parse_position, m_state.parse_position, std::move(children))); 269 | m_state.match_count = m_matches.size(); 270 | m_match_start_state.parse_position = m_state.parse_position; 271 | m_match_start_state.match_count = m_matches.size(); 272 | } 273 | 274 | /** 275 | * Saves the current error start parse state into an internal stack. 276 | */ 277 | void save_error_start_state() final { 278 | m_error_start_state_stack.push_back(m_error_start_state); 279 | } 280 | 281 | /** 282 | * Restores the error start state from the internal stack. 283 | */ 284 | void restore_error_start_state() final { 285 | assert(!m_error_start_state_stack.empty()); 286 | m_error_start_state = m_error_start_state_stack.back(); 287 | m_error_start_state_stack.pop_back(); 288 | } 289 | 290 | /** 291 | * Adds an error with the given id. 292 | * The source range for the error is from the current error start state to the current parse state. 293 | * @param id the id of the error. 294 | */ 295 | void add_error(int id) final { 296 | m_errors.push_back(parse_error_type(static_cast(id), m_error_start_state.parse_position, m_state.parse_position)); 297 | m_error_start_state.parse_position = m_state.parse_position; 298 | } 299 | 300 | /** 301 | * Returns the current left recursion state for a parse node. 302 | * @param parse_node_id id of the parse node to get the left recursion state of. 303 | * @return a pair of: 304 | * - the left recursion state of the parse node. 305 | * - a flag which indicates if a parse node is left-recursive at the current parse state. 
306 | */ 307 | std::pair get_left_recursion_state(parse_node_id_type parse_node_id) const final { 308 | parse_node_state& pns = get_parse_node_state(parse_node_id); 309 | return { pns.state.type, m_state.parse_position.get_iterator() == pns.state.iterator }; 310 | } 311 | 312 | /** 313 | * Saves the current left recursion state for the parse node with the given id 314 | * and sets its left recursion state type to 'no_left_recursion'. 315 | * @param parse_node_id id of the parse node to set the left recursion state of. 316 | */ 317 | void begin_no_left_recursion_state(uintptr_t parse_node_id) final { 318 | begin_left_recursion_state(parse_node_id, left_recursion_state_type::no_left_recursion); 319 | } 320 | 321 | /** 322 | * Saves the current left recursion state for the parse node with the given id 323 | * and sets its left recursion state type to 'reject_left_recursion'. 324 | * @param parse_node_id id of the parse node to set the left recursion state of. 325 | */ 326 | void begin_reject_left_recursion_state(uintptr_t parse_node_id) final { 327 | begin_left_recursion_state(parse_node_id, left_recursion_state_type::reject_left_recursion); 328 | } 329 | 330 | /** 331 | * Saves the current left recursion state for the parse node with the given id 332 | * and sets its left recursion state type to 'accept_left_recursion'. 333 | * @param parse_node_id id of the parse node to set the left recursion state of. 334 | */ 335 | void begin_accept_left_recursion_state(uintptr_t parse_node_id) final { 336 | begin_left_recursion_state(parse_node_id, left_recursion_state_type::accept_left_recursion); 337 | m_state.end = m_state.parse_position.get_iterator(); 338 | } 339 | 340 | /** 341 | * Sets the left recursion state type for a parse node to 'accepted_left_recursion'. 342 | * @param parse_node_id id of the parse node to set the left recursion state of. 
343 | */ 344 | void accept_left_recursion_state(parse_node_id_type parse_node_id) final { 345 | parse_node_state& pns = get_parse_node_state(parse_node_id); 346 | assert(!pns.state_stack.empty()); 347 | pns.state_stack.back().type = left_recursion_state_type::accepted_left_recursion; 348 | m_state.end = m_end_iterator; 349 | } 350 | 351 | /** 352 | * Restores the current parse node state for left recursion, from the internal stack, 353 | * for the parse node with the given id. 354 | * @param parse_node_id id of the parse node to restore the left recursion state of. 355 | */ 356 | void restore_left_recursion_state(uintptr_t parse_node_id) final { 357 | parse_node_state& pns = get_parse_node_state(parse_node_id); 358 | assert(!pns.state_stack.empty()); 359 | pns.state = pns.state_stack.back(); 360 | pns.state_stack.pop_back(); 361 | } 362 | 363 | /** 364 | * Returns the current parse position. 365 | * @return the current parse position. 366 | */ 367 | const parse_position_type& get_parse_position() const { 368 | return m_state.parse_position; 369 | } 370 | 371 | /** 372 | * Returns the matches. 373 | * @return the matches. 374 | */ 375 | const match_container_type& get_matches() const { 376 | return m_matches; 377 | } 378 | 379 | /** 380 | * Returns the errors. 381 | * @return the errors. 
382 | */ 383 | const parse_error_container_type& get_errors() const { 384 | return m_errors; 385 | } 386 | 387 | private: 388 | //parse state type 389 | struct state { 390 | parse_position_type parse_position; 391 | std::size_t match_count; 392 | iterator_type end; 393 | }; 394 | 395 | //parse state stack type 396 | using state_stack = std::vector; 397 | 398 | //match start state type 399 | struct match_start_state { 400 | parse_position_type parse_position; 401 | std::size_t match_count; 402 | }; 403 | 404 | //match start state stack type 405 | using match_start_state_stack = std::vector; 406 | 407 | //error start state type 408 | struct error_start_state { 409 | parse_position_type parse_position; 410 | }; 411 | 412 | //error start state stack 413 | using error_start_state_stack = std::vector; 414 | 415 | //left recursion state type 416 | struct left_recursion_state { 417 | iterator_type iterator; 418 | left_recursion_state_type type; 419 | }; 420 | 421 | //left recursion state stack 422 | using left_recursion_state_stack = std::vector; 423 | 424 | //parse node state for left recursion 425 | struct parse_node_state { 426 | left_recursion_state state; 427 | left_recursion_state_stack state_stack; 428 | }; 429 | 430 | //parse node state map 431 | using parse_node_state_map = std::map; 432 | 433 | //states 434 | state m_state; 435 | match_start_state m_match_start_state; 436 | error_start_state m_error_start_state; 437 | 438 | //state stacks 439 | state_stack m_state_stack; 440 | match_start_state_stack m_match_start_state_stack; 441 | error_start_state_stack m_error_start_state_stack; 442 | 443 | //state maps 444 | mutable parse_node_state_map m_parse_node_state_map; 445 | 446 | //output 447 | match_container_type m_matches; 448 | parse_error_container_type m_errors; 449 | 450 | //constants 451 | const parse_node_state m_initial_parse_node_state; 452 | const iterator_type m_end_iterator; 453 | 454 | //get the parse node state for the specific parse node id 455 | 
parse_node_state& get_parse_node_state(parse_node_id_type id) const { 456 | const auto [it, ok] = m_parse_node_state_map.insert(std::make_pair(id, m_initial_parse_node_state)); 457 | return it->second; 458 | } 459 | 460 | //start a left recursion state for the specific parse node id 461 | void begin_left_recursion_state(uintptr_t parse_node_id, left_recursion_state_type type) { 462 | parse_node_state& pns = get_parse_node_state(parse_node_id); 463 | pns.state_stack.push_back(pns.state); 464 | pns.state.iterator = m_state.parse_position.get_iterator(); 465 | pns.state.type = type; 466 | } 467 | }; 468 | 469 | 470 | } //namespace parserlib 471 | 472 | 473 | #endif //PARSERLIB_SOURCE_RANGE_PARSE_CONTEXT_HPP 474 | --------------------------------------------------------------------------------