├── EXPERIMENTAL ├── CREDITS ├── lib ├── lexertl14 │ ├── README.md │ └── include │ │ └── lexertl │ │ ├── observer_ptr.hpp │ │ ├── runtime_error.hpp │ │ ├── narrow.hpp │ │ ├── stream_num.hpp │ │ ├── enum_operator.hpp │ │ ├── enums.hpp │ │ ├── parser │ │ ├── tokeniser │ │ │ ├── fold4.inc │ │ │ ├── re_tokeniser_state.hpp │ │ │ └── re_token.hpp │ │ └── tree │ │ │ ├── iteration_node.hpp │ │ │ ├── selection_node.hpp │ │ │ ├── leaf_node.hpp │ │ │ ├── end_node.hpp │ │ │ ├── sequence_node.hpp │ │ │ └── node.hpp │ │ ├── char_traits.hpp │ │ ├── licence_1_0.txt │ │ ├── sm_traits.hpp │ │ ├── sm_to_csm.hpp │ │ ├── partition │ │ ├── charset.hpp │ │ └── equivset.hpp │ │ ├── internals.hpp │ │ ├── iterator.hpp │ │ ├── replace.hpp │ │ ├── memory_file.hpp │ │ ├── serialise.hpp │ │ └── match_results.hpp ├── parsertl14 │ ├── README.md │ └── include │ │ └── parsertl │ │ ├── ebnf.y │ │ ├── runtime_error.hpp │ │ ├── enums.hpp │ │ ├── narrow.hpp │ │ ├── dfa.hpp │ │ ├── nt_info.hpp │ │ ├── enum_operator.hpp │ │ ├── licence_1_0.txt │ │ ├── token.hpp │ │ ├── capture.hpp │ │ ├── match.hpp │ │ ├── parse.hpp │ │ ├── search_iterator.hpp │ │ ├── match_results.hpp │ │ ├── iterator.hpp │ │ ├── serialise.hpp │ │ ├── ebnf_tables.hpp │ │ ├── lookup.hpp │ │ └── state_machine.hpp └── parle │ ├── cvt.hpp │ └── lexer │ └── iterator.hpp ├── tests ├── lexer_003.json ├── lexer_flags.phpt ├── readBison.phpt ├── sigil_001.phpt ├── stack_001.phpt ├── reflection_001.phpt ├── lexer_001.phpt ├── lexer_position_tracking_002.phpt ├── lexer_002.phpt ├── lexer_005.phpt ├── lexer_006.phpt ├── sigil_002.phpt ├── words_001.phpt ├── reflection_002.phpt ├── words_002.phpt ├── words_003.phpt ├── lexer_004.phpt ├── calc_001.phpt ├── lexer_position_tracking_001.phpt ├── calc_002.phpt ├── calc_003.phpt ├── lexer_007.phpt └── lexer_003.phpt ├── .gitignore ├── config.m4 ├── config.w32 ├── LICENSE ├── INSTALL.md ├── bench ├── parse_str.php ├── phlexy_alike.php └── parse_str.impl.php ├── .github └── workflows │ └── main.yml ├── 
php_parle.h └── README.md /EXPERIMENTAL: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CREDITS: -------------------------------------------------------------------------------- 1 | parle 2 | Anatol Belski, Ben Hanson 3 | -------------------------------------------------------------------------------- /lib/lexertl14/README.md: -------------------------------------------------------------------------------- 1 | # lexertl14 2 | C++14 version of lexertl 3 | -------------------------------------------------------------------------------- /lib/parsertl14/README.md: -------------------------------------------------------------------------------- 1 | # parsertl14 2 | C++14 version of parsertl 3 | -------------------------------------------------------------------------------- /tests/lexer_003.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": [ 3 | "qelque choose", 4 | 42, 5 | "füße" 6 | ], 7 | "obj": { 8 | "prop": 12 9 | }, 10 | "some": null 11 | } 12 | 13 | -------------------------------------------------------------------------------- /tests/lexer_flags.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Lexer flags 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | flags = Lexer::DOT_NOT_LF | Lexer::DOT_NOT_CRLF; 12 | var_dump($lex->flags); 13 | $lex->flags |= Lexer::SKIP_WS; 14 | var_dump($lex->flags); 15 | 16 | ?> 17 | ==DONE== 18 | --EXPECTF-- 19 | int(6) 20 | int(14) 21 | ==DONE== 22 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/observer_ptr.hpp: -------------------------------------------------------------------------------- 1 | // observer_ptr.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | #ifndef LEXERTL_OBSERVER_PTR_HPP 8 | #define LEXERTL_OBSERVER_PTR_HPP 9 | 10 | namespace lexertl 11 | { 12 | template 13 | using observer_ptr = T*; 14 | } 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /tests/readBison.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | readBison() test 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | readBison("%%\n;start: 'a';%%\n"); 12 | $p->dump(); 13 | 14 | try 15 | { 16 | $p->readBison("@"); 17 | } 18 | catch (\Throwable $e) 19 | { 20 | echo $e->getMessage(), "\n"; 21 | } 22 | ?> 23 | ==DONE== 24 | --EXPECT-- 25 | %token 'a' 26 | %% 27 | 28 | start: 'a'; 29 | 30 | %% 31 | Syntax error on line 1: '@' 32 | ==DONE== 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .deps 2 | *.lo 3 | *.la 4 | .libs 5 | acinclude.m4 6 | aclocal.m4 7 | autom4te.cache 8 | build 9 | config.guess 10 | config.h 11 | config.h.in 12 | config.log 13 | config.nice 14 | config.status 15 | config.sub 16 | configure 17 | configure.in 18 | install-sh 19 | libtool 20 | ltmain.sh 21 | Makefile 22 | Makefile.fragments 23 | Makefile.global 24 | Makefile.objects 25 | missing 26 | mkinstalldirs 27 | modules 28 | run-tests.php 29 | tests/*.diff 30 | tests/*.out 31 | tests/*.php 32 | tests/*.exp 33 | tests/*.log 34 | tests/*.sh 35 | config.nice.bat 36 | configure.bat 37 | configure.js 38 | x64 39 | Release 40 | Release_TS 41 | Debug 42 | Debug_TS 43 | *.patch 44 | *.diff 45 | *.tmp 46 | *.swp 47 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/runtime_error.hpp: -------------------------------------------------------------------------------- 1 | // runtime_error.hpp 2 | // 
Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_RUNTIME_ERROR_HPP 7 | #define LEXERTL_RUNTIME_ERROR_HPP 8 | 9 | #include 10 | 11 | namespace lexertl 12 | { 13 | class runtime_error : public std::runtime_error 14 | { 15 | public: 16 | runtime_error(const std::string& what_arg_) : 17 | std::runtime_error(what_arg_) 18 | { 19 | } 20 | }; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/ebnf.y: -------------------------------------------------------------------------------- 1 | /* Generate code using: bison -S parsertl.cc ebnf.y */ 2 | %token EMPTY IDENTIFIER PREC TERMINAL 3 | %% 4 | 5 | rule: rhs_or; 6 | 7 | rhs_or: opt_prec_list 8 | | rhs_or '|' opt_prec_list; 9 | 10 | opt_prec_list: opt_list opt_prec; 11 | 12 | opt_list: 13 | | EMPTY 14 | | rhs_list; 15 | 16 | rhs_list: rhs 17 | | rhs_list rhs; 18 | 19 | rhs: IDENTIFIER 20 | | TERMINAL 21 | | '[' rhs_or ']' 22 | | rhs '?' 23 | | '{' rhs_or '}' 24 | | rhs '*' 25 | | '{' rhs_or '}' '-' 26 | | rhs '+' 27 | | '(' rhs_or ')'; 28 | 29 | opt_prec: 30 | | PREC IDENTIFIER 31 | | PREC TERMINAL; 32 | 33 | %% 34 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/runtime_error.hpp: -------------------------------------------------------------------------------- 1 | // runtime_error.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_RUNTIME_ERROR_HPP 7 | #define PARSERTL_RUNTIME_ERROR_HPP 8 | 9 | #include 10 | 11 | namespace parsertl 12 | { 13 | class runtime_error : public std::runtime_error 14 | { 15 | public: 16 | explicit runtime_error(const std::string& what_arg_) : 17 | std::runtime_error(what_arg_) 18 | { 19 | } 20 | }; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/enums.hpp: -------------------------------------------------------------------------------- 1 | // enums.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_ENUMS_HPP 7 | #define PARSERTL_ENUMS_HPP 8 | 9 | namespace parsertl 10 | { 11 | enum class rule_flags { enable_captures = 1 }; 12 | enum class action 13 | { 14 | error, 15 | shift, 16 | reduce, 17 | go_to, 18 | accept 19 | }; 20 | enum class error_type 21 | { 22 | syntax_error, 23 | non_associative, 24 | unknown_token 25 | }; 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/narrow.hpp: -------------------------------------------------------------------------------- 1 | // narrow.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_NARROW_HPP 7 | #define PARSERTL_NARROW_HPP 8 | 9 | #include 10 | 11 | namespace parsertl 12 | { 13 | template 14 | void narrow(const char_type* str_, std::ostringstream& ss_) 15 | { 16 | while (*str_) 17 | { 18 | // Safe to simply cast to char when string only contains ASCII. 19 | ss_ << static_cast(*str_++); 20 | } 21 | } 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/narrow.hpp: -------------------------------------------------------------------------------- 1 | // narrow.hpp 2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_NARROW_HPP 7 | #define LEXERTL_NARROW_HPP 8 | 9 | #include 10 | 11 | namespace lexertl 12 | { 13 | template 14 | void narrow(const char_type* str_, std::ostringstream& ss_) 15 | { 16 | while (*str_) 17 | { 18 | // Safe to simply cast to char 19 | // when string only contains ASCII. 20 | ss_ << static_cast(*str_++); 21 | } 22 | } 23 | } 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/dfa.hpp: -------------------------------------------------------------------------------- 1 | // dfa.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_DFA_HPP 7 | #define PARSERTL_DFA_HPP 8 | 9 | #include 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | using size_t_pair = std::pair; 15 | using size_t_pair_vector = std::vector; 16 | 17 | struct dfa_state 18 | { 19 | size_t_pair_vector _basis; 20 | size_t_pair_vector _closure; 21 | size_t_pair_vector _transitions; 22 | }; 23 | 24 | // Must be deque due to iterator usage in basic_generator::build_dfa(). 25 | using dfa = std::deque; 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/nt_info.hpp: -------------------------------------------------------------------------------- 1 | // nt_info.hpp 2 | // Copyright (c) 2016-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_NT_INFO_HPP 7 | #define PARSERTL_NT_INFO_HPP 8 | 9 | #include 10 | 11 | namespace parsertl 12 | { 13 | using char_vector = std::vector; 14 | 15 | struct nt_info 16 | { 17 | bool _nullable = false; 18 | char_vector _first_set; 19 | char_vector _follow_set; 20 | 21 | explicit nt_info(const std::size_t terminals_) : 22 | _first_set(terminals_, 0), 23 | _follow_set(terminals_, 0) 24 | { 25 | } 26 | }; 27 | 28 | using nt_info_vector = std::vector; 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /tests/sigil_001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Test sigil methods 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | push("start", "'a' B"); 14 | $b_idx = $p->push("B", "'b'"); 15 | $p->build(); 16 | 17 | $lex = new Lexer; 18 | $lex->push("a", $p->tokenId("'a'")); 19 | $lex->push("b", $p->tokenId("'b'")); 20 
| $lex->push("\\s+", Token::SKIP); 21 | $lex->build(); 22 | 23 | $p->consume("a b", $lex); 24 | 25 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) { 26 | switch ($p->action) { 27 | case Parser::ACTION_REDUCE: 28 | echo $p->sigilName(($p->reduceId == $b_idx) ? 0 : 1) . "\n"; 29 | echo $p->sigilCount() . "\n"; 30 | break; 31 | } 32 | 33 | $p->advance(); 34 | } 35 | ?> 36 | ==DONE== 37 | --EXPECT-- 38 | 'b' 39 | 1 40 | B 41 | 2 42 | ==DONE== 43 | -------------------------------------------------------------------------------- /tests/stack_001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Stack var_dump() 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | push(1); 10 | $s->push(2); 11 | $s->push(3); 12 | var_dump($s); 13 | var_dump($s->empty, $s->size, $s->top); 14 | 15 | $s->pop(); 16 | var_dump($s); 17 | 18 | ?> 19 | ==DONE== 20 | --EXPECTF-- 21 | object(Parle\Stack)#%d (4) { 22 | ["empty"]=> 23 | bool(false) 24 | ["size"]=> 25 | int(3) 26 | ["top"]=> 27 | int(3) 28 | ["elements"]=> 29 | array(3) { 30 | [0]=> 31 | int(3) 32 | [1]=> 33 | int(2) 34 | [2]=> 35 | int(1) 36 | } 37 | } 38 | bool(false) 39 | int(3) 40 | int(3) 41 | object(Parle\Stack)#%d (4) { 42 | ["empty"]=> 43 | bool(false) 44 | ["size"]=> 45 | int(2) 46 | ["top"]=> 47 | int(2) 48 | ["elements"]=> 49 | array(2) { 50 | [0]=> 51 | int(2) 52 | [1]=> 53 | int(1) 54 | } 55 | } 56 | ==DONE== 57 | -------------------------------------------------------------------------------- /tests/reflection_001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | return type in arg info 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | = 70100 ? $r->getReturnType()->getName() : (string)$r->getReturnType()); 10 | $r = new ReflectionMethod("Parle\\RLexer", "getToken"); 11 | var_dump(PHP_VERSION_ID >= 70100 ? 
$r->getReturnType()->getName() : (string)$r->getReturnType()); 12 | $r = new ReflectionMethod("Parle\\Parser", "errorInfo"); 13 | var_dump(PHP_VERSION_ID >= 70100 ? $r->getReturnType()->getName() : (string)$r->getReturnType()); 14 | $r = new ReflectionMethod("Parle\\RParser", "errorInfo"); 15 | var_dump(PHP_VERSION_ID >= 70100 ? $r->getReturnType()->getName() : (string)$r->getReturnType()); 16 | 17 | ?> 18 | ==DONE== 19 | --EXPECTF-- 20 | string(11) "Parle\Token" 21 | string(11) "Parle\Token" 22 | string(15) "Parle\ErrorInfo" 23 | string(15) "Parle\ErrorInfo" 24 | ==DONE== 25 | -------------------------------------------------------------------------------- /tests/lexer_001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Lex PHP var statement 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | push("\$[a-z]{1,}[a-zA-Z0-9_]+", 1); 13 | $lex->push("=", 2); 14 | $lex->push("[0-9]+", 3); 15 | $lex->push(";", 4); 16 | 17 | $lex->build(); 18 | 19 | $s = "\$hello=42;"; 20 | $lex->consume($s); 21 | 22 | $lex->advance(); 23 | $tok = $lex->getToken(); 24 | 25 | while (Token::EOI != $tok->id) { 26 | var_dump($tok); 27 | $lex->advance(); 28 | $tok = $lex->getToken(); 29 | } 30 | 31 | ?> 32 | ==DONE== 33 | --EXPECTF-- 34 | object(Parle\Token)#%d (2) { 35 | ["id"]=> 36 | int(1) 37 | ["value"]=> 38 | string(6) "$hello" 39 | } 40 | object(Parle\Token)#%d (2) { 41 | ["id"]=> 42 | int(2) 43 | ["value"]=> 44 | string(1) "=" 45 | } 46 | object(Parle\Token)#%d (2) { 47 | ["id"]=> 48 | int(3) 49 | ["value"]=> 50 | string(2) "42" 51 | } 52 | object(Parle\Token)#%d (2) { 53 | ["id"]=> 54 | int(4) 55 | ["value"]=> 56 | string(1) ";" 57 | } 58 | ==DONE== 59 | -------------------------------------------------------------------------------- /tests/lexer_position_tracking_002.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Lex test line property 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | push("[a-z]", 1); 
13 | $lex->push("[\n]", 2); 14 | $lex->push("bc", 3); 15 | $lex->push("ij", 4); 16 | 17 | $lex->build(); 18 | 19 | $lines = array( 20 | "abc", 21 | "de", 22 | "f", 23 | "ghijk", 24 | "xyz", 25 | ); 26 | $s = implode("\n", $lines); 27 | //$s = "abc\nd\n\r\nf\nxyz"; 28 | $lex->consume($s); 29 | 30 | printf("L C M T\n"); 31 | do { 32 | $lex->advance(); 33 | $tok = $lex->getToken(); 34 | printf("%d %d %2d %s\n", $lex->line, $lex->column, $lex->marker, (2 == $tok->id ? ">LF<" : $tok->value)); 35 | } while (Token::EOI != $tok->id); 36 | 37 | ?> 38 | ==DONE== 39 | --EXPECTF-- 40 | L C M T 41 | 0 0 0 a 42 | 0 1 1 bc 43 | 0 3 3 >LF< 44 | 1 0 4 d 45 | 1 1 5 e 46 | 1 2 6 >LF< 47 | 2 0 7 f 48 | 2 1 8 >LF< 49 | 3 0 9 g 50 | 3 1 10 h 51 | 3 2 11 ij 52 | 3 4 13 k 53 | 3 5 14 >LF< 54 | 4 0 15 x 55 | 4 1 16 y 56 | 4 2 17 z 57 | 4 3 18 58 | ==DONE== 59 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/stream_num.hpp: -------------------------------------------------------------------------------- 1 | // stream_num.hpp 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_STREAM_NUM_HPP 7 | #define LEXERTL_STREAM_NUM_HPP 8 | 9 | #include 10 | 11 | namespace lexertl 12 | { 13 | template 14 | void stream_num(const T num_, std::ostream& ss_) 15 | { 16 | ss_ << num_; 17 | } 18 | 19 | template 20 | void stream_num(const T num_, std::wostream& ss_) 21 | { 22 | ss_ << num_; 23 | } 24 | 25 | // MSVC doesn't support streaming integers etc to 26 | // std::basic_ostringstream. 
27 | template 28 | void stream_num(const T num_, std::basic_ostream& ss_) 29 | { 30 | std::stringstream css_; 31 | std::string count_; 32 | 33 | css_ << num_; 34 | count_ = css_.str(); 35 | 36 | for (char c_ : count_) 37 | { 38 | ss_ << static_cast(c_); 39 | } 40 | } 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /config.m4: -------------------------------------------------------------------------------- 1 | dnl $Id$ 2 | dnl config.m4 for extension parle 3 | 4 | PHP_ARG_ENABLE(parle, whether to enable parle support, 5 | [ --enable-parle Enable lexer/parser support]) 6 | PHP_ARG_ENABLE(parle-utf32, whether to enable internal UTF-32 support in parle, 7 | [ --enable-parle-utf32 Enable internal UTF-32 support for lexer/parser], no, no) 8 | 9 | if test "$PHP_PARLE" != "no"; then 10 | PHP_REQUIRE_CXX() 11 | 12 | AC_DEFINE(HAVE_PARLE,1,[ ]) 13 | PHP_SUBST(PARLE_SHARED_LIBADD) 14 | 15 | PHP_NEW_EXTENSION(parle, parle.cpp, $ext_shared,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 -std=c++14, cxx) 16 | 17 | PHP_ADD_INCLUDE($ext_srcdir/lib/lexertl14) 18 | PHP_ADD_INCLUDE($ext_builddir/lib/lexertl14) 19 | PHP_ADD_INCLUDE($ext_srcdir/lib/parsertl14) 20 | PHP_ADD_INCLUDE($ext_builddir/lib/parsertl14) 21 | PHP_ADD_INCLUDE($ext_srcdir/lib/parle) 22 | PHP_ADD_INCLUDE($ext_builddir/lib/parle) 23 | PHP_ADD_INCLUDE($ext_srcdir/lib) 24 | PHP_ADD_INCLUDE($ext_builddir/lib) 25 | 26 | if test "$PHP_PARLE_UTF32" != "no"; then 27 | AC_DEFINE(HAVE_PARLE_UTF32,1,[ ]) 28 | fi 29 | dnl PHP_INSTALL_HEADERS([ext/parle/php_parle.h]) 30 | fi 31 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/enum_operator.hpp: -------------------------------------------------------------------------------- 1 | // enum_operator.hpp 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_ENUM_OPERATOR_HPP 7 | #define LEXERTL_ENUM_OPERATOR_HPP 8 | 9 | #include 10 | 11 | namespace lexertl 12 | { 13 | // Operator to convert enum class to underlying type (usually int) 14 | // Example: 15 | // enum class number { one = 1, two, three }; 16 | // int num = *number::two; 17 | template 18 | auto operator*(T e) noexcept -> 19 | std::enable_if_t::value, uint16_t> 20 | { 21 | return static_cast(e); 22 | } 23 | 24 | // This is the compile time version of the above operator 25 | // (e.g. Setting a C style array size using an enum) 26 | template 27 | constexpr auto operator+(T e) noexcept -> 28 | std::enable_if_t::value, uint16_t> 29 | { 30 | return static_cast(e); 31 | } 32 | } 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/enum_operator.hpp: -------------------------------------------------------------------------------- 1 | // enum_operator.hpp 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_ENUM_OPERATOR_HPP 7 | #define PARSERTL_ENUM_OPERATOR_HPP 8 | 9 | #include 10 | 11 | namespace parsertl 12 | { 13 | // Operator to convert enum class to underlying type (usually int) 14 | // Example: 15 | // enum class number { one = 1, two, three }; 16 | // int num = *number::two; 17 | template 18 | auto operator*(T e) noexcept -> 19 | std::enable_if_t::value, uint16_t> 20 | { 21 | return static_cast(e); 22 | } 23 | 24 | // This is the compile time version of the above operator 25 | // (e.g. 
Setting a C style array size using an enum) 26 | template 27 | constexpr auto operator+(T e) noexcept -> 28 | std::enable_if_t::value, uint16_t> 29 | { 30 | return static_cast(e); 31 | } 32 | } 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/enums.hpp: -------------------------------------------------------------------------------- 1 | // enums.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | #ifndef LEXERTL_ENUMS_HPP 8 | #define LEXERTL_ENUMS_HPP 9 | 10 | namespace lexertl 11 | { 12 | enum class regex_flags 13 | { 14 | icase = 1, dot_not_newline = 2, dot_not_cr_lf = 4, 15 | skip_ws = 8, match_zero_len = 16 16 | }; 17 | // 0 = end_state, 1 = id, 2 = user_id, 3 = push_dfa 18 | // 4 = next_dfa, 5 = eol, 6 = dead_state, 7 = transitions (dfa start) 19 | enum class state_index 20 | { 21 | end_state, id, user_id, push_dfa, 22 | next_dfa, eol, dead_state, transitions 23 | }; 24 | // Rule flags: 25 | enum class feature_bit 26 | { 27 | bol = 1, eol = 2, skip = 4, again = 8, 28 | multi_state = 16, recursive = 32, advance = 64 29 | }; 30 | // End state flags: 31 | enum class state_bit 32 | { 33 | end_state = 1, greedy = 2, pop_dfa = 4 34 | }; 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tokeniser/fold4.inc: -------------------------------------------------------------------------------- 1 | {{0x10400, 0x10427}, {0x10428, 0x1044f}}, 2 | {{0x10428, 0x1044f}, {0x10400, 0x10427}}, 3 | {{0x104b0, 0x104d3}, {0x104d8, 0x104fb}}, 4 | {{0x104d8, 0x104fb}, {0x104b0, 0x104d3}}, 5 | {{0x10570, 0x1057a}, {0x10597, 0x105a1}}, 6 | {{0x1057c, 0x1058a}, {0x105a3, 0x105b1}}, 7 | {{0x1058c, 0x10592}, {0x105b3, 0x105b9}}, 8 | 
{{0x10594, 0x10595}, {0x105bb, 0x105bc}}, 9 | {{0x10597, 0x105a1}, {0x10570, 0x1057a}}, 10 | {{0x105a3, 0x105b1}, {0x1057c, 0x1058a}}, 11 | {{0x105b3, 0x105b9}, {0x1058c, 0x10592}}, 12 | {{0x105bb, 0x105bc}, {0x10594, 0x10595}}, 13 | {{0x10c80, 0x10cb2}, {0x10cc0, 0x10cf2}}, 14 | {{0x10cc0, 0x10cf2}, {0x10c80, 0x10cb2}}, 15 | {{0x118a0, 0x118bf}, {0x118c0, 0x118df}}, 16 | {{0x118c0, 0x118df}, {0x118a0, 0x118bf}}, 17 | {{0x16e40, 0x16e5f}, {0x16e60, 0x16e7f}}, 18 | {{0x16e60, 0x16e7f}, {0x16e40, 0x16e5f}}, 19 | {{0x1e900, 0x1e921}, {0x1e922, 0x1e943}}, 20 | {{0x1e922, 0x1e943}, {0x1e900, 0x1e921}}, 21 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/char_traits.hpp: -------------------------------------------------------------------------------- 1 | // char_traits.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | #ifndef LEXERTL_CHAR_TRAITS_HPP 8 | #define LEXERTL_CHAR_TRAITS_HPP 9 | 10 | #include 11 | 12 | namespace lexertl 13 | { 14 | template 15 | struct basic_char_traits 16 | { 17 | using char_type = ch_type; 18 | using index_type = ch_type; 19 | 20 | static index_type max_val() 21 | { 22 | const std::uint32_t max_ = 0x10ffff; 23 | 24 | return sizeof(char_type) > 2 ? 
25 | max_ : (max_ & 0xffff); 26 | } 27 | }; 28 | 29 | template<> 30 | struct basic_char_traits 31 | { 32 | using char_type = char; 33 | using index_type = unsigned char; 34 | 35 | static index_type max_val() 36 | { 37 | // Prevent annoying warning (VC++) 38 | index_type zero_ = 0; 39 | 40 | return ~zero_; 41 | } 42 | }; 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /tests/lexer_002.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Lex various number formats 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | push("0b[01]+", 1); 14 | $lex->push("0[0-7]*", 2); 15 | $lex->push("[1-9][0-9]*", 3); 16 | $lex->push("0x[0-9a-fA-F]+", 4); 17 | 18 | $lex->build(); 19 | 20 | $nums = array( 21 | "0x42 0b010101 075 24", 22 | ); 23 | 24 | foreach ($nums as $in) { 25 | 26 | $lex->consume($in); 27 | 28 | $lex->advance(); 29 | $tok = $lex->getToken(); 30 | 31 | while (Token::EOI != $tok->id) { 32 | if ($tok->id != Token::UNKNOWN) { 33 | var_dump($tok); 34 | } 35 | $lex->advance(); 36 | $tok = $lex->getToken(); 37 | } 38 | } 39 | 40 | ?> 41 | ==DONE== 42 | --EXPECTF-- 43 | object(Parle\Token)#%d (2) { 44 | ["id"]=> 45 | int(4) 46 | ["value"]=> 47 | string(4) "0x42" 48 | } 49 | object(Parle\Token)#%d (2) { 50 | ["id"]=> 51 | int(1) 52 | ["value"]=> 53 | string(8) "0b010101" 54 | } 55 | object(Parle\Token)#%d (2) { 56 | ["id"]=> 57 | int(2) 58 | ["value"]=> 59 | string(3) "075" 60 | } 61 | object(Parle\Token)#%d (2) { 62 | ["id"]=> 63 | int(3) 64 | ["value"]=> 65 | string(2) "24" 66 | } 67 | ==DONE== 68 | -------------------------------------------------------------------------------- /config.w32: -------------------------------------------------------------------------------- 1 | // $Id$ 2 | // vim:ft=javascript 3 | 4 | ARG_ENABLE("parle", "Enable lexer/parser support", "no"); 5 | ARG_ENABLE("parle-utf32", "Enable internal UTF-32 support for lexer/parser", "no"); 6 | 7 | if 
(PHP_PARLE != "no") { 8 | var parle_lib_path = configure_module_dirname + "\\lib"; 9 | if (CHECK_HEADER_ADD_INCLUDE("include/lexertl/generator.hpp", "CFLAGS_PARLE", PHP_PARLE + ";" + parle_lib_path + "\\lexertl14") && 10 | CHECK_HEADER_ADD_INCLUDE("include/parsertl/generator.hpp", "CFLAGS_PARLE", PHP_PARLE + ";" + parle_lib_path + "\\parsertl14") && 11 | CHECK_HEADER_ADD_INCLUDE("parle/lexer/iterator.hpp", "CFLAGS_PARLE", PHP_PARLE + ";" + parle_lib_path)) { 12 | EXTENSION("parle", "parle.cpp", PHP_PARLE_SHARED, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); 13 | ADD_FLAG("CFLAGS_PARLE", " /I " + parle_lib_path + "\\lexertl14 /I " + parle_lib_path + "\\parsertl14 /EHsc -std:c++14"); 14 | ADD_FLAG("CFLAGS_BD_EXT_PARLE", ' /D ZEND_WIN32_KEEP_INLINE=1 /U ZEND_WIN32_FORCE_INLINE '); 15 | /*PHP_INSTALL_HEADERS("ext/parle", "php_parle.h");*/ 16 | AC_DEFINE("HAVE_PARLE", 1, "Have parle extension"); 17 | if (PHP_PARLE_UTF32 != "no") { 18 | AC_DEFINE("HAVE_PARLE_UTF32", 1, "Have internal UTF-32 support in parle"); 19 | ADD_FLAG("CFLAGS_PARLE", " /D HAVE_PARLE_UTF32=1"); 20 | } 21 | } else { 22 | WARNING("parle not enabled; libraries and headers not found"); 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Anatol Belski 2 | All rights reserved. 3 | 4 | Author: Anatol Belski 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions 8 | are met: 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 2. Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 | SUCH DAMAGE. 26 | 27 | -------------------------------------------------------------------------------- /tests/lexer_005.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Lexer marker and cursor 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | push("\$[a-zA-Z_][a-zA-Z0-9_]*", 1); 14 | $lex->push("=", 2); 15 | $lex->push("\d+", 3); 16 | $lex->push(";", 4); 17 | 18 | $lex->build(); 19 | 20 | $s = '$x = 42;' . "\n" . 
'$y;'; 21 | $lex->consume($s); 22 | 23 | echo "marker: ", $lex->marker, ", cursor: ", $lex->cursor, "\n"; 24 | do { 25 | $lex->advance(); 26 | $tok = $lex->getToken(); 27 | echo "marker: ", $lex->marker, ", cursor: ", $lex->cursor, ", token: '", $tok->value, "'\n"; 28 | } while (Token::EOI != $tok->id || $lex->bol); 29 | 30 | $len = strlen($s); 31 | if ($lex->cursor == $len) { 32 | echo "End of input at ", $len, "\n"; 33 | } else { 34 | echo "End of input should be at ", $len, ", but the cursor is at ", $lex->cursor, "\n"; 35 | } 36 | ?> 37 | ==DONE== 38 | --EXPECTF-- 39 | marker: 0, cursor: 0 40 | marker: 0, cursor: 2, token: '$x' 41 | marker: 2, cursor: 3, token: ' ' 42 | marker: 3, cursor: 4, token: '=' 43 | marker: 4, cursor: 5, token: ' ' 44 | marker: 5, cursor: 7, token: '42' 45 | marker: 7, cursor: 8, token: ';' 46 | marker: 8, cursor: 9, token: ' 47 | ' 48 | marker: 9, cursor: 11, token: '$y' 49 | marker: 11, cursor: 12, token: ';' 50 | marker: 12, cursor: 12, token: '' 51 | End of input at 12 52 | ==DONE== 53 | -------------------------------------------------------------------------------- /tests/lexer_006.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Lexer token callback 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | push("\$[a-zA-Z_][a-zA-Z0-9_]*", 1); 15 | $lex->push("=", 2); 16 | $lex->push("\d+", 3); 17 | $lex->push(";", 4); 18 | $lex->push("[ ]", 42); 19 | $lex->callout(42, function () use ($in, $lex) { 20 | $tok = $lex->getToken(); 21 | echo "Custom handler called, token ", $tok->id, " won't return\n"; 22 | $i = $lex->cursor; 23 | while (" " == $in[$i]) $i++; 24 | $lex->reset($i); 25 | $lex->advance(); 26 | }); 27 | $f = function () use ($in, $lex) 28 | { 29 | $tok = $lex->getToken(); 30 | echo "Custom handler called, token ", $tok->id, " won't return\n"; 31 | $i = $lex->cursor; 32 | while ("\n" == $in[$i]) $i++; 33 | $lex->reset($i); 34 | $lex->advance(); 35 | }; 36 | $lex->push("[\n]", 24); 37 | 
$lex->callout(24, $f); 38 | 39 | $lex->build(); 40 | 41 | $lex->consume($in); 42 | 43 | do { 44 | $lex->advance(); 45 | $tok = $lex->getToken(); 46 | echo $tok->id, "\n"; 47 | } while (Token::EOI != $tok->id); 48 | 49 | ?> 50 | ==DONE== 51 | --EXPECT-- 52 | 1 53 | Custom handler called, token 42 won't return 54 | 2 55 | Custom handler called, token 42 won't return 56 | 3 57 | 4 58 | Custom handler called, token 24 won't return 59 | 1 60 | 4 61 | 0 62 | ==DONE== 63 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/licence_1_0.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | 25 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/licence_1_0.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
24 | 25 | -------------------------------------------------------------------------------- /tests/sigil_002.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Test sigil exceptions 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | sigilName(0); 17 | } catch (\Throwable $e) { 18 | echo $e->getMessage(), "\n"; 19 | } 20 | 21 | try { 22 | $p = new Parser; 23 | $p->sigil(3); 24 | } catch (\Throwable $e) { 25 | echo $e->getMessage(), "\n"; 26 | } 27 | 28 | try { 29 | $p = new RParser; 30 | $p->sigilCount(); 31 | } catch (\Throwable $e) { 32 | echo $e->getMessage(), "\n"; 33 | } 34 | 35 | $p = new RParser; 36 | $p->push("start", "'a'"); 37 | $p->build(); 38 | 39 | $lex = new RLexer; 40 | $lex->push("a", $p->tokenId("'a'")); 41 | $lex->push("\\s+", Token::SKIP); 42 | $lex->build(); 43 | 44 | $p->consume("a", $lex); 45 | 46 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) { 47 | switch ($p->action) { 48 | case Parser::ACTION_REDUCE: 49 | // throw here 50 | echo $p->sigilName(42); 51 | break; 52 | } 53 | 54 | $p->advance(); 55 | } 56 | ?> 57 | --EXPECTF-- 58 | Not in a reduce state! 59 | Not in a reduce state! 60 | Not in a reduce state! 61 | 62 | Fatal error: Uncaught Parle\ParserException: Invalid index 42 in %ssigil_002.php:%d 63 | Stack trace: 64 | #0 %ssigil_002.php(45): Parle\RParser->sigilName(42) 65 | #1 {main} 66 | thrown in %ssigil_002.php on line %d 67 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | INSTALLATION 2 | ============ 3 | 4 | # Pre-requisites 5 | 6 | * PHP version 7.4 and above. 7 | * A [C++14](http://en.cppreference.com/w/cpp/compiler_support) capable compiler is required. At least clang-5.0, GCC 5.0 and VS2015 are known to successfully build the extension. 
8 | 9 | 10 | # Binary packages 11 | 12 | ## Windows 13 | 14 | DLL for Windows can be downloaded from the [PECL page](https://pecl.php.net/package/parle). 15 | 16 | If no DLL is available or there's another reason to build, please follow the [wiki](https://wiki.php.net/internals/windows/stepbystepbuild_sdk_2#building_pecl_extensions) 17 | instructions on how to set up the [php-sdk](https://github.com/php/php-sdk-binary-tools) and build an extension. 18 | 19 | ## RPM 20 | 21 | RPM for Fedora, RHEL and CentOS can be installed from the [Remi repository](https://rpms.remirepo.net/). 22 | 23 | 24 | # Building from sources 25 | 26 | ## From PECL 27 | 28 | Released versions can be installed using the ```pecl``` command: 29 | 30 | ``` 31 | pecl install parle-beta 32 | ``` 33 | 34 | By default, `pecl` will ask about enabling the UTF-32 support. For an unattended installation, the following can be used: 35 | 36 | ``` 37 | echo | pecl install parle-beta 38 | ``` 39 | 40 | In this case, any package-related questions will be answered automatically with their default values. 
41 | 42 | ## From git 43 | 44 | Use a clone of this repository to retrieve the latest development sources: 45 | 46 | ``` 47 | git clone https://github.com/weltling/parle.git 48 | cd parle 49 | phpize 50 | ./configure 51 | make 52 | ``` 53 | -------------------------------------------------------------------------------- /tests/words_001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Parse words from a string 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | token("WORD"); 14 | $p->push("start", "sentence"); 15 | $p->push("sentence", "words"); 16 | $word_idx = $p->push("words", "WORD"); 17 | $words_idx = $p->push("words", "words WORD"); 18 | $p->build(); 19 | 20 | $lex = new Lexer; 21 | $lex->push("[^[:blank:][:punct:]]+", $p->tokenId("WORD")); 22 | $lex->push(".", Token::SKIP); 23 | $lex->build(); 24 | 25 | 26 | $words = array( 27 | "Sah ein Knab' ein Röslein stehn", 28 | "Но, чтобы стоять, я должен держаться корней.", 29 | "Homines sumus nun dei.", 30 | ); 31 | 32 | foreach ($words as $in) { 33 | $p->consume($in, $lex); 34 | $out = array(); 35 | 36 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) { 37 | switch ($p->action) { 38 | case Parser::ACTION_ERROR: 39 | throw new ParserException("Parser error"); 40 | break; 41 | case Parser::ACTION_REDUCE: 42 | switch ($p->reduceId) 43 | { 44 | case $word_idx: 45 | $out[] = $p->sigil(0); 46 | break; 47 | case $words_idx: 48 | $out[] = $p->sigil(1); 49 | break; 50 | } 51 | } 52 | 53 | $p->advance(); 54 | } 55 | 56 | var_dump(implode(" ", $out)); 57 | } 58 | 59 | ?> 60 | ==DONE== 61 | --EXPECT-- 62 | string(31) "Sah ein Knab ein Röslein stehn" 63 | string(76) "Но чтобы стоять я должен держаться корней" 64 | string(21) "Homines sumus nun dei" 65 | ==DONE== 66 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/sm_traits.hpp: 
-------------------------------------------------------------------------------- 1 | // sm_traits.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | #ifndef LEXERTL_SM_TRAITS_HPP 8 | #define LEXERTL_SM_TRAITS_HPP 9 | 10 | namespace lexertl 11 | { 12 | template 14 | struct basic_sm_traits 15 | { 16 | enum 17 | { 18 | char_24_bit = sizeof(ch_type) > 2, compressed = comp, lookup = look, 19 | is_dfa = dfa_nfa 20 | }; 21 | using input_char_type = ch_type; 22 | using char_type = ch_type; 23 | using id_type = sm_type; 24 | 25 | static id_type npos() 26 | { 27 | return static_cast(~0); 28 | } 29 | }; 30 | 31 | template 32 | struct basic_sm_traits 33 | { 34 | enum 35 | { 36 | char_24_bit = sizeof(ch_type) > 2, compressed = true, lookup = look, 37 | is_dfa = dfa_nfa 38 | }; 39 | using input_char_type = ch_type; 40 | using char_type = unsigned char; 41 | using id_type = sm_type; 42 | 43 | static id_type npos() 44 | { 45 | return static_cast(~0); 46 | } 47 | }; 48 | } 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/token.hpp: -------------------------------------------------------------------------------- 1 | // token.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_TOKEN_HPP 7 | #define PARSERTL_TOKEN_HPP 8 | 9 | #include 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | template 15 | struct token 16 | { 17 | using char_type = typename iterator::value_type::char_type; 18 | using iter_type = typename iterator::value_type::iter_type; 19 | using string = std::basic_string; 20 | using token_vector = std::vector>; 21 | std::size_t id = static_cast(~0); 22 | iter_type first = iter_type(); 23 | iter_type second = iter_type(); 24 | 25 | token() = default; 26 | 27 | token(const std::size_t id_, const iter_type& first_, 28 | const iter_type& second_) : 29 | id(id_), 30 | first(first_), 31 | second(second_) 32 | { 33 | } 34 | 35 | string str() const 36 | { 37 | return string(first, second); 38 | } 39 | 40 | string substr(const std::size_t soffset_, 41 | const std::size_t eoffset_) const 42 | { 43 | return string(first + soffset_, second - eoffset_); 44 | } 45 | 46 | std::size_t length() const 47 | { 48 | return second - first; 49 | } 50 | }; 51 | } 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /tests/reflection_002.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Test lexer/parser argument checking 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | token("INTEGER"); 20 | $p->push("start", "exp"); 21 | $int_idx = $p->push("exp", "INTEGER"); 22 | $p->build(); 23 | 24 | $lex = new RLexer; 25 | $lex->push("\\d+", $p->tokenId("INTEGER")); 26 | $lex->build(); 27 | 28 | 29 | } catch (\Throwable $e) { 30 | echo $e->getMessage(), PHP_EOL; 31 | } 32 | 33 | try { 34 | $p->validate($in, $lex); 35 | } catch (\Throwable $e) { 36 | echo $e->getMessage(), PHP_EOL; 37 | } 38 | 39 | try { 40 | $p->consume($in, $lex); 41 | } catch (\Throwable $e) { 42 | echo $e->getMessage(), PHP_EOL; 43 | } 44 | 45 | 46 | // variation 1 47 | try { 48 | $p = 
new RParser; 49 | $p->token("INTEGER"); 50 | $p->push("start", "exp"); 51 | $int_idx = $p->push("exp", "INTEGER"); 52 | $p->build(); 53 | 54 | $lex = new Lexer; 55 | $lex->push("\\d+", $p->tokenId("INTEGER")); 56 | $lex->build(); 57 | 58 | 59 | } catch (\Throwable $e) { 60 | echo $e->getMessage(), PHP_EOL; 61 | } 62 | 63 | try { 64 | $p->validate($in, $lex); 65 | } catch (\Throwable $e) { 66 | echo $e->getMessage(), PHP_EOL; 67 | } 68 | 69 | try { 70 | $p->consume($in, $lex); 71 | } catch (\Throwable $e) { 72 | echo $e->getMessage(), PHP_EOL; 73 | } 74 | 75 | ?> 76 | ==DONE== 77 | --EXPECTF-- 78 | %s\Parser::validate()%s Parle\RLexer given 79 | %s\Parser::consume()%s Parle\RLexer given 80 | %s\RParser::validate()%s Parle\Lexer given 81 | %s\RParser::consume()%s Parle\Lexer given 82 | ==DONE== 83 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/capture.hpp: -------------------------------------------------------------------------------- 1 | // capture.hpp 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_CAPTURE_HPP 7 | #define PARSERTL_CAPTURE_HPP 8 | 9 | #include 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | template 15 | struct capture 16 | { 17 | using iter_type = iterator; 18 | using char_type = typename std::iterator_traits::value_type; 19 | using string = std::basic_string; 20 | 21 | iterator first = iterator(); 22 | iterator second = iterator(); 23 | 24 | capture() = default; 25 | 26 | capture(const iterator& first_, 27 | const iterator& second_) : 28 | first(first_), 29 | second(second_) 30 | { 31 | } 32 | 33 | bool operator==(const capture& rhs_) const 34 | { 35 | return first == rhs_.first && 36 | second == rhs_.second; 37 | } 38 | 39 | bool empty() const 40 | { 41 | return first == second; 42 | } 43 | 44 | string str() const 45 | { 46 | return string(first, second); 47 | } 48 | 49 | string substr(const std::size_t soffset_, 50 | const std::size_t eoffset_) const 51 | { 52 | return string(first + soffset_, second - eoffset_); 53 | } 54 | 55 | std::size_t length() const 56 | { 57 | return second - first; 58 | } 59 | }; 60 | } 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /tests/words_002.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Parse words from a string, UTF-8 regex 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | token("WORD"); 17 | $p->push("start", "sentence"); 18 | $p->push("sentence", "words"); 19 | $word_idx = $p->push("words", "WORD"); 20 | $words_idx = $p->push("words", "words WORD"); 21 | $p->build(); 22 | 23 | $lex = new Lexer; 24 | $lex->push("[ -\\x7f]{+}[\\x80-\\xbf]{+}[\\xc2-\\xdf]{+}[\\xe0-\\xef]{+}[\\xf0-\\xff]+", $p->tokenId("WORD")); 25 | $lex->push("\\s+", Token::SKIP); 26 | $lex->build(); 27 | 28 | /* UTF-8 */ 29 | $words = array( 30 | "füße абракадабра 芬蘭", 31 | "Sah ein Knab' ein Röslein stehn", 32 | "Но, 
чтобы стоять, я должен держаться корней.", 33 | "Homines sumus nun dei.", 34 | "français éléphant fièvre là où gâteau être île chômage dû Noël maïs aigüe", 35 | ); 36 | 37 | foreach ($words as $in) { 38 | $p->consume($in, $lex); 39 | $out = array(); 40 | 41 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) { 42 | switch ($p->action) { 43 | case Parser::ACTION_ERROR: 44 | throw new ParserException("Parser error"); 45 | break; 46 | case Parser::ACTION_REDUCE: 47 | switch ($p->reduceId) 48 | { 49 | case $word_idx: 50 | $out[] = $p->sigil(0); 51 | break; 52 | case $words_idx: 53 | $out[] = $p->sigil(1); 54 | break; 55 | } 56 | } 57 | 58 | $p->advance(); 59 | } 60 | 61 | var_dump(implode(" ", $out)); 62 | } 63 | 64 | ?> 65 | ==DONE== 66 | --EXPECT-- 67 | string(36) "füße абракадабра 芬蘭" 68 | string(32) "Sah ein Knab' ein Röslein stehn" 69 | string(79) "Но, чтобы стоять, я должен держаться корней." 70 | string(22) "Homines sumus nun dei." 71 | string(87) "français éléphant fièvre là où gâteau être île chômage dû Noël maïs aigüe" 72 | ==DONE== 73 | -------------------------------------------------------------------------------- /tests/words_003.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Parse words from a string, UTF-8 regex 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | token("WORD"); 17 | $p->push("start", "sentence"); 18 | $p->push("sentence", "words"); 19 | $words_idx = $p->push("words", "words WORD"); 20 | $word_idx = $p->push("words", "WORD"); 21 | $p->build(); 22 | 23 | $lex = new Lexer; 24 | //$lex->push("[ -\\x10ffff]+", $p->tokenId("WORD")); 25 | $lex->push("[\p{L}\p{P}\p{InCJK_Unified_Ideographs}]+", $p->tokenId("WORD")); 26 | $lex->push(".", Token::SKIP); 27 | $lex->build(); 28 | 29 | /* UTF-8 */ 30 | $words = array( 31 | "füße абракадабра 芬蘭", 32 | "Sah ein Knab' ein Röslein stehn", 33 | "Но, чтобы стоять, я должен держаться корней.", 34 | "Homines sumus nun dei.", 35 | 
"français éléphant fièvre là où gâteau être île chômage dû Noël maïs aigüe", 36 | ); 37 | 38 | foreach ($words as $in) { 39 | $p->consume($in, $lex); 40 | $out = array(); 41 | 42 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) { 43 | switch ($p->action) { 44 | case Parser::ACTION_ERROR: 45 | throw new ParserException("Parser error"); 46 | break; 47 | case Parser::ACTION_REDUCE: 48 | switch ($p->reduceId) 49 | { 50 | case $word_idx: 51 | $out[] = $p->sigil(0); 52 | break; 53 | case $words_idx: 54 | $out[] = $p->sigil(1); 55 | break; 56 | } 57 | } 58 | 59 | $p->advance(); 60 | } 61 | 62 | var_dump(implode(" ", $out)); 63 | } 64 | 65 | ?> 66 | ==DONE== 67 | --EXPECT-- 68 | string(36) "füße абракадабра 芬蘭" 69 | string(32) "Sah ein Knab' ein Röslein stehn" 70 | string(79) "Но, чтобы стоять, я должен держаться корней." 71 | string(22) "Homines sumus nun dei." 72 | string(87) "français éléphant fièvre là où gâteau être île chômage dû Noël maïs aigüe" 73 | ==DONE== 74 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/sm_to_csm.hpp: -------------------------------------------------------------------------------- 1 | // sm_to_csm.hpp 2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_SM_TO_CSM_HPP 7 | #define LEXERTL_SM_TO_CSM_HPP 8 | 9 | #include "enum_operator.hpp" 10 | #include "enums.hpp" 11 | #include "observer_ptr.hpp" 12 | #include 13 | 14 | namespace lexertl 15 | { 16 | template 17 | void sm_to_csm(const sm& sm_, char_state_machine& csm_) 18 | { 19 | using id_type = typename sm::traits::id_type; 20 | using internals = typename sm::internals; 21 | using string_token = typename char_state_machine::state::string_token; 22 | using index_type = typename string_token::index_type; 23 | using string_token_vector = 24 | typename char_state_machine::string_token_vector; 25 | const internals& internals_ = sm_.data(); 26 | const std::size_t dfas_ = internals_._dfa.size(); 27 | 28 | for (id_type i_ = 0; i_ < dfas_; ++i_) 29 | { 30 | if (internals_._dfa_alphabet[i_] == 0) continue; 31 | 32 | const std::size_t alphabet_ = internals_._dfa_alphabet[i_] - 33 | *state_index::transitions; 34 | string_token_vector token_vector_(alphabet_, string_token()); 35 | observer_ptr ptr_ = &internals_._lookup[i_].front(); 36 | 37 | for (std::size_t c_ = 0; c_ < 256; ++c_, ++ptr_) 38 | { 39 | if (*ptr_ >= *state_index::transitions) 40 | { 41 | string_token& token_ = token_vector_ 42 | [*ptr_ - *state_index::transitions]; 43 | 44 | token_.insert(typename string_token::range 45 | (index_type(c_), index_type(c_))); 46 | } 47 | } 48 | 49 | csm_.append(token_vector_, internals_, i_); 50 | } 51 | } 52 | } 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/partition/charset.hpp: -------------------------------------------------------------------------------- 1 | // charset.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | #ifndef LEXERTL_CHARSET_HPP 8 | #define LEXERTL_CHARSET_HPP 9 | 10 | #include 11 | #include 12 | #include 13 | #include "../string_token.hpp" 14 | 15 | namespace lexertl 16 | { 17 | namespace detail 18 | { 19 | template 20 | struct basic_charset 21 | { 22 | using token = basic_string_token; 23 | using index_set = std::set; 24 | 25 | token _token; 26 | index_set _index_set; 27 | 28 | basic_charset() = default; 29 | 30 | basic_charset(const token& token_, const id_type index_) : 31 | _token(token_) 32 | { 33 | _index_set.insert(index_); 34 | } 35 | 36 | bool empty() const 37 | { 38 | return _token.empty() && _index_set.empty(); 39 | } 40 | 41 | void intersect(basic_charset& rhs_, basic_charset& overlap_) 42 | { 43 | _token.intersect(rhs_._token, overlap_._token); 44 | 45 | if (!overlap_._token.empty()) 46 | { 47 | std::merge(_index_set.begin(), _index_set.end(), 48 | rhs_._index_set.begin(), rhs_._index_set.end(), 49 | std::inserter(overlap_._index_set, 50 | overlap_._index_set.end())); 51 | 52 | if (_token.empty()) 53 | { 54 | _index_set.clear(); 55 | } 56 | 57 | if (rhs_._token.empty()) 58 | { 59 | rhs_._index_set.clear(); 60 | } 61 | } 62 | } 63 | }; 64 | } 65 | } 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /bench/parse_str.php: -------------------------------------------------------------------------------- 1 | 5 | --FILE-- 6 | push("can", 1); 13 | $lex->push("^cmd$", 2); 14 | $lex->push("^cmd", 3); 15 | $lex->push("cmd$", 4); 16 | $lex->push("[a-z]+", 50); 17 | $lex->push("\\s+", 100); 18 | 19 | $lex->build(); 20 | 21 | $s = "can\ncmd\na cmd\ncmd again\nanother cmd"; 22 | $lex->consume($s); 23 | 24 | $lex->reset(4); 25 | $lex->bol = true; 26 | 27 | $lex->advance(); 28 | $tok = $lex->getToken(); 29 | 30 | while (Token::EOI != $tok->id) { 31 | var_dump($tok); 32 | $lex->advance(); 33 | $tok = 
$lex->getToken(); 34 | } 35 | 36 | ?> 37 | ==DONE== 38 | --EXPECTF-- 39 | object(Parle\Token)#%d (2) { 40 | ["id"]=> 41 | int(2) 42 | ["value"]=> 43 | string(3) "cmd" 44 | } 45 | object(Parle\Token)#%d (2) { 46 | ["id"]=> 47 | int(100) 48 | ["value"]=> 49 | string(1) " 50 | " 51 | } 52 | object(Parle\Token)#%d (2) { 53 | ["id"]=> 54 | int(50) 55 | ["value"]=> 56 | string(1) "a" 57 | } 58 | object(Parle\Token)#%d (2) { 59 | ["id"]=> 60 | int(100) 61 | ["value"]=> 62 | string(1) " " 63 | } 64 | object(Parle\Token)#%d (2) { 65 | ["id"]=> 66 | int(4) 67 | ["value"]=> 68 | string(3) "cmd" 69 | } 70 | object(Parle\Token)#%d (2) { 71 | ["id"]=> 72 | int(100) 73 | ["value"]=> 74 | string(1) " 75 | " 76 | } 77 | object(Parle\Token)#%d (2) { 78 | ["id"]=> 79 | int(3) 80 | ["value"]=> 81 | string(3) "cmd" 82 | } 83 | object(Parle\Token)#%d (2) { 84 | ["id"]=> 85 | int(100) 86 | ["value"]=> 87 | string(1) " " 88 | } 89 | object(Parle\Token)#%d (2) { 90 | ["id"]=> 91 | int(50) 92 | ["value"]=> 93 | string(5) "again" 94 | } 95 | object(Parle\Token)#%d (2) { 96 | ["id"]=> 97 | int(100) 98 | ["value"]=> 99 | string(1) " 100 | " 101 | } 102 | object(Parle\Token)#%d (2) { 103 | ["id"]=> 104 | int(50) 105 | ["value"]=> 106 | string(7) "another" 107 | } 108 | object(Parle\Token)#%d (2) { 109 | ["id"]=> 110 | int(100) 111 | ["value"]=> 112 | string(1) " " 113 | } 114 | object(Parle\Token)#%d (2) { 115 | ["id"]=> 116 | int(4) 117 | ["value"]=> 118 | string(3) "cmd" 119 | } 120 | ==DONE== 121 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/internals.hpp: -------------------------------------------------------------------------------- 1 | // internals.hpp 2 | // Copyright (c) 2009-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_INTERNALS_HPP 7 | #define LEXERTL_INTERNALS_HPP 8 | 9 | #include "enum_operator.hpp" 10 | #include "enums.hpp" 11 | #include 12 | #include 13 | 14 | namespace lexertl 15 | { 16 | namespace detail 17 | { 18 | template 19 | struct basic_internals 20 | { 21 | using id_type_vector = std::vector; 22 | using id_type_vector_vector = std::vector; 23 | 24 | id_type _eoi = 0; 25 | id_type_vector_vector _lookup; 26 | id_type_vector _dfa_alphabet; 27 | id_type _features = 0; 28 | id_type_vector_vector _dfa; 29 | 30 | void clear() 31 | { 32 | _eoi = 0; 33 | _lookup.clear(); 34 | _dfa_alphabet.clear(); 35 | _features = 0; 36 | _dfa.clear(); 37 | } 38 | 39 | bool empty() const 40 | { 41 | return _dfa.empty(); 42 | } 43 | 44 | void add_states(const std::size_t num_) 45 | { 46 | for (std::size_t index_ = 0; index_ < num_; ++index_) 47 | { 48 | // lookup *always* has a size 256 now. 49 | _lookup.push_back(id_type_vector(256, 50 | static_cast(*state_index::dead_state))); 51 | _dfa_alphabet.push_back(0); 52 | _dfa.emplace_back(); 53 | } 54 | } 55 | 56 | void swap(basic_internals& internals_) noexcept 57 | { 58 | std::swap(_eoi, internals_._eoi); 59 | _lookup.swap(internals_._lookup); 60 | _dfa_alphabet.swap(internals_._dfa_alphabet); 61 | std::swap(_features, internals_._features); 62 | _dfa.swap(internals_._dfa); 63 | } 64 | }; 65 | } 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | workflow_dispatch: 10 | 11 | jobs: 12 | build: 13 | strategy: 14 | matrix: 15 | operating-system: ['ubuntu-22.04'] 16 | php-versions: ['7.4', '8.0', '8.1', '8.2'] 17 | utf32: [1, 0] 18 | gcc-versions: ['9', '11', '12'] 
19 | runs-on: ${{ matrix.operating-system }} 20 | steps: 21 | - uses: actions/checkout@v3 22 | - uses: egor-tensin/setup-gcc@v1 23 | with: 24 | version: ${{ matrix.gcc-versions }} 25 | - uses: shivammathur/setup-php@v2 26 | with: 27 | php-version: ${{ matrix.php-versions }} 28 | - run: phpize 29 | - run: | 30 | if test ${{ matrix.utf32 }} = 1; then UTF32_OPT=--enable-parle-utf32; fi 31 | CC=gcc-${{ matrix.gcc-versions }} CXX=g++-${{ matrix.gcc-versions }} ./configure $UTF32_OPT 32 | - run: make 33 | - run: make test TESTS="-P -q --show-diff" 34 | pecl: 35 | runs-on: ubuntu-latest 36 | container: php:8.2-cli-alpine 37 | steps: 38 | - name: Install required system packages 39 | run: apk add --update $PHPIZE_DEPS 40 | - name: Checkout 41 | uses: actions/checkout@v3 42 | - name: Create temporary directory 43 | id: temp-dir 44 | run: printf "path=%s\n" "$(mktemp -d)" >>"$GITHUB_OUTPUT" 45 | - name: Create package 46 | run: | 47 | cd "${{ steps.temp-dir.outputs.path }}" 48 | pecl package "$GITHUB_WORKSPACE/package.xml" 49 | - name: Compile package 50 | run: printf '' | pecl install ${{ steps.temp-dir.outputs.path }}/parle-*.tgz 51 | - name: Enable extension 52 | run: docker-php-ext-enable parle 53 | - name: Check for PHP startup warnings 54 | run: | 55 | php -d display_errors=stderr -d display_startup_errors=1 -d error_reporting=-1 -r ';' 2>/tmp/php-startup-warnings 56 | if [ -s /tmp/php-startup-warnings ]; then 57 | echo 'The PHP extension was successfully installed, but PHP raised these warnings:' >&2 58 | cat /tmp/php-startup-warnings >&2 59 | exit 1 60 | fi 61 | echo "PHP didn't raise any warnings at startup." 
62 | - name: Inspect extension 63 | run: php --ri parle 64 | -------------------------------------------------------------------------------- /bench/phlexy_alike.php: -------------------------------------------------------------------------------- 1 | push($d[1], $d[0]); 71 | } 72 | $lex->build(); 73 | $lex->consume($in); 74 | 75 | do { 76 | $lex->advance(); 77 | $tok = $lex->getToken(); 78 | // var_dump($tok); 79 | } while (Token::EOI != $tok->id); 80 | 81 | $endTime = microtime(true); 82 | 83 | echo 'Took ', $endTime - $startTime, ' seconds (', get_class($lex), ')', "\n"; 84 | } 85 | -------------------------------------------------------------------------------- /tests/calc_001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Simple stackless calc 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | token("INTEGER"); 16 | $p->left("'+' '-'"); 17 | $p->left("'*' '/'"); 18 | 19 | $p->push("start", "exp"); 20 | $add_idx = $p->push("exp", "exp '+' exp"); 21 | $sub_idx = $p->push("exp", "exp '-' exp"); 22 | $mul_idx = $p->push("exp", "exp '*' exp"); 23 | $div_idx = $p->push("exp", "exp '/' exp"); 24 | $int_idx = $p->push("exp", "INTEGER"); 25 | 26 | $p->build(); 27 | 28 | $lex = new RLexer; 29 | $lex->push("[+]", $p->tokenId("'+'")); 30 | $lex->push("[-]", $p->tokenId("'-'")); 31 | $lex->push("[*]", $p->tokenId("'*'")); 32 | $lex->push("[/]", $p->tokenId("'/'")); 33 | $lex->push("\\d+", $p->tokenId("INTEGER")); 34 | $lex->push("\\s+", Token::SKIP); 35 | 36 | $lex->build(); 37 | 38 | $exp = array( 39 | "1 + 1", 40 | "33 / 10", 41 | "100 * 45", 42 | "17 - 45", 43 | ); 44 | 45 | foreach ($exp as $in) { 46 | if (!$p->validate($in, $lex)) { 47 | throw new ParserException("Failed to validate input"); 48 | } 49 | 50 | $p->consume($in, $lex); 51 | 52 | while (RParser::ACTION_ERROR != $p->action && RParser::ACTION_ACCEPT != $p->action) { 53 | switch ($p->action) { 54 | case RParser::ACTION_ERROR: 55 | throw new ParserException("Parser 
error"); 56 | break; 57 | case RParser::ACTION_SHIFT: 58 | case RParser::ACTION_GOTO: 59 | case RParser::ACTION_ACCEPT: 60 | break; 61 | case RParser::ACTION_REDUCE: 62 | switch ($p->reduceId) { 63 | case $add_idx: 64 | $l = $p->sigil(0); 65 | $r = $p->sigil(2); 66 | echo "$l + $r = " . ($l + $r) . "\n"; 67 | break; 68 | case $sub_idx: 69 | $l = $p->sigil(0); 70 | $r = $p->sigil(2); 71 | echo "$l - $r = " . ($l - $r) . "\n"; 72 | break; 73 | case $mul_idx: 74 | $l = $p->sigil(0); 75 | $r = $p->sigil(2); 76 | echo "$l * $r = " . ($l * $r) . "\n"; 77 | break; 78 | case $div_idx: 79 | $l = $p->sigil(0); 80 | $r = $p->sigil(2); 81 | echo "$l / $r = " . ($l / $r) . "\n"; 82 | break; 83 | } 84 | break; 85 | } 86 | $p->advance(); 87 | } 88 | } 89 | 90 | ?> 91 | ==DONE== 92 | --EXPECT-- 93 | 1 + 1 = 2 94 | 33 / 10 = 3.3 95 | 100 * 45 = 4500 96 | 17 - 45 = -28 97 | ==DONE== 98 | -------------------------------------------------------------------------------- /lib/parle/cvt.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Anatol Belski 3 | * All rights reserved. 4 | * 5 | * Author: Anatol Belski 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 1. Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 2. Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 20 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 | * SUCH DAMAGE. 27 | * 28 | */ 29 | 30 | /* $Id$ */ 31 | 32 | 33 | #ifndef PARLE_OSTREAM_HPP 34 | #define PARLE_OSTREAM_HPP 35 | 36 | #if PARLE_U32 37 | #include 38 | #include 39 | 40 | namespace parle 41 | { 42 | #ifndef ZTS 43 | static std::wstring_convert, parle::char_type> cvt; 44 | #else 45 | static thread_local std::wstring_convert, parle::char_type> cvt; 46 | #endif 47 | } 48 | 49 | #define PARLE_CVT_U32(sptr) parle::cvt.from_bytes(sptr).c_str() 50 | #define PARLE_SCVT_U32(s) parle::cvt.from_bytes(s) 51 | #if defined(_MSC_VER) 52 | #define PARLE_PRE_U32(ca) PARLE_SCVT_U32(ca) 53 | #else 54 | #define PARLE_PRE_U32(ca) U ## ca 55 | #endif 56 | #define PARLE_CVT_U8(sptr) parle::cvt.to_bytes(sptr).c_str() 57 | #define PARLE_SCVT_U8(s) parle::cvt.to_bytes(s) 58 | #else 59 | #define PARLE_CVT_U32(sptr) sptr 60 | #define PARLE_SCVT_U32(s) s 61 | #define PARLE_PRE_U32(ca) ca 62 | #define PARLE_CVT_U8(sptr) sptr 63 | #define PARLE_SCVT_U8(s) s 64 | #endif 65 | 66 | #endif /* PARLE_PHP_OSTREAM_CPP */ 67 | 68 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/match.hpp: -------------------------------------------------------------------------------- 1 | // match.hpp 2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_MATCH_HPP 7 | #define PARSERTL_MATCH_HPP 8 | 9 | #include "lookup.hpp" 10 | #include "parse.hpp" 11 | 12 | namespace parsertl 13 | { 14 | // Parse entire sequence and return boolean 15 | template 16 | bool match(lexer_iterator iter_, const sm_type& sm_) 17 | { 18 | basic_match_results results_(iter_->id, sm_); 19 | 20 | return parse(iter_, sm_, results_); 21 | } 22 | 23 | template 24 | bool match(lexer_iterator iter_, const sm_type& sm_, captures& captures_) 25 | { 26 | basic_match_results results_(iter_->id, sm_); 27 | // Qualify token to prevent arg dependant lookup 28 | using token = parsertl::token; 29 | typename token::token_vector productions_; 30 | 31 | captures_.clear(); 32 | captures_.resize(sm_._captures.back().first + 33 | sm_._captures.back().second.size() + 1); 34 | captures_[0].emplace_back(iter_->first, iter_->second); 35 | 36 | while (results_.entry.action != action::error && 37 | results_.entry.action != action::accept) 38 | { 39 | if (results_.entry.action == action::reduce) 40 | { 41 | const auto& row_ = sm_._captures[results_.entry.param]; 42 | 43 | if (!row_.second.empty()) 44 | { 45 | std::size_t index_ = 0; 46 | 47 | for (const auto& pair_ : row_.second) 48 | { 49 | const auto& token1_ = results_. 50 | dollar(pair_.first, sm_, productions_); 51 | const auto& token2_ = results_. 
52 | dollar(pair_.second, sm_, productions_); 53 | auto& entry_ = captures_[row_.first + index_ + 1]; 54 | 55 | entry_.emplace_back(token1_.first, token2_.second); 56 | ++index_; 57 | } 58 | } 59 | } 60 | 61 | lookup(iter_, sm_, results_, productions_); 62 | } 63 | 64 | return results_.entry.action == action::accept; 65 | } 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /php_parle.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Anatol Belski 3 | * All rights reserved. 4 | * 5 | * Author: Anatol Belski 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 1. Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 2. Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 20 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 | * SUCH DAMAGE. 
27 | * 28 | */ 29 | 30 | /* $Id$ */ 31 | 32 | #ifndef PHP_PARLE_H 33 | #define PHP_PARLE_H 34 | 35 | extern zend_module_entry parle_module_entry; 36 | #define phpext_parle_ptr &parle_module_entry 37 | 38 | #define PHP_PARLE_VERSION "0.8.6-dev" 39 | 40 | #ifdef PHP_WIN32 41 | # define PHP_PARLE_API __declspec(dllexport) 42 | #elif defined(__GNUC__) && __GNUC__ >= 4 43 | # define PHP_PARLE_API __attribute__ ((visibility("default"))) 44 | #else 45 | # define PHP_PARLE_API 46 | #endif 47 | 48 | #ifdef ZTS 49 | #include "TSRM.h" 50 | #endif 51 | 52 | /* 53 | Declare any global variables you may need between the BEGIN 54 | and END macros here: 55 | 56 | ZEND_BEGIN_MODULE_GLOBALS(parle) 57 | zend_long global_value; 58 | char *global_string; 59 | ZEND_END_MODULE_GLOBALS(parle) 60 | */ 61 | 62 | /* Always refer to the globals in your function as PARLE_G(variable). 63 | You are encouraged to rename these macros something shorter, see 64 | examples in any other php module directory. 65 | */ 66 | #define PARLE_G(v) ZEND_MODULE_GLOBALS_ACCESSOR(parle, v) 67 | 68 | #if defined(ZTS) && defined(COMPILE_DL_PARLE) 69 | ZEND_TSRMLS_CACHE_EXTERN() 70 | #endif 71 | 72 | #endif /* PHP_PARLE_H */ 73 | 74 | 75 | /* 76 | * Local variables: 77 | * tab-width: 4 78 | * c-basic-offset: 4 79 | * End: 80 | * vim600: noet sw=4 ts=4 fdm=marker 81 | * vim<600: noet sw=4 ts=4 82 | */ 83 | -------------------------------------------------------------------------------- /tests/lexer_position_tracking_001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Lexer functionality while it's used by parser 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | token("NEWLINE"); 12 | $par->token("LETTER"); 13 | $par->token("' '"); 14 | $par->push("START", "LETTERS"); 15 | $prod_0 = $par->push("LETTERS", "LETTER | NEWLINE"); 16 | $prod_1 = $par->push("LETTERS", "LETTERS LETTER"); 17 | $par->push("LETTERS", "LETTERS NEWLINE"); 18 | $par->build(); 19 | 20 | 21 | $lex = new 
Lexer; 22 | $lex->push("[a-z]", $par->tokenId("LETTER")); 23 | $lex->push("[\r]?[\n]", $par->tokenId("NEWLINE")); 24 | $lex->build(); 25 | 26 | $in = "abc\ndef\r\nghf\nxy\\z"; 27 | $par->consume($in, $lex); 28 | 29 | 30 | do { 31 | switch ($par->action) { 32 | case Parser::ACTION_ERROR: 33 | $i = $par->errorInfo(); 34 | var_dump($i, $lex, $lex->getToken(), substr($in, $lex->marker, $lex->cursor - $lex->marker)); 35 | throw new ParserException("Error"); 36 | break; 37 | /*case Parser::ACTION_GOTO: 38 | echo "Trace: '", $par->trace(), "', token: '", $lex->getToken()->value, "'", PHP_EOL; 39 | break; 40 | case Parser::ACTION_SHIFT: 41 | echo "Trace: '", $par->trace(), "', token: '", $lex->getToken()->value, "'", PHP_EOL; 42 | break;*/ 43 | case Parser::ACTION_REDUCE: 44 | //echo "Trace: ", $par->trace(), PHP_EOL; 45 | switch ($par->reduceId) { 46 | case $prod_0; 47 | echo " Match: '", $par->sigil(0), "', token: '", substr($in, $lex->marker, $lex->cursor - $lex->marker), "'", PHP_EOL; 48 | break; 49 | case $prod_1; 50 | echo " Match: '", $par->sigil(1), "', token: '", substr($in, $lex->marker, $lex->cursor - $lex->marker), "'", PHP_EOL; 51 | break; 52 | } 53 | break; 54 | } 55 | $par->advance(); 56 | } while (Parser::ACTION_ACCEPT != $par->action); 57 | 58 | ?> 59 | ==DONE== 60 | --EXPECTF-- 61 | Match: 'a', token: 'b' 62 | Match: 'b', token: 'c' 63 | Match: 'c', token: ' 64 | ' 65 | Match: 'd', token: 'e' 66 | Match: 'e', token: 'f' 67 | Match: 'f', token: ' 68 | ' 69 | Match: 'g', token: 'h' 70 | Match: 'h', token: 'f' 71 | Match: 'f', token: ' 72 | ' 73 | Match: 'x', token: 'y' 74 | object(Parle\ErrorInfo)#%d (3) { 75 | ["id"]=> 76 | int(2) 77 | ["position"]=> 78 | int(15) 79 | ["token"]=> 80 | object(Parle\Token)#4 (2) { 81 | ["id"]=> 82 | int(65535) 83 | ["value"]=> 84 | string(1) "\" 85 | } 86 | } 87 | object(Parle\Lexer)#%d (7) { 88 | ["bol"]=> 89 | bool(false) 90 | ["flags"]=> 91 | int(6) 92 | ["state"]=> 93 | int(0) 94 | ["marker"]=> 95 | int(15) 96 | 
["cursor"]=> 97 | int(16) 98 | ["line"]=> 99 | int(2) 100 | ["coulmn"]=> 101 | int(2) 102 | } 103 | object(Parle\Token)#%d (2) { 104 | ["id"]=> 105 | int(65535) 106 | ["value"]=> 107 | string(1) "\" 108 | } 109 | string(1) "\" 110 | 111 | Fatal error: Uncaught Parle\ParserException: Error in %s:%d 112 | Stack trace: 113 | #0 {main} 114 | thrown in %s on line %d 115 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/parse.hpp: -------------------------------------------------------------------------------- 1 | // parse.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_PARSE_HPP 7 | #define PARSERTL_PARSE_HPP 8 | 9 | #include "match_results.hpp" 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | // Parse entire sequence and return boolean 15 | template 16 | bool parse(lexer_iterator& iter_, const sm_type& sm_, 17 | basic_match_results& results_) 18 | { 19 | while (results_.entry.action != action::error) 20 | { 21 | switch (results_.entry.action) 22 | { 23 | case action::shift: 24 | results_.stack.push_back(results_.entry.param); 25 | 26 | if (iter_->id != 0) 27 | ++iter_; 28 | 29 | results_.token_id = iter_->id; 30 | 31 | if (results_.token_id == lexer_iterator::value_type::npos()) 32 | { 33 | results_.entry.action = action::error; 34 | results_.entry.param = 35 | static_cast 36 | (error_type::unknown_token); 37 | } 38 | else 39 | { 40 | results_.entry = 41 | sm_.at(results_.stack.back(), results_.token_id); 42 | } 43 | 44 | break; 45 | case action::reduce: 46 | { 47 | const std::size_t size_ = 48 | sm_._rules[results_.entry.param].second.size(); 49 | 50 | if (size_) 51 | { 52 | results_.stack.resize(results_.stack.size() - size_); 53 | } 54 | 55 | results_.token_id = 
sm_._rules[results_.entry.param].first; 56 | results_.entry = 57 | sm_.at(results_.stack.back(), results_.token_id); 58 | break; 59 | } 60 | case action::go_to: 61 | results_.stack.push_back(results_.entry.param); 62 | results_.token_id = iter_->id; 63 | results_.entry = 64 | sm_.at(results_.stack.back(), results_.token_id); 65 | break; 66 | default: 67 | // accept 68 | // error 69 | break; 70 | } 71 | 72 | if (results_.entry.action == action::accept) 73 | { 74 | const std::size_t size_ = 75 | sm_._rules[results_.entry.param].second.size(); 76 | 77 | if (size_) 78 | { 79 | results_.stack.resize(results_.stack.size() - size_); 80 | } 81 | 82 | break; 83 | } 84 | } 85 | 86 | return results_.entry.action == action::accept; 87 | } 88 | } 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /tests/calc_002.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Advanced calc with state 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | token("INTEGER"); 16 | $p->left("'+' '-'"); 17 | $p->left("'*' '/'"); 18 | $p->precedence("NEGATE"); 19 | $p->right("'^'"); 20 | 21 | $p->push("start", "exp"); 22 | $add_idx = $p->push("exp", "exp '+' exp"); 23 | $sub_idx = $p->push("exp", "exp '-' exp"); 24 | $mul_idx = $p->push("exp", "exp '*' exp"); 25 | $div_idx = $p->push("exp", "exp '/' exp"); 26 | $p->push("exp", "'(' exp ')'"); 27 | $neg_idx = $p->push("exp", "'-' exp %prec NEGATE"); 28 | $exp_idx = $p->push("exp", "exp '^' exp"); 29 | $int_idx = $p->push("exp", "INTEGER"); 30 | 31 | $p->build(); 32 | 33 | $lex = new Lexer; 34 | $lex->push("[+]", $p->tokenId("'+'")); 35 | $lex->push("[-]", $p->tokenId("'-'")); 36 | $lex->push("[*]", $p->tokenId("'*'")); 37 | $lex->push("[\\^]", $p->tokenId("'^'")); 38 | $lex->push("[/]", $p->tokenId("'/'")); 39 | $lex->push("\\d+", $p->tokenId("INTEGER")); 40 | $lex->push("[(]", $p->tokenId("'('")); 41 | $lex->push("[)]", $p->tokenId("')'")); 42 | $lex->push("\\s+", 
Token::SKIP); 43 | 44 | $lex->build(); 45 | 46 | $exp = array( 47 | "1 + 2^4", 48 | "33 / (10 + 1)", 49 | "100 * 45 / 10", 50 | "55 - 10*5", 51 | "10 - -4", 52 | "10000000^0 + 10 - 3^2", 53 | ); 54 | 55 | foreach ($exp as $in) { 56 | if (!$p->validate($in, $lex)) { 57 | throw new ParserException("Failed to validate input"); 58 | } 59 | 60 | $p->consume($in, $lex); 61 | 62 | $stack = new Stack; 63 | 64 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) { 65 | switch ($p->action) { 66 | case Parser::ACTION_ERROR: 67 | throw new ParserException("Parser error"); 68 | break; 69 | case Parser::ACTION_SHIFT: 70 | case Parser::ACTION_GOTO: 71 | case Parser::ACTION_ACCEPT: 72 | break; 73 | case Parser::ACTION_REDUCE: 74 | switch ($p->reduceId) { 75 | case $add_idx: 76 | $op0 = $stack->top; 77 | $stack->pop(); 78 | $stack->top += $op0; 79 | break; 80 | case $sub_idx: 81 | $op0 = $stack->top; 82 | $stack->pop(); 83 | $stack->top -= $op0; 84 | break; 85 | case $mul_idx: 86 | $op0 = $stack->top; 87 | $stack->pop(); 88 | $stack->top *= $op0; 89 | break; 90 | case $div_idx: 91 | $op0 = $stack->top; 92 | $stack->pop(); 93 | $stack->top /= $op0; 94 | break; 95 | case $exp_idx: 96 | $op0 = $stack->top; 97 | $stack->pop(); 98 | $stack->top = $stack->top ** $op0; 99 | break; 100 | case $neg_idx: 101 | $stack->top = -$stack->top; 102 | break; 103 | case $int_idx: 104 | $i = (int)$p->sigil(); 105 | $stack->push($i); 106 | break; 107 | } 108 | 109 | break; 110 | } 111 | $p->advance(); 112 | } 113 | echo "$in = " . $stack->top . 
"\n"; 114 | } 115 | 116 | ?> 117 | ==DONE== 118 | --EXPECT-- 119 | 1 + 2^4 = 17 120 | 33 / (10 + 1) = 3 121 | 100 * 45 / 10 = 450 122 | 55 - 10*5 = 5 123 | 10 - -4 = 14 124 | 10000000^0 + 10 - 3^2 = 2 125 | ==DONE== 126 | -------------------------------------------------------------------------------- /tests/calc_003.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Advanced calc with state 3 | --SKIPIF-- 4 | 5 | --FILE-- 6 | token("INTEGER"); 16 | $p->left("'+' '-'"); 17 | $p->left("'*' '/'"); 18 | $p->precedence("NEGATE"); 19 | $p->right("'^'"); 20 | 21 | $p->push("start", "exp"); 22 | $add_idx = $p->push("exp", "exp '+' exp"); 23 | $sub_idx = $p->push("exp", "exp '-' exp"); 24 | $mul_idx = $p->push("exp", "exp '*' exp"); 25 | $div_idx = $p->push("exp", "exp '/' exp"); 26 | $p->push("exp", "'(' exp ')'"); 27 | $neg_idx = $p->push("exp", "'-' exp %prec NEGATE"); 28 | $exp_idx = $p->push("exp", "exp '^' exp"); 29 | $int_idx = $p->push("exp", "INTEGER"); 30 | 31 | $p->build(); 32 | 33 | $lex = new Lexer; 34 | $lex->push("[+]", $p->tokenId("'+'")); 35 | $lex->push("[-]", $p->tokenId("'-'")); 36 | $lex->push("[*]", $p->tokenId("'*'")); 37 | $lex->push("[\\^]", $p->tokenId("'^'")); 38 | $lex->push("[/]", $p->tokenId("'/'")); 39 | $lex->push("\\d+", $p->tokenId("INTEGER")); 40 | $lex->push("[(]", $p->tokenId("'('")); 41 | $lex->push("[)]", $p->tokenId("')'")); 42 | $lex->push("\\s+", 42); 43 | $lex->callout(42, function () use ($lex) { 44 | do { 45 | $lex->advance(); 46 | $tok = $lex->getToken(); 47 | } while (42 == $tok->id); 48 | }); 49 | 50 | $lex->build(); 51 | 52 | $exp = array( 53 | "1 + 2^4", 54 | "33 / (10 + 1)", 55 | "100 * 45 / 10", 56 | "55 - 10*5", 57 | "10 - -4", 58 | "10000000^0 + 10 - 3^2", 59 | ); 60 | 61 | foreach ($exp as $in) { 62 | if (!$p->validate($in, $lex)) { 63 | throw new ParserException("Failed to validate input"); 64 | } 65 | 66 | $p->consume($in, $lex); 67 | 68 | $stack = new Stack; 69 | 
70 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) { 71 | switch ($p->action) { 72 | case Parser::ACTION_ERROR: 73 | throw new ParserException("Parser error"); 74 | break; 75 | case Parser::ACTION_SHIFT: 76 | case Parser::ACTION_GOTO: 77 | case Parser::ACTION_ACCEPT: 78 | break; 79 | case Parser::ACTION_REDUCE: 80 | switch ($p->reduceId) { 81 | case $add_idx: 82 | $op0 = $stack->top; 83 | $stack->pop(); 84 | $stack->top += $op0; 85 | break; 86 | case $sub_idx: 87 | $op0 = $stack->top; 88 | $stack->pop(); 89 | $stack->top -= $op0; 90 | break; 91 | case $mul_idx: 92 | $op0 = $stack->top; 93 | $stack->pop(); 94 | $stack->top *= $op0; 95 | break; 96 | case $div_idx: 97 | $op0 = $stack->top; 98 | $stack->pop(); 99 | $stack->top /= $op0; 100 | break; 101 | case $exp_idx: 102 | $op0 = $stack->top; 103 | $stack->pop(); 104 | $stack->top = $stack->top ** $op0; 105 | break; 106 | case $neg_idx: 107 | $stack->top = -$stack->top; 108 | break; 109 | case $int_idx: 110 | $i = (int)$p->sigil(); 111 | $stack->push($i); 112 | break; 113 | } 114 | 115 | break; 116 | } 117 | $p->advance(); 118 | } 119 | echo "$in = " . $stack->top . "\n"; 120 | } 121 | 122 | ?> 123 | ==DONE== 124 | --EXPECT-- 125 | 1 + 2^4 = 17 126 | 33 / (10 + 1) = 3 127 | 100 * 45 / 10 = 450 128 | 55 - 10*5 = 5 129 | 10 - -4 = 14 130 | 10000000^0 + 10 - 3^2 = 2 131 | ==DONE== 132 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tree/iteration_node.hpp: -------------------------------------------------------------------------------- 1 | // iteration_node.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_ITERATION_NODE_HPP 7 | #define LEXERTL_ITERATION_NODE_HPP 8 | 9 | #include "node.hpp" 10 | 11 | namespace lexertl 12 | { 13 | namespace detail 14 | { 15 | template 16 | class basic_iteration_node : public basic_node 17 | { 18 | public: 19 | using node = basic_node; 20 | using bool_stack = typename node::bool_stack; 21 | using const_node_stack = typename node::const_node_stack; 22 | using node_ptr_vector = typename node::node_ptr_vector; 23 | using node_stack = typename node::node_stack; 24 | using node_type = typename node::node_type; 25 | using node_vector = typename node::node_vector; 26 | 27 | basic_iteration_node(observer_ptr next_, const bool greedy_) : 28 | node(true), 29 | _next(next_), 30 | _greedy(greedy_) 31 | { 32 | _next->append_firstpos(node::firstpos()); 33 | _next->append_lastpos(node::lastpos()); 34 | 35 | for (observer_ptr node_ : node::lastpos()) 36 | { 37 | node_->append_followpos(node::firstpos()); 38 | } 39 | 40 | for (observer_ptr node_ : node::firstpos()) 41 | { 42 | node_->greedy(greedy_); 43 | } 44 | } 45 | 46 | ~basic_iteration_node() override = default; 47 | 48 | node_type what_type() const override 49 | { 50 | return node::node_type::ITERATION; 51 | } 52 | 53 | bool traverse(const_node_stack& node_stack_, 54 | bool_stack& perform_op_stack_) const override 55 | { 56 | perform_op_stack_.push(true); 57 | node_stack_.push(_next); 58 | return true; 59 | } 60 | 61 | private: 62 | observer_ptr _next; 63 | bool _greedy; 64 | 65 | void copy_node(node_ptr_vector& node_ptr_vector_, 66 | node_stack& new_node_stack_, bool_stack& perform_op_stack_, 67 | bool& down_) const override 68 | { 69 | if (perform_op_stack_.top()) 70 | { 71 | observer_ptr ptr_ = new_node_stack_.top(); 72 | 73 | node_ptr_vector_.push_back(std::make_unique 74 | (ptr_, _greedy)); 75 | new_node_stack_.top() = node_ptr_vector_.back().get(); 76 | } 77 | else 78 | { 79 | 
down_ = true; 80 | } 81 | 82 | perform_op_stack_.pop(); 83 | } 84 | 85 | // No copy construction. 86 | basic_iteration_node(const basic_iteration_node&) = delete; 87 | // No assignment. 88 | const basic_iteration_node& operator = 89 | (const basic_iteration_node&) = delete; 90 | }; 91 | } 92 | } 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/search_iterator.hpp: -------------------------------------------------------------------------------- 1 | // iterator.hpp 2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_SEARCH_ITERATOR_HPP 7 | #define PARSERTL_SEARCH_ITERATOR_HPP 8 | 9 | #include "capture.hpp" 10 | #include "../../../lexertl14/include/lexertl/iterator.hpp" 11 | #include "match_results.hpp" 12 | #include "search.hpp" 13 | 14 | namespace parsertl 15 | { 16 | template 18 | class search_iterator 19 | { 20 | public: 21 | using iter_type = typename lexer_iterator::value_type::iter_type; 22 | using results = std::vector>>; 23 | using value_type = results; 24 | using difference_type = ptrdiff_t; 25 | using pointer = const value_type*; 26 | using reference = const value_type&; 27 | using iterator_category = std::forward_iterator_tag; 28 | 29 | search_iterator() = default; 30 | 31 | search_iterator(const lexer_iterator& iter_, const sm_type& sm_) : 32 | _iter(iter_), 33 | _sm(&sm_) 34 | { 35 | _captures.emplace_back(); 36 | _captures.back().emplace_back(iter_->first, iter_->first); 37 | lookup(); 38 | } 39 | 40 | search_iterator& operator ++() 41 | { 42 | lookup(); 43 | return *this; 44 | } 45 | 46 | search_iterator operator ++(int) 47 | { 48 | search_iterator iter_ = *this; 49 | 50 | lookup(); 51 | return iter_; 52 | } 53 | 54 | const value_type& operator *() const 55 | { 56 
| return _captures; 57 | } 58 | 59 | const value_type* operator ->() const 60 | { 61 | return &_captures; 62 | } 63 | 64 | bool operator ==(const search_iterator& rhs_) const 65 | { 66 | return _sm == rhs_._sm && 67 | (_sm == nullptr ? 68 | true : 69 | _captures == rhs_._captures); 70 | } 71 | 72 | bool operator !=(const search_iterator& rhs_) const 73 | { 74 | return !(*this == rhs_); 75 | } 76 | 77 | private: 78 | lexer_iterator _iter; 79 | results _captures; 80 | const sm_type* _sm = nullptr; 81 | 82 | void lookup() 83 | { 84 | lexer_iterator end; 85 | 86 | _captures.clear(); 87 | 88 | if (search(_iter, end, *_sm, _captures)) 89 | { 90 | _iter = end; 91 | } 92 | else 93 | { 94 | _sm = nullptr; 95 | } 96 | } 97 | }; 98 | 99 | using ssearch_iterator = 100 | search_iterator; 101 | using csearch_iterator = 102 | search_iterator; 103 | using wssearch_iterator = 104 | search_iterator; 105 | using wcsearch_iterator = 106 | search_iterator; 107 | } 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tokeniser/re_tokeniser_state.hpp: -------------------------------------------------------------------------------- 1 | // tokeniser_state.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_RE_TOKENISER_STATE_HPP 7 | #define LEXERTL_RE_TOKENISER_STATE_HPP 8 | 9 | #include "../../char_traits.hpp" 10 | #include "../../enums.hpp" 11 | #include 12 | #include "../../narrow.hpp" 13 | #include 14 | 15 | namespace lexertl 16 | { 17 | namespace detail 18 | { 19 | template 20 | struct basic_re_tokeniser_state 21 | { 22 | using char_type = ch_type; 23 | using index_type = 24 | typename basic_char_traits::index_type; 25 | 26 | const char_type* const _start; 27 | const char_type* const _end; 28 | const char_type* _curr; 29 | id_type _id; 30 | std::size_t _flags; 31 | std::stack _flags_stack; 32 | std::locale _locale; 33 | const char_type* _macro_name; 34 | long _paren_count = 0; 35 | bool _in_string = false; 36 | id_type _nl_id = static_cast(~0); 37 | 38 | basic_re_tokeniser_state(const char_type* start_, 39 | const char_type* const end_, id_type id_, 40 | const std::size_t flags_, const std::locale locale_, 41 | const char_type* macro_name_) : 42 | _start(start_), 43 | _end(end_), 44 | _curr(start_), 45 | _id(id_), 46 | _flags(flags_), 47 | _locale(locale_), 48 | _macro_name(macro_name_) 49 | { 50 | } 51 | 52 | inline bool next(char_type& ch_) 53 | { 54 | if (_curr >= _end) 55 | { 56 | ch_ = 0; 57 | return true; 58 | } 59 | else 60 | { 61 | ch_ = *_curr; 62 | increment(); 63 | return false; 64 | } 65 | } 66 | 67 | inline void increment() 68 | { 69 | ++_curr; 70 | } 71 | 72 | inline std::size_t index() const 73 | { 74 | return _curr - _start; 75 | } 76 | 77 | inline bool eos() const 78 | { 79 | return _curr >= _end; 80 | } 81 | 82 | inline void unexpected_end(std::ostringstream& ss_) const 83 | { 84 | ss_ << "Unexpected end of regex"; 85 | } 86 | 87 | inline void error(std::ostringstream& ss_) const 88 | { 89 | ss_ << " in "; 90 | 91 | if (_macro_name) 92 | { 93 | ss_ << "MACRO '"; 94 | narrow(_macro_name, ss_); 95 | ss_ << "'."; 96 | } 97 | else 98 | 
{ 99 | ss_ << "rule id " << _id << '.'; 100 | } 101 | } 102 | }; 103 | } 104 | } 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/iterator.hpp: -------------------------------------------------------------------------------- 1 | // iterator.hpp 2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | #ifndef LEXERTL_ITERATOR_HPP 8 | #define LEXERTL_ITERATOR_HPP 9 | 10 | #include 11 | #include "lookup.hpp" 12 | #include "state_machine.hpp" 13 | 14 | namespace lexertl 15 | { 16 | template 17 | class iterator 18 | { 19 | public: 20 | using id_type = typename results::id_type; 21 | using value_type = results; 22 | using difference_type = ptrdiff_t; 23 | using pointer = const value_type*; 24 | using reference = const value_type&; 25 | using iterator_category = std::forward_iterator_tag; 26 | 27 | iterator() = default; 28 | 29 | iterator(const iter& start_, const iter& end_, const sm_type& sm_, 30 | const bool bol_ = true, const id_type state_ = 0) : 31 | _results(start_, end_, bol_, state_), 32 | _sm(&sm_) 33 | { 34 | lookup(); 35 | } 36 | 37 | iterator& operator ++() 38 | { 39 | lookup(); 40 | return *this; 41 | } 42 | 43 | iterator operator ++(int) 44 | { 45 | iterator iter_ = *this; 46 | 47 | lookup(); 48 | return iter_; 49 | } 50 | 51 | const value_type& operator *() const 52 | { 53 | return _results; 54 | } 55 | 56 | const value_type* operator ->() const 57 | { 58 | return &_results; 59 | } 60 | 61 | bool operator ==(const iterator& rhs_) const 62 | { 63 | return _sm == rhs_._sm && (_sm == nullptr ? 
true : 64 | _results == rhs_._results); 65 | } 66 | 67 | bool operator !=(const iterator& rhs_) const 68 | { 69 | return !(*this == rhs_); 70 | } 71 | 72 | const sm_type& sm() const 73 | { 74 | return *_sm; 75 | } 76 | 77 | private: 78 | value_type _results; 79 | const sm_type* _sm = nullptr; 80 | 81 | void lookup() 82 | { 83 | lexertl::lookup(*_sm, _results); 84 | 85 | if (_results.first == _results.eoi) 86 | { 87 | _sm = nullptr; 88 | } 89 | } 90 | }; 91 | 92 | using siterator = 93 | iterator; 94 | using citerator = iterator; 95 | using wsiterator = 96 | iterator; 97 | using wciterator = iterator; 98 | using u32siterator = iterator; 100 | using u32citerator = iterator; 101 | 102 | using sriterator = 103 | iterator; 104 | using criterator = iterator; 105 | using wsriterator = 106 | iterator; 107 | using wcriterator = 108 | iterator; 109 | using u32sriterator = iterator; 111 | using u32criterator = iterator; 113 | } 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tree/selection_node.hpp: -------------------------------------------------------------------------------- 1 | // selection_node.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_SELECTION_NODE_HPP 7 | #define LEXERTL_SELECTION_NODE_HPP 8 | 9 | #include "node.hpp" 10 | 11 | namespace lexertl 12 | { 13 | namespace detail 14 | { 15 | template 16 | class basic_selection_node : public basic_node 17 | { 18 | public: 19 | using node = basic_node; 20 | using bool_stack = typename node::bool_stack; 21 | using const_node_stack = typename node::const_node_stack; 22 | using node_ptr_vector = typename node::node_ptr_vector; 23 | using node_stack = typename node::node_stack; 24 | using node_type = typename node::node_type; 25 | 26 | basic_selection_node(observer_ptr left_, 27 | observer_ptr right_) : 28 | node(left_->nullable() || right_->nullable()), 29 | _left(left_), 30 | _right(right_) 31 | { 32 | _left->append_firstpos(node::firstpos()); 33 | _right->append_firstpos(node::firstpos()); 34 | _left->append_lastpos(node::lastpos()); 35 | _right->append_lastpos(node::lastpos()); 36 | } 37 | 38 | ~basic_selection_node() override = default; 39 | 40 | node_type what_type() const override 41 | { 42 | return node::node_type::SELECTION; 43 | } 44 | 45 | bool traverse(const_node_stack& node_stack_, 46 | bool_stack& perform_op_stack_) const override 47 | { 48 | perform_op_stack_.push(true); 49 | 50 | switch (_right->what_type()) 51 | { 52 | case node::node_type::SEQUENCE: 53 | case node::node_type::SELECTION: 54 | case node::node_type::ITERATION: 55 | perform_op_stack_.push(false); 56 | break; 57 | default: 58 | break; 59 | } 60 | 61 | node_stack_.push(_right); 62 | node_stack_.push(_left); 63 | return true; 64 | } 65 | 66 | private: 67 | observer_ptr _left; 68 | observer_ptr _right; 69 | 70 | void copy_node(node_ptr_vector& node_ptr_vector_, 71 | node_stack& new_node_stack_, bool_stack& perform_op_stack_, 72 | bool& down_) const override 73 | { 74 | if (perform_op_stack_.top()) 75 | { 76 | observer_ptr rhs_ = new_node_stack_.top(); 77 | 78 | 
new_node_stack_.pop(); 79 | 80 | observer_ptr lhs_ = new_node_stack_.top(); 81 | 82 | node_ptr_vector_.push_back(std::make_unique 83 | (lhs_, rhs_)); 84 | new_node_stack_.top() = node_ptr_vector_.back().get(); 85 | } 86 | else 87 | { 88 | down_ = true; 89 | } 90 | 91 | perform_op_stack_.pop(); 92 | } 93 | 94 | // No copy construction. 95 | basic_selection_node(const basic_selection_node&) = delete; 96 | // No assignment. 97 | const basic_selection_node& operator = 98 | (const basic_selection_node&) = delete; 99 | }; 100 | } 101 | } 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tree/leaf_node.hpp: -------------------------------------------------------------------------------- 1 | // leaf_node.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_LEAF_NODE_HPP 7 | #define LEXERTL_LEAF_NODE_HPP 8 | 9 | #include "../../enums.hpp" // null_token 10 | #include "node.hpp" 11 | 12 | namespace lexertl 13 | { 14 | namespace detail 15 | { 16 | template 17 | class basic_leaf_node : public basic_node 18 | { 19 | public: 20 | using node = basic_node; 21 | using bool_stack = typename node::bool_stack; 22 | using const_node_stack = typename node::const_node_stack; 23 | using node_ptr_vector = typename node::node_ptr_vector; 24 | using node_stack = typename node::node_stack; 25 | using node_type = typename node::node_type; 26 | using node_vector = typename node::node_vector; 27 | 28 | basic_leaf_node(const id_type token_, const bool greedy_) : 29 | node(token_ == node::null_token()), 30 | _token(token_), 31 | _set_greedy(!greedy_), 32 | _greedy(greedy_) 33 | { 34 | if (!node::nullable()) 35 | { 36 | node::firstpos().push_back(this); 37 | node::lastpos().push_back(this); 38 | } 39 | 
} 40 | 41 | ~basic_leaf_node() override = default; 42 | 43 | void append_followpos 44 | (const node_vector& followpos_) override 45 | { 46 | _followpos.insert(_followpos.end(), 47 | followpos_.begin(), followpos_.end()); 48 | } 49 | 50 | node_type what_type() const override 51 | { 52 | return node::node_type::LEAF; 53 | } 54 | 55 | bool traverse(const_node_stack&/*node_stack_*/, 56 | bool_stack&/*perform_op_stack_*/) const override 57 | { 58 | return false; 59 | } 60 | 61 | id_type token() const override 62 | { 63 | return _token; 64 | } 65 | 66 | bool set_greedy() const override 67 | { 68 | return _set_greedy; 69 | } 70 | 71 | void greedy(const bool greedy_) override 72 | { 73 | if (!_set_greedy) 74 | { 75 | _greedy = greedy_; 76 | _set_greedy = true; 77 | } 78 | } 79 | 80 | bool greedy() const override 81 | { 82 | return _greedy; 83 | } 84 | 85 | const node_vector& followpos() const override 86 | { 87 | return _followpos; 88 | } 89 | 90 | node_vector& followpos() override 91 | { 92 | return _followpos; 93 | } 94 | 95 | private: 96 | id_type _token; 97 | bool _set_greedy; 98 | bool _greedy; 99 | node_vector _followpos; 100 | 101 | void copy_node(node_ptr_vector& node_ptr_vector_, 102 | node_stack& new_node_stack_, bool_stack&/*perform_op_stack_*/, 103 | bool&/*down_*/) const override 104 | { 105 | node_ptr_vector_.push_back(std::make_unique 106 | (_token, _greedy)); 107 | new_node_stack_.push(node_ptr_vector_.back().get()); 108 | } 109 | }; 110 | } 111 | } 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tree/end_node.hpp: -------------------------------------------------------------------------------- 1 | // end_node.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_END_NODE_HPP 7 | #define LEXERTL_END_NODE_HPP 8 | 9 | #include "node.hpp" 10 | 11 | namespace lexertl 12 | { 13 | namespace detail 14 | { 15 | template 16 | class basic_end_node : public basic_node 17 | { 18 | public: 19 | using node = basic_node; 20 | using bool_stack = typename node::bool_stack; 21 | using const_node_stack = typename node::const_node_stack; 22 | using node_ptr_vector = typename node::node_ptr_vector; 23 | using node_stack = typename node::node_stack; 24 | using node_type = typename node::node_type; 25 | using node_vector = typename node::node_vector; 26 | 27 | basic_end_node(const id_type id_, const id_type user_id_, 28 | const id_type next_dfa_, const id_type push_dfa_, 29 | const bool pop_dfa_, const bool greedy_) : 30 | node(false), 31 | _id(id_), 32 | _user_id(user_id_), 33 | _next_dfa(next_dfa_), 34 | _push_dfa(push_dfa_), 35 | _pop_dfa(pop_dfa_), 36 | _greedy(greedy_) 37 | { 38 | node::firstpos().push_back(this); 39 | node::lastpos().push_back(this); 40 | } 41 | 42 | ~basic_end_node() override = default; 43 | 44 | node_type what_type() const override 45 | { 46 | return node::node_type::END; 47 | } 48 | 49 | bool traverse(const_node_stack&/*node_stack_*/, 50 | bool_stack&/*perform_op_stack_*/) const override 51 | { 52 | return false; 53 | } 54 | 55 | bool greedy() const override 56 | { 57 | return _greedy; 58 | } 59 | 60 | const node_vector& followpos() const override 61 | { 62 | // _followpos is always empty..! 63 | return _followpos; 64 | } 65 | 66 | node_vector& followpos() override 67 | { 68 | // _followpos is always empty..! 
69 | return _followpos; 70 | } 71 | 72 | bool end_state() const override 73 | { 74 | return true; 75 | } 76 | 77 | id_type id() const override 78 | { 79 | return _id; 80 | } 81 | 82 | id_type user_id() const override 83 | { 84 | return _user_id; 85 | } 86 | 87 | id_type next_dfa() const override 88 | { 89 | return _next_dfa; 90 | } 91 | 92 | id_type push_dfa() const override 93 | { 94 | return _push_dfa; 95 | } 96 | 97 | bool pop_dfa() const override 98 | { 99 | return _pop_dfa; 100 | } 101 | 102 | private: 103 | id_type _id; 104 | id_type _user_id; 105 | id_type _next_dfa; 106 | id_type _push_dfa; 107 | bool _pop_dfa; 108 | bool _greedy; 109 | node_vector _followpos; 110 | 111 | void copy_node(node_ptr_vector&/*node_ptr_vector_*/, 112 | node_stack&/*new_node_stack_*/, 113 | bool_stack&/*perform_op_stack_*/, 114 | bool&/*down_*/) const override 115 | { 116 | // Nothing to do, as end_nodes are not copied. 117 | } 118 | }; 119 | } 120 | } 121 | 122 | #endif 123 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/match_results.hpp: -------------------------------------------------------------------------------- 1 | // match_results.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_MATCH_RESULTS_HPP 7 | #define PARSERTL_MATCH_RESULTS_HPP 8 | 9 | #include "runtime_error.hpp" 10 | #include "state_machine.hpp" 11 | #include 12 | 13 | namespace parsertl 14 | { 15 | template 16 | struct basic_match_results 17 | { 18 | using id_type = typename sm_type::id_type; 19 | std::vector stack; 20 | id_type token_id = static_cast(~0); 21 | typename sm_type::entry entry; 22 | 23 | basic_match_results() 24 | { 25 | stack.push_back(0); 26 | entry.action = action::error; 27 | entry.param = static_cast(error_type::unknown_token); 28 | } 29 | 30 | explicit basic_match_results(const std::size_t reserved_) : 31 | stack(reserved_) 32 | { 33 | basic_match_results(); 34 | } 35 | 36 | basic_match_results(const id_type token_id_, const sm_type& sm_) 37 | { 38 | reset(token_id_, sm_); 39 | } 40 | 41 | basic_match_results(const id_type token_id_, const sm_type& sm_, 42 | const std::size_t reserved_) : 43 | stack(reserved_) 44 | { 45 | basic_match_results(token_id_, sm_); 46 | } 47 | 48 | void clear() 49 | { 50 | stack.clear(); 51 | stack.push_back(0); 52 | token_id = static_cast(~0); 53 | entry.clear(); 54 | } 55 | 56 | void reset(const id_type token_id_, const sm_type& sm_) 57 | { 58 | stack.clear(); 59 | stack.push_back(0); 60 | token_id = token_id_; 61 | 62 | if (token_id == static_cast(~0)) 63 | { 64 | entry.action = action::error; 65 | entry.param = static_cast(error_type::unknown_token); 66 | } 67 | else 68 | { 69 | entry = sm_.at(stack.back(), token_id); 70 | } 71 | } 72 | 73 | id_type reduce_id() const 74 | { 75 | if (entry.action != action::reduce) 76 | { 77 | throw runtime_error("Not in a reduce state!"); 78 | } 79 | 80 | return entry.param; 81 | } 82 | 83 | template 84 | typename token_vector::value_type& dollar(const std::size_t index_, 85 | const sm_type& sm_, token_vector& productions) const 86 | { 87 | if (entry.action != action::reduce) 88 | { 
89 | throw runtime_error("Not in a reduce state!"); 90 | } 91 | 92 | return productions[productions.size() - 93 | production_size(sm_, entry.param) + index_]; 94 | } 95 | 96 | template 97 | const typename token_vector::value_type& 98 | dollar(const std::size_t index_, const sm_type& sm_, 99 | const token_vector& productions) const 100 | { 101 | if (entry.action != action::reduce) 102 | { 103 | throw runtime_error("Not in a reduce state!"); 104 | } 105 | 106 | return productions[productions.size() - 107 | production_size(sm_, entry.param) + index_]; 108 | } 109 | 110 | std::size_t production_size(const sm_type& sm, 111 | const std::size_t index_) const 112 | { 113 | return sm._rules[index_].second.size(); 114 | } 115 | 116 | bool operator ==(const basic_match_results& rhs_) const 117 | { 118 | return stack == rhs_.stack && 119 | token_id == rhs_.token_id && 120 | entry == rhs_.entry; 121 | } 122 | }; 123 | 124 | using match_results = basic_match_results; 125 | using uncompressed_match_results = 126 | basic_match_results; 127 | } 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/replace.hpp: -------------------------------------------------------------------------------- 1 | // replace.hpp 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_REPLACE_HPP 7 | #define LEXERTL_REPLACE_HPP 8 | 9 | #include "lookup.hpp" 10 | #include "state_machine.hpp" 11 | #include "match_results.hpp" 12 | 13 | namespace lexertl 14 | { 15 | template 18 | out_iter replace(out_iter out, fwd_iter first, fwd_iter second, 19 | const basic_state_machine& sm, 20 | const std::basic_string& fmt) 21 | { 22 | return replace(out, first, second, sm, fmt.c_str()); 23 | } 24 | 25 | template 27 | out_iter replace(out_iter out, fwd_iter first, fwd_iter second, 28 | const basic_state_machine& sm, 29 | const char_type* fmt) 30 | { 31 | const char_type* end_fmt = fmt; 32 | fwd_iter last = first; 33 | lexertl::match_results results(first, second); 34 | 35 | while (*end_fmt) 36 | ++end_fmt; 37 | 38 | // Lookahead 39 | lexertl::lookup(sm, results); 40 | 41 | while (results.id != 0) 42 | { 43 | std::copy(last, results.first, out); 44 | std::copy(fmt, end_fmt, out); 45 | last = results.second; 46 | lexertl::lookup(sm, results); 47 | } 48 | 49 | std::copy(last, results.first, out); 50 | return out; 51 | } 52 | 53 | template 56 | std::basic_string 57 | replace(const std::basic_string& s, 58 | const basic_state_machine& sm, 59 | const std::basic_string& fmt) 60 | { 61 | std::basic_string ret; 62 | 63 | replace(std::back_inserter(ret), s.cbegin(), s.cend(), sm, fmt); 64 | return ret; 65 | } 66 | 67 | template 69 | std::basic_string 70 | replace(const std::basic_string& s, 71 | const basic_state_machine& sm, 72 | const char_type* fmt) 73 | { 74 | std::basic_string ret; 75 | 76 | replace(std::back_inserter(ret), s.cbegin(), s.cend(), sm, fmt); 77 | return ret; 78 | } 79 | 80 | template 82 | std::basic_string 83 | replace(const char_type* s, 84 | const basic_state_machine& sm, 85 | const std::basic_string& fmt) 86 | { 87 | std::basic_string ret; 88 | const char_type* end_s = s; 89 | 90 | while (*end_s) 91 | ++end_s; 92 | 93 | 
replace(std::back_inserter(ret), s, end_s, sm, fmt); 94 | return ret; 95 | } 96 | 97 | template 98 | std::basic_string replace(const char_type* s, 99 | const basic_state_machine& sm, 100 | const char_type* fmt) 101 | { 102 | std::basic_string ret; 103 | const char_type* end_s = s; while (*end_s) ++end_s; 104 | const char_type* last = s; 105 | lexertl::match_results results(s, end_s); 106 | 107 | // Lookahead 108 | lexertl::lookup(sm, results); 109 | 110 | while (results.id != 0) 111 | { 112 | ret.append(last, results.first); 113 | ret.append(fmt); 114 | last = results.second; 115 | lexertl::lookup(sm, results); 116 | } 117 | 118 | ret.append(last, results.first); 119 | return ret; 120 | } 121 | } 122 | 123 | #endif 124 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tree/sequence_node.hpp: -------------------------------------------------------------------------------- 1 | // sequence_node.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_SEQUENCE_NODE_HPP 7 | #define LEXERTL_SEQUENCE_NODE_HPP 8 | 9 | #include "node.hpp" 10 | 11 | namespace lexertl 12 | { 13 | namespace detail 14 | { 15 | template 16 | class basic_sequence_node : public basic_node 17 | { 18 | public: 19 | using node = basic_node; 20 | using bool_stack = typename node::bool_stack; 21 | using const_node_stack = typename node::const_node_stack; 22 | using node_ptr_vector = typename node::node_ptr_vector; 23 | using node_stack = typename node::node_stack; 24 | using node_type = typename node::node_type; 25 | using node_vector = typename node::node_vector; 26 | 27 | basic_sequence_node(observer_ptr left_, 28 | observer_ptr right_) : 29 | node(left_->nullable() && right_->nullable()), 30 | _left(left_), 31 | _right(right_) 32 | { 33 | _left->append_firstpos(node::firstpos()); 34 | 35 | if (_left->nullable()) 36 | { 37 | _right->append_firstpos(node::firstpos()); 38 | } 39 | 40 | if (_right->nullable()) 41 | { 42 | _left->append_lastpos(node::lastpos()); 43 | } 44 | 45 | _right->append_lastpos(node::lastpos()); 46 | 47 | node_vector& lastpos_ = _left->lastpos(); 48 | const node_vector& firstpos_ = _right->firstpos(); 49 | 50 | for (observer_ptr node_ : lastpos_) 51 | { 52 | node_->append_followpos(firstpos_); 53 | } 54 | } 55 | 56 | ~basic_sequence_node() override = default; 57 | 58 | node_type what_type() const override 59 | { 60 | return node::node_type::SEQUENCE; 61 | } 62 | 63 | bool traverse(const_node_stack& node_stack_, 64 | bool_stack& perform_op_stack_) const override 65 | { 66 | perform_op_stack_.push(true); 67 | 68 | switch (_right->what_type()) 69 | { 70 | case node::node_type::SEQUENCE: 71 | case node::node_type::SELECTION: 72 | case node::node_type::ITERATION: 73 | perform_op_stack_.push(false); 74 | break; 75 | default: 76 | break; 77 | } 78 | 79 | node_stack_.push(_right); 80 | node_stack_.push(_left); 81 | 
return true; 82 | } 83 | 84 | private: 85 | observer_ptr _left; 86 | observer_ptr _right; 87 | 88 | void copy_node(node_ptr_vector& node_ptr_vector_, 89 | node_stack& new_node_stack_, bool_stack& perform_op_stack_, 90 | bool& down_) const override 91 | { 92 | if (perform_op_stack_.top()) 93 | { 94 | observer_ptr rhs_ = new_node_stack_.top(); 95 | 96 | new_node_stack_.pop(); 97 | 98 | observer_ptr lhs_ = new_node_stack_.top(); 99 | 100 | node_ptr_vector_.push_back(std::make_unique 101 | (lhs_, rhs_)); 102 | new_node_stack_.top() = node_ptr_vector_.back().get(); 103 | } 104 | else 105 | { 106 | down_ = true; 107 | } 108 | 109 | perform_op_stack_.pop(); 110 | } 111 | 112 | // No copy construction. 113 | basic_sequence_node(const basic_sequence_node&) = delete; 114 | // No assignment. 115 | const basic_sequence_node& operator = 116 | (const basic_sequence_node&) = delete; 117 | }; 118 | } 119 | } 120 | 121 | #endif 122 | -------------------------------------------------------------------------------- /lib/parle/lexer/iterator.hpp: -------------------------------------------------------------------------------- 1 | // Based on lexertl/iterator.hpp 2 | 3 | #ifndef PARLE_LEXER_ITERATOR_HPP 4 | #define PARLE_LEXER_ITERATOR_HPP 5 | 6 | #include 7 | #include 8 | #include "include/lexertl/lookup.hpp" 9 | #include "include/lexertl/runtime_error.hpp" 10 | 11 | #undef lookup 12 | 13 | namespace parle 14 | { 15 | namespace lexer 16 | { 17 | template 18 | class iterator 19 | { 20 | public: 21 | using value_type = results; 22 | using difference_type = ptrdiff_t; 23 | using pointer = const value_type *; 24 | using reference = const value_type &; 25 | using iterator_category = std::forward_iterator_tag; 26 | using cb_map = std::unordered_map; 27 | 28 | iterator() : 29 | _results(iter(), iter()), 30 | _sm(nullptr), 31 | _lex(nullptr) 32 | { 33 | } 34 | 35 | iterator(const iter &start_, const iter &end_, lexer_obj_type &lex, bool do_next = false) : 36 | _results(start_, end_), 37 | 
_sm(&lex.sm), 38 | _lex(&lex) 39 | { 40 | 41 | if (do_next) { 42 | lookup(); 43 | } 44 | } 45 | 46 | void set_bol(bool bol) 47 | { 48 | _results.bol = bol; 49 | } 50 | 51 | void reset(const iter &start_, const iter &end_) 52 | { 53 | if (_results.first > start_) { 54 | throw lexertl::runtime_error("Can only reset to a forward position"); 55 | } 56 | _results.first = start_; 57 | _results.second = start_; 58 | _results.eoi = end_; 59 | } 60 | 61 | // Only need this because of warnings with gcc with -Weffc++ 62 | iterator(const iterator &rhs_) 63 | { 64 | _results = rhs_._results; 65 | _sm = rhs_._sm; 66 | _lex = rhs_._lex; 67 | } 68 | 69 | // Only need this because of warnings with gcc with -Weffc++ 70 | iterator &operator =(const iterator &rhs_) 71 | { 72 | if (&rhs_ != this) 73 | { 74 | _results = rhs_._results; 75 | _sm = rhs_._sm; 76 | _lex = rhs_._lex; 77 | } 78 | 79 | return *this; 80 | } 81 | 82 | iterator &operator ++() 83 | { 84 | lookup(); 85 | return *this; 86 | } 87 | 88 | iterator operator ++(int) 89 | { 90 | iterator iter_ = *this; 91 | 92 | lookup(); 93 | return iter_; 94 | } 95 | 96 | const value_type &operator *() const 97 | { 98 | return _results; 99 | } 100 | 101 | const value_type *operator ->() const 102 | { 103 | return &_results; 104 | } 105 | 106 | bool operator ==(const iterator &rhs_) const 107 | { 108 | return _sm == rhs_._sm && (_sm == nullptr ? 
true : 109 | _results == rhs_._results); 110 | } 111 | 112 | bool operator !=(const iterator &rhs_) const 113 | { 114 | return !(*this == rhs_); 115 | } 116 | 117 | public: 118 | size_t line = SIZE_MAX; 119 | size_t column = SIZE_MAX; 120 | private: 121 | value_type _results; 122 | const sm_type *_sm; 123 | lexer_obj_type *_lex; 124 | 125 | void lookup() 126 | { 127 | if (_results.bol) { 128 | line++; 129 | column = 0; 130 | } else { 131 | column += _results.second - _results.first; 132 | } 133 | 134 | lexertl::lookup(*_sm, _results); 135 | 136 | if (_lex->cb_map.size() > 0) { 137 | auto it = _lex->cb_map.find(_results.id); 138 | if (_lex->cb_map.end() != it) { 139 | zval result; 140 | token_cb_type cb = it->second; 141 | zend_fcall_info fci; 142 | zend_fcall_info_cache fcc; 143 | 144 | if (FAILURE == zend_fcall_info_init(&cb.cb, 0, &fci, &fcc, NULL, NULL)) { 145 | zend_throw_exception_ex(ParleLexerException_ce, 0, "Failed to prepare function call"); 146 | if (_results.first == _results.eoi) { 147 | _sm = nullptr; 148 | } 149 | return; 150 | } 151 | ZVAL_NULL(&result); 152 | fci.retval = &result; 153 | fci.param_count = 0; 154 | 155 | if (FAILURE == zend_call_function(&fci, &fcc)) { 156 | zend_throw_exception_ex(ParleLexerException_ce, 0, "Callback execution failed"); 157 | if (_results.first == _results.eoi) { 158 | _sm = nullptr; 159 | } 160 | return; 161 | } 162 | 163 | #if 0 164 | convert_to_boolean(&result); 165 | if (Z_TYPE(result) == IS_FALSE && _results.first != _results.eoi) { 166 | lexertl::lookup(*_sm, _results); 167 | } 168 | #endif 169 | } 170 | } 171 | 172 | if (_results.first == _results.eoi) { 173 | _sm = nullptr; 174 | } 175 | } 176 | }; 177 | } 178 | } 179 | 180 | #endif 181 | 182 | /* 183 | * Local variables: 184 | * tab-width: 4 185 | * c-basic-offset: 4 186 | * End: 187 | * vim600: noet sw=4 ts=4 fdm=marker 188 | * vim<600: noet sw=4 ts=4 189 | */ 190 | -------------------------------------------------------------------------------- 
/lib/parsertl14/include/parsertl/iterator.hpp: -------------------------------------------------------------------------------- 1 | // iterator.hpp 2 | // Copyright (c) 2022-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_ITERATOR_HPP 7 | #define PARSERTL_ITERATOR_HPP 8 | 9 | #include "../../../lexertl14/include/lexertl/iterator.hpp" 10 | #include "lookup.hpp" 11 | #include "match_results.hpp" 12 | #include "token.hpp" 13 | 14 | namespace parsertl 15 | { 16 | template 18 | class iterator 19 | { 20 | public: 21 | using results = basic_match_results; 22 | using value_type = results; 23 | using difference_type = ptrdiff_t; 24 | using pointer = const value_type*; 25 | using reference = const value_type&; 26 | using iterator_category = std::forward_iterator_tag; 27 | 28 | // Qualify token to prevent arg dependant lookup 29 | using token = parsertl::token; 30 | using token_vector = typename token::token_vector; 31 | 32 | iterator() = default; 33 | 34 | iterator(const lexer_iterator& iter_, const sm_type& sm_) : 35 | _iter(iter_), 36 | _results(_iter->id, sm_), 37 | _sm(&sm_) 38 | { 39 | // The first action can only ever be reduce 40 | // if the grammar treats no input as valid. 41 | if (_results.entry.action != action::reduce) 42 | lookup(); 43 | } 44 | 45 | iterator(const lexer_iterator& iter_, const sm_type& sm_, 46 | const std::size_t reserved_) : 47 | _iter(iter_), 48 | _results(_iter->id, sm_, reserved_), 49 | _productions(reserved_), 50 | _sm(&sm_) 51 | { 52 | // The first action can only ever be reduce 53 | // if the grammar treats no input as valid. 
54 | if (_results.entry.action != action::reduce) 55 | lookup(); 56 | } 57 | 58 | typename token_vector::value_type dollar(const std::size_t index_) const 59 | { 60 | return _results.dollar(index_, *_sm, _productions); 61 | } 62 | 63 | iterator& operator ++() 64 | { 65 | lookup(); 66 | return *this; 67 | } 68 | 69 | iterator operator ++(int) 70 | { 71 | iterator iter_ = *this; 72 | 73 | lookup(); 74 | return iter_; 75 | } 76 | 77 | const value_type& operator *() const 78 | { 79 | return _results; 80 | } 81 | 82 | const value_type* operator ->() const 83 | { 84 | return &_results; 85 | } 86 | 87 | bool operator ==(const iterator& rhs_) const 88 | { 89 | return _sm == rhs_._sm && 90 | (_sm == nullptr ? true : 91 | _results == rhs_._results); 92 | } 93 | 94 | bool operator !=(const iterator& rhs_) const 95 | { 96 | return !(*this == rhs_); 97 | } 98 | 99 | private: 100 | lexer_iterator _iter; 101 | basic_match_results _results; 102 | token_vector _productions; 103 | const sm_type* _sm = nullptr; 104 | 105 | void lookup() 106 | { 107 | // do while because we need to move past the current reduce action 108 | do 109 | { 110 | parsertl::lookup(_iter, *_sm, _results, _productions); 111 | } while (_results.entry.action == action::shift || 112 | _results.entry.action == action::go_to); 113 | 114 | switch (_results.entry.action) 115 | { 116 | case action::accept: 117 | case action::error: 118 | _sm = nullptr; 119 | break; 120 | default: 121 | break; 122 | } 123 | } 124 | }; 125 | 126 | using siterator = iterator; 127 | using citerator = iterator; 128 | using wsiterator = iterator; 129 | using wciterator = iterator; 130 | } 131 | 132 | #endif 133 | -------------------------------------------------------------------------------- /tests/lexer_007.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Lex JSON 3 | --SKIPIF-- 4 | 10 | --FILE-- 11 | insertMacro("STRING", "[\"]([ 
-\\x10ffff]{-}[\"\\\\]|\\\\([\"\\\\/bfnrt]|u[0-9a-fA-F]{4}))*[\"]"); 30 | $lex->insertMacro("NUMBER", "-?(0|[1-9]\\d*)([.]\\d+)?([eE][-+]?\\d+)?"); 31 | $lex->insertMacro("BOOL", "true|false"); 32 | $lex->insertMacro("NULL", "null"); 33 | 34 | $lex->pushState("END"); 35 | 36 | $lex->pushState("OBJECT"); 37 | $lex->pushState("NAME"); 38 | $lex->pushState("COLON"); 39 | $lex->pushState("OB_VALUE"); 40 | $lex->pushState("OB_COMMA"); 41 | 42 | $lex->pushState("ARRAY"); 43 | $lex->pushState("ARR_COMMA"); 44 | $lex->pushState("ARR_VALUE"); 45 | 46 | $lex->push("INITIAL", "[{]", eOpenOb, ">OBJECT:END"); 47 | $lex->push("INITIAL", "[[]", eOpenArr, ">ARRAY:END"); 48 | 49 | $lex->push("OBJECT,OB_COMMA", "[}]", eCloseOb, "<"); 50 | $lex->push("OBJECT,NAME", "{STRING}", eName, "COLON"); 51 | $lex->push("COLON", ":", Token::SKIP, "OB_VALUE"); 52 | 53 | $lex->push("OB_VALUE", "{STRING}", eString, "OB_COMMA"); 54 | $lex->push("OB_VALUE", "{NUMBER}", eNumber, "OB_COMMA"); 55 | $lex->push("OB_VALUE", "{BOOL}", eBoolean, "OB_COMMA"); 56 | $lex->push("OB_VALUE", "{NULL}", eNull, "OB_COMMA"); 57 | $lex->push("OB_VALUE", "[{]", eOpenOb, ">OBJECT:OB_COMMA"); 58 | $lex->push("OB_VALUE", "[[]", eOpenArr, ">ARRAY:OB_COMMA"); 59 | 60 | $lex->push("OB_COMMA", ",", Token::SKIP, "NAME"); 61 | 62 | $lex->push("ARRAY,ARR_COMMA", "\\]", eCloseArr, "<"); 63 | $lex->push("ARRAY,ARR_VALUE", "{STRING}", eString, "ARR_COMMA"); 64 | $lex->push("ARRAY,ARR_VALUE", "{NUMBER}", eNumber, "ARR_COMMA"); 65 | $lex->push("ARRAY,ARR_VALUE", "{BOOL}", eBoolean, "ARR_COMMA"); 66 | $lex->push("ARRAY,ARR_VALUE", "{NULL}", eNull, "ARR_COMMA"); 67 | $lex->push("ARRAY,ARR_VALUE", "[{]", eOpenOb, ">OBJECT:ARR_COMMA"); 68 | $lex->push("ARRAY,ARR_VALUE", "[[]", eOpenArr, ">ARRAY:ARR_COMMA"); 69 | 70 | $lex->push("ARR_COMMA", ",", Token::SKIP, "ARR_VALUE"); 71 | $lex->push("*", "[ \t\r\n]+", Token::SKIP, "."); 72 | 73 | $lex->build(); 74 | 75 | $in = file_get_contents(dirname(__FILE__) . DIRECTORY_SEPARATOR . 
"lexer_003.json"); 76 | 77 | $lex->consume($in); 78 | 79 | 80 | $lex->advance(); 81 | $tok = $lex->getToken(); 82 | while (Token::EOI != $tok->id) { 83 | var_dump($tok); 84 | $lex->advance(); 85 | $tok = $lex->getToken(); 86 | } 87 | 88 | ?> 89 | ==DONE== 90 | --EXPECTF-- 91 | object(Parle\Token)#%d (2) { 92 | ["id"]=> 93 | int(42) 94 | ["value"]=> 95 | string(1) "{" 96 | } 97 | object(Parle\Token)#%d (2) { 98 | ["id"]=> 99 | int(46) 100 | ["value"]=> 101 | string(5) ""key"" 102 | } 103 | object(Parle\Token)#%d (2) { 104 | ["id"]=> 105 | int(44) 106 | ["value"]=> 107 | string(1) "[" 108 | } 109 | object(Parle\Token)#%d (2) { 110 | ["id"]=> 111 | int(47) 112 | ["value"]=> 113 | string(15) ""qelque choose"" 114 | } 115 | object(Parle\Token)#%d (2) { 116 | ["id"]=> 117 | int(48) 118 | ["value"]=> 119 | string(2) "42" 120 | } 121 | object(Parle\Token)#%d (2) { 122 | ["id"]=> 123 | int(47) 124 | ["value"]=> 125 | string(8) ""füße"" 126 | } 127 | object(Parle\Token)#%d (2) { 128 | ["id"]=> 129 | int(45) 130 | ["value"]=> 131 | string(1) "]" 132 | } 133 | object(Parle\Token)#%d (2) { 134 | ["id"]=> 135 | int(46) 136 | ["value"]=> 137 | string(5) ""obj"" 138 | } 139 | object(Parle\Token)#%d (2) { 140 | ["id"]=> 141 | int(42) 142 | ["value"]=> 143 | string(1) "{" 144 | } 145 | object(Parle\Token)#%d (2) { 146 | ["id"]=> 147 | int(46) 148 | ["value"]=> 149 | string(6) ""prop"" 150 | } 151 | object(Parle\Token)#%d (2) { 152 | ["id"]=> 153 | int(48) 154 | ["value"]=> 155 | string(2) "12" 156 | } 157 | object(Parle\Token)#%d (2) { 158 | ["id"]=> 159 | int(43) 160 | ["value"]=> 161 | string(1) "}" 162 | } 163 | object(Parle\Token)#%d (2) { 164 | ["id"]=> 165 | int(46) 166 | ["value"]=> 167 | string(6) ""some"" 168 | } 169 | object(Parle\Token)#%d (2) { 170 | ["id"]=> 171 | int(50) 172 | ["value"]=> 173 | string(4) "null" 174 | } 175 | object(Parle\Token)#%d (2) { 176 | ["id"]=> 177 | int(43) 178 | ["value"]=> 179 | string(1) "}" 180 | } 181 | ==DONE== 182 | 
// Only files small enough to fit into memory are supported.
namespace lexertl
{
    // Read-only memory-mapped view of a file, exposed as an array of
    // char_type. data() is null and size() is 0 whenever no file is
    // mapped, including after a failed open().
    template<typename char_type>
    class basic_memory_file
    {
    public:
        basic_memory_file() = default;

        // Maps pathname_; check data() to find out whether it worked.
        explicit basic_memory_file(const char* pathname_)
        {
            open(pathname_);
        }

        // No copy construction.
        basic_memory_file(const basic_memory_file&) = delete;
        // No assignment.
        basic_memory_file& operator =(const basic_memory_file&) = delete;

        ~basic_memory_file()
        {
            close();
        }

        // Closes any current mapping, then maps pathname_ read-only.
        // Failure is reported only through data() == nullptr.
        //
        // Fix: every failure path now releases the descriptor/handles it
        // acquired. Previously they were closed only by close(), which
        // does nothing unless a mapping exists, so a failed fstat/mmap
        // (a zero-length file, for example) or a failed
        // CreateFileMapping/MapViewOfFile leaked them.
        void open(const char* pathname_)
        {
            close();

#ifdef _WIN32
            HANDLE fh_ = ::CreateFileA(pathname_, GENERIC_READ,
                FILE_SHARE_READ, nullptr, OPEN_EXISTING,
                FILE_ATTRIBUTE_NORMAL, nullptr);

            if (fh_ == INVALID_HANDLE_VALUE)
            {
                return;
            }

            HANDLE fmh_ = ::CreateFileMapping(fh_, nullptr, PAGE_READONLY,
                0, 0, nullptr);

            if (fmh_ == nullptr)
            {
                ::CloseHandle(fh_);
                return;
            }

            const void* view_ = ::MapViewOfFile(fmh_, FILE_MAP_READ, 0, 0, 0);

            if (view_ == nullptr)
            {
                ::CloseHandle(fmh_);
                ::CloseHandle(fh_);
                return;
            }

            _data = static_cast<const char_type*>(view_);
            _size = ::GetFileSize(fh_, nullptr) / sizeof(char_type);
            _fh = fh_;
            _fmh = fmh_;
#else
            const int fd_ = ::open(pathname_, O_RDONLY);

            if (fd_ < 0)
            {
                return;
            }

            struct stat sbuf_;
            void* map_ = MAP_FAILED;
            std::size_t bytes_ = 0;

            // mmap() rejects a zero length, so an empty file is treated
            // as a failed open without calling it.
            if (::fstat(fd_, &sbuf_) > -1 && sbuf_.st_size > 0)
            {
                bytes_ = static_cast<std::size_t>(sbuf_.st_size);
                map_ = ::mmap(nullptr, bytes_, PROT_READ, MAP_SHARED,
                    fd_, 0);
            }

            if (map_ == MAP_FAILED)
            {
                ::close(fd_);
                return;
            }

            _data = static_cast<const char_type*>(map_);
            _size = bytes_ / sizeof(char_type);
            _mapped_bytes = bytes_;
            _fh = fd_;
#endif
        }

        // Start of the mapped characters, or nullptr when nothing is
        // mapped.
        const char_type* data() const
        {
            return _data;
        }

        // Number of whole char_type units in the mapping.
        std::size_t size() const
        {
            return _size;
        }

        // Unmaps the file and releases its descriptor/handles. Safe to
        // call when nothing is mapped.
        void close()
        {
            if (!_data)
            {
                return;
            }

#ifdef _WIN32
            ::UnmapViewOfFile(_data);
            ::CloseHandle(_fmh);
            ::CloseHandle(_fh);
            _fh = nullptr;
            _fmh = nullptr;
#else
            // Fix: unmap the byte length that was mapped. _size counts
            // char_type units, which is too short for wide character
            // types.
            ::munmap(const_cast<char_type*>(_data), _mapped_bytes);
            ::close(_fh);
            _fh = -1;
            _mapped_bytes = 0;
#endif
            _data = nullptr;
            _size = 0;
        }

    private:
        const char_type* _data = nullptr;
        std::size_t _size = 0;
#ifdef _WIN32
        HANDLE _fh = nullptr;
        HANDLE _fmh = nullptr;
#else
        // -1 rather than 0: descriptor 0 is a valid descriptor (stdin).
        int _fh = -1;
        // Exact length passed to mmap(), needed again for munmap().
        std::size_t _mapped_bytes = 0;
#endif
    };

    using memory_file = basic_memory_file<char>;
    using wmemory_file = basic_memory_file<wchar_t>;
    using u32memory_file = basic_memory_file<char32_t>;
}
"{BOOL}", eBoolean, "ARR_COMMA"); 66 | $lex->push("ARRAY,ARR_VALUE", "{NULL}", eNull, "ARR_COMMA"); 67 | $lex->push("ARRAY,ARR_VALUE", "[{]", eOpenOb, ">OBJECT:ARR_COMMA"); 68 | $lex->push("ARRAY,ARR_VALUE", "[[]", eOpenArr, ">ARRAY:ARR_COMMA"); 69 | 70 | $lex->push("ARR_COMMA", ",", Token::SKIP, "ARR_VALUE"); 71 | $lex->push("*", "[ \t\r\n]+", Token::SKIP, "."); 72 | 73 | $lex->build(); 74 | 75 | $in = file_get_contents(dirname(__FILE__) . DIRECTORY_SEPARATOR . "lexer_003.json"); 76 | 77 | $lex->consume($in); 78 | 79 | 80 | $lex->advance(); 81 | $tok = $lex->getToken(); 82 | while (Token::EOI != $tok->id) { 83 | var_dump($tok); 84 | $lex->advance(); 85 | $tok = $lex->getToken(); 86 | } 87 | 88 | ?> 89 | ==DONE== 90 | --EXPECTF-- 91 | object(Parle\Token)#%d (2) { 92 | ["id"]=> 93 | int(42) 94 | ["value"]=> 95 | string(1) "{" 96 | } 97 | object(Parle\Token)#%d (2) { 98 | ["id"]=> 99 | int(46) 100 | ["value"]=> 101 | string(5) ""key"" 102 | } 103 | object(Parle\Token)#%d (2) { 104 | ["id"]=> 105 | int(44) 106 | ["value"]=> 107 | string(1) "[" 108 | } 109 | object(Parle\Token)#%d (2) { 110 | ["id"]=> 111 | int(47) 112 | ["value"]=> 113 | string(15) ""qelque choose"" 114 | } 115 | object(Parle\Token)#%d (2) { 116 | ["id"]=> 117 | int(48) 118 | ["value"]=> 119 | string(2) "42" 120 | } 121 | object(Parle\Token)#%d (2) { 122 | ["id"]=> 123 | int(47) 124 | ["value"]=> 125 | string(8) ""füße"" 126 | } 127 | object(Parle\Token)#%d (2) { 128 | ["id"]=> 129 | int(45) 130 | ["value"]=> 131 | string(1) "]" 132 | } 133 | object(Parle\Token)#%d (2) { 134 | ["id"]=> 135 | int(46) 136 | ["value"]=> 137 | string(5) ""obj"" 138 | } 139 | object(Parle\Token)#%d (2) { 140 | ["id"]=> 141 | int(42) 142 | ["value"]=> 143 | string(1) "{" 144 | } 145 | object(Parle\Token)#%d (2) { 146 | ["id"]=> 147 | int(46) 148 | ["value"]=> 149 | string(6) ""prop"" 150 | } 151 | object(Parle\Token)#%d (2) { 152 | ["id"]=> 153 | int(48) 154 | ["value"]=> 155 | string(2) "12" 156 | } 157 | 
object(Parle\Token)#%d (2) { 158 | ["id"]=> 159 | int(43) 160 | ["value"]=> 161 | string(1) "}" 162 | } 163 | object(Parle\Token)#%d (2) { 164 | ["id"]=> 165 | int(46) 166 | ["value"]=> 167 | string(6) ""some"" 168 | } 169 | object(Parle\Token)#%d (2) { 170 | ["id"]=> 171 | int(50) 172 | ["value"]=> 173 | string(4) "null" 174 | } 175 | object(Parle\Token)#%d (2) { 176 | ["id"]=> 177 | int(43) 178 | ["value"]=> 179 | string(1) "}" 180 | } 181 | ==DONE== 182 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/serialise.hpp: -------------------------------------------------------------------------------- 1 | // serialise.hpp 2 | // Copyright (c) 2007-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_SERIALISE_HPP 7 | #define LEXERTL_SERIALISE_HPP 8 | 9 | #include "runtime_error.hpp" 10 | #include "state_machine.hpp" 11 | 12 | namespace lexertl 13 | { 14 | namespace detail 15 | { 16 | template 17 | void output_vec(const std::vector& vec_, stream& stream_) 18 | { 19 | std::basic_ostringstream ss_; 20 | std::basic_string str_; 21 | std::size_t line_len_ = 0; 22 | 23 | stream_ << vec_.size() << '\n'; 24 | 25 | for (const id_type l_ : vec_) 26 | { 27 | ss_ << l_; 28 | str_ = ss_.str(); 29 | 30 | if (line_len_ + str_.size() + 1 > 80) 31 | { 32 | stream_ << '\n' << str_ << ' '; 33 | line_len_ = str_.size() + 1; 34 | } 35 | else 36 | { 37 | stream_ << str_ << ' '; 38 | line_len_ += str_.size() + 1; 39 | } 40 | 41 | ss_.str(""); 42 | } 43 | 44 | stream_ << '\n'; 45 | } 46 | 47 | template 48 | void input_vec(stream& stream_, std::vector& vec_) 49 | { 50 | std::size_t num_ = 0; 51 | 52 | stream_>> num_; 53 | vec_.reserve(num_); 54 | 55 | for (std::size_t idx_ = 0; idx_ < num_; ++idx_) 56 | { 57 | std::size_t id_ = 0; 58 | 59 | stream_ >> 
id_; 60 | vec_.push_back(static_cast(id_)); 61 | } 62 | } 63 | } 64 | 65 | template 66 | void save(const basic_state_machine& sm_, 67 | stream& stream_) 68 | { 69 | using internals = detail::basic_internals; 70 | const internals& internals_ = sm_.data(); 71 | 72 | // Version number 73 | stream_ << 1 << '\n'; 74 | stream_ << sizeof(char_type) << '\n'; 75 | stream_ << sizeof(id_type) << '\n'; 76 | stream_ << internals_._eoi << '\n'; 77 | stream_ << internals_._lookup.size() << '\n'; 78 | 79 | for (const auto& vec_ : internals_._lookup) 80 | { 81 | detail::output_vec(vec_, stream_); 82 | } 83 | 84 | detail::output_vec(internals_._dfa_alphabet, stream_); 85 | stream_ << internals_._features << '\n'; 86 | stream_ << internals_._dfa.size() << '\n'; 87 | 88 | for (const auto& vec_ : internals_._dfa) 89 | { 90 | detail::output_vec(vec_, stream_); 91 | } 92 | } 93 | 94 | template 95 | void load(stream& stream_, basic_state_machine& sm_) 96 | { 97 | using internals = detail::basic_internals; 98 | internals& internals_ = sm_.data(); 99 | std::size_t num_ = 0; 100 | 101 | internals_.clear(); 102 | // Version 103 | stream_ >> num_; 104 | // sizeof(char_type) 105 | stream_ >> num_; 106 | 107 | if (num_ != sizeof(char_type)) 108 | throw runtime_error("char_type mismatch in lexertl::load()"); 109 | 110 | // sizeof(id_type) 111 | stream_ >> num_; 112 | 113 | if (num_ != sizeof(id_type)) 114 | throw runtime_error("id_type mismatch in lexertl::load()"); 115 | 116 | stream_ >> internals_._eoi; 117 | stream_ >> num_; 118 | internals_._lookup.reserve(num_); 119 | 120 | for (std::size_t idx_ = 0; idx_ < num_; ++idx_) 121 | { 122 | internals_._lookup.emplace_back(); 123 | detail::input_vec(stream_, internals_._lookup.back()); 124 | } 125 | 126 | detail::input_vec(stream_, internals_._dfa_alphabet); 127 | stream_ >> internals_._features; 128 | stream_ >> num_; 129 | internals_._dfa.reserve(num_); 130 | 131 | for (std::size_t idx_ = 0; idx_ < num_; ++idx_) 132 | { 133 | 
internals_._dfa.emplace_back(); 134 | detail::input_vec(stream_, internals_._dfa.back()); 135 | } 136 | } 137 | } 138 | 139 | #endif 140 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/serialise.hpp: -------------------------------------------------------------------------------- 1 | // serialise.hpp 2 | // Copyright (c) 2007-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_SERIALISE_HPP 7 | #define PARSERTL_SERIALISE_HPP 8 | 9 | #include "runtime_error.hpp" 10 | #include "../../../lexertl14/include/lexertl/serialise.hpp" 11 | #include "state_machine.hpp" 12 | 13 | namespace parsertl 14 | { 15 | template 16 | void save(const basic_state_machine& sm_, stream& stream_) 17 | { 18 | // Version number 19 | stream_ << 1 << '\n'; 20 | stream_ << sizeof(id_type) << '\n'; 21 | stream_ << sm_._columns << '\n'; 22 | stream_ << sm_._rows << '\n'; 23 | stream_ << sm_._rules.size() << '\n'; 24 | 25 | for (const auto& rule_ : sm_._rules) 26 | { 27 | stream_ << rule_.first << '\n'; 28 | lexertl::detail::output_vec(rule_.second, stream_); 29 | } 30 | 31 | stream_ << sm_._captures.size() << '\n'; 32 | 33 | for (const auto& capture_ : sm_._captures) 34 | { 35 | stream_ << capture_.first << '\n'; 36 | stream_ << capture_.second.size() << '\n'; 37 | 38 | for (const auto& pair_ : capture_.second) 39 | { 40 | stream_ << pair_.first << ' ' << pair_.second << '\n'; 41 | } 42 | } 43 | 44 | stream_ << sm_._table.size() << '\n'; 45 | 46 | for (const auto& vec_ : sm_._table) 47 | { 48 | stream_ << vec_.size() << '\n'; 49 | 50 | for (const auto& pair_ : vec_) 51 | { 52 | stream_ << pair_.first << ' '; 53 | stream_ << static_cast(pair_.second.action) << ' '; 54 | stream_ << pair_.second.param << '\n'; 55 | } 56 | } 57 | } 58 | 59 | template 60 | 
void load(stream& stream_, basic_state_machine& sm_) 61 | { 62 | std::size_t num_ = 0; 63 | 64 | sm_.clear(); 65 | // Version 66 | stream_ >> num_; 67 | // sizeof(id_type) 68 | stream_ >> num_; 69 | 70 | if (num_ != sizeof(id_type)) 71 | throw runtime_error("id_type mismatch in parsertl::load()"); 72 | 73 | stream_ >> sm_._columns; 74 | stream_ >> sm_._rows; 75 | stream_ >> num_; 76 | sm_._rules.reserve(num_); 77 | 78 | for (std::size_t idx_ = 0; idx_ < num_; ++idx_) 79 | { 80 | sm_._rules.emplace_back(); 81 | 82 | auto& rule_ = sm_._rules.back(); 83 | 84 | stream_ >> rule_.first; 85 | lexertl::detail::input_vec(stream_, rule_.second); 86 | } 87 | 88 | stream_ >> num_; 89 | sm_._captures.reserve(num_); 90 | 91 | for (std::size_t idx_ = 0, rows_ = num_; idx_ < rows_; ++idx_) 92 | { 93 | sm_._captures.emplace_back(); 94 | 95 | auto& capture_ = sm_._captures.back(); 96 | 97 | stream_ >> capture_.first; 98 | stream_ >> num_; 99 | capture_.second.reserve(num_); 100 | 101 | for (std::size_t idx2_ = 0, entries_ = num_; 102 | idx2_ < entries_; ++idx2_) 103 | { 104 | capture_.second.emplace_back(); 105 | 106 | auto& pair_ = capture_.second.back(); 107 | 108 | stream_ >> num_; 109 | pair_.first = static_cast(num_); 110 | stream_ >> num_; 111 | pair_.second = static_cast(num_); 112 | } 113 | } 114 | 115 | stream_ >> num_; 116 | sm_._table.reserve(num_); 117 | 118 | for (std::size_t idx_ = 0, rows_ = num_; idx_ < rows_; ++idx_) 119 | { 120 | sm_._table.emplace_back(); 121 | 122 | auto& vec_ = sm_._table.back(); 123 | 124 | stream_ >> num_; 125 | vec_.reserve(num_); 126 | 127 | for (std::size_t idx2_ = 0, entries_ = num_; 128 | idx2_ < entries_; ++idx2_) 129 | { 130 | vec_.emplace_back(); 131 | 132 | auto& pair_ = vec_.back(); 133 | 134 | stream_ >> num_; 135 | pair_.first = static_cast(num_); 136 | stream_ >> num_; 137 | pair_.second.action = static_cast(num_); 138 | stream_ >> num_; 139 | pair_.second.param = static_cast(num_); 140 | } 141 | } 142 | } 143 | } 144 | 145 | 
#endif 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Parle provides lexing and parsing facilities for PHP 3 | ============================================= 4 | Lexing and parsing are widely used in the PHP core and extensions. Usually such functionality is packed into a piece of C/C++ code and depends on tools like [flex](http://flex.sourceforge.net/), [re2c](http://re2c.org/), [Bison](http://www.gnu.org/software/bison/), [LEMON](http://www.hwaci.com/sw/lemon/) or similar. With Parle, it is possible to implement lexing and parsing in PHP while relying on features and principles of the parser/lexer generator tools for C/C++. The Lexer and Parser classes live in the Parle namespace. 5 | The implementation is based on the work of [Ben Hanson](http://www.benhanson.net/): 6 | 7 | - https://github.com/BenHanson/lexertl14 8 | - https://github.com/BenHanson/parsertl14 9 | 10 | The lexer is based on pattern matching similar to flex. The parser is LALR(1). 11 | 12 | PHP 7.4 and above is supported. A [C++14](http://en.cppreference.com/w/cpp/compiler_support) capable compiler is required. As of version 0.7.3 parle can optionally be compiled with internal UTF-32 support, making it possible to use Unicode character classes in patterns. 13 | 14 | The full extension documentation is available in the [PHP Manual](http://php.net/parle). 15 | 16 | Installation 17 | ============ 18 | 19 | Read the [INSTALL.md](./INSTALL.md) documentation. 
20 | 21 | 22 | Example tokenizing comma separated integer list 23 | ============================================ 24 | ```php 25 | 26 | use Parle\Token; 27 | use Parle\Lexer; 28 | use Parle\LexerException; 29 | 30 | /* name => id */ 31 | $token = array( 32 | "COMMA" => 1, 33 | "CRLF" => 2, 34 | "DECIMAL" => 3, 35 | ); 36 | /* id => name */ 37 | $tokenIdToName = array_flip($token); 38 | 39 | $lex = new Lexer; 40 | $lex->push("[\x2c]", $token["COMMA"]); 41 | $lex->push("[\r][\n]", $token["CRLF"]); 42 | $lex->push("[\d]+", $token["DECIMAL"]); 43 | $lex->build(); 44 | 45 | $in = "0,1,2\r\n3,42,5\r\n6,77,8\r\n"; 46 | 47 | $lex->consume($in); 48 | 49 | do { 50 | $lex->advance(); 51 | $tok = $lex->getToken(); 52 | 53 | if (Token::UNKNOWN == $tok->id) { 54 | throw new LexerException("Unknown token '{$tok->value}' at offset {$lex->marker}."); 55 | } 56 | 57 | echo "TOKEN: ", $tokenIdToName[$tok->id], PHP_EOL; 58 | } while (Token::EOI != $tok->id); 59 | 60 | ``` 61 | 62 | 63 | Example parsing comma separated number list 64 | =========================== 65 | ```php 66 | 67 | use Parle\Lexer; 68 | use Parle\Parser; 69 | use Parle\ParserException; 70 | 71 | $p = new Parser; 72 | $p->token("CRLF"); 73 | $p->token("COMMA"); 74 | $p->token("INTEGER"); 75 | $p->token("'\"'"); 76 | $p->push("START", "RECORDS"); 77 | $prod_record_0 = $p->push("RECORDS", "RECORD CRLF"); 78 | $prod_record_1 = $p->push("RECORDS", "RECORDS RECORD CRLF"); 79 | $prod_int_0 = $p->push("RECORD", "INTEGER"); 80 | $prod_int_1 = $p->push("RECORD", "RECORD COMMA INTEGER"); 81 | $p->push("DECIMAL", "INTEGER COMMA INTEGER"); /* Production index unused. 
*/ 82 | $prod_dec_0 = $p->push("RECORD", "'\"' DECIMAL '\"'"); 83 | $prod_dec_1 = $p->push("RECORD", "RECORD COMMA '\"' DECIMAL '\"'"); 84 | $p->build(); 85 | 86 | $lex = new Lexer; 87 | $lex->push("[\x2c]", $p->tokenId("COMMA")); 88 | $lex->push("[\r][\n]", $p->tokenId("CRLF")); 89 | $lex->push("[\d]+", $p->tokenId("INTEGER")); 90 | $lex->push("[\x22]", $p->tokenId("'\"'")); 91 | $lex->build(); 92 | 93 | /* Specifically using comma as both list separator and as a decimal mark. */ 94 | $in = "000,111,222\r\n\"333,3\",444,555\r\n666,777,\"888,8\"\r\n"; 95 | 96 | $p->consume($in, $lex); 97 | 98 | do { 99 | switch ($p->action) { 100 | case Parser::ACTION_ERROR: 101 | $err = $p->errorInfo(); 102 | if (Parser::ERROR_UNKNOWN_TOKEN == $err->id) { 103 | $tok = $err->token; 104 | $msg = "Unknown token '{$tok->value}' at offset {$err->position}"; 105 | } else if (Parser::ERROR_NON_ASSOCIATIVE == $err->id) { 106 | $tok = $err->token; 107 | $msg = "Token '{$tok->id}' at offset {$lex->marker} is not associative"; 108 | } else if (Parser::ERROR_SYNTAX == $err->id) { 109 | $tok = $err->token; 110 | $msg = "Syntax error at offset {$lex->marker}"; 111 | } else { 112 | $msg = "Parse error"; 113 | } 114 | throw new ParserException($msg); 115 | break; 116 | case Parser::ACTION_SHIFT: 117 | case Parser::ACTION_GOTO: 118 | case Parser::ACTION_ACCEPT: 119 | /* Nothing to do for these actions; just leave the switch so that advance() below runs. */ 120 | break; 121 | case Parser::ACTION_REDUCE: 122 | switch ($p->reduceId) { 123 | case $prod_int_0: 124 | /* INTEGER */ 125 | echo $p->sigil(), PHP_EOL; 126 | break; 127 | case $prod_int_1: 128 | /* RECORD COMMA INTEGER */ 129 | echo $p->sigil(2), PHP_EOL; 130 | break; 131 | case $prod_dec_0: 132 | /* '\"' DECIMAL '\"' */ 133 | echo $p->sigil(1), PHP_EOL; 134 | break; 135 | case $prod_dec_1: 136 | /* RECORD COMMA '\"' DECIMAL '\"' */ 137 | echo $p->sigil(3), PHP_EOL; 138 | break; 139 | case $prod_record_0: 140 | case $prod_record_1: 141 | echo "=====", PHP_EOL; 142 | break; 143 | } 144 | break; 145 | } 146 | $p->advance(); 
147 | } while (Parser::ACTION_ACCEPT != $p->action); 148 | 149 | ``` 150 | 151 | -------------------------------------------------------------------------------- /bench/parse_str.impl.php: -------------------------------------------------------------------------------- 1 | lex = $lex; 23 | $this->stack = new Stack; 24 | $this->debug = $debug; 25 | } 26 | 27 | public function init() 28 | { 29 | $this->terminal("left", "'='", "[=]"); 30 | $this->terminal("token", "']'", "[\]]"); 31 | $this->terminal("right", "'['", "[\[]"); 32 | $this->terminal("left", "'&'", "[&]"); 33 | $this->terminal("token", "T_STR", "[^=\[\]&\s]+"); 34 | 35 | $this->production("START", "PAIRS"); 36 | $this->production("PAIRS", "PAIR"); 37 | $this->production("PAIRS", "PAIRS '&' PAIR"); 38 | $this->production("VALUE", ""); 39 | $this->production("VALUE", "T_STR"); 40 | $this->production("ARRKEY", "", "handleEmptyDimensionKey"); 41 | $this->production("ARRKEY", "T_STR", "handleDimensionKey"); 42 | $this->production("ARRDIM", "'[' ARRKEY ']'"); 43 | $this->production("ARRDIM", "'[' ARRKEY ']' ARRDIM"); 44 | $this->production("PAIR", "T_STR ARRDIM '=' VALUE", "handleArray"); 45 | $this->production("PAIR", "T_STR '=' VALUE", "handleScalar"); 46 | 47 | $this->build(); 48 | $this->lex->build(); 49 | } 50 | 51 | protected function terminal(string $assoc, string $sym, string $reg) 52 | { 53 | switch ($assoc) { 54 | default: 55 | throw new ParserException("Unknown associativity '$assoc'."); 56 | case "left": 57 | $this->left($sym); 58 | break; 59 | case "right": 60 | $this->right($sym); 61 | break; 62 | case "token": 63 | $this->token($sym); 64 | break; 65 | case "nonassoc": 66 | $this->nonassoc($sym); 67 | break; 68 | } 69 | 70 | $id = $this->tokenId($sym); 71 | $this->lex->push($reg, $id); 72 | 73 | $this->tokenNameToId[$sym] = $id; 74 | $this->tokenIdToName[$id] = $sym; 75 | } 76 | 77 | protected function production(string $name, string $rule, $handler = NULL) 78 | { 79 | $id = $this->push($name, 
$rule); 80 | if ($handler) { 81 | $this->prodHandler[$id] = array($this, $handler); 82 | } 83 | } 84 | 85 | private function handleEmptyDimensionKey() 86 | { 87 | $this->stack->push(NULL); 88 | } 89 | 90 | private function handleDimensionKey() 91 | { 92 | $this->stack->push($this->sigil()); 93 | } 94 | 95 | private function handleScalar() 96 | { 97 | $name = $this->sigil(); 98 | $val = $this->sigil(2); 99 | $this->result[$name] = urldecode($val); 100 | } 101 | 102 | private function handleArray() 103 | { 104 | $name = $this->sigil(); 105 | $val = $this->sigil(3); 106 | 107 | // create top array element 108 | $k = $this->stack->top; 109 | $tmp = array(); 110 | if ($k) { 111 | $tmp[$k] = urldecode($val); 112 | } else { 113 | $tmp[] = urldecode($val); 114 | } 115 | $this->stack->pop(); 116 | 117 | // check if there are more dimensions 118 | while (!$this->stack->empty) { 119 | $k = $this->stack->top; 120 | $tmp2 = array(); 121 | if ($k) { 122 | $tmp2[$k] = $tmp; 123 | } else { 124 | $tmp2[] = $tmp; 125 | } 126 | $this->stack->pop(); 127 | $tmp = $tmp2; 128 | } 129 | if (!array_key_exists($name, $this->result)) { 130 | $this->result[$name] = array(); 131 | } 132 | $this->result[$name] = array_merge_recursive($this->result[$name], $tmp); 133 | } 134 | 135 | public function parse($in) 136 | { 137 | $this->result = array(); 138 | $this->stack = new Stack; 139 | 140 | $this->consume($in, $this->lex); 141 | 142 | while (Parser::ACTION_ACCEPT != $this->action) { 143 | switch ($this->action) { 144 | case Parser::ACTION_ERROR: 145 | $i = $this->errorInfo(); 146 | switch ($i->id) { 147 | case Parser::ERROR_SYNTAX: 148 | throw new ParserException("Syntax error at " . $i->position); 149 | case Parser::ERROR_NON_ASSOCIATIVE: 150 | throw new ParserException("Token " . $this->tokenIdToName[$i->token->id] . " is not associative"); 151 | case Parser::ERROR_UNKNOWN_TOKEN: 152 | throw new ParserException("Unknown token '" . $i->token->value . "' at " . 
$i->position); 153 | } 154 | break; 155 | case Parser::ACTION_SHIFT: 156 | case Parser::ACTION_GOTO: 157 | if ($this->debug) { 158 | echo $this->trace(), PHP_EOL; 159 | } 160 | break; 161 | case Parser::ACTION_REDUCE: 162 | if ($this->debug) { 163 | echo $this->trace(), PHP_EOL; 164 | } 165 | if (array_key_exists($this->reduceId, $this->prodHandler)) { 166 | if ($this->debug) { 167 | echo "calling ", $this->prodHandler[$this->reduceId][1], PHP_EOL; 168 | } 169 | call_user_func($this->prodHandler[$this->reduceId]); 170 | } 171 | break; 172 | } 173 | $this->advance(); 174 | } 175 | 176 | return $this->result; 177 | } 178 | } 179 | 180 | function parse_str(string $in, array &$result = array()) 181 | { 182 | $p = new ParseStrParser(new Lexer); 183 | 184 | $p->init(); 185 | 186 | $result = $p->parse($in); 187 | } 188 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/ebnf_tables.hpp: -------------------------------------------------------------------------------- 1 | // ebnf_tables.hpp 2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_EBNF_TABLES_HPP 7 | #define PARSERTL_EBNF_TABLES_HPP 8 | 9 | #include 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | struct ebnf_tables 15 | { 16 | enum class yyconsts 17 | { 18 | YYFINAL = 16, 19 | YYLAST = 32, 20 | YYNTOKENS = 18, 21 | YYPACT_NINF = -4, 22 | YYTABLE_NINF = -1 23 | }; 24 | 25 | enum class yytokentype 26 | { 27 | EMPTY = 258, 28 | IDENTIFIER = 259, 29 | PREC = 260, 30 | TERMINAL = 261 31 | }; 32 | 33 | const std::vector yytranslate = 34 | { 35 | 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 36 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 37 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 38 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 39 | 16, 17, 13, 15, 2, 14, 2, 2, 2, 2, 40 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 41 | 2, 2, 2, 10, 2, 2, 2, 2, 2, 2, 42 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 43 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 8, 2, 9, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 11, 7, 12, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 49 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 50 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 51 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 52 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 53 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 54 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 55 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 56 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 57 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 58 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 59 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 60 | 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 61 | 5, 6 62 | }; 63 | const std::vector yyr1 = 64 | { 65 | 0, 18, 19, 20, 20, 21, 22, 22, 22, 23, 66 | 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 67 | 25, 25, 25 68 | }; 69 | const std::vector yyr2 70 | { 71 | 0, 2, 1, 1, 3, 2, 0, 1, 1, 1, 72 | 2, 1, 1, 3, 2, 3, 2, 4, 2, 3, 73 | 0, 2, 2 74 | }; 75 | const std::vector yydefact = 76 | { 77 | 6, 7, 11, 12, 6, 6, 6, 0, 2, 3, 78 | 20, 8, 9, 0, 0, 0, 1, 6, 0, 5, 79 | 10, 14, 16, 18, 13, 15, 19, 4, 21, 22, 80 | 17 81 | }; 82 | const std::vector 
yydefgoto = 83 | { 84 | -1, 7, 8, 9, 10, 11, 12, 19 85 | }; 86 | const std::vector yypact = 87 | { 88 | -3, -4, -4, -4, -3, -3, -3, 19, 18, -4, 89 | 22, -2, 5, 3, 4, 0, -4, -3, 20, -4, 90 | 5, -4, -4, -4, -4, 14, -4, -4, -4, -4, 91 | -4 92 | }; 93 | const std::vector yypgoto = 94 | { 95 | -4, -4, 17, 12, -4, -4, 21, -4 96 | }; 97 | const std::vector yytable = 98 | { 99 | 1, 2, 2, 3, 3, 4, 4, 17, 5, 5, 100 | 17, 17, 24, 6, 6, 21, 25, 26, 22, 16, 101 | 23, 13, 14, 15, 28, 17, 29, 18, 30, 27, 102 | 0, 0, 20 103 | }; 104 | const std::vector yycheck = 105 | { 106 | 3, 4, 4, 6, 6, 8, 8, 7, 11, 11, 107 | 7, 7, 9, 16, 16, 10, 12, 17, 13, 0, 108 | 15, 4, 5, 6, 4, 7, 6, 5, 14, 17, 109 | -1, -1, 11 110 | }; 111 | }; 112 | } 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/match_results.hpp: -------------------------------------------------------------------------------- 1 | // match_results.hpp 2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_MATCH_RESULTS_HPP 7 | #define LEXERTL_MATCH_RESULTS_HPP 8 | 9 | #include "char_traits.hpp" 10 | #include "enum_operator.hpp" 11 | #include "enums.hpp" 12 | #include 13 | #include 14 | #include 15 | 16 | namespace lexertl 17 | { 18 | template 22 | struct match_results 23 | { 24 | using id_type = id_t; 25 | using iter_type = iter; 26 | using char_type = typename std::iterator_traits::value_type; 27 | using index_type = typename basic_char_traits::index_type; 28 | using string = std::basic_string; 29 | 30 | id_type id = 0; 31 | id_type user_id = npos(); 32 | iter_type first = iter_type(); 33 | iter_type second = iter_type(); 34 | iter_type eoi = iter_type(); 35 | bool bol = true; 36 | id_type state = 0; 37 | 38 | match_results() = default; 39 | 40 | match_results(const iter_type& start_, const iter_type& end_, 41 | const bool bol_ = true, const id_type state_ = 0) : 42 | first(start_), 43 | second(start_), 44 | eoi(end_), 45 | bol(bol_), 46 | state(state_) 47 | { 48 | } 49 | 50 | virtual ~match_results() = default; 51 | 52 | string str() const 53 | { 54 | return string(first, second); 55 | } 56 | 57 | string substr(const std::size_t soffset_, 58 | const std::size_t eoffset_) const 59 | { 60 | return string(first + soffset_, second - eoffset_); 61 | } 62 | 63 | virtual void clear() 64 | { 65 | id = 0; 66 | user_id = npos(); 67 | first = eoi; 68 | second = eoi; 69 | bol = true; 70 | state = 0; 71 | } 72 | 73 | virtual void reset(const iter_type& start_, const iter_type& end_) 74 | { 75 | id = 0; 76 | user_id = npos(); 77 | first = start_; 78 | second = start_; 79 | eoi = end_; 80 | bol = true; 81 | state = 0; 82 | } 83 | 84 | std::size_t length() const 85 | { 86 | return second - first; 87 | } 88 | 89 | static id_type npos() 90 | { 91 | return static_cast(~0); 92 | } 93 | 94 | static id_type skip() 95 | { 96 | return static_cast(~1); 97 | } 98 | 99 | bool 
operator ==(const match_results& rhs_) const 100 | { 101 | return id == rhs_.id && 102 | user_id == rhs_.user_id && 103 | first == rhs_.first && 104 | second == rhs_.second && 105 | eoi == rhs_.eoi && 106 | bol == rhs_.bol && 107 | state == rhs_.state; 108 | } 109 | }; 110 | 111 | template 115 | struct recursive_match_results : 116 | public match_results 117 | { 118 | using id_type_pair = std::pair; 119 | std::stack stack; 120 | 121 | recursive_match_results() : 122 | match_results() 123 | { 124 | } 125 | 126 | recursive_match_results(const iter& start_, const iter& end_, 127 | const bool bol_ = true, const id_type state_ = 0) : 128 | match_results(start_, end_, bol_, state_) 129 | { 130 | } 131 | 132 | ~recursive_match_results() override = default; 133 | 134 | void clear() override 135 | { 136 | match_results::clear(); 137 | 138 | while (!stack.empty()) stack.pop(); 139 | } 140 | 141 | void reset(const iter& start_, const iter& end_) override 142 | { 143 | match_results::reset(start_, end_); 144 | 145 | while (!stack.empty()) stack.pop(); 146 | } 147 | }; 148 | 149 | using smatch = match_results; 150 | using cmatch = match_results; 151 | using wsmatch = match_results; 152 | using wcmatch = match_results; 153 | using u32smatch = match_results; 154 | using u32cmatch = match_results; 155 | 156 | using srmatch = 157 | recursive_match_results; 158 | using crmatch = recursive_match_results; 159 | using wsrmatch = 160 | recursive_match_results; 161 | using wcrmatch = recursive_match_results; 162 | using u32srmatch = 163 | recursive_match_results; 164 | using u32crmatch = recursive_match_results; 165 | } 166 | 167 | #endif 168 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/lookup.hpp: -------------------------------------------------------------------------------- 1 | // lookup.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software 
License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_LOOKUP_HPP 7 | #define PARSERTL_LOOKUP_HPP 8 | 9 | #include "match_results.hpp" 10 | #include "token.hpp" 11 | 12 | namespace parsertl 13 | { 14 | // parse sequence but do not keep track of productions 15 | template 16 | void lookup(lexer_iterator& iter_, const sm_type& sm_, 17 | basic_match_results& results_) 18 | { 19 | switch (results_.entry.action) 20 | { 21 | case action::shift: 22 | results_.stack.push_back(results_.entry.param); 23 | 24 | if (iter_->id != 0) 25 | ++iter_; 26 | 27 | results_.token_id = iter_->id; 28 | 29 | if (results_.token_id == lexer_iterator::value_type::npos()) 30 | { 31 | results_.entry.action = action::error; 32 | results_.entry.param = static_cast 33 | (error_type::unknown_token); 34 | } 35 | else 36 | { 37 | results_.entry = 38 | sm_.at(results_.entry.param, results_.token_id); 39 | } 40 | 41 | break; 42 | case action::reduce: 43 | { 44 | const std::size_t size_ = 45 | sm_._rules[results_.entry.param].second.size(); 46 | 47 | if (size_) 48 | { 49 | results_.stack.resize(results_.stack.size() - size_); 50 | } 51 | 52 | results_.token_id = sm_._rules[results_.entry.param].first; 53 | results_.entry = sm_.at(results_.stack.back(), results_.token_id); 54 | break; 55 | } 56 | case action::go_to: 57 | results_.stack.push_back(results_.entry.param); 58 | results_.token_id = iter_->id; 59 | results_.entry = sm_.at(results_.stack.back(), results_.token_id); 60 | break; 61 | case action::accept: 62 | { 63 | const std::size_t size_ = 64 | sm_._rules[results_.entry.param].second.size(); 65 | 66 | if (size_) 67 | { 68 | results_.stack.resize(results_.stack.size() - size_); 69 | } 70 | 71 | break; 72 | } 73 | default: 74 | // action::error 75 | break; 76 | } 77 | } 78 | 79 | // Parse sequence and maintain production vector 80 | template 81 | void lookup(lexer_iterator& iter_, const sm_type& sm_, 82 | 
basic_match_results& results_, token_vector& productions_) 83 | { 84 | switch (results_.entry.action) 85 | { 86 | case action::shift: 87 | results_.stack.push_back(results_.entry.param); 88 | productions_.emplace_back(iter_->id, iter_->first, iter_->second); 89 | 90 | if (iter_->id != 0) 91 | ++iter_; 92 | 93 | results_.token_id = iter_->id; 94 | 95 | if (results_.token_id == lexer_iterator::value_type::npos()) 96 | { 97 | results_.entry.action = action::error; 98 | results_.entry.param = static_cast 99 | (error_type::unknown_token); 100 | } 101 | else 102 | { 103 | results_.entry = 104 | sm_.at(results_.entry.param, results_.token_id); 105 | } 106 | 107 | break; 108 | case action::reduce: 109 | { 110 | const std::size_t size_ = 111 | sm_._rules[results_.entry.param].second.size(); 112 | typename token_vector::value_type token_; 113 | 114 | if (size_) 115 | { 116 | results_.stack.resize(results_.stack.size() - size_); 117 | token_.first = (productions_.end() - size_)->first; 118 | token_.second = productions_.back().second; 119 | productions_.resize(productions_.size() - size_); 120 | } 121 | else 122 | { 123 | if (productions_.empty()) 124 | { 125 | token_.first = token_.second = iter_->first; 126 | } 127 | else 128 | { 129 | token_.first = token_.second = productions_.back().second; 130 | } 131 | } 132 | 133 | results_.token_id = sm_._rules[results_.entry.param].first; 134 | results_.entry = sm_.at(results_.stack.back(), results_.token_id); 135 | token_.id = results_.token_id; 136 | productions_.push_back(token_); 137 | break; 138 | } 139 | case action::go_to: 140 | results_.stack.push_back(results_.entry.param); 141 | results_.token_id = iter_->id; 142 | results_.entry = sm_.at(results_.stack.back(), results_.token_id); 143 | break; 144 | case action::accept: 145 | { 146 | const std::size_t size_ = 147 | sm_._rules[results_.entry.param].second.size(); 148 | 149 | if (size_) 150 | { 151 | results_.stack.resize(results_.stack.size() - size_); 152 | } 153 | 154 | 
break; 155 | } 156 | default: 157 | // action::error 158 | break; 159 | } 160 | } 161 | } 162 | 163 | #endif 164 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/partition/equivset.hpp: -------------------------------------------------------------------------------- 1 | // equivset.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_EQUIVSET_HPP 7 | #define LEXERTL_EQUIVSET_HPP 8 | 9 | #include 10 | #include 11 | #include "../parser/tree/node.hpp" 12 | #include 13 | 14 | namespace lexertl 15 | { 16 | namespace detail 17 | { 18 | template 19 | struct basic_equivset 20 | { 21 | using index_set = std::set; 22 | using index_vector = std::vector; 23 | using node = basic_node; 24 | using node_vector = std::vector>; 25 | 26 | index_vector _index_vector; 27 | id_type _id = 0; 28 | bool _greedy = true; 29 | node_vector _followpos; 30 | 31 | basic_equivset() = default; 32 | 33 | basic_equivset(const index_set& index_set_, const id_type id_, 34 | const bool greedy_, const node_vector& followpos_) : 35 | _index_vector(index_set_.begin(), index_set_.end()), 36 | _id(id_), 37 | _greedy(greedy_), 38 | _followpos(followpos_) 39 | { 40 | } 41 | 42 | bool empty() const 43 | { 44 | return _index_vector.empty() && _followpos.empty(); 45 | } 46 | 47 | void intersect(basic_equivset& rhs_, basic_equivset& overlap_) 48 | { 49 | intersect_indexes(rhs_._index_vector, overlap_._index_vector); 50 | 51 | if (!overlap_._index_vector.empty()) 52 | { 53 | // Note that the LHS takes priority in order to 54 | // respect rule ordering priority in the lex spec. 
55 | overlap_._id = _id; 56 | process_greedy(rhs_, overlap_); 57 | overlap_._followpos = _followpos; 58 | 59 | auto overlap_begin_ = overlap_._followpos.cbegin(); 60 | auto overlap_end_ = overlap_._followpos.cend(); 61 | 62 | for (observer_ptr node_ : rhs_._followpos) 63 | { 64 | if (std::find(overlap_begin_, overlap_end_, node_) == 65 | overlap_end_) 66 | { 67 | overlap_._followpos.push_back(node_); 68 | overlap_begin_ = overlap_._followpos.begin(); 69 | overlap_end_ = overlap_._followpos.end(); 70 | } 71 | } 72 | 73 | if (_index_vector.empty()) 74 | { 75 | _followpos.clear(); 76 | } 77 | 78 | if (rhs_._index_vector.empty()) 79 | { 80 | rhs_._followpos.clear(); 81 | } 82 | } 83 | } 84 | 85 | private: 86 | void process_greedy(basic_equivset& rhs_, basic_equivset& overlap_) 87 | { 88 | if (_greedy) 89 | overlap_._greedy = true; 90 | else 91 | { 92 | bool greedy_ = false; 93 | 94 | for (const node* node_ : rhs_._followpos) 95 | { 96 | // If a 'hard greedy' transition is present, 97 | // then respect that above all else. 
98 | if (node_->what_type() == node::node_type::LEAF && 99 | node_->greedy() && node_->set_greedy()) 100 | { 101 | greedy_ = true; 102 | break; 103 | } 104 | } 105 | 106 | overlap_._greedy = greedy_; 107 | } 108 | } 109 | 110 | void intersect_indexes(index_vector& rhs_, index_vector& overlap_) 111 | { 112 | std::set_intersection(_index_vector.begin(), 113 | _index_vector.end(), rhs_.begin(), rhs_.end(), 114 | std::back_inserter(overlap_)); 115 | 116 | if (!overlap_.empty()) 117 | { 118 | remove(overlap_, _index_vector); 119 | remove(overlap_, rhs_); 120 | } 121 | } 122 | 123 | void remove(const index_vector& source_, index_vector& dest_) const 124 | { 125 | auto inter_ = source_.cbegin(); 126 | auto inter_end_ = source_.cend(); 127 | auto reader_ = std::find(dest_.begin(), dest_.end(), *inter_); 128 | auto writer_ = reader_; 129 | auto dest_end_ = dest_.end(); 130 | 131 | while (writer_ != dest_end_ && inter_ != inter_end_) 132 | { 133 | if (*reader_ == *inter_) 134 | { 135 | ++inter_; 136 | ++reader_; 137 | } 138 | else 139 | { 140 | *writer_++ = *reader_++; 141 | } 142 | } 143 | 144 | while (reader_ != dest_end_) 145 | { 146 | *writer_++ = *reader_++; 147 | } 148 | 149 | dest_.resize(dest_.size() - source_.size()); 150 | } 151 | }; 152 | } 153 | } 154 | 155 | #endif 156 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tokeniser/re_token.hpp: -------------------------------------------------------------------------------- 1 | // re_token.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_RE_TOKEN_HPP 7 | #define LEXERTL_RE_TOKEN_HPP 8 | 9 | #include "../../string_token.hpp" 10 | 11 | namespace lexertl 12 | { 13 | namespace detail 14 | { 15 | // Note that tokens following END are never seen by parser.hpp. 16 | enum class token_type 17 | { 18 | BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT, 19 | DUP, OR, CHARSET, BOL, EOL, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT, 20 | ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN, 21 | END, DIFF 22 | }; 23 | 24 | template 25 | struct basic_re_token 26 | { 27 | using string_token = basic_string_token; 28 | using string = std::basic_string; 29 | 30 | token_type _type; 31 | string _extra; 32 | string_token _str; 33 | 34 | explicit basic_re_token(const token_type type_ = 35 | token_type::BEGIN) : 36 | _type(type_) 37 | { 38 | } 39 | 40 | void clear() 41 | { 42 | _type = token_type::BEGIN; 43 | _extra.clear(); 44 | _str.clear(); 45 | } 46 | 47 | void swap(basic_re_token& rhs_) noexcept 48 | { 49 | std::swap(_type, rhs_._type); 50 | _extra.swap(rhs_._extra); 51 | _str.swap(rhs_._str); 52 | } 53 | 54 | char precedence(const token_type type_) const 55 | { 56 | // Moved in here for Solaris compiler. 57 | static const char precedence_table_ 58 | [static_cast(token_type::END) + 1] 59 | [static_cast(token_type::END) + 1] = 60 | { 61 | // BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, BOL, EOL, MCR, ( , ) , ? , ?? , * , *? 
, + , +?, {n}?, {n}, END 62 | /*BEGIN*/{ ' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 63 | /*REGEX*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 64 | /*OREXP*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 65 | /* SEQ */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 66 | /* SUB */{ ' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 67 | /*EXPRE*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 68 | /* RPT */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>' }, 69 | /*DUPLI*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 70 | /* | */{ ' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }, 71 | /*CHARA*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, 72 | /* BOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, 73 | /* EOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, 74 | /*MACRO*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, 75 | /* ( */{ ' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }, 76 | /* ) */{ ' ', ' ', ' ', ' ', ' ', ' ', ' 
', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, 77 | /* ? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 78 | /* ?? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 79 | /* * */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 80 | /* *? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 81 | /* + */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 82 | /* +? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 83 | /*{n,m}*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 84 | /*{nm}?*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, 85 | /* END */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' } 86 | }; 87 | 88 | return precedence_table_[static_cast(_type)] 89 | [static_cast(type_)]; 90 | } 91 | 92 | const char* precedence_string() const 93 | { 94 | // Moved in here for Solaris compiler. 
95 | static const char* precedence_strings_ 96 | [static_cast(token_type::END) + 1] = 97 | { 98 | "BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION", 99 | "REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$", "MACRO", 100 | "(", ")", "?", "??", "*", "*?", "+", "+?", "{n[,[m]]}", 101 | "{n[,[m]]}?", "END" 102 | }; 103 | 104 | return precedence_strings_[static_cast(_type)]; 105 | } 106 | }; 107 | } 108 | } 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /lib/parsertl14/include/parsertl/state_machine.hpp: -------------------------------------------------------------------------------- 1 | // state_machine.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_STATE_MACHINE_HPP 7 | #define PARSERTL_STATE_MACHINE_HPP 8 | 9 | #include 10 | #include 11 | #include "enums.hpp" 12 | #include 13 | 14 | namespace parsertl 15 | { 16 | template 17 | struct base_state_machine 18 | { 19 | using id_type = id_ty; 20 | using id_type_pair = std::pair; 21 | using capture_vector = std::vector; 22 | using capture = std::pair; 23 | using captures_vector = std::vector; 24 | using id_type_vector = std::vector; 25 | using id_type_vector_pair = std::pair; 26 | using rules = std::vector; 27 | 28 | std::size_t _columns = 0; 29 | std::size_t _rows = 0; 30 | rules _rules; 31 | captures_vector _captures; 32 | 33 | // If you get a compile error here you have 34 | // failed to define an unsigned id type. 
35 | static_assert(std::is_unsigned::value, 36 | "Your id type is signed"); 37 | 38 | struct entry 39 | { 40 | // Qualify action to prevent compilation error 41 | parsertl::action action; 42 | id_type param; 43 | 44 | entry() : 45 | // Qualify action to prevent compilation error 46 | action(parsertl::action::error), 47 | param(static_cast(error_type::syntax_error)) 48 | { 49 | } 50 | 51 | // Qualify action to prevent compilation error 52 | entry(const parsertl::action action_, const id_type param_) : 53 | action(action_), 54 | param(param_) 55 | { 56 | } 57 | 58 | void clear() noexcept 59 | { 60 | // Qualify action to prevent compilation error 61 | action = parsertl::action::error; 62 | param = static_cast(error_type::syntax_error); 63 | } 64 | 65 | bool operator ==(const entry& rhs_) const 66 | { 67 | return action == rhs_.action && param == rhs_.param; 68 | } 69 | }; 70 | 71 | // No need to specify constructor. 72 | // Just in case someone wants to use a pointer to the base 73 | virtual ~base_state_machine() = default; 74 | 75 | virtual void clear() noexcept 76 | { 77 | _columns = _rows = 0; 78 | _rules.clear(); 79 | _captures.clear(); 80 | } 81 | }; 82 | 83 | // Uses a vector of vectors for the state machine 84 | template 85 | struct basic_state_machine : base_state_machine 86 | { 87 | using base_sm = base_state_machine; 88 | using id_type = id_ty; 89 | using entry = typename base_sm::entry; 90 | using id_type_entry_pair = std::pair; 91 | using id_type_entry_pair_vec = std::vector; 92 | using table = std::vector; 93 | 94 | table _table; 95 | 96 | // No need to specify constructor. 
97 | ~basic_state_machine() override = default; 98 | 99 | void clear() noexcept override 100 | { 101 | base_sm::clear(); 102 | _table.clear(); 103 | } 104 | 105 | bool empty() const 106 | { 107 | return _table.empty(); 108 | } 109 | 110 | entry at(const std::size_t state_) const 111 | { 112 | const auto& s_ = _table[state_]; 113 | auto iter_ = std::find_if(s_.begin(), s_.end(), 114 | [](const auto& pair) 115 | { 116 | return pair.first == 0; 117 | }); 118 | 119 | if (iter_ == s_.end()) 120 | return entry(); 121 | else 122 | return iter_->second; 123 | } 124 | 125 | entry at(const std::size_t state_, const std::size_t token_id_) const 126 | { 127 | const auto& s_ = _table[state_]; 128 | auto iter_ = std::find_if(s_.begin(), s_.end(), 129 | [token_id_](const auto& pair) 130 | { 131 | return pair.first == token_id_; 132 | }); 133 | 134 | if (iter_ == s_.end()) 135 | return entry(); 136 | else 137 | return iter_->second; 138 | } 139 | 140 | void set(const std::size_t state_, const std::size_t token_id_, 141 | const entry& entry_) 142 | { 143 | auto& s_ = _table[state_]; 144 | auto iter_ = std::find_if(s_.begin(), s_.end(), 145 | [token_id_](const auto& pair) 146 | { 147 | return pair.first == token_id_; 148 | }); 149 | 150 | if (iter_ == s_.end()) 151 | s_.emplace_back(static_cast(token_id_), entry_); 152 | else 153 | iter_->second = entry_; 154 | } 155 | 156 | void push() 157 | { 158 | _table.resize(base_sm::_rows); 159 | } 160 | }; 161 | 162 | // Uses uncompressed 2d array for state machine 163 | template 164 | struct basic_uncompressed_state_machine : base_state_machine 165 | { 166 | using base_sm = base_state_machine; 167 | using id_type = id_ty; 168 | using entry = typename base_sm::entry; 169 | using table = std::vector; 170 | 171 | table _table; 172 | 173 | // No need to specify constructor. 
174 | ~basic_uncompressed_state_machine() override = default; 175 | 176 | void clear() noexcept override 177 | { 178 | base_sm::clear(); 179 | _table.clear(); 180 | } 181 | 182 | bool empty() const 183 | { 184 | return _table.empty(); 185 | } 186 | 187 | entry at(const std::size_t state_) const 188 | { 189 | return _table[state_ * base_sm::_columns]; 190 | } 191 | 192 | entry at(const std::size_t state_, const std::size_t token_id_) const 193 | { 194 | return _table[state_ * base_sm::_columns + token_id_]; 195 | } 196 | 197 | void set(const std::size_t state_, const std::size_t token_id_, 198 | const entry& entry_) 199 | { 200 | _table[state_ * base_sm::_columns + token_id_] = entry_; 201 | } 202 | 203 | void push() 204 | { 205 | _table.resize(base_sm::_columns * base_sm::_rows); 206 | } 207 | }; 208 | 209 | using state_machine = basic_state_machine; 210 | using uncompressed_state_machine = 211 | basic_uncompressed_state_machine; 212 | } 213 | 214 | #endif 215 | -------------------------------------------------------------------------------- /lib/lexertl14/include/lexertl/parser/tree/node.hpp: -------------------------------------------------------------------------------- 1 | // node.hpp 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef LEXERTL_NODE_HPP 7 | #define LEXERTL_NODE_HPP 8 | 9 | #include 10 | #include 11 | #include "../../observer_ptr.hpp" 12 | #include "../../runtime_error.hpp" 13 | #include 14 | #include 15 | 16 | namespace lexertl 17 | { 18 | namespace detail 19 | { 20 | template 21 | class basic_node 22 | { 23 | public: 24 | enum class node_type { LEAF, SEQUENCE, SELECTION, ITERATION, END }; 25 | 26 | using bool_stack = std::stack; 27 | using node_stack = std::stack>; 28 | using const_node_stack = std::stack>; 29 | using node_vector = std::vector>; 30 | using node_ptr_vector = std::vector>; 31 | 32 | basic_node() = default; 33 | 34 | explicit basic_node(const bool nullable_) : 35 | _nullable(nullable_) 36 | { 37 | } 38 | 39 | basic_node(const basic_node&) = delete; 40 | const basic_node& operator =(const basic_node&) = delete; 41 | virtual ~basic_node() = default; 42 | 43 | static id_type null_token() 44 | { 45 | return static_cast(~0); 46 | } 47 | 48 | bool nullable() const 49 | { 50 | return _nullable; 51 | } 52 | 53 | void append_firstpos(node_vector& firstpos_) const 54 | { 55 | firstpos_.insert(firstpos_.end(), 56 | _firstpos.begin(), _firstpos.end()); 57 | } 58 | 59 | void append_lastpos(node_vector& lastpos_) const 60 | { 61 | lastpos_.insert(lastpos_.end(), 62 | _lastpos.begin(), _lastpos.end()); 63 | } 64 | 65 | virtual void append_followpos(const node_vector&/*followpos_*/) 66 | { 67 | throw runtime_error("Internal error node::append_followpos()."); 68 | } 69 | 70 | observer_ptr copy 71 | (node_ptr_vector& node_ptr_vector_) const 72 | { 73 | observer_ptr new_root_ = nullptr; 74 | const_node_stack node_stack_; 75 | bool_stack perform_op_stack_; 76 | bool down_ = true; 77 | node_stack new_node_stack_; 78 | 79 | node_stack_.push(this); 80 | 81 | while (!node_stack_.empty()) 82 | { 83 | while (down_) 84 | { 85 | down_ = node_stack_.top()->traverse(node_stack_, 86 | 
perform_op_stack_); 87 | } 88 | 89 | while (!down_ && !node_stack_.empty()) 90 | { 91 | observer_ptr top_ = node_stack_.top(); 92 | 93 | top_->copy_node(node_ptr_vector_, new_node_stack_, 94 | perform_op_stack_, down_); 95 | 96 | if (!down_) node_stack_.pop(); 97 | } 98 | } 99 | 100 | assert(new_node_stack_.size() == 1); 101 | new_root_ = new_node_stack_.top(); 102 | new_node_stack_.pop(); 103 | return new_root_; 104 | } 105 | 106 | virtual node_type what_type() const = 0; 107 | 108 | virtual bool traverse(const_node_stack& node_stack_, 109 | bool_stack& perform_op_stack_) const = 0; 110 | 111 | node_vector& firstpos() 112 | { 113 | return _firstpos; 114 | } 115 | 116 | const node_vector& firstpos() const 117 | { 118 | return _firstpos; 119 | } 120 | 121 | // _lastpos modified externally, so not const & 122 | node_vector& lastpos() 123 | { 124 | return _lastpos; 125 | } 126 | 127 | virtual bool end_state() const 128 | { 129 | return false; 130 | } 131 | 132 | virtual id_type id() const 133 | { 134 | throw runtime_error("Internal error node::id()."); 135 | #ifdef __SUNPRO_CC 136 | // Stop bogus Solaris compiler warning 137 | return id_type(); 138 | #endif 139 | } 140 | 141 | virtual id_type user_id() const 142 | { 143 | throw runtime_error("Internal error node::user_id()."); 144 | #ifdef __SUNPRO_CC 145 | // Stop bogus Solaris compiler warning 146 | return id_type(); 147 | #endif 148 | } 149 | 150 | virtual id_type next_dfa() const 151 | { 152 | throw runtime_error("Internal error node::next_dfa()."); 153 | #ifdef __SUNPRO_CC 154 | // Stop bogus Solaris compiler warning 155 | return id_type(); 156 | #endif 157 | } 158 | 159 | virtual id_type push_dfa() const 160 | { 161 | throw runtime_error("Internal error node::push_dfa()."); 162 | #ifdef __SUNPRO_CC 163 | // Stop bogus Solaris compiler warning 164 | return id_type(); 165 | #endif 166 | } 167 | 168 | virtual bool pop_dfa() const 169 | { 170 | throw runtime_error("Internal error node::pop_dfa()."); 171 | #ifdef 
__SUNPRO_CC 172 | // Stop bogus Solaris compiler warning 173 | return false; 174 | #endif 175 | } 176 | 177 | virtual id_type token() const 178 | { 179 | throw runtime_error("Internal error node::token()."); 180 | #ifdef __SUNPRO_CC 181 | // Stop bogus Solaris compiler warning 182 | return id_type(); 183 | #endif 184 | } 185 | 186 | virtual bool set_greedy() const 187 | { 188 | throw runtime_error("Internal error node::set_greedy()."); 189 | } 190 | 191 | virtual void greedy(const bool /*greedy_*/) 192 | { 193 | throw runtime_error("Internal error node::greedy(bool)."); 194 | } 195 | 196 | virtual bool greedy() const 197 | { 198 | throw runtime_error("Internal error node::greedy()."); 199 | #ifdef __SUNPRO_CC 200 | // Stop bogus Solaris compiler warning 201 | return false; 202 | #endif 203 | } 204 | 205 | virtual const node_vector& followpos() const 206 | { 207 | throw runtime_error("Internal error node::followpos()."); 208 | #ifdef __SUNPRO_CC 209 | // Stop bogus Solaris compiler warning 210 | return firstpos; 211 | #endif 212 | } 213 | 214 | virtual node_vector& followpos() 215 | { 216 | throw runtime_error("Internal error node::followpos()."); 217 | #ifdef __SUNPRO_CC 218 | // Stop bogus Solaris compiler warning 219 | return firstpos; 220 | #endif 221 | } 222 | 223 | protected: 224 | virtual void copy_node(node_ptr_vector& node_ptr_vector_, 225 | node_stack& new_node_stack_, bool_stack& perform_op_stack_, 226 | bool& down_) const = 0; 227 | 228 | private: 229 | const bool _nullable = false; 230 | node_vector _firstpos; 231 | node_vector _lastpos; 232 | }; 233 | } 234 | } 235 | 236 | #endif 237 | --------------------------------------------------------------------------------