├── EXPERIMENTAL
├── CREDITS
├── lib
    ├── lexertl14
    │   ├── README.md
    │   └── include
    │   │   └── lexertl
    │   │       ├── observer_ptr.hpp
    │   │       ├── runtime_error.hpp
    │   │       ├── narrow.hpp
    │   │       ├── stream_num.hpp
    │   │       ├── enum_operator.hpp
    │   │       ├── enums.hpp
    │   │       ├── parser
    │   │           ├── tokeniser
    │   │           │   ├── fold4.inc
    │   │           │   ├── re_tokeniser_state.hpp
    │   │           │   └── re_token.hpp
    │   │           └── tree
    │   │           │   ├── iteration_node.hpp
    │   │           │   ├── selection_node.hpp
    │   │           │   ├── leaf_node.hpp
    │   │           │   ├── end_node.hpp
    │   │           │   ├── sequence_node.hpp
    │   │           │   └── node.hpp
    │   │       ├── char_traits.hpp
    │   │       ├── licence_1_0.txt
    │   │       ├── sm_traits.hpp
    │   │       ├── sm_to_csm.hpp
    │   │       ├── partition
    │   │           ├── charset.hpp
    │   │           └── equivset.hpp
    │   │       ├── internals.hpp
    │   │       ├── iterator.hpp
    │   │       ├── replace.hpp
    │   │       ├── memory_file.hpp
    │   │       ├── serialise.hpp
    │   │       └── match_results.hpp
    ├── parsertl14
    │   ├── README.md
    │   └── include
    │   │   └── parsertl
    │   │       ├── ebnf.y
    │   │       ├── runtime_error.hpp
    │   │       ├── enums.hpp
    │   │       ├── narrow.hpp
    │   │       ├── dfa.hpp
    │   │       ├── nt_info.hpp
    │   │       ├── enum_operator.hpp
    │   │       ├── licence_1_0.txt
    │   │       ├── token.hpp
    │   │       ├── capture.hpp
    │   │       ├── match.hpp
    │   │       ├── parse.hpp
    │   │       ├── search_iterator.hpp
    │   │       ├── match_results.hpp
    │   │       ├── iterator.hpp
    │   │       ├── serialise.hpp
    │   │       ├── ebnf_tables.hpp
    │   │       ├── lookup.hpp
    │   │       └── state_machine.hpp
    └── parle
    │   ├── cvt.hpp
    │   └── lexer
    │       └── iterator.hpp
├── tests
    ├── lexer_003.json
    ├── lexer_flags.phpt
    ├── readBison.phpt
    ├── sigil_001.phpt
    ├── stack_001.phpt
    ├── reflection_001.phpt
    ├── lexer_001.phpt
    ├── lexer_position_tracking_002.phpt
    ├── lexer_002.phpt
    ├── lexer_005.phpt
    ├── lexer_006.phpt
    ├── sigil_002.phpt
    ├── words_001.phpt
    ├── reflection_002.phpt
    ├── words_002.phpt
    ├── words_003.phpt
    ├── lexer_004.phpt
    ├── calc_001.phpt
    ├── lexer_position_tracking_001.phpt
    ├── calc_002.phpt
    ├── calc_003.phpt
    ├── lexer_007.phpt
    └── lexer_003.phpt
├── .gitignore
├── config.m4
├── config.w32
├── LICENSE
├── INSTALL.md
├── bench
    ├── parse_str.php
    ├── phlexy_alike.php
    └── parse_str.impl.php
├── .github
    └── workflows
    │   └── main.yml
├── php_parle.h
└── README.md


/EXPERIMENTAL:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/CREDITS:
--------------------------------------------------------------------------------
1 | parle
2 | Anatol Belski, Ben Hanson
3 | 


--------------------------------------------------------------------------------
/lib/lexertl14/README.md:
--------------------------------------------------------------------------------
1 | # lexertl14
2 | C++14 version of lexertl
3 | 


--------------------------------------------------------------------------------
/lib/parsertl14/README.md:
--------------------------------------------------------------------------------
1 | # parsertl14
2 | C++14 version of parsertl
3 | 


--------------------------------------------------------------------------------
/tests/lexer_003.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "key": [
 3 | 	  "qelque choose",
 4 | 	  42,
 5 |           "füße"
 6 |   ],
 7 |   "obj": {
 8 |      "prop": 12
 9 |   },
10 |   "some": null
11 | }
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/lexer_flags.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Lexer flags
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\Lexer;
 9 | 
10 | $lex = new Lexer;
11 | $lex->flags = Lexer::DOT_NOT_LF | Lexer::DOT_NOT_CRLF;
12 | var_dump($lex->flags);
13 | $lex->flags |= Lexer::SKIP_WS;
14 | var_dump($lex->flags);
15 | 
16 | ?>
17 | ==DONE==
18 | --EXPECTF--
19 | int(6)
20 | int(14)
21 | ==DONE==
22 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/observer_ptr.hpp:
--------------------------------------------------------------------------------
 1 | // observer_ptr.hpp
 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | #ifndef LEXERTL_OBSERVER_PTR_HPP
 8 | #define LEXERTL_OBSERVER_PTR_HPP
 9 | 
10 | namespace lexertl
11 | {
12 |     template<typename T> // observer_ptr<T> is an alias for the raw pointer type T*.
13 |     using observer_ptr = T*; // NOTE(review): the name suggests a non-owning pointer; nothing in this header enforces that.
14 | }
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/tests/readBison.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | readBison() test
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | use Parle\Parser;
 8 | 
 9 | $p = new Parser;
10 | 
11 | $p->readBison("%%\n;start: 'a';%%\n");
12 | $p->dump();
13 | 
14 | try
15 | {
16 | 	$p->readBison("@");
17 | }
18 | catch (\Throwable $e)
19 | {
20 | 	echo $e->getMessage(), "\n";
21 | }
22 | ?>
23 | ==DONE==
24 | --EXPECT--
25 | %token 'a'
26 | %%
27 | 
28 | start: 'a';
29 | 
30 | %%
31 | Syntax error on line 1: '@'
32 | ==DONE==
33 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .deps
 2 | *.lo
 3 | *.la
 4 | .libs
 5 | acinclude.m4
 6 | aclocal.m4
 7 | autom4te.cache
 8 | build
 9 | config.guess
10 | config.h
11 | config.h.in
12 | config.log
13 | config.nice
14 | config.status
15 | config.sub
16 | configure
17 | configure.in
18 | install-sh
19 | libtool
20 | ltmain.sh
21 | Makefile
22 | Makefile.fragments
23 | Makefile.global
24 | Makefile.objects
25 | missing
26 | mkinstalldirs
27 | modules
28 | run-tests.php
29 | tests/*.diff
30 | tests/*.out
31 | tests/*.php
32 | tests/*.exp
33 | tests/*.log
34 | tests/*.sh
35 | config.nice.bat
36 | configure.bat
37 | configure.js
38 | x64
39 | Release
40 | Release_TS
41 | Debug
42 | Debug_TS
43 | *.patch
44 | *.diff
45 | *.tmp
46 | *.swp
47 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/runtime_error.hpp:
--------------------------------------------------------------------------------
 1 | // runtime_error.hpp
 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef LEXERTL_RUNTIME_ERROR_HPP
 7 | #define LEXERTL_RUNTIME_ERROR_HPP
 8 | 
 9 | #include <stdexcept>
10 | 
11 | namespace lexertl
12 | {
13 |     class runtime_error : public std::runtime_error // lexertl's own exception type; adds no members to std::runtime_error.
14 |     {
15 |     public: // NOTE(review): the constructor below is not 'explicit', unlike parsertl::runtime_error - confirm this is intended.
16 |         runtime_error(const std::string& what_arg_) : // what_arg_: the message reported through what().
17 |             std::runtime_error(what_arg_) // Passed to the base class unchanged.
18 |         {
19 |         }
20 |     };
21 | }
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/ebnf.y:
--------------------------------------------------------------------------------
 1 | /* Generate code using: bison -S parsertl.cc ebnf.y */
 2 | %token EMPTY IDENTIFIER PREC TERMINAL
 3 | %%
 4 | 
 5 | rule: rhs_or;
 6 | 
 7 | rhs_or: opt_prec_list
 8 |       | rhs_or '|' opt_prec_list;
 9 | 
10 | opt_prec_list: opt_list opt_prec;
11 | 
12 | opt_list:
13 |         | EMPTY
14 |         | rhs_list;
15 | 
16 | rhs_list: rhs
17 |         | rhs_list rhs;
18 | 
19 | rhs: IDENTIFIER
20 |    | TERMINAL
21 |    | '[' rhs_or ']'
22 |    | rhs '?'
23 |    | '{' rhs_or '}'
24 |    | rhs '*'
25 |    | '{' rhs_or '}' '-'
26 |    | rhs '+'
27 |    | '(' rhs_or ')';
28 | 
29 | opt_prec:
30 |         | PREC IDENTIFIER
31 |         | PREC TERMINAL;
32 | 
33 | %%
34 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/runtime_error.hpp:
--------------------------------------------------------------------------------
 1 | // runtime_error.hpp
 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_RUNTIME_ERROR_HPP
 7 | #define PARSERTL_RUNTIME_ERROR_HPP
 8 | 
 9 | #include <stdexcept>
10 | 
11 | namespace parsertl
12 | {
13 |     class runtime_error : public std::runtime_error // parsertl's own exception type; adds no members to std::runtime_error.
14 |     {
15 |     public:
16 |         explicit runtime_error(const std::string& what_arg_) : // what_arg_: the message reported through what().
17 |             std::runtime_error(what_arg_) // Passed to the base class unchanged.
18 |         {
19 |         }
20 |     };
21 | }
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/enums.hpp:
--------------------------------------------------------------------------------
 1 | // enums.hpp
 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_ENUMS_HPP
 7 | #define PARSERTL_ENUMS_HPP
 8 | 
 9 | namespace parsertl
10 | {
11 |     enum class rule_flags { enable_captures = 1 }; // Rule option flags; enable_captures (value 1) is the only enumerator.
12 |     enum class action // Kinds of parser action; enumerators take the implicit values 0..4 in declaration order.
13 |     {
14 |         error, // 0
15 |         shift, // 1
16 |         reduce, // 2
17 |         go_to, // 3
18 |         accept // 4
19 |     };
20 |     enum class error_type // Error categories; enumerators take the implicit values 0..2 in declaration order.
21 |     {
22 |         syntax_error, // 0
23 |         non_associative, // 1
24 |         unknown_token // 2
25 |     };
26 | }
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/narrow.hpp:
--------------------------------------------------------------------------------
 1 | // narrow.hpp
 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_NARROW_HPP
 7 | #define PARSERTL_NARROW_HPP
 8 | 
 9 | #include <sstream>
10 | 
11 | namespace parsertl
12 | {
13 |     template<typename char_type> // Appends the zero-terminated string str_ to ss_, casting each character to char.
14 |     void narrow(const char_type* str_, std::ostringstream& ss_) // str_ is dereferenced unchecked, so it must not be null.
15 |     {
16 |         while (*str_) // Stop at the terminating zero character.
17 |         {
18 |             // Safe to simply cast to char when string only contains ASCII.
19 |             ss_ << static_cast<char>(*str_++);
20 |         }
21 |     }
22 | }
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/narrow.hpp:
--------------------------------------------------------------------------------
 1 | // narrow.hpp
 2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef LEXERTL_NARROW_HPP
 7 | #define LEXERTL_NARROW_HPP
 8 | 
 9 | #include <sstream>
10 | 
11 | namespace lexertl
12 | {
13 |     template<typename char_type> // Appends the zero-terminated string str_ to ss_, casting each character to char.
14 |     void narrow(const char_type* str_, std::ostringstream& ss_) // str_ is dereferenced unchecked, so it must not be null.
15 |     {
16 |         while (*str_) // Stop at the terminating zero character.
17 |         {
18 |             // Safe to simply cast to char
19 |             // when string only contains ASCII.
20 |             ss_ << static_cast<char>(*str_++);
21 |         }
22 |     }
23 | }
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/dfa.hpp:
--------------------------------------------------------------------------------
 1 | // dfa.hpp
 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_DFA_HPP
 7 | #define PARSERTL_DFA_HPP
 8 | 
 9 | #include <deque>
10 | #include <vector>
11 | 
12 | namespace parsertl
13 | {
14 |     using size_t_pair = std::pair<std::size_t, std::size_t>; // NOTE(review): std::pair/std::size_t are used, but only <deque> and <vector> are included here.
15 |     using size_t_pair_vector = std::vector<size_t_pair>; // Sequence of such pairs.
16 | 
17 |     struct dfa_state // One state of the dfa container: three lists of (size_t, size_t) pairs.
18 |     {
19 |         size_t_pair_vector _basis; // NOTE(review): the meaning of each pair's two fields is not visible in this header - confirm against the generator.
20 |         size_t_pair_vector _closure;
21 |         size_t_pair_vector _transitions;
22 |     };
23 | 
24 |     // Must be deque due to iterator usage in basic_generator::build_dfa().
25 |     using dfa = std::deque<dfa_state>; // The whole DFA: a sequence of dfa_state objects.
26 | }
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/nt_info.hpp:
--------------------------------------------------------------------------------
 1 | // nt_info.hpp
 2 | // Copyright (c) 2016-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_NT_INFO_HPP
 7 | #define PARSERTL_NT_INFO_HPP
 8 | 
 9 | #include <vector>
10 | 
11 | namespace parsertl
12 | {
13 |     using char_vector = std::vector<char>; // Element type of the first/follow sets stored in nt_info.
14 | 
15 |     struct nt_info // Nullable flag plus first and follow sets. NOTE(review): 'nt' presumably means non-terminal - confirm.
16 |     {
17 |         bool _nullable = false; // Starts out false.
18 |         char_vector _first_set; // terminals_ entries, all initialised to 0 by the constructor.
19 |         char_vector _follow_set; // terminals_ entries, all initialised to 0 by the constructor.
20 | 
21 |         explicit nt_info(const std::size_t terminals_) : // terminals_: number of entries allocated in each set. NOTE(review): std::size_t is used with only <vector> included.
22 |             _first_set(terminals_, 0),
23 |             _follow_set(terminals_, 0)
24 |         {
25 |         }
26 |     };
27 | 
28 |     using nt_info_vector = std::vector<nt_info>; // Sequence of nt_info records.
29 | }
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/tests/sigil_001.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Test sigil methods
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\Parser;
 9 | use Parle\Lexer;
10 | use Parle\Token;
11 | 
12 | $p = new Parser;
13 | $p->push("start", "'a' B");
14 | $b_idx = $p->push("B", "'b'");
15 | $p->build();
16 | 
17 | $lex = new Lexer;
18 | $lex->push("a", $p->tokenId("'a'"));
19 | $lex->push("b", $p->tokenId("'b'"));
20 | $lex->push("\\s+", Token::SKIP);
21 | $lex->build();
22 | 
23 | $p->consume("a b", $lex);
24 | 
25 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) {
26 | 	switch ($p->action) {
27 | 		case Parser::ACTION_REDUCE:
28 | 			echo $p->sigilName(($p->reduceId == $b_idx) ? 0 : 1) . "\n";
29 | 			echo $p->sigilCount() . "\n";
30 | 			break;	
31 | 	}
32 | 
33 | 	$p->advance();
34 | }
35 | ?>
36 | ==DONE==
37 | --EXPECT--
38 | 'b'
39 | 1
40 | B
41 | 2
42 | ==DONE==
43 | 


--------------------------------------------------------------------------------
/tests/stack_001.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Stack var_dump()
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php
 7 | 
 8 | $s = new Parle\Stack;
 9 | $s->push(1);
10 | $s->push(2);
11 | $s->push(3);
12 | var_dump($s);
13 | var_dump($s->empty, $s->size, $s->top);
14 | 
15 | $s->pop();
16 | var_dump($s);
17 | 
18 | ?>
19 | ==DONE==
20 | --EXPECTF--
21 | object(Parle\Stack)#%d (4) {
22 |   ["empty"]=>
23 |   bool(false)
24 |   ["size"]=>
25 |   int(3)
26 |   ["top"]=>
27 |   int(3)
28 |   ["elements"]=>
29 |   array(3) {
30 |     [0]=>
31 |     int(3)
32 |     [1]=>
33 |     int(2)
34 |     [2]=>
35 |     int(1)
36 |   }
37 | }
38 | bool(false)
39 | int(3)
40 | int(3)
41 | object(Parle\Stack)#%d (4) {
42 |   ["empty"]=>
43 |   bool(false)
44 |   ["size"]=>
45 |   int(2)
46 |   ["top"]=>
47 |   int(2)
48 |   ["elements"]=>
49 |   array(2) {
50 |     [0]=>
51 |     int(2)
52 |     [1]=>
53 |     int(1)
54 |   }
55 | }
56 | ==DONE==
57 | 


--------------------------------------------------------------------------------
/tests/reflection_001.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | return type in arg info
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | $r = new ReflectionMethod("Parle\\Lexer", "getToken");
 9 | var_dump(PHP_VERSION_ID >= 70100 ? $r->getReturnType()->getName() : (string)$r->getReturnType());
10 | $r = new ReflectionMethod("Parle\\RLexer", "getToken");
11 | var_dump(PHP_VERSION_ID >= 70100 ? $r->getReturnType()->getName() : (string)$r->getReturnType());
12 | $r = new ReflectionMethod("Parle\\Parser", "errorInfo");
13 | var_dump(PHP_VERSION_ID >= 70100 ? $r->getReturnType()->getName() : (string)$r->getReturnType());
14 | $r = new ReflectionMethod("Parle\\RParser", "errorInfo");
15 | var_dump(PHP_VERSION_ID >= 70100 ? $r->getReturnType()->getName() : (string)$r->getReturnType());
16 | 
17 | ?>
18 | ==DONE==
19 | --EXPECTF--
20 | string(11) "Parle\Token"
21 | string(11) "Parle\Token"
22 | string(15) "Parle\ErrorInfo"
23 | string(15) "Parle\ErrorInfo"
24 | ==DONE==
25 | 


--------------------------------------------------------------------------------
/tests/lexer_001.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Lex PHP var statement
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\Lexer;
 9 | use Parle\Token;
10 | 
11 | $lex = new Lexer;
12 | $lex->push("\$[a-z]{1,}[a-zA-Z0-9_]+", 1);
13 | $lex->push("=", 2);
14 | $lex->push("[0-9]+", 3);
15 | $lex->push(";", 4);
16 | 
17 | $lex->build();
18 | 
19 | $s = "\$hello=42;";
20 | $lex->consume($s);
21 | 
22 | $lex->advance();
23 | $tok = $lex->getToken();
24 | 
25 | while (Token::EOI != $tok->id) {
26 | 	var_dump($tok);
27 | 	$lex->advance();
28 | 	$tok = $lex->getToken();
29 | }
30 | 
31 | ?>
32 | ==DONE==
33 | --EXPECTF--
34 | object(Parle\Token)#%d (2) {
35 |   ["id"]=>
36 |   int(1)
37 |   ["value"]=>
38 |   string(6) "$hello"
39 | }
40 | object(Parle\Token)#%d (2) {
41 |   ["id"]=>
42 |   int(2)
43 |   ["value"]=>
44 |   string(1) "="
45 | }
46 | object(Parle\Token)#%d (2) {
47 |   ["id"]=>
48 |   int(3)
49 |   ["value"]=>
50 |   string(2) "42"
51 | }
52 | object(Parle\Token)#%d (2) {
53 |   ["id"]=>
54 |   int(4)
55 |   ["value"]=>
56 |   string(1) ";"
57 | }
58 | ==DONE==
59 | 


--------------------------------------------------------------------------------
/tests/lexer_position_tracking_002.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Lex test line property
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\Lexer;
 9 | use Parle\Token;
10 | 
11 | $lex = new Lexer;
12 | $lex->push("[a-z]", 1);
13 | $lex->push("[\n]", 2);
14 | $lex->push("bc", 3);
15 | $lex->push("ij", 4);
16 | 
17 | $lex->build();
18 | 
19 | $lines = array(
20 | 	"abc",
21 | 	"de",
22 | 	"f",
23 | 	"ghijk",
24 | 	"xyz",
25 | );
26 | $s = implode("\n", $lines);
27 | //$s = "abc\nd\n\r\nf\nxyz";
28 | $lex->consume($s);
29 | 
30 | printf("L C M  T\n");
31 | do {
32 | 	$lex->advance();
33 | 	$tok = $lex->getToken();
34 | 	printf("%d %d %2d %s\n", $lex->line, $lex->column, $lex->marker, (2 == $tok->id ? ">LF<" : $tok->value));
35 | } while (Token::EOI != $tok->id);
36 | 
37 | ?>
38 | ==DONE==
39 | --EXPECTF--
40 | L C M  T
41 | 0 0  0 a
42 | 0 1  1 bc
43 | 0 3  3 >LF<
44 | 1 0  4 d
45 | 1 1  5 e
46 | 1 2  6 >LF<
47 | 2 0  7 f
48 | 2 1  8 >LF<
49 | 3 0  9 g
50 | 3 1 10 h
51 | 3 2 11 ij
52 | 3 4 13 k
53 | 3 5 14 >LF<
54 | 4 0 15 x
55 | 4 1 16 y
56 | 4 2 17 z
57 | 4 3 18 
58 | ==DONE==
59 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/stream_num.hpp:
--------------------------------------------------------------------------------
 1 | // stream_num.hpp
 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef LEXERTL_STREAM_NUM_HPP
 7 | #define LEXERTL_STREAM_NUM_HPP
 8 | 
 9 | #include <sstream>
10 | 
11 | namespace lexertl
12 | {
13 | 	template<typename T> // Narrow-stream overload: writes num_ to ss_ using operator<<.
14 | 	void stream_num(const T num_, std::ostream& ss_)
15 | 	{
16 | 		ss_ << num_;
17 | 	}
18 | 
19 | 	template<typename T> // Wide-stream overload: writes num_ to ss_ using operator<<.
20 | 	void stream_num(const T num_, std::wostream& ss_)
21 | 	{
22 | 		ss_ << num_;
23 | 	}
24 | 
25 | 	// char32_t overload. MSVC doesn't support streaming integers etc to
26 | 	// std::basic_ostringstream<char32_t>, so num_ is formatted via a narrow stream first.
27 | 	template<typename T>
28 | 	void stream_num(const T num_, std::basic_ostream<char32_t>& ss_)
29 | 	{
30 | 		std::stringstream css_; // Temporary narrow stream used only for formatting.
31 | 		std::string count_; // Narrow text form of num_.
32 | 
33 | 		css_ << num_;
34 | 		count_ = css_.str();
35 | 
36 | 		for (char c_ : count_) // Emit the formatted text one character at a time.
37 | 		{
38 | 			ss_ << static_cast<char32_t>(c_); // Each narrow char is cast to char32_t before being written.
39 | 		}
40 | 	}
41 | }
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/config.m4:
--------------------------------------------------------------------------------
 1 | dnl $Id$
 2 | dnl config.m4 for extension parle
 3 | 
 4 | PHP_ARG_ENABLE(parle, whether to enable parle support,
 5 | [  --enable-parle           Enable lexer/parser support])
 6 | PHP_ARG_ENABLE(parle-utf32, whether to enable internal UTF-32 support in parle,
 7 | [  --enable-parle-utf32     Enable internal UTF-32 support for lexer/parser], no, no)
 8 | 
 9 | if test "$PHP_PARLE" != "no"; then
10 |   PHP_REQUIRE_CXX()
11 | 
12 |   AC_DEFINE(HAVE_PARLE,1,[ ])
13 |   PHP_SUBST(PARLE_SHARED_LIBADD)
14 | 
15 |   PHP_NEW_EXTENSION(parle, parle.cpp, $ext_shared,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 -std=c++14, cxx)
16 | 
17 |   PHP_ADD_INCLUDE($ext_srcdir/lib/lexertl14)
18 |   PHP_ADD_INCLUDE($ext_builddir/lib/lexertl14)
19 |   PHP_ADD_INCLUDE($ext_srcdir/lib/parsertl14)
20 |   PHP_ADD_INCLUDE($ext_builddir/lib/parsertl14)
21 |   PHP_ADD_INCLUDE($ext_srcdir/lib/parle)
22 |   PHP_ADD_INCLUDE($ext_builddir/lib/parle)
23 |   PHP_ADD_INCLUDE($ext_srcdir/lib)
24 |   PHP_ADD_INCLUDE($ext_builddir/lib)
25 | 
26 |   if test "$PHP_PARLE_UTF32" != "no"; then
27 |     AC_DEFINE(HAVE_PARLE_UTF32,1,[ ])
28 |   fi
29 |   dnl PHP_INSTALL_HEADERS([ext/parle/php_parle.h])
30 | fi
31 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/enum_operator.hpp:
--------------------------------------------------------------------------------
 1 | // enum_operator.hpp
 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef LEXERTL_ENUM_OPERATOR_HPP
 7 | #define LEXERTL_ENUM_OPERATOR_HPP
 8 | 
 9 | #include <type_traits>
10 | 
11 | namespace lexertl
12 | {
13 | 	// Unary operator* converting an enum value to uint16_t (a fixed type, not the enum's own underlying type)
14 | 	// Example:
15 | 	// enum class number { one = 1, two, three };
16 | 	// int num = *number::two; // num == 2
17 | 	template <typename T> // Takes part in overload resolution only when T is an enum type (enable_if below).
18 | 	auto operator*(T e) noexcept ->
19 | 		std::enable_if_t<std::is_enum<T>::value, uint16_t> // NOTE(review): uint16_t is unqualified and <cstdint> is not included by this header.
20 | 	{
21 | 		return static_cast<uint16_t>(e); // Values outside the uint16_t range are converted by the cast, not rejected.
22 | 	}
23 | 
24 | 	// constexpr counterpart of unary operator*: converts an enum value to uint16_t
25 | 	// in constant expressions (e.g. setting a C style array size using an enum)
26 | 	template <typename T> // Takes part in overload resolution only when T is an enum type (enable_if below).
27 | 	constexpr auto operator+(T e) noexcept ->
28 | 		std::enable_if_t<std::is_enum<T>::value, uint16_t> // NOTE(review): uint16_t is unqualified and <cstdint> is not included by this header.
29 | 	{
30 | 		return static_cast<uint16_t>(e);
31 | 	}
32 | }
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/enum_operator.hpp:
--------------------------------------------------------------------------------
 1 | // enum_operator.hpp
 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_ENUM_OPERATOR_HPP
 7 | #define PARSERTL_ENUM_OPERATOR_HPP
 8 | 
 9 | #include <type_traits>
10 | 
11 | namespace parsertl
12 | {
13 | 	// Unary operator* converting an enum value to uint16_t (a fixed type, not the enum's own underlying type)
14 | 	// Example:
15 | 	// enum class number { one = 1, two, three };
16 | 	// int num = *number::two; // num == 2
17 | 	template <typename T> // Takes part in overload resolution only when T is an enum type (enable_if below).
18 | 	auto operator*(T e) noexcept ->
19 | 		std::enable_if_t<std::is_enum<T>::value, uint16_t> // NOTE(review): uint16_t is unqualified and <cstdint> is not included by this header.
20 | 	{
21 | 		return static_cast<uint16_t>(e); // Values outside the uint16_t range are converted by the cast, not rejected.
22 | 	}
23 | 
24 | 	// constexpr counterpart of unary operator*: converts an enum value to uint16_t
25 | 	// in constant expressions (e.g. setting a C style array size using an enum)
26 | 	template <typename T> // Takes part in overload resolution only when T is an enum type (enable_if below).
27 | 	constexpr auto operator+(T e) noexcept ->
28 | 		std::enable_if_t<std::is_enum<T>::value, uint16_t> // NOTE(review): uint16_t is unqualified and <cstdint> is not included by this header.
29 | 	{
30 | 		return static_cast<uint16_t>(e);
31 | 	}
32 | }
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/enums.hpp:
--------------------------------------------------------------------------------
 1 | // enums.hpp
 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | #ifndef LEXERTL_ENUMS_HPP
 8 | #define LEXERTL_ENUMS_HPP
 9 | 
10 | namespace lexertl
11 | {
12 |     enum class regex_flags // Regex option flags; each enumerator is a distinct power of two.
13 |     {
14 |         icase = 1, dot_not_newline = 2, dot_not_cr_lf = 4,
15 |         skip_ws = 8, match_zero_len = 16
16 |     };
17 |     // 0 = end_state, 1 = id, 2 = user_id, 3 = push_dfa
18 |     // 4 = next_dfa, 5 = eol, 6 = dead_state, 7 = transitions (dfa start)
19 |     enum class state_index
20 |     {
21 |         end_state, id, user_id, push_dfa,
22 |         next_dfa, eol, dead_state, transitions
23 |     };
24 |     // Rule flags (each enumerator is a distinct power of two):
25 |     enum class feature_bit
26 |     {
27 |         bol = 1, eol = 2, skip = 4, again = 8,
28 |         multi_state = 16, recursive = 32, advance = 64
29 |     };
30 |     // End state flags (each enumerator is a distinct power of two):
31 |     enum class state_bit
32 |     {
33 |         end_state = 1, greedy = 2, pop_dfa = 4
34 |     };
35 | }
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tokeniser/fold4.inc:
--------------------------------------------------------------------------------
 1 |             {{0x10400, 0x10427}, {0x10428, 0x1044f}},
 2 |             {{0x10428, 0x1044f}, {0x10400, 0x10427}},
 3 |             {{0x104b0, 0x104d3}, {0x104d8, 0x104fb}},
 4 |             {{0x104d8, 0x104fb}, {0x104b0, 0x104d3}},
 5 |             {{0x10570, 0x1057a}, {0x10597, 0x105a1}},
 6 |             {{0x1057c, 0x1058a}, {0x105a3, 0x105b1}},
 7 |             {{0x1058c, 0x10592}, {0x105b3, 0x105b9}},
 8 |             {{0x10594, 0x10595}, {0x105bb, 0x105bc}},
 9 |             {{0x10597, 0x105a1}, {0x10570, 0x1057a}},
10 |             {{0x105a3, 0x105b1}, {0x1057c, 0x1058a}},
11 |             {{0x105b3, 0x105b9}, {0x1058c, 0x10592}},
12 |             {{0x105bb, 0x105bc}, {0x10594, 0x10595}},
13 |             {{0x10c80, 0x10cb2}, {0x10cc0, 0x10cf2}},
14 |             {{0x10cc0, 0x10cf2}, {0x10c80, 0x10cb2}},
15 |             {{0x118a0, 0x118bf}, {0x118c0, 0x118df}},
16 |             {{0x118c0, 0x118df}, {0x118a0, 0x118bf}},
17 |             {{0x16e40, 0x16e5f}, {0x16e60, 0x16e7f}},
18 |             {{0x16e60, 0x16e7f}, {0x16e40, 0x16e5f}},
19 |             {{0x1e900, 0x1e921}, {0x1e922, 0x1e943}},
20 |             {{0x1e922, 0x1e943}, {0x1e900, 0x1e921}},
21 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/char_traits.hpp:
--------------------------------------------------------------------------------
 1 | // char_traits.hpp
 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | #ifndef LEXERTL_CHAR_TRAITS_HPP
 8 | #define LEXERTL_CHAR_TRAITS_HPP
 9 | 
10 | #include <cstdint>
11 | 
12 | namespace lexertl
13 | {
14 |     template<typename ch_type> // Generic character traits: the index type is the character type itself.
15 |     struct basic_char_traits
16 |     {
17 |         using char_type = ch_type;
18 |         using index_type = ch_type;
19 | 
20 |         static index_type max_val() // Returns 0x10ffff when char_type is wider than 2 bytes, otherwise 0xffff.
21 |         {
22 |             const std::uint32_t max_ = 0x10ffff; // Highest Unicode code point.
23 | 
24 |             return sizeof(char_type) > 2 ?
25 |                 max_ : (max_ & 0xffff); // Keep only the low 16 bits for character types of 2 bytes or fewer.
26 |         }
27 |     };
28 | 
29 |     template<> // Specialisation for char: index_type is unsigned char rather than char.
30 |     struct basic_char_traits<char>
31 |     {
32 |         using char_type = char;
33 |         using index_type = unsigned char;
34 | 
35 |         static index_type max_val() // Returns the largest unsigned char value (all bits set).
36 |         {
37 |             // Prevent annoying warning (VC++)
38 |             index_type zero_ = 0;
39 | 
40 |             return ~zero_; // ~ yields an int with all bits set; converting back to index_type keeps the low byte.
41 |         }
42 |     };
43 | }
44 | 
45 | #endif
46 | 


--------------------------------------------------------------------------------
/tests/lexer_002.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Lex various number formats
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\Lexer;
 9 | use Parle\Token;
10 | 
11 | $lex = new Lexer;
12 | 
13 | $lex->push("0b[01]+", 1);
14 | $lex->push("0[0-7]*", 2);
15 | $lex->push("[1-9][0-9]*", 3);
16 | $lex->push("0x[0-9a-fA-F]+", 4);
17 | 
18 | $lex->build();
19 | 
20 | $nums = array(
21 | 	"0x42 0b010101 075 24",
22 | );
23 | 
24 | foreach ($nums as $in) {
25 | 
26 | 	$lex->consume($in);
27 | 
28 | 	$lex->advance();
29 | 	$tok = $lex->getToken();
30 | 
31 | 	while (Token::EOI != $tok->id) {
32 | 		if ($tok->id != Token::UNKNOWN) {
33 | 			var_dump($tok);
34 | 		}
35 | 		$lex->advance();
36 | 		$tok = $lex->getToken();
37 | 	}
38 | }
39 | 
40 | ?>
41 | ==DONE==
42 | --EXPECTF--
43 | object(Parle\Token)#%d (2) {
44 |   ["id"]=>
45 |   int(4)
46 |   ["value"]=>
47 |   string(4) "0x42"
48 | }
49 | object(Parle\Token)#%d (2) {
50 |   ["id"]=>
51 |   int(1)
52 |   ["value"]=>
53 |   string(8) "0b010101"
54 | }
55 | object(Parle\Token)#%d (2) {
56 |   ["id"]=>
57 |   int(2)
58 |   ["value"]=>
59 |   string(3) "075"
60 | }
61 | object(Parle\Token)#%d (2) {
62 |   ["id"]=>
63 |   int(3)
64 |   ["value"]=>
65 |   string(2) "24"
66 | }
67 | ==DONE==
68 | 


--------------------------------------------------------------------------------
/config.w32:
--------------------------------------------------------------------------------
 1 | // $Id$
 2 | // vim:ft=javascript
 3 | 
 4 | ARG_ENABLE("parle", "Enable lexer/parser support", "no");
 5 | ARG_ENABLE("parle-utf32", "Enable internal UTF-32 support for lexer/parser", "no");
 6 | // Configure the extension only when PHP_PARLE is not "no".
 7 | if (PHP_PARLE != "no") {
 8 | 	var parle_lib_path = configure_module_dirname + "\\lib"; // Bundled lexertl14/parsertl14/parle sources
 9 | 	if (CHECK_HEADER_ADD_INCLUDE("include/lexertl/generator.hpp", "CFLAGS_PARLE", PHP_PARLE + ";" + parle_lib_path + "\\lexertl14") && // All three header checks must succeed
10 | 		CHECK_HEADER_ADD_INCLUDE("include/parsertl/generator.hpp", "CFLAGS_PARLE", PHP_PARLE + ";" + parle_lib_path + "\\parsertl14") &&
11 | 		CHECK_HEADER_ADD_INCLUDE("parle/lexer/iterator.hpp", "CFLAGS_PARLE", PHP_PARLE + ";" + parle_lib_path)) {
12 | 		EXTENSION("parle", "parle.cpp", PHP_PARLE_SHARED, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
13 | 		ADD_FLAG("CFLAGS_PARLE", " /I " + parle_lib_path + "\\lexertl14 /I " + parle_lib_path + "\\parsertl14 /EHsc -std:c++14"); // Include paths plus /EHsc and the C++14 switch
14 | 		ADD_FLAG("CFLAGS_BD_EXT_PARLE", ' /D ZEND_WIN32_KEEP_INLINE=1 /U ZEND_WIN32_FORCE_INLINE ');
15 | 		/*PHP_INSTALL_HEADERS("ext/parle", "php_parle.h");*/
16 | 		AC_DEFINE("HAVE_PARLE", 1, "Have parle extension");
17 | 		if (PHP_PARLE_UTF32 != "no") { // UTF-32 option: emit the define and pass it to the compiler
18 | 			AC_DEFINE("HAVE_PARLE_UTF32", 1, "Have internal UTF-32 support in parle");
19 | 			ADD_FLAG("CFLAGS_PARLE", " /D HAVE_PARLE_UTF32=1");
20 | 		}
21 | 	} else { // At least one header check failed
22 | 		WARNING("parle not enabled; libraries and headers not found");
23 | 	}
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017 Anatol Belski
 2 | All rights reserved.
 3 | 
 4 | Author: Anatol Belski <ab@php.net>
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions
 8 | are met:
 9 | 1. Redistributions of source code must retain the above copyright
10 | 	notice, this list of conditions and the following disclaimer.
11 | 2. Redistributions in binary form must reproduce the above copyright
12 | 	notice, this list of conditions and the following disclaimer in the
13 | 	documentation and/or other materials provided with the distribution.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 | ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 | SUCH DAMAGE.
26 | 
27 | 


--------------------------------------------------------------------------------
/tests/lexer_005.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Lexer marker and cursor
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php
 7 | 
 8 | use Parle\{Token, Lexer};
 9 | 
10 | 
11 | $lex = new Lexer;
12 | 
13 | $lex->push("\$[a-zA-Z_][a-zA-Z0-9_]*", 1);
14 | $lex->push("=", 2);
15 | $lex->push("\d+", 3);
16 | $lex->push(";", 4);
17 | 
18 | $lex->build();
19 | 
20 | $s = '$x = 42;' . "\n" . '$y;';
21 | $lex->consume($s);
22 | 
23 | echo "marker: ", $lex->marker, ", cursor: ", $lex->cursor, "\n";
24 | do {
25 | 	$lex->advance();
26 | 	$tok = $lex->getToken();
27 | 	echo "marker: ", $lex->marker, ", cursor: ", $lex->cursor, ", token: '", $tok->value, "'\n";
28 | } while (Token::EOI != $tok->id || $lex->bol);
29 | 
30 | $len = strlen($s);
31 | if ($lex->cursor == $len) { 
32 | 	echo "End of input at ", $len, "\n";
33 | } else {
34 | 	echo "End of input should be at ", $len, ", but the cursor is at ", $lex->cursor, "\n";
35 | }
36 | ?>
37 | ==DONE==
38 | --EXPECTF--
39 | marker: 0, cursor: 0
40 | marker: 0, cursor: 2, token: '$x'
41 | marker: 2, cursor: 3, token: ' '
42 | marker: 3, cursor: 4, token: '='
43 | marker: 4, cursor: 5, token: ' '
44 | marker: 5, cursor: 7, token: '42'
45 | marker: 7, cursor: 8, token: ';'
46 | marker: 8, cursor: 9, token: '
47 | '
48 | marker: 9, cursor: 11, token: '$y'
49 | marker: 11, cursor: 12, token: ';'
50 | marker: 12, cursor: 12, token: ''
51 | End of input at 12
52 | ==DONE==
53 | 


--------------------------------------------------------------------------------
/tests/lexer_006.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Lexer token callback
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php
 7 | 
 8 | use Parle\{Token, Lexer};
 9 | 
10 | $in = '$x =  42;' . "\n\n" . '$y;';
11 | 
12 | $lex = new Lexer;
13 | 
14 | $lex->push("\$[a-zA-Z_][a-zA-Z0-9_]*", 1);
15 | $lex->push("=", 2);
16 | $lex->push("\d+", 3);
17 | $lex->push(";", 4);
18 | $lex->push("[ ]", 42);
19 | $lex->callout(42, function () use ($in, $lex) {
20 | 	$tok = $lex->getToken();
21 | 	echo "Custom handler called, token ", $tok->id, " won't return\n";
22 | 	$i = $lex->cursor;
23 | 	while (" " == $in[$i]) $i++;
24 | 	$lex->reset($i);
25 | 	$lex->advance();
26 | });
27 | $f = function () use ($in, $lex)
28 | {
29 | 	$tok = $lex->getToken();
30 | 	echo "Custom handler called, token ", $tok->id, " won't return\n";
31 | 	$i = $lex->cursor;
32 | 	while ("\n" == $in[$i]) $i++;
33 | 	$lex->reset($i);
34 | 	$lex->advance();
35 | };
36 | $lex->push("[\n]", 24);
37 | $lex->callout(24, $f);
38 | 
39 | $lex->build();
40 | 
41 | $lex->consume($in);
42 | 
43 | do {
44 | 	$lex->advance();
45 | 	$tok = $lex->getToken();
46 | 	echo $tok->id, "\n";
47 | } while (Token::EOI != $tok->id);
48 | 
49 | ?>
50 | ==DONE==
51 | --EXPECT--
52 | 1
53 | Custom handler called, token 42 won't return
54 | 2
55 | Custom handler called, token 42 won't return
56 | 3
57 | 4
58 | Custom handler called, token 24 won't return
59 | 1
60 | 4
61 | 0
62 | ==DONE==
63 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/licence_1_0.txt:
--------------------------------------------------------------------------------
 1 | Boost Software License - Version 1.0 - August 17th, 2003
 2 | 
 3 | Permission is hereby granted, free of charge, to any person or organization
 4 | obtaining a copy of the software and accompanying documentation covered by
 5 | this license (the "Software") to use, reproduce, display, distribute,
 6 | execute, and transmit the Software, and to prepare derivative works of the
 7 | Software, and to permit third-parties to whom the Software is furnished to
 8 | do so, all subject to the following:
 9 | 
10 | The copyright notices in the Software and this entire statement, including
11 | the above license grant, this restriction and the following disclaimer,
12 | must be included in all copies of the Software, in whole or in part, and
13 | all derivative works of the Software, unless such copies or derivative
14 | works are solely in the form of machine-executable object code generated by
15 | a source language processor.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 
25 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/licence_1_0.txt:
--------------------------------------------------------------------------------
 1 | Boost Software License - Version 1.0 - August 17th, 2003
 2 | 
 3 | Permission is hereby granted, free of charge, to any person or organization
 4 | obtaining a copy of the software and accompanying documentation covered by
 5 | this license (the "Software") to use, reproduce, display, distribute,
 6 | execute, and transmit the Software, and to prepare derivative works of the
 7 | Software, and to permit third-parties to whom the Software is furnished to
 8 | do so, all subject to the following:
 9 | 
10 | The copyright notices in the Software and this entire statement, including
11 | the above license grant, this restriction and the following disclaimer,
12 | must be included in all copies of the Software, in whole or in part, and
13 | all derivative works of the Software, unless such copies or derivative
14 | works are solely in the form of machine-executable object code generated by
15 | a source language processor.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 
25 | 


--------------------------------------------------------------------------------
/tests/sigil_002.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Test sigil exceptions
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\Parser;
 9 | use Parle\RParser;
10 | use Parle\RLexer;
11 | use Parle\Token;
12 | 
13 | 
14 | try {
15 | 	$p = new RParser;
16 | 	$p->sigilName(0);
17 | } catch (\Throwable $e) {
18 | 	echo $e->getMessage(), "\n";
19 | }
20 | 
21 | try {
22 | 	$p = new Parser;
23 | 	$p->sigil(3);
24 | } catch (\Throwable $e) {
25 | 	echo $e->getMessage(), "\n";
26 | }
27 | 
28 | try {
29 | 	$p = new RParser;
30 | 	$p->sigilCount();
31 | } catch (\Throwable $e) {
32 | 	echo $e->getMessage(), "\n";
33 | }
34 | 
35 | $p = new RParser;
36 | $p->push("start", "'a'");
37 | $p->build();
38 | 
39 | $lex = new RLexer;
40 | $lex->push("a", $p->tokenId("'a'"));
41 | $lex->push("\\s+", Token::SKIP);
42 | $lex->build();
43 | 
44 | $p->consume("a", $lex);
45 | 
46 | while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) {
47 | 	switch ($p->action) {
48 | 		case Parser::ACTION_REDUCE:
49 | 			// throw here
50 | 			echo $p->sigilName(42);
51 | 			break;	
52 | 	}
53 | 
54 | 	$p->advance();
55 | }
56 | ?>
57 | --EXPECTF--
58 | Not in a reduce state!
59 | Not in a reduce state!
60 | Not in a reduce state!
61 | 
62 | Fatal error: Uncaught Parle\ParserException: Invalid index 42 in %ssigil_002.php:%d
63 | Stack trace:
64 | #0 %ssigil_002.php(45): Parle\RParser->sigilName(42)
65 | #1 {main}
66 |   thrown in %ssigil_002.php on line %d
67 | 


--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
 1 | INSTALLATION
 2 | ============
 3 | 
 4 | # Pre-requisites
 5 | 
 6 | * PHP version 7.4 and above.
 7 | * A [C++14](http://en.cppreference.com/w/cpp/compiler_support) capable compiler is required. At least clang-5.0, GCC 5.0 and VS2015 are known to successfully build the extension.
 8 | 
 9 | 
10 | # Binary packages
11 | 
12 | ## Windows
13 | 
14 | DLLs for Windows can be downloaded from the [PECL page](https://pecl.php.net/package/parle).
15 | 
16 | If no DLL is available or there's another reason to build, please follow the [wiki](https://wiki.php.net/internals/windows/stepbystepbuild_sdk_2#building_pecl_extensions)
17 | instructions on how to set up the [php-sdk](https://github.com/php/php-sdk-binary-tools) and build an extension.
18 | 
19 | ## RPM
20 | 
21 | RPM for Fedora, RHEL and CentOS can be installed from the [Remi repository](https://rpms.remirepo.net/).
22 | 
23 | 
24 | # Building from sources
25 | 
26 | ## From PECL
27 | 
28 | Released versions can be installed using the ```pecl``` command:
29 | 
30 | ```
31 | pecl install parle-beta
32 | ```
33 | 
34 | By default, `pecl` will ask about enabling the UTF-32 support. For an unattended installation, the below can be considered:
35 | 
36 | ```
37 | echo | pecl install parle-beta
38 | ```
39 | 
40 | In this case, any package-related question will be answered automatically with its default value.
41 | 
42 | ## From git
43 | 
44 | Use a clone of this repository to retrieve the latest development sources:
45 | 
46 | ```
47 | git clone https://github.com/weltling/parle.git
48 | cd parle
49 | phpize
50 | ./configure
51 | make
52 | ```
53 | 


--------------------------------------------------------------------------------
/tests/words_001.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Parse words from a string
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\Parser;
 9 | use Parle\Lexer;
10 | use Parle\Token;
11 | 
12 | $p = new Parser;
13 | $p->token("WORD");
14 | $p->push("start", "sentence");
15 | $p->push("sentence", "words");
16 | $word_idx = $p->push("words", "WORD");
17 | $words_idx = $p->push("words", "words WORD");
18 | $p->build();
19 | 
20 | $lex = new Lexer;
21 | $lex->push("[^[:blank:][:punct:]]+", $p->tokenId("WORD"));
22 | $lex->push(".", Token::SKIP);
23 | $lex->build();
24 | 
25 | 
26 | $words = array(
27 | 	"Sah ein Knab' ein Röslein stehn",
28 | 	"Но, чтобы стоять, я должен держаться корней.",
29 | 	"Homines sumus nun dei.",
30 | );
31 | 
32 | foreach ($words as $in) {
33 | 	$p->consume($in, $lex);
34 | 	$out = array();
35 | 
36 | 	while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) {
37 | 		switch ($p->action) {
38 | 			case Parser::ACTION_ERROR:
39 | 				throw new ParserException("Parser error");
40 | 				break;
41 | 			case Parser::ACTION_REDUCE:
42 | 				switch ($p->reduceId)
43 | 				{
44 | 					case $word_idx:
45 | 						$out[] = $p->sigil(0);
46 | 						break;
47 | 					case $words_idx:
48 | 						$out[] = $p->sigil(1);
49 | 						break;
50 | 				}
51 | 		}
52 | 
53 | 		$p->advance();
54 | 	}
55 | 
56 | 	var_dump(implode(" ", $out));
57 | }
58 | 
59 | ?>
60 | ==DONE==
61 | --EXPECT--
62 | string(31) "Sah ein Knab ein Röslein stehn"
63 | string(76) "Но чтобы стоять я должен держаться корней"
64 | string(21) "Homines sumus nun dei"
65 | ==DONE==
66 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/sm_traits.hpp:
--------------------------------------------------------------------------------
 1 | // sm_traits.hpp
 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | #ifndef LEXERTL_SM_TRAITS_HPP
 8 | #define LEXERTL_SM_TRAITS_HPP
 9 | 
10 | namespace lexertl
11 | {
12 |     template<typename ch_type, typename sm_type, bool comp, bool look,
13 |         bool dfa_nfa> // Primary template; comp == true is handled by the specialisation below
14 |     struct basic_sm_traits
15 |     {
16 |         enum // Compile-time flags derived from the template arguments
17 |         {
18 |             char_24_bit = sizeof(ch_type) > 2, compressed = comp, lookup = look,
19 |             is_dfa = dfa_nfa
20 |         };
21 |         using input_char_type = ch_type;
22 |         using char_type = ch_type; // Identical to input_char_type in this variant
23 |         using id_type = sm_type;
24 |         // Returns ~0 converted to id_type.
25 |         static id_type npos()
26 |         {
27 |             return static_cast<id_type>(~0);
28 |         }
29 |     };
30 | 
31 |     template<typename ch_type, typename sm_type, bool look, bool dfa_nfa> // Partial specialisation for comp == true
32 |     struct basic_sm_traits<ch_type, sm_type, true, look, dfa_nfa>
33 |     {
34 |         enum // Same flags as the primary template, with compressed fixed to true
35 |         {
36 |             char_24_bit = sizeof(ch_type) > 2, compressed = true, lookup = look,
37 |             is_dfa = dfa_nfa
38 |         };
39 |         using input_char_type = ch_type;
40 |         using char_type = unsigned char; // Always unsigned char here, whatever ch_type is
41 |         using id_type = sm_type;
42 |         // Returns ~0 converted to id_type.
43 |         static id_type npos()
44 |         {
45 |             return static_cast<id_type>(~0);
46 |         }
47 |     };
48 | }
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/token.hpp:
--------------------------------------------------------------------------------
 1 | // token.hpp
 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_TOKEN_HPP
 7 | #define PARSERTL_TOKEN_HPP
 8 | 
 9 | #include <string>
10 | #include <vector>
11 | 
12 | namespace parsertl
13 | {
14 |     template<typename iterator>
15 |     struct token // An id together with the iterator pair [first, second)
16 |     {
17 |         using char_type = typename iterator::value_type::char_type; // Both aliases come from the iterator's value_type
18 |         using iter_type = typename iterator::value_type::iter_type;
19 |         using string = std::basic_string<char_type>;
20 |         using token_vector = std::vector<token<iterator>>;
21 |         std::size_t id = static_cast<std::size_t>(~0); // Default id is ~0 as std::size_t
22 |         iter_type first = iter_type();
23 |         iter_type second = iter_type();
24 |         // Leaves id, first and second at the member defaults above.
25 |         token() = default;
26 |         // Stores the supplied id and iterator pair as given.
27 |         token(const std::size_t id_, const iter_type& first_,
28 |             const iter_type& second_) :
29 |             id(id_),
30 |             first(first_),
31 |             second(second_)
32 |         {
33 |         }
34 |         // Builds a string from the range [first, second).
35 |         string str() const
36 |         {
37 |             return string(first, second);
38 |         }
39 |         // Like str(), but begins soffset_ past first and ends eoffset_ before second.
40 |         string substr(const std::size_t soffset_,
41 |             const std::size_t eoffset_) const
42 |         {
43 |             return string(first + soffset_, second - eoffset_); // No bounds checking is done here
44 |         }
45 |         // Returns second - first.
46 |         std::size_t length() const
47 |         {
48 |             return second - first;
49 |         }
50 |     };
51 | }
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/tests/reflection_002.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Test lexer/parser argument checking
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\Parser;
 9 | use Parle\RParser;
10 | use Parle\ParserException;
11 | use Parle\Lexer;
12 | use Parle\RLexer;
13 | 
14 | $in = "1 + 1";
15 | 
16 | // variation 0
17 | try {
18 | 	$p = new Parser;
19 | 	$p->token("INTEGER");
20 | 	$p->push("start", "exp");
21 | 	$int_idx = $p->push("exp", "INTEGER");
22 | 	$p->build();
23 | 
24 | 	$lex = new RLexer;
25 | 	$lex->push("\\d+", $p->tokenId("INTEGER"));
26 | 	$lex->build();
27 | 
28 | 
29 | } catch (\Throwable $e) {
30 | 	echo $e->getMessage(), PHP_EOL;
31 | }
32 | 
33 | try {
34 | 	$p->validate($in, $lex);
35 | } catch (\Throwable $e) {
36 | 	echo $e->getMessage(), PHP_EOL;
37 | }
38 | 
39 | try {
40 | 	$p->consume($in, $lex);
41 | } catch (\Throwable $e) {
42 | 	echo $e->getMessage(), PHP_EOL;
43 | }
44 | 
45 | 
46 | // variation 1
47 | try {
48 | 	$p = new RParser;
49 | 	$p->token("INTEGER");
50 | 	$p->push("start", "exp");
51 | 	$int_idx = $p->push("exp", "INTEGER");
52 | 	$p->build();
53 | 
54 | 	$lex = new Lexer;
55 | 	$lex->push("\\d+", $p->tokenId("INTEGER"));
56 | 	$lex->build();
57 | 
58 | 
59 | } catch (\Throwable $e) {
60 | 	echo $e->getMessage(), PHP_EOL;
61 | }
62 | 
63 | try {
64 | 	$p->validate($in, $lex);
65 | } catch (\Throwable $e) {
66 | 	echo $e->getMessage(), PHP_EOL;
67 | }
68 | 
69 | try {
70 | 	$p->consume($in, $lex);
71 | } catch (\Throwable $e) {
72 | 	echo $e->getMessage(), PHP_EOL;
73 | }
74 | 
75 | ?>
76 | ==DONE==
77 | --EXPECTF--
78 | %s\Parser::validate()%s Parle\RLexer given
79 | %s\Parser::consume()%s Parle\RLexer given
80 | %s\RParser::validate()%s Parle\Lexer given
81 | %s\RParser::consume()%s Parle\Lexer given
82 | ==DONE==
83 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/capture.hpp:
--------------------------------------------------------------------------------
 1 | // capture.hpp
 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_CAPTURE_HPP
 7 | #define PARSERTL_CAPTURE_HPP
 8 | 
 9 | #include <iterator>
10 | #include <string>
11 | 
12 | namespace parsertl
13 | {
14 |     template<typename iterator>
15 |     struct capture // An iterator pair [first, second)
16 |     {
17 |         using iter_type = iterator;
18 |         using char_type = typename std::iterator_traits<iter_type>::value_type; // Element type the iterator yields
19 |         using string = std::basic_string<char_type>;
20 | 
21 |         iterator first = iterator();
22 |         iterator second = iterator();
23 |         // Leaves both iterators value-initialised.
24 |         capture() = default;
25 |         // Stores the supplied iterator pair as given.
26 |         capture(const iterator& first_,
27 |             const iterator& second_) :
28 |             first(first_),
29 |             second(second_)
30 |         {
31 |         }
32 |         // Equal when both end points compare equal.
33 |         bool operator==(const capture& rhs_) const
34 |         {
35 |             return first == rhs_.first &&
36 |                 second == rhs_.second;
37 |         }
38 |         // True when first == second.
39 |         bool empty() const
40 |         {
41 |             return first == second;
42 |         }
43 |         // Builds a string from the range [first, second).
44 |         string str() const
45 |         {
46 |             return string(first, second);
47 |         }
48 |         // Like str(), but begins soffset_ past first and ends eoffset_ before second.
49 |         string substr(const std::size_t soffset_,
50 |             const std::size_t eoffset_) const
51 |         {
52 |             return string(first + soffset_, second - eoffset_); // No bounds checking is done here
53 |         }
54 |         // Returns second - first.
55 |         std::size_t length() const
56 |         {
57 |             return second - first;
58 |         }
59 |     };
60 | }
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/tests/words_002.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Parse words from a string, UTF-8 regex
 3 | --SKIPIF--
 4 | <?php
 5 | if (!extension_loaded("parle")) print "skip";
 6 | if (Parle\INTERNAL_UTF32) print "skip not for internal UTF-32";
 7 | ?>
 8 | --FILE--
 9 | <?php 
10 | 
11 | use Parle\Parser;
12 | use Parle\Lexer;
13 | use Parle\Token;
14 | 
15 | $p = new Parser;
16 | $p->token("WORD");
17 | $p->push("start", "sentence");
18 | $p->push("sentence", "words");
19 | $word_idx = $p->push("words", "WORD");
20 | $words_idx = $p->push("words", "words WORD");
21 | $p->build();
22 | 
23 | $lex = new Lexer;
24 | $lex->push("[ -\\x7f]{+}[\\x80-\\xbf]{+}[\\xc2-\\xdf]{+}[\\xe0-\\xef]{+}[\\xf0-\\xff]+", $p->tokenId("WORD"));
25 | $lex->push("\\s+", Token::SKIP);
26 | $lex->build();
27 | 
28 | /* UTF-8 */
29 | $words = array(
30 | 	"füße абракадабра 芬蘭",
31 | 	"Sah ein Knab' ein Röslein stehn",
32 | 	"Но, чтобы стоять, я должен держаться корней.",
33 | 	"Homines sumus nun dei.",
34 | 	"français éléphant fièvre là où gâteau être île chômage dû Noël maïs aigüe",
35 | );
36 | 
37 | foreach ($words as $in) {
38 | 	$p->consume($in, $lex);
39 | 	$out = array();
40 | 
41 | 	while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) {
42 | 		switch ($p->action) {
43 | 			case Parser::ACTION_ERROR:
44 | 				throw new ParserException("Parser error");
45 | 				break;
46 | 			case Parser::ACTION_REDUCE:
47 | 				switch ($p->reduceId)
48 | 				{
49 | 					case $word_idx:
50 | 						$out[] = $p->sigil(0);
51 | 						break;
52 | 					case $words_idx:
53 | 						$out[] = $p->sigil(1);
54 | 						break;
55 | 				}
56 | 		}
57 | 
58 | 		$p->advance();
59 | 	}
60 | 
61 | 	var_dump(implode(" ", $out));
62 | }
63 | 
64 | ?>
65 | ==DONE==
66 | --EXPECT--
67 | string(36) "füße абракадабра 芬蘭"
68 | string(32) "Sah ein Knab' ein Röslein stehn"
69 | string(79) "Но, чтобы стоять, я должен держаться корней."
70 | string(22) "Homines sumus nun dei."
71 | string(87) "français éléphant fièvre là où gâteau être île chômage dû Noël maïs aigüe"
72 | ==DONE==
73 | 


--------------------------------------------------------------------------------
/tests/words_003.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Parse words from a string, UTF-8 regex
 3 | --SKIPIF--
 4 | <?php
 5 | if (!extension_loaded("parle")) print "skip";
 6 | if (!Parle\INTERNAL_UTF32) print "skip reqire internal UTF-32";
 7 | ?>
 8 | --FILE--
 9 | <?php 
10 | 
11 | use Parle\Parser;
12 | use Parle\Lexer;
13 | use Parle\Token;
14 | 
15 | $p = new Parser;
16 | $p->token("WORD");
17 | $p->push("start", "sentence");
18 | $p->push("sentence", "words");
19 | $words_idx = $p->push("words", "words WORD");
20 | $word_idx = $p->push("words", "WORD");
21 | $p->build();
22 | 
23 | $lex = new Lexer;
24 | //$lex->push("[ -\\x10ffff]+", $p->tokenId("WORD"));
25 | $lex->push("[\p{L}\p{P}\p{InCJK_Unified_Ideographs}]+", $p->tokenId("WORD"));
26 | $lex->push(".", Token::SKIP);
27 | $lex->build();
28 | 
29 | /* UTF-8 */
30 | $words = array(
31 | 	"füße абракадабра 芬蘭",
32 | 	"Sah ein Knab' ein Röslein stehn",
33 | 	"Но, чтобы стоять, я должен держаться корней.",
34 | 	"Homines sumus nun dei.",
35 | 	"français éléphant fièvre là où gâteau être île chômage dû Noël maïs aigüe",
36 | );
37 | 
38 | foreach ($words as $in) {
39 | 	$p->consume($in, $lex);
40 | 	$out = array();
41 | 
42 | 	while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) {
43 | 		switch ($p->action) {
44 | 			case Parser::ACTION_ERROR:
45 | 				throw new ParserException("Parser error");
46 | 				break;
47 | 			case Parser::ACTION_REDUCE:
48 | 				switch ($p->reduceId)
49 | 				{
50 | 					case $word_idx:
51 | 						$out[] = $p->sigil(0);
52 | 						break;
53 | 					case $words_idx:
54 | 						$out[] = $p->sigil(1);
55 | 						break;
56 | 				}
57 | 		}
58 | 
59 | 		$p->advance();
60 | 	}
61 | 
62 | 	var_dump(implode(" ", $out));
63 | }
64 | 
65 | ?>
66 | ==DONE==
67 | --EXPECT--
68 | string(36) "füße абракадабра 芬蘭"
69 | string(32) "Sah ein Knab' ein Röslein stehn"
70 | string(79) "Но, чтобы стоять, я должен держаться корней."
71 | string(22) "Homines sumus nun dei."
72 | string(87) "français éléphant fièvre là où gâteau être île chômage dû Noël maïs aigüe"
73 | ==DONE==
74 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/sm_to_csm.hpp:
--------------------------------------------------------------------------------
 1 | // sm_to_csm.hpp
 2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef LEXERTL_SM_TO_CSM_HPP
 7 | #define LEXERTL_SM_TO_CSM_HPP
 8 | 
 9 | #include "enum_operator.hpp"
10 | #include "enums.hpp"
11 | #include "observer_ptr.hpp"
12 | #include <cstddef>
13 | 
14 | namespace lexertl
15 | {
16 |     template<typename sm, typename char_state_machine>
17 |     void sm_to_csm(const sm& sm_, char_state_machine& csm_) // Appends to csm_ one token vector per non-empty DFA of sm_
18 |     {
19 |         using id_type = typename sm::traits::id_type;
20 |         using internals = typename sm::internals;
21 |         using string_token = typename char_state_machine::state::string_token;
22 |         using index_type = typename string_token::index_type;
23 |         using string_token_vector =
24 |             typename char_state_machine::string_token_vector;
25 |         const internals& internals_ = sm_.data();
26 |         const std::size_t dfas_ = internals_._dfa.size();
27 |         // One iteration per entry of internals_._dfa.
28 |         for (id_type i_ = 0; i_ < dfas_; ++i_)
29 |         {
30 |             if (internals_._dfa_alphabet[i_] == 0) continue; // A zero alphabet means there is nothing to convert
31 |             // Alphabet size minus the state_index::transitions offset; one string_token per remaining entry.
32 |             const std::size_t alphabet_ = internals_._dfa_alphabet[i_] -
33 |                 *state_index::transitions;
34 |             string_token_vector token_vector_(alphabet_, string_token());
35 |             observer_ptr<const id_type> ptr_ = &internals_._lookup[i_].front();
36 |             // Walk 256 consecutive lookup entries, one for each value of c_.
37 |             for (std::size_t c_ = 0; c_ < 256; ++c_, ++ptr_)
38 |             {
39 |                 if (*ptr_ >= *state_index::transitions) // Entries below the offset select no token
40 |                 {
41 |                     string_token& token_ = token_vector_
42 |                         [*ptr_ - *state_index::transitions];
43 |                     // Add the single value c_ to the token this lookup entry selects.
44 |                     token_.insert(typename string_token::range
45 |                     (index_type(c_), index_type(c_)));
46 |                 }
47 |             }
48 |             // Hand the tokens collected for DFA i_ to csm_.
49 |             csm_.append(token_vector_, internals_, i_);
50 |         }
51 |     }
52 | }
53 | 
54 | #endif
55 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/partition/charset.hpp:
--------------------------------------------------------------------------------
 1 | // charset.hpp
 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | #ifndef LEXERTL_CHARSET_HPP
 8 | #define LEXERTL_CHARSET_HPP
 9 | 
10 | #include <algorithm>
11 | #include <iterator>
12 | #include <set>
13 | #include "../string_token.hpp"
14 | 
15 | namespace lexertl
16 | {
17 |     namespace detail
18 |     {
19 |         template<typename char_type, typename id_type>
20 |         struct basic_charset // A string token paired with a set of ids
21 |         {
22 |             using token = basic_string_token<char_type>;
23 |             using index_set = std::set<id_type>;
24 | 
25 |             token _token;
26 |             index_set _index_set;
27 |             // Starts with a default-constructed token and an empty id set.
28 |             basic_charset() = default;
29 |             // Copies token_ and seeds the id set with index_.
30 |             basic_charset(const token& token_, const id_type index_) :
31 |                 _token(token_)
32 |             {
33 |                 _index_set.insert(index_);
34 |             }
35 |             // True only when both the token and the id set are empty.
36 |             bool empty() const
37 |             {
38 |                 return _token.empty() && _index_set.empty();
39 |             }
40 |             // Intersects this token with rhs_'s token and records the result in overlap_.
41 |             void intersect(basic_charset& rhs_, basic_charset& overlap_)
42 |             {
43 |                 _token.intersect(rhs_._token, overlap_._token);
44 | 
45 |                 if (!overlap_._token.empty()) // Ids are only touched when there is an overlap
46 |                 {
47 |                     std::merge(_index_set.begin(), _index_set.end(), // overlap_ receives the ids of both sides
48 |                         rhs_._index_set.begin(), rhs_._index_set.end(),
49 |                         std::inserter(overlap_._index_set,
50 |                             overlap_._index_set.end()));
51 |                     // A side whose token is now empty also gives up its ids.
52 |                     if (_token.empty())
53 |                     {
54 |                         _index_set.clear();
55 |                     }
56 | 
57 |                     if (rhs_._token.empty())
58 |                     {
59 |                         rhs_._index_set.clear();
60 |                     }
61 |                 }
62 |             }
63 |         };
64 |     }
65 | }
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/bench/parse_str.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | include_once "parse_str.impl.php";
 4 | // Candidate query strings; only the uncommented one below is used.
 5 | //$in = "hello=10";
 6 | //$in = "hello=10&world=20";
 7 | //$in = "hello=10&world[]=20";
 8 | //$in = "hello=10&world[my]=20";
 9 | //$in = "first=value&arr[0]=foo+bar&arr[]=baz";
10 | //$in = "first=value&arr[0]=foo+bar&arr[]=baz&arr[chk]=";
11 | $in = "hello[bbb][][]=30&world[][]=37&hello[bbb][]=7&foo=10&bar[]=abc+def&empty=";
12 | $in = rtrim(str_repeat($in . "&", 160), "&");
13 | // Time Parle\parse_str() (presumably defined in parse_str.impl.php) on the query repeated 160 times.
14 | $out0 = array();
15 | $startTime = microtime(true);
16 | Parle\parse_str($in, $out0);
17 | $endTime = microtime(true);
18 | echo 'Took ', (($endTime - $startTime) * 1000), ' milliseconds with parle', "\n";
19 | // Time PHP's built-in parse_str() on the same input.
20 | $out1 = array();
21 | $startTime = microtime(true);
22 | \parse_str($in, $out1);
23 | $endTime = microtime(true);
24 | echo 'Took ', (($endTime - $startTime) * 1000), ' milliseconds with native parse_str', "\n";
25 | // Uncomment to compare both results by eye.
26 | //var_dump($out0);
27 | //var_dump($out1);
28 | 
29 | /* Repeat the same comparison with randomly generated input. */
30 | 
31 | function rand_in(bool $for_key = false, $len = -1)
32 | {
33 | 	// Negative $len: pick a random length in 8..16.
34 | 	if ($len < 0) {
35 | 		$len = rand(8, 16);
36 | 	}
37 | 	// Keys use a-z and 0-9; values may also contain "+" and "-".
38 | 	$pool = array_merge(range("a", "z"), range(0, 9));
39 | 	if (!$for_key) {
40 | 		array_push($pool, "+", "-");
41 | 	}
42 | 	$out = "";
43 | 	for ($n = 0; $n < $len; ++$n) {
44 | 		$out .= $pool[mt_rand(0, count($pool) - 1)];
45 | 	}
46 | 	return $out;
47 | }
48 | // Random query in the same shapes as the fixed one: nested and appended array keys, scalars, an empty value.
49 | $in = rand_in(true) . "[" . rand_in(true) . "][][]=" . rand_in()
50 | 	. "&" . rand_in(true) . "[][]=" . rand_in()
51 | 	. "&" . rand_in(true) . "[" . rand_in(true) . "][]=" . rand_in()
52 |         . "&" . rand_in(true) . "=" . rand_in()
53 | 	. "&" . rand_in(true) . "[]=" . rand_in()
54 | 	. "&" . rand_in(true) . "=";
55 | $in = rtrim(str_repeat($in . "&", 160), "&");
56 | // Time Parle\parse_str() on the random query repeated 160 times.
57 | $out0 = array();
58 | $startTime = microtime(true);
59 | Parle\parse_str($in, $out0);
60 | $endTime = microtime(true);
61 | echo 'Took ', (($endTime - $startTime) * 1000), ' milliseconds on random input with parle', "\n";
62 | // Time PHP's built-in parse_str() on the same random input.
63 | $out1 = array();
64 | $startTime = microtime(true);
65 | \parse_str($in, $out1);
66 | $endTime = microtime(true);
67 | echo 'Took ', (($endTime - $startTime) * 1000), ' milliseconds on random input with native parse_str', "\n";
68 | // Uncomment to compare both results by eye.
69 | //var_dump($out0);
70 | //var_dump($out1);
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/tests/lexer_004.phpt:
--------------------------------------------------------------------------------
  1 | --TEST--
  2 | Restartable lexing
  3 | --SKIPIF--
  4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
  5 | --FILE--
  6 | <?php 
  7 | 
  8 | use Parle\RLexer;
  9 | use Parle\Token;
 10 | // "cmd" gets a different id depending on its line anchors: 2 = whole line, 3 = line start, 4 = line end.
 11 | $lex = new Parle\RLexer;
 12 | $lex->push("can", 1);
 13 | $lex->push("^cmd$", 2);
 14 | $lex->push("^cmd", 3);
 15 | $lex->push("cmd$", 4);
 16 | $lex->push("[a-z]+", 50);
 17 | $lex->push("\\s+", 100);
 18 | 
 19 | $lex->build();
 20 | 
 21 | $s = "can\ncmd\na cmd\ncmd again\nanother cmd";
 22 | $lex->consume($s);
 23 | // Restart at offset 4, the "c" right after "can\n", and flag it as a beginning of line (the first expected token is id 2, "^cmd$").
 24 | $lex->reset(4);
 25 | $lex->bol = true;
 26 | 
 27 | $lex->advance();
 28 | $tok = $lex->getToken();
 29 | // Dump every token up to, but not including, end of input.
 30 | while (Token::EOI != $tok->id) {
 31 | 	var_dump($tok);
 32 | 	$lex->advance();
 33 | 	$tok = $lex->getToken();
 34 | }
 35 | 
 36 | ?>
 37 | ==DONE==
 38 | --EXPECTF--
 39 | object(Parle\Token)#%d (2) {
 40 |   ["id"]=>
 41 |   int(2)
 42 |   ["value"]=>
 43 |   string(3) "cmd"
 44 | }
 45 | object(Parle\Token)#%d (2) {
 46 |   ["id"]=>
 47 |   int(100)
 48 |   ["value"]=>
 49 |   string(1) "
 50 | "
 51 | }
 52 | object(Parle\Token)#%d (2) {
 53 |   ["id"]=>
 54 |   int(50)
 55 |   ["value"]=>
 56 |   string(1) "a"
 57 | }
 58 | object(Parle\Token)#%d (2) {
 59 |   ["id"]=>
 60 |   int(100)
 61 |   ["value"]=>
 62 |   string(1) " "
 63 | }
 64 | object(Parle\Token)#%d (2) {
 65 |   ["id"]=>
 66 |   int(4)
 67 |   ["value"]=>
 68 |   string(3) "cmd"
 69 | }
 70 | object(Parle\Token)#%d (2) {
 71 |   ["id"]=>
 72 |   int(100)
 73 |   ["value"]=>
 74 |   string(1) "
 75 | "
 76 | }
 77 | object(Parle\Token)#%d (2) {
 78 |   ["id"]=>
 79 |   int(3)
 80 |   ["value"]=>
 81 |   string(3) "cmd"
 82 | }
 83 | object(Parle\Token)#%d (2) {
 84 |   ["id"]=>
 85 |   int(100)
 86 |   ["value"]=>
 87 |   string(1) " "
 88 | }
 89 | object(Parle\Token)#%d (2) {
 90 |   ["id"]=>
 91 |   int(50)
 92 |   ["value"]=>
 93 |   string(5) "again"
 94 | }
 95 | object(Parle\Token)#%d (2) {
 96 |   ["id"]=>
 97 |   int(100)
 98 |   ["value"]=>
 99 |   string(1) "
100 | "
101 | }
102 | object(Parle\Token)#%d (2) {
103 |   ["id"]=>
104 |   int(50)
105 |   ["value"]=>
106 |   string(7) "another"
107 | }
108 | object(Parle\Token)#%d (2) {
109 |   ["id"]=>
110 |   int(100)
111 |   ["value"]=>
112 |   string(1) " "
113 | }
114 | object(Parle\Token)#%d (2) {
115 |   ["id"]=>
116 |   int(4)
117 |   ["value"]=>
118 |   string(3) "cmd"
119 | }
120 | ==DONE==
121 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/internals.hpp:
--------------------------------------------------------------------------------
 1 | // internals.hpp
 2 | // Copyright (c) 2009-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef LEXERTL_INTERNALS_HPP
 7 | #define LEXERTL_INTERNALS_HPP
 8 | 
 9 | #include "enum_operator.hpp"
10 | #include "enums.hpp"
11 | #include <memory>
12 | #include <vector>
13 | 
14 | namespace lexertl
15 | {
16 |     namespace detail
17 |     {
18 |         template<typename id_type>
19 |         struct basic_internals
20 |         {
21 |             using id_type_vector = std::vector<id_type>;
22 |             using id_type_vector_vector = std::vector<id_type_vector>;
23 | 
24 |             id_type _eoi = 0;
25 |             id_type_vector_vector _lookup;
26 |             id_type_vector _dfa_alphabet;
27 |             id_type _features = 0;
28 |             id_type_vector_vector _dfa;
29 | 
30 |             // Empties every table and zeroes the scalar members.
31 |             void clear()
32 |             {
33 |                 _dfa.clear();
34 |                 _dfa_alphabet.clear();
35 |                 _lookup.clear();
36 |                 _features = 0;
37 |                 _eoi = 0;
38 |             }
39 | 
40 |             bool empty() const { return _dfa.empty(); }
41 | 
42 |             // Appends num_ entries to each table; every new lookup row has
43 |             // 256 slots, all set to the dead state.
44 |             void add_states(const std::size_t num_)
45 |             {
46 |                 const auto dead_ =
47 |                     static_cast<id_type>(*state_index::dead_state);
48 |                 for (std::size_t left_ = num_; left_ != 0; --left_)
49 |                 {
50 |                     _lookup.push_back(id_type_vector(256, dead_));
51 |                     _dfa_alphabet.push_back(0);
52 |                     _dfa.emplace_back();
53 |                 }
54 |             }
55 | 
56 |             void swap(basic_internals& internals_) noexcept
57 |             {
58 |                 _dfa.swap(internals_._dfa);
59 |                 _dfa_alphabet.swap(internals_._dfa_alphabet);
60 |                 _lookup.swap(internals_._lookup);
61 |                 std::swap(_features, internals_._features);
62 |                 std::swap(_eoi, internals_._eoi);
63 |             }
64 |         };
65 |     }
66 | }
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | # Builds and tests the extension on a PHP/GCC matrix, then checks that the PECL package installs without startup warnings.
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 |   # Also allow the workflow to be started manually.
 9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   build:  # compile with each GCC/PHP combination and run the test suite
13 |     strategy:
14 |       matrix:
15 |         operating-system: ['ubuntu-22.04']
16 |         php-versions: ['7.4', '8.0', '8.1', '8.2']
17 |         utf32: [1, 0]  # 1 passes --enable-parle-utf32 to ./configure
18 |         gcc-versions: ['9', '11', '12']
19 |     runs-on: ${{ matrix.operating-system }}
20 |     steps:
21 |       - uses: actions/checkout@v3
22 |       - uses: egor-tensin/setup-gcc@v1
23 |         with:
24 |           version: ${{ matrix.gcc-versions }}
25 |       - uses: shivammathur/setup-php@v2
26 |         with:
27 |           php-version: ${{ matrix.php-versions }}
28 |       - run: phpize
29 |       - run: |
30 |           if test ${{ matrix.utf32 }} = 1; then UTF32_OPT=--enable-parle-utf32; fi
31 |           CC=gcc-${{ matrix.gcc-versions }} CXX=g++-${{ matrix.gcc-versions }} ./configure $UTF32_OPT
32 |       - run: make
33 |       - run: make test TESTS="-P -q --show-diff"
34 |   pecl:  # package with pecl, install the package, and fail on any PHP startup warning
35 |     runs-on: ubuntu-latest
36 |     container: php:8.2-cli-alpine
37 |     steps:
38 |       - name: Install required system packages
39 |         run: apk add --update $PHPIZE_DEPS
40 |       - name: Checkout
41 |         uses: actions/checkout@v3
42 |       - name: Create temporary directory
43 |         id: temp-dir
44 |         run: printf "path=%s\n" "$(mktemp -d)" >>"$GITHUB_OUTPUT"
45 |       - name: Create package
46 |         run: |
47 |           cd "${{ steps.temp-dir.outputs.path }}"
48 |           pecl package "$GITHUB_WORKSPACE/package.xml"
49 |       - name: Compile package
50 |         run: printf '' | pecl install ${{ steps.temp-dir.outputs.path }}/parle-*.tgz
51 |       - name: Enable extension
52 |         run: docker-php-ext-enable parle
53 |       - name: Check for PHP startup warnings
54 |         run: |
55 |           php -d display_errors=stderr -d display_startup_errors=1 -d error_reporting=-1 -r ';' 2>/tmp/php-startup-warnings
56 |           if [ -s /tmp/php-startup-warnings ]; then
57 |             echo 'The PHP extension was successfully installed, but PHP raised these warnings:' >&2
58 |             cat /tmp/php-startup-warnings >&2
59 |             exit 1
60 |           fi
61 |           echo "PHP didn't raise any warnings at startup."
62 |       - name: Inspect extension
63 |         run: php --ri parle
64 | 


--------------------------------------------------------------------------------
/bench/phlexy_alike.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /*
 4 |   Mimic benchmarks from https://github.com/nikic/Phlexy 
 5 |   Care about patterns, not 100% matching with PCRE, same functionality.
 6 |   Also, including all the creation time, too.
 7 |  */
 8 | 
 9 | use Parle\Lexer;
10 | use Parle\RLexer;
11 | use Parle\Token;
12 | 
13 | // Rule lists: each entry is array(token id, regex).
14 | $csvDefs = array(
15 | 	array(1, "[^\"\,\r\n]+"),
16 | 	array(2, "[\"][^\"]+[\"]"),
17 | 	array(3, "[,]"),
18 | 	array(4, "[\r]?[\n]"),
19 | );
20 | $alphabet = range('a', 'z');
21 | $alphabetDefs = array(); // one single-letter rule per letter, using the letter's ord() as token id
22 | foreach($alphabet as $c) {
23 | 	$alphabetDefs[] = array(ord($c), "[" . $c . "]");
24 | }
25 | // Inputs: 5000 CSV rows, 100000 x "a", 20000 x "z" and 50000 random lowercase letters.
26 | $cvsString = trim(str_repeat('hallo world,foo bar,more foo,more bar,"rare , escape",some more,stuff' . "\n", 5000));
27 | $allAString = str_repeat('a', 100000);
28 | $allZString = str_repeat('z', 20000);
29 | $randomString = '';
30 | for ($i = 0; $i < 50000; ++$i) {
31 |     $randomString .= $alphabet[mt_rand(0, count($alphabet) - 1)];
32 | }
33 | // Every scenario below is timed once with a fresh Lexer and once with a fresh RLexer.
34 | echo 'Timing lexing of CSV data:', "\n";
35 | $lex = new Lexer;
36 | testLexingPerformance($lex, $csvDefs, $cvsString);
37 | $lex = new RLexer;
38 | testLexingPerformance($lex, $csvDefs, $cvsString);
39 | echo "\n";
40 | 
41 | 
42 | echo 'Timing alphabet lexing of all "a":', "\n";
43 | $lex = new Lexer;
44 | testLexingPerformance($lex, $alphabetDefs, $allAString);
45 | $lex = new RLexer;
46 | testLexingPerformance($lex, $alphabetDefs, $allAString);
47 | echo "\n";
48 | 
49 | 
50 | echo 'Timing alphabet lexing of all "z":', "\n";
51 | $lex = new Lexer;
52 | testLexingPerformance($lex, $alphabetDefs, $allZString);
53 | $lex = new RLexer;
54 | testLexingPerformance($lex, $alphabetDefs, $allZString);
55 | echo "\n";
56 | 
57 | 
58 | echo 'Timing alphabet lexing of random string:', "\n";
59 | $lex = new Lexer;
60 | testLexingPerformance($lex, $alphabetDefs, $randomString);
61 | $lex = new RLexer;
62 | testLexingPerformance($lex, $alphabetDefs, $randomString);
63 | echo "\n";
64 | 
65 | function testLexingPerformance($lex, $defs, $in)
66 | {
67 | 	// The timed span covers rule setup and build as well as the lexing.
68 | 	$t0 = microtime(true);
69 | 
70 | 	foreach ($defs as list($id, $regex)) {
71 | 		$lex->push($regex, $id);
72 | 	}
73 | 	$lex->build();
74 | 	$lex->consume($in);
75 | 
76 | 	// Pull tokens until the end-of-input token shows up.
77 | 	do {
78 | 		$lex->advance();
79 | 	} while (Token::EOI != $lex->getToken()->id);
80 | 
81 | 	$elapsed = microtime(true) - $t0;
82 | 
83 | 	echo 'Took ', $elapsed, ' seconds (', get_class($lex), ')', "\n";
84 | }
85 | 


--------------------------------------------------------------------------------
/tests/calc_001.phpt:
--------------------------------------------------------------------------------
 1 | --TEST--
 2 | Simple stackless calc
 3 | --SKIPIF--
 4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
 5 | --FILE--
 6 | <?php 
 7 | 
 8 | use Parle\RParser;
 9 | use Parle\ParserException;
10 | use Parle\RLexer;
11 | use Parle\Token;
12 | // Grammar: binary + - * / over INTEGER tokens; both operator groups are declared left-associative.
13 | $p = new RParser;
14 | 
15 | $p->token("INTEGER");
16 | $p->left("'+' '-'");
17 | $p->left("'*' '/'");
18 | // push() returns the rule index that is compared against $p->reduceId in the loop below.
19 | $p->push("start", "exp");
20 | $add_idx = $p->push("exp", "exp '+' exp");
21 | $sub_idx = $p->push("exp", "exp '-' exp");
22 | $mul_idx = $p->push("exp", "exp '*' exp");
23 | $div_idx = $p->push("exp", "exp '/' exp");
24 | $int_idx = $p->push("exp", "INTEGER");
25 | 
26 | $p->build();
27 | // The lexer reuses the parser's token ids; whitespace is skipped.
28 | $lex = new RLexer;
29 | $lex->push("[+]", $p->tokenId("'+'"));
30 | $lex->push("[-]", $p->tokenId("'-'"));
31 | $lex->push("[*]", $p->tokenId("'*'"));
32 | $lex->push("[/]", $p->tokenId("'/'"));
33 | $lex->push("\\d+", $p->tokenId("INTEGER"));
34 | $lex->push("\\s+", Token::SKIP);
35 | 
36 | $lex->build();
37 | 
38 | $exp = array(
39 | 	"1 + 1",
40 | 	"33 / 10",
41 | 	"100 * 45",
42 | 	"17 - 45",
43 | );
44 | 
45 | foreach ($exp as $in) {
46 | 	if (!$p->validate($in, $lex)) {
47 | 		throw new ParserException("Failed to validate input");
48 | 	}
49 | 	// Validation passed: now step through the parse one action at a time.
50 | 	$p->consume($in, $lex);
51 | 	// The loop condition already stops on ERROR and ACCEPT, so those two cases in the switch never run.
52 | 	while (RParser::ACTION_ERROR != $p->action && RParser::ACTION_ACCEPT != $p->action) {
53 | 		switch ($p->action) {
54 | 			case RParser::ACTION_ERROR:
55 | 				throw new ParserException("Parser error");
56 | 				break;
57 | 			case RParser::ACTION_SHIFT:
58 | 			case RParser::ACTION_GOTO:
59 | 			case RParser::ACTION_ACCEPT:
60 | 				break;
61 | 			case RParser::ACTION_REDUCE:
62 | 				switch ($p->reduceId) {
63 | 					case $add_idx:
64 | 						$l = $p->sigil(0); // first symbol of "exp '+' exp": the left operand
65 | 						$r = $p->sigil(2); // third symbol: the right operand
66 | 						echo "$l + $r = " . ($l + $r) . "\n";
67 | 						break;
68 | 					case $sub_idx:
69 | 						$l = $p->sigil(0);
70 | 						$r = $p->sigil(2);
71 | 						echo "$l - $r = " . ($l - $r) . "\n";
72 | 						break;
73 | 					case $mul_idx:
74 | 						$l = $p->sigil(0);
75 | 						$r = $p->sigil(2);
76 | 						echo "$l * $r = " . ($l * $r) . "\n";
77 | 						break;
78 | 					case $div_idx:
79 | 						$l = $p->sigil(0);
80 | 						$r = $p->sigil(2);
81 | 						echo "$l / $r = " . ($l / $r) . "\n";
82 | 						break;
83 | 				}
84 | 			break;
85 | 		}
86 | 		$p->advance();
87 | 	}
88 | }
89 | 
90 | ?>
91 | ==DONE==
92 | --EXPECT--
93 | 1 + 1 = 2
94 | 33 / 10 = 3.3
95 | 100 * 45 = 4500
96 | 17 - 45 = -28
97 | ==DONE==
98 | 


--------------------------------------------------------------------------------
/lib/parle/cvt.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022 Anatol Belski
 3 |  * All rights reserved.
 4 |  *
 5 |  * Author: Anatol Belski <ab@php.net>
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions
 9 |  * are met:
10 |  * 1. Redistributions of source code must retain the above copyright
11 |  *	notice, this list of conditions and the following disclaimer.
12 |  * 2. Redistributions in binary form must reproduce the above copyright
13 |  *	notice, this list of conditions and the following disclaimer in the
14 |  *	documentation and/or other materials provided with the distribution.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 |  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20 |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 |  * SUCH DAMAGE.
27 |  *
28 |  */
29 | 
30 | /* $Id$ */
31 | 
32 | /* Helpers converting between UTF-8 byte strings and parle::char_type strings. NOTE(review): the guard below is named after an ostream header, not cvt.hpp - confirm it cannot clash with another header's guard. */
33 | #ifndef PARLE_OSTREAM_HPP
34 | #define PARLE_OSTREAM_HPP
35 | /* With PARLE_U32 the macros convert through parle::cvt; otherwise they expand to their argument unchanged. */
36 | #if PARLE_U32
37 | #include <locale>
38 | #include <codecvt>
39 | /* parle::char_type is not declared in this header, so it has to be visible before inclusion. Being static, cvt is a separate object in every translation unit (and per thread under ZTS). */
40 | namespace parle
41 | {
42 | #ifndef ZTS
43 | 	static std::wstring_convert<std::codecvt_utf8<parle::char_type>, parle::char_type> cvt;
44 | #else
45 | 	static thread_local std::wstring_convert<std::codecvt_utf8<parle::char_type>, parle::char_type> cvt;
46 | #endif
47 | }
48 | /* from_bytes()/to_bytes() return strings by value: the c_str() pointer produced by PARLE_CVT_U32/PARLE_CVT_U8 is only usable within the full expression containing the macro. */
49 | #define PARLE_CVT_U32(sptr) parle::cvt.from_bytes(sptr).c_str()
50 | #define PARLE_SCVT_U32(s) parle::cvt.from_bytes(s)
51 | #if defined(_MSC_VER)
52 | #define PARLE_PRE_U32(ca) PARLE_SCVT_U32(ca)
53 | #else
54 | #define PARLE_PRE_U32(ca) U ## ca
55 | #endif
56 | #define PARLE_CVT_U8(sptr) parle::cvt.to_bytes(sptr).c_str()
57 | #define PARLE_SCVT_U8(s) parle::cvt.to_bytes(s)
58 | #else
59 | #define PARLE_CVT_U32(sptr) sptr
60 | #define PARLE_SCVT_U32(s) s
61 | #define PARLE_PRE_U32(ca) ca
62 | #define PARLE_CVT_U8(sptr) sptr
63 | #define PARLE_SCVT_U8(s) s
64 | #endif
65 | 
66 | #endif /* PARLE_OSTREAM_HPP */
67 |  
68 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/match.hpp:
--------------------------------------------------------------------------------
 1 | // match.hpp
 2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_MATCH_HPP
 7 | #define PARSERTL_MATCH_HPP
 8 | 
 9 | #include "lookup.hpp"
10 | #include "parse.hpp"
11 | 
12 | namespace parsertl
13 | {
14 |     // Parse the whole token sequence starting at iter_; returns true if sm_ accepts it.
15 |     template<typename lexer_iterator, typename sm_type>
16 |     bool match(lexer_iterator iter_, const sm_type& sm_)
17 |     {
18 |         basic_match_results<sm_type> results_(iter_->id, sm_);
19 |         // iter_ is this function's own copy, so the caller's iterator is not advanced.
20 |         return parse(iter_, sm_, results_);
21 |     }
22 |     // Like the two-argument match(), but also fills captures_ with the ranges captured while parsing.
23 |     template<typename lexer_iterator, typename sm_type, typename captures>
24 |     bool match(lexer_iterator iter_, const sm_type& sm_, captures& captures_)
25 |     {
26 |         basic_match_results<sm_type> results_(iter_->id, sm_);
27 |         // Qualify token to prevent argument-dependent lookup
28 |         using token = parsertl::token<lexer_iterator>;
29 |         typename token::token_vector productions_;
30 |         // Size captures_ from the last _captures row; slot 0 is seeded from the first token. back() assumes _captures is non-empty - TODO confirm.
31 |         captures_.clear();
32 |         captures_.resize(sm_._captures.back().first +
33 |             sm_._captures.back().second.size() + 1);
34 |         captures_[0].emplace_back(iter_->first, iter_->second);
35 |         // Step the parser until it accepts or reports an error.
36 |         while (results_.entry.action != action::error &&
37 |             results_.entry.action != action::accept)
38 |         {
39 |             if (results_.entry.action == action::reduce)
40 |             {
41 |                 const auto& row_ = sm_._captures[results_.entry.param];
42 |                 // Only rows that list captures have anything to record.
43 |                 if (!row_.second.empty())
44 |                 {
45 |                     std::size_t index_ = 0;
46 |                     // Each pair_ is resolved through dollar() into a first and a last token (presumably the capture's bounds).
47 |                     for (const auto& pair_ : row_.second)
48 |                     {
49 |                         const auto& token1_ = results_.
50 |                             dollar(pair_.first, sm_, productions_);
51 |                         const auto& token2_ = results_.
52 |                             dollar(pair_.second, sm_, productions_);
53 |                         auto& entry_ = captures_[row_.first + index_ + 1];
54 |                         // The recorded range runs from token1_.first to token2_.second.
55 |                         entry_.emplace_back(token1_.first, token2_.second);
56 |                         ++index_;
57 |                     }
58 |                 }
59 |             }
60 |             // lookup() presumably performs the current action and computes the next entry - confirm in lookup.hpp.
61 |             lookup(iter_, sm_, results_, productions_);
62 |         }
63 | 
64 |         return results_.entry.action == action::accept;
65 |     }
66 | }
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/php_parle.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022 Anatol Belski
 3 |  * All rights reserved.
 4 |  *
 5 |  * Author: Anatol Belski <ab@php.net>
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions
 9 |  * are met:
10 |  * 1. Redistributions of source code must retain the above copyright
11 |  *	notice, this list of conditions and the following disclaimer.
12 |  * 2. Redistributions in binary form must reproduce the above copyright
13 |  *	notice, this list of conditions and the following disclaimer in the
14 |  *	documentation and/or other materials provided with the distribution.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 |  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20 |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 |  * SUCH DAMAGE.
27 |  *
28 |  */
29 | 
30 | /* $Id$ */
31 | 
32 | #ifndef PHP_PARLE_H
33 | #define PHP_PARLE_H
34 | 
35 | extern zend_module_entry parle_module_entry;
36 | #define phpext_parle_ptr &parle_module_entry
37 | 
38 | #define PHP_PARLE_VERSION "0.8.6-dev"
39 | 
40 | #ifdef PHP_WIN32
41 | #	define PHP_PARLE_API __declspec(dllexport)
42 | #elif defined(__GNUC__) && __GNUC__ >= 4
43 | #	define PHP_PARLE_API __attribute__ ((visibility("default")))
44 | #else
45 | #	define PHP_PARLE_API
46 | #endif
47 | 
48 | #ifdef ZTS
49 | #include "TSRM.h"
50 | #endif
51 | 
52 | /*
53 |   	Declare any global variables you may need between the BEGIN
54 | 	and END macros here:
55 | 
56 | ZEND_BEGIN_MODULE_GLOBALS(parle)
57 | 	zend_long  global_value;
58 | 	char *global_string;
59 | ZEND_END_MODULE_GLOBALS(parle)
60 | */
61 | 
62 | /* Always refer to the globals in your function as PARLE_G(variable).
63 |    You are encouraged to rename these macros something shorter, see
64 |    examples in any other php module directory.
65 | */
66 | #define PARLE_G(v) ZEND_MODULE_GLOBALS_ACCESSOR(parle, v)
67 | 
68 | #if defined(ZTS) && defined(COMPILE_DL_PARLE)
69 | ZEND_TSRMLS_CACHE_EXTERN()
70 | #endif
71 | 
72 | #endif	/* PHP_PARLE_H */
73 | 
74 | 
75 | /*
76 |  * Local variables:
77 |  * tab-width: 4
78 |  * c-basic-offset: 4
79 |  * End:
80 |  * vim600: noet sw=4 ts=4 fdm=marker
81 |  * vim<600: noet sw=4 ts=4
82 |  */
83 | 


--------------------------------------------------------------------------------
/tests/lexer_position_tracking_001.phpt:
--------------------------------------------------------------------------------
  1 | --TEST--
  2 | Lexer functionality while it's used by parser
  3 | --SKIPIF--
  4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
  5 | --FILE--
  6 | <?php
  7 | 
  8 | use Parle\{Parser, Lexer, Token, ParserException};
  9 | // Grammar: LETTERS is a non-empty run of LETTER and NEWLINE tokens.
 10 | $par = new Parser;
 11 | $par->token("NEWLINE");
 12 | $par->token("LETTER");
 13 | $par->token("' '");
 14 | $par->push("START", "LETTERS");
 15 | $prod_0 = $par->push("LETTERS", "LETTER | NEWLINE");
 16 | $prod_1 = $par->push("LETTERS", "LETTERS LETTER");
 17 | $par->push("LETTERS", "LETTERS NEWLINE");
 18 | $par->build();
 19 | // The lexer only has rules for lowercase letters and newlines.
 20 | 
 21 | $lex = new Lexer;
 22 | $lex->push("[a-z]", $par->tokenId("LETTER"));
 23 | $lex->push("[\r]?[\n]", $par->tokenId("NEWLINE"));
 24 | $lex->build();
 25 | // The backslash in "xy\z" matches no lexer rule, so the run is expected to end in ACTION_ERROR (see EXPECTF).
 26 | $in = "abc\ndef\r\nghf\nxy\\z";
 27 | $par->consume($in, $lex);
 28 | 
 29 | // Step through the parse; on each tracked reduce, print the matched symbol next to the lexer's current marker..cursor slice of the input.
 30 | do {
 31 | 	switch ($par->action) {
 32 | 	case Parser::ACTION_ERROR:
 33 | 		$i = $par->errorInfo();
 34 | 		var_dump($i, $lex, $lex->getToken(), substr($in, $lex->marker, $lex->cursor - $lex->marker));
 35 | 		throw new ParserException("Error");
 36 | 		break;
 37 | 	/*case Parser::ACTION_GOTO:
 38 | 		echo "Trace: '", $par->trace(), "', token: '", $lex->getToken()->value, "'", PHP_EOL;
 39 | 		break;
 40 | 	case Parser::ACTION_SHIFT:
 41 | 		echo "Trace: '", $par->trace(), "', token: '", $lex->getToken()->value, "'", PHP_EOL;
 42 | 		break;*/
 43 | 	case Parser::ACTION_REDUCE:
 44 | 		//echo "Trace: ", $par->trace(), PHP_EOL;
 45 | 		switch ($par->reduceId) {
 46 | 			case $prod_0: // colon, not semicolon: the ";" form is deprecated in newer PHP and the notice would break EXPECTF
 47 | 				echo " Match: '", $par->sigil(0), "', token: '", substr($in, $lex->marker, $lex->cursor - $lex->marker), "'", PHP_EOL;
 48 | 				break;
 49 | 			case $prod_1:
 50 | 				echo " Match: '", $par->sigil(1), "', token: '", substr($in, $lex->marker, $lex->cursor - $lex->marker), "'", PHP_EOL;
 51 | 				break;
 52 | 		}
 53 | 		break;
 54 | 	}
 55 | 	$par->advance();
 56 | } while (Parser::ACTION_ACCEPT != $par->action);
 57 | 
 58 | ?>
 59 | ==DONE==
 60 | --EXPECTF--
 61 | Match: 'a', token: 'b'
 62 |  Match: 'b', token: 'c'
 63 |  Match: 'c', token: '
 64 | '
 65 |  Match: 'd', token: 'e'
 66 |  Match: 'e', token: 'f'
 67 |  Match: 'f', token: '
 68 | '
 69 |  Match: 'g', token: 'h'
 70 |  Match: 'h', token: 'f'
 71 |  Match: 'f', token: '
 72 | '
 73 |  Match: 'x', token: 'y'
 74 | object(Parle\ErrorInfo)#%d (3) {
 75 |   ["id"]=>
 76 |   int(2)
 77 |   ["position"]=>
 78 |   int(15)
 79 |   ["token"]=>
 80 |   object(Parle\Token)#4 (2) {
 81 |     ["id"]=>
 82 |     int(65535)
 83 |     ["value"]=>
 84 |     string(1) "\"
 85 |   }
 86 | }
 87 | object(Parle\Lexer)#%d (7) {
 88 |   ["bol"]=>
 89 |   bool(false)
 90 |   ["flags"]=>
 91 |   int(6)
 92 |   ["state"]=>
 93 |   int(0)
 94 |   ["marker"]=>
 95 |   int(15)
 96 |   ["cursor"]=>
 97 |   int(16)
 98 |   ["line"]=>
 99 |   int(2)
100 |   ["coulmn"]=>
101 |   int(2)
102 | }
103 | object(Parle\Token)#%d (2) {
104 |   ["id"]=>
105 |   int(65535)
106 |   ["value"]=>
107 |   string(1) "\"
108 | }
109 | string(1) "\"
110 | 
111 | Fatal error: Uncaught Parle\ParserException: Error in %s:%d
112 | Stack trace:
113 | #0 {main}
114 |   thrown in %s on line %d
115 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/parse.hpp:
--------------------------------------------------------------------------------
 1 | // parse.hpp
 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef PARSERTL_PARSE_HPP
 7 | #define PARSERTL_PARSE_HPP
 8 | 
 9 | #include "match_results.hpp"
10 | #include <vector>
11 | 
12 | namespace parsertl
13 | {
14 |     // Drives results_ through sm_ until accept or error; returns true on accept.
15 |     template<typename lexer_iterator, typename sm_type>
16 |     bool parse(lexer_iterator& iter_, const sm_type& sm_,
17 |         basic_match_results<sm_type>& results_)
18 |     {
19 |         while (results_.entry.action != action::error)
20 |         {
21 |             switch (results_.entry.action)
22 |             {
23 |             case action::shift:
24 |                 results_.stack.push_back(results_.entry.param);
25 |                 // Move to the next token, but never step past a token whose id is 0 (presumably end of input).
26 |                 if (iter_->id != 0)
27 |                     ++iter_;
28 | 
29 |                 results_.token_id = iter_->id;
30 |                 // An id equal to npos() becomes an unknown_token error instead of a table lookup.
31 |                 if (results_.token_id == lexer_iterator::value_type::npos())
32 |                 {
33 |                     results_.entry.action = action::error;
34 |                     results_.entry.param =
35 |                         static_cast<typename sm_type::id_type>
36 |                         (error_type::unknown_token);
37 |                 }
38 |                 else
39 |                 {
40 |                     results_.entry =
41 |                         sm_.at(results_.stack.back(), results_.token_id);
42 |                 }
43 | 
44 |                 break;
45 |             case action::reduce:
46 |             {
47 |                 const std::size_t size_ =
48 |                     sm_._rules[results_.entry.param].second.size();
49 |                 // Shrink the stack by the number of elements in the rule's .second (presumably its right-hand side).
50 |                 if (size_)
51 |                 {
52 |                     results_.stack.resize(results_.stack.size() - size_);
53 |                 }
54 |                 // Then look up the next entry using the rule's .first as the id.
55 |                 results_.token_id = sm_._rules[results_.entry.param].first;
56 |                 results_.entry =
57 |                     sm_.at(results_.stack.back(), results_.token_id);
58 |                 break;
59 |             }
60 |             case action::go_to: // push entry.param, then look up the current token again
61 |                 results_.stack.push_back(results_.entry.param);
62 |                 results_.token_id = iter_->id;
63 |                 results_.entry =
64 |                     sm_.at(results_.stack.back(), results_.token_id);
65 |                 break;
66 |             default:
67 |                 // accept
68 |                 // error
69 |                 break;
70 |             }
71 |             // On accept, shrink the stack by the size of the accepted rule's .second and leave the loop.
72 |             if (results_.entry.action == action::accept)
73 |             {
74 |                 const std::size_t size_ =
75 |                     sm_._rules[results_.entry.param].second.size();
76 | 
77 |                 if (size_)
78 |                 {
79 |                     results_.stack.resize(results_.stack.size() - size_);
80 |                 }
81 | 
82 |                 break;
83 |             }
84 |         }
85 | 
86 |         return results_.entry.action == action::accept;
87 |     }
88 | }
89 | 
90 | #endif
91 | 


--------------------------------------------------------------------------------
/tests/calc_002.phpt:
--------------------------------------------------------------------------------
  1 | --TEST--
  2 | Advanced calc with state
  3 | --SKIPIF--
  4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
  5 | --FILE--
  6 | <?php 
  7 | 
  8 | use Parle\Parser;
  9 | use Parle\Stack;
 10 | use Parle\ParserException;
 11 | use Parle\Lexer;
 12 | use Parle\Token;
 13 | 
 14 | $p = new Parser;
      // Declare the INTEGER terminal, the operator associativities and the
      // NEGATE name that the unary-minus rule refers to through %prec.
      // NOTE(review): presumably later declarations bind tighter, as in yacc;
      // the --EXPECT-- values are consistent with that (e.g. 1 + 2^4 = 17).
 15 | $p->token("INTEGER");
 16 | $p->left("'+' '-'");
 17 | $p->left("'*' '/'");
 18 | $p->precedence("NEGATE");
 19 | $p->right("'^'");
 20 | 
      // Grammar productions. The value returned by push() is kept for every
      // rule that needs a semantic action; it is compared with $p->reduceId
      // in the evaluation loop.
 21 | $p->push("start", "exp");
 22 | $add_idx = $p->push("exp", "exp '+' exp");
 23 | $sub_idx = $p->push("exp", "exp '-' exp");
 24 | $mul_idx = $p->push("exp", "exp '*' exp");
 25 | $div_idx = $p->push("exp", "exp '/' exp");
 26 | $p->push("exp", "'(' exp ')'");
 27 | $neg_idx = $p->push("exp", "'-' exp %prec NEGATE");
 28 | $exp_idx = $p->push("exp", "exp '^' exp");
 29 | $int_idx = $p->push("exp", "INTEGER");
 30 | 
 31 | $p->build();
 32 | 
      // Lexer rules: each regex is bound to the id the parser reports for the
      // matching grammar token; whitespace is bound to Token::SKIP.
 33 | $lex = new Lexer;
 34 | $lex->push("[+]", $p->tokenId("'+'"));
 35 | $lex->push("[-]", $p->tokenId("'-'"));
 36 | $lex->push("[*]", $p->tokenId("'*'"));
 37 | $lex->push("[\\^]", $p->tokenId("'^'"));
 38 | $lex->push("[/]", $p->tokenId("'/'"));
 39 | $lex->push("\\d+", $p->tokenId("INTEGER"));
 40 | $lex->push("[(]", $p->tokenId("'('"));
 41 | $lex->push("[)]", $p->tokenId("')'"));
 42 | $lex->push("\\s+", Token::SKIP);
 43 | 
 44 | $lex->build();
 45 | 
 46 | $exp = array(
 47 | 	"1 + 2^4",
 48 | 	"33 / (10 + 1)",
 49 | 	"100 * 45 / 10",
 50 | 	"55 - 10*5",
 51 | 	"10 - -4",
 52 | 	"10000000^0 + 10 - 3^2",
 53 | );
 54 | 
      // For every expression: first check that it validates, then feed it to
      // the parser and step through the parse one action at a time,
      // evaluating on a value stack whenever a rule is reduced.
 55 | foreach ($exp as $in) {
 56 | 	if (!$p->validate($in, $lex)) {
 57 | 		throw new ParserException("Failed to validate input");
 58 | 	}
 59 | 
 60 | 	$p->consume($in, $lex);
 61 | 
 62 | 	$stack = new Stack;
 63 | 
      	// Keep stepping until the parser reports either error or accept.
 64 | 	while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) {
 65 | 		switch ($p->action) {
      			// NOTE(review): the enclosing while condition already excludes
      			// ACTION_ERROR, so this case looks unreachable.
 66 | 			case Parser::ACTION_ERROR:
 67 | 				throw new ParserException("Parser error");
 68 | 				break;
      			// Shift, goto and accept need no semantic action here.
 69 | 			case Parser::ACTION_SHIFT:
 70 | 			case Parser::ACTION_GOTO:
 71 | 			case Parser::ACTION_ACCEPT:
 72 | 				break;
 73 | 			case Parser::ACTION_REDUCE:
      				// Binary rules: the right operand is on top of the stack; it
      				// is saved and popped, then folded into the left operand,
      				// which is now the top and becomes the result.
 74 | 				switch ($p->reduceId) {
 75 | 					case $add_idx:
 76 | 						$op0 = $stack->top;
 77 | 						$stack->pop();
 78 | 						$stack->top += $op0;
 79 | 						break;
 80 | 					case $sub_idx:
 81 | 						$op0 = $stack->top;
 82 | 						$stack->pop();
 83 | 						$stack->top -= $op0;
 84 | 						break;
 85 | 					case $mul_idx:
 86 | 						$op0 = $stack->top;
 87 | 						$stack->pop();
 88 | 						$stack->top *= $op0;
 89 | 						break;
 90 | 					case $div_idx:
 91 | 						$op0 = $stack->top;
 92 | 						$stack->pop();
 93 | 						$stack->top /= $op0;
 94 | 						break;
 95 | 					case $exp_idx:
 96 | 						$op0 = $stack->top;
 97 | 						$stack->pop();
 98 | 						$stack->top = $stack->top ** $op0;
 99 | 						break;
      					// Unary minus: negate the top of the stack in place.
100 | 					case $neg_idx:
101 | 						$stack->top = -$stack->top;
102 | 						break;
      					// NOTE(review): sigil() presumably yields the matched text
      					// of the INTEGER token being reduced - confirm against the
      					// Parle documentation.
103 | 					case $int_idx:
104 | 						$i = (int)$p->sigil();
105 | 						$stack->push($i);
106 | 						break;
107 | 				}
108 | 
109 | 			break;
110 | 		}
111 | 		$p->advance();
112 | 	}
      	// Whatever is on top of the value stack when the loop ends is printed
      	// as the result.
113 | 	echo "$in = " . $stack->top . "\n";
114 | }
115 | 
116 | ?>
117 | ==DONE==
118 | --EXPECT--
119 | 1 + 2^4 = 17
120 | 33 / (10 + 1) = 3
121 | 100 * 45 / 10 = 450
122 | 55 - 10*5 = 5
123 | 10 - -4 = 14
124 | 10000000^0 + 10 - 3^2 = 2
125 | ==DONE==
126 | 


--------------------------------------------------------------------------------
/tests/calc_003.phpt:
--------------------------------------------------------------------------------
  1 | --TEST--
  2 | Advanced calc with state
  3 | --SKIPIF--
  4 | <?php if (!extension_loaded("parle")) print "skip"; ?>
  5 | --FILE--
  6 | <?php 
  7 | 
  8 | use Parle\Parser;
  9 | use Parle\Stack;
 10 | use Parle\ParserException;
 11 | use Parle\Lexer;
 12 | use Parle\Token;
 13 | 
 14 | $p = new Parser;
      // Declare the INTEGER terminal, the operator associativities and the
      // NEGATE name that the unary-minus rule refers to through %prec.
      // NOTE(review): presumably later declarations bind tighter, as in yacc;
      // the --EXPECT-- values are consistent with that (e.g. 1 + 2^4 = 17).
 15 | $p->token("INTEGER");
 16 | $p->left("'+' '-'");
 17 | $p->left("'*' '/'");
 18 | $p->precedence("NEGATE");
 19 | $p->right("'^'");
 20 | 
      // Grammar productions. The value returned by push() is kept for every
      // rule that needs a semantic action; it is compared with $p->reduceId
      // in the evaluation loop.
 21 | $p->push("start", "exp");
 22 | $add_idx = $p->push("exp", "exp '+' exp");
 23 | $sub_idx = $p->push("exp", "exp '-' exp");
 24 | $mul_idx = $p->push("exp", "exp '*' exp");
 25 | $div_idx = $p->push("exp", "exp '/' exp");
 26 | $p->push("exp", "'(' exp ')'");
 27 | $neg_idx = $p->push("exp", "'-' exp %prec NEGATE");
 28 | $exp_idx = $p->push("exp", "exp '^' exp");
 29 | $int_idx = $p->push("exp", "INTEGER");
 30 | 
 31 | $p->build();
 32 | 
      // Lexer rules: each regex is bound to the id the parser reports for the
      // matching grammar token.
 33 | $lex = new Lexer;
 34 | $lex->push("[+]", $p->tokenId("'+'"));
 35 | $lex->push("[-]", $p->tokenId("'-'"));
 36 | $lex->push("[*]", $p->tokenId("'*'"));
 37 | $lex->push("[\\^]", $p->tokenId("'^'"));
 38 | $lex->push("[/]", $p->tokenId("'/'"));
 39 | $lex->push("\\d+", $p->tokenId("INTEGER"));
 40 | $lex->push("[(]", $p->tokenId("'('"));
 41 | $lex->push("[)]", $p->tokenId("')'"));
      // Whitespace gets the ad-hoc id 42 instead of Token::SKIP; the callout
      // registered for that id keeps advancing the lexer until the current
      // token no longer has id 42.
 42 | $lex->push("\\s+", 42);
 43 | $lex->callout(42, function () use ($lex) {
 44 | 	do {
 45 | 		$lex->advance();
 46 | 		$tok = $lex->getToken();
 47 | 	} while (42 == $tok->id);
 48 | });
 49 | 
 50 | $lex->build();
 51 | 
 52 | $exp = array(
 53 | 	"1 + 2^4",
 54 | 	"33 / (10 + 1)",
 55 | 	"100 * 45 / 10",
 56 | 	"55 - 10*5",
 57 | 	"10 - -4",
 58 | 	"10000000^0 + 10 - 3^2",
 59 | );
 60 | 
      // For every expression: first check that it validates, then feed it to
      // the parser and step through the parse one action at a time,
      // evaluating on a value stack whenever a rule is reduced.
 61 | foreach ($exp as $in) {
 62 | 	if (!$p->validate($in, $lex)) {
 63 | 		throw new ParserException("Failed to validate input");
 64 | 	}
 65 | 
 66 | 	$p->consume($in, $lex);
 67 | 
 68 | 	$stack = new Stack;
 69 | 
      	// Keep stepping until the parser reports either error or accept.
 70 | 	while (Parser::ACTION_ERROR != $p->action && Parser::ACTION_ACCEPT != $p->action) {
 71 | 		switch ($p->action) {
      			// NOTE(review): the enclosing while condition already excludes
      			// ACTION_ERROR, so this case looks unreachable.
 72 | 			case Parser::ACTION_ERROR:
 73 | 				throw new ParserException("Parser error");
 74 | 				break;
      			// Shift, goto and accept need no semantic action here.
 75 | 			case Parser::ACTION_SHIFT:
 76 | 			case Parser::ACTION_GOTO:
 77 | 			case Parser::ACTION_ACCEPT:
 78 | 				break;
 79 | 			case Parser::ACTION_REDUCE:
      				// Binary rules: the right operand is on top of the stack; it
      				// is saved and popped, then folded into the left operand,
      				// which is now the top and becomes the result.
 80 | 				switch ($p->reduceId) {
 81 | 					case $add_idx:
 82 | 						$op0 = $stack->top;
 83 | 						$stack->pop();
 84 | 						$stack->top += $op0;
 85 | 						break;
 86 | 					case $sub_idx:
 87 | 						$op0 = $stack->top;
 88 | 						$stack->pop();
 89 | 						$stack->top -= $op0;
 90 | 						break;
 91 | 					case $mul_idx:
 92 | 						$op0 = $stack->top;
 93 | 						$stack->pop();
 94 | 						$stack->top *= $op0;
 95 | 						break;
 96 | 					case $div_idx:
 97 | 						$op0 = $stack->top;
 98 | 						$stack->pop();
 99 | 						$stack->top /= $op0;
100 | 						break;
101 | 					case $exp_idx:
102 | 						$op0 = $stack->top;
103 | 						$stack->pop();
104 | 						$stack->top = $stack->top ** $op0;
105 | 						break;
      					// Unary minus: negate the top of the stack in place.
106 | 					case $neg_idx:
107 | 						$stack->top = -$stack->top;
108 | 						break;
      					// NOTE(review): sigil() presumably yields the matched text
      					// of the INTEGER token being reduced - confirm against the
      					// Parle documentation.
109 | 					case $int_idx:
110 | 						$i = (int)$p->sigil();
111 | 						$stack->push($i);
112 | 						break;
113 | 				}
114 | 
115 | 			break;
116 | 		}
117 | 		$p->advance();
118 | 	}
      	// Whatever is on top of the value stack when the loop ends is printed
      	// as the result.
119 | 	echo "$in = " . $stack->top . "\n";
120 | }
121 | 
122 | ?>
123 | ==DONE==
124 | --EXPECT--
125 | 1 + 2^4 = 17
126 | 33 / (10 + 1) = 3
127 | 100 * 45 / 10 = 450
128 | 55 - 10*5 = 5
129 | 10 - -4 = 14
130 | 10000000^0 + 10 - 3^2 = 2
131 | ==DONE==
132 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tree/iteration_node.hpp:
--------------------------------------------------------------------------------
 1 | // iteration_node.hpp
 2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
 3 | //
 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 6 | #ifndef LEXERTL_ITERATION_NODE_HPP
 7 | #define LEXERTL_ITERATION_NODE_HPP
 8 | 
 9 | #include "node.hpp"
10 | 
11 | namespace lexertl
12 | {
13 |     namespace detail
14 |     {
15 |         template<typename id_type>
             // Syntax-tree node with a single child (_next) that reports
             // ITERATION from what_type(). Besides the child it stores the
             // greedy flag it was constructed with.
16 |         class basic_iteration_node : public basic_node<id_type>
17 |         {
18 |         public:
19 |             using node = basic_node<id_type>;
20 |             using bool_stack = typename node::bool_stack;
21 |             using const_node_stack = typename node::const_node_stack;
22 |             using node_ptr_vector = typename node::node_ptr_vector;
23 |             using node_stack = typename node::node_stack;
24 |             using node_type = typename node::node_type;
25 |             using node_vector = typename node::node_vector;
26 | 
                 // next_   : the child node being iterated (not owned).
                 // greedy_ : forwarded to every node in this node's firstpos
                 //           set and remembered for copy_node().
                 // The base is constructed with true; presumably that is the
                 // nullable flag (confirm in node.hpp).
27 |             basic_iteration_node(observer_ptr<node> next_, const bool greedy_) :
28 |                 node(true),
29 |                 _next(next_),
30 |                 _greedy(greedy_)
31 |             {
                     // Hand this node's firstpos/lastpos containers to the
                     // child; presumably the child appends its own positions
                     // to them (confirm in node.hpp).
32 |                 _next->append_firstpos(node::firstpos());
33 |                 _next->append_lastpos(node::lastpos());
34 | 
                     // Close the loop: every last position gets this node's
                     // first positions added to its followpos.
35 |                 for (observer_ptr<node> node_ : node::lastpos())
36 |                 {
37 |                     node_->append_followpos(node::firstpos());
38 |                 }
39 | 
40 |                 for (observer_ptr<node> node_ : node::firstpos())
41 |                 {
42 |                     node_->greedy(greedy_);
43 |                 }
44 |             }
45 | 
46 |             ~basic_iteration_node() override = default;
47 | 
48 |             node_type what_type() const override
49 |             {
50 |                 return node::node_type::ITERATION;
51 |             }
52 | 
                 // Schedules the child for a visit: pushes true onto
                 // perform_op_stack_ and _next onto node_stack_, then returns
                 // true.
53 |             bool traverse(const_node_stack& node_stack_,
54 |                 bool_stack& perform_op_stack_) const override
55 |             {
56 |                 perform_op_stack_.push(true);
57 |                 node_stack_.push(_next);
58 |                 return true;
59 |             }
60 | 
61 |         private:
62 |             observer_ptr<node> _next;
63 |             bool _greedy;
64 | 
                 // When the flag on top of perform_op_stack_ is set, wraps the
                 // node on top of new_node_stack_ in a fresh iteration node
                 // (same greedy flag), stores it in node_ptr_vector_ and puts
                 // it on top of new_node_stack_ in place of the wrapped node.
                 // Otherwise only sets down_. The flag is popped either way.
65 |             void copy_node(node_ptr_vector& node_ptr_vector_,
66 |                 node_stack& new_node_stack_, bool_stack& perform_op_stack_,
67 |                 bool& down_) const override
68 |             {
69 |                 if (perform_op_stack_.top())
70 |                 {
71 |                     observer_ptr<node> ptr_ = new_node_stack_.top();
72 | 
73 |                     node_ptr_vector_.push_back(std::make_unique
74 |                         <basic_iteration_node>(ptr_, _greedy));
75 |                     new_node_stack_.top() = node_ptr_vector_.back().get();
76 |                 }
77 |                 else
78 |                 {
79 |                     down_ = true;
80 |                 }
81 | 
82 |                 perform_op_stack_.pop();
83 |             }
84 | 
85 |             // No copy construction.
86 |             basic_iteration_node(const basic_iteration_node&) = delete;
87 |             // No assignment.
88 |             const basic_iteration_node& operator =
89 |                 (const basic_iteration_node&) = delete;
90 |         };
91 |     }
92 | }
93 | 
94 | #endif
95 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/search_iterator.hpp:
--------------------------------------------------------------------------------
  1 | // search_iterator.hpp
  2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef PARSERTL_SEARCH_ITERATOR_HPP
  7 | #define PARSERTL_SEARCH_ITERATOR_HPP
  8 | 
  9 | #include "capture.hpp"
 10 | #include "../../../lexertl14/include/lexertl/iterator.hpp"
 11 | #include "match_results.hpp"
 12 | #include "search.hpp"
 13 | 
 14 | namespace parsertl
 15 | {
 16 |     template<typename lexer_iterator, typename sm_type,
 17 |         typename id_type = std::uint16_t>
          // Forward iterator whose value is the capture set produced by the
          // most recent call to search(). A default-constructed instance has a
          // null state machine pointer and compares equal to any iterator whose
          // search has failed, so it serves as the end iterator.
          // NOTE(review): id_type is not referenced anywhere in this class.
 18 |     class search_iterator
 19 |     {
 20 |     public:
 21 |         using iter_type = typename lexer_iterator::value_type::iter_type;
 22 |         using results = std::vector<std::vector<capture<iter_type>>>;
 23 |         using value_type = results;
 24 |         using difference_type = ptrdiff_t;
 25 |         using pointer = const value_type*;
 26 |         using reference = const value_type&;
 27 |         using iterator_category = std::forward_iterator_tag;
 28 | 
 29 |         search_iterator() = default;
 30 | 
              // Stores a copy of iter_ and the address of sm_ (sm_ must
              // therefore outlive this iterator), then runs the first search.
              // NOTE(review): lookup() begins by clearing _captures, so the
              // entry seeded here does not survive the call.
 31 |         search_iterator(const lexer_iterator& iter_, const sm_type& sm_) :
 32 |             _iter(iter_),
 33 |             _sm(&sm_)
 34 |         {
 35 |             _captures.emplace_back();
 36 |             _captures.back().emplace_back(iter_->first, iter_->first);
 37 |             lookup();
 38 |         }
 39 | 
 40 |         search_iterator& operator ++()
 41 |         {
 42 |             lookup();
 43 |             return *this;
 44 |         }
 45 | 
 46 |         search_iterator operator ++(int)
 47 |         {
 48 |             search_iterator iter_ = *this;
 49 | 
 50 |             lookup();
 51 |             return iter_;
 52 |         }
 53 | 
 54 |         const value_type& operator *() const
 55 |         {
 56 |             return _captures;
 57 |         }
 58 | 
 59 |         const value_type* operator ->() const
 60 |         {
 61 |             return &_captures;
 62 |         }
 63 | 
              // Equal when both refer to the same state machine and, unless
              // that pointer is null, hold equal captures. Two iterators with
              // a null pointer are always equal.
 64 |         bool operator ==(const search_iterator& rhs_) const
 65 |         {
 66 |             return _sm == rhs_._sm &&
 67 |                 (_sm == nullptr ?
 68 |                 true :
 69 |                 _captures == rhs_._captures);
 70 |         }
 71 | 
 72 |         bool operator !=(const search_iterator& rhs_) const
 73 |         {
 74 |             return !(*this == rhs_);
 75 |         }
 76 | 
 77 |     private:
 78 |         lexer_iterator _iter;
 79 |         results _captures;
 80 |         const sm_type* _sm = nullptr;
 81 | 
              // Clears the captures and runs search() from _iter. On success
              // _iter is replaced by the end position search() reported; on
              // failure _sm is nulled, turning this into an end iterator.
              // Dereferences _sm, so it must not run once _sm is null.
 82 |         void lookup()
 83 |         {
 84 |             lexer_iterator end;
 85 | 
 86 |             _captures.clear();
 87 | 
 88 |             if (search(_iter, end, *_sm, _captures))
 89 |             {
 90 |                 _iter = end;
 91 |             }
 92 |             else
 93 |             {
 94 |                 _sm = nullptr;
 95 |             }
 96 |         }
 97 |     };
 98 | 
 99 |     using ssearch_iterator =
100 |         search_iterator<lexertl::siterator, state_machine>;
          // Each alias in this group pairs search_iterator with one lexertl
          // iterator alias and takes its s/c/ws/wc prefix from that alias.
          // state_machine is unqualified here, so it is looked up from
          // namespace parsertl.
101 |     using csearch_iterator =
102 |         search_iterator<lexertl::citerator, state_machine>;
103 |     using wssearch_iterator =
104 |         search_iterator<lexertl::wsiterator, state_machine>;
105 |     using wcsearch_iterator =
106 |         search_iterator<lexertl::wciterator, state_machine>;
107 | }
108 | 
109 | #endif
110 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tokeniser/re_tokeniser_state.hpp:
--------------------------------------------------------------------------------
  1 | // re_tokeniser_state.hpp
  2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_RE_TOKENISER_STATE_HPP
  7 | #define LEXERTL_RE_TOKENISER_STATE_HPP
  8 | 
  9 | #include "../../char_traits.hpp"
 10 | #include "../../enums.hpp"
 11 | #include <locale>
 12 | #include "../../narrow.hpp"
 13 | #include <stack>
 14 | 
 15 | namespace lexertl
 16 | {
 17 |     namespace detail
 18 |     {
 19 |         template<typename ch_type, typename id_type>
              // Cursor over a [_start, _end) character range together with the
              // id, flags, locale and optional macro name it was created with.
              // The pointers are stored as given; the range is not copied.
 20 |         struct basic_re_tokeniser_state
 21 |         {
 22 |             using char_type = ch_type;
 23 |             using index_type =
 24 |                 typename basic_char_traits<char_type>::index_type;
 25 | 
 26 |             const char_type* const _start;
 27 |             const char_type* const _end;
 28 |             const char_type* _curr;
 29 |             id_type _id;
 30 |             std::size_t _flags;
 31 |             std::stack<std::size_t> _flags_stack;
 32 |             std::locale _locale;
                  // Null unless the text belongs to a macro; see error().
 33 |             const char_type* _macro_name;
 34 |             long _paren_count = 0;
 35 |             bool _in_string = false;
                  // ~0 converted to id_type; presumably a "not set" sentinel
                  // (confirm against the users of _nl_id).
 36 |             id_type _nl_id = static_cast<id_type>(~0);
 37 | 
                  // Positions the cursor at start_ and stores the remaining
                  // arguments unchanged. _flags_stack starts out empty.
 38 |             basic_re_tokeniser_state(const char_type* start_,
 39 |                 const char_type* const end_, id_type id_,
 40 |                 const std::size_t flags_, const std::locale locale_,
 41 |                 const char_type* macro_name_) :
 42 |                 _start(start_),
 43 |                 _end(end_),
 44 |                 _curr(start_),
 45 |                 _id(id_),
 46 |                 _flags(flags_),
 47 |                 _locale(locale_),
 48 |                 _macro_name(macro_name_)
 49 |             {
 50 |             }
 51 | 
                  // Reads the next character into ch_ and advances.
                  // Note the sense of the result: true means the input was
                  // already exhausted (ch_ is set to 0 and the cursor does not
                  // move); false means a character was delivered.
 52 |             inline bool next(char_type& ch_)
 53 |             {
 54 |                 if (_curr >= _end)
 55 |                 {
 56 |                     ch_ = 0;
 57 |                     return true;
 58 |                 }
 59 |                 else
 60 |                 {
 61 |                     ch_ = *_curr;
 62 |                     increment();
 63 |                     return false;
 64 |                 }
 65 |             }
 66 | 
                  // Advances the cursor by one character without any bounds
                  // check.
 67 |             inline void increment()
 68 |             {
 69 |                 ++_curr;
 70 |             }
 71 | 
                  // Offset of the cursor from _start, in characters.
 72 |             inline std::size_t index() const
 73 |             {
 74 |                 return _curr - _start;
 75 |             }
 76 | 
                  // True once the cursor has reached (or passed) _end.
 77 |             inline bool eos() const
 78 |             {
 79 |                 return _curr >= _end;
 80 |             }
 81 | 
                  // Appends the fixed "Unexpected end of regex" text to ss_.
 82 |             inline void unexpected_end(std::ostringstream& ss_) const
 83 |             {
 84 |                 ss_ << "Unexpected end of regex";
 85 |             }
 86 | 
                  // Appends the location suffix of an error message to ss_:
                  // the macro name when one was supplied, otherwise the rule
                  // id.
 87 |             inline void error(std::ostringstream& ss_) const
 88 |             {
 89 |                 ss_ << " in ";
 90 | 
 91 |                 if (_macro_name)
 92 |                 {
 93 |                     ss_ << "MACRO '";
 94 |                     narrow(_macro_name, ss_);
 95 |                     ss_ << "'.";
 96 |                 }
 97 |                 else
 98 |                 {
 99 |                     ss_ << "rule id " << _id << '.';
100 |                 }
101 |             }
102 |         };
103 |     }
104 | }
105 | 
106 | #endif
107 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/iterator.hpp:
--------------------------------------------------------------------------------
  1 | // iterator.hpp
  2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | 
  7 | #ifndef LEXERTL_ITERATOR_HPP
  8 | #define LEXERTL_ITERATOR_HPP
  9 | 
 10 | #include <iterator>
 11 | #include "lookup.hpp"
 12 | #include "state_machine.hpp"
 13 | 
 14 | namespace lexertl
 15 | {
 16 |     template<typename iter, typename sm_type, typename results>
          // Forward iterator whose value is the match-results object filled in
          // by the most recent lexertl::lookup() call. A default-constructed
          // instance has a null state machine pointer and serves as the end
          // iterator.
 17 |     class iterator
 18 |     {
 19 |     public:
 20 |         using id_type = typename results::id_type;
 21 |         using value_type = results;
 22 |         using difference_type = ptrdiff_t;
 23 |         using pointer = const value_type*;
 24 |         using reference = const value_type&;
 25 |         using iterator_category = std::forward_iterator_tag;
 26 | 
 27 |         iterator() = default;
 28 | 
              // Builds the results object from [start_, end_), bol_ and state_,
              // keeps the address of sm_ (which must therefore outlive this
              // iterator) and performs the first lookup immediately.
 29 |         iterator(const iter& start_, const iter& end_, const sm_type& sm_,
 30 |             const bool bol_ = true, const id_type state_ = 0) :
 31 |             _results(start_, end_, bol_, state_),
 32 |             _sm(&sm_)
 33 |         {
 34 |             lookup();
 35 |         }
 36 | 
 37 |         iterator& operator ++()
 38 |         {
 39 |             lookup();
 40 |             return *this;
 41 |         }
 42 | 
 43 |         iterator operator ++(int)
 44 |         {
 45 |             iterator iter_ = *this;
 46 | 
 47 |             lookup();
 48 |             return iter_;
 49 |         }
 50 | 
 51 |         const value_type& operator *() const
 52 |         {
 53 |             return _results;
 54 |         }
 55 | 
 56 |         const value_type* operator ->() const
 57 |         {
 58 |             return &_results;
 59 |         }
 60 | 
              // Equal when both refer to the same state machine and, unless
              // that pointer is null, hold equal results. Two iterators with a
              // null pointer are always equal.
 61 |         bool operator ==(const iterator& rhs_) const
 62 |         {
 63 |             return _sm == rhs_._sm && (_sm == nullptr ? true :
 64 |                 _results == rhs_._results);
 65 |         }
 66 | 
 67 |         bool operator !=(const iterator& rhs_) const
 68 |         {
 69 |             return !(*this == rhs_);
 70 |         }
 71 | 
              // Dereferences the stored pointer without a check, so it must
              // not be called on an end iterator (where _sm is null).
 72 |         const sm_type& sm() const
 73 |         {
 74 |             return *_sm;
 75 |         }
 76 | 
 77 |     private:
 78 |         value_type _results;
 79 |         const sm_type* _sm = nullptr;
 80 | 
              // Advances _results by one lookup. When the result's first
              // position equals eoi afterwards, _sm is nulled, turning this
              // into an end iterator.
 81 |         void lookup()
 82 |         {
 83 |             lexertl::lookup(*_sm, _results);
 84 | 
 85 |             if (_results.first == _results.eoi)
 86 |             {
 87 |                 _sm = nullptr;
 88 |             }
 89 |         }
 90 |     };
 91 | 
 92 |     using siterator =
 93 |         iterator<std::string::const_iterator, state_machine, smatch>;
          // Naming in this group: "s" aliases iterate a standard string via
          // its const_iterator, "c" aliases iterate a raw character pointer;
          // the w and u32 prefixes select the wchar_t and char32_t variants
          // together with the matching state machine and match type.
 94 |     using citerator = iterator<const char*, state_machine, cmatch>;
 95 |     using wsiterator =
 96 |         iterator<std::wstring::const_iterator, wstate_machine, wsmatch>;
 97 |     using wciterator = iterator<const wchar_t*, wstate_machine, wcmatch>;
 98 |     using u32siterator = iterator<std::u32string::const_iterator,
 99 |         u32state_machine, u32smatch>;
100 |     using u32citerator = iterator<const char32_t*, u32state_machine, u32cmatch>;
101 | 
          // Same iterator / state machine pairings as the group above, but
          // using the *rmatch result types.
102 |     using sriterator =
103 |         iterator<std::string::const_iterator, state_machine, srmatch>;
104 |     using criterator = iterator<const char*, state_machine, crmatch>;
105 |     using wsriterator =
106 |         iterator<std::wstring::const_iterator, wstate_machine, wsrmatch>;
107 |     using wcriterator =
108 |         iterator<const wchar_t*, wstate_machine, wcrmatch>;
109 |     using u32sriterator = iterator<std::u32string::const_iterator,
110 |         u32state_machine, u32srmatch>;
111 |     using u32criterator = iterator<const char32_t*, u32state_machine,
112 |         u32crmatch>;
113 | }
114 | 
115 | #endif
116 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tree/selection_node.hpp:
--------------------------------------------------------------------------------
  1 | // selection_node.hpp
  2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_SELECTION_NODE_HPP
  7 | #define LEXERTL_SELECTION_NODE_HPP
  8 | 
  9 | #include "node.hpp"
 10 | 
 11 | namespace lexertl
 12 | {
 13 |     namespace detail
 14 |     {
 15 |         template<typename id_type>
              // Syntax-tree node with two children (_left, _right) that
              // reports SELECTION from what_type().
 16 |         class basic_selection_node : public basic_node<id_type>
 17 |         {
 18 |         public:
 19 |             using node = basic_node<id_type>;
 20 |             using bool_stack = typename node::bool_stack;
 21 |             using const_node_stack = typename node::const_node_stack;
 22 |             using node_ptr_vector = typename node::node_ptr_vector;
 23 |             using node_stack = typename node::node_stack;
 24 |             using node_type = typename node::node_type;
 25 | 
                  // The value passed to the base is true when either child
                  // reports nullable(). Both children are then handed this
                  // node's firstpos and lastpos containers; presumably each
                  // child appends its own positions to them (confirm in
                  // node.hpp). Neither child is owned.
 26 |             basic_selection_node(observer_ptr<node> left_,
 27 |                 observer_ptr<node> right_) :
 28 |                 node(left_->nullable() || right_->nullable()),
 29 |                 _left(left_),
 30 |                 _right(right_)
 31 |             {
 32 |                 _left->append_firstpos(node::firstpos());
 33 |                 _right->append_firstpos(node::firstpos());
 34 |                 _left->append_lastpos(node::lastpos());
 35 |                 _right->append_lastpos(node::lastpos());
 36 |             }
 37 | 
 38 |             ~basic_selection_node() override = default;
 39 | 
 40 |             node_type what_type() const override
 41 |             {
 42 |                 return node::node_type::SELECTION;
 43 |             }
 44 | 
                  // Schedules both children for a visit. Pushes true onto
                  // perform_op_stack_, plus an extra false when the right
                  // child is itself a SEQUENCE, SELECTION or ITERATION node.
                  // The right child is pushed before the left, so the left
                  // child ends up on top of node_stack_. Always returns true.
 45 |             bool traverse(const_node_stack& node_stack_,
 46 |                 bool_stack& perform_op_stack_) const override
 47 |             {
 48 |                 perform_op_stack_.push(true);
 49 | 
 50 |                 switch (_right->what_type())
 51 |                 {
 52 |                 case node::node_type::SEQUENCE:
 53 |                 case node::node_type::SELECTION:
 54 |                 case node::node_type::ITERATION:
 55 |                     perform_op_stack_.push(false);
 56 |                     break;
 57 |                 default:
 58 |                     break;
 59 |                 }
 60 | 
 61 |                 node_stack_.push(_right);
 62 |                 node_stack_.push(_left);
 63 |                 return true;
 64 |             }
 65 | 
 66 |         private:
 67 |             observer_ptr<node> _left;
 68 |             observer_ptr<node> _right;
 69 | 
                  // When the flag on top of perform_op_stack_ is set, takes
                  // the two topmost entries of new_node_stack_ (right operand
                  // on top, left beneath it), combines them into a fresh
                  // selection node stored in node_ptr_vector_, and leaves that
                  // node on the stack in their place. Otherwise only sets
                  // down_. The flag is popped either way.
 70 |             void copy_node(node_ptr_vector& node_ptr_vector_,
 71 |                 node_stack& new_node_stack_, bool_stack& perform_op_stack_,
 72 |                 bool& down_) const override
 73 |             {
 74 |                 if (perform_op_stack_.top())
 75 |                 {
 76 |                     observer_ptr<node> rhs_ = new_node_stack_.top();
 77 | 
 78 |                     new_node_stack_.pop();
 79 | 
 80 |                     observer_ptr<node> lhs_ = new_node_stack_.top();
 81 | 
 82 |                     node_ptr_vector_.push_back(std::make_unique
 83 |                         <basic_selection_node>(lhs_, rhs_));
 84 |                     new_node_stack_.top() = node_ptr_vector_.back().get();
 85 |                 }
 86 |                 else
 87 |                 {
 88 |                     down_ = true;
 89 |                 }
 90 | 
 91 |                 perform_op_stack_.pop();
 92 |             }
 93 | 
 94 |             // No copy construction.
 95 |             basic_selection_node(const basic_selection_node&) = delete;
 96 |             // No assignment.
 97 |             const basic_selection_node& operator =
 98 |                 (const basic_selection_node&) = delete;
 99 |         };
100 |     }
101 | }
102 | 
103 | #endif
104 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tree/leaf_node.hpp:
--------------------------------------------------------------------------------
  1 | // leaf_node.hpp
  2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_LEAF_NODE_HPP
  7 | #define LEXERTL_LEAF_NODE_HPP
  8 | 
  9 | #include "../../enums.hpp" // null_token
 10 | #include "node.hpp"
 11 | 
 12 | namespace lexertl
 13 | {
 14 |     namespace detail
 15 |     {
 16 |         template<typename id_type>
              // Childless syntax-tree node holding a single token id, a greedy
              // flag and its own followpos list; reports LEAF from
              // what_type().
 17 |         class basic_leaf_node : public basic_node<id_type>
 18 |         {
 19 |         public:
 20 |             using node = basic_node<id_type>;
 21 |             using bool_stack = typename node::bool_stack;
 22 |             using const_node_stack = typename node::const_node_stack;
 23 |             using node_ptr_vector = typename node::node_ptr_vector;
 24 |             using node_stack = typename node::node_stack;
 25 |             using node_type = typename node::node_type;
 26 |             using node_vector = typename node::node_vector;
 27 | 
                  // The value passed to the base is true exactly when token_
                  // equals null_token(). Unless nullable() then reports true,
                  // the leaf registers itself as its own only first and last
                  // position.
                  // _set_greedy starts as !greedy_, so a leaf constructed
                  // non-greedy is already locked against greedy(bool), while
                  // one constructed greedy can still be changed once.
 28 |             basic_leaf_node(const id_type token_, const bool greedy_) :
 29 |                 node(token_ == node::null_token()),
 30 |                 _token(token_),
 31 |                 _set_greedy(!greedy_),
 32 |                 _greedy(greedy_)
 33 |             {
 34 |                 if (!node::nullable())
 35 |                 {
 36 |                     node::firstpos().push_back(this);
 37 |                     node::lastpos().push_back(this);
 38 |                 }
 39 |             }
 40 | 
 41 |             ~basic_leaf_node() override = default;
 42 | 
                  // Appends every entry of followpos_ to this leaf's list;
                  // duplicates are not filtered here.
 43 |             void append_followpos
 44 |                 (const node_vector& followpos_) override
 45 |             {
 46 |                 _followpos.insert(_followpos.end(),
 47 |                     followpos_.begin(), followpos_.end());
 48 |             }
 49 | 
 50 |             node_type what_type() const override
 51 |             {
 52 |                 return node::node_type::LEAF;
 53 |             }
 54 | 
                  // A leaf has no children to schedule, so both stacks are
                  // left untouched and false is returned.
 55 |             bool traverse(const_node_stack&/*node_stack_*/,
 56 |                 bool_stack&/*perform_op_stack_*/) const override
 57 |             {
 58 |                 return false;
 59 |             }
 60 | 
 61 |             id_type token() const override
 62 |             {
 63 |                 return _token;
 64 |             }
 65 | 
                  // True once the greedy flag can no longer be changed.
 66 |             bool set_greedy() const override
 67 |             {
 68 |                 return _set_greedy;
 69 |             }
 70 | 
                  // Stores greedy_ and locks the flag, but only if it has not
                  // been locked already; later calls are ignored.
 71 |             void greedy(const bool greedy_) override
 72 |             {
 73 |                 if (!_set_greedy)
 74 |                 {
 75 |                     _greedy = greedy_;
 76 |                     _set_greedy = true;
 77 |                 }
 78 |             }
 79 | 
 80 |             bool greedy() const override
 81 |             {
 82 |                 return _greedy;
 83 |             }
 84 | 
 85 |             const node_vector& followpos() const override
 86 |             {
 87 |                 return _followpos;
 88 |             }
 89 | 
 90 |             node_vector& followpos() override
 91 |             {
 92 |                 return _followpos;
 93 |             }
 94 | 
 95 |         private:
 96 |             id_type _token;
 97 |             bool _set_greedy;
 98 |             bool _greedy;
 99 |             node_vector _followpos;
100 | 
                  // Creates a new leaf with the same token and current greedy
                  // value, stores it in node_ptr_vector_ and pushes it onto
                  // new_node_stack_. The followpos list is not copied.
101 |             void copy_node(node_ptr_vector& node_ptr_vector_,
102 |                 node_stack& new_node_stack_, bool_stack&/*perform_op_stack_*/,
103 |                 bool&/*down_*/) const override
104 |             {
105 |                 node_ptr_vector_.push_back(std::make_unique<basic_leaf_node>
106 |                     (_token, _greedy));
107 |                 new_node_stack_.push(node_ptr_vector_.back().get());
108 |             }
109 |         };
110 |     }
111 | }
112 | 
113 | #endif
114 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tree/end_node.hpp:
--------------------------------------------------------------------------------
  1 | // end_node.hpp
  2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_END_NODE_HPP
  7 | #define LEXERTL_END_NODE_HPP
  8 | 
  9 | #include "node.hpp"
 10 | 
 11 | namespace lexertl
 12 | {
 13 |     namespace detail
 14 |     {
 15 |         template<typename id_type>
 16 |         class basic_end_node : public basic_node<id_type>
 17 |         {
 18 |         public:
 19 |             using node = basic_node<id_type>;
 20 |             using bool_stack = typename node::bool_stack;
 21 |             using const_node_stack = typename node::const_node_stack;
 22 |             using node_ptr_vector = typename node::node_ptr_vector;
 23 |             using node_stack = typename node::node_stack;
 24 |             using node_type = typename node::node_type;
 25 |             using node_vector = typename node::node_vector;
 26 | 
 27 |             basic_end_node(const id_type id_, const id_type user_id_,
 28 |                 const id_type next_dfa_, const id_type push_dfa_,
 29 |                 const bool pop_dfa_, const bool greedy_) :
 30 |                 node(false),
 31 |                 _id(id_),
 32 |                 _user_id(user_id_),
 33 |                 _next_dfa(next_dfa_),
 34 |                 _push_dfa(push_dfa_),
 35 |                 _pop_dfa(pop_dfa_),
 36 |                 _greedy(greedy_)
 37 |             {
 38 |                 node::firstpos().push_back(this);
 39 |                 node::lastpos().push_back(this);
 40 |             }
 41 | 
 42 |             ~basic_end_node() override = default;
 43 | 
 44 |             node_type what_type() const override
 45 |             {
 46 |                 return node::node_type::END;
 47 |             }
 48 | 
 49 |             bool traverse(const_node_stack&/*node_stack_*/,
 50 |                 bool_stack&/*perform_op_stack_*/) const override
 51 |             {
 52 |                 return false;
 53 |             }
 54 | 
 55 |             bool greedy() const override
 56 |             {
 57 |                 return _greedy;
 58 |             }
 59 | 
 60 |             const node_vector& followpos() const override
 61 |             {
 62 |                 // _followpos is always empty..!
 63 |                 return _followpos;
 64 |             }
 65 | 
 66 |             node_vector& followpos() override
 67 |             {
 68 |                 // _followpos is always empty..!
 69 |                 return _followpos;
 70 |             }
 71 | 
 72 |             bool end_state() const override
 73 |             {
 74 |                 return true;
 75 |             }
 76 | 
 77 |             id_type id() const override
 78 |             {
 79 |                 return _id;
 80 |             }
 81 | 
 82 |             id_type user_id() const override
 83 |             {
 84 |                 return _user_id;
 85 |             }
 86 | 
 87 |             id_type next_dfa() const override
 88 |             {
 89 |                 return _next_dfa;
 90 |             }
 91 | 
 92 |             id_type push_dfa() const override
 93 |             {
 94 |                 return _push_dfa;
 95 |             }
 96 | 
 97 |             bool pop_dfa() const override
 98 |             {
 99 |                 return _pop_dfa;
100 |             }
101 | 
102 |         private:
103 |             id_type _id;
104 |             id_type _user_id;
105 |             id_type _next_dfa;
106 |             id_type _push_dfa;
107 |             bool _pop_dfa;
108 |             bool _greedy;
109 |             node_vector _followpos;
110 | 
111 |             void copy_node(node_ptr_vector&/*node_ptr_vector_*/,
112 |                 node_stack&/*new_node_stack_*/,
113 |                 bool_stack&/*perform_op_stack_*/,
114 |                 bool&/*down_*/) const override
115 |             {
116 |                 // Nothing to do, as end_nodes are not copied.
117 |             }
118 |         };
119 |     }
120 | }
121 | 
122 | #endif
123 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/match_results.hpp:
--------------------------------------------------------------------------------
  1 | // match_results.hpp
  2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef PARSERTL_MATCH_RESULTS_HPP
  7 | #define PARSERTL_MATCH_RESULTS_HPP
  8 | 
  9 | #include "runtime_error.hpp"
 10 | #include "state_machine.hpp"
 11 | #include <vector>
 12 | 
 13 | namespace parsertl
 14 | {
 15 |     template<typename sm_type>
 16 |     struct basic_match_results
 17 |     {
 18 |         using id_type = typename sm_type::id_type;
 19 |         std::vector<id_type> stack;
 20 |         id_type token_id = static_cast<id_type>(~0);
 21 |         typename sm_type::entry entry;
 22 | 
 23 |         basic_match_results()
 24 |         {
 25 |             stack.push_back(0);
 26 |             entry.action = action::error;
 27 |             entry.param = static_cast<id_type>(error_type::unknown_token);
 28 |         }
 29 | 
 30 |         explicit basic_match_results(const std::size_t reserved_) :
 31 |             stack(reserved_)
 32 |         {
 33 |             basic_match_results();
 34 |         }
 35 | 
 36 |         basic_match_results(const id_type token_id_, const sm_type& sm_)
 37 |         {
 38 |             reset(token_id_, sm_);
 39 |         }
 40 | 
 41 |         basic_match_results(const id_type token_id_, const sm_type& sm_,
 42 |             const std::size_t reserved_) :
 43 |             stack(reserved_)
 44 |         {
 45 |             basic_match_results(token_id_, sm_);
 46 |         }
 47 | 
 48 |         void clear()
 49 |         {
 50 |             stack.clear();
 51 |             stack.push_back(0);
 52 |             token_id = static_cast<id_type>(~0);
 53 |             entry.clear();
 54 |         }
 55 | 
 56 |         void reset(const id_type token_id_, const sm_type& sm_)
 57 |         {
 58 |             stack.clear();
 59 |             stack.push_back(0);
 60 |             token_id = token_id_;
 61 | 
 62 |             if (token_id == static_cast<id_type>(~0))
 63 |             {
 64 |                 entry.action = action::error;
 65 |                 entry.param = static_cast<id_type>(error_type::unknown_token);
 66 |             }
 67 |             else
 68 |             {
 69 |                 entry = sm_.at(stack.back(), token_id);
 70 |             }
 71 |         }
 72 | 
 73 |         id_type reduce_id() const
 74 |         {
 75 |             if (entry.action != action::reduce)
 76 |             {
 77 |                 throw runtime_error("Not in a reduce state!");
 78 |             }
 79 | 
 80 |             return entry.param;
 81 |         }
 82 | 
 83 |         template<typename token_vector>
 84 |         typename token_vector::value_type& dollar(const std::size_t index_,
 85 |             const sm_type& sm_, token_vector& productions) const
 86 |         {
 87 |             if (entry.action != action::reduce)
 88 |             {
 89 |                 throw runtime_error("Not in a reduce state!");
 90 |             }
 91 | 
 92 |             return productions[productions.size() -
 93 |                 production_size(sm_, entry.param) + index_];
 94 |         }
 95 | 
 96 |         template<typename token_vector>
 97 |         const typename token_vector::value_type&
 98 |             dollar(const std::size_t index_, const sm_type& sm_,
 99 |                 const token_vector& productions) const
100 |         {
101 |             if (entry.action != action::reduce)
102 |             {
103 |                 throw runtime_error("Not in a reduce state!");
104 |             }
105 | 
106 |             return productions[productions.size() -
107 |                 production_size(sm_, entry.param) + index_];
108 |         }
109 | 
110 |         std::size_t production_size(const sm_type& sm,
111 |             const std::size_t index_) const
112 |         {
113 |             return sm._rules[index_].second.size();
114 |         }
115 | 
116 |         bool operator ==(const basic_match_results& rhs_) const
117 |         {
118 |             return stack == rhs_.stack &&
119 |                 token_id == rhs_.token_id &&
120 |                 entry == rhs_.entry;
121 |         }
122 |     };
123 | 
124 |     using match_results = basic_match_results<state_machine>;
125 |     using uncompressed_match_results =
126 |         basic_match_results<uncompressed_state_machine>;
127 | }
128 | 
129 | #endif
130 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/replace.hpp:
--------------------------------------------------------------------------------
  1 | // replace.hpp
  2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_REPLACE_HPP
  7 | #define LEXERTL_REPLACE_HPP
  8 | 
  9 | #include "lookup.hpp"
 10 | #include "state_machine.hpp"
 11 | #include "match_results.hpp"
 12 | 
 13 | namespace lexertl
 14 | {
 15 |     template<class out_iter, class fwd_iter,
 16 |         class id_type, class char_type,
 17 |         class traits, class alloc>
 18 |     out_iter replace(out_iter out, fwd_iter first, fwd_iter second,
 19 |         const basic_state_machine<char_type, id_type>& sm,
 20 |         const std::basic_string<char_type, traits, alloc>& fmt)
 21 |     {
 22 |         return replace(out, first, second, sm, fmt.c_str());
 23 |     }
 24 | 
 25 |     template<class out_iter, class fwd_iter,
 26 |         class id_type, class char_type>
 27 |     out_iter replace(out_iter out, fwd_iter first, fwd_iter second,
 28 |         const basic_state_machine<char_type, id_type>& sm,
 29 |         const char_type* fmt)
 30 |     {
 31 |         const char_type* end_fmt = fmt;
 32 |         fwd_iter last = first;
 33 |         lexertl::match_results<fwd_iter> results(first, second);
 34 | 
 35 |         while (*end_fmt)
 36 |             ++end_fmt;
 37 | 
 38 |         // Lookahead
 39 |         lexertl::lookup(sm, results);
 40 | 
 41 |         while (results.id != 0)
 42 |         {
 43 |             std::copy(last, results.first, out);
 44 |             std::copy(fmt, end_fmt, out);
 45 |             last = results.second;
 46 |             lexertl::lookup(sm, results);
 47 |         }
 48 | 
 49 |         std::copy(last, results.first, out);
 50 |         return out;
 51 |     }
 52 | 
 53 |     template<class id_type, class char_type,
 54 |         class straits, class salloc,
 55 |         class ftraits, class falloc>
 56 |     std::basic_string<char_type, straits, salloc>
 57 |         replace(const std::basic_string<char_type, straits, salloc>& s,
 58 |         const basic_state_machine<char_type, id_type>& sm,
 59 |         const std::basic_string<char_type, ftraits, falloc>& fmt)
 60 |     {
 61 |         std::basic_string<char_type, straits, salloc> ret;
 62 | 
 63 |         replace(std::back_inserter(ret), s.cbegin(), s.cend(), sm, fmt);
 64 |         return ret;
 65 |     }
 66 | 
 67 |     template<class id_type, class char_type,
 68 |         class straits, class salloc>
 69 |     std::basic_string<char_type, straits, salloc>
 70 |         replace(const std::basic_string<char_type, straits, salloc>& s,
 71 |             const basic_state_machine<char_type, id_type>& sm,
 72 |             const char_type* fmt)
 73 |     {
 74 |         std::basic_string<char_type, straits, salloc> ret;
 75 | 
 76 |         replace(std::back_inserter(ret), s.cbegin(), s.cend(), sm, fmt);
 77 |         return ret;
 78 |     }
 79 | 
 80 |     template<class id_type, class char_type,
 81 |         class straits, class salloc>
 82 |     std::basic_string<char_type, straits, salloc>
 83 |         replace(const char_type* s,
 84 |             const basic_state_machine<char_type, id_type>& sm,
 85 |             const std::basic_string<char_type, straits, salloc>& fmt)
 86 |     {
 87 |         std::basic_string<char_type, straits, salloc> ret;
 88 |         const char_type* end_s = s;
 89 |         
 90 |         while (*end_s)
 91 |             ++end_s;
 92 | 
 93 |         replace(std::back_inserter(ret), s, end_s, sm, fmt);
 94 |         return ret;
 95 |     }
 96 | 
 97 |     template<class id_type, class char_type>
 98 |     std::basic_string<char_type> replace(const char_type* s,
 99 |         const basic_state_machine<char_type, id_type>& sm,
100 |         const char_type* fmt)
101 |     {
102 |         std::basic_string<char_type> ret;
103 |         const char_type* end_s = s; while (*end_s) ++end_s;
104 |         const char_type* last = s;
105 |         lexertl::match_results<const char_type*> results(s, end_s);
106 | 
107 |         // Lookahead
108 |         lexertl::lookup(sm, results);
109 | 
110 |         while (results.id != 0)
111 |         {
112 |             ret.append(last, results.first);
113 |             ret.append(fmt);
114 |             last = results.second;
115 |             lexertl::lookup(sm, results);
116 |         }
117 | 
118 |         ret.append(last, results.first);
119 |         return ret;
120 |     }
121 | }
122 | 
123 | #endif
124 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tree/sequence_node.hpp:
--------------------------------------------------------------------------------
  1 | // sequence_node.hpp
  2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_SEQUENCE_NODE_HPP
  7 | #define LEXERTL_SEQUENCE_NODE_HPP
  8 | 
  9 | #include "node.hpp"
 10 | 
 11 | namespace lexertl
 12 | {
 13 |     namespace detail
 14 |     {
 15 |         template<typename id_type>
 16 |         class basic_sequence_node : public basic_node<id_type>
 17 |         {
 18 |         public:
 19 |             using node = basic_node<id_type>;
 20 |             using bool_stack = typename node::bool_stack;
 21 |             using const_node_stack = typename node::const_node_stack;
 22 |             using node_ptr_vector = typename node::node_ptr_vector;
 23 |             using node_stack = typename node::node_stack;
 24 |             using node_type = typename node::node_type;
 25 |             using node_vector = typename node::node_vector;
 26 | 
 27 |             basic_sequence_node(observer_ptr<node> left_,
 28 |                 observer_ptr<node> right_) :
 29 |                 node(left_->nullable() && right_->nullable()),
 30 |                 _left(left_),
 31 |                 _right(right_)
 32 |             {
 33 |                 _left->append_firstpos(node::firstpos());
 34 | 
 35 |                 if (_left->nullable())
 36 |                 {
 37 |                     _right->append_firstpos(node::firstpos());
 38 |                 }
 39 | 
 40 |                 if (_right->nullable())
 41 |                 {
 42 |                     _left->append_lastpos(node::lastpos());
 43 |                 }
 44 | 
 45 |                 _right->append_lastpos(node::lastpos());
 46 | 
 47 |                 node_vector& lastpos_ = _left->lastpos();
 48 |                 const node_vector& firstpos_ = _right->firstpos();
 49 | 
 50 |                 for (observer_ptr<node> node_ : lastpos_)
 51 |                 {
 52 |                     node_->append_followpos(firstpos_);
 53 |                 }
 54 |             }
 55 | 
 56 |             ~basic_sequence_node() override = default;
 57 | 
 58 |             node_type what_type() const override
 59 |             {
 60 |                 return node::node_type::SEQUENCE;
 61 |             }
 62 | 
 63 |             bool traverse(const_node_stack& node_stack_,
 64 |                 bool_stack& perform_op_stack_) const override
 65 |             {
 66 |                 perform_op_stack_.push(true);
 67 | 
 68 |                 switch (_right->what_type())
 69 |                 {
 70 |                 case node::node_type::SEQUENCE:
 71 |                 case node::node_type::SELECTION:
 72 |                 case node::node_type::ITERATION:
 73 |                     perform_op_stack_.push(false);
 74 |                     break;
 75 |                 default:
 76 |                     break;
 77 |                 }
 78 | 
 79 |                 node_stack_.push(_right);
 80 |                 node_stack_.push(_left);
 81 |                 return true;
 82 |             }
 83 | 
 84 |         private:
 85 |             observer_ptr<node> _left;
 86 |             observer_ptr<node> _right;
 87 | 
 88 |             void copy_node(node_ptr_vector& node_ptr_vector_,
 89 |                 node_stack& new_node_stack_, bool_stack& perform_op_stack_,
 90 |                 bool& down_) const override
 91 |             {
 92 |                 if (perform_op_stack_.top())
 93 |                 {
 94 |                     observer_ptr<node> rhs_ = new_node_stack_.top();
 95 | 
 96 |                     new_node_stack_.pop();
 97 | 
 98 |                     observer_ptr<node> lhs_ = new_node_stack_.top();
 99 | 
100 |                     node_ptr_vector_.push_back(std::make_unique
101 |                         <basic_sequence_node>(lhs_, rhs_));
102 |                     new_node_stack_.top() = node_ptr_vector_.back().get();
103 |                 }
104 |                 else
105 |                 {
106 |                     down_ = true;
107 |                 }
108 | 
109 |                 perform_op_stack_.pop();
110 |             }
111 | 
112 |             // No copy construction.
113 |             basic_sequence_node(const basic_sequence_node&) = delete;
114 |             // No assignment.
115 |             const basic_sequence_node& operator =
116 |                 (const basic_sequence_node&) = delete;
117 |         };
118 |     }
119 | }
120 | 
121 | #endif
122 | 


--------------------------------------------------------------------------------
/lib/parle/lexer/iterator.hpp:
--------------------------------------------------------------------------------
  1 | // Based on lexertl/iterator.hpp
  2 | 
  3 | #ifndef PARLE_LEXER_ITERATOR_HPP
  4 | #define PARLE_LEXER_ITERATOR_HPP
  5 | 
  6 | #include <iterator>
  7 | #include <unordered_map>
  8 | #include "include/lexertl/lookup.hpp"
  9 | #include "include/lexertl/runtime_error.hpp"
 10 | 
 11 | #undef lookup
 12 | 
 13 | namespace parle
 14 | {
 15 | namespace lexer
 16 | {
 17 | template<typename iter, typename sm_type, typename results, typename lexer_obj_type, typename token_cb_type, typename id_type>
 18 | class iterator
 19 | {
 20 | public:
 21 | 	using value_type = results;
 22 | 	using difference_type = ptrdiff_t;
 23 | 	using pointer = const value_type *;
 24 | 	using reference = const value_type &;
 25 | 	using iterator_category = std::forward_iterator_tag;
 26 | 	using cb_map = std::unordered_map<id_type, token_cb_type>;
 27 | 
 28 | 	iterator() :
 29 | 		_results(iter(), iter()),
 30 | 		_sm(nullptr),
 31 | 		_lex(nullptr)
 32 | 	{
 33 | 	}
 34 | 
 35 | 	iterator(const iter &start_, const iter &end_, lexer_obj_type &lex, bool do_next = false) :
 36 | 		_results(start_, end_),
 37 | 		_sm(&lex.sm),
 38 | 		_lex(&lex)
 39 | 	{
 40 | 
 41 | 		if (do_next) {
 42 | 			lookup();
 43 | 		}
 44 | 	}
 45 | 
 46 | 	void set_bol(bool bol)
 47 | 	{
 48 | 		_results.bol = bol;
 49 | 	}
 50 | 
 51 | 	void reset(const iter &start_, const iter &end_)
 52 | 	{
 53 | 		if (_results.first > start_) {
 54 | 			throw lexertl::runtime_error("Can only reset to a forward position");
 55 | 		}
 56 | 		_results.first = start_;
 57 | 		_results.second = start_;
 58 | 		_results.eoi = end_;
 59 | 	}
 60 | 
 61 | 	// Only need this because of warnings with gcc with -Weffc++
 62 | 	iterator(const iterator &rhs_)
 63 | 	{
 64 | 		_results = rhs_._results;
 65 | 		_sm = rhs_._sm;
 66 | 		_lex = rhs_._lex;
 67 | 	}
 68 | 
 69 | 	// Only need this because of warnings with gcc with -Weffc++
 70 | 	iterator &operator =(const iterator &rhs_)
 71 | 	{
 72 | 		if (&rhs_ != this)
 73 | 		{
 74 | 			_results = rhs_._results;
 75 | 			_sm = rhs_._sm;
 76 | 			_lex = rhs_._lex;
 77 | 		}
 78 | 
 79 | 		return *this;
 80 | 	}
 81 | 
 82 | 	iterator &operator ++()
 83 | 	{
 84 | 		lookup();
 85 | 		return *this;
 86 | 	}
 87 | 
 88 | 	iterator operator ++(int)
 89 | 	{
 90 | 		iterator iter_ = *this;
 91 | 
 92 | 		lookup();
 93 | 		return iter_;
 94 | 	}
 95 | 
 96 | 	const value_type &operator *() const
 97 | 	{
 98 | 		return _results;
 99 | 	}
100 | 
101 | 	const value_type *operator ->() const
102 | 	{
103 | 		return &_results;
104 | 	}
105 | 
106 | 	bool operator ==(const iterator &rhs_) const
107 | 	{
108 | 		return _sm == rhs_._sm && (_sm == nullptr ? true :
109 | 			_results == rhs_._results);
110 | 	}
111 | 
112 | 	bool operator !=(const iterator &rhs_) const
113 | 	{
114 | 		return !(*this == rhs_);
115 | 	}
116 | 
117 | public:
118 | 	size_t line = SIZE_MAX;
119 | 	size_t column = SIZE_MAX;
120 | private:
121 | 	value_type _results;
122 | 	const sm_type *_sm;
123 | 	lexer_obj_type *_lex;
124 | 
125 | 	void lookup()
126 | 	{
127 | 		if (_results.bol) {
128 | 			line++;
129 | 			column = 0;
130 | 		} else {
131 | 			column += _results.second - _results.first;
132 | 		}
133 | 
134 | 		lexertl::lookup(*_sm, _results);
135 | 
136 | 		if (_lex->cb_map.size() > 0) {
137 | 			auto it = _lex->cb_map.find(_results.id);
138 | 			if (_lex->cb_map.end() != it) {
139 | 				zval result;
140 | 				token_cb_type cb = it->second;
141 | 				zend_fcall_info fci;
142 | 				zend_fcall_info_cache fcc;
143 | 
144 | 				if (FAILURE == zend_fcall_info_init(&cb.cb, 0, &fci, &fcc, NULL, NULL)) {
145 | 					zend_throw_exception_ex(ParleLexerException_ce, 0, "Failed to prepare function call");
146 | 					if (_results.first == _results.eoi) {
147 | 						_sm = nullptr;
148 | 					}
149 | 					return;
150 | 				}
151 | 				ZVAL_NULL(&result);
152 | 				fci.retval = &result;
153 | 				fci.param_count = 0;
154 | 
155 | 				if (FAILURE == zend_call_function(&fci, &fcc)) {
156 | 					zend_throw_exception_ex(ParleLexerException_ce, 0, "Callback execution failed");
157 | 					if (_results.first == _results.eoi) {
158 | 						_sm = nullptr;
159 | 					}
160 | 					return;
161 | 				}
162 | 
163 | #if 0
164 | 				convert_to_boolean(&result);
165 | 				if (Z_TYPE(result) == IS_FALSE && _results.first != _results.eoi) {
166 | 					lexertl::lookup(*_sm, _results);
167 | 				}
168 | #endif
169 | 			}
170 | 		}
171 | 
172 | 		if (_results.first == _results.eoi) {
173 | 			_sm = nullptr;
174 | 		}
175 | 	}
176 | };
177 | }
178 | }
179 | 
180 | #endif
181 | 
182 | /*
183 |  * Local variables:
184 |  * tab-width: 4
185 |  * c-basic-offset: 4
186 |  * End:
187 |  * vim600: noet sw=4 ts=4 fdm=marker
188 |  * vim<600: noet sw=4 ts=4
189 |  */
190 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/iterator.hpp:
--------------------------------------------------------------------------------
  1 | // iterator.hpp
  2 | // Copyright (c) 2022-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef PARSERTL_ITERATOR_HPP
  7 | #define PARSERTL_ITERATOR_HPP
  8 | 
  9 | #include "../../../lexertl14/include/lexertl/iterator.hpp"
 10 | #include "lookup.hpp"
 11 | #include "match_results.hpp"
 12 | #include "token.hpp"
 13 | 
 14 | namespace parsertl
 15 | {
 16 |     template<typename lexer_iterator, typename sm_type,
 17 |         typename id_type = std::uint16_t>
 18 |     class iterator
 19 |     {
 20 |     public:
 21 |         using results = basic_match_results<sm_type>;
 22 |         using value_type = results;
 23 |         using difference_type = ptrdiff_t;
 24 |         using pointer = const value_type*;
 25 |         using reference = const value_type&;
 26 |         using iterator_category = std::forward_iterator_tag;
 27 | 
 28 |         // Qualify token to prevent arg dependant lookup
 29 |         using token = parsertl::token<lexer_iterator>;
 30 |         using token_vector = typename token::token_vector;
 31 | 
 32 |         iterator() = default;
 33 | 
 34 |         iterator(const lexer_iterator& iter_, const sm_type& sm_) :
 35 |             _iter(iter_),
 36 |             _results(_iter->id, sm_),
 37 |             _sm(&sm_)
 38 |         {
 39 |             // The first action can only ever be reduce
 40 |             // if the grammar treats no input as valid.
 41 |             if (_results.entry.action != action::reduce)
 42 |                 lookup();
 43 |         }
 44 | 
 45 |         iterator(const lexer_iterator& iter_, const sm_type& sm_,
 46 |             const std::size_t reserved_) :
 47 |             _iter(iter_),
 48 |             _results(_iter->id, sm_, reserved_),
 49 |             _productions(reserved_),
 50 |             _sm(&sm_)
 51 |         {
 52 |             // The first action can only ever be reduce
 53 |             // if the grammar treats no input as valid.
 54 |             if (_results.entry.action != action::reduce)
 55 |                 lookup();
 56 |         }
 57 | 
 58 |         typename token_vector::value_type dollar(const std::size_t index_) const
 59 |         {
 60 |             return _results.dollar(index_, *_sm, _productions);
 61 |         }
 62 | 
 63 |         iterator& operator ++()
 64 |         {
 65 |             lookup();
 66 |             return *this;
 67 |         }
 68 | 
 69 |         iterator operator ++(int)
 70 |         {
 71 |             iterator iter_ = *this;
 72 | 
 73 |             lookup();
 74 |             return iter_;
 75 |         }
 76 | 
 77 |         const value_type& operator *() const
 78 |         {
 79 |             return _results;
 80 |         }
 81 | 
 82 |         const value_type* operator ->() const
 83 |         {
 84 |             return &_results;
 85 |         }
 86 | 
 87 |         bool operator ==(const iterator& rhs_) const
 88 |         {
 89 |             return _sm == rhs_._sm &&
 90 |                 (_sm == nullptr ? true :
 91 |                     _results == rhs_._results);
 92 |         }
 93 | 
 94 |         bool operator !=(const iterator& rhs_) const
 95 |         {
 96 |             return !(*this == rhs_);
 97 |         }
 98 | 
 99 |     private:
100 |         lexer_iterator _iter;
101 |         basic_match_results<sm_type> _results;
102 |         token_vector _productions;
103 |         const sm_type* _sm = nullptr;
104 | 
105 |         void lookup()
106 |         {
107 |             // do while because we need to move past the current reduce action
108 |             do
109 |             {
110 |                 parsertl::lookup(_iter, *_sm, _results, _productions);
111 |             } while (_results.entry.action == action::shift ||
112 |                 _results.entry.action == action::go_to);
113 | 
114 |             switch (_results.entry.action)
115 |             {
116 |             case action::accept:
117 |             case action::error:
118 |                 _sm = nullptr;
119 |                 break;
120 |             default:
121 |                 break;
122 |             }
123 |         }
124 |     };
125 | 
126 |     using siterator = iterator<lexertl::siterator, state_machine>;
127 |     using citerator = iterator<lexertl::citerator, state_machine>;
128 |     using wsiterator = iterator<lexertl::wsiterator, state_machine>;
129 |     using wciterator = iterator<lexertl::wciterator, state_machine>;
130 | }
131 | 
132 | #endif
133 | 


--------------------------------------------------------------------------------
/tests/lexer_007.phpt:
--------------------------------------------------------------------------------
  1 | --TEST--
  2 | Lex JSON
  3 | --SKIPIF--
  4 | <?php
  5 | 
  6 | if (!extension_loaded("parle")) print "skip";
  7 | if (!Parle\INTERNAL_UTF32) print "skip reqire internal UTF-32";
  8 | 
  9 | ?>
 10 | --FILE--
 11 | <?php
 12 | 
 13 | use Parle\RLexer;
 14 | use Parle\Token;
 15 | 
 16 | const eOpenOb = 42;
 17 | const eCloseOb = 43;
 18 | const eOpenArr = 44;
 19 | const eCloseArr = 45;
 20 | const eName = 46;
 21 | const eString = 47;
 22 | const eNumber = 48;
 23 | const eBoolean = 49;
 24 | const eNull = 50;
 25 | 
 26 | $lex = new RLexer;
 27 | 
 28 | /* UTF-32 */
 29 | $lex->insertMacro("STRING", "[\"]([ -\\x10ffff]{-}[\"\\\\]|\\\\([\"\\\\/bfnrt]|u[0-9a-fA-F]{4}))*[\"]");
 30 | $lex->insertMacro("NUMBER", "-?(0|[1-9]\\d*)([.]\\d+)?([eE][-+]?\\d+)?");
 31 | $lex->insertMacro("BOOL", "true|false");
 32 | $lex->insertMacro("NULL", "null");
 33 | 
 34 | $lex->pushState("END");
 35 | 
 36 | $lex->pushState("OBJECT");
 37 | $lex->pushState("NAME");
 38 | $lex->pushState("COLON");
 39 | $lex->pushState("OB_VALUE");
 40 | $lex->pushState("OB_COMMA");
 41 | 
 42 | $lex->pushState("ARRAY");
 43 | $lex->pushState("ARR_COMMA");
 44 | $lex->pushState("ARR_VALUE");
 45 | 
 46 | $lex->push("INITIAL", "[{]", eOpenOb, ">OBJECT:END");
 47 | $lex->push("INITIAL", "[[]", eOpenArr, ">ARRAY:END");
 48 | 
 49 | $lex->push("OBJECT,OB_COMMA", "[}]", eCloseOb, "<");
 50 | $lex->push("OBJECT,NAME", "{STRING}", eName, "COLON");
 51 | $lex->push("COLON", ":", Token::SKIP, "OB_VALUE");
 52 | 
 53 | $lex->push("OB_VALUE", "{STRING}", eString, "OB_COMMA");
 54 | $lex->push("OB_VALUE", "{NUMBER}", eNumber, "OB_COMMA");
 55 | $lex->push("OB_VALUE", "{BOOL}", eBoolean, "OB_COMMA");
 56 | $lex->push("OB_VALUE", "{NULL}", eNull, "OB_COMMA");
 57 | $lex->push("OB_VALUE", "[{]", eOpenOb, ">OBJECT:OB_COMMA");
 58 | $lex->push("OB_VALUE", "[[]", eOpenArr, ">ARRAY:OB_COMMA");
 59 | 
 60 | $lex->push("OB_COMMA", ",", Token::SKIP, "NAME");
 61 | 
 62 | $lex->push("ARRAY,ARR_COMMA", "\\]", eCloseArr, "<");
 63 | $lex->push("ARRAY,ARR_VALUE", "{STRING}", eString, "ARR_COMMA");
 64 | $lex->push("ARRAY,ARR_VALUE", "{NUMBER}", eNumber, "ARR_COMMA");
 65 | $lex->push("ARRAY,ARR_VALUE", "{BOOL}", eBoolean, "ARR_COMMA");
 66 | $lex->push("ARRAY,ARR_VALUE", "{NULL}", eNull, "ARR_COMMA");
 67 | $lex->push("ARRAY,ARR_VALUE", "[{]", eOpenOb, ">OBJECT:ARR_COMMA");
 68 | $lex->push("ARRAY,ARR_VALUE", "[[]", eOpenArr, ">ARRAY:ARR_COMMA");
 69 | 
 70 | $lex->push("ARR_COMMA", ",", Token::SKIP, "ARR_VALUE");
 71 | $lex->push("*", "[ \t\r\n]+", Token::SKIP, ".");
 72 | 
 73 | $lex->build();
 74 | 
 75 | $in = file_get_contents(dirname(__FILE__) . DIRECTORY_SEPARATOR . "lexer_003.json");
 76 | 
 77 | $lex->consume($in);
 78 | 
 79 | 
 80 | $lex->advance();
 81 | $tok = $lex->getToken();
 82 | while (Token::EOI != $tok->id) {
 83 | 	var_dump($tok);
 84 | 	$lex->advance();
 85 | 	$tok = $lex->getToken();
 86 | }
 87 | 
 88 | ?>
 89 | ==DONE==
 90 | --EXPECTF--
 91 | object(Parle\Token)#%d (2) {
 92 |   ["id"]=>
 93 |   int(42)
 94 |   ["value"]=>
 95 |   string(1) "{"
 96 | }
 97 | object(Parle\Token)#%d (2) {
 98 |   ["id"]=>
 99 |   int(46)
100 |   ["value"]=>
101 |   string(5) ""key""
102 | }
103 | object(Parle\Token)#%d (2) {
104 |   ["id"]=>
105 |   int(44)
106 |   ["value"]=>
107 |   string(1) "["
108 | }
109 | object(Parle\Token)#%d (2) {
110 |   ["id"]=>
111 |   int(47)
112 |   ["value"]=>
113 |   string(15) ""qelque choose""
114 | }
115 | object(Parle\Token)#%d (2) {
116 |   ["id"]=>
117 |   int(48)
118 |   ["value"]=>
119 |   string(2) "42"
120 | }
121 | object(Parle\Token)#%d (2) {
122 |   ["id"]=>
123 |   int(47)
124 |   ["value"]=>
125 |   string(8) ""füße""
126 | }
127 | object(Parle\Token)#%d (2) {
128 |   ["id"]=>
129 |   int(45)
130 |   ["value"]=>
131 |   string(1) "]"
132 | }
133 | object(Parle\Token)#%d (2) {
134 |   ["id"]=>
135 |   int(46)
136 |   ["value"]=>
137 |   string(5) ""obj""
138 | }
139 | object(Parle\Token)#%d (2) {
140 |   ["id"]=>
141 |   int(42)
142 |   ["value"]=>
143 |   string(1) "{"
144 | }
145 | object(Parle\Token)#%d (2) {
146 |   ["id"]=>
147 |   int(46)
148 |   ["value"]=>
149 |   string(6) ""prop""
150 | }
151 | object(Parle\Token)#%d (2) {
152 |   ["id"]=>
153 |   int(48)
154 |   ["value"]=>
155 |   string(2) "12"
156 | }
157 | object(Parle\Token)#%d (2) {
158 |   ["id"]=>
159 |   int(43)
160 |   ["value"]=>
161 |   string(1) "}"
162 | }
163 | object(Parle\Token)#%d (2) {
164 |   ["id"]=>
165 |   int(46)
166 |   ["value"]=>
167 |   string(6) ""some""
168 | }
169 | object(Parle\Token)#%d (2) {
170 |   ["id"]=>
171 |   int(50)
172 |   ["value"]=>
173 |   string(4) "null"
174 | }
175 | object(Parle\Token)#%d (2) {
176 |   ["id"]=>
177 |   int(43)
178 |   ["value"]=>
179 |   string(1) "}"
180 | }
181 | ==DONE==
182 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/memory_file.hpp:
--------------------------------------------------------------------------------
  1 | // memory_file.hpp
  2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/)
  3 | // Inspired by http://en.wikibooks.org/wiki/Optimizing_C%2B%2B/
  4 | // General_optimization_techniques/Input/Output#Memory-mapped_file
  5 | //
  6 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  7 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8 | 
  9 | #ifndef LEXERTL_MEMORY_FILE_HPP
 10 | #define LEXERTL_MEMORY_FILE_HPP
 11 | 
 12 | #include <cstddef>
 13 | 
 14 | #ifdef _WIN32
 15 | #include <Windows.h>
 16 | #else
 17 | #include <fcntl.h>
 18 | #include <unistd.h>
 19 | #include <sys/mman.h>
 20 | #include <sys/stat.h>
 21 | #endif
 22 | 
 23 | // Only files small enough to fit into memory are supported.
 24 | namespace lexertl
 25 | {
 26 |     template<typename char_type>
 27 |     class basic_memory_file
 28 |     {
 29 |     public:
 30 |         basic_memory_file() = default;
 31 | 
 32 |         explicit basic_memory_file(const char* pathname_)
 33 |         {
 34 |             open(pathname_);
 35 |         }
 36 | 
 37 |         // No copy construction.
 38 |         basic_memory_file(const basic_memory_file&) = delete;
 39 |         // No assignment.
 40 |         basic_memory_file& operator =(const basic_memory_file&) = delete;
 41 | 
 42 |         ~basic_memory_file()
 43 |         {
 44 |             close();
 45 |         }
 46 | 
 47 |         void open(const char* pathname_)
 48 |         {
 49 |             if (_data)
 50 |             {
 51 |                 close();
 52 |             }
 53 | 
 54 | #ifdef _WIN32
 55 |             _fh = ::CreateFileA(pathname_, GENERIC_READ, FILE_SHARE_READ,
 56 |                 nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
 57 |             _fmh = nullptr;
 58 | 
 59 |             if (_fh != INVALID_HANDLE_VALUE)
 60 |             {
 61 |                 _fmh = ::CreateFileMapping(_fh, nullptr, PAGE_READONLY, 0, 0,
 62 |                     nullptr);
 63 | 
 64 |                 if (_fmh != nullptr)
 65 |                 {
 66 |                     _data = static_cast<char_type*>(::MapViewOfFile
 67 |                     (_fmh, FILE_MAP_READ, 0, 0, 0));
 68 | 
 69 |                     if (_data)
 70 |                     {
 71 |                         _size = ::GetFileSize(_fh, nullptr) /
 72 |                             sizeof(char_type);
 73 |                     }
 74 |                 }
 75 |             }
 76 | #else
 77 |             _fh = ::open(pathname_, O_RDONLY);
 78 | 
 79 |             if (_fh > -1)
 80 |             {
 81 |                 struct stat sbuf_;
 82 | 
 83 |                 if (::fstat(_fh, &sbuf_) > -1)
 84 |                 {
 85 |                     _data = static_cast<const char_type*>
 86 |                         (::mmap(0, sbuf_.st_size, PROT_READ,
 87 |                             MAP_SHARED, _fh, 0));
 88 | 
 89 |                     if (_data == MAP_FAILED)
 90 |                     {
 91 |                         _data = nullptr;
 92 |                     }
 93 |                     else
 94 |                     {
 95 |                         _size = sbuf_.st_size / sizeof(char_type);
 96 |                     }
 97 |                 }
 98 |             }
 99 | #endif
100 |         }
101 | 
102 |         const char_type* data() const
103 |         {
104 |             return _data;
105 |         }
106 | 
107 |         std::size_t size() const
108 |         {
109 |             return _size;
110 |         }
111 | 
112 |         void close()
113 |         {
114 |             if (_data)
115 |             {
116 | #ifdef _WIN32
117 |                 ::UnmapViewOfFile(_data);
118 |                 ::CloseHandle(_fmh);
119 |                 ::CloseHandle(_fh);
120 | #else
121 |                 ::munmap(const_cast<char_type*>(_data), _size);
122 |                 ::close(_fh);
123 |                 _fh = 0;
124 | #endif
125 |                 _data = nullptr;
126 |                 _size = 0;
127 | #ifdef _WIN32
128 |                 _fh = nullptr;
129 |                 _fmh = nullptr;
130 | #endif
131 |             }
132 |         }
133 | 
134 |     private:
135 |         const char_type* _data = nullptr;
136 |         std::size_t _size = 0;
137 | #ifdef _WIN32
138 |         HANDLE _fh = nullptr;
139 |         HANDLE _fmh = nullptr;
140 | #else
141 |         int _fh = 0;
142 | #endif
143 |     };
144 | 
145 |     using memory_file = basic_memory_file<char>;
146 |     using wmemory_file = basic_memory_file<wchar_t>;
147 |     using u32memory_file = basic_memory_file<char32_t>;
148 | }
149 | 
150 | #endif
151 | 


--------------------------------------------------------------------------------
/tests/lexer_003.phpt:
--------------------------------------------------------------------------------
  1 | --TEST--
  2 | Lex JSON
  3 | --SKIPIF--
  4 | <?php
  5 | 
  6 | if (!extension_loaded("parle")) print "skip";
  7 | if (Parle\INTERNAL_UTF32) print "skip not for internal UTF-32";
  8 | 
  9 | ?>
 10 | --FILE--
 11 | <?php
 12 | 
 13 | use Parle\RLexer;
 14 | use Parle\Token;
 15 | 
 16 | const eOpenOb = 42;
 17 | const eCloseOb = 43;
 18 | const eOpenArr = 44;
 19 | const eCloseArr = 45;
 20 | const eName = 46;
 21 | const eString = 47;
 22 | const eNumber = 48;
 23 | const eBoolean = 49;
 24 | const eNull = 50;
 25 | 
 26 | $lex = new RLexer;
 27 | 
 28 | /* UTF-8 */
 29 | $lex->insertMacro("STRING", "[\"]([ -\x7f]{+}[\x80-\xbf]{+}[\xc2-\xdf]{+}[\xe0-\xef]{+}[\xf0-\xff]{-}[\"\\\]|\\\([\"\\\/bfnrt]|u[0-9a-fA-F]{4}))*[\"]");
 30 | $lex->insertMacro("NUMBER", "-?(0|[1-9]\\d*)([.]\\d+)?([eE][-+]?\\d+)?");
 31 | $lex->insertMacro("BOOL", "true|false");
 32 | $lex->insertMacro("NULL", "null");
 33 | 
 34 | $lex->pushState("END");
 35 | 
 36 | $lex->pushState("OBJECT");
 37 | $lex->pushState("NAME");
 38 | $lex->pushState("COLON");
 39 | $lex->pushState("OB_VALUE");
 40 | $lex->pushState("OB_COMMA");
 41 | 
 42 | $lex->pushState("ARRAY");
 43 | $lex->pushState("ARR_COMMA");
 44 | $lex->pushState("ARR_VALUE");
 45 | 
 46 | $lex->push("INITIAL", "[{]", eOpenOb, ">OBJECT:END");
 47 | $lex->push("INITIAL", "[[]", eOpenArr, ">ARRAY:END");
 48 | 
 49 | $lex->push("OBJECT,OB_COMMA", "[}]", eCloseOb, "<");
 50 | $lex->push("OBJECT,NAME", "{STRING}", eName, "COLON");
 51 | $lex->push("COLON", ":", Token::SKIP, "OB_VALUE");
 52 | 
 53 | $lex->push("OB_VALUE", "{STRING}", eString, "OB_COMMA");
 54 | $lex->push("OB_VALUE", "{NUMBER}", eNumber, "OB_COMMA");
 55 | $lex->push("OB_VALUE", "{BOOL}", eBoolean, "OB_COMMA");
 56 | $lex->push("OB_VALUE", "{NULL}", eNull, "OB_COMMA");
 57 | $lex->push("OB_VALUE", "[{]", eOpenOb, ">OBJECT:OB_COMMA");
 58 | $lex->push("OB_VALUE", "[[]", eOpenArr, ">ARRAY:OB_COMMA");
 59 | 
 60 | $lex->push("OB_COMMA", ",", Token::SKIP, "NAME");
 61 | 
 62 | $lex->push("ARRAY,ARR_COMMA", "\\]", eCloseArr, "<");
 63 | $lex->push("ARRAY,ARR_VALUE", "{STRING}", eString, "ARR_COMMA");
 64 | $lex->push("ARRAY,ARR_VALUE", "{NUMBER}", eNumber, "ARR_COMMA");
 65 | $lex->push("ARRAY,ARR_VALUE", "{BOOL}", eBoolean, "ARR_COMMA");
 66 | $lex->push("ARRAY,ARR_VALUE", "{NULL}", eNull, "ARR_COMMA");
 67 | $lex->push("ARRAY,ARR_VALUE", "[{]", eOpenOb, ">OBJECT:ARR_COMMA");
 68 | $lex->push("ARRAY,ARR_VALUE", "[[]", eOpenArr, ">ARRAY:ARR_COMMA");
 69 | 
 70 | $lex->push("ARR_COMMA", ",", Token::SKIP, "ARR_VALUE");
 71 | $lex->push("*", "[ \t\r\n]+", Token::SKIP, ".");
 72 | 
 73 | $lex->build();
 74 | 
 75 | $in = file_get_contents(dirname(__FILE__) . DIRECTORY_SEPARATOR . "lexer_003.json");
 76 | 
 77 | $lex->consume($in);
 78 | 
 79 | 
 80 | $lex->advance();
 81 | $tok = $lex->getToken();
 82 | while (Token::EOI != $tok->id) {
 83 | 	var_dump($tok);
 84 | 	$lex->advance();
 85 | 	$tok = $lex->getToken();
 86 | }
 87 | 
 88 | ?>
 89 | ==DONE==
 90 | --EXPECTF--
 91 | object(Parle\Token)#%d (2) {
 92 |   ["id"]=>
 93 |   int(42)
 94 |   ["value"]=>
 95 |   string(1) "{"
 96 | }
 97 | object(Parle\Token)#%d (2) {
 98 |   ["id"]=>
 99 |   int(46)
100 |   ["value"]=>
101 |   string(5) ""key""
102 | }
103 | object(Parle\Token)#%d (2) {
104 |   ["id"]=>
105 |   int(44)
106 |   ["value"]=>
107 |   string(1) "["
108 | }
109 | object(Parle\Token)#%d (2) {
110 |   ["id"]=>
111 |   int(47)
112 |   ["value"]=>
113 |   string(15) ""qelque choose""
114 | }
115 | object(Parle\Token)#%d (2) {
116 |   ["id"]=>
117 |   int(48)
118 |   ["value"]=>
119 |   string(2) "42"
120 | }
121 | object(Parle\Token)#%d (2) {
122 |   ["id"]=>
123 |   int(47)
124 |   ["value"]=>
125 |   string(8) ""füße""
126 | }
127 | object(Parle\Token)#%d (2) {
128 |   ["id"]=>
129 |   int(45)
130 |   ["value"]=>
131 |   string(1) "]"
132 | }
133 | object(Parle\Token)#%d (2) {
134 |   ["id"]=>
135 |   int(46)
136 |   ["value"]=>
137 |   string(5) ""obj""
138 | }
139 | object(Parle\Token)#%d (2) {
140 |   ["id"]=>
141 |   int(42)
142 |   ["value"]=>
143 |   string(1) "{"
144 | }
145 | object(Parle\Token)#%d (2) {
146 |   ["id"]=>
147 |   int(46)
148 |   ["value"]=>
149 |   string(6) ""prop""
150 | }
151 | object(Parle\Token)#%d (2) {
152 |   ["id"]=>
153 |   int(48)
154 |   ["value"]=>
155 |   string(2) "12"
156 | }
157 | object(Parle\Token)#%d (2) {
158 |   ["id"]=>
159 |   int(43)
160 |   ["value"]=>
161 |   string(1) "}"
162 | }
163 | object(Parle\Token)#%d (2) {
164 |   ["id"]=>
165 |   int(46)
166 |   ["value"]=>
167 |   string(6) ""some""
168 | }
169 | object(Parle\Token)#%d (2) {
170 |   ["id"]=>
171 |   int(50)
172 |   ["value"]=>
173 |   string(4) "null"
174 | }
175 | object(Parle\Token)#%d (2) {
176 |   ["id"]=>
177 |   int(43)
178 |   ["value"]=>
179 |   string(1) "}"
180 | }
181 | ==DONE==
182 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/serialise.hpp:
--------------------------------------------------------------------------------
  1 | // serialise.hpp
  2 | // Copyright (c) 2007-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_SERIALISE_HPP
  7 | #define LEXERTL_SERIALISE_HPP
  8 | 
  9 | #include "runtime_error.hpp"
 10 | #include "state_machine.hpp"
 11 | 
 12 | namespace lexertl
 13 | {
 14 |     namespace detail
 15 |     {
 16 |         template<typename char_type, typename id_type, class stream>
 17 |         void output_vec(const std::vector<id_type>& vec_, stream& stream_)
 18 |         {
 19 |             std::basic_ostringstream<char_type> ss_;
 20 |             std::basic_string<char_type> str_;
 21 |             std::size_t line_len_ = 0;
 22 | 
 23 |             stream_ << vec_.size() << '\n';
 24 | 
 25 |             for (const id_type l_ : vec_)
 26 |             {
 27 |                 ss_ << l_;
 28 |                 str_ = ss_.str();
 29 | 
 30 |                 if (line_len_ + str_.size() + 1 > 80)
 31 |                 {
 32 |                     stream_ << '\n' << str_ << ' ';
 33 |                     line_len_ = str_.size() + 1;
 34 |                 }
 35 |                 else
 36 |                 {
 37 |                     stream_ << str_ << ' ';
 38 |                     line_len_ += str_.size() + 1;
 39 |                 }
 40 | 
 41 |                 ss_.str("");
 42 |             }
 43 | 
 44 |             stream_ << '\n';
 45 |         }
 46 | 
 47 |         template<typename char_type, class stream, typename id_type>
 48 |         void input_vec(stream& stream_, std::vector<id_type>& vec_)
 49 |         {
 50 |             std::size_t num_ = 0;
 51 | 
 52 |             stream_>> num_;
 53 |             vec_.reserve(num_);
 54 | 
 55 |             for (std::size_t idx_ = 0; idx_ < num_; ++idx_)
 56 |             {
 57 |                 std::size_t id_ = 0;
 58 | 
 59 |                 stream_ >> id_;
 60 |                 vec_.push_back(static_cast<id_type>(id_));
 61 |             }
 62 |         }
 63 |     }
 64 | 
 65 |     template<typename char_type, typename id_type, class stream>
 66 |     void save(const basic_state_machine<char_type, id_type>& sm_,
 67 |         stream& stream_)
 68 |     {
 69 |         using internals = detail::basic_internals<id_type>;
 70 |         const internals& internals_ = sm_.data();
 71 | 
 72 |         // Version number
 73 |         stream_ << 1 << '\n';
 74 |         stream_ << sizeof(char_type) << '\n';
 75 |         stream_ << sizeof(id_type) << '\n';
 76 |         stream_ << internals_._eoi << '\n';
 77 |         stream_ << internals_._lookup.size() << '\n';
 78 | 
 79 |         for (const auto& vec_ : internals_._lookup)
 80 |         {
 81 |             detail::output_vec<char_type>(vec_, stream_);
 82 |         }
 83 | 
 84 |         detail::output_vec<char_type>(internals_._dfa_alphabet, stream_);
 85 |         stream_ << internals_._features << '\n';
 86 |         stream_ << internals_._dfa.size() << '\n';
 87 | 
 88 |         for (const auto& vec_ : internals_._dfa)
 89 |         {
 90 |             detail::output_vec<char_type>(vec_, stream_);
 91 |         }
 92 |     }
 93 | 
 94 |     template<typename char_type, typename id_type, class stream>
 95 |     void load(stream& stream_, basic_state_machine<char_type, id_type>& sm_)
 96 |     {
 97 |         using internals = detail::basic_internals<id_type>;
 98 |         internals& internals_ = sm_.data();
 99 |         std::size_t num_ = 0;
100 | 
101 |         internals_.clear();
102 |         // Version
103 |         stream_ >> num_;
104 |         // sizeof(char_type)
105 |         stream_ >> num_;
106 | 
107 |         if (num_ != sizeof(char_type))
108 |             throw runtime_error("char_type mismatch in lexertl::load()");
109 | 
110 |         // sizeof(id_type)
111 |         stream_ >> num_;
112 | 
113 |         if (num_ != sizeof(id_type))
114 |             throw runtime_error("id_type mismatch in lexertl::load()");
115 | 
116 |         stream_ >> internals_._eoi;
117 |         stream_ >> num_;
118 |         internals_._lookup.reserve(num_);
119 | 
120 |         for (std::size_t idx_ = 0; idx_ < num_; ++idx_)
121 |         {
122 |             internals_._lookup.emplace_back();
123 |             detail::input_vec<char_type>(stream_, internals_._lookup.back());
124 |         }
125 | 
126 |         detail::input_vec<char_type>(stream_, internals_._dfa_alphabet);
127 |         stream_ >> internals_._features;
128 |         stream_ >> num_;
129 |         internals_._dfa.reserve(num_);
130 | 
131 |         for (std::size_t idx_ = 0; idx_ < num_; ++idx_)
132 |         {
133 |             internals_._dfa.emplace_back();
134 |             detail::input_vec<char_type>(stream_, internals_._dfa.back());
135 |         }
136 |     }
137 | }
138 | 
139 | #endif
140 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/serialise.hpp:
--------------------------------------------------------------------------------
  1 | // serialise.hpp
  2 | // Copyright (c) 2007-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef PARSERTL_SERIALISE_HPP
  7 | #define PARSERTL_SERIALISE_HPP
  8 | 
  9 | #include "runtime_error.hpp"
 10 | #include "../../../lexertl14/include/lexertl/serialise.hpp"
 11 | #include "state_machine.hpp"
 12 | 
 13 | namespace parsertl
 14 | {
 15 |     template <typename id_type, class stream>
 16 |     void save(const basic_state_machine<id_type>& sm_, stream& stream_)
 17 |     {
 18 |         // Version number
 19 |         stream_ << 1 << '\n';
 20 |         stream_ << sizeof(id_type) << '\n';
 21 |         stream_ << sm_._columns << '\n';
 22 |         stream_ << sm_._rows << '\n';
 23 |         stream_ << sm_._rules.size() << '\n';
 24 | 
 25 |         for (const auto& rule_ : sm_._rules)
 26 |         {
 27 |             stream_ << rule_.first << '\n';
 28 |             lexertl::detail::output_vec<char>(rule_.second, stream_);
 29 |         }
 30 | 
 31 |         stream_ << sm_._captures.size() << '\n';
 32 | 
 33 |         for (const auto& capture_ : sm_._captures)
 34 |         {
 35 |             stream_ << capture_.first << '\n';
 36 |             stream_ << capture_.second.size() << '\n';
 37 | 
 38 |             for (const auto& pair_ : capture_.second)
 39 |             {
 40 |                 stream_ << pair_.first << ' ' << pair_.second << '\n';
 41 |             }
 42 |         }
 43 | 
 44 |         stream_ << sm_._table.size() << '\n';
 45 | 
 46 |         for (const auto& vec_ : sm_._table)
 47 |         {
 48 |             stream_ << vec_.size() << '\n';
 49 | 
 50 |             for (const auto& pair_ : vec_)
 51 |             {
 52 |                 stream_ << pair_.first << ' ';
 53 |                 stream_ << static_cast<std::size_t>(pair_.second.action) << ' ';
 54 |                 stream_ << pair_.second.param << '\n';
 55 |             }
 56 |         }
 57 |     }
 58 | 
 59 |     template <class stream, typename id_type>
 60 |     void load(stream& stream_, basic_state_machine<id_type>& sm_)
 61 |     {
 62 |         std::size_t num_ = 0;
 63 | 
 64 |         sm_.clear();
 65 |         // Version
 66 |         stream_ >> num_;
 67 |         // sizeof(id_type)
 68 |         stream_ >> num_;
 69 | 
 70 |         if (num_ != sizeof(id_type))
 71 |             throw runtime_error("id_type mismatch in parsertl::load()");
 72 | 
 73 |         stream_ >> sm_._columns;
 74 |         stream_ >> sm_._rows;
 75 |         stream_ >> num_;
 76 |         sm_._rules.reserve(num_);
 77 | 
 78 |         for (std::size_t idx_ = 0; idx_ < num_; ++idx_)
 79 |         {
 80 |             sm_._rules.emplace_back();
 81 | 
 82 |             auto& rule_ = sm_._rules.back();
 83 | 
 84 |             stream_ >> rule_.first;
 85 |             lexertl::detail::input_vec<char>(stream_, rule_.second);
 86 |         }
 87 | 
 88 |         stream_ >> num_;
 89 |         sm_._captures.reserve(num_);
 90 | 
 91 |         for (std::size_t idx_ = 0, rows_ = num_; idx_ < rows_; ++idx_)
 92 |         {
 93 |             sm_._captures.emplace_back();
 94 | 
 95 |             auto& capture_ = sm_._captures.back();
 96 | 
 97 |             stream_ >> capture_.first;
 98 |             stream_ >> num_;
 99 |             capture_.second.reserve(num_);
100 | 
101 |             for (std::size_t idx2_ = 0, entries_ = num_;
102 |                 idx2_ < entries_; ++idx2_)
103 |             {
104 |                 capture_.second.emplace_back();
105 | 
106 |                 auto& pair_ = capture_.second.back();
107 | 
108 |                 stream_ >> num_;
109 |                 pair_.first = static_cast<id_type>(num_);
110 |                 stream_ >> num_;
111 |                 pair_.second = static_cast<id_type>(num_);
112 |             }
113 |         }
114 | 
115 |         stream_ >> num_;
116 |         sm_._table.reserve(num_);
117 | 
118 |         for (std::size_t idx_ = 0, rows_ = num_; idx_ < rows_; ++idx_)
119 |         {
120 |             sm_._table.emplace_back();
121 | 
122 |             auto& vec_ = sm_._table.back();
123 | 
124 |             stream_ >> num_;
125 |             vec_.reserve(num_);
126 | 
127 |             for (std::size_t idx2_ = 0, entries_ = num_;
128 |                 idx2_ < entries_; ++idx2_)
129 |             {
130 |                 vec_.emplace_back();
131 | 
132 |                 auto& pair_ = vec_.back();
133 | 
134 |                 stream_ >> num_;
135 |                 pair_.first = static_cast<id_type>(num_);
136 |                 stream_ >> num_;
137 |                 pair_.second.action = static_cast<action>(num_);
138 |                 stream_ >> num_;
139 |                 pair_.second.param = static_cast<id_type>(num_);
140 |             }
141 |         }
142 |     }
143 | }
144 | 
145 | #endif
146 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | Parle provides lexing and parsing facilities for PHP
  3 | =============================================
Lexing and parsing are widely used in the PHP core and extensions. Usually such functionality is packed into a piece of C/C++ code and depends on tools like [flex](http://flex.sourceforge.net/), [re2c](http://re2c.org/), [Bison](http://www.gnu.org/software/bison/), [LEMON](http://www.hwaci.com/sw/lemon/) or similar. With Parle, it is possible to implement lexing and parsing in PHP while relying on the features and principles of the parser/lexer generator tools for C/C++. The Lexer and Parser classes are available in the Parle namespace.
The implementation is based on the work of [Ben Hanson](http://www.benhanson.net/):
  6 | 
  7 | - https://github.com/BenHanson/lexertl14
  8 | - https://github.com/BenHanson/parsertl14
  9 | 
The lexer uses pattern matching similar to flex. The parser is LALR(1).
 11 | 
PHP 7.4 and above is supported. A [C++14](http://en.cppreference.com/w/cpp/compiler_support) capable compiler is required. As of version 0.7.3, parle can optionally be compiled with internal UTF-32 support, making it possible to use Unicode character classes in patterns.
 13 | 
 14 | The full extension documentation is available in the [PHP Manual](http://php.net/parle).
 15 | 
 16 | Installation
 17 | ============
 18 | 
 19 | Read the [INSTALL.md](./INSTALL.md) documentation.
 20 | 
 21 | 
 22 | Example tokenizing comma separated integer list
 23 | ============================================
 24 | ```php
 25 | 
 26 | use Parle\Token;
 27 | use Parle\Lexer;
 28 | use Parle\LexerException;
 29 | 
 30 | /* name => id */
 31 | $token = array(
 32 |         "COMMA" => 1,
 33 |         "CRLF" => 2,
 34 |         "DECIMAL" => 3,
 35 | );
 36 | /* id => name */
 37 | $tokenIdToName = array_flip($token);
 38 | 
 39 | $lex = new Lexer;
 40 | $lex->push("[\x2c]", $token["COMMA"]);
 41 | $lex->push("[\r][\n]", $token["CRLF"]);
 42 | $lex->push("[\d]+", $token["DECIMAL"]);
 43 | $lex->build();
 44 | 
 45 | $in = "0,1,2\r\n3,42,5\r\n6,77,8\r\n";
 46 | 
 47 | $lex->consume($in);
 48 | 
 49 | do {
 50 |         $lex->advance();
 51 |         $tok = $lex->getToken();
 52 | 
 53 |         if (Token::UNKNOWN == $tok->id) {
 54 |                 throw new LexerException("Unknown token '{$tok->value}' at offset {$lex->marker}.");
 55 |         }
 56 | 
 57 |         echo "TOKEN: ", $tokenIdToName[$tok->id], PHP_EOL;
 58 | } while (Token::EOI != $tok->id);
 59 | 
 60 | ```
 61 | 
 62 | 
 63 | Example parsing comma separated number list
 64 | ===========================
 65 | ```php
 66 | 
 67 | use Parle\Lexer;
 68 | use Parle\Parser;
 69 | use Parle\ParserException;
 70 | 
 71 | $p = new Parser;
 72 | $p->token("CRLF");
 73 | $p->token("COMMA");
 74 | $p->token("INTEGER");
 75 | $p->token("'\"'");
 76 | $p->push("START", "RECORDS");
 77 | $prod_record_0 = $p->push("RECORDS", "RECORD CRLF");
 78 | $prod_record_1 = $p->push("RECORDS", "RECORDS RECORD CRLF");
 79 | $prod_int_0 = $p->push("RECORD", "INTEGER");
 80 | $prod_int_1 = $p->push("RECORD", "RECORD COMMA INTEGER");
 81 | $p->push("DECIMAL", "INTEGER COMMA INTEGER"); /* Production index unused. */
 82 | $prod_dec_0 = $p->push("RECORD", "'\"' DECIMAL '\"'");
 83 | $prod_dec_1 = $p->push("RECORD", "RECORD COMMA '\"' DECIMAL '\"'");
 84 | $p->build();
 85 | 
 86 | $lex = new Lexer;
 87 | $lex->push("[\x2c]", $p->tokenId("COMMA"));
 88 | $lex->push("[\r][\n]", $p->tokenId("CRLF"));
 89 | $lex->push("[\d]+", $p->tokenId("INTEGER"));
 90 | $lex->push("[\x22]", $p->tokenId("'\"'"));
 91 | $lex->build();
 92 | 
 93 | /* Specifically using comma as both list separator and as a decimal mark. */
 94 | $in = "000,111,222\r\n\"333,3\",444,555\r\n666,777,\"888,8\"\r\n";
 95 | 
 96 | $p->consume($in, $lex);
 97 | 
 98 | do {
 99 | 	switch ($p->action) {
100 | 		case Parser::ACTION_ERROR:
101 | 			$err = $p->errorInfo();
102 | 			if (Parser::ERROR_UNKNOWN_TOKEN == $err->id) {
103 | 				$tok = $err->token;
104 | 				$msg = "Unknown token '{$tok->value}' at offset {$err->position}";
105 | 			} else if (Parser::ERROR_NON_ASSOCIATIVE == $err->id) {
106 | 				$tok = $err->token;
107 | 				$msg = "Token '{$tok->id}' at offset {$lex->marker} is not associative";
108 | 			} else if (Parser::ERROR_SYNTAX == $err->id) {
109 | 				$tok = $err->token;
110 | 				$msg = "Syntax error at offset {$lex->marker}";
111 | 			} else {
112 | 				$msg = "Parse error";
113 | 			}
114 | 			throw new ParserException($msg);
115 | 			break;
116 | 		case Parser::ACTION_SHIFT:
117 | 		case Parser::ACTION_GOTO:
118 | 		case Parser::ACTION_ACCEPT:
119 | 			continue;
120 | 			break;
121 | 		case Parser::ACTION_REDUCE:
122 | 			switch ($p->reduceId) {
123 | 				case $prod_int_0:
124 | 					/* INTEGER */
125 | 					echo $p->sigil(), PHP_EOL;
126 | 					break;
127 | 				case $prod_int_1:
128 | 					/* RECORD COMMA INTEGER */
129 | 					echo $p->sigil(2), PHP_EOL;
130 | 					break;
131 | 				case $prod_dec_0:
132 | 					/* '\"' DECIMAL '\"' */
133 | 					echo $p->sigil(1), PHP_EOL;
134 | 					break;
135 | 				case $prod_dec_1:
136 | 					/* RECORD COMMA '\"' DECIMAL '\"' */
137 | 					echo $p->sigil(3), PHP_EOL;
138 | 					break;
139 | 				case $prod_record_0:
140 | 				case $prod_record_1:
141 | 					echo "=====", PHP_EOL;
142 | 					break;
143 | 			}
144 | 			break;
145 | 	}
146 | 	$p->advance();
147 | } while (Parser::ACTION_ACCEPT != $p->action);
148 | 
149 | ```
150 | 
151 | 


--------------------------------------------------------------------------------
/bench/parse_str.impl.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Parle;
  4 | 
  5 | use Parle\{Parser, ParserException, Lexer, Token, Stack};
  6 | 
  7 | /* As in the PHP manual, strings like first=value&arr[]=foo+bar&arr[]=baz */
  8 | /* Native urldecode, array and some other helper implementations used. */
  9 | 
 10 | class ParseStrParser extends Parser
 11 | {
 12 | 	protected $lex;
 13 | 	protected $stack;
 14 | 	protected $tokenNameToId = array();
 15 | 	protected $tokenIdToName = array();
 16 | 	protected $prodHandler = array();
 17 | 	protected $result = array();
 18 | 	protected $debug = false;
 19 | 
	/**
	 * Remembers the lexer to use, starts with a fresh key stack and stores
	 * the debug flag.
	 */
	public function __construct(Lexer $lex, bool $debug = false)
	{
		$this->debug = $debug;
		$this->stack = new Stack;
		$this->lex = $lex;
	}
 26 | 
	/**
	 * Registers the terminals and the grammar productions in their fixed
	 * order, then builds the parser and the lexer.
	 */
	public function init()
	{
		/* associativity, symbol, lexer pattern */
		$terminals = array(
			array("left", "'='", "[=]"),
			array("token", "']'", "[\]]"),
			array("right", "'['", "[\[]"),
			array("left", "'&'", "[&]"),
			array("token", "T_STR", "[^=\[\]&\s]+"),
		);

		foreach ($terminals as list($assoc, $sym, $reg)) {
			$this->terminal($assoc, $sym, $reg);
		}

		/* left-hand side, right-hand side, reduce handler method (or NULL) */
		$productions = array(
			array("START", "PAIRS", NULL),
			array("PAIRS", "PAIR", NULL),
			array("PAIRS", "PAIRS '&' PAIR", NULL),
			array("VALUE", "", NULL),
			array("VALUE", "T_STR", NULL),
			array("ARRKEY", "", "handleEmptyDimensionKey"),
			array("ARRKEY", "T_STR", "handleDimensionKey"),
			array("ARRDIM", "'[' ARRKEY ']'", NULL),
			array("ARRDIM", "'[' ARRKEY ']' ARRDIM", NULL),
			array("PAIR", "T_STR ARRDIM '=' VALUE", "handleArray"),
			array("PAIR", "T_STR '=' VALUE", "handleScalar"),
		);

		foreach ($productions as list($name, $rule, $handler)) {
			$this->production($name, $rule, $handler);
		}

		$this->build();
		$this->lex->build();
	}
 50 | 
	/**
	 * Declares the terminal $sym with the given associativity, adds the
	 * lexer rule $reg for it and records the name <-> id mapping.
	 *
	 * @throws ParserException if $assoc is not one of "left", "right",
	 *                         "token" or "nonassoc".
	 */
	protected function terminal(string $assoc, string $sym, string $reg)
	{
		if ("left" === $assoc) {
			$this->left($sym);
		} elseif ("right" === $assoc) {
			$this->right($sym);
		} elseif ("token" === $assoc) {
			$this->token($sym);
		} elseif ("nonassoc" === $assoc) {
			$this->nonassoc($sym);
		} else {
			throw new ParserException("Unknown associativity '$assoc'.");
		}

		/* The lexer reports the id the parser assigned to the symbol. */
		$tokenId = $this->tokenId($sym);
		$this->lex->push($reg, $tokenId);

		$this->tokenNameToId[$sym] = $tokenId;
		$this->tokenIdToName[$tokenId] = $sym;
	}
 76 | 
	/**
	 * Adds the grammar rule $name -> $rule and, when a handler method name
	 * is given, registers it as a callable under the production's id.
	 */
	protected function production(string $name, string $rule, $handler = NULL)
	{
		$prodId = $this->push($name, $rule);

		if (!$handler) {
			return;
		}

		$this->prodHandler[$prodId] = array($this, $handler);
	}
 84 | 
	/**
	 * Reduce handler for an empty ARRKEY: pushes NULL onto the key stack.
	 */
	private function handleEmptyDimensionKey()
	{
		$key = NULL;
		$this->stack->push($key);
	}
 89 | 
	/**
	 * Reduce handler for a T_STR ARRKEY: pushes the matched key text onto
	 * the key stack.
	 */
	private function handleDimensionKey()
	{
		$key = $this->sigil();
		$this->stack->push($key);
	}
 94 | 
	/**
	 * Reduce handler for "T_STR '=' VALUE": stores the URL-decoded value
	 * under the name in the result array.
	 */
	private function handleScalar()
	{
		$key = $this->sigil();
		$raw = $this->sigil(2);

		$this->result[$key] = urldecode($raw);
	}
101 | 
102 | 	private function handleArray() // Handler registered for "PAIR: T_STR ARRDIM '=' VALUE"; builds a nested array from the stacked dimension keys and merges it into $this->result.
103 | 	{
104 | 		$name = $this->sigil();
105 | 		$val = $this->sigil(3);
106 | 		// NOTE(review): sigil() / sigil(3) presumably yield the text of the T_STR and VALUE symbols — confirm against the Parle API.
107 | 		// create top array element (the key on top of the stack is the one pushed last)
108 | 		$k = $this->stack->top;
109 | 		$tmp = array();
110 | 		if ($k) { // a NULL key (see handleEmptyDimensionKey) or any other falsy key takes the append branch
111 | 			$tmp[$k] = urldecode($val);
112 | 		} else {
113 | 			$tmp[] = urldecode($val);
114 | 		}
115 | 		$this->stack->pop();
116 | 
117 | 		// check if there are more dimensions; each remaining key wraps $tmp in one more array level
118 | 		while (!$this->stack->empty) {
119 | 			$k = $this->stack->top;
120 | 			$tmp2 = array();
121 | 			if ($k) {
122 | 				$tmp2[$k] = $tmp;
123 | 			} else {
124 | 				$tmp2[] = $tmp;
125 | 			}
126 | 			$this->stack->pop();
127 | 			$tmp = $tmp2;
128 | 		}
129 | 		if (!array_key_exists($name, $this->result)) { // make sure there is an array to merge into
130 | 			$this->result[$name] = array();
131 | 		}
132 | 		$this->result[$name] = array_merge_recursive($this->result[$name], $tmp); // combine with values already collected for $name
133 | 	}
134 | 
135 | 	public function parse($in) // Parses $in and returns the collected result array; throws ParserException for the parser errors handled below.
136 | 	{
137 | 		$this->result = array();
138 | 		$this->stack = new Stack;
139 | 		// Start a fresh run over $in with the configured lexer.
140 | 		$this->consume($in, $this->lex);
141 | 		// Step the parser until it accepts, dispatching on the current action.
142 | 		while (Parser::ACTION_ACCEPT != $this->action) {
143 | 			switch ($this->action) {
144 | 				case Parser::ACTION_ERROR:
145 | 					$i = $this->errorInfo();
146 | 					switch ($i->id) {
147 | 						case Parser::ERROR_SYNTAX:
148 | 							throw new ParserException("Syntax error at " . $i->position);
149 | 						case Parser::ERROR_NON_ASSOCIATIVE:
150 | 							throw new ParserException("Token " . $this->tokenIdToName[$i->token->id] . " is not associative");
151 | 						case Parser::ERROR_UNKNOWN_TOKEN:
152 | 							throw new ParserException("Unknown token '" . $i->token->value . "' at " . $i->position);
153 | 					}
154 | 					break; // reached only for an error id that none of the cases above handles
155 | 				case Parser::ACTION_SHIFT:
156 | 				case Parser::ACTION_GOTO:
157 | 					if ($this->debug) {
158 | 						echo $this->trace(), PHP_EOL;
159 | 					}
160 | 					break;
161 | 				case Parser::ACTION_REDUCE:
162 | 					if ($this->debug) {
163 | 						echo $this->trace(), PHP_EOL;
164 | 					}
165 | 					if (array_key_exists($this->reduceId, $this->prodHandler)) { // only rules registered with a handler run a callback
166 | 						if ($this->debug) {
167 | 							echo "calling ", $this->prodHandler[$this->reduceId][1], PHP_EOL;
168 | 						}
169 | 						call_user_func($this->prodHandler[$this->reduceId]);
170 | 					}
171 | 					break;
172 | 			}
173 | 			$this->advance();
174 | 		}
175 | 
176 | 		return $this->result;
177 | 	}
178 | }
179 | 
180 | function parse_str(string $in, array &$result = array()) // Parses $in with a freshly created ParseStrParser and stores the parsed array in $result (by reference); nothing is returned.
181 | {
182 | 	$p = new ParseStrParser(new Lexer);
183 | 	// NOTE(review): init() presumably declares the grammar and builds parser and lexer (its body is not fully visible here) — confirm.
184 | 	$p->init();
185 | 
186 | 	$result = $p->parse($in);
187 | }
188 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/ebnf_tables.hpp:
--------------------------------------------------------------------------------
  1 | // ebnf_tables.hpp
  2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef PARSERTL_EBNF_TABLES_HPP
  7 | #define PARSERTL_EBNF_TABLES_HPP
  8 | 
  9 | #include <cstdint>
 10 | #include <vector>
 11 | 
 12 | namespace parsertl
 13 | {
 14 |     struct ebnf_tables // Precomputed parser tables; the yy* names follow the Bison naming convention (NOTE(review): presumably generated from ebnf.y — confirm).
 15 |     {
 16 |         enum class yyconsts // scalar constants that accompany the tables below
 17 |         {
 18 |             YYFINAL = 16,
 19 |             YYLAST = 32,
 20 |             YYNTOKENS = 18,
 21 |             YYPACT_NINF = -4,
 22 |             YYTABLE_NINF = -1
 23 |         };
 24 | 
 25 |         enum class yytokentype // named token values; they are the last four indexes of yytranslate, which holds 3..6 there
 26 |         {
 27 |             EMPTY = 258,
 28 |             IDENTIFIER = 259,
 29 |             PREC = 260,
 30 |             TERMINAL = 261
 31 |         };
 32 | 
 33 |         const std::vector<uint8_t> yytranslate = // 262 entries (indexes 0-261); NOTE(review): presumably indexed by raw token/character value — confirm
 34 |         {
 35 |                0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 36 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 37 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 38 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 39 |               16,    17,    13,    15,     2,    14,     2,     2,     2,     2,
 40 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 41 |                2,     2,     2,    10,     2,     2,     2,     2,     2,     2,
 42 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 43 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 44 |                2,     8,     2,     9,     2,     2,     2,     2,     2,     2,
 45 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 46 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 47 |                2,     2,     2,    11,     7,    12,     2,     2,     2,     2,
 48 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 49 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 50 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 51 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 52 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 53 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 54 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 55 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 56 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 57 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 58 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 59 |                2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
 60 |                2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
 61 |                5,     6
 62 |         };
 63 |         const std::vector<uint8_t> yyr1 = // 23 entries
 64 |         {
 65 |                0,    18,    19,    20,    20,    21,    22,    22,    22,    23,
 66 |               23,    24,    24,    24,    24,    24,    24,    24,    24,    24,
 67 |               25,    25,    25
 68 |         };
 69 |         const std::vector<uint8_t> yyr2 // 23 entries, same length as yyr1; brace-initialised directly (no '='), unlike the other tables
 70 |         {
 71 |                0,     2,     1,     1,     3,     2,     0,     1,     1,     1,
 72 |                2,     1,     1,     3,     2,     3,     2,     4,     2,     3,
 73 |                0,     2,     2
 74 |         };
 75 |         const std::vector<uint8_t> yydefact = // 31 entries
 76 |         {
 77 |                6,     7,    11,    12,     6,     6,     6,     0,     2,     3,
 78 |               20,     8,     9,     0,     0,     0,     1,     6,     0,     5,
 79 |               10,    14,    16,    18,    13,    15,    19,     4,    21,    22,
 80 |               17
 81 |         };
 82 |         const std::vector<int8_t> yydefgoto = // 8 entries
 83 |         {
 84 |               -1,     7,     8,     9,    10,    11,    12,    19
 85 |         };
 86 |         const std::vector<int8_t> yypact = // 31 entries; -4 equals yyconsts::YYPACT_NINF
 87 |         {
 88 |               -3,    -4,    -4,    -4,    -3,    -3,    -3,    19,    18,    -4,
 89 |               22,    -2,     5,     3,     4,     0,    -4,    -3,    20,    -4,
 90 |                5,    -4,    -4,    -4,    -4,    14,    -4,    -4,    -4,    -4,
 91 |               -4
 92 |         };
 93 |         const std::vector<int8_t> yypgoto = // 8 entries, same length as yydefgoto
 94 |         {
 95 |               -4,    -4,    17,    12,    -4,    -4,    21,    -4
 96 |         };
 97 |         const std::vector<uint8_t> yytable = // 33 entries (indexes 0..YYLAST)
 98 |         {
 99 |                1,     2,     2,     3,     3,     4,     4,    17,     5,     5,
100 |               17,    17,    24,     6,     6,    21,    25,    26,    22,    16,
101 |               23,    13,    14,    15,    28,    17,    29,    18,    30,    27,
102 |                0,     0,    20
103 |         };
104 |         const std::vector<int8_t> yycheck = // 33 entries, same length as yytable
105 |         {
106 |                3,     4,     4,     6,     6,     8,     8,     7,    11,    11,
107 |                7,     7,     9,    16,    16,    10,    12,    17,    13,     0,
108 |               15,     4,     5,     6,     4,     7,     6,     5,    14,    17,
109 |               -1,    -1,    11
110 |         };
111 |     };
112 | }
113 | 
114 | #endif
115 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/match_results.hpp:
--------------------------------------------------------------------------------
  1 | // match_results.hpp
  2 | // Copyright (c) 2015-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_MATCH_RESULTS_HPP
  7 | #define LEXERTL_MATCH_RESULTS_HPP
  8 | 
  9 | #include "char_traits.hpp"
 10 | #include "enum_operator.hpp"
 11 | #include "enums.hpp"
 12 | #include <iterator>
 13 | #include <stack>
 14 | #include <string>
 15 | 
 16 | namespace lexertl
 17 | {
 18 |     template<typename iter, typename id_t = uint16_t,
 19 |         std::size_t flags = +feature_bit::bol | +feature_bit::eol |
 20 |         +feature_bit::skip | +feature_bit::again | +feature_bit::multi_state |
 21 |         +feature_bit::advance> // flags is not referenced anywhere inside this struct
 22 |     struct match_results // Record of one match: ids, matched range [first, second), end of input (eoi), bol flag and state id.
 23 |     {
 24 |         using id_type = id_t;
 25 |         using iter_type = iter;
 26 |         using char_type = typename std::iterator_traits<iter_type>::value_type;
 27 |         using index_type = typename basic_char_traits<char_type>::index_type;
 28 |         using string = std::basic_string<char_type>;
 29 |         // All data members are public and initialised in place.
 30 |         id_type id = 0;
 31 |         id_type user_id = npos();
 32 |         iter_type first = iter_type();
 33 |         iter_type second = iter_type();
 34 |         iter_type eoi = iter_type();
 35 |         bool bol = true; // NOTE(review): presumably "currently at beginning of line" — confirm
 36 |         id_type state = 0;
 37 | 
 38 |         match_results() = default;
 39 |         // Starts with an empty match at start_ (first == second == start_) and eoi == end_.
 40 |         match_results(const iter_type& start_, const iter_type& end_,
 41 |             const bool bol_ = true, const id_type state_ = 0) :
 42 |             first(start_),
 43 |             second(start_),
 44 |             eoi(end_),
 45 |             bol(bol_),
 46 |             state(state_)
 47 |         {
 48 |         }
 49 | 
 50 |         virtual ~match_results() = default; // virtual because recursive_match_results derives from this type
 51 | 
 52 |         string str() const // copy of the matched text [first, second)
 53 |         {
 54 |             return string(first, second);
 55 |         }
 56 |         // Copy of the match with soffset_ elements dropped from the front and eoffset_ from the back; needs iterators supporting +/- an offset, and does no bounds checking.
 57 |         string substr(const std::size_t soffset_,
 58 |             const std::size_t eoffset_) const
 59 |         {
 60 |             return string(first + soffset_, second - eoffset_);
 61 |         }
 62 | 
 63 |         virtual void clear() // back to default ids/flags with an empty range positioned at eoi (eoi itself is kept)
 64 |         {
 65 |             id = 0;
 66 |             user_id = npos();
 67 |             first = eoi;
 68 |             second = eoi;
 69 |             bol = true;
 70 |             state = 0;
 71 |         }
 72 | 
 73 |         virtual void reset(const iter_type& start_, const iter_type& end_) // like clear(), but retargets the object at [start_, end_)
 74 |         {
 75 |             id = 0;
 76 |             user_id = npos();
 77 |             first = start_;
 78 |             second = start_;
 79 |             eoi = end_;
 80 |             bol = true;
 81 |             state = 0;
 82 |         }
 83 | 
 84 |         std::size_t length() const // second - first, converted to std::size_t
 85 |         {
 86 |             return second - first;
 87 |         }
 88 | 
 89 |         static id_type npos() // id_type value with every bit set
 90 |         {
 91 |             return static_cast<id_type>(~0);
 92 |         }
 93 | 
 94 |         static id_type skip() // every bit set except the lowest (npos() - 1 for unsigned id types)
 95 |         {
 96 |             return static_cast<id_type>(~1);
 97 |         }
 98 | 
 99 |         bool operator ==(const match_results& rhs_) const // member-wise equality over all seven data members
100 |         {
101 |             return id == rhs_.id &&
102 |                 user_id == rhs_.user_id &&
103 |                 first == rhs_.first &&
104 |                 second == rhs_.second &&
105 |                 eoi == rhs_.eoi &&
106 |                 bol == rhs_.bol &&
107 |                 state == rhs_.state;
108 |         }
109 |     };
110 | 
111 |     template<typename iter, typename id_type = uint16_t,
112 |         std::size_t flags = +feature_bit::bol | +feature_bit::eol |
113 |         +feature_bit::skip | +feature_bit::again | +feature_bit::multi_state |
114 |         +feature_bit::recursive | +feature_bit::advance> // same default as match_results plus feature_bit::recursive
115 |     struct recursive_match_results : // match_results extended with a stack of id pairs
116 |         public match_results<iter, id_type, flags>
117 |     {
118 |         using id_type_pair = std::pair<id_type, id_type>;
119 |         std::stack<id_type_pair> stack; // NOTE(review): presumably holds saved lexer state ids for push/pop rules — confirm against the lookup code
120 | 
121 |         recursive_match_results() :
122 |             match_results<iter, id_type, flags>()
123 |         {
124 |         }
125 |         // Forwards all arguments to the base constructor; the stack starts out empty.
126 |         recursive_match_results(const iter& start_, const iter& end_,
127 |             const bool bol_ = true, const id_type state_ = 0) :
128 |             match_results<iter, id_type, flags>(start_, end_, bol_, state_)
129 |         {
130 |         }
131 | 
132 |         ~recursive_match_results() override = default;
133 | 
134 |         void clear() override // base clear() plus emptying the stack
135 |         {
136 |             match_results<iter, id_type, flags>::clear();
137 |             // std::stack has no clear(), so pop until empty.
138 |             while (!stack.empty()) stack.pop();
139 |         }
140 | 
141 |         void reset(const iter& start_, const iter& end_) override // base reset() plus emptying the stack
142 |         {
143 |             match_results<iter, id_type, flags>::reset(start_, end_);
144 | 
145 |             while (!stack.empty()) stack.pop();
146 |         }
147 |     };
148 | 
149 |     using smatch = match_results<std::string::const_iterator>; // all aliases below use the default id type (uint16_t) and default flags
150 |     using cmatch = match_results<const char*>;
151 |     using wsmatch = match_results<std::wstring::const_iterator>;
152 |     using wcmatch = match_results<const wchar_t*>;
153 |     using u32smatch = match_results<std::u32string::const_iterator>;
154 |     using u32cmatch = match_results<const char32_t*>;
155 |     // The same six iterator types for recursive_match_results.
156 |     using srmatch =
157 |         recursive_match_results<std::string::const_iterator>;
158 |     using crmatch = recursive_match_results<const char*>;
159 |     using wsrmatch =
160 |         recursive_match_results<std::wstring::const_iterator>;
161 |     using wcrmatch = recursive_match_results<const wchar_t*>;
162 |     using u32srmatch =
163 |         recursive_match_results<std::u32string::const_iterator>;
164 |     using u32crmatch = recursive_match_results<const char32_t*>;
165 | }
166 | 
167 | #endif
168 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/lookup.hpp:
--------------------------------------------------------------------------------
  1 | // lookup.hpp
  2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef PARSERTL_LOOKUP_HPP
  7 | #define PARSERTL_LOOKUP_HPP
  8 | 
  9 | #include "match_results.hpp"
 10 | #include "token.hpp"
 11 | 
 12 | namespace parsertl
 13 | {
 14 |     // Performs one parser step chosen by results_.entry.action and updates results_ (iter_ may advance); productions are not tracked.
 15 |     template<typename lexer_iterator, typename sm_type>
 16 |     void lookup(lexer_iterator& iter_, const sm_type& sm_,
 17 |         basic_match_results<sm_type>& results_)
 18 |     {
 19 |         switch (results_.entry.action)
 20 |         {
 21 |         case action::shift:
 22 |             results_.stack.push_back(results_.entry.param);
 23 |             // A token with id 0 does not advance the lexer iterator (NOTE(review): presumably id 0 is end of input — confirm).
 24 |             if (iter_->id != 0)
 25 |                 ++iter_;
 26 | 
 27 |             results_.token_id = iter_->id;
 28 |             // An npos() token id is turned into an error entry carrying error_type::unknown_token.
 29 |             if (results_.token_id == lexer_iterator::value_type::npos())
 30 |             {
 31 |                 results_.entry.action = action::error;
 32 |                 results_.entry.param = static_cast<typename sm_type::id_type>
 33 |                     (error_type::unknown_token);
 34 |             }
 35 |             else
 36 |             {
 37 |                 results_.entry = // next entry for (value just pushed, new token id)
 38 |                     sm_.at(results_.entry.param, results_.token_id);
 39 |             }
 40 | 
 41 |             break;
 42 |         case action::reduce:
 43 |         {
 44 |             const std::size_t size_ = // number of stack entries to pop; NOTE(review): presumably the rule's right-hand-side length — confirm
 45 |                 sm_._rules[results_.entry.param].second.size();
 46 | 
 47 |             if (size_)
 48 |             {
 49 |                 results_.stack.resize(results_.stack.size() - size_);
 50 |             }
 51 |             // The rule's .first becomes the current token id and is looked up against the new stack top.
 52 |             results_.token_id = sm_._rules[results_.entry.param].first;
 53 |             results_.entry = sm_.at(results_.stack.back(), results_.token_id);
 54 |             break;
 55 |         }
 56 |         case action::go_to:
 57 |             results_.stack.push_back(results_.entry.param);
 58 |             results_.token_id = iter_->id; // back to the lexer's current token
 59 |             results_.entry = sm_.at(results_.stack.back(), results_.token_id);
 60 |             break;
 61 |         case action::accept:
 62 |         {
 63 |             const std::size_t size_ =
 64 |                 sm_._rules[results_.entry.param].second.size();
 65 |             // Only the stack is popped; entry is left untouched, so the action stays accept.
 66 |             if (size_)
 67 |             {
 68 |                 results_.stack.resize(results_.stack.size() - size_);
 69 |             }
 70 | 
 71 |             break;
 72 |         }
 73 |         default:
 74 |             // action::error
 75 |             break;
 76 |         }
 77 |     }
 78 | 
 79 |     // Performs one parser step chosen by results_.entry.action and keeps productions_ in step: one token per shifted or reduced symbol.
 80 |     template<typename lexer_iterator, typename sm_type, typename token_vector>
 81 |     void lookup(lexer_iterator& iter_, const sm_type& sm_,
 82 |         basic_match_results<sm_type>& results_, token_vector& productions_)
 83 |     {
 84 |         switch (results_.entry.action)
 85 |         {
 86 |         case action::shift:
 87 |             results_.stack.push_back(results_.entry.param);
 88 |             productions_.emplace_back(iter_->id, iter_->first, iter_->second); // record the token being shifted before the iterator moves
 89 | 
 90 |             if (iter_->id != 0)
 91 |                 ++iter_;
 92 | 
 93 |             results_.token_id = iter_->id;
 94 |             // An npos() token id is turned into an error entry carrying error_type::unknown_token.
 95 |             if (results_.token_id == lexer_iterator::value_type::npos())
 96 |             {
 97 |                 results_.entry.action = action::error;
 98 |                 results_.entry.param = static_cast<typename sm_type::id_type>
 99 |                     (error_type::unknown_token);
100 |             }
101 |             else
102 |             {
103 |                 results_.entry =
104 |                     sm_.at(results_.entry.param, results_.token_id);
105 |             }
106 | 
107 |             break;
108 |         case action::reduce:
109 |         {
110 |             const std::size_t size_ =
111 |                 sm_._rules[results_.entry.param].second.size();
112 |             typename token_vector::value_type token_; // replaces the size_ popped productions
113 | 
114 |             if (size_)
115 |             {
116 |                 results_.stack.resize(results_.stack.size() - size_);
117 |                 token_.first = (productions_.end() - size_)->first; // spans from the start of the earliest popped production ...
118 |                 token_.second = productions_.back().second; // ... to the end of the latest one
119 |                 productions_.resize(productions_.size() - size_);
120 |             }
121 |             else
122 |             {
123 |                 if (productions_.empty()) // size_ == 0: zero-length token at the current token's start ...
124 |                 {
125 |                     token_.first = token_.second = iter_->first;
126 |                 }
127 |                 else // ... or at the end of the previous production
128 |                 {
129 |                     token_.first = token_.second = productions_.back().second;
130 |                 }
131 |             }
132 | 
133 |             results_.token_id = sm_._rules[results_.entry.param].first;
134 |             results_.entry = sm_.at(results_.stack.back(), results_.token_id);
135 |             token_.id = results_.token_id;
136 |             productions_.push_back(token_);
137 |             break;
138 |         }
139 |         case action::go_to:
140 |             results_.stack.push_back(results_.entry.param);
141 |             results_.token_id = iter_->id;
142 |             results_.entry = sm_.at(results_.stack.back(), results_.token_id);
143 |             break;
144 |         case action::accept:
145 |         {
146 |             const std::size_t size_ =
147 |                 sm_._rules[results_.entry.param].second.size();
148 |             // Only results_.stack is popped here; productions_ is left as it is.
149 |             if (size_)
150 |             {
151 |                 results_.stack.resize(results_.stack.size() - size_);
152 |             }
153 | 
154 |             break;
155 |         }
156 |         default:
157 |             // action::error
158 |             break;
159 |         }
160 |     }
161 | }
162 | 
163 | #endif
164 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/partition/equivset.hpp:
--------------------------------------------------------------------------------
  1 | // equivset.hpp
  2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_EQUIVSET_HPP
  7 | #define LEXERTL_EQUIVSET_HPP
  8 | 
  9 | #include <algorithm>
 10 | #include <iterator>
 11 | #include "../parser/tree/node.hpp"
 12 | #include <set>
 13 | 
 14 | namespace lexertl
 15 | {
 16 |     namespace detail
 17 |     {
 18 |         template<typename id_type>
 19 |         struct basic_equivset // A vector of id_type indexes together with an id, a greedy flag and a list of follow-position nodes.
 20 |         {
 21 |             using index_set = std::set<id_type>;
 22 |             using index_vector = std::vector<id_type>;
 23 |             using node = basic_node<id_type>;
 24 |             using node_vector = std::vector<observer_ptr<node>>;
 25 | 
 26 |             index_vector _index_vector;
 27 |             id_type _id = 0;
 28 |             bool _greedy = true;
 29 |             node_vector _followpos;
 30 | 
 31 |             basic_equivset() = default;
 32 |             // Copies the set into the vector (std::set iterates in ascending order, so the vector starts out sorted).
 33 |             basic_equivset(const index_set& index_set_, const id_type id_,
 34 |                 const bool greedy_, const node_vector& followpos_) :
 35 |                 _index_vector(index_set_.begin(), index_set_.end()),
 36 |                 _id(id_),
 37 |                 _greedy(greedy_),
 38 |                 _followpos(followpos_)
 39 |             {
 40 |             }
 41 | 
 42 |             bool empty() const // true only when there are neither indexes nor follow positions
 43 |             {
 44 |                 return _index_vector.empty() && _followpos.empty();
 45 |             }
 46 |             // Moves the indexes common to *this and rhs_ into overlap_ (removing them from both); if there are any, also fills in overlap_'s id, greedy flag and followpos.
 47 |             void intersect(basic_equivset& rhs_, basic_equivset& overlap_)
 48 |             {
 49 |                 intersect_indexes(rhs_._index_vector, overlap_._index_vector);
 50 | 
 51 |                 if (!overlap_._index_vector.empty())
 52 |                 {
 53 |                     // Note that the LHS takes priority in order to
 54 |                     // respect rule ordering priority in the lex spec.
 55 |                     overlap_._id = _id;
 56 |                     process_greedy(rhs_, overlap_);
 57 |                     overlap_._followpos = _followpos;
 58 |                     // Append every rhs_ follow position that is not already present in overlap_.
 59 |                     auto overlap_begin_ = overlap_._followpos.cbegin();
 60 |                     auto overlap_end_ = overlap_._followpos.cend();
 61 | 
 62 |                     for (observer_ptr<node> node_ : rhs_._followpos)
 63 |                     {
 64 |                         if (std::find(overlap_begin_, overlap_end_, node_) ==
 65 |                             overlap_end_)
 66 |                         {
 67 |                             overlap_._followpos.push_back(node_); // may reallocate, so the cached iterators are refreshed below
 68 |                             overlap_begin_ = overlap_._followpos.begin();
 69 |                             overlap_end_ = overlap_._followpos.end();
 70 |                         }
 71 |                     }
 72 |                     // A side that is left without indexes also drops its follow positions.
 73 |                     if (_index_vector.empty())
 74 |                     {
 75 |                         _followpos.clear();
 76 |                     }
 77 | 
 78 |                     if (rhs_._index_vector.empty())
 79 |                     {
 80 |                         rhs_._followpos.clear();
 81 |                     }
 82 |                 }
 83 |             }
 84 | 
 85 |         private:
 86 |             void process_greedy(basic_equivset& rhs_, basic_equivset& overlap_) // sets overlap_._greedy from *this, falling back to rhs_'s follow positions when *this is not greedy
 87 |             {
 88 |                 if (_greedy)
 89 |                     overlap_._greedy = true;
 90 |                 else
 91 |                 {
 92 |                     bool greedy_ = false;
 93 | 
 94 |                     for (const node* node_ : rhs_._followpos)
 95 |                     {
 96 |                         // If a 'hard greedy' transition is present,
 97 |                         // then respect that above all else.
 98 |                         if (node_->what_type() == node::node_type::LEAF &&
 99 |                             node_->greedy() && node_->set_greedy())
100 |                         {
101 |                             greedy_ = true;
102 |                             break;
103 |                         }
104 |                     }
105 | 
106 |                     overlap_._greedy = greedy_;
107 |                 }
108 |             }
109 |             // Appends the elements common to _index_vector and rhs_ to overlap_ (std::set_intersection expects both inputs sorted), then erases them from both inputs.
110 |             void intersect_indexes(index_vector& rhs_, index_vector& overlap_)
111 |             {
112 |                 std::set_intersection(_index_vector.begin(),
113 |                     _index_vector.end(), rhs_.begin(), rhs_.end(),
114 |                     std::back_inserter(overlap_));
115 | 
116 |                 if (!overlap_.empty())
117 |                 {
118 |                     remove(overlap_, _index_vector);
119 |                     remove(overlap_, rhs_);
120 |                 }
121 |             }
122 |             // Erases the elements of source_ from dest_ in one ordered pass. source_ must be non-empty (*inter_ is read unguarded) and the final resize assumes every element of source_ occurs in dest_.
123 |             void remove(const index_vector& source_, index_vector& dest_) const
124 |             {
125 |                 auto inter_ = source_.cbegin();
126 |                 auto inter_end_ = source_.cend();
127 |                 auto reader_ = std::find(dest_.begin(), dest_.end(), *inter_); // nothing before the first match needs to move
128 |                 auto writer_ = reader_;
129 |                 auto dest_end_ = dest_.end();
130 | 
131 |                 while (writer_ != dest_end_ && inter_ != inter_end_)
132 |                 {
133 |                     if (*reader_ == *inter_) // element to drop: skip it without writing
134 |                     {
135 |                         ++inter_;
136 |                         ++reader_;
137 |                     }
138 |                     else
139 |                     {
140 |                         *writer_++ = *reader_++;
141 |                     }
142 |                 }
143 |                 // Shift down whatever remains after the last removed element.
144 |                 while (reader_ != dest_end_)
145 |                 {
146 |                     *writer_++ = *reader_++;
147 |                 }
148 | 
149 |                 dest_.resize(dest_.size() - source_.size());
150 |             }
151 |         };
152 |     }
153 | }
154 | 
155 | #endif
156 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tokeniser/re_token.hpp:
--------------------------------------------------------------------------------
  1 | // re_token.hpp
  2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_RE_TOKEN_HPP
  7 | #define LEXERTL_RE_TOKEN_HPP
  8 | 
  9 | #include "../../string_token.hpp"
 10 | 
 11 | namespace lexertl
 12 | {
 13 |     namespace detail
 14 |     {
 15 |         // Token kinds used by the regex tokeniser. Note that tokens following END (currently only DIFF) are never seen by parser.hpp.
 16 |         enum class token_type
 17 |         {
 18 |             BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT,
 19 |             DUP, OR, CHARSET, BOL, EOL, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT,
 20 |             ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN,
 21 |             END, DIFF // END + 1 also sizes both dimensions of basic_re_token's precedence table, so enumerator order matters
 22 |         };
 23 | 
 24 |         template<typename input_char_type, typename char_type>
 25 |         struct basic_re_token
 26 |         {
 27 |             using string_token = basic_string_token<char_type>;
 28 |             using string = std::basic_string<input_char_type>;
 29 | 
 30 |             token_type _type;
 31 |             string _extra;
 32 |             string_token _str;
 33 | 
 34 |             explicit basic_re_token(const token_type type_ =
 35 |                 token_type::BEGIN) :
 36 |                 _type(type_)
 37 |             {
 38 |             }
 39 | 
 40 |             void clear()
 41 |             {
 42 |                 _type = token_type::BEGIN;
 43 |                 _extra.clear();
 44 |                 _str.clear();
 45 |             }
 46 | 
 47 |             void swap(basic_re_token& rhs_) noexcept
 48 |             {
 49 |                 std::swap(_type, rhs_._type);
 50 |                 _extra.swap(rhs_._extra);
 51 |                 _str.swap(rhs_._str);
 52 |             }
 53 | 
 54 |             char precedence(const token_type type_) const
 55 |             {
 56 |                 // Moved in here for Solaris compiler.
 57 |                 static const char precedence_table_
 58 |                     [static_cast<int>(token_type::END) + 1]
 59 |                     [static_cast<int>(token_type::END) + 1] =
 60 |                 {
 61 |                     //        BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP,  | , CHR, BOL, EOL, MCR,  ( ,  ) ,  ? , ?? ,  * , *? ,  + , +?, {n}?, {n}, END
 62 |                     /*BEGIN*/{ ' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 63 |                     /*REGEX*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 64 |                     /*OREXP*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 65 |                     /* SEQ */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 66 |                     /* SUB */{ ' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 67 |                     /*EXPRE*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 68 |                     /* RPT */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>' },
 69 |                     /*DUPLI*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 70 |                     /*  |  */{ ' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' },
 71 |                     /*CHARA*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
 72 |                     /* BOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
 73 |                     /* EOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
 74 |                     /*MACRO*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
 75 |                     /*  (  */{ ' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' },
 76 |                     /*  )  */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
 77 |                     /*  ?  */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 78 |                     /* ??  */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 79 |                     /*  *  */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 80 |                     /* *?  */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 81 |                     /*  +  */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 82 |                     /* +?  */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 83 |                     /*{n,m}*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 84 |                     /*{nm}?*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
 85 |                     /* END */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }
 86 |                 };
 87 | 
 88 |                 return precedence_table_[static_cast<int>(_type)]
 89 |                     [static_cast<int>(type_)];
 90 |             }
 91 | 
 92 |             const char* precedence_string() const
 93 |             {
 94 |                 // Names indexed by static_cast<int>(_type); local for Solaris.
 95 |                 const int count_ = static_cast<int>(token_type::END) + 1;
 96 |                 static const char* names_[count_] =
 97 |                 {
 98 |                     "BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION",
 99 |                     "REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$",
100 |                     "MACRO", "(", ")", "?", "??", "*",
101 |                     "*?", "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"
102 |                 };
103 |                 const int index_ = static_cast<int>(_type);
104 |                 return names_[index_];
105 |             }
106 |         };
107 |     }
108 | }
109 | 
110 | #endif
111 | 


--------------------------------------------------------------------------------
/lib/parsertl14/include/parsertl/state_machine.hpp:
--------------------------------------------------------------------------------
  1 | // state_machine.hpp
  2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef PARSERTL_STATE_MACHINE_HPP
  7 | #define PARSERTL_STATE_MACHINE_HPP
  8 | 
  9 | #include <algorithm>
 10 | #include <cstdint>
 11 | #include "enums.hpp"
 12 | #include <vector>
 13 | 
 14 | namespace parsertl
 15 | {
 16 |     template<typename id_ty>
 17 |     struct base_state_machine
 18 |     {
 19 |         // id_ty must satisfy std::is_unsigned or compilation stops here.
 20 |         static_assert(std::is_unsigned<id_ty>::value,
 21 |             "Your id type is signed");
 22 | 
 23 |         using id_type = id_ty;
 24 |         using id_type_pair = std::pair<id_type, id_type>;
 25 |         using id_type_vector = std::vector<id_type>;
 26 |         using id_type_vector_pair = std::pair<id_type, id_type_vector>;
 27 |         using rules = std::vector<id_type_vector_pair>;
 28 |         using capture_vector = std::vector<id_type_pair>;
 29 |         using capture = std::pair<std::size_t, capture_vector>;
 30 |         using captures_vector = std::vector<capture>;
 31 | 
 32 |         // Table dimensions, followed by the rule and capture lists.
 33 |         std::size_t _columns = 0;
 34 |         std::size_t _rows = 0;
 35 |         rules _rules;
 36 |         captures_vector _captures;
 37 | 
 38 |         // A table cell: an action together with its parameter.
 39 |         struct entry
 40 |         {
 41 |             // action is qualified to prevent a compilation error
 42 |             parsertl::action action;
 43 |             id_type param;
 44 | 
 45 |             // Defaults to the error action carrying syntax_error.
 46 |             entry() :
 47 |                 entry(parsertl::action::error,
 48 |                     static_cast<id_type>(error_type::syntax_error))
 49 |             {
 50 |             }
 51 | 
 52 |             entry(const parsertl::action action_, const id_type param_) :
 53 |                 action(action_),
 54 |                 param(param_)
 55 |             {
 56 |             }
 57 | 
 58 |             // Puts the cell back into its default-constructed state.
 59 |             void clear() noexcept
 60 |             {
 61 |                 param = static_cast<id_type>(error_type::syntax_error);
 62 |                 action = parsertl::action::error;
 63 |             }
 64 | 
 65 |             bool operator ==(const entry& rhs_) const
 66 |             {
 67 |                 return !(action != rhs_.action || param != rhs_.param);
 68 |             }
 69 |         };
 70 | 
 71 |         // Virtual so that a machine may be destroyed via a base pointer.
 72 |         virtual ~base_state_machine() = default;
 73 | 
 74 |         // Zeroes both dimensions and empties the rules and captures.
 75 |         virtual void clear() noexcept
 76 |         {
 77 |             _rules.clear();
 78 |             _captures.clear();
 79 |             _rows = _columns = 0;
 80 |         }
 81 |     };
 82 | 
 83 |     // Sparse state machine: every state owns a vector of
 84 |     // (token id, entry) pairs
 85 |     template<typename id_ty>
 86 |     struct basic_state_machine : base_state_machine<id_ty>
 87 |     {
 88 |         using base_sm = base_state_machine<id_ty>;
 89 |         using id_type = id_ty;
 90 |         using entry = typename base_sm::entry;
 91 |         using id_type_entry_pair = std::pair<id_type, entry>;
 92 |         using id_type_entry_pair_vec = std::vector<id_type_entry_pair>;
 93 |         using table = std::vector<id_type_entry_pair_vec>;
 94 | 
 95 |         // One row per state, indexed by state number.
 96 |         table _table;
 97 | 
 98 |         // Constructors are left to the compiler.
 99 |         ~basic_state_machine() override = default;
100 | 
101 |         // Resets the base members, then removes every row.
102 |         void clear() noexcept override
103 |         {
104 |             base_sm::clear();
105 |             _table.clear();
106 |         }
107 | 
108 |         // True when the table has no rows.
109 |         bool empty() const
110 |         {
111 |             return _table.empty();
112 |         }
113 | 
114 |         // Entry stored under id 0 in row state_, or a default-constructed
115 |         // entry when that row has no such pair.
116 |         entry at(const std::size_t state_) const
117 |         {
118 |             return at(state_, 0);
119 |         }
120 | 
121 |         // Entry stored under token_id_ in row state_, or a
122 |         // default-constructed entry when that row has no such pair.
123 |         entry at(const std::size_t state_, const std::size_t token_id_) const
124 |         {
125 |             const auto& row_ = _table[state_];
126 | 
127 |             for (const auto& cell_ : row_)
128 |             {
129 |                 if (cell_.first == token_id_)
130 |                     return cell_.second;
131 |             }
132 | 
133 |             return entry();
134 |         }
135 | 
136 |         // Overwrites the entry stored under token_id_ in row state_; when
137 |         // the row has no such pair, appends one with the id cast to id_type.
138 |         void set(const std::size_t state_, const std::size_t token_id_,
139 |             const entry& entry_)
140 |         {
141 |             auto& row_ = _table[state_];
142 | 
143 |             for (auto& cell_ : row_)
144 |             {
145 |                 if (cell_.first == token_id_)
146 |                 {
147 |                     cell_.second = entry_;
148 |                     return;
149 |                 }
150 |             }
151 | 
152 |             row_.emplace_back(static_cast<id_type>(token_id_), entry_);
153 |         }
154 | 
155 |         // Resizes the table to base_sm::_rows rows.
156 |         void push()
157 |         {
158 |             _table.resize(base_sm::_rows);
159 |         }
160 |     };
161 | 
162 |     // State machine kept as a flat 2d array of entries,
163 |     // one row of _columns cells per state
164 |     template<typename id_ty>
165 |     struct basic_uncompressed_state_machine : base_state_machine<id_ty>
166 |     {
167 |         using base_sm = base_state_machine<id_ty>;
168 |         using id_type = id_ty;
169 |         using entry = typename base_sm::entry;
170 |         using table = std::vector<entry>;
171 | 
172 |         // Cell (state, token id) is stored at index
173 |         // state * _columns + token id.
174 |         table _table;
175 | 
176 |         // Constructors are left to the compiler.
177 |         ~basic_uncompressed_state_machine() override = default;
178 | 
179 |         // Resets the base members, then removes every cell.
180 |         void clear() noexcept override
181 |         {
182 |             base_sm::clear();
183 |             _table.clear();
184 |         }
185 | 
186 |         // True when the table holds no cells.
187 |         bool empty() const { return _table.empty(); }
188 | 
189 |         // Cell in column 0 of row state_.
190 |         entry at(const std::size_t state_) const { return at(state_, 0); }
191 | 
192 |         // Cell in column token_id_ of row state_.
193 |         entry at(const std::size_t state_, const std::size_t token_id_) const
194 |         {
195 |             return _table[token_id_ + state_ * base_sm::_columns];
196 |         }
197 | 
198 |         // Stores a copy of entry_ in column token_id_ of row state_.
199 |         void set(const std::size_t state_, const std::size_t token_id_,
200 |             const entry& entry_)
201 |         {
202 |             _table[token_id_ + state_ * base_sm::_columns] = entry_;
203 |         }
204 | 
205 |         // Resizes the table to _columns * _rows cells.
206 |         void push() { _table.resize(base_sm::_rows * base_sm::_columns); }
207 |     };
208 | 
209 |     using state_machine = basic_state_machine<std::uint16_t>; // 16 bit ids
210 |     using uncompressed_state_machine =
211 |         basic_uncompressed_state_machine<std::uint16_t>; // 16 bit ids
212 | }
213 | 
214 | #endif
215 | 


--------------------------------------------------------------------------------
/lib/lexertl14/include/lexertl/parser/tree/node.hpp:
--------------------------------------------------------------------------------
  1 | // node.hpp
  2 | // Copyright (c) 2005-2023 Ben Hanson (http://www.benhanson.net/)
  3 | //
  4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6 | #ifndef LEXERTL_NODE_HPP
  7 | #define LEXERTL_NODE_HPP
  8 | 
  9 | #include <assert.h>
 10 | #include <memory>
 11 | #include "../../observer_ptr.hpp"
 12 | #include "../../runtime_error.hpp"
 13 | #include <stack>
 14 | #include <vector>
 15 | 
 16 | namespace lexertl
 17 | {
 18 |     namespace detail
 19 |     {
 20 |         // Abstract base for tree nodes (see node_type). Most of the virtual
 21 |         // accessors below throw runtime_error unless overridden.
 22 |         template<typename id_type>
 23 |         class basic_node
 24 |         {
 25 |         public:
 26 |             enum class node_type { LEAF, SEQUENCE, SELECTION, ITERATION, END };
 27 | 
 28 |             using bool_stack = std::stack<bool>;
 29 |             using node_stack = std::stack<observer_ptr<basic_node>>;
 30 |             using const_node_stack = std::stack<observer_ptr<const basic_node>>;
 31 |             using node_vector = std::vector<observer_ptr<basic_node>>;
 32 |             using node_ptr_vector = std::vector<std::unique_ptr<basic_node>>;
 33 | 
 34 |             basic_node() = default;
 35 | 
 36 |             explicit basic_node(const bool nullable_) :
 37 |                 _nullable(nullable_)
 38 |             {
 39 |             }
 40 | 
 41 |             // Copy construction and assignment are disabled.
 42 |             basic_node(const basic_node&) = delete;
 43 |             const basic_node& operator =(const basic_node&) = delete;
 44 |             virtual ~basic_node() = default;
 45 | 
 46 |             // The null token id: ~0 converted to id_type.
 47 |             static id_type null_token() { return static_cast<id_type>(~0); }
 48 | 
 49 |             bool nullable() const { return _nullable; }
 50 | 
 51 |             // Appends this node's firstpos entries to firstpos_.
 52 |             void append_firstpos(node_vector& firstpos_) const
 53 |             {
 54 |                 firstpos_.insert(firstpos_.end(),
 55 |                     _firstpos.begin(), _firstpos.end());
 56 |             }
 57 | 
 58 |             // Appends this node's lastpos entries to lastpos_.
 59 |             void append_lastpos(node_vector& lastpos_) const
 60 |             {
 61 |                 lastpos_.insert(lastpos_.end(),
 62 |                     _lastpos.begin(), _lastpos.end());
 63 |             }
 64 | 
 65 |             virtual void append_followpos(const node_vector&/*followpos_*/)
 66 |             {
 67 |                 throw runtime_error("Internal error node::append_followpos().");
 68 |             }
 69 | 
 70 |             // Copies the tree rooted at this node using explicit stacks.
 71 |             // traverse() drives the descent and copy_node() (which is handed
 72 |             // node_ptr_vector_) builds the copy; the new root is returned.
 73 |             observer_ptr<basic_node> copy
 74 |                 (node_ptr_vector& node_ptr_vector_) const
 75 |             {
 76 |                 observer_ptr<basic_node> new_root_ = nullptr;
 77 |                 const_node_stack node_stack_;
 78 |                 bool_stack perform_op_stack_;
 79 |                 bool down_ = true;
 80 |                 node_stack new_node_stack_;
 81 | 
 82 |                 node_stack_.push(this);
 83 | 
 84 |                 while (!node_stack_.empty())
 85 |                 {
 86 |                     // Descend for as long as traverse() asks to go down.
 87 |                     while (down_)
 88 |                     {
 89 |                         down_ = node_stack_.top()->traverse(node_stack_,
 90 |                             perform_op_stack_);
 91 |                     }
 92 | 
 93 |                     // Unwind: copy_node() runs on the top node, which is
 94 |                     // popped unless copy_node() switched down_ back on.
 95 |                     while (!down_ && !node_stack_.empty())
 96 |                     {
 97 |                         observer_ptr<const basic_node> top_ = node_stack_.top();
 98 | 
 99 |                         top_->copy_node(node_ptr_vector_, new_node_stack_,
100 |                             perform_op_stack_, down_);
101 | 
102 |                         if (!down_) node_stack_.pop();
103 |                     }
104 |                 }
105 | 
106 |                 // Exactly one node, the new root, must remain.
107 |                 assert(new_node_stack_.size() == 1);
108 |                 new_root_ = new_node_stack_.top();
109 |                 new_node_stack_.pop();
110 |                 return new_root_;
111 |             }
112 | 
113 |             virtual node_type what_type() const = 0;
114 | 
115 |             virtual bool traverse(const_node_stack& node_stack_,
116 |                 bool_stack& perform_op_stack_) const = 0;
117 | 
118 |             node_vector& firstpos() { return _firstpos; }
119 |             const node_vector& firstpos() const { return _firstpos; }
120 | 
121 |             // _lastpos modified externally, so not const &
122 |             node_vector& lastpos() { return _lastpos; }
123 | 
124 |             virtual bool end_state() const { return false; }
125 | 
126 |             // The defaults below report an internal error by throwing.
127 |             virtual id_type id() const
128 |             {
129 |                 throw runtime_error("Internal error node::id().");
130 | #ifdef __SUNPRO_CC
131 |                 // Stop bogus Solaris compiler warning
132 |                 return id_type();
133 | #endif
134 |             }
135 | 
136 |             virtual id_type user_id() const
137 |             {
138 |                 throw runtime_error("Internal error node::user_id().");
139 | #ifdef __SUNPRO_CC
140 |                 // Stop bogus Solaris compiler warning
141 |                 return id_type();
142 | #endif
143 |             }
144 | 
145 |             virtual id_type next_dfa() const
146 |             {
147 |                 throw runtime_error("Internal error node::next_dfa().");
148 | #ifdef __SUNPRO_CC
149 |                 // Stop bogus Solaris compiler warning
150 |                 return id_type();
151 | #endif
152 |             }
153 | 
154 |             virtual id_type push_dfa() const
155 |             {
156 |                 throw runtime_error("Internal error node::push_dfa().");
157 | #ifdef __SUNPRO_CC
158 |                 // Stop bogus Solaris compiler warning
159 |                 return id_type();
160 | #endif
161 |             }
162 | 
163 |             virtual bool pop_dfa() const
164 |             {
165 |                 throw runtime_error("Internal error node::pop_dfa().");
166 | #ifdef __SUNPRO_CC
167 |                 // Stop bogus Solaris compiler warning
168 |                 return false;
169 | #endif
170 |             }
171 | 
172 |             virtual id_type token() const
173 |             {
174 |                 throw runtime_error("Internal error node::token().");
175 | #ifdef __SUNPRO_CC
176 |                 // Stop bogus Solaris compiler warning
177 |                 return id_type();
178 | #endif
179 |             }
180 | 
181 |             virtual bool set_greedy() const
182 |             {
183 |                 throw runtime_error("Internal error node::set_greedy().");
184 | #ifdef __SUNPRO_CC
185 |                 // Stop bogus Solaris compiler warning
186 |                 return false;
187 | #endif
188 |             }
189 | 
190 |             virtual void greedy(const bool /*greedy_*/)
191 |             {
192 |                 throw runtime_error("Internal error node::greedy(bool).");
193 |             }
194 | 
195 |             virtual bool greedy() const
196 |             {
197 |                 throw runtime_error("Internal error node::greedy().");
198 | #ifdef __SUNPRO_CC
199 |                 // Stop bogus Solaris compiler warning
200 |                 return false;
201 | #endif
202 |             }
203 | 
204 |             virtual const node_vector& followpos() const
205 |             {
206 |                 throw runtime_error("Internal error node::followpos().");
207 | #ifdef __SUNPRO_CC
208 |                 // Stop bogus Solaris compiler warning (must name an object)
209 |                 return _firstpos;
210 | #endif
211 |             }
212 | 
213 |             virtual node_vector& followpos()
214 |             {
215 |                 throw runtime_error("Internal error node::followpos().");
216 | #ifdef __SUNPRO_CC
217 |                 // Stop bogus Solaris compiler warning (must name an object)
218 |                 return _firstpos;
219 | #endif
220 |             }
221 | 
222 |         protected:
223 |             // Copies this single node; called by copy() while unwinding.
224 |             virtual void copy_node(node_ptr_vector& node_ptr_vector_,
225 |                 node_stack& new_node_stack_, bool_stack& perform_op_stack_,
226 |                 bool& down_) const = 0;
227 | 
228 |         private:
229 |             const bool _nullable = false;
230 |             node_vector _firstpos;
231 |             node_vector _lastpos;
232 |         };
233 |     }
234 | }
235 | 
236 | #endif
237 | 


--------------------------------------------------------------------------------