├── .clang-format
├── .github
    └── workflows
    │   └── cmake.yml
├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── docs
    ├── build.sh
    ├── index.html
    ├── index.js
    ├── native.cpp
    ├── native.js
    ├── native.wasm
    └── style.css
├── example
    ├── CMakeLists.txt
    ├── calc.cc
    ├── calc2.cc
    ├── calc3.cc
    ├── calc4.cc
    ├── calc5.cc
    ├── choice.cc
    ├── docx.cc
    ├── enter_leave.cc
    ├── indent.cc
    └── sequence.cc
├── grammar
    ├── cpp-peglib.peg
    ├── csv.peg
    ├── json.peg
    └── pl0.peg
├── lint
    ├── CMakeLists.txt
    ├── README.md
    └── peglint.cc
├── peg.vim
├── peglib.h
├── pl0
    ├── CMakeLists.txt
    ├── Makefile
    ├── README.md
    ├── pl0.cc
    └── samples
    │   ├── fib.pas
    │   ├── gcd.pas
    │   └── square.pas
└── test
    ├── CMakeLists.txt
    ├── test1.cc
    ├── test2.cc
    └── test3.cc


/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: LLVM
2 | AllowShortBlocksOnASingleLine: true
3 | AllowShortCaseLabelsOnASingleLine: true
4 | AllowShortIfStatementsOnASingleLine: true
5 | Cpp11BracedListStyle: true
6 | 


--------------------------------------------------------------------------------
/.github/workflows/cmake.yml:
--------------------------------------------------------------------------------
 1 | name: CMake
 2 | # Runs the configure/build/test cycle below on every push and pull request.
 3 | on: [push, pull_request]
 4 | # BUILD_TYPE is shared by every configure, build and ctest invocation below.
 5 | env:
 6 |   BUILD_TYPE: Release
 7 | # A single job, repeated once per operating system listed in the matrix.
 8 | jobs:
 9 |   build:
10 |     runs-on: ${{ matrix.os }}
11 |     strategy:
12 |       matrix:
13 |         os: [ubuntu-latest, ubuntu-24.04, macos-latest, windows-latest, windows-2019]
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v4
17 |     # Pass 1: configure, build and test in build/ without overriding the C++ standard.
18 |     - name: Configure CMake
19 |       run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
20 | 
21 |     - name: Build
22 |       run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
23 | 
24 |     - name: Test
25 |       working-directory: ${{github.workspace}}/build
26 |       run: ctest -C ${{env.BUILD_TYPE}}
27 |     # Pass 2: repeat the cycle in a separate build_20/ tree, requesting C++20.
28 |     - name: Configure CMake with C++20
29 |       run: cmake -B ${{github.workspace}}/build_20 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_STANDARD=20
30 | 
31 |     - name: Build with C++20
32 |       run: cmake --build ${{github.workspace}}/build_20 --config ${{env.BUILD_TYPE}}
33 | 
34 |     - name: Test with C++20
35 |       working-directory: ${{github.workspace}}/build_20
36 |       run: ctest -C ${{env.BUILD_TYPE}}
37 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files
 2 | *.slo
 3 | *.lo
 4 | *.o
 5 | *.obj
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 | 
16 | # Fortran module files
17 | *.mod
18 | 
19 | # Compiled Static libraries
20 | *.lai
21 | *.la
22 | *.a
23 | *.lib
24 | 
25 | # Executables
26 | *.exe
27 | *.out
28 | *.app
29 | 
30 | # Others
31 | *.dSYM
32 | *.swp
33 | Debug
34 | Release
35 | *.suo
36 | *.sdf
37 | *.user
38 | xcuserdata
39 | *.xcworkspace
40 | temp*
41 | build*/
42 | Makefile
43 | CMakeFiles
44 | CMakeCache.txt
45 | *.cmake
46 | *.vcxproj.filters
47 | *.opensdf
48 | .idea/
49 | grammar/test/*
50 | .DS_Store
51 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | project(peglib)
 3 | 
 4 | # Default to C++17, but honor a standard that is already defined, e.g. by
 5 | # -DCMAKE_CXX_STANDARD=20 on the command line. An unconditional set() creates
 6 | # a normal variable that shadows the cache entry and silently ignores it.
 7 | if(NOT DEFINED CMAKE_CXX_STANDARD)
 8 |   set(CMAKE_CXX_STANDARD 17)
 9 | endif()
10 | set(CMAKE_CXX_EXTENSIONS OFF)
11 | 
12 | # Flags appended for every target configured from this directory down.
13 | if(MSVC)
14 |   string(APPEND CMAKE_CXX_FLAGS " /Zc:__cplusplus /utf-8 /D_CRT_SECURE_NO_DEPRECATE")
15 | else()
16 |   string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra")
17 | endif()
18 | 
19 | set(THREADS_PREFER_PTHREAD_FLAG ON)
20 | find_package(Threads)
21 | 
22 | # Extra link dependency recorded for Linux builds only.
23 | # NOTE(review): presumably consumed by the subdirectory CMakeLists files; confirm.
24 | if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
25 |   set(add_link_deps Threads::Threads)
26 | endif()
27 | 
28 | # Header-only library: consumers only need this directory on their include path.
29 | add_library(peglib INTERFACE)
30 | target_include_directories(peglib INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
31 | 
32 | # Optional components; only the tests are built by default.
33 | option(BUILD_TESTS "Build cpp-peglib tests" ON)
34 | option(PEGLIB_BUILD_LINT "Build cpp-peglib lint utility" OFF)
35 | option(PEGLIB_BUILD_EXAMPLES "Build cpp-peglib examples" OFF)
36 | option(PEGLIB_BUILD_PL0 "Build pl0 interpreter" OFF)
37 | 
38 | # enable_testing() applies to this directory and below, so it must run
39 | # before test/ is added for the setting to cover that subdirectory.
40 | if(BUILD_TESTS)
41 |   enable_testing()
42 |   add_subdirectory(test)
43 | endif()
44 | 
45 | if(PEGLIB_BUILD_LINT)
46 |   add_subdirectory(lint)
47 | endif()
48 | 
49 | if(PEGLIB_BUILD_EXAMPLES)
50 |   add_subdirectory(example)
51 | endif()
52 | 
53 | if(PEGLIB_BUILD_PL0)
54 |   add_subdirectory(pl0)
55 | endif()
56 | 
57 | # Install the single public header.
58 | install(FILES peglib.h DESTINATION include)
59 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2022 yhirose
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | cpp-peglib
  2 | ==========
  3 | 
  4 | [![](https://github.com/yhirose/cpp-peglib/workflows/CMake/badge.svg)](https://github.com/yhirose/cpp-peglib/actions)
  5 | 
  6 | C++17 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library. You can start using it right away just by including `peglib.h` in your project.
  7 | 
  8 | Since this library only supports C++17 compilers, please make sure that the compiler option `-std=c++17` is enabled.
  9 | (`/std:c++17 /Zc:__cplusplus` for MSVC)
 10 | 
 11 | You can also try the online version, PEG Playground at https://yhirose.github.io/cpp-peglib.
 12 | 
 13 | The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf) by Bryan Ford. *cpp-peglib* also supports the following additional syntax for now:
 14 | 
 15 | * `'...'i` (Case-insensitive literal operator)
 16 | * `[...]i` (Case-insensitive character class operator)
 17 | * `[^...]` (Negated character class operator)
 18 | * `[^...]i` (Case-insensitive negated character class operator)
 19 | * `{2,5}` (Regex-like repetition operator)
 20 | * `<` ... `>` (Token boundary operator)
 21 | * `~` (Ignore operator)
 22 | * `\x20` (Hex number char)
 23 | * `\u10FFFF` (Unicode char)
 24 | * `%whitespace` (Automatic whitespace skipping)
 25 | * `%word` (Word expression)
 26 | * `$name(` ... `)` (Capture scope operator)
 27 | * `$name<` ... `>` (Named capture operator)
 28 | * `$name` (Backreference operator)
 29 | * `|` (Dictionary operator)
 30 | * `↑` (Cut operator)
 31 | * `MACRO_NAME(` ... `)` (Parameterized rule or Macro)
 32 | * `{ precedence L - + L / * }` (Parsing infix expression)
 33 | * `%recovery(` ... `)` (Error recovery operator)
 34 | * `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`)
 35 | * `label { error_message "..." }` (Error message instruction)
 36 | * `{ no_ast_opt }` (No AST node optimization instruction)
 37 | 
 38 | The 'End of Input' check is performed by default. To disable it, call `disable_eoi_check`.
 39 | 
 40 | This library supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing.
 41 | 
 42 | IMPORTANT NOTE for some Linux distributions such as Ubuntu and CentOS: Need `-pthread` option when linking. See [#23](https://github.com/yhirose/cpp-peglib/issues/23#issuecomment-261126127), [#46](https://github.com/yhirose/cpp-peglib/issues/46#issuecomment-417870473) and [#62](https://github.com/yhirose/cpp-peglib/issues/62#issuecomment-492032680).
 43 | 
 44 | I am sure that you will enjoy this excellent ["Practical parsing with PEG and cpp-peglib"](https://berthub.eu/articles/posts/practical-peg-parsing/) article by [bert hubert](https://berthub.eu/)!
 45 | 
 46 | How to use
 47 | ----------
 48 | 
 49 | This is a simple calculator sample. It shows how to define grammar, associate semantic actions to the grammar, and handle semantic values.
 50 | 
 51 | ```cpp
 52 | // (1) Include the header file
 53 | #include <peglib.h>
 54 | #include <assert.h>
 55 | #include <iostream>
 56 | 
 57 | using namespace peg;
 58 | using namespace std;
 59 | 
 60 | int main(void) {
 61 |   // (2) Make a parser
 62 |   parser parser(R"(
 63 |     # Grammar for Calculator...
 64 |     Additive    <- Multiplicative '+' Additive / Multiplicative
 65 |     Multiplicative   <- Primary '*' Multiplicative / Primary
 66 |     Primary     <- '(' Additive ')' / Number
 67 |     Number      <- < [0-9]+ >
 68 |     %whitespace <- [ \t]*
 69 |   )");
 70 | 
 71 |   assert(static_cast<bool>(parser) == true);
 72 | 
 73 |   // (3) Setup actions
 74 |   parser["Additive"] = [](const SemanticValues &vs) {
 75 |     switch (vs.choice()) {
 76 |     case 0: // "Multiplicative '+' Additive"
 77 |       return any_cast<int>(vs[0]) + any_cast<int>(vs[1]);
 78 |     default: // "Multiplicative"
 79 |       return any_cast<int>(vs[0]);
 80 |     }
 81 |   };
 82 | 
 83 |   parser["Multiplicative"] = [](const SemanticValues &vs) {
 84 |     switch (vs.choice()) {
 85 |     case 0: // "Primary '*' Multiplicative"
 86 |       return any_cast<int>(vs[0]) * any_cast<int>(vs[1]);
 87 |     default: // "Primary"
 88 |       return any_cast<int>(vs[0]);
 89 |     }
 90 |   };
 91 | 
 92 |   parser["Number"] = [](const SemanticValues &vs) {
 93 |     return vs.token_to_number<int>();
 94 |   };
 95 | 
 96 |   // (4) Parse
 97 |   parser.enable_packrat_parsing(); // Enable packrat parsing.
 98 | 
 99 |   int val;
100 |   parser.parse(" (1 + 2) * 3 ", val);
101 | 
102 |   assert(val == 9);
103 | }
104 | ```
105 | 
106 | To show syntax errors in grammar text:
107 | 
108 | ```cpp
109 | auto grammar = R"(
110 |   # Grammar for Calculator...
111 |   Additive    <- Multiplicative '+' Additive / Multiplicative
112 |   Multiplicative   <- Primary '*' Multiplicative / Primary
113 |   Primary     <- '(' Additive ')' / Number
114 |   Number      <- < [0-9]+ >
115 |   %whitespace <- [ \t]*
116 | )";
117 | 
118 | parser parser;
119 | 
120 | parser.set_logger([](size_t line, size_t col, const string& msg, const string &rule) {
121 |   cerr << line << ":" << col << ": " << msg << "\n";
122 | });
123 | 
124 | auto ok = parser.load_grammar(grammar);
125 | assert(ok);
126 | ```
127 | 
128 | There are four semantic actions available:
129 | 
130 | ```cpp
131 | [](const SemanticValues& vs, any& dt)
132 | [](const SemanticValues& vs)
133 | [](SemanticValues& vs, any& dt)
134 | [](SemanticValues& vs)
135 | ```
136 | 
137 | `SemanticValues` value contains the following information:
138 | 
139 | * Semantic values
140 | * Matched string information
141 | * Token information if the rule is literal or uses a token boundary operator
142 | * Choice number when the rule is 'prioritized choice'
143 | 
144 | `any& dt` is 'read-write' context data that can be used for any purpose. The initial context data is set in the `peg::parser::parse` method.
145 | 
146 | A semantic action can return a value of arbitrary data type, which will be wrapped by `peg::any`. If a user returns nothing in a semantic action, the first semantic value in the `const SemanticValues& vs` argument will be returned. (Yacc parser has the same behavior.)
147 | 
148 | Here is the `SemanticValues` structure:
149 | 
150 | ```cpp
151 | struct SemanticValues : protected std::vector<any>
152 | {
153 |   // Input text
154 |   const char* path;
155 |   const char* ss;
156 | 
157 |   // Matched string
158 |   std::string_view sv() const { return sv_; }
159 | 
160 |   // Line number and column at which the matched string is
161 |   std::pair<size_t, size_t> line_info() const;
162 | 
163 |   // Tokens
164 |   std::vector<std::string_view> tokens;
165 |   std::string_view token(size_t id = 0) const;
166 | 
167 |   // Token conversion
168 |   std::string token_to_string(size_t id = 0) const;
169 |   template <typename T> T token_to_number() const;
170 | 
171 |   // Choice number (0 based index)
172 |   size_t choice() const;
173 | 
174 |   // Transform the semantic value vector to another vector
175 |   template <typename T> vector<T> transform(size_t beg = 0, size_t end = -1) const;
176 | }
177 | ```
178 | 
179 | The following example uses `<` ... `>` operator, which is *token boundary* operator.
180 | 
181 | ```cpp
182 | peg::parser parser(R"(
183 |   ROOT  <- _ TOKEN (',' _ TOKEN)*
184 |   TOKEN <- < [a-z0-9]+ > _
185 |   _     <- [ \t\r\n]*
186 | )");
187 | 
188 | parser["TOKEN"] = [](const SemanticValues& vs) {
189 |   // 'token' doesn't include trailing whitespaces
190 |   auto token = vs.token();
191 | };
192 | 
193 | auto ret = parser.parse(" token1, token2 ");
194 | ```
195 | 
196 | We can ignore unnecessary semantic values from the list by using `~` operator.
197 | 
198 | ```cpp
199 | peg::parser parser(R"(
200 |   ROOT  <-  _ ITEM (',' _ ITEM _)*
201 |   ITEM  <-  ([a-z0-9])+
202 |   ~_    <-  [ \t]*
203 | )");
204 | 
205 | parser["ROOT"] = [&](const SemanticValues& vs) {
206 |   assert(vs.size() == 2); // should be 2 instead of 5.
207 | };
208 | 
209 | auto ret = parser.parse(" item1, item2 ");
210 | ```
211 | 
212 | The following grammar is the same as the above.
213 | 
214 | ```cpp
215 | peg::parser parser(R"(
216 |   ROOT  <-  ~_ ITEM (',' ~_ ITEM ~_)*
217 |   ITEM  <-  ([a-z0-9])+
218 |   _     <-  [ \t]*
219 | )");
220 | ```
221 | 
222 | *Semantic predicate* support is available with a *predicate* action.
223 | 
224 | ```cpp
225 | peg::parser parser("NUMBER  <-  [0-9]+");
226 | 
227 | parser["NUMBER"] = [](const SemanticValues &vs) {
228 |   return vs.token_to_number<long>();
229 | };
230 | 
231 | parser["NUMBER"].predicate = [](const SemanticValues &vs,
232 |                                 const std::any & /*dt*/, std::string &msg) {
233 |   if (vs.token_to_number<long>() != 100) {
234 |     msg = "value error!!";
235 |     return false;
236 |   }
237 |   return true;
238 | };
239 | 
240 | long val;
241 | auto ret = parser.parse("100", val);
242 | assert(ret == true);
243 | assert(val == 100);
244 | 
245 | ret = parser.parse("200", val);
246 | assert(ret == false);
247 | ```
248 | 
249 | *enter* and *leave* actions are also available.
250 | 
251 | ```cpp
252 | parser["RULE"].enter = [](const Context &c, const char* s, size_t n, any& dt) {
253 |   std::cout << "enter" << std::endl;
254 | };
255 | 
256 | parser["RULE"] = [](const SemanticValues& vs, any& dt) {
257 |   std::cout << "action!" << std::endl;
258 | };
259 | 
260 | parser["RULE"].leave = [](const Context &c, const char* s, size_t n, size_t matchlen, any& value, any& dt) {
261 |   std::cout << "leave" << std::endl;
262 | };
263 | ```
264 | 
265 | You can receive error information via a logger:
266 | 
267 | ```cpp
268 | parser.set_logger([](size_t line, size_t col, const string& msg) {
269 |   ...
270 | });
271 | 
272 | parser.set_logger([](size_t line, size_t col, const string& msg, const string &rule) {
273 |   ...
274 | });
275 | ```
276 | 
277 | Ignoring Whitespaces
278 | --------------------
279 | 
280 | As you can see in the first example, we can ignore whitespaces between tokens automatically with `%whitespace` rule.
281 | 
282 | `%whitespace` rule can be applied to the following three conditions:
283 | 
284 | * trailing spaces on tokens
285 | * leading spaces on text
286 | * trailing spaces on literal strings in rules
287 | 
288 | These are valid tokens:
289 | 
290 | ```
291 | KEYWORD   <- 'keyword'
292 | KEYWORDI  <- 'case_insensitive_keyword'i
293 | WORD      <-  < [a-zA-Z0-9] [a-zA-Z0-9-_]* >    # token boundary operator is used.
294 | IDENT     <-  < IDENT_START_CHAR IDENT_CHAR* >  # token boundary operator is used.
295 | ```
296 | 
297 | The following grammar accepts ` one, "two three", four `.
298 | 
299 | ```
300 | ROOT         <- ITEM (',' ITEM)*
301 | ITEM         <- WORD / PHRASE
302 | WORD         <- < [a-z]+ >
303 | PHRASE       <- < '"' (!'"' .)* '"' >
304 | 
305 | %whitespace  <-  [ \t\r\n]*
306 | ```
307 | 
308 | Word expression
309 | ---------------
310 | 
311 | ```cpp
312 | peg::parser parser(R"(
313 |   ROOT         <-  'hello' 'world'
314 |   %whitespace  <-  [ \t\r\n]*
315 |   %word        <-  [a-z]+
316 | )");
317 | 
318 | parser.parse("hello world"); // OK
319 | parser.parse("helloworld");  // NG
320 | ```
321 | 
322 | Capture/Backreference
323 | ---------------------
324 | 
325 | ```cpp
326 | peg::parser parser(R"(
327 |   ROOT      <- CONTENT
328 |   CONTENT   <- (ELEMENT / TEXT)*
329 |   ELEMENT   <- $(STAG CONTENT ETAG)
330 |   STAG      <- '<' $tag< TAG_NAME > '>'
331 |   ETAG      <- '</' $tag '>'
332 |   TAG_NAME  <- 'b' / 'u'
333 |   TEXT      <- TEXT_DATA
334 |   TEXT_DATA <- ![<] .
335 | )");
336 | 
337 | parser.parse("This is <b>a <u>test</u> text</b>."); // OK
338 | parser.parse("This is <b>a <u>test</b> text</u>."); // NG
339 | parser.parse("This is <b>a <u>test text</b>.");     // NG
340 | ```
341 | 
342 | Dictionary
343 | ----------
344 | 
345 | The `|` operator allows us to make a word dictionary for fast lookup by using a trie structure internally. We don't have to worry about the order of words.
346 | 
347 | ```peg
348 | START <- 'This month is ' MONTH '.'
349 | MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...'
350 | ```
351 | 
352 | We are able to find which item is matched with `choice()`.
353 | 
354 | ```cpp
355 | parser["MONTH"] = [](const SemanticValues &vs) {
356 |   auto id = vs.choice();
357 | };
358 | ```
359 | 
360 | It supports the case-insensitive mode.
361 | 
362 | ```peg
363 | START <- 'This month is ' MONTH '.'
364 | MONTH <- 'Jan'i | 'January'i | 'Feb'i | 'February'i | '...'i
365 | ```
366 | 
367 | Cut operator
368 | ------------
369 | 
370 | The `↑` operator can mitigate the backtracking performance problem, but it risks changing the meaning of the grammar.
371 | 
372 | ```peg
373 | S <- '(' ↑ P ')' / '"' ↑ P '"' / P
374 | P <- 'a' / 'b' / 'c'
375 | ```
376 | 
377 | When we parse `(z` with the above grammar, we don't have to backtrack in `S` after `(` is matched, because a cut operator is inserted there.
378 | 
379 | Parameterized Rule or Macro
380 | ---------------------------
381 | 
382 | ```peg
383 | # Syntax
384 | Start      ← _ Expr
385 | Expr       ← Sum
386 | Sum        ← List(Product, SumOpe)
387 | Product    ← List(Value, ProOpe)
388 | Value      ← Number / T('(') Expr T(')')
389 | 
390 | # Token
391 | SumOpe     ← T('+' / '-')
392 | ProOpe     ← T('*' / '/')
393 | Number     ← T([0-9]+)
394 | ~_         ← [ \t\r\n]*
395 | 
396 | # Macro
397 | List(I, D) ← I (D I)*
398 | T(x)       ← < x > _
399 | ```
400 | 
401 | Parsing infix expression by Precedence climbing
402 | -----------------------------------------------
403 | 
404 | Regarding the *precedence climbing algorithm*, please see [this article](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing).
405 | 
406 | ```cpp
407 | parser parser(R"(
408 |   EXPRESSION             <-  INFIX_EXPRESSION(ATOM, OPERATOR)
409 |   ATOM                   <-  NUMBER / '(' EXPRESSION ')'
410 |   OPERATOR               <-  < [-+/*] >
411 |   NUMBER                 <-  < '-'? [0-9]+ >
412 |   %whitespace            <-  [ \t]*
413 | 
414 |   # Declare order of precedence
415 |   INFIX_EXPRESSION(A, O) <-  A (O A)* {
416 |     precedence
417 |       L + -
418 |       L * /
419 |   }
420 | )");
421 | 
422 | parser["INFIX_EXPRESSION"] = [](const SemanticValues& vs) -> long {
423 |   auto result = any_cast<long>(vs[0]);
424 |   if (vs.size() > 1) {
425 |     auto ope = any_cast<char>(vs[1]);
426 |     auto num = any_cast<long>(vs[2]);
427 |     switch (ope) {
428 |       case '+': result += num; break;
429 |       case '-': result -= num; break;
430 |       case '*': result *= num; break;
431 |       case '/': result /= num; break;
432 |     }
433 |   }
434 |   return result;
435 | };
436 | parser["OPERATOR"] = [](const SemanticValues& vs) { return vs.sv().front(); };
437 | parser["NUMBER"] = [](const SemanticValues& vs) { return vs.token_to_number<long>(); };
438 | 
439 | long val;
440 | parser.parse(" -1 + (1 + 2) * 3 - -1", val);
441 | assert(val == 9);
442 | ```
443 | 
444 | *precedence* instruction can be applied only to the following 'list' style rule.
445 | 
446 | ```
447 | Rule <- Atom (Operator Atom)* {
448 |   precedence
449 |     L - +
450 |     L / *
451 |     R ^
452 | }
453 | ```
454 | 
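455 | The *precedence* instruction contains precedence info entries. Each entry starts with an *associativity*, which is 'L' (left) or 'R' (right), followed by operator *literal* tokens. Entries are listed from lowest to highest precedence: operators in the first entry bind the loosest, and operators in the last entry bind the tightest.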
456 | 
457 | AST generation
458 | --------------
459 | 
460 | *cpp-peglib* is able to generate an AST (Abstract Syntax Tree) when parsing. `enable_ast` method on `peg::parser` class enables the feature.
461 | 
462 | NOTE: An AST node holds a corresponding token as `std::string_view` for performance and less memory usage. It is the user's responsibility to keep the original source text alive along with the generated AST.
463 | 
464 | ```
465 | peg::parser parser(R"(
466 |   ...
467 |   definition1 <- ... { no_ast_opt }
468 |   definition2 <- ... { no_ast_opt }
469 |   ...
470 | )");
471 | 
472 | parser.enable_ast();
473 | 
474 | shared_ptr<peg::Ast> ast;
475 | if (parser.parse("...", ast)) {
476 |   cout << peg::ast_to_s(ast);
477 | 
478 |   ast = parser.optimize_ast(ast);
479 |   cout << peg::ast_to_s(ast);
480 | }
481 | ```
482 | 
483 | `optimize_ast` removes redundant nodes to make an AST simpler. If you want to disable this behavior from particular rules, `no_ast_opt` instruction can be used.
484 | 
485 | It internally calls `peg::AstOptimizer` to do the job. You can make your own AST optimizers to fit your needs.
486 | 
487 | See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc).
488 | 
489 | Make a parser with parser combinators
490 | -------------------------------------
491 | 
492 | Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser combinators*. Here is an example:
493 | 
494 | ```cpp
495 | using namespace peg;
496 | using namespace std;
497 | 
498 | vector<string> tags;
499 | 
500 | Definition ROOT, TAG_NAME, _;
501 | ROOT     <= seq(_, zom(seq(chr('['), TAG_NAME, chr(']'), _)));
502 | TAG_NAME <= oom(seq(npd(chr(']')), dot())), [&](const SemanticValues& vs) {
503 |               tags.push_back(vs.token_to_string());
504 |             };
505 | _        <= zom(cls(" \t"));
506 | 
507 | auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
508 | ```
509 | 
510 | The following are available operators:
511 | 
512 | | Operator | Description                     | Operator | Description         |
513 | |:---------|:--------------------------------|:---------|:--------------------|
514 | | seq      | Sequence                        | cho      | Prioritized Choice  |
515 | | zom      | Zero or More                    | oom      | One or More         |
516 | | opt      | Optional                        | apd      | And predicate       |
517 | | npd      | Not predicate                   | lit      | Literal string      |
518 | | liti     | Case-insensitive Literal string | cls      | Character class     |
519 | | ncls     | Negated Character class         | chr      | Character           |
520 | | dot      | Any character                   | tok      | Token boundary      |
521 | | ign      | Ignore semantic value           | csc      | Capture scope       |
522 | | cap      | Capture                         | bkr      | Back reference      |
523 | | dic      | Dictionary                      | pre      | Infix expression    |
524 | | rec      | Recovery operator               | usr      | User defined parser |
525 | | rep      | Repetition                      |          |                     |
526 | 
527 | Adjust definitions
528 | ------------------
529 | 
530 | It's possible to add/override definitions.
531 | 
532 | ```cpp
533 | auto syntax = R"(
534 |   ROOT <- _ 'Hello' _ NAME '!' _
535 | )";
536 | 
537 | Rules additional_rules = {
538 |   {
539 |     "NAME", usr([](const char* s, size_t n, SemanticValues& vs, any& dt) -> size_t {
540 |       static vector<string> names = { "PEG", "BNF" };
541 |       for (const auto& name: names) {
542 |         if (name.size() <= n && !name.compare(0, name.size(), s, name.size())) {
543 |           return name.size(); // processed length
544 |         }
545 |       }
546 |       return -1; // parse error
547 |     })
548 |   },
549 |   {
550 |     "~_", zom(cls(" \t\r\n"))
551 |   }
552 | };
553 | 
554 | auto g = parser(syntax, additional_rules);
555 | 
556 | assert(g.parse(" Hello BNF! "));
557 | ```
558 | 
559 | Unicode support
560 | ---------------
561 | 
562 | cpp-peglib accepts UTF8 text. `.` matches a Unicode codepoint. Also, it supports `\u????`.
563 | 
564 | Error report and recovery
565 | -------------------------
566 | 
567 | cpp-peglib supports the furthest failure error position report as described in the Bryan Ford original document.
568 | 
569 | For better error report and recovery, cpp-peglib supports 'recovery' operator with label which can be associated with a recovery expression and a custom error message. This idea comes from the fantastic ["Syntax Error Recovery in Parsing Expression Grammars"](https://arxiv.org/pdf/1806.11150.pdf) paper by Sergio Medeiros and Fabio Mascarenhas.
570 | 
571 | The custom message supports `%t` which is a placeholder for the unexpected token, and `%c` for the unexpected Unicode char.
572 | 
573 | Here is an example of Java-like grammar:
574 | 
575 | ```peg
576 | # java.peg
577 | Prog        ← 'public' 'class' NAME '{' 'public' 'static' 'void' 'main' '(' 'String' '[' ']' NAME ')' BlockStmt '}'
578 | BlockStmt   ← '{' (!'}' Stmt^stmtb)* '}' # Annotated with `stmtb`
579 | Stmt        ← IfStmt / WhileStmt / PrintStmt / DecStmt / AssignStmt / BlockStmt
580 | IfStmt      ← 'if' '(' Exp ')' Stmt ('else' Stmt)?
581 | WhileStmt   ← 'while' '(' Exp^condw ')' Stmt # Annotated with `condw`
582 | DecStmt     ← 'int' NAME ('=' Exp)? ';'
583 | AssignStmt  ← NAME '=' Exp ';'^semia # Annotated with `semia`
584 | PrintStmt   ← 'System.out.println' '(' Exp ')' ';'
585 | Exp         ← RelExp ('==' RelExp)*
586 | RelExp      ← AddExp ('<' AddExp)*
587 | AddExp      ← MulExp (('+' / '-') MulExp)*
588 | MulExp      ← AtomExp (('*' / '/') AtomExp)*
589 | AtomExp     ← '(' Exp ')' / NUMBER / NAME
590 | 
591 | NUMBER      ← < [0-9]+ >
592 | NAME        ← < [a-zA-Z_][a-zA-Z_0-9]* >
593 | 
594 | %whitespace ← [ \t\n]*
595 | %word       ← NAME
596 | 
597 | # Recovery operator labels
598 | semia       ← '' { error_message "missing semicolon in assignment." }
599 | stmtb       ← (!(Stmt / 'else' / '}') .)* { error_message "invalid statement" }
600 | condw       ← &'==' ('==' RelExp)* / &'<' ('<' AddExp)* / (!')' .)*
601 | ```
602 | 
603 | For instance, `';'^semia` is a syntactic sugar for `(';' / %recovery(semia))`. `%recover` operator tries to recover the error at ';' by skipping input text with the recovery expression `semia`. Also `semia` is associated with a custom message "missing semicolon in assignment."
604 | 
605 | Here is the result:
606 | 
607 | ```java
608 | > cat sample.java
609 | public class Example {
610 |   public static void main(String[] args) {
611 |     int n = 5;
612 |     int f = 1;
613 |     while( < n) {
614 |       f = f * n;
615 |       n = n - 1
616 |     };
617 |     System.out.println(f);
618 |   }
619 | }
620 | 
621 | > peglint java.peg sample.java
622 | sample.java:5:12: syntax error, unexpected '<', expecting '(', <NUMBER>, <NAME>.
623 | sample.java:8:5: missing semicolon in assignment.
624 | sample.java:8:6: invalid statement
625 | ```
626 | 
627 | As you can see, it can now show more than one error, and provide more meaningful error messages than the default messages.
628 | 
629 | ### Custom error message for definitions
630 | 
631 | We can associate custom error messages to definitions.
632 | 
633 | ```peg
634 | # custom_message.peg
635 | START       <- CODE (',' CODE)*
636 | CODE        <- < '0x' [a-fA-F0-9]+ > { error_message 'code format error...' }
637 | %whitespace <- [ \t]*
638 | ```
639 | 
640 | ```
641 | > cat custom_message.txt
642 | 0x1234,0x@@@@,0xABCD
643 | 
644 | > peglint custom_message.peg custom_message.txt
645 | custom_message.txt:1:8: code format error...
646 | ```
647 | 
648 | NOTE: If there is more than one element with an error message instruction in a prioritized choice, this feature may not work as you expect.
649 | 
650 | Change the Start Definition Rule
651 | --------------------------------
652 | 
653 | We can change the start definition rule as below.
654 | 
655 | ```cpp
656 | auto grammar = R"(
657 |   Start       <- A
658 |   A           <- B (',' B)*
659 |   B           <- '[one]' / '[two]'
660 |   %whitespace <- [ \t\n]*
661 | )";
662 | 
663 | peg::parser parser(grammar, "A"); // Start Rule is "A"
664 | 
665 |   or
666 | 
667 | peg::parser parser;
668 | parser.load_grammar(grammar, "A"); // Start Rule is "A"
669 | 
670 | parser.parse(" [one] , [two] "); // OK
671 | ```
672 | 
673 | peglint - PEG syntax lint utility
674 | ---------------------------------
675 | 
676 | ### Build peglint
677 | 
678 | ```
679 | > cd lint
680 | > mkdir build
681 | > cd build
682 | > cmake ..
683 | > make
684 | > ./peglint
685 | usage: grammar_file_path [source_file_path]
686 | 
687 |   options:
688 |     --source: source text
689 |     --packrat: enable packrat memoise
690 |     --ast: show AST tree
691 |     --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction
692 |     --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction
693 |     --trace: show concise trace messages
694 |     --profile: show profile report
695 |     --verbose: verbose output for trace and profile
696 | ```
697 | 
698 | ### Grammar check
699 | 
700 | ```
701 | > cat a.peg
702 | Additive    <- Multiplicative '+' Additive / Multiplicative
703 | Multiplicative   <- Primary '*' Multiplicative / Primary
704 | Primary     <- '(' Additive ')' / Number
705 | %whitespace <- [ \t\r\n]*
706 | 
707 | > peglint a.peg
708 | [commandline]:3:35: 'Number' is not defined.
709 | ```
710 | 
711 | ### Source check
712 | 
713 | ```
714 | > cat a.peg
715 | Additive    <- Multiplicative '+' Additive / Multiplicative
716 | Multiplicative   <- Primary '*' Multiplicative / Primary
717 | Primary     <- '(' Additive ')' / Number
718 | Number      <- < [0-9]+ >
719 | %whitespace <- [ \t\r\n]*
720 | 
721 | > peglint --source "1 + a * 3" a.peg
722 | [commandline]:1:3: syntax error
723 | ```
724 | 
725 | ### AST
726 | 
727 | ```
728 | > cat a.txt
729 | 1 + 2 * 3
730 | 
731 | > peglint --ast a.peg a.txt
732 | + Additive
733 |   + Multiplicative
734 |     + Primary
735 |       - Number (1)
736 |   + Additive
737 |     + Multiplicative
738 |       + Primary
739 |         - Number (2)
740 |       + Multiplicative
741 |         + Primary
742 |           - Number (3)
743 | ```
744 | 
745 | ### AST optimization
746 | 
747 | ```
748 | > peglint --ast --opt --source "1 + 2 * 3" a.peg
749 | + Additive
750 |   - Multiplicative[Number] (1)
751 |   + Additive[Multiplicative]
752 |     - Primary[Number] (2)
753 |     - Multiplicative[Number] (3)
754 | ```
755 | 
756 | ### Adjust AST optimization with `no_ast_opt` instruction
757 | 
758 | ```
759 | > cat a.peg
760 | Additive    <- Multiplicative '+' Additive / Multiplicative
761 | Multiplicative   <- Primary '*' Multiplicative / Primary
762 | Primary     <- '(' Additive ')' / Number          { no_ast_opt }
763 | Number      <- < [0-9]+ >
764 | %whitespace <- [ \t\r\n]*
765 | 
766 | > peglint --ast --opt --source "1 + 2 * 3" a.peg
767 | + Additive/0
768 |   + Multiplicative/1[Primary]
769 |     - Number (1)
770 |   + Additive/1[Multiplicative]
771 |     + Primary/1
772 |       - Number (2)
773 |     + Multiplicative/1[Primary]
774 |       - Number (3)
775 | 
776 | > peglint --ast --opt-only --source "1 + 2 * 3" a.peg
777 | + Additive/0
778 |   + Multiplicative/1
779 |     - Primary/1[Number] (1)
780 |   + Additive/1
781 |     + Multiplicative/0
782 |       - Primary/1[Number] (2)
783 |       + Multiplicative/1
784 |         - Primary/1[Number] (3)
785 | ```
786 | 
787 | Sample code
788 | ------------
789 | 
790 | * [Calculator](https://github.com/yhirose/cpp-peglib/blob/master/example/calc.cc)
791 | * [Calculator (with parser operators)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc2.cc)
792 | * [Calculator (AST version)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc)
793 | * [Calculator (parsing expressions by precedence climbing)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc4.cc)
794 | * [Calculator (AST version and parsing expressions by precedence climbing)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc5.cc)
795 | * [A tiny PL/0 JIT compiler in less than 900 LOC with LLVM and PEG parser](https://github.com/yhirose/pl0-jit-compiler)
796 | * [A Programming Language just for writing Fizz Buzz programs. :)](https://github.com/yhirose/fizzbuzzlang)
797 | 
798 | License
799 | -------
800 | 
801 | MIT license (© 2022 Yuji Hirose)
802 | 


--------------------------------------------------------------------------------
/docs/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | source "${EMSDK:-$HOME/Projects/emsdk}/emsdk_env.sh" || exit 1 # emsdk root: $EMSDK when set, else ~/Projects/emsdk; stop if it cannot be loaded
3 | emcc -std=c++17 -O3 --bind -o native.js -s ALLOW_MEMORY_GROWTH native.cpp
4 | 


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 | <title>PEG Playground</title>
 5 | <link rel="stylesheet" href="style.css" media="all">
 6 | </head>
 7 | <body>
 8 | <div id="main">
 9 |   <div class="editor-container">
10 |     <ul class="editor-header">
11 |       <li><span><span id="grammar-validation" class="validation"></span></span></li>
12 |       <li><span>Grammar</span></li>
13 |     </ul>
14 |     <pre id="grammar-editor" class="editor-area">{{syntax}}</pre>
15 |     <div id="grammar-info" class="editor-info"></div>
16 |   </div>
17 |   <div class="editor-container">
18 |     <ul class="editor-header">
19 |       <li><span><span id="code-validation" class="validation"></span></span></li>
20 |       <li><span>Source Code</span></li>
21 |       <li class="editor-options">
22 |         <ul class="editor-header-options">
23 |           <li class="option"><label>Start Rule: </label><input id="start-rule" type="text"></li>
24 |           <li class="option"><input id="packrat" type="checkbox"><label>Packrat</label></li>
25 |           <li class="option"><input id="auto-refresh" type="checkbox"><label>Auto Refresh</label></li>
26 |           <li class="option"><button id="parse" class="parse">Parse</button></li>
27 |         </ul>
28 |       </li>
29 |     </ul>
30 |     <pre id="code-editor" class="editor-area">{{source}}</pre>
31 |     <div class="editor-sub-header"><input id="show-ast" class="show-toggle" type="checkbox">AST</div>
32 |     <pre id="code-ast" class="editor-area"></pre>
33 |     <div class="editor-sub-header"><input id="show-ast-optimized" class="show-toggle" type="checkbox">Optimized AST&nbsp;&nbsp;&nbsp;&nbsp;
34 |       mode:&nbsp;<select id="opt-mode"><option value="all">All</option><option value="only">Only</option></select>
35 |     </div>
36 |     <pre id="code-ast-optimized" class="editor-area"></pre>
37 |     <div class="editor-sub-header"><input id="show-profile" class="show-toggle" type="checkbox">Profile</div>
38 |     <div id="code-profile" class="editor-area"></div>
39 | 
40 |     <div id="code-info" class="editor-info"></div>
41 |   </div>
42 | </div>
43 | <div id="overlay"></div>
44 | <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.2/ace.js"></script>
45 | <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
46 | <script src="index.js"></script>
47 | <script src="native.js"></script>
48 | </body>
49 | </html>
50 | 


--------------------------------------------------------------------------------
/docs/index.js:
--------------------------------------------------------------------------------
  1 | // Setup editors
  2 | function setupInfoArea(id) {
  3 |   const e = ace.edit(id);
  4 |   e.setShowPrintMargin(false);
  5 |   e.setOptions({
  6 |     readOnly: true,
  7 |     highlightActiveLine: false,
  8 |     highlightGutterLine: false
  9 |   })
 10 |   e.renderer.$cursorLayer.element.style.opacity=0;
 11 |   return e;
 12 | }
 13 | 
 14 | function setupEditorArea(id, lsKey) {
 15 |   const e = ace.edit(id);
 16 |   e.setShowPrintMargin(false);
 17 |   e.setValue(localStorage.getItem(lsKey) || '');
 18 |   e.moveCursorTo(0, 0);
 19 |   return e;
 20 | }
 21 | 
 22 | const grammar = setupEditorArea("grammar-editor", "grammarText");
 23 | const code = setupEditorArea("code-editor", "codeText");
 24 | // Read-only panes for the AST, the optimized AST and the profile report.
 25 | const codeAst = setupInfoArea("code-ast");
 26 | const codeAstOptimized = setupInfoArea("code-ast-optimized");
 27 | const codeProfile = setupInfoArea("code-profile");
 28 | // Restore the option controls from localStorage; Parse is disabled while auto-refresh is checked.
 29 | $('#opt-mode').val(localStorage.getItem('optimizationMode') || 'all');
 30 | $('#start-rule').val(localStorage.getItem('startRule') || '');
 31 | $('#packrat').prop('checked', localStorage.getItem('packrat') === 'true');
 32 | $('#auto-refresh').prop('checked', localStorage.getItem('autoRefresh') === 'true');
 33 | $('#parse').prop('disabled', $('#auto-refresh').prop('checked'));
 34 | 
 35 | // Parse
 36 | function escapeHtml(unsafe) {
 37 |   return unsafe
 38 |     .replace(/&/g, "&amp;")
 39 |     .replace(/</g, "&lt;")
 40 |     .replace(/>/g, "&gt;")
 41 |     .replace(/"/g, "&quot;")
 42 |     .replace(/'/g, "&#039;");
 43 | }
 44 | 
 45 | function generateErrorListHTML(errors) {
 46 |   let html = '<ul>';
 47 | 
 48 |   html += $.map(errors, function (x) {
 49 |     if (x.gln && x.gcol) {
 50 |       return `<li data-ln="${x.ln}" data-col="${x.col}" data-gln="${x.gln}" data-gcol="${x.gcol}"><span>${x.ln}:${x.col}</span> <span>${escapeHtml(x.msg)}</span></li>`;
 51 |     } else {
 52 |       return `<li data-ln="${x.ln}" data-col="${x.col}"><span>${x.ln}:${x.col}</span> <span>${escapeHtml(x.msg)}</span></li>`;
 53 |     }
 54 |   }).join('');
 55 | 
 56 |   html += '<ul>';
 57 | 
 58 |   return html;
 59 | }
 60 | 
 61 | function updateLocalStorage() {
 62 |   localStorage.setItem('grammarText', grammar.getValue());
 63 |   localStorage.setItem('codeText', code.getValue());
 64 |   localStorage.setItem('optimizationMode', $('#opt-mode').val());
 65 |   localStorage.setItem('startRule', $('#start-rule').val());
 66 |   localStorage.setItem('packrat', $('#packrat').prop('checked'));
 67 |   localStorage.setItem('autoRefresh', $('#auto-refresh').prop('checked'));
 68 | }
 69 | 
 70 | function parse() {
 71 |   const $grammarValidation = $('#grammar-validation');
 72 |   const $grammarInfo = $('#grammar-info');
 73 |   const grammarText = grammar.getValue();
 74 | 
 75 |   const $codeValidation = $('#code-validation');
 76 |   const $codeInfo = $('#code-info');
 77 |   const codeText = code.getValue();
 78 | 
 79 |   const optimizationMode = $('#opt-mode').val();
 80 |   const startRule = $('#start-rule').val();
 81 |   const packrat = $('#packrat').prop('checked');
 82 | 
 83 |   $grammarInfo.html('');
 84 |   $grammarValidation.hide();
 85 |   $codeInfo.html('');
 86 |   $codeValidation.hide();
 87 |   codeAst.setValue('');
 88 |   codeAstOptimized.setValue('');
 89 |   codeProfile.setValue('');
 90 | 
 91 |   if (grammarText.length === 0) {
 92 |    return;
 93 |   }
 94 | 
 95 |   const mode = optimizationMode == 'all';
 96 | 
 97 |   $('#overlay').css({
 98 |     'z-index': '1',
 99 |     'display': 'block',
100 |     'background-color': 'rgba(0, 0, 0, 0.1)'
101 |   });
102 |   window.setTimeout(() => {
103 |     const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat, startRule));
104 |       $('#overlay').css({
105 |         'z-index': '-1',
106 |         'display': 'none',
107 |         'background-color': 'rgba(1, 1, 1, 1.0)'
108 |       });
109 | 
110 |     if (data.grammar_valid) {
111 |       $grammarValidation.removeClass('validation-invalid').show();
112 | 
113 |       codeAst.insert(data.ast);
114 |       codeAstOptimized.insert(data.astOptimized);
115 |       codeProfile.insert(data.profile);
116 | 
117 |       if (data.source_valid) {
118 |         $codeValidation.removeClass('validation-invalid').show();
119 |       } else {
120 |         $codeValidation.addClass('validation-invalid').show();
121 |       }
122 | 
123 |       if (data.code.length > 0) {
124 |         const html = generateErrorListHTML(data.code);
125 |         $codeInfo.html(html);
126 |       }
127 |     } else {
128 |       $grammarValidation.addClass('validation-invalid').show();
129 |     }
130 | 
131 |     if (data.grammar.length > 0) {
132 |       const html = generateErrorListHTML(data.grammar);
133 |       $grammarInfo.html(html);
134 |     }
135 |   }, 0);
136 | }
137 | 
138 | // Event handling for text editing
139 | let timer;
140 | function setupTimer() {
141 |   clearTimeout(timer);
142 |   timer = setTimeout(() => {
143 |     updateLocalStorage();
144 |     if ($('#auto-refresh').prop('checked')) {
145 |       parse();
146 |     }
147 |   }, 750);
148 | };
149 | grammar.getSession().on('change', setupTimer);
150 | code.getSession().on('change', setupTimer);
151 | 
152 | // Event handling in the info area
153 | function makeOnClickInInfo(editor) {
154 |   return function () {
155 |     const el = $(this);
156 |     editor.navigateTo(el.data('ln') - 1, el.data('col') - 1);
157 |     editor.scrollToLine(el.data('ln') - 1, true, false, null);
158 |     editor.focus();
159 | 
160 |     if(el.data('gln') && el.data('gcol')) {
161 |       grammar.navigateTo(el.data('gln') - 1, el.data('gcol') - 1);
162 |       grammar.scrollToLine(el.data('gln') - 1, true, false, null);
163 |     }
164 |   }
165 | };
166 | $('#grammar-info').on('click', 'li', makeOnClickInInfo(grammar));
167 | $('#code-info').on('click', 'li', makeOnClickInInfo(code));
168 | 
169 | // Event handling in the AST optimization
170 | $('#opt-mode').on('change', setupTimer);
171 | $('#start-rule').on('keydown', setupTimer);
172 | $('#packrat').on('change', setupTimer);
173 | $('#auto-refresh').on('change', () => {
174 |   updateLocalStorage();
175 |   $('#parse').prop('disabled', $('#auto-refresh').prop('checked'));
176 |   setupTimer();
177 | });
178 | $('#parse').on('click', parse);
179 | 
180 | // Resize editors to fit their parents
181 | function resizeEditorsToParent() {
182 | 	code.resize();
183 |   code.renderer.updateFull();
184 | 	codeAst.resize();
185 |   codeAst.renderer.updateFull();
186 | 	codeAstOptimized.resize();
187 |   codeAstOptimized.renderer.updateFull();
188 | 	codeProfile.resize();
189 |   codeProfile.renderer.updateFull();
190 | }
191 | 
192 | // Show windows
193 | function setupToolWindow(lsKeyName, buttonSel, codeSel) {
194 |   let show = localStorage.getItem(lsKeyName) === 'true';
195 |   $(buttonSel).prop('checked', show);
196 |   $(codeSel).css({ 'display': show ? 'block' : 'none' });
197 | 
198 |   $(buttonSel).on('change', () => {
199 |     show = !show;
200 |     localStorage.setItem(lsKeyName, show);
201 |     $(codeSel).css({ 'display': show ? 'block' : 'none' });
202 |     resizeEditorsToParent();
203 |   });
204 | }
205 | setupToolWindow('show-ast', '#show-ast', '#code-ast');
206 | setupToolWindow('show-ast-optimized', '#show-ast-optimized', '#code-ast-optimized');
207 | setupToolWindow('show-profile', '#show-profile', '#code-profile');
208 | 
209 | // Show page
210 | $('#main').css({
211 |   'display': 'flex',
212 | });
213 | 
214 | // WebAssembly
215 | var Module = {
216 |   onRuntimeInitialized: function() {
217 |     // Initial parse
218 |     if ($('#auto-refresh').prop('checked')) {
219 |       parse();
220 |     }
221 |   }
222 | };
223 | 


--------------------------------------------------------------------------------
/docs/native.cpp:
--------------------------------------------------------------------------------
  1 | #include "../peglib.h"
  2 | #include <cstdio>
  3 | #include <emscripten/bind.h>
  4 | #include <functional>
  5 | #include <iomanip>
  6 | #include <sstream>
  7 | 
  8 | // https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784
  9 | std::string escape_json(const std::string &s) {
 10 |   // '"', '\\' and bytes 0x00-0x1f become \u00XX escapes; others are copied.
 11 |   std::ostringstream out;
 12 |   out << std::hex << std::setfill('0');
 13 |   for (const char ch : s) {
 14 |     const bool plain = ch != '"' && ch != '\\' && !('\x00' <= ch && ch <= '\x1f');
 15 |     if (plain) { out << ch; continue; }
 16 |     out << "\\u" << std::setw(4) << static_cast<int>(ch);
 17 |   }
 18 |   return out.str();
 19 | }
 20 | 
 21 | std::function<void(size_t, size_t, const std::string &, const std::string &)>
 22 | makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) {
 23 |   // Returns a logger that appends one JSON object per message to `json`,
 24 |   // comma-separated; `peg`, `json` and `init` are captured by reference.
 25 |   init = true;
 26 |   return [&](size_t ln, size_t col, const std::string &msg,
 27 |              const std::string &rule) mutable {
 28 |     if (!init) { json += ","; }
 29 |     init = false;
 30 |     json += R"({"ln":)" + std::to_string(ln);
 31 |     json += R"(,"col":)" + std::to_string(col);
 32 |     json += R"(,"msg":")" + escape_json(msg) + R"(")";
 33 |     // Attach the grammar position of `rule` when it names a known definition.
 34 |     if (!rule.empty()) {
 35 |       const auto &grammar = peg.get_grammar();
 36 |       if (const auto found = grammar.find(rule); found != grammar.end()) {
 37 |         auto [gln, gcol] = found->second.line_;
 38 |         json += R"(,"gln":)" + std::to_string(gln);
 39 |         json += R"(,"gcol":)" + std::to_string(gcol);
 40 |       }
 41 |     }
 42 |     json += "}";
 43 |   };
 44 | }
 45 | 
 46 | bool parse_grammar(const std::string &text, peg::parser &peg,
 47 |                    const std::string &startRule, std::string &json) {
 48 |   bool first_entry; // set to true inside makeJSONFormatter
 49 |   peg.set_logger(makeJSONFormatter(peg, json, first_entry));
 50 |   json += "["; // messages logged while loading land between [ and ]
 51 |   const auto loaded = peg.load_grammar(text.data(), text.size(), startRule);
 52 |   json += "]";
 53 |   return loaded;
 54 | }
 55 | 
 56 | bool parse_code(const std::string &text, peg::parser &peg, std::string &json,
 57 |                 std::shared_ptr<peg::Ast> &ast) {
 58 |   peg.enable_ast();
 59 |   bool first_entry; // set to true inside makeJSONFormatter
 60 |   peg.set_logger(makeJSONFormatter(peg, json, first_entry));
 61 |   json += "["; // messages logged while parsing land between [ and ]
 62 |   const auto parsed = peg.parse_n(text.data(), text.size(), ast);
 63 |   json += "]";
 64 |   return parsed;
 65 | }
 66 | 
 67 | std::string lint(const std::string &grammarText, const std::string &codeText,
 68 |                  bool opt_mode, bool packrat, const std::string &startRule) {
 69 |   // Loads the grammar, parses the source when the grammar is usable, and
 70 |   // returns the outcome as a JSON object string. Keys: grammar_valid,
 71 |   // grammar, source_valid and, once the source has been parsed, code, ast,
 72 |   // astOptimized and profile.
 73 |   const auto as_json_bool = [](bool v) { return std::string(v ? "true" : "false"); };
 74 | 
 75 |   std::string grammarResult, codeResult;
 76 |   std::string astResult, astResultOptimized, profileResult;
 77 | 
 78 |   peg::parser peg;
 79 |   const auto is_grammar_valid =
 80 |       parse_grammar(grammarText, peg, startRule, grammarResult);
 81 |   auto is_source_valid = false;
 82 | 
 83 |   // Parse the source only when the grammar loaded and the parser is usable.
 84 |   if (is_grammar_valid && peg) {
 85 |     std::stringstream profile_out;
 86 |     peg::enable_profiling(peg, profile_out);
 87 |     if (packrat) { peg.enable_packrat_parsing(); }
 88 | 
 89 |     std::shared_ptr<peg::Ast> ast;
 90 |     is_source_valid = parse_code(codeText, peg, codeResult, ast);
 91 |     profileResult = escape_json(profile_out.str());
 92 | 
 93 |     if (ast) {
 94 |       astResult = escape_json(peg::ast_to_s(ast));
 95 |       astResultOptimized =
 96 |           escape_json(peg::ast_to_s(peg.optimize_ast(ast, opt_mode)));
 97 |     }
 98 |   }
 99 | 
100 |   // Assemble the JSON response by hand.
101 |   std::string json = "{";
102 |   json += "\"grammar_valid\":" + as_json_bool(is_grammar_valid);
103 |   json += ",\"grammar\":" + grammarResult;
104 |   json += ",\"source_valid\":" + as_json_bool(is_source_valid);
105 |   if (!codeResult.empty()) {
106 |     json += ",\"code\":" + codeResult;
107 |     json += ",\"ast\":\"" + astResult + "\"";
108 |     json += ",\"astOptimized\":\"" + astResultOptimized + "\"";
109 |     json += ",\"profile\":\"" + profileResult + "\"";
110 |   }
111 |   json += "}";
112 | 
113 |   return json;
114 | }
115 | 
116 | EMSCRIPTEN_BINDINGS(native) { emscripten::function("lint", &lint); } // registers lint() for JavaScript; index.js calls it as Module.lint
117 | 


--------------------------------------------------------------------------------
/docs/native.js:
--------------------------------------------------------------------------------
1 | var Module=typeof Module!="undefined"?Module:{};var moduleOverrides=Object.assign({},Module);var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var ENVIRONMENT_IS_WEB=typeof window=="object";var ENVIRONMENT_IS_WORKER=typeof importScripts=="function";var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var read_,readAsync,readBinary,setWindowTitle;function logExceptionOnExit(e){if(e instanceof ExitStatus)return;let toLog=e;err("exiting due to exception: "+toLog)}var fs;var nodePath;var requireNodeFS;if(ENVIRONMENT_IS_NODE){if(ENVIRONMENT_IS_WORKER){scriptDirectory=require("path").dirname(scriptDirectory)+"/"}else{scriptDirectory=__dirname+"/"}requireNodeFS=(()=>{if(!nodePath){fs=require("fs");nodePath=require("path")}});read_=function shell_read(filename,binary){requireNodeFS();filename=nodePath["normalize"](filename);return fs.readFileSync(filename,binary?undefined:"utf8")};readBinary=(filename=>{var ret=read_(filename,true);if(!ret.buffer){ret=new Uint8Array(ret)}return ret});readAsync=((filename,onload,onerror)=>{requireNodeFS();filename=nodePath["normalize"](filename);fs.readFile(filename,function(err,data){if(err)onerror(err);else onload(data.buffer)})});if(process["argv"].length>1){thisProgram=process["argv"][1].replace(/\\/g,"/")}arguments_=process["argv"].slice(2);if(typeof module!="undefined"){module["exports"]=Module}process["on"]("uncaughtException",function(ex){if(!(ex instanceof ExitStatus)){throw ex}});process["on"]("unhandledRejection",function(reason){throw reason});quit_=((status,toThrow)=>{if(keepRuntimeAlive()){process["exitCode"]=status;throw toThrow}logExceptionOnExit(toThrow);process["exit"](status)});Module["inspect"]=function(){return"[Emscripten Module object]"}}else 
if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){if(ENVIRONMENT_IS_WORKER){scriptDirectory=self.location.href}else if(typeof document!="undefined"&&document.currentScript){scriptDirectory=document.currentScript.src}if(scriptDirectory.indexOf("blob:")!==0){scriptDirectory=scriptDirectory.substr(0,scriptDirectory.replace(/[?#].*/,"").lastIndexOf("/")+1)}else{scriptDirectory=""}{read_=(url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.send(null);return xhr.responseText});if(ENVIRONMENT_IS_WORKER){readBinary=(url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)})}readAsync=((url,onload,onerror)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=(()=>{if(xhr.status==200||xhr.status==0&&xhr.response){onload(xhr.response);return}onerror()});xhr.onerror=onerror;xhr.send(null)})}setWindowTitle=(title=>document.title=title)}else{}var out=Module["print"]||console.log.bind(console);var err=Module["printErr"]||console.warn.bind(console);Object.assign(Module,moduleOverrides);moduleOverrides=null;if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["quit"])quit_=Module["quit"];var wasmBinary;if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];var noExitRuntime=Module["noExitRuntime"]||true;if(typeof WebAssembly!="object"){abort("no native wasm support detected")}var wasmMemory;var ABORT=false;var EXITSTATUS;var UTF8Decoder=typeof TextDecoder!="undefined"?new TextDecoder("utf8"):undefined;function UTF8ArrayToString(heapOrArray,idx,maxBytesToRead){var endIdx=idx+maxBytesToRead;var endPtr=idx;while(heapOrArray[endPtr]&&!(endPtr>=endIdx))++endPtr;if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.subarray(idx,endPtr))}else{var str="";while(idx<endPtr){var u0=heapOrArray[idx++];if(!(u0&128)){str+=String.fromCharCode(u0);continue}var 
u1=heapOrArray[idx++]&63;if((u0&224)==192){str+=String.fromCharCode((u0&31)<<6|u1);continue}var u2=heapOrArray[idx++]&63;if((u0&240)==224){u0=(u0&15)<<12|u1<<6|u2}else{u0=(u0&7)<<18|u1<<12|u2<<6|heapOrArray[idx++]&63}if(u0<65536){str+=String.fromCharCode(u0)}else{var ch=u0-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}}}return str}function UTF8ToString(ptr,maxBytesToRead){return ptr?UTF8ArrayToString(HEAPU8,ptr,maxBytesToRead):""}function stringToUTF8Array(str,heap,outIdx,maxBytesToWrite){if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i<str.length;++i){var u=str.charCodeAt(i);if(u>=55296&&u<=57343){var u1=str.charCodeAt(++i);u=65536+((u&1023)<<10)|u1&1023}if(u<=127){if(outIdx>=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}}heap[outIdx]=0;return outIdx-startIdx}function stringToUTF8(str,outPtr,maxBytesToWrite){return stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite)}function lengthBytesUTF8(str){var len=0;for(var i=0;i<str.length;++i){var u=str.charCodeAt(i);if(u>=55296&&u<=57343)u=65536+((u&1023)<<10)|str.charCodeAt(++i)&1023;if(u<=127)++len;else if(u<=2047)len+=2;else if(u<=65535)len+=3;else len+=4}return len}var UTF16Decoder=typeof TextDecoder!="undefined"?new TextDecoder("utf-16le"):undefined;function UTF16ToString(ptr,maxBytesToRead){var endPtr=ptr;var idx=endPtr>>1;var maxIdx=idx+maxBytesToRead/2;while(!(idx>=maxIdx)&&HEAPU16[idx])++idx;endPtr=idx<<1;if(endPtr-ptr>32&&UTF16Decoder){return UTF16Decoder.decode(HEAPU8.subarray(ptr,endPtr))}else{var str="";for(var i=0;!(i>=maxBytesToRead/2);++i){var 
codeUnit=HEAP16[ptr+i*2>>1];if(codeUnit==0)break;str+=String.fromCharCode(codeUnit)}return str}}function stringToUTF16(str,outPtr,maxBytesToWrite){if(maxBytesToWrite===undefined){maxBytesToWrite=2147483647}if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWrite<str.length*2?maxBytesToWrite/2:str.length;for(var i=0;i<numCharsToWrite;++i){var codeUnit=str.charCodeAt(i);HEAP16[outPtr>>1]=codeUnit;outPtr+=2}HEAP16[outPtr>>1]=0;return outPtr-startPtr}function lengthBytesUTF16(str){return str.length*2}function UTF32ToString(ptr,maxBytesToRead){var i=0;var str="";while(!(i>=maxBytesToRead/4)){var utf32=HEAP32[ptr+i*4>>2];if(utf32==0)break;++i;if(utf32>=65536){var ch=utf32-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}else{str+=String.fromCharCode(utf32)}}return str}function stringToUTF32(str,outPtr,maxBytesToWrite){if(maxBytesToWrite===undefined){maxBytesToWrite=2147483647}if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i<str.length;++i){var codeUnit=str.charCodeAt(i);if(codeUnit>=55296&&codeUnit<=57343){var trailSurrogate=str.charCodeAt(++i);codeUnit=65536+((codeUnit&1023)<<10)|trailSurrogate&1023}HEAP32[outPtr>>2]=codeUnit;outPtr+=4;if(outPtr+4>endPtr)break}HEAP32[outPtr>>2]=0;return outPtr-startPtr}function lengthBytesUTF32(str){var len=0;for(var i=0;i<str.length;++i){var codeUnit=str.charCodeAt(i);if(codeUnit>=55296&&codeUnit<=57343)++i;len+=4}return len}function writeArrayToMemory(array,buffer){HEAP8.set(array,buffer)}function writeAsciiToMemory(str,buffer,dontAddNull){for(var i=0;i<str.length;++i){HEAP8[buffer++>>0]=str.charCodeAt(i)}if(!dontAddNull)HEAP8[buffer>>0]=0}var buffer,HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;function updateGlobalBufferAndViews(buf){buffer=buf;Module["HEAP8"]=HEAP8=new Int8Array(buf);Module["HEAP16"]=HEAP16=new Int16Array(buf);Module["HEAP32"]=HEAP32=new Int32Array(buf);Module["HEAPU8"]=HEAPU8=new 
Uint8Array(buf);Module["HEAPU16"]=HEAPU16=new Uint16Array(buf);Module["HEAPU32"]=HEAPU32=new Uint32Array(buf);Module["HEAPF32"]=HEAPF32=new Float32Array(buf);Module["HEAPF64"]=HEAPF64=new Float64Array(buf)}var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||16777216;var wasmTable;var __ATPRERUN__=[];var __ATINIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;function keepRuntimeAlive(){return noExitRuntime}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}function initRuntime(){runtimeInitialized=true;callRuntimeCallbacks(__ATINIT__)}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}function addOnInit(cb){__ATINIT__.unshift(cb)}function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;function addRunDependency(id){runDependencies++;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}}function removeRunDependency(id){runDependencies--;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}function abort(what){{if(Module["onAbort"]){Module["onAbort"](what)}}what="Aborted("+what+")";err(what);ABORT=true;EXITSTATUS=1;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);throw e}var dataURIPrefix="data:application/octet-stream;base64,";function isDataURI(filename){return filename.startsWith(dataURIPrefix)}function isFileURI(filename){return filename.startsWith("file://")}var wasmBinaryFile;wasmBinaryFile="native.wasm";if(!isDataURI(wasmBinaryFile)){wasmBinaryFile=locateFile(wasmBinaryFile)}function getBinary(file){try{if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}else{throw"both async and sync fetching of the wasm failed"}}catch(err){abort(err)}}function getBinaryPromise(){if(!wasmBinary&&(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER)){if(typeof fetch=="function"&&!isFileURI(wasmBinaryFile)){return fetch(wasmBinaryFile,{credentials:"same-origin"}).then(function(response){if(!response["ok"]){throw"failed to load wasm binary file at '"+wasmBinaryFile+"'"}return response["arrayBuffer"]()}).catch(function(){return getBinary(wasmBinaryFile)})}else{if(readAsync){return new Promise(function(resolve,reject){readAsync(wasmBinaryFile,function(response){resolve(new Uint8Array(response))},reject)})}}}return Promise.resolve().then(function(){return getBinary(wasmBinaryFile)})}function createWasm(){var info={"a":asmLibraryArg};function receiveInstance(instance,module){var exports=instance.exports;Module["asm"]=exports;wasmMemory=Module["asm"]["v"];updateGlobalBufferAndViews(wasmMemory.buffer);wasmTable=Module["asm"]["y"];addOnInit(Module["asm"]["w"]);removeRunDependency("wasm-instantiate")}addRunDependency("wasm-instantiate");function receiveInstantiationResult(result){receiveInstance(result["instance"])}function instantiateArrayBuffer(receiver){return getBinaryPromise().then(function(binary){return WebAssembly.instantiate(binary,info)}).then(function(instance){return instance}).then(receiver,function(reason){err("failed to asynchronously prepare wasm: "+reason);abort(reason)})}function 
instantiateAsync(){if(!wasmBinary&&typeof WebAssembly.instantiateStreaming=="function"&&!isDataURI(wasmBinaryFile)&&!isFileURI(wasmBinaryFile)&&typeof fetch=="function"){return fetch(wasmBinaryFile,{credentials:"same-origin"}).then(function(response){var result=WebAssembly.instantiateStreaming(response,info);return result.then(receiveInstantiationResult,function(reason){err("wasm streaming compile failed: "+reason);err("falling back to ArrayBuffer instantiation");return instantiateArrayBuffer(receiveInstantiationResult)})})}else{return instantiateArrayBuffer(receiveInstantiationResult)}}if(Module["instantiateWasm"]){try{var exports=Module["instantiateWasm"](info,receiveInstance);return exports}catch(e){err("Module.instantiateWasm callback failed with error: "+e);return false}}instantiateAsync();return{}}function callRuntimeCallbacks(callbacks){while(callbacks.length>0){var callback=callbacks.shift();if(typeof callback=="function"){callback(Module);continue}var func=callback.func;if(typeof func=="number"){if(callback.arg===undefined){getWasmTableEntry(func)()}else{getWasmTableEntry(func)(callback.arg)}}else{func(callback.arg===undefined?null:callback.arg)}}}var wasmTableMirror=[];function getWasmTableEntry(funcPtr){var func=wasmTableMirror[funcPtr];if(!func){if(funcPtr>=wasmTableMirror.length)wasmTableMirror.length=funcPtr+1;wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func}function ___assert_fail(condition,filename,line,func){abort("Assertion failed: "+UTF8ToString(condition)+", at: "+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"])}function ___cxa_allocate_exception(size){return _malloc(size+24)+24}function ExceptionInfo(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24;this.set_type=function(type){HEAPU32[this.ptr+4>>2]=type};this.get_type=function(){return HEAPU32[this.ptr+4>>2]};this.set_destructor=function(destructor){HEAPU32[this.ptr+8>>2]=destructor};this.get_destructor=function(){return 
HEAPU32[this.ptr+8>>2]};this.set_refcount=function(refcount){HEAP32[this.ptr>>2]=refcount};this.set_caught=function(caught){caught=caught?1:0;HEAP8[this.ptr+12>>0]=caught};this.get_caught=function(){return HEAP8[this.ptr+12>>0]!=0};this.set_rethrown=function(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13>>0]=rethrown};this.get_rethrown=function(){return HEAP8[this.ptr+13>>0]!=0};this.init=function(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor);this.set_refcount(0);this.set_caught(false);this.set_rethrown(false)};this.add_ref=function(){var value=HEAP32[this.ptr>>2];HEAP32[this.ptr>>2]=value+1};this.release_ref=function(){var prev=HEAP32[this.ptr>>2];HEAP32[this.ptr>>2]=prev-1;return prev===1};this.set_adjusted_ptr=function(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr};this.get_adjusted_ptr=function(){return HEAPU32[this.ptr+16>>2]};this.get_exception_ptr=function(){var isPointer=___cxa_is_pointer_type(this.get_type());if(isPointer){return HEAPU32[this.excPtr>>2]}var adjusted=this.get_adjusted_ptr();if(adjusted!==0)return adjusted;return this.excPtr}}var exceptionLast=0;var uncaughtExceptionCount=0;function ___cxa_throw(ptr,type,destructor){var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw ptr}function __embind_register_bigint(primitiveType,name,size,minRange,maxRange){}function getShiftFromSize(size){switch(size){case 1:return 0;case 2:return 1;case 4:return 2;case 8:return 3;default:throw new TypeError("Unknown type size: "+size)}}function embind_init_charCodes(){var codes=new Array(256);for(var i=0;i<256;++i){codes[i]=String.fromCharCode(i)}embind_charCodes=codes}var embind_charCodes=undefined;function readLatin1String(ptr){var ret="";var c=ptr;while(HEAPU8[c]){ret+=embind_charCodes[HEAPU8[c++]]}return ret}var awaitingDependencies={};var registeredTypes={};var typeDependencies={};var char_0=48;var char_9=57;function 
makeLegalFunctionName(name){if(undefined===name){return"_unknown"}name=name.replace(/[^a-zA-Z0-9_]/g,"$");var f=name.charCodeAt(0);if(f>=char_0&&f<=char_9){return"_"+name}return name}function createNamedFunction(name,body){name=makeLegalFunctionName(name);return new Function("body","return function "+name+"() {\n"+'    "use strict";'+"    return body.apply(this, arguments);\n"+"};\n")(body)}function extendError(baseErrorType,errorName){var errorClass=createNamedFunction(errorName,function(message){this.name=errorName;this.message=message;var stack=new Error(message).stack;if(stack!==undefined){this.stack=this.toString()+"\n"+stack.replace(/^Error(:[^\n]*)?\n/,"")}});errorClass.prototype=Object.create(baseErrorType.prototype);errorClass.prototype.constructor=errorClass;errorClass.prototype.toString=function(){if(this.message===undefined){return this.name}else{return this.name+": "+this.message}};return errorClass}var BindingError=undefined;function throwBindingError(message){throw new BindingError(message)}var InternalError=undefined;function throwInternalError(message){throw new InternalError(message)}function whenDependentTypesAreResolved(myTypes,dependentTypes,getTypeConverters){myTypes.forEach(function(type){typeDependencies[type]=dependentTypes});function onComplete(typeConverters){var myTypeConverters=getTypeConverters(typeConverters);if(myTypeConverters.length!==myTypes.length){throwInternalError("Mismatched type converter count")}for(var i=0;i<myTypes.length;++i){registerType(myTypes[i],myTypeConverters[i])}}var typeConverters=new Array(dependentTypes.length);var unregisteredTypes=[];var 
registered=0;dependentTypes.forEach((dt,i)=>{if(registeredTypes.hasOwnProperty(dt)){typeConverters[i]=registeredTypes[dt]}else{unregisteredTypes.push(dt);if(!awaitingDependencies.hasOwnProperty(dt)){awaitingDependencies[dt]=[]}awaitingDependencies[dt].push(()=>{typeConverters[i]=registeredTypes[dt];++registered;if(registered===unregisteredTypes.length){onComplete(typeConverters)}})}});if(0===unregisteredTypes.length){onComplete(typeConverters)}}function registerType(rawType,registeredInstance,options={}){if(!("argPackAdvance"in registeredInstance)){throw new TypeError("registerType registeredInstance requires argPackAdvance")}var name=registeredInstance.name;if(!rawType){throwBindingError('type "'+name+'" must have a positive integer typeid pointer')}if(registeredTypes.hasOwnProperty(rawType)){if(options.ignoreDuplicateRegistrations){return}else{throwBindingError("Cannot register type '"+name+"' twice")}}registeredTypes[rawType]=registeredInstance;delete typeDependencies[rawType];if(awaitingDependencies.hasOwnProperty(rawType)){var callbacks=awaitingDependencies[rawType];delete awaitingDependencies[rawType];callbacks.forEach(cb=>cb())}}function __embind_register_bool(rawType,name,size,trueValue,falseValue){var shift=getShiftFromSize(size);name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(wt){return!!wt},"toWireType":function(destructors,o){return o?trueValue:falseValue},"argPackAdvance":8,"readValueFromPointer":function(pointer){var heap;if(size===1){heap=HEAP8}else if(size===2){heap=HEAP16}else if(size===4){heap=HEAP32}else{throw new TypeError("Unknown boolean type size: "+name)}return this["fromWireType"](heap[pointer>>shift])},destructorFunction:null})}var emval_free_list=[];var emval_handle_array=[{},{value:undefined},{value:null},{value:true},{value:false}];function __emval_decref(handle){if(handle>4&&0===--emval_handle_array[handle].refcount){emval_handle_array[handle]=undefined;emval_free_list.push(handle)}}function 
count_emval_handles(){var count=0;for(var i=5;i<emval_handle_array.length;++i){if(emval_handle_array[i]!==undefined){++count}}return count}function get_first_emval(){for(var i=5;i<emval_handle_array.length;++i){if(emval_handle_array[i]!==undefined){return emval_handle_array[i]}}return null}function init_emval(){Module["count_emval_handles"]=count_emval_handles;Module["get_first_emval"]=get_first_emval}var Emval={toValue:handle=>{if(!handle){throwBindingError("Cannot use deleted val. handle = "+handle)}return emval_handle_array[handle].value},toHandle:value=>{switch(value){case undefined:return 1;case null:return 2;case true:return 3;case false:return 4;default:{var handle=emval_free_list.length?emval_free_list.pop():emval_handle_array.length;emval_handle_array[handle]={refcount:1,value:value};return handle}}}};function simpleReadValueFromPointer(pointer){return this["fromWireType"](HEAPU32[pointer>>2])}function __embind_register_emval(rawType,name){name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(handle){var rv=Emval.toValue(handle);__emval_decref(handle);return rv},"toWireType":function(destructors,value){return Emval.toHandle(value)},"argPackAdvance":8,"readValueFromPointer":simpleReadValueFromPointer,destructorFunction:null})}function floatReadValueFromPointer(name,shift){switch(shift){case 2:return function(pointer){return this["fromWireType"](HEAPF32[pointer>>2])};case 3:return function(pointer){return this["fromWireType"](HEAPF64[pointer>>3])};default:throw new TypeError("Unknown float type: "+name)}}function __embind_register_float(rawType,name,size){var shift=getShiftFromSize(size);name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(value){return value},"toWireType":function(destructors,value){return value},"argPackAdvance":8,"readValueFromPointer":floatReadValueFromPointer(name,shift),destructorFunction:null})}function new_(constructor,argumentList){if(!(constructor instanceof 
Function)){throw new TypeError("new_ called with constructor type "+typeof constructor+" which is not a function")}var dummy=createNamedFunction(constructor.name||"unknownFunctionName",function(){});dummy.prototype=constructor.prototype;var obj=new dummy;var r=constructor.apply(obj,argumentList);return r instanceof Object?r:obj}function runDestructors(destructors){while(destructors.length){var ptr=destructors.pop();var del=destructors.pop();del(ptr)}}function craftInvokerFunction(humanName,argTypes,classType,cppInvokerFunc,cppTargetFunc){var argCount=argTypes.length;if(argCount<2){throwBindingError("argTypes array size mismatch! Must at least get return value and 'this' types!")}var isClassMethodFunc=argTypes[1]!==null&&classType!==null;var needsDestructorStack=false;for(var i=1;i<argTypes.length;++i){if(argTypes[i]!==null&&argTypes[i].destructorFunction===undefined){needsDestructorStack=true;break}}var returns=argTypes[0].name!=="void";var argsList="";var argsListWired="";for(var i=0;i<argCount-2;++i){argsList+=(i!==0?", ":"")+"arg"+i;argsListWired+=(i!==0?", ":"")+"arg"+i+"Wired"}var invokerFnBody="return function "+makeLegalFunctionName(humanName)+"("+argsList+") {\n"+"if (arguments.length !== "+(argCount-2)+") {\n"+"throwBindingError('function "+humanName+" called with ' + arguments.length + ' arguments, expected "+(argCount-2)+" args!');\n"+"}\n";if(needsDestructorStack){invokerFnBody+="var destructors = [];\n"}var dtorStack=needsDestructorStack?"destructors":"null";var args1=["throwBindingError","invoker","fn","runDestructors","retType","classParam"];var args2=[throwBindingError,cppInvokerFunc,cppTargetFunc,runDestructors,argTypes[0],argTypes[1]];if(isClassMethodFunc){invokerFnBody+="var thisWired = classParam.toWireType("+dtorStack+", this);\n"}for(var i=0;i<argCount-2;++i){invokerFnBody+="var arg"+i+"Wired = argType"+i+".toWireType("+dtorStack+", arg"+i+"); // 
"+argTypes[i+2].name+"\n";args1.push("argType"+i);args2.push(argTypes[i+2])}if(isClassMethodFunc){argsListWired="thisWired"+(argsListWired.length>0?", ":"")+argsListWired}invokerFnBody+=(returns?"var rv = ":"")+"invoker(fn"+(argsListWired.length>0?", ":"")+argsListWired+");\n";if(needsDestructorStack){invokerFnBody+="runDestructors(destructors);\n"}else{for(var i=isClassMethodFunc?1:2;i<argTypes.length;++i){var paramName=i===1?"thisWired":"arg"+(i-2)+"Wired";if(argTypes[i].destructorFunction!==null){invokerFnBody+=paramName+"_dtor("+paramName+"); // "+argTypes[i].name+"\n";args1.push(paramName+"_dtor");args2.push(argTypes[i].destructorFunction)}}}if(returns){invokerFnBody+="var ret = retType.fromWireType(rv);\n"+"return ret;\n"}else{}invokerFnBody+="}\n";args1.push(invokerFnBody);var invokerFunction=new_(Function,args1).apply(null,args2);return invokerFunction}function ensureOverloadTable(proto,methodName,humanName){if(undefined===proto[methodName].overloadTable){var prevFunc=proto[methodName];proto[methodName]=function(){if(!proto[methodName].overloadTable.hasOwnProperty(arguments.length)){throwBindingError("Function '"+humanName+"' called with an invalid number of arguments ("+arguments.length+") - expects one of ("+proto[methodName].overloadTable+")!")}return proto[methodName].overloadTable[arguments.length].apply(this,arguments)};proto[methodName].overloadTable=[];proto[methodName].overloadTable[prevFunc.argCount]=prevFunc}}function exposePublicSymbol(name,value,numArguments){if(Module.hasOwnProperty(name)){if(undefined===numArguments||undefined!==Module[name].overloadTable&&undefined!==Module[name].overloadTable[numArguments]){throwBindingError("Cannot register public name '"+name+"' twice")}ensureOverloadTable(Module,name,name);if(Module.hasOwnProperty(numArguments)){throwBindingError("Cannot register multiple overloads of a function with the same number of arguments 
("+numArguments+")!")}Module[name].overloadTable[numArguments]=value}else{Module[name]=value;if(undefined!==numArguments){Module[name].numArguments=numArguments}}}function heap32VectorToArray(count,firstElement){var array=[];for(var i=0;i<count;i++){array.push(HEAP32[(firstElement>>2)+i])}return array}function replacePublicSymbol(name,value,numArguments){if(!Module.hasOwnProperty(name)){throwInternalError("Replacing nonexistant public symbol")}if(undefined!==Module[name].overloadTable&&undefined!==numArguments){Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}}function dynCallLegacy(sig,ptr,args){var f=Module["dynCall_"+sig];return args&&args.length?f.apply(null,[ptr].concat(args)):f.call(null,ptr)}function dynCall(sig,ptr,args){if(sig.includes("j")){return dynCallLegacy(sig,ptr,args)}return getWasmTableEntry(ptr).apply(null,args)}function getDynCaller(sig,ptr){var argCache=[];return function(){argCache.length=0;Object.assign(argCache,arguments);return dynCall(sig,ptr,argCache)}}function embind__requireFunction(signature,rawFunction){signature=readLatin1String(signature);function makeDynCaller(){if(signature.includes("j")){return getDynCaller(signature,rawFunction)}return getWasmTableEntry(rawFunction)}var fp=makeDynCaller();if(typeof fp!="function"){throwBindingError("unknown function pointer with signature "+signature+": "+rawFunction)}return fp}var UnboundTypeError=undefined;function getTypeName(type){var ptr=___getTypeName(type);var rv=readLatin1String(ptr);_free(ptr);return rv}function throwUnboundTypeError(message,types){var unboundTypes=[];var seen={};function visit(type){if(seen[type]){return}if(registeredTypes[type]){return}if(typeDependencies[type]){typeDependencies[type].forEach(visit);return}unboundTypes.push(type);seen[type]=true}types.forEach(visit);throw new UnboundTypeError(message+": "+unboundTypes.map(getTypeName).join([", "]))}function 
__embind_register_function(name,argCount,rawArgTypesAddr,signature,rawInvoker,fn){var argTypes=heap32VectorToArray(argCount,rawArgTypesAddr);name=readLatin1String(name);rawInvoker=embind__requireFunction(signature,rawInvoker);exposePublicSymbol(name,function(){throwUnboundTypeError("Cannot call "+name+" due to unbound types",argTypes)},argCount-1);whenDependentTypesAreResolved([],argTypes,function(argTypes){var invokerArgsArray=[argTypes[0],null].concat(argTypes.slice(1));replacePublicSymbol(name,craftInvokerFunction(name,invokerArgsArray,null,rawInvoker,fn),argCount-1);return[]})}function integerReadValueFromPointer(name,shift,signed){switch(shift){case 0:return signed?function readS8FromPointer(pointer){return HEAP8[pointer]}:function readU8FromPointer(pointer){return HEAPU8[pointer]};case 1:return signed?function readS16FromPointer(pointer){return HEAP16[pointer>>1]}:function readU16FromPointer(pointer){return HEAPU16[pointer>>1]};case 2:return signed?function readS32FromPointer(pointer){return HEAP32[pointer>>2]}:function readU32FromPointer(pointer){return HEAPU32[pointer>>2]};default:throw new TypeError("Unknown integer type: "+name)}}function __embind_register_integer(primitiveType,name,size,minRange,maxRange){name=readLatin1String(name);if(maxRange===-1){maxRange=4294967295}var shift=getShiftFromSize(size);var fromWireType=value=>value;if(minRange===0){var bitshift=32-8*size;fromWireType=(value=>value<<bitshift>>>bitshift)}var isUnsignedType=name.includes("unsigned");var checkAssertions=(value,toTypeName)=>{};var toWireType;if(isUnsignedType){toWireType=function(destructors,value){checkAssertions(value,this.name);return value>>>0}}else{toWireType=function(destructors,value){checkAssertions(value,this.name);return value}}registerType(primitiveType,{name:name,"fromWireType":fromWireType,"toWireType":toWireType,"argPackAdvance":8,"readValueFromPointer":integerReadValueFromPointer(name,shift,minRange!==0),destructorFunction:null})}function 
__embind_register_memory_view(rawType,dataTypeIndex,name){var typeMapping=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array];var TA=typeMapping[dataTypeIndex];function decodeMemoryView(handle){handle=handle>>2;var heap=HEAPU32;var size=heap[handle];var data=heap[handle+1];return new TA(buffer,data,size)}name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":decodeMemoryView,"argPackAdvance":8,"readValueFromPointer":decodeMemoryView},{ignoreDuplicateRegistrations:true})}function __embind_register_std_string(rawType,name){name=readLatin1String(name);var stdStringIsUTF8=name==="std::string";registerType(rawType,{name:name,"fromWireType":function(value){var length=HEAPU32[value>>2];var str;if(stdStringIsUTF8){var decodeStartPtr=value+4;for(var i=0;i<=length;++i){var currentBytePtr=value+4+i;if(i==length||HEAPU8[currentBytePtr]==0){var maxRead=currentBytePtr-decodeStartPtr;var stringSegment=UTF8ToString(decodeStartPtr,maxRead);if(str===undefined){str=stringSegment}else{str+=String.fromCharCode(0);str+=stringSegment}decodeStartPtr=currentBytePtr+1}}}else{var a=new Array(length);for(var i=0;i<length;++i){a[i]=String.fromCharCode(HEAPU8[value+4+i])}str=a.join("")}_free(value);return str},"toWireType":function(destructors,value){if(value instanceof ArrayBuffer){value=new Uint8Array(value)}var getLength;var valueIsOfTypeString=typeof value=="string";if(!(valueIsOfTypeString||value instanceof Uint8Array||value instanceof Uint8ClampedArray||value instanceof Int8Array)){throwBindingError("Cannot pass non-string to std::string")}if(stdStringIsUTF8&&valueIsOfTypeString){getLength=(()=>lengthBytesUTF8(value))}else{getLength=(()=>value.length)}var length=getLength();var ptr=_malloc(4+length+1);HEAPU32[ptr>>2]=length;if(stdStringIsUTF8&&valueIsOfTypeString){stringToUTF8(value,ptr+4,length+1)}else{if(valueIsOfTypeString){for(var i=0;i<length;++i){var 
charCode=value.charCodeAt(i);if(charCode>255){_free(ptr);throwBindingError("String has UTF-16 code units that do not fit in 8 bits")}HEAPU8[ptr+4+i]=charCode}}else{for(var i=0;i<length;++i){HEAPU8[ptr+4+i]=value[i]}}}if(destructors!==null){destructors.push(_free,ptr)}return ptr},"argPackAdvance":8,"readValueFromPointer":simpleReadValueFromPointer,destructorFunction:function(ptr){_free(ptr)}})}function __embind_register_std_wstring(rawType,charSize,name){name=readLatin1String(name);var decodeString,encodeString,getHeap,lengthBytesUTF,shift;if(charSize===2){decodeString=UTF16ToString;encodeString=stringToUTF16;lengthBytesUTF=lengthBytesUTF16;getHeap=(()=>HEAPU16);shift=1}else if(charSize===4){decodeString=UTF32ToString;encodeString=stringToUTF32;lengthBytesUTF=lengthBytesUTF32;getHeap=(()=>HEAPU32);shift=2}registerType(rawType,{name:name,"fromWireType":function(value){var length=HEAPU32[value>>2];var HEAP=getHeap();var str;var decodeStartPtr=value+4;for(var i=0;i<=length;++i){var currentBytePtr=value+4+i*charSize;if(i==length||HEAP[currentBytePtr>>shift]==0){var maxReadBytes=currentBytePtr-decodeStartPtr;var stringSegment=decodeString(decodeStartPtr,maxReadBytes);if(str===undefined){str=stringSegment}else{str+=String.fromCharCode(0);str+=stringSegment}decodeStartPtr=currentBytePtr+charSize}}_free(value);return str},"toWireType":function(destructors,value){if(!(typeof value=="string")){throwBindingError("Cannot pass non-string to C++ string type "+name)}var length=lengthBytesUTF(value);var ptr=_malloc(4+length+charSize);HEAPU32[ptr>>2]=length>>shift;encodeString(value,ptr+4,length+charSize);if(destructors!==null){destructors.push(_free,ptr)}return ptr},"argPackAdvance":8,"readValueFromPointer":simpleReadValueFromPointer,destructorFunction:function(ptr){_free(ptr)}})}function __embind_register_void(rawType,name){name=readLatin1String(name);registerType(rawType,{isVoid:true,name:name,"argPackAdvance":0,"fromWireType":function(){return 
undefined},"toWireType":function(destructors,o){return undefined}})}var nowIsMonotonic=true;function __emscripten_get_now_is_monotonic(){return nowIsMonotonic}function _abort(){abort("")}var _emscripten_get_now;if(ENVIRONMENT_IS_NODE){_emscripten_get_now=(()=>{var t=process["hrtime"]();return t[0]*1e3+t[1]/1e6})}else _emscripten_get_now=(()=>performance.now());function _emscripten_memcpy_big(dest,src,num){HEAPU8.copyWithin(dest,src,src+num)}function _emscripten_get_heap_max(){return 2147483648}function emscripten_realloc_buffer(size){try{wasmMemory.grow(size-buffer.byteLength+65535>>>16);updateGlobalBufferAndViews(wasmMemory.buffer);return 1}catch(e){}}function _emscripten_resize_heap(requestedSize){var oldSize=HEAPU8.length;requestedSize=requestedSize>>>0;var maxHeapSize=_emscripten_get_heap_max();if(requestedSize>maxHeapSize){return false}let alignUp=(x,multiple)=>x+(multiple-x%multiple)%multiple;for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignUp(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=emscripten_realloc_buffer(newSize);if(replacement){return true}}return false}var ENV={};function getExecutableName(){return thisProgram||"./this.program"}function getEnvStrings(){if(!getEnvStrings.strings){var lang=(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8";var env={"USER":"web_user","LOGNAME":"web_user","PATH":"/","PWD":"/","HOME":"/home/web_user","LANG":lang,"_":getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(x+"="+env[x])}getEnvStrings.strings=strings}return getEnvStrings.strings}var SYSCALLS={varargs:undefined,get:function(){SYSCALLS.varargs+=4;var ret=HEAP32[SYSCALLS.varargs-4>>2];return ret},getStr:function(ptr){var ret=UTF8ToString(ptr);return 
ret}};function _environ_get(__environ,environ_buf){var bufSize=0;getEnvStrings().forEach(function(string,i){var ptr=environ_buf+bufSize;HEAP32[__environ+i*4>>2]=ptr;writeAsciiToMemory(string,ptr);bufSize+=string.length+1});return 0}function _environ_sizes_get(penviron_count,penviron_buf_size){var strings=getEnvStrings();HEAP32[penviron_count>>2]=strings.length;var bufSize=0;strings.forEach(function(string){bufSize+=string.length+1});HEAP32[penviron_buf_size>>2]=bufSize;return 0}function __isLeapYear(year){return year%4===0&&(year%100!==0||year%400===0)}function __arraySum(array,index){var sum=0;for(var i=0;i<=index;sum+=array[i++]){}return sum}var __MONTH_DAYS_LEAP=[31,29,31,30,31,30,31,31,30,31,30,31];var __MONTH_DAYS_REGULAR=[31,28,31,30,31,30,31,31,30,31,30,31];function __addDays(date,days){var newDate=new Date(date.getTime());while(days>0){var leap=__isLeapYear(newDate.getFullYear());var currentMonth=newDate.getMonth();var daysInCurrentMonth=(leap?__MONTH_DAYS_LEAP:__MONTH_DAYS_REGULAR)[currentMonth];if(days>daysInCurrentMonth-newDate.getDate()){days-=daysInCurrentMonth-newDate.getDate()+1;newDate.setDate(1);if(currentMonth<11){newDate.setMonth(currentMonth+1)}else{newDate.setMonth(0);newDate.setFullYear(newDate.getFullYear()+1)}}else{newDate.setDate(newDate.getDate()+days);return newDate}}return newDate}function _strftime(s,maxsize,format,tm){var tm_zone=HEAP32[tm+40>>2];var date={tm_sec:HEAP32[tm>>2],tm_min:HEAP32[tm+4>>2],tm_hour:HEAP32[tm+8>>2],tm_mday:HEAP32[tm+12>>2],tm_mon:HEAP32[tm+16>>2],tm_year:HEAP32[tm+20>>2],tm_wday:HEAP32[tm+24>>2],tm_yday:HEAP32[tm+28>>2],tm_isdst:HEAP32[tm+32>>2],tm_gmtoff:HEAP32[tm+36>>2],tm_zone:tm_zone?UTF8ToString(tm_zone):""};var pattern=UTF8ToString(format);var EXPANSION_RULES_1={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S 
%p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"};for(var rule in EXPANSION_RULES_1){pattern=pattern.replace(new RegExp(rule,"g"),EXPANSION_RULES_1[rule])}var WEEKDAYS=["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"];var MONTHS=["January","February","March","April","May","June","July","August","September","October","November","December"];function leadingSomething(value,digits,character){var str=typeof value=="number"?value.toString():value||"";while(str.length<digits){str=character[0]+str}return str}function leadingNulls(value,digits){return leadingSomething(value,digits,"0")}function compareByDay(date1,date2){function sgn(value){return value<0?-1:value>0?1:0}var compare;if((compare=sgn(date1.getFullYear()-date2.getFullYear()))===0){if((compare=sgn(date1.getMonth()-date2.getMonth()))===0){compare=sgn(date1.getDate()-date2.getDate())}}return compare}function getFirstWeekStartDate(janFourth){switch(janFourth.getDay()){case 0:return new Date(janFourth.getFullYear()-1,11,29);case 1:return janFourth;case 2:return new Date(janFourth.getFullYear(),0,3);case 3:return new Date(janFourth.getFullYear(),0,2);case 4:return new Date(janFourth.getFullYear(),0,1);case 5:return new Date(janFourth.getFullYear()-1,11,31);case 6:return new Date(janFourth.getFullYear()-1,11,30)}}function getWeekBasedYear(date){var thisDate=__addDays(new Date(date.tm_year+1900,0,1),date.tm_yday);var janFourthThisYear=new Date(thisDate.getFullYear(),0,4);var janFourthNextYear=new Date(thisDate.getFullYear()+1,0,4);var firstWeekStartThisYear=getFirstWeekStartDate(janFourthThisYear);var 
firstWeekStartNextYear=getFirstWeekStartDate(janFourthNextYear);if(compareByDay(firstWeekStartThisYear,thisDate)<=0){if(compareByDay(firstWeekStartNextYear,thisDate)<=0){return thisDate.getFullYear()+1}else{return thisDate.getFullYear()}}else{return thisDate.getFullYear()-1}}var EXPANSION_RULES_2={"%a":function(date){return WEEKDAYS[date.tm_wday].substring(0,3)},"%A":function(date){return WEEKDAYS[date.tm_wday]},"%b":function(date){return MONTHS[date.tm_mon].substring(0,3)},"%B":function(date){return MONTHS[date.tm_mon]},"%C":function(date){var year=date.tm_year+1900;return leadingNulls(year/100|0,2)},"%d":function(date){return leadingNulls(date.tm_mday,2)},"%e":function(date){return leadingSomething(date.tm_mday,2," ")},"%g":function(date){return getWeekBasedYear(date).toString().substring(2)},"%G":function(date){return getWeekBasedYear(date)},"%H":function(date){return leadingNulls(date.tm_hour,2)},"%I":function(date){var twelveHour=date.tm_hour;if(twelveHour==0)twelveHour=12;else if(twelveHour>12)twelveHour-=12;return leadingNulls(twelveHour,2)},"%j":function(date){return leadingNulls(date.tm_mday+__arraySum(__isLeapYear(date.tm_year+1900)?__MONTH_DAYS_LEAP:__MONTH_DAYS_REGULAR,date.tm_mon-1),3)},"%m":function(date){return leadingNulls(date.tm_mon+1,2)},"%M":function(date){return leadingNulls(date.tm_min,2)},"%n":function(){return"\n"},"%p":function(date){if(date.tm_hour>=0&&date.tm_hour<12){return"AM"}else{return"PM"}},"%S":function(date){return leadingNulls(date.tm_sec,2)},"%t":function(){return"\t"},"%u":function(date){return date.tm_wday||7},"%U":function(date){var days=date.tm_yday+7-date.tm_wday;return leadingNulls(Math.floor(days/7),2)},"%V":function(date){var val=Math.floor((date.tm_yday+7-(date.tm_wday+6)%7)/7);if((date.tm_wday+371-date.tm_yday-2)%7<=2){val++}if(!val){val=52;var dec31=(date.tm_wday+7-date.tm_yday-1)%7;if(dec31==4||dec31==5&&__isLeapYear(date.tm_year%400-1)){val++}}else if(val==53){var 
jan1=(date.tm_wday+371-date.tm_yday)%7;if(jan1!=4&&(jan1!=3||!__isLeapYear(date.tm_year)))val=1}return leadingNulls(val,2)},"%w":function(date){return date.tm_wday},"%W":function(date){var days=date.tm_yday+7-(date.tm_wday+6)%7;return leadingNulls(Math.floor(days/7),2)},"%y":function(date){return(date.tm_year+1900).toString().substring(2)},"%Y":function(date){return date.tm_year+1900},"%z":function(date){var off=date.tm_gmtoff;var ahead=off>=0;off=Math.abs(off)/60;off=off/60*100+off%60;return(ahead?"+":"-")+String("0000"+off).slice(-4)},"%Z":function(date){return date.tm_zone},"%%":function(){return"%"}};pattern=pattern.replace(/%%/g,"\0\0");for(var rule in EXPANSION_RULES_2){if(pattern.includes(rule)){pattern=pattern.replace(new RegExp(rule,"g"),EXPANSION_RULES_2[rule](date))}}pattern=pattern.replace(/\0\0/g,"%");var bytes=intArrayFromString(pattern,false);if(bytes.length>maxsize){return 0}writeArrayToMemory(bytes,s);return bytes.length-1}function _strftime_l(s,maxsize,format,tm){return _strftime(s,maxsize,format,tm)}embind_init_charCodes();BindingError=Module["BindingError"]=extendError(Error,"BindingError");InternalError=Module["InternalError"]=extendError(Error,"InternalError");init_emval();UnboundTypeError=Module["UnboundTypeError"]=extendError(Error,"UnboundTypeError");function intArrayFromString(stringy,dontAddNull,length){var len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array}var 
asmLibraryArg={"b":___assert_fail,"e":___cxa_allocate_exception,"d":___cxa_throw,"l":__embind_register_bigint,"j":__embind_register_bool,"t":__embind_register_emval,"i":__embind_register_float,"u":__embind_register_function,"c":__embind_register_integer,"a":__embind_register_memory_view,"h":__embind_register_std_string,"f":__embind_register_std_wstring,"k":__embind_register_void,"p":__emscripten_get_now_is_monotonic,"g":_abort,"r":_emscripten_get_now,"s":_emscripten_memcpy_big,"q":_emscripten_resize_heap,"n":_environ_get,"o":_environ_sizes_get,"m":_strftime_l};var asm=createWasm();var ___wasm_call_ctors=Module["___wasm_call_ctors"]=function(){return(___wasm_call_ctors=Module["___wasm_call_ctors"]=Module["asm"]["w"]).apply(null,arguments)};var _malloc=Module["_malloc"]=function(){return(_malloc=Module["_malloc"]=Module["asm"]["x"]).apply(null,arguments)};var ___getTypeName=Module["___getTypeName"]=function(){return(___getTypeName=Module["___getTypeName"]=Module["asm"]["z"]).apply(null,arguments)};var ___embind_register_native_and_builtin_types=Module["___embind_register_native_and_builtin_types"]=function(){return(___embind_register_native_and_builtin_types=Module["___embind_register_native_and_builtin_types"]=Module["asm"]["A"]).apply(null,arguments)};var _free=Module["_free"]=function(){return(_free=Module["_free"]=Module["asm"]["B"]).apply(null,arguments)};var ___cxa_is_pointer_type=Module["___cxa_is_pointer_type"]=function(){return(___cxa_is_pointer_type=Module["___cxa_is_pointer_type"]=Module["asm"]["C"]).apply(null,arguments)};var dynCall_viijii=Module["dynCall_viijii"]=function(){return(dynCall_viijii=Module["dynCall_viijii"]=Module["asm"]["D"]).apply(null,arguments)};var dynCall_iiiiij=Module["dynCall_iiiiij"]=function(){return(dynCall_iiiiij=Module["dynCall_iiiiij"]=Module["asm"]["E"]).apply(null,arguments)};var 
dynCall_iiiiijj=Module["dynCall_iiiiijj"]=function(){return(dynCall_iiiiijj=Module["dynCall_iiiiijj"]=Module["asm"]["F"]).apply(null,arguments)};var dynCall_iiiiiijj=Module["dynCall_iiiiiijj"]=function(){return(dynCall_iiiiiijj=Module["dynCall_iiiiiijj"]=Module["asm"]["G"]).apply(null,arguments)};var calledRun;function ExitStatus(status){this.name="ExitStatus";this.message="Program terminated with exit("+status+")";this.status=status}dependenciesFulfilled=function runCaller(){if(!calledRun)run();if(!calledRun)dependenciesFulfilled=runCaller};function run(args){args=args||arguments_;if(runDependencies>0){return}preRun();if(runDependencies>0){return}function doRun(){if(calledRun)return;calledRun=true;Module["calledRun"]=true;if(ABORT)return;initRuntime();if(Module["onRuntimeInitialized"])Module["onRuntimeInitialized"]();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(function(){setTimeout(function(){Module["setStatus"]("")},1);doRun()},1)}else{doRun()}}Module["run"]=run;if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}run();
2 | 


--------------------------------------------------------------------------------
/docs/native.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhirose/cpp-peglib/16685ba0fe2574a4f4786dec93ffa21158e728e6/docs/native.wasm


--------------------------------------------------------------------------------
/docs/style.css:
--------------------------------------------------------------------------------
  1 | * { /* Global reset applied to every element: border-box sizing, no default margin/padding, no link underline, no list bullets. */
  2 |   box-sizing: border-box;
  3 |   margin: 0;
  4 |   padding: 0;
  5 |   text-decoration: none;
  6 |   list-style: none;
  7 | }
  8 | body { /* Page is a vertical flex column exactly one viewport tall. */
  9 |   display: flex;
 10 |   flex-direction: column;
 11 |   height: 100vh;
 12 | }
 13 | #main { /* Takes the remaining height of the body column; hidden by default. NOTE(review): presumably revealed by script once loading finishes - confirm. */
 14 |   flex: 1;
 15 |   display: none;
 16 |   z-index: 0;
 17 | }
 18 | .editor-container { /* One editor pane: a full-width flex column with a 6px outer margin. */
 19 |   flex: 1;
 20 |   width: 100%;
 21 |   display: flex;
 22 |   flex-direction: column;
 23 |   margin: 6px;
 24 | }
 25 | .editor-container:first-child { /* A pane that is the first child drops its right margin. */
 26 |   margin-right: 0;
 27 | }
 28 | .editor-header { /* Header row above an editor; its items are laid out horizontally. */
 29 |   display: flex;
 30 |   margin: 0 2px;
 31 | }
 32 | .editor-header > li { /* Fixed-height header items. */
 33 |   height: 32px;
 34 |   line-height: 24px;
 35 | }
 36 | .editor-header > li > span { /* Spacing after a span placed directly inside a header item. */
 37 |   margin-right: 6px;
 38 | }
 39 | .editor-options { /* Auto left margin pushes this item to the right end of the flex row. */
 40 |   margin-left: auto;
 41 | }
 42 | .editor-header-options { /* Horizontal flex row of header options. */
 43 |   display: flex;
 44 | }
 45 | .validation { /* Status indicator: a 20px circle (border-radius 50%), light green by default. */
 46 |   display: inline-block;
 47 |   height: 20px;
 48 |   width: 20px;
 49 |   margin: 2px 0;
 50 |   border-radius: 50%;
 51 |   background-color: lightgreen;
 52 | }
 53 | .validation-invalid { /* Declared after .validation, so it overrides the indicator colour with pink. */
 54 |   background-color: pink;
 55 | }
 56 | .option { /* One option entry; 8px gap to the next one. */
 57 |   margin-right: 8px;
 58 | }
 59 | .option:last-child { /* No trailing gap after the last option. */
 60 |   margin-right: 0;
 61 | }
 62 | .option input[type=checkbox] { /* Gap between a checkbox and whatever follows it. */
 63 |   margin-right: 4px;
 64 | }
 65 | .option .parse { /* Element with class "parse" inside an option: padded, fixed height, pointer cursor. */
 66 |   padding-left: 8px;
 67 |   padding-right: 8px;
 68 |   height: 24px;
 69 |   cursor: pointer;
 70 | }
 71 | .editor-area { /* Bordered region that grows to fill the free space of its flex parent. */
 72 |   flex: 1;
 73 |   border: 1px solid lightgray;
 74 | }
 75 | .editor-info { /* Fixed-height (160px) bordered panel below the editor; scrolls vertically on overflow. */
 76 |   margin-top: 6px;
 77 |   height: 160px;
 78 |   border: 1px solid lightgray;
 79 |   padding: 8px;
 80 |   overflow-y: auto;
 81 | }
 82 | .editor-info li { /* List entries in the info panel show a pointer cursor. */
 83 |   cursor: pointer;
 84 | }
 85 | .editor-info li:hover{ /* Highlight the entry under the mouse. */
 86 |   background-color: lightyellow;
 87 | }
 88 | .editor-sub-header { /* Padded secondary header. */
 89 |   padding: 4px;
 90 | }
 91 | .show-toggle { /* Spacing after a show/hide toggle control. */
 92 |   margin-right: 6px;
 93 | }
 94 | #overlay { /* Viewport-sized layer with a "wait" cursor; hidden and stacked behind the content (z-index -1) by default. NOTE(review): presumably toggled by script while the page is busy - confirm. */
 95 |   position: absolute;
 96 |   width: 100vw;
 97 |   height: 100vh;
 98 |   cursor: wait;
 99 |   display: none;
100 |   z-index: -1;
101 | }
102 | 


--------------------------------------------------------------------------------
/example/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | project(example)
 3 | 
 4 | # Every example is a single translation unit named <target>.cc. Each one is
 5 | # built the same way: `..` is added to its include path and it links whatever
 6 | # `add_link_deps` holds (the variable is set outside this file).
 7 | set(example_targets
 8 |   calc
 9 |   calc2
10 |   calc3
11 |   calc4
12 |   calc5
13 |   indent
14 |   docx
15 |   sequence
16 |   enter_leave
17 |   choice
18 | )
19 | 
20 | foreach(example_target IN LISTS example_targets)
21 |   add_executable(${example_target} ${example_target}.cc)
22 |   target_include_directories(${example_target} PRIVATE ..)
23 |   target_link_libraries(${example_target} PRIVATE ${add_link_deps})
24 | endforeach()
25 | 


--------------------------------------------------------------------------------
/example/calc.cc:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <iostream>
 3 | #include <peglib.h>
 4 | 
 5 | using namespace peg;
 6 | using namespace std;
 7 | 
 8 | int main(void) {
 9 |   // (2) Make a parser; the grammar attaches the label `cond` to the operand after '*'
10 |   parser parser(R"(
11 |         # Grammar for Calculator...
12 |         Additive    <- Multiplicative '+' Additive / Multiplicative
13 |         Multiplicative   <- Primary '*' Multiplicative^cond / Primary
14 |         Primary     <- '(' Additive ')' / Number
15 |         Number      <- < [0-9]+ >
16 |         %whitespace <- [ \t]*
17 |         cond <- '' { error_message "missing multiplicative" }
18 |     )");
19 | 
20 |   assert(static_cast<bool>(parser) == true); // presumably false if the grammar was rejected — confirm
21 | 
22 |   // (3) Setup actions: each one turns the values of its children into an int
23 |   parser["Additive"] = [](const SemanticValues &vs) {
24 |     switch (vs.choice()) {
25 |     case 0: // "Multiplicative '+' Additive"
26 |       return any_cast<int>(vs[0]) + any_cast<int>(vs[1]);
27 |     default: // "Multiplicative"
28 |       return any_cast<int>(vs[0]);
29 |     }
30 |   };
31 | 
32 |   parser["Multiplicative"] = [](const SemanticValues &vs) {
33 |     switch (vs.choice()) {
34 |     case 0: // "Primary '*' Multiplicative"
35 |       return any_cast<int>(vs[0]) * any_cast<int>(vs[1]);
36 |     default: // "Primary"
37 |       return any_cast<int>(vs[0]);
38 |     }
39 |   };
40 | 
41 |   parser["Number"] = [](const SemanticValues &vs) {
42 |     return vs.token_to_number<int>();
43 |   };
44 | 
45 |   // (4) Parse an input that is deliberately incomplete: nothing follows '*'
46 |   parser.enable_packrat_parsing(); // Enable packrat parsing.
47 | 
48 |   int val = 0;
49 |   parser.parse(" (1 + 2) * ", val);
50 | 
51 |   // "(1 + 2) * 3" would give 9; the truncated input is expected to leave val at 0.
52 |   assert(val == 0);
53 | }
54 | 


--------------------------------------------------------------------------------
/example/calc2.cc:
--------------------------------------------------------------------------------
 1 | //
 2 | //  calc2.cc
 3 | //
 4 | //  Copyright (c) 2015 Yuji Hirose. All rights reserved.
 5 | //  MIT License
 6 | //
 7 | 
 8 | #include <cstdlib>
 9 | #include <iostream>
10 | #include <peglib.h>
11 | 
12 | using namespace peg;
13 | 
14 | //
15 | //  Evaluates the arithmetic expression passed as the first command-line
16 | //  argument. The grammar is assembled from parser combinators, not PEG text.
17 | //  Exit status: 0 on success, 1 for usage or division by zero, -1 when the
18 | //  input does not parse.
19 | //
20 | //  PEG syntax:
21 | //
22 | //      EXPRESSION       <-  TERM (TERM_OPERATOR TERM)*
23 | //      TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
24 | //      FACTOR           <-  NUMBER / '(' EXPRESSION ')'
25 | //      TERM_OPERATOR    <-  [-+]
26 | //      FACTOR_OPERATOR  <-  [/*]
27 | //      NUMBER           <-  [0-9]+
28 | //
29 | int main(int argc, const char **argv) {
30 |   if (argc < 2 || std::string("--help") == argv[1]) {
31 |     std::cout << "usage: calc2 [formula]" << std::endl;
32 |     return 1;
33 |   }
34 | 
35 |   // Folds `operand (operator operand)*` from left to right. vs[0] is the first
36 |   // operand; after it, operators (char) and operands (long) alternate.
37 |   auto reduce = [](const SemanticValues &vs) {
38 |     auto result = std::any_cast<long>(vs[0]);
39 |     for (auto i = 1u; i < vs.size(); i += 2) {
40 |       auto num = std::any_cast<long>(vs[i + 1]);
41 |       auto ope = std::any_cast<char>(vs[i]);
42 |       switch (ope) {
43 |       case '+': result += num; break;
44 |       case '-': result -= num; break;
45 |       case '*': result *= num; break;
46 |       case '/':
47 |         // Integer division by zero is undefined behaviour; report it and stop.
48 |         if (num == 0) {
49 |           std::cout << "division by zero..." << std::endl;
50 |           std::exit(1);
51 |         }
52 |         result /= num;
53 |         break;
54 |       }
55 |     }
56 |     return result;
57 |   };
58 | 
59 |   Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
60 | 
61 |   EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce;
62 |   TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
63 |   FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
64 |   TERM_OPERATOR <= cls("+-"),
65 |       [](const SemanticValues &vs) { return static_cast<char>(*vs.sv().data()); };
66 |   FACTOR_OPERATOR <= cls("*/"),
67 |       [](const SemanticValues &vs) { return static_cast<char>(*vs.sv().data()); };
68 |   NUMBER <= oom(cls("0-9")),
69 |       [](const SemanticValues &vs) { return vs.token_to_number<long>(); };
70 | 
71 |   auto expr = argv[1];
72 |   long val = 0;
73 |   if (EXPRESSION.parse_and_get_value(expr, val).ret) {
74 |     std::cout << expr << " = " << val << std::endl;
75 |     return 0;
76 |   }
77 | 
78 |   // Say why the program failed instead of exiting silently.
79 |   std::cout << "syntax error..." << std::endl;
80 |   return -1;
81 | }
82 | 
83 | // vim: et ts=4 sw=4 cin cino={1s ff=unix
84 | 


--------------------------------------------------------------------------------
/example/calc3.cc:
--------------------------------------------------------------------------------
 1 | //
 2 | //  calc3.cc
 3 | //
 4 | //  Copyright (c) 2015 Yuji Hirose. All rights reserved.
 5 | //  MIT License
 6 | //
 7 | 
 8 | #include <cstdlib>
 9 | #include <iostream>
10 | #include <peglib.h>
11 | 
12 | using namespace peg;
13 | 
14 | // Evaluates the arithmetic expression given as the first command-line
15 | // argument by walking the optimized AST produced by the parser.
16 | // Exit status: 0 on success, 1 for usage or division by zero, -1 on a syntax error.
17 | int main(int argc, const char **argv) {
18 |   if (argc < 2 || std::string("--help") == argv[1]) {
19 |     std::cout << "usage: calc3 [formula]" << std::endl;
20 |     return 1;
21 |   }
22 | 
23 |   // NUMBER nodes are leaves; every other node is evaluated as
24 |   // `operand (operator operand)*`, folded from left to right.
25 |   std::function<long(const Ast &)> eval = [&](const Ast &ast) {
26 |     if (ast.name == "NUMBER") {
27 |       return ast.token_to_number<long>();
28 |     } else {
29 |       const auto &nodes = ast.nodes;
30 |       auto result = eval(*nodes[0]);
31 |       for (auto i = 1u; i < nodes.size(); i += 2) {
32 |         auto num = eval(*nodes[i + 1]);
33 |         auto ope = nodes[i]->token[0];
34 |         switch (ope) {
35 |         case '+': result += num; break;
36 |         case '-': result -= num; break;
37 |         case '*': result *= num; break;
38 |         case '/':
39 |           // Integer division by zero is undefined behaviour; report it and stop.
40 |           if (num == 0) {
41 |             std::cout << "division by zero..." << std::endl;
42 |             std::exit(1);
43 |           }
44 |           result /= num;
45 |           break;
46 |         }
47 |       }
48 |       return result;
49 |     }
50 |   };
51 | 
52 |   parser parser(R"(
53 |         EXPRESSION       <-  TERM (TERM_OPERATOR TERM)*
54 |         TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
55 |         FACTOR           <-  NUMBER / '(' EXPRESSION ')'
56 | 
57 |         TERM_OPERATOR    <-  < [-+] >
58 |         FACTOR_OPERATOR  <-  < [/*] >
59 |         NUMBER           <-  < [0-9]+ >
60 | 
61 |         %whitespace      <-  [ \t\r\n]*
62 |     )");
63 | 
64 |   parser.enable_ast();
65 | 
66 |   auto expr = argv[1];
67 |   std::shared_ptr<Ast> ast;
68 |   if (parser.parse(expr, ast)) {
69 |     ast = parser.optimize_ast(ast);
70 |     std::cout << ast_to_s(ast);
71 |     // Evaluate before printing so an error message is not mixed into the result line.
72 |     const auto val = eval(*ast);
73 |     std::cout << expr << " = " << val << std::endl;
74 |     return 0;
75 |   }
76 | 
77 |   std::cout << "syntax error..." << std::endl;
78 | 
79 |   return -1;
80 | }
81 | 
82 | // vim: et ts=4 sw=4 cin cino={1s ff=unix
83 | 


--------------------------------------------------------------------------------
/example/calc4.cc:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <iostream>
 3 | #include <peglib.h>
 4 | 
 5 | using namespace peg;
 6 | using namespace std;
 7 | 
 8 | // Evaluates one fixed expression with a grammar that uses the `precedence`
 9 | // instruction, then checks the result with assert().
10 | int main(void) {
11 |   parser parser(R"(
12 |         EXPRESSION  <- ATOM (OPERATOR ATOM)* {
13 |                          precedence
14 |                            L - +
15 |                            L / *
16 |                        }
17 |         ATOM        <- NUMBER / '(' EXPRESSION ')'
18 |         OPERATOR    <- < [-+/*] >
19 |         NUMBER      <- < '-'? [0-9]+ >
20 |         %whitespace <- [ \t\r\n]*
21 |     )");
22 | 
23 |   // Returns vs[0] unchanged when it is the only value; otherwise combines it
24 |   // with vs[2] using the operator character in vs[1].
25 |   parser["EXPRESSION"] = [](const SemanticValues &vs) {
26 |     auto result = any_cast<long>(vs[0]);
27 |     if (vs.size() > 1) {
28 |       auto ope = any_cast<char>(vs[1]);
29 |       auto num = any_cast<long>(vs[2]);
30 |       switch (ope) {
31 |       case '+': result += num; break;
32 |       case '-': result -= num; break;
33 |       case '*': result *= num; break;
34 |       case '/': result /= num; break;
35 |       }
36 |     }
37 |     return result;
38 |   };
39 |   parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
40 |   // Convert the token itself; atol() needs a NUL-terminated string, which sv().data() may not be.
41 |   parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); };
42 | 
43 |   long val = 0; // defined value even if parse() fails before storing a result
44 |   parser.parse(" -1 + (1 + 2) * 3 - -1", val);
45 | 
46 |   assert(val == 9);
47 | }
48 | 


--------------------------------------------------------------------------------
/example/calc5.cc:
--------------------------------------------------------------------------------
 1 | //
 2 | //  calc5.cc
 3 | //
 4 | //  Copyright (c) 2015 Yuji Hirose. All rights reserved.
 5 | //  MIT License
 6 | //
 7 | 
 8 | #include <cstdlib>
 9 | #include <iostream>
10 | #include <peglib.h>
11 | 
12 | using namespace peg;
13 | 
14 | // Evaluates the arithmetic expression given as the first command-line
15 | // argument by walking the optimized AST of a precedence-climbing grammar.
16 | // Exit status: 0 on success, 1 for usage or division by zero, -1 on a syntax error.
17 | int main(int argc, const char **argv) {
18 |   if (argc < 2 || std::string("--help") == argv[1]) {
19 |     std::cout << "usage: calc5 [formula]" << std::endl;
20 |     return 1;
21 |   }
22 | 
23 |   // NUMBER nodes are leaves; any other node is its first child, optionally
24 |   // combined with the third child using the operator held by the second.
25 |   std::function<long(const Ast &)> eval = [&](const Ast &ast) {
26 |     if (ast.name == "NUMBER") {
27 |       return ast.token_to_number<long>();
28 |     } else {
29 |       const auto &nodes = ast.nodes;
30 |       auto result = eval(*nodes[0]);
31 |       if (nodes.size() > 1) {
32 |         auto ope = nodes[1]->token[0];
33 |         auto num = eval(*nodes[2]);
34 |         switch (ope) {
35 |         case '+': result += num; break;
36 |         case '-': result -= num; break;
37 |         case '*': result *= num; break;
38 |         case '/':
39 |           // Integer division by zero is undefined behaviour; report it and stop.
40 |           if (num == 0) {
41 |             std::cout << "division by zero..." << std::endl;
42 |             std::exit(1);
43 |           }
44 |           result /= num;
45 |           break;
46 |         }
47 |       }
48 |       return result;
49 |     }
50 |   };
51 | 
52 |   parser parser(R"(
53 |         EXPRESSION  <- ATOM (OPERATOR ATOM)* {
54 |                          precedence
55 |                            L - +
56 |                            L / *
57 |                        }
58 |         ATOM        <- NUMBER / '(' EXPRESSION ')'
59 |         OPERATOR    <- < [-+/*] >
60 |         NUMBER      <- < '-'? [0-9]+ >
61 |         %whitespace <- [ \t\r\n]*
62 |     )");
63 | 
64 |   parser.enable_ast();
65 | 
66 |   auto expr = argv[1];
67 |   std::shared_ptr<Ast> ast;
68 |   if (parser.parse(expr, ast)) {
69 |     ast = parser.optimize_ast(ast);
70 |     std::cout << ast_to_s(ast);
71 |     // Evaluate before printing so an error message is not mixed into the result line.
72 |     const auto val = eval(*ast);
73 |     std::cout << expr << " = " << val << std::endl;
74 |     return 0;
75 |   }
76 | 
77 |   std::cout << "syntax error..." << std::endl;
78 | 
79 |   return -1;
80 | }
81 | 
82 | // vim: et ts=4 sw=4 cin cino={1s ff=unix
83 | 


--------------------------------------------------------------------------------
/example/choice.cc:
--------------------------------------------------------------------------------
 1 | //
 2 | //  choice.cc
 3 | //
 4 | //  Copyright (c) 2023 Yuji Hirose. All rights reserved.
 5 | //  MIT License
 6 | //
 7 | 
 8 | #include <cstdlib>
 9 | #include <iostream>
10 | #include <peglib.h>
11 | 
12 | using namespace peg;
13 | 
14 | int main(void) {
15 |   parser parser(R"(
16 | type <- 'string' / 'int' / 'double'
17 | %whitespace <- [ \t\r\n]*
18 |   )");
19 |   // Action for `type`: prints vs.choice() (presumably the 0-based index of the matched alternative — confirm).
20 |   parser["type"] = [](const SemanticValues &vs) {
21 |     std::cout << vs.choice() << std::endl;
22 |   };
23 |   // Parse a fixed input: exit 0 on success, otherwise report the failure and exit with -1.
24 |   if (parser.parse("int")) { return 0; }
25 | 
26 |   std::cout << "syntax error..." << std::endl;
27 |   return -1;
28 | }
29 | 


--------------------------------------------------------------------------------
/example/enter_leave.cc:
--------------------------------------------------------------------------------
 1 | //
 2 | //  enter_leave.cc
 3 | //
 4 | //  Copyright (c) 2023 Yuji Hirose. All rights reserved.
 5 | //  MIT License
 6 | //
 7 | 
 8 | #include <cstdlib>
 9 | #include <iostream>
10 | #include <peglib.h>
11 | 
12 | using namespace peg;
13 | 
14 | int main(void) {
15 |   parser parser(R"(
16 |     S <- A+
17 |     A <- 'A'
18 |   )");
19 |   // Callback stored in rule A's `enter` member; it only prints "enter".
20 |   parser["A"].enter = [](const Context & /*c*/, const char * /*s*/,
21 |                          size_t /*n*/, std::any & /*dt*/) {
22 |     std::cout << "enter" << std::endl;
23 |   };
24 |   // Semantic action for A; it only prints "action!".
25 |   parser["A"] = [](const SemanticValues & /*vs*/, std::any & /*dt*/) {
26 |     std::cout << "action!" << std::endl;
27 |   };
28 |   // Callback stored in rule A's `leave` member; it only prints "leave".
29 |   parser["A"].leave = [](const Context & /*c*/, const char * /*s*/,
30 |                          size_t /*n*/, size_t /*matchlen*/,
31 |                          std::any & /*value*/, std::any & /*dt*/) {
32 |     std::cout << "leave" << std::endl;
33 |   };
34 |   // NOTE(review): presumably prints enter, action!, leave in that order for the single 'A' — confirm.
35 |   if (parser.parse("A")) { return 0; }
36 | 
37 |   std::cout << "syntax error..." << std::endl;
38 |   return -1;
39 | }
40 | 


--------------------------------------------------------------------------------
/example/indent.cc:
--------------------------------------------------------------------------------
 1 | //
 2 | //  indent.cc
 3 | //
 4 | //  Copyright (c) 2022 Yuji Hirose. All rights reserved.
 5 | //  MIT License
 6 | //
 7 | 
 8 | // Based on https://gist.github.com/dmajda/04002578dd41ae8190fc
 9 | 
10 | #include <cstdlib>
11 | #include <iostream>
12 | #include <peglib.h>
13 | 
14 | using namespace peg;
15 | 
16 | int main(void) {
17 |   parser parser(R"(Start <- Statements {}
18 | Statements <- Statement*
19 | Statement <- Samedent (S / I)
20 | 
21 | S <- 'S' EOS { no_ast_opt }
22 | I <- 'I' EOL Block / 'I' EOS { no_ast_opt }
23 | 
24 | Block <- Statements {}
25 | 
26 | ~Samedent <- ' '* {}
27 | 
28 | ~EOS <- EOL / EOF
29 | ~EOL <- '\n'
30 | ~EOF <- !.
31 |   )");
32 |   // Number of leading spaces a statement must have at the current nesting depth.
33 |   size_t indent = 0;
34 |   // Block's enter callback raises the expected indentation by two...
35 |   parser["Block"].enter = [&](const Context & /*c*/, const char * /*s*/,
36 |                               size_t /*n*/, std::any & /*dt*/) { indent += 2; };
37 |   // ...and its leave callback lowers it by two again.
38 |   parser["Block"].leave = [&](const Context & /*c*/, const char * /*s*/,
39 |                               size_t /*n*/, size_t /*matchlen*/,
40 |                               std::any & /*value*/,
41 |                               std::any & /*dt*/) { indent -= 2; };
42 |   // Samedent is accepted only when the matched run of spaces is exactly `indent` long.
43 |   parser["Samedent"].predicate =
44 |       [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
45 |         if (indent != vs.sv().size()) {
46 |           msg = "different indent...";
47 |           return false;
48 |         }
49 |         return true;
50 |       };
51 | 
52 |   parser.enable_ast();
53 |   // Sample input: each nesting level adds two leading spaces.
54 |   const auto source = R"(I
55 |   S
56 |   I
57 |     I
58 |       S
59 |       S
60 |     S
61 |   S
62 | )";
63 |   // Parse, optimize and print the AST; exit 0 on success, -1 on a syntax error.
64 |   std::shared_ptr<Ast> ast;
65 |   if (parser.parse(source, ast)) {
66 |     ast = parser.optimize_ast(ast);
67 |     std::cout << ast_to_s(ast);
68 |     return 0;
69 |   }
70 | 
71 |   std::cout << "syntax error..." << std::endl;
72 |   return -1;
73 | }
74 | 


--------------------------------------------------------------------------------
/example/sequence.cc:
--------------------------------------------------------------------------------
 1 | //
 2 | //  sequence.cc
 3 | //
 4 | //  Copyright (c) 2023 Yuji Hirose. All rights reserved.
 5 | //  MIT License
 6 | //
 7 | 
 8 | #include <cstdlib>
 9 | #include <iostream>
10 | #include <peglib.h>
11 | 
12 | using namespace peg;
13 | 
14 | int main(void) {
15 |   parser parser(R"(
16 | START       <- SEQUENCE_A
17 | SEQUENCE_A  <- SEQUENCE('A')
18 | SEQUENCE(X) <- X (',' X)*
19 |   )");
20 |   // SEQUENCE takes a parameter X; SEQUENCE_A applies it to the literal 'A'. Its action only prints its name.
21 |   parser["SEQUENCE_A"] = [](const SemanticValues & /*vs*/) {
22 |     std::cout << "SEQUENCE_A" << std::endl;
23 |   };
24 |   // Parse a fixed input: exit 0 on success, otherwise report the failure and exit with -1.
25 |   if (parser.parse("A,A")) { return 0; }
26 | 
27 |   std::cout << "syntax error..." << std::endl;
28 |   return -1;
29 | }
30 | 


--------------------------------------------------------------------------------
/grammar/cpp-peglib.peg:
--------------------------------------------------------------------------------
  1 | # Setup PEG syntax parser
  2 | Grammar <-  Spacing  Definition+  EndOfFile
  3 | 
  4 | Definition <-
  5 | 	Ignore  IdentCont  Parameters  LEFTARROW Expression  Instruction?
  6 | 	/ Ignore  Identifier  LEFTARROW  Expression Instruction?
  7 | 
  8 | Expression <-  Sequence  (SLASH  Sequence)*
  9 | 
 10 | Sequence <-  (CUT /  Prefix)*
 11 | 
 12 | Prefix <-  (AND /  NOT)?  SuffixWithLabel
 13 | 
 14 | SuffixWithLabel <- Suffix  (LABEL  Identifier)?
 15 | 
 16 | Suffix <-  Primary  Loop?
 17 | 
 18 | Loop <-  QUESTION /  STAR /  PLUS /  Repetition
 19 | 
 20 | Primary <-
 21 | 	Ignore  IdentCont  Arguments !LEFTARROW
 22 | 	/ Ignore  Identifier !(Parameters?  LEFTARROW)
 23 | 	/ OPEN  Expression  CLOSE
 24 | 	/ BeginTok  Expression  EndTok
 25 | 	/ BeginCapScope  Expression  EndCapScope
 26 | 	/ BeginCap  Expression  EndCap
 27 | 	/ CapScope
 28 | 	/ BackRef
 29 | 	/ DictionaryI
 30 | 	/ LiteralI
 31 | 	/ Dictionary
 32 | 	/ Literal
 33 | 	/ NegatedClassI
 34 | 	/ NegatedClass
 35 | 	/ ClassI
 36 | 	/ Class
 37 | 	/ DOT
 38 | 
 39 | Identifier <-  IdentCont  Spacing
 40 | 
 41 | IdentCont <- <IdentStart  IdentRest*>
 42 | 
 43 | IdentStart <-  !"↑"  !"⇑" ([a-zA-Z_%] /  [\u0080-\uFFFF])
 44 | 
 45 | IdentRest <-  IdentStart /  [0-9]
 46 | 
 47 | Dictionary <-  LiteralD  (PIPE  LiteralD)+
 48 | 
 49 | DictionaryI <- LiteralID (PIPE LiteralID)+  # '+' as in Dictionary; with '*' a lone literal never reaches LiteralI
 50 | 
 51 | lit_ope <-
 52 | 	[']  <(![']  Char)*> [']  Spacing
 53 | 	/ ["]  <(!["]  Char)*> ["]  Spacing
 54 | 
 55 | Literal <-  lit_ope
 56 | 
 57 | LiteralD <-  lit_ope
 58 | 
 59 | lit_case_ignore_ope <-
 60 | 	[']  <(![']  Char)*>  "'i" Spacing
 61 | 	/ ["]  <(!["]  Char)*>  '"i' Spacing
 62 | 
 63 | LiteralI <- lit_case_ignore_ope
 64 | 
 65 | LiteralID <- lit_case_ignore_ope
 66 | 
 67 | # NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
 68 | Class <-  '['  !'^' <(!']'  Range)+>  ']' Spacing
 69 | ClassI <-  '['  !'^' <(!']'  Range)+>  ']i' Spacing
 70 | NegatedClass <-  "[^" <(!']'  Range)+>  ']' Spacing
 71 | NegatedClassI <-  "[^" <(!']'  Range)+>  ']i' Spacing
 72 | 
 73 | Range <-  (Char  '-'  ! ']' Char) /  Char
 74 | 
 75 | Char <-
 76 | 	'\\'  [nrt'\"[\]\\^]
 77 | 	/ '\\'  [0-3]  [0-7]  [0-7]
 78 | 	/ '\\'  [0-7]  [0-7]?
 79 | 	/ "\\x"  [0-9a-fA-F]  [0-9a-fA-F]?
 80 | 	/ "\\u" (((('0' [0-9a-fA-F]) / "10") [0-9a-fA-F]{4,4}) / [0-9a-fA-F]{4,5})
 81 | 	/ !'\\'   .
 82 | 
 83 | Repetition <- BeginBracket  RepetitionRange  EndBracket
 84 | 
 85 | RepetitionRange <-
 86 | 	Number  COMMA  Number
 87 | 	/ Number  COMMA
 88 | 	/  Number
 89 | 	/ COMMA  Number
 90 | 
 91 | Number <-  [0-9]+  Spacing
 92 | 
 93 | CapScope <- BeginCapScope Expression EndCapScope
 94 | 
 95 | LEFTARROW <-  ("<-" / "←")  Spacing
 96 | 
 97 | ~SLASH <-  '/'  Spacing
 98 | ~PIPE <-  '|'  Spacing
 99 | AND <-  '&'  Spacing
100 | NOT <-  '!'  Spacing
101 | QUESTION <- '?'  Spacing
102 | STAR <-  '*'  Spacing
103 | PLUS <-  '+'  Spacing
104 | ~OPEN <-  '('  Spacing
105 | ~CLOSE <- ')'  Spacing
106 | DOT <-  '.'  Spacing
107 | 
108 | CUT <-  "↑"  Spacing
109 | ~LABEL <-  ('^' /  "⇑")  Spacing
110 | 
111 | ~Spacing <-  (Space /  Comment)*
112 | Comment <- '#'  (!EndOfLine   . )*
113 | Space <-  ' ' /  '\t' /  EndOfLine
114 | EndOfLine <-  "\r\n" /  '\n' /  '\r'
115 | EndOfFile <-  ! .
116 | 
117 | ~BeginTok <-  '<'  Spacing
118 | ~EndTok <-  '>'  Spacing
119 | 
120 | ~BeginCapScope <-  '$'  '('  Spacing
121 | ~EndCapScope <-  ')'  Spacing
122 | 
123 | BeginCap <-  '$'  <IdentCont>  '<'  Spacing
124 | ~EndCap <-  '>'  Spacing
125 | 
126 | BackRef <-  '$'  <IdentCont>  Spacing
127 | 
128 | IGNORE <-  '~'
129 | 
130 | Ignore <-  IGNORE?
131 | Parameters <-  OPEN  Identifier (COMMA  Identifier)*  CLOSE
132 | Arguments <-  OPEN  Expression (COMMA  Expression)*  CLOSE
133 | ~COMMA <-  ','  Spacing
134 | 
135 | # Instruction grammars
136 | Instruction <-
137 | 	BeginBracket (InstructionItem  (InstructionItemSeparator InstructionItem)*)? EndBracket
138 | InstructionItem <- PrecedenceClimbing /  ErrorMessage /  NoAstOpt
139 | ~InstructionItemSeparator <-  ';'  Spacing
140 | 
141 | ~SpacesZom <-  Space*
142 | ~SpacesOom <-  Space+
143 | ~BeginBracket <-  '{'  Spacing
144 | ~EndBracket <-  '}'  Spacing
145 | 
146 | # PrecedenceClimbing instruction
147 | PrecedenceClimbing <- "precedence"  SpacesOom  PrecedenceInfo (SpacesOom  PrecedenceInfo)*  SpacesZom
148 | PrecedenceInfo <- PrecedenceAssoc (~SpacesOom  PrecedenceOpe)+
149 | PrecedenceOpe <-
150 | 	['] <(!(Space /  ['])  Char)*> [']
151 | 	/ ["] <(!(Space /  ["])  Char)*> ["]
152 | 	/ <(!(PrecedenceAssoc /  Space /  '}')  . )+>
153 | PrecedenceAssoc <-  [LR]
154 | 
155 | # Error message instruction, written in grammars as: { error_message "text" }
156 | ErrorMessage <- "error_message"  SpacesOom  LiteralD  SpacesZom
157 | 
158 | # No Ast node optimization instruction
159 | NoAstOpt <-  "no_ast_opt"  SpacesZom
160 | 


--------------------------------------------------------------------------------
/grammar/csv.peg:
--------------------------------------------------------------------------------
 1 | # CSV grammar based on RFC 4180 (http://www.ietf.org/rfc/rfc4180.txt)
 2 | 
 3 | file        <- (header NL)? record (NL record)* NL?
 4 | header      <- name (COMMA name)*
 5 | record      <- field (COMMA field)*
 6 | name        <- field
 7 | field       <- escaped / non_escaped
 8 | escaped     <- DQUOTE (TEXTDATA / COMMA / CR / LF / D_DQUOTE)* DQUOTE
 9 | non_escaped <- TEXTDATA*
10 | COMMA       <- ','
11 | CR          <- '\r'
12 | DQUOTE      <- '"'
13 | LF          <- '\n'
14 | NL          <- CR LF / CR / LF
15 | TEXTDATA    <- !([",] / NL) .
16 | D_DQUOTE    <- '"' '"'
17 | 


--------------------------------------------------------------------------------
/grammar/json.peg:
--------------------------------------------------------------------------------
 1 | # JSON grammar based on RFC 4627 (http://www.ietf.org/rfc/rfc4627.txt)
 2 | 
 3 | json        <- object / array
 4 | 
 5 | object      <- '{' (member (',' member)*)? '}' { no_ast_opt }
 6 | member      <- string ':' value
 7 | 
 8 | array       <- '[' (value (',' value)*)? ']'
 9 | 
10 | value       <- boolean / null / number / string / object / array
11 | 
12 | boolean     <- 'false' / 'true'
13 | null        <- 'null'
14 | 
15 | number      <- < minus int frac exp >
16 | minus       <- '-'?
17 | int         <- '0' / [1-9][0-9]*
18 | frac        <- ('.' [0-9]+)?
19 | exp         <- ([eE] [-+]? [0-9]+)?
20 | 
21 | string      <- '"' < char* > '"'
22 | char        <- unescaped / escaped
23 | escaped     <- '\\' (["\\/bfnrt] / 'u' [a-fA-F0-9]{4})
24 | unescaped   <- [\u0020-\u0021\u0023-\u005b\u005d-\u10ffff]
25 | 
26 | %whitespace <- [ \t\r\n]*
27 | 


--------------------------------------------------------------------------------
/grammar/pl0.peg:
--------------------------------------------------------------------------------
 1 | 
 2 | program    <- _ block '.' _
 3 | 
 4 | block      <- const var procedure statement
 5 | const      <- ('CONST' __ ident '=' _ number (',' _ ident '=' _ number)* ';' _)?
 6 | var        <- ('VAR' __ ident (',' _ ident)* ';' _)?
 7 | procedure  <- ('PROCEDURE' __ ident ';' _ block ';' _)*
 8 | 
 9 | statement  <- (assignment / call / statements / if / while / out / in)?
10 | assignment <- ident ':=' _ expression
11 | call       <- 'CALL' __ ident
12 | statements <- 'BEGIN' __ statement (';' _ statement )* 'END' __
13 | if         <- 'IF' __ condition 'THEN' __ statement
14 | while      <- 'WHILE' __ condition 'DO' __ statement
15 | out        <- ('out' __ / 'write' __ / '!' _) expression
16 | in         <- ('in' __ / 'read' __ / '?' _) ident
17 | 
18 | condition  <- odd / compare
19 | odd        <- 'ODD' __ expression
20 | compare    <- expression compare_op expression
21 | compare_op <- < '=' / '#' / '<=' / '<' / '>=' / '>' > _
22 | 
23 | expression <- sign term (term_op term)*
24 | sign       <- < [-+]? > _
25 | term_op    <- < [-+] > _
26 | 
27 | term       <- factor (factor_op factor)*
28 | factor_op  <- < [*/] > _
29 | 
30 | factor     <- ident / number / '(' _ expression ')' _
31 | 
32 | ident      <- < [a-z] [a-z0-9]* > _
33 | number     <- < [0-9]+ > _
34 | 
35 | ~_         <- [ \t\r\n]*
36 | ~__        <- ![a-z0-9_] _
37 | 
38 | 


--------------------------------------------------------------------------------
/lint/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.14)
2 | project(peglint)
3 | 
4 | # peglint is built from a single source file; `..` is added to its include path.
5 | add_executable(peglint peglint.cc)
6 | target_include_directories(peglint PRIVATE ..)
7 | # `add_link_deps` is not set in this file; an undefined variable expands to nothing.
8 | target_link_libraries(peglint PRIVATE ${add_link_deps})
9 | 


--------------------------------------------------------------------------------
/lint/README.md:
--------------------------------------------------------------------------------
  1 | peglint
  2 | -------
  3 | 
  4 | The lint utility for PEG.
  5 | 
  6 | ```
  7 | usage: grammar_file_path [source_file_path]
  8 | 
  9 |   options:
 10 |     --ast: show AST tree
 11 |     --packrat: enable packrat memoise
 12 |     --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction
 13 |     --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction
 14 |     --source: source text
 15 |     --trace: show concise trace messages
 16 |     --profile: show profile report
 17 |     --verbose: verbose output for trace and profile
 18 | ```
 19 | 
 20 | ### Build peglint
 21 | 
 22 | ```
 23 | > cd lint
 24 | > mkdir build
 25 | > cd build
 26 | > cmake ..
 27 | > make
 28 | ```
 29 | 
 30 | ### Lint grammar
 31 | 
 32 | ```
 33 | > cat a.peg
 34 | A <- 'hello' ^ 'world'
 35 | 
 36 | > peglint a.peg
 37 | a.peg:1:16: syntax error
 38 | ```
 39 | 
 40 | ```
 41 | > cat a.peg
 42 | A <- B
 43 | 
 44 | > peglint a.peg
 45 | a.peg:1:6: 'B' is not defined.
 46 | ```
 47 | 
 48 | ```
 49 | > cat a.peg
 50 | A <- B / C
 51 | B <- 'b'
 52 | C <- A
 53 | 
 54 | > peglint a.peg
 55 | a.peg:1:10: 'C' is left recursive.
 56 | a.peg:3:6: 'A' is left recursive.
 57 | ```
 58 | 
 59 | ### Lint source text
 60 | 
 61 | ```
 62 | > cat a.peg
 63 | Additive    <- Multiplicative '+' Additive / Multiplicative
 64 | Multiplicative   <- Primary '*' Multiplicative / Primary
 65 | Primary     <- '(' Additive ')' / Number
 66 | Number      <- < [0-9]+ >
 67 | %whitespace <- [ \t\r\n]*
 68 | 
 69 | > peglint --source "1 + a * 3" a.peg
 70 | [commandline]:1:3: syntax error
 71 | ```
 72 | 
 73 | ### AST
 74 | 
 75 | ```
 76 | > cat a.txt
 77 | 1 + 2 * 3
 78 | 
 79 | > peglint --ast a.peg a.txt
 80 | + Additive
 81 |   + Multiplicative
 82 |     + Primary
 83 |       - Number (1)
 84 |   + Additive
 85 |     + Multiplicative
 86 |       + Primary
 87 |         - Number (2)
 88 |       + Multiplicative
 89 |         + Primary
 90 |           - Number (3)
 91 | ```
 92 | 
 93 | ### AST optimization
 94 | 
 95 | ```
 96 | > peglint --ast --opt --source "1 + 2 * 3" a.peg
 97 | + Additive
 98 |   - Multiplicative[Number] (1)
 99 |   + Additive[Multiplicative]
100 |     - Primary[Number] (2)
101 |     - Multiplicative[Number] (3)
102 | ```
103 | 
104 | ### Adjust AST optimization with `no_ast_opt` instruction
105 | 
106 | ```
107 | > cat a.peg
108 | Additive    <- Multiplicative '+' Additive / Multiplicative
109 | Multiplicative   <- Primary '*' Multiplicative / Primary
110 | Primary     <- '(' Additive ')' / Number          { no_ast_opt }
111 | Number      <- < [0-9]+ >
112 | %whitespace <- [ \t\r\n]*
113 | 
114 | > peglint --ast --opt --source "1 + 2 * 3" a.peg
115 | + Additive/0
116 |   + Multiplicative/1[Primary]
117 |     - Number (1)
118 |   + Additive/1[Multiplicative]
119 |     + Primary/1
120 |       - Number (2)
121 |     + Multiplicative/1[Primary]
122 |       - Number (3)
123 | 
124 | > peglint --ast --opt-only --source "1 + 2 * 3" a.peg
125 | + Additive/0
126 |   + Multiplicative/1
127 |     - Primary/1[Number] (1)
128 |   + Additive/1
129 |     + Multiplicative/0
130 |       - Primary/1[Number] (2)
131 |       + Multiplicative/1
132 |         - Primary/1[Number] (3)
133 | ```
134 | 


--------------------------------------------------------------------------------
/lint/peglint.cc:
--------------------------------------------------------------------------------
  1 | //
  2 | //  peglint.cc
  3 | //
  4 | //  Copyright (c) 2022 Yuji Hirose. All rights reserved.
  5 | //  MIT License
  6 | //
  7 | 
  8 | #include <fstream>
  9 | #include <peglib.h>
 10 | #include <sstream>
 11 | 
 12 | using namespace std;
 13 | 
 14 | // Reads the whole file at `path` into `buff`; false if it cannot be opened, measured or read.
 15 | inline bool read_file(const char *path, vector<char> &buff) {
 16 |   ifstream ifs(path, ios::in | ios::binary);
 17 |   const streamoff size = ifs.seekg(0, ios::end).tellg(); // -1 when open or seek failed
 18 |   if (size < 0) { return false; }
 19 |   buff.resize(static_cast<size_t>(size));
 20 |   if (buff.empty()) { return true; }
 21 |   ifs.seekg(0, ios::beg).read(buff.data(), static_cast<streamsize>(size));
 22 |   return !ifs.fail();
 23 | }
 24 | 
 25 | // Splits `s` on `delim`. Empty fields are kept, except that nothing follows a trailing delimiter.
 26 | inline vector<string> split(const string &s, char delim) {
 27 |   vector<string> parts;
 28 |   for (size_t from = 0, to = 0; from < s.size(); from = to + 1) {
 29 |     if ((to = s.find(delim, from)) == string::npos) { to = s.size(); }
 30 |     parts.push_back(s.substr(from, to - from));
 31 |   }
 32 |   return parts;
 33 | }
 34 | 
 35 | int main(int argc, const char **argv) {
 36 |   auto opt_packrat = false;
 37 |   auto opt_ast = false;
 38 |   auto opt_optimize = false;
 39 |   auto opt_mode = true;
 40 |   auto opt_help = false;
 41 |   auto opt_source = false;
 42 |   vector<char> source;
 43 |   auto opt_trace = false;
 44 |   auto opt_verbose = false;
 45 |   auto opt_profile = false;
 46 |   vector<const char *> path_list;
 47 | 
 48 |   auto argi = 1;
 49 |   while (argi < argc) {
 50 |     auto arg = argv[argi++];
 51 |     if (string("--help") == arg) {
 52 |       opt_help = true;
 53 |     } else if (string("--packrat") == arg) {
 54 |       opt_packrat = true;
 55 |     } else if (string("--ast") == arg) {
 56 |       opt_ast = true;
 57 |     } else if (string("--opt") == arg || string("--opt-all") == arg) {
 58 |       opt_optimize = true;
 59 |       opt_mode = true;
 60 |     } else if (string("--opt-only") == arg) {
 61 |       opt_optimize = true;
 62 |       opt_mode = false;
 63 |     } else if (string("--source") == arg) {
 64 |       opt_source = true;
 65 |       if (argi < argc) {
 66 |         std::string text = argv[argi++];
 67 |         source.assign(text.begin(), text.end());
 68 |       }
 69 |     } else if (string("--trace") == arg) {
 70 |       opt_trace = true;
 71 |     } else if (string("--profile") == arg) {
 72 |       opt_profile = true;
 73 |     } else if (string("--verbose") == arg) {
 74 |       opt_verbose = true;
 75 |     } else {
 76 |       path_list.push_back(arg);
 77 |     }
 78 |   }
 79 | 
 80 |   if (path_list.empty() || opt_help) {
 81 |     cerr << R"(usage: grammar_file_path [source_file_path]
 82 | 
 83 |   options:
 84 |     --source: source text
 85 |     --packrat: enable packrat memoise
 86 |     --ast: show AST tree
 87 |     --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction
 88 |     --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction
 89 |     --trace: show concise trace messages
 90 |     --profile: show profile report
 91 |     --verbose: verbose output for trace and profile
 92 | )";
 93 | 
 94 |     return 1;
 95 |   }
 96 | 
 97 |   // Check PEG grammar
 98 |   auto syntax_path = path_list[0];
 99 | 
100 |   vector<char> syntax;
101 |   if (!read_file(syntax_path, syntax)) {
102 |     cerr << "can't open the grammar file." << endl;
103 |     return -1;
104 |   }
105 | 
106 |   peg::parser parser;
107 | 
108 |   parser.set_logger([&](size_t ln, size_t col, const string &msg) {
109 |     cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl;
110 |   });
111 | 
112 |   if (!parser.load_grammar(syntax.data(), syntax.size())) { return -1; }
113 | 
114 |   if (path_list.size() < 2 && !opt_source) { return 0; }
115 | 
116 |   // Check source
117 |   std::string source_path = "[commandline]";
118 |   if (path_list.size() >= 2) {
119 |     if (!read_file(path_list[1], source)) {
120 |       cerr << "can't open the code file." << endl;
121 |       return -1;
122 |     }
123 |     source_path = path_list[1];
124 |   }
125 | 
126 |   parser.set_logger([&](size_t ln, size_t col, const string &msg) {
127 |     cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl;
128 |   });
129 | 
130 |   if (opt_packrat) { parser.enable_packrat_parsing(); }
131 | 
132 |   if (opt_trace) { enable_tracing(parser, std::cout); }
133 | 
134 |   if (opt_profile) { enable_profiling(parser, std::cout); }
135 | 
136 |   parser.set_verbose_trace(opt_verbose);
137 | 
138 |   if (opt_ast) {
139 |     parser.enable_ast();
140 | 
141 |     std::shared_ptr<peg::Ast> ast;
142 |     auto ret = parser.parse_n(source.data(), source.size(), ast);
143 | 
144 |     if (ast) {
145 |       if (opt_optimize) { ast = parser.optimize_ast(ast, opt_mode); }
146 |       std::cout << peg::ast_to_s(ast);
147 |     }
148 | 
149 |     if (!ret) { return -1; }
150 |   } else {
151 |     if (!parser.parse_n(source.data(), source.size())) { return -1; }
152 |   }
153 | 
154 |   return 0;
155 | }
156 | 


--------------------------------------------------------------------------------
/peg.vim:
--------------------------------------------------------------------------------
 1 | 
 2 | set commentstring=#\ %s
 3 | 
 4 | syn match pegAssign /<-/
 5 | syn match pegAssign2 /←/
 6 | 
 7 | syn match pegName /\v[a-zA-Z_][a-zA-Z0-9_]*/
 8 | 
 9 | syn match pegLineComment '#.*'
10 | 
11 | syn region pegStringD start=/\v"/ skip=/\v\\./ end=/\v"/
12 | syn region pegStringS start=/\v'/ skip=/\v\\./ end=/\v'/
13 | syn region pegClass start=/\v\[/ skip=/\v\\./ end=/\v]/
14 | 
15 | "syn match pegOperator /\(*\|?\|+\|!\|\.\|\~\)/
16 | 
17 | hi def link pegAssign Statement
18 | hi def link pegAssign2 Statement
19 | 
20 | hi def link pegName Identifier
21 | 
22 | hi def link pegLineComment Comment
23 | 
24 | hi def link pegStringD String
25 | hi def link pegStringS String
26 | hi def link pegClass String
27 | 
28 | let b:current_syntax = "peg"
29 | 


--------------------------------------------------------------------------------
/pl0/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | project(pl0)
 3 | 
 4 | find_package(LLVM REQUIRED CONFIG)
 5 | 
 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
 7 | 
 8 | add_executable(pl0 pl0.cc)
 9 | set(add_link_deps ${add_link_deps} LLVM)
10 | target_include_directories(pl0 PUBLIC ${LLVM_INCLUDE_DIRS})
11 | target_include_directories(pl0 PRIVATE ..)
12 | target_link_libraries(pl0 ${add_link_deps})
13 | 


--------------------------------------------------------------------------------
/pl0/Makefile:
--------------------------------------------------------------------------------
# Builds the pl0 example with clang++ against the LLVM installation that
# `llvm-config` reports.
#
# pl0.cc calls std::make_unique, which does not exist in C++11, so a newer
# standard is requested here. C++17 is an assumption about what peglib.h
# needs -- TODO confirm. A -std flag emitted by `llvm-config --cxxflags`
# appears later on the command line and therefore still takes precedence.
pl0: pl0.cc ../peglib.h
	clang++ -std=c++17 -g -O0 pl0.cc `llvm-config --cxxflags --ldflags --system-libs --libs` -I.. -o pl0
3 | 


--------------------------------------------------------------------------------
/pl0/README.md:
--------------------------------------------------------------------------------
 1 | PL/0 language example
 2 | =====================
 3 | 
 4 |   https://en.wikipedia.org/wiki/PL/0
 5 | 
 6 |   * PL/0 PEG syntax
 7 |   * AST generation with symbol scope
 8 |   * Interpreter (slow...)
 9 |   * LLVM Code generation
10 |   * LLVM JIT execution (fast!)
11 | 
12 | Build
13 | -----
14 | 
15 | ```
16 | brew install llvm
17 | export PATH="$PATH:/usr/local/opt/llvm/bin"
18 | make
19 | ```
20 | 
21 | Usage
22 | -----
23 | 
24 | ```
25 | pl0 PATH [--ast] [--llvm] [--jit]
26 | 
27 |   --ast: Show AST tree
28 |   --llvm: Dump LLVM IR
29 |   --jit: LLVM JIT execution
30 | ```
31 | 


--------------------------------------------------------------------------------
/pl0/pl0.cc:
--------------------------------------------------------------------------------
  1 | //
  2 | //  pl0.cc - PL/0 language (https://en.wikipedia.org/wiki/PL/0)
  3 | //
  4 | //  Copyright (c) 2022 Yuji Hirose. All rights reserved.
  5 | //  MIT License
  6 | //
  7 | 
  8 | #include <peglib.h>
  9 | #include <fstream>
 10 | #include <iostream>
 11 | #include <sstream>
 12 | #include "llvm/ExecutionEngine/ExecutionEngine.h"
 13 | #include "llvm/ExecutionEngine/GenericValue.h"
 14 | #include "llvm/ExecutionEngine/MCJIT.h"
 15 | #include "llvm/IR/IRBuilder.h"
 16 | #include "llvm/IR/ValueSymbolTable.h"
 17 | #include "llvm/IR/Verifier.h"
 18 | #include "llvm/Support/TargetSelect.h"
 19 | 
 20 | using namespace peg;
 21 | using namespace peg::udl;
 22 | using namespace llvm;
 23 | using namespace std;
 24 | 
/*
 * PEG grammar of the PL/0 language.
 *
 * The code in this file dispatches on AST node tags that are spelled exactly
 * like the rule names below ("block"_, "statement"_, "compare"_, ...), so a
 * rule rename presumably has to be mirrored in those switches.
 *
 * `_` matches optional whitespace. `__` first requires that the next
 * character is not one of [a-z0-9_] and then matches `_`; it follows the
 * keywords so that a keyword cannot match the beginning of a longer word.
 */
auto grammar = R"(
  program    <- _ block '.' _

  block      <- const var procedure statement
  const      <- ('CONST' __ ident '=' _ number (',' _ ident '=' _ number)* ';' _)?
  var        <- ('VAR' __ ident (',' _ ident)* ';' _)?
  procedure  <- ('PROCEDURE' __ ident ';' _ block ';' _)*

  statement  <- (assignment / call / statements / if / while / out / in)?
  assignment <- ident ':=' _ expression
  call       <- 'CALL' __ ident
  statements <- 'BEGIN' __ statement (';' _ statement )* 'END' __
  if         <- 'IF' __ condition 'THEN' __ statement
  while      <- 'WHILE' __ condition 'DO' __ statement
  out        <- ('out' __ / 'write' __ / '!' _) expression
  in         <- ('in' __ / 'read' __ / '?' _) ident

  condition  <- odd / compare
  odd        <- 'ODD' __ expression
  compare    <- expression compare_op expression
  compare_op <- < '=' / '#' / '<=' / '<' / '>=' / '>' > _

  expression <- sign term (term_op term)*
  sign       <- < [-+]? > _
  term_op    <- < [-+] > _

  term       <- factor (factor_op factor)*
  factor_op  <- < [*/] > _

  factor     <- ident / number / '(' _ expression ')' _

  ident      <- < [a-z] [a-z0-9]* > _
  number     <- < [0-9]+ > _

  ~_         <- [ \t\r\n]*
  ~__        <- ![a-z0-9_] _
)";
 65 | 
 66 | /*
 67 |  * Utilities
 68 |  */
 69 | string format_error_message(const string& path, size_t ln, size_t col,
 70 |                             const string& msg) {
 71 |   stringstream ss;
 72 |   ss << path << ":" << ln << ":" << col << ": " << msg << endl;
 73 |   return ss.str();
 74 | }
 75 | 
 76 | /*
 77 |  * Ast
 78 |  */
 79 | struct SymbolScope;
 80 | 
 81 | struct Annotation {
 82 |   shared_ptr<SymbolScope> scope;
 83 | };
 84 | 
 85 | typedef AstBase<Annotation> AstPL0;
 86 | shared_ptr<SymbolScope> get_closest_scope(shared_ptr<AstPL0> ast) {
 87 |   ast = ast->parent.lock();
 88 |   while (ast->tag != "block"_) {
 89 |     ast = ast->parent.lock();
 90 |   }
 91 |   return ast->scope;
 92 | }
 93 | 
 94 | /*
 95 |  * Symbol Table
 96 |  */
 97 | struct SymbolScope {
 98 |   SymbolScope(shared_ptr<SymbolScope> outer) : outer(outer) {}
 99 | 
100 |   bool has_symbol(const string& ident, bool extend = true) const {
101 |     auto ret = constants.count(ident) || variables.count(ident);
102 |     return ret ? true : (extend && outer ? outer->has_symbol(ident) : false);
103 |   }
104 | 
105 |   bool has_constant(const string& ident, bool extend = true) const {
106 |     return constants.count(ident)
107 |                ? true
108 |                : (extend && outer ? outer->has_constant(ident) : false);
109 |   }
110 | 
111 |   bool has_variable(const string& ident, bool extend = true) const {
112 |     return variables.count(ident)
113 |                ? true
114 |                : (extend && outer ? outer->has_variable(ident) : false);
115 |   }
116 | 
117 |   bool has_procedure(const string& ident, bool extend = true) const {
118 |     return procedures.count(ident)
119 |                ? true
120 |                : (extend && outer ? outer->has_procedure(ident) : false);
121 |   }
122 | 
123 |   shared_ptr<AstPL0> get_procedure(const string& ident) const {
124 |     auto it = procedures.find(ident);
125 |     return it != procedures.end() ? it->second : outer->get_procedure(ident);
126 |   }
127 | 
128 |   map<string, int> constants;
129 |   set<string> variables;
130 |   map<string, shared_ptr<AstPL0>> procedures;
131 |   set<string> free_variables;
132 | 
133 |  private:
134 |   shared_ptr<SymbolScope> outer;
135 | };
136 | 
137 | void throw_runtime_error(const shared_ptr<AstPL0> node, const string& msg) {
138 |   throw runtime_error(
139 |       format_error_message(node->path, node->line, node->column, msg));
140 | }
141 | 
142 | struct SymbolTable {
143 |   static void build_on_ast(const shared_ptr<AstPL0> ast,
144 |                            shared_ptr<SymbolScope> scope = nullptr) {
145 |     switch (ast->tag) {
146 |       case "block"_:
147 |         block(ast, scope);
148 |         break;
149 |       case "assignment"_:
150 |         assignment(ast, scope);
151 |         break;
152 |       case "call"_:
153 |         call(ast, scope);
154 |         break;
155 |       case "ident"_:
156 |         ident(ast, scope);
157 |         break;
158 |       default:
159 |         for (auto node : ast->nodes) {
160 |           build_on_ast(node, scope);
161 |         }
162 |         break;
163 |     }
164 |   }
165 | 
166 |  private:
167 |   static void block(const shared_ptr<AstPL0> ast,
168 |                     shared_ptr<SymbolScope> outer) {
169 |     // block <- const var procedure statement
170 |     auto scope = make_shared<SymbolScope>(outer);
171 |     const auto& nodes = ast->nodes;
172 |     constants(nodes[0], scope);
173 |     variables(nodes[1], scope);
174 |     procedures(nodes[2], scope);
175 |     build_on_ast(nodes[3], scope);
176 |     ast->scope = scope;
177 |   }
178 | 
179 |   static void constants(const shared_ptr<AstPL0> ast,
180 |                         shared_ptr<SymbolScope> scope) {
181 |     // const <- ('CONST' __ ident '=' _ number(',' _ ident '=' _ number)* ';'
182 |     // _)?
183 |     const auto& nodes = ast->nodes;
184 |     for (auto i = 0u; i < nodes.size(); i += 2) {
185 |       const auto& ident = nodes[i + 0]->token_to_string();
186 |       if (scope->has_symbol(ident)) {
187 |         throw_runtime_error(nodes[i], "'" + ident + "' is already defined...");
188 |       }
189 |       auto number = nodes[i + 1]->token_to_number<int>();
190 |       scope->constants.emplace(ident, number);
191 |     }
192 |   }
193 | 
194 |   static void variables(const shared_ptr<AstPL0> ast,
195 |                         shared_ptr<SymbolScope> scope) {
196 |     // var <- ('VAR' __ ident(',' _ ident)* ';' _) ?
197 |     const auto& nodes = ast->nodes;
198 |     for (auto i = 0u; i < nodes.size(); i += 1) {
199 |       const auto& ident = nodes[i]->token_to_string();
200 |       if (scope->has_symbol(ident)) {
201 |         throw_runtime_error(nodes[i], "'" + ident + "' is already defined...");
202 |       }
203 |       scope->variables.emplace(ident);
204 |     }
205 |   }
206 | 
207 |   static void procedures(const shared_ptr<AstPL0> ast,
208 |                          shared_ptr<SymbolScope> scope) {
209 |     // procedure <- ('PROCEDURE' __ ident ';' _ block ';' _)*
210 |     const auto& nodes = ast->nodes;
211 |     for (auto i = 0u; i < nodes.size(); i += 2) {
212 |       const auto& ident = nodes[i + 0]->token_to_string();
213 |       auto block = nodes[i + 1];
214 |       scope->procedures[ident] = block;
215 |       build_on_ast(block, scope);
216 |     }
217 |   }
218 | 
219 |   static void assignment(const shared_ptr<AstPL0> ast,
220 |                          shared_ptr<SymbolScope> scope) {
221 |     // assignment <- ident ':=' _ expression
222 |     const auto& ident = ast->nodes[0]->token_to_string();
223 |     if (scope->has_constant(ident)) {
224 |       throw_runtime_error(ast->nodes[0],
225 |                           "cannot modify constant value '" + ident + "'...");
226 |     } else if (!scope->has_variable(ident)) {
227 |       throw_runtime_error(ast->nodes[0],
228 |                           "undefined variable '" + ident + "'...");
229 |     }
230 | 
231 |     build_on_ast(ast->nodes[1], scope);
232 | 
233 |     if (!scope->has_symbol(ident, false)) {
234 |       scope->free_variables.emplace(ident);
235 |     }
236 |   }
237 | 
238 |   static void call(const shared_ptr<AstPL0> ast,
239 |                    shared_ptr<SymbolScope> scope) {
240 |     // call <- 'CALL' __ ident
241 |     const auto& ident = ast->nodes[0]->token_to_string();
242 |     if (!scope->has_procedure(ident)) {
243 |       throw_runtime_error(ast->nodes[0],
244 |                           "undefined procedure '" + ident + "'...");
245 |     }
246 | 
247 |     auto block = scope->get_procedure(ident);
248 |     if (block->scope) {
249 |       for (const auto& free : block->scope->free_variables) {
250 |         if (!scope->has_symbol(free, false)) {
251 |           scope->free_variables.emplace(free);
252 |         }
253 |       }
254 |     }
255 |   }
256 | 
257 |   static void ident(const shared_ptr<AstPL0> ast,
258 |                     shared_ptr<SymbolScope> scope) {
259 |     const auto& ident = ast->token_to_string();
260 |     if (!scope->has_symbol(ident)) {
261 |       throw_runtime_error(ast, "undefined variable '" + ident + "'...");
262 |     }
263 | 
264 |     if (!scope->has_symbol(ident, false)) {
265 |       scope->free_variables.emplace(ident);
266 |     }
267 |   }
268 | };
269 | 
270 | /*
271 |  * Interpreter
272 |  */
273 | struct Environment {
274 |   Environment(shared_ptr<SymbolScope> scope, shared_ptr<Environment> outer)
275 |       : scope(scope), outer(outer) {}
276 | 
277 |   int get_value(const shared_ptr<AstPL0> ast, const string& ident) const {
278 |     auto it = scope->constants.find(ident);
279 |     if (it != scope->constants.end()) {
280 |       return it->second;
281 |     } else if (scope->variables.count(ident)) {
282 |       if (variables.find(ident) == variables.end()) {
283 |         throw_runtime_error(ast, "uninitialized variable '" + ident + "'...");
284 |       }
285 |       return variables.at(ident);
286 |     }
287 |     return outer->get_value(ast, ident);
288 |   }
289 | 
290 |   void set_variable(const string& ident, int val) {
291 |     if (scope->variables.count(ident)) {
292 |       variables[ident] = val;
293 |     } else {
294 |       outer->set_variable(ident, val);
295 |     }
296 |   }
297 | 
298 |   shared_ptr<AstPL0> get_procedure(const string& ident) const {
299 |     return scope->get_procedure(ident);
300 |   }
301 | 
302 |  private:
303 |   shared_ptr<SymbolScope> scope;
304 |   shared_ptr<Environment> outer;
305 |   map<string, int> variables;
306 | };
307 | 
308 | struct Interpreter {
309 |   static void exec(const shared_ptr<AstPL0> ast,
310 |                    shared_ptr<Environment> env = nullptr) {
311 |     switch (ast->tag) {
312 |       case "block"_:
313 |         exec_block(ast, env);
314 |         break;
315 |       case "statement"_:
316 |         exec_statement(ast, env);
317 |         break;
318 |       case "assignment"_:
319 |         exec_assignment(ast, env);
320 |         break;
321 |       case "call"_:
322 |         exec_call(ast, env);
323 |         break;
324 |       case "statements"_:
325 |         exec_statements(ast, env);
326 |         break;
327 |       case "if"_:
328 |         exec_if(ast, env);
329 |         break;
330 |       case "while"_:
331 |         exec_while(ast, env);
332 |         break;
333 |       case "out"_:
334 |         exec_out(ast, env);
335 |         break;
336 |       case "in"_:
337 |         exec_in(ast, env);
338 |         break;
339 |       default:
340 |         exec(ast->nodes[0], env);
341 |         break;
342 |     }
343 |   }
344 | 
345 |  private:
346 |   static void exec_block(const shared_ptr<AstPL0> ast,
347 |                          shared_ptr<Environment> outer) {
348 |     // block <- const var procedure statement
349 |     exec(ast->nodes[3], make_shared<Environment>(ast->scope, outer));
350 |   }
351 | 
352 |   static void exec_statement(const shared_ptr<AstPL0> ast,
353 |                              shared_ptr<Environment> env) {
354 |     // statement  <- (assignment / call / statements / if / while / out / in)?
355 |     if (!ast->nodes.empty()) {
356 |       exec(ast->nodes[0], env);
357 |     }
358 |   }
359 | 
360 |   static void exec_assignment(const shared_ptr<AstPL0> ast,
361 |                               shared_ptr<Environment> env) {
362 |     // assignment <- ident ':=' _ expression
363 |     env->set_variable(ast->nodes[0]->token_to_string(), eval(ast->nodes[1], env));
364 |   }
365 | 
366 |   static void exec_call(const shared_ptr<AstPL0> ast,
367 |                         shared_ptr<Environment> env) {
368 |     // call <- 'CALL' __ ident
369 |     exec_block(env->get_procedure(ast->nodes[0]->token_to_string()), env);
370 |   }
371 | 
372 |   static void exec_statements(const shared_ptr<AstPL0> ast,
373 |                               shared_ptr<Environment> env) {
374 |     // statements <- 'BEGIN' __ statement (';' _ statement )* 'END' __
375 |     for (auto stmt : ast->nodes) {
376 |       exec(stmt, env);
377 |     }
378 |   }
379 | 
380 |   static void exec_if(const shared_ptr<AstPL0> ast,
381 |                       shared_ptr<Environment> env) {
382 |     // if <- 'IF' __ condition 'THEN' __ statement
383 |     if (eval_condition(ast->nodes[0], env)) {
384 |       exec(ast->nodes[1], env);
385 |     }
386 |   }
387 | 
388 |   static void exec_while(const shared_ptr<AstPL0> ast,
389 |                          shared_ptr<Environment> env) {
390 |     // while <- 'WHILE' __ condition 'DO' __ statement
391 |     auto cond = ast->nodes[0];
392 |     auto stmt = ast->nodes[1];
393 |     while (eval_condition(cond, env)) {
394 |       exec(stmt, env);
395 |     }
396 |   }
397 | 
398 |   static void exec_out(const shared_ptr<AstPL0> ast,
399 |                        shared_ptr<Environment> env) {
400 |     // out <- ('out' __ / 'write' __ / '!' _) expression
401 |     cout << eval(ast->nodes[0], env) << endl;
402 |   }
403 | 
404 |   static void exec_in(const shared_ptr<AstPL0> ast,
405 |                       shared_ptr<Environment> env) {
406 |     // in <- ('in' __ / 'read' __ / '?' _) ident
407 |     int val;
408 |     cin >> val;
409 |     env->set_variable(ast->nodes[0]->token_to_string(), val);
410 |   }
411 | 
412 |   static bool eval_condition(const shared_ptr<AstPL0> ast,
413 |                              shared_ptr<Environment> env) {
414 |     // condition <- odd / compare
415 |     const auto& node = ast->nodes[0];
416 |     switch (node->tag) {
417 |       case "odd"_:
418 |         return eval_odd(node, env);
419 |       case "compare"_:
420 |         return eval_compare(node, env);
421 |       default:
422 |         throw logic_error("invalid AstPL0 type");
423 |     }
424 |   }
425 | 
426 |   static bool eval_odd(const shared_ptr<AstPL0> ast,
427 |                        shared_ptr<Environment> env) {
428 |     // odd <- 'ODD' __ expression
429 |     return eval_expression(ast->nodes[0], env) != 0;
430 |   }
431 | 
432 |   static bool eval_compare(const shared_ptr<AstPL0> ast,
433 |                            shared_ptr<Environment> env) {
434 |     // compare <- expression compare_op expression
435 |     const auto& nodes = ast->nodes;
436 |     auto lval = eval_expression(nodes[0], env);
437 |     auto op = peg::str2tag(nodes[1]->token_to_string().c_str());
438 |     auto rval = eval_expression(nodes[2], env);
439 |     switch (op) {
440 |       case "="_:
441 |         return lval == rval;
442 |       case "#"_:
443 |         return lval != rval;
444 |       case "<="_:
445 |         return lval <= rval;
446 |       case "<"_:
447 |         return lval < rval;
448 |       case ">="_:
449 |         return lval >= rval;
450 |       case ">"_:
451 |         return lval > rval;
452 |       default:
453 |         throw logic_error("invalid operator");
454 |     }
455 |   }
456 | 
457 |   static int eval(const shared_ptr<AstPL0> ast, shared_ptr<Environment> env) {
458 |     switch (ast->tag) {
459 |       case "expression"_:
460 |         return eval_expression(ast, env);
461 |       case "term"_:
462 |         return eval_term(ast, env);
463 |       case "ident"_:
464 |         return eval_ident(ast, env);
465 |       case "number"_:
466 |         return eval_number(ast, env);
467 |       default:
468 |         return eval(ast->nodes[0], env);
469 |     }
470 |   }
471 | 
472 |   static int eval_expression(const shared_ptr<AstPL0> ast,
473 |                              shared_ptr<Environment> env) {
474 |     // expression <- sign term (term_op term)*
475 |     const auto& nodes = ast->nodes;
476 |     auto sign = nodes[0]->token_to_string();
477 |     auto sign_val = (sign.empty() || sign == "+") ? 1 : -1;
478 |     auto val = eval(nodes[1], env) * sign_val;
479 |     for (auto i = 2u; i < nodes.size(); i += 2) {
480 |       auto ope = nodes[i + 0]->token_to_string()[0];
481 |       auto rval = eval(nodes[i + 1], env);
482 |       switch (ope) {
483 |         case '+':
484 |           val = val + rval;
485 |           break;
486 |         case '-':
487 |           val = val - rval;
488 |           break;
489 |       }
490 |     }
491 |     return val;
492 |   }
493 | 
494 |   static int eval_term(const shared_ptr<AstPL0> ast,
495 |                        shared_ptr<Environment> env) {
496 |     // term <- factor (factor_op factor)*
497 |     const auto& nodes = ast->nodes;
498 |     auto val = eval(nodes[0], env);
499 |     for (auto i = 1u; i < nodes.size(); i += 2) {
500 |       auto ope = nodes[i + 0]->token_to_string()[0];
501 |       auto rval = eval(nodes[i + 1], env);
502 |       switch (ope) {
503 |         case '*':
504 |           val = val * rval;
505 |           break;
506 |         case '/':
507 |           if (rval == 0) {
508 |             throw_runtime_error(ast, "divide by 0 error");
509 |           }
510 |           val = val / rval;
511 |           break;
512 |       }
513 |     }
514 |     return val;
515 |   }
516 | 
517 |   static int eval_ident(const shared_ptr<AstPL0> ast,
518 |                         shared_ptr<Environment> env) {
519 |     return env->get_value(ast, ast->token_to_string());
520 |   }
521 | 
522 |   static int eval_number(const shared_ptr<AstPL0> ast,
523 |                          shared_ptr<Environment> env) {
524 |     return stol(ast->token_to_string());
525 |   }
526 | };
527 | 
528 | /*
529 |  * LLVM
530 |  */
531 | struct LLVM {
532 |   LLVM(const shared_ptr<AstPL0> ast) : builder_(context_) {
533 |     module_ = make_unique<Module>("pl0", context_);
534 |     compile(ast);
535 |   }
536 | 
537 |   void dump() { module_->print(llvm::outs(), nullptr); }
538 | 
539 |   void exec() {
540 |     unique_ptr<ExecutionEngine> ee(EngineBuilder(std::move(module_)).create());
541 |     std::vector<GenericValue> noargs;
542 |     auto fn = ee->FindFunctionNamed("main");
543 |     auto ret = ee->runFunction(fn, noargs);
544 |   }
545 | 
546 |  private:
547 |   LLVMContext context_;
548 |   IRBuilder<> builder_;
549 |   unique_ptr<Module> module_;
550 | 
551 |   void compile(const shared_ptr<AstPL0> ast) {
552 |     InitializeNativeTarget();
553 |     InitializeNativeTargetAsmPrinter();
554 |     compile_libs();
555 |     compile_program(ast);
556 |   }
557 | 
558 |   void compile_switch(const shared_ptr<AstPL0> ast) {
559 |     switch (ast->tag) {
560 |       case "assignment"_:
561 |         compile_assignment(ast);
562 |         break;
563 |       case "call"_:
564 |         compile_call(ast);
565 |         break;
566 |       case "statements"_:
567 |         compile_statements(ast);
568 |         break;
569 |       case "if"_:
570 |         compile_if(ast);
571 |         break;
572 |       case "while"_:
573 |         compile_while(ast);
574 |         break;
575 |       case "out"_:
576 |         compile_out(ast);
577 |         break;
578 |       default:
579 |         compile_switch(ast->nodes[0]);
580 |         break;
581 |     }
582 |   }
583 | 
584 |   Value* compile_switch_value(const shared_ptr<AstPL0> ast) {
585 |     switch (ast->tag) {
586 |       case "odd"_:
587 |         return compile_odd(ast);
588 |       case "compare"_:
589 |         return compile_compare(ast);
590 |       case "expression"_:
591 |         return compile_expression(ast);
592 |       case "ident"_:
593 |         return compile_ident(ast);
594 |       case "number"_:
595 |         return compile_number(ast);
596 |       default:
597 |         return compile_switch_value(ast->nodes[0]);
598 |     }
599 |   }
600 | 
601 |   void compile_libs() {
602 |     auto printfF = module_->getOrInsertFunction(
603 |         "printf",
604 |         FunctionType::get(builder_.getInt32Ty(),
605 |                           PointerType::get(builder_.getInt8Ty(), 0), true));
606 | 
607 | #if LLVM_VERSION_MAJOR >= 9
608 |     auto funccallee = module_->getOrInsertFunction("out", builder_.getVoidTy(), builder_.getInt32Ty());
609 |     auto outC = funccallee.getCallee();
610 | #else
611 |     auto outC = module_->getOrInsertFunction("out", builder_.getVoidTy(), builder_.getInt32Ty());
612 | #endif
613 |     auto outF = cast<Function>(outC);
614 | 
615 |     {
616 |       auto BB = BasicBlock::Create(context_, "entry", outF);
617 |       builder_.SetInsertPoint(BB);
618 | 
619 |       auto val = &*outF->arg_begin();
620 | 
621 |       auto fmt = builder_.CreateGlobalStringPtr("%d\n");
622 |       std::vector<Value*> args = {fmt, val};
623 |       builder_.CreateCall(printfF, args);
624 | 
625 |       builder_.CreateRetVoid();
626 |     }
627 |   }
628 | 
629 |   void compile_program(const shared_ptr<AstPL0> ast) {
630 | #if LLVM_VERSION_MAJOR >= 9
631 |     auto funccallee = module_->getOrInsertFunction("main", builder_.getVoidTy());
632 |     auto c = funccallee.getCallee();
633 | #else
634 |     auto c = module_->getOrInsertFunction("main", builder_.getVoidTy());
635 | #endif
636 |     auto fn = cast<Function>(c);
637 | 
638 |     {
639 |       auto BB = BasicBlock::Create(context_, "entry", fn);
640 |       builder_.SetInsertPoint(BB);
641 |       compile_block(ast->nodes[0]);
642 |       builder_.CreateRetVoid();
643 |       verifyFunction(*fn);
644 |     }
645 |   }
646 | 
647 |   void compile_block(const shared_ptr<AstPL0> ast) {
648 |     compile_const(ast->nodes[0]);
649 |     compile_var(ast->nodes[1]);
650 |     compile_procedure(ast->nodes[2]);
651 |     compile_statement(ast->nodes[3]);
652 |   }
653 | 
654 |   void compile_const(const shared_ptr<AstPL0> ast) {
655 |     for (auto i = 0u; i < ast->nodes.size(); i += 2) {
656 |       auto ident = ast->nodes[i]->token_to_string();
657 |       auto number = stoi(ast->nodes[i + 1]->token_to_string());
658 | 
659 |       auto alloca =
660 |           builder_.CreateAlloca(builder_.getInt32Ty(), nullptr, ident);
661 |       builder_.CreateStore(builder_.getInt32(number), alloca);
662 |     }
663 |   }
664 | 
665 |   void compile_var(const shared_ptr<AstPL0> ast) {
666 |     for (const auto node : ast->nodes) {
667 |       auto ident = node->token_to_string();
668 |       builder_.CreateAlloca(builder_.getInt32Ty(), nullptr, ident);
669 |     }
670 |   }
671 | 
672 |   void compile_procedure(const shared_ptr<AstPL0> ast) {
673 |     for (auto i = 0u; i < ast->nodes.size(); i += 2) {
674 |       auto ident = ast->nodes[i]->token_to_string();
675 |       auto block = ast->nodes[i + 1];
676 | 
677 |       std::vector<Type*> pt(block->scope->free_variables.size(),
678 |                             Type::getInt32PtrTy(context_));
679 |       auto ft = FunctionType::get(builder_.getVoidTy(), pt, false);
680 | #if LLVM_VERSION_MAJOR >= 9
681 |       auto funccallee = module_->getOrInsertFunction(ident, ft);
682 |       auto c = funccallee.getCallee();
683 | #else
684 |       auto c = module_->getOrInsertFunction(ident, ft);
685 | #endif
686 |       auto fn = cast<Function>(c);
687 | 
688 |       {
689 |         auto it = block->scope->free_variables.begin();
690 |         for (auto& arg : fn->args()) {
691 |           arg.setName(*it);
692 |           ++it;
693 |         }
694 |       }
695 | 
696 |       {
697 |         auto prevBB = builder_.GetInsertBlock();
698 |         auto BB = BasicBlock::Create(context_, "entry", fn);
699 |         builder_.SetInsertPoint(BB);
700 |         compile_block(block);
701 |         builder_.CreateRetVoid();
702 |         verifyFunction(*fn);
703 |         builder_.SetInsertPoint(prevBB);
704 |       }
705 |     }
706 |   }
707 | 
708 |   void compile_statement(const shared_ptr<AstPL0> ast) {
709 |     if (!ast->nodes.empty()) {
710 |       compile_switch(ast->nodes[0]);
711 |     }
712 |   }
713 | 
714 |   void compile_assignment(const shared_ptr<AstPL0> ast) {
715 |     auto ident = ast->nodes[0]->token_to_string();
716 | 
717 |     auto fn = builder_.GetInsertBlock()->getParent();
718 |     auto tbl = fn->getValueSymbolTable();
719 |     auto var = tbl->lookup(ident);
720 |     if (!var) {
721 |       throw_runtime_error(ast, "'" + ident + "' is not defined...");
722 |     }
723 | 
724 |     auto val = compile_expression(ast->nodes[1]);
725 |     builder_.CreateStore(val, var);
726 |   }
727 | 
728 |   void compile_call(const shared_ptr<AstPL0> ast) {
729 |     auto ident = ast->nodes[0]->token_to_string();
730 | 
731 |     auto scope = get_closest_scope(ast);
732 |     auto block = scope->get_procedure(ident);
733 | 
734 |     std::vector<Value*> args;
735 |     for (auto& free : block->scope->free_variables) {
736 |       auto fn = builder_.GetInsertBlock()->getParent();
737 |       auto tbl = fn->getValueSymbolTable();
738 |       auto var = tbl->lookup(free);
739 |       if (!var) {
740 |         throw_runtime_error(ast, "'" + free + "' is not defined...");
741 |       }
742 |       args.push_back(var);
743 |     }
744 | 
745 |     auto fn = module_->getFunction(ident);
746 |     builder_.CreateCall(fn, args);
747 |   }
748 | 
749 |   void compile_statements(const shared_ptr<AstPL0> ast) {
750 |     for (auto node : ast->nodes) {
751 |       compile_statement(node);
752 |     }
753 |   }
754 | 
755 |   void compile_if(const shared_ptr<AstPL0> ast) {
756 |     // nodes[0] is the condition, nodes[1] the guarded statement.
757 |     auto test = compile_condition(ast->nodes[0]);
758 |     auto parent = builder_.GetInsertBlock()->getParent();
759 |     auto thenBlock = BasicBlock::Create(context_, "if.then", parent);
760 |     auto endBlock = BasicBlock::Create(context_, "if.end");
761 |     builder_.CreateCondBr(test, thenBlock, endBlock);
762 |     // Then-branch: emit the body, then branch to the join block.
763 |     builder_.SetInsertPoint(thenBlock);
764 |     compile_statement(ast->nodes[1]);
765 |     builder_.CreateBr(endBlock);
766 | 
767 |     // Attach the join block to the function only after the body was emitted.
768 |     parent->getBasicBlockList().push_back(endBlock);
769 |     builder_.SetInsertPoint(endBlock);
770 |   }
771 | 
772 |   void compile_while(const shared_ptr<AstPL0> ast) {
773 |     // The condition gets its own block so the body can branch back to it.
774 |     auto condBlock = BasicBlock::Create(context_, "while.cond");
775 |     builder_.CreateBr(condBlock);
776 |     auto parent = builder_.GetInsertBlock()->getParent();
777 |     parent->getBasicBlockList().push_back(condBlock);
778 |     builder_.SetInsertPoint(condBlock);
779 | 
780 |     auto test = compile_condition(ast->nodes[0]);
781 |     auto bodyBlock = BasicBlock::Create(context_, "while.body", parent);
782 |     auto exitBlock = BasicBlock::Create(context_, "while.end");
783 |     builder_.CreateCondBr(test, bodyBlock, exitBlock);
784 | 
785 |     // Loop body (nodes[1]), followed by the back edge to the condition.
786 |     builder_.SetInsertPoint(bodyBlock);
787 |     compile_statement(ast->nodes[1]);
788 |     builder_.CreateBr(condBlock);
789 | 
790 |     // Code emitted after the loop continues in the exit block.
791 |     parent->getBasicBlockList().push_back(exitBlock);
792 |     builder_.SetInsertPoint(exitBlock);
793 |   }
794 | 
795 |   Value* compile_condition(const shared_ptr<AstPL0> ast) {
796 |     return compile_switch_value(ast->nodes.front());  // forwards to the first child
797 |   }
798 | 
799 |   Value* compile_odd(const shared_ptr<AstPL0> ast) {
800 |     auto bit = builder_.CreateAnd(compile_expression(ast->nodes[0]), builder_.getInt32(1), "and");
801 |     return builder_.CreateICmpNE(bit, builder_.getInt32(0), "icmpne");  // odd <=> lowest bit set
802 |   }
803 | 
804 |   Value* compile_compare(const shared_ptr<AstPL0> ast) {
805 |     // Both operands are emitted, left first, before the operator is read.
806 |     auto left = compile_expression(ast->nodes[0]);
807 |     auto right = compile_expression(ast->nodes[2]);
808 |     const auto op = ast->nodes[1]->token_to_string();
809 | 
810 |     // A one-character '<' / '>' is strict; any longer token starting with
811 |     // the same character is treated as '<=' / '>='.
812 |     const bool strict = op.size() == 1;
813 |     switch (op[0]) {
814 |       case '=':
815 |         return builder_.CreateICmpEQ(left, right, "icmpeq");
816 |       case '#':
817 |         return builder_.CreateICmpNE(left, right, "icmpne");
818 |       case '<':
819 |         return strict ? builder_.CreateICmpSLT(left, right, "icmpslt")
820 |                       : builder_.CreateICmpSLE(left, right, "icmpsle");
821 |       case '>':
822 |         return strict ? builder_.CreateICmpSGT(left, right, "icmpsgt")
823 |                       : builder_.CreateICmpSGE(left, right, "icmpsge");
824 |       default:
825 |         break;
826 |     }
827 |     return nullptr;  // unrecognised operator
828 |   }
829 | 
830 |   void compile_out(const shared_ptr<AstPL0> ast) {
831 |     // Evaluate the operand first, then call the module's "out" function on it.
832 |     auto arg = compile_expression(ast->nodes[0]);
833 |     builder_.CreateCall(module_->getFunction("out"), arg);
834 |   }
835 | 
836 |   Value* compile_expression(const shared_ptr<AstPL0> ast) {
837 |     // Layout: nodes[0] is the sign token, nodes[1] the first term, followed
838 |     // by (operator, term) pairs.
839 |     const auto& kids = ast->nodes;
840 | 
841 |     // Any sign text other than "" or "+" negates the first term.
842 |     const auto sign = kids[0]->token_to_string();
843 |     const bool negate = !sign.empty() && sign != "+";
844 |     auto acc = compile_term(kids[1]);
845 |     if (negate) {
846 |       acc = builder_.CreateNeg(acc, "negative");
847 |     }
848 | 
849 |     // Fold the remaining pairs into the accumulator from left to right.
850 |     for (auto i = 2u; i < kids.size(); i += 2) {
851 |       const char op = kids[i]->token_to_string()[0];
852 |       auto rhs = compile_term(kids[i + 1]);
853 |       if (op == '+') {
854 |         acc = builder_.CreateAdd(acc, rhs, "add");
855 |       } else if (op == '-') {
856 |         acc = builder_.CreateSub(acc, rhs, "sub");
857 |       }
858 |     }
859 |     return acc;
860 |   }
861 | 
862 |   Value* compile_term(const shared_ptr<AstPL0> ast) {
863 |     // Layout: nodes[0] is the first factor, followed by (operator, factor)
864 |     // pairs that are folded left to right.
865 |     const auto& kids = ast->nodes;
866 |     auto acc = compile_factor(kids[0]);
867 | 
868 |     // Each iteration consumes one operator node and the operand after it.
869 |     for (auto i = 1u; i < kids.size(); i += 2) {
870 |       const char op = kids[i]->token_to_string()[0];
871 |       // The remaining operands are dispatched through compile_switch_value.
872 |       auto rhs = compile_switch_value(kids[i + 1]);
873 | 
874 |       // Operators other than '*' and '/' leave the accumulator untouched.
875 |       if (op == '*') {
876 |         acc = builder_.CreateMul(acc, rhs, "mul");
877 |       } else if (op == '/') {
878 |         // TODO: no divide-by-zero check is emitted; a zero divisor reaches
879 |         // the signed division below unchecked.
880 |         acc = builder_.CreateSDiv(acc, rhs, "div");
881 |       }
882 |     }
883 | 
884 |     return acc;
885 |   }
886 | 
887 |   Value* compile_factor(const shared_ptr<AstPL0> ast) {
888 |     return compile_switch_value(ast->nodes.front());  // forwards to the first child
889 |   }
890 | 
891 |   Value* compile_ident(const shared_ptr<AstPL0> ast) {
892 |     auto name = ast->token_to_string();
893 | 
894 |     // Look the identifier up among the values of the function being built.
895 |     auto symbols =
896 |         builder_.GetInsertBlock()->getParent()->getValueSymbolTable();
897 |     auto slot = symbols->lookup(name);
898 |     if (!slot) {
899 |       throw_runtime_error(ast, "'" + name + "' is not defined...");
900 |     }
901 |     return builder_.CreateLoad(slot);  // emit a load from the looked-up value
902 |   }
903 | 
904 |   Value* compile_number(const shared_ptr<AstPL0> ast) {
905 |     const APInt literal(32, ast->token_to_string(), 10);  // decimal text -> 32-bit value
906 |     return ConstantInt::getIntegerValue(builder_.getInt32Ty(), literal);
907 |   }
908 | };
909 | 
910 | /*
911 |  * Main
912 |  */
913 | int main(int argc, const char** argv) {
914 |   if (argc < 2) {
915 |     cout << "usage: pl0 PATH [--ast] [--llvm] [--jit]" << endl;
916 |     return 1;
917 |   }
918 | 
919 |   // Parse command line parameters (unrecognized options are ignored)
920 |   auto path = argv[1];
921 |   bool opt_jit = false;
922 |   bool opt_ast = false;
923 |   bool opt_llvm = false;
924 |   for (auto argi = 2; argi < argc; argi++) {
925 |     const string arg = argv[argi];
926 |     if (arg == "--ast") {
927 |       opt_ast = true;
928 |     } else if (arg == "--jit") {
929 |       opt_jit = true;
930 |     } else if (arg == "--llvm") {
931 |       opt_llvm = true;
932 |     }
933 |   }
934 | 
935 |   // Read a source file into memory
936 |   vector<char> source;
937 |   ifstream ifs(path, ios::in | ios::binary);
938 |   if (ifs.fail()) {
939 |     cerr << "can't open the source file." << endl;
940 |     return -1;
941 |   }
942 |   // tellg() signals failure with -1; converting that to an unsigned size
943 |   // would request a gigantic buffer, so check before resizing.
944 |   const streamoff length = ifs.seekg(0, ios::end).tellg();
945 |   if (length < 0) {
946 |     cerr << "can't read the source file." << endl;
947 |     return -1;
948 |   }
949 |   source.resize(static_cast<size_t>(length));
950 |   if (!source.empty()) {
951 |     ifs.seekg(0, ios::beg)
952 |         .read(&source[0], static_cast<streamsize>(source.size()));
953 |   }
954 | 
955 |   // Setup a PEG parser
956 |   parser parser(grammar);
957 |   parser.enable_ast<AstPL0>();
958 |   parser.set_logger([&](size_t ln, size_t col, const string& msg) {
959 |     cerr << format_error_message(path, ln, col, msg) << endl;
960 |   });
961 | 
962 |   // Parse the source and make an AST
963 |   shared_ptr<AstPL0> ast;
964 |   if (!parser.parse_n(source.data(), source.size(), ast, path)) {
965 |     return -1;
966 |   }
967 | 
968 |   // Analyze, then run. A runtime error is printed and turns into a failure
969 |   // exit status instead of being reported as success.
970 |   try {
971 |     SymbolTable::build_on_ast(ast);
972 | 
973 |     if (opt_ast) { cout << ast_to_s<AstPL0>(ast); }
974 | 
975 |     if (opt_llvm || opt_jit) {
976 |       LLVM compiler(ast);
977 |       if (opt_llvm) { compiler.dump(); }
978 |       if (opt_jit) { compiler.exec(); }
979 |     } else {
980 |       Interpreter::exec(ast);
981 |     }
982 |   } catch (const runtime_error& e) {
983 |     cerr << e.what() << endl;
984 |     return -1;
985 |   }
986 | 
987 |   return 0;
988 | }
989 | 


--------------------------------------------------------------------------------
/pl0/samples/fib.pas:
--------------------------------------------------------------------------------
 1 | VAR i, x, r;
 2 | 
 3 | PROCEDURE fib;
 4 | VAR xx, r1, r2;
 5 | BEGIN
 6 |   xx := x;
 7 |   IF xx = 0 THEN r := 1;
 8 |   IF xx = 1 THEN r := 1;
 9 |   IF xx >= 2 THEN BEGIN
10 |     x := xx - 2;
11 |     CALL fib;
12 |     r1 := r;
13 | 
14 |     x := xx - 1;
15 |     CALL fib;
16 |     r2 := r;
17 |     r := r1 + r2;
18 |   END
19 | END;
20 | 
21 | BEGIN
22 |   i := 0;
23 |   WHILE i < 25 DO BEGIN
24 |     x := i;
25 |     CALL fib;
26 |     write i;
27 |     write r;
28 |     i := i + 1;
29 |   END
30 | END.
31 | 


--------------------------------------------------------------------------------
/pl0/samples/gcd.pas:
--------------------------------------------------------------------------------
 1 | CONST
 2 |   m =  7,
 3 |   n = 85;
 4 | 
 5 | VAR
 6 |   x, y, z, q, r;
 7 | 
 8 | PROCEDURE multiply;
 9 | VAR a, b;
10 | BEGIN
11 |   a := x;
12 |   b := y;
13 |   z := 0;
14 |   WHILE b > 0 DO BEGIN
15 |     IF ODD b THEN z := z + a;
16 |     a := 2 * a;
17 |     b := b / 2;
18 |   END
19 | END;
20 | 
21 | PROCEDURE divide;
22 | VAR w;
23 | BEGIN
24 |   r := x;
25 |   q := 0;
26 |   w := y;
27 |   WHILE w <= r DO w := 2 * w;
28 |   WHILE w > y DO BEGIN
29 |     q := 2 * q;
30 |     w := w / 2;
31 |     IF w <= r THEN BEGIN
32 |       r := r - w;
33 |       q := q + 1
34 |     END
35 |   END
36 | END;
37 | 
38 | PROCEDURE gcd;
39 | VAR f, g;
40 | BEGIN
41 |   f := x;
42 |   g := y;
43 |   WHILE f # g DO BEGIN
44 |     IF f < g THEN g := g - f;
45 |     IF g < f THEN f := f - g;
46 |   END;
47 |   z := f
48 | END;
49 | 
50 | BEGIN
51 |   x := m;
52 |   y := n;
53 |   CALL multiply;
54 |   x := 25;
55 |   y :=  3;
56 |   CALL divide;
57 |   x := 84;
58 |   y := 36;
59 |   CALL gcd;
60 |   write z;
61 | END.
62 | 


--------------------------------------------------------------------------------
/pl0/samples/square.pas:
--------------------------------------------------------------------------------
 1 | 
 2 | VAR x, squ;
 3 | 
 4 | PROCEDURE square;
 5 | BEGIN
 6 |    squ := x * x
 7 | END;
 8 | 
 9 | BEGIN
10 |    x := 1;
11 |    WHILE x <= 10 DO
12 |    BEGIN
13 |       CALL square;
14 |       ! squ;
15 |       x := x + 1
16 |    END
17 | END.
18 | 
19 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | project(test)
 3 | 
 4 | include(FetchContent)
 5 | FetchContent_Declare(
 6 |   googletest
 7 |   URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
 8 | )
 9 | # For Windows: Prevent overriding the parent project's compiler/linker settings
10 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
11 | FetchContent_MakeAvailable(googletest)
12 | 
13 | enable_testing()
14 | 
15 | add_executable(peglib-test-main test1.cc test2.cc test3.cc)
16 | # The tests use std::any and std::string_view, so require at least C++17.
17 | target_compile_features(peglib-test-main PRIVATE cxx_std_17)
18 | target_include_directories(peglib-test-main PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
19 | target_link_libraries(peglib-test-main PRIVATE GTest::gtest_main)
20 | 
21 | include(GoogleTest)
22 | gtest_discover_tests(peglib-test-main)


--------------------------------------------------------------------------------
/test/test1.cc:
--------------------------------------------------------------------------------
   1 | ﻿#include <gtest/gtest.h>
   2 | #include <peglib.h>
   3 | 
   4 | using namespace peg;
   5 | 
   6 | #if !defined(PEGLIB_NO_UNICODE_CHARS)
   7 | TEST(GeneralTest, Simple_syntax_test_with_unicode) {
   8 |   parser parser(u8" ROOT ← _ "
   9 |                 " _ <- ' ' ");
  10 | 
  11 |   bool ret = parser;
  12 |   EXPECT_TRUE(ret);
  13 | }
  14 | #endif
  15 | 
  16 | TEST(GeneralTest, Simple_syntax_test) {
  17 |   parser parser(R"(
  18 |         ROOT <- _
  19 |         _ <- ' '
  20 |     )");
  21 | 
  22 |   bool ret = parser;
  23 |   EXPECT_TRUE(ret);
  24 | }
  25 | 
  26 | TEST(GeneralTest, Empty_syntax_test) {
  27 |   parser parser("");
  28 |   bool ret = parser;
  29 |   EXPECT_FALSE(ret);
  30 | }
  31 | 
  32 | TEST(GeneralTest, Start_rule_with_ignore_operator_test) {
  33 |   parser parser(R"(
  34 |         ~ROOT <- _
  35 |         _ <- ' '
  36 |     )");
  37 | 
  38 |   bool ret = parser;
  39 |   EXPECT_FALSE(ret);
  40 | }
  41 | 
  42 | TEST(GeneralTest, Invalid_UTF8_text_test) {
  43 |   std::string s = "a <- '";
  44 |   s += static_cast<char>(0xe8); // Make invalid utf8 text...
  45 | 
  46 |   parser parser(s.data());
  47 | 
  48 |   bool ret = parser;
  49 |   EXPECT_FALSE(ret);
  50 | }
  51 | 
  52 | TEST(GeneralTest, Backslash_escape_sequence_test) {
  53 |   parser parser(R"(
  54 |         ROOT <- _
  55 |         _ <- '\\'
  56 |     )");
  57 | 
  58 |   bool ret = parser;
  59 |   EXPECT_TRUE(ret);
  60 | }
  61 | 
  62 | TEST(GeneralTest, Invalid_escape_sequence_test) {
  63 |   parser parser(R"(
  64 |         ROOT <- _
  65 |         _ <- '\'
  66 |     )");
  67 | 
  68 |   bool ret = parser;
  69 |   EXPECT_FALSE(ret);
  70 | }
  71 | 
  72 | TEST(GeneralTest, Action_taking_non_const_Semantic_Values_parameter) {
  73 |   parser parser(R"(
  74 |         ROOT <- TEXT
  75 |         TEXT <- [a-zA-Z]+
  76 |     )");
  77 | 
  78 |   parser["ROOT"] = [&](SemanticValues &vs) {
  79 |     auto s = std::string(std::any_cast<std::string_view>(vs[0]));
  80 |     s[0] = 'H'; // mutate
  81 |     return s;   // move
  82 |   };
  83 | 
  84 |   parser["TEXT"] = [&](SemanticValues &vs) { return vs.token(); };
  85 | 
  86 |   std::string val;
  87 |   auto ret = parser.parse("hello", val);
  88 |   EXPECT_TRUE(ret);
  89 |   EXPECT_EQ("Hello", val);
  90 | }
  91 | 
  92 | TEST(GeneralTest, String_capture_test) {
  93 |   parser parser(R"(
  94 |         ROOT      <-  _ ('[' TAG_NAME ']' _)*
  95 |         TAG_NAME  <-  (!']' .)+
  96 |         _         <-  [ \t]*
  97 |     )");
  98 | 
  99 |   std::vector<std::string_view> tags;
 100 |   // Record the matched text of every TAG_NAME.
 101 |   parser["TAG_NAME"] = [&](const SemanticValues &vs) {
 102 |     tags.push_back(vs.sv());
 103 |   };
 104 | 
 105 |   auto ret = parser.parse(" [tag1] [tag:2] [tag-3] ");
 106 |   EXPECT_TRUE(ret);
 107 |   // Abort on a wrong count: the checks below index tags[0] through tags[2].
 108 |   ASSERT_EQ(3, tags.size());
 109 |   EXPECT_EQ("tag1", tags[0]);
 110 |   EXPECT_EQ("tag:2", tags[1]);
 111 |   EXPECT_EQ("tag-3", tags[2]);
 112 | }
 113 | 
 114 | using namespace peg;
 115 | 
 116 | TEST(GeneralTest, String_capture_test2) {
 117 |   std::vector<std::string_view> tags;
 118 |   // The grammar is assembled directly from combinators instead of PEG text.
 119 |   Definition ROOT, TAG, TAG_NAME, WS;
 120 |   ROOT <= seq(WS, zom(TAG));
 121 |   TAG <= seq(chr('['), TAG_NAME, chr(']'), WS);
 122 |   TAG_NAME <= oom(seq(npd(chr(']')), dot())),
 123 |       [&](const SemanticValues &vs) { tags.push_back(vs.sv()); };
 124 |   WS <= zom(cls(" \t"));
 125 | 
 126 |   auto r = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
 127 |   EXPECT_TRUE(r.ret);
 128 |   // Abort on a wrong count: the checks below index tags[0] through tags[2].
 129 |   ASSERT_EQ(3, tags.size());
 130 |   EXPECT_EQ("tag1", tags[0]);
 131 |   EXPECT_EQ("tag:2", tags[1]);
 132 |   EXPECT_EQ("tag-3", tags[2]);
 133 | }
 134 | 
 135 | TEST(GeneralTest, String_capture_test3) {
 136 |   parser pg(R"(
 137 |         ROOT  <- _ TOKEN*
 138 |         TOKEN <- '[' < (!']' .)+ > ']' _
 139 |         _     <- [ \t\r\n]*
 140 |     )");
 141 | 
 142 |   std::vector<std::string_view> tags;
 143 |   // Record the token captured between '<' and '>' for every TOKEN match.
 144 |   pg["TOKEN"] = [&](const SemanticValues &vs) { tags.push_back(vs.token()); };
 145 | 
 146 |   auto ret = pg.parse(" [tag1] [tag:2] [tag-3] ");
 147 |   EXPECT_TRUE(ret);
 148 |   // Abort on a wrong count: the checks below index tags[0] through tags[2].
 149 |   ASSERT_EQ(3, tags.size());
 150 |   EXPECT_EQ("tag1", tags[0]);
 151 |   EXPECT_EQ("tag:2", tags[1]);
 152 |   EXPECT_EQ("tag-3", tags[2]);
 153 | }
 154 | 
 155 | TEST(GeneralTest, Cyclic_grammar_test) {
 156 |   Definition PARENT;
 157 |   Definition CHILD;
 158 | 
 159 |   PARENT <= seq(CHILD);
 160 |   CHILD <= seq(PARENT);
 161 | }
 162 | 
 163 | TEST(GeneralTest, Visit_test) {
 164 |   Definition ROOT, TAG, TAG_NAME, WS;
 165 | 
 166 |   ROOT <= seq(WS, zom(TAG));
 167 |   TAG <= seq(chr('['), TAG_NAME, chr(']'), WS);
 168 |   TAG_NAME <= oom(seq(npd(chr(']')), dot()));
 169 |   WS <= zom(cls(" \t"));
 170 | 
 171 |   AssignIDToDefinition defIds;
 172 |   ROOT.accept(defIds);
 173 | 
 174 |   EXPECT_EQ(4, defIds.ids.size());
 175 | }
 176 | 
 177 | TEST(GeneralTest, Token_check_test) {
 178 |   parser parser(R"(
 179 |         EXPRESSION       <-  _ TERM (TERM_OPERATOR TERM)*
 180 |         TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
 181 |         FACTOR           <-  NUMBER / '(' _ EXPRESSION ')' _
 182 |         TERM_OPERATOR    <-  < [-+] > _
 183 |         FACTOR_OPERATOR  <-  < [/*] > _
 184 |         NUMBER           <-  < [0-9]+ > _
 185 |         _                <-  [ \t\r\n]*
 186 |     )");
 187 | 
 188 |   EXPECT_FALSE(parser["EXPRESSION"].is_token());
 189 |   EXPECT_FALSE(parser["FACTOR"].is_token());
 190 |   EXPECT_TRUE(parser["FACTOR_OPERATOR"].is_token());
 191 |   EXPECT_TRUE(parser["NUMBER"].is_token());
 192 |   EXPECT_TRUE(parser["_"].is_token());
 193 | }
 194 | 
 195 | TEST(GeneralTest, Lambda_action_test) {
 196 |   parser parser(R"(
 197 |        START <- (CHAR)*
 198 |        CHAR  <- .
 199 |     )");
 200 | 
 201 |   std::string ss;
 202 |   parser["CHAR"] = [&](const SemanticValues &vs) { ss += *vs.sv().data(); };
 203 | 
 204 |   bool ret = parser.parse("hello");
 205 |   EXPECT_TRUE(ret);
 206 |   EXPECT_EQ("hello", ss);
 207 | }
 208 | 
 209 | TEST(GeneralTest, enter_leave_handlers_test) {
 210 |   parser parser(R"(
 211 |         START  <- LTOKEN '=' RTOKEN
 212 |         LTOKEN <- TOKEN
 213 |         RTOKEN <- TOKEN
 214 |         TOKEN  <- [A-Za-z]+
 215 |     )");
 216 | 
 217 |   parser["LTOKEN"].enter = [&](const Context & /*c*/, const char *, size_t,
 218 |                                std::any &dt) {
 219 |     auto &require_upper_case = *std::any_cast<bool *>(dt);
 220 |     require_upper_case = false;
 221 |   };
 222 |   parser["LTOKEN"].leave = [&](const Context & /*c*/, const char *, size_t,
 223 |                                size_t, std::any &, std::any &dt) {
 224 |     auto &require_upper_case = *std::any_cast<bool *>(dt);
 225 |     require_upper_case = true;
 226 |   };
 227 | 
 228 |   auto message = "should be upper case string...";
 229 | 
 230 |   parser["TOKEN"].predicate = [&](const SemanticValues &vs, const std::any &dt,
 231 |                                   std::string &msg) {
 232 |     auto &require_upper_case = *std::any_cast<bool *>(dt);
 233 |     if (require_upper_case) {
 234 |       const auto &s = vs.sv();
 235 |       if (!std::all_of(s.begin(), s.end(), ::isupper)) {
 236 |         msg = message;
 237 |         return false;
 238 |       }
 239 |     }
 240 |     return true;
 241 |   };
 242 | 
 243 |   bool require_upper_case = false;
 244 |   std::any dt = &require_upper_case;
 245 |   EXPECT_FALSE(parser.parse("hello=world", dt));
 246 |   EXPECT_FALSE(parser.parse("HELLO=world", dt));
 247 |   EXPECT_TRUE(parser.parse("hello=WORLD", dt));
 248 |   EXPECT_TRUE(parser.parse("HELLO=WORLD", dt));
 249 | 
 250 |   parser.set_logger([&](size_t ln, size_t col, const std::string &msg) {
 251 |     EXPECT_EQ(1, ln);
 252 |     EXPECT_EQ(7, col);
 253 |     EXPECT_EQ(message, msg);
 254 |   });
 255 |   parser.parse("hello=world", dt);
 256 | }
 257 | 
 258 | TEST(GeneralTest, WHITESPACE_test) {
 259 |   parser parser(R"(
 260 |         # Rules
 261 |         ROOT         <-  ITEM (',' ITEM)*
 262 |         ITEM         <-  WORD / PHRASE
 263 | 
 264 |         # Tokens
 265 |         WORD         <-  < [a-zA-Z0-9_]+ >
 266 |         PHRASE       <-  < '"' (!'"' .)* '"' >
 267 | 
 268 |         %whitespace  <-  [ \t\r\n]*
 269 |     )");
 270 | 
 271 |   auto ret = parser.parse(R"(  one, 	 "two, three",   four  )");
 272 | 
 273 |   EXPECT_TRUE(ret);
 274 | }
 275 | 
 276 | TEST(GeneralTest, WHITESPACE_test2) {
 277 |   parser parser(R"(
 278 |         # Rules
 279 |         ROOT         <-  ITEM (',' ITEM)*
 280 |         ITEM         <-  '[' < [a-zA-Z0-9_]+ > ']'
 281 | 
 282 |         %whitespace  <-  (SPACE / TAB)*
 283 |         SPACE        <-  ' '
 284 |         TAB          <-  '\t'
 285 |     )");
 286 | 
 287 |   std::vector<std::string_view> items;
 288 |   parser["ITEM"] = [&](const SemanticValues &vs) {
 289 |     items.push_back(vs.token());
 290 |   };
 291 | 
 292 |   auto ret = parser.parse(R"([one], 	[two] ,[three] )");
 293 | 
 294 |   EXPECT_TRUE(ret);
 295 |   EXPECT_EQ(3, items.size());
 296 |   EXPECT_EQ("one", items[0]);
 297 |   EXPECT_EQ("two", items[1]);
 298 |   EXPECT_EQ("three", items[2]);
 299 | }
 300 | 
 301 | TEST(GeneralTest, WHITESPACE_test3) {
 302 |   parser parser(R"(
 303 |         StrQuot      <- < '"' < (StrEscape / StrChars)* > '"' >
 304 |         StrEscape    <- '\\' any
 305 |         StrChars     <- (!'"' !'\\' any)+
 306 |         any          <- .
 307 |         %whitespace  <- [ \t]*
 308 |     )");
 309 | 
 310 |   parser["StrQuot"] = [](const SemanticValues &vs) {
 311 |     EXPECT_EQ(R"(  aaa \" bbb  )", vs.token());
 312 |   };
 313 | 
 314 |   auto ret = parser.parse(R"( "  aaa \" bbb  " )");
 315 |   EXPECT_TRUE(ret);
 316 | }
 317 | 
 318 | TEST(GeneralTest, WHITESPACE_test4) {
 319 |   parser parser(R"(
 320 |         ROOT         <-  HELLO OPE WORLD
 321 |         HELLO        <-  'hello'
 322 |         OPE          <-  < [-+] >
 323 |         WORLD        <-  'world' / 'WORLD'
 324 |         %whitespace  <-  [ \t\r\n]*
 325 |     )");
 326 | 
 327 |   parser["HELLO"] = [](const SemanticValues &vs) {
 328 |     EXPECT_EQ("hello", vs.token());
 329 |   };
 330 | 
 331 |   parser["OPE"] = [](const SemanticValues &vs) { EXPECT_EQ("+", vs.token()); };
 332 | 
 333 |   parser["WORLD"] = [](const SemanticValues &vs) {
 334 |     EXPECT_EQ("world", vs.token());
 335 |   };
 336 | 
 337 |   auto ret = parser.parse("  hello + world  ");
 338 |   EXPECT_TRUE(ret);
 339 | }
 340 | 
 341 | TEST(GeneralTest, Word_expression_test) {
 342 |   parser parser(R"(
 343 |         ROOT         <-  'hello' ','? 'world'
 344 |         %whitespace  <-  [ \t\r\n]*
 345 |         %word        <-  [a-z]+
 346 |     )");
 347 | 
 348 |   EXPECT_FALSE(parser.parse("helloworld"));
 349 |   EXPECT_TRUE(parser.parse("hello world"));
 350 |   EXPECT_TRUE(parser.parse("hello,world"));
 351 |   EXPECT_TRUE(parser.parse("hello, world"));
 352 |   EXPECT_TRUE(parser.parse("hello , world"));
 353 | }
 354 | 
 355 | TEST(GeneralTest, Word_expression_test_PrioritizedChoice) {
 356 |   parser parser(R"(
 357 |     Identifier  ← < !Keyword [a-z][a-z]* >
 358 |     Keyword     ← 'def' / 'to'
 359 |     %whitespace ← [ \t\r\n]*
 360 |     %word       ← [a-z]+
 361 |   )");
 362 | 
 363 |   EXPECT_TRUE(parser.parse("toa"));
 364 | }
 365 | 
 366 | TEST(GeneralTest, Word_expression_test_Dictionary) {
 367 |   parser parser(R"(
 368 |     Identifier  ← < !Keyword [a-z][a-z]* >
 369 |     Keyword     ← 'def' | 'to'
 370 |     %whitespace ← [ \t\r\n]*
 371 |     %word       ← [a-z]+
 372 |   )");
 373 | 
 374 |   EXPECT_TRUE(parser.parse("toa"));
 375 | }
 376 | 
 377 | TEST(GeneralTest, Word_expression_case_ignore_test_Dictionary) {
 378 |   parser parser(R"(
 379 |     Identifier  ← < !Keyword [a-z][a-z]* >
 380 |     Keyword     ← 'def'i | 'to'i
 381 |     %whitespace ← [ \t\r\n]*
 382 |     %word       ← [a-z]+
 383 |   )");
 384 | 
 385 |   EXPECT_TRUE(parser.parse("toa"));
 386 | }
 387 | 
 388 | TEST(GeneralTest, Word_expression_syntax_error_test_Dictionary) {
 389 |   parser parser(R"(
 390 |     Identifier  ← < !Keyword [a-z][a-z]* >
 391 |     Keyword     ← 'def' | 'to'i
 392 |     %whitespace ← [ \t\r\n]*
 393 |     %word       ← [a-z]+
 394 |   )");
 395 | 
 396 |   EXPECT_FALSE(parser);
 397 | }
 398 | 
 399 | TEST(GeneralTest, Skip_token_test) {
 400 |   parser parser("  ROOT  <-  _ ITEM (',' _ ITEM _)* "
 401 |                 "  ITEM  <-  ([a-z0-9])+  "
 402 |                 "  ~_    <-  [ \t]*    ");
 403 | 
 404 |   parser["ROOT"] = [&](const SemanticValues &vs) { EXPECT_EQ(2, vs.size()); };
 405 | 
 406 |   auto ret = parser.parse(" item1, item2 ");
 407 | 
 408 |   EXPECT_TRUE(ret);
 409 | }
 410 | 
 411 | TEST(GeneralTest, Skip_token_test2) {
 412 |   parser parser(R"(
 413 |         ROOT        <-  ITEM (',' ITEM)*
 414 |         ITEM        <-  < ([a-z0-9])+ >
 415 |         %whitespace <-  [ \t]*
 416 |     )");
 417 | 
 418 |   parser["ROOT"] = [&](const SemanticValues &vs) { EXPECT_EQ(2, vs.size()); };
 419 | 
 420 |   auto ret = parser.parse(" item1, item2 ");
 421 | 
 422 |   EXPECT_TRUE(ret);
 423 | }
 424 | 
 425 | TEST(GeneralTest, Custom_AST_test) {
 426 |   struct CustomType {
 427 |     bool dummy = false;
 428 |   };
 429 |   using CustomAst = AstBase<CustomType>;
 430 | 
 431 |   parser parser(R"(
 432 |         ROOT <- _ TEXT*
 433 |         TEXT <- [a-zA-Z]+ _
 434 |         _ <- [ \t\r\n]*
 435 |     )");
 436 | 
 437 |   parser.enable_ast<CustomAst>();
 438 |   std::shared_ptr<CustomAst> ast;
 439 |   // ASSERT rather than EXPECT: `ast` is dereferenced on the next line.
 440 |   ASSERT_TRUE(parser.parse("a b c", ast));
 441 |   EXPECT_EQ(4, ast->nodes.size());
 442 | }
 443 | 
 444 | TEST(GeneralTest, Backtracking_test) {
 445 |   parser parser(R"(
 446 |        START <- PAT1 / PAT2
 447 |        PAT1  <- HELLO ' One'
 448 |        PAT2  <- HELLO ' Two'
 449 |        HELLO <- 'Hello'
 450 |     )");
 451 | 
 452 |   size_t count = 0;
 453 |   parser["HELLO"] = [&](const SemanticValues & /*vs*/) { count++; };
 454 | 
 455 |   parser.enable_packrat_parsing();
 456 | 
 457 |   bool ret = parser.parse("Hello Two");
 458 |   EXPECT_TRUE(ret);
 459 |   EXPECT_EQ(1, count); // Skip second time
 460 | }
 461 | 
 462 | TEST(GeneralTest, Backtracking_with_AST) {
 463 |   parser parser(R"(
 464 |         S <- A? B (A B)* A
 465 |         A <- 'a'
 466 |         B <- 'b'
 467 |     )");
 468 | 
 469 |   parser.enable_ast();
 470 |   std::shared_ptr<Ast> ast;
 471 |   // ASSERT rather than EXPECT: `ast` is dereferenced on the next line.
 472 |   ASSERT_TRUE(parser.parse("ba", ast));
 473 |   EXPECT_EQ(2, ast->nodes.size());
 474 | }
 475 | 
 476 | TEST(GeneralTest, Octal_Hex_Unicode_value_test) {
 477 |   parser parser(R"( ROOT <- '\132\x7a\u30f3' )");
 478 | 
 479 |   auto ret = parser.parse("Zzン");
 480 | 
 481 |   EXPECT_TRUE(ret);
 482 | }
 483 | 
 484 | TEST(GeneralTest, Ignore_case_literal_test) {
 485 |   parser parser(R"(
 486 |     ROOT         <-  HELLO WORLD
 487 |     HELLO        <-  'hello'i
 488 |     WORLD        <-  'world'i
 489 |     %whitespace  <-  [ \t\r\n]*
 490 |   )");
 491 | 
 492 |   parser["HELLO"] = [](const SemanticValues &vs) {
 493 |     EXPECT_EQ("Hello", vs.token());
 494 |   };
 495 | 
 496 |   parser["WORLD"] = [](const SemanticValues &vs) {
 497 |     EXPECT_EQ("World", vs.token());
 498 |   };
 499 | 
 500 |   auto ret = parser.parse("  Hello World  ");
 501 |   EXPECT_TRUE(ret);
 502 | }
 503 | 
 504 | TEST(GeneralTest, Ignore_case_character_class_test) {
 505 |   parser parser(R"(ROOT <-  [a-z]i+)");
 506 |   // Cover all-lower, all-upper and both mixed-case shapes, plus a non-letter input.
 507 |   EXPECT_TRUE(parser.parse("abc"));
 508 |   EXPECT_TRUE(parser.parse("ABC"));
 509 |   EXPECT_TRUE(parser.parse("Abc"));
 510 |   EXPECT_TRUE(parser.parse("aBC"));
 511 |   EXPECT_FALSE(parser.parse("123"));
 512 | }
 513 | 
 514 | TEST(GeneralTest, Ignore_case_negate_character_class_test) {
 515 |   parser parser(R"(ROOT <-  [^a-z]i+)");
 516 | 
 517 |   EXPECT_TRUE(parser.parse("123"));
 518 |   EXPECT_FALSE(parser.parse("ABC"));
 519 | }
 520 | 
 521 | TEST(GeneralTest, mutable_lambda_test) {
 522 |   std::vector<std::string_view> vec;
 523 | 
 524 |   parser pg("ROOT <- 'mutable lambda test'");
 525 | 
 526 |   // This test makes sure if the following code can be compiled.
 527 |   pg["TOKEN"] = [=](const SemanticValues &vs) mutable {
 528 |     vec.push_back(vs.sv());
 529 |   };
 530 | }
 531 | 
 532 | TEST(GeneralTest, Simple_calculator_test) {
 533 |   parser parser(R"(
 534 |         Additive  <- Multiplicative '+' Additive / Multiplicative
 535 |         Multiplicative <- Primary '*' Multiplicative / Primary
 536 |         Primary   <- '(' Additive ')' / Number
 537 |         Number    <- [0-9]+
 538 |     )");
 539 | 
 540 |   parser["Additive"] = [](const SemanticValues &vs) {
 541 |     switch (vs.choice()) {
 542 |     case 0: return std::any_cast<int>(vs[0]) + std::any_cast<int>(vs[1]);
 543 |     default: return std::any_cast<int>(vs[0]);
 544 |     }
 545 |   };
 546 | 
 547 |   parser["Multiplicative"] = [](const SemanticValues &vs) {
 548 |     switch (vs.choice()) {
 549 |     case 0: return std::any_cast<int>(vs[0]) * std::any_cast<int>(vs[1]);
 550 |     default: return std::any_cast<int>(vs[0]);
 551 |     }
 552 |   };
 553 | 
 554 |   parser["Number"] = [](const SemanticValues &vs) {
 555 |     return vs.token_to_number<int>();
 556 |   };
 557 |   // Initialize the result and check the parse outcome, so a failed parse is
 558 |   // reported instead of comparing an indeterminate value.
 559 |   int val = 0;
 560 |   EXPECT_TRUE(parser.parse("(1+2)*3", val));
 561 |   EXPECT_EQ(9, val);
 562 | }
 563 | 
 564 | TEST(GeneralTest, Simple_calculator_with_recovery_test) {
 565 |   parser parser(R"(
 566 |         Additive    <- Multiplicative '+' Additive / Multiplicative
 567 |         Multiplicative   <- Primary '*' Multiplicative^cond / Primary
 568 |         Primary     <- '(' Additive ')' / Number
 569 |         Number      <- < [0-9]+ >
 570 |         %whitespace <- [ \t]*
 571 |         cond <- '' { error_message "missing multiplicative" }
 572 |     )");
 573 | 
 574 |   parser["Additive"] = [](const SemanticValues &vs) {
 575 |     switch (vs.choice()) {
 576 |     case 0: return std::any_cast<int>(vs[0]) + std::any_cast<int>(vs[1]);
 577 |     default: return std::any_cast<int>(vs[0]);
 578 |     }
 579 |   };
 580 | 
 581 |   parser["Multiplicative"] = [](const SemanticValues &vs) {
 582 |     switch (vs.choice()) {
 583 |     case 0: return std::any_cast<int>(vs[0]) * std::any_cast<int>(vs[1]);
 584 |     default: return std::any_cast<int>(vs[0]);
 585 |     }
 586 |   };
 587 | 
 588 |   parser["Number"] = [](const SemanticValues &vs) {
 589 |     return vs.token_to_number<int>();
 590 |   };
 591 | 
 592 |   int val = 0;
 593 |   auto ret = parser.parse(" (1 + 2) * ", val);
 594 | 
 595 |   EXPECT_FALSE(ret);
 596 |   EXPECT_EQ(0, val);
 597 | }
 598 | 
 599 | TEST(GeneralTest, Calculator_test) {
 600 |   // Construct grammar
 601 |   Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
 602 | 
 603 |   EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
 604 |   TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
 605 |   FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
 606 |   TERM_OPERATOR <= cls("+-");
 607 |   FACTOR_OPERATOR <= cls("*/");
 608 |   NUMBER <= oom(cls("0-9"));
 609 | 
 610 |   // Setup actions
 611 |   auto reduce = [](const SemanticValues &vs) -> long {
 612 |     long ret = std::any_cast<long>(vs[0]);
 613 |     for (auto i = 1u; i < vs.size(); i += 2) {
 614 |       auto num = std::any_cast<long>(vs[i + 1]);
 615 |       switch (std::any_cast<char>(vs[i])) {
 616 |       case '+': ret += num; break;
 617 |       case '-': ret -= num; break;
 618 |       case '*': ret *= num; break;
 619 |       case '/': ret /= num; break;
 620 |       }
 621 |     }
 622 |     return ret;
 623 |   };
 624 | 
 625 |   EXPRESSION = reduce;
 626 |   TERM = reduce;
 627 |   TERM_OPERATOR = [](const SemanticValues &vs) { return *vs.sv().data(); };
 628 |   FACTOR_OPERATOR = [](const SemanticValues &vs) { return *vs.sv().data(); };
 629 |   NUMBER = [](const SemanticValues &vs) { return vs.token_to_number<long>(); };
 630 | 
 631 |   // Parse (val starts at 0 so a failed parse never compares an indeterminate value)
 632 |   long val = 0;
 633 |   auto r = EXPRESSION.parse_and_get_value("1+2*3*(4-5+6)/7-8", val);
 634 | 
 635 |   EXPECT_TRUE(r.ret);
 636 |   EXPECT_EQ(-3, val);
 637 | }
 638 | 
 639 | TEST(GeneralTest, Calculator_test2) {
 640 |   // Parse syntax
 641 |   auto syntax = R"(
 642 |         # Grammar for Calculator...
 643 |         EXPRESSION       <-  TERM (TERM_OPERATOR TERM)*
 644 |         TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
 645 |         FACTOR           <-  NUMBER / '(' EXPRESSION ')'
 646 |         TERM_OPERATOR    <-  [-+]
 647 |         FACTOR_OPERATOR  <-  [/*]
 648 |         NUMBER           <-  [0-9]+
 649 |     )";
 650 | 
 651 |   auto cxt = ParserGenerator::parse(syntax, strlen(syntax), {}, nullptr, {});
 652 |   auto &g = *cxt.grammar;
 653 | 
 654 |   // Setup actions
 655 |   auto reduce = [](const SemanticValues &vs) -> long {
 656 |     long ret = std::any_cast<long>(vs[0]);
 657 |     for (auto i = 1u; i < vs.size(); i += 2) {
 658 |       auto num = std::any_cast<long>(vs[i + 1]);
 659 |       switch (std::any_cast<char>(vs[i])) {
 660 |       case '+': ret += num; break;
 661 |       case '-': ret -= num; break;
 662 |       case '*': ret *= num; break;
 663 |       case '/': ret /= num; break;
 664 |       }
 665 |     }
 666 |     return ret;
 667 |   };
 668 | 
 669 |   g["EXPRESSION"] = reduce;
 670 |   g["TERM"] = reduce;
 671 |   g["TERM_OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
 672 |   g["FACTOR_OPERATOR"] = [](const SemanticValues &vs) {
 673 |     return *vs.sv().data();
 674 |   };
 675 |   g["NUMBER"] = [](const SemanticValues &vs) {
 676 |     return vs.token_to_number<long>();
 677 |   };
 678 | 
 679 |   // Parse (val starts at 0 so a failed parse never compares an indeterminate value)
 680 |   long val = 0;
 681 |   auto r = g[cxt.start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val);
 682 | 
 683 |   EXPECT_TRUE(r.ret);
 684 |   EXPECT_EQ(-3, val);
 685 | }
 686 | // Same calculator, driven through the `parser` class.
 687 | TEST(GeneralTest, Calculator_test3) {
 688 |   // Parse syntax
 689 |   parser parser(R"(
 690 |         # Grammar for Calculator...
 691 |         EXPRESSION       <-  TERM (TERM_OPERATOR TERM)*
 692 |         TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
 693 |         FACTOR           <-  NUMBER / '(' EXPRESSION ')'
 694 |         TERM_OPERATOR    <-  [-+]
 695 |         FACTOR_OPERATOR  <-  [/*]
 696 |         NUMBER           <-  [0-9]+
 697 |     )");
 698 | 
 699 |   auto reduce = [](const SemanticValues &vs) -> long {
 700 |     long ret = std::any_cast<long>(vs[0]);
 701 |     for (auto i = 1u; i < vs.size(); i += 2) {
 702 |       auto num = std::any_cast<long>(vs[i + 1]);
 703 |       switch (std::any_cast<char>(vs[i])) {
 704 |       case '+': ret += num; break;
 705 |       case '-': ret -= num; break;
 706 |       case '*': ret *= num; break;
 707 |       case '/': ret /= num; break;
 708 |       }
 709 |     }
 710 |     return ret;
 711 |   };
 712 | 
 713 |   // Setup actions
 714 |   parser["EXPRESSION"] = reduce;
 715 |   parser["TERM"] = reduce;
 716 |   parser["TERM_OPERATOR"] = [](const SemanticValues &vs) {
 717 |     return static_cast<char>(*vs.sv().data());
 718 |   };
 719 |   parser["FACTOR_OPERATOR"] = [](const SemanticValues &vs) {
 720 |     return static_cast<char>(*vs.sv().data());
 721 |   };
 722 |   parser["NUMBER"] = [](const SemanticValues &vs) {
 723 |     return vs.token_to_number<long>();
 724 |   };
 725 | 
 726 |   // Parse; val starts at 0 so a failed parse cannot leave it indeterminate
 727 |   long val = 0;
 728 |   auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val);
 729 | 
 730 |   EXPECT_TRUE(ret);
 731 |   EXPECT_EQ(-3, val);
 732 | }
 733 | // Calculator evaluated by walking the AST produced after enable_ast().
 734 | TEST(GeneralTest, Calculator_test_with_AST) {
 735 |   parser parser(R"(
 736 |         EXPRESSION       <-  _ TERM (TERM_OPERATOR TERM)*
 737 |         TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
 738 |         FACTOR           <-  NUMBER / '(' _ EXPRESSION ')' _
 739 |         TERM_OPERATOR    <-  < [-+] > _
 740 |         FACTOR_OPERATOR  <-  < [/*] > _
 741 |         NUMBER           <-  < [0-9]+ > _
 742 |         ~_               <-  [ \t\r\n]*
 743 |     )");
 744 | 
 745 |   parser.enable_ast();
 746 | 
 747 |   std::function<long(const Ast &)> eval = [&](const Ast &ast) {
 748 |     if (ast.name == "NUMBER") {
 749 |       return ast.token_to_number<long>();
 750 |     } else {
 751 |       const auto &nodes = ast.nodes; // operand, operator, operand, ...
 752 |       auto result = eval(*nodes[0]);
 753 |       for (auto i = 1u; i < nodes.size(); i += 2) {
 754 |         auto num = eval(*nodes[i + 1]);
 755 |         auto ope = nodes[i]->token[0];
 756 |         switch (ope) {
 757 |         case '+': result += num; break;
 758 |         case '-': result -= num; break;
 759 |         case '*': result *= num; break;
 760 |         case '/': result /= num; break;
 761 |         }
 762 |       }
 763 |       return result;
 764 |     }
 765 |   };
 766 | 
 767 |   std::shared_ptr<Ast> ast;
 768 |   auto ret = parser.parse("1+2*3*(4-5+6)/7-8", ast);
 769 |   ASSERT_TRUE(ret); // stop on failure: ast may still be null below
 770 |   ast = parser.optimize_ast(ast);
 771 |   auto val = eval(*ast);
 772 | 
 773 |   EXPECT_EQ(-3, val);
 774 | }
 775 | // Combinator-built calculator whose result is computed from the AST.
 776 | TEST(GeneralTest, Calculator_test_with_combinators_and_AST) {
 777 |   // Construct grammar
 778 |   AST_DEFINITIONS(EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR,
 779 |                   NUMBER);
 780 | 
 781 |   EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
 782 |   TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
 783 |   FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
 784 |   TERM_OPERATOR <= cls("+-");
 785 |   FACTOR_OPERATOR <= cls("*/");
 786 |   NUMBER <= oom(cls("0-9"));
 787 | 
 788 |   std::function<long(const Ast &)> eval = [&](const Ast &ast) {
 789 |     if (ast.name == "NUMBER") {
 790 |       return ast.token_to_number<long>();
 791 |     } else {
 792 |       const auto &nodes = ast.nodes; // operand, operator, operand, ...
 793 |       auto result = eval(*nodes[0]);
 794 |       for (auto i = 1u; i < nodes.size(); i += 2) {
 795 |         auto num = eval(*nodes[i + 1]);
 796 |         auto ope = nodes[i]->token[0];
 797 |         switch (ope) {
 798 |         case '+': result += num; break;
 799 |         case '-': result -= num; break;
 800 |         case '*': result *= num; break;
 801 |         case '/': result /= num; break;
 802 |         }
 803 |       }
 804 |       return result;
 805 |     }
 806 |   };
 807 | 
 808 |   std::shared_ptr<Ast> ast;
 809 |   auto r = EXPRESSION.parse_and_get_value("1+2*3*(4-5+6)/7-8", ast);
 810 |   ASSERT_TRUE(r.ret); // stop on failure: ast may still be null below
 811 |   ast = AstOptimizer(true).optimize(ast);
 812 |   auto val = eval(*ast);
 813 | 
 814 |   EXPECT_EQ(-3, val);
 815 | }
 816 | // `~HELLO` in START: only the WORLD node is expected in the AST.
 817 | TEST(GeneralTest, Ignore_semantic_value_test) {
 818 |   parser parser(R"(
 819 |        START <-  ~HELLO WORLD
 820 |        HELLO <- 'Hello' _
 821 |        WORLD <- 'World' _
 822 |        _     <- [ \t\r\n]*
 823 |     )");
 824 | 
 825 |   parser.enable_ast();
 826 | 
 827 |   std::shared_ptr<Ast> ast;
 828 |   auto ret = parser.parse("Hello World", ast);
 829 | 
 830 |   ASSERT_TRUE(ret); // fatal on failure: ast is dereferenced below
 831 |   EXPECT_EQ(1, ast->nodes.size());
 832 |   EXPECT_EQ("WORLD", ast->nodes[0]->name);
 833 | }
 834 | // `!DUMMY` must not add a node: only HELLO_WORLD is expected under START.
 835 | TEST(GeneralTest, Ignore_semantic_value_of_or_predicate_test) {
 836 |   parser parser(R"(
 837 |        START       <- _ !DUMMY HELLO_WORLD '.'
 838 |        HELLO_WORLD <- HELLO 'World' _
 839 |        HELLO       <- 'Hello' _
 840 |        DUMMY       <- 'dummy' _
 841 |        ~_          <- [ \t\r\n]*
 842 |    )");
 843 | 
 844 |   parser.enable_ast();
 845 | 
 846 |   std::shared_ptr<Ast> ast;
 847 |   auto ret = parser.parse("Hello World.", ast);
 848 | 
 849 |   ASSERT_TRUE(ret); // fatal on failure: ast is dereferenced below
 850 |   EXPECT_EQ(1, ast->nodes.size());
 851 |   EXPECT_EQ("HELLO_WORLD", ast->nodes[0]->name);
 852 | }
 853 | // `&HELLO` must not add a node: only HELLO_WORLD is expected under START.
 854 | TEST(GeneralTest, Ignore_semantic_value_of_and_predicate_test) {
 855 |   parser parser(R"(
 856 |        START       <- _ &HELLO HELLO_WORLD '.'
 857 |        HELLO_WORLD <- HELLO 'World' _
 858 |        HELLO       <- 'Hello' _
 859 |        ~_          <- [ \t\r\n]*
 860 |     )");
 861 | 
 862 |   parser.enable_ast();
 863 | 
 864 |   std::shared_ptr<Ast> ast;
 865 |   auto ret = parser.parse("Hello World.", ast);
 866 | 
 867 |   ASSERT_TRUE(ret); // fatal on failure: ast is dereferenced below
 868 |   EXPECT_EQ(1, ast->nodes.size());
 869 |   EXPECT_EQ("HELLO_WORLD", ast->nodes[0]->name);
 870 | }
 871 | // A rule that references no other rule is expected to yield one token node.
 872 | TEST(GeneralTest, Literal_token_on_AST_test1) {
 873 |   parser parser(R"(
 874 |         STRING_LITERAL  <- '"' (('\\"' / '\\t' / '\\n') / (!["] .))* '"'
 875 |     )");
 876 |   parser.enable_ast();
 877 | 
 878 |   std::shared_ptr<Ast> ast;
 879 |   auto ret = parser.parse(R"("a\tb")", ast);
 880 | 
 881 |   ASSERT_TRUE(ret); // fatal on failure: ast is dereferenced below
 882 |   EXPECT_TRUE(ast->is_token);
 883 |   EXPECT_EQ(R"("a\tb")", ast->token);
 884 |   EXPECT_TRUE(ast->nodes.empty());
 885 | }
 886 | // With ESC/CHAR as separate rules the root is not a token and has 3 nodes.
 887 | TEST(GeneralTest, Literal_token_on_AST_test2) {
 888 |   parser parser(R"(
 889 |         STRING_LITERAL  <-  '"' (ESC / CHAR)* '"'
 890 |         ESC             <-  ('\\"' / '\\t' / '\\n')
 891 |         CHAR            <-  (!["] .)
 892 |     )");
 893 |   parser.enable_ast();
 894 | 
 895 |   std::shared_ptr<Ast> ast;
 896 |   auto ret = parser.parse(R"("a\tb")", ast);
 897 | 
 898 |   ASSERT_TRUE(ret); // fatal on failure: ast is dereferenced below
 899 |   EXPECT_FALSE(ast->is_token);
 900 |   EXPECT_TRUE(ast->token.empty());
 901 |   EXPECT_EQ(3, ast->nodes.size());
 902 | }
 903 | // Wrapping the whole rule in < > makes the root a token, quotes included.
 904 | TEST(GeneralTest, Literal_token_on_AST_test3) {
 905 |   parser parser(R"(
 906 |         STRING_LITERAL  <-  < '"' (ESC / CHAR)* '"' >
 907 |         ESC             <-  ('\\"' / '\\t' / '\\n')
 908 |         CHAR            <-  (!["] .)
 909 |     )");
 910 |   parser.enable_ast();
 911 | 
 912 |   std::shared_ptr<Ast> ast;
 913 |   auto ret = parser.parse(R"("a\tb")", ast);
 914 | 
 915 |   ASSERT_TRUE(ret); // fatal on failure: ast is dereferenced below
 916 |   EXPECT_TRUE(ast->is_token);
 917 |   EXPECT_EQ(R"("a\tb")", ast->token);
 918 |   EXPECT_TRUE(ast->nodes.empty());
 919 | }
 920 | // With nested < >, the expected token is the inner capture (no quotes).
 921 | TEST(GeneralTest, Literal_token_on_AST_test4) {
 922 |   parser parser(R"(
 923 |         STRING_LITERAL  <-  < '"' < (ESC / CHAR)* > '"' >
 924 |         ESC             <-  ('\\"' / '\\t' / '\\n')
 925 |         CHAR            <-  (!["] .)
 926 |     )");
 927 |   parser.enable_ast();
 928 | 
 929 |   std::shared_ptr<Ast> ast;
 930 |   auto ret = parser.parse(R"("a\tb")", ast);
 931 | 
 932 |   ASSERT_TRUE(ret); // fatal on failure: ast is dereferenced below
 933 |   EXPECT_TRUE(ast->is_token);
 934 |   EXPECT_EQ(R"(a\tb)", ast->token);
 935 |   EXPECT_TRUE(ast->nodes.empty());
 936 | }
 937 | // A grammar that references undefined rules (B, C) must fail to load.
 938 | TEST(GeneralTest, Missing_missing_definitions_test) {
 939 |   parser parser(R"(
 940 |         A <- B C
 941 |     )");
 942 | 
 943 |   EXPECT_FALSE(parser);
 944 | }
 945 | // A grammar that defines rule A twice must fail to load.
 946 | TEST(GeneralTest, Definition_duplicates_test) {
 947 |   parser parser(R"(
 948 |         A <- ''
 949 |         A <- ''
 950 |     )");
 951 | 
 952 |   EXPECT_FALSE(parser);
 953 | }
 954 | // term's first three values must come from a, b, c at offsets 0, 1 and 2.
 955 | TEST(GeneralTest, Semantic_values_test) {
 956 |   parser parser(R"(
 957 |         term <- ( a b c x )? a b c
 958 |         a <- 'a'
 959 |         b <- 'b'
 960 |         c <- 'c'
 961 |         x <- 'x'
 962 |     )");
 963 | 
 964 |   for (const auto &item : parser.get_grammar()) { // one action per rule
 965 |     const auto &rule = item.first;
 966 |     parser[rule.data()] = [rule](const SemanticValues &vs, std::any &) {
 967 |       if (rule == "term") {
 968 |         EXPECT_EQ("a at 0", std::any_cast<std::string>(vs[0]));
 969 |         EXPECT_EQ("b at 1", std::any_cast<std::string>(vs[1]));
 970 |         EXPECT_EQ("c at 2", std::any_cast<std::string>(vs[2]));
 971 |         return std::string();
 972 |       } else {
 973 |         return rule + " at " + std::to_string(vs.sv().data() - vs.ss);
 974 |       }
 975 |     };
 976 |   }
 977 | 
 978 |   EXPECT_TRUE(parser.parse("abc"));
 979 | }
 980 | // For input "b", S's action must see choice index 1 out of 2 alternatives.
 981 | TEST(GeneralTest, Ordered_choice_count) {
 982 |   parser parser(R"(
 983 |         S <- 'a' / 'b'
 984 |     )");
 985 | 
 986 |   parser["S"] = [](const SemanticValues &vs) {
 987 |     EXPECT_EQ(1, vs.choice());
 988 |     EXPECT_EQ(2, vs.choice_count());
 989 |   };
 990 | 
 991 |   EXPECT_TRUE(parser.parse("b")); // a failed parse would skip the action
 992 | }
 993 | // With the choice nested in `( ... )*`, S's action sees choice 0, count 0.
 994 | TEST(GeneralTest, Ordered_choice_count_2) {
 995 |   parser parser(R"(
 996 |         S <- ('a' / 'b')*
 997 |     )");
 998 | 
 999 |   parser["S"] = [](const SemanticValues &vs) {
1000 |     EXPECT_EQ(0, vs.choice());
1001 |     EXPECT_EQ(0, vs.choice_count());
1002 |   };
1003 | 
1004 |   EXPECT_TRUE(parser.parse("b")); // a failed parse would skip the action
1005 | }
1006 | // vs.tags[i] is compared with the `"X"_` tag of the rule expected at slot i.
1007 | TEST(GeneralTest, Semantic_value_tag) {
1008 |   parser parser(R"(
1009 |         S <- A? B* C?
1010 |         A <- 'a'
1011 |         B <- 'b'
1012 |         C <- 'c'
1013 |     )");
1014 | 
1015 |   { // "c": a single value, produced by C
1016 |     using namespace udl;
1017 |     parser["S"] = [](const SemanticValues &vs) {
1018 |       EXPECT_EQ(1, vs.size());
1019 |       EXPECT_EQ(1, vs.tags.size());
1020 |       EXPECT_EQ("C"_, vs.tags[0]);
1021 |     };
1022 |     auto ret = parser.parse("c");
1023 |     EXPECT_TRUE(ret);
1024 |   }
1025 | 
1026 |   { // "bb": two values, both produced by B
1027 |     using namespace udl;
1028 |     parser["S"] = [](const SemanticValues &vs) {
1029 |       EXPECT_EQ(2, vs.size());
1030 |       EXPECT_EQ(2, vs.tags.size());
1031 |       EXPECT_EQ("B"_, vs.tags[0]);
1032 |       EXPECT_EQ("B"_, vs.tags[1]);
1033 |     };
1034 |     auto ret = parser.parse("bb");
1035 |     EXPECT_TRUE(ret);
1036 |   }
1037 | 
1038 |   { // "ac": two values, produced by A then C
1039 |     using namespace udl;
1040 |     parser["S"] = [](const SemanticValues &vs) {
1041 |       EXPECT_EQ(2, vs.size());
1042 |       EXPECT_EQ(2, vs.tags.size());
1043 |       EXPECT_EQ("A"_, vs.tags[0]);
1044 |       EXPECT_EQ("C"_, vs.tags[1]);
1045 |     };
1046 |     auto ret = parser.parse("ac");
1047 |     EXPECT_TRUE(ret);
1048 |   }
1049 | }
1050 | // [^a-z_]+ must reject empty input and input with a lowercase letter or '_'.
1051 | TEST(GeneralTest, Negated_Class_test) {
1052 |   parser parser(R"(
1053 |         ROOT <- [^a-z_]+
1054 |     )");
1055 | 
1056 |   bool ret = parser; // true when the grammar loaded successfully
1057 |   EXPECT_TRUE(ret);
1058 | 
1059 |   EXPECT_TRUE(parser.parse("ABC123"));
1060 |   EXPECT_FALSE(parser.parse("ABcZ"));
1061 |   EXPECT_FALSE(parser.parse("ABCZ_"));
1062 |   EXPECT_FALSE(parser.parse(""));
1063 | }
1064 | // Ast::token_to_number<float>() must convert the token "1.1" to 1.1f.
1065 | TEST(GeneralTest, token_to_number_float_test) {
1066 |   parser parser(R"(
1067 |     S <- '1.1'
1068 |   )");
1069 |   parser.enable_ast();
1070 | 
1071 |   std::shared_ptr<Ast> ast;
1072 |   auto ret = parser.parse("1.1", ast);
1073 | 
1074 |   ASSERT_TRUE(ret); // fatal on failure: ast is dereferenced below
1075 |   EXPECT_TRUE(ast->is_token);
1076 |   EXPECT_EQ("1.1", ast->token);
1077 |   EXPECT_FLOAT_EQ(1.1f, ast->token_to_number<float>()); // ULP-tolerant compare
1078 |   EXPECT_TRUE(ast->nodes.empty());
1079 | }
1080 | // After optimize_ast(), a child's `parent` weak reference must still be live.
1081 | TEST(GeneralTest, ParentReferencesShouldNotBeExpired) {
1082 |   auto parser = peg::parser(R"(
1083 | 		ROOT            <- OPTIMIZES_AWAY
1084 | 		OPTIMIZES_AWAY  <- ITEM+
1085 | 		ITEM            <- 'a'
1086 | 	)");
1087 |   parser.enable_ast<peg::Ast>();
1088 | 
1089 |   std::shared_ptr<peg::Ast> ast;
1090 |   ASSERT_TRUE(parser.parse("aaa", ast)); // ast is dereferenced below
1091 |   ast = parser.optimize_ast(ast);
1092 | 
1093 |   EXPECT_FALSE(ast->nodes[0]->parent.expired());
1094 | }
1095 | // An explicit `!.` rejects trailing input even with the EOI check disabled.
1096 | TEST(GeneralTest, EndOfInputTest) {
1097 |   auto parser = peg::parser(R"(
1098 |     S <- '[[' (!']]' .)* ']]' !.
1099 | 	)");
1100 | 
1101 |   parser.disable_eoi_check();
1102 | 
1103 |   auto ret = parser.parse("[[]]]");
1104 |   EXPECT_FALSE(ret);
1105 | }
1106 | // By default, input left over after S has matched makes the parse fail.
1107 | TEST(GeneralTest, DefaultEndOfInputTest) {
1108 |   auto parser = peg::parser(R"(
1109 |     S <- '[[' (!']]' .)* ']]'
1110 | 	)");
1111 | 
1112 |   auto ret = parser.parse("[[]]]");
1113 |   EXPECT_FALSE(ret);
1114 | }
1115 | // After disable_eoi_check(), the same leftover input is accepted.
1116 | TEST(GeneralTest, DisableEndOfInputCheckTest) {
1117 |   auto parser = peg::parser(R"(
1118 |     S <- '[[' (!']]' .)* ']]'
1119 | 	)");
1120 | 
1121 |   parser.disable_eoi_check();
1122 | 
1123 |   auto ret = parser.parse("[[]]]");
1124 |   EXPECT_TRUE(ret);
1125 | }
1126 | // A cut (`↑`) inside a plain sequence: "ab" parses, "ac" and "b" do not.
1127 | TEST(GeneralTest, InvalidCutOperator) {
1128 |   auto parser = peg::parser(R"(
1129 |     S <- 'a' ↑ 'b'
1130 | 	)");
1131 | 
1132 |   auto ret = parser.parse("ab");
1133 |   EXPECT_TRUE(ret);
1134 | 
1135 |   ret = parser.parse("ac");
1136 |   EXPECT_FALSE(ret);
1137 | 
1138 |   ret = parser.parse("b");
1139 |   EXPECT_FALSE(ret);
1140 | }
1141 | // `%t` in the untyped_enum message must be replaced by the token `sequencer`.
1142 | TEST(GeneralTest, HeuristicErrorTokenTest) {
1143 |   auto parser = peg::parser(R"(
1144 |     program      <- enum+
1145 |     enum         <- 'enum' enum_kind^untyped_enum
1146 |     enum_kind    <- 'sequence' / 'bitmask'
1147 | 
1148 |     %whitespace  <- [ \r\t\n]*
1149 |     %word        <- [a-zA-Z0-9_]
1150 | 
1151 |     untyped_enum <- '' { message "invalid/missing enum type, expected one of 'sequence' or 'bitmask', got '%t'"}
1152 | 	)");
1153 |   // NOTE(review): the checks below only run if the logger is actually invoked
1154 |   parser.set_logger([&](size_t ln, size_t col, const std::string &msg) {
1155 |     EXPECT_EQ(1, ln);
1156 |     EXPECT_EQ(6, col);
1157 |     EXPECT_EQ("invalid/missing enum type, expected one of 'sequence' or "
1158 |               "'bitmask', got 'sequencer'",
1159 |               msg);
1160 |   });
1161 | 
1162 |   auto ret = parser.parse("enum sequencer");
1163 |   EXPECT_FALSE(ret);
1164 | }
1165 | // Parses a hash literal and checks the INTEGER key/value tokens in the AST.
1166 | TEST(GeneralTest, LiteralContentInAST) {
1167 |   parser parser(R"(
1168 | PROGRAM                <-  STATEMENTS
1169 | 
1170 | STATEMENTS             <-  (STATEMENT ';'?)*
1171 | STATEMENT              <-  ASSIGNMENT / RETURN / EXPRESSION_STATEMENT
1172 | 
1173 | ASSIGNMENT             <-  'let' IDENTIFIER '=' EXPRESSION
1174 | RETURN                 <-  'return' EXPRESSION
1175 | EXPRESSION_STATEMENT   <-  EXPRESSION
1176 | 
1177 | EXPRESSION             <-  INFIX_EXPR(PREFIX_EXPR, INFIX_OPE)
1178 | INFIX_EXPR(ATOM, OPE)  <-  ATOM (OPE ATOM)* {
1179 |                              precedence
1180 |                                L == !=
1181 |                                L < >
1182 |                                L + -
1183 |                                L * /
1184 |                            }
1185 | 
1186 | IF                     <-  'if' '(' EXPRESSION ')' BLOCK ('else' BLOCK)?
1187 | 
1188 | FUNCTION               <-  'fn' '(' PARAMETERS ')' BLOCK
1189 | PARAMETERS             <-  LIST(IDENTIFIER, ',')
1190 | 
1191 | BLOCK                  <-  '{' STATEMENTS '}'
1192 | 
1193 | CALL                   <-  PRIMARY (ARGUMENTS / INDEX)*
1194 | ARGUMENTS              <-  '(' LIST(EXPRESSION, ',') ')'
1195 | INDEX                  <-   '[' EXPRESSION ']'
1196 | 
1197 | PREFIX_EXPR            <-  PREFIX_OPE* CALL
1198 | PRIMARY                <-  IF / FUNCTION / ARRAY / HASH / INTEGER / BOOLEAN / NULL / IDENTIFIER / STRING / '(' EXPRESSION ')'
1199 | 
1200 | ARRAY                  <-  '[' LIST(EXPRESSION, ',') ']'
1201 | 
1202 | HASH                   <-  '{' LIST(HASH_PAIR, ',') '}'
1203 | HASH_PAIR              <-  EXPRESSION ':' EXPRESSION
1204 | 
1205 | IDENTIFIER             <-  < !KEYWORD [a-zA-Z]+ >
1206 | INTEGER                <-  < [0-9]+ >
1207 | STRING                 <-  < ["] < (!["] .)* > ["] >
1208 | BOOLEAN                <-  'true' / 'false'
1209 | NULL                   <-  'null'
1210 | PREFIX_OPE             <-  < [-!] >
1211 | INFIX_OPE              <-  < [-+/*<>] / '==' / '!=' >
1212 | 
1213 | KEYWORD                <-  ('null' | 'true' | 'false' | 'let' | 'return' | 'if' | 'else' | 'fn') ![a-zA-Z]
1214 | 
1215 | LIST(ITEM, DELM)       <-  (ITEM (~DELM ITEM)*)?
1216 | 
1217 | LINE_COMMENT           <-  '//' (!LINE_END .)* &LINE_END
1218 | LINE_END               <-  '\r\n' / '\r' / '\n' / !.
1219 | 
1220 | %whitespace            <-  ([ \t\r\n]+ / LINE_COMMENT)*
1221 | %word                  <-  [a-zA-Z]+
1222 |   )");
1223 |   parser.enable_ast();
1224 | 
1225 |   std::shared_ptr<Ast> ast;
1226 |   auto ret = parser.parse(R"({1: 1, 2: 2, 3: 3})", ast);
1227 | 
1228 |   ASSERT_TRUE(ret); // fatal on failure: ast is optimized and read below
1229 | 
1230 |   auto opt =
1231 |       AstOptimizer(true, {"EXPRESSION_STATEMENT", "PARAMETERS", "ARGUMENTS",
1232 |                           "INDEX", "RETURN", "BLOCK", "ARRAY", "HASH"});
1233 |   ast = opt.optimize(ast);
1234 | 
1235 |   EXPECT_EQ("EXPRESSION_STATEMENT", ast->name);
1236 | 
1237 |   auto node = ast->nodes[0];
1238 |   EXPECT_EQ("HASH", node->name);
1239 | 
1240 |   std::map<std::string, int64_t> expected = {
1241 |       {"1", 1},
1242 |       {"2", 2},
1243 |       {"3", 3},
1244 |   };
1245 |   // Each HASH child is expected to hold an INTEGER key and an INTEGER value
1246 |   for (const auto &entry : node->nodes) {
1247 |     auto key = entry->nodes[0];
1248 |     auto val = entry->nodes[1];
1249 |     EXPECT_EQ("INTEGER", key->name);
1250 | 
1251 |     auto expectedValue = expected.at(key->token_to_string()); // no silent insert
1252 |     EXPECT_EQ("INTEGER", val->name);
1253 |     EXPECT_EQ(expectedValue, val->token_to_number<int64_t>());
1254 |   }
1255 | }
1256 | // With %whitespace defined, input "int" must still report choice index 1.
1257 | TEST(GeneralTest, ChoiceWithWhitespace) {
1258 |   auto parser = peg::parser(R"(
1259 |     type <- 'string' / 'int' / 'double'
1260 |     %whitespace <- ' '*
1261 |   )");
1262 | 
1263 |   parser["type"] = [](const SemanticValues &vs) {
1264 |     auto n = vs.choice();
1265 |     EXPECT_EQ(1, n);
1266 |   };
1267 | 
1268 |   auto ret = parser.parse("int");
1269 |   EXPECT_TRUE(ret);
1270 | }
1271 | // parse() called with both a std::any context and an output value.
1272 | TEST(GeneralTest, PassingContextAndOutputParameter) {
1273 |   parser parser(R"(
1274 |         START  <- TOKEN
1275 |         TOKEN  <- [0-9]+
1276 |     )");
1277 | 
1278 |   parser["TOKEN"] = [&](const peg::SemanticValues &vs, std::any & /*dt*/) {
1279 |     return vs.token_to_number<int>();
1280 |   };
1281 | 
1282 |   int output = 0;
1283 |   std::any dt = std::string{"context"};
1284 |   EXPECT_TRUE(parser.parse<int>("42", dt, output)); // parse must succeed too
1285 |   EXPECT_EQ(42, output);
1286 | }
1287 | // The start rule can be named in the constructor or in load_grammar().
1288 | TEST(GeneralTest, SpecifyStartRule) {
1289 |   auto grammar = R"(
1290 |     Start       <- A
1291 |     A           <- B (',' B)*
1292 |     B           <- '[one]' / '[two]'
1293 |     %whitespace <- [ \t\n]*
1294 |   )";
1295 | 
1296 |   { // a start rule name that is not in the grammar
1297 |     parser peg(grammar, "AAA");
1298 |     EXPECT_FALSE(peg);
1299 |   }
1300 | 
1301 |   { // explicit start rule given to the constructor
1302 |     parser peg(grammar, "A");
1303 |     EXPECT_TRUE(peg.parse(" [one] , [two] "));
1304 |   }
1305 | 
1306 |   { // default start rule, then reload with an explicit one
1307 |     parser peg(grammar);
1308 |     EXPECT_TRUE(peg.parse(" [one] , [two] "));
1309 | 
1310 |     peg.load_grammar(grammar, "A");
1311 |     EXPECT_TRUE(peg.parse(" [one] , [two] "));
1312 |   }
1313 | 
1314 |   { // default-constructed parser, grammar supplied via load_grammar()
1315 |     parser peg;
1316 | 
1317 |     peg.load_grammar(grammar);
1318 |     EXPECT_TRUE(peg.parse(" [one] , [two] "));
1319 | 
1320 |     peg.load_grammar(grammar, "A");
1321 |     EXPECT_TRUE(peg.parse(" [one] , [two] "));
1322 |   }
1323 | }
1324 | 


--------------------------------------------------------------------------------
/test/test3.cc:
--------------------------------------------------------------------------------
  1 | ﻿#include <gtest/gtest.h>
  2 | #include <peglib.h>
  3 | 
  4 | using namespace peg;
  5 | // "Grammar": two definitions on separate lines, with surrounding spaces.
  6 | TEST(PEGTest, PEG_Grammar) {
  7 |   EXPECT_TRUE(ParserGenerator::parse_test(
  8 |       "Grammar",
  9 |       " Definition <- a / ( b c ) / d \n rule2 <- [a-zA-Z][a-z0-9-]+ "));
 10 | }
 11 | // "Definition": needs "<-" or "←"; a macro takes "(" right after its name.
 12 | TEST(PEGTest, PEG_Definition) {
 13 |   EXPECT_TRUE(ParserGenerator::parse_test("Definition",
 14 |                                           "Definition <- a / (b c) / d "));
 15 |   EXPECT_TRUE(
 16 |       ParserGenerator::parse_test("Definition", "Definition <- a / b c / d "));
 17 |   EXPECT_TRUE(ParserGenerator::parse_test("Definition", u8"Definitiond ← a "));
 18 |   EXPECT_FALSE(ParserGenerator::parse_test("Definition", "Definition "));
 19 |   EXPECT_FALSE(ParserGenerator::parse_test("Definition", " "));
 20 |   EXPECT_FALSE(ParserGenerator::parse_test("Definition", ""));
 21 |   EXPECT_FALSE(
 22 |       ParserGenerator::parse_test("Definition", "Definition = a / (b c) / d "));
 23 |   EXPECT_TRUE(ParserGenerator::parse_test("Definition", "Macro(param) <- a "));
 24 |   EXPECT_FALSE(
 25 |       ParserGenerator::parse_test("Definition", "Macro (param) <- a "));
 26 | }
 27 | // "Expression": choices and sequences; empty input matches, leading space fails.
 28 | TEST(PEGTest, PEG_Expression) {
 29 |   EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a / (b c) / d "));
 30 |   EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a / b c / d "));
 31 |   EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a b "));
 32 |   EXPECT_TRUE(ParserGenerator::parse_test("Expression", ""));
 33 |   EXPECT_FALSE(ParserGenerator::parse_test("Expression", " "));
 34 |   EXPECT_FALSE(ParserGenerator::parse_test("Expression", " a b "));
 35 | 
 36 |   // NOTE: The following are actually allowed in Ford's original paper...
 37 |   EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a//b "));
 38 |   EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a // b "));
 39 |   EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a / / b "));
 40 | }
 41 | // "Sequence": a run of items or nothing; "!", "<-" and a leading space fail.
 42 | TEST(PEGTest, PEG_Sequence) {
 43 |   EXPECT_TRUE(ParserGenerator::parse_test("Sequence", "a b c d "));
 44 |   EXPECT_TRUE(ParserGenerator::parse_test("Sequence", ""));
 45 |   EXPECT_FALSE(ParserGenerator::parse_test("Sequence", "!"));
 46 |   EXPECT_FALSE(ParserGenerator::parse_test("Sequence", "<-"));
 47 |   EXPECT_FALSE(ParserGenerator::parse_test("Sequence", " a"));
 48 | }
 49 | // "Prefix": "&" and "!" are accepted before a class; "-" is not.
 50 | TEST(PEGTest, PEG_Prefix) {
 51 |   EXPECT_TRUE(ParserGenerator::parse_test("Prefix", "&[a]"));
 52 |   EXPECT_TRUE(ParserGenerator::parse_test("Prefix", "![']"));
 53 |   EXPECT_FALSE(ParserGenerator::parse_test("Prefix", "-[']"));
 54 |   EXPECT_FALSE(ParserGenerator::parse_test("Prefix", ""));
 55 |   EXPECT_FALSE(ParserGenerator::parse_test("Prefix", " a"));
 56 | }
 57 | // "Suffix": ?, *, + and {n}, {n,}, {n,m}, {,m} repetition; "{}" is rejected.
 58 | TEST(PEGTest, PEG_Suffix) {
 59 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa "));
 60 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa? "));
 61 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa* "));
 62 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa+ "));
 63 |   EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "aaa{} "));
 64 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa{10} "));
 65 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa{10,} "));
 66 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa{10,100} "));
 67 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa{,100} "));
 68 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", ". + "));
 69 |   EXPECT_TRUE(ParserGenerator::parse_test("Suffix", ". {10} "));
 70 |   EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "?"));
 71 |   EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "+"));
 72 |   EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "{10}"));
 73 |   EXPECT_FALSE(ParserGenerator::parse_test("Suffix", ""));
 74 |   EXPECT_FALSE(ParserGenerator::parse_test("Suffix", " a"));
 75 | }
 76 | // "Primary": identifier, group, literal, class or "."; "ident<-" is rejected.
 77 | TEST(PEGTest, PEG_Primary) {
 78 |   EXPECT_TRUE(ParserGenerator::parse_test("Primary", "_Identifier0_ "));
 79 |   EXPECT_FALSE(ParserGenerator::parse_test("Primary", "_Identifier0_<-"));
 80 |   EXPECT_TRUE(ParserGenerator::parse_test("Primary",
 81 |                                           "( _Identifier0_ _Identifier1_ )"));
 82 |   EXPECT_TRUE(ParserGenerator::parse_test("Primary", "'Literal String'"));
 83 |   EXPECT_TRUE(ParserGenerator::parse_test("Primary", "\"Literal String\""));
 84 |   EXPECT_TRUE(ParserGenerator::parse_test("Primary", "[a-zA-Z]"));
 85 |   EXPECT_TRUE(ParserGenerator::parse_test("Primary", "."));
 86 |   EXPECT_FALSE(ParserGenerator::parse_test("Primary", ""));
 87 |   EXPECT_FALSE(ParserGenerator::parse_test("Primary", " "));
 88 |   EXPECT_FALSE(ParserGenerator::parse_test("Primary", " a"));
 89 |   EXPECT_FALSE(ParserGenerator::parse_test("Primary", "")); // NOTE(review): repeats the empty-input check above
 90 | }
 91 | // "Identifier": must not start with a digit or a space, nor contain "|".
 92 | TEST(PEGTest, PEG_Identifier) {
 93 |   EXPECT_TRUE(ParserGenerator::parse_test("Identifier", "_Identifier0_ "));
 94 |   EXPECT_FALSE(ParserGenerator::parse_test("Identifier", "0Identifier_ "));
 95 |   EXPECT_FALSE(ParserGenerator::parse_test("Identifier", "Iden|t "));
 96 |   EXPECT_FALSE(ParserGenerator::parse_test("Identifier", " "));
 97 |   EXPECT_FALSE(ParserGenerator::parse_test("Identifier", " a"));
 98 |   EXPECT_FALSE(ParserGenerator::parse_test("Identifier", ""));
 99 | }
100 | // "IdentStart": "_" and letters match; empty, space and a digit do not.
101 | TEST(PEGTest, PEG_IdentStart) {
102 |   EXPECT_TRUE(ParserGenerator::parse_test("IdentStart", "_"));
103 |   EXPECT_TRUE(ParserGenerator::parse_test("IdentStart", "a"));
104 |   EXPECT_TRUE(ParserGenerator::parse_test("IdentStart", "Z"));
105 |   EXPECT_FALSE(ParserGenerator::parse_test("IdentStart", ""));
106 |   EXPECT_FALSE(ParserGenerator::parse_test("IdentStart", " "));
107 |   EXPECT_FALSE(ParserGenerator::parse_test("IdentStart", "0"));
108 | }
109 | // "IdentRest": same cases as IdentStart, except that a digit matches too.
110 | TEST(PEGTest, PEG_IdentRest) {
111 |   EXPECT_TRUE(ParserGenerator::parse_test("IdentRest", "_"));
112 |   EXPECT_TRUE(ParserGenerator::parse_test("IdentRest", "a"));
113 |   EXPECT_TRUE(ParserGenerator::parse_test("IdentRest", "Z"));
114 |   EXPECT_FALSE(ParserGenerator::parse_test("IdentRest", ""));
115 |   EXPECT_FALSE(ParserGenerator::parse_test("IdentRest", " "));
116 |   EXPECT_TRUE(ParserGenerator::parse_test("IdentRest", "0"));
117 | }
118 | // "Literal": quoted text with escapes, octal codes and UTF-8 content.
119 | TEST(PEGTest, PEG_Literal) {
120 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'abc' "));
121 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'a\\nb\\tc' "));
122 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'a\\277\tc' "));
123 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'a\\77\tc' "));
124 |   EXPECT_FALSE(ParserGenerator::parse_test("Literal", "'a\\80\tc' "));
125 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'\n' "));
126 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'a\\'b' "));
127 |   EXPECT_FALSE(ParserGenerator::parse_test("Literal", "'a'b' "));
128 |   EXPECT_FALSE(ParserGenerator::parse_test("Literal", "'a\"'b' "));
129 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", "\"'\\\"abc\\\"'\" "));
130 |   EXPECT_FALSE(ParserGenerator::parse_test("Literal", "\"'\"abc\"'\" "));
131 |   EXPECT_FALSE(ParserGenerator::parse_test("Literal", "abc"));
132 |   EXPECT_FALSE(ParserGenerator::parse_test("Literal", ""));
133 |   EXPECT_FALSE(ParserGenerator::parse_test("Literal", "\\"));
134 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", u8"'日本語'"));
135 |   EXPECT_TRUE(ParserGenerator::parse_test("Literal", u8"\"日本語\""));
136 |   EXPECT_FALSE(ParserGenerator::parse_test("Literal", u8"日本語"));
137 | }
138 | // "Class": non-empty bracketed sets; "-" may appear first, last or escaped.
139 | TEST(PEGTest, PEG_Class) {
140 |   EXPECT_FALSE(ParserGenerator::parse_test(
141 |       "Class", "[]")); // NOTE: This differs from Brian Ford's paper,
142 |                        // but is the same as RegExp
143 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a]"));
144 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a-z]"));
145 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[az]"));
146 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a-zA-Z-]"));
147 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a-zA-Z-0-9]"));
148 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a-]"));
149 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[-a]"));
150 |   EXPECT_FALSE(ParserGenerator::parse_test("Class", "["));
151 |   EXPECT_FALSE(ParserGenerator::parse_test("Class", "[a"));
152 |   EXPECT_FALSE(ParserGenerator::parse_test("Class", "]"));
153 |   EXPECT_FALSE(ParserGenerator::parse_test("Class", "a]"));
154 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", u8"[あ-ん]"));
155 |   EXPECT_FALSE(ParserGenerator::parse_test("Class", u8"あ-ん"));
156 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[-+]"));
157 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[+-]"));
158 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[\\^]"));
159 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[-]"));
160 |   EXPECT_TRUE(ParserGenerator::parse_test("Class", "[\\-]"));
161 | }
162 | // "NegatedClass": bracketed sets opened with "[^"; the empty "[^]" is rejected.
163 | TEST(PEGTest, PEG_Negated_Class) {
164 |   EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "[^]"));
165 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a]"));
166 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a-z]"));
167 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^az]"));
168 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a-zA-Z-]"));
169 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a-zA-Z-0-9]"));
170 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a-]"));
171 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^-a]"));
172 |   EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "[^"));
173 |   EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "[^a"));
174 |   EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "^]"));
175 |   EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "^a]"));
176 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", u8"[^あ-ん]"));
177 |   EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", u8"^あ-ん"));
178 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^-+]"));
179 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^+-]"));
180 |   EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^^]"));
181 | }
182 | 
183 | TEST(PEGTest, PEG_Range) {
184 |   // Inputs expected to parse as "Range" are checked first, followed by the
185 |   // inputs expected to be rejected.
186 |   for (const char *text : {"a", "a-z"})
187 |     EXPECT_TRUE(ParserGenerator::parse_test("Range", text)) << text;
188 |   for (const char *text : {"az", "", "a-", "-a"})
189 |     EXPECT_FALSE(ParserGenerator::parse_test("Range", text)) << text;
190 | }
191 | 
192 | TEST(PEGTest, PEG_Char) {
193 |   // Inputs for the "Char" rule, each paired with the outcome that is expected
194 |   // for it. The table is split into groups by the form of the input, and the
195 |   // cases are checked from top to bottom.
196 |   struct Case {
197 |     const char *text;
198 |     bool ok;
199 |   };
200 |   const Case cases[] = {
201 |       // Backslash followed by a single letter or punctuation character.
202 |       {"\\f", true}, {"\\n", true}, {"\\r", true},
203 |       {"\\t", true}, {"\\v", true},
204 |       {"\\'", true}, {"\\\"", true},
205 |       {"\\[", true}, {"\\]", true},
206 |       {"\\\\", true},
207 | 
208 |       // Backslash followed by one to three digits.
209 |       {"\\000", true}, {"\\377", true},
210 |       {"\\477", false}, {"\\087", false}, {"\\079", false},
211 |       {"\\00", true}, {"\\77", true},
212 |       {"\\80", false}, {"\\08", false},
213 |       {"\\0", true}, {"\\7", true},
214 |       {"\\8", false},
215 | 
216 |       // Backslash, 'x', then one to three further characters.
217 |       {"\\x0", true}, {"\\x00", true},
218 |       {"\\x000", false},
219 |       {"\\xa", true}, {"\\xab", true},
220 |       {"\\xabc", false},
221 |       {"\\xA", true}, {"\\xAb", true},
222 |       {"\\xAbc", false},
223 |       {"\\xg", false}, {"\\xga", false},
224 | 
225 |       // Backslash, 'u', then between one and seven hexadecimal digits.
226 |       {"\\u0", false}, {"\\u00", false},
227 |       {"\\u0000", true}, {"\\u000000", true},
228 |       {"\\u0000000", false},
229 |       {"\\uFFFF", true},
230 |       {"\\u10000", true}, {"\\u10FFFF", true},
231 |       {"\\u110000", false}, {"\\uFFFFFF", false},
232 | 
233 |       // Unescaped characters, a lone backslash, two blanks, the empty input.
234 |       {"a", true}, {".", true}, {"0", true},
235 |       {"\\", false},
236 |       {" ", true},
237 |       {"  ", false}, {"", false},
238 |       {u8"あ", true},
239 |   };
240 |   // The input is streamed into the assertion so that a failure report names
241 |   // the case that failed.
242 |   for (const auto &c : cases)
243 |     EXPECT_EQ(c.ok, ParserGenerator::parse_test("Char", c.text)) << c.text;
244 | }
245 | 
246 | TEST(PEGTest, PEG_Operators) {
247 |   // Pairs of {rule name, input}: every operator rule listed here is expected
248 |   // to accept the paired input. All inputs except "<-" carry one trailing
249 |   // blank.
250 |   const char *const tokens[][2] = {
251 |       {"LEFTARROW", "<-"}, {"SLASH", "/ "}, {"AND", "& "}, {"NOT", "! "},
252 |       {"QUESTION", "? "}, {"STAR", "* "}, {"PLUS", "+ "}, {"OPEN", "( "},
253 |       {"CLOSE", ") "}, {"DOT", ". "},
254 |   };
255 |   for (const auto &token : tokens)
256 |     EXPECT_TRUE(ParserGenerator::parse_test(token[0], token[1])) << token[0];
257 | }
258 | 
259 | TEST(PEGTest, PEG_Comment) {
260 |   for (const char *text : {"# Comment.\n", "# Comment."}) // should parse
261 |     EXPECT_TRUE(ParserGenerator::parse_test("Comment", text)) << text;
262 |   for (const char *text : {" ", "a"}) // no leading '#': should not parse
263 |     EXPECT_FALSE(ParserGenerator::parse_test("Comment", text)) << text;
264 | }
265 | 
266 | TEST(PEGTest, PEG_Space) {
267 |   // A blank, a tab and a newline are each expected to parse as "Space".
268 |   for (const char *text : {" ", "\t", "\n"})
269 |     EXPECT_TRUE(ParserGenerator::parse_test("Space", text)) << text;
270 |   for (const char *text : {"", "a"}) // empty input and a letter are not
271 |     EXPECT_FALSE(ParserGenerator::parse_test("Space", text)) << text;
272 | }
273 | 
274 | TEST(PEGTest, PEG_EndOfLine) {
275 |   // "\r\n", "\n" and "\r" are each expected to parse as "EndOfLine"; a
276 |   // blank, the empty input and a letter are expected to be rejected.
277 |   for (const char *text : {"\r\n", "\n", "\r"})
278 |     EXPECT_TRUE(ParserGenerator::parse_test("EndOfLine", text)) << text;
279 |   for (const char *text : {" ", "", "a"})
280 |     EXPECT_FALSE(ParserGenerator::parse_test("EndOfLine", text)) << text;
281 | }
282 | 
283 | TEST(PEGTest, PEG_EndOfFile) {
284 |   for (const char *text : {"", " "}) // a match is expected only for ""
285 |     EXPECT_EQ(*text == '\0', ParserGenerator::parse_test("EndOfFile", text));
286 | }
287 | 


--------------------------------------------------------------------------------