├── .clang-format ├── .github └── workflows │ └── cmake.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── docs ├── build.sh ├── index.html ├── index.js ├── native.cpp ├── native.js ├── native.wasm └── style.css ├── example ├── CMakeLists.txt ├── calc.cc ├── calc2.cc ├── calc3.cc ├── calc4.cc ├── calc5.cc ├── choice.cc ├── docx.cc ├── enter_leave.cc ├── indent.cc └── sequence.cc ├── grammar ├── cpp-peglib.peg ├── csv.peg ├── json.peg └── pl0.peg ├── lint ├── CMakeLists.txt ├── README.md └── peglint.cc ├── peg.vim ├── peglib.h ├── pl0 ├── CMakeLists.txt ├── Makefile ├── README.md ├── pl0.cc └── samples │ ├── fib.pas │ ├── gcd.pas │ └── square.pas └── test ├── CMakeLists.txt ├── test1.cc ├── test2.cc └── test3.cc /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | AllowShortBlocksOnASingleLine: true 3 | AllowShortCaseLabelsOnASingleLine: true 4 | AllowShortIfStatementsOnASingleLine: true 5 | Cpp11BracedListStyle: true 6 | -------------------------------------------------------------------------------- /.github/workflows/cmake.yml: -------------------------------------------------------------------------------- 1 | name: CMake 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | BUILD_TYPE: Release 7 | 8 | jobs: 9 | build: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest, ubuntu-24.04, macos-latest, windows-latest, windows-2019] 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Configure CMake 19 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} 20 | 21 | - name: Build 22 | run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} 23 | 24 | - name: Test 25 | working-directory: ${{github.workspace}}/build 26 | run: ctest -C ${{env.BUILD_TYPE}} 27 | 28 | - name: Configure CMake with C++20 29 | run: cmake -B ${{github.workspace}}/build_20 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} 
-DCMAKE_CXX_STANDARD=20 30 | 31 | - name: Build with C++20 32 | run: cmake --build ${{github.workspace}}/build_20 --config ${{env.BUILD_TYPE}} 33 | 34 | - name: Test with C++20 35 | working-directory: ${{github.workspace}}/build_20 36 | run: ctest -C ${{env.BUILD_TYPE}} 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | # Others 31 | *.dSYM 32 | *.swp 33 | Debug 34 | Release 35 | *.suo 36 | *.sdf 37 | *.user 38 | xcuserdata 39 | *.xcworkspace 40 | temp* 41 | build*/ 42 | Makefile 43 | CMakeFiles 44 | CMakeCache.txt 45 | *.cmake 46 | *.vcxproj.filters 47 | *.opensdf 48 | .idea/ 49 | grammar/test/* 50 | .DS_Store 51 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
1 | # Top-level build script for cpp-peglib, a header-only library (peglib.h).
2 | # Defines the `peglib` INTERFACE target and optionally builds the tests,
3 | # the lint utility, the examples and the PL/0 interpreter.
4 | cmake_minimum_required(VERSION 3.14)
5 | project(peglib)
6 | 
7 | # Default to C++17, but keep a standard the user already selected
8 | # (e.g. `-DCMAKE_CXX_STANDARD=20`, as the CI workflow does). An unconditional
9 | # set() creates a normal variable that hides the cache entry coming from -D.
10 | if(NOT DEFINED CMAKE_CXX_STANDARD)
11 |   set(CMAKE_CXX_STANDARD 17)
12 | endif()
13 | set(CMAKE_CXX_EXTENSIONS OFF)
14 | 
15 | # Directory-wide compiler flags, inherited by every subdirectory added below.
16 | if(MSVC)
17 |   # /Zc:__cplusplus : make __cplusplus report the real language level
18 |   # /utf-8          : treat source and execution character sets as UTF-8
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus /utf-8 /D_CRT_SECURE_NO_DEPRECATE")
20 | else()
21 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
22 | endif()
23 | 
24 | # Linux builds need to link against the thread library (see the README note
25 | # about `-pthread`). `add_link_deps` is only set on Linux; it is presumably
26 | # consumed by the subdirectory CMakeLists files -- verify there.
27 | set(THREADS_PREFER_PTHREAD_FLAG ON)
28 | find_package(Threads)
29 | 
30 | if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
31 |   set(add_link_deps Threads::Threads)
32 | endif()
33 | 
34 | # Header-only library: consumers only need the include path of peglib.h.
35 | add_library(peglib INTERFACE)
36 | target_include_directories(peglib INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
37 | 
38 | # Optional components. Only the tests are built by default.
39 | option(BUILD_TESTS "Build cpp-peglib tests" ON)
40 | option(PEGLIB_BUILD_LINT "Build cpp-peglib lint utility" OFF)
41 | option(PEGLIB_BUILD_EXAMPLES "Build cpp-peglib examples" OFF)
42 | option(PEGLIB_BUILD_PL0 "Build pl0 interpreter" OFF)
43 | 
44 | if(BUILD_TESTS)
45 |   # enable_testing() applies to the current directory and below, so it has
46 |   # to run before the test directory is added.
47 |   enable_testing()
48 |   add_subdirectory(test)
49 | endif()
50 | 
51 | if(PEGLIB_BUILD_LINT)
52 |   add_subdirectory(lint)
53 | endif()
54 | 
55 | if(PEGLIB_BUILD_EXAMPLES)
56 |   add_subdirectory(example)
57 | endif()
58 | 
59 | if(PEGLIB_BUILD_PL0)
60 |   add_subdirectory(pl0)
61 | endif()
62 | 
63 | # Installing the library means installing its single header.
64 | install(FILES peglib.h DESTINATION include)
65 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 yhirose 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cpp-peglib 2 | ========== 3 | 4 | [![](https://github.com/yhirose/cpp-peglib/workflows/CMake/badge.svg)](https://github.com/yhirose/cpp-peglib/actions) 5 | 6 | C++17 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library. You can start using it right away just by including `peglib.h` in your project. 7 | 8 | Since this library only supports C++17 compilers, please make sure that the compiler option `-std=c++17` is enabled. 9 | (`/std:c++17 /Zc:__cplusplus` for MSVC) 10 | 11 | You can also try the online version, PEG Playground at https://yhirose.github.io/cpp-peglib. 12 | 13 | The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf) by Bryan Ford. *cpp-peglib* also supports the following additional syntax for now: 14 | 15 | * `'...'i` (Case-insensitive literal operator) 16 | * `[...]i` (Case-insensitive character class operator) 17 | * `[^...]` (Negated character class operator) 18 | * `[^...]i` (Case-insensitive negated character class operator) 19 | * `{2,5}` (Regex-like repetition operator) 20 | * `<` ... `>` (Token boundary operator) 21 | * `~` (Ignore operator) 22 | * `\x20` (Hex number char) 23 | * `\u10FFFF` (Unicode char) 24 | * `%whitespace` (Automatic whitespace skipping) 25 | * `%word` (Word expression) 26 | * `$name(` ... `)` (Capture scope operator) 27 | * `$name<` ... `>` (Named capture operator) 28 | * `$name` (Backreference operator) 29 | * `|` (Dictionary operator) 30 | * `↑` (Cut operator) 31 | * `MACRO_NAME(` ... `)` (Parameterized rule or Macro) 32 | * `{ precedence L - + L / * }` (Parsing infix expression) 33 | * `%recovery(` ... 
`)` (Error recovery operator) 34 | * `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`) 35 | * `label { error_message "..." }` (Error message instruction) 36 | * `{ no_ast_opt }` (No AST node optimization instruction) 37 | 38 | 'End of Input' check will be done as default. To disable the check, please call `disable_eoi_check`. 39 | 40 | This library supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing. 41 | 42 | IMPORTANT NOTE for some Linux distributions such as Ubuntu and CentOS: Need `-pthread` option when linking. See [#23](https://github.com/yhirose/cpp-peglib/issues/23#issuecomment-261126127), [#46](https://github.com/yhirose/cpp-peglib/issues/46#issuecomment-417870473) and [#62](https://github.com/yhirose/cpp-peglib/issues/62#issuecomment-492032680). 43 | 44 | I am sure that you will enjoy this excellent ["Practical parsing with PEG and cpp-peglib"](https://berthub.eu/articles/posts/practical-peg-parsing/) article by [bert hubert](https://berthub.eu/)! 45 | 46 | How to use 47 | ---------- 48 | 49 | This is a simple calculator sample. It shows how to define grammar, associate semantic actions to the grammar, and handle semantic values. 50 | 51 | ```cpp 52 | // (1) Include the header file 53 | #include 54 | #include 55 | #include 56 | 57 | using namespace peg; 58 | using namespace std; 59 | 60 | int main(void) { 61 | // (2) Make a parser 62 | parser parser(R"( 63 | # Grammar for Calculator... 
64 | Additive <- Multiplicative '+' Additive / Multiplicative 65 | Multiplicative <- Primary '*' Multiplicative / Primary 66 | Primary <- '(' Additive ')' / Number 67 | Number <- < [0-9]+ > 68 | %whitespace <- [ \t]* 69 | )"); 70 | 71 | assert(static_cast(parser) == true); 72 | 73 | // (3) Setup actions 74 | parser["Additive"] = [](const SemanticValues &vs) { 75 | switch (vs.choice()) { 76 | case 0: // "Multiplicative '+' Additive" 77 | return any_cast(vs[0]) + any_cast(vs[1]); 78 | default: // "Multiplicative" 79 | return any_cast(vs[0]); 80 | } 81 | }; 82 | 83 | parser["Multiplicative"] = [](const SemanticValues &vs) { 84 | switch (vs.choice()) { 85 | case 0: // "Primary '*' Multiplicative" 86 | return any_cast(vs[0]) * any_cast(vs[1]); 87 | default: // "Primary" 88 | return any_cast(vs[0]); 89 | } 90 | }; 91 | 92 | parser["Number"] = [](const SemanticValues &vs) { 93 | return vs.token_to_number(); 94 | }; 95 | 96 | // (4) Parse 97 | parser.enable_packrat_parsing(); // Enable packrat parsing. 98 | 99 | int val; 100 | parser.parse(" (1 + 2) * 3 ", val); 101 | 102 | assert(val == 9); 103 | } 104 | ``` 105 | 106 | To show syntax errors in grammar text: 107 | 108 | ```cpp 109 | auto grammar = R"( 110 | # Grammar for Calculator... 
111 | Additive <- Multiplicative '+' Additive / Multiplicative 112 | Multiplicative <- Primary '*' Multiplicative / Primary 113 | Primary <- '(' Additive ')' / Number 114 | Number <- < [0-9]+ > 115 | %whitespace <- [ \t]* 116 | )"; 117 | 118 | parser parser; 119 | 120 | parser.set_logger([](size_t line, size_t col, const string& msg, const string &rule) { 121 | cerr << line << ":" << col << ": " << msg << "\n"; 122 | }); 123 | 124 | auto ok = parser.load_grammar(grammar); 125 | assert(ok); 126 | ``` 127 | 128 | There are four semantic actions available: 129 | 130 | ```cpp 131 | [](const SemanticValues& vs, any& dt) 132 | [](const SemanticValues& vs) 133 | [](SemanticValues& vs, any& dt) 134 | [](SemanticValues& vs) 135 | ``` 136 | 137 | `SemanticValues` value contains the following information: 138 | 139 | * Semantic values 140 | * Matched string information 141 | * Token information if the rule is literal or uses a token boundary operator 142 | * Choice number when the rule is 'prioritized choice' 143 | 144 | `any& dt` is a 'read-write' context data which can be used for whatever purposes. The initial context data is set in `peg::parser::parse` method. 145 | 146 | A semantic action can return a value of arbitrary data type, which will be wrapped by `peg::any`. If a user returns nothing in a semantic action, the first semantic value in the `const SemanticValues& vs` argument will be returned. (Yacc parser has the same behavior.) 
147 | 148 | Here shows the `SemanticValues` structure: 149 | 150 | ```cpp 151 | struct SemanticValues : protected std::vector 152 | { 153 | // Input text 154 | const char* path; 155 | const char* ss; 156 | 157 | // Matched string 158 | std::string_view sv() const { return sv_; } 159 | 160 | // Line number and column at which the matched string is 161 | std::pair line_info() const; 162 | 163 | // Tokens 164 | std::vector tokens; 165 | std::string_view token(size_t id = 0) const; 166 | 167 | // Token conversion 168 | std::string token_to_string(size_t id = 0) const; 169 | template T token_to_number() const; 170 | 171 | // Choice number (0 based index) 172 | size_t choice() const; 173 | 174 | // Transform the semantic value vector to another vector 175 | template vector transform(size_t beg = 0, size_t end = -1) const; 176 | } 177 | ``` 178 | 179 | The following example uses `<` ... `>` operator, which is *token boundary* operator. 180 | 181 | ```cpp 182 | peg::parser parser(R"( 183 | ROOT <- _ TOKEN (',' _ TOKEN)* 184 | TOKEN <- < [a-z0-9]+ > _ 185 | _ <- [ \t\r\n]* 186 | )"); 187 | 188 | parser["TOKEN"] = [](const SemanticValues& vs) { 189 | // 'token' doesn't include trailing whitespaces 190 | auto token = vs.token(); 191 | }; 192 | 193 | auto ret = parser.parse(" token1, token2 "); 194 | ``` 195 | 196 | We can ignore unnecessary semantic values from the list by using `~` operator. 197 | 198 | ```cpp 199 | peg::parser parser(R"( 200 | ROOT <- _ ITEM (',' _ ITEM _)* 201 | ITEM <- ([a-z0-9])+ 202 | ~_ <- [ \t]* 203 | )"); 204 | 205 | parser["ROOT"] = [&](const SemanticValues& vs) { 206 | assert(vs.size() == 2); // should be 2 instead of 5. 207 | }; 208 | 209 | auto ret = parser.parse(" item1, item2 "); 210 | ``` 211 | 212 | The following grammar is the same as the above. 
213 | 214 | ```cpp 215 | peg::parser parser(R"( 216 | ROOT <- ~_ ITEM (',' ~_ ITEM ~_)* 217 | ITEM <- ([a-z0-9])+ 218 | _ <- [ \t]* 219 | )"); 220 | ``` 221 | 222 | *Semantic predicate* support is available with a *predicate* action. 223 | 224 | ```cpp 225 | peg::parser parser("NUMBER <- [0-9]+"); 226 | 227 | parser["NUMBER"] = [](const SemanticValues &vs) { 228 | return vs.token_to_number(); 229 | }; 230 | 231 | parser["NUMBER"].predicate = [](const SemanticValues &vs, 232 | const std::any & /*dt*/, std::string &msg) { 233 | if (vs.token_to_number() != 100) { 234 | msg = "value error!!"; 235 | return false; 236 | } 237 | return true; 238 | }; 239 | 240 | long val; 241 | auto ret = parser.parse("100", val); 242 | assert(ret == true); 243 | assert(val == 100); 244 | 245 | ret = parser.parse("200", val); 246 | assert(ret == false); 247 | ``` 248 | 249 | *enter* and *leave* actions are also available. 250 | 251 | ```cpp 252 | parser["RULE"].enter = [](const Context &c, const char* s, size_t n, any& dt) { 253 | std::cout << "enter" << std::endl; 254 | }; 255 | 256 | parser["RULE"] = [](const SemanticValues& vs, any& dt) { 257 | std::cout << "action!" << std::endl; 258 | }; 259 | 260 | parser["RULE"].leave = [](const Context &c, const char* s, size_t n, size_t matchlen, any& value, any& dt) { 261 | std::cout << "leave" << std::endl; 262 | }; 263 | ``` 264 | 265 | You can receive error information via a logger: 266 | 267 | ```cpp 268 | parser.set_logger([](size_t line, size_t col, const string& msg) { 269 | ... 270 | }); 271 | 272 | parser.set_logger([](size_t line, size_t col, const string& msg, const string &rule) { 273 | ... 274 | }); 275 | ``` 276 | 277 | Ignoring Whitespaces 278 | -------------------- 279 | 280 | As you can see in the first example, we can ignore whitespaces between tokens automatically with `%whitespace` rule. 
281 | 282 | `%whitespace` rule can be applied to the following three conditions: 283 | 284 | * trailing spaces on tokens 285 | * leading spaces on text 286 | * trailing spaces on literal strings in rules 287 | 288 | These are valid tokens: 289 | 290 | ``` 291 | KEYWORD <- 'keyword' 292 | KEYWORDI <- 'case_insensitive_keyword' 293 | WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used. 294 | IDENT <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used. 295 | ``` 296 | 297 | The following grammar accepts ` one, "two three", four `. 298 | 299 | ``` 300 | ROOT <- ITEM (',' ITEM)* 301 | ITEM <- WORD / PHRASE 302 | WORD <- < [a-z]+ > 303 | PHRASE <- < '"' (!'"' .)* '"' > 304 | 305 | %whitespace <- [ \t\r\n]* 306 | ``` 307 | 308 | Word expression 309 | --------------- 310 | 311 | ```cpp 312 | peg::parser parser(R"( 313 | ROOT <- 'hello' 'world' 314 | %whitespace <- [ \t\r\n]* 315 | %word <- [a-z]+ 316 | )"); 317 | 318 | parser.parse("hello world"); // OK 319 | parser.parse("helloworld"); // NG 320 | ``` 321 | 322 | Capture/Backreference 323 | --------------------- 324 | 325 | ```cpp 326 | peg::parser parser(R"( 327 | ROOT <- CONTENT 328 | CONTENT <- (ELEMENT / TEXT)* 329 | ELEMENT <- $(STAG CONTENT ETAG) 330 | STAG <- '<' $tag< TAG_NAME > '>' 331 | ETAG <- '</' $tag '>' 332 | TAG_NAME <- 'b' / 'u' 333 | TEXT <- TEXT_DATA 334 | TEXT_DATA <- ![<] . 335 | )"); 336 | 337 | parser.parse("This is <b>a <u>test</u> text</b>."); // OK 338 | parser.parse("This is <b>a <u>test</b> text</u>."); // NG 339 | parser.parse("This is <b>a <u>test text</b>."); // NG 340 | ``` 341 | 342 | Dictionary 343 | ---------- 344 | 345 | `|` operator allows us to make a word dictionary for fast lookup by using Trie structure internally. We don't have to worry about the order of words. 346 | 347 | ```peg 348 | START <- 'This month is ' MONTH '.' 349 | MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...' 350 | ``` 351 | 352 | We are able to find which item is matched with `choice()`. 
353 | 354 | ```cpp 355 | parser["MONTH"] = [](const SemanticValues &vs) { 356 | auto id = vs.choice(); 357 | }; 358 | ``` 359 | 360 | It supports the case-insensitive mode. 361 | 362 | ```peg 363 | START <- 'This month is ' MONTH '.' 364 | MONTH <- 'Jan'i | 'January'i | 'Feb'i | 'February'i | '...'i 365 | ``` 366 | 367 | Cut operator 368 | ------------ 369 | 370 | `↑` operator could mitigate the backtrack performance problem, but has a risk to change the meaning of grammar. 371 | 372 | ```peg 373 | S <- '(' ↑ P ')' / '"' ↑ P '"' / P 374 | P <- 'a' / 'b' / 'c' 375 | ``` 376 | 377 | When we parse `(z` with the above grammar, we don't have to backtrack in `S` after `(` is matched, because a cut operator is inserted there. 378 | 379 | Parameterized Rule or Macro 380 | --------------------------- 381 | 382 | ```peg 383 | # Syntax 384 | Start ← _ Expr 385 | Expr ← Sum 386 | Sum ← List(Product, SumOpe) 387 | Product ← List(Value, ProOpe) 388 | Value ← Number / T('(') Expr T(')') 389 | 390 | # Token 391 | SumOpe ← T('+' / '-') 392 | ProOpe ← T('*' / '/') 393 | Number ← T([0-9]+) 394 | ~_ ← [ \t\r\n]* 395 | 396 | # Macro 397 | List(I, D) ← I (D I)* 398 | T(x) ← < x > _ 399 | ``` 400 | 401 | Parsing infix expression by Precedence climbing 402 | ----------------------------------------------- 403 | 404 | Regarding the *precedence climbing algorithm*, please see [this article](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing). 405 | 406 | ```cpp 407 | parser parser(R"( 408 | EXPRESSION <- INFIX_EXPRESSION(ATOM, OPERATOR) 409 | ATOM <- NUMBER / '(' EXPRESSION ')' 410 | OPERATOR <- < [-+/*] > 411 | NUMBER <- < '-'? 
[0-9]+ > 412 | %whitespace <- [ \t]* 413 | 414 | # Declare order of precedence 415 | INFIX_EXPRESSION(A, O) <- A (O A)* { 416 | precedence 417 | L + - 418 | L * / 419 | } 420 | )"); 421 | 422 | parser["INFIX_EXPRESSION"] = [](const SemanticValues& vs) -> long { 423 | auto result = any_cast<long>(vs[0]); 424 | if (vs.size() > 1) { 425 | auto ope = any_cast<char>(vs[1]); 426 | auto num = any_cast<long>(vs[2]); 427 | switch (ope) { 428 | case '+': result += num; break; 429 | case '-': result -= num; break; 430 | case '*': result *= num; break; 431 | case '/': result /= num; break; 432 | } 433 | } 434 | return result; 435 | }; 436 | parser["OPERATOR"] = [](const SemanticValues& vs) { return *vs.sv(); }; 437 | parser["NUMBER"] = [](const SemanticValues& vs) { return vs.token_to_number<long>(); }; 438 | 439 | long val; 440 | parser.parse(" -1 + (1 + 2) * 3 - -1", val); 441 | assert(val == 9); 442 | ``` 443 | 444 | *precedence* instruction can be applied only to the following 'list' style rule. 445 | 446 | ``` 447 | Rule <- Atom (Operator Atom)* { 448 | precedence 449 | L - + 450 | L / * 451 | R ^ 452 | } 453 | ``` 454 | 455 | *precedence* instruction contains precedence info entries. Each entry starts with *associativity* which is 'L' (left) or 'R' (right), then operator *literal* tokens follow. The first entry is the outermost level, i.e. its operators bind least tightly; each following entry binds more tightly (in the example above `*` and `/` take precedence over `+` and `-`). 456 | 457 | AST generation 458 | -------------- 459 | 460 | *cpp-peglib* is able to generate an AST (Abstract Syntax Tree) when parsing. `enable_ast` method on `peg::parser` class enables the feature. 461 | 462 | NOTE: An AST node holds a corresponding token as `std::string_view` for performance and less memory usage. It is users' responsibility to keep the original source text along with the generated AST tree. 463 | 464 | ``` 465 | peg::parser parser(R"( 466 | ... 467 | definition1 <- ... { no_ast_opt } 468 | definition2 <- ... { no_ast_opt } 469 | ... 
470 | )"); 471 | 472 | parser.enable_ast(); 473 | 474 | shared_ptr ast; 475 | if (parser.parse("...", ast)) { 476 | cout << peg::ast_to_s(ast); 477 | 478 | ast = parser.optimize_ast(ast); 479 | cout << peg::ast_to_s(ast); 480 | } 481 | ``` 482 | 483 | `optimize_ast` removes redundant nodes to make an AST simpler. If you want to disable this behavior from particular rules, `no_ast_opt` instruction can be used. 484 | 485 | It internally calls `peg::AstOptimizer` to do the job. You can make your own AST optimizers to fit your needs. 486 | 487 | See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc). 488 | 489 | Make a parser with parser combinators 490 | ------------------------------------- 491 | 492 | Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser combinators*. Here is an example: 493 | 494 | ```cpp 495 | using namespace peg; 496 | using namespace std; 497 | 498 | vector tags; 499 | 500 | Definition ROOT, TAG_NAME, _; 501 | ROOT <= seq(_, zom(seq(chr('['), TAG_NAME, chr(']'), _))); 502 | TAG_NAME <= oom(seq(npd(chr(']')), dot())), [&](const SemanticValues& vs) { 503 | tags.push_back(vs.token_to_string()); 504 | }; 505 | _ <= zom(cls(" \t")); 506 | 507 | auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); 508 | ``` 509 | 510 | The following are available operators: 511 | 512 | | Operator | Description | Operator | Description | 513 | |:---------|:--------------------------------|:---------|:--------------------| 514 | | seq | Sequence | cho | Prioritized Choice | 515 | | zom | Zero or More | oom | One or More | 516 | | opt | Optional | apd | And predicate | 517 | | npd | Not predicate | lit | Literal string | 518 | | liti | Case-insensitive Literal string | cls | Character class | 519 | | ncls | Negated Character class | chr | Character | 520 | | dot | Any 
character | tok | Token boundary | 521 | | ign | Ignore semantic value | csc | Capture scope | 522 | | cap | Capture | bkr | Back reference | 523 | | dic | Dictionary | pre | Infix expression | 524 | | rec | Error recovery | usr | User defined parser | 525 | | rep | Repetition | | | 526 | 527 | Adjust definitions 528 | ------------------ 529 | 530 | It's possible to add/override definitions. 531 | 532 | ```cpp 533 | auto syntax = R"( 534 | ROOT <- _ 'Hello' _ NAME '!' _ 535 | )"; 536 | 537 | Rules additional_rules = { 538 | { 539 | "NAME", usr([](const char* s, size_t n, SemanticValues& vs, any& dt) -> size_t { 540 | static vector<string> names = { "PEG", "BNF" }; 541 | for (const auto& name: names) { 542 | if (name.size() <= n && !name.compare(0, name.size(), s, name.size())) { 543 | return name.size(); // processed length 544 | } 545 | } 546 | return -1; // parse error 547 | }) 548 | }, 549 | { 550 | "~_", zom(cls(" \t\r\n")) 551 | } 552 | }; 553 | 554 | auto g = parser(syntax, additional_rules); 555 | 556 | assert(g.parse(" Hello BNF! ")); 557 | ``` 558 | 559 | Unicode support 560 | --------------- 561 | 562 | cpp-peglib accepts UTF8 text. `.` matches a Unicode codepoint. Also, it supports `\u????`. 563 | 564 | Error report and recovery 565 | ------------------------- 566 | 567 | cpp-peglib supports the furthest failure error position report as described in the Bryan Ford original document. 568 | 569 | For better error report and recovery, cpp-peglib supports 'recovery' operator with label which can be associated with a recovery expression and a custom error message. This idea comes from the fantastic ["Syntax Error Recovery in Parsing Expression Grammars"](https://arxiv.org/pdf/1806.11150.pdf) paper by Sergio Medeiros and Fabio Mascarenhas. 570 | 571 | The custom message supports `%t` which is a placeholder for the unexpected token, and `%c` for the unexpected Unicode char. 
572 | 573 | Here is an example of Java-like grammar: 574 | 575 | ```peg 576 | # java.peg 577 | Prog ← 'public' 'class' NAME '{' 'public' 'static' 'void' 'main' '(' 'String' '[' ']' NAME ')' BlockStmt '}' 578 | BlockStmt ← '{' (!'}' Stmt^stmtb)* '}' # Annotated with `stmtb` 579 | Stmt ← IfStmt / WhileStmt / PrintStmt / DecStmt / AssignStmt / BlockStmt 580 | IfStmt ← 'if' '(' Exp ')' Stmt ('else' Stmt)? 581 | WhileStmt ← 'while' '(' Exp^condw ')' Stmt # Annotated with `condw` 582 | DecStmt ← 'int' NAME ('=' Exp)? ';' 583 | AssignStmt ← NAME '=' Exp ';'^semia # Annotated with `semia` 584 | PrintStmt ← 'System.out.println' '(' Exp ')' ';' 585 | Exp ← RelExp ('==' RelExp)* 586 | RelExp ← AddExp ('<' AddExp)* 587 | AddExp ← MulExp (('+' / '-') MulExp)* 588 | MulExp ← AtomExp (('*' / '/') AtomExp)* 589 | AtomExp ← '(' Exp ')' / NUMBER / NAME 590 | 591 | NUMBER ← < [0-9]+ > 592 | NAME ← < [a-zA-Z_][a-zA-Z_0-9]* > 593 | 594 | %whitespace ← [ \t\n]* 595 | %word ← NAME 596 | 597 | # Recovery operator labels 598 | semia ← '' { error_message "missing semicolon in assignment." } 599 | stmtb ← (!(Stmt / 'else' / '}') .)* { error_message "invalid statement" } 600 | condw ← &'==' ('==' RelExp)* / &'<' ('<' AddExp)* / (!')' .)* 601 | ``` 602 | 603 | For instance, `';'^semia` is a syntactic sugar for `(';' / %recovery(semia))`. The `%recovery` operator tries to recover from the error at ';' by skipping input text with the recovery expression `semia`. Also, `semia` is associated with the custom message "missing semicolon in assignment." 604 | 605 | Here is the result: 606 | 607 | ```java 608 | > cat sample.java 609 | public class Example { 610 | public static void main(String[] args) { 611 | int n = 5; 612 | int f = 1; 613 | while( < n) { 614 | f = f * n; 615 | n = n - 1 616 | }; 617 | System.out.println(f); 618 | } 619 | } 620 | 621 | > peglint java.peg sample.java 622 | sample.java:5:12: syntax error, unexpected '<', expecting '(', <NUMBER>, <NAME>. 623 | sample.java:8:5: missing semicolon in assignment. 
624 | sample.java:8:6: invalid statement 625 | ``` 626 | 627 | As you can see, it can now show more than one error, and provide more meaningful error messages than the default messages. 628 | 629 | ### Custom error message for definitions 630 | 631 | We can associate custom error messages to definitions. 632 | 633 | ```peg 634 | # custom_message.peg 635 | START <- CODE (',' CODE)* 636 | CODE <- < '0x' [a-fA-F0-9]+ > { error_message 'code format error...' } 637 | %whitespace <- [ \t]* 638 | ``` 639 | 640 | ``` 641 | > cat custom_message.txt 642 | 0x1234,0x@@@@,0xABCD 643 | 644 | > peglint custom_message.peg custom_message.txt 645 | custom_message.txt:1:8: code format error... 646 | ``` 647 | 648 | NOTE: If there is more than one element with an error message instruction in a prioritized choice, this feature may not work as you expect. 649 | 650 | Change the Start Definition Rule 651 | -------------------------------- 652 | 653 | We can change the start definition rule as below. 654 | 655 | ```cpp 656 | auto grammar = R"( 657 | Start <- A 658 | A <- B (',' B)* 659 | B <- '[one]' / '[two]' 660 | %whitespace <- [ \t\n]* 661 | )"; 662 | 663 | peg::parser parser(grammar, "A"); // Start Rule is "A" 664 | 665 | or 666 | 667 | peg::parser parser; 668 | parser.load_grammar(grammar, "A"); // Start Rule is "A" 669 | 670 | parser.parse(" [one] , [two] "); // OK 671 | ``` 672 | 673 | peglint - PEG syntax lint utility 674 | --------------------------------- 675 | 676 | ### Build peglint 677 | 678 | ``` 679 | > cd lint 680 | > mkdir build 681 | > cd build 682 | > cmake .. 
683 | > make 684 | > ./peglint 685 | usage: grammar_file_path [source_file_path] 686 | 687 | options: 688 | --source: source text 689 | --packrat: enable packrat memoise 690 | --ast: show AST tree 691 | --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction 692 | --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction 693 | --trace: show concise trace messages 694 | --profile: show profile report 695 | --verbose: verbose output for trace and profile 696 | ``` 697 | 698 | ### Grammar check 699 | 700 | ``` 701 | > cat a.peg 702 | Additive <- Multiplicative '+' Additive / Multiplicative 703 | Multiplicative <- Primary '*' Multiplicative / Primary 704 | Primary <- '(' Additive ')' / Number 705 | %whitespace <- [ \t\r\n]* 706 | 707 | > peglint a.peg 708 | [commandline]:3:35: 'Number' is not defined. 709 | ``` 710 | 711 | ### Source check 712 | 713 | ``` 714 | > cat a.peg 715 | Additive <- Multiplicative '+' Additive / Multiplicative 716 | Multiplicative <- Primary '*' Multiplicative / Primary 717 | Primary <- '(' Additive ')' / Number 718 | Number <- < [0-9]+ > 719 | %whitespace <- [ \t\r\n]* 720 | 721 | > peglint --source "1 + a * 3" a.peg 722 | [commandline]:1:3: syntax error 723 | ``` 724 | 725 | ### AST 726 | 727 | ``` 728 | > cat a.txt 729 | 1 + 2 * 3 730 | 731 | > peglint --ast a.peg a.txt 732 | + Additive 733 | + Multiplicative 734 | + Primary 735 | - Number (1) 736 | + Additive 737 | + Multiplicative 738 | + Primary 739 | - Number (2) 740 | + Multiplicative 741 | + Primary 742 | - Number (3) 743 | ``` 744 | 745 | ### AST optimization 746 | 747 | ``` 748 | > peglint --ast --opt --source "1 + 2 * 3" a.peg 749 | + Additive 750 | - Multiplicative[Number] (1) 751 | + Additive[Multiplicative] 752 | - Primary[Number] (2) 753 | - Multiplicative[Number] (3) 754 | ``` 755 | 756 | ### Adjust AST optimization with `no_ast_opt` instruction 757 | 758 | ``` 759 | > cat a.peg 760 | Additive <- Multiplicative '+' Additive 
/ Multiplicative 761 | Multiplicative <- Primary '*' Multiplicative / Primary 762 | Primary <- '(' Additive ')' / Number { no_ast_opt } 763 | Number <- < [0-9]+ > 764 | %whitespace <- [ \t\r\n]* 765 | 766 | > peglint --ast --opt --source "1 + 2 * 3" a.peg 767 | + Additive/0 768 | + Multiplicative/1[Primary] 769 | - Number (1) 770 | + Additive/1[Multiplicative] 771 | + Primary/1 772 | - Number (2) 773 | + Multiplicative/1[Primary] 774 | - Number (3) 775 | 776 | > peglint --ast --opt-only --source "1 + 2 * 3" a.peg 777 | + Additive/0 778 | + Multiplicative/1 779 | - Primary/1[Number] (1) 780 | + Additive/1 781 | + Multiplicative/0 782 | - Primary/1[Number] (2) 783 | + Multiplicative/1 784 | - Primary/1[Number] (3) 785 | ``` 786 | 787 | Sample codes 788 | ------------ 789 | 790 | * [Calculator](https://github.com/yhirose/cpp-peglib/blob/master/example/calc.cc) 791 | * [Calculator (with parser operators)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc2.cc) 792 | * [Calculator (AST version)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) 793 | * [Calculator (parsing expressions by precedence climbing)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc4.cc) 794 | * [Calculator (AST version and parsing expressions by precedence climbing)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc5.cc) 795 | * [A tiny PL/0 JIT compiler in less than 900 LOC with LLVM and PEG parser](https://github.com/yhirose/pl0-jit-compiler) 796 | * [A Programming Language just for writing Fizz Buzz program. 
:)](https://github.com/yhirose/fizzbuzzlang) 797 | 798 | License 799 | ------- 800 | 801 | MIT license (© 2022 Yuji Hirose) 802 | -------------------------------------------------------------------------------- /docs/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | source ~/Projects/emsdk/emsdk_env.sh 3 | emcc -std=c++17 -O3 --bind -o native.js -s ALLOW_MEMORY_GROWTH native.cpp 4 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | PEG Playground 5 | 6 | 7 | 8 |
9 |
10 |
    11 |
  • 12 |
  • Grammar
  • 13 |
14 |
{{syntax}}
15 |
16 |
17 |
18 |
    19 |
  • 20 |
  • Source Code
  • 21 |
  • 22 |
      23 |
    • 24 |
    • 25 |
    • 26 |
    • 27 |
    28 |
  • 29 |
30 |
{{source}}
31 |
AST
32 |

33 |     
Optimized AST     34 | mode:  35 |
36 |

37 |     
Profile
38 |
39 | 40 |
41 |
42 |
43 |
44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /docs/index.js: -------------------------------------------------------------------------------- 1 | // Setup editors 2 | function setupInfoArea(id) { 3 | const e = ace.edit(id); 4 | e.setShowPrintMargin(false); 5 | e.setOptions({ 6 | readOnly: true, 7 | highlightActiveLine: false, 8 | highlightGutterLine: false 9 | }) 10 | e.renderer.$cursorLayer.element.style.opacity=0; 11 | return e; 12 | } 13 | 14 | function setupEditorArea(id, lsKey) { 15 | const e = ace.edit(id); 16 | e.setShowPrintMargin(false); 17 | e.setValue(localStorage.getItem(lsKey) || ''); 18 | e.moveCursorTo(0, 0); 19 | return e; 20 | } 21 | 22 | const grammar = setupEditorArea("grammar-editor", "grammarText"); 23 | const code = setupEditorArea("code-editor", "codeText"); 24 | 25 | const codeAst = setupInfoArea("code-ast"); 26 | const codeAstOptimized = setupInfoArea("code-ast-optimized"); 27 | const codeProfile = setupInfoArea("code-profile"); 28 | 29 | $('#opt-mode').val(localStorage.getItem('optimizationMode') || 'all'); 30 | $('#start-rule').val(localStorage.getItem('startRule') || ''); 31 | $('#packrat').prop('checked', localStorage.getItem('packrat') === 'true'); 32 | $('#auto-refresh').prop('checked', localStorage.getItem('autoRefresh') === 'true'); 33 | $('#parse').prop('disabled', $('#auto-refresh').prop('checked')); 34 | 35 | // Parse 36 | function escapeHtml(unsafe) { 37 | return unsafe 38 | .replace(/&/g, "&amp;") 39 | .replace(/</g, "&lt;") 40 | .replace(/>/g, "&gt;") 41 | .replace(/"/g, "&quot;") 42 | .replace(/'/g, "&#039;"); 43 | } 44 | 45 | function generateErrorListHTML(errors) { 46 | let html = '
<ul>'; 47 | 48 | html += $.map(errors, function (x) { 49 | if (x.gln && x.gcol) { 50 | return `<li data-ln="${x.ln}" data-col="${x.col}" data-gln="${x.gln}" data-gcol="${x.gcol}"><span>${x.ln}:${x.col}</span> <span>${escapeHtml(x.msg)}</span></li>`; 51 | } else { 52 | return `<li data-ln="${x.ln}" data-col="${x.col}"><span>${x.ln}:${x.col}</span> <span>${escapeHtml(x.msg)}</span></li>`; 53 | } 54 | }).join(''); 55 | 56 | html += '</ul>'; 57 | 58 | return html; 59 | } 60 | 61 | function updateLocalStorage() { 62 | localStorage.setItem('grammarText', grammar.getValue()); 63 | localStorage.setItem('codeText', code.getValue()); 64 | localStorage.setItem('optimizationMode', $('#opt-mode').val()); 65 | localStorage.setItem('startRule', $('#start-rule').val()); 66 | localStorage.setItem('packrat', $('#packrat').prop('checked')); 67 | localStorage.setItem('autoRefresh', $('#auto-refresh').prop('checked')); 68 | } 69 | 70 | function parse() { 71 | const $grammarValidation = $('#grammar-validation'); 72 | const $grammarInfo = $('#grammar-info'); 73 | const grammarText = grammar.getValue(); 74 | 75 | const $codeValidation = $('#code-validation'); 76 | const $codeInfo = $('#code-info'); 77 | const codeText = code.getValue(); 78 | 79 | const optimizationMode = $('#opt-mode').val(); 80 | const startRule = $('#start-rule').val(); 81 | const packrat = $('#packrat').prop('checked'); 82 | 83 | $grammarInfo.html(''); 84 | $grammarValidation.hide(); 85 | $codeInfo.html(''); 86 | $codeValidation.hide(); 87 | codeAst.setValue(''); 88 | codeAstOptimized.setValue(''); 89 | codeProfile.setValue(''); 90 | 91 | if (grammarText.length === 0) { 92 | return; 93 | } 94 | 95 | const mode = optimizationMode == 'all'; 96 | 97 | $('#overlay').css({ 98 | 'z-index': '1', 99 | 'display': 'block', 100 | 'background-color': 'rgba(0, 0, 0, 0.1)' 101 | }); 102 | window.setTimeout(() => { 103 | const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat, startRule)); 104 | $('#overlay').css({ 105 | 'z-index': '-1', 106 | 'display': 'none', 107 | 'background-color': 'rgba(1, 1, 1, 1.0)' 108 | }); 109 | 110 | if (data.grammar_valid) { 111 | $grammarValidation.removeClass('validation-invalid').show(); 112 | 113 | codeAst.insert(data.ast); 114 | codeAstOptimized.insert(data.astOptimized); 115 | codeProfile.insert(data.profile); 116 | 117 | if (data.source_valid) { 118 |
$codeValidation.removeClass('validation-invalid').show(); 119 | } else { 120 | $codeValidation.addClass('validation-invalid').show(); 121 | } 122 | 123 | if (data.code.length > 0) { 124 | const html = generateErrorListHTML(data.code); 125 | $codeInfo.html(html); 126 | } 127 | } else { 128 | $grammarValidation.addClass('validation-invalid').show(); 129 | } 130 | 131 | if (data.grammar.length > 0) { 132 | const html = generateErrorListHTML(data.grammar); 133 | $grammarInfo.html(html); 134 | } 135 | }, 0); 136 | } 137 | 138 | // Event handling for text editing 139 | let timer; 140 | function setupTimer() { 141 | clearTimeout(timer); 142 | timer = setTimeout(() => { 143 | updateLocalStorage(); 144 | if ($('#auto-refresh').prop('checked')) { 145 | parse(); 146 | } 147 | }, 750); 148 | }; 149 | grammar.getSession().on('change', setupTimer); 150 | code.getSession().on('change', setupTimer); 151 | 152 | // Event handling in the info area 153 | function makeOnClickInInfo(editor) { 154 | return function () { 155 | const el = $(this); 156 | editor.navigateTo(el.data('ln') - 1, el.data('col') - 1); 157 | editor.scrollToLine(el.data('ln') - 1, true, false, null); 158 | editor.focus(); 159 | 160 | if(el.data('gln') && el.data('gcol')) { 161 | grammar.navigateTo(el.data('gln') - 1, el.data('gcol') - 1); 162 | grammar.scrollToLine(el.data('gln') - 1, true, false, null); 163 | } 164 | } 165 | }; 166 | $('#grammar-info').on('click', 'li', makeOnClickInInfo(grammar)); 167 | $('#code-info').on('click', 'li', makeOnClickInInfo(code)); 168 | 169 | // Event handling in the AST optimization 170 | $('#opt-mode').on('change', setupTimer); 171 | $('#start-rule').on('keydown', setupTimer); 172 | $('#packrat').on('change', setupTimer); 173 | $('#auto-refresh').on('change', () => { 174 | updateLocalStorage(); 175 | $('#parse').prop('disabled', $('#auto-refresh').prop('checked')); 176 | setupTimer(); 177 | }); 178 | $('#parse').on('click', parse); 179 | 180 | // Resize editors to fit their parents
181 | function resizeEditorsToParent() { 182 | code.resize(); 183 | code.renderer.updateFull(); 184 | codeAst.resize(); 185 | codeAst.renderer.updateFull(); 186 | codeAstOptimized.resize(); 187 | codeAstOptimized.renderer.updateFull(); 188 | codeProfile.resize(); 189 | codeProfile.renderer.updateFull(); 190 | } 191 | 192 | // Show windows 193 | function setupToolWindow(lsKeyName, buttonSel, codeSel) { 194 | let show = localStorage.getItem(lsKeyName) === 'true'; 195 | $(buttonSel).prop('checked', show); 196 | $(codeSel).css({ 'display': show ? 'block' : 'none' }); 197 | 198 | $(buttonSel).on('change', () => { 199 | show = !show; 200 | localStorage.setItem(lsKeyName, show); 201 | $(codeSel).css({ 'display': show ? 'block' : 'none' }); 202 | resizeEditorsToParent(); 203 | }); 204 | } 205 | setupToolWindow('show-ast', '#show-ast', '#code-ast'); 206 | setupToolWindow('show-ast-optimized', '#show-ast-optimized', '#code-ast-optimized'); 207 | setupToolWindow('show-profile', '#show-profile', '#code-profile'); 208 | 209 | // Show page 210 | $('#main').css({ 211 | 'display': 'flex', 212 | }); 213 | 214 | // WebAssembly 215 | var Module = { 216 | onRuntimeInitialized: function() { 217 | // Initial parse 218 | if ($('#auto-refresh').prop('checked')) { 219 | parse(); 220 | } 221 | } 222 | }; 223 | -------------------------------------------------------------------------------- /docs/native.cpp: -------------------------------------------------------------------------------- 1 | #include "../peglib.h" 2 | #include <cstdio> 3 | #include <emscripten/bind.h> 4 | #include <functional> 5 | #include <iomanip> 6 | #include <sstream> 7 | 8 | // https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784 9 | std::string escape_json(const std::string &s) { 10 | std::ostringstream o; 11 | for (auto c : s) { 12 | if (c == '"' || c == '\\' || ('\x00' <= c && c <= '\x1f')) { 13 | o << "\\u" << std::hex << std::setw(4) << std::setfill('0') << (int)c; 14 | } else { 15 | o << c; 16 | } 17 | } 18 | return o.str(); 19 | } 20
| 21 | std::function<void(size_t, size_t, const std::string &, const std::string &)> 22 | makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) { 23 | init = true; 24 | return [&](size_t ln, size_t col, const std::string &msg, 25 | const std::string &rule) mutable { 26 | if (!init) { json += ","; } 27 | json += "{"; 28 | json += R"("ln":)" + std::to_string(ln) + ","; 29 | json += R"("col":)" + std::to_string(col) + ","; 30 | json += R"("msg":")" + escape_json(msg) + R"(")"; 31 | if (!rule.empty()) { 32 | auto it = peg.get_grammar().find(rule); 33 | if (it != peg.get_grammar().end()) { 34 | auto [gln, gcol] = it->second.line_; 35 | json += ","; 36 | json += R"("gln":)" + std::to_string(gln) + ","; 37 | json += R"("gcol":)" + std::to_string(gcol); 38 | } 39 | } 40 | json += "}"; 41 | 42 | init = false; 43 | }; 44 | } 45 | 46 | bool parse_grammar(const std::string &text, peg::parser &peg, 47 | const std::string &startRule, std::string &json) { 48 | bool init; 49 | peg.set_logger(makeJSONFormatter(peg, json, init)); 50 | json += "["; 51 | auto ret = peg.load_grammar(text.data(), text.size(), startRule); 52 | json += "]"; 53 | return ret; 54 | } 55 | 56 | bool parse_code(const std::string &text, peg::parser &peg, std::string &json, 57 | std::shared_ptr<peg::Ast> &ast) { 58 | peg.enable_ast(); 59 | bool init; 60 | peg.set_logger(makeJSONFormatter(peg, json, init)); 61 | json += "["; 62 | auto ret = peg.parse_n(text.data(), text.size(), ast); 63 | json += "]"; 64 | return ret; 65 | } 66 | 67 | std::string lint(const std::string &grammarText, const std::string &codeText, 68 | bool opt_mode, bool packrat, const std::string &startRule) { 69 | std::string grammarResult; 70 | std::string codeResult; 71 | std::string astResult; 72 | std::string astResultOptimized; 73 | std::string profileResult; 74 | 75 | peg::parser peg; 76 | auto is_grammar_valid = 77 | parse_grammar(grammarText, peg, startRule, grammarResult); 78 | auto is_source_valid = false; 79 | 80 | if (is_grammar_valid && peg) { 81 | std::stringstream ss; 82 |
peg::enable_profiling(peg, ss); 83 | 84 | if (packrat) { peg.enable_packrat_parsing(); } 85 | 86 | std::shared_ptr ast; 87 | is_source_valid = parse_code(codeText, peg, codeResult, ast); 88 | 89 | profileResult = escape_json(ss.str()); 90 | 91 | if (ast) { 92 | astResult = escape_json(peg::ast_to_s(ast)); 93 | astResultOptimized = 94 | escape_json(peg::ast_to_s(peg.optimize_ast(ast, opt_mode))); 95 | } 96 | } 97 | 98 | std::string json; 99 | json += "{"; 100 | json += 101 | std::string("\"grammar_valid\":") + (is_grammar_valid ? "true" : "false"); 102 | json += ",\"grammar\":" + grammarResult; 103 | json += 104 | std::string(",\"source_valid\":") + (is_source_valid ? "true" : "false"); 105 | if (!codeResult.empty()) { 106 | json += ",\"code\":" + codeResult; 107 | json += ",\"ast\":\"" + astResult + "\""; 108 | json += ",\"astOptimized\":\"" + astResultOptimized + "\""; 109 | json += ",\"profile\":\"" + profileResult + "\""; 110 | } 111 | json += "}"; 112 | 113 | return json; 114 | } 115 | 116 | EMSCRIPTEN_BINDINGS(native) { emscripten::function("lint", &lint); } 117 | -------------------------------------------------------------------------------- /docs/native.js: -------------------------------------------------------------------------------- 1 | var Module=typeof Module!="undefined"?Module:{};var moduleOverrides=Object.assign({},Module);var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var ENVIRONMENT_IS_WEB=typeof window=="object";var ENVIRONMENT_IS_WORKER=typeof importScripts=="function";var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var read_,readAsync,readBinary,setWindowTitle;function logExceptionOnExit(e){if(e instanceof ExitStatus)return;let toLog=e;err("exiting due to exception: 
"+toLog)}var fs;var nodePath;var requireNodeFS;if(ENVIRONMENT_IS_NODE){if(ENVIRONMENT_IS_WORKER){scriptDirectory=require("path").dirname(scriptDirectory)+"/"}else{scriptDirectory=__dirname+"/"}requireNodeFS=(()=>{if(!nodePath){fs=require("fs");nodePath=require("path")}});read_=function shell_read(filename,binary){requireNodeFS();filename=nodePath["normalize"](filename);return fs.readFileSync(filename,binary?undefined:"utf8")};readBinary=(filename=>{var ret=read_(filename,true);if(!ret.buffer){ret=new Uint8Array(ret)}return ret});readAsync=((filename,onload,onerror)=>{requireNodeFS();filename=nodePath["normalize"](filename);fs.readFile(filename,function(err,data){if(err)onerror(err);else onload(data.buffer)})});if(process["argv"].length>1){thisProgram=process["argv"][1].replace(/\\/g,"/")}arguments_=process["argv"].slice(2);if(typeof module!="undefined"){module["exports"]=Module}process["on"]("uncaughtException",function(ex){if(!(ex instanceof ExitStatus)){throw ex}});process["on"]("unhandledRejection",function(reason){throw reason});quit_=((status,toThrow)=>{if(keepRuntimeAlive()){process["exitCode"]=status;throw toThrow}logExceptionOnExit(toThrow);process["exit"](status)});Module["inspect"]=function(){return"[Emscripten Module object]"}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){if(ENVIRONMENT_IS_WORKER){scriptDirectory=self.location.href}else if(typeof document!="undefined"&&document.currentScript){scriptDirectory=document.currentScript.src}if(scriptDirectory.indexOf("blob:")!==0){scriptDirectory=scriptDirectory.substr(0,scriptDirectory.replace(/[?#].*/,"").lastIndexOf("/")+1)}else{scriptDirectory=""}{read_=(url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.send(null);return xhr.responseText});if(ENVIRONMENT_IS_WORKER){readBinary=(url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)})}readAsync=((url,onload,onerror)=>{var xhr=new 
XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=(()=>{if(xhr.status==200||xhr.status==0&&xhr.response){onload(xhr.response);return}onerror()});xhr.onerror=onerror;xhr.send(null)})}setWindowTitle=(title=>document.title=title)}else{}var out=Module["print"]||console.log.bind(console);var err=Module["printErr"]||console.warn.bind(console);Object.assign(Module,moduleOverrides);moduleOverrides=null;if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["quit"])quit_=Module["quit"];var wasmBinary;if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];var noExitRuntime=Module["noExitRuntime"]||true;if(typeof WebAssembly!="object"){abort("no native wasm support detected")}var wasmMemory;var ABORT=false;var EXITSTATUS;var UTF8Decoder=typeof TextDecoder!="undefined"?new TextDecoder("utf8"):undefined;function UTF8ArrayToString(heapOrArray,idx,maxBytesToRead){var endIdx=idx+maxBytesToRead;var endPtr=idx;while(heapOrArray[endPtr]&&!(endPtr>=endIdx))++endPtr;if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.subarray(idx,endPtr))}else{var str="";while(idx>10,56320|ch&1023)}}}return str}function UTF8ToString(ptr,maxBytesToRead){return ptr?UTF8ArrayToString(HEAPU8,ptr,maxBytesToRead):""}function stringToUTF8Array(str,heap,outIdx,maxBytesToWrite){if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=55296&&u<=57343){var u1=str.charCodeAt(++i);u=65536+((u&1023)<<10)|u1&1023}if(u<=127){if(outIdx>=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}}heap[outIdx]=0;return 
outIdx-startIdx}function stringToUTF8(str,outPtr,maxBytesToWrite){return stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite)}function lengthBytesUTF8(str){var len=0;for(var i=0;i=55296&&u<=57343)u=65536+((u&1023)<<10)|str.charCodeAt(++i)&1023;if(u<=127)++len;else if(u<=2047)len+=2;else if(u<=65535)len+=3;else len+=4}return len}var UTF16Decoder=typeof TextDecoder!="undefined"?new TextDecoder("utf-16le"):undefined;function UTF16ToString(ptr,maxBytesToRead){var endPtr=ptr;var idx=endPtr>>1;var maxIdx=idx+maxBytesToRead/2;while(!(idx>=maxIdx)&&HEAPU16[idx])++idx;endPtr=idx<<1;if(endPtr-ptr>32&&UTF16Decoder){return UTF16Decoder.decode(HEAPU8.subarray(ptr,endPtr))}else{var str="";for(var i=0;!(i>=maxBytesToRead/2);++i){var codeUnit=HEAP16[ptr+i*2>>1];if(codeUnit==0)break;str+=String.fromCharCode(codeUnit)}return str}}function stringToUTF16(str,outPtr,maxBytesToWrite){if(maxBytesToWrite===undefined){maxBytesToWrite=2147483647}if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWrite>1]=codeUnit;outPtr+=2}HEAP16[outPtr>>1]=0;return outPtr-startPtr}function lengthBytesUTF16(str){return str.length*2}function UTF32ToString(ptr,maxBytesToRead){var i=0;var str="";while(!(i>=maxBytesToRead/4)){var utf32=HEAP32[ptr+i*4>>2];if(utf32==0)break;++i;if(utf32>=65536){var ch=utf32-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}else{str+=String.fromCharCode(utf32)}}return str}function stringToUTF32(str,outPtr,maxBytesToWrite){if(maxBytesToWrite===undefined){maxBytesToWrite=2147483647}if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i=55296&&codeUnit<=57343){var trailSurrogate=str.charCodeAt(++i);codeUnit=65536+((codeUnit&1023)<<10)|trailSurrogate&1023}HEAP32[outPtr>>2]=codeUnit;outPtr+=4;if(outPtr+4>endPtr)break}HEAP32[outPtr>>2]=0;return outPtr-startPtr}function lengthBytesUTF32(str){var len=0;for(var i=0;i=55296&&codeUnit<=57343)++i;len+=4}return len}function 
writeArrayToMemory(array,buffer){HEAP8.set(array,buffer)}function writeAsciiToMemory(str,buffer,dontAddNull){for(var i=0;i>0]=str.charCodeAt(i)}if(!dontAddNull)HEAP8[buffer>>0]=0}var buffer,HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;function updateGlobalBufferAndViews(buf){buffer=buf;Module["HEAP8"]=HEAP8=new Int8Array(buf);Module["HEAP16"]=HEAP16=new Int16Array(buf);Module["HEAP32"]=HEAP32=new Int32Array(buf);Module["HEAPU8"]=HEAPU8=new Uint8Array(buf);Module["HEAPU16"]=HEAPU16=new Uint16Array(buf);Module["HEAPU32"]=HEAPU32=new Uint32Array(buf);Module["HEAPF32"]=HEAPF32=new Float32Array(buf);Module["HEAPF64"]=HEAPF64=new Float64Array(buf)}var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||16777216;var wasmTable;var __ATPRERUN__=[];var __ATINIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;function keepRuntimeAlive(){return noExitRuntime}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}function initRuntime(){runtimeInitialized=true;callRuntimeCallbacks(__ATINIT__)}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}function addOnInit(cb){__ATINIT__.unshift(cb)}function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;function addRunDependency(id){runDependencies++;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}}function 
removeRunDependency(id){runDependencies--;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}function abort(what){{if(Module["onAbort"]){Module["onAbort"](what)}}what="Aborted("+what+")";err(what);ABORT=true;EXITSTATUS=1;what+=". Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);throw e}var dataURIPrefix="data:application/octet-stream;base64,";function isDataURI(filename){return filename.startsWith(dataURIPrefix)}function isFileURI(filename){return filename.startsWith("file://")}var wasmBinaryFile;wasmBinaryFile="native.wasm";if(!isDataURI(wasmBinaryFile)){wasmBinaryFile=locateFile(wasmBinaryFile)}function getBinary(file){try{if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}else{throw"both async and sync fetching of the wasm failed"}}catch(err){abort(err)}}function getBinaryPromise(){if(!wasmBinary&&(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER)){if(typeof fetch=="function"&&!isFileURI(wasmBinaryFile)){return fetch(wasmBinaryFile,{credentials:"same-origin"}).then(function(response){if(!response["ok"]){throw"failed to load wasm binary file at '"+wasmBinaryFile+"'"}return response["arrayBuffer"]()}).catch(function(){return getBinary(wasmBinaryFile)})}else{if(readAsync){return new Promise(function(resolve,reject){readAsync(wasmBinaryFile,function(response){resolve(new Uint8Array(response))},reject)})}}}return Promise.resolve().then(function(){return getBinary(wasmBinaryFile)})}function createWasm(){var info={"a":asmLibraryArg};function receiveInstance(instance,module){var 
exports=instance.exports;Module["asm"]=exports;wasmMemory=Module["asm"]["v"];updateGlobalBufferAndViews(wasmMemory.buffer);wasmTable=Module["asm"]["y"];addOnInit(Module["asm"]["w"]);removeRunDependency("wasm-instantiate")}addRunDependency("wasm-instantiate");function receiveInstantiationResult(result){receiveInstance(result["instance"])}function instantiateArrayBuffer(receiver){return getBinaryPromise().then(function(binary){return WebAssembly.instantiate(binary,info)}).then(function(instance){return instance}).then(receiver,function(reason){err("failed to asynchronously prepare wasm: "+reason);abort(reason)})}function instantiateAsync(){if(!wasmBinary&&typeof WebAssembly.instantiateStreaming=="function"&&!isDataURI(wasmBinaryFile)&&!isFileURI(wasmBinaryFile)&&typeof fetch=="function"){return fetch(wasmBinaryFile,{credentials:"same-origin"}).then(function(response){var result=WebAssembly.instantiateStreaming(response,info);return result.then(receiveInstantiationResult,function(reason){err("wasm streaming compile failed: "+reason);err("falling back to ArrayBuffer instantiation");return instantiateArrayBuffer(receiveInstantiationResult)})})}else{return instantiateArrayBuffer(receiveInstantiationResult)}}if(Module["instantiateWasm"]){try{var exports=Module["instantiateWasm"](info,receiveInstance);return exports}catch(e){err("Module.instantiateWasm callback failed with error: "+e);return false}}instantiateAsync();return{}}function callRuntimeCallbacks(callbacks){while(callbacks.length>0){var callback=callbacks.shift();if(typeof callback=="function"){callback(Module);continue}var func=callback.func;if(typeof func=="number"){if(callback.arg===undefined){getWasmTableEntry(func)()}else{getWasmTableEntry(func)(callback.arg)}}else{func(callback.arg===undefined?null:callback.arg)}}}var wasmTableMirror=[];function getWasmTableEntry(funcPtr){var 
func=wasmTableMirror[funcPtr];if(!func){if(funcPtr>=wasmTableMirror.length)wasmTableMirror.length=funcPtr+1;wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func}function ___assert_fail(condition,filename,line,func){abort("Assertion failed: "+UTF8ToString(condition)+", at: "+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"])}function ___cxa_allocate_exception(size){return _malloc(size+24)+24}function ExceptionInfo(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24;this.set_type=function(type){HEAPU32[this.ptr+4>>2]=type};this.get_type=function(){return HEAPU32[this.ptr+4>>2]};this.set_destructor=function(destructor){HEAPU32[this.ptr+8>>2]=destructor};this.get_destructor=function(){return HEAPU32[this.ptr+8>>2]};this.set_refcount=function(refcount){HEAP32[this.ptr>>2]=refcount};this.set_caught=function(caught){caught=caught?1:0;HEAP8[this.ptr+12>>0]=caught};this.get_caught=function(){return HEAP8[this.ptr+12>>0]!=0};this.set_rethrown=function(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13>>0]=rethrown};this.get_rethrown=function(){return HEAP8[this.ptr+13>>0]!=0};this.init=function(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor);this.set_refcount(0);this.set_caught(false);this.set_rethrown(false)};this.add_ref=function(){var value=HEAP32[this.ptr>>2];HEAP32[this.ptr>>2]=value+1};this.release_ref=function(){var prev=HEAP32[this.ptr>>2];HEAP32[this.ptr>>2]=prev-1;return prev===1};this.set_adjusted_ptr=function(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr};this.get_adjusted_ptr=function(){return HEAPU32[this.ptr+16>>2]};this.get_exception_ptr=function(){var isPointer=___cxa_is_pointer_type(this.get_type());if(isPointer){return HEAPU32[this.excPtr>>2]}var adjusted=this.get_adjusted_ptr();if(adjusted!==0)return adjusted;return this.excPtr}}var exceptionLast=0;var uncaughtExceptionCount=0;function ___cxa_throw(ptr,type,destructor){var info=new 
ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw ptr}function __embind_register_bigint(primitiveType,name,size,minRange,maxRange){}function getShiftFromSize(size){switch(size){case 1:return 0;case 2:return 1;case 4:return 2;case 8:return 3;default:throw new TypeError("Unknown type size: "+size)}}function embind_init_charCodes(){var codes=new Array(256);for(var i=0;i<256;++i){codes[i]=String.fromCharCode(i)}embind_charCodes=codes}var embind_charCodes=undefined;function readLatin1String(ptr){var ret="";var c=ptr;while(HEAPU8[c]){ret+=embind_charCodes[HEAPU8[c++]]}return ret}var awaitingDependencies={};var registeredTypes={};var typeDependencies={};var char_0=48;var char_9=57;function makeLegalFunctionName(name){if(undefined===name){return"_unknown"}name=name.replace(/[^a-zA-Z0-9_]/g,"$");var f=name.charCodeAt(0);if(f>=char_0&&f<=char_9){return"_"+name}return name}function createNamedFunction(name,body){name=makeLegalFunctionName(name);return new Function("body","return function "+name+"() {\n"+' "use strict";'+" return body.apply(this, arguments);\n"+"};\n")(body)}function extendError(baseErrorType,errorName){var errorClass=createNamedFunction(errorName,function(message){this.name=errorName;this.message=message;var stack=new Error(message).stack;if(stack!==undefined){this.stack=this.toString()+"\n"+stack.replace(/^Error(:[^\n]*)?\n/,"")}});errorClass.prototype=Object.create(baseErrorType.prototype);errorClass.prototype.constructor=errorClass;errorClass.prototype.toString=function(){if(this.message===undefined){return this.name}else{return this.name+": "+this.message}};return errorClass}var BindingError=undefined;function throwBindingError(message){throw new BindingError(message)}var InternalError=undefined;function throwInternalError(message){throw new InternalError(message)}function 
whenDependentTypesAreResolved(myTypes,dependentTypes,getTypeConverters){myTypes.forEach(function(type){typeDependencies[type]=dependentTypes});function onComplete(typeConverters){var myTypeConverters=getTypeConverters(typeConverters);if(myTypeConverters.length!==myTypes.length){throwInternalError("Mismatched type converter count")}for(var i=0;i{if(registeredTypes.hasOwnProperty(dt)){typeConverters[i]=registeredTypes[dt]}else{unregisteredTypes.push(dt);if(!awaitingDependencies.hasOwnProperty(dt)){awaitingDependencies[dt]=[]}awaitingDependencies[dt].push(()=>{typeConverters[i]=registeredTypes[dt];++registered;if(registered===unregisteredTypes.length){onComplete(typeConverters)}})}});if(0===unregisteredTypes.length){onComplete(typeConverters)}}function registerType(rawType,registeredInstance,options={}){if(!("argPackAdvance"in registeredInstance)){throw new TypeError("registerType registeredInstance requires argPackAdvance")}var name=registeredInstance.name;if(!rawType){throwBindingError('type "'+name+'" must have a positive integer typeid pointer')}if(registeredTypes.hasOwnProperty(rawType)){if(options.ignoreDuplicateRegistrations){return}else{throwBindingError("Cannot register type '"+name+"' twice")}}registeredTypes[rawType]=registeredInstance;delete typeDependencies[rawType];if(awaitingDependencies.hasOwnProperty(rawType)){var callbacks=awaitingDependencies[rawType];delete awaitingDependencies[rawType];callbacks.forEach(cb=>cb())}}function __embind_register_bool(rawType,name,size,trueValue,falseValue){var shift=getShiftFromSize(size);name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(wt){return!!wt},"toWireType":function(destructors,o){return o?trueValue:falseValue},"argPackAdvance":8,"readValueFromPointer":function(pointer){var heap;if(size===1){heap=HEAP8}else if(size===2){heap=HEAP16}else if(size===4){heap=HEAP32}else{throw new TypeError("Unknown boolean type size: "+name)}return 
this["fromWireType"](heap[pointer>>shift])},destructorFunction:null})}var emval_free_list=[];var emval_handle_array=[{},{value:undefined},{value:null},{value:true},{value:false}];function __emval_decref(handle){if(handle>4&&0===--emval_handle_array[handle].refcount){emval_handle_array[handle]=undefined;emval_free_list.push(handle)}}function count_emval_handles(){var count=0;for(var i=5;i{if(!handle){throwBindingError("Cannot use deleted val. handle = "+handle)}return emval_handle_array[handle].value},toHandle:value=>{switch(value){case undefined:return 1;case null:return 2;case true:return 3;case false:return 4;default:{var handle=emval_free_list.length?emval_free_list.pop():emval_handle_array.length;emval_handle_array[handle]={refcount:1,value:value};return handle}}}};function simpleReadValueFromPointer(pointer){return this["fromWireType"](HEAPU32[pointer>>2])}function __embind_register_emval(rawType,name){name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(handle){var rv=Emval.toValue(handle);__emval_decref(handle);return rv},"toWireType":function(destructors,value){return Emval.toHandle(value)},"argPackAdvance":8,"readValueFromPointer":simpleReadValueFromPointer,destructorFunction:null})}function floatReadValueFromPointer(name,shift){switch(shift){case 2:return function(pointer){return this["fromWireType"](HEAPF32[pointer>>2])};case 3:return function(pointer){return this["fromWireType"](HEAPF64[pointer>>3])};default:throw new TypeError("Unknown float type: "+name)}}function __embind_register_float(rawType,name,size){var shift=getShiftFromSize(size);name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(value){return value},"toWireType":function(destructors,value){return value},"argPackAdvance":8,"readValueFromPointer":floatReadValueFromPointer(name,shift),destructorFunction:null})}function new_(constructor,argumentList){if(!(constructor instanceof Function)){throw new TypeError("new_ called with 
constructor type "+typeof constructor+" which is not a function")}var dummy=createNamedFunction(constructor.name||"unknownFunctionName",function(){});dummy.prototype=constructor.prototype;var obj=new dummy;var r=constructor.apply(obj,argumentList);return r instanceof Object?r:obj}function runDestructors(destructors){while(destructors.length){var ptr=destructors.pop();var del=destructors.pop();del(ptr)}}function craftInvokerFunction(humanName,argTypes,classType,cppInvokerFunc,cppTargetFunc){var argCount=argTypes.length;if(argCount<2){throwBindingError("argTypes array size mismatch! Must at least get return value and 'this' types!")}var isClassMethodFunc=argTypes[1]!==null&&classType!==null;var needsDestructorStack=false;for(var i=1;i0?", ":"")+argsListWired}invokerFnBody+=(returns?"var rv = ":"")+"invoker(fn"+(argsListWired.length>0?", ":"")+argsListWired+");\n";if(needsDestructorStack){invokerFnBody+="runDestructors(destructors);\n"}else{for(var i=isClassMethodFunc?1:2;i>2)+i])}return array}function replacePublicSymbol(name,value,numArguments){if(!Module.hasOwnProperty(name)){throwInternalError("Replacing nonexistant public symbol")}if(undefined!==Module[name].overloadTable&&undefined!==numArguments){Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}}function dynCallLegacy(sig,ptr,args){var f=Module["dynCall_"+sig];return args&&args.length?f.apply(null,[ptr].concat(args)):f.call(null,ptr)}function dynCall(sig,ptr,args){if(sig.includes("j")){return dynCallLegacy(sig,ptr,args)}return getWasmTableEntry(ptr).apply(null,args)}function getDynCaller(sig,ptr){var argCache=[];return function(){argCache.length=0;Object.assign(argCache,arguments);return dynCall(sig,ptr,argCache)}}function embind__requireFunction(signature,rawFunction){signature=readLatin1String(signature);function makeDynCaller(){if(signature.includes("j")){return getDynCaller(signature,rawFunction)}return getWasmTableEntry(rawFunction)}var 
fp=makeDynCaller();if(typeof fp!="function"){throwBindingError("unknown function pointer with signature "+signature+": "+rawFunction)}return fp}var UnboundTypeError=undefined;function getTypeName(type){var ptr=___getTypeName(type);var rv=readLatin1String(ptr);_free(ptr);return rv}function throwUnboundTypeError(message,types){var unboundTypes=[];var seen={};function visit(type){if(seen[type]){return}if(registeredTypes[type]){return}if(typeDependencies[type]){typeDependencies[type].forEach(visit);return}unboundTypes.push(type);seen[type]=true}types.forEach(visit);throw new UnboundTypeError(message+": "+unboundTypes.map(getTypeName).join([", "]))}function __embind_register_function(name,argCount,rawArgTypesAddr,signature,rawInvoker,fn){var argTypes=heap32VectorToArray(argCount,rawArgTypesAddr);name=readLatin1String(name);rawInvoker=embind__requireFunction(signature,rawInvoker);exposePublicSymbol(name,function(){throwUnboundTypeError("Cannot call "+name+" due to unbound types",argTypes)},argCount-1);whenDependentTypesAreResolved([],argTypes,function(argTypes){var invokerArgsArray=[argTypes[0],null].concat(argTypes.slice(1));replacePublicSymbol(name,craftInvokerFunction(name,invokerArgsArray,null,rawInvoker,fn),argCount-1);return[]})}function integerReadValueFromPointer(name,shift,signed){switch(shift){case 0:return signed?function readS8FromPointer(pointer){return HEAP8[pointer]}:function readU8FromPointer(pointer){return HEAPU8[pointer]};case 1:return signed?function readS16FromPointer(pointer){return HEAP16[pointer>>1]}:function readU16FromPointer(pointer){return HEAPU16[pointer>>1]};case 2:return signed?function readS32FromPointer(pointer){return HEAP32[pointer>>2]}:function readU32FromPointer(pointer){return HEAPU32[pointer>>2]};default:throw new TypeError("Unknown integer type: "+name)}}function __embind_register_integer(primitiveType,name,size,minRange,maxRange){name=readLatin1String(name);if(maxRange===-1){maxRange=4294967295}var shift=getShiftFromSize(size);var 
fromWireType=value=>value;if(minRange===0){var bitshift=32-8*size;fromWireType=(value=>value<>>bitshift)}var isUnsignedType=name.includes("unsigned");var checkAssertions=(value,toTypeName)=>{};var toWireType;if(isUnsignedType){toWireType=function(destructors,value){checkAssertions(value,this.name);return value>>>0}}else{toWireType=function(destructors,value){checkAssertions(value,this.name);return value}}registerType(primitiveType,{name:name,"fromWireType":fromWireType,"toWireType":toWireType,"argPackAdvance":8,"readValueFromPointer":integerReadValueFromPointer(name,shift,minRange!==0),destructorFunction:null})}function __embind_register_memory_view(rawType,dataTypeIndex,name){var typeMapping=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array];var TA=typeMapping[dataTypeIndex];function decodeMemoryView(handle){handle=handle>>2;var heap=HEAPU32;var size=heap[handle];var data=heap[handle+1];return new TA(buffer,data,size)}name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":decodeMemoryView,"argPackAdvance":8,"readValueFromPointer":decodeMemoryView},{ignoreDuplicateRegistrations:true})}function __embind_register_std_string(rawType,name){name=readLatin1String(name);var stdStringIsUTF8=name==="std::string";registerType(rawType,{name:name,"fromWireType":function(value){var length=HEAPU32[value>>2];var str;if(stdStringIsUTF8){var decodeStartPtr=value+4;for(var i=0;i<=length;++i){var currentBytePtr=value+4+i;if(i==length||HEAPU8[currentBytePtr]==0){var maxRead=currentBytePtr-decodeStartPtr;var stringSegment=UTF8ToString(decodeStartPtr,maxRead);if(str===undefined){str=stringSegment}else{str+=String.fromCharCode(0);str+=stringSegment}decodeStartPtr=currentBytePtr+1}}}else{var a=new Array(length);for(var i=0;ilengthBytesUTF8(value))}else{getLength=(()=>value.length)}var length=getLength();var 
ptr=_malloc(4+length+1);HEAPU32[ptr>>2]=length;if(stdStringIsUTF8&&valueIsOfTypeString){stringToUTF8(value,ptr+4,length+1)}else{if(valueIsOfTypeString){for(var i=0;i255){_free(ptr);throwBindingError("String has UTF-16 code units that do not fit in 8 bits")}HEAPU8[ptr+4+i]=charCode}}else{for(var i=0;iHEAPU16);shift=1}else if(charSize===4){decodeString=UTF32ToString;encodeString=stringToUTF32;lengthBytesUTF=lengthBytesUTF32;getHeap=(()=>HEAPU32);shift=2}registerType(rawType,{name:name,"fromWireType":function(value){var length=HEAPU32[value>>2];var HEAP=getHeap();var str;var decodeStartPtr=value+4;for(var i=0;i<=length;++i){var currentBytePtr=value+4+i*charSize;if(i==length||HEAP[currentBytePtr>>shift]==0){var maxReadBytes=currentBytePtr-decodeStartPtr;var stringSegment=decodeString(decodeStartPtr,maxReadBytes);if(str===undefined){str=stringSegment}else{str+=String.fromCharCode(0);str+=stringSegment}decodeStartPtr=currentBytePtr+charSize}}_free(value);return str},"toWireType":function(destructors,value){if(!(typeof value=="string")){throwBindingError("Cannot pass non-string to C++ string type "+name)}var length=lengthBytesUTF(value);var ptr=_malloc(4+length+charSize);HEAPU32[ptr>>2]=length>>shift;encodeString(value,ptr+4,length+charSize);if(destructors!==null){destructors.push(_free,ptr)}return ptr},"argPackAdvance":8,"readValueFromPointer":simpleReadValueFromPointer,destructorFunction:function(ptr){_free(ptr)}})}function __embind_register_void(rawType,name){name=readLatin1String(name);registerType(rawType,{isVoid:true,name:name,"argPackAdvance":0,"fromWireType":function(){return undefined},"toWireType":function(destructors,o){return undefined}})}var nowIsMonotonic=true;function __emscripten_get_now_is_monotonic(){return nowIsMonotonic}function _abort(){abort("")}var _emscripten_get_now;if(ENVIRONMENT_IS_NODE){_emscripten_get_now=(()=>{var t=process["hrtime"]();return t[0]*1e3+t[1]/1e6})}else _emscripten_get_now=(()=>performance.now());function 
_emscripten_memcpy_big(dest,src,num){HEAPU8.copyWithin(dest,src,src+num)}function _emscripten_get_heap_max(){return 2147483648}function emscripten_realloc_buffer(size){try{wasmMemory.grow(size-buffer.byteLength+65535>>>16);updateGlobalBufferAndViews(wasmMemory.buffer);return 1}catch(e){}}function _emscripten_resize_heap(requestedSize){var oldSize=HEAPU8.length;requestedSize=requestedSize>>>0;var maxHeapSize=_emscripten_get_heap_max();if(requestedSize>maxHeapSize){return false}let alignUp=(x,multiple)=>x+(multiple-x%multiple)%multiple;for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignUp(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=emscripten_realloc_buffer(newSize);if(replacement){return true}}return false}var ENV={};function getExecutableName(){return thisProgram||"./this.program"}function getEnvStrings(){if(!getEnvStrings.strings){var lang=(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8";var env={"USER":"web_user","LOGNAME":"web_user","PATH":"/","PWD":"/","HOME":"/home/web_user","LANG":lang,"_":getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(x+"="+env[x])}getEnvStrings.strings=strings}return getEnvStrings.strings}var SYSCALLS={varargs:undefined,get:function(){SYSCALLS.varargs+=4;var ret=HEAP32[SYSCALLS.varargs-4>>2];return ret},getStr:function(ptr){var ret=UTF8ToString(ptr);return ret}};function _environ_get(__environ,environ_buf){var bufSize=0;getEnvStrings().forEach(function(string,i){var ptr=environ_buf+bufSize;HEAP32[__environ+i*4>>2]=ptr;writeAsciiToMemory(string,ptr);bufSize+=string.length+1});return 0}function _environ_sizes_get(penviron_count,penviron_buf_size){var strings=getEnvStrings();HEAP32[penviron_count>>2]=strings.length;var 
bufSize=0;strings.forEach(function(string){bufSize+=string.length+1});HEAP32[penviron_buf_size>>2]=bufSize;return 0}function __isLeapYear(year){return year%4===0&&(year%100!==0||year%400===0)}function __arraySum(array,index){var sum=0;for(var i=0;i<=index;sum+=array[i++]){}return sum}var __MONTH_DAYS_LEAP=[31,29,31,30,31,30,31,31,30,31,30,31];var __MONTH_DAYS_REGULAR=[31,28,31,30,31,30,31,31,30,31,30,31];function __addDays(date,days){var newDate=new Date(date.getTime());while(days>0){var leap=__isLeapYear(newDate.getFullYear());var currentMonth=newDate.getMonth();var daysInCurrentMonth=(leap?__MONTH_DAYS_LEAP:__MONTH_DAYS_REGULAR)[currentMonth];if(days>daysInCurrentMonth-newDate.getDate()){days-=daysInCurrentMonth-newDate.getDate()+1;newDate.setDate(1);if(currentMonth<11){newDate.setMonth(currentMonth+1)}else{newDate.setMonth(0);newDate.setFullYear(newDate.getFullYear()+1)}}else{newDate.setDate(newDate.getDate()+days);return newDate}}return newDate}function _strftime(s,maxsize,format,tm){var tm_zone=HEAP32[tm+40>>2];var date={tm_sec:HEAP32[tm>>2],tm_min:HEAP32[tm+4>>2],tm_hour:HEAP32[tm+8>>2],tm_mday:HEAP32[tm+12>>2],tm_mon:HEAP32[tm+16>>2],tm_year:HEAP32[tm+20>>2],tm_wday:HEAP32[tm+24>>2],tm_yday:HEAP32[tm+28>>2],tm_isdst:HEAP32[tm+32>>2],tm_gmtoff:HEAP32[tm+36>>2],tm_zone:tm_zone?UTF8ToString(tm_zone):""};var pattern=UTF8ToString(format);var EXPANSION_RULES_1={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"};for(var rule in EXPANSION_RULES_1){pattern=pattern.replace(new RegExp(rule,"g"),EXPANSION_RULES_1[rule])}var WEEKDAYS=["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"];var 
MONTHS=["January","February","March","April","May","June","July","August","September","October","November","December"];function leadingSomething(value,digits,character){var str=typeof value=="number"?value.toString():value||"";while(str.length0?1:0}var compare;if((compare=sgn(date1.getFullYear()-date2.getFullYear()))===0){if((compare=sgn(date1.getMonth()-date2.getMonth()))===0){compare=sgn(date1.getDate()-date2.getDate())}}return compare}function getFirstWeekStartDate(janFourth){switch(janFourth.getDay()){case 0:return new Date(janFourth.getFullYear()-1,11,29);case 1:return janFourth;case 2:return new Date(janFourth.getFullYear(),0,3);case 3:return new Date(janFourth.getFullYear(),0,2);case 4:return new Date(janFourth.getFullYear(),0,1);case 5:return new Date(janFourth.getFullYear()-1,11,31);case 6:return new Date(janFourth.getFullYear()-1,11,30)}}function getWeekBasedYear(date){var thisDate=__addDays(new Date(date.tm_year+1900,0,1),date.tm_yday);var janFourthThisYear=new Date(thisDate.getFullYear(),0,4);var janFourthNextYear=new Date(thisDate.getFullYear()+1,0,4);var firstWeekStartThisYear=getFirstWeekStartDate(janFourthThisYear);var firstWeekStartNextYear=getFirstWeekStartDate(janFourthNextYear);if(compareByDay(firstWeekStartThisYear,thisDate)<=0){if(compareByDay(firstWeekStartNextYear,thisDate)<=0){return thisDate.getFullYear()+1}else{return thisDate.getFullYear()}}else{return thisDate.getFullYear()-1}}var EXPANSION_RULES_2={"%a":function(date){return WEEKDAYS[date.tm_wday].substring(0,3)},"%A":function(date){return WEEKDAYS[date.tm_wday]},"%b":function(date){return MONTHS[date.tm_mon].substring(0,3)},"%B":function(date){return MONTHS[date.tm_mon]},"%C":function(date){var year=date.tm_year+1900;return leadingNulls(year/100|0,2)},"%d":function(date){return leadingNulls(date.tm_mday,2)},"%e":function(date){return leadingSomething(date.tm_mday,2," ")},"%g":function(date){return getWeekBasedYear(date).toString().substring(2)},"%G":function(date){return 
getWeekBasedYear(date)},"%H":function(date){return leadingNulls(date.tm_hour,2)},"%I":function(date){var twelveHour=date.tm_hour;if(twelveHour==0)twelveHour=12;else if(twelveHour>12)twelveHour-=12;return leadingNulls(twelveHour,2)},"%j":function(date){return leadingNulls(date.tm_mday+__arraySum(__isLeapYear(date.tm_year+1900)?__MONTH_DAYS_LEAP:__MONTH_DAYS_REGULAR,date.tm_mon-1),3)},"%m":function(date){return leadingNulls(date.tm_mon+1,2)},"%M":function(date){return leadingNulls(date.tm_min,2)},"%n":function(){return"\n"},"%p":function(date){if(date.tm_hour>=0&&date.tm_hour<12){return"AM"}else{return"PM"}},"%S":function(date){return leadingNulls(date.tm_sec,2)},"%t":function(){return"\t"},"%u":function(date){return date.tm_wday||7},"%U":function(date){var days=date.tm_yday+7-date.tm_wday;return leadingNulls(Math.floor(days/7),2)},"%V":function(date){var val=Math.floor((date.tm_yday+7-(date.tm_wday+6)%7)/7);if((date.tm_wday+371-date.tm_yday-2)%7<=2){val++}if(!val){val=52;var dec31=(date.tm_wday+7-date.tm_yday-1)%7;if(dec31==4||dec31==5&&__isLeapYear(date.tm_year%400-1)){val++}}else if(val==53){var jan1=(date.tm_wday+371-date.tm_yday)%7;if(jan1!=4&&(jan1!=3||!__isLeapYear(date.tm_year)))val=1}return leadingNulls(val,2)},"%w":function(date){return date.tm_wday},"%W":function(date){var days=date.tm_yday+7-(date.tm_wday+6)%7;return leadingNulls(Math.floor(days/7),2)},"%y":function(date){return(date.tm_year+1900).toString().substring(2)},"%Y":function(date){return date.tm_year+1900},"%z":function(date){var off=date.tm_gmtoff;var ahead=off>=0;off=Math.abs(off)/60;off=off/60*100+off%60;return(ahead?"+":"-")+String("0000"+off).slice(-4)},"%Z":function(date){return date.tm_zone},"%%":function(){return"%"}};pattern=pattern.replace(/%%/g,"\0\0");for(var rule in EXPANSION_RULES_2){if(pattern.includes(rule)){pattern=pattern.replace(new RegExp(rule,"g"),EXPANSION_RULES_2[rule](date))}}pattern=pattern.replace(/\0\0/g,"%");var 
bytes=intArrayFromString(pattern,false);if(bytes.length>maxsize){return 0}writeArrayToMemory(bytes,s);return bytes.length-1}function _strftime_l(s,maxsize,format,tm){return _strftime(s,maxsize,format,tm)}embind_init_charCodes();BindingError=Module["BindingError"]=extendError(Error,"BindingError");InternalError=Module["InternalError"]=extendError(Error,"InternalError");init_emval();UnboundTypeError=Module["UnboundTypeError"]=extendError(Error,"UnboundTypeError");function intArrayFromString(stringy,dontAddNull,length){var len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array}var asmLibraryArg={"b":___assert_fail,"e":___cxa_allocate_exception,"d":___cxa_throw,"l":__embind_register_bigint,"j":__embind_register_bool,"t":__embind_register_emval,"i":__embind_register_float,"u":__embind_register_function,"c":__embind_register_integer,"a":__embind_register_memory_view,"h":__embind_register_std_string,"f":__embind_register_std_wstring,"k":__embind_register_void,"p":__emscripten_get_now_is_monotonic,"g":_abort,"r":_emscripten_get_now,"s":_emscripten_memcpy_big,"q":_emscripten_resize_heap,"n":_environ_get,"o":_environ_sizes_get,"m":_strftime_l};var asm=createWasm();var ___wasm_call_ctors=Module["___wasm_call_ctors"]=function(){return(___wasm_call_ctors=Module["___wasm_call_ctors"]=Module["asm"]["w"]).apply(null,arguments)};var _malloc=Module["_malloc"]=function(){return(_malloc=Module["_malloc"]=Module["asm"]["x"]).apply(null,arguments)};var ___getTypeName=Module["___getTypeName"]=function(){return(___getTypeName=Module["___getTypeName"]=Module["asm"]["z"]).apply(null,arguments)};var 
___embind_register_native_and_builtin_types=Module["___embind_register_native_and_builtin_types"]=function(){return(___embind_register_native_and_builtin_types=Module["___embind_register_native_and_builtin_types"]=Module["asm"]["A"]).apply(null,arguments)};var _free=Module["_free"]=function(){return(_free=Module["_free"]=Module["asm"]["B"]).apply(null,arguments)};var ___cxa_is_pointer_type=Module["___cxa_is_pointer_type"]=function(){return(___cxa_is_pointer_type=Module["___cxa_is_pointer_type"]=Module["asm"]["C"]).apply(null,arguments)};var dynCall_viijii=Module["dynCall_viijii"]=function(){return(dynCall_viijii=Module["dynCall_viijii"]=Module["asm"]["D"]).apply(null,arguments)};var dynCall_iiiiij=Module["dynCall_iiiiij"]=function(){return(dynCall_iiiiij=Module["dynCall_iiiiij"]=Module["asm"]["E"]).apply(null,arguments)};var dynCall_iiiiijj=Module["dynCall_iiiiijj"]=function(){return(dynCall_iiiiijj=Module["dynCall_iiiiijj"]=Module["asm"]["F"]).apply(null,arguments)};var dynCall_iiiiiijj=Module["dynCall_iiiiiijj"]=function(){return(dynCall_iiiiiijj=Module["dynCall_iiiiiijj"]=Module["asm"]["G"]).apply(null,arguments)};var calledRun;function ExitStatus(status){this.name="ExitStatus";this.message="Program terminated with exit("+status+")";this.status=status}dependenciesFulfilled=function runCaller(){if(!calledRun)run();if(!calledRun)dependenciesFulfilled=runCaller};function run(args){args=args||arguments_;if(runDependencies>0){return}preRun();if(runDependencies>0){return}function doRun(){if(calledRun)return;calledRun=true;Module["calledRun"]=true;if(ABORT)return;initRuntime();if(Module["onRuntimeInitialized"])Module["onRuntimeInitialized"]();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(function(){setTimeout(function(){Module["setStatus"]("")},1);doRun()},1)}else{doRun()}}Module["run"]=run;if(Module["preInit"]){if(typeof 
Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}run(); 2 | -------------------------------------------------------------------------------- /docs/native.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhirose/cpp-peglib/16685ba0fe2574a4f4786dec93ffa21158e728e6/docs/native.wasm -------------------------------------------------------------------------------- /docs/style.css: -------------------------------------------------------------------------------- 1 | * { 2 | box-sizing: border-box; 3 | margin: 0; 4 | padding: 0; 5 | text-decoration: none; 6 | list-style: none; 7 | } 8 | body { 9 | display: flex; 10 | flex-direction: column; 11 | height: 100vh; 12 | } 13 | #main { 14 | flex: 1; 15 | display: none; 16 | z-index: 0; 17 | } 18 | .editor-container { 19 | flex: 1; 20 | width: 100%; 21 | display: flex; 22 | flex-direction: column; 23 | margin: 6px; 24 | } 25 | .editor-container:first-child { 26 | margin-right: 0; 27 | } 28 | .editor-header { 29 | display: flex; 30 | margin: 0 2px; 31 | } 32 | .editor-header > li { 33 | height: 32px; 34 | line-height: 24px; 35 | } 36 | .editor-header > li > span { 37 | margin-right: 6px; 38 | } 39 | .editor-options { 40 | margin-left: auto; 41 | } 42 | .editor-header-options { 43 | display: flex; 44 | } 45 | .validation { 46 | display: inline-block; 47 | height: 20px; 48 | width: 20px; 49 | margin: 2px 0; 50 | border-radius: 50%; 51 | background-color: lightgreen; 52 | } 53 | .validation-invalid { 54 | background-color: pink; 55 | } 56 | .option { 57 | margin-right: 8px; 58 | } 59 | .option:last-child { 60 | margin-right: 0; 61 | } 62 | .option input[type=checkbox] { 63 | margin-right: 4px; 64 | } 65 | .option .parse { 66 | padding-left: 8px; 67 | padding-right: 8px; 68 | height: 24px; 69 | cursor: pointer; 70 | } 71 | .editor-area { 72 | flex: 1; 73 | border: 1px solid lightgray; 
74 | } 75 | .editor-info { 76 | margin-top: 6px; 77 | height: 160px; 78 | border: 1px solid lightgray; 79 | padding: 8px; 80 | overflow-y: auto; 81 | } 82 | .editor-info li { 83 | cursor: pointer; 84 | } 85 | .editor-info li:hover{ 86 | background-color: lightyellow; 87 | } 88 | .editor-sub-header { 89 | padding: 4px; 90 | } 91 | .show-toggle { 92 | margin-right: 6px; 93 | } 94 | #overlay { 95 | position: absolute; 96 | width: 100vw; 97 | height: 100vh; 98 | cursor: wait; 99 | display: none; 100 | z-index: -1; 101 | } 102 | -------------------------------------------------------------------------------- /example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(example) 3 | 4 | add_executable(calc calc.cc) 5 | target_include_directories(calc PRIVATE ..) 6 | target_link_libraries(calc ${add_link_deps}) 7 | 8 | add_executable(calc2 calc2.cc) 9 | target_include_directories(calc2 PRIVATE ..) 10 | target_link_libraries(calc2 ${add_link_deps}) 11 | 12 | add_executable(calc3 calc3.cc) 13 | target_include_directories(calc3 PRIVATE ..) 14 | target_link_libraries(calc3 ${add_link_deps}) 15 | 16 | add_executable(calc4 calc4.cc) 17 | target_include_directories(calc4 PRIVATE ..) 18 | target_link_libraries(calc4 ${add_link_deps}) 19 | 20 | add_executable(calc5 calc5.cc) 21 | target_include_directories(calc5 PRIVATE ..) 22 | target_link_libraries(calc5 ${add_link_deps}) 23 | 24 | add_executable(indent indent.cc) 25 | target_include_directories(indent PRIVATE ..) 26 | target_link_libraries(indent ${add_link_deps}) 27 | 28 | add_executable(docx docx.cc) 29 | target_include_directories(docx PRIVATE ..) 30 | target_link_libraries(docx ${add_link_deps}) 31 | 32 | add_executable(sequence sequence.cc) 33 | target_include_directories(sequence PRIVATE ..) 
34 | target_link_libraries(sequence ${add_link_deps}) 35 | 36 | add_executable(enter_leave enter_leave.cc) 37 | target_include_directories(enter_leave PRIVATE ..) 38 | target_link_libraries(enter_leave ${add_link_deps}) 39 | 40 | add_executable(choice choice.cc) 41 | target_include_directories(choice PRIVATE ..) 42 | target_link_libraries(choice ${add_link_deps}) 43 | -------------------------------------------------------------------------------- /example/calc.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace peg; 6 | using namespace std; 7 | 8 | int main(void) { 9 | // (2) Make a parser 10 | parser parser(R"( 11 | # Grammar for Calculator... 12 | Additive <- Multiplicative '+' Additive / Multiplicative 13 | Multiplicative <- Primary '*' Multiplicative^cond / Primary 14 | Primary <- '(' Additive ')' / Number 15 | Number <- < [0-9]+ > 16 | %whitespace <- [ \t]* 17 | cond <- '' { error_message "missing multiplicative" } 18 | )"); 19 | 20 | assert(static_cast(parser) == true); 21 | 22 | // (3) Setup actions 23 | parser["Additive"] = [](const SemanticValues &vs) { 24 | switch (vs.choice()) { 25 | case 0: // "Multiplicative '+' Additive" 26 | return any_cast(vs[0]) + any_cast(vs[1]); 27 | default: // "Multiplicative" 28 | return any_cast(vs[0]); 29 | } 30 | }; 31 | 32 | parser["Multiplicative"] = [](const SemanticValues &vs) { 33 | switch (vs.choice()) { 34 | case 0: // "Primary '*' Multiplicative" 35 | return any_cast(vs[0]) * any_cast(vs[1]); 36 | default: // "Primary" 37 | return any_cast(vs[0]); 38 | } 39 | }; 40 | 41 | parser["Number"] = [](const SemanticValues &vs) { 42 | return vs.token_to_number(); 43 | }; 44 | 45 | // (4) Parse 46 | parser.enable_packrat_parsing(); // Enable packrat parsing. 
47 | 48 | int val = 0; 49 | parser.parse(" (1 + 2) * ", val); 50 | 51 | // assert(val == 9); 52 | assert(val == 0); 53 | } 54 | -------------------------------------------------------------------------------- /example/calc2.cc: -------------------------------------------------------------------------------- 1 | // 2 | // calc2.cc 3 | // 4 | // Copyright (c) 2015 Yuji Hirose. All rights reserved. 5 | // MIT License 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace peg; 13 | 14 | // 15 | // PEG syntax: 16 | // 17 | // EXPRESSION <- TERM (TERM_OPERATOR TERM)* 18 | // TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 19 | // FACTOR <- NUMBER / '(' EXPRESSION ')' 20 | // TERM_OPERATOR <- [-+] 21 | // FACTOR_OPERATOR <- [/*] 22 | // NUMBER <- [0-9]+ 23 | // 24 | int main(int argc, const char **argv) { 25 | if (argc < 2 || std::string("--help") == argv[1]) { 26 | std::cout << "usage: calc [formula]" << std::endl; 27 | return 1; 28 | } 29 | 30 | auto reduce = [](const SemanticValues &vs) { 31 | auto result = std::any_cast(vs[0]); 32 | for (auto i = 1u; i < vs.size(); i += 2) { 33 | auto num = std::any_cast(vs[i + 1]); 34 | auto ope = std::any_cast(vs[i]); 35 | switch (ope) { 36 | case '+': result += num; break; 37 | case '-': result -= num; break; 38 | case '*': result *= num; break; 39 | case '/': result /= num; break; 40 | } 41 | } 42 | return result; 43 | }; 44 | 45 | Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; 46 | 47 | EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; 48 | TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce; 49 | FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); 50 | TERM_OPERATOR <= cls("+-"), 51 | [](const SemanticValues &vs) { return static_cast(*vs.sv().data()); }; 52 | FACTOR_OPERATOR <= cls("*/"), 53 | [](const SemanticValues &vs) { return static_cast(*vs.sv().data()); }; 54 | NUMBER <= oom(cls("0-9")), 55 | [](const SemanticValues &vs) { return 
vs.token_to_number(); }; 56 | 57 | auto expr = argv[1]; 58 | long val = 0; 59 | if (EXPRESSION.parse_and_get_value(expr, val).ret) { 60 | std::cout << expr << " = " << val << std::endl; 61 | return 0; 62 | } 63 | 64 | return -1; 65 | } 66 | 67 | // vim: et ts=4 sw=4 cin cino={1s ff=unix 68 | -------------------------------------------------------------------------------- /example/calc3.cc: -------------------------------------------------------------------------------- 1 | // 2 | // calc3.cc 3 | // 4 | // Copyright (c) 2015 Yuji Hirose. All rights reserved. 5 | // MIT License 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace peg; 13 | 14 | int main(int argc, const char **argv) { 15 | if (argc < 2 || std::string("--help") == argv[1]) { 16 | std::cout << "usage: calc3 [formula]" << std::endl; 17 | return 1; 18 | } 19 | 20 | std::function eval = [&](const Ast &ast) { 21 | if (ast.name == "NUMBER") { 22 | return ast.token_to_number(); 23 | } else { 24 | const auto &nodes = ast.nodes; 25 | auto result = eval(*nodes[0]); 26 | for (auto i = 1u; i < nodes.size(); i += 2) { 27 | auto num = eval(*nodes[i + 1]); 28 | auto ope = nodes[i]->token[0]; 29 | switch (ope) { 30 | case '+': result += num; break; 31 | case '-': result -= num; break; 32 | case '*': result *= num; break; 33 | case '/': result /= num; break; 34 | } 35 | } 36 | return result; 37 | } 38 | }; 39 | 40 | parser parser(R"( 41 | EXPRESSION <- TERM (TERM_OPERATOR TERM)* 42 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 43 | FACTOR <- NUMBER / '(' EXPRESSION ')' 44 | 45 | TERM_OPERATOR <- < [-+] > 46 | FACTOR_OPERATOR <- < [/*] > 47 | NUMBER <- < [0-9]+ > 48 | 49 | %whitespace <- [ \t\r\n]* 50 | )"); 51 | 52 | parser.enable_ast(); 53 | 54 | auto expr = argv[1]; 55 | std::shared_ptr ast; 56 | if (parser.parse(expr, ast)) { 57 | ast = parser.optimize_ast(ast); 58 | std::cout << ast_to_s(ast); 59 | std::cout << expr << " = " << eval(*ast) << std::endl; 60 | return 0; 61 | } 62 | 63 | std::cout 
<< "syntax error..." << std::endl; 64 | 65 | return -1; 66 | } 67 | 68 | // vim: et ts=4 sw=4 cin cino={1s ff=unix 69 | -------------------------------------------------------------------------------- /example/calc4.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace peg; 6 | using namespace std; 7 | 8 | int main(void) { 9 | parser parser(R"( 10 | EXPRESSION <- ATOM (OPERATOR ATOM)* { 11 | precedence 12 | L - + 13 | L / * 14 | } 15 | ATOM <- NUMBER / '(' EXPRESSION ')' 16 | OPERATOR <- < [-+/*] > 17 | NUMBER <- < '-'? [0-9]+ > 18 | %whitespace <- [ \t\r\n]* 19 | )"); 20 | 21 | parser["EXPRESSION"] = [](const SemanticValues &vs) { 22 | auto result = any_cast(vs[0]); 23 | if (vs.size() > 1) { 24 | auto ope = any_cast(vs[1]); 25 | auto num = any_cast(vs[2]); 26 | switch (ope) { 27 | case '+': result += num; break; 28 | case '-': result -= num; break; 29 | case '*': result *= num; break; 30 | case '/': result /= num; break; 31 | } 32 | } 33 | return result; 34 | }; 35 | parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; 36 | parser["NUMBER"] = [](const SemanticValues &vs) { return atol(vs.sv().data()); }; 37 | 38 | long val; 39 | parser.parse(" -1 + (1 + 2) * 3 - -1", val); 40 | 41 | assert(val == 9); 42 | } 43 | -------------------------------------------------------------------------------- /example/calc5.cc: -------------------------------------------------------------------------------- 1 | // 2 | // calc5.cc 3 | // 4 | // Copyright (c) 2015 Yuji Hirose. All rights reserved. 
5 | // MIT License 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace peg; 13 | 14 | int main(int argc, const char **argv) { 15 | if (argc < 2 || std::string("--help") == argv[1]) { 16 | std::cout << "usage: calc5 [formula]" << std::endl; 17 | return 1; 18 | } 19 | 20 | std::function eval = [&](const Ast &ast) { 21 | if (ast.name == "NUMBER") { 22 | return ast.token_to_number(); 23 | } else { 24 | const auto &nodes = ast.nodes; 25 | auto result = eval(*nodes[0]); 26 | if (nodes.size() > 1) { 27 | auto ope = nodes[1]->token[0]; 28 | auto num = eval(*nodes[2]); 29 | switch (ope) { 30 | case '+': result += num; break; 31 | case '-': result -= num; break; 32 | case '*': result *= num; break; 33 | case '/': result /= num; break; 34 | } 35 | } 36 | return result; 37 | } 38 | }; 39 | 40 | parser parser(R"( 41 | EXPRESSION <- ATOM (OPERATOR ATOM)* { 42 | precedence 43 | L - + 44 | L / * 45 | } 46 | ATOM <- NUMBER / '(' EXPRESSION ')' 47 | OPERATOR <- < [-+/*] > 48 | NUMBER <- < '-'? [0-9]+ > 49 | %whitespace <- [ \t\r\n]* 50 | )"); 51 | 52 | parser.enable_ast(); 53 | 54 | auto expr = argv[1]; 55 | std::shared_ptr ast; 56 | if (parser.parse(expr, ast)) { 57 | ast = parser.optimize_ast(ast); 58 | std::cout << ast_to_s(ast); 59 | std::cout << expr << " = " << eval(*ast) << std::endl; 60 | return 0; 61 | } 62 | 63 | std::cout << "syntax error..." << std::endl; 64 | 65 | return -1; 66 | } 67 | 68 | // vim: et ts=4 sw=4 cin cino={1s ff=unix 69 | -------------------------------------------------------------------------------- /example/choice.cc: -------------------------------------------------------------------------------- 1 | // 2 | // choice.cc 3 | // 4 | // Copyright (c) 2023 Yuji Hirose. All rights reserved. 
5 | // MIT License 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace peg; 13 | 14 | int main(void) { 15 | parser parser(R"( 16 | type <- 'string' / 'int' / 'double' 17 | %whitespace <- [ \t\r\n]* 18 | )"); 19 | 20 | parser["type"] = [](const SemanticValues &vs) { 21 | std::cout << vs.choice() << std::endl; 22 | }; 23 | 24 | if (parser.parse("int")) { return 0; } 25 | 26 | std::cout << "syntax error..." << std::endl; 27 | return -1; 28 | } 29 | -------------------------------------------------------------------------------- /example/enter_leave.cc: -------------------------------------------------------------------------------- 1 | // 2 | // enter_leave.cc 3 | // 4 | // Copyright (c) 2023 Yuji Hirose. All rights reserved. 5 | // MIT License 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace peg; 13 | 14 | int main(void) { 15 | parser parser(R"( 16 | S <- A+ 17 | A <- 'A' 18 | )"); 19 | 20 | parser["A"].enter = [](const Context & /*c*/, const char * /*s*/, 21 | size_t /*n*/, std::any & /*dt*/) { 22 | std::cout << "enter" << std::endl; 23 | }; 24 | 25 | parser["A"] = [](const SemanticValues & /*vs*/, std::any & /*dt*/) { 26 | std::cout << "action!" << std::endl; 27 | }; 28 | 29 | parser["A"].leave = [](const Context & /*c*/, const char * /*s*/, 30 | size_t /*n*/, size_t /*matchlen*/, 31 | std::any & /*value*/, std::any & /*dt*/) { 32 | std::cout << "leave" << std::endl; 33 | }; 34 | 35 | if (parser.parse("A")) { return 0; } 36 | 37 | std::cout << "syntax error..." << std::endl; 38 | return -1; 39 | } 40 | -------------------------------------------------------------------------------- /example/indent.cc: -------------------------------------------------------------------------------- 1 | // 2 | // indent.cc 3 | // 4 | // Copyright (c) 2022 Yuji Hirose. All rights reserved. 
5 | // MIT License 6 | // 7 | 8 | // Based on https://gist.github.com/dmajda/04002578dd41ae8190fc 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace peg; 15 | 16 | int main(void) { 17 | parser parser(R"(Start <- Statements {} 18 | Statements <- Statement* 19 | Statement <- Samedent (S / I) 20 | 21 | S <- 'S' EOS { no_ast_opt } 22 | I <- 'I' EOL Block / 'I' EOS { no_ast_opt } 23 | 24 | Block <- Statements {} 25 | 26 | ~Samedent <- ' '* {} 27 | 28 | ~EOS <- EOL / EOF 29 | ~EOL <- '\n' 30 | ~EOF <- !. 31 | )"); 32 | 33 | size_t indent = 0; 34 | 35 | parser["Block"].enter = [&](const Context & /*c*/, const char * /*s*/, 36 | size_t /*n*/, std::any & /*dt*/) { indent += 2; }; 37 | 38 | parser["Block"].leave = [&](const Context & /*c*/, const char * /*s*/, 39 | size_t /*n*/, size_t /*matchlen*/, 40 | std::any & /*value*/, 41 | std::any & /*dt*/) { indent -= 2; }; 42 | 43 | parser["Samedent"].predicate = 44 | [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) { 45 | if (indent != vs.sv().size()) { 46 | msg = "different indent..."; 47 | return false; 48 | } 49 | return true; 50 | }; 51 | 52 | parser.enable_ast(); 53 | 54 | const auto source = R"(I 55 | S 56 | I 57 | I 58 | S 59 | S 60 | S 61 | S 62 | )"; 63 | 64 | std::shared_ptr ast; 65 | if (parser.parse(source, ast)) { 66 | ast = parser.optimize_ast(ast); 67 | std::cout << ast_to_s(ast); 68 | return 0; 69 | } 70 | 71 | std::cout << "syntax error..." << std::endl; 72 | return -1; 73 | } 74 | -------------------------------------------------------------------------------- /example/sequence.cc: -------------------------------------------------------------------------------- 1 | // 2 | // sequence.cc 3 | // 4 | // Copyright (c) 2023 Yuji Hirose. All rights reserved. 
5 | // MIT License 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace peg; 13 | 14 | int main(void) { 15 | parser parser(R"( 16 | START <- SEQUENCE_A 17 | SEQUENCE_A <- SEQUENCE('A') 18 | SEQUENCE(X) <- X (',' X)* 19 | )"); 20 | 21 | parser["SEQUENCE_A"] = [](const SemanticValues & /*vs*/) { 22 | std::cout << "SEQUENCE_A" << std::endl; 23 | }; 24 | 25 | if (parser.parse("A,A")) { return 0; } 26 | 27 | std::cout << "syntax error..." << std::endl; 28 | return -1; 29 | } 30 | -------------------------------------------------------------------------------- /grammar/cpp-peglib.peg: -------------------------------------------------------------------------------- 1 | # Setup PEG syntax parser 2 | Grammar <- Spacing Definition+ EndOfFile 3 | 4 | Definition <- 5 | Ignore IdentCont Parameters LEFTARROW Expression Instruction? 6 | / Ignore Identifier LEFTARROW Expression Instruction? 7 | 8 | Expression <- Sequence (SLASH Sequence)* 9 | 10 | Sequence <- (CUT / Prefix)* 11 | 12 | Prefix <- (AND / NOT)? SuffixWithLabel 13 | 14 | SuffixWithLabel <- Suffix (LABEL Identifier)? 15 | 16 | Suffix <- Primary Loop? 17 | 18 | Loop <- QUESTION / STAR / PLUS / Repetition 19 | 20 | Primary <- 21 | Ignore IdentCont Arguments !LEFTARROW 22 | / Ignore Identifier !(Parameters? 
LEFTARROW) 23 | / OPEN Expression CLOSE 24 | / BeginTok Expression EndTok 25 | / BeginCapScope Expression EndCapScope 26 | / BeginCap Expression EndCap 27 | / CapScope 28 | / BackRef 29 | / DictionaryI 30 | / LiteralI 31 | / Dictionary 32 | / Literal 33 | / NegatedClassI 34 | / NegatedClass 35 | / ClassI 36 | / Class 37 | / DOT 38 | 39 | Identifier <- IdentCont Spacing 40 | 41 | IdentCont <- 42 | 43 | IdentStart <- !"↑" !"⇑" ([a-zA-Z_%] / [\u0080-\uFFFF]) 44 | 45 | IdentRest <- IdentStart / [0-9] 46 | 47 | Dictionary <- LiteralD (PIPE LiteralD)+ 48 | 49 | DictionaryI <- LiteralID (PIPE LiteralID)* 50 | 51 | lit_ope <- 52 | ['] <(!['] Char)*> ['] Spacing 53 | / ["] <(!["] Char)*> ["] Spacing 54 | 55 | Literal <- lit_ope 56 | 57 | LiteralD <- lit_ope 58 | 59 | lit_case_ignore_ope <- 60 | ['] <(!['] Char)*> "'i" Spacing 61 | / ["] <(!["] Char)*> '"i' Spacing 62 | 63 | LiteralI <- lit_case_ignore_ope 64 | 65 | LiteralID <- lit_case_ignore_ope 66 | 67 | # NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'. 68 | Class <- '[' !'^' <(!']' Range)+> ']' Spacing 69 | ClassI <- '[' !'^' <(!']' Range)+> ']i' Spacing 70 | NegatedClass <- "[^" <(!']' Range)+> ']' Spacing 71 | NegatedClassI <- "[^" <(!']' Range)+> ']i' Spacing 72 | 73 | Range <- (Char '-' ! ']' Char) / Char 74 | 75 | Char <- 76 | '\\' [nrt'\"[\]\\^] 77 | / '\\' [0-3] [0-7] [0-7] 78 | / '\\' [0-7] [0-7]? 79 | / "\\x" [0-9a-fA-F] [0-9a-fA-F]? 80 | / "\\u" (((('0' [0-9a-fA-F]) / "10") [0-9a-fA-F]{4,4}) / [0-9a-fA-F]{4,5}) 81 | / !'\\' . 82 | 83 | Repetition <- BeginBracket RepetitionRange EndBracket 84 | 85 | RepetitionRange <- 86 | Number COMMA Number 87 | / Number COMMA 88 | / Number 89 | / COMMA Number 90 | 91 | Number <- [0-9]+ Spacing 92 | 93 | CapScope <- BeginCapScope Expression EndCapScope 94 | 95 | LEFTARROW <- ("<-" / "←") Spacing 96 | 97 | ~SLASH <- '/' Spacing 98 | ~PIPE <- '|' Spacing 99 | AND <- '&' Spacing 100 | NOT <- '!' Spacing 101 | QUESTION <- '?' 
Spacing 102 | STAR <- '*' Spacing 103 | PLUS <- '+' Spacing 104 | ~OPEN <- '(' Spacing 105 | ~CLOSE <- ')' Spacing 106 | DOT <- '.' Spacing 107 | 108 | CUT <- "↑" Spacing 109 | ~LABEL <- ('^' / "⇑") Spacing 110 | 111 | ~Spacing <- (Space / Comment)* 112 | Comment <- '#' (!EndOfLine . )* 113 | Space <- ' ' / '\t' / EndOfLine 114 | EndOfLine <- "\r\n" / '\n' / '\r' 115 | EndOfFile <- ! . 116 | 117 | ~BeginTok <- '<' Spacing 118 | ~EndTok <- '>' Spacing 119 | 120 | ~BeginCapScope <- '$' '(' Spacing 121 | ~EndCapScope <- ')' Spacing 122 | 123 | BeginCap <- '$' '<' Spacing 124 | ~EndCap <- '>' Spacing 125 | 126 | BackRef <- '$' Spacing 127 | 128 | IGNORE <- '~' 129 | 130 | Ignore <- IGNORE? 131 | Parameters <- OPEN Identifier (COMMA Identifier)* CLOSE 132 | Arguments <- OPEN Expression (COMMA Expression)* CLOSE 133 | ~COMMA <- ',' Spacing 134 | 135 | # Instruction grammars 136 | Instruction <- 137 | BeginBracket (InstructionItem (InstructionItemSeparator InstructionItem)*)? EndBracket 138 | InstructionItem <- PrecedenceClimbing / ErrorMessage / NoAstOpt 139 | ~InstructionItemSeparator <- ';' Spacing 140 | 141 | ~SpacesZom <- Space* 142 | ~SpacesOom <- Space+ 143 | ~BeginBracket <- '{' Spacing 144 | ~EndBracket <- '}' Spacing 145 | 146 | # PrecedenceClimbing instruction 147 | PrecedenceClimbing <- "precedence" SpacesOom PrecedenceInfo (SpacesOom PrecedenceInfo)* SpacesZom 148 | PrecedenceInfo <- PrecedenceAssoc (~SpacesOom PrecedenceOpe)+ 149 | PrecedenceOpe <- 150 | ['] <(!(Space / [']) Char)*> ['] 151 | / ["] <(!(Space / ["]) Char)*> ["] 152 | / <(!(PrecedenceAssoc / Space / '}') . 
)+> 153 | PrecedenceAssoc <- [LR] 154 | 155 | # Error message instruction 156 | ErrorMessage <- "message" SpacesOom LiteralD SpacesZom 157 | 158 | # No Ast node optimization instruction 159 | NoAstOpt <- "no_ast_opt" SpacesZom 160 | -------------------------------------------------------------------------------- /grammar/csv.peg: -------------------------------------------------------------------------------- 1 | # CSV grammar based on RFC 4180 (http://www.ietf.org/rfc/rfc4180.txt) 2 | 3 | file <- (header NL)? record (NL record)* NL? 4 | header <- name (COMMA name)* 5 | record <- field (COMMA field)* 6 | name <- field 7 | field <- escaped / non_escaped 8 | escaped <- DQUOTE (TEXTDATA / COMMA / CR / LF / D_DQUOTE)* DQUOTE 9 | non_escaped <- TEXTDATA* 10 | COMMA <- ',' 11 | CR <- '\r' 12 | DQUOTE <- '"' 13 | LF <- '\n' 14 | NL <- CR LF / CR / LF 15 | TEXTDATA <- !([",] / NL) . 16 | D_DQUOTE <- '"' '"' 17 | -------------------------------------------------------------------------------- /grammar/json.peg: -------------------------------------------------------------------------------- 1 | # JSON grammar based on RFC 4627 (http://www.ietf.org/rfc/rfc4627.txt) 2 | 3 | json <- object / array 4 | 5 | object <- '{' (member (',' member)*)? '}' { no_ast_opt } 6 | member <- string ':' value 7 | 8 | array <- '[' (value (',' value)*)? ']' 9 | 10 | value <- boolean / null / number / string / object / array 11 | 12 | boolean <- 'false' / 'true' 13 | null <- 'null' 14 | 15 | number <- < minus int frac exp > 16 | minus <- '-'? 17 | int <- '0' / [1-9][0-9]* 18 | frac <- ('.' [0-9]+)? 19 | exp <- ([eE] [-+]? [0-9]+)? 
20 | 21 | string <- '"' < char* > '"' 22 | char <- unescaped / escaped 23 | escaped <- '\\' (["\\/bfnrt] / 'u' [a-fA-F0-9]{4}) 24 | unescaped <- [\u0020-\u0021\u0023-\u005b\u005d-\u10ffff] 25 | 26 | %whitespace <- [ \t\r\n]* 27 | -------------------------------------------------------------------------------- /grammar/pl0.peg: -------------------------------------------------------------------------------- 1 | 2 | program <- _ block '.' _ 3 | 4 | block <- const var procedure statement 5 | const <- ('CONST' __ ident '=' _ number (',' _ ident '=' _ number)* ';' _)? 6 | var <- ('VAR' __ ident (',' _ ident)* ';' _)? 7 | procedure <- ('PROCEDURE' __ ident ';' _ block ';' _)* 8 | 9 | statement <- (assignment / call / statements / if / while / out / in)? 10 | assignment <- ident ':=' _ expression 11 | call <- 'CALL' __ ident 12 | statements <- 'BEGIN' __ statement (';' _ statement )* 'END' __ 13 | if <- 'IF' __ condition 'THEN' __ statement 14 | while <- 'WHILE' __ condition 'DO' __ statement 15 | out <- ('out' __ / 'write' __ / '!' _) expression 16 | in <- ('in' __ / 'read' __ / '?' _) ident 17 | 18 | condition <- odd / compare 19 | odd <- 'ODD' __ expression 20 | compare <- expression compare_op expression 21 | compare_op <- < '=' / '#' / '<=' / '<' / '>=' / '>' > _ 22 | 23 | expression <- sign term (term_op term)* 24 | sign <- < [-+]? > _ 25 | term_op <- < [-+] > _ 26 | 27 | term <- factor (factor_op factor)* 28 | factor_op <- < [*/] > _ 29 | 30 | factor <- ident / number / '(' _ expression ')' _ 31 | 32 | ident <- < [a-z] [a-z0-9]* > _ 33 | number <- < [0-9]+ > _ 34 | 35 | ~_ <- [ \t\r\n]* 36 | ~__ <- ![a-z0-9_] _ 37 | 38 | -------------------------------------------------------------------------------- /lint/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(peglint) 3 | 4 | add_executable(peglint peglint.cc) 5 | target_include_directories(peglint PRIVATE ..) 
6 | target_link_libraries(peglint PRIVATE ${add_link_deps}) 7 | -------------------------------------------------------------------------------- /lint/README.md: -------------------------------------------------------------------------------- 1 | peglint 2 | ------- 3 | 4 | The lint utility for PEG. 5 | 6 | ``` 7 | usage: grammar_file_path [source_file_path] 8 | 9 | options: 10 | --ast: show AST tree 11 | --packrat: enable packrat memoise 12 | --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction 13 | --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction 14 | --source: source text 15 | --trace: show concise trace messages 16 | --profile: show profile report 17 | --verbose: verbose output for trace and profile 18 | ``` 19 | 20 | ### Build peglint 21 | 22 | ``` 23 | > cd lint 24 | > mkdir build 25 | > cd build 26 | > cmake .. 27 | > make 28 | ``` 29 | 30 | ### Lint grammar 31 | 32 | ``` 33 | > cat a.peg 34 | A <- 'hello' ^ 'world' 35 | 36 | > peglint a.peg 37 | a.peg:1:16: syntax error 38 | ``` 39 | 40 | ``` 41 | > cat a.peg 42 | A <- B 43 | 44 | > peglint a.peg 45 | a.peg:1:6: 'B' is not defined. 46 | ``` 47 | 48 | ``` 49 | > cat a.peg 50 | A <- B / C 51 | B <- 'b' 52 | C <- A 53 | 54 | > peglint a.peg 55 | a.peg:1:10: 'C' is left recursive. 56 | a.peg:3:6: 'A' is left recursive. 
57 | ``` 58 | 59 | ### Lint source text 60 | 61 | ``` 62 | > cat a.peg 63 | Additive <- Multiplicative '+' Additive / Multiplicative 64 | Multiplicative <- Primary '*' Multiplicative / Primary 65 | Primary <- '(' Additive ')' / Number 66 | Number <- < [0-9]+ > 67 | %whitespace <- [ \t\r\n]* 68 | 69 | > peglint --source "1 + a * 3" a.peg 70 | [commandline]:1:3: syntax error 71 | ``` 72 | 73 | ### AST 74 | 75 | ``` 76 | > cat a.txt 77 | 1 + 2 * 3 78 | 79 | > peglint --ast a.peg a.txt 80 | + Additive 81 | + Multiplicative 82 | + Primary 83 | - Number (1) 84 | + Additive 85 | + Multiplicative 86 | + Primary 87 | - Number (2) 88 | + Multiplicative 89 | + Primary 90 | - Number (3) 91 | ``` 92 | 93 | ### AST optimization 94 | 95 | ``` 96 | > peglint --ast --opt --source "1 + 2 * 3" a.peg 97 | + Additive 98 | - Multiplicative[Number] (1) 99 | + Additive[Multiplicative] 100 | - Primary[Number] (2) 101 | - Multiplicative[Number] (3) 102 | ``` 103 | 104 | ### Adjust AST optimization with `no_ast_opt` instruction 105 | 106 | ``` 107 | > cat a.peg 108 | Additive <- Multiplicative '+' Additive / Multiplicative 109 | Multiplicative <- Primary '*' Multiplicative / Primary 110 | Primary <- '(' Additive ')' / Number { no_ast_opt } 111 | Number <- < [0-9]+ > 112 | %whitespace <- [ \t\r\n]* 113 | 114 | > peglint --ast --opt --source "1 + 2 * 3" a.peg 115 | + Additive/0 116 | + Multiplicative/1[Primary] 117 | - Number (1) 118 | + Additive/1[Multiplicative] 119 | + Primary/1 120 | - Number (2) 121 | + Multiplicative/1[Primary] 122 | - Number (3) 123 | 124 | > peglint --ast --opt-only --source "1 + 2 * 3" a.peg 125 | + Additive/0 126 | + Multiplicative/1 127 | - Primary/1[Number] (1) 128 | + Additive/1 129 | + Multiplicative/0 130 | - Primary/1[Number] (2) 131 | + Multiplicative/1 132 | - Primary/1[Number] (3) 133 | ``` 134 | -------------------------------------------------------------------------------- /lint/peglint.cc: 
-------------------------------------------------------------------------------- 1 | // 2 | // peglint.cc 3 | // 4 | // Copyright (c) 2022 Yuji Hirose. All rights reserved. 5 | // MIT License 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | inline bool read_file(const char *path, vector &buff) { 15 | ifstream ifs(path, ios::in | ios::binary); 16 | if (ifs.fail()) { return false; } 17 | 18 | buff.resize(static_cast(ifs.seekg(0, ios::end).tellg())); 19 | if (!buff.empty()) { 20 | ifs.seekg(0, ios::beg).read(&buff[0], static_cast(buff.size())); 21 | } 22 | return true; 23 | } 24 | 25 | inline vector split(const string &s, char delim) { 26 | vector elems; 27 | stringstream ss(s); 28 | string elem; 29 | while (getline(ss, elem, delim)) { 30 | elems.push_back(elem); 31 | } 32 | return elems; 33 | } 34 | 35 | int main(int argc, const char **argv) { 36 | auto opt_packrat = false; 37 | auto opt_ast = false; 38 | auto opt_optimize = false; 39 | auto opt_mode = true; 40 | auto opt_help = false; 41 | auto opt_source = false; 42 | vector source; 43 | auto opt_trace = false; 44 | auto opt_verbose = false; 45 | auto opt_profile = false; 46 | vector path_list; 47 | 48 | auto argi = 1; 49 | while (argi < argc) { 50 | auto arg = argv[argi++]; 51 | if (string("--help") == arg) { 52 | opt_help = true; 53 | } else if (string("--packrat") == arg) { 54 | opt_packrat = true; 55 | } else if (string("--ast") == arg) { 56 | opt_ast = true; 57 | } else if (string("--opt") == arg || string("--opt-all") == arg) { 58 | opt_optimize = true; 59 | opt_mode = true; 60 | } else if (string("--opt-only") == arg) { 61 | opt_optimize = true; 62 | opt_mode = false; 63 | } else if (string("--source") == arg) { 64 | opt_source = true; 65 | if (argi < argc) { 66 | std::string text = argv[argi++]; 67 | source.assign(text.begin(), text.end()); 68 | } 69 | } else if (string("--trace") == arg) { 70 | opt_trace = true; 71 | } else if (string("--profile") == arg) { 72 | 
opt_profile = true; 73 | } else if (string("--verbose") == arg) { 74 | opt_verbose = true; 75 | } else { 76 | path_list.push_back(arg); 77 | } 78 | } 79 | 80 | if (path_list.empty() || opt_help) { 81 | cerr << R"(usage: grammar_file_path [source_file_path] 82 | 83 | options: 84 | --source: source text 85 | --packrat: enable packrat memoise 86 | --ast: show AST tree 87 | --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction 88 | --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction 89 | --trace: show concise trace messages 90 | --profile: show profile report 91 | --verbose: verbose output for trace and profile 92 | )"; 93 | 94 | return 1; 95 | } 96 | 97 | // Check PEG grammar 98 | auto syntax_path = path_list[0]; 99 | 100 | vector syntax; 101 | if (!read_file(syntax_path, syntax)) { 102 | cerr << "can't open the grammar file." << endl; 103 | return -1; 104 | } 105 | 106 | peg::parser parser; 107 | 108 | parser.set_logger([&](size_t ln, size_t col, const string &msg) { 109 | cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl; 110 | }); 111 | 112 | if (!parser.load_grammar(syntax.data(), syntax.size())) { return -1; } 113 | 114 | if (path_list.size() < 2 && !opt_source) { return 0; } 115 | 116 | // Check source 117 | std::string source_path = "[commandline]"; 118 | if (path_list.size() >= 2) { 119 | if (!read_file(path_list[1], source)) { 120 | cerr << "can't open the code file." 
<< endl; 121 | return -1; 122 | } 123 | source_path = path_list[1]; 124 | } 125 | 126 | parser.set_logger([&](size_t ln, size_t col, const string &msg) { 127 | cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl; 128 | }); 129 | 130 | if (opt_packrat) { parser.enable_packrat_parsing(); } 131 | 132 | if (opt_trace) { enable_tracing(parser, std::cout); } 133 | 134 | if (opt_profile) { enable_profiling(parser, std::cout); } 135 | 136 | parser.set_verbose_trace(opt_verbose); 137 | 138 | if (opt_ast) { 139 | parser.enable_ast(); 140 | 141 | std::shared_ptr ast; 142 | auto ret = parser.parse_n(source.data(), source.size(), ast); 143 | 144 | if (ast) { 145 | if (opt_optimize) { ast = parser.optimize_ast(ast, opt_mode); } 146 | std::cout << peg::ast_to_s(ast); 147 | } 148 | 149 | if (!ret) { return -1; } 150 | } else { 151 | if (!parser.parse_n(source.data(), source.size())) { return -1; } 152 | } 153 | 154 | return 0; 155 | } 156 | -------------------------------------------------------------------------------- /peg.vim: -------------------------------------------------------------------------------- 1 | 2 | set commentstring=#\ %s 3 | 4 | syn match pegAssign /<-/ 5 | syn match pegAssign2 /←/ 6 | 7 | syn match pegName /\v[a-zA-Z_][a-zA-Z0-9_]*/ 8 | 9 | syn match pegLineComment '#.*' 10 | 11 | syn region pegStringD start=/\v"/ skip=/\v\\./ end=/\v"/ 12 | syn region pegStringS start=/\v'/ skip=/\v\\./ end=/\v'/ 13 | syn region pegClass start=/\v\[/ skip=/\v\\./ end=/\v]/ 14 | 15 | "syn match pegOperator /\(*\|?\|+\|!\|\.\|\~\)/ 16 | 17 | hi def link pegAssign Statement 18 | hi def link pegAssign2 Statement 19 | 20 | hi def link pegName Identifier 21 | 22 | hi def link pegLineComment Comment 23 | 24 | hi def link pegStringD String 25 | hi def link pegStringS String 26 | hi def link pegClass String 27 | 28 | let b:current_syntax = "peg" 29 | -------------------------------------------------------------------------------- /pl0/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(pl0) 3 | 4 | find_package(LLVM REQUIRED CONFIG) 5 | 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") 7 | 8 | add_executable(pl0 pl0.cc) 9 | set(add_link_deps ${add_link_deps} LLVM) 10 | target_include_directories(pl0 PUBLIC ${LLVM_INCLUDE_DIRS}) 11 | target_include_directories(pl0 PRIVATE ..) 12 | target_link_libraries(pl0 PRIVATE ${add_link_deps}) 13 | -------------------------------------------------------------------------------- /pl0/Makefile: -------------------------------------------------------------------------------- 1 | # peglib.h needs C++17 (its API uses std::any), matching CMAKE_CXX_STANDARD in the top-level CMakeLists.txt. 2 | # -std=c++17 is placed after the llvm-config flags so it takes precedence over any -std emitted by `llvm-config --cxxflags`. 3 | pl0: pl0.cc ../peglib.h 4 | clang++ -g -O0 pl0.cc `llvm-config --cxxflags --ldflags --system-libs --libs` -std=c++17 -I.. -o pl0 5 | -------------------------------------------------------------------------------- /pl0/README.md: -------------------------------------------------------------------------------- 1 | PL/0 language example 2 | ===================== 3 | 4 | https://en.wikipedia.org/wiki/PL/0 5 | 6 | * PL/0 PEG syntax 7 | * AST generation with symbol scope 8 | * Interpreter (slow...) 9 | * LLVM Code generation 10 | * LLVM JIT execution (fast!) 11 | 12 | Build 13 | ----- 14 | 15 | ``` 16 | brew install llvm 17 | export PATH="$PATH:/usr/local/opt/llvm/bin" 18 | make 19 | ``` 20 | 21 | Usage 22 | ----- 23 | 24 | ``` 25 | pl0 PATH [--ast] [--llvm] [--jit] 26 | 27 | --ast: Show AST tree 28 | --llvm: Dump LLVM IR 29 | --jit: LLVM JIT execution 30 | ``` 31 | -------------------------------------------------------------------------------- /pl0/pl0.cc: -------------------------------------------------------------------------------- 1 | // 2 | // pl0.cc - PL/0 language (https://en.wikipedia.org/wiki/PL/0) 3 | // 4 | // Copyright (c) 2022 Yuji Hirose. All rights reserved. 
5 | // MIT License 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "llvm/ExecutionEngine/ExecutionEngine.h" 13 | #include "llvm/ExecutionEngine/GenericValue.h" 14 | #include "llvm/ExecutionEngine/MCJIT.h" 15 | #include "llvm/IR/IRBuilder.h" 16 | #include "llvm/IR/ValueSymbolTable.h" 17 | #include "llvm/IR/Verifier.h" 18 | #include "llvm/Support/TargetSelect.h" 19 | 20 | using namespace peg; 21 | using namespace peg::udl; 22 | using namespace llvm; 23 | using namespace std; 24 | 25 | /* 26 | * PEG Grammar 27 | */ 28 | auto grammar = R"( 29 | program <- _ block '.' _ 30 | 31 | block <- const var procedure statement 32 | const <- ('CONST' __ ident '=' _ number (',' _ ident '=' _ number)* ';' _)? 33 | var <- ('VAR' __ ident (',' _ ident)* ';' _)? 34 | procedure <- ('PROCEDURE' __ ident ';' _ block ';' _)* 35 | 36 | statement <- (assignment / call / statements / if / while / out / in)? 37 | assignment <- ident ':=' _ expression 38 | call <- 'CALL' __ ident 39 | statements <- 'BEGIN' __ statement (';' _ statement )* 'END' __ 40 | if <- 'IF' __ condition 'THEN' __ statement 41 | while <- 'WHILE' __ condition 'DO' __ statement 42 | out <- ('out' __ / 'write' __ / '!' _) expression 43 | in <- ('in' __ / 'read' __ / '?' _) ident 44 | 45 | condition <- odd / compare 46 | odd <- 'ODD' __ expression 47 | compare <- expression compare_op expression 48 | compare_op <- < '=' / '#' / '<=' / '<' / '>=' / '>' > _ 49 | 50 | expression <- sign term (term_op term)* 51 | sign <- < [-+]? 
> _ 52 | term_op <- < [-+] > _ 53 | 54 | term <- factor (factor_op factor)* 55 | factor_op <- < [*/] > _ 56 | 57 | factor <- ident / number / '(' _ expression ')' _ 58 | 59 | ident <- < [a-z] [a-z0-9]* > _ 60 | number <- < [0-9]+ > _ 61 | 62 | ~_ <- [ \t\r\n]* 63 | ~__ <- ![a-z0-9_] _ 64 | )"; 65 | 66 | /* 67 | * Utilities 68 | */ 69 | string format_error_message(const string& path, size_t ln, size_t col, 70 | const string& msg) { 71 | stringstream ss; 72 | ss << path << ":" << ln << ":" << col << ": " << msg << endl; 73 | return ss.str(); 74 | } 75 | 76 | /* 77 | * Ast 78 | */ 79 | struct SymbolScope; 80 | 81 | struct Annotation { 82 | shared_ptr scope; 83 | }; 84 | 85 | typedef AstBase AstPL0; 86 | shared_ptr get_closest_scope(shared_ptr ast) { 87 | ast = ast->parent.lock(); 88 | while (ast->tag != "block"_) { 89 | ast = ast->parent.lock(); 90 | } 91 | return ast->scope; 92 | } 93 | 94 | /* 95 | * Symbol Table 96 | */ 97 | struct SymbolScope { 98 | SymbolScope(shared_ptr outer) : outer(outer) {} 99 | 100 | bool has_symbol(const string& ident, bool extend = true) const { 101 | auto ret = constants.count(ident) || variables.count(ident); 102 | return ret ? true : (extend && outer ? outer->has_symbol(ident) : false); 103 | } 104 | 105 | bool has_constant(const string& ident, bool extend = true) const { 106 | return constants.count(ident) 107 | ? true 108 | : (extend && outer ? outer->has_constant(ident) : false); 109 | } 110 | 111 | bool has_variable(const string& ident, bool extend = true) const { 112 | return variables.count(ident) 113 | ? true 114 | : (extend && outer ? outer->has_variable(ident) : false); 115 | } 116 | 117 | bool has_procedure(const string& ident, bool extend = true) const { 118 | return procedures.count(ident) 119 | ? true 120 | : (extend && outer ? outer->has_procedure(ident) : false); 121 | } 122 | 123 | shared_ptr get_procedure(const string& ident) const { 124 | auto it = procedures.find(ident); 125 | return it != procedures.end() ? 
it->second : outer->get_procedure(ident); 126 | } 127 | 128 | map constants; 129 | set variables; 130 | map> procedures; 131 | set free_variables; 132 | 133 | private: 134 | shared_ptr outer; 135 | }; 136 | 137 | void throw_runtime_error(const shared_ptr node, const string& msg) { 138 | throw runtime_error( 139 | format_error_message(node->path, node->line, node->column, msg)); 140 | } 141 | 142 | struct SymbolTable { 143 | static void build_on_ast(const shared_ptr ast, 144 | shared_ptr scope = nullptr) { 145 | switch (ast->tag) { 146 | case "block"_: 147 | block(ast, scope); 148 | break; 149 | case "assignment"_: 150 | assignment(ast, scope); 151 | break; 152 | case "call"_: 153 | call(ast, scope); 154 | break; 155 | case "ident"_: 156 | ident(ast, scope); 157 | break; 158 | default: 159 | for (auto node : ast->nodes) { 160 | build_on_ast(node, scope); 161 | } 162 | break; 163 | } 164 | } 165 | 166 | private: 167 | static void block(const shared_ptr ast, 168 | shared_ptr outer) { 169 | // block <- const var procedure statement 170 | auto scope = make_shared(outer); 171 | const auto& nodes = ast->nodes; 172 | constants(nodes[0], scope); 173 | variables(nodes[1], scope); 174 | procedures(nodes[2], scope); 175 | build_on_ast(nodes[3], scope); 176 | ast->scope = scope; 177 | } 178 | 179 | static void constants(const shared_ptr ast, 180 | shared_ptr scope) { 181 | // const <- ('CONST' __ ident '=' _ number(',' _ ident '=' _ number)* ';' 182 | // _)? 183 | const auto& nodes = ast->nodes; 184 | for (auto i = 0u; i < nodes.size(); i += 2) { 185 | const auto& ident = nodes[i + 0]->token_to_string(); 186 | if (scope->has_symbol(ident)) { 187 | throw_runtime_error(nodes[i], "'" + ident + "' is already defined..."); 188 | } 189 | auto number = nodes[i + 1]->token_to_number(); 190 | scope->constants.emplace(ident, number); 191 | } 192 | } 193 | 194 | static void variables(const shared_ptr ast, 195 | shared_ptr scope) { 196 | // var <- ('VAR' __ ident(',' _ ident)* ';' _) ? 
197 | const auto& nodes = ast->nodes; 198 | for (auto i = 0u; i < nodes.size(); i += 1) { 199 | const auto& ident = nodes[i]->token_to_string(); 200 | if (scope->has_symbol(ident)) { 201 | throw_runtime_error(nodes[i], "'" + ident + "' is already defined..."); 202 | } 203 | scope->variables.emplace(ident); 204 | } 205 | } 206 | 207 | static void procedures(const shared_ptr ast, 208 | shared_ptr scope) { 209 | // procedure <- ('PROCEDURE' __ ident ';' _ block ';' _)* 210 | const auto& nodes = ast->nodes; 211 | for (auto i = 0u; i < nodes.size(); i += 2) { 212 | const auto& ident = nodes[i + 0]->token_to_string(); 213 | auto block = nodes[i + 1]; 214 | scope->procedures[ident] = block; 215 | build_on_ast(block, scope); 216 | } 217 | } 218 | 219 | static void assignment(const shared_ptr ast, 220 | shared_ptr scope) { 221 | // assignment <- ident ':=' _ expression 222 | const auto& ident = ast->nodes[0]->token_to_string(); 223 | if (scope->has_constant(ident)) { 224 | throw_runtime_error(ast->nodes[0], 225 | "cannot modify constant value '" + ident + "'..."); 226 | } else if (!scope->has_variable(ident)) { 227 | throw_runtime_error(ast->nodes[0], 228 | "undefined variable '" + ident + "'..."); 229 | } 230 | 231 | build_on_ast(ast->nodes[1], scope); 232 | 233 | if (!scope->has_symbol(ident, false)) { 234 | scope->free_variables.emplace(ident); 235 | } 236 | } 237 | 238 | static void call(const shared_ptr ast, 239 | shared_ptr scope) { 240 | // call <- 'CALL' __ ident 241 | const auto& ident = ast->nodes[0]->token_to_string(); 242 | if (!scope->has_procedure(ident)) { 243 | throw_runtime_error(ast->nodes[0], 244 | "undefined procedure '" + ident + "'..."); 245 | } 246 | 247 | auto block = scope->get_procedure(ident); 248 | if (block->scope) { 249 | for (const auto& free : block->scope->free_variables) { 250 | if (!scope->has_symbol(free, false)) { 251 | scope->free_variables.emplace(free); 252 | } 253 | } 254 | } 255 | } 256 | 257 | static void ident(const shared_ptr 
ast, 258 | shared_ptr scope) { 259 | const auto& ident = ast->token_to_string(); 260 | if (!scope->has_symbol(ident)) { 261 | throw_runtime_error(ast, "undefined variable '" + ident + "'..."); 262 | } 263 | 264 | if (!scope->has_symbol(ident, false)) { 265 | scope->free_variables.emplace(ident); 266 | } 267 | } 268 | }; 269 | 270 | /* 271 | * Interpreter 272 | */ 273 | struct Environment { 274 | Environment(shared_ptr scope, shared_ptr outer) 275 | : scope(scope), outer(outer) {} 276 | 277 | int get_value(const shared_ptr ast, const string& ident) const { 278 | auto it = scope->constants.find(ident); 279 | if (it != scope->constants.end()) { 280 | return it->second; 281 | } else if (scope->variables.count(ident)) { 282 | if (variables.find(ident) == variables.end()) { 283 | throw_runtime_error(ast, "uninitialized variable '" + ident + "'..."); 284 | } 285 | return variables.at(ident); 286 | } 287 | return outer->get_value(ast, ident); 288 | } 289 | 290 | void set_variable(const string& ident, int val) { 291 | if (scope->variables.count(ident)) { 292 | variables[ident] = val; 293 | } else { 294 | outer->set_variable(ident, val); 295 | } 296 | } 297 | 298 | shared_ptr get_procedure(const string& ident) const { 299 | return scope->get_procedure(ident); 300 | } 301 | 302 | private: 303 | shared_ptr scope; 304 | shared_ptr outer; 305 | map variables; 306 | }; 307 | 308 | struct Interpreter { 309 | static void exec(const shared_ptr ast, 310 | shared_ptr env = nullptr) { 311 | switch (ast->tag) { 312 | case "block"_: 313 | exec_block(ast, env); 314 | break; 315 | case "statement"_: 316 | exec_statement(ast, env); 317 | break; 318 | case "assignment"_: 319 | exec_assignment(ast, env); 320 | break; 321 | case "call"_: 322 | exec_call(ast, env); 323 | break; 324 | case "statements"_: 325 | exec_statements(ast, env); 326 | break; 327 | case "if"_: 328 | exec_if(ast, env); 329 | break; 330 | case "while"_: 331 | exec_while(ast, env); 332 | break; 333 | case "out"_: 334 | 
exec_out(ast, env); 335 | break; 336 | case "in"_: 337 | exec_in(ast, env); 338 | break; 339 | default: 340 | exec(ast->nodes[0], env); 341 | break; 342 | } 343 | } 344 | 345 | private: 346 | static void exec_block(const shared_ptr ast, 347 | shared_ptr outer) { 348 | // block <- const var procedure statement 349 | exec(ast->nodes[3], make_shared(ast->scope, outer)); 350 | } 351 | 352 | static void exec_statement(const shared_ptr ast, 353 | shared_ptr env) { 354 | // statement <- (assignment / call / statements / if / while / out / in)? 355 | if (!ast->nodes.empty()) { 356 | exec(ast->nodes[0], env); 357 | } 358 | } 359 | 360 | static void exec_assignment(const shared_ptr ast, 361 | shared_ptr env) { 362 | // assignment <- ident ':=' _ expression 363 | env->set_variable(ast->nodes[0]->token_to_string(), eval(ast->nodes[1], env)); 364 | } 365 | 366 | static void exec_call(const shared_ptr ast, 367 | shared_ptr env) { 368 | // call <- 'CALL' __ ident 369 | exec_block(env->get_procedure(ast->nodes[0]->token_to_string()), env); 370 | } 371 | 372 | static void exec_statements(const shared_ptr ast, 373 | shared_ptr env) { 374 | // statements <- 'BEGIN' __ statement (';' _ statement )* 'END' __ 375 | for (auto stmt : ast->nodes) { 376 | exec(stmt, env); 377 | } 378 | } 379 | 380 | static void exec_if(const shared_ptr ast, 381 | shared_ptr env) { 382 | // if <- 'IF' __ condition 'THEN' __ statement 383 | if (eval_condition(ast->nodes[0], env)) { 384 | exec(ast->nodes[1], env); 385 | } 386 | } 387 | 388 | static void exec_while(const shared_ptr ast, 389 | shared_ptr env) { 390 | // while <- 'WHILE' __ condition 'DO' __ statement 391 | auto cond = ast->nodes[0]; 392 | auto stmt = ast->nodes[1]; 393 | while (eval_condition(cond, env)) { 394 | exec(stmt, env); 395 | } 396 | } 397 | 398 | static void exec_out(const shared_ptr ast, 399 | shared_ptr env) { 400 | // out <- ('out' __ / 'write' __ / '!' 
_) expression 401 | cout << eval(ast->nodes[0], env) << endl; 402 | } 403 | 404 | static void exec_in(const shared_ptr ast, 405 | shared_ptr env) { 406 | // in <- ('in' __ / 'read' __ / '?' _) ident 407 | int val; 408 | cin >> val; 409 | env->set_variable(ast->nodes[0]->token_to_string(), val); 410 | } 411 | 412 | static bool eval_condition(const shared_ptr ast, 413 | shared_ptr env) { 414 | // condition <- odd / compare 415 | const auto& node = ast->nodes[0]; 416 | switch (node->tag) { 417 | case "odd"_: 418 | return eval_odd(node, env); 419 | case "compare"_: 420 | return eval_compare(node, env); 421 | default: 422 | throw logic_error("invalid AstPL0 type"); 423 | } 424 | } 425 | 426 | static bool eval_odd(const shared_ptr ast, 427 | shared_ptr env) { 428 | // odd <- 'ODD' __ expression; true only when the value is not divisible by 2 (negative odd values included) 429 | return eval_expression(ast->nodes[0], env) % 2 != 0; 430 | } 431 | 432 | static bool eval_compare(const shared_ptr ast, 433 | shared_ptr env) { 434 | // compare <- expression compare_op expression 435 | const auto& nodes = ast->nodes; 436 | auto lval = eval_expression(nodes[0], env); 437 | auto op = peg::str2tag(nodes[1]->token_to_string().c_str()); 438 | auto rval = eval_expression(nodes[2], env); 439 | switch (op) { 440 | case "="_: 441 | return lval == rval; 442 | case "#"_: 443 | return lval != rval; 444 | case "<="_: 445 | return lval <= rval; 446 | case "<"_: 447 | return lval < rval; 448 | case ">="_: 449 | return lval >= rval; 450 | case ">"_: 451 | return lval > rval; 452 | default: 453 | throw logic_error("invalid operator"); 454 | } 455 | } 456 | 457 | static int eval(const shared_ptr ast, shared_ptr env) { 458 | switch (ast->tag) { 459 | case "expression"_: 460 | return eval_expression(ast, env); 461 | case "term"_: 462 | return eval_term(ast, env); 463 | case "ident"_: 464 | return eval_ident(ast, env); 465 | case "number"_: 466 | return eval_number(ast, env); 467 | default: 468 | return eval(ast->nodes[0], env); 469 | } 470 | } 471 | 472 | static int
eval_expression(const shared_ptr ast, 473 | shared_ptr env) { 474 | // expression <- sign term (term_op term)* 475 | const auto& nodes = ast->nodes; 476 | auto sign = nodes[0]->token_to_string(); 477 | auto sign_val = (sign.empty() || sign == "+") ? 1 : -1; 478 | auto val = eval(nodes[1], env) * sign_val; 479 | for (auto i = 2u; i < nodes.size(); i += 2) { 480 | auto ope = nodes[i + 0]->token_to_string()[0]; 481 | auto rval = eval(nodes[i + 1], env); 482 | switch (ope) { 483 | case '+': 484 | val = val + rval; 485 | break; 486 | case '-': 487 | val = val - rval; 488 | break; 489 | } 490 | } 491 | return val; 492 | } 493 | 494 | static int eval_term(const shared_ptr ast, 495 | shared_ptr env) { 496 | // term <- factor (factor_op factor)* 497 | const auto& nodes = ast->nodes; 498 | auto val = eval(nodes[0], env); 499 | for (auto i = 1u; i < nodes.size(); i += 2) { 500 | auto ope = nodes[i + 0]->token_to_string()[0]; 501 | auto rval = eval(nodes[i + 1], env); 502 | switch (ope) { 503 | case '*': 504 | val = val * rval; 505 | break; 506 | case '/': 507 | if (rval == 0) { 508 | throw_runtime_error(ast, "divide by 0 error"); 509 | } 510 | val = val / rval; 511 | break; 512 | } 513 | } 514 | return val; 515 | } 516 | 517 | static int eval_ident(const shared_ptr ast, 518 | shared_ptr env) { 519 | return env->get_value(ast, ast->token_to_string()); 520 | } 521 | 522 | static int eval_number(const shared_ptr ast, 523 | shared_ptr env) { 524 | return stol(ast->token_to_string()); 525 | } 526 | }; 527 | 528 | /* 529 | * LLVM 530 | */ 531 | struct LLVM { 532 | LLVM(const shared_ptr ast) : builder_(context_) { 533 | module_ = make_unique("pl0", context_); 534 | compile(ast); 535 | } 536 | 537 | void dump() { module_->print(llvm::outs(), nullptr); } 538 | 539 | void exec() { 540 | unique_ptr ee(EngineBuilder(std::move(module_)).create()); 541 | std::vector noargs; 542 | auto fn = ee->FindFunctionNamed("main"); 543 | auto ret = ee->runFunction(fn, noargs); 544 | } 545 | 546 | 
private: 547 | LLVMContext context_; 548 | IRBuilder<> builder_; 549 | unique_ptr module_; 550 | 551 | void compile(const shared_ptr ast) { 552 | InitializeNativeTarget(); 553 | InitializeNativeTargetAsmPrinter(); 554 | compile_libs(); 555 | compile_program(ast); 556 | } 557 | 558 | void compile_switch(const shared_ptr ast) { 559 | switch (ast->tag) { 560 | case "assignment"_: 561 | compile_assignment(ast); 562 | break; 563 | case "call"_: 564 | compile_call(ast); 565 | break; 566 | case "statements"_: 567 | compile_statements(ast); 568 | break; 569 | case "if"_: 570 | compile_if(ast); 571 | break; 572 | case "while"_: 573 | compile_while(ast); 574 | break; 575 | case "out"_: 576 | compile_out(ast); 577 | break; 578 | default: 579 | compile_switch(ast->nodes[0]); 580 | break; 581 | } 582 | } 583 | 584 | Value* compile_switch_value(const shared_ptr ast) { 585 | switch (ast->tag) { 586 | case "odd"_: 587 | return compile_odd(ast); 588 | case "compare"_: 589 | return compile_compare(ast); 590 | case "expression"_: 591 | return compile_expression(ast); 592 | case "ident"_: 593 | return compile_ident(ast); 594 | case "number"_: 595 | return compile_number(ast); 596 | default: 597 | return compile_switch_value(ast->nodes[0]); 598 | } 599 | } 600 | 601 | void compile_libs() { 602 | auto printfF = module_->getOrInsertFunction( 603 | "printf", 604 | FunctionType::get(builder_.getInt32Ty(), 605 | PointerType::get(builder_.getInt8Ty(), 0), true)); 606 | 607 | #if LLVM_VERSION_MAJOR >= 9 608 | auto funccallee = module_->getOrInsertFunction("out", builder_.getVoidTy(), builder_.getInt32Ty()); 609 | auto outC = funccallee.getCallee(); 610 | #else 611 | auto outC = module_->getOrInsertFunction("out", builder_.getVoidTy(), builder_.getInt32Ty()); 612 | #endif 613 | auto outF = cast(outC); 614 | 615 | { 616 | auto BB = BasicBlock::Create(context_, "entry", outF); 617 | builder_.SetInsertPoint(BB); 618 | 619 | auto val = &*outF->arg_begin(); 620 | 621 | auto fmt = 
builder_.CreateGlobalStringPtr("%d\n"); 622 | std::vector args = {fmt, val}; 623 | builder_.CreateCall(printfF, args); 624 | 625 | builder_.CreateRetVoid(); 626 | } 627 | } 628 | 629 | void compile_program(const shared_ptr ast) { 630 | #if LLVM_VERSION_MAJOR >= 9 631 | auto funccallee = module_->getOrInsertFunction("main", builder_.getVoidTy()); 632 | auto c = funccallee.getCallee(); 633 | #else 634 | auto c = module_->getOrInsertFunction("main", builder_.getVoidTy()); 635 | #endif 636 | auto fn = cast(c); 637 | 638 | { 639 | auto BB = BasicBlock::Create(context_, "entry", fn); 640 | builder_.SetInsertPoint(BB); 641 | compile_block(ast->nodes[0]); 642 | builder_.CreateRetVoid(); 643 | verifyFunction(*fn); 644 | } 645 | } 646 | 647 | void compile_block(const shared_ptr ast) { 648 | compile_const(ast->nodes[0]); 649 | compile_var(ast->nodes[1]); 650 | compile_procedure(ast->nodes[2]); 651 | compile_statement(ast->nodes[3]); 652 | } 653 | 654 | void compile_const(const shared_ptr ast) { 655 | for (auto i = 0u; i < ast->nodes.size(); i += 2) { 656 | auto ident = ast->nodes[i]->token_to_string(); 657 | auto number = stoi(ast->nodes[i + 1]->token_to_string()); 658 | 659 | auto alloca = 660 | builder_.CreateAlloca(builder_.getInt32Ty(), nullptr, ident); 661 | builder_.CreateStore(builder_.getInt32(number), alloca); 662 | } 663 | } 664 | 665 | void compile_var(const shared_ptr ast) { 666 | for (const auto node : ast->nodes) { 667 | auto ident = node->token_to_string(); 668 | builder_.CreateAlloca(builder_.getInt32Ty(), nullptr, ident); 669 | } 670 | } 671 | 672 | void compile_procedure(const shared_ptr ast) { 673 | for (auto i = 0u; i < ast->nodes.size(); i += 2) { 674 | auto ident = ast->nodes[i]->token_to_string(); 675 | auto block = ast->nodes[i + 1]; 676 | 677 | std::vector pt(block->scope->free_variables.size(), 678 | Type::getInt32PtrTy(context_)); 679 | auto ft = FunctionType::get(builder_.getVoidTy(), pt, false); 680 | #if LLVM_VERSION_MAJOR >= 9 681 | auto 
funccallee = module_->getOrInsertFunction(ident, ft); 682 | auto c = funccallee.getCallee(); 683 | #else 684 | auto c = module_->getOrInsertFunction(ident, ft); 685 | #endif 686 | auto fn = cast(c); 687 | 688 | { 689 | auto it = block->scope->free_variables.begin(); 690 | for (auto& arg : fn->args()) { 691 | arg.setName(*it); 692 | ++it; 693 | } 694 | } 695 | 696 | { 697 | auto prevBB = builder_.GetInsertBlock(); 698 | auto BB = BasicBlock::Create(context_, "entry", fn); 699 | builder_.SetInsertPoint(BB); 700 | compile_block(block); 701 | builder_.CreateRetVoid(); 702 | verifyFunction(*fn); 703 | builder_.SetInsertPoint(prevBB); 704 | } 705 | } 706 | } 707 | 708 | void compile_statement(const shared_ptr ast) { 709 | if (!ast->nodes.empty()) { 710 | compile_switch(ast->nodes[0]); 711 | } 712 | } 713 | 714 | void compile_assignment(const shared_ptr ast) { 715 | auto ident = ast->nodes[0]->token_to_string(); 716 | 717 | auto fn = builder_.GetInsertBlock()->getParent(); 718 | auto tbl = fn->getValueSymbolTable(); 719 | auto var = tbl->lookup(ident); 720 | if (!var) { 721 | throw_runtime_error(ast, "'" + ident + "' is not defined..."); 722 | } 723 | 724 | auto val = compile_expression(ast->nodes[1]); 725 | builder_.CreateStore(val, var); 726 | } 727 | 728 | void compile_call(const shared_ptr ast) { 729 | auto ident = ast->nodes[0]->token_to_string(); 730 | 731 | auto scope = get_closest_scope(ast); 732 | auto block = scope->get_procedure(ident); 733 | 734 | std::vector args; 735 | for (auto& free : block->scope->free_variables) { 736 | auto fn = builder_.GetInsertBlock()->getParent(); 737 | auto tbl = fn->getValueSymbolTable(); 738 | auto var = tbl->lookup(free); 739 | if (!var) { 740 | throw_runtime_error(ast, "'" + free + "' is not defined..."); 741 | } 742 | args.push_back(var); 743 | } 744 | 745 | auto fn = module_->getFunction(ident); 746 | builder_.CreateCall(fn, args); 747 | } 748 | 749 | void compile_statements(const shared_ptr ast) { 750 | for (auto node : 
ast->nodes) { 751 | compile_statement(node); 752 | } 753 | } 754 | 755 | void compile_if(const shared_ptr ast) { 756 | auto cond = compile_condition(ast->nodes[0]); 757 | 758 | auto fn = builder_.GetInsertBlock()->getParent(); 759 | auto ifThen = BasicBlock::Create(context_, "if.then", fn); 760 | auto ifEnd = BasicBlock::Create(context_, "if.end"); 761 | 762 | builder_.CreateCondBr(cond, ifThen, ifEnd); 763 | 764 | builder_.SetInsertPoint(ifThen); 765 | compile_statement(ast->nodes[1]); 766 | builder_.CreateBr(ifEnd); 767 | 768 | fn->getBasicBlockList().push_back(ifEnd); 769 | builder_.SetInsertPoint(ifEnd); 770 | } 771 | 772 | void compile_while(const shared_ptr ast) { 773 | auto whileCond = BasicBlock::Create(context_, "while.cond"); 774 | builder_.CreateBr(whileCond); 775 | 776 | auto fn = builder_.GetInsertBlock()->getParent(); 777 | fn->getBasicBlockList().push_back(whileCond); 778 | builder_.SetInsertPoint(whileCond); 779 | 780 | auto cond = compile_condition(ast->nodes[0]); 781 | 782 | auto whileBody = BasicBlock::Create(context_, "while.body", fn); 783 | auto whileEnd = BasicBlock::Create(context_, "while.end"); 784 | builder_.CreateCondBr(cond, whileBody, whileEnd); 785 | 786 | builder_.SetInsertPoint(whileBody); 787 | compile_statement(ast->nodes[1]); 788 | 789 | builder_.CreateBr(whileCond); 790 | 791 | fn->getBasicBlockList().push_back(whileEnd); 792 | builder_.SetInsertPoint(whileEnd); 793 | } 794 | 795 | Value* compile_condition(const shared_ptr ast) { 796 | return compile_switch_value(ast->nodes[0]); 797 | } 798 | 799 | Value* compile_odd(const shared_ptr ast) { // odd <- 'ODD' __ expression 800 | auto val = compile_expression(ast->nodes[0]); 801 | return builder_.CreateICmpNE(builder_.CreateAnd(val, builder_.getInt32(1), "lowbit"), builder_.getInt32(0), "icmpne"); // mask the low bit first so that only odd values compare non-zero 802 | } 803 | 804 | Value* compile_compare(const shared_ptr ast) { 805 | auto lhs = compile_expression(ast->nodes[0]); 806 | auto rhs = compile_expression(ast->nodes[2]); 807 | 808 | const auto& ope = ast->nodes[1]->token_to_string(); 809 | switch (ope[0]) { 810 | case
'=': 811 | return builder_.CreateICmpEQ(lhs, rhs, "icmpeq"); 812 | case '#': 813 | return builder_.CreateICmpNE(lhs, rhs, "icmpne"); 814 | case '<': 815 | if (ope.size() == 1) { 816 | return builder_.CreateICmpSLT(lhs, rhs, "icmpslt"); 817 | } 818 | // '<=' 819 | return builder_.CreateICmpSLE(lhs, rhs, "icmpsle"); 820 | case '>': 821 | if (ope.size() == 1) { 822 | return builder_.CreateICmpSGT(lhs, rhs, "icmpsgt"); 823 | } 824 | // '>=' 825 | return builder_.CreateICmpSGE(lhs, rhs, "icmpsge"); 826 | } 827 | return nullptr; 828 | } 829 | 830 | void compile_out(const shared_ptr ast) { 831 | auto val = compile_expression(ast->nodes[0]); 832 | auto outF = module_->getFunction("out"); 833 | builder_.CreateCall(outF, val); 834 | } 835 | 836 | Value* compile_expression(const shared_ptr ast) { 837 | const auto& nodes = ast->nodes; 838 | 839 | auto sign = nodes[0]->token_to_string(); 840 | auto negative = !(sign.empty() || sign == "+"); 841 | 842 | auto val = compile_term(nodes[1]); 843 | if (negative) { 844 | val = builder_.CreateNeg(val, "negative"); 845 | } 846 | 847 | for (auto i = 2u; i < nodes.size(); i += 2) { 848 | auto ope = nodes[i + 0]->token_to_string()[0]; 849 | auto rval = compile_term(nodes[i + 1]); 850 | switch (ope) { 851 | case '+': 852 | val = builder_.CreateAdd(val, rval, "add"); 853 | break; 854 | case '-': 855 | val = builder_.CreateSub(val, rval, "sub"); 856 | break; 857 | } 858 | } 859 | return val; 860 | } 861 | 862 | Value* compile_term(const shared_ptr ast) { 863 | const auto& nodes = ast->nodes; 864 | auto val = compile_factor(nodes[0]); 865 | for (auto i = 1u; i < nodes.size(); i += 2) { 866 | auto ope = nodes[i + 0]->token_to_string()[0]; 867 | auto rval = compile_switch_value(nodes[i + 1]); 868 | switch (ope) { 869 | case '*': 870 | val = builder_.CreateMul(val, rval, "mul"); 871 | break; 872 | case '/': { 873 | // TODO: Zero devide error? 
874 | // auto ret = builder_.CreateICmpEQ(rval, builder_.getInt32(0), 875 | // "icmpeq"); 876 | // if (!ret) { 877 | // throw_runtime_error(ast, "divide by 0 error"); 878 | // } 879 | val = builder_.CreateSDiv(val, rval, "div"); 880 | break; 881 | } 882 | } 883 | } 884 | return val; 885 | } 886 | 887 | Value* compile_factor(const shared_ptr ast) { 888 | return compile_switch_value(ast->nodes[0]); 889 | } 890 | 891 | Value* compile_ident(const shared_ptr ast) { 892 | auto ident = ast->token_to_string(); 893 | 894 | auto fn = builder_.GetInsertBlock()->getParent(); 895 | auto tbl = fn->getValueSymbolTable(); 896 | auto var = tbl->lookup(ident); 897 | if (!var) { 898 | throw_runtime_error(ast, "'" + ident + "' is not defined..."); 899 | } 900 | 901 | return builder_.CreateLoad(var); 902 | } 903 | 904 | Value* compile_number(const shared_ptr ast) { 905 | return ConstantInt::getIntegerValue(builder_.getInt32Ty(), 906 | APInt(32, ast->token_to_string(), 10)); 907 | } 908 | }; 909 | 910 | /* 911 | * Main 912 | */ 913 | int main(int argc, const char** argv) { 914 | if (argc < 2) { 915 | cout << "usage: pl0 PATH [--ast] [--llvm] [--jit]" << endl; 916 | return 1; 917 | } 918 | 919 | // Parser commandline parameters 920 | auto path = argv[1]; 921 | bool opt_jit = false; 922 | bool opt_ast = false; 923 | bool opt_llvm = false; 924 | { 925 | auto argi = 2; 926 | while (argi < argc) { 927 | if (string("--ast") == argv[argi]) { 928 | opt_ast = true; 929 | } else if (string("--jit") == argv[argi]) { 930 | opt_jit = true; 931 | } else if (string("--llvm") == argv[argi]) { 932 | opt_llvm = true; 933 | } 934 | argi++; 935 | } 936 | } 937 | 938 | // Read a source file into memory 939 | vector source; 940 | ifstream ifs(path, ios::in | ios::binary); 941 | if (ifs.fail()) { 942 | cerr << "can't open the source file." 
<< endl; 943 | return -1; 944 | } 945 | source.resize(static_cast(ifs.seekg(0, ios::end).tellg())); 946 | if (!source.empty()) { 947 | ifs.seekg(0, ios::beg) 948 | .read(&source[0], static_cast(source.size())); 949 | } 950 | 951 | // Setup a PEG parser 952 | parser parser(grammar); 953 | parser.enable_ast(); 954 | parser.log = [&](size_t ln, size_t col, const string& msg) { 955 | cerr << format_error_message(path, ln, col, msg) << endl; 956 | }; 957 | 958 | // Parse the source and make an AST 959 | shared_ptr ast; 960 | if (parser.parse_n(source.data(), source.size(), ast, path)) { 961 | try { 962 | SymbolTable::build_on_ast(ast); 963 | 964 | if (opt_ast) { 965 | cout << ast_to_s(ast); 966 | } 967 | 968 | if (opt_llvm || opt_jit) { 969 | LLVM compiler(ast); 970 | 971 | if (opt_llvm) { 972 | compiler.dump(); 973 | } 974 | if (opt_jit) { 975 | compiler.exec(); 976 | } 977 | } else { 978 | Interpreter::exec(ast); 979 | } 980 | 981 | } catch (const runtime_error& e) { 982 | cerr << e.what() << endl; 983 | } 984 | return 0; 985 | } 986 | 987 | return -1; 988 | } 989 | -------------------------------------------------------------------------------- /pl0/samples/fib.pas: -------------------------------------------------------------------------------- 1 | VAR i, x, r; 2 | 3 | PROCEDURE fib; 4 | VAR xx, r1, r2; 5 | BEGIN 6 | xx := x; 7 | IF xx = 0 THEN r := 1; 8 | IF xx = 1 THEN r := 1; 9 | IF xx >= 2 THEN BEGIN 10 | x := xx - 2; 11 | CALL fib; 12 | r1 := r; 13 | 14 | x := xx - 1; 15 | CALL fib; 16 | r2 := r; 17 | r := r1 + r2; 18 | END 19 | END; 20 | 21 | BEGIN 22 | i := 0; 23 | WHILE i < 25 DO BEGIN 24 | x := i; 25 | CALL fib; 26 | write i; 27 | write r; 28 | i := i + 1; 29 | END 30 | END. 
31 | -------------------------------------------------------------------------------- /pl0/samples/gcd.pas: -------------------------------------------------------------------------------- 1 | CONST 2 | m = 7, 3 | n = 85; 4 | 5 | VAR 6 | x, y, z, q, r; 7 | 8 | PROCEDURE multiply; 9 | VAR a, b; 10 | BEGIN 11 | a := x; 12 | b := y; 13 | z := 0; 14 | WHILE b > 0 DO BEGIN 15 | IF ODD b THEN z := z + a; 16 | a := 2 * a; 17 | b := b / 2; 18 | END 19 | END; 20 | 21 | PROCEDURE divide; 22 | VAR w; 23 | BEGIN 24 | r := x; 25 | q := 0; 26 | w := y; 27 | WHILE w <= r DO w := 2 * w; 28 | WHILE w > y DO BEGIN 29 | q := 2 * q; 30 | w := w / 2; 31 | IF w <= r THEN BEGIN 32 | r := r - w; 33 | q := q + 1 34 | END 35 | END 36 | END; 37 | 38 | PROCEDURE gcd; 39 | VAR f, g; 40 | BEGIN 41 | f := x; 42 | g := y; 43 | WHILE f # g DO BEGIN 44 | IF f < g THEN g := g - f; 45 | IF g < f THEN f := f - g; 46 | END; 47 | z := f 48 | END; 49 | 50 | BEGIN 51 | x := m; 52 | y := n; 53 | CALL multiply; 54 | x := 25; 55 | y := 3; 56 | CALL divide; 57 | x := 84; 58 | y := 36; 59 | CALL gcd; 60 | write z; 61 | END. 62 | -------------------------------------------------------------------------------- /pl0/samples/square.pas: -------------------------------------------------------------------------------- 1 | 2 | VAR x, squ; 3 | 4 | PROCEDURE square; 5 | BEGIN 6 | squ := x * x 7 | END; 8 | 9 | BEGIN 10 | x := 1; 11 | WHILE x <= 10 DO 12 | BEGIN 13 | CALL square; 14 | ! squ; 15 | x := x + 1 16 | END 17 | END. 
18 | 19 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(test) 3 | 4 | include(FetchContent) 5 | FetchContent_Declare( 6 | googletest 7 | URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip 8 | ) 9 | # For Windows: Prevent overriding the parent project's compiler/linker settings 10 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 11 | FetchContent_MakeAvailable(googletest) 12 | 13 | enable_testing() 14 | 15 | add_executable(peglib-test-main test1.cc test2.cc test3.cc) 16 | 17 | target_include_directories(peglib-test-main PRIVATE ..) # peglib.h sits one directory up, at the repository root 18 | 19 | include(GoogleTest) 20 | gtest_discover_tests(peglib-test-main) # registers the GoogleTest cases of this binary with CTest 21 | target_link_libraries(peglib-test-main PRIVATE GTest::gtest_main) # namespaced alias: a misspelt name fails at configure time instead of becoming a raw linker flag 22 | -------------------------------------------------------------------------------- /test/test1.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace peg; 5 | 6 | #if !defined(PEGLIB_NO_UNICODE_CHARS) 7 | TEST(GeneralTest, Simple_syntax_test_with_unicode) { 8 | parser parser(u8" ROOT ← _ " 9 | " _ <- ' ' "); 10 | 11 | bool ret = parser; 12 | EXPECT_TRUE(ret); 13 | } 14 | #endif 15 | 16 | TEST(GeneralTest, Simple_syntax_test) { 17 | parser parser(R"( 18 | ROOT <- _ 19 | _ <- ' ' 20 | )"); 21 | 22 | bool ret = parser; 23 | EXPECT_TRUE(ret); 24 | } 25 | 26 | TEST(GeneralTest, Empty_syntax_test) { 27 | parser parser(""); 28 | bool ret = parser; 29 | EXPECT_FALSE(ret); 30 | } 31 | 32 | TEST(GeneralTest, Start_rule_with_ignore_operator_test) { 33 | parser parser(R"( 34 | ~ROOT <- _ 35 | _ <- ' ' 36 | )"); 37 | 38 | bool ret = parser; 39 | EXPECT_FALSE(ret); 40 | } 41 | 42 | TEST(GeneralTest, Invalid_UTF8_text_test) { 43 | std::string s = "a <- '"; 44 | s += static_cast(0xe8); // Make invalid
utf8 text... 45 | 46 | parser parser(s.data()); 47 | 48 | bool ret = parser; 49 | EXPECT_FALSE(ret); 50 | } 51 | 52 | TEST(GeneralTest, Backslash_escape_sequence_test) { 53 | parser parser(R"( 54 | ROOT <- _ 55 | _ <- '\\' 56 | )"); 57 | 58 | bool ret = parser; 59 | EXPECT_TRUE(ret); 60 | } 61 | 62 | TEST(GeneralTest, Invalid_escape_sequence_test) { 63 | parser parser(R"( 64 | ROOT <- _ 65 | _ <- '\' 66 | )"); 67 | 68 | bool ret = parser; 69 | EXPECT_FALSE(ret); 70 | } 71 | 72 | TEST(GeneralTest, Action_taking_non_const_Semantic_Values_parameter) { 73 | parser parser(R"( 74 | ROOT <- TEXT 75 | TEXT <- [a-zA-Z]+ 76 | )"); 77 | 78 | parser["ROOT"] = [&](SemanticValues &vs) { 79 | auto s = std::string(std::any_cast(vs[0])); 80 | s[0] = 'H'; // mutate 81 | return s; // move 82 | }; 83 | 84 | parser["TEXT"] = [&](SemanticValues &vs) { return vs.token(); }; 85 | 86 | std::string val; 87 | auto ret = parser.parse("hello", val); 88 | EXPECT_TRUE(ret); 89 | EXPECT_EQ("Hello", val); 90 | } 91 | 92 | TEST(GeneralTest, String_capture_test) { 93 | parser parser(R"( 94 | ROOT <- _ ('[' TAG_NAME ']' _)* 95 | TAG_NAME <- (!']' .)+ 96 | _ <- [ \t]* 97 | )"); 98 | 99 | std::vector tags; 100 | 101 | parser["TAG_NAME"] = [&](const SemanticValues &vs) { 102 | tags.push_back(vs.sv()); 103 | }; 104 | 105 | auto ret = parser.parse(" [tag1] [tag:2] [tag-3] "); 106 | 107 | EXPECT_TRUE(ret); 108 | EXPECT_EQ(3, tags.size()); 109 | EXPECT_EQ("tag1", tags[0]); 110 | EXPECT_EQ("tag:2", tags[1]); 111 | EXPECT_EQ("tag-3", tags[2]); 112 | } 113 | 114 | using namespace peg; 115 | 116 | TEST(GeneralTest, String_capture_test2) { 117 | std::vector tags; 118 | 119 | Definition ROOT, TAG, TAG_NAME, WS; 120 | ROOT <= seq(WS, zom(TAG)); 121 | TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); 122 | TAG_NAME <= oom(seq(npd(chr(']')), dot())), 123 | [&](const SemanticValues &vs) { tags.push_back(vs.sv()); }; 124 | WS <= zom(cls(" \t")); 125 | 126 | auto r = ROOT.parse(" [tag1] [tag:2] [tag-3] "); 127 | 128 | 
EXPECT_TRUE(r.ret); 129 | EXPECT_EQ(3, tags.size()); 130 | EXPECT_EQ("tag1", tags[0]); 131 | EXPECT_EQ("tag:2", tags[1]); 132 | EXPECT_EQ("tag-3", tags[2]); 133 | } 134 | 135 | TEST(GeneralTest, String_capture_test3) { 136 | parser pg(R"( 137 | ROOT <- _ TOKEN* 138 | TOKEN <- '[' < (!']' .)+ > ']' _ 139 | _ <- [ \t\r\n]* 140 | )"); 141 | 142 | std::vector tags; 143 | 144 | pg["TOKEN"] = [&](const SemanticValues &vs) { tags.push_back(vs.token()); }; 145 | 146 | auto ret = pg.parse(" [tag1] [tag:2] [tag-3] "); 147 | 148 | EXPECT_TRUE(ret); 149 | EXPECT_EQ(3, tags.size()); 150 | EXPECT_EQ("tag1", tags[0]); 151 | EXPECT_EQ("tag:2", tags[1]); 152 | EXPECT_EQ("tag-3", tags[2]); 153 | } 154 | 155 | TEST(GeneralTest, Cyclic_grammar_test) { 156 | Definition PARENT; 157 | Definition CHILD; 158 | 159 | PARENT <= seq(CHILD); 160 | CHILD <= seq(PARENT); 161 | } 162 | 163 | TEST(GeneralTest, Visit_test) { 164 | Definition ROOT, TAG, TAG_NAME, WS; 165 | 166 | ROOT <= seq(WS, zom(TAG)); 167 | TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); 168 | TAG_NAME <= oom(seq(npd(chr(']')), dot())); 169 | WS <= zom(cls(" \t")); 170 | 171 | AssignIDToDefinition defIds; 172 | ROOT.accept(defIds); 173 | 174 | EXPECT_EQ(4, defIds.ids.size()); 175 | } 176 | 177 | TEST(GeneralTest, Token_check_test) { 178 | parser parser(R"( 179 | EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* 180 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 181 | FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ 182 | TERM_OPERATOR <- < [-+] > _ 183 | FACTOR_OPERATOR <- < [/*] > _ 184 | NUMBER <- < [0-9]+ > _ 185 | _ <- [ \t\r\n]* 186 | )"); 187 | 188 | EXPECT_FALSE(parser["EXPRESSION"].is_token()); 189 | EXPECT_FALSE(parser["FACTOR"].is_token()); 190 | EXPECT_TRUE(parser["FACTOR_OPERATOR"].is_token()); 191 | EXPECT_TRUE(parser["NUMBER"].is_token()); 192 | EXPECT_TRUE(parser["_"].is_token()); 193 | } 194 | 195 | TEST(GeneralTest, Lambda_action_test) { 196 | parser parser(R"( 197 | START <- (CHAR)* 198 | CHAR <- . 
199 | )"); 200 | 201 | std::string ss; 202 | parser["CHAR"] = [&](const SemanticValues &vs) { ss += *vs.sv().data(); }; 203 | 204 | bool ret = parser.parse("hello"); 205 | EXPECT_TRUE(ret); 206 | EXPECT_EQ("hello", ss); 207 | } 208 | 209 | TEST(GeneralTest, enter_leave_handlers_test) { 210 | parser parser(R"( 211 | START <- LTOKEN '=' RTOKEN 212 | LTOKEN <- TOKEN 213 | RTOKEN <- TOKEN 214 | TOKEN <- [A-Za-z]+ 215 | )"); 216 | 217 | parser["LTOKEN"].enter = [&](const Context & /*c*/, const char *, size_t, 218 | std::any &dt) { 219 | auto &require_upper_case = *std::any_cast(dt); 220 | require_upper_case = false; 221 | }; 222 | parser["LTOKEN"].leave = [&](const Context & /*c*/, const char *, size_t, 223 | size_t, std::any &, std::any &dt) { 224 | auto &require_upper_case = *std::any_cast(dt); 225 | require_upper_case = true; 226 | }; 227 | 228 | auto message = "should be upper case string..."; 229 | 230 | parser["TOKEN"].predicate = [&](const SemanticValues &vs, const std::any &dt, 231 | std::string &msg) { 232 | auto &require_upper_case = *std::any_cast(dt); 233 | if (require_upper_case) { 234 | const auto &s = vs.sv(); 235 | if (!std::all_of(s.begin(), s.end(), ::isupper)) { 236 | msg = message; 237 | return false; 238 | } 239 | } 240 | return true; 241 | }; 242 | 243 | bool require_upper_case = false; 244 | std::any dt = &require_upper_case; 245 | EXPECT_FALSE(parser.parse("hello=world", dt)); 246 | EXPECT_FALSE(parser.parse("HELLO=world", dt)); 247 | EXPECT_TRUE(parser.parse("hello=WORLD", dt)); 248 | EXPECT_TRUE(parser.parse("HELLO=WORLD", dt)); 249 | 250 | parser.set_logger([&](size_t ln, size_t col, const std::string &msg) { 251 | EXPECT_EQ(1, ln); 252 | EXPECT_EQ(7, col); 253 | EXPECT_EQ(message, msg); 254 | }); 255 | parser.parse("hello=world", dt); 256 | } 257 | 258 | TEST(GeneralTest, WHITESPACE_test) { 259 | parser parser(R"( 260 | # Rules 261 | ROOT <- ITEM (',' ITEM)* 262 | ITEM <- WORD / PHRASE 263 | 264 | # Tokens 265 | WORD <- < [a-zA-Z0-9_]+ > 266 
| PHRASE <- < '"' (!'"' .)* '"' > 267 | 268 | %whitespace <- [ \t\r\n]* 269 | )"); 270 | 271 | auto ret = parser.parse(R"( one, "two, three", four )"); 272 | 273 | EXPECT_TRUE(ret); 274 | } 275 | 276 | TEST(GeneralTest, WHITESPACE_test2) { 277 | parser parser(R"( 278 | # Rules 279 | ROOT <- ITEM (',' ITEM)* 280 | ITEM <- '[' < [a-zA-Z0-9_]+ > ']' 281 | 282 | %whitespace <- (SPACE / TAB)* 283 | SPACE <- ' ' 284 | TAB <- '\t' 285 | )"); 286 | 287 | std::vector items; 288 | parser["ITEM"] = [&](const SemanticValues &vs) { 289 | items.push_back(vs.token()); 290 | }; 291 | 292 | auto ret = parser.parse(R"([one], [two] ,[three] )"); 293 | 294 | EXPECT_TRUE(ret); 295 | EXPECT_EQ(3, items.size()); 296 | EXPECT_EQ("one", items[0]); 297 | EXPECT_EQ("two", items[1]); 298 | EXPECT_EQ("three", items[2]); 299 | } 300 | 301 | TEST(GeneralTest, WHITESPACE_test3) { 302 | parser parser(R"( 303 | StrQuot <- < '"' < (StrEscape / StrChars)* > '"' > 304 | StrEscape <- '\\' any 305 | StrChars <- (!'"' !'\\' any)+ 306 | any <- . 307 | %whitespace <- [ \t]* 308 | )"); 309 | 310 | parser["StrQuot"] = [](const SemanticValues &vs) { 311 | EXPECT_EQ(R"( aaa \" bbb )", vs.token()); 312 | }; 313 | 314 | auto ret = parser.parse(R"( " aaa \" bbb " )"); 315 | EXPECT_TRUE(ret); 316 | } 317 | 318 | TEST(GeneralTest, WHITESPACE_test4) { 319 | parser parser(R"( 320 | ROOT <- HELLO OPE WORLD 321 | HELLO <- 'hello' 322 | OPE <- < [-+] > 323 | WORLD <- 'world' / 'WORLD' 324 | %whitespace <- [ \t\r\n]* 325 | )"); 326 | 327 | parser["HELLO"] = [](const SemanticValues &vs) { 328 | EXPECT_EQ("hello", vs.token()); 329 | }; 330 | 331 | parser["OPE"] = [](const SemanticValues &vs) { EXPECT_EQ("+", vs.token()); }; 332 | 333 | parser["WORLD"] = [](const SemanticValues &vs) { 334 | EXPECT_EQ("world", vs.token()); 335 | }; 336 | 337 | auto ret = parser.parse(" hello + world "); 338 | EXPECT_TRUE(ret); 339 | } 340 | 341 | TEST(GeneralTest, Word_expression_test) { 342 | parser parser(R"( 343 | ROOT <- 'hello' ','? 
'world' 344 | %whitespace <- [ \t\r\n]* 345 | %word <- [a-z]+ 346 | )"); 347 | 348 | EXPECT_FALSE(parser.parse("helloworld")); 349 | EXPECT_TRUE(parser.parse("hello world")); 350 | EXPECT_TRUE(parser.parse("hello,world")); 351 | EXPECT_TRUE(parser.parse("hello, world")); 352 | EXPECT_TRUE(parser.parse("hello , world")); 353 | } 354 | 355 | TEST(GeneralTest, Word_expression_test_PrioritizedChoice) { 356 | parser parser(R"( 357 | Identifier ← < !Keyword [a-z][a-z]* > 358 | Keyword ← 'def' / 'to' 359 | %whitespace ← [ \t\r\n]* 360 | %word ← [a-z]+ 361 | )"); 362 | 363 | EXPECT_TRUE(parser.parse("toa")); 364 | } 365 | 366 | TEST(GeneralTest, Word_expression_test_Dictionary) { 367 | parser parser(R"( 368 | Identifier ← < !Keyword [a-z][a-z]* > 369 | Keyword ← 'def' | 'to' 370 | %whitespace ← [ \t\r\n]* 371 | %word ← [a-z]+ 372 | )"); 373 | 374 | EXPECT_TRUE(parser.parse("toa")); 375 | } 376 | 377 | TEST(GeneralTest, Word_expression_case_ignore_test_Dictionary) { 378 | parser parser(R"( 379 | Identifier ← < !Keyword [a-z][a-z]* > 380 | Keyword ← 'def'i | 'to'i 381 | %whitespace ← [ \t\r\n]* 382 | %word ← [a-z]+ 383 | )"); 384 | 385 | EXPECT_TRUE(parser.parse("toa")); 386 | } 387 | 388 | TEST(GeneralTest, Word_expression_syntax_error_test_Dictionary) { 389 | parser parser(R"( 390 | Identifier ← < !Keyword [a-z][a-z]* > 391 | Keyword ← 'def' | 'to'i 392 | %whitespace ← [ \t\r\n]* 393 | %word ← [a-z]+ 394 | )"); 395 | 396 | EXPECT_FALSE(parser); 397 | } 398 | 399 | TEST(GeneralTest, Skip_token_test) { 400 | parser parser(" ROOT <- _ ITEM (',' _ ITEM _)* " 401 | " ITEM <- ([a-z0-9])+ " 402 | " ~_ <- [ \t]* "); 403 | 404 | parser["ROOT"] = [&](const SemanticValues &vs) { EXPECT_EQ(2, vs.size()); }; 405 | 406 | auto ret = parser.parse(" item1, item2 "); 407 | 408 | EXPECT_TRUE(ret); 409 | } 410 | 411 | TEST(GeneralTest, Skip_token_test2) { 412 | parser parser(R"( 413 | ROOT <- ITEM (',' ITEM)* 414 | ITEM <- < ([a-z0-9])+ > 415 | %whitespace <- [ \t]* 416 | )"); 417 | 418 | 
parser["ROOT"] = [&](const SemanticValues &vs) { EXPECT_EQ(2, vs.size()); }; 419 | 420 | auto ret = parser.parse(" item1, item2 "); 421 | 422 | EXPECT_TRUE(ret); 423 | } 424 | 425 | TEST(GeneralTest, Custom_AST_test) { 426 | struct CustomType { 427 | bool dummy = false; 428 | }; 429 | using CustomAst = AstBase; 430 | 431 | parser parser(R"( 432 | ROOT <- _ TEXT* 433 | TEXT <- [a-zA-Z]+ _ 434 | _ <- [ \t\r\n]* 435 | )"); 436 | 437 | parser.enable_ast(); 438 | std::shared_ptr ast; 439 | bool ret = parser.parse("a b c", ast); 440 | EXPECT_TRUE(ret); 441 | EXPECT_EQ(4, ast->nodes.size()); 442 | } 443 | 444 | TEST(GeneralTest, Backtracking_test) { 445 | parser parser(R"( 446 | START <- PAT1 / PAT2 447 | PAT1 <- HELLO ' One' 448 | PAT2 <- HELLO ' Two' 449 | HELLO <- 'Hello' 450 | )"); 451 | 452 | size_t count = 0; 453 | parser["HELLO"] = [&](const SemanticValues & /*vs*/) { count++; }; 454 | 455 | parser.enable_packrat_parsing(); 456 | 457 | bool ret = parser.parse("Hello Two"); 458 | EXPECT_TRUE(ret); 459 | EXPECT_EQ(1, count); // Skip second time 460 | } 461 | 462 | TEST(GeneralTest, Backtracking_with_AST) { 463 | parser parser(R"( 464 | S <- A? 
B (A B)* A 465 | A <- 'a' 466 | B <- 'b' 467 | )"); 468 | 469 | parser.enable_ast(); 470 | std::shared_ptr ast; 471 | bool ret = parser.parse("ba", ast); 472 | EXPECT_TRUE(ret); 473 | EXPECT_EQ(2, ast->nodes.size()); 474 | } 475 | 476 | TEST(GeneralTest, Octal_Hex_Unicode_value_test) { 477 | parser parser(R"( ROOT <- '\132\x7a\u30f3' )"); 478 | 479 | auto ret = parser.parse("Zzン"); 480 | 481 | EXPECT_TRUE(ret); 482 | } 483 | 484 | TEST(GeneralTest, Ignore_case_literal_test) { 485 | parser parser(R"( 486 | ROOT <- HELLO WORLD 487 | HELLO <- 'hello'i 488 | WORLD <- 'world'i 489 | %whitespace <- [ \t\r\n]* 490 | )"); 491 | 492 | parser["HELLO"] = [](const SemanticValues &vs) { 493 | EXPECT_EQ("Hello", vs.token()); 494 | }; 495 | 496 | parser["WORLD"] = [](const SemanticValues &vs) { 497 | EXPECT_EQ("World", vs.token()); 498 | }; 499 | 500 | auto ret = parser.parse(" Hello World "); 501 | EXPECT_TRUE(ret); 502 | } 503 | 504 | TEST(GeneralTest, Ignore_case_character_class_test) { 505 | parser parser(R"(ROOT <- [a-z]i+)"); 506 | 507 | EXPECT_TRUE(parser.parse("abc")); 508 | EXPECT_TRUE(parser.parse("ABC")); 509 | EXPECT_TRUE(parser.parse("Abc")); 510 | EXPECT_TRUE(parser.parse("Abc")); 511 | EXPECT_FALSE(parser.parse("123")); 512 | } 513 | 514 | TEST(GeneralTest, Ignore_case_negate_character_class_test) { 515 | parser parser(R"(ROOT <- [^a-z]i+)"); 516 | 517 | EXPECT_TRUE(parser.parse("123")); 518 | EXPECT_FALSE(parser.parse("ABC")); 519 | } 520 | 521 | TEST(GeneralTest, mutable_lambda_test) { 522 | std::vector vec; 523 | 524 | parser pg("ROOT <- 'mutable lambda test'"); 525 | 526 | // This test makes sure if the following code can be compiled. 
527 | pg["TOKEN"] = [=](const SemanticValues &vs) mutable { 528 | vec.push_back(vs.sv()); 529 | }; 530 | } 531 | 532 | TEST(GeneralTest, Simple_calculator_test) { 533 | parser parser(R"( 534 | Additive <- Multiplicative '+' Additive / Multiplicative 535 | Multiplicative <- Primary '*' Multiplicative / Primary 536 | Primary <- '(' Additive ')' / Number 537 | Number <- [0-9]+ 538 | )"); 539 | 540 | parser["Additive"] = [](const SemanticValues &vs) { 541 | switch (vs.choice()) { 542 | case 0: return std::any_cast(vs[0]) + std::any_cast(vs[1]); 543 | default: return std::any_cast(vs[0]); 544 | } 545 | }; 546 | 547 | parser["Multiplicative"] = [](const SemanticValues &vs) { 548 | switch (vs.choice()) { 549 | case 0: return std::any_cast(vs[0]) * std::any_cast(vs[1]); 550 | default: return std::any_cast(vs[0]); 551 | } 552 | }; 553 | 554 | parser["Number"] = [](const SemanticValues &vs) { 555 | return vs.token_to_number(); 556 | }; 557 | 558 | int val; 559 | parser.parse("(1+2)*3", val); 560 | 561 | EXPECT_EQ(9, val); 562 | } 563 | 564 | TEST(GeneralTest, Simple_calculator_with_recovery_test) { 565 | parser parser(R"( 566 | Additive <- Multiplicative '+' Additive / Multiplicative 567 | Multiplicative <- Primary '*' Multiplicative^cond / Primary 568 | Primary <- '(' Additive ')' / Number 569 | Number <- < [0-9]+ > 570 | %whitespace <- [ \t]* 571 | cond <- '' { error_message "missing multiplicative" } 572 | )"); 573 | 574 | parser["Additive"] = [](const SemanticValues &vs) { 575 | switch (vs.choice()) { 576 | case 0: return std::any_cast(vs[0]) + std::any_cast(vs[1]); 577 | default: return std::any_cast(vs[0]); 578 | } 579 | }; 580 | 581 | parser["Multiplicative"] = [](const SemanticValues &vs) { 582 | switch (vs.choice()) { 583 | case 0: return std::any_cast(vs[0]) * std::any_cast(vs[1]); 584 | default: return std::any_cast(vs[0]); 585 | } 586 | }; 587 | 588 | parser["Number"] = [](const SemanticValues &vs) { 589 | return vs.token_to_number(); 590 | }; 591 | 592 | int val 
= 0; 593 | auto ret = parser.parse(" (1 + 2) * ", val); 594 | 595 | EXPECT_FALSE(ret); 596 | EXPECT_EQ(0, val); 597 | } 598 | 599 | TEST(GeneralTest, Calculator_test) { 600 | // Construct grammar 601 | Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; 602 | 603 | EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))); 604 | TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); 605 | FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); 606 | TERM_OPERATOR <= cls("+-"); 607 | FACTOR_OPERATOR <= cls("*/"); 608 | NUMBER <= oom(cls("0-9")); 609 | 610 | // Setup actions 611 | auto reduce = [](const SemanticValues &vs) -> long { 612 | long ret = std::any_cast(vs[0]); 613 | for (auto i = 1u; i < vs.size(); i += 2) { 614 | auto num = std::any_cast(vs[i + 1]); 615 | switch (std::any_cast(vs[i])) { 616 | case '+': ret += num; break; 617 | case '-': ret -= num; break; 618 | case '*': ret *= num; break; 619 | case '/': ret /= num; break; 620 | } 621 | } 622 | return ret; 623 | }; 624 | 625 | EXPRESSION = reduce; 626 | TERM = reduce; 627 | TERM_OPERATOR = [](const SemanticValues &vs) { return *vs.sv().data(); }; 628 | FACTOR_OPERATOR = [](const SemanticValues &vs) { return *vs.sv().data(); }; 629 | NUMBER = [](const SemanticValues &vs) { return vs.token_to_number(); }; 630 | 631 | // Parse 632 | long val; 633 | auto r = EXPRESSION.parse_and_get_value("1+2*3*(4-5+6)/7-8", val); 634 | 635 | EXPECT_TRUE(r.ret); 636 | EXPECT_EQ(-3, val); 637 | } 638 | 639 | TEST(GeneralTest, Calculator_test2) { 640 | // Parse syntax 641 | auto syntax = R"( 642 | # Grammar for Calculator... 
643 | EXPRESSION <- TERM (TERM_OPERATOR TERM)* 644 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 645 | FACTOR <- NUMBER / '(' EXPRESSION ')' 646 | TERM_OPERATOR <- [-+] 647 | FACTOR_OPERATOR <- [/*] 648 | NUMBER <- [0-9]+ 649 | )"; 650 | 651 | auto cxt = ParserGenerator::parse(syntax, strlen(syntax), {}, nullptr, {}); 652 | auto &g = *cxt.grammar; 653 | 654 | // Setup actions 655 | auto reduce = [](const SemanticValues &vs) -> long { 656 | long ret = std::any_cast(vs[0]); 657 | for (auto i = 1u; i < vs.size(); i += 2) { 658 | auto num = std::any_cast(vs[i + 1]); 659 | switch (std::any_cast(vs[i])) { 660 | case '+': ret += num; break; 661 | case '-': ret -= num; break; 662 | case '*': ret *= num; break; 663 | case '/': ret /= num; break; 664 | } 665 | } 666 | return ret; 667 | }; 668 | 669 | g["EXPRESSION"] = reduce; 670 | g["TERM"] = reduce; 671 | g["TERM_OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; 672 | g["FACTOR_OPERATOR"] = [](const SemanticValues &vs) { 673 | return *vs.sv().data(); 674 | }; 675 | g["NUMBER"] = [](const SemanticValues &vs) { 676 | return vs.token_to_number(); 677 | }; 678 | 679 | // Parse 680 | long val; 681 | auto r = g[cxt.start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val); 682 | 683 | EXPECT_TRUE(r.ret); 684 | EXPECT_EQ(-3, val); 685 | } 686 | 687 | TEST(GeneralTest, Calculator_test3) { 688 | // Parse syntax 689 | parser parser(R"( 690 | # Grammar for Calculator... 
691 | EXPRESSION <- TERM (TERM_OPERATOR TERM)* 692 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 693 | FACTOR <- NUMBER / '(' EXPRESSION ')' 694 | TERM_OPERATOR <- [-+] 695 | FACTOR_OPERATOR <- [/*] 696 | NUMBER <- [0-9]+ 697 | )"); 698 | 699 | auto reduce = [](const SemanticValues &vs) -> long { 700 | long ret = std::any_cast(vs[0]); 701 | for (auto i = 1u; i < vs.size(); i += 2) { 702 | auto num = std::any_cast(vs[i + 1]); 703 | switch (std::any_cast(vs[i])) { 704 | case '+': ret += num; break; 705 | case '-': ret -= num; break; 706 | case '*': ret *= num; break; 707 | case '/': ret /= num; break; 708 | } 709 | } 710 | return ret; 711 | }; 712 | 713 | // Setup actions 714 | parser["EXPRESSION"] = reduce; 715 | parser["TERM"] = reduce; 716 | parser["TERM_OPERATOR"] = [](const SemanticValues &vs) { 717 | return static_cast(*vs.sv().data()); 718 | }; 719 | parser["FACTOR_OPERATOR"] = [](const SemanticValues &vs) { 720 | return static_cast(*vs.sv().data()); 721 | }; 722 | parser["NUMBER"] = [](const SemanticValues &vs) { 723 | return vs.token_to_number(); 724 | }; 725 | 726 | // Parse 727 | long val; 728 | auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val); 729 | 730 | EXPECT_TRUE(ret); 731 | EXPECT_EQ(-3, val); 732 | } 733 | 734 | TEST(GeneralTest, Calculator_test_with_AST) { 735 | parser parser(R"( 736 | EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* 737 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 738 | FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ 739 | TERM_OPERATOR <- < [-+] > _ 740 | FACTOR_OPERATOR <- < [/*] > _ 741 | NUMBER <- < [0-9]+ > _ 742 | ~_ <- [ \t\r\n]* 743 | )"); 744 | 745 | parser.enable_ast(); 746 | 747 | std::function eval = [&](const Ast &ast) { 748 | if (ast.name == "NUMBER") { 749 | return ast.token_to_number(); 750 | } else { 751 | const auto &nodes = ast.nodes; 752 | auto result = eval(*nodes[0]); 753 | for (auto i = 1u; i < nodes.size(); i += 2) { 754 | auto num = eval(*nodes[i + 1]); 755 | auto ope = nodes[i]->token[0]; 756 | switch (ope) { 757 | case 
'+': result += num; break; 758 | case '-': result -= num; break; 759 | case '*': result *= num; break; 760 | case '/': result /= num; break; 761 | } 762 | } 763 | return result; 764 | } 765 | }; 766 | 767 | std::shared_ptr ast; 768 | auto ret = parser.parse("1+2*3*(4-5+6)/7-8", ast); 769 | ast = parser.optimize_ast(ast); 770 | auto val = eval(*ast); 771 | 772 | EXPECT_TRUE(ret); 773 | EXPECT_EQ(-3, val); 774 | } 775 | 776 | TEST(GeneralTest, Calculator_test_with_combinators_and_AST) { 777 | // Construct grammar 778 | AST_DEFINITIONS(EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, 779 | NUMBER); 780 | 781 | EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))); 782 | TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); 783 | FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); 784 | TERM_OPERATOR <= cls("+-"); 785 | FACTOR_OPERATOR <= cls("*/"); 786 | NUMBER <= oom(cls("0-9")); 787 | 788 | std::function eval = [&](const Ast &ast) { 789 | if (ast.name == "NUMBER") { 790 | return ast.token_to_number(); 791 | } else { 792 | const auto &nodes = ast.nodes; 793 | auto result = eval(*nodes[0]); 794 | for (auto i = 1u; i < nodes.size(); i += 2) { 795 | auto num = eval(*nodes[i + 1]); 796 | auto ope = nodes[i]->token[0]; 797 | switch (ope) { 798 | case '+': result += num; break; 799 | case '-': result -= num; break; 800 | case '*': result *= num; break; 801 | case '/': result /= num; break; 802 | } 803 | } 804 | return result; 805 | } 806 | }; 807 | 808 | std::shared_ptr ast; 809 | auto r = EXPRESSION.parse_and_get_value("1+2*3*(4-5+6)/7-8", ast); 810 | ast = AstOptimizer(true).optimize(ast); 811 | auto val = eval(*ast); 812 | 813 | EXPECT_TRUE(r.ret); 814 | EXPECT_EQ(-3, val); 815 | } 816 | 817 | TEST(GeneralTest, Ignore_semantic_value_test) { 818 | parser parser(R"( 819 | START <- ~HELLO WORLD 820 | HELLO <- 'Hello' _ 821 | WORLD <- 'World' _ 822 | _ <- [ \t\r\n]* 823 | )"); 824 | 825 | parser.enable_ast(); 826 | 827 | std::shared_ptr ast; 828 | auto 
ret = parser.parse("Hello World", ast); 829 | 830 | EXPECT_TRUE(ret); 831 | EXPECT_EQ(1, ast->nodes.size()); 832 | EXPECT_EQ("WORLD", ast->nodes[0]->name); 833 | } 834 | 835 | TEST(GeneralTest, Ignore_semantic_value_of_or_predicate_test) { 836 | parser parser(R"( 837 | START <- _ !DUMMY HELLO_WORLD '.' 838 | HELLO_WORLD <- HELLO 'World' _ 839 | HELLO <- 'Hello' _ 840 | DUMMY <- 'dummy' _ 841 | ~_ <- [ \t\r\n]* 842 | )"); 843 | 844 | parser.enable_ast(); 845 | 846 | std::shared_ptr ast; 847 | auto ret = parser.parse("Hello World.", ast); 848 | 849 | EXPECT_TRUE(ret); 850 | EXPECT_EQ(1, ast->nodes.size()); 851 | EXPECT_EQ("HELLO_WORLD", ast->nodes[0]->name); 852 | } 853 | 854 | TEST(GeneralTest, Ignore_semantic_value_of_and_predicate_test) { 855 | parser parser(R"( 856 | START <- _ &HELLO HELLO_WORLD '.' 857 | HELLO_WORLD <- HELLO 'World' _ 858 | HELLO <- 'Hello' _ 859 | ~_ <- [ \t\r\n]* 860 | )"); 861 | 862 | parser.enable_ast(); 863 | 864 | std::shared_ptr ast; 865 | auto ret = parser.parse("Hello World.", ast); 866 | 867 | EXPECT_TRUE(ret); 868 | EXPECT_EQ(1, ast->nodes.size()); 869 | EXPECT_EQ("HELLO_WORLD", ast->nodes[0]->name); 870 | } 871 | 872 | TEST(GeneralTest, Literal_token_on_AST_test1) { 873 | parser parser(R"( 874 | STRING_LITERAL <- '"' (('\\"' / '\\t' / '\\n') / (!["] .))* '"' 875 | )"); 876 | parser.enable_ast(); 877 | 878 | std::shared_ptr ast; 879 | auto ret = parser.parse(R"("a\tb")", ast); 880 | 881 | EXPECT_TRUE(ret); 882 | EXPECT_TRUE(ast->is_token); 883 | EXPECT_EQ(R"("a\tb")", ast->token); 884 | EXPECT_TRUE(ast->nodes.empty()); 885 | } 886 | 887 | TEST(GeneralTest, Literal_token_on_AST_test2) { 888 | parser parser(R"( 889 | STRING_LITERAL <- '"' (ESC / CHAR)* '"' 890 | ESC <- ('\\"' / '\\t' / '\\n') 891 | CHAR <- (!["] .) 
892 | )"); 893 | parser.enable_ast(); 894 | 895 | std::shared_ptr ast; 896 | auto ret = parser.parse(R"("a\tb")", ast); 897 | 898 | EXPECT_TRUE(ret); 899 | EXPECT_FALSE(ast->is_token); 900 | EXPECT_TRUE(ast->token.empty()); 901 | EXPECT_EQ(3, ast->nodes.size()); 902 | } 903 | 904 | TEST(GeneralTest, Literal_token_on_AST_test3) { 905 | parser parser(R"( 906 | STRING_LITERAL <- < '"' (ESC / CHAR)* '"' > 907 | ESC <- ('\\"' / '\\t' / '\\n') 908 | CHAR <- (!["] .) 909 | )"); 910 | parser.enable_ast(); 911 | 912 | std::shared_ptr ast; 913 | auto ret = parser.parse(R"("a\tb")", ast); 914 | 915 | EXPECT_TRUE(ret); 916 | EXPECT_TRUE(ast->is_token); 917 | EXPECT_EQ(R"("a\tb")", ast->token); 918 | EXPECT_TRUE(ast->nodes.empty()); 919 | } 920 | 921 | TEST(GeneralTest, Literal_token_on_AST_test4) { 922 | parser parser(R"( 923 | STRING_LITERAL <- < '"' < (ESC / CHAR)* > '"' > 924 | ESC <- ('\\"' / '\\t' / '\\n') 925 | CHAR <- (!["] .) 926 | )"); 927 | parser.enable_ast(); 928 | 929 | std::shared_ptr ast; 930 | auto ret = parser.parse(R"("a\tb")", ast); 931 | 932 | EXPECT_TRUE(ret); 933 | EXPECT_TRUE(ast->is_token); 934 | EXPECT_EQ(R"(a\tb)", ast->token); 935 | EXPECT_TRUE(ast->nodes.empty()); 936 | } 937 | 938 | TEST(GeneralTest, Missing_missing_definitions_test) { 939 | parser parser(R"( 940 | A <- B C 941 | )"); 942 | 943 | EXPECT_FALSE(parser); 944 | } 945 | 946 | TEST(GeneralTest, Definition_duplicates_test) { 947 | parser parser(R"( 948 | A <- '' 949 | A <- '' 950 | )"); 951 | 952 | EXPECT_FALSE(parser); 953 | } 954 | 955 | TEST(GeneralTest, Semantic_values_test) { 956 | parser parser(R"( 957 | term <- ( a b c x )? 
a b c 958 | a <- 'a' 959 | b <- 'b' 960 | c <- 'c' 961 | x <- 'x' 962 | )"); 963 | 964 | for (const auto &item : parser.get_grammar()) { 965 | const auto &rule = item.first; 966 | parser[rule.data()] = [rule](const SemanticValues &vs, std::any &) { 967 | if (rule == "term") { 968 | EXPECT_EQ("a at 0", std::any_cast(vs[0])); 969 | EXPECT_EQ("b at 1", std::any_cast(vs[1])); 970 | EXPECT_EQ("c at 2", std::any_cast(vs[2])); 971 | return std::string(); 972 | } else { 973 | return rule + " at " + std::to_string(vs.sv().data() - vs.ss); 974 | } 975 | }; 976 | } 977 | 978 | EXPECT_TRUE(parser.parse("abc")); 979 | } 980 | 981 | TEST(GeneralTest, Ordered_choice_count) { 982 | parser parser(R"( 983 | S <- 'a' / 'b' 984 | )"); 985 | 986 | parser["S"] = [](const SemanticValues &vs) { 987 | EXPECT_EQ(1, vs.choice()); 988 | EXPECT_EQ(2, vs.choice_count()); 989 | }; 990 | 991 | parser.parse("b"); 992 | } 993 | 994 | TEST(GeneralTest, Ordered_choice_count_2) { 995 | parser parser(R"( 996 | S <- ('a' / 'b')* 997 | )"); 998 | 999 | parser["S"] = [](const SemanticValues &vs) { 1000 | EXPECT_EQ(0, vs.choice()); 1001 | EXPECT_EQ(0, vs.choice_count()); 1002 | }; 1003 | 1004 | parser.parse("b"); 1005 | } 1006 | 1007 | TEST(GeneralTest, Semantic_value_tag) { 1008 | parser parser(R"( 1009 | S <- A? B* C? 
1010 | A <- 'a' 1011 | B <- 'b' 1012 | C <- 'c' 1013 | )"); 1014 | 1015 | { 1016 | using namespace udl; 1017 | parser["S"] = [](const SemanticValues &vs) { 1018 | EXPECT_EQ(1, vs.size()); 1019 | EXPECT_EQ(1, vs.tags.size()); 1020 | EXPECT_EQ("C"_, vs.tags[0]); 1021 | }; 1022 | auto ret = parser.parse("c"); 1023 | EXPECT_TRUE(ret); 1024 | } 1025 | 1026 | { 1027 | using namespace udl; 1028 | parser["S"] = [](const SemanticValues &vs) { 1029 | EXPECT_EQ(2, vs.size()); 1030 | EXPECT_EQ(2, vs.tags.size()); 1031 | EXPECT_EQ("B"_, vs.tags[0]); 1032 | EXPECT_EQ("B"_, vs.tags[1]); 1033 | }; 1034 | auto ret = parser.parse("bb"); 1035 | EXPECT_TRUE(ret); 1036 | } 1037 | 1038 | { 1039 | using namespace udl; 1040 | parser["S"] = [](const SemanticValues &vs) { 1041 | EXPECT_EQ(2, vs.size()); 1042 | EXPECT_EQ(2, vs.tags.size()); 1043 | EXPECT_EQ("A"_, vs.tags[0]); 1044 | EXPECT_EQ("C"_, vs.tags[1]); 1045 | }; 1046 | auto ret = parser.parse("ac"); 1047 | EXPECT_TRUE(ret); 1048 | } 1049 | } 1050 | 1051 | TEST(GeneralTest, Negated_Class_test) { 1052 | parser parser(R"( 1053 | ROOT <- [^a-z_]+ 1054 | )"); 1055 | 1056 | bool ret = parser; 1057 | EXPECT_TRUE(ret); 1058 | 1059 | EXPECT_TRUE(parser.parse("ABC123")); 1060 | EXPECT_FALSE(parser.parse("ABcZ")); 1061 | EXPECT_FALSE(parser.parse("ABCZ_")); 1062 | EXPECT_FALSE(parser.parse("")); 1063 | } 1064 | 1065 | TEST(GeneralTest, token_to_number_float_test) { 1066 | parser parser(R"( 1067 | S <- '1.1' 1068 | )"); 1069 | parser.enable_ast(); 1070 | 1071 | std::shared_ptr ast; 1072 | auto ret = parser.parse("1.1", ast); 1073 | 1074 | EXPECT_TRUE(ret); 1075 | EXPECT_TRUE(ast->is_token); 1076 | EXPECT_EQ("1.1", ast->token); 1077 | EXPECT_EQ(1.1f, ast->token_to_number()); 1078 | EXPECT_TRUE(ast->nodes.empty()); 1079 | } 1080 | 1081 | TEST(GeneralTest, ParentReferencesShouldNotBeExpired) { 1082 | auto parser = peg::parser(R"( 1083 | ROOT <- OPTIMIZES_AWAY 1084 | OPTIMIZES_AWAY <- ITEM+ 1085 | ITEM <- 'a' 1086 | )"); 1087 | parser.enable_ast(); 
1088 | 1089 | std::shared_ptr ast; 1090 | parser.parse("aaa", ast); 1091 | ast = parser.optimize_ast(ast); 1092 | 1093 | EXPECT_FALSE(ast->nodes[0]->parent.expired()); 1094 | } 1095 | 1096 | TEST(GeneralTest, EndOfInputTest) { 1097 | auto parser = peg::parser(R"( 1098 | S <- '[[' (!']]' .)* ']]' !. 1099 | )"); 1100 | 1101 | parser.disable_eoi_check(); 1102 | 1103 | auto ret = parser.parse("[[]]]"); 1104 | EXPECT_FALSE(ret); 1105 | } 1106 | 1107 | TEST(GeneralTest, DefaultEndOfInputTest) { 1108 | auto parser = peg::parser(R"( 1109 | S <- '[[' (!']]' .)* ']]' 1110 | )"); 1111 | 1112 | auto ret = parser.parse("[[]]]"); 1113 | EXPECT_FALSE(ret); 1114 | } 1115 | 1116 | TEST(GeneralTest, DisableEndOfInputCheckTest) { 1117 | auto parser = peg::parser(R"( 1118 | S <- '[[' (!']]' .)* ']]' 1119 | )"); 1120 | 1121 | parser.disable_eoi_check(); 1122 | 1123 | auto ret = parser.parse("[[]]]"); 1124 | EXPECT_TRUE(ret); 1125 | } 1126 | 1127 | TEST(GeneralTest, InvalidCutOperator) { 1128 | auto parser = peg::parser(R"( 1129 | S <- 'a' ↑ 'b' 1130 | )"); 1131 | 1132 | auto ret = parser.parse("ab"); 1133 | EXPECT_TRUE(ret); 1134 | 1135 | ret = parser.parse("ac"); 1136 | EXPECT_FALSE(ret); 1137 | 1138 | ret = parser.parse("b"); 1139 | EXPECT_FALSE(ret); 1140 | } 1141 | 1142 | TEST(GeneralTest, HeuristicErrorTokenTest) { 1143 | auto parser = peg::parser(R"( 1144 | program <- enum+ 1145 | enum <- 'enum' enum_kind^untyped_enum 1146 | enum_kind <- 'sequence' / 'bitmask' 1147 | 1148 | %whitespace <- [ \r\t\n]* 1149 | %word <- [a-zA-Z0-9_] 1150 | 1151 | untyped_enum <- '' { message "invalid/missing enum type, expected one of 'sequence' or 'bitmask', got '%t'"} 1152 | )"); 1153 | 1154 | parser.set_logger([&](size_t ln, size_t col, const std::string &msg) { 1155 | EXPECT_EQ(1, ln); 1156 | EXPECT_EQ(6, col); 1157 | EXPECT_EQ("invalid/missing enum type, expected one of 'sequence' or " 1158 | "'bitmask', got 'sequencer'", 1159 | msg); 1160 | }); 1161 | 1162 | auto ret = parser.parse("enum 
sequencer"); 1163 | EXPECT_FALSE(ret); 1164 | } 1165 | 1166 | TEST(GeneralTest, LiteralContentInAST) { 1167 | parser parser(R"( 1168 | PROGRAM <- STATEMENTS 1169 | 1170 | STATEMENTS <- (STATEMENT ';'?)* 1171 | STATEMENT <- ASSIGNMENT / RETURN / EXPRESSION_STATEMENT 1172 | 1173 | ASSIGNMENT <- 'let' IDENTIFIER '=' EXPRESSION 1174 | RETURN <- 'return' EXPRESSION 1175 | EXPRESSION_STATEMENT <- EXPRESSION 1176 | 1177 | EXPRESSION <- INFIX_EXPR(PREFIX_EXPR, INFIX_OPE) 1178 | INFIX_EXPR(ATOM, OPE) <- ATOM (OPE ATOM)* { 1179 | precedence 1180 | L == != 1181 | L < > 1182 | L + - 1183 | L * / 1184 | } 1185 | 1186 | IF <- 'if' '(' EXPRESSION ')' BLOCK ('else' BLOCK)? 1187 | 1188 | FUNCTION <- 'fn' '(' PARAMETERS ')' BLOCK 1189 | PARAMETERS <- LIST(IDENTIFIER, ',') 1190 | 1191 | BLOCK <- '{' STATEMENTS '}' 1192 | 1193 | CALL <- PRIMARY (ARGUMENTS / INDEX)* 1194 | ARGUMENTS <- '(' LIST(EXPRESSION, ',') ')' 1195 | INDEX <- '[' EXPRESSION ']' 1196 | 1197 | PREFIX_EXPR <- PREFIX_OPE* CALL 1198 | PRIMARY <- IF / FUNCTION / ARRAY / HASH / INTEGER / BOOLEAN / NULL / IDENTIFIER / STRING / '(' EXPRESSION ')' 1199 | 1200 | ARRAY <- '[' LIST(EXPRESSION, ',') ']' 1201 | 1202 | HASH <- '{' LIST(HASH_PAIR, ',') '}' 1203 | HASH_PAIR <- EXPRESSION ':' EXPRESSION 1204 | 1205 | IDENTIFIER <- < !KEYWORD [a-zA-Z]+ > 1206 | INTEGER <- < [0-9]+ > 1207 | STRING <- < ["] < (!["] .)* > ["] > 1208 | BOOLEAN <- 'true' / 'false' 1209 | NULL <- 'null' 1210 | PREFIX_OPE <- < [-!] > 1211 | INFIX_OPE <- < [-+/*<>] / '==' / '!=' > 1212 | 1213 | KEYWORD <- ('null' | 'true' | 'false' | 'let' | 'return' | 'if' | 'else' | 'fn') ![a-zA-Z] 1214 | 1215 | LIST(ITEM, DELM) <- (ITEM (~DELM ITEM)*)? 1216 | 1217 | LINE_COMMENT <- '//' (!LINE_END .)* &LINE_END 1218 | LINE_END <- '\r\n' / '\r' / '\n' / !. 
1219 | 1220 | %whitespace <- ([ \t\r\n]+ / LINE_COMMENT)* 1221 | %word <- [a-zA-Z]+ 1222 | )"); 1223 | parser.enable_ast(); 1224 | 1225 | std::shared_ptr ast; 1226 | auto ret = parser.parse(R"({1: 1, 2: 2, 3: 3})", ast); 1227 | 1228 | EXPECT_TRUE(ret); 1229 | 1230 | auto opt = 1231 | AstOptimizer(true, {"EXPRESSION_STATEMENT", "PARAMETERS", "ARGUMENTS", 1232 | "INDEX", "RETURN", "BLOCK", "ARRAY", "HASH"}); 1233 | ast = opt.optimize(ast); 1234 | 1235 | EXPECT_EQ("EXPRESSION_STATEMENT", ast->name); 1236 | 1237 | auto node = ast->nodes[0]; 1238 | EXPECT_EQ("HASH", node->name); 1239 | 1240 | std::map expected = { 1241 | {"1", 1}, 1242 | {"2", 2}, 1243 | {"3", 3}, 1244 | }; 1245 | 1246 | for (auto node : node->nodes) { 1247 | auto key = node->nodes[0]; 1248 | auto val = node->nodes[1]; 1249 | EXPECT_EQ("INTEGER", key->name); 1250 | 1251 | auto expectedValue = expected[key->token_to_string()]; 1252 | EXPECT_EQ("INTEGER", val->name); 1253 | EXPECT_EQ(expectedValue, val->token_to_number()); 1254 | } 1255 | } 1256 | 1257 | TEST(GeneralTest, ChoiceWithWhitespace) { 1258 | auto parser = peg::parser(R"( 1259 | type <- 'string' / 'int' / 'double' 1260 | %whitespace <- ' '* 1261 | )"); 1262 | 1263 | parser["type"] = [](const SemanticValues &vs) { 1264 | auto n = vs.choice(); 1265 | EXPECT_EQ(1, n); 1266 | }; 1267 | 1268 | auto ret = parser.parse("int"); 1269 | EXPECT_TRUE(ret); 1270 | } 1271 | 1272 | TEST(GeneralTest, PassingContextAndOutputParameter) { 1273 | parser parser(R"( 1274 | START <- TOKEN 1275 | TOKEN <- [0-9]+ 1276 | )"); 1277 | 1278 | parser["TOKEN"] = [&](const peg::SemanticValues &vs, std::any & /*dt*/) { 1279 | return vs.token_to_number(); 1280 | }; 1281 | 1282 | int output = 0; 1283 | std::any dt = std::string{"context"}; 1284 | parser.parse("42", dt, output); 1285 | EXPECT_EQ(42, output); 1286 | } 1287 | 1288 | TEST(GeneralTest, SpecifyStartRule) { 1289 | auto grammar = R"( 1290 | Start <- A 1291 | A <- B (',' B)* 1292 | B <- '[one]' / '[two]' 1293 | 
%whitespace <- [ \t\n]* 1294 | )"; 1295 | 1296 | { 1297 | parser peg(grammar, "AAA"); 1298 | EXPECT_FALSE(peg); 1299 | } 1300 | 1301 | { 1302 | parser peg(grammar, "A"); 1303 | EXPECT_TRUE(peg.parse(" [one] , [two] ")); 1304 | } 1305 | 1306 | { 1307 | parser peg(grammar); 1308 | EXPECT_TRUE(peg.parse(" [one] , [two] ")); 1309 | 1310 | peg.load_grammar(grammar, "A"); 1311 | EXPECT_TRUE(peg.parse(" [one] , [two] ")); 1312 | } 1313 | 1314 | { 1315 | parser peg; 1316 | 1317 | peg.load_grammar(grammar); 1318 | EXPECT_TRUE(peg.parse(" [one] , [two] ")); 1319 | 1320 | peg.load_grammar(grammar, "A"); 1321 | EXPECT_TRUE(peg.parse(" [one] , [two] ")); 1322 | } 1323 | } 1324 | -------------------------------------------------------------------------------- /test/test3.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace peg; 5 | 6 | TEST(PEGTest, PEG_Grammar) { 7 | EXPECT_TRUE(ParserGenerator::parse_test( 8 | "Grammar", 9 | " Definition <- a / ( b c ) / d \n rule2 <- [a-zA-Z][a-z0-9-]+ ")); 10 | } 11 | 12 | TEST(PEGTest, PEG_Definition) { 13 | EXPECT_TRUE(ParserGenerator::parse_test("Definition", 14 | "Definition <- a / (b c) / d ")); 15 | EXPECT_TRUE( 16 | ParserGenerator::parse_test("Definition", "Definition <- a / b c / d ")); 17 | EXPECT_TRUE(ParserGenerator::parse_test("Definition", u8"Definitiond ← a ")); 18 | EXPECT_FALSE(ParserGenerator::parse_test("Definition", "Definition ")); 19 | EXPECT_FALSE(ParserGenerator::parse_test("Definition", " ")); 20 | EXPECT_FALSE(ParserGenerator::parse_test("Definition", "")); 21 | EXPECT_FALSE( 22 | ParserGenerator::parse_test("Definition", "Definition = a / (b c) / d ")); 23 | EXPECT_TRUE(ParserGenerator::parse_test("Definition", "Macro(param) <- a ")); 24 | EXPECT_FALSE( 25 | ParserGenerator::parse_test("Definition", "Macro (param) <- a ")); 26 | } 27 | 28 | TEST(PEGTest, PEG_Expression) { 29 | EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a / 
(b c) / d ")); 30 | EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a / b c / d ")); 31 | EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a b ")); 32 | EXPECT_TRUE(ParserGenerator::parse_test("Expression", "")); 33 | EXPECT_FALSE(ParserGenerator::parse_test("Expression", " ")); 34 | EXPECT_FALSE(ParserGenerator::parse_test("Expression", " a b ")); 35 | 36 | // NOTE: The followings are actually allowed in the original Ford's paper... 37 | EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a//b ")); 38 | EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a // b ")); 39 | EXPECT_TRUE(ParserGenerator::parse_test("Expression", "a / / b ")); 40 | } 41 | 42 | TEST(PEGTest, PEG_Sequence) { 43 | EXPECT_TRUE(ParserGenerator::parse_test("Sequence", "a b c d ")); 44 | EXPECT_TRUE(ParserGenerator::parse_test("Sequence", "")); 45 | EXPECT_FALSE(ParserGenerator::parse_test("Sequence", "!")); 46 | EXPECT_FALSE(ParserGenerator::parse_test("Sequence", "<-")); 47 | EXPECT_FALSE(ParserGenerator::parse_test("Sequence", " a")); 48 | } 49 | 50 | TEST(PEGTest, PEG_Prefix) { 51 | EXPECT_TRUE(ParserGenerator::parse_test("Prefix", "&[a]")); 52 | EXPECT_TRUE(ParserGenerator::parse_test("Prefix", "![']")); 53 | EXPECT_FALSE(ParserGenerator::parse_test("Prefix", "-[']")); 54 | EXPECT_FALSE(ParserGenerator::parse_test("Prefix", "")); 55 | EXPECT_FALSE(ParserGenerator::parse_test("Prefix", " a")); 56 | } 57 | 58 | TEST(PEGTest, PEG_Suffix) { 59 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa ")); 60 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa? 
")); 61 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa* ")); 62 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa+ ")); 63 | EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "aaa{} ")); 64 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa{10} ")); 65 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa{10,} ")); 66 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa{10,100} ")); 67 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", "aaa{,100} ")); 68 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", ". + ")); 69 | EXPECT_TRUE(ParserGenerator::parse_test("Suffix", ". {10} ")); 70 | EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "?")); 71 | EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "+")); 72 | EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "{10}")); 73 | EXPECT_FALSE(ParserGenerator::parse_test("Suffix", "")); 74 | EXPECT_FALSE(ParserGenerator::parse_test("Suffix", " a")); 75 | } 76 | 77 | TEST(PEGTest, PEG_Primary) { 78 | EXPECT_TRUE(ParserGenerator::parse_test("Primary", "_Identifier0_ ")); 79 | EXPECT_FALSE(ParserGenerator::parse_test("Primary", "_Identifier0_<-")); 80 | EXPECT_TRUE(ParserGenerator::parse_test("Primary", 81 | "( _Identifier0_ _Identifier1_ )")); 82 | EXPECT_TRUE(ParserGenerator::parse_test("Primary", "'Literal String'")); 83 | EXPECT_TRUE(ParserGenerator::parse_test("Primary", "\"Literal String\"")); 84 | EXPECT_TRUE(ParserGenerator::parse_test("Primary", "[a-zA-Z]")); 85 | EXPECT_TRUE(ParserGenerator::parse_test("Primary", ".")); 86 | EXPECT_FALSE(ParserGenerator::parse_test("Primary", "")); 87 | EXPECT_FALSE(ParserGenerator::parse_test("Primary", " ")); 88 | EXPECT_FALSE(ParserGenerator::parse_test("Primary", " a")); 89 | EXPECT_FALSE(ParserGenerator::parse_test("Primary", "")); 90 | } 91 | 92 | TEST(PEGTest, PEG_Identifier) { 93 | EXPECT_TRUE(ParserGenerator::parse_test("Identifier", "_Identifier0_ ")); 94 | EXPECT_FALSE(ParserGenerator::parse_test("Identifier", "0Identifier_ ")); 95 | 
EXPECT_FALSE(ParserGenerator::parse_test("Identifier", "Iden|t ")); 96 | EXPECT_FALSE(ParserGenerator::parse_test("Identifier", " ")); 97 | EXPECT_FALSE(ParserGenerator::parse_test("Identifier", " a")); 98 | EXPECT_FALSE(ParserGenerator::parse_test("Identifier", "")); 99 | } 100 | 101 | TEST(PEGTest, PEG_IdentStart) { 102 | EXPECT_TRUE(ParserGenerator::parse_test("IdentStart", "_")); 103 | EXPECT_TRUE(ParserGenerator::parse_test("IdentStart", "a")); 104 | EXPECT_TRUE(ParserGenerator::parse_test("IdentStart", "Z")); 105 | EXPECT_FALSE(ParserGenerator::parse_test("IdentStart", "")); 106 | EXPECT_FALSE(ParserGenerator::parse_test("IdentStart", " ")); 107 | EXPECT_FALSE(ParserGenerator::parse_test("IdentStart", "0")); 108 | } 109 | 110 | TEST(PEGTest, PEG_IdentRest) { 111 | EXPECT_TRUE(ParserGenerator::parse_test("IdentRest", "_")); 112 | EXPECT_TRUE(ParserGenerator::parse_test("IdentRest", "a")); 113 | EXPECT_TRUE(ParserGenerator::parse_test("IdentRest", "Z")); 114 | EXPECT_FALSE(ParserGenerator::parse_test("IdentRest", "")); 115 | EXPECT_FALSE(ParserGenerator::parse_test("IdentRest", " ")); 116 | EXPECT_TRUE(ParserGenerator::parse_test("IdentRest", "0")); 117 | } 118 | 119 | TEST(PEGTest, PEG_Literal) { 120 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'abc' ")); 121 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'a\\nb\\tc' ")); 122 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'a\\277\tc' ")); 123 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'a\\77\tc' ")); 124 | EXPECT_FALSE(ParserGenerator::parse_test("Literal", "'a\\80\tc' ")); 125 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'\n' ")); 126 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", "'a\\'b' ")); 127 | EXPECT_FALSE(ParserGenerator::parse_test("Literal", "'a'b' ")); 128 | EXPECT_FALSE(ParserGenerator::parse_test("Literal", "'a\"'b' ")); 129 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", "\"'\\\"abc\\\"'\" ")); 130 | 
EXPECT_FALSE(ParserGenerator::parse_test("Literal", "\"'\"abc\"'\" ")); 131 | EXPECT_FALSE(ParserGenerator::parse_test("Literal", "abc")); 132 | EXPECT_FALSE(ParserGenerator::parse_test("Literal", "")); 133 | EXPECT_FALSE(ParserGenerator::parse_test("Literal", "\\")); 134 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", u8"'日本語'")); 135 | EXPECT_TRUE(ParserGenerator::parse_test("Literal", u8"\"日本語\"")); 136 | EXPECT_FALSE(ParserGenerator::parse_test("Literal", u8"日本語")); 137 | } 138 | 139 | TEST(PEGTest, PEG_Class) { 140 | EXPECT_FALSE(ParserGenerator::parse_test( 141 | "Class", "[]")); // NOTE: This is different from the Brian Ford's paper, 142 | // but same as RegExp 143 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a]")); 144 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a-z]")); 145 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[az]")); 146 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a-zA-Z-]")); 147 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a-zA-Z-0-9]")); 148 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[a-]")); 149 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[-a]")); 150 | EXPECT_FALSE(ParserGenerator::parse_test("Class", "[")); 151 | EXPECT_FALSE(ParserGenerator::parse_test("Class", "[a")); 152 | EXPECT_FALSE(ParserGenerator::parse_test("Class", "]")); 153 | EXPECT_FALSE(ParserGenerator::parse_test("Class", "a]")); 154 | EXPECT_TRUE(ParserGenerator::parse_test("Class", u8"[あ-ん]")); 155 | EXPECT_FALSE(ParserGenerator::parse_test("Class", u8"あ-ん")); 156 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[-+]")); 157 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[+-]")); 158 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[\\^]")); 159 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[-]")); 160 | EXPECT_TRUE(ParserGenerator::parse_test("Class", "[\\-]")); 161 | } 162 | 163 | TEST(PEGTest, PEG_Negated_Class) { 164 | EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "[^]")); 165 | 
EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a]")); 166 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a-z]")); 167 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^az]")); 168 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a-zA-Z-]")); 169 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a-zA-Z-0-9]")); 170 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^a-]")); 171 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^-a]")); 172 | EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "[^")); 173 | EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "[^a")); 174 | EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "^]")); 175 | EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", "^a]")); 176 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", u8"[^あ-ん]")); 177 | EXPECT_FALSE(ParserGenerator::parse_test("NegatedClass", u8"^あ-ん")); 178 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^-+]")); 179 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^+-]")); 180 | EXPECT_TRUE(ParserGenerator::parse_test("NegatedClass", "[^^]")); 181 | } 182 | 183 | TEST(PEGTest, PEG_Range) { 184 | EXPECT_TRUE(ParserGenerator::parse_test("Range", "a")); 185 | EXPECT_TRUE(ParserGenerator::parse_test("Range", "a-z")); 186 | EXPECT_FALSE(ParserGenerator::parse_test("Range", "az")); 187 | EXPECT_FALSE(ParserGenerator::parse_test("Range", "")); 188 | EXPECT_FALSE(ParserGenerator::parse_test("Range", "a-")); 189 | EXPECT_FALSE(ParserGenerator::parse_test("Range", "-a")); 190 | } 191 | 192 | TEST(PEGTest, PEG_Char) { 193 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\f")); 194 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\n")); 195 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\r")); 196 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\t")); 197 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\v")); 198 | 
EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\'")); 199 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\\"")); 200 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\[")); 201 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\]")); 202 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\\\")); 203 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\000")); 204 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\377")); 205 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\477")); 206 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\087")); 207 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\079")); 208 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\00")); 209 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\77")); 210 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\80")); 211 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\08")); 212 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\0")); 213 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\7")); 214 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\8")); 215 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\x0")); 216 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\x00")); 217 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\x000")); 218 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\xa")); 219 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\xab")); 220 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\xabc")); 221 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\xA")); 222 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\xAb")); 223 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\xAbc")); 224 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\xg")); 225 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\xga")); 226 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\u0")); 227 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\u00")); 228 | EXPECT_TRUE(ParserGenerator::parse_test("Char", 
"\\u0000")); 229 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\u000000")); 230 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\u0000000")); 231 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\uFFFF")); 232 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\u10000")); 233 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "\\u10FFFF")); 234 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\u110000")); 235 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\uFFFFFF")); 236 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "a")); 237 | EXPECT_TRUE(ParserGenerator::parse_test("Char", ".")); 238 | EXPECT_TRUE(ParserGenerator::parse_test("Char", "0")); 239 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "\\")); 240 | EXPECT_TRUE(ParserGenerator::parse_test("Char", " ")); 241 | EXPECT_FALSE(ParserGenerator::parse_test("Char", " ")); 242 | EXPECT_FALSE(ParserGenerator::parse_test("Char", "")); 243 | EXPECT_TRUE(ParserGenerator::parse_test("Char", u8"あ")); 244 | } 245 | 246 | TEST(PEGTest, PEG_Operators) { 247 | EXPECT_TRUE(ParserGenerator::parse_test("LEFTARROW", "<-")); 248 | EXPECT_TRUE(ParserGenerator::parse_test("SLASH", "/ ")); 249 | EXPECT_TRUE(ParserGenerator::parse_test("AND", "& ")); 250 | EXPECT_TRUE(ParserGenerator::parse_test("NOT", "! ")); 251 | EXPECT_TRUE(ParserGenerator::parse_test("QUESTION", "? ")); 252 | EXPECT_TRUE(ParserGenerator::parse_test("STAR", "* ")); 253 | EXPECT_TRUE(ParserGenerator::parse_test("PLUS", "+ ")); 254 | EXPECT_TRUE(ParserGenerator::parse_test("OPEN", "( ")); 255 | EXPECT_TRUE(ParserGenerator::parse_test("CLOSE", ") ")); 256 | EXPECT_TRUE(ParserGenerator::parse_test("DOT", ". 
")); 257 | } 258 | 259 | TEST(PEGTest, PEG_Comment) { 260 | EXPECT_TRUE(ParserGenerator::parse_test("Comment", "# Comment.\n")); 261 | EXPECT_TRUE(ParserGenerator::parse_test("Comment", "# Comment.")); 262 | EXPECT_FALSE(ParserGenerator::parse_test("Comment", " ")); 263 | EXPECT_FALSE(ParserGenerator::parse_test("Comment", "a")); 264 | } 265 | 266 | TEST(PEGTest, PEG_Space) { 267 | EXPECT_TRUE(ParserGenerator::parse_test("Space", " ")); 268 | EXPECT_TRUE(ParserGenerator::parse_test("Space", "\t")); 269 | EXPECT_TRUE(ParserGenerator::parse_test("Space", "\n")); 270 | EXPECT_FALSE(ParserGenerator::parse_test("Space", "")); 271 | EXPECT_FALSE(ParserGenerator::parse_test("Space", "a")); 272 | } 273 | 274 | TEST(PEGTest, PEG_EndOfLine) { 275 | EXPECT_TRUE(ParserGenerator::parse_test("EndOfLine", "\r\n")); 276 | EXPECT_TRUE(ParserGenerator::parse_test("EndOfLine", "\n")); 277 | EXPECT_TRUE(ParserGenerator::parse_test("EndOfLine", "\r")); 278 | EXPECT_FALSE(ParserGenerator::parse_test("EndOfLine", " ")); 279 | EXPECT_FALSE(ParserGenerator::parse_test("EndOfLine", "")); 280 | EXPECT_FALSE(ParserGenerator::parse_test("EndOfLine", "a")); 281 | } 282 | 283 | TEST(PEGTest, PEG_EndOfFile) { 284 | EXPECT_TRUE(ParserGenerator::parse_test("EndOfFile", "")); 285 | EXPECT_FALSE(ParserGenerator::parse_test("EndOfFile", " ")); 286 | } 287 | --------------------------------------------------------------------------------