├── README.md ├── .gitignore ├── Makefile ├── promparser.hh ├── hello.cc ├── LICENSE ├── prom2json.cc ├── escaped.cc ├── promtests.cc ├── prometheus.txt ├── promparser.cc └── peglib.h /README.md: -------------------------------------------------------------------------------- 1 | # parsing 2 | Playground for examples how to use cpp-peglib. 3 | 4 | Install libfmt-dev/fmt-devel first, as well as nlohmann-json3-dev 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # rest 35 | *~ 36 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS:=-std=gnu++17 -Wall -O1 -MMD -MP -g 2 | 3 | PROGRAMS = hello escaped prom2json promtests 4 | 5 | all: $(PROGRAMS) 6 | 7 | clean: 8 | rm -f *~ *.o *.d test $(PROGRAMS) 9 | 10 | -include *.d 11 | 12 | hello: hello.o 13 | $(CXX) -std=gnu++17 $^ -lfmt -o $@ 14 | 15 | escaped: escaped.o 16 | $(CXX) -std=gnu++17 $^ -lfmt -o $@ 17 | 18 | prom2json: promparser.o prom2json.o 19 | $(CXX) -std=gnu++17 $^ -lfmt -o $@ 20 | 21 | 22 | promtests: promparser.o promtests.o 23 | $(CXX) -std=gnu++17 $^ -lfmt -o $@ 24 | 25 | -------------------------------------------------------------------------------- /promparser.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 
6 | 7 | namespace peg { 8 | struct parser; 9 | } 10 | 11 | class PromParser 12 | { 13 | public: 14 | PromParser(); 15 | ~PromParser(); 16 | struct TstampedValue 17 | { 18 | int64_t tstampmsec; 19 | double value; 20 | }; 21 | 22 | struct PromEntry 23 | { 24 | std::string help; 25 | std::string type; 26 | std::map, TstampedValue> vals; 27 | }; 28 | 29 | 30 | typedef std::map promparseres_t; 31 | promparseres_t parse(const std::string& in); 32 | 33 | private: 34 | std::unique_ptr d_p; 35 | std::string d_error; 36 | }; 37 | -------------------------------------------------------------------------------- /hello.cc: -------------------------------------------------------------------------------- 1 | #include "peglib.h" 2 | #include 3 | using namespace std; 4 | 5 | // run as ./hello "(1, 2, 0.3, -.12, +12)" 6 | 7 | int main(int argc, char** argv) 8 | { 9 | peg::parser p(R"( 10 | Coord <- '(' Number ( ',' Number )* ')' 11 | Number <- [+-]?[0-9]*([.][0-9]*)? 12 | %whitespace <- [\t ]* 13 | )"); 14 | 15 | if(!(bool)p) { 16 | fmt::print("Error in grammar\n"); 17 | return 0; 18 | } 19 | 20 | p["Number"] = [](const peg::SemanticValues &vs) { 21 | return vs.token_to_number(); 22 | }; 23 | 24 | p["Coord"] = [](const peg::SemanticValues &vs) { 25 | vector ret; 26 | for(const auto& v : vs) 27 | ret.push_back(any_cast(v)); 28 | return ret; 29 | }; 30 | 31 | p.set_logger([](size_t line, size_t col, const string& msg) { 32 | fmt::print("Error on line {}:{} -> {}\n", line, col, msg); 33 | }); 34 | 35 | vector result; 36 | auto ok = p.parse(argv[1], result); 37 | 38 | fmt::print("Parse result of '{}' (ok {}): {}\n", 39 | argv[1], ok, result); 40 | 41 | } 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 bert hubert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software 
// Reads the complete contents of the file called `fname` into a string.
//
// Throws std::ios_base::failure when the file cannot be opened. Previously an
// unreadable file was silently treated as empty input, which surfaced later as
// a confusing parse error instead of pointing at the real problem.
static std::string readFileFrom(const char* fname)
{
  std::ifstream t(fname);
  if(!t)
    throw std::ios_base::failure(std::string("Unable to open '") + fname + "' for reading");

  std::stringstream buffer;
  buffer << t.rdbuf(); // slurp everything in one go
  return buffer.str();
}
nlohmann::json value; 36 | value["labels"] = v.first; 37 | value["value"] = v.second.value; 38 | value["timestamp"] = v.second.tstampmsec; 39 | values.push_back(value); 40 | } 41 | inner["values"]=values; 42 | j[r.first] = inner; 43 | } 44 | fmt::print("{}\n", j.dump(1)); 45 | } 46 | -------------------------------------------------------------------------------- /escaped.cc: -------------------------------------------------------------------------------- 1 | #include "peglib.h" 2 | #include 3 | using namespace std; 4 | 5 | int main(int argc, char** argv) 6 | { 7 | if(argc != 2) { 8 | fmt::print("Run as: ./escaped '\"hello this is a \\\"string\\\"\"'\n"); 9 | return 0; 10 | } 11 | peg::parser p(R"( 12 | QuotedString <- '"' String '"' 13 | String <- (! '"' Char )* 14 | Char <- ('\\' < . > ) / (!'\\' .) 15 | )"); 16 | 17 | if(!(bool)p) { 18 | fmt::print("Error in grammar\n"); 19 | return 0; 20 | } 21 | 22 | p["String"] = [](const peg::SemanticValues &vs) { 23 | string ret; 24 | for(const auto& v : vs) { 25 | ret += any_cast(v); 26 | } 27 | return ret; 28 | }; 29 | 30 | p["QuotedString"] = [](const peg::SemanticValues &vs) { 31 | return any_cast(vs[0]); 32 | }; 33 | 34 | p["Char"] = [](const peg::SemanticValues &vs) { 35 | fmt::print("Char returning: {}\n", vs.token_to_string()); 36 | string res = vs.token_to_string(); 37 | if(vs.choice() == 0) { // this was an escape 38 | if(res=="n") { 39 | res = "\n"; 40 | } 41 | } 42 | return res; 43 | }; 44 | 45 | 46 | p.set_logger([](size_t line, size_t col, const string& msg) { 47 | fmt::print("Error on line {}:{} -> {}\n", line, col, msg); 48 | }); 49 | 50 | string result; 51 | auto ok = p.parse(argv[1], result); 52 | 53 | fmt::print("Parse result of '{}' (ok {}): {}\n", 54 | argv[1], ok, result); 55 | 56 | } 57 | -------------------------------------------------------------------------------- /promtests.cc: -------------------------------------------------------------------------------- 1 | #define 
DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN 2 | #include "doctest.h" 3 | #include "promparser.hh" 4 | 5 | using namespace std; 6 | 7 | TEST_CASE("basic test") { 8 | PromParser p; 9 | auto res = p.parse(R"(# HELP apt_autoremove_pending Apt packages pending autoremoval. 10 | # TYPE apt_autoremove_pending gauge 11 | apt_autoremove_pending 149 12 | )"); 13 | 14 | REQUIRE(res.size() == 1); 15 | CHECK(res.begin()->first=="apt_autoremove_pending"); 16 | map emp; 17 | CHECK(res["apt_autoremove_pending"].vals[emp].value == 149); 18 | CHECK(res["apt_autoremove_pending"].vals[emp].tstampmsec == 0); 19 | CHECK(res["apt_autoremove_pending"].type == "gauge"); 20 | CHECK(res["apt_autoremove_pending"].help == "Apt packages pending autoremoval."); 21 | } 22 | 23 | TEST_CASE("test with label") { 24 | PromParser p; 25 | auto res = p.parse(R"(# HELP apt_upgrades_pending Apt packages pending updates by origin. 26 | # TYPE apt_upgrades_pending gauge 27 | apt_upgrades_pending{arch="all",origin="Debian:bookworm-security/stable-security"} 1 28 | apt_upgrades_pending{arch="amd64",origin="Debian:bookworm-security/stable-security"} 16 29 | # HELP go_goroutines Number of goroutines that currently exist. 30 | # TYPE go_goroutines gauge 31 | go_goroutines 8 32 | )"); 33 | 34 | REQUIRE(res.size() == 2); 35 | CHECK(res.begin()->first=="apt_upgrades_pending"); 36 | map labels{{"arch", "amd64"}, {"origin", "Debian:bookworm-security/stable-security"}}; 37 | CHECK(res["apt_upgrades_pending"].vals[labels].value == 16); 38 | } 39 | 40 | TEST_CASE("test with NaN") { 41 | PromParser p; 42 | auto res = p.parse(R"(# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. 
43 | # TYPE go_memstats_mspan_sys_bytes gauge 44 | go_memstats_mspan_sys_bytes NaN 45 | )"); 46 | REQUIRE(res.size() == 1); 47 | map emp; 48 | CHECK(isnan(res["go_memstats_mspan_sys_bytes"].vals[emp].value) == 1); 49 | } 50 | 51 | TEST_CASE("test with large floating point") { 52 | PromParser p; 53 | auto res = p.parse(R"(# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. 54 | # TYPE go_memstats_alloc_bytes_total counter 55 | go_memstats_alloc_bytes_total 3.072603244608e+12 56 | )"); 57 | REQUIRE(res.size() == 1); 58 | map emp; 59 | CHECK(res["go_memstats_alloc_bytes_total"].vals[emp].value == 3.072603244608e+12); 60 | 61 | } 62 | 63 | TEST_CASE("test with escapes in labels") { 64 | PromParser p; 65 | auto res = p.parse(R"(# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles. 66 | # TYPE go_gc_duration_seconds summary 67 | go_gc_duration_seconds{quantile="0\n1\n\"2\""} 1.3045e-05 1713712554000 68 | )"); 69 | REQUIRE(res.size() == 1); 70 | map emp; 71 | CHECK(res["go_gc_duration_seconds"].vals.begin()->first.begin()->second == "0\n1\n\"2\""); 72 | 73 | } 74 | 75 | TEST_CASE("test with uncode") { 76 | PromParser p; 77 | auto res = p.parse(R"(# HELP apt_upgrades_held Apt packages pëndİng updates but held back. 78 | # TYPE apt_upgrades_held gauge 79 | apt_upgrades_held{arch="",origin=""} 0 1713712554000 80 | )"); 81 | REQUIRE(res.size() == 1); 82 | map emp; 83 | CHECK(res["apt_upgrades_held"].help == "Apt packages pëndİng updates but held back."); 84 | 85 | } 86 | -------------------------------------------------------------------------------- /prometheus.txt: -------------------------------------------------------------------------------- 1 | # HELP apt_autoremove_pending Apt packages pending autoremoval. 2 | # TYPE apt_autoremove_pending gauge 3 | apt_autoremove_pending 149 4 | # Random comment 5 | # HELP apt_upgrades_held Apt packages pëndİng updates but held back. 
6 | # TYPE apt_upgrades_held gauge 7 | apt_upgrades_held{arch="",origin=""} 0 1713712554000 8 | # HELP apt_upgrades_pending Apt packages pending updates by origin. 9 | # TYPE apt_upgrades_pending gauge 10 | apt_upgrades_pending{arch="all",origin="Debian:bookworm-security/stable-security"} 1 11 | apt_upgrades_pending{arch="amd64",origin="Debian:bookworm-security/stable-security"} 16 12 | # HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles. 13 | # TYPE go_gc_duration_seconds summary 14 | go_gc_duration_seconds{quantile="0\n1\n\"2\""} 1.3045e-05 1713712554000 15 | go_gc_duration_seconds{quantile="0.25"} 1.7935e-05 1713712554000 16 | go_gc_duration_seconds{quantile="0.5"} 2.2914e-05 1713712554000 17 | go_gc_duration_seconds{quantile="0.75"} 2.8947e-05 1713712554000 18 | go_gc_duration_seconds{quantile="1"} 5.8896e-05 1713712554000 19 | go_gc_duration_seconds_sum 41.237950823 20 | go_gc_duration_seconds_count 1.575957e+06 21 | # HELP go_goroutines Number of goroutines that currently exist. 22 | # TYPE go_goroutines gauge 23 | go_goroutines 8 24 | # HELP go_info Information about the Go environment. 25 | # TYPE go_info gauge 26 | go_info{version="go1.19.8"} 1 27 | # HELP go_memstats_alloc_bytes Number of bytes allocated and still in use. 28 | # TYPE go_memstats_alloc_bytes gauge 29 | go_memstats_alloc_bytes 1.390568e+06 30 | # HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. 31 | # TYPE go_memstats_alloc_bytes_total counter 32 | go_memstats_alloc_bytes_total 3.072603244608e+12 33 | # HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. 34 | # TYPE go_memstats_buck_hash_sys_bytes gauge 35 | go_memstats_buck_hash_sys_bytes 2.577729e+06 36 | # HELP go_memstats_frees_total Total number of frees. 
37 | # TYPE go_memstats_frees_total counter 38 | go_memstats_frees_total 6.5032723988e+10 39 | # HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. 40 | # TYPE go_memstats_mcache_sys_bytes gauge 41 | go_memstats_mcache_sys_bytes +Inf 42 | # HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. 43 | # TYPE go_memstats_mspan_inuse_bytes gauge 44 | go_memstats_mspan_inuse_bytes -Inf 45 | # HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. 46 | # TYPE go_memstats_mspan_sys_bytes gauge 47 | go_memstats_mspan_sys_bytes NaN 48 | # HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. 49 | # TYPE go_memstats_next_gc_bytes gauge 50 | go_memstats_next_gc_bytes 4.194304e+06 51 | # HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. 52 | # TYPE go_memstats_other_sys_bytes gauge 53 | go_memstats_other_sys_bytes 819423 54 | # HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. 55 | # TYPE go_memstats_stack_inuse_bytes gauge 56 | go_memstats_stack_inuse_bytes 491520 57 | # HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. 58 | # TYPE go_memstats_stack_sys_bytes gauge 59 | go_memstats_stack_sys_bytes 491520 60 | # HELP go_memstats_sys_bytes Number of bytes obtained from system. 61 | # TYPE go_memstats_sys_bytes gauge 62 | go_memstats_sys_bytes 2.1345544e+07 63 | # HELP go_threads Number of OS threads created. 
64 | # TYPE go_threads gauge 65 | go_threads 8 66 | # HELP smartmon_smartctl_version SMART metric smartctl_version 67 | # TYPE smartmon_smartctl_version gauge 68 | smartmon_smartctl_version{version="7.2"} 1 69 | # HELP nvme_temperature_celsius SMART metric temperature_celsius 70 | # TYPE nvme_temperature_celsius gauge 71 | nvme_temperature_celsius{device="nvme0n1"} 32 72 | -------------------------------------------------------------------------------- /promparser.cc: -------------------------------------------------------------------------------- 1 | #include "promparser.hh" 2 | #include "peglib.h" 3 | #include 4 | using namespace std; 5 | 6 | PromParser::PromParser() 7 | { 8 | d_p = std::make_unique(); 9 | auto& p = *d_p; // saves bit of typing 10 | 11 | d_p->set_logger([](size_t line, size_t col, const string& msg, const string &rule) { 12 | fmt::print("line {}, col {}: {}\n", line, col,msg, rule); 13 | }); // gets us some helpful errors if the grammar is wrong 14 | 15 | auto ok = d_p->load_grammar(R"( 16 | root <- ( ( commentline / vline ) '\n')+ 17 | commentline <- ('# HELP ' name ' ' comment) / 18 | ('# TYPE ' name ' ' comment) / 19 | ('#' comment) 20 | comment <- (!'\n' .)* 21 | vline <- (name ' ' value (' ' timestamp)?) / 22 | (name labels ' ' value (' ' timestamp)?) 23 | name <- [a-zA-Z0-9_]+ 24 | labels <- '{' nvpair (',' nvpair)* '}' 25 | nvpair <- name '=' '"' label_value '"' 26 | label_value <- (!'"' char)* 27 | char <- ('\\' . ) / 28 | (!'\\' .) 
29 | value <- '+Inf' / '-Inf' / 'NaN' / [0-9.+e-]+ 30 | timestamp <- [+-]?[0-9]* 31 | )" ); 32 | 33 | if(!ok) 34 | throw runtime_error("Error in grammar\n"); 35 | 36 | // this creates an attractive error messsage in case of a problem, and stores it 37 | // so we can throw a useful exception later if parsing fails 38 | d_p->set_logger([this](size_t line, size_t col, const string& msg) { 39 | d_error = fmt::format("Error on line {}:{} -> {}", line, col, msg); 40 | }); 41 | 42 | // This contains a comment line, where choice 0 is "HELP", choice 1 is "TYPE" 43 | // choice 2 is random comment, which we ignore 44 | struct CommentLine 45 | { 46 | size_t choice; 47 | string name; 48 | string comment; 49 | }; 50 | // here we parse a comment line, and return a CommentLine 51 | p["commentline"] = [](const peg::SemanticValues &vs) { 52 | if(vs.choice() == 0) 53 | return CommentLine({vs.choice(), std::any_cast(vs[0]), std::any_cast(vs[1])}); 54 | else if(vs.choice() == 1) 55 | return CommentLine({vs.choice(), std::any_cast(vs[0]), std::any_cast(vs[1])}); 56 | 57 | return CommentLine({vs.choice(), string(), string()}); 58 | }; 59 | 60 | // this merely returns the comment contents as a string 61 | p["comment"] = [](const peg::SemanticValues &vs) { 62 | return vs.token_to_string(); 63 | }; 64 | // and similar for the 'name' rule 65 | p["name"] = [](const peg::SemanticValues &vs) { 66 | return vs.token_to_string(); 67 | }; 68 | // this is where deal with the un-escaping, using the choice() 69 | p["char"] = [](const peg::SemanticValues &vs) { 70 | string res = vs.token_to_string(); 71 | if(vs.choice() == 0) { // this was an escape 72 | char c = res.at(1); 73 | if(c != '\\' && c != 'n' && c != '"') 74 | throw runtime_error(fmt::format("Unknown escape sequence '\\{}'", c)); 75 | if(c == 'n') 76 | c = '\n'; 77 | return c; 78 | } 79 | return res.at(0); 80 | }; 81 | // here we assemble all the "char"'s from above into a label_value string 82 | p["label_value"] = [](const 
peg::SemanticValues &vs) { 83 | string ret; 84 | for(const auto& v : vs) 85 | ret.append(1, std::any_cast(v)); 86 | return ret; 87 | }; 88 | // only invoked if a timestamp was passed 89 | p["timestamp"] = [](const peg::SemanticValues &vs) { 90 | return vs.token_to_number(); 91 | }; 92 | // combines a label key="value" pair into a std::pair 93 | p["nvpair"] = [](const peg::SemanticValues &vs) { 94 | return std::make_pair(std::any_cast(vs[0]), std::any_cast(vs[1])); 95 | }; 96 | // gathers all these pairs into a map 97 | p["labels"] = [](const peg::SemanticValues &vs) { 98 | map m; 99 | for(const auto& sel : vs) { 100 | const auto p = std::any_cast>(sel); 101 | m.insert(p); 102 | } 103 | return m; 104 | }; 105 | // detects if a numerical value is perhaps +Inf, -Inf, NaN or a floating point value 106 | p["value"] = [](const peg::SemanticValues &vs) { 107 | if(vs.choice() == 0 ) // +Inf 108 | return numeric_limits::infinity(); 109 | else if(vs.choice() == 1 ) // -Inf 110 | return -numeric_limits::infinity(); 111 | else if(vs.choice() == 2 ) // NaN 112 | return numeric_limits::quiet_NaN(); 113 | 114 | return vs.token_to_number(); 115 | }; 116 | // this reflects the contents of a vline 117 | struct VlineDetails 118 | { 119 | string name; 120 | map labels; 121 | double value; 122 | int64_t tstampmsec = 0; 123 | }; 124 | 125 | /* Deals with the two choices, a line witout/without labels 126 | vline <- (name ' ' value (' ' timestamp)?) / 127 | (name labels ' ' value (' ' timestamp)?) 
128 | */ 129 | 130 | p["vline"] = [](const peg::SemanticValues &vs) { 131 | VlineDetails d; 132 | unsigned int pos = 0; 133 | d.name = std::any_cast(vs[pos++]); 134 | 135 | if(vs.choice() == 1) { 136 | d.labels = std::any_cast(vs[pos++]); 137 | } 138 | d.value = std::any_cast(vs[pos++]); 139 | 140 | if(pos < vs.size()) { 141 | d.tstampmsec = std::any_cast(vs[pos++]); 142 | } 143 | return d; 144 | }; 145 | // this is the first rule, and the one that ::parse will return 146 | // root consists of an array of VlineDetails and CommentLines 147 | // which we join together in the promparseres_t map 148 | p["root"] = [](const peg::SemanticValues &vs) { 149 | promparseres_t ret; 150 | for(const auto& v : vs) { 151 | if(auto dptr = std::any_cast(&v)) { 152 | ret[dptr->name].vals[dptr->labels]={dptr->tstampmsec, dptr->value}; 153 | } 154 | else if(auto cptr = std::any_cast(&v)) { 155 | if(cptr->choice == 0) 156 | ret[cptr->name].help = cptr->comment; 157 | else if(cptr->choice == 1) 158 | ret[cptr->name].type = cptr->comment; 159 | // ignore random comments (choice == 2) 160 | } 161 | } 162 | return ret; 163 | }; 164 | } 165 | 166 | PromParser::promparseres_t PromParser::parse(const std::string& in) 167 | { 168 | PromParser::promparseres_t ret; 169 | if(!d_p->parse(in, ret)) 170 | throw runtime_error("Unable to parse prometheus input: "+d_error); 171 | return ret; 172 | } 173 | 174 | PromParser::~PromParser(){} // needed -here- because of std::unique_ptr<> 175 | -------------------------------------------------------------------------------- /peglib.h: -------------------------------------------------------------------------------- 1 | // 2 | // peglib.h 3 | // 4 | // Copyright (c) 2022 Yuji Hirose. All rights reserved. 
// Guard object that invokes a stored callable when it is destroyed, unless
// release() was called first. Moving a guard transfers the pending call to the
// new object; guards cannot be copied or assigned.
//
// This is based on
// "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189".

template <typename EF> struct scope_exit {
  // Takes ownership of `f`; the new guard is armed.
  explicit scope_exit(EF &&f)
      : exit_function(std::move(f)), execute_on_destruction(true) {}

  // Steals the callable and the armed state, then disarms the source.
  scope_exit(scope_exit &&rhs)
      : exit_function(std::move(rhs.exit_function)),
        execute_on_destruction(rhs.execute_on_destruction) {
    rhs.execute_on_destruction = false;
  }

  scope_exit(const scope_exit &) = delete;
  void operator=(const scope_exit &) = delete;
  scope_exit &operator=(scope_exit &&) = delete;

  ~scope_exit() {
    if (!execute_on_destruction) { return; }
    exit_function();
  }

  // Disarms the guard: the callable will not run on destruction.
  void release() { execute_on_destruction = false; }

private:
  EF exit_function;
  bool execute_on_destruction;
};
static_cast(s8[0]); 85 | if ((b & 0x80) == 0) { 86 | return 1; 87 | } else if ((b & 0xE0) == 0xC0 && l >= 2) { 88 | return 2; 89 | } else if ((b & 0xF0) == 0xE0 && l >= 3) { 90 | return 3; 91 | } else if ((b & 0xF8) == 0xF0 && l >= 4) { 92 | return 4; 93 | } 94 | } 95 | return 0; 96 | } 97 | 98 | inline size_t codepoint_count(const char *s8, size_t l) { 99 | size_t count = 0; 100 | for (size_t i = 0; i < l; i += codepoint_length(s8 + i, l - i)) { 101 | count++; 102 | } 103 | return count; 104 | } 105 | 106 | inline size_t encode_codepoint(char32_t cp, char *buff) { 107 | if (cp < 0x0080) { 108 | buff[0] = static_cast(cp & 0x7F); 109 | return 1; 110 | } else if (cp < 0x0800) { 111 | buff[0] = static_cast(0xC0 | ((cp >> 6) & 0x1F)); 112 | buff[1] = static_cast(0x80 | (cp & 0x3F)); 113 | return 2; 114 | } else if (cp < 0xD800) { 115 | buff[0] = static_cast(0xE0 | ((cp >> 12) & 0xF)); 116 | buff[1] = static_cast(0x80 | ((cp >> 6) & 0x3F)); 117 | buff[2] = static_cast(0x80 | (cp & 0x3F)); 118 | return 3; 119 | } else if (cp < 0xE000) { 120 | // D800 - DFFF is invalid... 
// Decodes the UTF-8 sequence at the start of `s8` (at most `l` bytes).
//
// On success stores the sequence length in `bytes` and the code point in `cp`
// and returns true. Returns false, leaving both outputs untouched, when `l` is
// zero, the lead byte is not a valid UTF-8 lead byte, or fewer than the
// required number of bytes are available. Continuation bytes are not
// validated; only their low six bits are used.
inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
                             char32_t &cp) {
  if (l == 0) { return false; }

  const auto lead = static_cast<uint8_t>(s8[0]);

  // Work out the sequence length and the payload bits of the lead byte.
  size_t need = 0;
  char32_t acc = 0;
  if (lead < 0x80) {
    need = 1;
    acc = lead;
  } else if ((lead >> 5) == 0x06) { // 110xxxxx
    need = 2;
    acc = lead & 0x1F;
  } else if ((lead >> 4) == 0x0E) { // 1110xxxx
    need = 3;
    acc = lead & 0x0F;
  } else if ((lead >> 3) == 0x1E) { // 11110xxx
    need = 4;
    acc = lead & 0x07;
  } else {
    return false;
  }

  if (l < need) { return false; }

  // Each following byte contributes six more bits.
  for (size_t k = 1; k < need; k++) {
    acc = (acc << 6) | static_cast<char32_t>(s8[k] & 0x3F);
  }

  bytes = need;
  cp = acc;
  return true;
}
// Returns a copy of the `n` bytes at `s` in which form feed, newline, carriage
// return, horizontal tab and vertical tab are replaced by their two-character
// backslash spelling (\f, \n, \r, \t, \v). Every other byte, including the
// backslash itself, is copied through unchanged.
inline std::string escape_characters(const char *s, size_t n) {
  std::string escaped;
  for (const char *p = s; p != s + n; ++p) {
    const char *replacement = nullptr;
    if (*p == '\f') {
      replacement = "\\f";
    } else if (*p == '\n') {
      replacement = "\\n";
    } else if (*p == '\r') {
      replacement = "\\r";
    } else if (*p == '\t') {
      replacement = "\\t";
    } else if (*p == '\v') {
      replacement = "\\v";
    }

    if (replacement != nullptr) {
      escaped.append(replacement);
    } else {
      escaped.push_back(*p);
    }
  }
  return escaped;
}
// Parses a run of octal digits in `s` (length `n`) starting at index `i`.
//
// Returns the parsed value together with the index of the first character
// that is not part of the number. If no octal digit is found at `i` the result
// is {0, i}. Only '0'..'7' are consumed: '8' and '9' are not octal digits and
// used to be folded into the value (so "18" yielded 16 instead of stopping
// after the 1).
inline std::pair<int, size_t> parse_octal_number(const char *s, size_t n,
                                                 size_t i) {
  int ret = 0;
  while (i < n && '0' <= s[i] && s[i] <= '7') {
    ret = static_cast<int>(ret * 8 + (s[i] - '0'));
    i++;
  }
  return std::pair(ret, i);
}
// Dictionary of words that answers "what is the longest word that is a prefix
// of this text?". Every prefix of every word is stored as a key; `match` marks
// prefixes that are complete words, `done` marks prefixes that no longer word
// extends.
class Trie {
public:
  // Builds the dictionary from `items`. With `ignore_case` set, words are
  // stored lower-cased and matching lower-cases the text as well.
  Trie(const std::vector<std::string> &items, bool ignore_case)
      : ignore_case_(ignore_case) {
    for (const auto &item : items) {
      // normalise once per word instead of once per prefix
      const auto key = ignore_case ? to_lower(item) : item;
      for (size_t len = 1; len <= key.size(); len++) {
        const auto last = len == key.size();
        std::string_view sv(key.data(), len);
        auto it = dic_.find(sv);
        if (it == dic_.end()) {
          dic_.emplace(sv, Info{last, last});
        } else if (last) {
          it->second.match = true;
        } else {
          it->second.done = false;
        }
      }
    }
  }

  // Returns the length of the longest dictionary word that is a prefix of the
  // first `text_len` bytes of `text`, or 0 when there is none.
  //
  // Only `text_len` bytes are ever read, so `text` does not have to be
  // NUL-terminated and may contain NUL bytes. The text is normalised once
  // rather than being copied on every iteration.
  size_t match(const char *text, size_t text_len) const {
    std::string_view input(text, text_len);
    std::string lowered;
    if (ignore_case_) {
      lowered = to_lower(std::string(input));
      input = lowered;
    }

    size_t match_len = 0;
    for (size_t len = 1; len <= input.size(); len++) {
      auto it = dic_.find(input.substr(0, len));
      if (it == dic_.end()) { break; }
      if (it->second.match) { match_len = len; }
      if (it->second.done) { break; }
    }
    return match_len;
  }

private:
  std::string to_lower(std::string s) const {
    for (char &c : s) {
      // std::tolower is only defined for values representable as unsigned char
      c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    return s;
  }

  struct Info {
    bool done;
    bool match;
  };

  // TODO: Use unordered_map when heterogeneous lookup is supported in C++20
  // std::unordered_map<std::string, Info> dic_;
  std::map<std::string, Info, std::less<>> dic_;

  bool ignore_case_;
};
h 469 | : str2tag_core(s + 1, l - 1, 470 | (h * 33) ^ static_cast(*s)); 471 | } 472 | 473 | inline constexpr unsigned int str2tag(std::string_view sv) { 474 | return str2tag_core(sv.data(), sv.size(), 0); 475 | } 476 | 477 | namespace udl { 478 | 479 | inline constexpr unsigned int operator"" _(const char *s, size_t l) { 480 | return str2tag_core(s, l, 0); 481 | } 482 | 483 | } // namespace udl 484 | 485 | /* 486 | * Semantic values 487 | */ 488 | class Context; 489 | 490 | struct SemanticValues : protected std::vector { 491 | SemanticValues() = default; 492 | SemanticValues(Context *c) : c_(c) {} 493 | 494 | // Input text 495 | const char *path = nullptr; 496 | const char *ss = nullptr; 497 | 498 | // Matched string 499 | std::string_view sv() const { return sv_; } 500 | 501 | // Definition name 502 | const std::string &name() const { return name_; } 503 | 504 | std::vector tags; 505 | 506 | // Line number and column at which the matched string is 507 | std::pair line_info() const; 508 | 509 | // Choice count 510 | size_t choice_count() const { return choice_count_; } 511 | 512 | // Choice number (0 based index) 513 | size_t choice() const { return choice_; } 514 | 515 | // Tokens 516 | std::vector tokens; 517 | 518 | std::string_view token(size_t id = 0) const { 519 | if (tokens.empty()) { return sv_; } 520 | assert(id < tokens.size()); 521 | return tokens[id]; 522 | } 523 | 524 | // Token conversion 525 | std::string token_to_string(size_t id = 0) const { 526 | return std::string(token(id)); 527 | } 528 | 529 | template T token_to_number() const { 530 | return token_to_number_(token()); 531 | } 532 | 533 | // Transform the semantic value vector to another vector 534 | template 535 | std::vector transform(size_t beg = 0, 536 | size_t end = static_cast(-1)) const { 537 | std::vector r; 538 | end = (std::min)(end, size()); 539 | for (size_t i = beg; i < end; i++) { 540 | r.emplace_back(std::any_cast((*this)[i])); 541 | } 542 | return r; 543 | } 544 | 545 | void 
// Invokes `fn(args...)` and returns the outcome wrapped in a std::any:
//  - a void callable yields an empty std::any,
//  - a callable that already returns std::any has its result passed through,
//  - any other result is stored inside a new std::any.
template <typename F, typename... Args> std::any call(F fn, Args &&...args) {
  using Result = decltype(fn(std::forward<Args>(args)...));
  constexpr bool returns_nothing = std::is_void_v<Result>;
  constexpr bool returns_any =
      std::is_same_v<std::remove_cv_t<Result>, std::any>;

  if constexpr (returns_nothing) {
    fn(std::forward<Args>(args)...);
    return std::any();
  } else if constexpr (returns_any) {
    return fn(std::forward<Args>(args)...);
  } else {
    return std::any(fn(std::forward<Args>(args)...));
  }
}
// A parse length of static_cast<size_t>(-1) is the failure marker; every other
// value (including 0) is the number of bytes consumed by a successful match.
inline bool success(size_t len) {
  constexpr auto failure_marker = static_cast<size_t>(-1);
  return len != failure_marker;
}

// Exact opposite of success(): true only for the failure marker.
inline bool fail(size_t len) { return !success(len); }
error_rule); 691 | } 692 | 693 | void output_log(const Log &log, const char *s, size_t n); 694 | 695 | private: 696 | int cast_char(char c) const { return static_cast(c); } 697 | 698 | std::string heuristic_error_token(const char *s, size_t n, 699 | const char *pos) const { 700 | auto len = n - std::distance(s, pos); 701 | if (len) { 702 | size_t i = 0; 703 | auto c = cast_char(pos[i++]); 704 | if (!std::ispunct(c) && !std::isspace(c)) { 705 | while (i < len && !std::ispunct(cast_char(pos[i])) && 706 | !std::isspace(cast_char(pos[i]))) { 707 | i++; 708 | } 709 | } 710 | 711 | size_t count = CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT; 712 | size_t j = 0; 713 | while (count > 0 && j < i) { 714 | j += codepoint_length(&pos[j], i - j); 715 | count--; 716 | } 717 | 718 | return escape_characters(pos, j); 719 | } 720 | return std::string(); 721 | } 722 | 723 | std::string replace_all(std::string str, const std::string &from, 724 | const std::string &to) const { 725 | size_t pos = 0; 726 | while ((pos = str.find(from, pos)) != std::string::npos) { 727 | str.replace(pos, from.length(), to); 728 | pos += to.length(); 729 | } 730 | return str; 731 | } 732 | }; 733 | 734 | /* 735 | * Context 736 | */ 737 | class Ope; 738 | 739 | using TracerEnter = std::function; 742 | 743 | using TracerLeave = std::function; 746 | 747 | using TracerStartOrEnd = std::function; 748 | 749 | class Context { 750 | public: 751 | const char *path; 752 | const char *s; 753 | const size_t l; 754 | 755 | ErrorInfo error_info; 756 | bool recovered = false; 757 | 758 | std::vector> value_stack; 759 | size_t value_stack_size = 0; 760 | 761 | std::vector rule_stack; 762 | std::vector>> args_stack; 763 | 764 | size_t in_token_boundary_count = 0; 765 | 766 | std::shared_ptr whitespaceOpe; 767 | bool in_whitespace = false; 768 | 769 | std::shared_ptr wordOpe; 770 | 771 | std::vector> capture_scope_stack; 772 | size_t capture_scope_stack_size = 0; 773 | 774 | std::vector cut_stack; 775 | 776 | const 
size_t def_count; 777 | const bool enablePackratParsing; 778 | std::vector cache_registered; 779 | std::vector cache_success; 780 | 781 | std::map, std::tuple> 782 | cache_values; 783 | 784 | TracerEnter tracer_enter; 785 | TracerLeave tracer_leave; 786 | std::any trace_data; 787 | const bool verbose_trace; 788 | 789 | Log log; 790 | 791 | Context(const char *path, const char *s, size_t l, size_t def_count, 792 | std::shared_ptr whitespaceOpe, std::shared_ptr wordOpe, 793 | bool enablePackratParsing, TracerEnter tracer_enter, 794 | TracerLeave tracer_leave, std::any trace_data, bool verbose_trace, 795 | Log log) 796 | : path(path), s(s), l(l), whitespaceOpe(whitespaceOpe), wordOpe(wordOpe), 797 | def_count(def_count), enablePackratParsing(enablePackratParsing), 798 | cache_registered(enablePackratParsing ? def_count * (l + 1) : 0), 799 | cache_success(enablePackratParsing ? def_count * (l + 1) : 0), 800 | tracer_enter(tracer_enter), tracer_leave(tracer_leave), 801 | trace_data(trace_data), verbose_trace(verbose_trace), log(log) { 802 | 803 | push_args({}); 804 | push_capture_scope(); 805 | } 806 | 807 | ~Context() { 808 | pop_capture_scope(); 809 | 810 | assert(!value_stack_size); 811 | assert(!capture_scope_stack_size); 812 | assert(cut_stack.empty()); 813 | } 814 | 815 | Context(const Context &) = delete; 816 | Context(Context &&) = delete; 817 | Context operator=(const Context &) = delete; 818 | 819 | template 820 | void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val, 821 | T fn) { 822 | if (!enablePackratParsing) { 823 | fn(val); 824 | return; 825 | } 826 | 827 | auto col = a_s - s; 828 | auto idx = def_count * static_cast(col) + def_id; 829 | 830 | if (cache_registered[idx]) { 831 | if (cache_success[idx]) { 832 | auto key = std::pair(col, def_id); 833 | std::tie(len, val) = cache_values[key]; 834 | return; 835 | } else { 836 | len = static_cast(-1); 837 | return; 838 | } 839 | } else { 840 | fn(val); 841 | cache_registered[idx] = true; 842 
| cache_success[idx] = success(len); 843 | if (success(len)) { 844 | auto key = std::pair(col, def_id); 845 | cache_values[key] = std::pair(len, val); 846 | } 847 | return; 848 | } 849 | } 850 | 851 | SemanticValues &push() { 852 | push_capture_scope(); 853 | return push_semantic_values_scope(); 854 | } 855 | 856 | void pop() { 857 | pop_capture_scope(); 858 | pop_semantic_values_scope(); 859 | } 860 | 861 | // Semantic values 862 | SemanticValues &push_semantic_values_scope() { 863 | assert(value_stack_size <= value_stack.size()); 864 | if (value_stack_size == value_stack.size()) { 865 | value_stack.emplace_back(std::make_shared(this)); 866 | } else { 867 | auto &vs = *value_stack[value_stack_size]; 868 | if (!vs.empty()) { 869 | vs.clear(); 870 | if (!vs.tags.empty()) { vs.tags.clear(); } 871 | } 872 | vs.sv_ = std::string_view(); 873 | vs.choice_count_ = 0; 874 | vs.choice_ = 0; 875 | if (!vs.tokens.empty()) { vs.tokens.clear(); } 876 | } 877 | 878 | auto &vs = *value_stack[value_stack_size++]; 879 | vs.path = path; 880 | vs.ss = s; 881 | return vs; 882 | } 883 | 884 | void pop_semantic_values_scope() { value_stack_size--; } 885 | 886 | // Arguments 887 | void push_args(std::vector> &&args) { 888 | args_stack.emplace_back(args); 889 | } 890 | 891 | void pop_args() { args_stack.pop_back(); } 892 | 893 | const std::vector> &top_args() const { 894 | return args_stack[args_stack.size() - 1]; 895 | } 896 | 897 | // Capture scope 898 | void push_capture_scope() { 899 | assert(capture_scope_stack_size <= capture_scope_stack.size()); 900 | if (capture_scope_stack_size == capture_scope_stack.size()) { 901 | capture_scope_stack.emplace_back( 902 | std::map()); 903 | } else { 904 | auto &cs = capture_scope_stack[capture_scope_stack_size]; 905 | if (!cs.empty()) { cs.clear(); } 906 | } 907 | capture_scope_stack_size++; 908 | } 909 | 910 | void pop_capture_scope() { capture_scope_stack_size--; } 911 | 912 | void shift_capture_values() { 913 | assert(capture_scope_stack_size 
>= 2); 914 | auto curr = &capture_scope_stack[capture_scope_stack_size - 1]; 915 | auto prev = curr - 1; 916 | for (const auto &[k, v] : *curr) { 917 | (*prev)[k] = v; 918 | } 919 | } 920 | 921 | // Error 922 | void set_error_pos(const char *a_s, const char *literal = nullptr); 923 | 924 | // Trace 925 | void trace_enter(const Ope &ope, const char *a_s, size_t n, 926 | const SemanticValues &vs, std::any &dt); 927 | void trace_leave(const Ope &ope, const char *a_s, size_t n, 928 | const SemanticValues &vs, std::any &dt, size_t len); 929 | bool is_traceable(const Ope &ope) const; 930 | 931 | // Line info 932 | std::pair line_info(const char *cur) const { 933 | std::call_once(source_line_index_init_, [this]() { 934 | for (size_t pos = 0; pos < l; pos++) { 935 | if (s[pos] == '\n') { source_line_index.push_back(pos); } 936 | } 937 | source_line_index.push_back(l); 938 | }); 939 | 940 | auto pos = static_cast(std::distance(s, cur)); 941 | 942 | auto it = std::lower_bound( 943 | source_line_index.begin(), source_line_index.end(), pos, 944 | [](size_t element, size_t value) { return element < value; }); 945 | 946 | auto id = static_cast(std::distance(source_line_index.begin(), it)); 947 | auto off = pos - (id == 0 ? 
0 : source_line_index[id - 1] + 1); 948 | return std::pair(id + 1, off + 1); 949 | } 950 | 951 | size_t next_trace_id = 0; 952 | std::vector trace_ids; 953 | bool ignore_trace_state = false; 954 | mutable std::once_flag source_line_index_init_; 955 | mutable std::vector source_line_index; 956 | }; 957 | 958 | /* 959 | * Parser operators 960 | */ 961 | class Ope { 962 | public: 963 | struct Visitor; 964 | 965 | virtual ~Ope() = default; 966 | size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c, 967 | std::any &dt) const; 968 | virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs, 969 | Context &c, std::any &dt) const = 0; 970 | virtual void accept(Visitor &v) = 0; 971 | }; 972 | 973 | class Sequence : public Ope { 974 | public: 975 | template 976 | Sequence(const Args &...args) 977 | : opes_{static_cast>(args)...} {} 978 | Sequence(const std::vector> &opes) : opes_(opes) {} 979 | Sequence(std::vector> &&opes) : opes_(opes) {} 980 | 981 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 982 | std::any &dt) const override { 983 | auto &chvs = c.push_semantic_values_scope(); 984 | auto se = scope_exit([&]() { c.pop_semantic_values_scope(); }); 985 | size_t i = 0; 986 | for (const auto &ope : opes_) { 987 | auto len = ope->parse(s + i, n - i, chvs, c, dt); 988 | if (fail(len)) { return len; } 989 | i += len; 990 | } 991 | vs.append(chvs); 992 | return i; 993 | } 994 | 995 | void accept(Visitor &v) override; 996 | 997 | std::vector> opes_; 998 | }; 999 | 1000 | class PrioritizedChoice : public Ope { 1001 | public: 1002 | template 1003 | PrioritizedChoice(bool for_label, const Args &...args) 1004 | : opes_{static_cast>(args)...}, 1005 | for_label_(for_label) {} 1006 | PrioritizedChoice(const std::vector> &opes) 1007 | : opes_(opes) {} 1008 | PrioritizedChoice(std::vector> &&opes) : opes_(opes) {} 1009 | 1010 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1011 | std::any &dt) const 
override { 1012 | size_t len = static_cast(-1); 1013 | 1014 | if (!for_label_) { c.cut_stack.push_back(false); } 1015 | auto se = scope_exit([&]() { 1016 | if (!for_label_) { c.cut_stack.pop_back(); } 1017 | }); 1018 | 1019 | size_t id = 0; 1020 | for (const auto &ope : opes_) { 1021 | if (!c.cut_stack.empty()) { c.cut_stack.back() = false; } 1022 | 1023 | auto &chvs = c.push(); 1024 | c.error_info.keep_previous_token = id > 0; 1025 | auto se = scope_exit([&]() { 1026 | c.pop(); 1027 | c.error_info.keep_previous_token = false; 1028 | }); 1029 | 1030 | len = ope->parse(s, n, chvs, c, dt); 1031 | 1032 | if (success(len)) { 1033 | vs.append(chvs); 1034 | vs.choice_count_ = opes_.size(); 1035 | vs.choice_ = id; 1036 | c.shift_capture_values(); 1037 | break; 1038 | } else if (!c.cut_stack.empty() && c.cut_stack.back()) { 1039 | break; 1040 | } 1041 | 1042 | id++; 1043 | } 1044 | 1045 | return len; 1046 | } 1047 | 1048 | void accept(Visitor &v) override; 1049 | 1050 | size_t size() const { return opes_.size(); } 1051 | 1052 | std::vector> opes_; 1053 | bool for_label_ = false; 1054 | }; 1055 | 1056 | class Repetition : public Ope { 1057 | public: 1058 | Repetition(const std::shared_ptr &ope, size_t min, size_t max) 1059 | : ope_(ope), min_(min), max_(max) {} 1060 | 1061 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1062 | std::any &dt) const override { 1063 | size_t count = 0; 1064 | size_t i = 0; 1065 | while (count < min_) { 1066 | auto &chvs = c.push(); 1067 | auto se = scope_exit([&]() { c.pop(); }); 1068 | 1069 | auto len = ope_->parse(s + i, n - i, chvs, c, dt); 1070 | 1071 | if (success(len)) { 1072 | vs.append(chvs); 1073 | c.shift_capture_values(); 1074 | } else { 1075 | return len; 1076 | } 1077 | i += len; 1078 | count++; 1079 | } 1080 | 1081 | while (count < max_) { 1082 | auto &chvs = c.push(); 1083 | auto se = scope_exit([&]() { c.pop(); }); 1084 | 1085 | auto len = ope_->parse(s + i, n - i, chvs, c, dt); 1086 | 1087 | if 
(success(len)) { 1088 | vs.append(chvs); 1089 | c.shift_capture_values(); 1090 | } else { 1091 | break; 1092 | } 1093 | i += len; 1094 | count++; 1095 | } 1096 | return i; 1097 | } 1098 | 1099 | void accept(Visitor &v) override; 1100 | 1101 | bool is_zom() const { 1102 | return min_ == 0 && max_ == std::numeric_limits::max(); 1103 | } 1104 | 1105 | static std::shared_ptr zom(const std::shared_ptr &ope) { 1106 | return std::make_shared(ope, 0, 1107 | std::numeric_limits::max()); 1108 | } 1109 | 1110 | static std::shared_ptr oom(const std::shared_ptr &ope) { 1111 | return std::make_shared(ope, 1, 1112 | std::numeric_limits::max()); 1113 | } 1114 | 1115 | static std::shared_ptr opt(const std::shared_ptr &ope) { 1116 | return std::make_shared(ope, 0, 1); 1117 | } 1118 | 1119 | std::shared_ptr ope_; 1120 | size_t min_; 1121 | size_t max_; 1122 | }; 1123 | 1124 | class AndPredicate : public Ope { 1125 | public: 1126 | AndPredicate(const std::shared_ptr &ope) : ope_(ope) {} 1127 | 1128 | size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, 1129 | Context &c, std::any &dt) const override { 1130 | auto &chvs = c.push(); 1131 | auto se = scope_exit([&]() { c.pop(); }); 1132 | 1133 | auto len = ope_->parse(s, n, chvs, c, dt); 1134 | 1135 | if (success(len)) { 1136 | return 0; 1137 | } else { 1138 | return len; 1139 | } 1140 | } 1141 | 1142 | void accept(Visitor &v) override; 1143 | 1144 | std::shared_ptr ope_; 1145 | }; 1146 | 1147 | class NotPredicate : public Ope { 1148 | public: 1149 | NotPredicate(const std::shared_ptr &ope) : ope_(ope) {} 1150 | 1151 | size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, 1152 | Context &c, std::any &dt) const override { 1153 | auto &chvs = c.push(); 1154 | auto se = scope_exit([&]() { c.pop(); }); 1155 | auto len = ope_->parse(s, n, chvs, c, dt); 1156 | if (success(len)) { 1157 | c.set_error_pos(s); 1158 | return static_cast(-1); 1159 | } else { 1160 | return 0; 1161 | } 1162 | } 1163 | 1164 | void 
accept(Visitor &v) override; 1165 | 1166 | std::shared_ptr ope_; 1167 | }; 1168 | 1169 | class Dictionary : public Ope, public std::enable_shared_from_this { 1170 | public: 1171 | Dictionary(const std::vector &v, bool ignore_case) 1172 | : trie_(v, ignore_case) {} 1173 | 1174 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1175 | std::any &dt) const override; 1176 | 1177 | void accept(Visitor &v) override; 1178 | 1179 | Trie trie_; 1180 | }; 1181 | 1182 | class LiteralString : public Ope, 1183 | public std::enable_shared_from_this { 1184 | public: 1185 | LiteralString(std::string &&s, bool ignore_case) 1186 | : lit_(s), ignore_case_(ignore_case), is_word_(false) {} 1187 | 1188 | LiteralString(const std::string &s, bool ignore_case) 1189 | : lit_(s), ignore_case_(ignore_case), is_word_(false) {} 1190 | 1191 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1192 | std::any &dt) const override; 1193 | 1194 | void accept(Visitor &v) override; 1195 | 1196 | std::string lit_; 1197 | bool ignore_case_; 1198 | mutable std::once_flag init_is_word_; 1199 | mutable bool is_word_; 1200 | }; 1201 | 1202 | class CharacterClass : public Ope, 1203 | public std::enable_shared_from_this { 1204 | public: 1205 | CharacterClass(const std::string &s, bool negated, bool ignore_case) 1206 | : negated_(negated), ignore_case_(ignore_case) { 1207 | auto chars = decode(s.data(), s.length()); 1208 | auto i = 0u; 1209 | while (i < chars.size()) { 1210 | if (i + 2 < chars.size() && chars[i + 1] == '-') { 1211 | auto cp1 = chars[i]; 1212 | auto cp2 = chars[i + 2]; 1213 | ranges_.emplace_back(std::pair(cp1, cp2)); 1214 | i += 3; 1215 | } else { 1216 | auto cp = chars[i]; 1217 | ranges_.emplace_back(std::pair(cp, cp)); 1218 | i += 1; 1219 | } 1220 | } 1221 | assert(!ranges_.empty()); 1222 | } 1223 | 1224 | CharacterClass(const std::vector> &ranges, 1225 | bool negated, bool ignore_case) 1226 | : ranges_(ranges), negated_(negated), 
ignore_case_(ignore_case) { 1227 | assert(!ranges_.empty()); 1228 | } 1229 | 1230 | size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, 1231 | Context &c, std::any & /*dt*/) const override { 1232 | if (n < 1) { 1233 | c.set_error_pos(s); 1234 | return static_cast(-1); 1235 | } 1236 | 1237 | char32_t cp = 0; 1238 | auto len = decode_codepoint(s, n, cp); 1239 | 1240 | for (const auto &range : ranges_) { 1241 | if (in_range(range, cp)) { 1242 | if (negated_) { 1243 | c.set_error_pos(s); 1244 | return static_cast(-1); 1245 | } else { 1246 | return len; 1247 | } 1248 | } 1249 | } 1250 | 1251 | if (negated_) { 1252 | return len; 1253 | } else { 1254 | c.set_error_pos(s); 1255 | return static_cast(-1); 1256 | } 1257 | } 1258 | 1259 | void accept(Visitor &v) override; 1260 | 1261 | private: 1262 | bool in_range(const std::pair &range, char32_t cp) const { 1263 | if (ignore_case_) { 1264 | auto cpl = std::tolower(cp); 1265 | return std::tolower(range.first) <= cpl && 1266 | cpl <= std::tolower(range.second); 1267 | } else { 1268 | return range.first <= cp && cp <= range.second; 1269 | } 1270 | } 1271 | 1272 | std::vector> ranges_; 1273 | bool negated_; 1274 | bool ignore_case_; 1275 | }; 1276 | 1277 | class Character : public Ope, public std::enable_shared_from_this { 1278 | public: 1279 | Character(char ch) : ch_(ch) {} 1280 | 1281 | size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, 1282 | Context &c, std::any & /*dt*/) const override { 1283 | if (n < 1 || s[0] != ch_) { 1284 | c.set_error_pos(s); 1285 | return static_cast(-1); 1286 | } 1287 | return 1; 1288 | } 1289 | 1290 | void accept(Visitor &v) override; 1291 | 1292 | char ch_; 1293 | }; 1294 | 1295 | class AnyCharacter : public Ope, 1296 | public std::enable_shared_from_this { 1297 | public: 1298 | size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, 1299 | Context &c, std::any & /*dt*/) const override { 1300 | auto len = codepoint_length(s, n); 1301 | if (len < 1) { 
1302 | c.set_error_pos(s); 1303 | return static_cast(-1); 1304 | } 1305 | return len; 1306 | } 1307 | 1308 | void accept(Visitor &v) override; 1309 | }; 1310 | 1311 | class CaptureScope : public Ope { 1312 | public: 1313 | CaptureScope(const std::shared_ptr &ope) : ope_(ope) {} 1314 | 1315 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1316 | std::any &dt) const override { 1317 | c.push_capture_scope(); 1318 | auto se = scope_exit([&]() { c.pop_capture_scope(); }); 1319 | return ope_->parse(s, n, vs, c, dt); 1320 | } 1321 | 1322 | void accept(Visitor &v) override; 1323 | 1324 | std::shared_ptr ope_; 1325 | }; 1326 | 1327 | class Capture : public Ope { 1328 | public: 1329 | using MatchAction = std::function; 1330 | 1331 | Capture(const std::shared_ptr &ope, MatchAction ma) 1332 | : ope_(ope), match_action_(ma) {} 1333 | 1334 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1335 | std::any &dt) const override { 1336 | auto len = ope_->parse(s, n, vs, c, dt); 1337 | if (success(len) && match_action_) { match_action_(s, len, c); } 1338 | return len; 1339 | } 1340 | 1341 | void accept(Visitor &v) override; 1342 | 1343 | std::shared_ptr ope_; 1344 | MatchAction match_action_; 1345 | }; 1346 | 1347 | class TokenBoundary : public Ope { 1348 | public: 1349 | TokenBoundary(const std::shared_ptr &ope) : ope_(ope) {} 1350 | 1351 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1352 | std::any &dt) const override; 1353 | 1354 | void accept(Visitor &v) override; 1355 | 1356 | std::shared_ptr ope_; 1357 | }; 1358 | 1359 | class Ignore : public Ope { 1360 | public: 1361 | Ignore(const std::shared_ptr &ope) : ope_(ope) {} 1362 | 1363 | size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, 1364 | Context &c, std::any &dt) const override { 1365 | auto &chvs = c.push_semantic_values_scope(); 1366 | auto se = scope_exit([&]() { c.pop_semantic_values_scope(); }); 1367 | return 
ope_->parse(s, n, chvs, c, dt); 1368 | } 1369 | 1370 | void accept(Visitor &v) override; 1371 | 1372 | std::shared_ptr ope_; 1373 | }; 1374 | 1375 | using Parser = std::function; 1377 | 1378 | class User : public Ope { 1379 | public: 1380 | User(Parser fn) : fn_(fn) {} 1381 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, 1382 | Context & /*c*/, std::any &dt) const override { 1383 | assert(fn_); 1384 | return fn_(s, n, vs, dt); 1385 | } 1386 | void accept(Visitor &v) override; 1387 | std::function 1389 | fn_; 1390 | }; 1391 | 1392 | class WeakHolder : public Ope { 1393 | public: 1394 | WeakHolder(const std::shared_ptr &ope) : weak_(ope) {} 1395 | 1396 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1397 | std::any &dt) const override { 1398 | auto ope = weak_.lock(); 1399 | assert(ope); 1400 | return ope->parse(s, n, vs, c, dt); 1401 | } 1402 | 1403 | void accept(Visitor &v) override; 1404 | 1405 | std::weak_ptr weak_; 1406 | }; 1407 | 1408 | class Holder : public Ope { 1409 | public: 1410 | Holder(Definition *outer) : outer_(outer) {} 1411 | 1412 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1413 | std::any &dt) const override; 1414 | 1415 | void accept(Visitor &v) override; 1416 | 1417 | std::any reduce(SemanticValues &vs, std::any &dt) const; 1418 | 1419 | const std::string &name() const; 1420 | const std::string &trace_name() const; 1421 | 1422 | std::shared_ptr ope_; 1423 | Definition *outer_; 1424 | mutable std::once_flag trace_name_init_; 1425 | mutable std::string trace_name_; 1426 | 1427 | friend class Definition; 1428 | }; 1429 | 1430 | using Grammar = std::unordered_map; 1431 | 1432 | class Reference : public Ope, public std::enable_shared_from_this { 1433 | public: 1434 | Reference(const Grammar &grammar, const std::string &name, const char *s, 1435 | bool is_macro, const std::vector> &args) 1436 | : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args), 1437 | 
rule_(nullptr), iarg_(0) {} 1438 | 1439 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1440 | std::any &dt) const override; 1441 | 1442 | void accept(Visitor &v) override; 1443 | 1444 | std::shared_ptr get_core_operator() const; 1445 | 1446 | const Grammar &grammar_; 1447 | const std::string name_; 1448 | const char *s_; 1449 | 1450 | const bool is_macro_; 1451 | const std::vector> args_; 1452 | 1453 | Definition *rule_; 1454 | size_t iarg_; 1455 | }; 1456 | 1457 | class Whitespace : public Ope { 1458 | public: 1459 | Whitespace(const std::shared_ptr &ope) : ope_(ope) {} 1460 | 1461 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1462 | std::any &dt) const override { 1463 | if (c.in_whitespace) { return 0; } 1464 | c.in_whitespace = true; 1465 | auto se = scope_exit([&]() { c.in_whitespace = false; }); 1466 | return ope_->parse(s, n, vs, c, dt); 1467 | } 1468 | 1469 | void accept(Visitor &v) override; 1470 | 1471 | std::shared_ptr ope_; 1472 | }; 1473 | 1474 | class BackReference : public Ope { 1475 | public: 1476 | BackReference(std::string &&name) : name_(name) {} 1477 | 1478 | BackReference(const std::string &name) : name_(name) {} 1479 | 1480 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1481 | std::any &dt) const override; 1482 | 1483 | void accept(Visitor &v) override; 1484 | 1485 | std::string name_; 1486 | }; 1487 | 1488 | class PrecedenceClimbing : public Ope { 1489 | public: 1490 | using BinOpeInfo = std::map>; 1491 | 1492 | PrecedenceClimbing(const std::shared_ptr &atom, 1493 | const std::shared_ptr &binop, const BinOpeInfo &info, 1494 | const Definition &rule) 1495 | : atom_(atom), binop_(binop), info_(info), rule_(rule) {} 1496 | 1497 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1498 | std::any &dt) const override { 1499 | return parse_expression(s, n, vs, c, dt, 0); 1500 | } 1501 | 1502 | void accept(Visitor &v) override; 1503 | 
1504 | std::shared_ptr atom_; 1505 | std::shared_ptr binop_; 1506 | BinOpeInfo info_; 1507 | const Definition &rule_; 1508 | 1509 | private: 1510 | size_t parse_expression(const char *s, size_t n, SemanticValues &vs, 1511 | Context &c, std::any &dt, size_t min_prec) const; 1512 | 1513 | Definition &get_reference_for_binop(Context &c) const; 1514 | }; 1515 | 1516 | class Recovery : public Ope { 1517 | public: 1518 | Recovery(const std::shared_ptr &ope) : ope_(ope) {} 1519 | 1520 | size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, 1521 | std::any &dt) const override; 1522 | 1523 | void accept(Visitor &v) override; 1524 | 1525 | std::shared_ptr ope_; 1526 | }; 1527 | 1528 | class Cut : public Ope, public std::enable_shared_from_this { 1529 | public: 1530 | size_t parse_core(const char * /*s*/, size_t /*n*/, SemanticValues & /*vs*/, 1531 | Context &c, std::any & /*dt*/) const override { 1532 | if (!c.cut_stack.empty()) { c.cut_stack.back() = true; } 1533 | return 0; 1534 | } 1535 | 1536 | void accept(Visitor &v) override; 1537 | }; 1538 | 1539 | /* 1540 | * Factories 1541 | */ 1542 | template std::shared_ptr seq(Args &&...args) { 1543 | return std::make_shared(static_cast>(args)...); 1544 | } 1545 | 1546 | template std::shared_ptr cho(Args &&...args) { 1547 | return std::make_shared( 1548 | false, static_cast>(args)...); 1549 | } 1550 | 1551 | template std::shared_ptr cho4label_(Args &&...args) { 1552 | return std::make_shared( 1553 | true, static_cast>(args)...); 1554 | } 1555 | 1556 | inline std::shared_ptr zom(const std::shared_ptr &ope) { 1557 | return Repetition::zom(ope); 1558 | } 1559 | 1560 | inline std::shared_ptr oom(const std::shared_ptr &ope) { 1561 | return Repetition::oom(ope); 1562 | } 1563 | 1564 | inline std::shared_ptr opt(const std::shared_ptr &ope) { 1565 | return Repetition::opt(ope); 1566 | } 1567 | 1568 | inline std::shared_ptr rep(const std::shared_ptr &ope, size_t min, 1569 | size_t max) { 1570 | return 
std::make_shared(ope, min, max); 1571 | } 1572 | 1573 | inline std::shared_ptr apd(const std::shared_ptr &ope) { 1574 | return std::make_shared(ope); 1575 | } 1576 | 1577 | inline std::shared_ptr npd(const std::shared_ptr &ope) { 1578 | return std::make_shared(ope); 1579 | } 1580 | 1581 | inline std::shared_ptr dic(const std::vector &v, 1582 | bool ignore_case) { 1583 | return std::make_shared(v, ignore_case); 1584 | } 1585 | 1586 | inline std::shared_ptr lit(std::string &&s) { 1587 | return std::make_shared(s, false); 1588 | } 1589 | 1590 | inline std::shared_ptr liti(std::string &&s) { 1591 | return std::make_shared(s, true); 1592 | } 1593 | 1594 | inline std::shared_ptr cls(const std::string &s) { 1595 | return std::make_shared(s, false, false); 1596 | } 1597 | 1598 | inline std::shared_ptr 1599 | cls(const std::vector> &ranges, 1600 | bool ignore_case = false) { 1601 | return std::make_shared(ranges, false, ignore_case); 1602 | } 1603 | 1604 | inline std::shared_ptr ncls(const std::string &s) { 1605 | return std::make_shared(s, true, false); 1606 | } 1607 | 1608 | inline std::shared_ptr 1609 | ncls(const std::vector> &ranges, 1610 | bool ignore_case = false) { 1611 | return std::make_shared(ranges, true, ignore_case); 1612 | } 1613 | 1614 | inline std::shared_ptr chr(char dt) { 1615 | return std::make_shared(dt); 1616 | } 1617 | 1618 | inline std::shared_ptr dot() { return std::make_shared(); } 1619 | 1620 | inline std::shared_ptr csc(const std::shared_ptr &ope) { 1621 | return std::make_shared(ope); 1622 | } 1623 | 1624 | inline std::shared_ptr cap(const std::shared_ptr &ope, 1625 | Capture::MatchAction ma) { 1626 | return std::make_shared(ope, ma); 1627 | } 1628 | 1629 | inline std::shared_ptr tok(const std::shared_ptr &ope) { 1630 | return std::make_shared(ope); 1631 | } 1632 | 1633 | inline std::shared_ptr ign(const std::shared_ptr &ope) { 1634 | return std::make_shared(ope); 1635 | } 1636 | 1637 | inline std::shared_ptr 1638 | usr(std::function 1640 | fn) 
{ 1641 | return std::make_shared(fn); 1642 | } 1643 | 1644 | inline std::shared_ptr ref(const Grammar &grammar, const std::string &name, 1645 | const char *s, bool is_macro, 1646 | const std::vector> &args) { 1647 | return std::make_shared(grammar, name, s, is_macro, args); 1648 | } 1649 | 1650 | inline std::shared_ptr wsp(const std::shared_ptr &ope) { 1651 | return std::make_shared(std::make_shared(ope)); 1652 | } 1653 | 1654 | inline std::shared_ptr bkr(std::string &&name) { 1655 | return std::make_shared(name); 1656 | } 1657 | 1658 | inline std::shared_ptr pre(const std::shared_ptr &atom, 1659 | const std::shared_ptr &binop, 1660 | const PrecedenceClimbing::BinOpeInfo &info, 1661 | const Definition &rule) { 1662 | return std::make_shared(atom, binop, info, rule); 1663 | } 1664 | 1665 | inline std::shared_ptr rec(const std::shared_ptr &ope) { 1666 | return std::make_shared(ope); 1667 | } 1668 | 1669 | inline std::shared_ptr cut() { return std::make_shared(); } 1670 | 1671 | /* 1672 | * Visitor 1673 | */ 1674 | struct Ope::Visitor { 1675 | virtual ~Visitor() {} 1676 | virtual void visit(Sequence &) {} 1677 | virtual void visit(PrioritizedChoice &) {} 1678 | virtual void visit(Repetition &) {} 1679 | virtual void visit(AndPredicate &) {} 1680 | virtual void visit(NotPredicate &) {} 1681 | virtual void visit(Dictionary &) {} 1682 | virtual void visit(LiteralString &) {} 1683 | virtual void visit(CharacterClass &) {} 1684 | virtual void visit(Character &) {} 1685 | virtual void visit(AnyCharacter &) {} 1686 | virtual void visit(CaptureScope &) {} 1687 | virtual void visit(Capture &) {} 1688 | virtual void visit(TokenBoundary &) {} 1689 | virtual void visit(Ignore &) {} 1690 | virtual void visit(User &) {} 1691 | virtual void visit(WeakHolder &) {} 1692 | virtual void visit(Holder &) {} 1693 | virtual void visit(Reference &) {} 1694 | virtual void visit(Whitespace &) {} 1695 | virtual void visit(BackReference &) {} 1696 | virtual void visit(PrecedenceClimbing &) {} 
1697 | virtual void visit(Recovery &) {} 1698 | virtual void visit(Cut &) {} 1699 | }; 1700 | 1701 | struct TraceOpeName : public Ope::Visitor { 1702 | using Ope::Visitor::visit; 1703 | 1704 | void visit(Sequence &) override { name_ = "Sequence"; } 1705 | void visit(PrioritizedChoice &) override { name_ = "PrioritizedChoice"; } 1706 | void visit(Repetition &) override { name_ = "Repetition"; } 1707 | void visit(AndPredicate &) override { name_ = "AndPredicate"; } 1708 | void visit(NotPredicate &) override { name_ = "NotPredicate"; } 1709 | void visit(Dictionary &) override { name_ = "Dictionary"; } 1710 | void visit(LiteralString &) override { name_ = "LiteralString"; } 1711 | void visit(CharacterClass &) override { name_ = "CharacterClass"; } 1712 | void visit(Character &) override { name_ = "Character"; } 1713 | void visit(AnyCharacter &) override { name_ = "AnyCharacter"; } 1714 | void visit(CaptureScope &) override { name_ = "CaptureScope"; } 1715 | void visit(Capture &) override { name_ = "Capture"; } 1716 | void visit(TokenBoundary &) override { name_ = "TokenBoundary"; } 1717 | void visit(Ignore &) override { name_ = "Ignore"; } 1718 | void visit(User &) override { name_ = "User"; } 1719 | void visit(WeakHolder &) override { name_ = "WeakHolder"; } 1720 | void visit(Holder &ope) override { name_ = ope.trace_name().data(); } 1721 | void visit(Reference &) override { name_ = "Reference"; } 1722 | void visit(Whitespace &) override { name_ = "Whitespace"; } 1723 | void visit(BackReference &) override { name_ = "BackReference"; } 1724 | void visit(PrecedenceClimbing &) override { name_ = "PrecedenceClimbing"; } 1725 | void visit(Recovery &) override { name_ = "Recovery"; } 1726 | void visit(Cut &) override { name_ = "Cut"; } 1727 | 1728 | static std::string get(Ope &ope) { 1729 | TraceOpeName vis; 1730 | ope.accept(vis); 1731 | return vis.name_; 1732 | } 1733 | 1734 | private: 1735 | const char *name_ = nullptr; 1736 | }; 1737 | 1738 | struct AssignIDToDefinition 
: public Ope::Visitor { 1739 | using Ope::Visitor::visit; 1740 | 1741 | void visit(Sequence &ope) override { 1742 | for (auto op : ope.opes_) { 1743 | op->accept(*this); 1744 | } 1745 | } 1746 | void visit(PrioritizedChoice &ope) override { 1747 | for (auto op : ope.opes_) { 1748 | op->accept(*this); 1749 | } 1750 | } 1751 | void visit(Repetition &ope) override { ope.ope_->accept(*this); } 1752 | void visit(AndPredicate &ope) override { ope.ope_->accept(*this); } 1753 | void visit(NotPredicate &ope) override { ope.ope_->accept(*this); } 1754 | void visit(CaptureScope &ope) override { ope.ope_->accept(*this); } 1755 | void visit(Capture &ope) override { ope.ope_->accept(*this); } 1756 | void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); } 1757 | void visit(Ignore &ope) override { ope.ope_->accept(*this); } 1758 | void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } 1759 | void visit(Holder &ope) override; 1760 | void visit(Reference &ope) override; 1761 | void visit(Whitespace &ope) override { ope.ope_->accept(*this); } 1762 | void visit(PrecedenceClimbing &ope) override; 1763 | void visit(Recovery &ope) override { ope.ope_->accept(*this); } 1764 | 1765 | std::unordered_map ids; 1766 | }; 1767 | 1768 | struct IsLiteralToken : public Ope::Visitor { 1769 | using Ope::Visitor::visit; 1770 | 1771 | void visit(PrioritizedChoice &ope) override { 1772 | for (auto op : ope.opes_) { 1773 | if (!IsLiteralToken::check(*op)) { return; } 1774 | } 1775 | result_ = true; 1776 | } 1777 | 1778 | void visit(Dictionary &) override { result_ = true; } 1779 | void visit(LiteralString &) override { result_ = true; } 1780 | 1781 | static bool check(Ope &ope) { 1782 | IsLiteralToken vis; 1783 | ope.accept(vis); 1784 | return vis.result_; 1785 | } 1786 | 1787 | private: 1788 | bool result_ = false; 1789 | }; 1790 | 1791 | struct TokenChecker : public Ope::Visitor { 1792 | using Ope::Visitor::visit; 1793 | 1794 | void visit(Sequence &ope) override { 1795 
| for (auto op : ope.opes_) { 1796 | op->accept(*this); 1797 | } 1798 | } 1799 | void visit(PrioritizedChoice &ope) override { 1800 | for (auto op : ope.opes_) { 1801 | op->accept(*this); 1802 | } 1803 | } 1804 | void visit(Repetition &ope) override { ope.ope_->accept(*this); } 1805 | void visit(CaptureScope &ope) override { ope.ope_->accept(*this); } 1806 | void visit(Capture &ope) override { ope.ope_->accept(*this); } 1807 | void visit(TokenBoundary &) override { has_token_boundary_ = true; } 1808 | void visit(Ignore &ope) override { ope.ope_->accept(*this); } 1809 | void visit(WeakHolder &) override { has_rule_ = true; } 1810 | void visit(Holder &ope) override { ope.ope_->accept(*this); } 1811 | void visit(Reference &ope) override; 1812 | void visit(Whitespace &ope) override { ope.ope_->accept(*this); } 1813 | void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } 1814 | void visit(Recovery &ope) override { ope.ope_->accept(*this); } 1815 | 1816 | static bool is_token(Ope &ope) { 1817 | if (IsLiteralToken::check(ope)) { return true; } 1818 | 1819 | TokenChecker vis; 1820 | ope.accept(vis); 1821 | return vis.has_token_boundary_ || !vis.has_rule_; 1822 | } 1823 | 1824 | private: 1825 | bool has_token_boundary_ = false; 1826 | bool has_rule_ = false; 1827 | }; 1828 | 1829 | struct FindLiteralToken : public Ope::Visitor { 1830 | using Ope::Visitor::visit; 1831 | 1832 | void visit(LiteralString &ope) override { token_ = ope.lit_.data(); } 1833 | void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); } 1834 | void visit(Ignore &ope) override { ope.ope_->accept(*this); } 1835 | void visit(Reference &ope) override; 1836 | void visit(Recovery &ope) override { ope.ope_->accept(*this); } 1837 | 1838 | static const char *token(Ope &ope) { 1839 | FindLiteralToken vis; 1840 | ope.accept(vis); 1841 | return vis.token_; 1842 | } 1843 | 1844 | private: 1845 | const char *token_ = nullptr; 1846 | }; 1847 | 1848 | struct DetectLeftRecursion : public 
Ope::Visitor { 1849 | using Ope::Visitor::visit; 1850 | 1851 | DetectLeftRecursion(const std::string &name) : name_(name) {} 1852 | 1853 | void visit(Sequence &ope) override { 1854 | for (auto op : ope.opes_) { 1855 | op->accept(*this); 1856 | if (done_) { 1857 | break; 1858 | } else if (error_s) { 1859 | done_ = true; 1860 | break; 1861 | } 1862 | } 1863 | } 1864 | void visit(PrioritizedChoice &ope) override { 1865 | for (auto op : ope.opes_) { 1866 | op->accept(*this); 1867 | if (error_s) { 1868 | done_ = true; 1869 | break; 1870 | } 1871 | } 1872 | } 1873 | void visit(Repetition &ope) override { 1874 | ope.ope_->accept(*this); 1875 | done_ = ope.min_ > 0; 1876 | } 1877 | void visit(AndPredicate &ope) override { 1878 | ope.ope_->accept(*this); 1879 | done_ = false; 1880 | } 1881 | void visit(NotPredicate &ope) override { 1882 | ope.ope_->accept(*this); 1883 | done_ = false; 1884 | } 1885 | void visit(Dictionary &) override { done_ = true; } 1886 | void visit(LiteralString &ope) override { done_ = !ope.lit_.empty(); } 1887 | void visit(CharacterClass &) override { done_ = true; } 1888 | void visit(Character &) override { done_ = true; } 1889 | void visit(AnyCharacter &) override { done_ = true; } 1890 | void visit(CaptureScope &ope) override { ope.ope_->accept(*this); } 1891 | void visit(Capture &ope) override { ope.ope_->accept(*this); } 1892 | void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); } 1893 | void visit(Ignore &ope) override { ope.ope_->accept(*this); } 1894 | void visit(User &) override { done_ = true; } 1895 | void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } 1896 | void visit(Holder &ope) override { ope.ope_->accept(*this); } 1897 | void visit(Reference &ope) override; 1898 | void visit(Whitespace &ope) override { ope.ope_->accept(*this); } 1899 | void visit(BackReference &) override { done_ = true; } 1900 | void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } 1901 | void visit(Recovery 
&ope) override { ope.ope_->accept(*this); } 1902 | void visit(Cut &) override { done_ = true; } 1903 | 1904 | const char *error_s = nullptr; 1905 | 1906 | private: 1907 | std::string name_; 1908 | std::unordered_set refs_; 1909 | bool done_ = false; 1910 | }; 1911 | 1912 | struct HasEmptyElement : public Ope::Visitor { 1913 | using Ope::Visitor::visit; 1914 | 1915 | HasEmptyElement(std::vector> &refs, 1916 | std::unordered_map &has_error_cache) 1917 | : refs_(refs), has_error_cache_(has_error_cache) {} 1918 | 1919 | void visit(Sequence &ope) override; 1920 | void visit(PrioritizedChoice &ope) override { 1921 | for (auto op : ope.opes_) { 1922 | op->accept(*this); 1923 | if (is_empty) { return; } 1924 | } 1925 | } 1926 | void visit(Repetition &ope) override { 1927 | if (ope.min_ == 0) { 1928 | set_error(); 1929 | } else { 1930 | ope.ope_->accept(*this); 1931 | } 1932 | } 1933 | void visit(AndPredicate &) override { set_error(); } 1934 | void visit(NotPredicate &) override { set_error(); } 1935 | void visit(LiteralString &ope) override { 1936 | if (ope.lit_.empty()) { set_error(); } 1937 | } 1938 | void visit(CaptureScope &ope) override { ope.ope_->accept(*this); } 1939 | void visit(Capture &ope) override { ope.ope_->accept(*this); } 1940 | void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); } 1941 | void visit(Ignore &ope) override { ope.ope_->accept(*this); } 1942 | void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } 1943 | void visit(Holder &ope) override { ope.ope_->accept(*this); } 1944 | void visit(Reference &ope) override; 1945 | void visit(Whitespace &ope) override { ope.ope_->accept(*this); } 1946 | void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } 1947 | void visit(Recovery &ope) override { ope.ope_->accept(*this); } 1948 | 1949 | bool is_empty = false; 1950 | const char *error_s = nullptr; 1951 | std::string error_name; 1952 | 1953 | private: 1954 | void set_error() { 1955 | is_empty = true; 
1956 | tie(error_s, error_name) = refs_.back(); 1957 | } 1958 | std::vector> &refs_; 1959 | std::unordered_map &has_error_cache_; 1960 | }; 1961 | 1962 | struct DetectInfiniteLoop : public Ope::Visitor { 1963 | using Ope::Visitor::visit; 1964 | 1965 | DetectInfiniteLoop(const char *s, const std::string &name, 1966 | std::vector> &refs, 1967 | std::unordered_map &has_error_cache) 1968 | : refs_(refs), has_error_cache_(has_error_cache) { 1969 | refs_.emplace_back(s, name); 1970 | } 1971 | 1972 | DetectInfiniteLoop(std::vector> &refs, 1973 | std::unordered_map &has_error_cache) 1974 | : refs_(refs), has_error_cache_(has_error_cache) {} 1975 | 1976 | void visit(Sequence &ope) override { 1977 | for (auto op : ope.opes_) { 1978 | op->accept(*this); 1979 | if (has_error) { return; } 1980 | } 1981 | } 1982 | void visit(PrioritizedChoice &ope) override { 1983 | for (auto op : ope.opes_) { 1984 | op->accept(*this); 1985 | if (has_error) { return; } 1986 | } 1987 | } 1988 | void visit(Repetition &ope) override { 1989 | if (ope.max_ == std::numeric_limits::max()) { 1990 | HasEmptyElement vis(refs_, has_error_cache_); 1991 | ope.ope_->accept(vis); 1992 | if (vis.is_empty) { 1993 | has_error = true; 1994 | error_s = vis.error_s; 1995 | error_name = vis.error_name; 1996 | } 1997 | } else { 1998 | ope.ope_->accept(*this); 1999 | } 2000 | } 2001 | void visit(AndPredicate &ope) override { ope.ope_->accept(*this); } 2002 | void visit(NotPredicate &ope) override { ope.ope_->accept(*this); } 2003 | void visit(CaptureScope &ope) override { ope.ope_->accept(*this); } 2004 | void visit(Capture &ope) override { ope.ope_->accept(*this); } 2005 | void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); } 2006 | void visit(Ignore &ope) override { ope.ope_->accept(*this); } 2007 | void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } 2008 | void visit(Holder &ope) override { ope.ope_->accept(*this); } 2009 | void visit(Reference &ope) override; 2010 | void 
visit(Whitespace &ope) override { ope.ope_->accept(*this); } 2011 | void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } 2012 | void visit(Recovery &ope) override { ope.ope_->accept(*this); } 2013 | 2014 | bool has_error = false; 2015 | const char *error_s = nullptr; 2016 | std::string error_name; 2017 | 2018 | private: 2019 | std::vector> &refs_; 2020 | std::unordered_map &has_error_cache_; 2021 | }; 2022 | 2023 | struct ReferenceChecker : public Ope::Visitor { 2024 | using Ope::Visitor::visit; 2025 | 2026 | ReferenceChecker(const Grammar &grammar, 2027 | const std::vector ¶ms) 2028 | : grammar_(grammar), params_(params) {} 2029 | 2030 | void visit(Sequence &ope) override { 2031 | for (auto op : ope.opes_) { 2032 | op->accept(*this); 2033 | } 2034 | } 2035 | void visit(PrioritizedChoice &ope) override { 2036 | for (auto op : ope.opes_) { 2037 | op->accept(*this); 2038 | } 2039 | } 2040 | void visit(Repetition &ope) override { ope.ope_->accept(*this); } 2041 | void visit(AndPredicate &ope) override { ope.ope_->accept(*this); } 2042 | void visit(NotPredicate &ope) override { ope.ope_->accept(*this); } 2043 | void visit(CaptureScope &ope) override { ope.ope_->accept(*this); } 2044 | void visit(Capture &ope) override { ope.ope_->accept(*this); } 2045 | void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); } 2046 | void visit(Ignore &ope) override { ope.ope_->accept(*this); } 2047 | void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } 2048 | void visit(Holder &ope) override { ope.ope_->accept(*this); } 2049 | void visit(Reference &ope) override; 2050 | void visit(Whitespace &ope) override { ope.ope_->accept(*this); } 2051 | void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } 2052 | void visit(Recovery &ope) override { ope.ope_->accept(*this); } 2053 | 2054 | std::unordered_map error_s; 2055 | std::unordered_map error_message; 2056 | std::unordered_set referenced; 2057 | 2058 | private: 2059 
| const Grammar &grammar_; 2060 | const std::vector ¶ms_; 2061 | }; 2062 | 2063 | struct LinkReferences : public Ope::Visitor { 2064 | using Ope::Visitor::visit; 2065 | 2066 | LinkReferences(Grammar &grammar, const std::vector ¶ms) 2067 | : grammar_(grammar), params_(params) {} 2068 | 2069 | void visit(Sequence &ope) override { 2070 | for (auto op : ope.opes_) { 2071 | op->accept(*this); 2072 | } 2073 | } 2074 | void visit(PrioritizedChoice &ope) override { 2075 | for (auto op : ope.opes_) { 2076 | op->accept(*this); 2077 | } 2078 | } 2079 | void visit(Repetition &ope) override { ope.ope_->accept(*this); } 2080 | void visit(AndPredicate &ope) override { ope.ope_->accept(*this); } 2081 | void visit(NotPredicate &ope) override { ope.ope_->accept(*this); } 2082 | void visit(CaptureScope &ope) override { ope.ope_->accept(*this); } 2083 | void visit(Capture &ope) override { ope.ope_->accept(*this); } 2084 | void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); } 2085 | void visit(Ignore &ope) override { ope.ope_->accept(*this); } 2086 | void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } 2087 | void visit(Holder &ope) override { ope.ope_->accept(*this); } 2088 | void visit(Reference &ope) override; 2089 | void visit(Whitespace &ope) override { ope.ope_->accept(*this); } 2090 | void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } 2091 | void visit(Recovery &ope) override { ope.ope_->accept(*this); } 2092 | 2093 | private: 2094 | Grammar &grammar_; 2095 | const std::vector ¶ms_; 2096 | }; 2097 | 2098 | struct FindReference : public Ope::Visitor { 2099 | using Ope::Visitor::visit; 2100 | 2101 | FindReference(const std::vector> &args, 2102 | const std::vector ¶ms) 2103 | : args_(args), params_(params) {} 2104 | 2105 | void visit(Sequence &ope) override { 2106 | std::vector> opes; 2107 | for (auto o : ope.opes_) { 2108 | o->accept(*this); 2109 | opes.push_back(found_ope); 2110 | } 2111 | found_ope = 
std::make_shared(opes); 2112 | } 2113 | void visit(PrioritizedChoice &ope) override { 2114 | std::vector> opes; 2115 | for (auto o : ope.opes_) { 2116 | o->accept(*this); 2117 | opes.push_back(found_ope); 2118 | } 2119 | found_ope = std::make_shared(opes); 2120 | } 2121 | void visit(Repetition &ope) override { 2122 | ope.ope_->accept(*this); 2123 | found_ope = rep(found_ope, ope.min_, ope.max_); 2124 | } 2125 | void visit(AndPredicate &ope) override { 2126 | ope.ope_->accept(*this); 2127 | found_ope = apd(found_ope); 2128 | } 2129 | void visit(NotPredicate &ope) override { 2130 | ope.ope_->accept(*this); 2131 | found_ope = npd(found_ope); 2132 | } 2133 | void visit(Dictionary &ope) override { found_ope = ope.shared_from_this(); } 2134 | void visit(LiteralString &ope) override { 2135 | found_ope = ope.shared_from_this(); 2136 | } 2137 | void visit(CharacterClass &ope) override { 2138 | found_ope = ope.shared_from_this(); 2139 | } 2140 | void visit(Character &ope) override { found_ope = ope.shared_from_this(); } 2141 | void visit(AnyCharacter &ope) override { found_ope = ope.shared_from_this(); } 2142 | void visit(CaptureScope &ope) override { 2143 | ope.ope_->accept(*this); 2144 | found_ope = csc(found_ope); 2145 | } 2146 | void visit(Capture &ope) override { 2147 | ope.ope_->accept(*this); 2148 | found_ope = cap(found_ope, ope.match_action_); 2149 | } 2150 | void visit(TokenBoundary &ope) override { 2151 | ope.ope_->accept(*this); 2152 | found_ope = tok(found_ope); 2153 | } 2154 | void visit(Ignore &ope) override { 2155 | ope.ope_->accept(*this); 2156 | found_ope = ign(found_ope); 2157 | } 2158 | void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } 2159 | void visit(Holder &ope) override { ope.ope_->accept(*this); } 2160 | void visit(Reference &ope) override; 2161 | void visit(Whitespace &ope) override { 2162 | ope.ope_->accept(*this); 2163 | found_ope = wsp(found_ope); 2164 | } 2165 | void visit(PrecedenceClimbing &ope) override { 2166 | 
ope.atom_->accept(*this); 2167 | found_ope = csc(found_ope); 2168 | } 2169 | void visit(Recovery &ope) override { 2170 | ope.ope_->accept(*this); 2171 | found_ope = rec(found_ope); 2172 | } 2173 | void visit(Cut &ope) override { found_ope = ope.shared_from_this(); } 2174 | 2175 | std::shared_ptr found_ope; 2176 | 2177 | private: 2178 | const std::vector> &args_; 2179 | const std::vector ¶ms_; 2180 | }; 2181 | 2182 | /* 2183 | * Keywords 2184 | */ 2185 | static const char *WHITESPACE_DEFINITION_NAME = "%whitespace"; 2186 | static const char *WORD_DEFINITION_NAME = "%word"; 2187 | static const char *RECOVER_DEFINITION_NAME = "%recover"; 2188 | 2189 | /* 2190 | * Definition 2191 | */ 2192 | class Definition { 2193 | public: 2194 | struct Result { 2195 | bool ret; 2196 | bool recovered; 2197 | size_t len; 2198 | ErrorInfo error_info; 2199 | }; 2200 | 2201 | Definition() : holder_(std::make_shared(this)) {} 2202 | 2203 | Definition(const Definition &rhs) : name(rhs.name), holder_(rhs.holder_) { 2204 | holder_->outer_ = this; 2205 | } 2206 | 2207 | Definition(const std::shared_ptr &ope) 2208 | : holder_(std::make_shared(this)) { 2209 | *this <= ope; 2210 | } 2211 | 2212 | operator std::shared_ptr() { 2213 | return std::make_shared(holder_); 2214 | } 2215 | 2216 | Definition &operator<=(const std::shared_ptr &ope) { 2217 | holder_->ope_ = ope; 2218 | return *this; 2219 | } 2220 | 2221 | Result parse(const char *s, size_t n, const char *path = nullptr, 2222 | Log log = nullptr) const { 2223 | SemanticValues vs; 2224 | std::any dt; 2225 | return parse_core(s, n, vs, dt, path, log); 2226 | } 2227 | 2228 | Result parse(const char *s, const char *path = nullptr, 2229 | Log log = nullptr) const { 2230 | auto n = strlen(s); 2231 | return parse(s, n, path, log); 2232 | } 2233 | 2234 | Result parse(const char *s, size_t n, std::any &dt, 2235 | const char *path = nullptr, Log log = nullptr) const { 2236 | SemanticValues vs; 2237 | return parse_core(s, n, vs, dt, path, log); 2238 | 
} 2239 | 2240 | Result parse(const char *s, std::any &dt, const char *path = nullptr, 2241 | Log log = nullptr) const { 2242 | auto n = strlen(s); 2243 | return parse(s, n, dt, path, log); 2244 | } 2245 | 2246 | template 2247 | Result parse_and_get_value(const char *s, size_t n, T &val, 2248 | const char *path = nullptr, 2249 | Log log = nullptr) const { 2250 | SemanticValues vs; 2251 | std::any dt; 2252 | auto r = parse_core(s, n, vs, dt, path, log); 2253 | if (r.ret && !vs.empty() && vs.front().has_value()) { 2254 | val = std::any_cast(vs[0]); 2255 | } 2256 | return r; 2257 | } 2258 | 2259 | template 2260 | Result parse_and_get_value(const char *s, T &val, const char *path = nullptr, 2261 | Log log = nullptr) const { 2262 | auto n = strlen(s); 2263 | return parse_and_get_value(s, n, val, path, log); 2264 | } 2265 | 2266 | template 2267 | Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val, 2268 | const char *path = nullptr, 2269 | Log log = nullptr) const { 2270 | SemanticValues vs; 2271 | auto r = parse_core(s, n, vs, dt, path, log); 2272 | if (r.ret && !vs.empty() && vs.front().has_value()) { 2273 | val = std::any_cast(vs[0]); 2274 | } 2275 | return r; 2276 | } 2277 | 2278 | template 2279 | Result parse_and_get_value(const char *s, std::any &dt, T &val, 2280 | const char *path = nullptr, 2281 | Log log = nullptr) const { 2282 | auto n = strlen(s); 2283 | return parse_and_get_value(s, n, dt, val, path, log); 2284 | } 2285 | 2286 | #if defined(__cpp_lib_char8_t) 2287 | Result parse(const char8_t *s, size_t n, const char *path = nullptr, 2288 | Log log = nullptr) const { 2289 | return parse(reinterpret_cast(s), n, path, log); 2290 | } 2291 | 2292 | Result parse(const char8_t *s, const char *path = nullptr, 2293 | Log log = nullptr) const { 2294 | return parse(reinterpret_cast(s), path, log); 2295 | } 2296 | 2297 | Result parse(const char8_t *s, size_t n, std::any &dt, 2298 | const char *path = nullptr, Log log = nullptr) const { 2299 | return 
parse(reinterpret_cast(s), n, dt, path, log); 2300 | } 2301 | 2302 | Result parse(const char8_t *s, std::any &dt, const char *path = nullptr, 2303 | Log log = nullptr) const { 2304 | return parse(reinterpret_cast(s), dt, path, log); 2305 | } 2306 | /* The char8_t parse_and_get_value overloads below forward to the const char * versions. path is handed on as the pointer itself: it defaults to nullptr, so it must never be dereferenced here. */ 2307 | template 2308 | Result parse_and_get_value(const char8_t *s, size_t n, T &val, 2309 | const char *path = nullptr, 2310 | Log log = nullptr) const { 2311 | return parse_and_get_value(reinterpret_cast(s), n, val, path, 2312 | log); 2313 | } 2314 | 2315 | template 2316 | Result parse_and_get_value(const char8_t *s, T &val, 2317 | const char *path = nullptr, 2318 | Log log = nullptr) const { 2319 | return parse_and_get_value(reinterpret_cast(s), val, path, 2320 | log); 2321 | } 2322 | 2323 | template 2324 | Result parse_and_get_value(const char8_t *s, size_t n, std::any &dt, T &val, 2325 | const char *path = nullptr, 2326 | Log log = nullptr) const { 2327 | return parse_and_get_value(reinterpret_cast(s), n, dt, val, 2328 | path, log); 2329 | } 2330 | 2331 | template 2332 | Result parse_and_get_value(const char8_t *s, std::any &dt, T &val, 2333 | const char *path = nullptr, 2334 | Log log = nullptr) const { 2335 | return parse_and_get_value(reinterpret_cast(s), dt, val, 2336 | path, log); 2337 | } 2338 | #endif 2339 | 2340 | void operator=(Action a) { action = a; } 2341 | 2342 | template Definition &operator,(T fn) { 2343 | operator=(fn); 2344 | return *this; 2345 | } 2346 | 2347 | Definition &operator~() { 2348 | ignoreSemanticValue = true; 2349 | return *this; 2350 | } 2351 | 2352 | void accept(Ope::Visitor &v) { holder_->accept(v); } 2353 | 2354 | std::shared_ptr get_core_operator() const { return holder_->ope_; } 2355 | 2356 | bool is_token() const { 2357 | std::call_once(is_token_init_, [this]() { 2358 | is_token_ = TokenChecker::is_token(*get_core_operator()); 2359 | }); 2360 | return is_token_; 2361 | } 2362 | 2363 | std::string name; 2364 | const char *s_ = nullptr; 2365 | std::pair line_ = {1, 1}; 2366 | 
2367 | std::function 2369 | predicate; 2370 | 2371 | size_t id = 0; 2372 | Action action; 2373 | std::function 2374 | enter; 2375 | std::function 2377 | leave; 2378 | bool ignoreSemanticValue = false; 2379 | std::shared_ptr whitespaceOpe; 2380 | std::shared_ptr wordOpe; 2381 | bool enablePackratParsing = false; 2382 | bool is_macro = false; 2383 | std::vector params; 2384 | bool disable_action = false; 2385 | 2386 | TracerEnter tracer_enter; 2387 | TracerLeave tracer_leave; 2388 | bool verbose_trace = false; 2389 | TracerStartOrEnd tracer_start; 2390 | TracerStartOrEnd tracer_end; 2391 | 2392 | std::string error_message; 2393 | bool no_ast_opt = false; 2394 | 2395 | bool eoi_check = true; 2396 | 2397 | private: 2398 | friend class Reference; 2399 | friend class ParserGenerator; 2400 | 2401 | Definition &operator=(const Definition &rhs); 2402 | Definition &operator=(Definition &&rhs); 2403 | 2404 | void initialize_definition_ids() const { 2405 | std::call_once(definition_ids_init_, [&]() { 2406 | AssignIDToDefinition vis; 2407 | holder_->accept(vis); 2408 | if (whitespaceOpe) { whitespaceOpe->accept(vis); } 2409 | if (wordOpe) { wordOpe->accept(vis); } 2410 | definition_ids_.swap(vis.ids); 2411 | }); 2412 | } 2413 | 2414 | Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt, 2415 | const char *path, Log log) const { 2416 | initialize_definition_ids(); 2417 | 2418 | std::shared_ptr ope = holder_; 2419 | 2420 | std::any trace_data; 2421 | if (tracer_start) { tracer_start(trace_data); } 2422 | auto se = scope_exit([&]() { 2423 | if (tracer_end) { tracer_end(trace_data); } 2424 | }); 2425 | 2426 | Context c(path, s, n, definition_ids_.size(), whitespaceOpe, wordOpe, 2427 | enablePackratParsing, tracer_enter, tracer_leave, trace_data, 2428 | verbose_trace, log); 2429 | 2430 | size_t i = 0; 2431 | 2432 | if (whitespaceOpe) { 2433 | auto save_ignore_trace_state = c.ignore_trace_state; 2434 | c.ignore_trace_state = !c.verbose_trace; 2435 | auto se = 
2436 | scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; }); 2437 | 2438 | auto len = whitespaceOpe->parse(s, n, vs, c, dt); 2439 | if (fail(len)) { return Result{false, c.recovered, i, c.error_info}; } 2440 | 2441 | i = len; 2442 | } 2443 | 2444 | auto len = ope->parse(s + i, n - i, vs, c, dt); 2445 | auto ret = success(len); 2446 | if (ret) { 2447 | i += len; 2448 | if (eoi_check) { 2449 | if (i < n) { 2450 | if (c.error_info.error_pos - c.s < s + i - c.s) { 2451 | c.error_info.message_pos = s + i; 2452 | c.error_info.message = "expected end of input"; 2453 | } 2454 | ret = false; 2455 | } 2456 | } 2457 | } 2458 | return Result{ret, c.recovered, i, c.error_info}; 2459 | } 2460 | 2461 | std::shared_ptr holder_; 2462 | mutable std::once_flag is_token_init_; 2463 | mutable bool is_token_ = false; 2464 | mutable std::once_flag assign_id_to_definition_init_; 2465 | mutable std::once_flag definition_ids_init_; 2466 | mutable std::unordered_map definition_ids_; 2467 | }; 2468 | 2469 | /* 2470 | * Implementations 2471 | */ 2472 | 2473 | inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs, 2474 | Context &c, std::any &dt, const std::string &lit, 2475 | std::once_flag &init_is_word, bool &is_word, 2476 | bool ignore_case) { 2477 | size_t i = 0; 2478 | for (; i < lit.size(); i++) { 2479 | if (i >= n || (ignore_case ? 
(std::tolower(s[i]) != std::tolower(lit[i])) 2480 | : (s[i] != lit[i]))) { 2481 | c.set_error_pos(s, lit.data()); 2482 | return static_cast(-1); 2483 | } 2484 | } 2485 | 2486 | // Word check 2487 | if (c.wordOpe) { 2488 | auto save_ignore_trace_state = c.ignore_trace_state; 2489 | c.ignore_trace_state = !c.verbose_trace; 2490 | auto se = 2491 | scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; }); 2492 | 2493 | std::call_once(init_is_word, [&]() { 2494 | SemanticValues dummy_vs; 2495 | Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr, 2496 | nullptr, nullptr, false, nullptr); 2497 | std::any dummy_dt; 2498 | 2499 | auto len = 2500 | c.wordOpe->parse(lit.data(), lit.size(), dummy_vs, dummy_c, dummy_dt); 2501 | is_word = success(len); 2502 | }); 2503 | 2504 | if (is_word) { 2505 | SemanticValues dummy_vs; 2506 | Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr, 2507 | nullptr, nullptr, false, nullptr); 2508 | std::any dummy_dt; 2509 | 2510 | NotPredicate ope(c.wordOpe); 2511 | auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt); 2512 | if (fail(len)) { 2513 | c.set_error_pos(s, lit.data()); 2514 | return len; 2515 | } 2516 | i += len; 2517 | } 2518 | } 2519 | 2520 | // Skip whitespace 2521 | if (!c.in_token_boundary_count && c.whitespaceOpe) { 2522 | auto save_ignore_trace_state = c.ignore_trace_state; 2523 | c.ignore_trace_state = !c.verbose_trace; 2524 | auto se = 2525 | scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; }); 2526 | 2527 | auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt); 2528 | if (fail(len)) { return len; } 2529 | i += len; 2530 | } 2531 | 2532 | return i; 2533 | } 2534 | 2535 | inline std::pair SemanticValues::line_info() const { 2536 | assert(c_); 2537 | return c_->line_info(sv_.data()); 2538 | } 2539 | 2540 | inline void ErrorInfo::output_log(const Log &log, const char *s, size_t n) { 2541 | if (message_pos) { 2542 | if (message_pos > 
last_output_pos) { 2543 | last_output_pos = message_pos; 2544 | auto line = line_info(s, message_pos); 2545 | std::string msg; 2546 | if (auto unexpected_token = heuristic_error_token(s, n, message_pos); 2547 | !unexpected_token.empty()) { 2548 | msg = replace_all(message, "%t", unexpected_token); 2549 | /* the %c placeholder receives only the first code point of the unexpected token */ 2550 | auto unexpected_char = unexpected_token.substr( 2551 | 0, 2552 | codepoint_length(unexpected_token.data(), unexpected_token.size())); 2553 | 2554 | msg = replace_all(msg, "%c", unexpected_char); 2555 | } else { 2556 | msg = message; 2557 | } 2558 | log(line.first, line.second, msg, label); 2559 | } 2560 | } else if (error_pos) { 2561 | if (error_pos > last_output_pos) { 2562 | last_output_pos = error_pos; 2563 | auto line = line_info(s, error_pos); 2564 | /* no custom message: build a generic one from the expected tokens collected for error_pos */ 2565 | std::string msg; 2566 | if (expected_tokens.empty()) { 2567 | msg = "syntax error."; 2568 | } else { 2569 | msg = "syntax error"; 2570 | 2571 | // unexpected token 2572 | if (auto unexpected_token = heuristic_error_token(s, n, error_pos); 2573 | !unexpected_token.empty()) { 2574 | msg += ", unexpected '"; 2575 | msg += unexpected_token; 2576 | msg += "'"; 2577 | } 2578 | /* append every expected item: literals in single quotes, rules by name in angle brackets */ 2579 | auto first_item = true; 2580 | size_t i = 0; 2581 | while (i < expected_tokens.size()) { 2582 | auto [error_literal, error_rule] = expected_tokens[i]; 2583 | 2584 | // Skip rules start with '_' 2585 | if (!(error_rule && error_rule->name[0] == '_')) { 2586 | msg += (first_item ? 
", expecting " : ", "); 2587 | if (error_literal) { 2588 | msg += "'"; 2589 | msg += error_literal; 2590 | msg += "'"; 2591 | } else { 2592 | msg += "<" + error_rule->name + ">"; 2593 | if (label.empty()) { label = error_rule->name; } 2594 | } 2595 | first_item = false; 2596 | } 2597 | 2598 | i++; 2599 | } 2600 | msg += "."; 2601 | } 2602 | log(line.first, line.second, msg, label); 2603 | } 2604 | } 2605 | } 2606 | /* Context::set_error_pos: does nothing unless log is set and a_s is at or beyond the current error_pos. Moving to a later position, or keep_previous_token being false, resets error_pos and clears expected_tokens. The expected literal is the literal argument when given, otherwise a non-empty literal token found in the core operator of the rule at the back of rule_stack. The expected rule is the first rule on rule_stack whose is_token() is true, or the last rule when none is. */ 2607 | inline void Context::set_error_pos(const char *a_s, const char *literal) { 2608 | if (log) { 2609 | if (error_info.error_pos <= a_s) { 2610 | if (error_info.error_pos < a_s || !error_info.keep_previous_token) { 2611 | error_info.error_pos = a_s; 2612 | error_info.expected_tokens.clear(); 2613 | } 2614 | 2615 | const char *error_literal = nullptr; 2616 | const Definition *error_rule = nullptr; 2617 | 2618 | if (literal) { 2619 | error_literal = literal; 2620 | } else if (!rule_stack.empty()) { 2621 | auto rule = rule_stack.back(); 2622 | auto ope = rule->get_core_operator(); 2623 | if (auto token = FindLiteralToken::token(*ope); 2624 | token && token[0] != '\0') { 2625 | error_literal = token; 2626 | } 2627 | } 2628 | 2629 | for (auto r : rule_stack) { 2630 | error_rule = r; 2631 | if (r->is_token()) { break; } 2632 | } 2633 | 2634 | if (error_literal || error_rule) { 2635 | error_info.add(error_literal, error_rule); 2636 | } 2637 | } 2638 | } 2639 | } 2640 | /* Context::trace_enter: pushes a fresh id (next_trace_id, post-incremented) onto trace_ids, then calls tracer_enter. */ 2641 | inline void Context::trace_enter(const Ope &ope, const char *a_s, size_t n, 2642 | const SemanticValues &vs, std::any &dt) { 2643 | trace_ids.push_back(next_trace_id++); 2644 | tracer_enter(ope, a_s, n, vs, *this, dt, trace_data); 2645 | } 2646 | /* Context::trace_leave: calls tracer_leave with the parse result len, then pops the id pushed by trace_enter. */ 2647 | inline void Context::trace_leave(const Ope &ope, const char *a_s, size_t n, 2648 | const SemanticValues &vs, std::any &dt, 2649 | size_t len) { 2650 | tracer_leave(ope, a_s, n, vs, *this, dt, len, trace_data); 2651 | trace_ids.pop_back(); 2652 | } 2653 | /* Context::is_traceable: true only when both tracer callbacks are set, ignore_trace_state is false, and the dynamic_cast applied to ope yields a null pointer. */ 2654 | inline bool Context::is_traceable(const Ope &ope) const { 2655 | if 
(tracer_enter && tracer_leave) { 2656 | if (ignore_trace_state) { return false; } 2657 | return !dynamic_cast(&ope); 2658 | } 2659 | return false; 2660 | } 2661 | /* Ope::parse: common entry point of every operator. Calls parse_core, bracketed by trace_enter and trace_leave when the context reports this operator as traceable, and returns the length parse_core produced. */ 2662 | inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs, 2663 | Context &c, std::any &dt) const { 2664 | if (c.is_traceable(*this)) { 2665 | c.trace_enter(*this, s, n, vs, dt); 2666 | auto len = parse_core(s, n, vs, c, dt); 2667 | c.trace_leave(*this, s, n, vs, dt, len); 2668 | return len; 2669 | } 2670 | return parse_core(s, n, vs, c, dt); 2671 | } 2672 | /* Dictionary::parse_core: matches the input against trie_. A match length of 0 is treated as failure (error position recorded, static_cast(-1) returned). After a match the same two steps as in parse_literal follow: the word check through NotPredicate(wordOpe) and the whitespace skip, which is suppressed while in_token_boundary_count is non-zero. */ 2673 | inline size_t Dictionary::parse_core(const char *s, size_t n, 2674 | SemanticValues &vs, Context &c, 2675 | std::any &dt) const { 2676 | auto i = trie_.match(s, n); 2677 | if (i == 0) { 2678 | c.set_error_pos(s); 2679 | return static_cast(-1); 2680 | } 2681 | 2682 | // Word check 2683 | if (c.wordOpe) { 2684 | auto save_ignore_trace_state = c.ignore_trace_state; 2685 | c.ignore_trace_state = !c.verbose_trace; 2686 | auto se = 2687 | scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; }); 2688 | /* unlike parse_literal there is no cached is_word flag here: the NotPredicate check runs on every match */ 2689 | { 2690 | SemanticValues dummy_vs; 2691 | Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr, 2692 | nullptr, nullptr, false, nullptr); 2693 | std::any dummy_dt; 2694 | 2695 | NotPredicate ope(c.wordOpe); 2696 | auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt); 2697 | if (fail(len)) { 2698 | c.set_error_pos(s); 2699 | return len; 2700 | } 2701 | i += len; 2702 | } 2703 | } 2704 | 2705 | // Skip whitespace 2706 | if (!c.in_token_boundary_count && c.whitespaceOpe) { 2707 | auto save_ignore_trace_state = c.ignore_trace_state; 2708 | c.ignore_trace_state = !c.verbose_trace; 2709 | auto se = 2710 | scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; }); 2711 | 2712 | auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt); 2713 | if (fail(len)) { return len; } 2714 | i += len; 2715 | } 2716 | 2717 | return i; 2718 | } 2719 | /* LiteralString::parse_core: forwards to parse_literal with this operator's literal (lit_), its word-check cache (init_is_word_, is_word_) and its ignore_case_ flag. */ 2720 | inline size_t 
LiteralString::parse_core(const char *s, size_t n, 2721 | SemanticValues &vs, Context &c, 2722 | std::any &dt) const { 2723 | return parse_literal(s, n, vs, c, dt, lit_, init_is_word_, is_word_, 2724 | ignore_case_); 2725 | } 2726 | /* TokenBoundary::parse_core: parses ope_ while in_token_boundary_count is incremented (restored by a scope guard), which switches off whitespace skipping in parse_literal and Dictionary::parse_core. On success the matched text is appended to vs.tokens and, if the counter is back at zero, trailing whitespace is consumed through whitespaceOpe. */ 2727 | inline size_t TokenBoundary::parse_core(const char *s, size_t n, 2728 | SemanticValues &vs, Context &c, 2729 | std::any &dt) const { 2730 | auto save_ignore_trace_state = c.ignore_trace_state; 2731 | c.ignore_trace_state = !c.verbose_trace; 2732 | auto se = 2733 | scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; }); 2734 | 2735 | size_t len; 2736 | { 2737 | c.in_token_boundary_count++; 2738 | auto se = scope_exit([&]() { c.in_token_boundary_count--; }); 2739 | len = ope_->parse(s, n, vs, c, dt); 2740 | } 2741 | 2742 | if (success(len)) { 2743 | vs.tokens.emplace_back(std::string_view(s, len)); 2744 | 2745 | if (!c.in_token_boundary_count) { 2746 | if (c.whitespaceOpe) { 2747 | auto l = c.whitespaceOpe->parse(s + len, n - len, vs, c, dt); 2748 | if (fail(l)) { return l; } 2749 | len += l; 2750 | } 2751 | } 2752 | } 2753 | return len; 2754 | } 2755 | /* Holder::parse_core: parses the operator owned by a definition. Throws std::logic_error when ope_ is unset. Macro definitions are parsed directly with only rule_stack bookkeeping. Every other definition is parsed inside the callback handed to c.packrat (called with the position s and the definition id): enter and leave hooks, a fresh semantic-values scope, the predicate and the reducing action all run there. */ 2756 | inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs, 2757 | Context &c, std::any &dt) const { 2758 | if (!ope_) { 2759 | throw std::logic_error("Uninitialized definition ope was used..."); 2760 | } 2761 | 2762 | // Macro reference 2763 | if (outer_->is_macro) { 2764 | c.rule_stack.push_back(outer_); 2765 | auto len = ope_->parse(s, n, vs, c, dt); 2766 | c.rule_stack.pop_back(); 2767 | return len; 2768 | } 2769 | 2770 | size_t len; 2771 | std::any val; 2772 | 2773 | c.packrat(s, outer_->id, len, val, [&](std::any &a_val) { 2774 | if (outer_->enter) { outer_->enter(c, s, n, dt); } 2775 | auto &chvs = c.push_semantic_values_scope(); 2776 | auto se = scope_exit([&]() { 2777 | c.pop_semantic_values_scope(); 2778 | if (outer_->leave) { outer_->leave(c, s, n, len, a_val, dt); } 2779 | }); 2780 | 2781 | c.rule_stack.push_back(outer_); 2782 | 
len = ope_->parse(s, n, chvs, c, dt); 2783 | c.rule_stack.pop_back(); 2784 | 2785 | // Invoke action 2786 | if (success(len)) { 2787 | chvs.sv_ = std::string_view(s, len); 2788 | chvs.name_ = outer_->name; 2789 | /* choice information is reset unless the operator (looked at through one level of unwrapping by the first dynamic_cast) passes the second dynamic_cast */ 2790 | auto ope_ptr = ope_.get(); 2791 | { 2792 | auto tok_ptr = dynamic_cast(ope_ptr); 2793 | if (tok_ptr) { ope_ptr = tok_ptr->ope_.get(); } 2794 | } 2795 | if (!dynamic_cast(ope_ptr)) { 2796 | chvs.choice_count_ = 0; 2797 | chvs.choice_ = 0; 2798 | } 2799 | /* a predicate returning false turns the match into a failure; its message is kept only if it lies beyond the message position recorded so far */ 2800 | std::string msg; 2801 | if (outer_->predicate && !outer_->predicate(chvs, dt, msg)) { 2802 | if (c.log && !msg.empty() && c.error_info.message_pos < s) { 2803 | c.error_info.message_pos = s; 2804 | c.error_info.message = msg; 2805 | c.error_info.label = outer_->name; 2806 | } 2807 | len = static_cast(-1); 2808 | } 2809 | /* the action is skipped once the context has been marked as recovered */ 2810 | if (success(len)) { 2811 | if (!c.recovered) { a_val = reduce(chvs, dt); } 2812 | } else { 2813 | if (c.log && !msg.empty() && c.error_info.message_pos < s) { 2814 | c.error_info.message_pos = s; 2815 | c.error_info.message = msg; 2816 | c.error_info.label = outer_->name; 2817 | } 2818 | } 2819 | } else { 2820 | if (c.log && !outer_->error_message.empty() && 2821 | c.error_info.message_pos < s) { 2822 | c.error_info.message_pos = s; 2823 | c.error_info.message = outer_->error_message; 2824 | c.error_info.label = outer_->name; 2825 | } 2826 | } 2827 | }); 2828 | /* on success the value is handed to the caller, tagged with the definition name, unless ignoreSemanticValue is set */ 2829 | if (success(len)) { 2830 | if (!outer_->ignoreSemanticValue) { 2831 | vs.emplace_back(std::move(val)); 2832 | vs.tags.emplace_back(str2tag(outer_->name)); 2833 | } 2834 | } 2835 | 2836 | return len; 2837 | } 2838 | /* Holder::reduce: returns the result of the definition action when one is set and not disabled; otherwise an empty std::any when vs is empty, or the first semantic value moved out of vs. */ 2839 | inline std::any Holder::reduce(SemanticValues &vs, std::any &dt) const { 2840 | if (outer_->action && !outer_->disable_action) { 2841 | return outer_->action(vs, dt); 2842 | } else if (vs.empty()) { 2843 | return std::any(); 2844 | } else { 2845 | return std::move(vs.front()); 2846 | } 2847 | } 2848 | /* Holder::name: name of the owning definition (outer_). */ 2849 | inline const std::string &Holder::name() const { return outer_->name; } 2850 | 
/* Holder::trace_name: builds the definition name wrapped in square brackets on first use (guarded by trace_name_init_) and returns the cached string. */ 2851 | inline const std::string &Holder::trace_name() const { 2852 | std::call_once(trace_name_init_, 2853 | [this]() { trace_name_ = "[" + outer_->name + "]"; }); 2854 | return trace_name_; 2855 | } 2856 | /* Reference::parse_core: three cases. Macro call: each argument is resolved through FindReference against the current arguments and the params of the rule at the back of rule_stack, the resolved list is pushed, and the rule holder is parsed. Plain definition: an empty argument list is pushed and the rule holder is parsed. No rule_ (a macro parameter): the operator at index iarg_ of the current arguments is parsed. Tracing is suppressed (unless verbose_trace) for rules with ignoreSemanticValue. */ 2857 | inline size_t Reference::parse_core(const char *s, size_t n, SemanticValues &vs, 2858 | Context &c, std::any &dt) const { 2859 | auto save_ignore_trace_state = c.ignore_trace_state; 2860 | if (rule_ && rule_->ignoreSemanticValue) { 2861 | c.ignore_trace_state = !c.verbose_trace; 2862 | } 2863 | auto se = 2864 | scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; }); 2865 | 2866 | if (rule_) { 2867 | // Reference rule 2868 | if (rule_->is_macro) { 2869 | // Macro 2870 | FindReference vis(c.top_args(), c.rule_stack.back()->params); 2871 | 2872 | // Collect arguments 2873 | std::vector> args; 2874 | for (auto arg : args_) { 2875 | arg->accept(vis); 2876 | args.emplace_back(std::move(vis.found_ope)); 2877 | } 2878 | 2879 | c.push_args(std::move(args)); 2880 | auto se = scope_exit([&]() { c.pop_args(); }); 2881 | auto ope = get_core_operator(); 2882 | return ope->parse(s, n, vs, c, dt); 2883 | } else { 2884 | // Definition 2885 | c.push_args(std::vector>()); 2886 | auto se = scope_exit([&]() { c.pop_args(); }); 2887 | auto ope = get_core_operator(); 2888 | return ope->parse(s, n, vs, c, dt); 2889 | } 2890 | } else { 2891 | // Reference parameter in macro 2892 | const auto &args = c.top_args(); 2893 | return args[iarg_]->parse(s, n, vs, c, dt); 2894 | } 2895 | } 2896 | /* Reference::get_core_operator: returns holder_ of the referenced rule; rule_ is dereferenced without a null check. */ 2897 | inline std::shared_ptr Reference::get_core_operator() const { 2898 | return rule_->holder_; 2899 | } 2900 | /* BackReference::parse_core: walks capture_scope_stack from index capture_scope_stack_size - 1 down to 0 and, in the first scope that contains name_, matches the captured text through parse_literal (case-sensitive, with a local word-check cache). When no scope has the name, an undefined back reference message is stored at s and static_cast(-1) is returned. */ 2901 | inline size_t BackReference::parse_core(const char *s, size_t n, 2902 | SemanticValues &vs, Context &c, 2903 | std::any &dt) const { 2904 | auto size = static_cast(c.capture_scope_stack_size); 2905 | for (auto i = size - 1; i >= 0; i--) { 2906 | auto index = static_cast(i); 2907 | const auto &cs = c.capture_scope_stack[index]; 2908 | if (cs.find(name_) != 
cs.end()) { 2909 | const auto &lit = cs.at(name_); 2910 | std::once_flag init_is_word; 2911 | auto is_word = false; 2912 | return parse_literal(s, n, vs, c, dt, lit, init_is_word, is_word, false); 2913 | } 2914 | } 2915 | 2916 | c.error_info.message_pos = s; 2917 | c.error_info.message = "undefined back reference '$" + name_ + "'..."; 2918 | return static_cast(-1); 2919 | } 2920 | /* PrecedenceClimbing::get_reference_for_binop: returns the definition behind binop_. When rule_ is a macro, binop_ is a macro parameter and is first resolved through index iarg_ of the current arguments. */ 2921 | inline Definition & 2922 | PrecedenceClimbing::get_reference_for_binop(Context &c) const { 2923 | if (rule_.is_macro) { 2924 | // Reference parameter in macro 2925 | const auto &args = c.top_args(); 2926 | auto iarg = dynamic_cast(*binop_).iarg_; 2927 | auto arg = args[iarg]; 2928 | return *dynamic_cast(*arg).rule_; 2929 | } 2930 | 2931 | return *dynamic_cast(*binop_).rule_; 2932 | } 2933 | /* PrecedenceClimbing::parse_expression: parses one atom, then loops: parse a binary operator, look its token up in info_, stop if its level is below min_prec, otherwise parse the right-hand side recursively with next_min_prec (level + 1 when the associativity is L, level otherwise) and fold the values through rule_.action. The action of the binop rule is temporarily replaced by a wrapper that records the operator token in tok; a scope guard restores the original action. */ 2934 | inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n, 2935 | SemanticValues &vs, 2936 | Context &c, std::any &dt, 2937 | size_t min_prec) const { 2938 | auto len = atom_->parse(s, n, vs, c, dt); 2939 | if (fail(len)) { return len; } 2940 | 2941 | std::string tok; 2942 | auto &rule = get_reference_for_binop(c); 2943 | auto action = std::move(rule.action); 2944 | 2945 | rule.action = [&](SemanticValues &vs2, std::any &dt2) { 2946 | tok = vs2.token(); 2947 | if (action) { 2948 | return action(vs2, dt2); 2949 | } else if (!vs2.empty()) { 2950 | return vs2[0]; 2951 | } 2952 | return std::any(); 2953 | }; 2954 | auto action_se = scope_exit([&]() { rule.action = std::move(action); }); 2955 | 2956 | auto i = len; 2957 | while (i < n) { 2958 | std::vector save_values(vs.begin(), vs.end()); 2959 | auto save_tokens = vs.tokens; 2960 | 2961 | auto chvs = c.push_semantic_values_scope(); 2962 | auto chlen = binop_->parse(s + i, n - i, chvs, c, dt); 2963 | c.pop_semantic_values_scope(); 2964 | 2965 | if (fail(chlen)) { break; } 2966 | 2967 | auto it = info_.find(tok); 2968 | if (it == info_.end()) { break; } 2969 | 2970 | auto level = std::get<0>(it->second); 2971 | auto assoc = 
std::get<1>(it->second); 2972 | 2973 | if (level < min_prec) { break; } 2974 | 2975 | vs.emplace_back(std::move(chvs[0])); 2976 | i += chlen; 2977 | 2978 | auto next_min_prec = level; 2979 | if (assoc == 'L') { next_min_prec = level + 1; } 2980 | 2981 | chvs = c.push_semantic_values_scope(); 2982 | chlen = parse_expression(s + i, n - i, chvs, c, dt, next_min_prec); 2983 | c.pop_semantic_values_scope(); 2984 | /* right-hand side failed: put back the values and tokens saved at the top of this iteration and return the failure value */ 2985 | if (fail(chlen)) { 2986 | vs.assign(save_values.begin(), save_values.end()); 2987 | vs.tokens = save_tokens; 2988 | i = chlen; 2989 | break; 2990 | } 2991 | 2992 | vs.emplace_back(std::move(chvs[0])); 2993 | i += chlen; 2994 | /* fold what has been collected so far into a single value: the result of rule_.action if set, otherwise the first value */ 2995 | std::any val; 2996 | if (rule_.action) { 2997 | vs.sv_ = std::string_view(s, i); 2998 | val = rule_.action(vs, dt); 2999 | } else if (!vs.empty()) { 3000 | val = vs[0]; 3001 | } 3002 | vs.clear(); 3003 | vs.emplace_back(std::move(val)); 3004 | } 3005 | 3006 | return i; 3007 | } 3008 | /* Recovery::parse_core: first, when logging is on and the first argument of the wrapped reference names a rule with a non-empty error_message, that message is stored at s. Then the wrapped rule is parsed with c.log temporarily set to nullptr and with throw-away semantic values. If that succeeds, c.recovered is set and, when logging is on, the pending error is written out and cleared. Finally the entry at the back of cut_stack, if any, is set to true. Returns the length produced by the recovery parse. */ 3009 | inline size_t Recovery::parse_core(const char *s, size_t n, 3010 | SemanticValues & /*vs*/, Context &c, 3011 | std::any & /*dt*/) const { 3012 | const auto &rule = dynamic_cast(*ope_); 3013 | 3014 | // Custom error message 3015 | if (c.log) { 3016 | auto label = dynamic_cast(rule.args_[0].get()); 3017 | if (label && !label->rule_->error_message.empty()) { 3018 | c.error_info.message_pos = s; 3019 | c.error_info.message = label->rule_->error_message; 3020 | c.error_info.label = label->rule_->name; 3021 | } 3022 | } 3023 | 3024 | // Recovery 3025 | auto len = static_cast(-1); 3026 | { 3027 | auto save_log = c.log; 3028 | c.log = nullptr; 3029 | auto se = scope_exit([&]() { c.log = save_log; }); 3030 | 3031 | SemanticValues dummy_vs; 3032 | std::any dummy_dt; 3033 | 3034 | len = rule.parse(s, n, dummy_vs, c, dummy_dt); 3035 | } 3036 | 3037 | if (success(len)) { 3038 | c.recovered = true; 3039 | 3040 | if (c.log) { 3041 | c.error_info.output_log(c.log, c.s, c.l); 3042 | c.error_info.clear(); 3043 | } 3044 | } 3045 | 3046 | // Cut 3047 | if 
(!c.cut_stack.empty()) { 3048 | c.cut_stack.back() = true; 3049 | 3050 | if (c.cut_stack.size() == 1) { 3051 | // TODO: Remove unneeded entries in packrat memoise table 3052 | } 3053 | } 3054 | 3055 | return len; 3056 | } 3057 | /* accept overrides: every operator type forwards itself to the Visitor::visit overload selected by its own static type. */ 3058 | inline void Sequence::accept(Visitor &v) { v.visit(*this); } 3059 | inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); } 3060 | inline void Repetition::accept(Visitor &v) { v.visit(*this); } 3061 | inline void AndPredicate::accept(Visitor &v) { v.visit(*this); } 3062 | inline void NotPredicate::accept(Visitor &v) { v.visit(*this); } 3063 | inline void Dictionary::accept(Visitor &v) { v.visit(*this); } 3064 | inline void LiteralString::accept(Visitor &v) { v.visit(*this); } 3065 | inline void CharacterClass::accept(Visitor &v) { v.visit(*this); } 3066 | inline void Character::accept(Visitor &v) { v.visit(*this); } 3067 | inline void AnyCharacter::accept(Visitor &v) { v.visit(*this); } 3068 | inline void CaptureScope::accept(Visitor &v) { v.visit(*this); } 3069 | inline void Capture::accept(Visitor &v) { v.visit(*this); } 3070 | inline void TokenBoundary::accept(Visitor &v) { v.visit(*this); } 3071 | inline void Ignore::accept(Visitor &v) { v.visit(*this); } 3072 | inline void User::accept(Visitor &v) { v.visit(*this); } 3073 | inline void WeakHolder::accept(Visitor &v) { v.visit(*this); } 3074 | inline void Holder::accept(Visitor &v) { v.visit(*this); } 3075 | inline void Reference::accept(Visitor &v) { v.visit(*this); } 3076 | inline void Whitespace::accept(Visitor &v) { v.visit(*this); } 3077 | inline void BackReference::accept(Visitor &v) { v.visit(*this); } 3078 | inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); } 3079 | inline void Recovery::accept(Visitor &v) { v.visit(*this); } 3080 | inline void Cut::accept(Visitor &v) { v.visit(*this); } 3081 | /* AssignIDToDefinition::visit(Holder): the first time a definition is seen it receives the next sequential id (the current size of ids) and its operator is visited; a definition already present in ids is skipped. */ 3082 | inline void AssignIDToDefinition::visit(Holder &ope) { 3083 | auto p = static_cast(ope.outer_); 3084 | if (ids.count(p)) { return; } 3085 | 
auto id = ids.size(); 3086 | ids[p] = id; 3087 | ope.outer_->id = id; 3088 | ope.ope_->accept(*this); 3089 | } 3090 | /* AssignIDToDefinition::visit(Reference): for a reference with a rule_, visits the arguments and then the rule; references without a rule_ are ignored. */ 3091 | inline void AssignIDToDefinition::visit(Reference &ope) { 3092 | if (ope.rule_) { 3093 | for (auto arg : ope.args_) { 3094 | arg->accept(*this); 3095 | } 3096 | ope.rule_->accept(*this); 3097 | } 3098 | } 3099 | /* AssignIDToDefinition::visit(PrecedenceClimbing): descends into both the atom and the binary-operator operand. */ 3100 | inline void AssignIDToDefinition::visit(PrecedenceClimbing &ope) { 3101 | ope.atom_->accept(*this); 3102 | ope.binop_->accept(*this); 3103 | } 3104 | /* TokenChecker::visit(Reference): a macro reference is inspected through its arguments; any other reference sets has_rule_. */ 3105 | inline void TokenChecker::visit(Reference &ope) { 3106 | if (ope.is_macro_) { 3107 | for (auto arg : ope.args_) { 3108 | arg->accept(*this); 3109 | } 3110 | } else { 3111 | has_rule_ = true; 3112 | } 3113 | } 3114 | /* FindLiteralToken::visit(Reference): only macro references are followed, first into the referenced rule and then into each argument. rule_ is dereferenced without a null check. */ 3115 | inline void FindLiteralToken::visit(Reference &ope) { 3116 | if (ope.is_macro_) { 3117 | ope.rule_->accept(*this); 3118 | for (auto arg : ope.args_) { 3119 | arg->accept(*this); 3120 | } 3121 | } 3122 | } 3123 | /* DetectLeftRecursion::visit(Reference): a reference whose name equals name_ records its source position in error_s. Any other name not yet in refs_ is added and its rule, if linked, is visited; if done_ is still false after that visit the function returns without setting it. In every other path done_ is set to true. */ 3124 | inline void DetectLeftRecursion::visit(Reference &ope) { 3125 | if (ope.name_ == name_) { 3126 | error_s = ope.s_; 3127 | } else if (!refs_.count(ope.name_)) { 3128 | refs_.insert(ope.name_); 3129 | if (ope.rule_) { 3130 | ope.rule_->accept(*this); 3131 | if (done_ == false) { return; } 3132 | } 3133 | } 3134 | done_ = true; 3135 | } 3136 | /* HasEmptyElement::visit(Sequence): visits the elements in order. At the first element that leaves is_empty false, every remaining element is checked with a fresh DetectInfiniteLoop visitor (an error found there sets is_empty and copies the error position and name) and the function returns. If every element left is_empty true, the state saved after the last element is restored at the end. */ 3137 | inline void HasEmptyElement::visit(Sequence &ope) { 3138 | auto save_is_empty = false; 3139 | const char *save_error_s = nullptr; 3140 | std::string save_error_name; 3141 | 3142 | auto it = ope.opes_.begin(); 3143 | while (it != ope.opes_.end()) { 3144 | (*it)->accept(*this); 3145 | if (!is_empty) { 3146 | ++it; 3147 | while (it != ope.opes_.end()) { 3148 | DetectInfiniteLoop vis(refs_, has_error_cache_); 3149 | (*it)->accept(vis); 3150 | if (vis.has_error) { 3151 | is_empty = true; 3152 | error_s = vis.error_s; 3153 | error_name = vis.error_name; 3154 | } 3155 | ++it; 3156 | } 3157 | return; 3158 | } 3159 | 3160 | save_is_empty = is_empty; 3161 | save_error_s = error_s; 3162 | save_error_name = 
error_name; 3163 | 3164 | is_empty = false; 3165 | error_name.clear(); 3166 | ++it; 3167 | } 3168 | 3169 | is_empty = save_is_empty; 3170 | error_s = save_error_s; 3171 | error_name = save_error_name; 3172 | } 3173 | /* HasEmptyElement::visit(Reference): returns at once when the referenced name is already on refs_. Otherwise, if the reference is linked to a rule, the rule is visited with this reference pushed on refs_ for the duration of the visit. */ 3174 | inline void HasEmptyElement::visit(Reference &ope) { 3175 | auto it = std::find_if(refs_.begin(), refs_.end(), 3176 | [&](const std::pair &ref) { 3177 | return ope.name_ == ref.second; 3178 | }); 3179 | if (it != refs_.end()) { return; } 3180 | 3181 | if (ope.rule_) { 3182 | refs_.emplace_back(ope.s_, ope.name_); 3183 | ope.rule_->accept(*this); 3184 | refs_.pop_back(); 3185 | } 3186 | } 3187 | /* DetectInfiniteLoop::visit(Reference): returns at once when the referenced name is already on refs_. For a linked rule the result is taken from has_error_cache_ when the name is cached; otherwise the rule is visited (with the reference pushed on refs_) and the resulting has_error is stored in the cache under the rule name. Arguments of a macro reference are visited afterwards. */ 3188 | inline void DetectInfiniteLoop::visit(Reference &ope) { 3189 | auto it = std::find_if(refs_.begin(), refs_.end(), 3190 | [&](const std::pair &ref) { 3191 | return ope.name_ == ref.second; 3192 | }); 3193 | if (it != refs_.end()) { return; } 3194 | 3195 | if (ope.rule_) { 3196 | auto it = has_error_cache_.find(ope.name_); 3197 | if (it != has_error_cache_.end()) { 3198 | has_error = it->second; 3199 | } else { 3200 | refs_.emplace_back(ope.s_, ope.name_); 3201 | ope.rule_->accept(*this); 3202 | refs_.pop_back(); 3203 | has_error_cache_[ope.name_] = has_error; 3204 | } 3205 | } 3206 | 3207 | if (ope.is_macro_) { 3208 | for (auto arg : ope.args_) { 3209 | arg->accept(*this); 3210 | } 3211 | } 3212 | } 3213 | /* ReferenceChecker::visit(Reference): names listed in params_ are skipped. A name missing from grammar_ is reported as not defined. A known name is added to referenced, then checked: a macro rule must be referenced as a macro with as many arguments as the rule has params, and a non-macro rule must not be referenced as a macro. Errors are stored per name in error_s and error_message. Arguments of a known reference are visited last. */ 3214 | inline void ReferenceChecker::visit(Reference &ope) { 3215 | auto it = std::find(params_.begin(), params_.end(), ope.name_); 3216 | if (it != params_.end()) { return; } 3217 | 3218 | if (!grammar_.count(ope.name_)) { 3219 | error_s[ope.name_] = ope.s_; 3220 | error_message[ope.name_] = "'" + ope.name_ + "' is not defined."; 3221 | } else { 3222 | if (!referenced.count(ope.name_)) { referenced.insert(ope.name_); } 3223 | const auto &rule = grammar_.at(ope.name_); 3224 | if (rule.is_macro) { 3225 | if (!ope.is_macro_ || ope.args_.size() != rule.params.size()) { 3226 | error_s[ope.name_] = ope.s_; 3227 | error_message[ope.name_] = "incorrect 
number of arguments."; 3228 | } 3229 | } else if (ope.is_macro_) { 3230 | error_s[ope.name_] = ope.s_; 3231 | error_message[ope.name_] = "'" + ope.name_ + "' is not macro."; 3232 | } 3233 | for (auto arg : ope.args_) { 3234 | arg->accept(*this); 3235 | } 3236 | } 3237 | } 3238 | /* LinkReferences::visit(Reference): a name found in params_ stores its index in iarg_ and is not linked to a rule. Any other name present in grammar_ gets rule_ pointed at that definition. Arguments are always visited. */ 3239 | inline void LinkReferences::visit(Reference &ope) { 3240 | // Check if the reference is a macro parameter 3241 | auto found_param = false; 3242 | for (size_t i = 0; i < params_.size(); i++) { 3243 | const auto ¶m = params_[i]; 3244 | if (param == ope.name_) { 3245 | ope.iarg_ = i; 3246 | found_param = true; 3247 | break; 3248 | } 3249 | } 3250 | 3251 | // Check if the reference is a definition rule 3252 | if (!found_param && grammar_.count(ope.name_)) { 3253 | auto &rule = grammar_.at(ope.name_); 3254 | ope.rule_ = &rule; 3255 | } 3256 | 3257 | for (auto arg : ope.args_) { 3258 | arg->accept(*this); 3259 | } 3260 | } 3261 | /* FindReference::visit(Reference): when the reference name equals params_[i] for some index i below args_.size(), found_ope becomes args_[i]; otherwise found_ope is the reference itself. NOTE(review): the loop is bounded by args_.size() but indexes params_, so it relies on params_ being at least as long as args_ - confirm with the callers. */ 3262 | inline void FindReference::visit(Reference &ope) { 3263 | for (size_t i = 0; i < args_.size(); i++) { 3264 | const auto &name = params_[i]; 3265 | if (name == ope.name_) { 3266 | found_ope = args_[i]; 3267 | return; 3268 | } 3269 | } 3270 | found_ope = ope.shared_from_this(); 3271 | } 3272 | 3273 | /*----------------------------------------------------------------------------- 3274 | * PEG parser generator 3275 | *---------------------------------------------------------------------------*/ 3276 | 3277 | using Rules = std::unordered_map>; 3278 | 3279 | class ParserGenerator { 3280 | public: 3281 | static std::shared_ptr parse(const char *s, size_t n, 3282 | const Rules &rules, std::string &start, 3283 | bool &enablePackratParsing, Log log) { 3284 | return get_instance().perform_core(s, n, rules, start, enablePackratParsing, 3285 | log); 3286 | } 3287 | 3288 | static std::shared_ptr parse(const char *s, size_t n, 3289 | std::string &start, 3290 | bool &enablePackratParsing, Log log) { 3291 | Rules dummy; 3292 | return parse(s, n, dummy, start, 
enablePackratParsing, log); 3293 | } 3294 | 3295 | // For debugging purpose 3296 | static Grammar &grammar() { return get_instance().g; } 3297 | 3298 | private: 3299 | static ParserGenerator &get_instance() { 3300 | static ParserGenerator instance; 3301 | return instance; 3302 | } 3303 | 3304 | ParserGenerator() { 3305 | make_grammar(); 3306 | setup_actions(); 3307 | } 3308 | 3309 | struct Instruction { 3310 | std::string type; 3311 | std::any data; 3312 | std::string_view sv; 3313 | }; 3314 | 3315 | struct Data { 3316 | std::shared_ptr grammar; 3317 | std::string start; 3318 | const char *start_pos = nullptr; 3319 | 3320 | std::vector> duplicates_of_definition; 3321 | 3322 | std::vector> duplicates_of_instruction; 3323 | std::map> instructions; 3324 | 3325 | std::vector> undefined_back_references; 3326 | std::vector> captures_stack{{}}; 3327 | 3328 | std::set captures_in_current_definition; 3329 | bool enablePackratParsing = true; 3330 | 3331 | Data() : grammar(std::make_shared()) {} 3332 | }; 3333 | 3334 | void make_grammar() { 3335 | // Setup PEG syntax parser 3336 | g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); 3337 | g["Definition"] <= 3338 | cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"], 3339 | g["Expression"], opt(g["Instruction"])), 3340 | seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"], 3341 | opt(g["Instruction"]))); 3342 | g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"]))); 3343 | g["Sequence"] <= zom(cho(g["CUT"], g["Prefix"])); 3344 | g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["SuffixWithLabel"]); 3345 | g["SuffixWithLabel"] <= 3346 | seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"]))); 3347 | g["Suffix"] <= seq(g["Primary"], opt(g["Loop"])); 3348 | g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]); 3349 | g["Primary"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"], 3350 | npd(g["LEFTARROW"])), 3351 | seq(g["Ignore"], 
g["Identifier"], 3352 | npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))), 3353 | seq(g["OPEN"], g["Expression"], g["CLOSE"]), 3354 | seq(g["BeginTok"], g["Expression"], g["EndTok"]), 3355 | g["CapScope"], 3356 | seq(g["BeginCap"], g["Expression"], g["EndCap"]), 3357 | g["BackRef"], g["DictionaryI"], g["LiteralI"], 3358 | g["Dictionary"], g["Literal"], g["NegatedClassI"], 3359 | g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]); 3360 | 3361 | g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]); 3362 | g["IdentCont"] <= tok(seq(g["IdentStart"], zom(g["IdentRest"]))); 3363 | 3364 | const static std::vector> range = { 3365 | {0x0080, 0xFFFF}}; 3366 | g["IdentStart"] <= seq(npd(lit(u8(u8"↑"))), npd(lit(u8(u8"⇑"))), 3367 | cho(cls("a-zA-Z_%"), cls(range))); 3368 | 3369 | g["IdentRest"] <= cho(g["IdentStart"], cls("0-9")); 3370 | 3371 | g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"]))); 3372 | 3373 | g["DictionaryI"] <= 3374 | seq(g["LiteralID"], oom(seq(g["PIPE"], g["LiteralID"]))); 3375 | 3376 | auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), 3377 | cls("'"), g["Spacing"]), 3378 | seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), 3379 | cls("\""), g["Spacing"])); 3380 | g["Literal"] <= lit_ope; 3381 | g["LiteralD"] <= lit_ope; 3382 | 3383 | auto lit_case_ignore_ope = 3384 | cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"), 3385 | g["Spacing"]), 3386 | seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"), 3387 | g["Spacing"])); 3388 | g["LiteralI"] <= lit_case_ignore_ope; 3389 | g["LiteralID"] <= lit_case_ignore_ope; 3390 | 3391 | // NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'. 
3392 | g["Class"] <= seq(chr('['), npd(chr('^')), 3393 | tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'), 3394 | g["Spacing"]); 3395 | g["ClassI"] <= seq(chr('['), npd(chr('^')), 3396 | tok(oom(seq(npd(chr(']')), g["Range"]))), lit("]i"), 3397 | g["Spacing"]); 3398 | 3399 | g["NegatedClass"] <= seq(lit("[^"), 3400 | tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'), 3401 | g["Spacing"]); 3402 | g["NegatedClassI"] <= seq(lit("[^"), 3403 | tok(oom(seq(npd(chr(']')), g["Range"]))), 3404 | lit("]i"), g["Spacing"]); 3405 | 3406 | // NOTE: This is different from The original Brian Ford's paper, and this 3407 | // modification allows us to specify `[+-]` as a valid char class. 3408 | g["Range"] <= 3409 | cho(seq(g["Char"], chr('-'), npd(chr(']')), g["Char"]), g["Char"]); 3410 | 3411 | g["Char"] <= 3412 | cho(seq(chr('\\'), cls("fnrtv'\"[]\\^")), 3413 | seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")), 3414 | seq(chr('\\'), cls("0-7"), opt(cls("0-7"))), 3415 | seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))), 3416 | seq(lit("\\u"), 3417 | cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")), 3418 | rep(cls("0-9a-fA-F"), 4, 4)), 3419 | rep(cls("0-9a-fA-F"), 4, 5))), 3420 | seq(npd(chr('\\')), dot())); 3421 | 3422 | g["Repetition"] <= 3423 | seq(g["BeginBracket"], g["RepetitionRange"], g["EndBracket"]); 3424 | g["RepetitionRange"] <= cho(seq(g["Number"], g["COMMA"], g["Number"]), 3425 | seq(g["Number"], g["COMMA"]), g["Number"], 3426 | seq(g["COMMA"], g["Number"])); 3427 | g["Number"] <= seq(oom(cls("0-9")), g["Spacing"]); 3428 | 3429 | g["CapScope"] <= seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]); 3430 | 3431 | g["LEFTARROW"] <= seq(cho(lit("<-"), lit(u8(u8"←"))), g["Spacing"]); 3432 | ~g["SLASH"] <= seq(chr('/'), g["Spacing"]); 3433 | ~g["PIPE"] <= seq(chr('|'), g["Spacing"]); 3434 | g["AND"] <= seq(chr('&'), g["Spacing"]); 3435 | g["NOT"] <= seq(chr('!'), g["Spacing"]); 3436 | g["QUESTION"] <= seq(chr('?'), g["Spacing"]); 3437 | g["STAR"] <= 
seq(chr('*'), g["Spacing"]); 3438 | g["PLUS"] <= seq(chr('+'), g["Spacing"]); 3439 | ~g["OPEN"] <= seq(chr('('), g["Spacing"]); 3440 | ~g["CLOSE"] <= seq(chr(')'), g["Spacing"]); 3441 | g["DOT"] <= seq(chr('.'), g["Spacing"]); 3442 | 3443 | g["CUT"] <= seq(lit(u8(u8"↑")), g["Spacing"]); 3444 | ~g["LABEL"] <= seq(cho(chr('^'), lit(u8(u8"⇑"))), g["Spacing"]); 3445 | 3446 | ~g["Spacing"] <= zom(cho(g["Space"], g["Comment"])); 3447 | g["Comment"] <= 3448 | seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())), g["EndOfLine"]); 3449 | g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]); 3450 | g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r')); 3451 | g["EndOfFile"] <= npd(dot()); 3452 | 3453 | ~g["BeginTok"] <= seq(chr('<'), g["Spacing"]); 3454 | ~g["EndTok"] <= seq(chr('>'), g["Spacing"]); 3455 | 3456 | ~g["BeginCapScope"] <= seq(chr('$'), chr('('), g["Spacing"]); 3457 | ~g["EndCapScope"] <= seq(chr(')'), g["Spacing"]); 3458 | 3459 | g["BeginCap"] <= seq(chr('$'), tok(g["IdentCont"]), chr('<'), g["Spacing"]); 3460 | ~g["EndCap"] <= seq(chr('>'), g["Spacing"]); 3461 | 3462 | g["BackRef"] <= seq(chr('$'), tok(g["IdentCont"]), g["Spacing"]); 3463 | 3464 | g["IGNORE"] <= chr('~'); 3465 | 3466 | g["Ignore"] <= opt(g["IGNORE"]); 3467 | g["Parameters"] <= seq(g["OPEN"], g["Identifier"], 3468 | zom(seq(g["COMMA"], g["Identifier"])), g["CLOSE"]); 3469 | g["Arguments"] <= seq(g["OPEN"], g["Expression"], 3470 | zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]); 3471 | ~g["COMMA"] <= seq(chr(','), g["Spacing"]); 3472 | 3473 | // Instruction grammars 3474 | g["Instruction"] <= 3475 | seq(g["BeginBracket"], 3476 | opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"], 3477 | g["InstructionItem"])))), 3478 | g["EndBracket"]); 3479 | g["InstructionItem"] <= 3480 | cho(g["PrecedenceClimbing"], g["ErrorMessage"], g["NoAstOpt"]); 3481 | ~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]); 3482 | 3483 | ~g["SpacesZom"] <= zom(g["Space"]); 3484 | 
~g["SpacesOom"] <= oom(g["Space"]); 3485 | ~g["BeginBracket"] <= seq(chr('{'), g["Spacing"]); 3486 | ~g["EndBracket"] <= seq(chr('}'), g["Spacing"]); 3487 | 3488 | // PrecedenceClimbing instruction 3489 | g["PrecedenceClimbing"] <= 3490 | seq(lit("precedence"), g["SpacesOom"], g["PrecedenceInfo"], 3491 | zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]); 3492 | g["PrecedenceInfo"] <= 3493 | seq(g["PrecedenceAssoc"], 3494 | oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"]))); 3495 | g["PrecedenceOpe"] <= 3496 | cho(seq(cls("'"), 3497 | tok(zom(seq(npd(cho(g["Space"], cls("'"))), g["Char"]))), 3498 | cls("'")), 3499 | seq(cls("\""), 3500 | tok(zom(seq(npd(cho(g["Space"], cls("\""))), g["Char"]))), 3501 | cls("\"")), 3502 | tok(oom(seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))), 3503 | dot())))); 3504 | g["PrecedenceAssoc"] <= cls("LR"); 3505 | 3506 | // Error message instruction 3507 | g["ErrorMessage"] <= seq(lit("error_message"), g["SpacesOom"], 3508 | g["LiteralD"], g["SpacesZom"]); 3509 | 3510 | // No Ast node optimization instruction 3511 | g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]); 3512 | 3513 | // Set definition names 3514 | for (auto &x : g) { 3515 | x.second.name = x.first; 3516 | } 3517 | } 3518 | 3519 | void setup_actions() { 3520 | g["Definition"] = [&](const SemanticValues &vs, std::any &dt) { 3521 | auto &data = *std::any_cast(dt); 3522 | 3523 | auto is_macro = vs.choice() == 0; 3524 | auto ignore = std::any_cast(vs[0]); 3525 | auto name = std::any_cast(vs[1]); 3526 | 3527 | std::vector params; 3528 | std::shared_ptr ope; 3529 | auto has_instructions = false; 3530 | 3531 | if (is_macro) { 3532 | params = std::any_cast>(vs[2]); 3533 | ope = std::any_cast>(vs[4]); 3534 | if (vs.size() == 6) { has_instructions = true; } 3535 | } else { 3536 | ope = std::any_cast>(vs[3]); 3537 | if (vs.size() == 5) { has_instructions = true; } 3538 | } 3539 | 3540 | if (has_instructions) { 3541 | auto index = is_macro ? 
5 : 4; 3542 | std::unordered_set types; 3543 | for (const auto &instruction : 3544 | std::any_cast>(vs[index])) { 3545 | const auto &type = instruction.type; 3546 | if (types.find(type) == types.end()) { 3547 | data.instructions[name].push_back(instruction); 3548 | types.insert(instruction.type); 3549 | if (type == "declare_symbol" || type == "check_symbol") { 3550 | if (!TokenChecker::is_token(*ope)) { ope = tok(ope); } 3551 | } 3552 | } else { 3553 | data.duplicates_of_instruction.emplace_back(type, 3554 | instruction.sv.data()); 3555 | } 3556 | } 3557 | } 3558 | 3559 | auto &grammar = *data.grammar; 3560 | if (!grammar.count(name)) { 3561 | auto &rule = grammar[name]; 3562 | rule <= ope; 3563 | rule.name = name; 3564 | rule.s_ = vs.sv().data(); 3565 | rule.line_ = line_info(vs.ss, rule.s_); 3566 | rule.ignoreSemanticValue = ignore; 3567 | rule.is_macro = is_macro; 3568 | rule.params = params; 3569 | 3570 | if (data.start.empty()) { 3571 | data.start = rule.name; 3572 | data.start_pos = rule.s_; 3573 | } 3574 | } else { 3575 | data.duplicates_of_definition.emplace_back(name, vs.sv().data()); 3576 | } 3577 | }; 3578 | 3579 | g["Definition"].enter = [](const Context & /*c*/, const char * /*s*/, 3580 | size_t /*n*/, std::any &dt) { 3581 | auto &data = *std::any_cast(dt); 3582 | data.captures_in_current_definition.clear(); 3583 | }; 3584 | 3585 | g["Expression"] = [&](const SemanticValues &vs) { 3586 | if (vs.size() == 1) { 3587 | return std::any_cast>(vs[0]); 3588 | } else { 3589 | std::vector> opes; 3590 | for (auto i = 0u; i < vs.size(); i++) { 3591 | opes.emplace_back(std::any_cast>(vs[i])); 3592 | } 3593 | const std::shared_ptr ope = 3594 | std::make_shared(opes); 3595 | return ope; 3596 | } 3597 | }; 3598 | 3599 | g["Sequence"] = [&](const SemanticValues &vs) { 3600 | if (vs.empty()) { 3601 | return npd(lit("")); 3602 | } else if (vs.size() == 1) { 3603 | return std::any_cast>(vs[0]); 3604 | } else { 3605 | std::vector> opes; 3606 | for (const auto &x : vs) { 
3607 | opes.emplace_back(std::any_cast>(x)); 3608 | } 3609 | const std::shared_ptr ope = std::make_shared(opes); 3610 | return ope; 3611 | } 3612 | }; 3613 | 3614 | g["Prefix"] = [&](const SemanticValues &vs) { 3615 | std::shared_ptr ope; 3616 | if (vs.size() == 1) { 3617 | ope = std::any_cast>(vs[0]); 3618 | } else { 3619 | assert(vs.size() == 2); 3620 | auto tok = std::any_cast(vs[0]); 3621 | ope = std::any_cast>(vs[1]); 3622 | if (tok == '&') { 3623 | ope = apd(ope); 3624 | } else { // '!' 3625 | ope = npd(ope); 3626 | } 3627 | } 3628 | return ope; 3629 | }; 3630 | 3631 | g["SuffixWithLabel"] = [&](const SemanticValues &vs, std::any &dt) { 3632 | auto ope = std::any_cast>(vs[0]); 3633 | if (vs.size() == 1) { 3634 | return ope; 3635 | } else { 3636 | assert(vs.size() == 2); 3637 | auto &data = *std::any_cast(dt); 3638 | const auto &ident = std::any_cast(vs[1]); 3639 | auto label = ref(*data.grammar, ident, vs.sv().data(), false, {}); 3640 | auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME, 3641 | vs.sv().data(), true, {label})); 3642 | return cho4label_(ope, recovery); 3643 | } 3644 | }; 3645 | 3646 | struct Loop { 3647 | enum class Type { opt = 0, zom, oom, rep }; 3648 | Type type; 3649 | std::pair range; 3650 | }; 3651 | 3652 | g["Suffix"] = [&](const SemanticValues &vs) { 3653 | auto ope = std::any_cast>(vs[0]); 3654 | if (vs.size() == 1) { 3655 | return ope; 3656 | } else { 3657 | assert(vs.size() == 2); 3658 | auto loop = std::any_cast(vs[1]); 3659 | switch (loop.type) { 3660 | case Loop::Type::opt: return opt(ope); 3661 | case Loop::Type::zom: return zom(ope); 3662 | case Loop::Type::oom: return oom(ope); 3663 | default: // Regex-like repetition 3664 | return rep(ope, loop.range.first, loop.range.second); 3665 | } 3666 | } 3667 | }; 3668 | 3669 | g["Loop"] = [&](const SemanticValues &vs) { 3670 | switch (vs.choice()) { 3671 | case 0: // Option 3672 | return Loop{Loop::Type::opt, std::pair()}; 3673 | case 1: // Zero or More 3674 | return 
Loop{Loop::Type::zom, std::pair()}; 3675 | case 2: // One or More 3676 | return Loop{Loop::Type::oom, std::pair()}; 3677 | default: // Regex-like repetition 3678 | return Loop{Loop::Type::rep, 3679 | std::any_cast>(vs[0])}; 3680 | } 3681 | }; 3682 | 3683 | g["Primary"] = [&](const SemanticValues &vs, std::any &dt) { 3684 | auto &data = *std::any_cast(dt); 3685 | 3686 | switch (vs.choice()) { 3687 | case 0: // Macro Reference 3688 | case 1: { // Reference 3689 | auto is_macro = vs.choice() == 0; 3690 | auto ignore = std::any_cast(vs[0]); 3691 | const auto &ident = std::any_cast(vs[1]); 3692 | 3693 | std::vector> args; 3694 | if (is_macro) { 3695 | args = std::any_cast>>(vs[2]); 3696 | } 3697 | 3698 | auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args); 3699 | if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); } 3700 | 3701 | if (ignore) { 3702 | return ign(ope); 3703 | } else { 3704 | return ope; 3705 | } 3706 | } 3707 | case 2: { // (Expression) 3708 | return std::any_cast>(vs[0]); 3709 | } 3710 | case 3: { // TokenBoundary 3711 | return tok(std::any_cast>(vs[0])); 3712 | } 3713 | case 4: { // CaptureScope 3714 | return csc(std::any_cast>(vs[0])); 3715 | } 3716 | case 5: { // Capture 3717 | const auto &name = std::any_cast(vs[0]); 3718 | auto ope = std::any_cast>(vs[1]); 3719 | 3720 | data.captures_stack.back().insert(name); 3721 | data.captures_in_current_definition.insert(name); 3722 | 3723 | return cap(ope, [name](const char *a_s, size_t a_n, Context &c) { 3724 | auto &cs = c.capture_scope_stack[c.capture_scope_stack_size - 1]; 3725 | cs[name] = std::string(a_s, a_n); 3726 | }); 3727 | } 3728 | default: { 3729 | return std::any_cast>(vs[0]); 3730 | } 3731 | } 3732 | }; 3733 | 3734 | g["IdentCont"] = [](const SemanticValues &vs) { 3735 | return std::string(vs.sv().data(), vs.sv().length()); 3736 | }; 3737 | 3738 | g["Dictionary"] = [](const SemanticValues &vs) { 3739 | auto items = vs.transform(); 3740 | return dic(items, false); 3741 | }; 
3742 | g["DictionaryI"] = [](const SemanticValues &vs) { 3743 | auto items = vs.transform(); 3744 | return dic(items, true); 3745 | }; 3746 | 3747 | g["Literal"] = [](const SemanticValues &vs) { 3748 | const auto &tok = vs.tokens.front(); 3749 | return lit(resolve_escape_sequence(tok.data(), tok.size())); 3750 | }; 3751 | g["LiteralI"] = [](const SemanticValues &vs) { 3752 | const auto &tok = vs.tokens.front(); 3753 | return liti(resolve_escape_sequence(tok.data(), tok.size())); 3754 | }; 3755 | g["LiteralD"] = [](const SemanticValues &vs) { 3756 | auto &tok = vs.tokens.front(); 3757 | return resolve_escape_sequence(tok.data(), tok.size()); 3758 | }; 3759 | g["LiteralID"] = [](const SemanticValues &vs) { 3760 | auto &tok = vs.tokens.front(); 3761 | return resolve_escape_sequence(tok.data(), tok.size()); 3762 | }; 3763 | 3764 | g["Class"] = [](const SemanticValues &vs) { 3765 | auto ranges = vs.transform>(); 3766 | return cls(ranges); 3767 | }; 3768 | g["ClassI"] = [](const SemanticValues &vs) { 3769 | auto ranges = vs.transform>(); 3770 | return cls(ranges, true); 3771 | }; 3772 | g["NegatedClass"] = [](const SemanticValues &vs) { 3773 | auto ranges = vs.transform>(); 3774 | return ncls(ranges); 3775 | }; 3776 | g["NegatedClassI"] = [](const SemanticValues &vs) { 3777 | auto ranges = vs.transform>(); 3778 | return ncls(ranges, true); 3779 | }; 3780 | g["Range"] = [](const SemanticValues &vs) { 3781 | switch (vs.choice()) { 3782 | case 0: { 3783 | auto s1 = std::any_cast(vs[0]); 3784 | auto s2 = std::any_cast(vs[1]); 3785 | auto cp1 = decode_codepoint(s1.data(), s1.length()); 3786 | auto cp2 = decode_codepoint(s2.data(), s2.length()); 3787 | return std::pair(cp1, cp2); 3788 | } 3789 | case 1: { 3790 | auto s = std::any_cast(vs[0]); 3791 | auto cp = decode_codepoint(s.data(), s.length()); 3792 | return std::pair(cp, cp); 3793 | } 3794 | } 3795 | return std::pair(0, 0); 3796 | }; 3797 | g["Char"] = [](const SemanticValues &vs) { 3798 | return 
resolve_escape_sequence(vs.sv().data(), vs.sv().length()); 3799 | }; 3800 | 3801 | g["RepetitionRange"] = [&](const SemanticValues &vs) { 3802 | switch (vs.choice()) { 3803 | case 0: { // Number COMMA Number 3804 | auto min = std::any_cast(vs[0]); 3805 | auto max = std::any_cast(vs[1]); 3806 | return std::pair(min, max); 3807 | } 3808 | case 1: // Number COMMA 3809 | return std::pair(std::any_cast(vs[0]), 3810 | std::numeric_limits::max()); 3811 | case 2: { // Number 3812 | auto n = std::any_cast(vs[0]); 3813 | return std::pair(n, n); 3814 | } 3815 | default: // COMMA Number 3816 | return std::pair(std::numeric_limits::min(), 3817 | std::any_cast(vs[0])); 3818 | } 3819 | }; 3820 | g["Number"] = [&](const SemanticValues &vs) { 3821 | return vs.token_to_number(); 3822 | }; 3823 | 3824 | g["CapScope"].enter = [](const Context & /*c*/, const char * /*s*/, 3825 | size_t /*n*/, std::any &dt) { 3826 | auto &data = *std::any_cast(dt); 3827 | data.captures_stack.emplace_back(); 3828 | }; 3829 | g["CapScope"].leave = [](const Context & /*c*/, const char * /*s*/, 3830 | size_t /*n*/, size_t /*matchlen*/, 3831 | std::any & /*value*/, std::any &dt) { 3832 | auto &data = *std::any_cast(dt); 3833 | data.captures_stack.pop_back(); 3834 | }; 3835 | 3836 | g["AND"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; 3837 | g["NOT"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; 3838 | g["QUESTION"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; 3839 | g["STAR"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; 3840 | g["PLUS"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; 3841 | 3842 | g["DOT"] = [](const SemanticValues & /*vs*/) { return dot(); }; 3843 | 3844 | g["CUT"] = [](const SemanticValues & /*vs*/) { return cut(); }; 3845 | 3846 | g["BeginCap"] = [](const SemanticValues &vs) { return vs.token(); }; 3847 | 3848 | g["BackRef"] = [&](const SemanticValues &vs, std::any &dt) { 3849 | auto &data = *std::any_cast(dt); 3850 | 
3851 | // Undefined back reference check 3852 | { 3853 | auto found = false; 3854 | auto it = data.captures_stack.rbegin(); 3855 | while (it != data.captures_stack.rend()) { 3856 | if (it->find(vs.token()) != it->end()) { 3857 | found = true; 3858 | break; 3859 | } 3860 | ++it; 3861 | } 3862 | if (!found) { 3863 | auto ptr = vs.token().data() - 1; // include '$' symbol 3864 | data.undefined_back_references.emplace_back(vs.token(), ptr); 3865 | } 3866 | } 3867 | 3868 | // NOTE: Disable packrat parsing if a back reference is not defined in 3869 | // captures in the current definition rule. 3870 | if (data.captures_in_current_definition.find(vs.token()) == 3871 | data.captures_in_current_definition.end()) { 3872 | data.enablePackratParsing = false; 3873 | } 3874 | 3875 | return bkr(vs.token_to_string()); 3876 | }; 3877 | 3878 | g["Ignore"] = [](const SemanticValues &vs) { return vs.size() > 0; }; 3879 | 3880 | g["Parameters"] = [](const SemanticValues &vs) { 3881 | return vs.transform(); 3882 | }; 3883 | 3884 | g["Arguments"] = [](const SemanticValues &vs) { 3885 | return vs.transform>(); 3886 | }; 3887 | 3888 | g["PrecedenceClimbing"] = [](const SemanticValues &vs) { 3889 | PrecedenceClimbing::BinOpeInfo binOpeInfo; 3890 | size_t level = 1; 3891 | for (auto v : vs) { 3892 | auto tokens = std::any_cast>(v); 3893 | auto assoc = tokens[0][0]; 3894 | for (size_t i = 1; i < tokens.size(); i++) { 3895 | binOpeInfo[tokens[i]] = std::pair(level, assoc); 3896 | } 3897 | level++; 3898 | } 3899 | Instruction instruction; 3900 | instruction.type = "precedence"; 3901 | instruction.data = binOpeInfo; 3902 | instruction.sv = vs.sv(); 3903 | return instruction; 3904 | }; 3905 | g["PrecedenceInfo"] = [](const SemanticValues &vs) { 3906 | return vs.transform(); 3907 | }; 3908 | g["PrecedenceOpe"] = [](const SemanticValues &vs) { return vs.token(); }; 3909 | g["PrecedenceAssoc"] = [](const SemanticValues &vs) { return vs.token(); }; 3910 | 3911 | g["ErrorMessage"] = [](const 
SemanticValues &vs) { 3912 | Instruction instruction; 3913 | instruction.type = "error_message"; 3914 | instruction.data = std::any_cast(vs[0]); 3915 | instruction.sv = vs.sv(); 3916 | return instruction; 3917 | }; 3918 | 3919 | g["NoAstOpt"] = [](const SemanticValues &vs) { 3920 | Instruction instruction; 3921 | instruction.type = "no_ast_opt"; 3922 | instruction.sv = vs.sv(); 3923 | return instruction; 3924 | }; 3925 | 3926 | g["Instruction"] = [](const SemanticValues &vs) { 3927 | return vs.transform(); 3928 | }; 3929 | } 3930 | 3931 | bool apply_precedence_instruction(Definition &rule, 3932 | const PrecedenceClimbing::BinOpeInfo &info, 3933 | const char *s, Log log) { 3934 | try { 3935 | auto &seq = dynamic_cast(*rule.get_core_operator()); 3936 | auto atom = seq.opes_[0]; 3937 | auto &rep = dynamic_cast(*seq.opes_[1]); 3938 | auto &seq1 = dynamic_cast(*rep.ope_); 3939 | auto binop = seq1.opes_[0]; 3940 | auto atom1 = seq1.opes_[1]; 3941 | 3942 | auto atom_name = dynamic_cast(*atom).name_; 3943 | auto binop_name = dynamic_cast(*binop).name_; 3944 | auto atom1_name = dynamic_cast(*atom1).name_; 3945 | 3946 | if (!rep.is_zom() || atom_name != atom1_name || atom_name == binop_name) { 3947 | if (log) { 3948 | auto line = line_info(s, rule.s_); 3949 | log(line.first, line.second, 3950 | "'precedence' instruction cannot be applied to '" + rule.name + 3951 | "'.", 3952 | ""); 3953 | } 3954 | return false; 3955 | } 3956 | 3957 | rule.holder_->ope_ = pre(atom, binop, info, rule); 3958 | rule.disable_action = true; 3959 | } catch (...) 
{ 3960 | if (log) { 3961 | auto line = line_info(s, rule.s_); 3962 | log(line.first, line.second, 3963 | "'precedence' instruction cannot be applied to '" + rule.name + 3964 | "'.", 3965 | ""); 3966 | } 3967 | return false; 3968 | } 3969 | return true; 3970 | } 3971 | 3972 | std::shared_ptr perform_core(const char *s, size_t n, 3973 | const Rules &rules, std::string &start, 3974 | bool &enablePackratParsing, Log log) { 3975 | Data data; 3976 | auto &grammar = *data.grammar; 3977 | 3978 | // Built-in macros 3979 | { 3980 | // `%recover` 3981 | { 3982 | auto &rule = grammar[RECOVER_DEFINITION_NAME]; 3983 | rule <= ref(grammar, "x", "", false, {}); 3984 | rule.name = RECOVER_DEFINITION_NAME; 3985 | rule.s_ = "[native]"; 3986 | rule.ignoreSemanticValue = true; 3987 | rule.is_macro = true; 3988 | rule.params = {"x"}; 3989 | } 3990 | } 3991 | 3992 | std::any dt = &data; 3993 | auto r = g["Grammar"].parse(s, n, dt, nullptr, log); 3994 | 3995 | if (!r.ret) { 3996 | if (log) { 3997 | if (r.error_info.message_pos) { 3998 | auto line = line_info(s, r.error_info.message_pos); 3999 | log(line.first, line.second, r.error_info.message, 4000 | r.error_info.label); 4001 | } else { 4002 | auto line = line_info(s, r.error_info.error_pos); 4003 | log(line.first, line.second, "syntax error", r.error_info.label); 4004 | } 4005 | } 4006 | return nullptr; 4007 | } 4008 | 4009 | // User provided rules 4010 | for (auto [user_name, user_rule] : rules) { 4011 | auto name = user_name; 4012 | auto ignore = false; 4013 | if (!name.empty() && name[0] == '~') { 4014 | ignore = true; 4015 | name.erase(0, 1); 4016 | } 4017 | if (!name.empty()) { 4018 | auto &rule = grammar[name]; 4019 | rule <= user_rule; 4020 | rule.name = name; 4021 | rule.ignoreSemanticValue = ignore; 4022 | } 4023 | } 4024 | 4025 | // Check duplicated definitions 4026 | auto ret = true; 4027 | 4028 | if (!data.duplicates_of_definition.empty()) { 4029 | for (const auto &[name, ptr] : data.duplicates_of_definition) { 4030 | if 
(log) { 4031 | auto line = line_info(s, ptr); 4032 | log(line.first, line.second, 4033 | "The definition '" + name + "' is already defined.", ""); 4034 | } 4035 | } 4036 | ret = false; 4037 | } 4038 | 4039 | // Check duplicated instructions 4040 | if (!data.duplicates_of_instruction.empty()) { 4041 | for (const auto &[type, ptr] : data.duplicates_of_instruction) { 4042 | if (log) { 4043 | auto line = line_info(s, ptr); 4044 | log(line.first, line.second, 4045 | "The instruction '" + type + "' is already defined.", ""); 4046 | } 4047 | } 4048 | ret = false; 4049 | } 4050 | 4051 | // Check undefined back references 4052 | if (!data.undefined_back_references.empty()) { 4053 | for (const auto &[name, ptr] : data.undefined_back_references) { 4054 | if (log) { 4055 | auto line = line_info(s, ptr); 4056 | log(line.first, line.second, 4057 | "The back reference '" + name + "' is undefined.", ""); 4058 | } 4059 | } 4060 | ret = false; 4061 | } 4062 | 4063 | // Set root definition 4064 | auto &start_rule = grammar[data.start]; 4065 | 4066 | // Check if the start rule has ignore operator 4067 | { 4068 | if (start_rule.ignoreSemanticValue) { 4069 | if (log) { 4070 | auto line = line_info(s, start_rule.s_); 4071 | log(line.first, line.second, 4072 | "Ignore operator cannot be applied to '" + start_rule.name + "'.", 4073 | ""); 4074 | } 4075 | ret = false; 4076 | } 4077 | } 4078 | 4079 | if (!ret) { return nullptr; } 4080 | 4081 | // Check missing definitions 4082 | auto referenced = std::unordered_set{ 4083 | WHITESPACE_DEFINITION_NAME, 4084 | WORD_DEFINITION_NAME, 4085 | RECOVER_DEFINITION_NAME, 4086 | start_rule.name, 4087 | }; 4088 | 4089 | for (auto &[_, rule] : grammar) { 4090 | ReferenceChecker vis(grammar, rule.params); 4091 | rule.accept(vis); 4092 | referenced.insert(vis.referenced.begin(), vis.referenced.end()); 4093 | for (const auto &[name, ptr] : vis.error_s) { 4094 | if (log) { 4095 | auto line = line_info(s, ptr); 4096 | log(line.first, line.second, 
vis.error_message[name], ""); 4097 | } 4098 | ret = false; 4099 | } 4100 | } 4101 | 4102 | for (auto &[name, rule] : grammar) { 4103 | if (!referenced.count(name)) { 4104 | if (log) { 4105 | auto line = line_info(s, rule.s_); 4106 | auto msg = "'" + name + "' is not referenced."; 4107 | log(line.first, line.second, msg, ""); 4108 | } 4109 | } 4110 | } 4111 | 4112 | if (!ret) { return nullptr; } 4113 | 4114 | // Link references 4115 | for (auto &x : grammar) { 4116 | auto &rule = x.second; 4117 | LinkReferences vis(grammar, rule.params); 4118 | rule.accept(vis); 4119 | } 4120 | 4121 | // Check left recursion 4122 | ret = true; 4123 | 4124 | for (auto &[name, rule] : grammar) { 4125 | DetectLeftRecursion vis(name); 4126 | rule.accept(vis); 4127 | if (vis.error_s) { 4128 | if (log) { 4129 | auto line = line_info(s, vis.error_s); 4130 | log(line.first, line.second, "'" + name + "' is left recursive.", ""); 4131 | } 4132 | ret = false; 4133 | } 4134 | } 4135 | 4136 | if (!ret) { return nullptr; } 4137 | 4138 | // Check infinite loop 4139 | if (detect_infiniteLoop(data, start_rule, log, s)) { return nullptr; } 4140 | 4141 | // Automatic whitespace skipping 4142 | if (grammar.count(WHITESPACE_DEFINITION_NAME)) { 4143 | for (auto &x : grammar) { 4144 | auto &rule = x.second; 4145 | auto ope = rule.get_core_operator(); 4146 | if (IsLiteralToken::check(*ope)) { rule <= tok(ope); } 4147 | } 4148 | 4149 | auto &rule = grammar[WHITESPACE_DEFINITION_NAME]; 4150 | start_rule.whitespaceOpe = wsp(rule.get_core_operator()); 4151 | 4152 | if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; } 4153 | } 4154 | 4155 | // Word expression 4156 | if (grammar.count(WORD_DEFINITION_NAME)) { 4157 | auto &rule = grammar[WORD_DEFINITION_NAME]; 4158 | start_rule.wordOpe = rule.get_core_operator(); 4159 | 4160 | if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; } 4161 | } 4162 | 4163 | // Apply instructions 4164 | for (const auto &[name, instructions] : data.instructions) { 
4165 | auto &rule = grammar[name]; 4166 | 4167 | for (const auto &instruction : instructions) { 4168 | if (instruction.type == "precedence") { 4169 | const auto &info = 4170 | std::any_cast(instruction.data); 4171 | 4172 | if (!apply_precedence_instruction(rule, info, s, log)) { 4173 | return nullptr; 4174 | } 4175 | } else if (instruction.type == "error_message") { 4176 | rule.error_message = std::any_cast(instruction.data); 4177 | } else if (instruction.type == "no_ast_opt") { 4178 | rule.no_ast_opt = true; 4179 | } 4180 | } 4181 | } 4182 | 4183 | // Set root definition 4184 | start = data.start; 4185 | enablePackratParsing = data.enablePackratParsing; 4186 | 4187 | return data.grammar; 4188 | } 4189 | 4190 | bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log, 4191 | const char *s) const { 4192 | std::vector> refs; 4193 | std::unordered_map has_error_cache; 4194 | DetectInfiniteLoop vis(data.start_pos, rule.name, refs, has_error_cache); 4195 | rule.accept(vis); 4196 | if (vis.has_error) { 4197 | if (log) { 4198 | auto line = line_info(s, vis.error_s); 4199 | log(line.first, line.second, 4200 | "infinite loop is detected in '" + vis.error_name + "'.", ""); 4201 | } 4202 | return true; 4203 | } 4204 | return false; 4205 | } 4206 | 4207 | Grammar g; 4208 | }; 4209 | 4210 | /*----------------------------------------------------------------------------- 4211 | * AST 4212 | *---------------------------------------------------------------------------*/ 4213 | 4214 | template struct AstBase : public Annotation { 4215 | AstBase(const char *path, size_t line, size_t column, const char *name, 4216 | const std::vector> &nodes, 4217 | size_t position = 0, size_t length = 0, size_t choice_count = 0, 4218 | size_t choice = 0) 4219 | : path(path ? 
path : ""), line(line), column(column), name(name), 4220 | position(position), length(length), choice_count(choice_count), 4221 | choice(choice), original_name(name), 4222 | original_choice_count(choice_count), original_choice(choice), 4223 | tag(str2tag(name)), original_tag(tag), is_token(false), nodes(nodes) {} 4224 | 4225 | AstBase(const char *path, size_t line, size_t column, const char *name, 4226 | const std::string_view &token, size_t position = 0, size_t length = 0, 4227 | size_t choice_count = 0, size_t choice = 0) 4228 | : path(path ? path : ""), line(line), column(column), name(name), 4229 | position(position), length(length), choice_count(choice_count), 4230 | choice(choice), original_name(name), 4231 | original_choice_count(choice_count), original_choice(choice), 4232 | tag(str2tag(name)), original_tag(tag), is_token(true), token(token) {} 4233 | 4234 | AstBase(const AstBase &ast, const char *original_name, size_t position = 0, 4235 | size_t length = 0, size_t original_choice_count = 0, 4236 | size_t original_choice = 0) 4237 | : path(ast.path), line(ast.line), column(ast.column), name(ast.name), 4238 | position(position), length(length), choice_count(ast.choice_count), 4239 | choice(ast.choice), original_name(original_name), 4240 | original_choice_count(original_choice_count), 4241 | original_choice(original_choice), tag(ast.tag), 4242 | original_tag(str2tag(original_name)), is_token(ast.is_token), 4243 | token(ast.token), nodes(ast.nodes), parent(ast.parent) {} 4244 | 4245 | const std::string path; 4246 | const size_t line = 1; 4247 | const size_t column = 1; 4248 | 4249 | const std::string name; 4250 | size_t position; 4251 | size_t length; 4252 | const size_t choice_count; 4253 | const size_t choice; 4254 | const std::string original_name; 4255 | const size_t original_choice_count; 4256 | const size_t original_choice; 4257 | const unsigned int tag; 4258 | const unsigned int original_tag; 4259 | 4260 | const bool is_token; 4261 | const 
std::string_view token; 4262 | 4263 | std::vector>> nodes; 4264 | std::weak_ptr> parent; 4265 | 4266 | std::string token_to_string() const { 4267 | assert(is_token); 4268 | return std::string(token); 4269 | } 4270 | 4271 | template T token_to_number() const { 4272 | return token_to_number_(token); 4273 | } 4274 | }; 4275 | 4276 | template 4277 | void ast_to_s_core(const std::shared_ptr &ptr, std::string &s, int level, 4278 | std::function fn) { 4279 | const auto &ast = *ptr; 4280 | for (auto i = 0; i < level; i++) { 4281 | s += " "; 4282 | } 4283 | auto name = ast.original_name; 4284 | if (ast.original_choice_count > 0) { 4285 | name += "/" + std::to_string(ast.original_choice); 4286 | } 4287 | if (ast.name != ast.original_name) { name += "[" + ast.name + "]"; } 4288 | if (ast.is_token) { 4289 | s += "- " + name + " ("; 4290 | s += ast.token; 4291 | s += ")\n"; 4292 | } else { 4293 | s += "+ " + name + "\n"; 4294 | } 4295 | if (fn) { s += fn(ast, level + 1); } 4296 | for (auto node : ast.nodes) { 4297 | ast_to_s_core(node, s, level + 1, fn); 4298 | } 4299 | } 4300 | 4301 | template 4302 | std::string 4303 | ast_to_s(const std::shared_ptr &ptr, 4304 | std::function fn = nullptr) { 4305 | std::string s; 4306 | ast_to_s_core(ptr, s, 0, fn); 4307 | return s; 4308 | } 4309 | 4310 | struct AstOptimizer { 4311 | AstOptimizer(bool mode, const std::vector &rules = {}) 4312 | : mode_(mode), rules_(rules) {} 4313 | 4314 | template 4315 | std::shared_ptr optimize(std::shared_ptr original, 4316 | std::shared_ptr parent = nullptr) { 4317 | auto found = 4318 | std::find(rules_.begin(), rules_.end(), original->name) != rules_.end(); 4319 | auto opt = mode_ ? 
!found : found; 4320 | 4321 | if (opt && original->nodes.size() == 1) { 4322 | auto child = optimize(original->nodes[0], parent); 4323 | auto ast = std::make_shared(*child, original->name.data(), 4324 | original->choice_count, original->position, 4325 | original->length, original->choice); 4326 | for (auto node : ast->nodes) { 4327 | node->parent = ast; 4328 | } 4329 | return ast; 4330 | } 4331 | 4332 | auto ast = std::make_shared(*original); 4333 | ast->parent = parent; 4334 | ast->nodes.clear(); 4335 | for (auto node : original->nodes) { 4336 | auto child = optimize(node, ast); 4337 | ast->nodes.push_back(child); 4338 | } 4339 | return ast; 4340 | } 4341 | 4342 | private: 4343 | const bool mode_; 4344 | const std::vector rules_; 4345 | }; 4346 | 4347 | struct EmptyType {}; 4348 | using Ast = AstBase; 4349 | 4350 | template void add_ast_action(Definition &rule) { 4351 | rule.action = [&](const SemanticValues &vs) { 4352 | auto line = vs.line_info(); 4353 | 4354 | if (rule.is_token()) { 4355 | return std::make_shared( 4356 | vs.path, line.first, line.second, rule.name.data(), vs.token(), 4357 | std::distance(vs.ss, vs.sv().data()), vs.sv().length(), 4358 | vs.choice_count(), vs.choice()); 4359 | } 4360 | 4361 | auto ast = 4362 | std::make_shared(vs.path, line.first, line.second, rule.name.data(), 4363 | vs.transform>(), 4364 | std::distance(vs.ss, vs.sv().data()), 4365 | vs.sv().length(), vs.choice_count(), vs.choice()); 4366 | 4367 | for (auto node : ast->nodes) { 4368 | node->parent = ast; 4369 | } 4370 | return ast; 4371 | }; 4372 | } 4373 | 4374 | #define PEG_EXPAND(...) 
__VA_ARGS__ 4375 | #define PEG_CONCAT(a, b) a##b 4376 | #define PEG_CONCAT2(a, b) PEG_CONCAT(a, b) 4377 | 4378 | #define PEG_PICK( \ 4379 | a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, \ 4380 | a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, \ 4381 | a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, \ 4382 | a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59, a60, a61, \ 4383 | a62, a63, a64, a65, a66, a67, a68, a69, a70, a71, a72, a73, a74, a75, a76, \ 4384 | a77, a78, a79, a80, a81, a82, a83, a84, a85, a86, a87, a88, a89, a90, a91, \ 4385 | a92, a93, a94, a95, a96, a97, a98, a99, a100, ...) \ 4386 | a100 4387 | 4388 | #define PEG_COUNT(...) \ 4389 | PEG_EXPAND(PEG_PICK( \ 4390 | __VA_ARGS__, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, \ 4391 | 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, \ 4392 | 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ 4393 | 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \ 4394 | 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \ 4395 | 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) 4396 | 4397 | #define PEG_DEF_1(r) \ 4398 | peg::Definition r; \ 4399 | r.name = #r; \ 4400 | peg::add_ast_action(r); 4401 | 4402 | #define PEG_DEF_2(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_1(__VA_ARGS__)) 4403 | #define PEG_DEF_3(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_2(__VA_ARGS__)) 4404 | #define PEG_DEF_4(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_3(__VA_ARGS__)) 4405 | #define PEG_DEF_5(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_4(__VA_ARGS__)) 4406 | #define PEG_DEF_6(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_5(__VA_ARGS__)) 4407 | #define PEG_DEF_7(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_6(__VA_ARGS__)) 4408 | #define PEG_DEF_8(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_7(__VA_ARGS__)) 4409 | #define PEG_DEF_9(r1, ...) 
PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_8(__VA_ARGS__)) 4410 | #define PEG_DEF_10(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_9(__VA_ARGS__)) 4411 | #define PEG_DEF_11(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_10(__VA_ARGS__)) 4412 | #define PEG_DEF_12(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_11(__VA_ARGS__)) 4413 | #define PEG_DEF_13(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_12(__VA_ARGS__)) 4414 | #define PEG_DEF_14(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_13(__VA_ARGS__)) 4415 | #define PEG_DEF_15(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_14(__VA_ARGS__)) 4416 | #define PEG_DEF_16(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_15(__VA_ARGS__)) 4417 | #define PEG_DEF_17(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_16(__VA_ARGS__)) 4418 | #define PEG_DEF_18(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_17(__VA_ARGS__)) 4419 | #define PEG_DEF_19(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_18(__VA_ARGS__)) 4420 | #define PEG_DEF_20(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_19(__VA_ARGS__)) 4421 | #define PEG_DEF_21(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_20(__VA_ARGS__)) 4422 | #define PEG_DEF_22(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_21(__VA_ARGS__)) 4423 | #define PEG_DEF_23(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_22(__VA_ARGS__)) 4424 | #define PEG_DEF_24(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_23(__VA_ARGS__)) 4425 | #define PEG_DEF_25(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_24(__VA_ARGS__)) 4426 | #define PEG_DEF_26(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_25(__VA_ARGS__)) 4427 | #define PEG_DEF_27(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_26(__VA_ARGS__)) 4428 | #define PEG_DEF_28(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_27(__VA_ARGS__)) 4429 | #define PEG_DEF_29(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_28(__VA_ARGS__)) 4430 | #define PEG_DEF_30(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_29(__VA_ARGS__)) 4431 | #define PEG_DEF_31(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_30(__VA_ARGS__)) 4432 | #define PEG_DEF_32(r1, ...) 
PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_31(__VA_ARGS__)) 4433 | #define PEG_DEF_33(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_32(__VA_ARGS__)) 4434 | #define PEG_DEF_34(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_33(__VA_ARGS__)) 4435 | #define PEG_DEF_35(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_34(__VA_ARGS__)) 4436 | #define PEG_DEF_36(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_35(__VA_ARGS__)) 4437 | #define PEG_DEF_37(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_36(__VA_ARGS__)) 4438 | #define PEG_DEF_38(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_37(__VA_ARGS__)) 4439 | #define PEG_DEF_39(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_38(__VA_ARGS__)) 4440 | #define PEG_DEF_40(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_39(__VA_ARGS__)) 4441 | #define PEG_DEF_41(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_40(__VA_ARGS__)) 4442 | #define PEG_DEF_42(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_41(__VA_ARGS__)) 4443 | #define PEG_DEF_43(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_42(__VA_ARGS__)) 4444 | #define PEG_DEF_44(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_43(__VA_ARGS__)) 4445 | #define PEG_DEF_45(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_44(__VA_ARGS__)) 4446 | #define PEG_DEF_46(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_45(__VA_ARGS__)) 4447 | #define PEG_DEF_47(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_46(__VA_ARGS__)) 4448 | #define PEG_DEF_48(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_47(__VA_ARGS__)) 4449 | #define PEG_DEF_49(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_48(__VA_ARGS__)) 4450 | #define PEG_DEF_50(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_49(__VA_ARGS__)) 4451 | #define PEG_DEF_51(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_50(__VA_ARGS__)) 4452 | #define PEG_DEF_52(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_51(__VA_ARGS__)) 4453 | #define PEG_DEF_53(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_52(__VA_ARGS__)) 4454 | #define PEG_DEF_54(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_53(__VA_ARGS__)) 4455 | #define PEG_DEF_55(r1, ...) 
PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_54(__VA_ARGS__)) 4456 | #define PEG_DEF_56(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_55(__VA_ARGS__)) 4457 | #define PEG_DEF_57(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_56(__VA_ARGS__)) 4458 | #define PEG_DEF_58(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_57(__VA_ARGS__)) 4459 | #define PEG_DEF_59(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_58(__VA_ARGS__)) 4460 | #define PEG_DEF_60(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_59(__VA_ARGS__)) 4461 | #define PEG_DEF_61(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_60(__VA_ARGS__)) 4462 | #define PEG_DEF_62(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_61(__VA_ARGS__)) 4463 | #define PEG_DEF_63(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_62(__VA_ARGS__)) 4464 | #define PEG_DEF_64(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_63(__VA_ARGS__)) 4465 | #define PEG_DEF_65(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_64(__VA_ARGS__)) 4466 | #define PEG_DEF_66(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_65(__VA_ARGS__)) 4467 | #define PEG_DEF_67(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_66(__VA_ARGS__)) 4468 | #define PEG_DEF_68(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_67(__VA_ARGS__)) 4469 | #define PEG_DEF_69(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_68(__VA_ARGS__)) 4470 | #define PEG_DEF_70(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_69(__VA_ARGS__)) 4471 | #define PEG_DEF_71(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_70(__VA_ARGS__)) 4472 | #define PEG_DEF_72(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_71(__VA_ARGS__)) 4473 | #define PEG_DEF_73(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_72(__VA_ARGS__)) 4474 | #define PEG_DEF_74(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_73(__VA_ARGS__)) 4475 | #define PEG_DEF_75(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_74(__VA_ARGS__)) 4476 | #define PEG_DEF_76(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_75(__VA_ARGS__)) 4477 | #define PEG_DEF_77(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_76(__VA_ARGS__)) 4478 | #define PEG_DEF_78(r1, ...) 
PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_77(__VA_ARGS__)) 4479 | #define PEG_DEF_79(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_78(__VA_ARGS__)) 4480 | #define PEG_DEF_80(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_79(__VA_ARGS__)) 4481 | #define PEG_DEF_81(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_80(__VA_ARGS__)) 4482 | #define PEG_DEF_82(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_81(__VA_ARGS__)) 4483 | #define PEG_DEF_83(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_82(__VA_ARGS__)) 4484 | #define PEG_DEF_84(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_83(__VA_ARGS__)) 4485 | #define PEG_DEF_85(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_84(__VA_ARGS__)) 4486 | #define PEG_DEF_86(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_85(__VA_ARGS__)) 4487 | #define PEG_DEF_87(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_86(__VA_ARGS__)) 4488 | #define PEG_DEF_88(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_87(__VA_ARGS__)) 4489 | #define PEG_DEF_89(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_88(__VA_ARGS__)) 4490 | #define PEG_DEF_90(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_89(__VA_ARGS__)) 4491 | #define PEG_DEF_91(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_90(__VA_ARGS__)) 4492 | #define PEG_DEF_92(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_91(__VA_ARGS__)) 4493 | #define PEG_DEF_93(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_92(__VA_ARGS__)) 4494 | #define PEG_DEF_94(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_93(__VA_ARGS__)) 4495 | #define PEG_DEF_95(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_94(__VA_ARGS__)) 4496 | #define PEG_DEF_96(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_95(__VA_ARGS__)) 4497 | #define PEG_DEF_97(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_96(__VA_ARGS__)) 4498 | #define PEG_DEF_98(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_97(__VA_ARGS__)) 4499 | #define PEG_DEF_99(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_98(__VA_ARGS__)) 4500 | #define PEG_DEF_100(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_99(__VA_ARGS__)) 4501 | 4502 | #define AST_DEFINITIONS(...) 
\ 4503 | PEG_EXPAND(PEG_CONCAT2(PEG_DEF_, PEG_COUNT(__VA_ARGS__))(__VA_ARGS__)) 4504 | 4505 | /*----------------------------------------------------------------------------- 4506 | * parser 4507 | *---------------------------------------------------------------------------*/ 4508 | 4509 | class parser { 4510 | public: 4511 | parser() = default; 4512 | 4513 | parser(const char *s, size_t n, const Rules &rules) { 4514 | load_grammar(s, n, rules); 4515 | } 4516 | 4517 | parser(const char *s, size_t n) : parser(s, n, Rules()) {} 4518 | 4519 | parser(std::string_view sv, const Rules &rules) 4520 | : parser(sv.data(), sv.size(), rules) {} 4521 | 4522 | parser(std::string_view sv) : parser(sv.data(), sv.size(), Rules()) {} 4523 | 4524 | #if defined(__cpp_lib_char8_t) 4525 | parser(std::u8string_view sv, const Rules &rules) 4526 | : parser(reinterpret_cast(sv.data()), sv.size(), rules) {} 4527 | 4528 | parser(std::u8string_view sv) 4529 | : parser(reinterpret_cast(sv.data()), sv.size(), Rules()) {} 4530 | #endif 4531 | 4532 | operator bool() { return grammar_ != nullptr; } 4533 | 4534 | bool load_grammar(const char *s, size_t n, const Rules &rules) { 4535 | grammar_ = ParserGenerator::parse(s, n, rules, start_, 4536 | enablePackratParsing_, log_); 4537 | return grammar_ != nullptr; 4538 | } 4539 | 4540 | bool load_grammar(const char *s, size_t n) { 4541 | return load_grammar(s, n, Rules()); 4542 | } 4543 | 4544 | bool load_grammar(std::string_view sv, const Rules &rules) { 4545 | return load_grammar(sv.data(), sv.size(), rules); 4546 | } 4547 | 4548 | bool load_grammar(std::string_view sv) { 4549 | return load_grammar(sv.data(), sv.size()); 4550 | } 4551 | 4552 | bool parse_n(const char *s, size_t n, const char *path = nullptr) const { 4553 | if (grammar_ != nullptr) { 4554 | const auto &rule = (*grammar_)[start_]; 4555 | auto result = rule.parse(s, n, path, log_); 4556 | return post_process(s, n, result); 4557 | } 4558 | return false; 4559 | } 4560 | 4561 | bool 
parse_n(const char *s, size_t n, std::any &dt, 4562 | const char *path = nullptr) const { 4563 | if (grammar_ != nullptr) { 4564 | const auto &rule = (*grammar_)[start_]; 4565 | auto result = rule.parse(s, n, dt, path, log_); 4566 | return post_process(s, n, result); 4567 | } 4568 | return false; 4569 | } 4570 | 4571 | template 4572 | bool parse_n(const char *s, size_t n, T &val, 4573 | const char *path = nullptr) const { 4574 | if (grammar_ != nullptr) { 4575 | const auto &rule = (*grammar_)[start_]; 4576 | auto result = rule.parse_and_get_value(s, n, val, path, log_); 4577 | return post_process(s, n, result); 4578 | } 4579 | return false; 4580 | } 4581 | 4582 | template 4583 | bool parse_n(const char *s, size_t n, std::any &dt, T &val, 4584 | const char *path = nullptr) const { 4585 | if (grammar_ != nullptr) { 4586 | const auto &rule = (*grammar_)[start_]; 4587 | auto result = rule.parse_and_get_value(s, n, dt, val, path, log_); 4588 | return post_process(s, n, result); 4589 | } 4590 | return false; 4591 | } 4592 | 4593 | bool parse(std::string_view sv, const char *path = nullptr) const { 4594 | return parse_n(sv.data(), sv.size(), path); 4595 | } 4596 | 4597 | bool parse(std::string_view sv, std::any &dt, 4598 | const char *path = nullptr) const { 4599 | return parse_n(sv.data(), sv.size(), dt, path); 4600 | } 4601 | 4602 | template 4603 | bool parse(std::string_view sv, T &val, const char *path = nullptr) const { 4604 | return parse_n(sv.data(), sv.size(), val, path); 4605 | } 4606 | 4607 | template 4608 | bool parse(std::string_view sv, std::any &dt, T &val, 4609 | const char *path = nullptr) const { 4610 | return parse_n(sv.data(), sv.size(), dt, val, path); 4611 | } 4612 | 4613 | #if defined(__cpp_lib_char8_t) 4614 | bool parse(std::u8string_view sv, const char *path = nullptr) const { 4615 | return parse_n(reinterpret_cast(sv.data()), sv.size(), path); 4616 | } 4617 | 4618 | bool parse(std::u8string_view sv, std::any &dt, 4619 | const char *path = nullptr) 
const { 4620 | return parse_n(reinterpret_cast(sv.data()), sv.size(), dt, 4621 | path); 4622 | } 4623 | 4624 | template 4625 | bool parse(std::u8string_view sv, T &val, const char *path = nullptr) const { 4626 | return parse_n(reinterpret_cast(sv.data()), sv.size(), val, 4627 | path); 4628 | } 4629 | 4630 | template 4631 | bool parse(std::u8string_view sv, std::any &dt, T &val, 4632 | const char *path = nullptr) const { 4633 | return parse_n(reinterpret_cast(sv.data()), sv.size(), dt, 4634 | val, path); 4635 | } 4636 | #endif 4637 | 4638 | Definition &operator[](const char *s) { return (*grammar_)[s]; } 4639 | 4640 | const Definition &operator[](const char *s) const { return (*grammar_)[s]; } 4641 | 4642 | const Grammar &get_grammar() const { return *grammar_; } 4643 | 4644 | void disable_eoi_check() { 4645 | if (grammar_ != nullptr) { 4646 | auto &rule = (*grammar_)[start_]; 4647 | rule.eoi_check = false; 4648 | } 4649 | } 4650 | 4651 | void enable_packrat_parsing() { 4652 | if (grammar_ != nullptr) { 4653 | auto &rule = (*grammar_)[start_]; 4654 | rule.enablePackratParsing = enablePackratParsing_ && true; 4655 | } 4656 | } 4657 | 4658 | void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave) { 4659 | if (grammar_ != nullptr) { 4660 | auto &rule = (*grammar_)[start_]; 4661 | rule.tracer_enter = tracer_enter; 4662 | rule.tracer_leave = tracer_leave; 4663 | } 4664 | } 4665 | 4666 | void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave, 4667 | TracerStartOrEnd tracer_start, 4668 | TracerStartOrEnd tracer_end) { 4669 | if (grammar_ != nullptr) { 4670 | auto &rule = (*grammar_)[start_]; 4671 | rule.tracer_enter = tracer_enter; 4672 | rule.tracer_leave = tracer_leave; 4673 | rule.tracer_start = tracer_start; 4674 | rule.tracer_end = tracer_end; 4675 | } 4676 | } 4677 | 4678 | void set_verbose_trace(bool verbose_trace) { 4679 | if (grammar_ != nullptr) { 4680 | auto &rule = (*grammar_)[start_]; 4681 | rule.verbose_trace = verbose_trace; 4682 
| } 4683 | } 4684 | 4685 | template parser &enable_ast() { 4686 | for (auto &[_, rule] : *grammar_) { 4687 | if (!rule.action) { add_ast_action(rule); } 4688 | } 4689 | return *this; 4690 | } 4691 | 4692 | template 4693 | std::shared_ptr optimize_ast(std::shared_ptr ast, 4694 | bool opt_mode = true) const { 4695 | return AstOptimizer(opt_mode, get_no_ast_opt_rules()).optimize(ast); 4696 | } 4697 | 4698 | void set_logger(Log log) { log_ = log; } 4699 | 4700 | void set_logger( 4701 | std::function 4702 | log) { 4703 | log_ = [log](size_t line, size_t col, const std::string &msg, 4704 | const std::string & /*rule*/) { log(line, col, msg); }; 4705 | } 4706 | 4707 | private: 4708 | bool post_process(const char *s, size_t n, Definition::Result &r) const { 4709 | if (log_ && !r.ret) { r.error_info.output_log(log_, s, n); } 4710 | return r.ret && !r.recovered; 4711 | } 4712 | 4713 | std::vector get_no_ast_opt_rules() const { 4714 | std::vector rules; 4715 | for (auto &[name, rule] : *grammar_) { 4716 | if (rule.no_ast_opt) { rules.push_back(name); } 4717 | } 4718 | return rules; 4719 | } 4720 | 4721 | std::shared_ptr grammar_; 4722 | std::string start_; 4723 | bool enablePackratParsing_ = false; 4724 | Log log_; 4725 | }; 4726 | 4727 | /*----------------------------------------------------------------------------- 4728 | * enable_tracing 4729 | *---------------------------------------------------------------------------*/ 4730 | 4731 | inline void enable_tracing(parser &parser, std::ostream &os) { 4732 | parser.enable_trace( 4733 | [&](auto &ope, auto s, auto, auto &, auto &c, auto &, auto &trace_data) { 4734 | auto prev_pos = std::any_cast(trace_data); 4735 | auto pos = static_cast(s - c.s); 4736 | auto backtrack = (pos < prev_pos ? 
"*" : ""); 4737 | std::string indent; 4738 | auto level = c.trace_ids.size() - 1; 4739 | while (level--) { 4740 | indent += "│"; 4741 | } 4742 | std::string name; 4743 | { 4744 | name = peg::TraceOpeName::get(const_cast(ope)); 4745 | 4746 | auto lit = dynamic_cast(&ope); 4747 | if (lit) { name += " '" + peg::escape_characters(lit->lit_) + "'"; } 4748 | } 4749 | os << "E " << pos + 1 << backtrack << "\t" << indent << "┌" << name 4750 | << " #" << c.trace_ids.back() << std::endl; 4751 | trace_data = static_cast(pos); 4752 | }, 4753 | [&](auto &ope, auto s, auto, auto &sv, auto &c, auto &, auto len, 4754 | auto &) { 4755 | auto pos = static_cast(s - c.s); 4756 | if (len != static_cast(-1)) { pos += len; } 4757 | std::string indent; 4758 | auto level = c.trace_ids.size() - 1; 4759 | while (level--) { 4760 | indent += "│"; 4761 | } 4762 | auto ret = len != static_cast(-1) ? "└o " : "└x "; 4763 | auto name = peg::TraceOpeName::get(const_cast(ope)); 4764 | std::stringstream choice; 4765 | if (sv.choice_count() > 0) { 4766 | choice << " " << sv.choice() << "/" << sv.choice_count(); 4767 | } 4768 | std::string token; 4769 | if (!sv.tokens.empty()) { 4770 | token += ", token '"; 4771 | token += sv.tokens[0]; 4772 | token += "'"; 4773 | } 4774 | std::string matched; 4775 | if (peg::success(len) && 4776 | peg::TokenChecker::is_token(const_cast(ope))) { 4777 | matched = ", match '" + peg::escape_characters(s, len) + "'"; 4778 | } 4779 | os << "L " << pos + 1 << "\t" << indent << ret << name << " #" 4780 | << c.trace_ids.back() << choice.str() << token << matched 4781 | << std::endl; 4782 | }, 4783 | [&](auto &trace_data) { trace_data = static_cast(0); }, 4784 | [&](auto &) {}); 4785 | } 4786 | 4787 | /*----------------------------------------------------------------------------- 4788 | * enable_profiling 4789 | *---------------------------------------------------------------------------*/ 4790 | 4791 | inline void enable_profiling(parser &parser, std::ostream &os) { 4792 | 
struct Stats { 4793 | struct Item { 4794 | std::string name; 4795 | size_t success; 4796 | size_t fail; 4797 | }; 4798 | std::vector items; 4799 | std::map index; 4800 | size_t total = 0; 4801 | std::chrono::steady_clock::time_point start; 4802 | }; 4803 | 4804 | parser.enable_trace( 4805 | [&](auto &ope, auto, auto, auto &, auto &, auto &, std::any &trace_data) { 4806 | if (auto holder = dynamic_cast(&ope)) { 4807 | auto &stats = *std::any_cast(trace_data); 4808 | 4809 | auto &name = holder->name(); 4810 | if (stats.index.find(name) == stats.index.end()) { 4811 | stats.index[name] = stats.index.size(); 4812 | stats.items.push_back({name, 0, 0}); 4813 | } 4814 | stats.total++; 4815 | } 4816 | }, 4817 | [&](auto &ope, auto, auto, auto &, auto &, auto &, auto len, 4818 | std::any &trace_data) { 4819 | if (auto holder = dynamic_cast(&ope)) { 4820 | auto &stats = *std::any_cast(trace_data); 4821 | 4822 | auto &name = holder->name(); 4823 | auto index = stats.index[name]; 4824 | auto &stat = stats.items[index]; 4825 | if (len != static_cast(-1)) { 4826 | stat.success++; 4827 | } else { 4828 | stat.fail++; 4829 | } 4830 | 4831 | if (index == 0) { 4832 | auto end = std::chrono::steady_clock::now(); 4833 | auto nano = std::chrono::duration_cast( 4834 | end - stats.start) 4835 | .count(); 4836 | auto sec = nano / 1000000.0; 4837 | os << "duration: " << sec << "s (" << nano << "µs)" << std::endl 4838 | << std::endl; 4839 | 4840 | char buff[BUFSIZ]; 4841 | size_t total_success = 0; 4842 | size_t total_fail = 0; 4843 | for (auto &[name, success, fail] : stats.items) { 4844 | total_success += success; 4845 | total_fail += fail; 4846 | } 4847 | 4848 | os << " id total % success fail " 4849 | "definition" 4850 | << std::endl; 4851 | 4852 | auto grand_total = total_success + total_fail; 4853 | snprintf(buff, BUFSIZ, "%4s %10zu %5s %10zu %10zu %s", "", 4854 | grand_total, "", total_success, total_fail, 4855 | "Total counters"); 4856 | os << buff << std::endl; 4857 | 4858 | 
snprintf(buff, BUFSIZ, "%4s %10s %5s %10.2f %10.2f %s", "", "", 4859 | "", total_success * 100.0 / grand_total, 4860 | total_fail * 100.0 / grand_total, "% success/fail"); 4861 | os << buff << std::endl << std::endl; 4862 | ; 4863 | 4864 | size_t id = 0; 4865 | for (auto &[name, success, fail] : stats.items) { 4866 | auto total = success + fail; 4867 | auto ratio = total * 100.0 / stats.total; 4868 | snprintf(buff, BUFSIZ, "%4zu %10zu %5.2f %10zu %10zu %s", id, 4869 | total, ratio, success, fail, name.c_str()); 4870 | os << buff << std::endl; 4871 | id++; 4872 | } 4873 | } 4874 | } 4875 | }, 4876 | [&](auto &trace_data) { 4877 | auto stats = new Stats{}; 4878 | stats->start = std::chrono::steady_clock::now(); 4879 | trace_data = stats; 4880 | }, 4881 | [&](auto &trace_data) { 4882 | auto stats = std::any_cast(trace_data); 4883 | delete stats; 4884 | }); 4885 | } 4886 | } // namespace peg 4887 | --------------------------------------------------------------------------------