├── README.md ├── include └── parsertl │ ├── bison_lookup.hpp │ ├── capture.hpp │ ├── debug.hpp │ ├── dfa.hpp │ ├── ebnf.y │ ├── ebnf_tables.hpp │ ├── enums.hpp │ ├── generator.hpp │ ├── iterator.hpp │ ├── licence_1_0.txt │ ├── lookup.hpp │ ├── match.hpp │ ├── match_results.hpp │ ├── narrow.hpp │ ├── nt_info.hpp │ ├── parse.hpp │ ├── read_bison.hpp │ ├── rules.hpp │ ├── runtime_error.hpp │ ├── search.hpp │ ├── search_iterator.hpp │ ├── serialise.hpp │ ├── state_machine.hpp │ └── token.hpp └── tests └── include_test ├── bison_lookup.cpp ├── debug.cpp ├── dfa.cpp ├── ebnf_tables.cpp ├── enums.cpp ├── generator.cpp ├── include_test.cpp ├── include_test.sln ├── include_test.vcxproj ├── include_test.vcxproj.filters ├── include_test.vcxproj.user ├── iterator.cpp ├── lookup.cpp ├── match.cpp ├── match_results.cpp ├── narrow.cpp ├── nt_info.cpp ├── parse.cpp ├── read_bison.cpp ├── rules.cpp ├── runtime_error.cpp ├── search.cpp ├── search_iterator.cpp ├── serialise.cpp ├── state_machine.cpp └── token.cpp /README.md: -------------------------------------------------------------------------------- 1 | parsertl 2 | ======== 3 | 4 | This is the C++03 version of parsertl. Please prefer parsertl14 wherever possible. 5 | -------------------------------------------------------------------------------- /include/parsertl/bison_lookup.hpp: -------------------------------------------------------------------------------- 1 | // bison_lookup.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_BISON_LOOKUP_HPP 7 | #define PARSERTL_BISON_LOOKUP_HPP 8 | 9 | #include "match_results.hpp" 10 | 11 | namespace parsertl 12 | { 13 | // cut down yyparse(): 14 | template 15 | void bison_next(const tables_struct& tables_, iterator& iter_, 16 | results& results_) 17 | { 18 | if (iter_->id == iter_->npos()) 19 | { 20 | results_.entry.action = error; 21 | results_.entry.param = unknown_token; 22 | return; 23 | } 24 | 25 | // Refer to what yypact is saying about the current state 26 | int yyn_ = tables_.yypact[results_.stack.back()]; 27 | 28 | if (yyn_ == tables_struct::YYPACT_NINF) 29 | goto yydefault; 30 | 31 | results_.token_id = tables_.yytranslate[iter_->id]; 32 | yyn_ += static_cast(results_.token_id); 33 | 34 | if (yyn_ < 0 || tables_struct::YYLAST < yyn_ || 35 | tables_.yycheck[yyn_] != results_.token_id) 36 | goto yydefault; 37 | 38 | yyn_ = tables_.yytable[yyn_]; 39 | 40 | if (yyn_ <= 0) 41 | { 42 | if (yyn_ == 0 || yyn_ == tables_struct::YYTABLE_NINF) 43 | { 44 | results_.entry.action = error; 45 | results_.entry.param = syntax_error; 46 | return; 47 | } 48 | 49 | yyn_ *= -1; 50 | goto yyreduce; 51 | } 52 | 53 | // ACCEPT 54 | if (yyn_ == tables_struct::YYFINAL) 55 | { 56 | results_.entry.action = accept; 57 | results_.entry.param = 0; 58 | return; 59 | } 60 | 61 | // SHIFT 62 | results_.entry.action = shift; 63 | results_.entry.param = static_cast(yyn_); 64 | return; 65 | 66 | yydefault: 67 | yyn_ = tables_.yydefact[results_.stack.back()]; 68 | 69 | if (yyn_ == 0) 70 | { 71 | results_.entry.action = error; 72 | results_.entry.param = syntax_error; 73 | return; 74 | } 75 | 76 | yyreduce: 77 | results_.entry.action = reduce; 78 | results_.entry.param = static_cast(yyn_); 79 | } 80 | 81 | template 82 | void bison_lookup(const tables_struct& tables_, iterator& iter_, 83 | results& results_) 84 | { 85 | switch (results_.entry.action) 86 | { 87 | 
case error: 88 | break; 89 | case shift: 90 | results_.stack.push_back(results_.entry.param); 91 | 92 | if (iter_->id != 0) 93 | ++iter_; 94 | 95 | results_.token_id = iter_->id; 96 | 97 | if (results_.token_id == iterator::value_type::npos()) 98 | { 99 | results_.entry.action = error; 100 | results_.entry.param = unknown_token; 101 | } 102 | 103 | break; 104 | case reduce: 105 | { 106 | int size_ = tables_.yyr2[results_.entry.param]; 107 | 108 | if (size_) 109 | { 110 | results_.stack.resize(results_.stack.size() - size_); 111 | } 112 | 113 | results_.token_id = tables_.yyr1[results_.entry.param]; 114 | results_.entry.action = go_to; 115 | results_.entry.param = tables_.yypgoto[results_.token_id - 116 | tables_struct::YYNTOKENS] + results_.stack.back(); 117 | // Drop through to go_to: 118 | } 119 | case go_to: 120 | if (0 <= results_.entry.param && 121 | results_.entry.param <= tables_struct::YYLAST && 122 | tables_.yycheck[results_.entry.param] == results_.stack.back()) 123 | { 124 | results_.entry.param = tables_.yytable[results_.entry.param]; 125 | } 126 | else 127 | { 128 | results_.entry.param = tables_.yydefgoto[results_.token_id - 129 | tables_struct::YYNTOKENS]; 130 | } 131 | 132 | results_.stack.push_back(results_.entry.param); 133 | break; 134 | case accept: 135 | return; 136 | } 137 | } 138 | 139 | template 141 | void bison_lookup(const tables_struct& tables_, iterator& iter_, 142 | results& results_, token_vector& productions_) 143 | { 144 | switch (results_.entry.action) 145 | { 146 | case error: 147 | break; 148 | case shift: 149 | results_.stack.push_back(results_.entry.param); 150 | productions_.push_back(typename token_vector::value_type(iter_->id, 151 | iter_->first, iter_->second)); 152 | 153 | if (iter_->id != 0) 154 | ++iter_; 155 | 156 | results_.token_id = 157 | static_cast(iter_->id); 158 | 159 | if (results_.token_id == iterator::value_type::npos()) 160 | { 161 | results_.entry.action = error; 162 | results_.entry.param = unknown_token; 
163 | } 164 | 165 | break; 166 | case reduce: 167 | { 168 | int size_ = tables_.yyr2[results_.entry.param]; 169 | typename token_vector::value_type token_; 170 | 171 | if (size_) 172 | { 173 | results_.stack.resize(results_.stack.size() - size_); 174 | token_.first = (productions_.end() - size_)->first; 175 | token_.second = productions_.back().second; 176 | productions_.resize(productions_.size() - size_); 177 | } 178 | else 179 | { 180 | if (productions_.empty()) 181 | { 182 | token_.first = token_.second = iter_->first; 183 | } 184 | else 185 | { 186 | token_.first = token_.second = productions_.back().second; 187 | } 188 | } 189 | 190 | results_.token_id = tables_.yyr1[results_.entry.param]; 191 | productions_.push_back(token_); 192 | results_.entry.action = go_to; 193 | results_.entry.param = tables_.yypgoto[results_.token_id - 194 | tables_struct::YYNTOKENS] + results_.stack.back(); 195 | // Drop through to go_to: 196 | } 197 | case go_to: 198 | if (0 <= results_.entry.param && 199 | results_.entry.param <= tables_struct::YYLAST && 200 | tables_.yycheck[results_.entry.param] == results_.stack.back()) 201 | { 202 | results_.entry.param = tables_.yytable[results_.entry.param]; 203 | } 204 | else 205 | { 206 | results_.entry.param = tables_.yydefgoto[results_.token_id - 207 | tables_struct::YYNTOKENS]; 208 | } 209 | 210 | results_.stack.push_back(results_.entry.param); 211 | break; 212 | case accept: 213 | return; 214 | } 215 | } 216 | } 217 | 218 | #endif 219 | -------------------------------------------------------------------------------- /include/parsertl/capture.hpp: -------------------------------------------------------------------------------- 1 | // capture.hpp 2 | // Copyright (c) 2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_CAPTURE_HPP 7 | #define PARSERTL_CAPTURE_HPP 8 | 9 | #include 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | template 15 | struct capture 16 | { 17 | typedef iterator iter_type; 18 | typedef typename std::iterator_traits::value_type char_type; 19 | typedef std::basic_string string; 20 | 21 | iterator first; 22 | iterator second; 23 | 24 | capture() : 25 | first(iterator()), 26 | second(iterator()) 27 | { 28 | } 29 | 30 | capture(const iterator& first_, 31 | const iterator& second_) : 32 | first(first_), 33 | second(second_) 34 | { 35 | } 36 | 37 | bool operator==(const capture& rhs_) const 38 | { 39 | return first == rhs_.first && 40 | second == rhs_.second; 41 | } 42 | 43 | bool empty() const 44 | { 45 | return first == second; 46 | } 47 | 48 | string str() const 49 | { 50 | return string(first, second); 51 | } 52 | 53 | string substr(const std::size_t soffset_, 54 | const std::size_t eoffset_) const 55 | { 56 | return string(first + soffset_, second - eoffset_); 57 | } 58 | 59 | std::size_t length() const 60 | { 61 | return second - first; 62 | } 63 | }; 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /include/parsertl/debug.hpp: -------------------------------------------------------------------------------- 1 | // debug.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_DEBUG_HPP 7 | #define PARSERTL_DEBUG_HPP 8 | 9 | #include "dfa.hpp" 10 | #include 11 | #include "rules.hpp" 12 | 13 | namespace parsertl 14 | { 15 | template 16 | class basic_debug 17 | { 18 | public: 19 | typedef basic_rules rules; 20 | typedef std::basic_ostream ostream; 21 | 22 | static void dump(const rules& rules_, ostream& stream_) 23 | { 24 | const std::size_t start_ = rules_.start(); 25 | const production_deque& grammar_ = rules_.grammar(); 26 | const token_info_vector& tokens_info_ = rules_.tokens_info(); 27 | const std::size_t terminals_ = tokens_info_.size(); 28 | string_vector symbols_; 29 | std::set seen_; 30 | token_map map_; 31 | 32 | rules_.symbols(symbols_); 33 | 34 | // Skip EOI token 35 | for (std::size_t idx_ = 1, size_ = tokens_info_.size(); 36 | idx_ < size_; ++idx_) 37 | { 38 | const token_info& token_info_ = tokens_info_[idx_]; 39 | token_prec_assoc info_(token_info_._precedence, 40 | token_info_._associativity); 41 | typename token_map::iterator map_iter_ = map_.find(info_); 42 | 43 | if (map_iter_ == map_.end()) 44 | { 45 | map_.insert(token_pair(info_, symbols_[idx_])); 46 | } 47 | else 48 | { 49 | map_iter_->second += ' '; 50 | map_iter_->second += symbols_[idx_]; 51 | } 52 | } 53 | 54 | for (typename token_map::const_iterator iter_ = 55 | map_.begin(), end_ = map_.end(); iter_ != end_; ++iter_) 56 | { 57 | switch (iter_->first.second) 58 | { 59 | case rules::token_assoc: 60 | token(stream_); 61 | break; 62 | case rules::precedence_assoc: 63 | precedence(stream_); 64 | break; 65 | case rules::non_assoc: 66 | nonassoc(stream_); 67 | break; 68 | case rules::left_assoc: 69 | left(stream_); 70 | break; 71 | case rules::right_assoc: 72 | right(stream_); 73 | break; 74 | } 75 | 76 | stream_ << iter_->second << '\n'; 77 | } 78 | 79 | if (start_ != static_cast(~0)) 80 | { 81 | stream_ << '\n'; 82 | start(stream_); 83 | stream_ << 
symbols_[terminals_ + start_] << '\n' << '\n'; 84 | } 85 | 86 | stream_ << '%' << '%' << '\n' << '\n'; 87 | 88 | for (typename production_deque::const_iterator iter_ = 89 | grammar_.begin(), end_ = grammar_.end(); 90 | iter_ != end_; ++iter_) 91 | { 92 | if (seen_.find(iter_->_lhs) == seen_.end()) 93 | { 94 | typename production_deque::const_iterator lhs_iter_ = iter_; 95 | std::size_t index_ = lhs_iter_ - grammar_.begin(); 96 | 97 | stream_ << symbols_[terminals_ + lhs_iter_->_lhs]; 98 | stream_ << ':'; 99 | 100 | while (index_ != static_cast(~0)) 101 | { 102 | if (lhs_iter_->_rhs._symbols.empty()) 103 | { 104 | stream_ << ' '; 105 | empty(stream_); 106 | } 107 | else 108 | { 109 | typename symbol_vector::const_iterator rhs_iter_ = 110 | lhs_iter_->_rhs._symbols.begin(); 111 | typename symbol_vector::const_iterator rhs_end_ = 112 | lhs_iter_->_rhs._symbols.end(); 113 | 114 | for (; rhs_iter_ != rhs_end_; ++rhs_iter_) 115 | { 116 | const std::size_t id_ = 117 | rhs_iter_->_type == symbol::TERMINAL ? 
118 | rhs_iter_->_id : 119 | terminals_ + rhs_iter_->_id; 120 | 121 | // Don't dump '$' 122 | if (id_ > 0) 123 | { 124 | stream_ << ' ' << symbols_[id_]; 125 | } 126 | } 127 | } 128 | 129 | if (!lhs_iter_->_rhs._prec.empty()) 130 | { 131 | stream_ << ' '; 132 | prec(stream_); 133 | stream_ << lhs_iter_->_rhs._prec; 134 | } 135 | 136 | index_ = lhs_iter_->_next_lhs; 137 | 138 | if (index_ != static_cast(~0)) 139 | { 140 | const string& lhs_ = 141 | symbols_[terminals_ + lhs_iter_->_lhs]; 142 | 143 | lhs_iter_ = grammar_.begin() + index_; 144 | stream_ << '\n'; 145 | 146 | for (std::size_t i_ = 0, size_ = lhs_.size(); 147 | i_ < size_; ++i_) 148 | { 149 | stream_ << ' '; 150 | } 151 | 152 | stream_ << '|'; 153 | } 154 | } 155 | 156 | seen_.insert(iter_->_lhs); 157 | stream_ << ';' << '\n' << '\n'; 158 | } 159 | } 160 | 161 | stream_ << '%' << '%' << '\n'; 162 | } 163 | 164 | static void dump(const rules& rules_, const dfa& dfa_, ostream& stream_) 165 | { 166 | const production_deque& grammar_ = rules_.grammar(); 167 | const std::size_t terminals_ = rules_.tokens_info().size(); 168 | string_vector symbols_; 169 | 170 | rules_.symbols(symbols_); 171 | 172 | for (std::size_t idx_ = 0, dfa_size_ = dfa_.size(); 173 | idx_ < dfa_size_; ++idx_) 174 | { 175 | const dfa_state& state_ = dfa_[idx_]; 176 | const cursor_vector& config_ = state_._closure; 177 | 178 | state(idx_, stream_); 179 | 180 | for (typename cursor_vector::const_iterator iter_ = 181 | config_.begin(), end_ = config_.end(); iter_ != end_; 182 | ++iter_) 183 | { 184 | const production& p_ = grammar_[iter_->_id]; 185 | std::size_t j_ = 0; 186 | 187 | stream_ << ' ' << ' ' << symbols_[terminals_ + p_._lhs] << 188 | ' ' << '-' << '>'; 189 | 190 | for (; j_ < p_._rhs.size(); ++j_) 191 | { 192 | const symbol& symbol_ = p_._rhs[j_]; 193 | const std::size_t id_ = 194 | symbol_._type == symbol::TERMINAL ? 
symbol_._id : 195 | terminals_ + symbol_._id; 196 | 197 | if (j_ == iter_->_index) 198 | { 199 | stream_ << ' ' << '.'; 200 | } 201 | 202 | stream_ << ' ' << symbols_[id_]; 203 | } 204 | 205 | if (j_ == iter_->_index) 206 | { 207 | stream_ << ' ' << '.'; 208 | } 209 | 210 | stream_ << '\n'; 211 | } 212 | 213 | if (!state_._transitions.empty()) 214 | stream_ << '\n'; 215 | 216 | for (typename cursor_vector::const_iterator t_ = 217 | state_._transitions.begin(), e_ = state_._transitions.end(); 218 | t_ != e_; ++t_) 219 | { 220 | stream_ << ' ' << ' ' << symbols_[t_->_id] << ' ' << 221 | '-' << '>' << ' ' << t_->_index << '\n'; 222 | } 223 | 224 | stream_ << '\n'; 225 | } 226 | } 227 | 228 | private: 229 | typedef typename rules::production production; 230 | typedef typename rules::production_deque production_deque; 231 | typedef std::basic_string string; 232 | typedef typename rules::string_vector string_vector; 233 | typedef typename rules::symbol symbol; 234 | typedef typename rules::symbol_vector symbol_vector; 235 | typedef std::pair 236 | token_prec_assoc; 237 | typedef typename rules::token_info token_info; 238 | typedef typename rules::token_info_vector token_info_vector; 239 | typedef std::map token_map; 240 | typedef std::pair token_pair; 241 | 242 | static void empty(std::ostream& stream_) 243 | { 244 | stream_ << "%empty"; 245 | } 246 | 247 | static void empty(std::wostream& stream_) 248 | { 249 | stream_ << L"%empty"; 250 | } 251 | 252 | static void left(std::ostream& stream_) 253 | { 254 | stream_ << "%left "; 255 | } 256 | 257 | static void left(std::wostream& stream_) 258 | { 259 | stream_ << L"%left "; 260 | } 261 | 262 | static void nonassoc(std::ostream& stream_) 263 | { 264 | stream_ << "%nonassoc "; 265 | } 266 | 267 | static void nonassoc(std::wostream& stream_) 268 | { 269 | stream_ << L"%nonassoc "; 270 | } 271 | 272 | static void prec(std::ostream& stream_) 273 | { 274 | stream_ << "%prec "; 275 | } 276 | 277 | static void prec(std::wostream& 
stream_) 278 | { 279 | stream_ << L"%prec "; 280 | } 281 | 282 | static void precedence(std::ostream& stream_) 283 | { 284 | stream_ << "%precedence "; 285 | } 286 | 287 | static void precedence(std::wostream& stream_) 288 | { 289 | stream_ << L"%precedence "; 290 | } 291 | 292 | static void right(std::ostream& stream_) 293 | { 294 | stream_ << "%right "; 295 | } 296 | 297 | static void right(std::wostream& stream_) 298 | { 299 | stream_ << L"%right "; 300 | } 301 | 302 | static void start(std::ostream& stream_) 303 | { 304 | stream_ << "%start "; 305 | } 306 | 307 | static void start(std::wostream& stream_) 308 | { 309 | stream_ << L"%start "; 310 | } 311 | 312 | static void state(const std::size_t row_, std::ostream& stream_) 313 | { 314 | stream_ << "state " << row_ << '\n' << '\n'; 315 | } 316 | 317 | static void state(const std::size_t row_, std::wostream& stream_) 318 | { 319 | stream_ << L"state " << row_ << L'\n' << L'\n'; 320 | } 321 | 322 | static void token(std::ostream& stream_) 323 | { 324 | stream_ << "%token "; 325 | } 326 | 327 | static void token(std::wostream& stream_) 328 | { 329 | stream_ << L"%token "; 330 | } 331 | }; 332 | 333 | typedef basic_debug debug; 334 | typedef basic_debug wdebug; 335 | } 336 | 337 | #endif 338 | -------------------------------------------------------------------------------- /include/parsertl/dfa.hpp: -------------------------------------------------------------------------------- 1 | // dfa.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_DFA_HPP 7 | #define PARSERTL_DFA_HPP 8 | 9 | #include 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | struct cursor 15 | { 16 | std::size_t _id; 17 | std::size_t _index; 18 | 19 | cursor() : 20 | _id(0), 21 | _index(0) 22 | { 23 | } 24 | 25 | cursor(const std::size_t id_, const std::size_t index_) : 26 | _id(id_), 27 | _index(index_) 28 | { 29 | } 30 | 31 | bool operator==(const cursor& rhs_) const 32 | { 33 | return _id == rhs_._id && _index == rhs_._index; 34 | } 35 | 36 | bool operator<(const cursor& rhs_) const 37 | { 38 | return _id < rhs_._id || 39 | (_id == rhs_._id && _index < rhs_._index); 40 | } 41 | }; 42 | 43 | typedef std::vector cursor_vector; 44 | 45 | struct dfa_state 46 | { 47 | cursor_vector _basis; 48 | cursor_vector _closure; 49 | cursor_vector _transitions; 50 | }; 51 | 52 | typedef std::deque dfa; 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /include/parsertl/ebnf.y: -------------------------------------------------------------------------------- 1 | /* Generate code using: bison -S parsertl.cc ebnf.y */ 2 | %token EMPTY IDENTIFIER PREC TERMINAL 3 | %% 4 | 5 | rule: rhs_or; 6 | 7 | rhs_or: opt_prec_list 8 | | rhs_or '|' opt_prec_list; 9 | 10 | opt_prec_list: opt_list opt_prec; 11 | 12 | opt_list: 13 | | EMPTY 14 | | rhs_list; 15 | 16 | rhs_list: rhs 17 | | rhs_list rhs; 18 | 19 | rhs: IDENTIFIER 20 | | TERMINAL 21 | | '[' rhs_or ']' 22 | | rhs '?' 
23 | | '{' rhs_or '}' 24 | | rhs '*' 25 | | '{' rhs_or '}' '-' 26 | | rhs '+' 27 | | '(' rhs_or ')'; 28 | 29 | opt_prec: 30 | | PREC IDENTIFIER 31 | | PREC TERMINAL; 32 | 33 | %% 34 | -------------------------------------------------------------------------------- /include/parsertl/ebnf_tables.hpp: -------------------------------------------------------------------------------- 1 | // ebnf_tables.hpp 2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_EBNF_TABLES_HPP 7 | #define PARSERTL_EBNF_TABLES_HPP 8 | 9 | #include 10 | 11 | namespace parsertl 12 | { 13 | struct ebnf_tables 14 | { 15 | enum 16 | { 17 | YYFINAL = 16, 18 | YYLAST = 32, 19 | YYNTOKENS = 18, 20 | YYPACT_NINF = -4, 21 | YYTABLE_NINF = -1 22 | }; 23 | 24 | enum yytokentype 25 | { 26 | EMPTY = 258, 27 | IDENTIFIER = 259, 28 | PREC = 260, 29 | TERMINAL = 261 30 | }; 31 | 32 | std::vector yytranslate; 33 | std::vector yyr1; 34 | std::vector yyr2; 35 | std::vector yydefact; 36 | std::vector yydefgoto; 37 | std::vector yypact; 38 | std::vector yypgoto; 39 | std::vector yytable; 40 | std::vector yycheck; 41 | 42 | ebnf_tables() 43 | { 44 | const unsigned char translate[] = 45 | { 46 | 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 49 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 50 | 16, 17, 13, 15, 2, 14, 2, 2, 2, 2, 51 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 52 | 2, 2, 2, 10, 2, 2, 2, 2, 2, 2, 53 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 54 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 55 | 2, 8, 2, 9, 2, 2, 2, 2, 2, 2, 56 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 57 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 58 | 2, 2, 2, 11, 7, 12, 2, 2, 2, 2, 59 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 60 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 61 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 62 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 63 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 64 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 65 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 66 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 67 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 68 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 69 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 70 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 71 | 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 72 | 5, 6 73 | }; 74 | const unsigned char r1[] = 75 | { 76 | 0, 18, 19, 20, 20, 21, 22, 22, 22, 23, 77 | 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 78 | 25, 25, 25 79 | }; 80 | const unsigned char r2[] = 81 | { 82 | 0, 2, 1, 1, 3, 2, 0, 1, 1, 1, 83 | 2, 1, 1, 3, 2, 3, 2, 4, 2, 3, 84 | 0, 2, 2 85 | }; 86 | const unsigned char defact[] = 87 | { 88 | 6, 7, 11, 12, 6, 6, 6, 0, 2, 3, 89 | 20, 8, 9, 0, 0, 0, 1, 6, 0, 5, 90 | 10, 14, 16, 18, 13, 15, 19, 4, 21, 22, 91 | 17 92 | }; 93 | const char defgoto[] = 94 | { 95 | -1, 7, 8, 9, 10, 11, 12, 19 96 | }; 97 | const char pact[] = 98 | { 99 | -3, -4, -4, -4, -3, -3, -3, 19, 18, -4, 100 | 22, -2, 5, 3, 4, 0, -4, -3, 20, -4, 101 | 5, -4, -4, -4, -4, 14, -4, -4, -4, -4, 102 | -4 103 | }; 104 | const char pgoto[] = 105 | { 106 | -4, -4, 17, 12, -4, -4, 21, -4 107 | }; 108 | const unsigned char table[] = 109 | { 110 | 1, 2, 2, 3, 3, 4, 4, 17, 5, 5, 111 | 17, 17, 24, 6, 6, 21, 25, 26, 22, 16, 112 | 23, 13, 14, 15, 28, 17, 29, 18, 30, 27, 113 | 0, 0, 20 114 | }; 115 | const char check[] = 116 | { 117 | 3, 4, 4, 6, 6, 8, 8, 7, 11, 11, 118 | 7, 7, 9, 16, 16, 10, 12, 17, 13, 0, 119 | 15, 4, 5, 6, 4, 7, 6, 5, 14, 17, 120 | -1, -1, 11 121 | }; 122 | 123 | yytranslate.assign(translate, translate + sizeof(translate) / 124 | sizeof(translate[0])); 125 | yyr1.assign(r1, r1 + sizeof(r1) / sizeof(r1[0])); 126 | yyr2.assign(r2, r2 + sizeof(r2) / sizeof(r2[0])); 127 | yydefact.assign(defact, defact + sizeof(defact) / 128 | sizeof(defact[0])); 129 | /* BUGFIX: was '+ sizeof(defgoto[0])' which computed an end pointer one element past the array (out-of-bounds read); use the countof idiom '/ sizeof' as in the sibling assigns. */ yydefgoto.assign(defgoto, defgoto + sizeof(defgoto) / 130 | sizeof(defgoto[0])); 131 | yypact.assign(pact, pact + sizeof(pact) / sizeof(pact[0])); 132 | /* BUGFIX: same off-by-one — was '+ sizeof(pgoto[0])'. */ yypgoto.assign(pgoto, pgoto + sizeof(pgoto) / sizeof(pgoto[0])); 133 | 
yytable.assign(table, table + sizeof(table) + sizeof(table[0])); 134 | yycheck.assign(check, check + sizeof(check) / sizeof(check[0])); 135 | } 136 | }; 137 | } 138 | 139 | #endif 140 | -------------------------------------------------------------------------------- /include/parsertl/enums.hpp: -------------------------------------------------------------------------------- 1 | // enums.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_ENUMS_HPP 7 | #define PARSERTL_ENUMS_HPP 8 | 9 | namespace parsertl 10 | { 11 | enum rule_flags { enable_captures = 1 }; 12 | enum action 13 | { 14 | error, 15 | shift, 16 | reduce, 17 | go_to, 18 | accept 19 | }; 20 | enum error_type 21 | { 22 | syntax_error, 23 | non_associative, 24 | unknown_token 25 | }; 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /include/parsertl/generator.hpp: -------------------------------------------------------------------------------- 1 | // generator.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_GENERATOR_HPP 7 | #define PARSERTL_GENERATOR_HPP 8 | 9 | #include "dfa.hpp" 10 | #include "narrow.hpp" 11 | #include "nt_info.hpp" 12 | #include "rules.hpp" 13 | #include "state_machine.hpp" 14 | 15 | namespace parsertl 16 | { 17 | template 18 | class basic_generator 19 | { 20 | public: 21 | typedef typename rules::production production; 22 | typedef typename rules::symbol_vector symbol_vector; 23 | 24 | struct prod 25 | { 26 | // Not owner 27 | const production* _production; 28 | std::size_t _lhs; 29 | cursor _lhs_indexes; 30 | symbol_vector _rhs; 31 | cursor_vector _rhs_indexes; 32 | 33 | prod() : 34 | _production(0), 35 | _lhs(static_cast(~0)) 36 | { 37 | } 38 | 39 | void swap(prod& rhs_) 40 | { 41 | std::swap(_production, rhs_._production); 42 | std::swap(_lhs, rhs_._lhs); 43 | std::swap(_lhs_indexes, rhs_._lhs_indexes); 44 | _rhs.swap(rhs_._rhs); 45 | std::swap(_rhs_indexes, rhs_._rhs_indexes); 46 | } 47 | 48 | // This operator is tuned specifically for new_grammar_ lookup only 49 | bool operator<(const prod& rhs_) const 50 | { 51 | return _production->_lhs < rhs_._production->_lhs || 52 | (_production->_lhs == rhs_._production->_lhs && 53 | _production->_rhs < rhs_._production->_rhs) || 54 | (_production->_lhs == rhs_._production->_lhs && 55 | _production->_rhs == rhs_._production->_rhs && 56 | _rhs_indexes.back()._index < 57 | rhs_._rhs_indexes.back()._index); 58 | } 59 | }; 60 | 61 | typedef std::deque prod_deque; 62 | typedef typename rules::string string; 63 | 64 | static void build(rules& rules_, sm& sm_, std::string* warnings_ = 0) 65 | { 66 | dfa dfa_; 67 | prod_deque new_grammar_; 68 | std::size_t new_start_ = static_cast(~0); 69 | nt_info_vector new_nt_info_; 70 | std::string warns_; 71 | 72 | build_dfa(rules_, dfa_); 73 | rewrite(rules_, dfa_, new_grammar_, new_start_, new_nt_info_); 74 | build_first_sets(new_grammar_, new_nt_info_); 
75 | // First add EOF to follow_set of start. 76 | new_nt_info_[new_start_]._follow_set[0] = 1; 77 | build_follow_sets(new_grammar_, new_nt_info_); 78 | sm_.clear(); 79 | // new_grammar_ is only used for lookup now 80 | // so sort in order that std::lower_bound() can be used. 81 | std::sort(new_grammar_.begin(), new_grammar_.end()); 82 | build_table(rules_, dfa_, new_grammar_, new_nt_info_, 83 | sm_, warns_); 84 | 85 | // Warnings are now an error 86 | // unless you are explicitly fetching them 87 | if (!warns_.empty()) 88 | if (warnings_) 89 | *warnings_ = warns_; 90 | else 91 | throw runtime_error(warns_); 92 | 93 | // If you get an assert here then your id_type 94 | // is too small for the table. 95 | assert(static_cast(sm_._columns - 1) == sm_._columns - 1); 96 | assert(static_cast(sm_._rows - 1) == sm_._rows - 1); 97 | copy_rules(rules_, sm_); 98 | sm_._captures = rules_.captures(); 99 | } 100 | 101 | static void build_dfa(rules& rules_, dfa& dfa_) 102 | { 103 | rules_.validate(); 104 | 105 | const grammar& grammar_ = rules_.grammar(); 106 | const std::size_t terminals_ = rules_.tokens_info().size(); 107 | const std::size_t start_ = rules_.start(); 108 | hash_map hash_map_; 109 | 110 | dfa_.push_back(dfa_state()); 111 | 112 | // Only applies if build_dfa() has been called directly 113 | // from client code (i.e. build() will have already called 114 | // validate() in the normal case). 115 | if (start_ == npos()) 116 | { 117 | dfa_.back()._basis.push_back(cursor(0, 0)); 118 | } 119 | else 120 | { 121 | const std::size_t index_ = rules_.nt_locations()[start_]. 
122 | _first_production; 123 | 124 | dfa_.back()._basis.push_back(cursor(index_, 0)); 125 | } 126 | 127 | hash_map_[hash_set(dfa_.back()._basis)].push_back(0); 128 | 129 | for (std::size_t s_ = 0; s_ < dfa_.size(); ++s_) 130 | { 131 | dfa_state& state_ = dfa_[s_]; 132 | size_t_vector symbols_; 133 | typedef std::deque item_sets; 134 | item_sets item_sets_; 135 | 136 | state_._closure.assign(state_._basis.begin(), 137 | state_._basis.end()); 138 | closure(rules_, state_); 139 | 140 | for (typename cursor_vector::const_iterator iter_ = 141 | state_._closure.begin(), end_ = state_._closure.end(); 142 | iter_ != end_; ++iter_) 143 | { 144 | const production p_ = grammar_[iter_->_id]; 145 | 146 | if (iter_->_index < p_._rhs._symbols.size()) 147 | { 148 | const symbol& symbol_ = p_._rhs._symbols[iter_->_index]; 149 | const std::size_t id_ = 150 | symbol_._type == symbol::TERMINAL ? 151 | symbol_._id : terminals_ + symbol_._id; 152 | typename size_t_vector::const_iterator sym_iter_ = 153 | std::find(symbols_.begin(), symbols_.end(), id_); 154 | cursor new_pair_(iter_->_id, iter_->_index + 1); 155 | 156 | if (sym_iter_ == symbols_.end()) 157 | { 158 | symbols_.push_back(id_); 159 | item_sets_.push_back(cursor_vector()); 160 | item_sets_.back().push_back(new_pair_); 161 | } 162 | else 163 | { 164 | const std::size_t index_ = 165 | sym_iter_ - symbols_.begin(); 166 | cursor_vector& vec_ = item_sets_[index_]; 167 | typename cursor_vector::const_iterator i_ = 168 | std::find(vec_.begin(), vec_.end(), new_pair_); 169 | 170 | if (i_ == vec_.end()) 171 | { 172 | vec_.push_back(new_pair_); 173 | } 174 | } 175 | } 176 | } 177 | 178 | for (typename size_t_vector::const_iterator iter_ = 179 | symbols_.begin(), end_ = symbols_.end(); 180 | iter_ != end_; ++iter_) 181 | { 182 | std::size_t index_ = iter_ - symbols_.begin(); 183 | cursor_vector& basis_ = item_sets_[index_]; 184 | 185 | std::sort(basis_.begin(), basis_.end()); 186 | index_ = add_dfa_state(dfa_, hash_map_, basis_); 187 | 
state_._transitions.push_back(cursor(*iter_, index_)); 188 | } 189 | } 190 | } 191 | 192 | static void rewrite(const rules& rules_, dfa& dfa_, 193 | prod_deque& new_grammar_, std::size_t& new_start_, 194 | nt_info_vector& new_nt_info_) 195 | { 196 | typedef std::pair trie; 197 | typedef std::map trie_map; 198 | const grammar& grammar_ = rules_.grammar(); 199 | string_vector terminals_; 200 | string_vector non_terminals_; 201 | const std::size_t start_ = rules_.start(); 202 | trie_map map_; 203 | 204 | rules_.terminals(terminals_); 205 | rules_.non_terminals(non_terminals_); 206 | 207 | for (std::size_t sidx_ = 0, ssize_ = dfa_.size(); 208 | sidx_ != ssize_; ++sidx_) 209 | { 210 | const dfa_state& state_ = dfa_[sidx_]; 211 | 212 | for (std::size_t cidx_ = 0, csize_ = state_._closure.size(); 213 | cidx_ != csize_; ++cidx_) 214 | { 215 | const cursor& pair_ = state_._closure[cidx_]; 216 | 217 | if (pair_._index != 0) continue; 218 | 219 | const production& production_ = grammar_[pair_._id]; 220 | prod prod_; 221 | 222 | prod_._production = &production_; 223 | 224 | if (production_._lhs != start_) 225 | { 226 | const std::size_t id_ = terminals_.size() + 227 | production_._lhs; 228 | 229 | prod_._lhs_indexes._id = sidx_; 230 | 231 | for (std::size_t tidx_ = 0, 232 | tsize_ = state_._transitions.size(); 233 | tidx_ != tsize_; ++tidx_) 234 | { 235 | const cursor& pr_ = state_._transitions[tidx_]; 236 | 237 | if (pr_._id == id_) 238 | { 239 | prod_._lhs_indexes._index = pr_._index; 240 | break; 241 | } 242 | } 243 | } 244 | 245 | trie trie_(production_._lhs, prod_._lhs_indexes); 246 | typename trie_map::const_iterator map_iter_ = 247 | map_.find(trie_); 248 | 249 | if (map_iter_ == map_.end()) 250 | { 251 | prod_._lhs = map_.size(); 252 | map_[trie_] = prod_._lhs; 253 | 254 | if (production_._lhs == start_) 255 | { 256 | new_start_ = prod_._lhs; 257 | } 258 | } 259 | else 260 | { 261 | prod_._lhs = map_iter_->second; 262 | } 263 | 264 | std::size_t index_ = sidx_; 265 | 
266 | if (production_._rhs._symbols.empty()) 267 | { 268 | prod_._rhs_indexes.push_back(cursor(sidx_, sidx_)); 269 | } 270 | 271 | for (std::size_t ridx_ = 0, rsize_ = production_._rhs. 272 | _symbols.size(); ridx_ != rsize_; ++ridx_) 273 | { 274 | const symbol& symbol_ = 275 | production_._rhs._symbols[ridx_]; 276 | const dfa_state& st_ = dfa_[index_]; 277 | 278 | prod_._rhs_indexes.push_back(cursor(index_, 0)); 279 | 280 | for (std::size_t tidx_ = 0, 281 | tsize_ = st_._transitions.size(); 282 | tidx_ != tsize_; ++tidx_) 283 | { 284 | const std::size_t id_ = 285 | symbol_._type == symbol::TERMINAL ? 286 | symbol_._id : 287 | terminals_.size() + symbol_._id; 288 | const cursor& pr_ = st_._transitions[tidx_]; 289 | 290 | if (pr_._id == id_) 291 | { 292 | index_ = pr_._index; 293 | break; 294 | } 295 | } 296 | 297 | prod_._rhs_indexes.back()._index = index_; 298 | prod_._rhs.push_back(symbol_); 299 | 300 | if (symbol_._type == symbol::NON_TERMINAL) 301 | { 302 | symbol& rhs_symbol_ = prod_._rhs.back(); 303 | 304 | trie_ = trie(symbol_._id, 305 | prod_._rhs_indexes.back()); 306 | map_iter_ = map_.find(trie_); 307 | 308 | if (map_iter_ == map_.end()) 309 | { 310 | rhs_symbol_._id = map_.size(); 311 | map_[trie_] = rhs_symbol_._id; 312 | } 313 | else 314 | { 315 | rhs_symbol_._id = map_iter_->second; 316 | } 317 | } 318 | } 319 | 320 | new_grammar_.push_back(prod()); 321 | new_grammar_.back().swap(prod_); 322 | } 323 | } 324 | 325 | new_nt_info_.assign(map_.size(), 326 | nt_info(rules_.tokens_info().size())); 327 | } 328 | 329 | // http://www.sqlite.org/src/artifact?ci=trunk&filename=tool/lemon.c 330 | // FindFirstSets() 331 | static void build_first_sets(const prod_deque& grammar_, 332 | nt_info_vector& nt_info_) 333 | { 334 | bool progress_ = true; 335 | 336 | // First compute all lambdas 337 | do 338 | { 339 | progress_ = 0; 340 | 341 | for (typename prod_deque::const_iterator iter_ = 342 | grammar_.begin(), end_ = grammar_.end(); iter_ != end_; 343 | ++iter_) 344 | 
{ 345 | if (nt_info_[iter_->_lhs]._nullable) continue; 346 | 347 | std::size_t i_ = 0; 348 | const std::size_t rhs_size_ = iter_->_rhs.size(); 349 | 350 | for (; i_ < rhs_size_; i_++) 351 | { 352 | const symbol& symbol_ = iter_->_rhs[i_]; 353 | 354 | if (symbol_._type != symbol::NON_TERMINAL || 355 | !nt_info_[symbol_._id]._nullable) 356 | { 357 | break; 358 | } 359 | } 360 | 361 | if (i_ == rhs_size_) 362 | { 363 | nt_info_[iter_->_lhs]._nullable = true; 364 | progress_ = 1; 365 | } 366 | } 367 | } while (progress_); 368 | 369 | // Now compute all first sets 370 | do 371 | { 372 | progress_ = 0; 373 | 374 | for (typename prod_deque::const_iterator iter_ = 375 | grammar_.begin(), end_ = grammar_.end(); iter_ != end_; 376 | ++iter_) 377 | { 378 | nt_info& lhs_info_ = nt_info_[iter_->_lhs]; 379 | const std::size_t rhs_size_ = iter_->_rhs.size(); 380 | 381 | for (std::size_t i_ = 0; i_ < rhs_size_; i_++) 382 | { 383 | const symbol& symbol_ = iter_->_rhs[i_]; 384 | 385 | if (symbol_._type == symbol::TERMINAL) 386 | { 387 | progress_ |= 388 | set_add(lhs_info_._first_set, symbol_._id); 389 | break; 390 | } 391 | else if (iter_->_lhs == symbol_._id) 392 | { 393 | if (!lhs_info_._nullable) break; 394 | } 395 | else 396 | { 397 | nt_info& rhs_info_ = nt_info_[symbol_._id]; 398 | 399 | progress_ |= set_union(lhs_info_._first_set, 400 | rhs_info_._first_set); 401 | 402 | if (!rhs_info_._nullable) break; 403 | } 404 | } 405 | } 406 | } while (progress_); 407 | } 408 | 409 | static void build_follow_sets(const prod_deque& grammar_, 410 | nt_info_vector& nt_info_) 411 | { 412 | for (;;) 413 | { 414 | bool changes_ = false; 415 | typename prod_deque::const_iterator iter_ = grammar_.begin(); 416 | typename prod_deque::const_iterator end_ = grammar_.end(); 417 | 418 | for (; iter_ != end_; ++iter_) 419 | { 420 | typename symbol_vector::const_iterator rhs_iter_ = 421 | iter_->_rhs.begin(); 422 | typename symbol_vector::const_iterator rhs_end_ = 423 | iter_->_rhs.end(); 424 | 425 | 
for (; rhs_iter_ != rhs_end_; ++rhs_iter_) 426 | { 427 | if (rhs_iter_->_type == symbol::NON_TERMINAL) 428 | { 429 | typename symbol_vector::const_iterator next_iter_ = 430 | rhs_iter_ + 1; 431 | nt_info& lhs_info_ = nt_info_[rhs_iter_->_id]; 432 | bool nullable_ = next_iter_ == rhs_end_; 433 | 434 | if (next_iter_ != rhs_end_) 435 | { 436 | if (next_iter_->_type == symbol::TERMINAL) 437 | { 438 | const std::size_t id_ = next_iter_->_id; 439 | 440 | // Just add terminal. 441 | changes_ |= set_add 442 | (lhs_info_._follow_set, id_); 443 | } 444 | else 445 | { 446 | // If there is a production A -> aBb 447 | // then everything in FIRST(b) is 448 | // placed in FOLLOW(B). 449 | const nt_info* rhs_info_ = 450 | &nt_info_[next_iter_->_id]; 451 | 452 | changes_ |= set_union(lhs_info_._follow_set, 453 | rhs_info_->_first_set); 454 | ++next_iter_; 455 | 456 | // If nullable, keep going 457 | if (rhs_info_->_nullable) 458 | { 459 | for (; next_iter_ != rhs_end_; 460 | ++next_iter_) 461 | { 462 | std::size_t next_id_ = 463 | static_cast(~0); 464 | 465 | if (next_iter_->_type == 466 | symbol::TERMINAL) 467 | { 468 | next_id_ = next_iter_->_id; 469 | // Just add terminal. 470 | changes_ |= set_add 471 | (lhs_info_._follow_set, 472 | next_id_); 473 | break; 474 | } 475 | else 476 | { 477 | next_id_ = next_iter_->_id; 478 | rhs_info_ = &nt_info_[next_id_]; 479 | changes_ |= set_union 480 | (lhs_info_._follow_set, 481 | rhs_info_->_first_set); 482 | 483 | if (!rhs_info_->_nullable) 484 | { 485 | break; 486 | } 487 | } 488 | } 489 | 490 | nullable_ = next_iter_ == rhs_end_; 491 | } 492 | } 493 | } 494 | 495 | if (nullable_) 496 | { 497 | // If there is a production A -> aB 498 | // then everything in FOLLOW(A) is in FOLLOW(B). 
499 | const nt_info& rhs_info_ = 500 | nt_info_[iter_->_lhs]; 501 | 502 | changes_ |= set_union(lhs_info_._follow_set, 503 | rhs_info_._follow_set); 504 | } 505 | } 506 | } 507 | } 508 | 509 | if (!changes_) break; 510 | } 511 | } 512 | 513 | private: 514 | typedef typename sm::entry entry; 515 | typedef typename rules::production_deque grammar; 516 | typedef std::vector size_t_vector; 517 | typedef std::map hash_map; 518 | typedef typename rules::string_vector string_vector; 519 | typedef typename rules::symbol symbol; 520 | typedef typename rules::token_info token_info; 521 | typedef typename rules::token_info_vector token_info_vector; 522 | 523 | static void build_table(const rules& rules_, const dfa& dfa_, 524 | const prod_deque& new_grammar_, const nt_info_vector& new_nt_info_, 525 | sm& sm_, std::string& warnings_) 526 | { 527 | const grammar& grammar_ = rules_.grammar(); 528 | const std::size_t start_ = rules_.start(); 529 | const std::size_t terminals_ = rules_.tokens_info().size(); 530 | const std::size_t non_terminals_ = rules_.nt_locations().size(); 531 | string_vector symbols_; 532 | const std::size_t columns_ = terminals_ + non_terminals_; 533 | std::size_t index_ = 0; 534 | 535 | rules_.symbols(symbols_); 536 | sm_._columns = columns_; 537 | sm_._rows = dfa_.size(); 538 | sm_.push(); 539 | 540 | for (typename dfa::const_iterator iter_ = dfa_.begin(), 541 | end_ = dfa_.end(); iter_ != end_; ++iter_, ++index_) 542 | { 543 | // shift and gotos 544 | for (typename cursor_vector::const_iterator titer_ = 545 | iter_->_transitions.begin(), 546 | tend_ = iter_->_transitions.end(); 547 | titer_ != tend_; ++titer_) 548 | { 549 | const std::size_t id_ = titer_->_id; 550 | entry lhs_ = sm_.at(index_, id_); 551 | const entry rhs_((id_ < terminals_) ? 
552 | // TERMINAL 553 | shift : 554 | // NON_TERMINAL 555 | go_to, 556 | static_cast(titer_->_index)); 557 | 558 | if (fill_entry(rules_, iter_->_closure, symbols_, 559 | lhs_, id_, rhs_, warnings_)) 560 | sm_.set(index_, id_, lhs_); 561 | } 562 | 563 | // reductions 564 | for (typename cursor_vector::const_iterator citer_ = 565 | iter_->_closure.begin(), 566 | cend_ = iter_->_closure.end(); citer_ != cend_; ++citer_) 567 | { 568 | const production& production_ = grammar_[citer_->_id]; 569 | 570 | if (production_._rhs._symbols.size() == citer_->_index) 571 | { 572 | char_vector follow_set_(terminals_, 0); 573 | prod key_; 574 | 575 | key_._production = &production_; 576 | // Only the second value is relevant for the lookup 577 | key_._rhs_indexes.push_back(cursor(index_, index_)); 578 | 579 | // config is reduction 580 | for (typename prod_deque::const_iterator ng_iter_ = 581 | std::lower_bound(new_grammar_.begin(), 582 | new_grammar_.end(), key_), 583 | ng_end_ = new_grammar_.end(); 584 | ng_iter_ != ng_end_; ++ng_iter_) 585 | { 586 | if (production_._lhs == 587 | ng_iter_->_production->_lhs && 588 | production_._rhs == 589 | ng_iter_->_production->_rhs && 590 | index_ == ng_iter_->_rhs_indexes.back()._index) 591 | { 592 | const std::size_t lhs_id_ = ng_iter_->_lhs; 593 | 594 | set_union(follow_set_, 595 | new_nt_info_[lhs_id_]._follow_set); 596 | } 597 | else 598 | break; 599 | } 600 | 601 | for (std::size_t id_ = 0, size_ = follow_set_.size(); 602 | id_ < size_; ++id_) 603 | { 604 | if (!follow_set_[id_]) continue; 605 | 606 | entry lhs_ = sm_.at(index_, id_); 607 | const entry rhs_(production_._lhs == start_ ? 
608 | accept : 609 | reduce, 610 | static_cast(production_._index)); 611 | 612 | if (fill_entry(rules_, iter_->_closure, symbols_, 613 | lhs_, id_, rhs_, warnings_)) 614 | sm_.set(index_, id_, lhs_); 615 | } 616 | } 617 | } 618 | } 619 | } 620 | 621 | static void copy_rules(const rules& rules_, sm& sm_) 622 | { 623 | const grammar& grammar_ = rules_.grammar(); 624 | const std::size_t terminals_ = rules_.tokens_info().size(); 625 | typename grammar::const_iterator iter_ = grammar_.begin(); 626 | typename grammar::const_iterator end_ = grammar_.end(); 627 | 628 | for (; iter_ != end_; ++iter_) 629 | { 630 | const production& production_ = *iter_; 631 | typename symbol_vector::const_iterator rhs_iter_ = 632 | production_._rhs._symbols.begin(); 633 | typename symbol_vector::const_iterator rhs_end_ = 634 | production_._rhs._symbols.end(); 635 | 636 | sm_._rules.push_back(typename sm::id_type_vector_pair()); 637 | 638 | typename sm::id_type_vector_pair& pair_ = sm_._rules.back(); 639 | 640 | pair_._lhs = static_cast(terminals_ + 641 | production_._lhs); 642 | 643 | for (; rhs_iter_ != rhs_end_; ++rhs_iter_) 644 | { 645 | const symbol& symbol_ = *rhs_iter_; 646 | 647 | if (symbol_._type == symbol::TERMINAL) 648 | { 649 | pair_._rhs.push_back(static_cast 650 | (symbol_._id)); 651 | } 652 | else 653 | { 654 | pair_._rhs.push_back(static_cast 655 | (terminals_ + symbol_._id)); 656 | } 657 | } 658 | } 659 | } 660 | 661 | // Helper functions: 662 | 663 | // Add a new element to the set. Return true if the element was added 664 | // and false if it was already there. 665 | static bool set_add(char_vector& s_, const std::size_t e_) 666 | { 667 | const char rv_ = s_[e_]; 668 | 669 | assert(e_ < s_.size()); 670 | s_[e_] = 1; 671 | return !rv_; 672 | } 673 | 674 | // Add every element of rhs_ to lhs_. Return true if lhs_ changes. 
675 | static bool set_union(char_vector& lhs_, const char_vector& rhs_) 676 | { 677 | const std::size_t size_ = lhs_.size(); 678 | bool progress_ = false; 679 | char* lhs_ptr_ = &lhs_.front(); 680 | const char* rhs_ptr_ = &rhs_.front(); 681 | 682 | for (std::size_t i_ = 0; i_ < size_; i_++) 683 | { 684 | if (rhs_ptr_[i_] == 0) continue; 685 | 686 | if (lhs_ptr_[i_] == 0) 687 | { 688 | progress_ = true; 689 | lhs_ptr_[i_] = 1; 690 | } 691 | } 692 | 693 | return progress_; 694 | } 695 | 696 | static void closure(const rules& rules_, dfa_state& state_) 697 | { 698 | const typename rules::nt_location_vector& nt_locations_ = 699 | rules_.nt_locations(); 700 | const grammar& grammar_ = rules_.grammar(); 701 | 702 | for (std::size_t c_ = 0; c_ < state_._closure.size(); ++c_) 703 | { 704 | const cursor pair_ = state_._closure[c_]; 705 | const production* p_ = &grammar_[pair_._id]; 706 | const std::size_t rhs_size_ = p_->_rhs._symbols.size(); 707 | 708 | if (pair_._index < rhs_size_) 709 | { 710 | // SHIFT 711 | const symbol& symbol_ = p_->_rhs._symbols[pair_._index]; 712 | 713 | if (symbol_._type == symbol::NON_TERMINAL) 714 | { 715 | for (std::size_t rule_ = 716 | nt_locations_[symbol_._id]._first_production; 717 | rule_ != npos(); rule_ = grammar_[rule_]._next_lhs) 718 | { 719 | const cursor new_pair_(rule_, 0); 720 | typename cursor_vector::const_iterator i_ = 721 | std::find(state_._closure.begin(), 722 | state_._closure.end(), new_pair_); 723 | 724 | if (i_ == state_._closure.end()) 725 | { 726 | state_._closure.push_back(new_pair_); 727 | } 728 | } 729 | } 730 | } 731 | } 732 | } 733 | 734 | static std::size_t add_dfa_state(dfa& dfa_, hash_map& hash_map_, 735 | cursor_vector& basis_) 736 | { 737 | size_t_vector& states_ = hash_map_[hash_set(basis_)]; 738 | std::size_t index_ = npos(); 739 | 740 | if (!states_.empty()) 741 | { 742 | for (typename size_t_vector::const_iterator iter_ = 743 | states_.begin(), end_ = states_.end(); iter_ != end_; 744 | ++iter_) 745 | { 
746 | dfa_state& state_ = dfa_[*iter_]; 747 | 748 | if (state_._basis == basis_) 749 | { 750 | index_ = *iter_; 751 | break; 752 | } 753 | } 754 | } 755 | 756 | if (states_.empty() || index_ == npos()) 757 | { 758 | index_ = dfa_.size(); 759 | states_.push_back(index_); 760 | dfa_.push_back(dfa_state()); 761 | dfa_.back()._basis.swap(basis_); 762 | } 763 | 764 | return index_; 765 | } 766 | 767 | static std::size_t hash_set(cursor_vector& vec_) 768 | { 769 | std::size_t hash_ = 0; 770 | 771 | for (typename cursor_vector::const_iterator iter_ = 772 | vec_.begin(), end_ = vec_.end(); iter_ != end_; ++iter_) 773 | { 774 | hash_ *= 571; 775 | hash_ += iter_->_id * 37 + iter_->_index; 776 | } 777 | 778 | return hash_; 779 | } 780 | 781 | static bool fill_entry(const rules& rules_, 782 | const cursor_vector& config_, const string_vector& symbols_, 783 | entry& lhs_, const std::size_t id_, const entry& rhs_, 784 | std::string& warnings_) 785 | { 786 | bool modified_ = false; 787 | const grammar& grammar_ = rules_.grammar(); 788 | const token_info_vector& tokens_info_ = rules_.tokens_info(); 789 | const std::size_t terminals_ = tokens_info_.size(); 790 | static const char* actions_[] = 791 | { "ERROR", "SHIFT", "REDUCE", "GOTO", "ACCEPT" }; 792 | bool error_ = false; 793 | 794 | if (lhs_.action == error) 795 | { 796 | if (lhs_.param == syntax_error) 797 | { 798 | // No conflict 799 | lhs_ = rhs_; 800 | modified_ = true; 801 | } 802 | else 803 | { 804 | error_ = true; 805 | } 806 | } 807 | else 808 | { 809 | std::size_t lhs_prec_ = 0; 810 | typename rules::associativity lhs_assoc_ = rules::token_assoc; 811 | std::size_t rhs_prec_ = 0; 812 | const token_info* iter_ = &tokens_info_[id_]; 813 | 814 | if (lhs_.action == shift) 815 | { 816 | lhs_prec_ = iter_->_precedence; 817 | lhs_assoc_ = iter_->_associativity; 818 | } 819 | else if (lhs_.action == reduce) 820 | { 821 | const production& prod_ = grammar_[lhs_.param]; 822 | 823 | lhs_prec_ = prod_._precedence; 824 | lhs_assoc_ 
= prod_._associativity; 825 | } 826 | 827 | if (rhs_.action == shift) 828 | { 829 | rhs_prec_ = iter_->_precedence; 830 | } 831 | else if (rhs_.action == reduce) 832 | { 833 | rhs_prec_ = grammar_[rhs_.param]._precedence; 834 | } 835 | 836 | if (lhs_.action == shift && rhs_.action == reduce) 837 | { 838 | if (lhs_prec_ == 0 || rhs_prec_ == 0) 839 | { 840 | // Favour shift (leave lhs as it is). 841 | std::ostringstream ss_; 842 | 843 | ss_ << actions_[lhs_.action]; 844 | dump_action(grammar_, terminals_, config_, symbols_, 845 | id_, lhs_, ss_); 846 | ss_ << '/' << actions_[rhs_.action]; 847 | dump_action(grammar_, terminals_, config_, symbols_, 848 | id_, rhs_, ss_); 849 | ss_ << " conflict.\n"; 850 | warnings_ += ss_.str(); 851 | } 852 | else if (lhs_prec_ == rhs_prec_) 853 | { 854 | switch (lhs_assoc_) 855 | { 856 | case rules::precedence_assoc: 857 | // Favour shift (leave lhs as it is). 858 | { 859 | std::ostringstream ss_; 860 | 861 | ss_ << actions_[lhs_.action]; 862 | dump_action(grammar_, terminals_, config_, 863 | symbols_, id_, lhs_, ss_); 864 | ss_ << '/' << actions_[rhs_.action]; 865 | dump_action(grammar_, terminals_, config_, 866 | symbols_, id_, rhs_, ss_); 867 | ss_ << " conflict.\n"; 868 | warnings_ += ss_.str(); 869 | } 870 | 871 | break; 872 | case rules::non_assoc: 873 | lhs_ = entry(error, non_associative); 874 | modified_ = true; 875 | break; 876 | case rules::left_assoc: 877 | lhs_ = rhs_; 878 | modified_ = true; 879 | break; 880 | } 881 | } 882 | else if (rhs_prec_ > lhs_prec_) 883 | { 884 | lhs_ = rhs_; 885 | modified_ = true; 886 | } 887 | } 888 | else 889 | { 890 | error_ = true; 891 | } 892 | } 893 | 894 | if (error_) 895 | { 896 | std::ostringstream ss_; 897 | 898 | ss_ << actions_[lhs_.action]; 899 | dump_action(grammar_, terminals_, config_, symbols_, id_, lhs_, 900 | ss_); 901 | ss_ << '/' << actions_[rhs_.action]; 902 | dump_action(grammar_, terminals_, config_, symbols_, id_, rhs_, 903 | ss_); 904 | ss_ << " conflict.\n"; 905 | 
warnings_ += ss_.str(); 906 | 907 | if (lhs_.action == reduce && 908 | rhs_.action == reduce && 909 | // Take the earlier rule on reduce/reduce error 910 | rhs_.param < lhs_.param) 911 | { 912 | lhs_.param = rhs_.param; 913 | modified_ = true; 914 | } 915 | } 916 | 917 | return modified_; 918 | } 919 | 920 | static void dump_action(const grammar& grammar_, 921 | const std::size_t terminals_, const cursor_vector& config_, 922 | const string_vector& symbols_, const std::size_t id_, 923 | const entry& entry_, std::ostringstream& ss_) 924 | { 925 | if (entry_.action == shift) 926 | { 927 | typename cursor_vector::const_iterator iter_ = 928 | config_.begin(); 929 | typename cursor_vector::const_iterator end_ = 930 | config_.end(); 931 | 932 | for (; iter_ != end_; ++iter_) 933 | { 934 | const production& production_ = grammar_[iter_->_id]; 935 | 936 | if (production_._rhs._symbols.size() > iter_->_index && 937 | production_._rhs._symbols[iter_->_index]._id == id_) 938 | { 939 | dump_production(production_, iter_->_index, terminals_, 940 | symbols_, ss_); 941 | } 942 | } 943 | } 944 | else if (entry_.action == reduce) 945 | { 946 | const production& production_ = grammar_[entry_.param]; 947 | 948 | dump_production(production_, static_cast(~0), 949 | terminals_, symbols_, ss_); 950 | } 951 | } 952 | 953 | static void dump_production(const production& production_, 954 | const std::size_t dot_, const std::size_t terminals_, 955 | const string_vector& symbols_, std::ostringstream& ss_) 956 | { 957 | typename symbol_vector::const_iterator sym_iter_ = 958 | production_._rhs._symbols.begin(); 959 | typename symbol_vector::const_iterator sym_end_ = 960 | production_._rhs._symbols.end(); 961 | std::size_t index_ = 0; 962 | 963 | ss_ << " ("; 964 | narrow(symbols_[terminals_ + production_._lhs].c_str(), ss_); 965 | ss_ << " -> "; 966 | 967 | if (sym_iter_ != sym_end_) 968 | { 969 | const std::size_t id_ = sym_iter_->_type == symbol::TERMINAL ? 
970 | sym_iter_->_id : 971 | terminals_ + sym_iter_->_id; 972 | 973 | if (index_ == dot_) ss_ << ". "; 974 | 975 | narrow(symbols_[id_].c_str(), ss_); 976 | ++sym_iter_; 977 | ++index_; 978 | } 979 | 980 | for (; sym_iter_ != sym_end_; ++sym_iter_, ++index_) 981 | { 982 | const std::size_t id_ = sym_iter_->_type == symbol::TERMINAL ? 983 | sym_iter_->_id : 984 | terminals_ + sym_iter_->_id; 985 | 986 | ss_ << ' '; 987 | 988 | if (index_ == dot_) ss_ << ". "; 989 | 990 | narrow(symbols_[id_].c_str(), ss_); 991 | } 992 | 993 | ss_ << ')'; 994 | } 995 | 996 | static std::size_t npos() 997 | { 998 | return static_cast(~0); 999 | } 1000 | }; 1001 | 1002 | typedef basic_generator generator; 1003 | typedef basic_generator 1004 | uncompressed_generator; 1005 | typedef basic_generator wgenerator; 1006 | typedef basic_generator 1007 | wuncompressed_generator; 1008 | } 1009 | 1010 | #endif 1011 | -------------------------------------------------------------------------------- /include/parsertl/iterator.hpp: -------------------------------------------------------------------------------- 1 | // iterator.hpp 2 | // Copyright (c) 2022-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef PARSERTL_ITERATOR_HPP
#define PARSERTL_ITERATOR_HPP

// NOTE(review): the header name below was stripped by the extraction
// (angle-bracket text lost) — presumably <iterator>; confirm upstream.
#include 
#include "lookup.hpp"
#include "match_results.hpp"
#include "token.hpp"

namespace parsertl
{
    // Forward iterator over parser results: each increment runs the state
    // machine until the next reduce, accept or error action. On accept or
    // error the iterator becomes the end iterator (_sm set to 0).
    // NOTE(review): the template parameter list was stripped by the
    // extraction; presumably <lexer_iterator, sm_type, ...> — confirm.
    template
    class iterator
    {
    public:
        // NOTE(review): template arguments stripped by the extraction here
        // and in several typedefs below.
        typedef basic_match_results results;
        typedef results value_type;
        typedef ptrdiff_t difference_type;
        typedef const value_type* pointer;
        typedef const value_type& reference;
        typedef std::forward_iterator_tag iterator_category;

        // Qualify token to prevent arg dependant lookup
        typedef parsertl::token token;
        typedef typename token::token_vector token_vector;

        // Default construction yields the end iterator (_sm == 0).
        iterator() :
            _sm(0)
        {
        }

        // Begin parsing at iter_ using state machine sm_; advances
        // immediately to the first reduce/accept/error.
        iterator(const lexer_iterator& iter_, const sm_type& sm_) :
            _iter(iter_),
            _results(_iter->id, sm_),
            _sm(&sm_)
        {
            // The first action can only ever be reduce
            // if the grammar treats no input as valid.
            if (_results.entry.action != reduce)
                lookup();
        }

        // As above, but pre-sizes the results stack and the production
        // vector with reserved_ elements.
        iterator(const lexer_iterator& iter_, const sm_type& sm_,
            const std::size_t reserved_) :
            _iter(iter_),
            _results(_iter->id, sm_, reserved_),
            _productions(reserved_),
            _sm(&sm_)
        {
            // The first action can only ever be reduce
            // if the grammar treats no input as valid.
            if (_results.entry.action != reduce)
                lookup();
        }

        // $index_ token of the rule currently being reduced.
        typename token_vector::value_type dollar(const std::size_t index_) const
        {
            return _results.dollar(index_, *_sm, _productions);
        }

        // Number of right hand side symbols of rule index_.
        std::size_t production_size(const std::size_t index_) const
        {
            return _results.production_size(*_sm, index_);
        }

        iterator& operator ++()
        {
            lookup();
            return *this;
        }

        iterator operator ++(int)
        {
            iterator iter_ = *this;

            lookup();
            return iter_;
        }

        const value_type& operator *() const
        {
            return _results;
        }

        const value_type* operator ->() const
        {
            return &_results;
        }

        // Two end iterators (_sm == 0) always compare equal; otherwise the
        // full results state is compared.
        bool operator ==(const iterator& rhs_) const
        {
            return _sm == rhs_._sm &&
                (_sm == 0 ? true :
                _results == rhs_._results);
        }

        bool operator !=(const iterator& rhs_) const
        {
            return !(*this == rhs_);
        }

        // Current position of the underlying lexer.
        lexer_iterator lexer_iter() const
        {
            return _iter;
        }

    private:
        lexer_iterator _iter;
        // NOTE(review): template arguments stripped by the extraction.
        basic_match_results _results;
        token_vector _productions;
        // 0 marks the end iterator (reached on accept or error).
        const sm_type* _sm;

        // Advance the parse to the next reduce/accept/error, skipping over
        // shift and go_to actions.
        void lookup()
        {
            // do while because we need to move past the current reduce action
            do
            {
                parsertl::lookup(_iter, *_sm, _results, _productions);
            } while (_results.entry.action == shift ||
                _results.entry.action == go_to);

            switch (_results.entry.action)
            {
            case accept:
            case error:
                // Terminal states: become the end iterator.
                _sm = 0;
                break;
            default:
                break;
            }
        }
    };

    // NOTE(review): the template arguments of these typedefs were stripped
    // by the extraction (angle-bracket text lost).
    typedef iterator siterator;
    typedef iterator citerator;
    typedef iterator wsiterator;
    typedef iterator wciterator;
}

#endif
--------------------------------------------------------------------------------
/include/parsertl/licence_1_0.txt:
--------------------------------------------------------------------------------
Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/include/parsertl/lookup.hpp:
--------------------------------------------------------------------------------
// lookup.hpp
// Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef PARSERTL_LOOKUP_HPP
#define PARSERTL_LOOKUP_HPP

#include "match_results.hpp"
#include "token.hpp"

namespace parsertl
{
    // parse sequence but do not keep track of productions
    //
    // Executes exactly ONE parser action — the one currently stored in
    // results_.entry — and leaves the follow-up action in results_.entry
    // for the next call. Callers drive the parse by calling this in a loop
    // until accept or error.
    // NOTE(review): the template parameter lists in this file were stripped
    // by the extraction (angle-bracket text lost), as were the template
    // arguments of basic_match_results below.
    template
    void lookup(lexer_iterator& iter_, const sm_type& sm_,
        basic_match_results& results_)
    {
        switch (results_.entry.action)
        {
        case shift:
            // Push the shifted-to state...
            results_.stack.push_back(results_.entry.param);

            // ...and fetch the next lexer token. Token id 0 is end of
            // input: never advance past it.
            if (iter_->id != 0)
                ++iter_;

            results_.token_id = iter_->id;

            if (results_.token_id == lexer_iterator::value_type::npos())
            {
                // The lexer failed to match anything here.
                results_.entry.action = error;
                results_.entry.param = unknown_token;
            }
            else
            {
                results_.entry =
                    sm_.at(results_.entry.param, results_.token_id);
            }

            break;
        case reduce:
        {
            // Pop one state per right hand side symbol of the rule...
            const std::size_t size_ =
                sm_._rules[results_.entry.param]._rhs.size();

            if (size_)
            {
                results_.stack.resize(results_.stack.size() - size_);
            }

            // ...then take the GOTO entry for the rule's left hand side.
            results_.token_id = sm_._rules[results_.entry.param]._lhs;
            results_.entry = sm_.at(results_.stack.back(), results_.token_id);
            break;
        }
        case go_to:
            // Push the goto state and look up the action for the (still
            // pending) lookahead token.
            results_.stack.push_back(results_.entry.param);
            results_.token_id = iter_->id;
            results_.entry = sm_.at(results_.stack.back(), results_.token_id);
            break;
        case accept:
        {
            // Pop the right hand side of the start rule.
            const std::size_t size_ =
                sm_._rules[results_.entry.param]._rhs.size();

            if (size_)
            {
                results_.stack.resize(results_.stack.size() - size_);
            }

            break;
        }
        default:
            // error
            break;
        }
    }

    // Parse sequence and maintain production vector
    //
    // Same stepping behaviour as the overload above, but additionally keeps
    // productions_ in sync with the parse so that $n tokens can be read
    // during reductions (see basic_match_results::dollar()).
    template
    void lookup(lexer_iterator& iter_, const sm_type& sm_,
        basic_match_results& results_, token_vector& productions_)
    {
        switch (results_.entry.action)
        {
        case shift:
            results_.stack.push_back(results_.entry.param);
            // Record the shifted token (id plus its input range).
            productions_.push_back(typename token_vector::value_type(iter_->id,
                iter_->first, iter_->second));

            // Token id 0 is end of input: never advance past it.
            if (iter_->id != 0)
                ++iter_;

            results_.token_id = iter_->id;

            if (results_.token_id == lexer_iterator::value_type::npos())
            {
                results_.entry.action = error;
                results_.entry.param = unknown_token;
            }
            else
            {
                results_.entry =
                    sm_.at(results_.entry.param, results_.token_id);
            }

            break;
        case reduce:
        {
            const std::size_t size_ =
                sm_._rules[results_.entry.param]._rhs.size();
            typename token_vector::value_type token_;

            if (size_)
            {
                results_.stack.resize(results_.stack.size() - size_);
                // The reduced token spans from the start of the first popped
                // production to the end of the last one.
                token_.first = (productions_.end() - size_)->first;
                token_.second = productions_.back().second;
                productions_.resize(productions_.size() - size_);
            }
            else
            {
                // Empty right hand side: produce a zero-width token at the
                // current input position.
                if (productions_.empty())
                {
                    token_.first = token_.second = iter_->first;
                }
                else
                {
                    token_.first = token_.second = productions_.back().second;
                }
            }

            results_.token_id = sm_._rules[results_.entry.param]._lhs;
            results_.entry = sm_.at(results_.stack.back(), results_.token_id);
            // Push the left-hand-side non-terminal as a single token.
            token_.id = results_.token_id;
            productions_.push_back(token_);
            break;
        }
        case go_to:
            results_.stack.push_back(results_.entry.param);
            results_.token_id = iter_->id;
            results_.entry = sm_.at(results_.stack.back(), results_.token_id);
            break;
        case accept:
        {
            const std::size_t size_ =
                sm_._rules[results_.entry.param]._rhs.size();

            if (size_)
            {
                results_.stack.resize(results_.stack.size() - size_);
            }

            break;
        }
        default:
            // error
            break;
        }
    }
}

#endif
-------------------------------------------------------------------------------- /include/parsertl/match.hpp: -------------------------------------------------------------------------------- 1 | // match.hpp 2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_MATCH_HPP 7 | #define PARSERTL_MATCH_HPP 8 | 9 | #include "lookup.hpp" 10 | #include "parse.hpp" 11 | 12 | namespace parsertl 13 | { 14 | // Parse entire sequence and return boolean 15 | template 16 | bool match(lexer_iterator iter_, const sm_type& sm_) 17 | { 18 | basic_match_results results_(iter_->id, sm_); 19 | 20 | return parse(iter_, sm_, results_); 21 | } 22 | 23 | template 24 | bool match(lexer_iterator iter_, const sm_type& sm_, captures& captures_) 25 | { 26 | basic_match_results results_(iter_->id, sm_); 27 | // Qualify token to prevent arg dependant lookup 28 | typedef parsertl::token token; 29 | typedef typename lexer_iterator::value_type::iter_type iter_type; 30 | typename token::token_vector productions_; 31 | 32 | captures_.clear(); 33 | captures_.resize(sm_._captures.back().first + 34 | sm_._captures.back().second.size() + 1); 35 | captures_[0].push_back(std::pair 36 | (iter_->first, iter_->second)); 37 | 38 | while (results_.entry.action != error && 39 | results_.entry.action != accept) 40 | { 41 | if (results_.entry.action == reduce) 42 | { 43 | const typename sm_type::capture& row_ = 44 | sm_._captures[results_.entry.param]; 45 | 46 | if (!row_.second.empty()) 47 | { 48 | std::size_t index_ = 0; 49 | typename sm_type::capture_vector::const_iterator i_ = 50 | row_.second.begin(); 51 | typename sm_type::capture_vector::const_iterator e_ = 52 | row_.second.end(); 53 | 54 | for (; i_ != e_; ++i_) 55 | { 56 | const token& token1_ = results_.dollar(i_->first, sm_, 57 | productions_); 58 | 
const token& token2_ = results_.dollar(i_->second, sm_, 59 | productions_); 60 | 61 | captures_[row_.first + index_ + 1]. 62 | push_back(std::pair(token1_.first, 64 | token2_.second)); 65 | ++index_; 66 | } 67 | } 68 | } 69 | 70 | lookup(iter_, sm_, results_, productions_); 71 | } 72 | 73 | return results_.entry.action == accept; 74 | } 75 | } 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /include/parsertl/match_results.hpp: -------------------------------------------------------------------------------- 1 | // match_results.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_MATCH_RESULTS_HPP 7 | #define PARSERTL_MATCH_RESULTS_HPP 8 | 9 | #include "runtime_error.hpp" 10 | #include "state_machine.hpp" 11 | #include 12 | 13 | namespace parsertl 14 | { 15 | template 16 | struct basic_match_results 17 | { 18 | typedef sm_t sm_type; 19 | typedef typename sm_type::id_type id_type; 20 | std::vector stack; 21 | id_type token_id; 22 | typename sm_type::entry entry; 23 | 24 | basic_match_results() : 25 | token_id(static_cast(~0)) 26 | { 27 | stack.push_back(0); 28 | entry.action = error; 29 | entry.param = unknown_token; 30 | } 31 | 32 | basic_match_results(const std::size_t reserved_) : 33 | stack(reserved_) 34 | { 35 | basic_match_results(); 36 | } 37 | 38 | basic_match_results(const id_type token_id_, const sm_type& sm_) 39 | { 40 | reset(token_id_, sm_); 41 | } 42 | 43 | basic_match_results(const id_type token_id_, const sm_type& sm_, 44 | const std::size_t reserved_) : 45 | stack(reserved_) 46 | { 47 | basic_match_results(token_id_, sm_); 48 | } 49 | 50 | void clear() 51 | { 52 | stack.clear(); 53 | stack.push_back(0); 54 | token_id = static_cast(~0); 55 | entry.clear(); 56 | } 57 | 58 | void reset(const 
id_type token_id_, const sm_type& sm_) 59 | { 60 | stack.clear(); 61 | stack.push_back(0); 62 | token_id = token_id_; 63 | 64 | if (token_id == static_cast(~0)) 65 | { 66 | entry.action = error; 67 | entry.param = unknown_token; 68 | } 69 | else 70 | { 71 | entry = sm_.at(stack.back(), token_id); 72 | } 73 | } 74 | 75 | id_type reduce_id() const 76 | { 77 | if (entry.action != reduce) 78 | { 79 | throw runtime_error("Not in a reduce state!"); 80 | } 81 | 82 | return entry.param; 83 | } 84 | 85 | template 86 | typename token_vector::value_type& dollar(const std::size_t index_, 87 | const sm_type& sm_, token_vector& productions) 88 | { 89 | if (entry.action != reduce) 90 | { 91 | throw runtime_error("Not in a reduce state!"); 92 | } 93 | 94 | return productions[productions.size() - 95 | production_size(sm_, entry.param) + index_]; 96 | } 97 | 98 | template 99 | const typename token_vector::value_type& 100 | dollar(const std::size_t index_, const sm_type& sm_, 101 | const token_vector& productions) const 102 | { 103 | if (entry.action != reduce) 104 | { 105 | throw runtime_error("Not in a reduce state!"); 106 | } 107 | 108 | return productions[productions.size() - 109 | production_size(sm_, entry.param) + index_]; 110 | } 111 | 112 | std::size_t production_size(const sm_type& sm, 113 | const std::size_t index_) const 114 | { 115 | return sm._rules[index_]._rhs.size(); 116 | } 117 | 118 | bool operator ==(const basic_match_results& rhs_) const 119 | { 120 | return stack == rhs_.stack && 121 | token_id == rhs_.token_id && 122 | entry == rhs_.entry; 123 | } 124 | }; 125 | 126 | typedef basic_match_results match_results; 127 | typedef basic_match_results 128 | uncompressed_match_results; 129 | } 130 | 131 | #endif 132 | -------------------------------------------------------------------------------- /include/parsertl/narrow.hpp: -------------------------------------------------------------------------------- 1 | // narrow.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson 
(http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_NARROW_HPP 7 | #define PARSERTL_NARROW_HPP 8 | 9 | #include 10 | 11 | namespace parsertl 12 | { 13 | template 14 | void narrow(const char_type* str_, std::ostringstream& ss_) 15 | { 16 | while (*str_) 17 | { 18 | // Safe to simply cast to char when string only contains ASCII. 19 | ss_ << static_cast(*str_++); 20 | } 21 | } 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /include/parsertl/nt_info.hpp: -------------------------------------------------------------------------------- 1 | // nt_info.hpp 2 | // Copyright (c) 2016-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_NT_INFO_HPP 7 | #define PARSERTL_NT_INFO_HPP 8 | 9 | #include 10 | 11 | namespace parsertl 12 | { 13 | typedef std::vector char_vector; 14 | 15 | struct nt_info 16 | { 17 | bool _nullable; 18 | char_vector _first_set; 19 | char_vector _follow_set; 20 | 21 | nt_info(const std::size_t terminals_) : 22 | _nullable(false), 23 | _first_set(terminals_, 0), 24 | _follow_set(terminals_, 0) 25 | { 26 | } 27 | }; 28 | 29 | typedef std::vector nt_info_vector; 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /include/parsertl/parse.hpp: -------------------------------------------------------------------------------- 1 | // parse.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_PARSE_HPP 7 | #define PARSERTL_PARSE_HPP 8 | 9 | #include "match_results.hpp" 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | // Parse entire sequence and return boolean 15 | template 16 | bool parse(lexer_iterator& iter_, const sm_type& sm_, 17 | basic_match_results& results_) 18 | { 19 | while (results_.entry.action != error) 20 | { 21 | switch (results_.entry.action) 22 | { 23 | case shift: 24 | results_.stack.push_back(results_.entry.param); 25 | 26 | if (iter_->id != 0) 27 | ++iter_; 28 | 29 | results_.token_id = iter_->id; 30 | 31 | if (results_.token_id == lexer_iterator::value_type::npos()) 32 | { 33 | results_.entry.action = error; 34 | results_.entry.param = unknown_token; 35 | } 36 | else 37 | { 38 | results_.entry = 39 | sm_.at(results_.stack.back(), results_.token_id); 40 | } 41 | 42 | break; 43 | case reduce: 44 | { 45 | const std::size_t size_ = 46 | sm_._rules[results_.entry.param]._rhs.size(); 47 | 48 | if (size_) 49 | { 50 | results_.stack.resize(results_.stack.size() - size_); 51 | } 52 | 53 | results_.token_id = sm_._rules[results_.entry.param]._lhs; 54 | results_.entry = 55 | sm_.at(results_.stack.back(), results_.token_id); 56 | break; 57 | } 58 | case go_to: 59 | results_.stack.push_back(results_.entry.param); 60 | results_.token_id = iter_->id; 61 | results_.entry = 62 | sm_.at(results_.stack.back(), results_.token_id); 63 | break; 64 | default: 65 | // accept 66 | // error 67 | break; 68 | } 69 | 70 | if (results_.entry.action == accept) 71 | { 72 | const std::size_t size_ = 73 | sm_._rules[results_.entry.param]._rhs.size(); 74 | 75 | if (size_) 76 | { 77 | results_.stack.resize(results_.stack.size() - size_); 78 | } 79 | 80 | break; 81 | } 82 | } 83 | 84 | return results_.entry.action == accept; 85 | } 86 | } 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- 
/include/parsertl/read_bison.hpp: -------------------------------------------------------------------------------- 1 | // read_bison.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_READ_BISON_HPP 7 | #define PARSERTL_READ_BISON_HPP 8 | 9 | #include "generator.hpp" 10 | #include "lookup.hpp" 11 | #include "match_results.hpp" 12 | #include "token.hpp" 13 | 14 | namespace parsertl 15 | { 16 | template 17 | void read_bison(const char_type* start_, const char_type* end_, 18 | rules_type& rules_) 19 | { 20 | typedef lexertl::basic_rules bison_lrules; 21 | typedef lexertl::basic_state_machine bison_lsm; 22 | typedef lexertl::recursive_match_results 23 | bison_crmatch; 24 | typedef lexertl::iterator 25 | bison_criterator; 26 | typedef lexertl::basic_generator 27 | bison_lgenerator; 28 | typedef std::basic_string string; 29 | 30 | rules grules_; 31 | state_machine gsm_; 32 | bison_lrules lrules_; 33 | bison_lsm lsm_; 34 | 35 | grules_.token("LITERAL NAME"); 36 | grules_.push("start", "list"); 37 | grules_.push("list", "directives '%%' rules '%%'"); 38 | grules_.push("directives", "%empty " 39 | "| directives directive"); 40 | 41 | grules_.push("directive", "'%code' " 42 | "| '%define' " 43 | "| '%debug' " 44 | "| '%expect' " 45 | "| '%locations' " 46 | "| '%type' " 47 | "| '%verbose' " 48 | "| '%initial-action'"); 49 | const std::size_t token_index_ = 50 | grules_.push("directive", "'%token' tokens '\\n'"); 51 | const std::size_t left_index_ = 52 | grules_.push("directive", "'%left' tokens '\\n'"); 53 | const std::size_t right_index_ = 54 | grules_.push("directive", "'%right' tokens '\\n'"); 55 | const std::size_t nonassoc_index_ = grules_.push("directive", 56 | "'%nonassoc' tokens '\\n'"); 57 | const std::size_t precedence_index_ = 58 | grules_.push("directive", 
"'%precedence' tokens '\\n'"); 59 | const std::size_t start_index_ = 60 | grules_.push("directive", "'%start' NAME '\\n'"); 61 | 62 | grules_.push("directive", "'\\n'"); 63 | grules_.push("tokens", "tokens name " 64 | "| name"); 65 | grules_.push("name", "LITERAL | NAME"); 66 | grules_.push("rules", "rules rule " 67 | "| rule"); 68 | 69 | const std::size_t prod_index_ = 70 | grules_.push("rule", "NAME ':' productions ';'"); 71 | 72 | // Meh 73 | grules_.push("rule", "';'"); 74 | grules_.push("productions", "productions '|' production prec " 75 | "| production prec"); 76 | grules_.push("production", "%empty | '%empty' | prod_list"); 77 | grules_.push("prod_list", "token " 78 | "| prod_list token"); 79 | grules_.push("token", "LITERAL | NAME"); 80 | grules_.push("prec", "%empty | '%prec' token"); 81 | 82 | std::string warnings_; 83 | 84 | generator::build(grules_, gsm_, &warnings_); 85 | 86 | lrules_.push_state("CODE"); 87 | lrules_.push_state("FINISH"); 88 | lrules_.push_state("PRODUCTIONS"); 89 | lrules_.push_state("PREC"); 90 | 91 | lrules_.push("%code[^{]*", grules_.token_id("'%code'")); 92 | lrules_.push("%debug.*", grules_.token_id("'%debug'")); 93 | lrules_.push("%define.*", grules_.token_id("'%define'")); 94 | lrules_.push("%expect.*", grules_.token_id("'%expect'")); 95 | lrules_.push("%verbose", grules_.token_id("'%verbose'")); 96 | lrules_.push("%initial-action[^{]*[{](.|\n)*?[}];", 97 | grules_.token_id("'%initial-action'")); 98 | lrules_.push("%left", grules_.token_id("'%left'")); 99 | lrules_.push("%locations", grules_.token_id("'%locations'")); 100 | lrules_.push("\n", grules_.token_id("'\\n'")); 101 | lrules_.push("%nonassoc", grules_.token_id("'%nonassoc'")); 102 | lrules_.push("%precedence", grules_.token_id("'%precedence'")); 103 | lrules_.push("%right", grules_.token_id("'%right'")); 104 | lrules_.push("%start", grules_.token_id("'%start'")); 105 | lrules_.push("%token", grules_.token_id("'%token'")); 106 | lrules_.push("%type.*", 
grules_.token_id("'%type'")); 107 | lrules_.push("%union[^{]*[{](.|\n)*?[}]", lrules_.skip()); 108 | lrules_.push("<[^>]+>", lrules_.skip()); 109 | lrules_.push("%[{](.|\n)*?%[}]", lrules_.skip()); 110 | lrules_.push("[ \t\r]+", lrules_.skip()); 111 | 112 | lrules_.push("INITIAL,CODE,PRODUCTIONS", "[{]", ">CODE"); 113 | lrules_.push("CODE", "'(\\\\.|[^\\\\\r\n'])*'", "."); 114 | 115 | lrules_.push("CODE", "[\"](\\\\.|[^\\\\\r\n\"])*[\"]", "."); 116 | lrules_.push("CODE", "<%", ">CODE"); 117 | lrules_.push("CODE", "%>", "<"); 118 | lrules_.push("CODE", "[^}]", "."); 119 | lrules_.push("CODE", "[}]", lrules_.skip(), "<"); 120 | 121 | lrules_.push("INITIAL", "%%", grules_.token_id("'%%'"), "PRODUCTIONS"); 122 | lrules_.push("PRODUCTIONS", ":", grules_.token_id("':'"), "."); 123 | lrules_.push("PRODUCTIONS", ";", grules_.token_id("';'"), "."); 124 | lrules_.push("PRODUCTIONS", "[|]", grules_.token_id("'|'"), 125 | "PRODUCTIONS"); 126 | lrules_.push("PRODUCTIONS", "%empty", 127 | grules_.token_id("'%empty'"), "."); 128 | lrules_.push("INITIAL,PRODUCTIONS", 129 | "'(\\\\([^0-9cx]|[0-9]{1,3}|c[@a-zA-Z]|x\\d+)|[^\\\\\r\n'])+'|" 130 | "[\"](\\\\([^0-9cx]|[0-9]{1,3}|c[@a-zA-Z]|x\\d+)" 131 | "|[^\\\\\r\n\"])+[\"]", 132 | grules_.token_id("LITERAL"), "."); 133 | lrules_.push("PREC", 134 | "'(\\\\([^0-9cx]|[0-9]{1,3}|c[@a-zA-Z]|x\\d+)|[^\\\\\r\n'])+'|" 135 | "[\"](\\\\([^0-9cx]|[0-9]{1,3}|c[@a-zA-Z]|x\\d+)|" 136 | "[^\\\\\r\n\"])+[\"]", 137 | grules_.token_id("LITERAL"), "PRODUCTIONS"); 138 | lrules_.push("INITIAL,PRODUCTIONS", 139 | "[A-Za-z_.][-A-Za-z_.0-9]*", grules_.token_id("NAME"), "."); 140 | lrules_.push("PRODUCTIONS", "%%", grules_.token_id("'%%'"), "FINISH"); 141 | lrules_.push("PRODUCTIONS", "%prec", 142 | grules_.token_id("'%prec'"), "PREC"); 143 | lrules_.push("PREC", "[A-Za-z_.][-A-Za-z_.0-9]*", 144 | grules_.token_id("NAME"), "PRODUCTIONS"); 145 | // Always skip comments 146 | lrules_.push("CODE,INITIAL,PREC,PRODUCTIONS", 147 | 
"[/][*](.|\n|\r\n)*?[*][/]|[/][/].*", lrules_.skip(), "."); 148 | // All whitespace in PRODUCTIONS mode is skipped. 149 | lrules_.push("PREC,PRODUCTIONS", "\\s+", lrules_.skip(), "."); 150 | lrules_.push("FINISH", "(?s:.)+", lrules_.skip(), "INITIAL"); 151 | 152 | bison_lgenerator::build(lrules_, lsm_); 153 | 154 | bison_criterator iter_(start_, end_, lsm_); 155 | typedef token token; 156 | typename token::token_vector productions_; 157 | match_results results_(iter_->id, gsm_); 158 | 159 | while (results_.entry.action != error && 160 | results_.entry.action != accept) 161 | { 162 | if (results_.entry.action == reduce) 163 | { 164 | if (results_.entry.param == token_index_) 165 | { 166 | const token& token_ = 167 | results_.dollar(1, gsm_, productions_); 168 | const string str_(token_.first, token_.second); 169 | 170 | rules_.token(str_.c_str()); 171 | } 172 | else if (results_.entry.param == left_index_) 173 | { 174 | const token& token_ = 175 | results_.dollar(1, gsm_, productions_); 176 | const string str_(token_.first, token_.second); 177 | 178 | rules_.left(str_.c_str()); 179 | } 180 | else if (results_.entry.param == right_index_) 181 | { 182 | const token& token_ = 183 | results_.dollar(1, gsm_, productions_); 184 | const string str_(token_.first, token_.second); 185 | 186 | rules_.right(str_.c_str()); 187 | } 188 | else if (results_.entry.param == nonassoc_index_) 189 | { 190 | const token& token_ = 191 | results_.dollar(1, gsm_, productions_); 192 | const string str_(token_.first, token_.second); 193 | 194 | rules_.nonassoc(str_.c_str()); 195 | } 196 | else if (results_.entry.param == precedence_index_) 197 | { 198 | const token& token_ = 199 | results_.dollar(1, gsm_, productions_); 200 | const string str_(token_.first, token_.second); 201 | 202 | rules_.precedence(str_.c_str()); 203 | } 204 | else if (results_.entry.param == start_index_) 205 | { 206 | const token& name_ = 207 | results_.dollar(1, gsm_, productions_); 208 | 209 | 
rules_.start(string(name_.first, 210 | name_.second).c_str()); 211 | } 212 | else if (results_.entry.param == prod_index_) 213 | { 214 | const token& lhs_ = results_.dollar(0, gsm_, productions_); 215 | const token& rhs_ = results_.dollar(2, gsm_, productions_); 216 | const string lhs_str_(lhs_.first, lhs_.second); 217 | string rhs_str_; 218 | // Strip out unwanted tokens (such as blocks of C code) 219 | bison_criterator rhs_iter_(rhs_.first, rhs_.second, lsm_); 220 | 221 | for (; rhs_iter_->id != 0; ++rhs_iter_) 222 | { 223 | if (!rhs_str_.empty() && !::strchr(" \t\n\r%", rhs_str_.back())) 224 | rhs_str_ += ' '; 225 | 226 | rhs_str_ += rhs_iter_->str(); 227 | } 228 | 229 | rules_.push(lhs_str_.c_str(), rhs_str_.c_str()); 230 | } 231 | } 232 | 233 | lookup(iter_, gsm_, results_, productions_); 234 | } 235 | 236 | if (results_.entry.action == error) 237 | { 238 | std::ostringstream ss_; 239 | string token_ = iter_->str(); 240 | 241 | ss_ << "Syntax error on line " << 242 | std::count(start_, iter_->first, '\n') + 1 << 243 | ": '"; 244 | narrow(token_.c_str(), ss_); 245 | ss_ << '\''; 246 | throw runtime_error(ss_.str()); 247 | } 248 | } 249 | } 250 | 251 | #endif 252 | -------------------------------------------------------------------------------- /include/parsertl/rules.hpp: -------------------------------------------------------------------------------- 1 | // rules.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_RULES_HPP 7 | #define PARSERTL_RULES_HPP 8 | 9 | #include "bison_lookup.hpp" 10 | #include "ebnf_tables.hpp" 11 | #include "enums.hpp" 12 | #include 13 | #include 14 | #include "match_results.hpp" 15 | #include "narrow.hpp" 16 | #include "runtime_error.hpp" 17 | #include "token.hpp" 18 | 19 | namespace parsertl 20 | { 21 | template 22 | class basic_rules 23 | { 24 | public: 25 | typedef T char_type; 26 | 27 | struct nt_location 28 | { 29 | std::size_t _first_production; 30 | std::size_t _last_production; 31 | 32 | nt_location() : 33 | _first_production(static_cast(~0)), 34 | _last_production(static_cast(~0)) 35 | { 36 | } 37 | }; 38 | 39 | typedef std::vector > capture_vector; 40 | typedef std::deque > 41 | captures_deque; 42 | typedef std::vector nt_location_vector; 43 | typedef std::basic_string string; 44 | typedef std::map string_id_type_map; 45 | typedef std::pair string_id_type_pair; 46 | typedef std::vector string_vector; 47 | 48 | struct symbol 49 | { 50 | enum type { TERMINAL, NON_TERMINAL }; 51 | 52 | type _type; 53 | std::size_t _id; 54 | 55 | symbol(const type type_, const std::size_t id_) : 56 | _type(type_), 57 | _id(id_) 58 | { 59 | } 60 | 61 | bool operator<(const symbol& rhs_) const 62 | { 63 | return _type < rhs_._type || 64 | (_type == rhs_._type && _id < rhs_._id); 65 | } 66 | 67 | bool operator==(const symbol& rhs_) const 68 | { 69 | return _type == rhs_._type && _id == rhs_._id; 70 | } 71 | }; 72 | 73 | typedef std::vector symbol_vector; 74 | enum associativity 75 | { 76 | token_assoc, precedence_assoc, non_assoc, left_assoc, right_assoc 77 | }; 78 | 79 | struct production 80 | { 81 | std::size_t _lhs; 82 | 83 | struct rhs 84 | { 85 | symbol_vector _symbols; 86 | string _prec; 87 | 88 | bool operator==(const rhs& rhs_) const 89 | { 90 | return _symbols == rhs_._symbols && 91 | _prec == rhs_._prec; 92 | } 93 | 94 | bool 
operator<(const rhs& rhs_) const 95 | { 96 | return _symbols < rhs_._symbols || 97 | (_symbols == rhs_._symbols && _prec < rhs_._prec); 98 | } 99 | }; 100 | 101 | rhs _rhs; 102 | std::size_t _precedence; 103 | associativity _associativity; 104 | std::size_t _index; 105 | std::size_t _next_lhs; 106 | 107 | production(const std::size_t index_) : 108 | _lhs(static_cast(~0)), 109 | _precedence(0), 110 | _associativity(token_assoc), 111 | _index(index_), 112 | _next_lhs(static_cast(~0)) 113 | { 114 | } 115 | 116 | void clear() 117 | { 118 | _lhs = static_cast(~0); 119 | _rhs._symbols.clear(); 120 | _rhs._prec.clear(); 121 | _precedence = 0; 122 | _associativity = token_assoc; 123 | _index = static_cast(~0); 124 | _next_lhs = static_cast(~0); 125 | } 126 | }; 127 | 128 | typedef std::deque production_deque; 129 | 130 | struct token_info 131 | { 132 | std::size_t _precedence; 133 | associativity _associativity; 134 | 135 | token_info() : 136 | _precedence(0), 137 | _associativity(token_assoc) 138 | { 139 | } 140 | 141 | token_info(const std::size_t precedence_, 142 | const associativity associativity_) : 143 | _precedence(precedence_), 144 | _associativity(associativity_) 145 | { 146 | } 147 | }; 148 | 149 | typedef std::vector token_info_vector; 150 | 151 | basic_rules(const std::size_t flags_ = 0) : 152 | _flags(flags_), 153 | _next_precedence(1) 154 | { 155 | lexer_rules rules_; 156 | 157 | rules_.insert_macro("TERMINAL", 158 | "'(\\\\([^0-9cx]|[0-9]{1,3}|c[@a-zA-Z]|x\\d+)|[^\\\\\r\n'])+'|" 159 | "[\"](\\\\([^0-9cx]|[0-9]{1,3}|c[@a-zA-Z]|x\\d+)|" 160 | "[^\\\\\r\n\"])+[\"]"); 161 | rules_.insert_macro("IDENTIFIER", "[A-Za-z_.][-A-Za-z_.0-9]*"); 162 | rules_.push("{TERMINAL}", ebnf_tables::TERMINAL); 163 | rules_.push("{IDENTIFIER}", ebnf_tables::IDENTIFIER); 164 | rules_.push("\\s+", rules_.skip()); 165 | lexer_generator::build(rules_, _token_lexer); 166 | 167 | rules_.push("[|]", '|'); 168 | rules_.push("\\[", '['); 169 | rules_.push("\\]", ']'); 170 | 
rules_.push("[?]", '?'); 171 | rules_.push("[{]", '{'); 172 | rules_.push("[}]", '}'); 173 | rules_.push("[*]", '*'); 174 | rules_.push("-", '-'); 175 | rules_.push("[+]", '+'); 176 | rules_.push("[(]", '('); 177 | rules_.push("[)]", ')'); 178 | rules_.push("%empty", ebnf_tables::EMPTY); 179 | rules_.push("%prec", ebnf_tables::PREC); 180 | rules_.push("[/][*].{+}[\r\n]*?[*][/]|[/][/].*", rules_.skip()); 181 | lexer_generator::build(rules_, _rule_lexer); 182 | 183 | const std::size_t id_ = insert_terminal(string(1, '$')); 184 | 185 | info(id_); 186 | } 187 | 188 | void clear() 189 | { 190 | _flags = 0; 191 | _next_precedence = 1; 192 | 193 | _non_terminals.clear(); 194 | _nt_locations.clear(); 195 | _new_rule_ids.clear(); 196 | _generated_rules.clear(); 197 | _start.clear(); 198 | _grammar.clear(); 199 | _captures.clear(); 200 | 201 | _terminals.clear(); 202 | _tokens_info.clear(); 203 | 204 | const std::size_t id_ = insert_terminal(string(1, '$')); 205 | 206 | info(id_); 207 | } 208 | 209 | void flags(const std::size_t flags_) 210 | { 211 | _flags = flags_; 212 | } 213 | 214 | void token(const char_type* names_) 215 | { 216 | lexer_iterator iter_(names_, str_end(names_), _token_lexer); 217 | 218 | token(iter_, 0, token_assoc, "token"); 219 | } 220 | 221 | void token(const string& names_) 222 | { 223 | lexer_iterator iter_(names_.c_str(), names_.c_str() + names_.size(), 224 | _token_lexer); 225 | 226 | token(iter_, 0, token_assoc, "token"); 227 | } 228 | 229 | void left(const char_type* names_) 230 | { 231 | lexer_iterator iter_(names_, str_end(names_), _token_lexer); 232 | 233 | token(iter_, _next_precedence, left_assoc, "left"); 234 | ++_next_precedence; 235 | } 236 | 237 | void left(const string& names_) 238 | { 239 | lexer_iterator iter_(names_.c_str(), names_.c_str() + names_.size(), 240 | _token_lexer); 241 | 242 | token(iter_, _next_precedence, left_assoc, "left"); 243 | ++_next_precedence; 244 | } 245 | 246 | void right(const char_type* names_) 247 | { 248 | 
lexer_iterator iter_(names_, str_end(names_), _token_lexer); 249 | 250 | token(iter_, _next_precedence, right_assoc, "right"); 251 | ++_next_precedence; 252 | } 253 | 254 | void right(const string& names_) 255 | { 256 | lexer_iterator iter_(names_.c_str(), names_.c_str() + names_.size(), 257 | _token_lexer); 258 | 259 | token(iter_, _next_precedence, right_assoc, "right"); 260 | ++_next_precedence; 261 | } 262 | 263 | void nonassoc(const char_type* names_) 264 | { 265 | lexer_iterator iter_(names_, str_end(names_), _token_lexer); 266 | 267 | token(iter_, _next_precedence, non_assoc, "nonassoc"); 268 | ++_next_precedence; 269 | } 270 | 271 | void nonassoc(const string& names_) 272 | { 273 | lexer_iterator iter_(names_.c_str(), names_.c_str() + names_.size(), 274 | _token_lexer); 275 | 276 | token(iter_, _next_precedence, non_assoc, "nonassoc"); 277 | ++_next_precedence; 278 | } 279 | 280 | void precedence(const char_type* names_) 281 | { 282 | lexer_iterator iter_(names_, str_end(names_), _token_lexer); 283 | 284 | token(iter_, _next_precedence, precedence_assoc, "precedence"); 285 | ++_next_precedence; 286 | } 287 | 288 | void precedence(const string& names_) 289 | { 290 | lexer_iterator iter_(names_.c_str(), names_.c_str() + names_.size(), 291 | _token_lexer); 292 | 293 | token(iter_, _next_precedence, precedence_assoc, "precedence"); 294 | ++_next_precedence; 295 | } 296 | 297 | id_type push(const string& lhs_, const string& rhs_) 298 | { 299 | // Return the first index of any EBNF/rule with ors. 
300 | id_type index_ = static_cast(_grammar.size()); 301 | const std::size_t old_size_ = _grammar.size(); 302 | 303 | validate(lhs_.c_str()); 304 | 305 | if (_terminals.find(lhs_) != _terminals.end()) 306 | { 307 | std::ostringstream ss_; 308 | 309 | ss_ << "Rule "; 310 | narrow(lhs_.c_str(), ss_); 311 | ss_ << " is already defined as a TERMINAL."; 312 | throw runtime_error(ss_.str()); 313 | } 314 | 315 | if (_generated_rules.find(lhs_) != _generated_rules.end()) 316 | { 317 | std::ostringstream ss_; 318 | 319 | ss_ << "Rule "; 320 | narrow(lhs_.c_str(), ss_); 321 | ss_ << " is already defined as a generated rule."; 322 | throw runtime_error(ss_.str()); 323 | } 324 | 325 | lexer_iterator iter_(rhs_.c_str(), rhs_.c_str() + rhs_.size(), 326 | _rule_lexer); 327 | basic_match_results > results_; 328 | // Qualify token to prevent arg dependant lookup 329 | typedef parsertl::token token_t; 330 | typename token_t::token_vector productions_; 331 | std::stack rhs_stack_; 332 | std::stack > new_rules_; 333 | static const char_type empty_or_[] = 334 | { '%', 'e', 'm', 'p', 't', 'y', ' ', '|', ' ', '\0' }; 335 | static const char_type or_[] = { ' ', '|', ' ', '\0' }; 336 | 337 | bison_next(_ebnf_tables, iter_, results_); 338 | 339 | while (results_.entry.action != error && 340 | results_.entry.action != accept) 341 | { 342 | if (results_.entry.action == reduce) 343 | { 344 | switch (static_cast(results_.entry.param)) 345 | { 346 | case rhs_or_2_idx: 347 | { 348 | // rhs_or: rhs_or '|' opt_list 349 | const std::size_t size_ = 350 | _ebnf_tables.yyr2[results_.entry.param]; 351 | const std::size_t idx_ = productions_.size() - size_; 352 | const token_t& token_ = productions_[idx_ + 1]; 353 | const string r_ = token_.str() + char_type(' ') + 354 | rhs_stack_.top(); 355 | 356 | rhs_stack_.pop(); 357 | 358 | if (rhs_stack_.empty()) 359 | { 360 | rhs_stack_.push(r_); 361 | } 362 | else 363 | { 364 | rhs_stack_.top() += char_type(' ') + r_; 365 | } 366 | 367 | break; 368 | } 369 | 
case opt_prec_list_idx: 370 | { 371 | // opt_prec_list: opt_list opt_prec 372 | const std::size_t size_ = 373 | _ebnf_tables.yyr2[results_.entry.param]; 374 | const std::size_t idx_ = productions_.size() - size_; 375 | const token_t& token_ = productions_[idx_ + 1]; 376 | 377 | // Check if %prec is present 378 | if (token_.first != token_.second) 379 | { 380 | string r_ = rhs_stack_.top(); 381 | 382 | rhs_stack_.pop(); 383 | rhs_stack_.top() += char_type(' ') + r_; 384 | } 385 | 386 | break; 387 | } 388 | case opt_list_1_idx: 389 | // opt_list: %empty 390 | rhs_stack_.push(string()); 391 | break; 392 | case rhs_list_2_idx: 393 | { 394 | // rhs_list: rhs_list rhs 395 | string r_ = rhs_stack_.top(); 396 | 397 | rhs_stack_.pop(); 398 | rhs_stack_.top() += char_type(' ') + r_; 399 | break; 400 | } 401 | case opt_list_2_idx: 402 | case identifier_idx: 403 | case terminal_idx: 404 | { 405 | // opt_list: %empty 406 | // rhs: IDENTIFIER 407 | // rhs: TERMINAL 408 | const std::size_t size_ = 409 | _ebnf_tables.yyr2[results_.entry.param]; 410 | const std::size_t idx_ = productions_.size() - size_; 411 | const token_t& token_ = productions_[idx_]; 412 | 413 | rhs_stack_.push(token_.str()); 414 | break; 415 | } 416 | case optional_1_idx: 417 | case optional_2_idx: 418 | { 419 | // rhs: '[' rhs_or ']' 420 | // rhs: rhs '?' 
421 | std::size_t& counter_ = _new_rule_ids[lhs_]; 422 | std::basic_ostringstream ss_; 423 | std::pair pair_; 424 | 425 | ++counter_; 426 | ss_ << counter_; 427 | pair_.first = lhs_ + char_type('_') + ss_.str(); 428 | _generated_rules.insert(pair_.first); 429 | pair_.second = empty_or_ + rhs_stack_.top(); 430 | rhs_stack_.top() = pair_.first; 431 | new_rules_.push(pair_); 432 | break; 433 | } 434 | case zom_1_idx: 435 | case zom_2_idx: 436 | { 437 | // rhs: '{' rhs_or '}' 438 | // rhs: rhs '*' 439 | std::size_t& counter_ = _new_rule_ids[lhs_]; 440 | std::basic_ostringstream ss_; 441 | std::pair pair_; 442 | 443 | ++counter_; 444 | ss_ << counter_; 445 | pair_.first = lhs_ + char_type('_') + ss_.str(); 446 | _generated_rules.insert(pair_.first); 447 | pair_.second = empty_or_ + pair_.first + 448 | char_type(' ') + rhs_stack_.top(); 449 | rhs_stack_.top() = pair_.first; 450 | new_rules_.push(pair_); 451 | break; 452 | } 453 | case oom_1_idx: 454 | case oom_2_idx: 455 | { 456 | // rhs: '{' rhs_or '}' '-' 457 | // rhs: rhs '+' 458 | std::size_t& counter_ = _new_rule_ids[lhs_]; 459 | std::basic_ostringstream ss_; 460 | std::pair pair_; 461 | 462 | ++counter_; 463 | ss_ << counter_; 464 | pair_.first = lhs_ + char_type('_') + ss_.str(); 465 | _generated_rules.insert(pair_.first); 466 | pair_.second = rhs_stack_.top() + or_ + 467 | pair_.first + char_type(' ') + rhs_stack_.top(); 468 | rhs_stack_.top() = pair_.first; 469 | new_rules_.push(pair_); 470 | break; 471 | } 472 | case bracketed_idx: 473 | { 474 | // rhs: '(' rhs_or ')' 475 | std::size_t& counter_ = _new_rule_ids[lhs_]; 476 | std::basic_ostringstream ss_; 477 | std::pair pair_; 478 | 479 | ++counter_; 480 | ss_ << counter_; 481 | pair_.first = lhs_ + char_type('_') + ss_.str(); 482 | _generated_rules.insert(pair_.first); 483 | pair_.second = rhs_stack_.top(); 484 | 485 | if (_flags & enable_captures) 486 | { 487 | rhs_stack_.top() = char_type('(') + pair_.first + 488 | char_type(')'); 489 | } 490 | else 491 | { 
492 | rhs_stack_.top() = pair_.first; 493 | } 494 | 495 | new_rules_.push(pair_); 496 | break; 497 | } 498 | case prec_ident_idx: 499 | case prec_term_idx: 500 | { 501 | // opt_prec: PREC IDENTIFIER 502 | // opt_prec: PREC TERMINAL 503 | const std::size_t size_ = 504 | _ebnf_tables.yyr2[results_.entry.param]; 505 | const std::size_t idx_ = productions_.size() - size_; 506 | const token_t& token_ = productions_[idx_]; 507 | 508 | rhs_stack_.push(token_.str() + char_type(' ') + 509 | productions_[idx_ + 1].str()); 510 | break; 511 | } 512 | } 513 | } 514 | 515 | bison_lookup(_ebnf_tables, iter_, results_, productions_); 516 | bison_next(_ebnf_tables, iter_, results_); 517 | } 518 | 519 | if (results_.entry.action == error) 520 | { 521 | std::ostringstream ss_; 522 | 523 | ss_ << "Syntax error in rule "; 524 | narrow(lhs_.c_str(), ss_); 525 | ss_ << '(' << iter_->first - rhs_.c_str() + 1 << "): '"; 526 | narrow(rhs_.c_str(), ss_); 527 | ss_ << "'."; 528 | throw runtime_error(ss_.str()); 529 | } 530 | 531 | assert(rhs_stack_.size() == 1); 532 | push_production(lhs_, rhs_stack_.top()); 533 | 534 | while (!new_rules_.empty()) 535 | { 536 | push_production(new_rules_.top().first, 537 | new_rules_.top().second); 538 | new_rules_.pop(); 539 | } 540 | 541 | if (!_captures.empty() && old_size_ != _grammar.size()) 542 | { 543 | resize_captures(); 544 | } 545 | 546 | return index_; 547 | } 548 | 549 | id_type token_id(const string& name_) const 550 | { 551 | typename string_id_type_map::const_iterator iter_ = 552 | _terminals.find(name_); 553 | 554 | if (iter_ == _terminals.end()) 555 | { 556 | std::ostringstream ss_; 557 | 558 | ss_ << "Unknown token \""; 559 | narrow(name_.c_str(), ss_); 560 | ss_ << "\"."; 561 | throw runtime_error(ss_.str()); 562 | } 563 | 564 | return iter_->second; 565 | } 566 | 567 | string name_from_token_id(const std::size_t id_) const 568 | { 569 | string name_; 570 | 571 | for (typename string_id_type_map::const_iterator iter_ = 572 | 
_terminals.begin(), end_ = _terminals.end(); 573 | iter_ != end_; ++iter_) 574 | { 575 | if (iter_->second == id_) 576 | { 577 | name_ = iter_->first; 578 | break; 579 | } 580 | } 581 | 582 | return name_; 583 | } 584 | 585 | string name_from_nt_id(const std::size_t id_) const 586 | { 587 | string name_; 588 | 589 | for (typename string_id_type_map::const_iterator iter_ = 590 | _non_terminals.begin(), end_ = _non_terminals.end(); 591 | iter_ != end_; ++iter_) 592 | { 593 | if (iter_->second == id_) 594 | { 595 | name_ = iter_->first; 596 | break; 597 | } 598 | } 599 | 600 | return name_; 601 | } 602 | 603 | void start(const char_type* start_) 604 | { 605 | validate(start_); 606 | _start = start_; 607 | } 608 | 609 | void start(const string& start_) 610 | { 611 | validate(start_.c_str()); 612 | _start = start_; 613 | } 614 | 615 | const size_t start() const 616 | { 617 | return _start.empty() ? 618 | npos() : 619 | _non_terminals.find(_start)->second; 620 | } 621 | 622 | void validate() 623 | { 624 | if (_grammar.empty()) 625 | { 626 | throw runtime_error("No productions are defined."); 627 | } 628 | 629 | std::size_t start_ = npos(); 630 | 631 | // Determine id of start rule 632 | if (_start.empty()) 633 | { 634 | const std::size_t id_ = _grammar[0]._lhs; 635 | 636 | _start = name_from_nt_id(id_); 637 | 638 | if (!_start.empty()) 639 | start_ = id_; 640 | } 641 | else 642 | { 643 | typename string_id_type_map::const_iterator iter_ = 644 | _non_terminals.find(_start); 645 | 646 | if (iter_ != _non_terminals.end()) 647 | start_ = iter_->second; 648 | } 649 | 650 | if (start_ == npos()) 651 | { 652 | throw runtime_error("Specified start rule does not exist."); 653 | } 654 | 655 | // Look for unused rules 656 | for (typename string_id_type_map::const_iterator iter_ = 657 | _non_terminals.begin(), end_ = _non_terminals.end(); 658 | iter_ != end_; ++iter_) 659 | { 660 | bool found_ = false; 661 | 662 | // Skip start_ 663 | if (iter_->second == start_) continue; 664 | 665 
| for (typename production_deque::const_iterator iter2_ = 666 | _grammar.begin(), end2_ = _grammar.end(); 667 | !found_ && iter2_ != end2_; ++iter2_) 668 | { 669 | if (iter_->second == iter2_->_lhs) 670 | continue; 671 | 672 | for (typename symbol_vector::const_iterator iter3_ = 673 | iter2_->_rhs._symbols.begin(), 674 | end3_ = iter2_->_rhs._symbols.end(); 675 | !found_ && iter3_ != end3_; ++iter3_) 676 | { 677 | if (iter3_->_type == symbol::NON_TERMINAL && 678 | iter3_->_id == iter_->second) 679 | { 680 | found_ = true; 681 | } 682 | } 683 | } 684 | 685 | if (!found_) 686 | { 687 | std::ostringstream ss_; 688 | const string name_ = iter_->first; 689 | 690 | ss_ << '\''; 691 | narrow(name_.c_str(), ss_); 692 | ss_ << "' is an unused rule."; 693 | throw runtime_error(ss_.str()); 694 | } 695 | } 696 | 697 | // Validate all non-terminals. 698 | for (std::size_t i_ = 0, size_ = _nt_locations.size(); 699 | i_ < size_; ++i_) 700 | { 701 | if (_nt_locations[i_]._first_production == npos()) 702 | { 703 | std::ostringstream ss_; 704 | const string name_ = name_from_nt_id(i_); 705 | 706 | ss_ << "Non-terminal '"; 707 | narrow(name_.c_str(), ss_); 708 | ss_ << "' does not have any productions."; 709 | throw runtime_error(ss_.str()); 710 | } 711 | } 712 | 713 | static const char_type accept_[] = 714 | { 715 | '$', 'a', 'c', 'c', 'e', 'p', 't', '\0' 716 | }; 717 | 718 | // Validate start rule 719 | if (_non_terminals.find(accept_) == _non_terminals.end()) 720 | { 721 | string rhs_ = _start; 722 | 723 | push_production(accept_, rhs_); 724 | _grammar.back()._rhs._symbols.push_back(symbol(symbol::TERMINAL, 725 | insert_terminal(string(1, '$')))); 726 | } 727 | 728 | _start = accept_; 729 | } 730 | 731 | const production_deque& grammar() const 732 | { 733 | return _grammar; 734 | } 735 | 736 | const token_info_vector& tokens_info() const 737 | { 738 | return _tokens_info; 739 | } 740 | 741 | const nt_location_vector& nt_locations() const 742 | { 743 | return _nt_locations; 744 | } 
745 | 746 | void terminals(string_vector& vec_) const 747 | { 748 | typename string_id_type_map::const_iterator iter_ = 749 | _terminals.begin(); 750 | typename string_id_type_map::const_iterator end_ = 751 | _terminals.end(); 752 | 753 | vec_.clear(); 754 | vec_.resize(_terminals.size()); 755 | 756 | for (; iter_ != end_; ++iter_) 757 | { 758 | vec_[iter_->second] = iter_->first; 759 | } 760 | } 761 | 762 | std::size_t terminals_count() const 763 | { 764 | return _terminals.size(); 765 | } 766 | 767 | void non_terminals(string_vector& vec_) const 768 | { 769 | const std::size_t size_ = vec_.size(); 770 | typename string_id_type_map::const_iterator iter_ = 771 | _non_terminals.begin(); 772 | typename string_id_type_map::const_iterator end_ = 773 | _non_terminals.end(); 774 | 775 | vec_.resize(size_ + _non_terminals.size()); 776 | 777 | for (; iter_ != end_; ++iter_) 778 | { 779 | vec_[size_ + iter_->second] = iter_->first; 780 | } 781 | } 782 | 783 | std::size_t non_terminals_count() const 784 | { 785 | return _non_terminals.size(); 786 | } 787 | 788 | void symbols(string_vector& vec_) const 789 | { 790 | vec_.clear(); 791 | terminals(vec_); 792 | non_terminals(vec_); 793 | } 794 | 795 | const captures_deque& captures() const 796 | { 797 | return _captures; 798 | } 799 | 800 | std::size_t npos() const 801 | { 802 | return static_cast(~0); 803 | } 804 | 805 | private: 806 | enum ebnf_indexes 807 | { 808 | rule_idx = 2, 809 | rhs_or_1_idx, 810 | rhs_or_2_idx, 811 | opt_prec_list_idx, 812 | opt_list_1_idx, 813 | opt_list_2_idx, 814 | opt_list_3_idx, 815 | rhs_list_1_idx, 816 | rhs_list_2_idx, 817 | identifier_idx, 818 | terminal_idx, 819 | optional_1_idx, 820 | optional_2_idx, 821 | zom_1_idx, 822 | zom_2_idx, 823 | oom_1_idx, 824 | oom_2_idx, 825 | bracketed_idx, 826 | prec_empty_idx, 827 | prec_ident_idx, 828 | prec_term_idx 829 | }; 830 | 831 | typedef typename lexertl::basic_rules lexer_rules; 832 | typedef typename lexertl::basic_state_machine 833 | 
lexer_state_machine; 834 | typedef typename lexertl::basic_generator lexer_generator; 836 | typedef typename lexertl::iterator > lexer_iterator; 839 | 840 | std::size_t _flags; 841 | ebnf_tables _ebnf_tables; 842 | std::size_t _next_precedence; 843 | lexer_state_machine _rule_lexer; 844 | lexer_state_machine _token_lexer; 845 | string_id_type_map _terminals; 846 | token_info_vector _tokens_info; 847 | string_id_type_map _non_terminals; 848 | nt_location_vector _nt_locations; 849 | std::map _new_rule_ids; 850 | std::set _generated_rules; 851 | string _start; 852 | production_deque _grammar; 853 | captures_deque _captures; 854 | 855 | token_info& info(const std::size_t id_) 856 | { 857 | if (_tokens_info.size() <= id_) 858 | { 859 | _tokens_info.resize(id_ + 1); 860 | } 861 | 862 | return _tokens_info[id_]; 863 | } 864 | 865 | void token(lexer_iterator& iter_, const std::size_t precedence_, 866 | const associativity associativity_, const char* func_) 867 | { 868 | lexer_iterator end_; 869 | string token_; 870 | std::size_t id_ = static_cast(~0); 871 | 872 | for (; iter_ != end_; ++iter_) 873 | { 874 | if (iter_->id == _token_lexer.npos()) 875 | { 876 | std::ostringstream ss_; 877 | 878 | ss_ << "Unrecognised char in " << func_ << "()."; 879 | throw runtime_error(ss_.str()); 880 | } 881 | 882 | token_ = iter_->str(); 883 | 884 | if (_terminals.find(token_) != _terminals.end()) 885 | throw runtime_error("token " + token_ + 886 | " is already defined."); 887 | 888 | id_ = insert_terminal(token_); 889 | 890 | token_info& token_info_ = info(id_); 891 | 892 | token_info_._precedence = precedence_; 893 | token_info_._associativity = associativity_; 894 | } 895 | } 896 | 897 | void validate(const char_type* name_) const 898 | { 899 | const char_type* start_ = name_; 900 | 901 | while (*name_) 902 | { 903 | if (!(*name_ >= 'A' && *name_ <= 'Z') && 904 | !(*name_ >= 'a' && *name_ <= 'z') && 905 | *name_ != '_' && *name_ != '.' 
&& 906 | !(*name_ >= '0' && *name_ <= '9') && 907 | *name_ != '-') 908 | { 909 | std::ostringstream ss_; 910 | 911 | ss_ << "Invalid name '"; 912 | name_ = start_; 913 | narrow(name_, ss_); 914 | ss_ << "'."; 915 | throw runtime_error(ss_.str()); 916 | } 917 | 918 | ++name_; 919 | } 920 | } 921 | 922 | id_type insert_terminal(const string& str_) 923 | { 924 | return _terminals.insert(string_id_type_pair(str_, 925 | static_cast(_terminals.size()))).first->second; 926 | } 927 | 928 | id_type insert_non_terminal(const string& str_) 929 | { 930 | return _non_terminals.insert 931 | (string_id_type_pair(str_, 932 | static_cast(_non_terminals.size()))).first->second; 933 | } 934 | 935 | const char_type* str_end(const char_type* str_) 936 | { 937 | while (*str_) ++str_; 938 | 939 | return str_; 940 | } 941 | 942 | void push_production(const string& lhs_, const string& rhs_) 943 | { 944 | const id_type lhs_id_ = insert_non_terminal(lhs_); 945 | nt_location& location_ = location(lhs_id_); 946 | lexer_iterator iter_(rhs_.c_str(), rhs_.c_str() + 947 | rhs_.size(), _rule_lexer); 948 | basic_match_results > results_; 949 | // Qualify token to prevent arg dependant lookup 950 | typedef parsertl::token token_t; 951 | typename token_t::token_vector productions_; 952 | production production_(_grammar.size()); 953 | int curr_bracket_ = 0; 954 | std::stack bracket_stack_; 955 | 956 | if (location_._first_production == npos()) 957 | { 958 | location_._first_production = production_._index; 959 | } 960 | 961 | if (location_._last_production != npos()) 962 | { 963 | _grammar[location_._last_production]._next_lhs = 964 | production_._index; 965 | } 966 | 967 | location_._last_production = production_._index; 968 | production_._lhs = lhs_id_; 969 | bison_next(_ebnf_tables, iter_, results_); 970 | 971 | while (results_.entry.action != error && 972 | results_.entry.action != accept) 973 | { 974 | if (results_.entry.action == shift) 975 | { 976 | switch (iter_->id) 977 | { 978 | case '(': 979 
| if (_captures.size() <= _grammar.size()) 980 | { 981 | resize_captures(); 982 | curr_bracket_ = 0; 983 | } 984 | 985 | ++curr_bracket_; 986 | bracket_stack_.push(curr_bracket_); 987 | _captures.back().second.push_back(std::pair 988 | (static_cast 989 | (production_._rhs._symbols.size()), 990 | static_cast(0))); 991 | break; 992 | case ')': 993 | _captures.back().second[bracket_stack_.top() - 1]. 994 | second = static_cast(production_. 995 | _rhs._symbols.size() - 1); 996 | bracket_stack_.pop(); 997 | break; 998 | case '|': 999 | { 1000 | std::size_t old_lhs_ = production_._lhs; 1001 | std::size_t index_ = _grammar.size() + 1; 1002 | nt_location& loc_ = location(old_lhs_); 1003 | 1004 | production_._next_lhs = loc_._last_production = index_; 1005 | _grammar.push_back(production_); 1006 | production_.clear(); 1007 | production_._lhs = old_lhs_; 1008 | production_._index = index_; 1009 | break; 1010 | } 1011 | } 1012 | } 1013 | else if (results_.entry.action == reduce) 1014 | { 1015 | switch (static_cast(results_.entry.param)) 1016 | { 1017 | case identifier_idx: 1018 | { 1019 | // rhs: IDENTIFIER; 1020 | const std::size_t size_ = 1021 | _ebnf_tables.yyr2[results_.entry.param]; 1022 | const std::size_t idx_ = productions_.size() - size_; 1023 | const string token_ = productions_[idx_].str(); 1024 | typename string_id_type_map::const_iterator 1025 | terminal_iter_ = _terminals.find(token_); 1026 | 1027 | if (terminal_iter_ == _terminals.end()) 1028 | { 1029 | const id_type id_ = insert_non_terminal(token_); 1030 | 1031 | // NON_TERMINAL 1032 | location(id_); 1033 | production_._rhs._symbols. 
1034 | push_back(symbol(symbol::NON_TERMINAL, id_)); 1035 | } 1036 | else 1037 | { 1038 | const std::size_t id_ = terminal_iter_->second; 1039 | token_info& token_info_ = info(id_); 1040 | 1041 | if (token_info_._precedence) 1042 | { 1043 | production_._precedence = 1044 | token_info_._precedence; 1045 | production_._associativity = 1046 | token_info_._associativity; 1047 | } 1048 | 1049 | production_._rhs._symbols. 1050 | push_back(symbol(symbol::TERMINAL, id_)); 1051 | } 1052 | 1053 | break; 1054 | } 1055 | case terminal_idx: 1056 | { 1057 | // rhs: TERMINAL; 1058 | const std::size_t size_ = 1059 | _ebnf_tables.yyr2[results_.entry.param]; 1060 | const std::size_t idx_ = productions_.size() - size_; 1061 | const string token_ = productions_[idx_].str(); 1062 | const std::size_t id_ = insert_terminal(token_); 1063 | token_info& token_info_ = info(id_); 1064 | 1065 | if (token_info_._precedence) 1066 | { 1067 | production_._precedence = token_info_._precedence; 1068 | production_._associativity = 1069 | token_info_._associativity; 1070 | } 1071 | 1072 | production_._rhs._symbols.push_back(symbol(symbol:: 1073 | TERMINAL, id_)); 1074 | break; 1075 | } 1076 | case prec_ident_idx: 1077 | case prec_term_idx: 1078 | { 1079 | // opt_prec: PREC IDENTIFIER; 1080 | // opt_prec: PREC TERMINAL; 1081 | const std::size_t size_ = 1082 | _ebnf_tables.yyr2[results_.entry.param]; 1083 | const std::size_t idx_ = productions_.size() - size_; 1084 | const string token_ = productions_[idx_ + 1].str(); 1085 | const id_type id_ = token_id(token_); 1086 | token_info& token_info_ = info(id_); 1087 | 1088 | // Explicit %prec, so no conditional 1089 | production_._precedence = token_info_._precedence; 1090 | production_._associativity = 1091 | token_info_._associativity; 1092 | production_._rhs._prec = token_; 1093 | break; 1094 | } 1095 | } 1096 | } 1097 | 1098 | bison_lookup(_ebnf_tables, iter_, results_, productions_); 1099 | bison_next(_ebnf_tables, iter_, results_); 1100 | } 1101 | 1102 
| // As rules passed in are generated, 1103 | // they have already been validated. 1104 | assert(results_.entry.action == accept); 1105 | _grammar.push_back(production_); 1106 | } 1107 | 1108 | void resize_captures() 1109 | { 1110 | const std::size_t old_size_ = _captures.size(); 1111 | 1112 | _captures.resize(_grammar.size() + 1); 1113 | 1114 | if (old_size_ > 0) 1115 | { 1116 | for (std::size_t i_ = old_size_ - 1, 1117 | size_ = _captures.size() - 1; i_ < size_; ++i_) 1118 | { 1119 | _captures[i_ + 1].first = _captures[i_].first + 1120 | _captures[i_].second.size(); 1121 | } 1122 | } 1123 | } 1124 | 1125 | nt_location& location(const std::size_t id_) 1126 | { 1127 | if (_nt_locations.size() <= id_) 1128 | { 1129 | _nt_locations.resize(id_ + 1); 1130 | } 1131 | 1132 | return _nt_locations[id_]; 1133 | } 1134 | }; 1135 | 1136 | typedef basic_rules rules; 1137 | typedef basic_rules wrules; 1138 | } 1139 | 1140 | #endif 1141 | -------------------------------------------------------------------------------- /include/parsertl/runtime_error.hpp: -------------------------------------------------------------------------------- 1 | // runtime_error.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_RUNTIME_ERROR_HPP 7 | #define PARSERTL_RUNTIME_ERROR_HPP 8 | 9 | #include 10 | 11 | namespace parsertl 12 | { 13 | class runtime_error : public std::runtime_error 14 | { 15 | public: 16 | runtime_error(const std::string& what_arg_) : 17 | std::runtime_error(what_arg_) 18 | { 19 | } 20 | }; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /include/parsertl/search.hpp: -------------------------------------------------------------------------------- 1 | // search.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_SEARCH_HPP 7 | #define PARSERTL_SEARCH_HPP 8 | 9 | #include "capture.hpp" 10 | #include 11 | #include "match_results.hpp" 12 | #include "parse.hpp" 13 | #include 14 | #include "token.hpp" 15 | 16 | namespace parsertl 17 | { 18 | // Forward declarations: 19 | namespace details 20 | { 21 | template 22 | void next(lexer_iterator& iter_, const sm_type& sm_, 23 | basic_match_results& results_, 24 | std::set* prod_set_, 25 | lexer_iterator& last_eoi_, 26 | basic_match_results& last_results_); 27 | template 29 | void next(lexer_iterator& iter_, const sm_type& sm_, 30 | basic_match_results& results_, lexer_iterator& last_eoi_, 31 | token_vector& productions_); 32 | template 33 | bool parse(lexer_iterator& iter_, const sm_type& sm_, 34 | basic_match_results& results_, 35 | std::set* prod_set_); 36 | template 38 | bool parse(lexer_iterator& iter_, const sm_type& sm_, 39 | basic_match_results& results_, token_vector& productions_, 40 | std::multimap* prod_map_); 41 | template 43 | bool parse(lexer_iterator& iter_, const sm_type& sm_, 44 | basic_match_results& results_, token_vector& 
productions_, 45 | std::vector >* 46 | prod_vec_); 47 | } 48 | 49 | template 50 | bool search(lexer_iterator& iter_, lexer_iterator& end_, const sm_type& sm_, 51 | captures& captures_) 52 | { 53 | basic_match_results results_(iter_->id, sm_); 54 | // Qualify token to prevent arg dependant lookup 55 | typedef parsertl::token token; 56 | typedef typename token::token_vector token_vector; 57 | typedef std::multimap prod_map; 58 | prod_map prod_map_; 59 | bool success_ = search(iter_, end_, sm_, &prod_map_); 60 | 61 | captures_.clear(); 62 | 63 | if (success_) 64 | { 65 | typename token::iter_type last_ = iter_->first; 66 | typename prod_map::const_iterator pi_ = prod_map_.begin(); 67 | typename prod_map::const_iterator pe_ = prod_map_.end(); 68 | 69 | captures_.resize((sm_._captures.empty() ? 0 : 70 | sm_._captures.back().first + 71 | sm_._captures.back().second.size()) + 1); 72 | captures_[0].push_back(capture 73 | (iter_->first, iter_->first)); 74 | 75 | for (; pi_ != pe_; ++pi_) 76 | { 77 | if (sm_._captures.size() > pi_->first) 78 | { 79 | const typename sm_type::capture& row_ = 80 | sm_._captures[pi_->first]; 81 | 82 | if (!row_.second.empty()) 83 | { 84 | typedef typename sm_type::capture_vector capture_vector; 85 | typename capture_vector::const_iterator ti_ = 86 | row_.second.begin(); 87 | typename capture_vector::const_iterator te_ = 88 | row_.second.end(); 89 | std::size_t index_ = 0; 90 | 91 | for (; ti_ != te_; ++ti_) 92 | { 93 | const token& token1_ = pi_->second[ti_->first]; 94 | const token& token2_ = pi_->second[ti_->second]; 95 | 96 | captures_[row_.first + index_ + 1]. 
97 | push_back(capture 98 | (token1_.first, token2_.second)); 99 | ++index_; 100 | } 101 | } 102 | } 103 | } 104 | 105 | pi_ = prod_map_.begin(); 106 | pe_ = prod_map_.end(); 107 | 108 | for (; pi_ != pe_; ++pi_) 109 | { 110 | typename token::iter_type sec_ = pi_->second.back().second; 111 | 112 | if (sec_ > last_) 113 | { 114 | last_ = sec_; 115 | } 116 | } 117 | 118 | captures_.front().back().second = last_; 119 | } 120 | 121 | return success_; 122 | } 123 | 124 | // Equivalent of std::search(). 125 | template 126 | bool search(lexer_iterator& iter_, lexer_iterator& end_, const sm_type& sm_, 127 | std::set* prod_set_ = 0) 128 | { 129 | bool hit_ = false; 130 | lexer_iterator curr_ = iter_; 131 | lexer_iterator last_eoi_; 132 | // results_ defined here so that allocated memory can be reused. 133 | basic_match_results results_; 134 | basic_match_results last_results_; 135 | 136 | end_ = lexer_iterator(); 137 | 138 | while (curr_ != end_) 139 | { 140 | if (prod_set_) 141 | { 142 | prod_set_->clear(); 143 | } 144 | 145 | results_.reset(curr_->id, sm_); 146 | last_results_.clear(); 147 | 148 | while (results_.entry.action != accept && 149 | results_.entry.action != error) 150 | { 151 | details::next(curr_, sm_, results_, prod_set_, last_eoi_, 152 | last_results_); 153 | } 154 | 155 | hit_ = results_.entry.action == accept; 156 | 157 | if (hit_) 158 | { 159 | end_ = curr_; 160 | break; 161 | } 162 | else if (last_eoi_->id != 0) 163 | { 164 | lexer_iterator eoi_; 165 | 166 | hit_ = details::parse(eoi_, sm_, last_results_, prod_set_); 167 | 168 | if (hit_) 169 | { 170 | end_ = last_eoi_; 171 | break; 172 | } 173 | } 174 | 175 | if (iter_->id != 0) 176 | ++iter_; 177 | 178 | curr_ = iter_; 179 | } 180 | 181 | return hit_; 182 | } 183 | 184 | template 185 | bool search(lexer_iterator& iter_, lexer_iterator& end_, const sm_type& sm_, 186 | std::multimap* 187 | prod_map_ = 0) 188 | { 189 | bool hit_ = false; 190 | lexer_iterator curr_ = iter_; 191 | lexer_iterator last_eoi_; 
192 | // results_ and productions_ defined here so that 193 | // allocated memory can be reused. 194 | basic_match_results results_; 195 | token_vector productions_; 196 | 197 | end_ = lexer_iterator(); 198 | 199 | while (curr_ != end_) 200 | { 201 | if (prod_map_) 202 | { 203 | prod_map_->clear(); 204 | } 205 | 206 | results_.reset(curr_->id, sm_); 207 | productions_.clear(); 208 | 209 | while (results_.entry.action != accept && 210 | results_.entry.action != error) 211 | { 212 | details::next(curr_, sm_, results_, last_eoi_, productions_); 213 | } 214 | 215 | hit_ = results_.entry.action == accept; 216 | 217 | if (hit_) 218 | { 219 | if (prod_map_) 220 | { 221 | lexer_iterator again_(iter_->first, last_eoi_->first, 222 | iter_.sm()); 223 | 224 | results_.reset(iter_->id, sm_); 225 | productions_.clear(); 226 | details::parse(again_, sm_, results_, productions_, 227 | prod_map_); 228 | } 229 | 230 | end_ = curr_; 231 | break; 232 | } 233 | else if (last_eoi_->id != 0) 234 | { 235 | lexer_iterator again_(iter_->first, last_eoi_->first, 236 | iter_.sm()); 237 | 238 | results_.reset(iter_->id, sm_); 239 | productions_.clear(); 240 | hit_ = details::parse(again_, sm_, results_, productions_, 241 | prod_map_); 242 | 243 | if (hit_) 244 | { 245 | end_ = last_eoi_; 246 | break; 247 | } 248 | } 249 | 250 | if (iter_->id != 0) 251 | ++iter_; 252 | 253 | curr_ = iter_; 254 | } 255 | 256 | return hit_; 257 | } 258 | 259 | template 260 | bool search(lexer_iterator& iter_, lexer_iterator& end_, const sm_type& sm_, 261 | std::vector >* 262 | prod_vec_ = 0) 263 | { 264 | bool hit_ = false; 265 | lexer_iterator curr_ = iter_; 266 | lexer_iterator last_eoi_; 267 | // results_ and productions_ defined here so that 268 | // allocated memory can be reused. 
269 | basic_match_results results_; 270 | token_vector productions_; 271 | 272 | end_ = lexer_iterator(); 273 | 274 | while (curr_ != end_) 275 | { 276 | if (prod_vec_) 277 | { 278 | prod_vec_->clear(); 279 | } 280 | 281 | results_.reset(curr_->id, sm_); 282 | productions_.clear(); 283 | 284 | while (results_.entry.action != accept && 285 | results_.entry.action != error) 286 | { 287 | details::next(curr_, sm_, results_, last_eoi_, productions_); 288 | } 289 | 290 | hit_ = results_.entry.action == accept; 291 | 292 | if (hit_) 293 | { 294 | if (prod_vec_) 295 | { 296 | lexer_iterator again_(iter_->first, last_eoi_->first, 297 | iter_.sm()); 298 | 299 | results_.reset(iter_->id, sm_); 300 | productions_.clear(); 301 | details::parse(again_, sm_, results_, productions_, 302 | prod_vec_); 303 | } 304 | 305 | end_ = curr_; 306 | break; 307 | } 308 | else if (last_eoi_->id != 0) 309 | { 310 | lexer_iterator again_(iter_->first, last_eoi_->first, 311 | iter_.sm()); 312 | 313 | results_.reset(iter_->id, sm_); 314 | productions_.clear(); 315 | hit_ = details::parse(again_, sm_, results_, productions_, 316 | prod_vec_); 317 | 318 | if (hit_) 319 | { 320 | end_ = last_eoi_; 321 | break; 322 | } 323 | } 324 | 325 | if (iter_->id != 0) 326 | ++iter_; 327 | 328 | curr_ = iter_; 329 | } 330 | 331 | return hit_; 332 | } 333 | 334 | namespace details 335 | { 336 | template 337 | void next(lexer_iterator& iter_, const sm_type& sm_, 338 | basic_match_results& results_, 339 | std::set* prod_set_, 340 | lexer_iterator& last_eoi_, 341 | basic_match_results& last_results_) 342 | { 343 | switch (results_.entry.action) 344 | { 345 | case shift: 346 | { 347 | const typename sm_type::entry eoi_ = 348 | sm_.at(results_.entry.param); 349 | 350 | results_.stack.push_back(results_.entry.param); 351 | 352 | if (iter_->id != 0) 353 | ++iter_; 354 | 355 | results_.token_id = iter_->id; 356 | 357 | if (results_.token_id == lexer_iterator::value_type::npos()) 358 | { 359 | results_.entry.action = 
error; 360 | results_.entry.param = unknown_token; 361 | } 362 | else 363 | { 364 | results_.entry = 365 | sm_.at(results_.entry.param, results_.token_id); 366 | } 367 | 368 | if (eoi_.action != error) 369 | { 370 | last_eoi_ = iter_; 371 | last_results_.stack = results_.stack; 372 | last_results_.token_id = 0; 373 | last_results_.entry = eoi_; 374 | } 375 | 376 | break; 377 | } 378 | case reduce: 379 | { 380 | const std::size_t size_ = 381 | sm_._rules[results_.entry.param]._rhs.size(); 382 | 383 | if (prod_set_) 384 | { 385 | prod_set_->insert(results_.entry.param); 386 | } 387 | 388 | if (size_) 389 | { 390 | results_.stack.resize(results_.stack.size() - size_); 391 | } 392 | 393 | results_.token_id = sm_._rules[results_.entry.param]._lhs; 394 | results_.entry = 395 | sm_.at(results_.stack.back(), results_.token_id); 396 | break; 397 | } 398 | case go_to: 399 | results_.stack.push_back(results_.entry.param); 400 | results_.token_id = iter_->id; 401 | results_.entry = 402 | sm_.at(results_.stack.back(), results_.token_id); 403 | break; 404 | case accept: 405 | { 406 | const std::size_t size_ = 407 | sm_._rules[results_.entry.param]._rhs.size(); 408 | 409 | if (size_) 410 | { 411 | results_.stack.resize(results_.stack.size() - size_); 412 | } 413 | 414 | break; 415 | } 416 | default: 417 | // error 418 | break; 419 | } 420 | } 421 | 422 | template 424 | void next(lexer_iterator& iter_, const sm_type& sm_, 425 | basic_match_results& results_, lexer_iterator& last_eoi_, 426 | token_vector& productions_) 427 | { 428 | switch (results_.entry.action) 429 | { 430 | case shift: 431 | { 432 | const typename sm_type::entry eoi_ = 433 | sm_.at(results_.entry.param); 434 | 435 | results_.stack.push_back(results_.entry.param); 436 | productions_.push_back(typename token_vector:: 437 | value_type(iter_->id, iter_->first, iter_->second)); 438 | 439 | if (iter_->id != 0) 440 | ++iter_; 441 | 442 | results_.token_id = iter_->id; 443 | 444 | if (results_.token_id == 
lexer_iterator::value_type::npos()) 445 | { 446 | results_.entry.action = error; 447 | results_.entry.param = unknown_token; 448 | } 449 | else 450 | { 451 | results_.entry = 452 | sm_.at(results_.entry.param, results_.token_id); 453 | } 454 | 455 | if (eoi_.action != error) 456 | { 457 | last_eoi_ = iter_; 458 | } 459 | 460 | break; 461 | } 462 | case reduce: 463 | { 464 | const std::size_t size_ = 465 | sm_._rules[results_.entry.param]._rhs.size(); 466 | token token_; 467 | 468 | if (size_) 469 | { 470 | token_.first = (productions_.end() - size_)->first; 471 | token_.second = productions_.back().second; 472 | results_.stack.resize(results_.stack.size() - size_); 473 | productions_.resize(productions_.size() - size_); 474 | } 475 | else 476 | { 477 | if (productions_.empty()) 478 | { 479 | token_.first = token_.second = iter_->first; 480 | } 481 | else 482 | { 483 | token_.first = token_.second = 484 | productions_.back().second; 485 | } 486 | } 487 | 488 | results_.token_id = sm_._rules[results_.entry.param]._lhs; 489 | results_.entry = 490 | sm_.at(results_.stack.back(), results_.token_id); 491 | token_.id = results_.token_id; 492 | productions_.push_back(token_); 493 | break; 494 | } 495 | case go_to: 496 | results_.stack.push_back(results_.entry.param); 497 | results_.token_id = iter_->id; 498 | results_.entry = 499 | sm_.at(results_.stack.back(), results_.token_id); 500 | break; 501 | case accept: 502 | { 503 | const std::size_t size_ = 504 | sm_._rules[results_.entry.param]._rhs.size(); 505 | 506 | if (size_) 507 | { 508 | results_.stack.resize(results_.stack.size() - size_); 509 | } 510 | 511 | break; 512 | } 513 | default: 514 | // error 515 | break; 516 | } 517 | } 518 | 519 | template 520 | bool parse(lexer_iterator& iter_, const sm_type& sm_, 521 | basic_match_results& results_, 522 | std::set* prod_set_) 523 | { 524 | while (results_.entry.action != error) 525 | { 526 | switch (results_.entry.action) 527 | { 528 | case shift: 529 | 
results_.stack.push_back(results_.entry.param); 530 | 531 | if (iter_->id != 0) 532 | ++iter_; 533 | 534 | results_.token_id = iter_->id; 535 | 536 | if (results_.token_id == lexer_iterator::value_type::npos()) 537 | { 538 | results_.entry.action = error; 539 | results_.entry.param = unknown_token; 540 | } 541 | else 542 | { 543 | results_.entry = 544 | sm_.at(results_.stack.back(), results_.token_id); 545 | } 546 | 547 | break; 548 | case reduce: 549 | { 550 | const std::size_t size_ = 551 | sm_._rules[results_.entry.param]._rhs.size(); 552 | 553 | if (prod_set_) 554 | { 555 | prod_set_->insert(results_.entry.param); 556 | } 557 | 558 | if (size_) 559 | { 560 | results_.stack.resize(results_.stack.size() - size_); 561 | } 562 | 563 | results_.token_id = sm_._rules[results_.entry.param]._lhs; 564 | results_.entry = 565 | sm_.at(results_.stack.back(), results_.token_id); 566 | break; 567 | } 568 | case go_to: 569 | results_.stack.push_back(results_.entry.param); 570 | results_.token_id = iter_->id; 571 | results_.entry = 572 | sm_.at(results_.stack.back(), results_.token_id); 573 | break; 574 | default: 575 | // error 576 | break; 577 | } 578 | 579 | if (results_.entry.action == accept) 580 | { 581 | const std::size_t size_ = 582 | sm_._rules[results_.entry.param]._rhs.size(); 583 | 584 | if (size_) 585 | { 586 | results_.stack.resize(results_.stack.size() - size_); 587 | } 588 | 589 | break; 590 | } 591 | } 592 | 593 | return results_.entry.action == accept; 594 | } 595 | 596 | template 598 | bool parse(lexer_iterator& iter_, const sm_type& sm_, 599 | basic_match_results& results_, token_vector& productions_, 600 | std::multimap* prod_map_) 601 | { 602 | while (results_.entry.action != error) 603 | { 604 | switch (results_.entry.action) 605 | { 606 | case shift: 607 | results_.stack.push_back(results_.entry.param); 608 | productions_.push_back(typename token_vector:: 609 | value_type(iter_->id, iter_->first, iter_->second)); 610 | 611 | if (iter_->id != 0) 612 | 
++iter_; 613 | 614 | results_.token_id = iter_->id; 615 | 616 | if (results_.token_id == lexer_iterator::value_type::npos()) 617 | { 618 | results_.entry.action = error; 619 | results_.entry.param = unknown_token; 620 | } 621 | else 622 | { 623 | results_.entry = 624 | sm_.at(results_.stack.back(), results_.token_id); 625 | } 626 | 627 | break; 628 | case reduce: 629 | { 630 | const std::size_t size_ = 631 | sm_._rules[results_.entry.param]._rhs.size(); 632 | token token_; 633 | 634 | if (size_) 635 | { 636 | if (prod_map_) 637 | { 638 | prod_map_->insert(std::pair 639 | 640 | (results_.entry.param, 641 | token_vector(productions_.end() - size_, 642 | productions_.end()))); 643 | } 644 | 645 | token_.first = (productions_.end() - size_)->first; 646 | token_.second = productions_.back().second; 647 | results_.stack.resize(results_.stack.size() - size_); 648 | productions_.resize(productions_.size() - size_); 649 | } 650 | else 651 | { 652 | if (productions_.empty()) 653 | { 654 | token_.first = token_.second = iter_->first; 655 | } 656 | else 657 | { 658 | token_.first = token_.second = 659 | productions_.back().second; 660 | } 661 | } 662 | 663 | results_.token_id = sm_._rules[results_.entry.param]._lhs; 664 | results_.entry = 665 | sm_.at(results_.stack.back(), results_.token_id); 666 | token_.id = results_.token_id; 667 | productions_.push_back(token_); 668 | break; 669 | } 670 | case go_to: 671 | results_.stack.push_back(results_.entry.param); 672 | results_.token_id = iter_->id; 673 | results_.entry = 674 | sm_.at(results_.stack.back(), results_.token_id); 675 | break; 676 | default: 677 | // accept 678 | // error 679 | break; 680 | } 681 | 682 | if (results_.entry.action == accept) 683 | { 684 | const std::size_t size_ = 685 | sm_._rules[results_.entry.param]._rhs.size(); 686 | 687 | if (size_) 688 | { 689 | results_.stack.resize(results_.stack.size() - size_); 690 | } 691 | 692 | break; 693 | } 694 | } 695 | 696 | return results_.entry.action == accept; 697 | 
} 698 | 699 | template 701 | bool parse(lexer_iterator& iter_, const sm_type& sm_, 702 | basic_match_results& results_, token_vector& productions_, 703 | std::vector >* 704 | prod_vec_) 705 | { 706 | while (results_.entry.action != error) 707 | { 708 | switch (results_.entry.action) 709 | { 710 | case shift: 711 | results_.stack.push_back(results_.entry.param); 712 | productions_.emplace_back(iter_->id, iter_->first, 713 | iter_->second); 714 | 715 | if (iter_->id != 0) 716 | ++iter_; 717 | 718 | results_.token_id = iter_->id; 719 | 720 | if (results_.token_id == lexer_iterator::value_type::npos()) 721 | { 722 | results_.entry.action = error; 723 | results_.entry.param = 724 | static_cast 725 | (unknown_token); 726 | } 727 | else 728 | { 729 | results_.entry = 730 | sm_.at(results_.stack.back(), results_.token_id); 731 | } 732 | 733 | break; 734 | case reduce: 735 | { 736 | const std::size_t size_ = 737 | sm_._rules[results_.entry.param]._rhs.size(); 738 | token token_; 739 | 740 | if (size_) 741 | { 742 | if (prod_vec_) 743 | { 744 | prod_vec_->emplace_back(results_.entry. 
745 | param, token_vector(productions_.end() - size_, 746 | productions_.end())); 747 | } 748 | 749 | token_.first = (productions_.end() - size_)->first; 750 | token_.second = productions_.back().second; 751 | results_.stack.resize(results_.stack.size() - size_); 752 | productions_.resize(productions_.size() - size_); 753 | } 754 | else 755 | { 756 | if (productions_.empty()) 757 | { 758 | token_.first = token_.second = iter_->first; 759 | } 760 | else 761 | { 762 | token_.first = token_.second = 763 | productions_.back().second; 764 | } 765 | } 766 | 767 | results_.token_id = sm_._rules[results_.entry.param]._lhs; 768 | results_.entry = 769 | sm_.at(results_.stack.back(), results_.token_id); 770 | token_.id = results_.token_id; 771 | productions_.push_back(token_); 772 | break; 773 | } 774 | case go_to: 775 | results_.stack.push_back(results_.entry.param); 776 | results_.token_id = iter_->id; 777 | results_.entry = 778 | sm_.at(results_.stack.back(), results_.token_id); 779 | break; 780 | default: 781 | // accept 782 | // error 783 | break; 784 | } 785 | 786 | if (results_.entry.action == accept) 787 | { 788 | const std::size_t size_ = sm_._rules[results_.entry.param]. 789 | _rhs.size(); 790 | 791 | if (size_) 792 | { 793 | results_.stack.resize(results_.stack.size() - size_); 794 | } 795 | 796 | break; 797 | } 798 | } 799 | 800 | return results_.entry.action == accept; 801 | } 802 | } 803 | } 804 | 805 | #endif 806 | -------------------------------------------------------------------------------- /include/parsertl/search_iterator.hpp: -------------------------------------------------------------------------------- 1 | // iterator.hpp 2 | // Copyright (c) 2018-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_SEARCH_ITERATOR_HPP 7 | #define PARSERTL_SEARCH_ITERATOR_HPP 8 | 9 | #include "capture.hpp" 10 | #include 11 | #include "match_results.hpp" 12 | #include "search.hpp" 13 | 14 | namespace parsertl 15 | { 16 | template 18 | class search_iterator 19 | { 20 | public: 21 | typedef typename lexer_iterator::value_type::iter_type iter_type; 22 | typedef std::vector > > results; 23 | typedef results value_type; 24 | typedef ptrdiff_t difference_type; 25 | typedef const value_type* pointer; 26 | typedef const value_type& reference; 27 | typedef std::forward_iterator_tag iterator_category; 28 | 29 | search_iterator() : 30 | _sm(0) 31 | { 32 | } 33 | 34 | search_iterator(const lexer_iterator& iter_, const sm_type& sm_) : 35 | _iter(iter_), 36 | _sm(&sm_) 37 | { 38 | _captures.push_back(std::vector >()); 39 | _captures.back().push_back(capture 40 | (iter_->first, iter_->first)); 41 | lookup(); 42 | } 43 | 44 | search_iterator& operator ++() 45 | { 46 | lookup(); 47 | return *this; 48 | } 49 | 50 | search_iterator operator ++(int) 51 | { 52 | search_iterator iter_ = *this; 53 | 54 | lookup(); 55 | return iter_; 56 | } 57 | 58 | const value_type& operator *() const 59 | { 60 | return _captures; 61 | } 62 | 63 | const value_type* operator ->() const 64 | { 65 | return &_captures; 66 | } 67 | 68 | bool operator ==(const search_iterator& rhs_) const 69 | { 70 | return _sm == rhs_._sm && 71 | (_sm == 0 ? 
true : 72 | _captures == rhs_._captures); 73 | } 74 | 75 | bool operator !=(const search_iterator& rhs_) const 76 | { 77 | return !(*this == rhs_); 78 | } 79 | 80 | private: 81 | lexer_iterator _iter; 82 | results _captures; 83 | const sm_type* _sm; 84 | 85 | void lookup() 86 | { 87 | lexer_iterator end; 88 | 89 | _captures.clear(); 90 | 91 | if (search(_iter, end, *_sm, _captures)) 92 | { 93 | _iter = end; 94 | } 95 | else 96 | { 97 | _sm = 0; 98 | } 99 | } 100 | }; 101 | 102 | typedef search_iterator 103 | ssearch_iterator; 104 | typedef search_iterator 105 | csearch_iterator; 106 | typedef search_iterator 107 | wssearch_iterator; 108 | typedef search_iterator 109 | wcsearch_iterator; 110 | } 111 | 112 | #endif 113 | -------------------------------------------------------------------------------- /include/parsertl/serialise.hpp: -------------------------------------------------------------------------------- 1 | // serialise.hpp 2 | // Copyright (c) 2007-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_SERIALISE_HPP 7 | #define PARSERTL_SERIALISE_HPP 8 | 9 | #include "runtime_error.hpp" 10 | #include 11 | #include "state_machine.hpp" 12 | 13 | namespace parsertl 14 | { 15 | template 16 | void save(const basic_state_machine& sm_, stream& stream_) 17 | { 18 | typedef basic_state_machine sm_type; 19 | 20 | // Version number 21 | stream_ << 1 << '\n'; 22 | stream_ << sizeof(id_type) << '\n'; 23 | stream_ << sm_._columns << '\n'; 24 | stream_ << sm_._rows << '\n'; 25 | stream_ << sm_._rules.size() << '\n'; 26 | 27 | for (std::size_t idx_ = 0, size_ = sm_._rules.size(); 28 | idx_ < size_; ++idx_) 29 | { 30 | const typename sm_type::id_type_vector_pair& rule_ = 31 | sm_._rules[idx_]; 32 | 33 | stream_ << rule_._lhs << '\n'; 34 | lexertl::detail::output_vec(rule_._rhs, stream_); 35 | } 36 | 37 | stream_ << sm_._captures.size() << '\n'; 38 | 39 | for (std::size_t idx_ = 0, size_ = sm_._captures.size(); 40 | idx_ < size_; ++idx_) 41 | { 42 | const typename sm_type::capture& capture_ = sm_._captures[idx_]; 43 | 44 | stream_ << capture_.first << '\n'; 45 | stream_ << capture_.second.size() << '\n'; 46 | 47 | for (std::size_t idx2_ = 0, size2_ = capture_.second.size(); 48 | idx2_ < size2_; ++idx2_) 49 | { 50 | const typename sm_type::id_type_pair& pair_ = 51 | capture_.second[idx2_]; 52 | 53 | stream_ << pair_.first << ' ' << pair_.second << '\n'; 54 | } 55 | } 56 | 57 | stream_ << sm_._table.size() << '\n'; 58 | 59 | for (std::size_t idx_ = 0, size_ = sm_._table.size(); 60 | idx_ < size_; ++idx_) 61 | { 62 | const typename sm_type::pair_vector& vec_ = 63 | sm_._table[idx_]; 64 | 65 | stream_ << vec_.size() << '\n'; 66 | 67 | for (std::size_t idx2_ = 0, size2_ = vec_.size(); 68 | idx2_ < size2_; ++idx2_) 69 | { 70 | const typename sm_type::state_pair& pair_ = vec_[idx2_]; 71 | 72 | stream_ << pair_._id << ' '; 73 | stream_ << static_cast(pair_._entry.action) << 
' '; 74 | stream_ << pair_._entry.param << '\n'; 75 | } 76 | } 77 | } 78 | 79 | template 80 | void load(stream& stream_, basic_state_machine& sm_) 81 | { 82 | typedef basic_state_machine sm_type; 83 | std::size_t num_ = 0; 84 | 85 | sm_.clear(); 86 | // Version 87 | stream_ >> num_; 88 | // sizeof(id_type) 89 | stream_ >> num_; 90 | 91 | if (num_ != sizeof(id_type)) 92 | throw runtime_error("id_type mismatch in parsertl::load()"); 93 | 94 | stream_ >> sm_._columns; 95 | stream_ >> sm_._rows; 96 | stream_ >> num_; 97 | 98 | for (std::size_t idx_ = 0; idx_ < num_; ++idx_) 99 | { 100 | sm_._rules.push_back(typename sm_type::id_type_vector_pair()); 101 | 102 | typename sm_type::id_type_vector_pair& rule_ = sm_._rules.back(); 103 | 104 | stream_ >> rule_._lhs; 105 | lexertl::detail::input_vec(stream_, rule_._rhs); 106 | } 107 | 108 | stream_ >> num_; 109 | 110 | for (std::size_t idx_ = 0, rows_ = num_; idx_ < rows_; ++idx_) 111 | { 112 | sm_._captures.push_back(typename sm_type::capture()); 113 | 114 | typename sm_type::capture& capture_ = sm_._captures.back(); 115 | 116 | stream_ >> capture_.first; 117 | stream_ >> num_; 118 | capture_.second.reserve(num_); 119 | 120 | for (std::size_t idx2_ = 0, entries_ = num_; 121 | idx2_ < entries_; ++idx2_) 122 | { 123 | capture_.second.push_back(typename sm_type::id_type_pair()); 124 | 125 | typename sm_type::id_type_pair& pair_ = capture_.second.back(); 126 | 127 | stream_ >> num_; 128 | pair_.first = static_cast(num_); 129 | stream_ >> num_; 130 | pair_.second = static_cast(num_); 131 | } 132 | } 133 | 134 | stream_ >> num_; 135 | sm_._table.reserve(num_); 136 | 137 | for (std::size_t idx_ = 0, rows_ = num_; idx_ < rows_; ++idx_) 138 | { 139 | sm_._table.push_back(typename sm_type::pair_vector()); 140 | 141 | typename sm_type::pair_vector& vec_ = sm_._table.back(); 142 | 143 | stream_ >> num_; 144 | vec_.reserve(num_); 145 | 146 | for (std::size_t idx2_ = 0, entries_ = num_; 147 | idx2_ < entries_; ++idx2_) 148 | { 149 | 
vec_.push_back(typename sm_type::state_pair()); 150 | 151 | typename sm_type::state_pair& pair_ = vec_.back(); 152 | 153 | stream_ >> num_; 154 | pair_._id = static_cast(num_); 155 | stream_ >> num_; 156 | pair_._entry.action = static_cast(num_); 157 | stream_ >> num_; 158 | pair_._entry.param = static_cast(num_); 159 | } 160 | } 161 | } 162 | } 163 | 164 | #endif 165 | -------------------------------------------------------------------------------- /include/parsertl/state_machine.hpp: -------------------------------------------------------------------------------- 1 | // state_machine.hpp 2 | // Copyright (c) 2014-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_STATE_MACHINE_HPP 7 | #define PARSERTL_STATE_MACHINE_HPP 8 | 9 | #include 10 | #include 11 | #include "enums.hpp" 12 | #include 13 | #include 14 | 15 | namespace parsertl 16 | { 17 | template 18 | struct base_state_machine 19 | { 20 | typedef id_ty id_type; 21 | typedef std::pair id_type_pair; 22 | typedef std::vector capture_vector; 23 | typedef std::pair capture; 24 | typedef std::deque captures_deque; 25 | typedef std::vector id_type_vector; 26 | 27 | struct id_type_vector_pair 28 | { 29 | id_type _lhs; 30 | id_type_vector _rhs; 31 | 32 | id_type_vector_pair() : 33 | _lhs(0) 34 | { 35 | } 36 | }; 37 | 38 | typedef std::deque rules; 39 | 40 | std::size_t _columns; 41 | std::size_t _rows; 42 | rules _rules; 43 | captures_deque _captures; 44 | 45 | // If you get a compile error here you have 46 | // failed to define an unsigned id type. 
47 | lexertl::compile_assert<(static_cast(~0) > 0)> _valid_id_type; 48 | 49 | struct entry 50 | { 51 | // Qualify action to prevent compilation error 52 | parsertl::action action; 53 | id_type param; 54 | 55 | entry() : 56 | // Qualify action to prevent compilation error 57 | action(parsertl::error), 58 | param(syntax_error) 59 | { 60 | } 61 | 62 | // Qualify action to prevent compilation error 63 | entry(const parsertl::action action_, const id_type param_) : 64 | action(action_), 65 | param(param_) 66 | { 67 | } 68 | 69 | void clear() 70 | { 71 | // Qualify action to prevent compilation error 72 | action = parsertl::error; 73 | param = syntax_error; 74 | } 75 | 76 | bool operator ==(const entry& rhs_) const 77 | { 78 | return action == rhs_.action && param == rhs_.param; 79 | } 80 | }; 81 | 82 | base_state_machine() : 83 | _columns(0), 84 | _rows(0) 85 | { 86 | } 87 | 88 | // Just in case someone wants to use a pointer to the base 89 | virtual ~base_state_machine() 90 | { 91 | } 92 | 93 | virtual void clear() 94 | { 95 | _columns = _rows = 0; 96 | _rules.clear(); 97 | _captures.clear(); 98 | } 99 | }; 100 | 101 | // Uses a vector of vectors for the state machine 102 | template 103 | struct basic_state_machine : base_state_machine 104 | { 105 | public: 106 | typedef base_state_machine base_sm; 107 | typedef id_ty id_type; 108 | typedef typename base_sm::entry entry; 109 | 110 | struct state_pair 111 | { 112 | id_type _id; 113 | entry _entry; 114 | 115 | state_pair() : 116 | _id(0) 117 | { 118 | } 119 | 120 | state_pair(const id_type id_, const entry& entry_) : 121 | _id(id_), 122 | _entry(entry_) 123 | { 124 | } 125 | }; 126 | 127 | typedef std::vector pair_vector; 128 | typedef std::vector table; 129 | 130 | table _table; 131 | 132 | // No need to specify constructor. 
133 | virtual ~basic_state_machine() 134 | { 135 | } 136 | 137 | virtual void clear() 138 | { 139 | base_sm::clear(); 140 | _table.clear(); 141 | } 142 | 143 | bool empty() const 144 | { 145 | return _table.empty(); 146 | } 147 | 148 | entry at(const std::size_t state_) const 149 | { 150 | const pair_vector& s_ = _table[state_]; 151 | typename pair_vector::const_iterator iter_ = 152 | std::find_if(s_.begin(), s_.end(), pred(0)); 153 | 154 | if (iter_ == s_.end()) 155 | return entry(); 156 | else 157 | return iter_->_entry; 158 | } 159 | 160 | entry at(const std::size_t state_, const std::size_t token_id_) const 161 | { 162 | const pair_vector& s_ = _table[state_]; 163 | typename pair_vector::const_iterator iter_ = 164 | std::find_if(s_.begin(), s_.end(), pred(token_id_)); 165 | 166 | if (iter_ == s_.end()) 167 | return entry(); 168 | else 169 | return iter_->_entry; 170 | } 171 | 172 | void set(const std::size_t state_, const std::size_t token_id_, 173 | const entry& entry_) 174 | { 175 | pair_vector& s_ = _table[state_]; 176 | typename pair_vector::iterator iter_ = std::find_if(s_.begin(), 177 | s_.end(), pred(token_id_)); 178 | 179 | if (iter_ == s_.end()) 180 | s_.push_back(state_pair(static_cast 181 | (token_id_), entry_)); 182 | else 183 | iter_->_entry = entry_; 184 | } 185 | 186 | void push() 187 | { 188 | _table.resize(base_sm::_rows); 189 | } 190 | 191 | private: 192 | struct pred 193 | { 194 | std::size_t _token_id; 195 | 196 | pred(const std::size_t token_id_) : 197 | _token_id(token_id_) 198 | { 199 | } 200 | 201 | bool operator()(const state_pair& pair) 202 | { 203 | return _token_id == pair._id; 204 | } 205 | }; 206 | }; 207 | 208 | // Uses uncompressed 2d array for state machine 209 | template 210 | struct basic_uncompressed_state_machine : base_state_machine 211 | { 212 | typedef base_state_machine base_sm; 213 | typedef id_ty id_type; 214 | typedef typename base_sm::entry entry; 215 | typedef std::vector table; 216 | 217 | table _table; 218 | 219 | 
// No need to specify constructor. 220 | virtual ~basic_uncompressed_state_machine() 221 | { 222 | }; 223 | 224 | virtual void clear() 225 | { 226 | base_sm::clear(); 227 | _table.clear(); 228 | } 229 | 230 | bool empty() const 231 | { 232 | return _table.empty(); 233 | } 234 | 235 | entry at(const std::size_t state_) const 236 | { 237 | return _table[state_ * base_sm::_columns]; 238 | } 239 | 240 | entry at(const std::size_t state_, const std::size_t token_id_) const 241 | { 242 | return _table[state_ * base_sm::_columns + token_id_]; 243 | } 244 | 245 | void set(const std::size_t state_, const std::size_t token_id_, 246 | const entry& entry_) 247 | { 248 | _table[state_ * base_sm::_columns + token_id_] = entry_; 249 | } 250 | 251 | void push() 252 | { 253 | _table.resize(base_sm::_columns * base_sm::_rows); 254 | } 255 | }; 256 | 257 | typedef basic_state_machine state_machine; 258 | typedef basic_uncompressed_state_machine 259 | uncompressed_state_machine; 260 | } 261 | 262 | #endif 263 | -------------------------------------------------------------------------------- /include/parsertl/token.hpp: -------------------------------------------------------------------------------- 1 | // token.hpp 2 | // Copyright (c) 2017-2023 Ben Hanson (http://www.benhanson.net/) 3 | // 4 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | #ifndef PARSERTL_TOKEN_HPP 7 | #define PARSERTL_TOKEN_HPP 8 | 9 | #include 10 | #include 11 | 12 | namespace parsertl 13 | { 14 | template 15 | struct token 16 | { 17 | typedef typename iterator::value_type::char_type char_type; 18 | typedef typename iterator::value_type::iter_type iter_type; 19 | typedef std::basic_string string; 20 | typedef std::vector > token_vector; 21 | std::size_t id; 22 | iter_type first; 23 | iter_type second; 24 | 25 | token() : 26 | id(static_cast(~0)), 27 | first(), 28 | second() 29 | { 30 | } 31 | 32 | token(const std::size_t id_, const iter_type& first_, 33 | const iter_type& second_) : 34 | id(id_), 35 | first(first_), 36 | second(second_) 37 | { 38 | } 39 | 40 | string str() const 41 | { 42 | return string(first, second); 43 | } 44 | 45 | string substr(const std::size_t soffset_, 46 | const std::size_t eoffset_) const 47 | { 48 | return string(first + soffset_, second - eoffset_); 49 | } 50 | 51 | std::size_t length() const 52 | { 53 | return second - first; 54 | } 55 | }; 56 | } 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /tests/include_test/bison_lookup.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/bison_lookup.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/debug.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/debug.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/dfa.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/dfa.hpp" 2 | 3 | -------------------------------------------------------------------------------- 
/tests/include_test/ebnf_tables.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/ebnf_tables.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/enums.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/ebnf_tables.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/generator.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/generator.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/include_test.cpp: -------------------------------------------------------------------------------- 1 | int main() 2 | { 3 | } 4 | -------------------------------------------------------------------------------- /tests/include_test/include_test.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.6.33723.286 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "include_test", "include_test.vcxproj", "{9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}.Debug|x64.ActiveCfg = Debug|x64 17 | {9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}.Debug|x64.Build.0 = Debug|x64 18 | {9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}.Debug|x86.ActiveCfg = Debug|Win32 19 | 
{9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}.Debug|x86.Build.0 = Debug|Win32 20 | {9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}.Release|x64.ActiveCfg = Release|x64 21 | {9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}.Release|x64.Build.0 = Release|x64 22 | {9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}.Release|x86.ActiveCfg = Release|Win32 23 | {9F0B16BC-7105-406A-BAE7-7DA1F063B3DF}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {6D7B47DD-EA7C-4917-BD06-E0E1BF4C0892} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /tests/include_test/include_test.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 16.0 23 | Win32Proj 24 | {9f0b16bc-7105-406a-bae7-7da1f063b3df} 25 | includetest 26 | 10.0 27 | 28 | 29 | 30 | Application 31 | true 32 | v143 33 | Unicode 34 | 35 | 36 | Application 37 | false 38 | v143 39 | true 40 | Unicode 41 | 42 | 43 | Application 44 | true 45 | v143 46 | Unicode 47 | 48 | 49 | Application 50 | false 51 | v143 52 | true 53 | Unicode 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Level3 76 | true 77 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 78 | true 79 | 80 | 81 | Console 82 | true 83 | 84 | 85 | 86 | 87 | Level3 88 | true 89 | true 90 | true 91 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 92 | true 93 | 94 | 95 | Console 96 | true 97 | true 98 | true 99 | 100 | 101 | 102 | 103 | Level3 104 | true 105 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 106 | true 107 | 108 | 109 | Console 110 | true 111 | 112 | 113 | 114 | 115 | Level3 116 | true 117 | true 118 | 
true 119 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 120 | true 121 | 122 | 123 | Console 124 | true 125 | true 126 | true 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /tests/include_test/include_test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | Source Files 38 | 39 | 40 | Source Files 41 | 42 | 43 | Source Files 44 | 45 | 46 | Source Files 47 | 48 | 49 | Source Files 50 | 51 | 52 | Source Files 53 | 54 | 55 | Source Files 56 | 57 | 58 | Source Files 59 | 60 | 61 | Source Files 62 | 63 | 64 | Source Files 65 | 66 | 67 | Source Files 68 | 69 | 70 | Source Files 71 | 72 | 73 | Source Files 74 | 75 | 76 | Source Files 77 | 78 | 79 | Source Files 80 | 81 | 82 | Source Files 83 | 84 | 85 | -------------------------------------------------------------------------------- /tests/include_test/include_test.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /tests/include_test/iterator.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/iterator.hpp" 2 | 3 | 
-------------------------------------------------------------------------------- /tests/include_test/lookup.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/lookup.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/match.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/match.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/match_results.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/match_results.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/narrow.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/narrow.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/nt_info.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/nt_info.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/parse.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/parse.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/read_bison.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/read_bison.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/rules.cpp: -------------------------------------------------------------------------------- 1 | #include 
"../../include/parsertl/rules.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/runtime_error.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/runtime_error.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/search.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/search.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/search_iterator.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/search_iterator.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/serialise.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/serialise.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/state_machine.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/state_machine.hpp" 2 | 3 | -------------------------------------------------------------------------------- /tests/include_test/token.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/parsertl/token.hpp" 2 | 3 | --------------------------------------------------------------------------------