├── LICENSE ├── c_xprs ├── utest.cpp └── c_xprs.h ├── formula_compiler ├── code_lib.cpp ├── main.cpp ├── byte_code.h ├── parser.cpp ├── code_run.cpp ├── code_gen.cpp └── Formula Compiler using C++ BNF-like EDSL.html ├── tutorial ├── jsontest.cpp └── jsonlite.h ├── examples ├── cmd.cpp ├── cfg.cpp ├── calc.cpp └── ini.cpp ├── README.md ├── getting-started.md └── bnflite.h /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Alexander A Semjonov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /c_xprs/utest.cpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************\ 2 | * Unit test of C expression parser and calculator lib(based on BNFlite) * 3 | * Copyright (c) 2018 by Alexander A. Semjonov. ALL RIGHTS RESERVED. * 4 | * * 5 | * This code is free software: you can redistribute it and/or modify it * 6 | * under the terms of the GNU Lesser General Public License as published * 7 | * by the Free Software Foundation, either version 3 of the License, * 8 | * or (at your option) any later version. * 9 | * * 10 | * This program is distributed in the hope that it will be useful, * 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 | * GNU General Public License for more details. * 14 | * * 15 | * You should have received a copy of the GNU General Public License * 16 | * along with this program. If not, see . * 17 | \*************************************************************************/ 18 | 19 | 20 | #include 21 | #include 22 | #include "c_xprs.h" 23 | 24 | 25 | C_Xprs gramma; 26 | 27 | #define TEST_FORCE_ERR(a) test_c_xprs(#a, -1); 28 | #define TEST_C_XPRS(a) test_c_xprs(#a, (a)); 29 | 30 | static void test_c_xprs(const char *expression, int test_result) 31 | { 32 | int value; 33 | if (gramma.Evaluate(expression, value)) 34 | std::cout << "Passed: " << expression << " is " << value <<"; Test: " 35 | << test_result << (value == test_result?" 
OK": " Error") << "\n"; 36 | else if (test_result == -1) 37 | std::cout << "Passed with expected error, expression: " << expression << "\n"; 38 | else 39 | std::cout << "Not Passed: problem with expression: " << expression << "\n"; 40 | } 41 | 42 | 43 | int main() 44 | { 45 | TEST_FORCE_ERR(1 + variable =); 46 | 47 | 48 | TEST_C_XPRS(0XFFFf); 49 | 50 | TEST_C_XPRS(!(0)); 51 | 52 | TEST_C_XPRS(2+0x11); 53 | 54 | TEST_C_XPRS((2&&1)==!(!2||!1)); 55 | TEST_C_XPRS((3&0xE)==~(~3|~0Xe)); 56 | 57 | 58 | TEST_C_XPRS(1+(0? 2: 3)-1); 59 | TEST_C_XPRS(2+0xF); 60 | TEST_C_XPRS(1+(0? 2: 3)-1); 61 | TEST_C_XPRS(2+3-0x4+5); 62 | TEST_C_XPRS(((5+1)*4)+1); 63 | 64 | TEST_C_XPRS( 1 + (0&&3) + (3|4) * (1-1<0?5:6)-4<<8-7 ); 65 | 66 | return 0; 67 | } 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /formula_compiler/code_lib.cpp: -------------------------------------------------------------------------------- 1 | /****************************************************************************\ 2 | * Embedded functions for byte-code formula compiler (based on BNFLite) * 3 | * Copyright (c) 2017 Alexander A. Semjonov * 4 | * * 5 | * Permission to use, copy, modify, and distribute this software for any * 6 | * purpose with or without fee is hereby granted, provided that the above * 7 | * copyright notice and this permission notice appear in all copies. * 8 | * * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* 16 | \****************************************************************************/ 17 | #include "byte_code.h" 18 | #include "math.h" 19 | 20 | 21 | static int GetX() 22 | { 23 | static int cnt; 24 | return cnt++; 25 | } 26 | 27 | static int Series(int a) 28 | { 29 | static int cnt; 30 | return a + cnt++ % 4; 31 | } 32 | 33 | static int Pow(int a, int b) 34 | { 35 | int res = 1; 36 | while (b-- > 0) 37 | res *= a; 38 | return res; 39 | } 40 | 41 | template 42 | T TPow(V v, W w) { return (T)powf((float)v, (float)w); } 43 | 44 | 45 | struct FuncTable GFunTable[] = { 46 | { opNop, "Error", { opNop, opNop }, 0, 0, 0 }, 47 | { opInt, "GetX", { opNop, opNop, opNop }, (void*)GetX, 0, 0 }, 48 | { opInt, "Series", { opInt, opNop, opNop }, (void*)Series, 0, 0 }, 49 | { opInt, "POW", { opInt, opInt, opNop }, (void*)Pow, 0, 0 }, // static example 50 | { opFloat, "POW", { opFloat, opInt, opNop }, (void*)(void(*)())(&TPow ), 0, 0 }, // template example 51 | { opFloat, "POW", { opInt, opFloat, opNop }, (void*)(void(*)())(&TPow ), 0, 0 }, // template example 52 | #if 1 53 | { opFloat, "POW", { opFloat, opFloat, opNop }, (void*)(void(*)())(&TPow ), 0, 0 }, // template example 54 | #else 55 | { opFloat, "POW", { opFloat, opFloat, opNop }, (void*)&[](float a, float b)->float {return powf(a, b);} }, // lambda example 56 | #endif 57 | }; 58 | 59 | size_t GFunTableSize = sizeof(GFunTable) / sizeof(GFunTable[0]); 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /formula_compiler/main.cpp: -------------------------------------------------------------------------------- 1 | /****************************************************************************\ 2 | * Starter of byte-code formula compiler (based on BNFLite) * 3 | * Copyright (c) 2017 Alexander A. 
Semjonov * 4 | * * 5 | * Permission to use, copy, modify, and distribute this software for any * 6 | * purpose with or without fee is hereby granted, provided that the above * 7 | * copyright notice and this permission notice appear in all copies. * 8 | * * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * 16 | \****************************************************************************/ 17 | #include "byte_code.h" 18 | 19 | 20 | int main(int argc, char* argv[]) 21 | { 22 | std::string expression; 23 | for (int i = 1; i < argc; i++) { 24 | expression += argv[i]; 25 | } 26 | if (!expression.size()) { 27 | std::cout << "No expression\n" 28 | "Use integer/float numbers and functions to make arithmetic expression\n" 29 | "Available functions are:\n"; 30 | for (size_t i = 1; i < GFunTableSize; i++) { 31 | const char* types[] = {"?", "Int", "Float", "String"}; 32 | std::cout << " " << GFunTable[i].name; 33 | char dlim = '('; size_t j = 0; 34 | for (; j < MAX_PARAM_NUM && GFunTable[i].param[j]; j++, dlim = ',') 35 | std::cout << dlim << types[GFunTable[i].param[j] & opMaskType]; 36 | std::cout << ");\n"; 37 | } 38 | return 1; 39 | } 40 | 41 | std::list bl = bnflite_byte_code(expression); 42 | 43 | std::cout << "Byte-code: "; 44 | for (std::list::iterator itr = bl.begin(); itr != bl.end(); ++itr) 45 | std::cout << *itr << (itr != (++bl.rbegin()).base()? 
",": ";\n"); 46 | 47 | union { int val_i; float val_f; } res[4] = {0}; 48 | int err = EvaluateBC(bl, res); 49 | if (err || !bl.size()) 50 | std::cout << "running error: " << err << std::endl; 51 | else { 52 | std::cout << "result = "; 53 | for (size_t i = 0; i < sizeof(res)/sizeof(res[0]); i++) 54 | std::cout << 55 | (byte_code::toType(bl.back().type) == opInt? (float)res[i].val_i: res[i].val_f) 56 | << (i < sizeof(res)/sizeof(res[0]) - 1? ", ": ";\n"); 57 | } 58 | return err; 59 | } 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /tutorial/jsontest.cpp: -------------------------------------------------------------------------------- 1 | /****************************************************************************\ 2 | * Unit test pf JSONlite parser and repository * 3 | * Copyright (c) 2017 Alexander A. Semjonov * 4 | * * 5 | * Permission to use, copy, modify, and distribute this software for any * 6 | * purpose with or without fee is hereby granted, provided that the above * 7 | * copyright notice and this permission notice appear in all copies. * 8 | * * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* 16 | \****************************************************************************/ 17 | 18 | #include "jsonlite.h" 19 | #include 20 | #include 21 | #include 22 | 23 | using namespace std; 24 | 25 | const char* json_file = 26 | "{" 27 | "\"Array\":[" 28 | "{\"Name\" : \"Nested Object\" }," 29 | "[ 20, 10, true, -1, \"Nested Array\" ]" 30 | "]," 31 | "\"Object\":{\"name\":\"receipt\", \"dept\":30.5,\"cars\":[\"Ford\", \"KIA\", \"Fiat\"]}" 32 | "}" 33 | ; 34 | 35 | int main() 36 | { 37 | const char* tail = 0; 38 | int status = 0; 39 | Repo repo = Repo::ParseJSON(json_file, &status, &tail); 40 | if (status < 0) { 41 | cout << " Parsing error in the command, " << "error flags = {" << std::hex 42 | << (status&eRest?"eRest":"") 43 | << (status&eOver?", eOver":"") 44 | << (status&eEof?", eEof":"") 45 | << (status&eBadRule?", eBadRule":"") 46 | << (status&eBadLexem?", eBadLexem":"") 47 | << (status&eSyntax?", eSyntax":"") 48 | << (status&eError?", eError":"") 49 | << "},\n stopped at $> " 50 | << tail << "}" 51 | << endl; 52 | return -1; 53 | } 54 | Repo obj = repo("Object"); 55 | cout << "repo(\"Array\")[1][4] = " << repo("Array")[1][4].ToString() << endl; 56 | cout << "repo(\"Object\")(\"name\") = " << repo("Object")("name").ToString() << endl; 57 | cout << "repo(\"Object\")(\"dept\") = " << obj("dept").ToDouble() << endl; 58 | cout << "repo(\"Object\")(\"cars\").Size = " << repo("Object")("cars").Size() << endl; 59 | #if 1 60 | /* tree dump of repository */ 61 | repo.dumptree(cout); 62 | #else 63 | /* raw dump of repository */ 64 | for( auto it: *repo.get()) { 65 | cout << "{ (" << it.first.first << ", " << it.first.second << "), " 66 | << it.second.first << ", " << it.second.second << " }" << endl; 67 | } 68 | #endif 69 | cout << endl; 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /examples/cmd.cpp: -------------------------------------------------------------------------------- 1 | 
/*************************************************************************\ 2 | * Parser of simple command line parameters (based on BNFlite) * 3 | * Copyright (c) 2017 by Alexander A. Semjonov. ALL RIGHTS RESERVED. * 4 | * * 5 | * This code is free software: you can redistribute it and/or modify it * 6 | * under the terms of the GNU Lesser General Public License as published * 7 | * by the Free Software Foundation, either version 3 of the License, * 8 | * or (at your option) any later version. * 9 | * * 10 | * This program is distributed in the hope that it will be useful, * 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 | * GNU General Public License for more details. * 14 | * * 15 | * You should have received a copy of the GNU General Public License * 16 | * along with this program. If not, see . * 17 | \*************************************************************************/ 18 | #include "stdio.h" 19 | #include "stdlib.h" 20 | #include "bnflite.h" 21 | 22 | using namespace bnf; 23 | 24 | 25 | static bool printFilter(const char* lexem, size_t len) // prints at most len characters of lexem; always returns true 26 | { 27 | printf("Filter : %.*s;\n", static_cast<int>(len), lexem); // the '*' precision of "%.*s" must be passed as int, not size_t 28 | return true; 29 | } 30 | 31 | int main(int argc, char* argv[]) 32 | { 33 | /* 34 | BNF description of the ffmpeg filtergraph syntax 35 | from https://ffmpeg.org/ffmpeg-filters.html (simplified for this example) 36 | NAME ::= sequence of alphanumeric characters and '_' 37 | LINKLABEL ::= "[" NAME "]" 38 | LINKLABELS ::= LINKLABEL [LINKLABELS] 39 | FILTER_ARGUMENTS ::= sequence of chars (possibly quoted) 40 | FILTER ::= [LINKLABELS] NAME ["=" FILTER_ARGUMENTS] [LINKLABELS] 41 | FILTERCHAIN ::= FILTER [,FILTERCHAIN] 42 | */ 43 | Token Alphanumeric('_'); // start declare one element of "sequence of alphanumeric characters" 44 | Alphanumeric.Add('0', '9'); // appended numeric part 45 | Alphanumeric.Add('a', 'z'); // appended alphabetic lowercase part 46 | 
Alphanumeric.Add('A', 'Z'); // appended alphabetic capital part 47 | Lexem NAME = Series(1, Alphanumeric); // declare "sequence of alphanumeric characters" 48 | Lexem LINKLABEL = "[" + NAME + "]"; 49 | Lexem LINKLABELS1 = Iterate(1, LINKLABEL); // declare as described 50 | Lexem LINKLABELS0 = Iterate(0, LINKLABEL); // declare as needed to use 51 | Token SequenceOfChars(' ' + 1, 0x7F - 1); // declare one element of "sequence of chars" 52 | SequenceOfChars.Remove("=,"); // exclude used(reserved) chars 53 | Lexem FILTER_ARGUMENTS = Series(1, SequenceOfChars); // declare "sequence of chars" 54 | Lexem FILTER = LINKLABELS0 + NAME + Iterate(0, "=" + FILTER_ARGUMENTS) + LINKLABELS0; 55 | Rule Filter = FILTER + printFilter; // form found filter 56 | Rule FILTERCHAIN = Filter + Repeat(0, "," + Filter); // declare several filters 57 | 58 | const char* test; int stat; 59 | const char* pstr = 0; 60 | test = "[0]amerge=0=5, c1"; 61 | stat = Analyze(FILTERCHAIN, test, &pstr); // Start parsing 62 | if (stat > 0) 63 | printf("Passed\n"); 64 | else 65 | printf("Failed, stopped at=%.40s\n status = 0x%0X, flg = %s%s%s%s%s%s%s%s\n", 66 | pstr?pstr:"", stat, 67 | stat&eOk?"eOk":"Not", 68 | stat&eRest?", eRest":"", 69 | stat&eOver?", eOver":"", 70 | stat&eEof?", eEof":"", 71 | stat&eBadRule?", eBadRule":"", 72 | stat&eBadLexem?", eBadLexem":"", 73 | stat&eSyntax?", eSyntax":"", 74 | stat&eError?", eError":"" ); 75 | return 0; 76 | } 77 | 78 | -------------------------------------------------------------------------------- /formula_compiler/byte_code.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************\ 2 | * h-header of byte-code formula compiler (based on BNFLite) * 3 | * Copyright (c) 2017 Alexander A. 
#ifndef _BYTE_CODE_H
#define _BYTE_CODE_H

// NOTE(review): the header names of the #include lines are not visible in the
// reviewed copy of this file; these are the standard headers the declarations
// below rely on - confirm against the original include list.
#include <cstddef>
#include <iostream>
#include <list>
#include <string>
#include <vector>

/*
 * Building blocks of a byte-code operation.
 *   opInt/opFloat/opStr  - operand type tags (two bits, see opMaskType)
 *   opError..opToStr     - one-operand operation ids, packed with OP2
 *   opAdd..opDiv         - two-operand operation ids, packed with OP3
 * The three groups deliberately reuse the same small numbers; they are told
 * apart by the bit position the packing macros put them at.
 *
 * The underlying type is fixed to int because OP2/OP3 cast packed values
 * (up to 90) to OpCode and use them as case labels.  Without a fixed
 * underlying type those values lie outside the range of the enumeration,
 * which is not allowed in a constant expression.
 */
enum OpCode : int
{
    opFatal = -1,  opNop = 0,
    opInt = 1,     opFloat = 2,   opStr = 3,   opMaskType = 0x03,
    opError = 1,   opNeg = 2,     opPos = 3,   opCall = 4,
    opToInt = 5,   opToFloat = 6, opToStr = 7,
    opAdd = 2,     opSub = 3,     opMul = 4,   opDiv = 5,
};

/* Pack an operation id with the type(s) of its operand(s); the lowest two bits hold a type tag. */
#define OP3(scd, fst, op)   (OpCode) ( ((op) << 4) | ((fst) << 2) | ((scd) << 0) )
#define OP2(fst, op)        (OpCode)( ((op) << 2) | ((fst) << 0) )
#define OP1(op)             (OpCode)(op)

/* Key built from the left type, the operator character and the right type. */
#define CP2(l, op, r)  ( (((l) & 0xF) << 16) | (((r) & 0xF) << 12) | ((op) & 0x7F) )

#define MAX_PARAM_NUM 3
/* Signature key of a function: return type and up to three parameter types, two bits each. */
#define PRM(r, p1, p2, p3) ( (((p3) & 3) << 6) | (((p2) & 3) << 4) | (((p1) & 3) << 2) | ((r) & 3) )

/* One byte-code instruction: an opcode plus an optional immediate value. */
struct byte_code
{
    OpCode type;
    union {
        int val_i;
        float val_f;
        const char* val_s;
    };

    byte_code(): type(opNop), val_i(0) {}
    byte_code(OpCode t, int val = 0) : type(t), val_i(val) {}
    byte_code(OpCode t, float val) : type(t), val_f(val) {}
    byte_code(OpCode t, const char* val) : type(t), val_s(val) {}

    friend std::ostream& operator<<(std::ostream& out, const byte_code& bc);

    /* Printable letter of a type tag: 'I', 'F', 'S', or '?' for anything below opInt. */
    static char pType(int a)
        { return a > 1? a > 2?'S':'F' : a < 1?'?':'I'; }

    /* Type tag carried by a packed opcode.  A mixed int/float arithmetic
       opcode reports opFloat; every other opcode reports its lowest two bits. */
    static int toType(int type)
    {
        switch (type) {
        case OP3(opInt, opFloat, opAdd):
        case OP3(opInt, opFloat, opSub):
        case OP3(opInt, opFloat, opMul):
        case OP3(opInt, opFloat, opDiv):
            return opFloat;
        default:
            return type & opMaskType;
        }
    }
};

typedef std::list<byte_code> script;
extern std::list<byte_code> GenCallOp(std::string name, std::vector<std::list<byte_code> > args);
extern std::list<byte_code> GenUnaryOp(char op, std::list<byte_code> unr);
extern std::list<byte_code> GenBinaryOp(std::list<byte_code> left, char op, std::list<byte_code> right);

/* Description of one callable library function. */
struct FuncTable
{
    OpCode ret;                     // return type tag
    const char* name;               // name used in formulas
    OpCode param[MAX_PARAM_NUM];    // parameter type tags; unused slots are 0
#if 0
    void (*fun)();
#else
    void *fun;                      // entry point, cast to the real signature by the interpreter
#endif
    int num;                        // number of arguments, filled in by GenCallOp
    int call_idx;                   // PRM() signature key, filled in by GenCallOp
};

extern struct FuncTable GFunTable[];
extern size_t GFunTableSize;

std::list<byte_code> spirit_byte_code(std::string expr);
std::list<byte_code> bnflite_byte_code(std::string expr);
int EvaluateBC(std::list<byte_code> bc, void* res);

#endif //_BYTE_CODE_H
* 9 | * * 10 | * This program is distributed in the hope that it will be useful, * 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 | * GNU General Public License for more details. * 14 | * * 15 | * You should have received a copy of the GNU General Public License * 16 | * along with this program. If not, see . * 17 | \*************************************************************************/ 18 | #pragma warning(disable: 4786) 19 | 20 | #include 21 | #include 22 | #include "bnflite.h" 23 | 24 | using namespace bnf; 25 | using namespace std; 26 | 27 | 28 | const char* xml = // this is example of xml-like configuration file 29 | "" 30 | "" 31 | "" 32 | "" 33 | ""; 34 | 35 | 36 | struct client 37 | { 38 | string key, mail; 39 | vector< pair< string, string> > prop; 40 | }; 41 | 42 | vector Cfg; // client configuration container 43 | string tmpstr; 44 | 45 | 46 | static bool printMsg(const char* lexem, size_t len) 47 | { // debug function 48 | printf("Debug: %.*s;\n", len, lexem); 49 | return true; // should retuirn true to continue parsing 50 | } 51 | 52 | static bool addkey(const char* lexem, size_t len) 53 | { 54 | Cfg.resize(Cfg.size() + 1); 55 | Cfg.back().key = string(lexem + 1, len - 2); 56 | return true; 57 | } 58 | 59 | static bool addmail(const char* lexem, size_t len) 60 | { 61 | Cfg.back().mail = string(lexem + 1, len - 2); 62 | return true; 63 | } 64 | 65 | static bool addtype(const char* lexem, size_t len) 66 | { 67 | tmpstr = string(lexem + 1, len - 2); 68 | return true; 69 | } 70 | 71 | static bool addlimit(const char* lexem, size_t len) 72 | { 73 | Cfg.back().prop.push_back(make_pair(tmpstr, string(lexem + 1, len - 2))); 74 | return true; 75 | } 76 | 77 | 78 | int main() 79 | { 80 | Token value(1,255); value.Remove("\""); // assume the value can contain any character 81 | 82 | Lexem client("client"); // literals 83 | Lexem key("key"); 84 | Lexem 
type("type"); 85 | Lexem alert("alert"); 86 | Lexem limit("limit"); 87 | Lexem mail("mail"); 88 | 89 | Lexem quotedvalue = "\"" + *value + "\""; 90 | Lexem _client = Token("<") + Token("/") + client +">"; 91 | Lexem _end = Token("/") +">"; 92 | 93 | Rule xclient = Token("<") + client + key + "=" + quotedvalue + addkey 94 | + mail + "=" + quotedvalue + addmail + ">"; 95 | Rule xalert = Token("<") + alert + type + "=" + quotedvalue + addtype 96 | + limit + "=" + quotedvalue + addlimit + _end; 97 | 98 | Rule xclient1 = xclient + printMsg; 99 | Rule xalert1 = xalert + printMsg; 100 | 101 | Rule root = *(xclient + *(xalert) + _client); 102 | 103 | const char* tail = 0; 104 | int tst = Analyze(root, xml, &tail); 105 | if (tst > 0) 106 | cout << "Clients configured: " << Cfg.size() << endl; 107 | else 108 | cout << "Parsing errors detected, status = " << hex << tst << endl 109 | << "stopped at: " << tail << endl; 110 | 111 | 112 | for (vector::iterator j = Cfg.begin(); j != Cfg.end(); ++j) { 113 | cout << "Client " << j->key << " has " << (*j).prop.size() << " properties: "; 114 | for (vector >::iterator i = j->prop.begin(); i != j->prop.end(); ++i) { 115 | cout << i->first << "=" << i->second <<"; "; 116 | } 117 | } 118 | 119 | return 0; 120 | } 121 | -------------------------------------------------------------------------------- /formula_compiler/parser.cpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************\ 2 | * Parser part of bite-code formula compiler (based on BNFlite) * 3 | * Copyright (c) 2017 by Alexander A. Semjonov. ALL RIGHTS RESERVED. * 4 | * * 5 | * This code is free software: you can redistribute it and/or modify it * 6 | * under the terms of the GNU Lesser General Public License as published * 7 | * by the Free Software Foundation, either version 3 of the License, * 8 | * or (at your option) any later version. 
* 9 | * * 10 | * This program is distributed in the hope that it will be useful, * 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 | * GNU General Public License for more details. * 14 | * * 15 | * You should have received a copy of the GNU General Public License * 16 | * along with this program. If not, see . * 17 | \*************************************************************************/ 18 | 19 | #include "stdio.h" 20 | #include "string.h" 21 | 22 | #include "bnflite.h" 23 | #include "byte_code.h" 24 | 25 | using namespace bnf; 26 | 27 | static bool printErr(const char* lexem, size_t len) 28 | { 29 | printf("strings are not supported yet: %.*s;\n", len, lexem); 30 | return false; 31 | } 32 | 33 | typedef Interface< std::list > Gen; 34 | 35 | 36 | Gen DoBracket(std::vector& res) 37 | { 38 | return *res[0].text == '('? res[1] : res[0]; /* pass result without brackets */ 39 | } 40 | 41 | Gen DoString(std::vector& res) 42 | { 43 | return res[1]; 44 | } 45 | 46 | 47 | Gen DoNumber(std::vector& res) 48 | { /* use strtol/strtod to get value of parsed number */ 49 | char* lst; 50 | int j = res.size() - 1; 51 | int ivalue = strtol(res[0].text, &lst, 10); 52 | if (lst - res[j].text - res[j].length == 0) { 53 | return Gen(std::list(1, byte_code(opInt, ivalue)), res); 54 | } 55 | float fvalue = (float)strtod(res[0].text, &lst); 56 | if (lst - res[j].text - res[j].length == 0) { 57 | return Gen(std::list(1, byte_code(opFloat, fvalue)), res); 58 | } 59 | std::cout << "number parse error:"; std::cout.write(res[0].text, res[0].length); 60 | return Gen(std::list(1, byte_code(opError, 0)), res); 61 | 62 | } 63 | 64 | Gen DoUnary(std::vector& res) 65 | { /* pass result of unary operation ( just only '-' ) */ 66 | if (*res[0].text == '-') { 67 | return Gen(GenUnaryOp('-', res[1].data), res); 68 | } 69 | return res[0]; 70 | } 71 | 72 | Gen DoBinary(std::vector& res) 73 | { /* pass 
result of binary operation (shared for several rules) */ 74 | std::list left = res[0].data; 75 | for (unsigned int i = 1; i < ((res.size() - 1) | 1); i += 2) { 76 | left = GenBinaryOp(left, *res[i].text, res[i + 1].data); 77 | } 78 | return Gen(left, res); 79 | } 80 | 81 | Gen DoFunction(std::vector& res) 82 | { 83 | std::vector< std::list > args; 84 | 85 | for (unsigned int i = 1; i < res.size(); i++) { 86 | if( *res[i].text == '(' || *res[i].text == ',' || *res[i].text == ')' ) { 87 | continue; 88 | } 89 | args.push_back(res[i].data); 90 | } 91 | return Gen(GenCallOp(std::string(res[0].text, res[0].length), args), res); 92 | } 93 | 94 | 95 | std::list bnflite_byte_code(std::string expr) 96 | { 97 | Token digit1_9('1', '9'); 98 | Token DIGIT("0123456789"); 99 | Lexem i_digit = 1*DIGIT; 100 | Lexem frac_ = "." + i_digit; 101 | Lexem int_ = "0" | digit1_9 + *DIGIT; 102 | Lexem exp_ = "Ee" + !Token("+-") + i_digit; 103 | Lexem number_ = !Token("-") + int_ + !frac_ + !exp_; 104 | Rule number = number_; 105 | 106 | Token az_("_"); az_.Add('A', 'Z'); az_.Add('a', 'z'); 107 | Token az01_(az_); az01_.Add('0', '9'); 108 | 109 | Token all(1,255); all.Remove("\""); 110 | 111 | Lexem identifier = az_ + *(az01_); 112 | Lexem quotedstring = "\"" + *all + "\""; 113 | 114 | Rule expression; 115 | Rule unary; 116 | 117 | Rule function = identifier + "(" + !(expression + *("," + expression)) + ")"; 118 | 119 | Rule elementary = AcceptFirst() 120 | | "(" + expression + ")" 121 | | function 122 | | number 123 | | quotedstring + printErr 124 | | unary; 125 | 126 | unary = Token("-") + elementary; 127 | 128 | Rule primary = elementary + *("*%/" + elementary); 129 | 130 | /* Rule */ expression = primary + *("+-" + primary); 131 | 132 | Bind(number, DoNumber); 133 | Bind(elementary, DoBracket); 134 | Bind(unary, DoUnary); 135 | Bind(primary, DoBinary); 136 | Bind(expression, DoBinary); 137 | Bind(function, DoFunction); 138 | 139 | const char* tail = 0; 140 | Gen result; 141 | 142 | int 
tst = Analyze(expression, expr.c_str(), &tail, result); 143 | if (tst > 0) 144 | std::cout << result.data.size() << " byte-codes in: " << expr << std::endl; 145 | else 146 | std::cout << "Parsing errors detected, status = " << std::hex << tst << std::endl 147 | << "stopped at: " << tail << std::endl; 148 | 149 | expression = Null(); // disjoin Rule recursion to safe Rules removal 150 | unary = Null(); 151 | return result.data; 152 | } 153 | -------------------------------------------------------------------------------- /examples/calc.cpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************\ 2 | * Simple arithmetic calculator (based on BNFlite) * 3 | * Copyright (c) 2017 by Alexander A. Semjonov. ALL RIGHTS RESERVED. * 4 | * * 5 | * This code is free software: you can redistribute it and/or modify it * 6 | * under the terms of the GNU Lesser General Public License as published * 7 | * by the Free Software Foundation, either version 3 of the License, * 8 | * or (at your option) any later version. * 9 | * * 10 | * This program is distributed in the hope that it will be useful, * 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 | * GNU General Public License for more details. * 14 | * * 15 | * You should have received a copy of the GNU General Public License * 16 | * along with this program. If not, see . * 17 | \*************************************************************************/ 18 | 19 | #include "bnflite.h" 20 | #include 21 | #include 22 | 23 | using namespace bnf; 24 | 25 | 26 | typedef Interface Calc; 27 | 28 | /* pass result without brackets */ 29 | Calc DoBracket(std::vector& res) 30 | { 31 | return *res[0].text == '('? 
res[1] : res[0]; 32 | } 33 | 34 | /* pass number result */ 35 | Calc DoNumber(std::vector& res) 36 | { 37 | char* lst; 38 | double value = strtod(res[0].text, &lst); // formaly it is not correct for not 0-terminated strings 39 | Calc ret(value, res); 40 | if (lst - res[0].text - ret.length != 0) { 41 | std::cout << "number parse error:"; std::cout.write(ret.text, ret.length); 42 | } 43 | return ret; 44 | } 45 | 46 | /* pass rersult of unary operation ( just only '-' ) */ 47 | Calc DoUnary(std::vector& res) 48 | { 49 | if (*res[0].text == '-') return Calc(-res[1].data, res); 50 | else if (*res[0].text == '+' ) return Calc(res[1].data, res); 51 | else return Calc(res[0].data, res); 52 | } 53 | 54 | 55 | /* pass result of binary operation (shared for several rules) */ 56 | Calc DoBinary(std::vector& res) 57 | { 58 | double value = res[0].data; 59 | for (unsigned int i = 1; i < res.size(); i += 2) { 60 | switch(*res[i].text) { 61 | case '+': value += res[i + 1].data; break; 62 | case '-': value -= res[i + 1].data; break; 63 | case '*': value *= res[i + 1].data; break; 64 | case '/': value /= res[i + 1].data; break; 65 | case '%': value = (int)value % (int)res[i + 1].data; break; 66 | default: std::cout << "syntax error:"; std::cout.write(res[i].text, res[i].length); 67 | } 68 | } 69 | return Calc(value, res); 70 | } 71 | 72 | 73 | 74 | int main(int argc, char* argv[]) 75 | { 76 | std::string clc; 77 | for (int i = 1; i < argc; i++) { 78 | clc += argv[i]; 79 | } 80 | /* Example of ABNF notation of the number (from RFC 4627) 81 | number = [ minus ] int [ frac ] [ exp ] 82 | decimal-point = %x2E ; . 83 | digit1-9 = %x31-39 ; 1-9 84 | e = %x65 / %x45 ; e E 85 | exp = e [ minus / plus ] 1*DIGIT 86 | frac = decimal-point 1*DIGIT 87 | int = zero / ( digit1-9 *DIGIT ) 88 | minus = %x2D ; - 89 | plus = %x2B ; + 90 | zero = %x30 ; 0 91 | */ 92 | Token digit1_9('1', '9'); 93 | Token DIGIT("0123456789"); 94 | Lexem I_DIGIT = 1*DIGIT; // Series(1, DIGIT); 95 | Lexem frac = "." 
+ I_DIGIT; 96 | Lexem int_ = "0" | digit1_9 + *DIGIT; //Series(0, DIGIT); 97 | #if __cplusplus > 199711L 98 | Lexem exp = "Ee" + !"+-"_T + I_DIGIT ; 99 | #else 100 | Lexem exp = "Ee" + !Token("+-") + I_DIGIT ; 101 | #endif 102 | Rule number = !Token("-") + int_ + !frac + !exp; 103 | Bind(number, DoNumber); 104 | 105 | Rule Expression; 106 | Bind(Expression, Calc::ByPass); 107 | 108 | Rule PrimaryExpression = "(" + Expression + ")" | number; 109 | #if __cplusplus > 199711L 110 | PrimaryExpression[ 111 | *[](std::vector& res) { return *res[0].text == '('? res[1] : res[0]; } 112 | ]; 113 | #else 114 | Bind(PrimaryExpression, DoBracket); 115 | #endif 116 | 117 | Rule UnaryExpression = !Token("+-") + PrimaryExpression; 118 | Bind(UnaryExpression, DoUnary); 119 | 120 | Rule MulExpression = UnaryExpression + *("*%/" + UnaryExpression); 121 | Bind(MulExpression, DoBinary); 122 | 123 | Rule AddExpression = MulExpression + *("+-" + MulExpression ); 124 | Bind(AddExpression, DoBinary); 125 | 126 | Expression = AddExpression; // Rule recursion created! 
127 | 128 | if (!clc.size()) { 129 | clc = "2*3*4*5"; 130 | std::cout << "Run \"" << argv[0] << " " << clc << "\"\n"; 131 | } 132 | const char* tail = 0; 133 | Calc result; 134 | 135 | int tst = Analyze(Expression, clc.c_str(), &tail, result); 136 | if (tst > 0) 137 | std::cout <<"Result of " << clc << " = " << result.data << std::endl; 138 | else 139 | std::cout << "Parsing errors detected, status = " << std::hex << tst << std::endl 140 | << "stopped at: " << tail << std::endl; 141 | 142 | Expression = Null(); // disjoin Rule recursion to safe Rules removal 143 | return 0; 144 | } 145 | -------------------------------------------------------------------------------- /examples/ini.cpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************\ 2 | * Parser of configuration ini-files (based on BNFlite) * 3 | * Copyright (c) 2017 by Alexander A. Semjonov. ALL RIGHTS RESERVED. * 4 | * * 5 | * This code is free software: you can redistribute it and/or modify it * 6 | * under the terms of the GNU Lesser General Public License as published * 7 | * by the Free Software Foundation, either version 3 of the License, * 8 | * or (at your option) any later version. * 9 | * * 10 | * This program is distributed in the hope that it will be useful, * 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 | * GNU General Public License for more details. * 14 | * * 15 | * You should have received a copy of the GNU General Public License * 16 | * along with this program. If not, see . 
* 17 | \*************************************************************************/ 18 | #pragma warning(disable: 4786) 19 | 20 | #include 21 | #include 22 | #include "bnflite.h" 23 | 24 | using namespace bnf; 25 | using namespace std; 26 | 27 | 28 | /* somple ini file configuration example: 29 | [section_1] 30 | var1=value1 31 | var2=value2 32 | 33 | [section_2] 34 | var1=value1 35 | var2=value2 36 | */ 37 | 38 | 39 | const char* ini = // this sring represents some ini configuration file 40 | "; last modified 1 April 2001 by John Doe\n" 41 | " [ owner ]\n" 42 | "name=John Doe\n\n" 43 | "organization=Acme Widgets Inc.\n" 44 | "\n" 45 | "[database]\n \n" 46 | "; use IP address in case network name resolution is not working\n" 47 | "server=192.0.2.62 \n" 48 | "port= 143\n" 49 | "file=\"payroll.dat\"\n"; 50 | 51 | 52 | struct Section 53 | { 54 | string name; 55 | vector< pair< string, string> > value; 56 | Section(const char* name, size_t len) :name(name, len) {} 57 | }; 58 | vector Ini; // ini-file configuration container 59 | 60 | // example for custom interface instead of "typedef Interface Gen;" 61 | class Gen :public Interface<> 62 | { 63 | public: 64 | Gen(const Gen& ifc, const char* text, size_t length, const char* name) 65 | :Interface<>(ifc, text, length, name){} 66 | Gen(const char* text, size_t length, const char* name) 67 | :Interface<>(text, length, name){} 68 | Gen(const Gen& front, const Gen& back) 69 | :Interface<>(front, back) {} 70 | Gen() :Interface<>() {}; 71 | }; 72 | 73 | 74 | static bool printMsg(const char* lexem, size_t len) 75 | { // debug function 76 | printf("Debug: %.*s;\n", len, lexem); 77 | return true; 78 | } 79 | 80 | 81 | Gen DoSection(vector& res) 82 | { // save new section, it is 2nd lexem in section Rule in main 83 | Ini.push_back(Section(res[1].text, res[1].length)); 84 | return Gen(res.front(), res.back()); 85 | } 86 | 87 | Gen DoValue(vector& res) 88 | { // save new entry: 4th lexem - name and 7th lexem is property value 89 | int 
i = res.size(); 90 | if (i > 2 ) 91 | Ini.back().value.push_back(make_pair( 92 | string(res[0].text, res[0].length), string(res[2].text, res[2].length))); 93 | return Gen(res.front(), res.back()); 94 | } 95 | 96 | static void Bind(Rule& section, Rule& entry) 97 | { 98 | Bind(section, DoSection); 99 | Bind(entry, DoValue); 100 | } 101 | 102 | // example of custom pre-parser 103 | static const char* ini_zero_parse(const char* ptr) 104 | { // skip ini file comments 105 | if (*ptr ==';' || *ptr =='#') 106 | while (*ptr != 0) 107 | if( *ptr++ == '\n') 108 | break; 109 | return ptr; 110 | } 111 | 112 | 113 | int main() 114 | { 115 | Token space(" \t"); // space and tab are grammar part in ini files 116 | Token delimiter(" \t\n\r"); // consider new lines as grammar part too 117 | Token name("_.,:(){}-#@&*|"); // start declare with special symbols 118 | name.Add('0', '9'); // appended numeric part 119 | name.Add('a', 'z'); // appended alphabetic lowercase part 120 | name.Add('A', 'Z'); // appended alphabetic capital part 121 | Token value(1,255); value.Remove("\n"); 122 | 123 | Lexem Name = 1*name; 124 | Lexem Value = *value; 125 | Lexem Equal = *space + "=" + *space; 126 | Lexem Left = *space + "[" + *space; // bracket 127 | Lexem Right = *space + "]" + *space; 128 | Lexem Delimiter = *delimiter; 129 | 130 | Rule Item = Name + Equal + Value + "\n"; 131 | Rule Section = Left + Name + Right + "\n"; 132 | Rule Inidata = Delimiter + *(Section + Delimiter + *(Item + Delimiter)) ; 133 | 134 | 135 | Bind(Section, Item); 136 | 137 | Gen gen; // this is Interface object 138 | 139 | int tst = Analyze(Inidata, ini, gen, ini_zero_parse); 140 | if (tst > 0) 141 | cout << "Section read:" << Ini.size(); 142 | else 143 | cout << "Parsing errors detected, status = " << hex << tst << endl 144 | << "stopped at: " << (gen.data + gen.length) << endl; 145 | 146 | for (vector::iterator j = Ini.begin(); j != Ini.end(); ++j) { 147 | cout << endl << "Section " << j->name << " has " << 
(*j).value.size() << " values: "; 148 | for (vector >::iterator i = j->value.begin(); i != j->value.end(); ++i) { 149 | cout << i->first << "=" << i->second <<"; "; 150 | } 151 | } 152 | 153 | return 0; 154 | } 155 | 156 | 157 | -------------------------------------------------------------------------------- /formula_compiler/code_run.cpp: -------------------------------------------------------------------------------- 1 | /****************************************************************************\ 2 | * Byte-code interpreter(SSE2) of formula compiler (based on BNFLite) * 3 | * Copyright (c) 2017 Alexander A. Semjonov * 4 | * * 5 | * Permission to use, copy, modify, and distribute this software for any * 6 | * purpose with or without fee is hereby granted, provided that the above * 7 | * copyright notice and this permission notice appear in all copies. * 8 | * * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* 16 | \****************************************************************************/ 17 | 18 | #include "byte_code.h" 19 | 20 | 21 | #include 22 | #include 23 | 24 | int EvaluateBC(std::list bc, void* res) 25 | { 26 | __m128 X[16]; 27 | int i = -1; 28 | 29 | for (const auto &code : bc) { 30 | switch (code.type) { 31 | default: 32 | return -2; 33 | case OP2(opInt, opError): 34 | case OP2(opFloat, opError): 35 | return -1; 36 | 37 | case OP1(opInt): 38 | X[++i] = _mm_castsi128_ps(_mm_set1_epi32(code.val_i)); 39 | break; 40 | case OP1(opFloat): 41 | X[++i] = _mm_set_ps1(code.val_f); 42 | break; 43 | case OP2(opInt, opNeg): 44 | X[i] = _mm_castsi128_ps(_mm_sub_epi32(_mm_set1_epi32(0), _mm_castps_si128(X[i]))); 45 | break; 46 | case OP2(opFloat, opNeg): 47 | X[i] = _mm_mul_ps(X[i], _mm_set_ps1(-1L)); 48 | break; 49 | 50 | case OP3(opInt, opInt, opAdd): 51 | X[i - 1] = _mm_castsi128_ps(_mm_sub_epi32( _mm_castps_si128(X[i - 1]), 52 | _mm_sub_epi32(_mm_set1_epi32(0), _mm_castps_si128(X[i])))); 53 | i -= 1; break; 54 | case OP3(opFloat, opFloat, opAdd): 55 | X[i - 1] = _mm_add_ps(X[i - 1], X[i]); 56 | i -= 1; break; 57 | case OP3(opInt, opInt, opSub): 58 | X[i - 1] = _mm_castsi128_ps(_mm_sub_epi32(_mm_castps_si128(X[i - 1]), _mm_castps_si128(X[i]))); 59 | i -= 1; break; 60 | case OP3(opFloat, opFloat, opSub): 61 | X[i - 1] = _mm_sub_ps(X[i - 1], X[i]); 62 | i -= 1; break; 63 | case OP3(opInt, opInt, opMul): 64 | X[i - 1] = _mm_castsi128_ps(_mm_cvtps_epi32(_mm_mul_ps( 65 | _mm_cvtepi32_ps(_mm_castps_si128(X[i - 1])),_mm_cvtepi32_ps(_mm_castps_si128(X[i]))))); 66 | i -= 1; break; 67 | case OP3(opFloat, opFloat, opMul): 68 | X[i - 1] = _mm_mul_ps(X[i - 1], X[i]); 69 | i -= 1; break; 70 | case OP3(opInt, opInt, opDiv): 71 | X[i - 1] = _mm_castsi128_ps(_mm_cvtps_epi32(_mm_div_ps( 72 | _mm_cvtepi32_ps(_mm_castps_si128(X[i - 1])),_mm_cvtepi32_ps(_mm_castps_si128(X[i]))))); 73 | i -= 1; break; 74 | case OP3(opFloat, opFloat, opDiv): 75 | X[i - 1] = _mm_div_ps(X[i - 1], X[i]); 76 | 
i -= 1; break; 77 | 78 | case OP2(opInt, opToFloat): 79 | X[i] = _mm_cvtepi32_ps(_mm_castps_si128(X[i])); 80 | break; 81 | case OP2(opFloat, opToInt): 82 | X[i] = _mm_castsi128_ps(_mm_cvtps_epi32(X[i])); 83 | break; 84 | 85 | case OP2(opInt, opCall): 86 | case OP2(opFloat, opCall): 87 | case OP2(opStr, opCall): 88 | // __m128 res; 89 | int j = code.val_i; 90 | void (*f)() = (void(*)())GFunTable[j].fun; 91 | i = i - GFunTable[j].num + 1; 92 | int (&iX)[][4] =(int(&)[][4])(X[i]); 93 | float (&fX)[][4] =(float(&)[][4])(X[i]); 94 | 95 | for (int l = 0; l < 4; l++) { 96 | switch (GFunTable[j].call_idx) { 97 | case PRM(opInt, 0, 0, 0): 98 | iX[0][l] = ((int(*)())(f))(); 99 | break; 100 | case PRM(opInt, opInt, 0, 0): 101 | iX[0][l] = ((int(*)(int))(f))(iX[0][l]); 102 | break; 103 | case PRM(opFloat, opFloat, 0, 0): 104 | fX[0][l] = ((float(*)(float))(f))(fX[0][l]); 105 | break; 106 | case PRM(opInt, opInt, opInt, 0): 107 | iX[0][l] = ((int(*)(int, int))(f))(iX[0][l], iX[1][l]); 108 | break; 109 | case PRM(opFloat, opFloat, opInt, 0): 110 | fX[0][l] = ((float(*)(float, int))(f))(fX[0][l], iX[1][l]); 111 | break; 112 | case PRM(opFloat, opInt, opFloat, 0): 113 | fX[0][l] = ((float(*)(int, float))(f))(iX[0][l],fX[1][l]); 114 | break; 115 | case PRM(opFloat, opFloat, opFloat, 0): 116 | fX[0][l] = ((float(*)(float, float))(f))(fX[0][l], fX[1][l]); 117 | break; 118 | default: 119 | return -3; 120 | } 121 | } 122 | break; 123 | } 124 | } 125 | *(__m128*)res = X[i]; 126 | return i; 127 | } 128 | 129 | 130 | -------------------------------------------------------------------------------- /formula_compiler/code_gen.cpp: -------------------------------------------------------------------------------- 1 | /****************************************************************************\ 2 | * Byte-code generator part of formula compiler (based on BNFLite) * 3 | * Copyright (c) 2017 Alexander A. 
Semjonov * 4 | * * 5 | * Permission to use, copy, modify, and distribute this software for any * 6 | * purpose with or without fee is hereby granted, provided that the above * 7 | * copyright notice and this permission notice appear in all copies. * 8 | * * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * 16 | \****************************************************************************/ 17 | #include "byte_code.h" 18 | 19 | 20 | std::list GenUnaryOp(char op, std::list unr) 21 | { 22 | if (op == '-') 23 | unr.push_back(byte_code(OP2(byte_code::toType(unr.back().type), opNeg))); 24 | return unr; 25 | } 26 | 27 | std::list GenBinaryOp(std::list left, char op, std::list right) 28 | { 29 | static struct 30 | { 31 | int issue; 32 | OpCode bin; 33 | OpCode left; 34 | OpCode right; 35 | } bin_op[] = { 36 | {CP2(opInt, '+', opInt), OP3(opInt, opInt, opAdd), opNop, opNop}, 37 | {CP2(opInt, '-', opInt), OP3(opInt, opInt, opSub), opNop, opNop}, 38 | {CP2(opInt, '*', opInt), OP3(opInt, opInt, opMul), opNop, opNop}, 39 | {CP2(opInt, '/', opInt), OP3(opInt, opInt, opDiv), opNop, opNop}, 40 | {CP2(opInt, '+', opFloat), OP3(opFloat, opFloat, opAdd), OP2(opInt, opToFloat), opNop}, 41 | {CP2(opInt, '-', opFloat), OP3(opFloat, opFloat, opSub), OP2(opInt, opToFloat), opNop}, 42 | {CP2(opInt, '*', opFloat), OP3(opFloat, opFloat, opMul), OP2(opInt, opToFloat), opNop}, 43 | {CP2(opInt, '/', opFloat), OP3(opFloat, opFloat, opDiv), OP2(opInt, opToFloat), opNop}, 44 | {CP2(opFloat, '+', opInt), 
OP3(opFloat, opFloat, opAdd), opNop, OP2(opInt, opToFloat)}, 45 | {CP2(opFloat, '-', opInt), OP3(opFloat, opFloat, opSub), opNop, OP2(opInt, opToFloat)}, 46 | {CP2(opFloat, '*', opInt), OP3(opFloat, opFloat, opMul), opNop, OP2(opInt, opToFloat)}, 47 | {CP2(opFloat, '/', opInt), OP3(opFloat, opFloat, opDiv), opNop, OP2(opInt, opToFloat)}, 48 | {CP2(opFloat, '+', opFloat), OP3(opFloat, opFloat, opAdd), opNop, opNop}, 49 | {CP2(opFloat, '-', opFloat), OP3(opFloat, opFloat, opSub), opNop, opNop}, 50 | {CP2(opFloat, '*', opFloat), OP3(opFloat, opFloat, opMul), opNop, opNop}, 51 | {CP2(opFloat, '/', opFloat), OP3(opFloat, opFloat, opDiv), opNop, opNop} 52 | }; 53 | 54 | int issue = CP2(byte_code::toType(left.back().type), op, byte_code::toType(right.back().type)); 55 | 56 | for (unsigned int i = 0; i < sizeof(bin_op)/sizeof(bin_op[0]); i++) { 57 | if (issue == bin_op[i].issue) { 58 | if (bin_op[i].left) 59 | left.push_back(byte_code(bin_op[i].left)); 60 | if (bin_op[i].right) 61 | right.push_back(byte_code(bin_op[i].right)); 62 | right.push_back(byte_code(bin_op[i].bin)); 63 | break; 64 | } 65 | } 66 | left.splice(left.end(), right); 67 | return left; 68 | } 69 | 70 | 71 | std::list GenCallOp(std::string name, std::vector > args) 72 | { 73 | unsigned int i, j; 74 | std::list all; 75 | 76 | for (i = 0; i < GFunTableSize; i++) { 77 | if (name != GFunTable[i].name) 78 | continue; 79 | if (args.size() > MAX_PARAM_NUM) 80 | continue; 81 | unsigned int lim = args.size() < MAX_PARAM_NUM ? 
args.size(): MAX_PARAM_NUM; 82 | for (j = 0; j < lim; j++) { 83 | if (GFunTable[i].param[j] != byte_code::toType(args[j].back().type)) 84 | break; 85 | } 86 | if (j < lim || (j < MAX_PARAM_NUM && GFunTable[i].param[j] != 0)) 87 | continue; 88 | 89 | for (j = 0; j < lim; j++) { 90 | all.splice(all.end(), args[j]); 91 | } 92 | all.push_back(byte_code(OP2(GFunTable[i].ret & opMaskType, opCall), (signed)i)); 93 | GFunTable[i].num = j; 94 | GFunTable[i].call_idx = PRM(GFunTable[i].ret, 95 | GFunTable[i].param[0], GFunTable[i].param[1], GFunTable[i].param[2]); 96 | return all; 97 | } 98 | all.push_back(byte_code(OP2(opInt, opError), (signed)i)); 99 | return all; 100 | } 101 | 102 | std::ostream& operator<<(std::ostream& out, const byte_code& bc) 103 | { 104 | switch (bc.type) { 105 | case opInt: out << "Int(" << bc.val_i << ")"; break; 106 | case opFloat: out << "Float(" << bc.val_f << ")"; break; 107 | case opStr: out << "Str(" << bc.val_s << ")"; break; 108 | default: 109 | switch (bc.type >> 2) { 110 | case opError: out << "opError<"; break; 111 | case opNeg: out << "opNeg<"; break; 112 | case opCall: out << "opCall<"; break; 113 | case opToInt: out << "opToInt<"; break; 114 | case opToFloat: out << "opToFloat<"; break; 115 | case opToStr: out << "opToStr<"; break; 116 | default: 117 | switch (bc.type >> 4) { 118 | case opAdd: out << "opAdd<"; break; 119 | case opSub: out << "opSub<"; break; 120 | case opMul: out << "opMul<"; break; 121 | case opDiv: out << "opDiv<"; break; 122 | default: out << "Error<"; break; 123 | } 124 | out << byte_code::pType((bc.type >>2) & opMaskType) << ','; 125 | } 126 | out << byte_code::pType(bc.type & opMaskType) << '>'; 127 | } 128 | return out; 129 | } 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## About 3 | 4 | BNFLite is a C++ template library for lightweight flexible grammar parsers. 
5 | BNFLite offers a creative approach where the developer can specify 6 | a language for further parsing directly in the C++ code. 7 | Moreover, such "specifications" are executable now! 8 | 9 | ## Purpose 10 | 11 | BNFLite is intended to parse: 12 | - command line arguments; 13 | - small configuration files; 14 | - output of different tools 15 | 16 | 17 | ## Preface 18 | 19 | Once the author participated in the development of a tool which was invented to integrate a lot of proprietary modules. 20 | There were thousands of combinations of command line options, poor examples and ambiguous docs. 21 | So the command line was not compatible from version to version. 22 | Up-to-date formal BNF specs of the command line language could help, but not for projects with a limited budget. 23 | Since the YACC era, there has been a solution: to support some extra executable code describing the language specifics. 24 | As a rule, usage of such means is too heavy because it is extra stuff and it is not BNF. 25 | BNFLite does not have such drawbacks! 26 | 27 | 28 | ## Usage 29 | 30 | You just need to include bnflite.h into your C++ application: 31 | 32 | `#include "bnflite.h"` 33 | 34 | 35 | ## Concept 36 | 37 | ### BNF Notation 38 | 39 | The BNF (Backus–Naur form) specifies rules of a context-free grammar. 40 | Each computer language should have a complete BNF syntactic specification. 41 | The formal BNF term is "production rule". Each rule except a "terminal" 42 | is a conjunction of a series of more concrete rules or terminals: 43 | 44 | `production_rule ::= <rule_1> ... <rule_n> | <rule_n_1> ... <rule_m>;` 45 | 46 | For example: 47 | 48 | <digit> ::= <0> | <1> | <2> | <3> | <4> | <5> | <6> | <7> | <8> | <9> 49 | <number> ::= <digit> | <digit> <number> 50 | 51 | which means that the number is just a digit or another number with one more digit. 52 | 53 | Generally a terminal is a symbol called a "token". There are two types of production rules: 54 | a lexical production is called a "lexem"; we will call a syntax production rule just a "rule". 
55 | 56 | ### BNFlite notation 57 | 58 | All above can be presented in C++ friendly notation: 59 | 60 | Lexem Digit = Token("0") | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"; //C++11: = "0"_T + "1" ... 61 | LEXEM(Number) = Digit | Digit + Number; 62 | 63 | Both of these expressions are executable thanks to the "bnflite.h" source code library 64 | which supports "Token", "Lexem" and "Rule" classes with overloaded "+" and "|" operators. 65 | A more practical and faster way is to use a simpler form: 66 | 67 | Token Digit("0123456789"); 68 | Lexem Number = Iterate(1, Digit); 69 | 70 | Now e.g. `bnf::Analyze(Number, "532")` can be called with success. 71 | 72 | ### ABNF Notation 73 | 74 | Augmented BNF [specifications](https://tools.ietf.org/html/rfc5234) introduce constructions like `"<a>*<b><element>"` 75 | to support repetition where `<a>` and `<b>` imply at least `<a>` and at most `<b>` occurrences of the element. 76 | For example, `3*3<element>` allows exactly three and `1*2<element>` allows one or two. 77 | Simplified construction `*<element>` allows any number (from 0 to infinity). Alternatively `1*<element>` 78 | requires at least one. 79 | BNFLite offers to use the following constructions: 80 | `Series(a, token, b);` 81 | `Iterate(a, lexem, b);` 82 | `Repeat(a, rule, b);` 83 | 84 | But BNFLite also supports ABNF-like forms: 85 | 86 | Token DIGIT("0123456789"); 87 | Lexem AB_DIGIT = DIGIT(2,3); /* <2>*<3><element> - any 2 or 3 digit number */ 88 | Lexem I_DIGIT = 1*DIGIT; /* 1*<element> - any number */ 89 | Lexem O_DIGIT = *DIGIT; /* *<element> - any number or nothing */ 90 | Lexem N_DIGIT = !DIGIT; /* <0>*<1><element> - one digit or nothing */ 91 | 92 | So, you can almost directly transform ABNF specifications to BNFLite. 93 | 94 | ### User's Callbacks 95 | 96 | To receive intermediate parsing results the callback system can be used. 97 | The first kind of callback can be used as an expression element: 98 | 99 | bool MyNumber(const char* number_string, size_t length_of_number) //... 
100 | Lexem Number = Iterate(1, Digit) + MyNumber; 101 | 102 | The second kind of callback can be bound to a production Rule. 103 | The user needs to define their own context type and work with it: 104 | 105 | typedef Interface<user_context> Usr; 106 | Usr DoNothing(std::vector<Usr>& usr) { return usr[0]; } 107 | //... 108 | Rule Foo; 109 | Bind(Foo, DoNothing); 110 | 111 | ### Restrictions for Recursion in Rules 112 | 113 | The Lite version has some restrictions for rule recursion. 114 | You cannot write: 115 | 116 | `Lexem Number = Digit | Digit + Number; /* failure */` 117 | 118 | because Number is not initialized yet in the expressions. 119 | You can use the macro LEXEM for such constructions: 120 | 121 | `LEXEM(Number) = Digit | Digit + Number;` 122 | 123 | that means 124 | 125 | `Lexem Number; Number = Digit | Digit + Number;` 126 | 127 | When parsing is finished (after the Analyze call) you have to break the recursion manually 128 | like this: 129 | 130 | `Number = Null();` 131 | 132 | Otherwise not all BNFlite internal objects will be released (memory leaks expected). 133 | 134 | 135 | ## Design Notes 136 | 137 | BNFlite is a class library. It is not related to the template 138 | [Boost::Spirit](https://www.boost.org/doc/libs/1_64_0/libs/spirit/doc/html/index.html) library. 139 | This is an extensible approach: for example, the user can inherit public lib classes to create own constructions to parse and perform simultaneously. In fact, the parser here comes from the implementation of a domain specific language. 140 | The prior-art is rather ["A BNF Parser in Forth"](http://www.bradrodriguez.com/papers/bnfparse.htm). 141 | 142 | 143 | ## Examples 144 | 145 | 1. examples/cmd.cpp - simple command line parser 146 | 2. examples/cfg.cpp - parser of restricted custom xml configuration 147 | 3. examples/ini.cpp - parser of ini-files (custom parsing example to perform grammar spaces and comments) 148 | 4. examples/calc.cpp - arithmetic calculator 149 | 150 | >$cd examples 151 | 152 | >$ g++ -I. -I.. 
calc.cpp 153 | 154 | >$ ./a.exe "2+(1+3)*2" 155 | 156 | >Result of 2+(1+3)*2 = 10 157 | 158 | Examples have been tested on several msvc and gcc compilers. 159 | 160 | 161 | ## Unit Test ( C-like expression parser and calculator ) 162 | 163 | 1. c_xprs/utest.cpp - simple unit test 164 | 2. c_xprs/c_xprs.h - parser of C-like expressions 165 | 166 | >$cd c_xprs 167 | 168 | >$ g++ -I. -I.. utest.cpp 169 | 170 | >$ ./a.exe 171 | 172 | Output result of several C-like expressions 173 | 174 | 175 | ## Demo (simplest formula compiler & bite-code interpreter) 176 | 177 | * formula_compiler/main.cpp - starter of byte-code formula compiler and interpreter 178 | * formula_compiler/parser.cpp - BNF-lite parser with grammar section and callbacks 179 | * formula_compiler/code_gen.cpp - byte-code generator 180 | * formula_compiler/code_lib.cpp - several examples of embedded functions (e.g POW(2,3) - power: 2*2*2) 181 | * formula_compiler/code_run.cpp - byte-code interpreter (used SSE2 for parallel calculation of 4 formulas) 182 | 183 | To build and run (remove option `-march=pentium4` if it needed for arm or 64 build): 184 | 185 | >$ cd formula_compiler 186 | 187 | >$ g++ -O2 -march=pentium4 -std=c++14 -I.. main.cpp parser.cpp code_gen.cpp code_lib.cpp code_run.cpp 188 | 189 | >$ ./a.exe `"2 + 3 *GetX()"` 190 | 191 | > 5 byte-codes in: `2 + 3 *GetX()` 192 | 193 | > Byte-code: Int(2),Int(3),opCall<1>,opMul,opAdd; 194 | 195 | > result = 2, 5, 8, 11; 196 | 197 | Note: The embedded function `GetX()` returns a sequential integer number started from `0`. 198 | So, the result is four parallel computations: 199 | `2 + 3 * 0 = 2; 2 + 3 * 1 = 5; 2 + 3 * 2 = 8; 2 + 3 * 3 = 11`. 200 | 201 | 202 | ## Contacts 203 | 204 | Alexander Semjonov : alexander.as0@mail.ru 205 | 206 | 207 | ## Contributing 208 | 209 | If you have any idea, feel free to fork it and submit your changes back to me. 
210 | 211 | 212 | ## Donations 213 | 214 | If you think that the library you obtained here is worth of some money 215 | and are willing to pay for it, feel free to send any amount 216 | through WebMoney WMID: 047419562122 217 | 218 | 219 | ## Roadmap 220 | 221 | - Productize several approaches to catch syntax errors by means of this library (done in unit test) 222 | - Generate fastest C code parser from C++ BNFlite statements (..looking for customer) 223 | - Support wide characters (several approaches, need customer reqs, ..looking for customer) 224 | - Support releasing of ringed Rules (see "Restrictions for Recursion"), in fact the code exists but it is not "lite" 225 | 226 | 227 | ## License 228 | 229 | - MIT 230 | 231 | -------------------------------------------------------------------------------- /tutorial/jsonlite.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************\ 2 | * h-header of JSON lite parser and repository (based on BNFLite) * 3 | * Copyright (c) 2021 Alexander A. Semjonov * 4 | * * 5 | * Permission to use, copy, modify, and distribute this software for any * 6 | * purpose with or without fee is hereby granted, provided that the above * 7 | * copyright notice and this permission notice appear in all copies. * 8 | * * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* 16 | \****************************************************************************/ 17 | #ifndef JSONLITE_H 18 | #define JSONLITE_H 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "bnflite.h" 29 | 30 | using namespace std; 31 | using namespace bnf; 32 | 33 | // repo_t - JSON Repository Structure 34 | // first.first - primary key, name of memebr/object/array 35 | // first.second - secondary key, the int value of parent object 36 | // second.first - string value of member or unic string id for object/array 37 | // second.second - auxiliary field 38 | typedef std::map , std::pair> repo_t; 39 | 40 | class Repo : public std::shared_ptr 41 | { 42 | private: 43 | repo_t::const_iterator curitr; 44 | void SetRoot() { curitr = (*this)->find(make_pair("-1", -1)); } 45 | Repo(const Repo& sp, repo_t::const_iterator f) : std::shared_ptr(sp), curitr(f) {}; 46 | Repo(repo_t* r) : std::shared_ptr(r) 47 | { curitr = (*this)->find(make_pair("-1", -1)); }; 48 | 49 | public: 50 | Repo() : std::shared_ptr( new repo_t() ), curitr(this->get()->begin()) 51 | {}; 52 | Repo(const Repo& sp) : std::shared_ptr(sp), curitr(sp.curitr) 53 | {} 54 | bool IsError() const 55 | { return curitr == (*this)->end(); } 56 | std::string operator*() const 57 | { return curitr == (*this)->end()? "ERROR" : curitr->second.first; } 58 | operator std::string() const 59 | { return curitr == (*this)->end()? "ERROR" : curitr->second.first; } 60 | std::string ToString() 61 | { return curitr == (*this)->end()? "ERROR" : 62 | curitr->second.first[0] != '\"' ? curitr->second.first : 63 | string(curitr->second.first.c_str() + 1, curitr->second.first.size() - 2); } 64 | double ToDouble() 65 | { return curitr == (*this)->end()? 0 : stod(curitr->second.first); } 66 | int Size() 67 | { return curitr == (*this)->end()? 0 : curitr->second.second; } 68 | Repo operator() (const char* key) 69 | { return Repo( *this, curitr == (*this)->end()? 
(*this)->end() : 70 | (*this)->find(make_pair(key, std::stoi(curitr->second.first)))); } 71 | Repo operator[](size_t i) 72 | { return Repo( *this, curitr == (*this)->end()? (*this)->end() : 73 | (*this)->find(make_pair(to_string(i), std::stoi(curitr->second.first)))); } 74 | Repo operator()(const char* key, size_t i) 75 | { return (this->operator()(key)).operator[](i); } 76 | void dumptree(std::ostream& out = std::cout) 77 | { out << std::endl << "\\" << std::endl; 78 | string str0(" "); 79 | if (curitr != this->get()->end()) _dumptree(*curitr, str0, out); } 80 | static Repo ParseJSON(const char* text, int* status, const char** pstop = 0) 81 | { Rule root, element; 82 | JSONGramma(root, element); 83 | gramma_callback(eStart, 0, 0); 84 | int tst = Analyze(root, text, pstop); 85 | if (status) *status = tst; 86 | return tst >= 0 ? Repo(gramma_callback(eGetRepo, 0, 0)) : Repo(); } 87 | 88 | protected: 89 | void _dumptree(const std::pair, std::pair>& lroot, string& str, std::ostream& out ) 90 | { 91 | size_t len = str.size(); 92 | int lvl = std::stoi(lroot.second.first); // next level 93 | int cnt = std::abs(lroot.second.second); 94 | for (const std::pair, std::pair>& obj: *this->get()) { 95 | if (lvl != obj.first.second) continue; 96 | str += --cnt? "\xC3\xC4\xC4" : "\xC0\xC4\xC4"; 97 | str += obj.first.first; 98 | if( obj.second.second == 0 ) { /* subobjects? array? */ 99 | str += " = "; 100 | str += obj.second.first; 101 | out << str << std::endl; 102 | str.resize(len); 103 | } else { 104 | out << str << std::endl; 105 | str.resize(len); 106 | str += cnt ? 
"\xB3 " : " "; 107 | _dumptree(obj, str, out); 108 | str.resize(len); 109 | } 110 | } 111 | } 112 | 113 | enum ParserEvent { eStart, eSaveVariable, ePutPalint, ePushKey, ePopKey, eGetRepo }; 114 | static bool SetLastMember(const char* lexem, size_t len) 115 | { gramma_callback(eSaveVariable, lexem, len); return 1; } 116 | static bool PopKey(const char* lexem, size_t len) 117 | { gramma_callback(ePopKey, lexem, len); return 1; } 118 | static bool PutPlain(const char* lexem, size_t len) 119 | { gramma_callback(ePutPalint, lexem, len); return 1; } 120 | static bool PushKey(const char* lexem, size_t len) 121 | { gramma_callback(ePushKey, lexem, len); return 1; } 122 | 123 | static repo_t* gramma_callback(enum ParserEvent event, const char* lexem, size_t len) 124 | { 125 | static int num = 0; 126 | static string last_member; 127 | static stack level; 128 | static repo_t* repo; 129 | 130 | switch (event) { 131 | case eStart: 132 | repo = new repo_t; 133 | num = 0; 134 | last_member.erase(); 135 | while (!level.empty()) { 136 | level.pop(); 137 | } 138 | break; 139 | case eSaveVariable: 140 | if (lexem[0] =='\"' && lexem[len - 1] =='\"') last_member.assign(lexem + 1, len - 2); 141 | else last_member.assign(lexem, len); 142 | break; 143 | case ePutPalint: 144 | if (last_member.empty()) { 145 | last_member = to_string(level.top()->second.second++); 146 | } else { 147 | level.top()->second.second--; 148 | } 149 | repo->insert(make_pair(make_pair(last_member, 150 | stoi(level.top()->second.first)), make_pair(string(lexem, len), 0))); 151 | last_member.erase(); 152 | break; 153 | case ePushKey: 154 | if (last_member.empty()) { 155 | last_member = level.empty()? "-1" : to_string(level.top()->second.second++); 156 | } else if (!level.empty()){ 157 | level.top()->second.second--; 158 | } 159 | level.push(repo->insert(make_pair( 160 | make_pair(last_member.empty()? string(1, *lexem) : last_member, 161 | level.empty()? 
-1 : stoi(level.top()->second.first)), 162 | make_pair(to_string(num++), 0))).first); 163 | last_member.erase(); 164 | break; 165 | case ePopKey: 166 | level.pop(); 167 | break; 168 | case eGetRepo: 169 | return repo; 170 | } 171 | return 0; 172 | } 173 | static void JSONGramma(Rule& root, Rule& element) 174 | { 175 | Lexem ws = *Token("\x20\x0A\x0D\x09"); // will be not used due to standard pre-parsing in this implementation 176 | Lexem sign = !Token("+-"); 177 | Token onenine('1', '9'); 178 | Lexem digit = "0" | onenine; 179 | Lexem digits = *digit; 180 | 181 | Lexem integer = digit | (onenine + digits) | ("-" + digit) | ("-" + onenine + digits); 182 | Lexem fraction = !("." + digits); 183 | Lexem exponent = !("Ee" + sign + digits); 184 | Lexem number = integer + fraction + exponent; 185 | 186 | Lexem hex = digit | Token('A', 'F') | Token('a', 'f'); 187 | Lexem escape = Token("\"\\/bfnrt") | ("u" + 4 * hex); 188 | 189 | Token any(0x20, 255); 190 | any.Remove("\"\\"); 191 | Lexem character = any | ("\\" + escape); 192 | Lexem characters = *character; 193 | Lexem string = "\"" + characters + "\""; 194 | 195 | Rule elements = element + *("," + element); 196 | Rule member = string + SetLastMember + ":" + element; 197 | Rule members = member + *("," + member); 198 | Rule array = Token("[") + PushKey + !elements + "]" + PopKey; 199 | Rule object = Token("{") + PushKey + !members + "}" + PopKey; 200 | Rule plain = string | number | Lexem("true") | Lexem("false") | Lexem("null"); 201 | Rule value = object | array | (plain + PutPlain); 202 | 203 | element = value; 204 | root = element; 205 | } 206 | }; 207 | #endif // JSONLITE_H 208 | -------------------------------------------------------------------------------- /getting-started.md: -------------------------------------------------------------------------------- 1 | ## BNF by Domain Specific Language in C++ Form 2 | 3 | BackusNaur Form (BNF) is a notation for the formal description of computer languages. 
4 | Simply stated, it is a syntax for describing syntax. 5 | BNF and its derivatives (EBNF, ABNF ...) are commonly used to elaborate a wide range of programming specifications. 6 | 7 | BNF is based on a declarative syntax that allows the user to define language constructions via "production rules". 8 | Each rule except "terminal" is a conjunction of a series of more detailed rules or terminals: 9 | 10 | `production_rule ::= <rule_1>...<rule_n> | <rule_n_1>...<rule_m>;` 11 | 12 | For example: 13 | 14 | <digit> ::= <0> | <1> | <2> | <3> | <4> | <5> | <6> | <7> | <8> | <9> 15 | <number> ::= <digit> | <digit> <number> 16 | 17 | which means that a number is either a single digit, or a single digit followed by another number. 18 | ( the number is just a digit or another number with one more digit ) 19 | 20 | BNFlite implements an embedded domain specific language approach for grammar specification 21 | in which all "production rules" are constructed as instances of C++ classes concatenated 22 | by means of overloaded arithmetic operators. 23 | 24 | The previous example can be presented in C++ form: 25 | 26 | #include "bnflite.h" // include lib 27 | using namespace bnf; 28 | Lexem Digit = Token("0") | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"; 29 | Lexem Number; 30 | Number = Digit | Digit + Number; 31 | 32 | Execution of this C++ code produces an internal data representation for further parsing. 
33 | 34 | ## class Token 35 | 36 | For the previous example the terminal symbol "Token" can be specified like this: 37 | 38 | Token Digit("0123456789"); // more compact and optimal than above 39 | Token Digit('0', '9'); // one more compact form 40 | 41 | One more way to create Token: 42 | 43 | Token NotPoint(1,127); NotPoint.Remove('.'); // all chars except point 44 | 45 | ## class Lexem 46 | 47 | Lexical productions are introduced as "Lexem" object: 48 | 49 | Token Letter('A', 'Z'); // Declare as upper-case 50 | Letter.Add('a', 'z'); // append lower-case to token 51 | Lexem LetterOrDigit = Letter | Digit; 52 | Lexem OptionalIdentifier; 53 | OptionalIdentifier = LetterOrDigit + OptionalIdentifier | Null(); 54 | Lexem Identifier = Letter + OptionalIdentifier; 55 | 56 | The C++ '=' operator is used as BNF '::='. 57 | The '+' operator implements sequential concatenation. 58 | The '|' operator implements 'or' relation. The Null() object return positive production with zero length. 59 | By default the parser tests all alternative productions and selects one with longest input. 60 | The "Identifier" can not started with digit. But it can be one letter. 61 | 62 | ## Repeatable Constructions 63 | 64 | Let assume we need to support 32-character identifiers only, so a 33-character identifier should be treated as error. 65 | The above `OptionalIdentifier` recursive repeat construction can not support such functionality. 66 | However it can be replaced by cyclic sequence mechanisms. 
67 | BNFlite offers to use the following functions: 68 | 69 | Series(a, token, b); 70 | Iterate(a, lexem, b); 71 | Repeat(a, rule, b); 72 | 73 | where `a` and `b` imply at least `a` and at most `b` occurrences. 74 | So we can use: 75 | 76 | Lexem Number = Series(1, Digit); // 1 means at least one digit 77 | Lexem Identifier = Letter + Iterate(0, LetterOrDigit, 32); // 0 means zero or more, 32 means at most 32 78 | 79 | Additionally BNFlite supports a compact (ABNF) style, introducing several overloaded operators to implement repeatable constructions. 80 | 81 | Lexem Number = 1*Digit; // 1 means at least one digit 82 | Lexem Identifier = Letter + *LetterOrDigit; // * means zero or more 83 | 84 | 85 | ## class Rule 86 | 87 | "Rule" object is used to define syntax productions: 88 | 89 | Rule Array = Identifier + "[" + Number + "]"; 90 | 91 | Now we can call the parser: 92 | 93 | int tst = Analyze(Array, "b0[16]"); 94 | 95 | The parser tries to find the right correspondence between "Array" objects and parts of the parsed text: 96 | `Identifier + "[" + Number + "]"` <=> `"b0" + "[" + "16" + "]"` 97 | 98 | Practically the parser goes down through composed BNFlite objects until it reaches "Token" objects. 99 | Each Token can fail or succeed against the current character. 100 | In case of success the upper object continues parsing with the next lower object. 101 | Otherwise, the upper object goes to the next alternation(|) or fails if no more alternation is left. 102 | 103 | Let's consider another example: 104 | 105 | const char* end; 106 | int tst = Analyze(Array, "b[16];", &end); 107 | 108 | Parser decomposes text to `"b" + "[" + "16" + "]"`. 109 | The `";"` character is left because there is no rule for the last character, so the `Analyze` returns a negative value. 110 | The "end" variable contains the pointer to the unrecognized `";"`. 
111 | 112 | 113 | ## Lexing and Parsing Phases 114 | 115 | Let's assume we need to parse `buf[16]` text as a C style array. 116 | We can define it as a lexem: 117 | 118 | Lexem Identifier = Letter + OptionalIdentifier; 119 | Lexem Array = Identifier + "[" + Number + "]"; 120 | 121 | Or as a production rule: 122 | 123 | Rule Identifier = Letter + OptionalIdentifier; 124 | Rule Array = Identifier + "[" + Number + "]"; 125 | 126 | The result of `Analyze(Array, "buf0\t[ 16 ]");` depends on `Array` type. 127 | For the "Rule" case the input text is successfully parsed. 128 | For the "Lexem" case any spaces or tabs in the input text are treated as an error. 129 | 130 | The "Rule" behavior to ignore some predefined constructions can be changed by the user. 131 | In this case the custom handler `const char* pre_parse(const char* ptr)` should be introduced 132 | to call `Analyze(Array, "buf [ 16 /*17*/ ]", pre_parse);` 133 | 134 | 135 | 136 | ## User's Callbacks 137 | 138 | Intermediate parsing results can be obtained by callbacks. Two kinds of callback are supported. 139 | - Function with prototype `bool fun(const char*, size_t)` can be used as an expression element: 140 | 141 | bool SizeNumber(const char* number_string, size_t length_of_number) 142 | { int number; 143 | std::istringstream iss(std::string(number_string, length_of_number)); 144 | iss >> number; 145 | if (!iss.fail() && number > 0 && number < MAX_NUMBER) return true; 146 | else { std::cout << "bad size of array:"; std::cout.write(number_string, length_of_number); return false; } 147 | } 148 | //... 149 | Rule Array = Identifier + "[" + Number + SizeNumber + "]"; 150 | 151 | The user callback has two parameters: the pointer to the latest found element and its length. 152 | In this example `SizeNumber` callback accepts the string of a digit number. 153 | The user callback can do some semantic analysis and return `1`(true) to continue parsing 154 | or `0`(false) to reject the current rule. 
155 | 156 | - Each "Rule" can be bound with a callback to be called on success. 157 | First of all the user needs to define their own working type for their data. This type is used for specialization 158 | of BNFlite `Interface` template class to pass data between Rules. 159 | 160 | typedef Interface<int> Usr; 161 | static Usr SizeNumber(std::vector<Usr>& usr) // user callback function 162 | { printf("Size of Array : %.*s;\n", (int)usr[2].length, usr[2].text); return usr[2]; } 163 | Bind(Array, SizeNumber); //connection between rule `Array` and user callback 164 | 165 | The callback receives a vector of `Interface` objects from lower Rules 166 | and returns a single `Interface` object as a result. 167 | The final root result is in `Analyze` call. 168 | 169 | Usr usr; // results after parsing 170 | int tst = bnf::Analyze(Array, "b[16];", usr); 171 | 172 | ## Parameters for `Analyze` API Function Set 173 | 174 | - `root` - top Rule for parsing 175 | - `text` - text to be parsed 176 | - `pstop` - the pointer where parser stops (`*pstop != '\0'` - not enough rules or resources) 177 | - `u` - top variable of `Interface` template structure (see the second kind of callbacks) 178 | - `u.text` - pointer to text to be parsed (copy of `text`) 179 | - `u.length` - final length of parsed data to be returned after `Analyze` call 180 | - `u.data` - final user data to be returned after `Analyze` call 181 | 182 | ### Return Value 183 | 184 | `Analyze()` returns a negative value in case of parsing error. 185 | Bit fields of the returned value can provide more information about parser behavior. 186 | 187 | 188 | ## Optimizations for Parser 189 | 190 | Generally, BNFlite utilizes a simple top-down parser with backtracking. 191 | This parser may not be well suited to complex grammars. 192 | However, the user has several special rules to make parsing smarter. 
193 | 194 | - `Return()` - Choose current production (should be last in conjunction rule) 195 | - `AcceptFirst()` - Choose first appropriate production (should be first in disjunction rule) 196 | - `Skip()` - Accept result but not production itself (can not be first) 197 | 198 | In some cases less optimal `MemRule()` can be used to remember unsuccessful parsing to reduce known overhead 199 | 200 | 201 | ## Debugging of BNFLite Grammar 202 | 203 | Writing grammar by EDSL is easier if the user has some understanding the parser behavior. 204 | If the `Analyze` call returns an error the user always should take into consideration 205 | both possibilities: 206 | - syntax errors in the input text (incorrect text) 207 | - grammar bugs (incorrect rules). 208 | The BNFLite provides several mechanisms to minimize debugging and testing overheads 209 | 210 | ### Return Codes 211 | 212 | Return code from `Analyze` call can contain flags related to the gramma. 213 | - `eBadRule`, `eBadLexem` - the rule tree is not properly built 214 | - `eEof` - "unexpected end of file" for most cases it is OK, just not enough text for applied rules 215 | - `eSyntax` - syntax error (controlled by the user) 216 | - `eOver` - too much data for cycle rules 217 | - `eRest` - not all text has been parsed 218 | - `eNull` - no result 219 | 220 | 221 | ### Names and Breakpoints 222 | 223 | The user can assign the internal name to the `Rule` object by `setName()`. 224 | It can help to track recursive descent parser looking `Rule::_parse` function calls. 225 | Debugger stack (history of function calls) can inform which Rule was applied and when. 226 | The user just needs to watch the `this->name` variable. It is not as difficult as it seems at first glance. 227 | 228 | ### Debugging of Complex Gramma 229 | 230 | The user can divide complex gramma for several parts to develop them independently. 231 | `Analyze` function can be applied as unit test to any `Rule` representing subset of the gramma. 
232 | 233 | ### Tracing 234 | 235 | The first kind of callback or function with prototype `bool foo(const char* lexem, size_t len)` 236 | can be used in BNFLite rules to obtain the temporary result. 237 | 238 | This function will print the parsed number: 239 | 240 | static bool DebugNumber(const char* lexem, size_t len) 241 | { printf("The number is: %.*s;\n", (int)len, lexem); return true; } 242 | /* … */ 243 | Token digit1_9('1', '9'); 244 | Token DIGIT("0123456789"); 245 | Lexem I_DIGIT = 1*DIGIT; // Series(1, DIGIT); 246 | Lexem exp_ = "Ee" + !Token("+-") + I_DIGIT ; 247 | Lexem frac_ = "." + I_DIGIT; 248 | Lexem int_ = "0" | digit1_9 + *DIGIT; //Series(0, DIGIT); 249 | Lexem num_ = !Token("-") + int_ + !frac_ + !exp_; 250 | Rule number = num_ + DebugNumber; 251 | 252 | The function needs to return `true` because the result is correct. 253 | 254 | ### Catching Warning 255 | 256 | The first kind of callback can be used in BNFLite rules to inform about incorrect situations. 257 | Let's assume numbers with a leading `0` can be processed but they are not wanted. 258 | 259 | static bool Check0Number(const char* lexem, size_t len) 260 | { printf("Warning: the number %.*s with leading zero found\n", (int)len, lexem); return true;} 261 | /* … */ 262 | Lexem num_0 = "0" + digit1_9 + *DIGIT; // special rule for numbers with one leading zero 263 | Rule number = num_ + DebugNumber | num_0 + Check0Number; 264 | 265 | The function still should return `true` to allow the parser to process such numbers. 266 | C++11 constructions like below are also possible: 267 | 268 | Rule number = num_ | num_0 + [](const char* lexem, size_t len) 269 | { return !!printf("Warning: the number %.*s with leading zero found\n", (int)len, lexem); }; 270 | 271 | ### Catching Errors 272 | 273 | In some cases we need to force the parser to stop correctly because the input text is erroneous. 274 | Let's assume that numbers with several leading `0` characters should be treated as an error. 
275 | 276 | static bool Error00Number(const char* lexem, size_t len) 277 | { printf("Error with leading zeros: %.*s\n", (int)len, lexem); return false; } 278 | /* … */ 279 | Lexem num_00 = Series(2, "0") + digit1_9 + *DIGIT; // special rule for numbers with two or more leading zeros 280 | Rule number = num_ + DebugNumber | num_00 + Error00Number + Catch() | num_0 + Check0Number; 281 | 282 | The function should return `false` because the text is not accepted. 283 | The `Catch()` special library statement forces the parser to stop and return the `eSyntax` error flag. 284 | 285 | Note: 286 | - The order `num_`, `num_00`, `num_0` of the alternatives is important because `num_0` is subset of `num_00` 287 | 288 | 289 | ### Try for Syntax Errors 290 | 291 | Below is the extended example for hexadecimal digits: 292 | 293 | Token hex_digit("abcdefABCDEF", DIGIT); 294 | Lexem HexDigits = Token("0") + Token("Xx") + Series(1, hex_digit); 295 | 296 | 297 | Let's assume the parser tries to apply the `HexDigits` rule to `0.0` and `0xZ` text elements. 298 | Neither element fits, but `0.0` can be appropriate for some other rule of the grammar. 299 | But `0xZ` is a syntax error. We definitely know there is no rule for it. 300 | So we should use the `Try()` special statement to force the parser to check that the production is ok. 301 | Otherwise, the parser stops with an `eSyntax` error at the end. 302 | In other words, `Try()` forces an `eSyntax` error if the input text does not match what follows the `Try()` special rule. 303 | 304 | Lexem HexDigits = Token("0") + Token("Xx") + Try() + Series(1, hex_digit); 305 | 306 | If `HexDigits` fails, the internal `catch_error` handler is called. 307 | And `Analyze` returns the `eSyntax` flag. 
308 | 309 | 310 | -------------------------------------------------------------------------------- /c_xprs/c_xprs.h: -------------------------------------------------------------------------------- 1 | 2 | /*************************************************************************\ 3 | * This is C expression parser and calculator lib based on BNFlite * 4 | * Copyright (c) 2018 by Anton G. & Alexander S. ALL RIGHTS RESERVED. * 5 | * * 6 | * This program is free software: you can redistribute it and/or modify * 7 | * it under the terms of the GNU General Public License as published by * 8 | * the Free Software Foundation, either version 3 of the License, or * 9 | * (at your option) any later version. * 10 | * * 11 | * This program is distributed in the hope that it will be useful, * 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 14 | * GNU General Public License for more details. * 15 | * * 16 | * You should have received a copy of the GNU General Public License * 17 | * along with this program. If not, see . * 18 | * * 19 | * Recommendations for commercial use: * 20 | * Commercial application should have dedicated LGPL licensed cpp-file * 21 | * which exclusively includes GPL licensed "bnflit.h" file. In fact, * 22 | * the law does not care how this cpp-file file is linked to other * 23 | * binary applications. Just source code of this cpp-file has to be * 24 | * published in accordance with LGPL license. 
* 25 | \*************************************************************************/ 26 | 27 | 28 | #include "stdio.h" 29 | #include 30 | #include "bnflite.h" 31 | using namespace bnf; 32 | 33 | 34 | /*************************************************************************\ 35 | * Example of the expression tree for "4+3*2-1": * 36 | * '-' * 37 | * / \ * 38 | * '+' 1 * 39 | * / \ * 40 | * 4 '*' * 41 | * / \ * 42 | * 3 2 * 43 | \*************************************************************************/ 44 | struct XprsTree 45 | { 46 | XprsTree(): operation(0), val(0), right(0), left(0) {}; 47 | ~XprsTree() 48 | { if(right) delete right; 49 | if(left) delete left; 50 | } 51 | unsigned int operation; 52 | int val; 53 | XprsTree* right; 54 | XprsTree* left; 55 | }; 56 | 57 | 58 | class C_Xprs // tbd implemented as singleton, to be extended 59 | { 60 | private: 61 | 62 | template class Stack : public std::stack 63 | { public: 64 | T getpop() { 65 | if (this->size() > 0) { 66 | T value = this->top(); 67 | this->pop(); 68 | return value; 69 | } 70 | return 0; 71 | } 72 | }; 73 | 74 | static Stack rootNode; 75 | static int lastNumber; 76 | 77 | 78 | Rule PrimaryXprs; 79 | Rule PostfixXprs; 80 | Rule AdditiveXprs, AdditiveXprs0; 81 | Rule MultiplicativeXprs, MultiplicativeXprs0; 82 | Rule ShiftXprs, ShiftXprs0; 83 | Rule EqualityXprs, EqualityXprs0; 84 | Rule RelationalXprs, RelationalXprs0; 85 | Rule BitwiseAndXprs, BitwiseAndXprs0; 86 | Rule BitwiseOrXprs, BitwiseOrXprs0; 87 | Rule LogicalAndXprs, LogicalAndXprs0; 88 | Rule LogicalOrXprs, LogicalOrXprs0; 89 | Rule ExclusiveOrXprs, ExclusiveOrXprs0; 90 | Rule ConditionalXprs,ConditionalXprs0; 91 | 92 | Rule UnaryXprs; 93 | Rule MainXprs; 94 | 95 | /* 1st kind of callback */ 96 | static bool getHexNumber(const char* lexem, size_t len); 97 | static bool getNumber(const char* lexem, size_t len); 98 | static bool dbgPrint(const char* lexem, size_t len); 99 | static bool printMsg(const char* lexem, size_t len); 100 | static bool 
syntaxError(const char* lexem, size_t len); 101 | static bool numberAction(const char* lexem, size_t len); 102 | static bool buildBinaryAction(const char* lexem, size_t len); 103 | static bool buildUnaryAction(const char* lexem, size_t len); 104 | static bool unaryAction(const char* lexem, size_t len); 105 | static bool postfixAction(const char* lexem, size_t len); 106 | static bool binaryAction(const char* lexem, size_t len); 107 | static bool ifAction(const char* lexem, size_t len); 108 | static bool thenAction(const char* lexem, size_t len); 109 | static bool elseAction(const char* lexem, size_t len); 110 | 111 | 112 | static int GetOperationPriority(unsigned int op); 113 | static int Calcualte(XprsTree& node); 114 | 115 | bool ParseExpression(const char *expression); 116 | void GrammaInit(); 117 | 118 | public: 119 | bool Evaluate(const char *expression, int& result); 120 | C_Xprs() { GrammaInit(); } 121 | ~C_Xprs(){ MainXprs = Null(); UnaryXprs = Null(); }; 122 | }; 123 | 124 | C_Xprs::Stack C_Xprs::rootNode; 125 | int C_Xprs::lastNumber; 126 | 127 | 128 | 129 | bool C_Xprs::getHexNumber(const char* lexem, size_t len) 130 | { 131 | int i = 0; 132 | lastNumber = 0; 133 | if ( len > 1 && lexem[0] == '0' && (lexem[1] == 'X' || lexem[1] == 'x')) 134 | i += 2; 135 | for (; i < len; i++) { 136 | if (lexem[i] >= '0' && lexem[i] <= '9') { 137 | lastNumber = 16 * lastNumber + (lexem[i] - '0'); 138 | } else if (lexem[i] >= 'A' && lexem[i] <= 'F') { 139 | lastNumber = 16 * lastNumber + (lexem[i] - 'A' + 10 ); 140 | } else if (lexem[i] >= 'a' && lexem[i] <= 'f') { 141 | lastNumber = 16 * lastNumber + (lexem[i] - 'a' + 10 ); 142 | } else break; 143 | } 144 | return true; 145 | } 146 | 147 | bool C_Xprs::getNumber(const char* lexem, size_t len) 148 | { 149 | lastNumber = 0; 150 | for (int i = 0; i < len && lexem[i] >= '0' && lexem[i] <= '9'; i++) { 151 | lastNumber = 10 * lastNumber + (lexem[i] - '0'); 152 | } 153 | return true; 154 | } 155 | 156 | 157 | bool 
C_Xprs::dbgPrint(const char* lexem, size_t len) 158 | { 159 | printf("dbg lexem: %.*s;\n", len, lexem); 160 | return true; 161 | } 162 | 163 | bool C_Xprs::printMsg(const char* lexem, size_t len) 164 | { 165 | printf("print lexem: %.*s;\n", len, lexem); 166 | return true; 167 | } 168 | 169 | 170 | bool C_Xprs::syntaxError(const char* lexem, size_t len) 171 | { 172 | printf(" Callback: forced syntax error for: %.*s;\n", len, lexem); 173 | return true; 174 | } 175 | 176 | 177 | 178 | bool C_Xprs::numberAction(const char* lexem, size_t len) 179 | { 180 | XprsTree* node = new XprsTree(); 181 | node->val = lastNumber; 182 | rootNode.push(node); 183 | return true; 184 | } 185 | 186 | 187 | bool C_Xprs::buildBinaryAction(const char* lexem, size_t len) 188 | { 189 | int getopprio(unsigned int op); 190 | if (rootNode.size() >= 2) 191 | { 192 | XprsTree* child = rootNode.getpop(); 193 | XprsTree* parent = rootNode.top(); 194 | 195 | if( GetOperationPriority(child->operation) == GetOperationPriority(parent->operation)) { 196 | parent->left = child->right; 197 | child->right = parent; 198 | rootNode.pop(); 199 | rootNode.push(child); 200 | } 201 | else 202 | { parent->left = child; 203 | } 204 | } 205 | return true; 206 | } 207 | 208 | 209 | bool C_Xprs::buildUnaryAction(const char* lexem, size_t len) 210 | { 211 | if (rootNode.size() >= 2) 212 | { 213 | XprsTree* child = rootNode.getpop(); 214 | XprsTree* parent = rootNode.top(); 215 | parent->right = child; 216 | } 217 | return true; 218 | } 219 | 220 | 221 | bool C_Xprs::unaryAction(const char* lexem, size_t len) 222 | { 223 | if (len > 1 && ((lexem[0] == '+' && lexem[1] == '+') || (lexem[0] == '-' && lexem[1] == '-'))) { 224 | return false; // not supported operations 225 | } 226 | XprsTree* node = new XprsTree(); 227 | rootNode.push(node); 228 | 229 | node->operation = lexem[0] << 8 | ' '; 230 | 231 | return true; 232 | } 233 | 234 | 235 | bool C_Xprs::postfixAction(const char* lexem, size_t len) 236 | { 237 | return 
false; 238 | } 239 | 240 | 241 | bool C_Xprs::binaryAction(const char* lexem, size_t len) 242 | { 243 | XprsTree* node = new XprsTree(); 244 | node->right = rootNode.getpop(); 245 | rootNode.push(node); 246 | node->operation = 0; 247 | for (int i = 0; i < len; i++) { 248 | node->operation = node->operation << 8 | lexem[i]; 249 | } 250 | return true; 251 | } 252 | 253 | bool C_Xprs::ifAction(const char* lexem, size_t len) 254 | { 255 | XprsTree* node = new XprsTree(); 256 | node->right = rootNode.getpop(); 257 | rootNode.push(node); 258 | node->left = new XprsTree(); 259 | rootNode.push(node->left); 260 | node->operation = '?'; 261 | return true; 262 | } 263 | 264 | 265 | bool C_Xprs::thenAction(const char* lexem, size_t len) 266 | { 267 | if (rootNode.size() > 0) { 268 | XprsTree* node = rootNode.getpop(); 269 | rootNode.top()->right = node; 270 | } 271 | return true; 272 | } 273 | 274 | bool C_Xprs::elseAction(const char* lexem, size_t len) 275 | { 276 | if (rootNode.size() > 0) { 277 | XprsTree* node = rootNode.getpop(); 278 | rootNode.getpop()->left = node; 279 | } 280 | return true; 281 | } 282 | 283 | 284 | #define OPBBB(a, b, c) (a << 16 | b << 8 | c) 285 | #define OPBB(a, b) (a << 8 | b) 286 | #define OPB(a) (a) 287 | #define OPU(a) (a << 8 | ' ') 288 | 289 | int C_Xprs::GetOperationPriority(unsigned int op) 290 | { 291 | switch (op) { 292 | case OPBB(':',':'): return 1; 293 | case OPBB('-','>'): case OPU('.'): return 2; 294 | case OPU('+'): case OPU('-'): case OPU('~'): case OPU('!'): return 3; 295 | case OPBB('+','+'): case OPBB('-','-'): case OPU('*'): case OPU('&'): return 3; 296 | case OPBB('.','*'): case OPBBB('.','>','*'): return 4; 297 | case OPB('*'): case OPB('/'): case OPB(':'): return 5; 298 | case OPB('+'): case OPB('-'): return 6; 299 | case OPBB('<','<'): case OPBB('>','>'): return 7; 300 | case OPB('<'): case OPBB('<','='): return 8; 301 | case OPB('>'): case OPBB('>','='): return 8; 302 | case OPBB('=','='): case OPBB('!','='): return 9; 303 
| case OPB('&'): return 10; 304 | case OPB('^'): return 11; 305 | case OPB('|'): return 12; 306 | case OPBB('&','&'): return 13; 307 | case OPBB('|','|'): return 14; 308 | case OPB('='): case OPB('?'): return 15; 309 | case OPBB('+','='): case OPBB('-','='): return 15; 310 | case OPBB('*','='): case OPBB('/','='): case OPBB('%','='): return 15; 311 | case OPBBB('<','<','='): case OPBBB('>','>','='): return 15; 312 | case OPBB('&','='): case OPBB('^','='): case OPBB('|','='): return 15; 313 | case OPB(','): return 17; 314 | } 315 | return 0; 316 | } 317 | 318 | int C_Xprs::Calcualte(XprsTree& node) 319 | { 320 | int val1, val2; 321 | 322 | switch (node.operation & 0xFF) { 323 | case '?': val1 = node.right? Calcualte(*node.right) : 0; 324 | if (val1) return node.val = Calcualte(*node.left->right); 325 | else return node.val = Calcualte(*node.left->left); 326 | break; 327 | case ' ': val1 = node.right? Calcualte(*node.right) : 0; 328 | break; 329 | default: val1 = node.right? Calcualte(*node.right) : 0; 330 | val2 = node.left? 
Calcualte(*node.left) : 0; 331 | break; 332 | } 333 | 334 | switch (node.operation) { 335 | case 0: return node.val; 336 | 337 | case OPU('+'): return node.val = +val1; 338 | case OPU('-'): return node.val = -val1; 339 | case OPU('~'): return node.val = ~val1; 340 | case OPU('!'): return node.val = !val1; 341 | 342 | case OPB('*'): return node.val = val1 * val2; 343 | case OPB('/'): return node.val = val1 / val2; 344 | case OPB('%'): return node.val = val1 % val2; 345 | case OPB('+'): return node.val = val1 + val2; 346 | case OPB('-'): return node.val = val1 - val2; 347 | case OPBB('<','<'): return node.val = val1 << val2; 348 | case OPBB('>','>'): return node.val = val1 >> val2; 349 | case OPB('<'): return node.val = val1 < val2; 350 | case OPBB('<','='): return node.val = val1 <= val2; 351 | case OPB('>'): return node.val = val1 > val2; 352 | case OPBB('>','='): return node.val = val1 >= val2; 353 | case OPBB('=','='): return node.val = val1 == val2; 354 | case OPBB('!','='): return node.val = val1 != val2; 355 | case OPB('&'): return node.val = val1 & val2; 356 | case OPB('^'): return node.val = val1 ^ val2; 357 | case OPB('|'): return node.val = val1 | val2; 358 | case OPBB('&','&'): return node.val = val1 && val2; 359 | case OPBB('|','|'): return node.val = val1 || val2; 360 | default: return 0; 361 | } 362 | } 363 | 364 | void C_Xprs::GrammaInit() 365 | { 366 | 367 | Token Any(1, 255); 368 | Token Digit("0123456789"); 369 | Token HexDigit("abcdefABCDEF",Digit); 370 | Token OctalDigit("01234567"); 371 | Token BinaryDigit("01"); 372 | Token Letter("abcdefghijklmnopqrstuvwxyz_ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 373 | Token Character("0123456789",Letter); 374 | 375 | Lexem Identifier = Letter + Series(0, Character); 376 | Lexem HexDigits = Lexem("0x",1) + Series(1, HexDigit); 377 | Lexem Digits = Series(1, Digit); 378 | 379 | Rule Number = HexDigits + getHexNumber + Return() | Digits + getNumber; 380 | 381 | 382 | PostfixXprs = Lexem("++") + postfixAction + Return() | 
383 | Lexem("--") + postfixAction + Return() | 384 | Null(); 385 | 386 | PrimaryXprs = Number + Try() + numberAction + PostfixXprs + Return() | 387 | "(" + Try() + MainXprs + ")" + Return() | 388 | syntaxError + Catch(); 389 | 390 | UnaryXprs = 391 | Lexem("++") + Try() + unaryAction + PrimaryXprs + buildUnaryAction + Return() | 392 | Lexem("--") + Try() + unaryAction + PrimaryXprs + buildUnaryAction + Return() | 393 | ("-") + Try() + unaryAction + UnaryXprs + buildUnaryAction + Return() | 394 | ("+") + Try() + unaryAction + UnaryXprs + buildUnaryAction + Return() | 395 | ("!") + Try() + unaryAction + UnaryXprs + buildUnaryAction + Return() | 396 | ("~") + Try() + unaryAction + UnaryXprs + buildUnaryAction + Return() | 397 | PrimaryXprs; 398 | 399 | MultiplicativeXprs0 = 400 | ("*") + Try() + binaryAction + MultiplicativeXprs + buildBinaryAction + Return() | 401 | ("/") + Try() + binaryAction + MultiplicativeXprs + buildBinaryAction + Return() | 402 | ("%") + Try() + binaryAction + MultiplicativeXprs + buildBinaryAction; 403 | MultiplicativeXprs = UnaryXprs + Repeat(0, MultiplicativeXprs0); 404 | 405 | AdditiveXprs0 = 406 | ("+") + Try() + binaryAction + AdditiveXprs + buildBinaryAction + Return() | 407 | ("-") + Try() + binaryAction + AdditiveXprs + buildBinaryAction; 408 | AdditiveXprs = MultiplicativeXprs + Repeat(0, AdditiveXprs0); 409 | 410 | ShiftXprs0 = 411 | Lexem("<<") + Try() + binaryAction + AdditiveXprs + buildBinaryAction + Return() | 412 | Lexem(">>") + Try() + binaryAction + AdditiveXprs + buildBinaryAction; 413 | ShiftXprs = AdditiveXprs + Repeat(0, ShiftXprs0); 414 | 415 | RelationalXprs0 = 416 | Lexem(">") + Try() + binaryAction + ShiftXprs + buildBinaryAction + Return() | 417 | Lexem("<") + Try() + binaryAction + ShiftXprs + buildBinaryAction + Return() | 418 | Lexem(">=") + Try() + binaryAction + ShiftXprs + buildBinaryAction + Return() | 419 | Lexem("<=") + Try() + binaryAction + ShiftXprs + buildBinaryAction; 420 | RelationalXprs = ShiftXprs + 
Repeat(0, RelationalXprs0); 421 | 422 | EqualityXprs0 = 423 | Lexem("==") + Try() + binaryAction + EqualityXprs + buildBinaryAction + Return() | 424 | Lexem("!=") + Try() + binaryAction + EqualityXprs + buildBinaryAction; 425 | EqualityXprs = RelationalXprs + Repeat(0, EqualityXprs0); 426 | 427 | Lexem NotLogicalAnd = Token("&") + Token("&").Invert(); 428 | BitwiseAndXprs0 = Skip() + NotLogicalAnd + Token("&") + Try() + binaryAction + EqualityXprs + buildBinaryAction; 429 | BitwiseAndXprs = EqualityXprs + Repeat(0, BitwiseAndXprs0); 430 | 431 | ExclusiveOrXprs0 = "^" + Try() + binaryAction + BitwiseAndXprs + buildBinaryAction; 432 | ExclusiveOrXprs = BitwiseAndXprs + Repeat(0, ExclusiveOrXprs0); 433 | 434 | Lexem NotLogicalOr = "|" + Token("|").Invert(); 435 | BitwiseOrXprs0 = Skip() + NotLogicalOr + "|" + Try() + binaryAction + ExclusiveOrXprs + buildBinaryAction; 436 | BitwiseOrXprs = ExclusiveOrXprs + Repeat(0, BitwiseOrXprs0); 437 | 438 | /* Note Token("&") + Token("&") and Lexem("&&") constructions are the same 439 | from the language notation point of view, but callbuck bachavior is different */ 440 | LogicalAndXprs0 = Lexem("&&") + Try() + binaryAction + BitwiseOrXprs + buildBinaryAction; 441 | LogicalAndXprs = BitwiseOrXprs + Repeat(0, LogicalAndXprs0); 442 | 443 | LogicalOrXprs0 = Lexem("||") + Try() + binaryAction + LogicalAndXprs + buildBinaryAction; 444 | LogicalOrXprs = LogicalAndXprs + Repeat(0, LogicalOrXprs0); 445 | 446 | ConditionalXprs0 = "?" 
+ Try() + ifAction + MainXprs + 447 | ":" + thenAction + ConditionalXprs + 448 | elseAction + Return() | 449 | Null(); 450 | ConditionalXprs = LogicalOrXprs + ConditionalXprs0; 451 | 452 | MainXprs = ConditionalXprs; 453 | 454 | } 455 | 456 | 457 | int catch_syntax_error(const char* ptr) 458 | { 459 | printf("caught syntax error for: %.80s;\n", ptr); 460 | return eSyntax; 461 | } 462 | 463 | 464 | bool C_Xprs::ParseExpression(const char *expression) 465 | { 466 | const char *last = 0; 467 | int tst = Analyze(MainXprs, expression, &last); 468 | if (tst < 0) { 469 | std::cout << " Analize: expression not OK, " << "Err = {" << std::hex 470 | << (tst&eOk?"eOk":"eErr") 471 | << (tst&eRest?", eRest":"") 472 | << (tst&eOver?", eOver":"") 473 | << (tst&eEof?", eEof":"") 474 | << (tst&eBadRule?", eBadRule":"") 475 | << (tst&eBadLexem?", eBadLexem":"") 476 | << (tst&eSyntax?", eSyntax":"") 477 | << (tst&eError?", eError":"") 478 | << "}, stopped at = {" 479 | << last << "}" 480 | << std::endl; 481 | return false; 482 | } 483 | return true; 484 | } 485 | 486 | bool C_Xprs::Evaluate(const char *expression, int& result) 487 | { 488 | bool ok = 0; 489 | if (ParseExpression(expression) && rootNode.size() == 1 ) { 490 | result = Calcualte(*rootNode.top()); 491 | ok = 1; 492 | } 493 | while(!rootNode.empty()) { 494 | XprsTree* node = rootNode.getpop(); 495 | delete node; 496 | } 497 | return ok; 498 | } 499 | -------------------------------------------------------------------------------- /formula_compiler/Formula Compiler using C++ BNF-like EDSL.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Article Source 5 | 6 | 7 | 8 | 9 | 15 | 16 | 17 | 18 |

Formula Compiler using C++ BNF-like EDSL

19 | 20 |
21 |
22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 |

Preface

31 | 32 |

Is there an area where a new programming language developed from scratch can be applied? At first glance, there are no requirements that cannot be covered by the existing languages. However, such areas exist, e.g. systems where parallel calculation is required. It is remarkable that the computer community has been looking for a language to support grammar parsers themselves! BNF (Backus-Naur Form) notation and its derivatives (ABNF, EBNF, etc.) are widely used for formal specifications of programming languages.

33 | 34 |

Although such specifications are really good for both requirements and design they do not help for parser programming. Another approach is to generate code for parsers. The most famous tool is yacc (Yet Another Compiler Compiler) the precompile routine which can generate C parser code from syntactic definition in BNF-like form. This solution is considered as heavy and the software community has been looking for something easier.

35 | 36 |

For the C++ the approach is obvious: a lite solution should be presented as embedded domain specific language (EDSL). It means BNF-like statements have to be mapped to both C++ keywords and overloaded operators. One known example is boost::spirit template library. This paper introduces the BNFLite C++ template library and demonstrates how to create the simplest byte-code formula compiler using this tool.

37 | 38 |

Introduction to BNFLite

39 | 40 |

BNFLite implements the embedded domain specific language approach for grammar specifications: all "production rules" are constructed as instances of C++ classes concatenated by means of overloaded arithmetic operators. For example, the integer number in BNF specs can be described as:

41 | 42 |
<digit> ::= <0> | <1> | <2> | <3> | <4> | <5> | <6> | <7> | <8> | <9>
 43 | <number> ::= <digit> | <digit> <number>
 44 | 
45 | 46 |

This representation can be written in C++ using the BNFLite approach:  

47 | 48 |
#include "bnflite.h" // include source code lib
 49 | using namespace bnf;
 50 | Lexem Digit = Token("0") | "1"  | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9";
 51 | Lexem Number;
 52 | Number = Digit | Digit + Number;
 53 | 
54 | 55 |

Execution of this C++ code produces internal data structures for further parsing procedures.

56 | 57 |

 

58 | 59 |

Requirements for Formula Compiler

60 | 61 |

Let us specify the basic requirements for the language of the formula compiler to be developed:

62 | 63 |
    64 |
  1. Byte-code compiler shall be founded on expression like language: 65 |
      66 |
    1. The following base types shall be supported: <INTEGER> | <FLOAT> | <STRING>
    2. The compiler shall support unary negative operation and basic binary arithmetic operations
    3. The compiler shall be able to generate code to call C++ functions defined in body program
    67 |
68 | 69 |

This is enough for design of simple programming language, which can be described in Backus-Naur Form.

70 | 71 |

Formal BNF term is called "production rule". Each rule except "terminal" is a conjunction of a series of more concrete rules or terminals: production_rule ::= <rule_1>...<rule_n> | <rule_n_1>...<rule_m>. So, the designed expression-like language can be presented by the following terms:

72 | 73 |
<operand> ::= <INTEGER> | <FLOAT> | <STRING>
 74 | <operator> ::= "+" | "-" | "*" | "/"
 75 | <arguments> ::= <EMPTY> | <expression> | < arguments> "," <expression>
 76 | <function> ::= <IDENTIFIER> "(" < arguments > ")"
 77 | <expression> ::= <operand> | <function> | "-" <expression> | <expression> <operator> <expression> | "(" <expression> ")"
78 | 79 |

Such a description is formally correct, but it has two known issues. Firstly, binary operator precedence is not represented. Secondly, it contains a redundant number of recursions, which can be replaced by repetitions.

80 | 81 |

 Augmented BNF specifications introduce constructions like <a>*<b><element> to support repetition, where <a> and <b> imply at least <a> and at most <b> occurrences of the element. For example, 3*3<element> allows exactly three and 1*2<element> allows one or two. The simplified construction *<element> allows any number (from 0 to infinity), and 1*<element> requires at least one occurrence. An optional element is written as *1<element>, or equivalently as [element].

82 | 83 |

Full specifications of the formula compiler language in ABNF form are not so complex:

84 | 85 |
ALPHA  =  'A'-'Z' | 'a'-'z'
 86 | DIGIT  =  '0'-'9'
 87 | digit1_9 =  '1'-'9'
 88 | string  = '\"' *( ALPHA  | DIGIT | '_' ) '\"'
 89 | e = 'e' | 'E'            
 90 | exp = e ['-' | '+'] 1*DIGIT
 91 | frac = '.' 1*DIGIT
 92 | int = '0' | ( digit1_9 *DIGIT )
 93 | number = [ '-' ] int [ frac ] [ exp ]
 94 | operand = int | number | string
 95 | operator = '+' | '-' | '*' | '/'
 96 | identifier =  (ALPHA | '_') *( ALPHA  | DIGIT | '_' )
 97 | arguments ::=  expression *( ',' expression )
 98 | function ::= identifier '(' [arguments]  ')'
 99 | elementary ::= operand | function | ('-' elementary) | ('(' expression ')')
100 | primary = elementary *(('/' elementary) | ('*' elementary))
101 | expression = primary *(('-' primary) | ('+' primary))
102 | 103 |

These specs are more computer friendly than the previous ones. 

104 | 105 |

Short Design Notes

106 | 107 |

The simple byte-code compiler is a minor extension of the expression calculator. For example, the expression 2 + 3 * 4 can be converted to a tree.

108 | 109 |
└──+
110 |    ├── 2
111 |    └──*
112 |       ├── 3
113 |       └── 4
114 | 115 | 116 |
117 | 118 |

It can be written in the C-like form “add(2, mul(3, 4))”.  Let us write it in reverse order: “(2,(3,4)mul)add”. This form is called reverse Polish notation (RPN), and byte-codes can be easily generated from it:

119 | 120 |
Int(2), Int(3), Int(4), Mul<I,I>, Add<I,I>
121 | 122 |

Int(number) operation pushes the number to the stack. Mul/Add operation pops two parameters from the stack and pushes the result.

123 | 124 |

The practical benefit of the formula compiler comes only when functions are used. Let us consider the “linear map” 2 + 3 *GetX(). The byte-code will be:

125 | 126 |
Int(2), Int(3), Call<0>, Mul<I,I>, Add<I,I>
127 | 128 |

For example, this functionality can be cyclically applied to the X database column to obtain the Y column (moreover, the provided demo does four calculations simultaneously, but that is rather the scope of another paper).

129 | 130 |

All byte-codes consist of two fields: type and union for integer, float and pointer to string.

131 | 132 |
struct byte_code
133 | {
134 |     OpCode type;
135 |     union {
136 |         int val_i;
137 |         float val_f;
138 |         const char* val_s;
139 |     };
140 |     byte_code(): type(opNop), val_i(0) {};
141 |     /* … */
142 | 143 |

The union represents the operand for the simple “immediate addressing mode”. The type is the operation code based upon the following enumerator:

144 | 145 |
enum OpCode
146 | {
147 |     opFatal = -1, opNop = 0,
148 |     opInt = 1,  opFloat = 2,  opStr = 3,  opMaskType = 0x03,
149 |     opError = 1, opNeg = 2, opPos = 3, opCall = 4,
150 |     opToInt = 5,  opToFloat = 6, opToStr = 7,
151 |     opAdd = 2,  opSub = 3,  opMul = 4,  opDiv = 5,
152 | };
153 | 154 |

The real operation code is a little more complex. It contains types for stack operands.  

155 | 156 |

BNFLite Grammar classes (Token, Lexem and Rule)

157 | 158 |

The BNFLite description is similar to the ABNF specifications above.

159 | 160 |
Number:
161 | 162 |
Token digit1_9('1', '9');
163 | Token DIGIT("0123456789");
164 | Lexem i_digit = 1*DIGIT;
165 | Lexem frac_ = "." + i_digit;
166 | Lexem int_ = "0" | digit1_9  + *DIGIT;
167 | Lexem exp_ = "Ee" + !Token("+-") + i_digit;
168 | Lexem number_ = !Token("-") + int_ + !frac_ + !exp_;
169 | Rule number = number_;
170 | 171 |

The Token class represents terminals, which are symbols. The Lexem constructs strings of symbols. Parsing is a process of analyzing tokens and lexemes of the input stream. The unary C operator `*` means the construction can be repeated from 0 to infinity. The binary C operator `*` means the construction like 1*DIGIT can be repeated from 1 to infinity. The unary C operator `!` means the construction is optional.

172 | 173 |

Practically, Rule is similar to Lexem except for callbacks and spaces sensitivity. The construction Rule number_ = !Token("-") + int_ + !frac_ + !exp_; allows incorrect spaces e.g. between integer and fractional parts.

174 | 175 |
Strings and identifiers:
176 | 177 |
Token az_("_"); az_.Add('A', 'Z'); az_.Add('a', 'z');
178 | Token az01_(az_); az01_.Add('0', '9');
179 | Token all(1,255); all.Remove("\"");
180 | Lexem identifier = az_  + *(az01_);
181 | Lexem quotedstring = "\"" + *all + "\"";
182 | 
183 | 184 |

The Token all represents all symbols except quote;

185 | 186 |
Major parsing rules:
187 | 188 |
Rule expression;
189 | Rule unary;
190 | Rule function = identifier + "(" + !(expression + *("," + expression)) +  ")";
191 | Rule elementary = AcceptFirst()
192 |         | "(" + expression + ")"
193 |         | function
194 |         | number
195 |         | quotedstring + printErr
196 |         | unary;
197 | unary = Token("-") + elementary;
198 | Rule primary = elementary + *("*%/" + elementary);
199 | /* Rule */ expression = primary + *("+-" + primary);
200 | 
201 | 202 |

The expression and unary rules are recursive, so they need to be declared earlier. The AcceptFirst() statement changes the parser behavior for this particular Rule from the default “accept best” mode to “accept first”. After that the parser chooses the first appropriate construction instead of the most appropriate one.

203 | 204 |

Parser Calls and Callbacks

205 | 206 |

Recursive descent parser implies tree composition. The user needs a means to track the tree traversal recursion. First of all, the interface object structure to interact with the parser should be specified.

207 | 208 |
typedef Interface< std::list<byte_code> > Gen;
209 | /* ... */
210 | const char* tail = 0;
211 | Gen result;
212 | int tst = Analyze(expression, expr.c_str(), &tail, result);
213 | 214 |

Parsing starts by the Analyze call with the following arguments:

215 | 216 |
    217 |
  • expression – root Rule object of BNFlite gramma
  • expr.c_str() – string of the expression to be compiled to byte-code
  • tail – pointer to end of text in case of successful parsing or to place where parsing is stopped due to error
  • result – final object from users callbacks
  • tst – contains result of parsing implemented as bitwise flags, error if negative
218 | 219 |

The callback function accepts a vector of the byte-code lists formed earlier. It returns the list of newly formed byte-codes. For example, Gen DoNumber(std::vector<Gen>& res) accepts a one-element vector representing a number. It has to return the result Gen object filled with the user's data (either Int<> or Float<>).

220 | 221 |

In the common case, the callback Gen DoBinary(std::vector<Gen>& res) accepts the left byte-code vector, the sign of the operation (+, -, *, /), and the right byte-code vector. The callback just joins the left and right byte-code vectors and generates the tail byte-code according to the sign.

222 | 223 |

Comparison with Boost::Spirit Implementation 

224 | 225 |

This is a grammar part implemented by means of boost::spirit template library:

226 | 227 |
namespace q = boost::spirit::qi;
228 | 
229 | typedef boost::variant<
230 |     int, float, std::string,
231 |     boost::recursive_wrapper<struct binary_node>,
232 |     boost::recursive_wrapper<struct unary_node>,
233 |     boost::recursive_wrapper<struct call_node>
234 | > branch_t;
235 | 
236 | template <typename Iterator>
237 | struct SParser: q::grammar<Iterator, branch_t, q::space_type>
238 | {
239 |     q::rule<Iterator, branch_t, q::space_type> expression, primary, elementary, unary;
240 |     q::rule<Iterator, std::string()> quotedstring;
241 |     q::rule<Iterator, std::string()> identifier;
242 |     q::rule<Iterator, std::vector<branch_t>, q::space_type> arglist;
243 |     q::rule<Iterator, call_node, q::space_type> function;
244 | 
245 |     SParser() : SParser::base_type(expression)
246 |     {
247 |     using boost::phoenix::construct;
248 |         
249 |     expression = primary[q::_val = q::_1]
250 |         >> *('+'  > primary[q::_val = construct<binary_node>(q::_val, q::_1, '+')]
251 |             | '-' > primary[q::_val = construct<binary_node>(q::_val, q::_1, '-')] );
252 |     primary =   elementary[q::_val = q::_1]
253 |         >> *('*'  > elementary[q::_val = construct<binary_node>(q::_val, q::_1, '*')]
254 |             | '/' > elementary[q::_val = construct<binary_node>(q::_val, q::_1, '/')] );
255 |     unary = '-' > elementary[q::_val = construct<unary_node>(q::_1, '-')]; 
256 | 
257 |     elementary = q::real_parser<float, q::strict_real_policies<float>>()[q::_val = q::_1]
258 |         | q::int_[q::_val = q::_1]
259 |         | quotedstring[q::_val = q::_1]
260 |         | ('(' > expression > ')')[q::_val = q::_1]
261 |         | function[q::_val = q::_1]
262 |         | unary[q::_val = q::_1];
263 | 
264 |     quotedstring = '"' > *(q::char_ - '"') > '"';
265 |     identifier = (q::alpha | q::char_('_')) >> *(q::alnum | q::char_('_'));
266 | 
267 |     arglist = '(' > (expression % ',')[q::_val = q::_1] > ')';
268 |     function = (identifier > arglist)[q::_val = construct<call_node>(q::_1, q::_2)];
269 | 
270 |     on_error(expression, std::cout << boost::phoenix::val("Error "));
271 |     }
272 | };
273 | 274 |
Please refer to the boost::spirit documentation for details. The example is provided to demonstrate the similarity of the boost::spirit grammar to the BNFLite one.
275 | 276 |

Boost::spirit is a more mature tool and does not use callbacks. For each “rule” it calls constructors of classes which should be members of boost::variant. Therefore, boost::spirit cannot be used for one-pass compilers. However, boost::variant can be effective if something more complex is required (e.g. byte-code optimizers).

277 | 278 |

The problems of boost::spirit lie in another area. Any inclusion of boost::spirit considerably increases the project compile time. Another significant drawback of boost::spirit is related to run-time debugging of the grammar - it is not possible at all! 

279 | 280 |

Debugging of BNFLite Gramma

281 | 282 |

Writing a grammar by means of an EDSL is unusual, and the user does not have a full understanding of the parser. If the Analyze call returns an error for a correct text, then the user should always take into consideration the possibility of grammar bugs.  

283 | 284 |
Return code
285 | 286 |

Return code from Analyze call can contain flags related to the gramma. For example, eBadRule, eBadLexem flags mean the tree of rules is not properly built.

287 | 288 |
Names and breakpoints
289 | 290 |

The user can assign a name to the Rule. It can help to track the recursive descent parser using the Rule::_parse function. The debugger stack (history of function calls) can show which Rule was applied and when. The user just needs to watch the this->name variable. It is not as difficult as it seems at first glance.

291 | 292 |
 Gramma subsets
293 | 294 |

Analyze function can be applied as unit test to any Rule representing subset of gramma.

295 | 296 |
Tracing
297 | 298 |

Function with prototype “bool foo(const char* lexem, size_t len)” can be used in BNFLite expressions for both reasons: to obtain temporary results and to inform about predicted errors.

299 | 300 |

This function will print the parsed number

301 | 302 |
static bool DebugNumber(const char* lexem, size_t len)
303 | {    printf("The number is: %.*s;\n", len, lexem);    return true; }
304 |     /* … */
305 | Rule number = number_ + DebugNumber;
306 | 307 |

The function should return true if result is correct.

308 | 309 |

Let us assume that numbers with a leading ‘+’ are not allowed

310 | 311 |
static bool ErrorPlusNumber(const char* lexem, size_t len)
312 | {printf("The number %.*s with plus is not allowed\n", len, lexem); return false;}
313 |     /* … */
314 | Lexem number_p = !Token("+") + int_ + !frac_ + !exp_ + ErrorPlusNumber;
315 | Rule number = number_ | number_p;
316 | 317 |

The function should return false to pass the incorrect result to the parser. C++11 constructions like below are also possible:

318 | 319 |
Rule number = number_ | [](const char* lexem, size_t len)
320 | { return !printf("The number %.*s with plus is not allowed\n", len, lexem); }
321 | 322 |

Building and Run

323 | 324 |

The attached simplest formula compiler package has the following C++ files:

325 | 326 |
    327 |
  1. main.cpp - starter of byte-code compiler and interpreter
  2. parser.cpp - BNFLite parser with grammar section and callbacks
  3. code_gen.cpp - byte-code generator
  4. code_lib.cpp - several examples of built-in functions (e.g POW(2,3) - power: 2*2*2)
  5. code_run.cpp - byte-code interpreter (used SSE2 for parallel calculation of 4 formulas)
328 | 329 |

The package has several prebuilt projects, but generally it can be built as:

330 | 331 |
>$ g++ -O2 -march=pentium4 -std=c++14 -I.. code_gen.cpp  parser.cpp  code_lib.cpp  main.cpp code_run.cpp
332 | 333 |

Function GetX() returns four values: 0, 1, 2, 3. It can be used in expression to be compiled and calculated:

334 | 335 |
> $ a.exe "2 + 3 *GetX()"  
336 | 5 byte-codes in: 2+3*GetX()
337 | Byte-code: Int(2),Int(3),opCall<I>,opMul<I,I>,opAdd<I,I>;
338 | result = 2, 5, 8, 11;
339 | 340 |

Conclusion

341 | 342 |

The presented formula compiler is the realization of several ideas in order to demonstrate its feasibility. First of all, it is parallel computation according to compiled formula. Secondly, it introduces BNFLite header library to spread existing concept of applicability of BNF forms. Now a fast parser implementation can be easily created for specific customer requirements where BNF-like language is used.

343 | 344 |

References

345 | 346 |

[1] BNFLite with some examples, https://github.com/r35382/bnflite

347 | 348 | 349 |
350 | 351 | 352 | 353 | 354 | 355 |
356 | 357 | 358 | -------------------------------------------------------------------------------- /bnflite.h: -------------------------------------------------------------------------------- 1 | 2 | /*************************************************************************\ 3 | * BNF Lite is a C++ template library for lightweight grammar parsers * 4 | * Copyright (c) 2017 by Alexander A. Semjonov. ALL RIGHTS RESERVED. * 5 | * * 6 | * Permission is hereby granted, free of charge, to any person * 7 | * obtaining a copy of this software and associated documentation * 8 | * files (the "Software"), to deal in the Software without restriction, * 9 | * including without limitation the rights to use, copy, modify, merge, * 10 | * publish, distribute, sublicense, and/or sell copies of the Software, * 11 | * and to permit persons to whom the Software is furnished to do so, * 12 | * subject to the following conditions: * 13 | * * 14 | * The above copyright notice and this permission notice shall be * 15 | * included in all copies or substantial portions of the Software. * 16 | * * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.* 20 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * 21 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * 22 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH * 23 | * THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* 24 | \*************************************************************************/ 25 | 26 | #ifndef BNFLITE_H 27 | #define BNFLITE_H 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | namespace bnf 39 | { 40 | // BNF (Backus-Naur form) is a notation for describing syntax of computer languages 41 | // BNF Lite is the source code template library implementing the way to support BNF specifications 42 | // BNF Terms: 43 | // * Production rule is formal BNF expression which is a conjunction of a series 44 | // of more concrete rules: 45 | // production_rule ::= ... | ...; 46 | // * e.g. 47 | // <digit> ::= <0> | <1> | <2> | <3> | <4> | <5> | <6> | <7> | <8> | <9> 48 | // <number> ::= <digit> | <digit> <number> 49 | // where the number is just a digit or another number with one more digit; 50 | // Now this example can be presented in C++ friendly notation: 51 | // Lexem Digit = Token("0") | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"; 52 | // RULE(Number) = Digit | Digit + Number; 53 | // where: 54 | // * Token is a terminal production; 55 | // * Lexem (or LEXEM) is a lexical production; 56 | // * Rule (or RULE) is used here as synonym of syntax production 57 | // To parse any number (e.g.
532) it is just enough to call the bnf::Analyze(Number, "532") 58 | 59 | enum Limits { maxCharNum = 256, maxLexemLength = 1024, maxRepeate = 4096, maxEmptyStack = 16 60 | }; 61 | enum Status { eNone = 0, eOk = 1, 62 | eRet = 0x8, e1st = 0x10, eSkip = 0x20, eTry = 0x40, eNull = 0x80, 63 | eRest = 0x0100, eNoData = 0x0200, eOver = 0x0400, eEof = 0x0800, 64 | eBadRule = 0x1000, eBadLexem = 0x2000, eSyntax = 0x4000, 65 | eError = ((~(unsigned int)0) >> 1) + 1 66 | }; 67 | 68 | class _Tie; class _And; class _Or; class _Cycle; 69 | 70 | /* context class to support the first kind of callback */ 71 | class _Base // base parser class 72 | { 73 | public: 74 | std::vector cntxV; // public for internal extensions 75 | protected: friend class Token; friend class Lexem; friend class Rule; 76 | friend class _And; friend class _Or; friend class _Cycle; 77 | int level; 78 | const char* pstop; 79 | int _chk_stack() 80 | { static const char* org; static int cnt; 81 | if (org != cntxV.back()) { org = cntxV.back(); cnt = 0; } 82 | else if (++cnt > maxEmptyStack) return eOver|eError; 83 | return 0; } 84 | const char* (*zero_parse)(const char*); 85 | int catch_error(const char* ptr) // attempt to catch general syntax error 86 | { return eSyntax|eError; } 87 | virtual void _erase(int low, int up = 0) 88 | { cntxV.erase(cntxV.begin() + low, up? 
cntxV.begin() + up : cntxV.end() ); } 89 | virtual std::pair _pre_call(void* callback) 90 | { return std::make_pair((void*)0, 0); } 91 | virtual void _post_call(std::pair up) 92 | {}; 93 | virtual void _do_call(std::pair up, void* callback, size_t org, const char* name) 94 | {}; 95 | virtual void _stub_call(size_t org, const char* name) 96 | {}; 97 | public: 98 | int _analyze(_Tie& root, const char* text, size_t*); 99 | _Base(const char* (*pre)(const char*)) : level(1), pstop(0), zero_parse(pre?pre:base_parser) 100 | {}; 101 | virtual ~_Base() 102 | {}; 103 | // default pre-parser procedure to skip special symbols 104 | static const char* base_parser(const char* ptr) 105 | { for (char cc = *ptr; cc != 0; cc = *++ptr) { 106 | if (cc != ' ' && cc !='\t' && cc != '\n' && cc != '\r') { 107 | break; } } 108 | return ptr; } 109 | }; 110 | 111 | #if !defined(_MSC_VER) 112 | #define _NAME_OFF 0 113 | #else 114 | #define _NAME_OFF 6 115 | #endif 116 | 117 | /* internal base class to support multiform relationships between different BNFlite elements */ 118 | class _Tie 119 | { 120 | bool _is_compound(); 121 | protected: friend class _Base; friend class ExtParser; 122 | friend class _And; friend class _Or; friend class _Cycle; 123 | friend class Token; friend class Lexem; friend class Rule; 124 | 125 | bool inner; 126 | mutable std::vector use; 127 | mutable std::list usage; 128 | std::string name; 129 | template static void _setname(T* t, const char * name = 0) 130 | { static int cnt = 0; 131 | if (name) { t->name = name; } 132 | else { t->name = typeid(*t).name() + _NAME_OFF; 133 | for (int i = ++cnt; i != 0; i /= 10) { 134 | t->name += '0' + i - (i/10)*10; } } } 135 | void _clone(const _Tie* lnk) 136 | { usage.swap(lnk->usage); 137 | for (std::list::const_iterator usg = usage.begin(); usg != usage.end(); ++usg) { 138 | for (size_t i = 0; i < (*usg)->use.size(); i++) { 139 | if ((*usg)->use[i] == lnk) { 140 | (*usg)->use[i] = this; } } } 141 | use.swap(lnk->use); 142 | for 
(size_t i = 0; i < use.size(); i++) { 143 | if (!use[i]) continue; 144 | std::list::iterator itr = 145 | std::find(use[i]->usage.begin(), use[i]->usage.end(), lnk); 146 | *itr = this; } 147 | if(lnk->inner) { 148 | delete lnk; } } 149 | _Tie(std::string nm = "") :inner(false), name(nm) 150 | {}; 151 | explicit _Tie(const _Tie* lnk) : inner(true), name(lnk->name) 152 | { _clone(lnk); } 153 | _Tie(const _Tie& link) : inner(link.inner), name(link.name) 154 | { _clone(&link); } 155 | virtual ~_Tie() 156 | { for (size_t i = 0; i < use.size(); i++) { 157 | const _Tie* lnk = use[i]; 158 | if (lnk) { 159 | lnk->usage.remove(this); 160 | for (size_t j = 0; j < use.size(); j++) { 161 | if ( use[j] == lnk) { 162 | use[j] = 0; } } 163 | if (lnk->inner && lnk->usage.size() == 0) { 164 | delete lnk; } } } } 165 | static int call_1st(const _Tie* lnk, _Base* parser) 166 | { return lnk->_parse(parser); } 167 | void _clue(const _Tie& link) 168 | { if (!use.size() || _is_compound()) { 169 | use.push_back(&link); 170 | } else { 171 | if (use[0]) { 172 | use[0]->usage.remove(this); 173 | if (use[0]->inner && use[0]->usage.size() == 0) { 174 | delete use[0]; } } 175 | use[0] = &link; } 176 | link.usage.push_back(this); } 177 | template static T* _safe_delete(T* t) 178 | { if (t->usage.size() != 0) { 179 | if (!t->inner) { 180 | return new T(t); } } 181 | return 0; } 182 | virtual int _parse(_Base* parser) const throw() = 0; 183 | public: 184 | void setName(const char * name) 185 | { this->name = name; } 186 | const char *getName() 187 | { return name.c_str(); } 188 | _And operator+(const _Tie& link); 189 | _And operator+(const char* s); 190 | _And operator+(bool (*f)(const char*, size_t)); 191 | friend _And operator+(const char* s, const _Tie& lnk); 192 | friend _And operator+(bool (*f)(const char*, size_t),const _Tie& lnk); 193 | _Or operator|(const _Tie& link); 194 | _Or operator|(const char* s); 195 | _Or operator|(bool (*f)(const char*, size_t)); 196 | friend _Or operator|(const 
char* s, const _Tie& lnk); 197 | friend _Or operator|(bool (*f)(const char*, size_t), const _Tie& lnk); 198 | 199 | // Support Augmented BNF constructions like "
*" to implement repetition; 200 | // In ABNF and imply at least and at most occurrences of the element; 201 | // e.g * allows any number(from 0 to infinity, 1* requires at least one; 202 | // 3*3 allows exactly 3 and 1*2 allows one or two. 203 | _Cycle operator()(int at_least, int total); // ABNF case .* as element(a,b) 204 | _Cycle operator*(); // ABNF case * (from 0 to infinity) 205 | _Cycle operator!(); // ABNF case <0>.<1>* or <1> (at least one) 206 | }; 207 | 208 | /* implementation of parsing control rules */ 209 | template class _Ctrl: public _Tie 210 | { 211 | protected: friend class _Tie; 212 | virtual int _parse(_Base* parser) const throw() 213 | { return flg; } 214 | explicit _Ctrl(const _Ctrl* ctrl) :_Tie(ctrl) 215 | {}; 216 | _Ctrl(const _Ctrl& control) :_Tie(control) 217 | {}; 218 | public: 219 | explicit _Ctrl(): _Tie(std::string(1, cc)) 220 | {}; 221 | ~_Ctrl() 222 | { _safe_delete(this); } 223 | }; 224 | 225 | /* Null operation, immediate successful return */ 226 | typedef _Ctrl Null; // stub for some constructions (e.g. 
"zero-or-one") 227 | 228 | /* Force Return, immediate return from conjunction rule to impact disjunction rule */ 229 | typedef _Ctrl Return; 230 | 231 | /* Switch to use "Accept First" strategy for disjunction rule instead "Accept Best" */ 232 | typedef _Ctrl AcceptFirst; 233 | 234 | /* Try to catch syntax error in current conjunction rule */ 235 | typedef _Ctrl Try; 236 | 237 | /* Check but do not accept next statement for conjunction rule */ 238 | typedef _Ctrl Skip; 239 | 240 | /* Force syntax error */ 241 | typedef _Ctrl Catch; 242 | 243 | 244 | /* interface class for tokens */ 245 | class Token: public _Tie 246 | { 247 | Token& operator=(const _Tie&); 248 | explicit Token(const _Tie&); 249 | public: 250 | class interval_set : protected std::map 251 | { 252 | public: 253 | interval_set() 254 | { insert(std::make_pair(0, false)); insert(std::make_pair(WCHAR_MAX, false)); } 255 | bool test(wchar_t key) const 256 | { return (--upper_bound(key))->second; } 257 | void reset(wchar_t key) 258 | { set(key, 0, false); } 259 | void set(wchar_t key, size_t rep = 0, bool val = true) 260 | { wchar_t key_end = key + rep + 1; 261 | if (key == 0 || key_end == WCHAR_MAX) return; 262 | std::map::iterator right_begin = lower_bound(key); 263 | std::map::iterator left_begin = right_begin; --left_begin; 264 | std::map::iterator right_end = upper_bound(key_end); 265 | std::map::iterator left_end = right_end; --left_end; 266 | if (left_end->second == val) 267 | if (left_end->first >= key_end && right_begin == left_end) erase(right_begin); 268 | else erase(right_begin, right_end); 269 | else { 270 | std::map::iterator itr = insert(std::make_pair(key_end, left_end->second)).first; 271 | if(right_begin->first < itr->first) 272 | erase(right_begin, itr); } 273 | if (left_begin->second != val) 274 | insert(std::make_pair(key, val)); } 275 | void flip() 276 | { for (std::map::iterator itr = begin(); itr != end(); ++itr) 277 | itr->second = !itr->second; } 278 | }; 279 | 280 | protected: 
friend class _Tie; 281 | #if defined(BNFLITE_WIDE) 282 | interval_set match; 283 | #else 284 | std::bitset match; 285 | #endif 286 | explicit Token(const Token* tkn) :_Tie(tkn), match(tkn->match) 287 | {}; 288 | virtual int _parse(_Base* parser) const throw() 289 | { const char* cc = parser->cntxV.back(); 290 | if (parser->level) 291 | cc = parser->zero_parse(cc); 292 | char c = *((unsigned char*)cc); 293 | if (match.test(c)) { 294 | if (parser->level) { 295 | parser->cntxV.push_back(cc); 296 | parser->_stub_call(parser->cntxV.size() - 1, name.c_str()); } 297 | parser->cntxV.push_back(++cc); 298 | return c ? eOk : eOk|eEof; } 299 | return c ? eNone : eEof; } 300 | public: 301 | Token(const char c) :_Tie(std::string(1, c)) 302 | { Add(c, 0); }; // create single char token 303 | Token(int fst, int lst) :_Tie(std::string(1, fst).append("-") += lst) 304 | { Add(fst, lst); }; // create token by ASCII charactes in range 305 | Token(const char *s) :_Tie(std::string(s)) 306 | { Add(s); }; // create token by C string sample 307 | Token(const char *s, const Token& token) :_Tie(std::string(s)), match(token.match) 308 | { Add(s); }; // create token by both C string sample and another token set 309 | Token(const Token& token) :_Tie(token), match(token.match) 310 | {}; 311 | virtual ~Token() 312 | { _safe_delete(this); } 313 | void Add(int fst, int lst = 0, const char *sample = "") // add characters in range fst...lst exept mentioned in sample; 314 | { switch (lst) { // lst == 0|1: add single | upper&lower case character(s) 315 | case 1: if (fst >= 'A' && fst <= 'Z') match.set(fst - 'A' + 'a'); 316 | else if (fst >= 'a' && fst <= 'z') match.set(fst - 'a' + 'A'); 317 | case 0: match.set((unsigned char)fst); break; 318 | default: for (int i = fst; i <= lst; i++) { 319 | match.set((unsigned char)i); } 320 | Remove(sample); } } 321 | void Add(const char *sample) 322 | { while (*sample) { 323 | match.set((unsigned char)*sample++); } } 324 | void Remove(int fst, int lst = 0) 325 | { 
for (int i = fst; i <= (lst?lst:fst); i++) { 326 | match.reset((unsigned char)i); } } 327 | void Remove(const char *sample) 328 | { while (*sample) { 329 | match.reset((unsigned char)*sample++); } } 330 | int GetSymbol(int next = 1) // get first short symbol 331 | { for (unsigned int i = next; i < maxCharNum; i++) { 332 | if (match.test(i)) return i; } 333 | return 0; } 334 | Token& Invert() // invert token to build construction to not match 335 | { match.flip(); return *this; } 336 | 337 | }; 338 | #if __cplusplus > 199711L 339 | inline Token operator""_T(const char* sample, size_t len) 340 | { return Token(std::string(sample, len).c_str()); } 341 | #endif 342 | 343 | /* standalone callback wrapper class */ 344 | class Action: public _Tie 345 | { 346 | bool (*action)(const char* lexem, size_t len); 347 | Action(_Tie&); 348 | protected: friend class _Tie; 349 | explicit Action(const Action* a) :_Tie(a), action(a->action) 350 | {}; 351 | int _parse(_Base* parser) const throw() 352 | { std::vector::reverse_iterator itr = parser->cntxV.rbegin() + 1; 353 | return (*action)(*itr, parser->cntxV.back() - *itr); } 354 | public: 355 | Action(bool (*action)(const char* lexem, size_t len), const char *name = "") 356 | :_Tie(name), action(action) {}; 357 | virtual ~Action() 358 | { _safe_delete(this); } 359 | }; 360 | 361 | /* internal class to support conjunction constructions of BNFlite elements */ 362 | class _And: public _Tie 363 | { 364 | protected: friend class _Tie; friend class Lexem; 365 | _And(const _Tie& b1, const _Tie& b2):_Tie("") 366 | { (name = b1.name).append("+") += b2.name; _clue(b1); _clue(b2); } 367 | explicit _And(const _And* rl) :_Tie(rl) 368 | {}; 369 | virtual int _parse(_Base* parser) const throw() 370 | { int stat = 0; size_t save = 0; size_t size = parser->cntxV.size(); 371 | for (unsigned i = 0; i < use.size(); i++, stat &= ~(eSkip|eOk)) { 372 | stat |= use[i]->_parse(parser); 373 | if (!(stat & eOk) || (stat & eError) || ((stat & eEof) && 
(parser->cntxV.back() == parser->cntxV[size - 1]))) { 374 | if (parser->level && (stat & eTry) && !(stat & eError) && !save) { 375 | stat |= parser->catch_error(parser->cntxV.back()); } 376 | parser->_erase(size); 377 | return stat & ~(eTry|eSkip|eOk); } 378 | else { 379 | if (save) { 380 | parser->cntxV.resize(save); 381 | save = 0; } 382 | if (stat & eSkip) { 383 | save = parser->cntxV.size(); } } } 384 | return eOk | (stat & ~(eTry|eSkip)); } 385 | public: 386 | ~_And() 387 | { _safe_delete(this); } 388 | _And& operator+(const _Tie& rule2) 389 | { name.append("+") += rule2.name; _clue(rule2); return *this; } 390 | _And& operator+(const char* s) 391 | { name.append("+") += s; _clue(Token(s)); return *this; } 392 | _And& operator+(bool (*f)(const char*, size_t)) 393 | { name += "+()"; _clue(Action(f)); return *this; } 394 | friend _And operator+(const char* s, const _Tie& link); 395 | friend _And operator+(bool (*f)(const char*, size_t), const _Tie& link); 396 | }; 397 | inline _And _Tie::operator+(const _Tie& rule2) 398 | { return _And(*this, rule2); } 399 | inline _And _Tie::operator+(const char* s) 400 | { return _And(*this, Token(s)); } 401 | inline _And _Tie::operator+(bool (*f)(const char*, size_t)) 402 | { return _And(*this, Action(f)); } 403 | inline _And operator+(const char* s, const _Tie& link) 404 | { return _And(Token(s), link); } 405 | inline _And operator+(bool (*f)(const char*, size_t), const _Tie& link) 406 | { return _And(Action(f), link); } 407 | 408 | /* internal class to support disjunction constructions of BNFlite elements */ 409 | class _Or: public _Tie 410 | { 411 | protected: friend class _Tie; 412 | _Or(const _Tie& b1, const _Tie& b2):_Tie("") 413 | { (name = b1.name).append("|") += b2.name; _clue(b1); _clue(b2); } 414 | explicit _Or(const _Or* rl) :_Tie(rl) 415 | {}; 416 | virtual int _parse(_Base* parser) const throw() 417 | { int stat = 0; int tstat = 0; int max = 0; int tmp = -1; 418 | size_t size = parser->cntxV.size(); 419 | for 
(unsigned i = 0; i < use.size(); i++, stat &= ~(eOk|eRet|eEof|eError)) { 420 | size_t msize = parser->cntxV.size(); 421 | if (msize > size) { 422 | parser->cntxV.push_back(parser->cntxV[size - 1]); } 423 | stat |= use[i]->_parse(parser); 424 | if (stat & (eOk|eError)) { 425 | tmp = parser->cntxV.back() - parser->cntxV[size - 1]; 426 | if ((tmp > max) || (tmp > 0 && (stat & (eRet|e1st))) || (tmp >= 0 && (stat & eError))) { 427 | max = tmp; 428 | tstat = stat; 429 | if (msize > size) { 430 | parser->_erase(size, msize + 1); } 431 | if (stat & (eRet|e1st|eError)) { 432 | break; } 433 | continue; } } 434 | if (parser->cntxV.size() > msize) { 435 | parser->_erase(msize); } } 436 | return (max || tmp >= 0 ? tstat | eOk: tstat & ~eOk) & ~(e1st|eRet); } 437 | public: 438 | ~_Or() 439 | { _safe_delete(this); } 440 | _Or& operator|(const _Tie& rule2) 441 | { name.append("|") += rule2.name; _clue(rule2); return *this; } 442 | _Or& operator|(const char* s) 443 | { name.append("|") += s; _clue(Token(s)); return *this; } 444 | _Or& operator|(bool (*f)(const char*, size_t)) 445 | { name += "|()"; _clue(Action(f)); return *this; } 446 | friend _Or operator|(const char* s, const _Tie& link); 447 | friend _Or operator|(bool (*f)(const char*, size_t), const _Tie& link); 448 | }; 449 | inline _Or _Tie::operator|(const _Tie& rule2) 450 | { return _Or(*this, rule2); } 451 | inline _Or _Tie::operator|(const char* s) 452 | { return _Or(*this, Token(s)); } 453 | inline _Or _Tie::operator|(bool (*f)(const char*, size_t)) 454 | { return _Or(*this, Action(f)); } 455 | inline _Or operator|(const char* s, const _Tie& link) 456 | { return _Or(Token(s), link); } 457 | inline _Or operator|(bool (*f)(const char*, size_t), const _Tie& link) 458 | { return _Or(Action(f), link); } 459 | inline bool _Tie::_is_compound() 460 | { return dynamic_cast<_And*>(this) || dynamic_cast<_Or*>(this); } 461 | 462 | 463 | /* interface class for lexem */ 464 | class Lexem: public _Tie 465 | { 466 | Lexem& 
operator=(const class Rule&); 467 | Lexem(const Rule& rule); 468 | protected: friend class _Tie; 469 | explicit Lexem(Lexem* lxm) :_Tie(lxm) 470 | {}; 471 | virtual int _parse(_Base* parser) const throw() 472 | { if (!use.size()) 473 | return eError|eBadLexem; 474 | if (!parser->level || dynamic_cast(use[0])) 475 | return use[0]->_parse(parser); 476 | size_t size = parser->cntxV.size(); 477 | parser->cntxV.push_back(parser->zero_parse(parser->cntxV.back())); 478 | parser->level--; 479 | int stat = use[0]->_parse(parser); 480 | parser->level++; 481 | if ((stat & eOk) && parser->cntxV.size() - size > 1) { 482 | parser->_stub_call(size - 1, name.c_str()); 483 | if (parser->cntxV.back() > parser->pstop) parser->pstop = parser->cntxV.back(); 484 | parser->cntxV[(++size)++] = parser->cntxV.back(); } 485 | parser->cntxV.resize(size); 486 | return stat; } 487 | public: 488 | Lexem(const char *literal, bool cs = 0) :_Tie() 489 | { int size = strlen(literal); 490 | switch (size) { 491 | case 1: this->operator=(Token(literal[0], cs)); 492 | case 0: break; 493 | default: { 494 | _And _and(Token(literal[0], cs), Token(literal[1], cs)); 495 | for (int i = 2; i < size; i++) { 496 | _and.operator+((const _Tie&)Token(literal[i], cs)); } 497 | this->operator=(_and); } } 498 | _setname(this, literal); } 499 | explicit Lexem() :_Tie() 500 | { _setname(this); } 501 | virtual ~Lexem() 502 | { _safe_delete(this); } 503 | Lexem(const _Tie& link) :_Tie() 504 | { _setname(this, 0); _clue(link); } 505 | Lexem& operator=(const Lexem& lexem) 506 | { if (&lexem != this) _clue(lexem); 507 | return *this; } 508 | Lexem& operator=(const _Tie& link) 509 | { _clue(link); return *this; } 510 | }; 511 | 512 | /* interface class for BNF rules */ 513 | class Rule : public _Tie 514 | { 515 | void* callback; 516 | protected: friend class _Tie; friend class _And; 517 | explicit Rule(const Rule* rl) :_Tie(rl), callback(rl->callback) 518 | {}; 519 | virtual int _parse(_Base* parser) const throw() 520 | { if 
(!use.size() || !parser->level) 521 | return eError|eBadRule; 522 | if (dynamic_cast(use[0])) { 523 | return use[0]->_parse(parser); } 524 | size_t size = parser->cntxV.size(); 525 | std::pair up = parser->_pre_call(callback); 526 | int stat = use[0]->_parse(parser); 527 | if ((stat & eOk) && parser->cntxV.size() - size > 1) { 528 | parser->_do_call(up, callback, size, name.c_str()); 529 | if (parser->cntxV.back() > parser->pstop) parser->pstop = parser->cntxV.back(); 530 | parser->cntxV[(++size)++] = parser->cntxV.back(); } 531 | parser->cntxV.resize(size); 532 | parser->_post_call(up); 533 | return stat; } 534 | public: 535 | explicit Rule() :_Tie(), callback(0) 536 | { _setname(this); } 537 | virtual ~Rule() 538 | { _safe_delete(this); } 539 | Rule(const _Tie& link) :_Tie(), callback(0) 540 | { const Rule* rl = dynamic_cast(&link); 541 | if (rl) { _clone(&link); callback = rl->callback; name = rl->name; } 542 | else { _clue(link); callback = 0; _setname(this); } } 543 | Rule& operator=(const _Tie& link) 544 | { _clue(link); return *this; } 545 | Rule& operator=(const Rule& rule) 546 | { if (&rule == this) return *this; 547 | return this->operator=((const _Tie&)rule); } 548 | template friend Rule& Bind(Rule& rule, U (*callback)(std::vector&)); 549 | template Rule& operator[](U (*callback)(std::vector&)); 550 | }; 551 | 552 | /* friendly debug interface */ 553 | #define LEXEM(lexem) Lexem lexem; lexem.setName(#lexem); lexem 554 | #define RULE(rule) Rule rule; rule.setName(#rule); rule 555 | 556 | /* internal class to support repeat constructions of BNF elements */ 557 | class _Cycle: public _Tie 558 | { 559 | unsigned int min, max; 560 | int flag; 561 | protected: friend class _Tie; 562 | explicit _Cycle(const _Cycle* u) :_Tie(u), min(u->min), max(u->max), flag(u->flag) 563 | {}; 564 | _Cycle(const _Cycle& w) :_Tie(w), min(w.min), max(w.max), flag(w.flag) 565 | {}; 566 | int _parse(_Base* parser) const throw() 567 | { int stat; unsigned int i; 568 | for (stat = 0, 
i = 0; i < max; i++, stat &= ~(e1st|eTry|eSkip|eRet|eOk)) { 569 | stat |= use[0]->_parse(parser); 570 | if ((stat & (eOk|eError)) == eOk) 571 | continue; 572 | return i < min? stat & ~eOk : stat | parser->_chk_stack() | eOk; } 573 | return stat | flag | eOk; } 574 | _Cycle(int at_least, const _Tie& link, int total = maxRepeate, int limit = maxRepeate) 575 | :_Tie(std::string("@")), min(at_least), max(total), flag(total < limit? eNone : eOver|eError) 576 | { _clue(link); } 577 | public: 578 | ~_Cycle() 579 | { _safe_delete(this); } 580 | friend _Cycle operator*(int at_least, const _Tie& link); 581 | friend _Cycle Repeat(int at_least, const Rule& rule, int total, int limit); 582 | friend _Cycle Iterate(int at_least, const Lexem& lexem, int total, int limit); 583 | friend _Cycle Series(int at_least, const Token& token, int total, int limit); 584 | }; 585 | inline _Cycle _Tie::operator*() 586 | { return _Cycle(0, *this); } 587 | inline _Cycle _Tie::operator!() 588 | { return _Cycle(0, *this, 1); } 589 | inline _Cycle _Tie::operator()(int at_least, int total) 590 | { return _Cycle(at_least, *this, total); } 591 | inline _Cycle operator*(int at_least, const _Tie& link) 592 | { return _Cycle(at_least, link); } 593 | inline _Cycle Repeat(int at_least, const Rule& rule, int total = maxLexemLength, int limit = maxRepeate) 594 | { return _Cycle(at_least, rule, total, limit); } 595 | inline _Cycle Iterate(int at_least, const Lexem& lexem, int total = maxLexemLength, int limit = maxLexemLength) 596 | { return _Cycle(at_least, lexem, total, limit); } 597 | inline _Cycle Series(int at_least, const Token& token, int total = maxLexemLength, int limit = maxCharNum) 598 | { return _Cycle(at_least, token, total, limit); } 599 | 600 | /* context class to support the second kind of callback */ 601 | template class _Parser : public _Base 602 | { 603 | protected: 604 | std::vector* cntxU; 605 | unsigned int off; 606 | void _erase(int low, int up = 0) 607 | { cntxV.erase(cntxV.begin() + 
low, up? cntxV.begin() + up : cntxV.end() ); 608 | if (cntxU && level) 609 | cntxU->erase(cntxU->begin() + (low - off) / 2, 610 | up? cntxU->begin() + (up - off) / 2 : cntxU->end()); } 611 | virtual std::pair _pre_call(void* callback) 612 | { std::pair up = std::make_pair(cntxU, off); 613 | cntxU = callback? new std::vector : 0; 614 | off = callback? cntxV.size() : 0; 615 | return up; } 616 | virtual void _post_call(std::pair up) 617 | { if (cntxU) { 618 | delete cntxU; } 619 | cntxU = (std::vector*)up.first; 620 | off = up.second; } 621 | virtual void _do_call(std::pair up, void* callback, size_t org, const char* name) 622 | { if (callback) { 623 | if (up.first) { 624 | ((std::vector*)up.first)->push_back(U(reinterpret_cast< 625 | U(*)(std::vector&)>(callback)(*cntxU), cntxV[org], cntxV.back() - cntxV[org], name)); 626 | } else { reinterpret_cast&)>(callback)(*cntxU); } 627 | } else if (up.first) { 628 | ((std::vector*)up.first)->push_back(U(cntxV[org], cntxV.back() - cntxV[org], name)); } } 629 | virtual void _stub_call(size_t org, const char* name) 630 | { if (cntxU) { 631 | cntxU->push_back(U(cntxV[org], cntxV.back() - cntxV[org], name)); } } 632 | public: 633 | _Parser(const char* (*f)(const char*), std::vector* v) :_Base(f), cntxU(v), off(0) 634 | {}; 635 | virtual ~_Parser() 636 | {}; 637 | int _get_result(U& u) 638 | { if (cntxU && cntxU->size()) { u.data = cntxU->front().data; return 0; } 639 | else return eNull; } 640 | template friend Rule& Bind(Rule& rule, W (*callback)(std::vector&)); 641 | }; 642 | 643 | inline int _Base::_analyze(_Tie& root, const char* text, size_t* plen) 644 | { cntxV.push_back(text); cntxV.push_back(text); 645 | int stat = root._parse(this); 646 | const char* ptr = zero_parse(pstop > cntxV.back() ? pstop : cntxV.back()); 647 | if (plen) *plen = ptr - text; 648 | return stat | (*ptr? 
eError|eRest: 0); } 649 | 650 | /* User interface template to support the second kind of callback */ 651 | /* The user need to specify own 'Foo' abstract type to develop own callbaks */ 652 | /* like: Interface CallBack(std::vector>& res); */ 653 | template struct Interface 654 | { 655 | Data data; // user data element 656 | const char* text; // pointer to parsed text according to bound Rule 657 | size_t length; // length of parsed text according to bound Rule 658 | const char* name; // the name of bound Rule 659 | Interface(const Interface& ifc, const char* text, size_t length, const char* name) 660 | :data(ifc.data) , text(text), length(length), name(name) 661 | {}; // mandatory constructor with user data to be called from library 662 | Interface(const char* text, size_t length, const char* name) 663 | :data(), text(text), length(length), name(name) 664 | {}; // mandatory default constructor to be called from library 665 | Interface(Data data, std::vector& res, const char* name = "") 666 | :data(data), text(res.size()? res[0].text: ""), 667 | length(res.size()? res[res.size() - 1].text 668 | - res[0].text + res[res.size() - 1].length : 0), name(name) 669 | {}; // constructor to pass data from user's callback to library 670 | Interface(const Interface& front, const Interface& back, const char* name = "") 671 | : data(), text(front.text), length(back.text - front.text + back.length), name(name) 672 | {}; // constructor to pass data from user's callback to library 673 | Interface(): data(), text(0), length(0), name(0) 674 | {}; // default constructor 675 | static Interface ByPass(std::vector& res) // simplest user callback example 676 | { return res.size()? res[0]: Interface(); } // just to pass data to upper level 677 | int _get_pstop(const char** pstop) 678 | { if (pstop) *pstop = text + length; 679 | return length ? 
eNone : eNull; } 680 | }; 681 | 682 | /* Private parsing interface */ 683 | template inline int _Analyze(_Tie& root, U& u, const char* (*pre_parse)(const char*)) 684 | { if (typeid(U) == typeid(Interface<>)) { 685 | _Base base(pre_parse); return base._analyze(root, u.text, &u.length); 686 | } else { std::vector v; _Parser parser(pre_parse, &v); 687 | return parser._analyze(root, u.text, &u.length) | parser._get_result(u); } } 688 | 689 | /* Primary interface set to start parsing of text against constructed rules */ 690 | template inline int Analyze(_Tie& root, const char* text, const char** pstop, U& u, const char* (*pre_parse)(const char*) = 0) 691 | { u.text = text; return _Analyze(root, u, pre_parse) | u._get_pstop(pstop); } 692 | template inline int Analyze(_Tie& root, const char* text, U& u, const char* (*pre_parse)(const char*) = 0) 693 | { u.text = text; return _Analyze(root, u, pre_parse) | u._get_pstop(0); } 694 | inline int Analyze(_Tie& root, const char* text, const char** pstop = 0, const char* (*pre_parse)(const char*) = 0) 695 | { Interface<> u; u.text = text; return _Analyze(root, u, pre_parse) | u._get_pstop(pstop); } 696 | 697 | 698 | /* Create association between Rule and user's callback */ 699 | template inline Rule& Bind(Rule& rule, U (*callback)(std::vector&)) 700 | { rule.callback = reinterpret_cast(callback); return rule; } 701 | template inline Rule& Rule::operator[](U (*callback)(std::vector&)) // for C++11 702 | { this->callback = reinterpret_cast(callback); return *this; } 703 | 704 | 705 | }; // bnf:: 706 | #endif // BNFLITE_H 707 | --------------------------------------------------------------------------------