├── DIFF
├── README
├── TODO
├── examples
│   └── simple_ast
│       ├── ast.go
│       └── parser.y
├── lemon.c
└── lempar.go

/DIFF:
--------------------------------------------------------------------------------
Differences from the C version:

- Parser stack is always dynamic (grows automatically).
- Debug is always enabled; a 'const debug' may be added in the future for
  dead code elimination.
- Instead of a union for YYMINORTYPE, I use interface{}.
- Default type for a token minor is interface{} (TODO(nsf): why? maybe change to int).
- YYTRACKMAXSTACKDEPTH is not implemented and has been removed from the source code.
- Custom Parser arguments are not implemented (yet).

--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
From the golang-nuts mailing list (with a few modifications):

--== intro ==--

Hi. I just want to announce a simple port of the lemon parser generator
to the Go programming language.

In short: Lemon is a LALR(1) parser generator, similar to yacc, but
much simpler. It also has a few neat features, like:
- It is thread-safe and re-entrant
- The tokenizer calls the parser, instead of the parser calling the tokenizer
  (the opposite of yacc/bison)
- Uses no global variables, which allows running multiple parsers
  simultaneously
- Auto-generation of token symbol constants (yacc/bison requires a
  list)
- No need to count grammar symbols, they can be named (nice names
  instead of $1 $2 $3)
- etc..

Lemon is developed as a part of the sqlite project. The generator code is
quite horrible, but it works and I believe it is very solid. Of course it has
a few drawbacks, but..
:)

Lemon homepage:
http://www.hwaci.com/sw/lemon

--== golemon specific info ==--

I don't know whether I will support golemon or not, but at least if someone
likes lemon (like me), he/she has a decent (I hope so) base for his/her
experiments now.

I've successfully compiled and executed the very beginning of this
tutorial:
http://freshmeat.net/articles/lemon-parser-generator-tutorial

See also the 'examples' directory.

Hopefully, I will have enough enthusiasm to fix some of the obvious
issues (see TODO).

--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
- Remove %stack_overflow stuff. The stack in Go lemon is dynamic and it never overflows.
- Remove %destructor stuff. In a GC language it's a useless feature.
- Implement %extra_argument stuff.
- Implement proper support for %name.
- Rename all the YY constants and types and variables to lower-case-based
  names, in case the user wants to hide the parser in a separate package.
- Investigate different options and how they interact with the Go port (like the -m option).
+ Add support for Go's //line filename.y: stuff
- Clean up type mess
- Optimize
- Good to Go!

--------------------------------------------------------------------------------
/examples/simple_ast/ast.go:
--------------------------------------------------------------------------------
// Simple AST building example, Go-ish style.
// In order to build do the following:
// 1. build golemon
// 2. ../../lemon parser.y
// 3. 8g ast.go parser.go parser_tokens.go
// 4.
8l -o ast ast.8 7 | 8 | package main 9 | 10 | import "fmt" 11 | 12 | type Expr interface { 13 | String() string 14 | } 15 | 16 | //------------------------------------------------------------------------- 17 | // NumExpr 18 | //------------------------------------------------------------------------- 19 | type NumExpr int 20 | 21 | func (n NumExpr) String() string { return fmt.Sprintf("%d", int(n)) } 22 | 23 | //------------------------------------------------------------------------- 24 | // BinExpr 25 | //------------------------------------------------------------------------- 26 | type BinExpr struct { 27 | tok int 28 | lhs Expr 29 | rhs Expr 30 | } 31 | 32 | var toktostr = map[int]string { 33 | PLUS: "+", 34 | MINUS: "-", 35 | TIMES: "*", 36 | DIVIDE: "/", 37 | } 38 | 39 | func (b *BinExpr) String() string { 40 | return fmt.Sprintf("(%s%s%s)", b.lhs.String(), toktostr[b.tok], b.rhs.String()) 41 | } 42 | 43 | var AST Expr 44 | 45 | // let's make an AST 46 | 47 | func main() { 48 | p := NewParser() 49 | p.Parse(INTEGER, 5) 50 | p.Parse(PLUS, 0) 51 | p.Parse(INTEGER, 10) 52 | p.Parse(TIMES, 0) 53 | p.Parse(INTEGER, 4) 54 | p.Parse(0, 0) 55 | p.Dispose() 56 | 57 | fmt.Println(AST) 58 | 59 | p = NewParser() 60 | p.Parse(INTEGER, 5) 61 | p.Parse(TIMES, 0) 62 | p.Parse(INTEGER, 10) 63 | p.Parse(PLUS, 0) 64 | p.Parse(INTEGER, 4) 65 | p.Parse(0, 0) 66 | p.Dispose() 67 | 68 | fmt.Println(AST) 69 | } 70 | -------------------------------------------------------------------------------- /examples/simple_ast/parser.y: -------------------------------------------------------------------------------- 1 | %token_type { int } 2 | 3 | %left PLUS MINUS. 4 | %left DIVIDE TIMES. 5 | 6 | %include { 7 | //import "fmt" 8 | } 9 | 10 | %syntax_error { 11 | fmt.Println("Syntax error!") 12 | } 13 | 14 | program ::= expr(A). { AST = A } 15 | 16 | %type expr { Expr } 17 | expr(A) ::= expr(B) MINUS expr(C). { A = &BinExpr{MINUS, B, C} } 18 | expr(A) ::= expr(B) PLUS expr(C). 
{ A = &BinExpr{PLUS, B, C} } 19 | expr(A) ::= expr(B) TIMES expr(C). { A = &BinExpr{TIMES, B, C} } 20 | expr(A) ::= expr(B) DIVIDE expr(C). { A = &BinExpr{DIVIDE, B, C} } 21 | 22 | expr(A) ::= INTEGER(B). { A = NumExpr(B) } 23 | -------------------------------------------------------------------------------- /lemon.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** This file contains all sources (including headers) to the LEMON 3 | ** LALR(1) parser generator. The sources have been combined into a 4 | ** single file to make it easy to include LEMON in the source tree 5 | ** and Makefile of another program. 6 | ** 7 | ** The author of this program disclaims copyright. 8 | */ 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifndef __WIN32__ 17 | # if defined(_WIN32) || defined(WIN32) 18 | # define __WIN32__ 19 | # endif 20 | #endif 21 | 22 | #ifdef __WIN32__ 23 | extern int access(); 24 | #else 25 | #include 26 | #endif 27 | 28 | /* #define PRIVATE static */ 29 | #define PRIVATE 30 | 31 | #ifdef TEST 32 | #define MAXRHS 5 /* Set low to exercise exception code */ 33 | #else 34 | #define MAXRHS 1000 35 | #endif 36 | 37 | static int showPrecedenceConflict = 0; 38 | static const char **made_files = NULL; 39 | static int made_files_count = 0; 40 | static int successful_exit = 0; 41 | static void LemonAtExit(void) 42 | { 43 | /* if we failed, delete (most) files we made, to unconfuse build tools. */ 44 | int i; 45 | for (i = 0; i < made_files_count; i++) { 46 | if (!successful_exit) { 47 | remove(made_files[i]); 48 | } 49 | } 50 | free(made_files); 51 | made_files_count = 0; 52 | made_files = NULL; 53 | } 54 | 55 | static char *msort(char*,char**,int(*)(const char*,const char*)); 56 | 57 | /* 58 | ** Compilers are getting increasingly pedantic about type conversions 59 | ** as C evolves ever closer to Ada.... 
To work around the latest problems 60 | ** we have to define the following variant of strlen(). 61 | */ 62 | #define lemonStrlen(X) ((int)strlen(X)) 63 | 64 | /* a few forward declarations... */ 65 | struct rule; 66 | struct lemon; 67 | struct action; 68 | 69 | static struct action *Action_new(void); 70 | static struct action *Action_sort(struct action *); 71 | 72 | /********** From the file "build.h" ************************************/ 73 | void FindRulePrecedences(); 74 | void FindFirstSets(); 75 | void FindStates(); 76 | void FindLinks(); 77 | void FindFollowSets(); 78 | void FindActions(); 79 | 80 | /********* From the file "configlist.h" *********************************/ 81 | void Configlist_init(void); 82 | struct config *Configlist_add(struct rule *, int); 83 | struct config *Configlist_addbasis(struct rule *, int); 84 | void Configlist_closure(struct lemon *); 85 | void Configlist_sort(void); 86 | void Configlist_sortbasis(void); 87 | struct config *Configlist_return(void); 88 | struct config *Configlist_basis(void); 89 | void Configlist_eat(struct config *); 90 | void Configlist_reset(void); 91 | 92 | /********* From the file "error.h" ***************************************/ 93 | void ErrorMsg(const char *, int,const char *, ...); 94 | 95 | /****** From the file "option.h" ******************************************/ 96 | enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, 97 | OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; 98 | struct s_options { 99 | enum option_type type; 100 | const char *label; 101 | char *arg; 102 | const char *message; 103 | }; 104 | int OptInit(char**,struct s_options*,FILE*); 105 | int OptNArgs(void); 106 | char *OptArg(int); 107 | void OptErr(int); 108 | void OptPrint(void); 109 | 110 | /******** From the file "parse.h" *****************************************/ 111 | void Parse(struct lemon *lemp); 112 | 113 | /********* From the file "plink.h" ***************************************/ 114 | struct plink *Plink_new(void); 115 
| void Plink_add(struct plink **, struct config *); 116 | void Plink_copy(struct plink **, struct plink *); 117 | void Plink_delete(struct plink *); 118 | 119 | /********** From the file "report.h" *************************************/ 120 | void Reprint(struct lemon *); 121 | void ReportOutput(struct lemon *); 122 | void ReportTable(struct lemon *, int); 123 | void ReportHeader(struct lemon *); 124 | void CompressTables(struct lemon *); 125 | void ResortStates(struct lemon *); 126 | 127 | /********** From the file "set.h" ****************************************/ 128 | void SetSize(int); /* All sets will be of size N */ 129 | char *SetNew(void); /* A new set for element 0..N */ 130 | void SetFree(char*); /* Deallocate a set */ 131 | 132 | char *SetNew(void); /* A new set for element 0..N */ 133 | int SetAdd(char*,int); /* Add element to a set */ 134 | int SetUnion(char *,char *); /* A <- A U B, thru element N */ 135 | #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ 136 | 137 | /********** From the file "struct.h" *************************************/ 138 | /* 139 | ** Principal data structures for the LEMON parser generator. 
140 | */ 141 | 142 | typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; 143 | 144 | /* Symbols (terminals and nonterminals) of the grammar are stored 145 | ** in the following: */ 146 | enum symbol_type { 147 | TERMINAL, 148 | NONTERMINAL, 149 | MULTITERMINAL 150 | }; 151 | enum e_assoc { 152 | LEFT, 153 | RIGHT, 154 | NONE, 155 | UNK 156 | }; 157 | struct symbol { 158 | const char *name; /* Name of the symbol */ 159 | int index; /* Index number for this symbol */ 160 | enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ 161 | struct rule *rule; /* Linked list of rules of this (if an NT) */ 162 | struct symbol *fallback; /* fallback token in case this token doesn't parse */ 163 | int prec; /* Precedence if defined (-1 otherwise) */ 164 | enum e_assoc assoc; /* Associativity if precedence is defined */ 165 | char *firstset; /* First-set for all rules of this symbol */ 166 | Boolean lambda; /* True if NT and can generate an empty string */ 167 | int useCnt; /* Number of times used */ 168 | char *destructor; /* Code which executes whenever this symbol is 169 | ** popped from the stack during error processing */ 170 | int destLineno; /* Line number for start of destructor */ 171 | char *datatype; /* The data type of information held by this 172 | ** object. Only used if type==NONTERMINAL */ 173 | int dtnum; /* The data type number. In the parser, the value 174 | ** stack is a union. The .yy%d element of this 175 | ** union is the correct data type for this object */ 176 | /* The following fields are used by MULTITERMINALs only */ 177 | int nsubsym; /* Number of constituent symbols in the MULTI */ 178 | struct symbol **subsym; /* Array of constituent symbols */ 179 | }; 180 | 181 | /* Each production rule in the grammar is stored in the following 182 | ** structure. 
*/
struct rule {
  struct symbol *lhs;      /* Left-hand side of the rule */
  const char *lhsalias;    /* Alias for the LHS (NULL if none) */
  int lhsStart;            /* True if left-hand side is the start symbol */
  int ruleline;            /* Line number for the rule */
  int nrhs;                /* Number of RHS symbols */
  struct symbol **rhs;     /* The RHS symbols */
  const char **rhsalias;   /* An alias for each RHS symbol (NULL if none) */
  int line;                /* Line number at which code begins */
  const char *code;        /* The code executed when this rule is reduced */
  struct symbol *precsym;  /* Precedence symbol for this rule */
  int index;               /* An index number for this rule */
  Boolean canReduce;       /* True if this rule is ever reduced */
  struct rule *nextlhs;    /* Next rule with the same LHS */
  struct rule *next;       /* Next rule in the global list */
};

/* A configuration is a production rule of the grammar together with
** a mark (dot) showing how much of that rule has been processed so far.
** Configurations also contain a follow-set which is a list of terminal
** symbols which are allowed to immediately follow the end of the rule.
204 | ** Every configuration is recorded as an instance of the following: */ 205 | enum cfgstatus { 206 | COMPLETE, 207 | INCOMPLETE 208 | }; 209 | struct config { 210 | struct rule *rp; /* The rule upon which the configuration is based */ 211 | int dot; /* The parse point */ 212 | char *fws; /* Follow-set for this configuration only */ 213 | struct plink *fplp; /* Follow-set forward propagation links */ 214 | struct plink *bplp; /* Follow-set backwards propagation links */ 215 | struct state *stp; /* Pointer to state which contains this */ 216 | enum cfgstatus status; /* used during followset and shift computations */ 217 | struct config *next; /* Next configuration in the state */ 218 | struct config *bp; /* The next basis configuration */ 219 | }; 220 | 221 | enum e_action { 222 | SHIFT, 223 | ACCEPT, 224 | REDUCE, 225 | ERROR, 226 | SSCONFLICT, /* A shift/shift conflict */ 227 | SRCONFLICT, /* Was a reduce, but part of a conflict */ 228 | RRCONFLICT, /* Was a reduce, but part of a conflict */ 229 | SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ 230 | RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ 231 | NOT_USED /* Deleted by compression */ 232 | }; 233 | 234 | /* Every shift or reduce operation is stored as one of the following */ 235 | struct action { 236 | struct symbol *sp; /* The look-ahead symbol */ 237 | enum e_action type; 238 | union { 239 | struct state *stp; /* The new state, if a shift */ 240 | struct rule *rp; /* The rule, if a reduce */ 241 | } x; 242 | struct action *next; /* Next action for this state */ 243 | struct action *collide; /* Next action with the same hash */ 244 | }; 245 | 246 | /* Each state of the generated parser's finite state machine 247 | ** is encoded as an instance of the following structure. 
*/
struct state {
  struct config *bp;       /* The basis configurations for this state */
  struct config *cfp;      /* All configurations in this set */
  int statenum;            /* Sequential number for this state */
  struct action *ap;       /* Array of actions for this state */
  int nTknAct, nNtAct;     /* Number of actions on terminals and nonterminals */
  int iTknOfst, iNtOfst;   /* yy_action[] offset for terminals and nonterms */
  int iDflt;               /* Default action */
};
#define NO_OFFSET (-2147483647)

/* A followset propagation link indicates that the contents of one
** configuration followset should be propagated to another whenever
** the first changes. */
struct plink {
  struct config *cfp;      /* The configuration to which linked */
  struct plink *next;      /* The next propagate link */
};

/* The state vector for the entire parser generator is recorded as
** follows.  (LEMON uses no global variables and makes little use of
** static variables.  Fields in the following structure can be thought
** of as being global variables in the program.)
*/ 271 | struct lemon { 272 | struct state **sorted; /* Table of states sorted by state number */ 273 | struct rule *rule; /* List of all rules */ 274 | int nstate; /* Number of states */ 275 | int nrule; /* Number of rules */ 276 | int nsymbol; /* Number of terminal and nonterminal symbols */ 277 | int nterminal; /* Number of terminal symbols */ 278 | struct symbol **symbols; /* Sorted array of pointers to symbols */ 279 | int errorcnt; /* Number of errors */ 280 | struct symbol *errsym; /* The error symbol */ 281 | struct symbol *wildcard; /* Token that matches anything */ 282 | char *name; /* Name of the generated parser */ 283 | char *arg; /* Declaration of the 3th argument to parser */ 284 | char *tokentype; /* Type of terminal symbols in the parser stack */ 285 | char *vartype; /* The default type of non-terminal symbols */ 286 | char *start; /* Name of the start symbol for the grammar */ 287 | char *stacksize; /* Size of the parser stack */ 288 | char *include; /* Code to put at the start of the C file */ 289 | char *error; /* Code to execute when an error is seen */ 290 | char *overflow; /* Code to execute on a stack overflow */ 291 | char *failure; /* Code to execute on parser failure */ 292 | char *accept; /* Code to execute when the parser excepts */ 293 | char *extracode; /* Code appended to the generated file */ 294 | char *tokendest; /* Code to execute to destroy token data */ 295 | char *vardest; /* Code for the default non-terminal destructor */ 296 | char *filename; /* Name of the input file */ 297 | char *outname; /* Name of the current output file */ 298 | char *tokenprefix; /* A prefix added to token names in the .h file */ 299 | int nconflict; /* Number of parsing conflicts */ 300 | int tablesize; /* Size of the parse tables */ 301 | int basisflag; /* Print only basis configurations */ 302 | int has_fallback; /* True if any %fallback is seen in the grammar */ 303 | int nolinenosflag; /* True if #line statements should not be printed */ 304 | 
char *argv0; /* Name of the program */ 305 | }; 306 | 307 | #define MemoryCheck(X) if((X)==0){ \ 308 | extern void memory_error(); \ 309 | memory_error(); \ 310 | } 311 | 312 | /**************** From the file "table.h" *********************************/ 313 | /* 314 | ** All code in this file has been automatically generated 315 | ** from a specification in the file 316 | ** "table.q" 317 | ** by the associative array code building program "aagen". 318 | ** Do not edit this file! Instead, edit the specification 319 | ** file, then rerun aagen. 320 | */ 321 | /* 322 | ** Code for processing tables in the LEMON parser generator. 323 | */ 324 | /* Routines for handling a strings */ 325 | 326 | const char *Strsafe(const char *); 327 | 328 | void Strsafe_init(void); 329 | int Strsafe_insert(const char *); 330 | const char *Strsafe_find(const char *); 331 | 332 | /* Routines for handling symbols of the grammar */ 333 | 334 | struct symbol *Symbol_new(const char *); 335 | int Symbolcmpp(const void *, const void *); 336 | void Symbol_init(void); 337 | int Symbol_insert(struct symbol *, const char *); 338 | struct symbol *Symbol_find(const char *); 339 | struct symbol *Symbol_Nth(int); 340 | int Symbol_count(void); 341 | struct symbol **Symbol_arrayof(void); 342 | 343 | /* Routines to manage the state table */ 344 | 345 | int Configcmp(const char *, const char *); 346 | struct state *State_new(void); 347 | void State_init(void); 348 | int State_insert(struct state *, struct config *); 349 | struct state *State_find(struct config *); 350 | struct state **State_arrayof(/* */); 351 | 352 | /* Routines used for efficiency in Configlist_add */ 353 | 354 | void Configtable_init(void); 355 | int Configtable_insert(struct config *); 356 | struct config *Configtable_find(struct config *); 357 | void Configtable_clear(int(*)(struct config *)); 358 | 359 | /****************** From the file "action.c" *******************************/ 360 | /* 361 | ** Routines processing parser actions 
in the LEMON parser generator. 362 | */ 363 | 364 | /* Allocate a new parser action */ 365 | static struct action *Action_new(void){ 366 | static struct action *freelist = 0; 367 | struct action *newaction; 368 | 369 | if( freelist==0 ){ 370 | int i; 371 | int amt = 100; 372 | freelist = (struct action *)calloc(amt, sizeof(struct action)); 373 | if( freelist==0 ){ 374 | fprintf(stderr,"Unable to allocate memory for a new parser action."); 375 | exit(1); 376 | } 377 | for(i=0; inext; 382 | return newaction; 383 | } 384 | 385 | /* Compare two actions for sorting purposes. Return negative, zero, or 386 | ** positive if the first action is less than, equal to, or greater than 387 | ** the first 388 | */ 389 | static int actioncmp( 390 | struct action *ap1, 391 | struct action *ap2 392 | ){ 393 | int rc; 394 | rc = ap1->sp->index - ap2->sp->index; 395 | if( rc==0 ){ 396 | rc = (int)ap1->type - (int)ap2->type; 397 | } 398 | if( rc==0 && ap1->type==REDUCE ){ 399 | rc = ap1->x.rp->index - ap2->x.rp->index; 400 | } 401 | if( rc==0 ){ 402 | rc = (int) (ap2 - ap1); 403 | } 404 | return rc; 405 | } 406 | 407 | /* Sort parser actions */ 408 | static struct action *Action_sort( 409 | struct action *ap 410 | ){ 411 | ap = (struct action *)msort((char *)ap,(char **)&ap->next, 412 | (int(*)(const char*,const char*))actioncmp); 413 | return ap; 414 | } 415 | 416 | void Action_add( 417 | struct action **app, 418 | enum e_action type, 419 | struct symbol *sp, 420 | char *arg 421 | ){ 422 | struct action *newaction; 423 | newaction = Action_new(); 424 | newaction->next = *app; 425 | *app = newaction; 426 | newaction->type = type; 427 | newaction->sp = sp; 428 | if( type==SHIFT ){ 429 | newaction->x.stp = (struct state *)arg; 430 | }else{ 431 | newaction->x.rp = (struct rule *)arg; 432 | } 433 | } 434 | /********************** New code to implement the "acttab" module ***********/ 435 | /* 436 | ** This module implements routines use to construct the yy_action[] table. 
*/

/*
** The state of the yy_action table under construction is an instance of
** the following structure.
**
** The yy_action table maps the pair (state_number, lookahead) into an
** action_number.  The table is an array of integer pairs.  The state_number
** determines an initial offset into the yy_action array.  The lookahead
** value is then added to this initial offset to get an index X into the
** yy_action array.  If the aAction[X].lookahead equals the value of the
** lookahead input, then the value of the action_number output is
** aAction[X].action.  If the lookaheads do not match then the
** default action for the state_number is returned.
**
** All actions associated with a single state_number are first entered
** into aLookahead[] using multiple calls to acttab_action().  Then the
** actions for that single state_number are placed into the aAction[]
** array with a single call to acttab_insert().  The acttab_insert() call
** also resets the aLookahead[] array in preparation for the next
** state number.
458 | */ 459 | struct lookahead_action { 460 | int lookahead; /* Value of the lookahead token */ 461 | int action; /* Action to take on the given lookahead */ 462 | }; 463 | typedef struct acttab acttab; 464 | struct acttab { 465 | int nAction; /* Number of used slots in aAction[] */ 466 | int nActionAlloc; /* Slots allocated for aAction[] */ 467 | struct lookahead_action 468 | *aAction, /* The yy_action[] table under construction */ 469 | *aLookahead; /* A single new transaction set */ 470 | int mnLookahead; /* Minimum aLookahead[].lookahead */ 471 | int mnAction; /* Action associated with mnLookahead */ 472 | int mxLookahead; /* Maximum aLookahead[].lookahead */ 473 | int nLookahead; /* Used slots in aLookahead[] */ 474 | int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ 475 | }; 476 | 477 | /* Return the number of entries in the yy_action table */ 478 | #define acttab_size(X) ((X)->nAction) 479 | 480 | /* The value for the N-th entry in yy_action */ 481 | #define acttab_yyaction(X,N) ((X)->aAction[N].action) 482 | 483 | /* The value for the N-th entry in yy_lookahead */ 484 | #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) 485 | 486 | /* Free all memory associated with the given acttab */ 487 | void acttab_free(acttab *p){ 488 | free( p->aAction ); 489 | free( p->aLookahead ); 490 | free( p ); 491 | } 492 | 493 | /* Allocate a new acttab structure */ 494 | acttab *acttab_alloc(void){ 495 | acttab *p = (acttab *) calloc( 1, sizeof(*p) ); 496 | if( p==0 ){ 497 | fprintf(stderr,"Unable to allocate memory for a new acttab."); 498 | exit(1); 499 | } 500 | memset(p, 0, sizeof(*p)); 501 | return p; 502 | } 503 | 504 | /* Add a new action to the current transaction set. 505 | ** 506 | ** This routine is called once for each lookahead for a particular 507 | ** state. 
508 | */ 509 | void acttab_action(acttab *p, int lookahead, int action){ 510 | if( p->nLookahead>=p->nLookaheadAlloc ){ 511 | p->nLookaheadAlloc += 25; 512 | p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, 513 | sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); 514 | if( p->aLookahead==0 ){ 515 | fprintf(stderr,"malloc failed\n"); 516 | exit(1); 517 | } 518 | } 519 | if( p->nLookahead==0 ){ 520 | p->mxLookahead = lookahead; 521 | p->mnLookahead = lookahead; 522 | p->mnAction = action; 523 | }else{ 524 | if( p->mxLookaheadmxLookahead = lookahead; 525 | if( p->mnLookahead>lookahead ){ 526 | p->mnLookahead = lookahead; 527 | p->mnAction = action; 528 | } 529 | } 530 | p->aLookahead[p->nLookahead].lookahead = lookahead; 531 | p->aLookahead[p->nLookahead].action = action; 532 | p->nLookahead++; 533 | } 534 | 535 | /* 536 | ** Add the transaction set built up with prior calls to acttab_action() 537 | ** into the current action table. Then reset the transaction set back 538 | ** to an empty set in preparation for a new round of acttab_action() calls. 539 | ** 540 | ** Return the offset into the action table of the new transaction. 541 | */ 542 | int acttab_insert(acttab *p){ 543 | int i, j, k, n; 544 | assert( p->nLookahead>0 ); 545 | 546 | /* Make sure we have enough space to hold the expanded action table 547 | ** in the worst case. 
The worst case occurs if the transaction set 548 | ** must be appended to the current action table 549 | */ 550 | n = p->mxLookahead + 1; 551 | if( p->nAction + n >= p->nActionAlloc ){ 552 | int oldAlloc = p->nActionAlloc; 553 | p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; 554 | p->aAction = (struct lookahead_action *) realloc( p->aAction, 555 | sizeof(p->aAction[0])*p->nActionAlloc); 556 | if( p->aAction==0 ){ 557 | fprintf(stderr,"malloc failed\n"); 558 | exit(1); 559 | } 560 | for(i=oldAlloc; inActionAlloc; i++){ 561 | p->aAction[i].lookahead = -1; 562 | p->aAction[i].action = -1; 563 | } 564 | } 565 | 566 | /* Scan the existing action table looking for an offset that is a 567 | ** duplicate of the current transaction set. Fall out of the loop 568 | ** if and when the duplicate is found. 569 | ** 570 | ** i is the index in p->aAction[] where p->mnLookahead is inserted. 571 | */ 572 | for(i=p->nAction-1; i>=0; i--){ 573 | if( p->aAction[i].lookahead==p->mnLookahead ){ 574 | /* All lookaheads and actions in the aLookahead[] transaction 575 | ** must match against the candidate aAction[i] entry. 
*/ 576 | if( p->aAction[i].action!=p->mnAction ) continue; 577 | for(j=0; jnLookahead; j++){ 578 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; 579 | if( k<0 || k>=p->nAction ) break; 580 | if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; 581 | if( p->aLookahead[j].action!=p->aAction[k].action ) break; 582 | } 583 | if( jnLookahead ) continue; 584 | 585 | /* No possible lookahead value that is not in the aLookahead[] 586 | ** transaction is allowed to match aAction[i] */ 587 | n = 0; 588 | for(j=0; jnAction; j++){ 589 | if( p->aAction[j].lookahead<0 ) continue; 590 | if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; 591 | } 592 | if( n==p->nLookahead ){ 593 | break; /* An exact match is found at offset i */ 594 | } 595 | } 596 | } 597 | 598 | /* If no existing offsets exactly match the current transaction, find an 599 | ** an empty offset in the aAction[] table in which we can add the 600 | ** aLookahead[] transaction. 601 | */ 602 | if( i<0 ){ 603 | /* Look for holes in the aAction[] table that fit the current 604 | ** aLookahead[] transaction. Leave i set to the offset of the hole. 605 | ** If no holes are found, i is left at p->nAction, which means the 606 | ** transaction will be appended. */ 607 | for(i=0; inActionAlloc - p->mxLookahead; i++){ 608 | if( p->aAction[i].lookahead<0 ){ 609 | for(j=0; jnLookahead; j++){ 610 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; 611 | if( k<0 ) break; 612 | if( p->aAction[k].lookahead>=0 ) break; 613 | } 614 | if( jnLookahead ) continue; 615 | for(j=0; jnAction; j++){ 616 | if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; 617 | } 618 | if( j==p->nAction ){ 619 | break; /* Fits in empty slots */ 620 | } 621 | } 622 | } 623 | } 624 | /* Insert transaction set at index i. 
*/ 625 | for(j=0; jnLookahead; j++){ 626 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; 627 | p->aAction[k] = p->aLookahead[j]; 628 | if( k>=p->nAction ) p->nAction = k+1; 629 | } 630 | p->nLookahead = 0; 631 | 632 | /* Return the offset that is added to the lookahead in order to get the 633 | ** index into yy_action of the action */ 634 | return i - p->mnLookahead; 635 | } 636 | 637 | /********************** From the file "build.c" *****************************/ 638 | /* 639 | ** Routines to construction the finite state machine for the LEMON 640 | ** parser generator. 641 | */ 642 | 643 | /* Find a precedence symbol of every rule in the grammar. 644 | ** 645 | ** Those rules which have a precedence symbol coded in the input 646 | ** grammar using the "[symbol]" construct will already have the 647 | ** rp->precsym field filled. Other rules take as their precedence 648 | ** symbol the first RHS symbol with a defined precedence. If there 649 | ** are not RHS symbols with a defined precedence, the precedence 650 | ** symbol field is left blank. 651 | */ 652 | void FindRulePrecedences(struct lemon *xp) 653 | { 654 | struct rule *rp; 655 | for(rp=xp->rule; rp; rp=rp->next){ 656 | if( rp->precsym==0 ){ 657 | int i, j; 658 | for(i=0; inrhs && rp->precsym==0; i++){ 659 | struct symbol *sp = rp->rhs[i]; 660 | if( sp->type==MULTITERMINAL ){ 661 | for(j=0; jnsubsym; j++){ 662 | if( sp->subsym[j]->prec>=0 ){ 663 | rp->precsym = sp->subsym[j]; 664 | break; 665 | } 666 | } 667 | }else if( sp->prec>=0 ){ 668 | rp->precsym = rp->rhs[i]; 669 | } 670 | } 671 | } 672 | } 673 | return; 674 | } 675 | 676 | /* Find all nonterminals which will generate the empty string. 677 | ** Then go back and compute the first sets of every nonterminal. 678 | ** The first set is the set of all terminal symbols which can begin 679 | ** a string generated by that nonterminal. 
680 | */ 681 | void FindFirstSets(struct lemon *lemp) 682 | { 683 | int i, j; 684 | struct rule *rp; 685 | int progress; 686 | 687 | for(i=0; insymbol; i++){ 688 | lemp->symbols[i]->lambda = LEMON_FALSE; 689 | } 690 | for(i=lemp->nterminal; insymbol; i++){ 691 | lemp->symbols[i]->firstset = SetNew(); 692 | } 693 | 694 | /* First compute all lambdas */ 695 | do{ 696 | progress = 0; 697 | for(rp=lemp->rule; rp; rp=rp->next){ 698 | if( rp->lhs->lambda ) continue; 699 | for(i=0; inrhs; i++){ 700 | struct symbol *sp = rp->rhs[i]; 701 | if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break; 702 | } 703 | if( i==rp->nrhs ){ 704 | rp->lhs->lambda = LEMON_TRUE; 705 | progress = 1; 706 | } 707 | } 708 | }while( progress ); 709 | 710 | /* Now compute all first sets */ 711 | do{ 712 | struct symbol *s1, *s2; 713 | progress = 0; 714 | for(rp=lemp->rule; rp; rp=rp->next){ 715 | s1 = rp->lhs; 716 | for(i=0; inrhs; i++){ 717 | s2 = rp->rhs[i]; 718 | if( s2->type==TERMINAL ){ 719 | progress += SetAdd(s1->firstset,s2->index); 720 | break; 721 | }else if( s2->type==MULTITERMINAL ){ 722 | for(j=0; jnsubsym; j++){ 723 | progress += SetAdd(s1->firstset,s2->subsym[j]->index); 724 | } 725 | break; 726 | }else if( s1==s2 ){ 727 | if( s1->lambda==LEMON_FALSE ) break; 728 | }else{ 729 | progress += SetUnion(s1->firstset,s2->firstset); 730 | if( s2->lambda==LEMON_FALSE ) break; 731 | } 732 | } 733 | } 734 | }while( progress ); 735 | return; 736 | } 737 | 738 | /* Compute all LR(0) states for the grammar. Links 739 | ** are added to between some states so that the LR(1) follow sets 740 | ** can be computed later. 
741 | */ 742 | PRIVATE struct state *getstate(struct lemon *); /* forward reference */ 743 | void FindStates(struct lemon *lemp) 744 | { 745 | struct symbol *sp; 746 | struct rule *rp; 747 | 748 | Configlist_init(); 749 | 750 | /* Find the start symbol */ 751 | if( lemp->start ){ 752 | sp = Symbol_find(lemp->start); 753 | if( sp==0 ){ 754 | ErrorMsg(lemp->filename,0, 755 | "The specified start symbol \"%s\" is not \ 756 | in a nonterminal of the grammar. \"%s\" will be used as the start \ 757 | symbol instead.",lemp->start,lemp->rule->lhs->name); 758 | lemp->errorcnt++; 759 | sp = lemp->rule->lhs; 760 | } 761 | }else{ 762 | sp = lemp->rule->lhs; 763 | } 764 | 765 | /* Make sure the start symbol doesn't occur on the right-hand side of 766 | ** any rule. Report an error if it does. (YACC would generate a new 767 | ** start symbol in this case.) */ 768 | for(rp=lemp->rule; rp; rp=rp->next){ 769 | int i; 770 | for(i=0; inrhs; i++){ 771 | if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ 772 | ErrorMsg(lemp->filename,0, 773 | "The start symbol \"%s\" occurs on the \ 774 | right-hand side of a rule. This will result in a parser which \ 775 | does not work properly.",sp->name); 776 | lemp->errorcnt++; 777 | } 778 | } 779 | } 780 | 781 | /* The basis configuration set for the first state 782 | ** is all rules which have the start symbol as their 783 | ** left-hand side */ 784 | for(rp=sp->rule; rp; rp=rp->nextlhs){ 785 | struct config *newcfp; 786 | rp->lhsStart = 1; 787 | newcfp = Configlist_addbasis(rp,0); 788 | SetAdd(newcfp->fws,0); 789 | } 790 | 791 | /* Compute the first state. All other states will be 792 | ** computed automatically during the computation of the first one. 793 | ** The returned pointer to the first state is not used. */ 794 | (void)getstate(lemp); 795 | return; 796 | } 797 | 798 | /* Return a pointer to a state which is described by the configuration 799 | ** list which has been built from calls to Configlist_add. 
800 | */ 801 | PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ 802 | PRIVATE struct state *getstate(struct lemon *lemp) 803 | { 804 | struct config *cfp, *bp; 805 | struct state *stp; 806 | 807 | /* Extract the sorted basis of the new state. The basis was constructed 808 | ** by prior calls to "Configlist_addbasis()". */ 809 | Configlist_sortbasis(); 810 | bp = Configlist_basis(); 811 | 812 | /* Get a state with the same basis */ 813 | stp = State_find(bp); 814 | if( stp ){ 815 | /* A state with the same basis already exists! Copy all the follow-set 816 | ** propagation links from the state under construction into the 817 | ** preexisting state, then return a pointer to the preexisting state */ 818 | struct config *x, *y; 819 | for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ 820 | Plink_copy(&y->bplp,x->bplp); 821 | Plink_delete(x->fplp); 822 | x->fplp = x->bplp = 0; 823 | } 824 | cfp = Configlist_return(); 825 | Configlist_eat(cfp); 826 | }else{ 827 | /* This really is a new state. Construct all the details */ 828 | Configlist_closure(lemp); /* Compute the configuration closure */ 829 | Configlist_sort(); /* Sort the configuration closure */ 830 | cfp = Configlist_return(); /* Get a pointer to the config list */ 831 | stp = State_new(); /* A new state structure */ 832 | MemoryCheck(stp); 833 | stp->bp = bp; /* Remember the configuration basis */ 834 | stp->cfp = cfp; /* Remember the configuration closure */ 835 | stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ 836 | stp->ap = 0; /* No actions, yet. */ 837 | State_insert(stp,stp->bp); /* Add to the state table */ 838 | buildshifts(lemp,stp); /* Recursively compute successor states */ 839 | } 840 | return stp; 841 | } 842 | 843 | /* 844 | ** Return true if two symbols are the same. 
845 | */ 846 | int same_symbol(struct symbol *a, struct symbol *b) 847 | { 848 | int i; 849 | if( a==b ) return 1; 850 | if( a->type!=MULTITERMINAL ) return 0; 851 | if( b->type!=MULTITERMINAL ) return 0; 852 | if( a->nsubsym!=b->nsubsym ) return 0; 853 | for(i=0; insubsym; i++){ 854 | if( a->subsym[i]!=b->subsym[i] ) return 0; 855 | } 856 | return 1; 857 | } 858 | 859 | /* Construct all successor states to the given state. A "successor" 860 | ** state is any state which can be reached by a shift action. 861 | */ 862 | PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) 863 | { 864 | struct config *cfp; /* For looping thru the config closure of "stp" */ 865 | struct config *bcfp; /* For the inner loop on config closure of "stp" */ 866 | struct config *newcfg; /* */ 867 | struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ 868 | struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ 869 | struct state *newstp; /* A pointer to a successor state */ 870 | 871 | /* Each configuration becomes complete after it contibutes to a successor 872 | ** state. Initially, all configurations are incomplete */ 873 | for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; 874 | 875 | /* Loop through all configurations of the state "stp" */ 876 | for(cfp=stp->cfp; cfp; cfp=cfp->next){ 877 | if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ 878 | if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ 879 | Configlist_reset(); /* Reset the new config set */ 880 | sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ 881 | 882 | /* For every configuration in the state "stp" which has the symbol "sp" 883 | ** following its dot, add the same configuration to the basis set under 884 | ** construction but with the dot shifted one symbol to the right. 
*/ 885 | for(bcfp=cfp; bcfp; bcfp=bcfp->next){ 886 | if( bcfp->status==COMPLETE ) continue; /* Already used */ 887 | if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ 888 | bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ 889 | if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ 890 | bcfp->status = COMPLETE; /* Mark this config as used */ 891 | newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); 892 | Plink_add(&newcfg->bplp,bcfp); 893 | } 894 | 895 | /* Get a pointer to the state described by the basis configuration set 896 | ** constructed in the preceding loop */ 897 | newstp = getstate(lemp); 898 | 899 | /* The state "newstp" is reached from the state "stp" by a shift action 900 | ** on the symbol "sp" */ 901 | if( sp->type==MULTITERMINAL ){ 902 | int i; 903 | for(i=0; insubsym; i++){ 904 | Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); 905 | } 906 | }else{ 907 | Action_add(&stp->ap,SHIFT,sp,(char *)newstp); 908 | } 909 | } 910 | } 911 | 912 | /* 913 | ** Construct the propagation links 914 | */ 915 | void FindLinks(struct lemon *lemp) 916 | { 917 | int i; 918 | struct config *cfp, *other; 919 | struct state *stp; 920 | struct plink *plp; 921 | 922 | /* Housekeeping detail: 923 | ** Add to every propagate link a pointer back to the state to 924 | ** which the link is attached. */ 925 | for(i=0; instate; i++){ 926 | stp = lemp->sorted[i]; 927 | for(cfp=stp->cfp; cfp; cfp=cfp->next){ 928 | cfp->stp = stp; 929 | } 930 | } 931 | 932 | /* Convert all backlinks into forward links. Only the forward 933 | ** links are used in the follow-set computation. */ 934 | for(i=0; instate; i++){ 935 | stp = lemp->sorted[i]; 936 | for(cfp=stp->cfp; cfp; cfp=cfp->next){ 937 | for(plp=cfp->bplp; plp; plp=plp->next){ 938 | other = plp->cfp; 939 | Plink_add(&other->fplp,cfp); 940 | } 941 | } 942 | } 943 | } 944 | 945 | /* Compute all followsets. 
946 | ** 947 | ** A followset is the set of all symbols which can come immediately 948 | ** after a configuration. 949 | */ 950 | void FindFollowSets(struct lemon *lemp) 951 | { 952 | int i; 953 | struct config *cfp; 954 | struct plink *plp; 955 | int progress; 956 | int change; 957 | 958 | for(i=0; instate; i++){ 959 | for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 960 | cfp->status = INCOMPLETE; 961 | } 962 | } 963 | 964 | do{ 965 | progress = 0; 966 | for(i=0; instate; i++){ 967 | for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 968 | if( cfp->status==COMPLETE ) continue; 969 | for(plp=cfp->fplp; plp; plp=plp->next){ 970 | change = SetUnion(plp->cfp->fws,cfp->fws); 971 | if( change ){ 972 | plp->cfp->status = INCOMPLETE; 973 | progress = 1; 974 | } 975 | } 976 | cfp->status = COMPLETE; 977 | } 978 | } 979 | }while( progress ); 980 | } 981 | 982 | static int resolve_conflict(struct action *,struct action *, struct symbol *); 983 | 984 | /* Compute the reduce actions, and resolve conflicts. 985 | */ 986 | void FindActions(struct lemon *lemp) 987 | { 988 | int i,j; 989 | struct config *cfp; 990 | struct state *stp; 991 | struct symbol *sp; 992 | struct rule *rp; 993 | 994 | /* Add all of the reduce actions 995 | ** A reduce action is added for each element of the followset of 996 | ** a configuration which has its dot at the extreme right. 997 | */ 998 | for(i=0; instate; i++){ /* Loop over all states */ 999 | stp = lemp->sorted[i]; 1000 | for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ 1001 | if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? 
*/ 1002 | for(j=0; jnterminal; j++){ 1003 | if( SetFind(cfp->fws,j) ){ 1004 | /* Add a reduce action to the state "stp" which will reduce by the 1005 | ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ 1006 | Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); 1007 | } 1008 | } 1009 | } 1010 | } 1011 | } 1012 | 1013 | /* Add the accepting token */ 1014 | if( lemp->start ){ 1015 | sp = Symbol_find(lemp->start); 1016 | if( sp==0 ) sp = lemp->rule->lhs; 1017 | }else{ 1018 | sp = lemp->rule->lhs; 1019 | } 1020 | /* Add to the first state (which is always the starting state of the 1021 | ** finite state machine) an action to ACCEPT if the lookahead is the 1022 | ** start nonterminal. */ 1023 | Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); 1024 | 1025 | /* Resolve conflicts */ 1026 | for(i=0; instate; i++){ 1027 | struct action *ap, *nap; 1028 | struct state *stp; 1029 | stp = lemp->sorted[i]; 1030 | /* assert( stp->ap ); */ 1031 | stp->ap = Action_sort(stp->ap); 1032 | for(ap=stp->ap; ap && ap->next; ap=ap->next){ 1033 | for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ 1034 | /* The two actions "ap" and "nap" have the same lookahead. 1035 | ** Figure out which one should be used */ 1036 | lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym); 1037 | } 1038 | } 1039 | } 1040 | 1041 | /* Report an error for each rule that can never be reduced. */ 1042 | for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; 1043 | for(i=0; instate; i++){ 1044 | struct action *ap; 1045 | for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ 1046 | if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; 1047 | } 1048 | } 1049 | for(rp=lemp->rule; rp; rp=rp->next){ 1050 | if( rp->canReduce ) continue; 1051 | ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); 1052 | lemp->errorcnt++; 1053 | } 1054 | } 1055 | 1056 | /* Resolve a conflict between the two given actions. If the 1057 | ** conflict can't be resolved, return non-zero. 
1058 | ** 1059 | ** NO LONGER TRUE: 1060 | ** To resolve a conflict, first look to see if either action 1061 | ** is on an error rule. In that case, take the action which 1062 | ** is not associated with the error rule. If neither or both 1063 | ** actions are associated with an error rule, then try to 1064 | ** use precedence to resolve the conflict. 1065 | ** 1066 | ** If either action is a SHIFT, then it must be apx. This 1067 | ** function won't work if apx->type==REDUCE and apy->type==SHIFT. 1068 | */ 1069 | static int resolve_conflict( 1070 | struct action *apx, 1071 | struct action *apy, 1072 | struct symbol *errsym /* The error symbol (if defined. NULL otherwise) */ 1073 | ){ 1074 | struct symbol *spx, *spy; 1075 | int errcnt = 0; 1076 | assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ 1077 | if( apx->type==SHIFT && apy->type==SHIFT ){ 1078 | apy->type = SSCONFLICT; 1079 | errcnt++; 1080 | } 1081 | if( apx->type==SHIFT && apy->type==REDUCE ){ 1082 | spx = apx->sp; 1083 | spy = apy->x.rp->precsym; 1084 | if( spy==0 || spx->prec<0 || spy->prec<0 ){ 1085 | /* Not enough precedence information. 
*/ 1086 | apy->type = SRCONFLICT; 1087 | errcnt++; 1088 | }else if( spx->prec>spy->prec ){ /* higher precedence wins */ 1089 | apy->type = RD_RESOLVED; 1090 | }else if( spx->precprec ){ 1091 | apx->type = SH_RESOLVED; 1092 | }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ 1093 | apy->type = RD_RESOLVED; /* associativity */ 1094 | }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ 1095 | apx->type = SH_RESOLVED; 1096 | }else{ 1097 | assert( spx->prec==spy->prec && spx->assoc==NONE ); 1098 | apy->type = SRCONFLICT; 1099 | errcnt++; 1100 | } 1101 | }else if( apx->type==REDUCE && apy->type==REDUCE ){ 1102 | spx = apx->x.rp->precsym; 1103 | spy = apy->x.rp->precsym; 1104 | if( spx==0 || spy==0 || spx->prec<0 || 1105 | spy->prec<0 || spx->prec==spy->prec ){ 1106 | apy->type = RRCONFLICT; 1107 | errcnt++; 1108 | }else if( spx->prec>spy->prec ){ 1109 | apy->type = RD_RESOLVED; 1110 | }else if( spx->precprec ){ 1111 | apx->type = RD_RESOLVED; 1112 | } 1113 | }else{ 1114 | assert( 1115 | apx->type==SH_RESOLVED || 1116 | apx->type==RD_RESOLVED || 1117 | apx->type==SSCONFLICT || 1118 | apx->type==SRCONFLICT || 1119 | apx->type==RRCONFLICT || 1120 | apy->type==SH_RESOLVED || 1121 | apy->type==RD_RESOLVED || 1122 | apy->type==SSCONFLICT || 1123 | apy->type==SRCONFLICT || 1124 | apy->type==RRCONFLICT 1125 | ); 1126 | /* The REDUCE/SHIFT case cannot happen because SHIFTs come before 1127 | ** REDUCEs on the list. If we reach this point it must be because 1128 | ** the parser conflict had already been resolved. */ 1129 | } 1130 | return errcnt; 1131 | } 1132 | /********************* From the file "configlist.c" *************************/ 1133 | /* 1134 | ** Routines to processing a configuration list and building a state 1135 | ** in the LEMON parser generator. 
1136 | */ 1137 | 1138 | static struct config *freelist = 0; /* List of free configurations */ 1139 | static struct config *current = 0; /* Top of list of configurations */ 1140 | static struct config **currentend = 0; /* Last on list of configs */ 1141 | static struct config *basis = 0; /* Top of list of basis configs */ 1142 | static struct config **basisend = 0; /* End of list of basis configs */ 1143 | 1144 | /* Return a pointer to a new configuration */ 1145 | PRIVATE struct config *newconfig(){ 1146 | struct config *newcfg; 1147 | if( freelist==0 ){ 1148 | int i; 1149 | int amt = 3; 1150 | freelist = (struct config *)calloc( amt, sizeof(struct config) ); 1151 | if( freelist==0 ){ 1152 | fprintf(stderr,"Unable to allocate memory for a new configuration."); 1153 | exit(1); 1154 | } 1155 | for(i=0; inext; 1160 | return newcfg; 1161 | } 1162 | 1163 | /* The configuration "old" is no longer used */ 1164 | PRIVATE void deleteconfig(struct config *old) 1165 | { 1166 | old->next = freelist; 1167 | freelist = old; 1168 | } 1169 | 1170 | /* Initialized the configuration list builder */ 1171 | void Configlist_init(){ 1172 | current = 0; 1173 | currentend = ¤t; 1174 | basis = 0; 1175 | basisend = &basis; 1176 | Configtable_init(); 1177 | return; 1178 | } 1179 | 1180 | /* Initialized the configuration list builder */ 1181 | void Configlist_reset(){ 1182 | current = 0; 1183 | currentend = ¤t; 1184 | basis = 0; 1185 | basisend = &basis; 1186 | Configtable_clear(0); 1187 | return; 1188 | } 1189 | 1190 | /* Add another configuration to the configuration list */ 1191 | struct config *Configlist_add( 1192 | struct rule *rp, /* The rule */ 1193 | int dot /* Index into the RHS of the rule where the dot goes */ 1194 | ){ 1195 | struct config *cfp, model; 1196 | 1197 | assert( currentend!=0 ); 1198 | model.rp = rp; 1199 | model.dot = dot; 1200 | cfp = Configtable_find(&model); 1201 | if( cfp==0 ){ 1202 | cfp = newconfig(); 1203 | cfp->rp = rp; 1204 | cfp->dot = dot; 1205 | cfp->fws 
= SetNew(); 1206 | cfp->stp = 0; 1207 | cfp->fplp = cfp->bplp = 0; 1208 | cfp->next = 0; 1209 | cfp->bp = 0; 1210 | *currentend = cfp; 1211 | currentend = &cfp->next; 1212 | Configtable_insert(cfp); 1213 | } 1214 | return cfp; 1215 | } 1216 | 1217 | /* Add a basis configuration to the configuration list */ 1218 | struct config *Configlist_addbasis(struct rule *rp, int dot) 1219 | { 1220 | struct config *cfp, model; 1221 | 1222 | assert( basisend!=0 ); 1223 | assert( currentend!=0 ); 1224 | model.rp = rp; 1225 | model.dot = dot; 1226 | cfp = Configtable_find(&model); 1227 | if( cfp==0 ){ 1228 | cfp = newconfig(); 1229 | cfp->rp = rp; 1230 | cfp->dot = dot; 1231 | cfp->fws = SetNew(); 1232 | cfp->stp = 0; 1233 | cfp->fplp = cfp->bplp = 0; 1234 | cfp->next = 0; 1235 | cfp->bp = 0; 1236 | *currentend = cfp; 1237 | currentend = &cfp->next; 1238 | *basisend = cfp; 1239 | basisend = &cfp->bp; 1240 | Configtable_insert(cfp); 1241 | } 1242 | return cfp; 1243 | } 1244 | 1245 | /* Compute the closure of the configuration list */ 1246 | void Configlist_closure(struct lemon *lemp) 1247 | { 1248 | struct config *cfp, *newcfp; 1249 | struct rule *rp, *newrp; 1250 | struct symbol *sp, *xsp; 1251 | int i, dot; 1252 | 1253 | assert( currentend!=0 ); 1254 | for(cfp=current; cfp; cfp=cfp->next){ 1255 | rp = cfp->rp; 1256 | dot = cfp->dot; 1257 | if( dot>=rp->nrhs ) continue; 1258 | sp = rp->rhs[dot]; 1259 | if( sp->type==NONTERMINAL ){ 1260 | if( sp->rule==0 && sp!=lemp->errsym ){ 1261 | ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", 1262 | sp->name); 1263 | lemp->errorcnt++; 1264 | } 1265 | for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ 1266 | newcfp = Configlist_add(newrp,0); 1267 | for(i=dot+1; inrhs; i++){ 1268 | xsp = rp->rhs[i]; 1269 | if( xsp->type==TERMINAL ){ 1270 | SetAdd(newcfp->fws,xsp->index); 1271 | break; 1272 | }else if( xsp->type==MULTITERMINAL ){ 1273 | int k; 1274 | for(k=0; knsubsym; k++){ 1275 | SetAdd(newcfp->fws, xsp->subsym[k]->index); 
1276 | } 1277 | break; 1278 | }else{ 1279 | SetUnion(newcfp->fws,xsp->firstset); 1280 | if( xsp->lambda==LEMON_FALSE ) break; 1281 | } 1282 | } 1283 | if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); 1284 | } 1285 | } 1286 | } 1287 | return; 1288 | } 1289 | 1290 | /* Sort the configuration list */ 1291 | void Configlist_sort(){ 1292 | current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp); 1293 | currentend = 0; 1294 | return; 1295 | } 1296 | 1297 | /* Sort the basis configuration list */ 1298 | void Configlist_sortbasis(){ 1299 | basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp); 1300 | basisend = 0; 1301 | return; 1302 | } 1303 | 1304 | /* Return a pointer to the head of the configuration list and 1305 | ** reset the list */ 1306 | struct config *Configlist_return(){ 1307 | struct config *old; 1308 | old = current; 1309 | current = 0; 1310 | currentend = 0; 1311 | return old; 1312 | } 1313 | 1314 | /* Return a pointer to the head of the configuration list and 1315 | ** reset the list */ 1316 | struct config *Configlist_basis(){ 1317 | struct config *old; 1318 | old = basis; 1319 | basis = 0; 1320 | basisend = 0; 1321 | return old; 1322 | } 1323 | 1324 | /* Free all elements of the given configuration list */ 1325 | void Configlist_eat(struct config *cfp) 1326 | { 1327 | struct config *nextcfp; 1328 | for(; cfp; cfp=nextcfp){ 1329 | nextcfp = cfp->next; 1330 | assert( cfp->fplp==0 ); 1331 | assert( cfp->bplp==0 ); 1332 | if( cfp->fws ) SetFree(cfp->fws); 1333 | deleteconfig(cfp); 1334 | } 1335 | return; 1336 | } 1337 | /***************** From the file "error.c" *********************************/ 1338 | /* 1339 | ** Code for printing error message. 
1340 | */ 1341 | 1342 | void ErrorMsg(const char *filename, int lineno, const char *format, ...){ 1343 | va_list ap; 1344 | fprintf(stderr, "%s:%d: ", filename, lineno); 1345 | va_start(ap, format); 1346 | vfprintf(stderr,format,ap); 1347 | va_end(ap); 1348 | fprintf(stderr, "\n"); 1349 | } 1350 | /**************** From the file "main.c" ************************************/ 1351 | /* 1352 | ** Main program file for the LEMON parser generator. 1353 | */ 1354 | 1355 | /* Report an out-of-memory condition and abort. This function 1356 | ** is used mostly by the "MemoryCheck" macro in struct.h 1357 | */ 1358 | void memory_error(){ 1359 | fprintf(stderr,"Out of memory. Aborting...\n"); 1360 | exit(1); 1361 | } 1362 | 1363 | static int nDefine = 0; /* Number of -D options on the command line */ 1364 | static char **azDefine = 0; /* Name of the -D macros */ 1365 | 1366 | /* This routine is called with the argument to each -D command-line option. 1367 | ** Add the macro defined to the azDefine array. 1368 | */ 1369 | static void handle_D_option(char *z){ 1370 | char **paz; 1371 | nDefine++; 1372 | azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); 1373 | if( azDefine==0 ){ 1374 | fprintf(stderr,"out of memory\n"); 1375 | exit(1); 1376 | } 1377 | paz = &azDefine[nDefine-1]; 1378 | *paz = (char *) malloc( lemonStrlen(z)+1 ); 1379 | if( *paz==0 ){ 1380 | fprintf(stderr,"out of memory\n"); 1381 | exit(1); 1382 | } 1383 | strcpy(*paz, z); 1384 | for(z=*paz; *z && *z!='='; z++){} 1385 | *z = 0; 1386 | } 1387 | 1388 | static char *user_templatename = NULL; 1389 | static void handle_T_option(char *z){ 1390 | user_templatename = (char *) malloc( lemonStrlen(z)+1 ); 1391 | if( user_templatename==0 ){ 1392 | memory_error(); 1393 | } 1394 | strcpy(user_templatename, z); 1395 | } 1396 | 1397 | /* The main program. Parse the command line and do it... 
*/
int main(int argc, char **argv)
{
  static int version = 0;
  static int rpflag = 0;
  static int basisflag = 0;
  static int compress = 0;
  static int quiet = 0;
  static int statistics = 0;
  static int mhflag = 0;
  static int nolinenosflag = 0;
  static int noResort = 0;
  static struct s_options options[] = {
    {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."},
    {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."},
    {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."},
    {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."},
    {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."},
    {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file."},
    {OPT_FLAG, "l", (char*)&nolinenosflag, "Do not print #line statements."},
    {OPT_FLAG, "p", (char*)&showPrecedenceConflict,
                    "Show conflicts resolved by precedence rules"},
    {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."},
    {OPT_FLAG, "r", (char*)&noResort, "Do not sort or renumber states"},
    {OPT_FLAG, "s", (char*)&statistics,
                                   "Print parser stats to standard output."},
    {OPT_FLAG, "x", (char*)&version, "Print the version number."},
    {OPT_FLAG,0,0,0}
  };
  int i;
  int exitcode;
  struct lemon lem;

  atexit(LemonAtExit);

  OptInit(argv,options,stderr);
  if( version ){
     printf("Lemon version 1.0\n");
     exit(0);
  }
  if( OptNArgs()!=1 ){
    fprintf(stderr,"Exactly one filename argument is required.\n");
    exit(1);
  }
  memset(&lem, 0, sizeof(lem));
  lem.errorcnt = 0;

  /* Initialize the machine */
  Strsafe_init();
  Symbol_init();
  State_init();
  lem.argv0 = argv[0];
  lem.filename = OptArg(0);
  lem.basisflag = basisflag;
  lem.nolinenosflag = nolinenosflag;
  Symbol_new("$");
  lem.errsym = Symbol_new("error");
  lem.errsym->useCnt = 0;

  /* Parse the input file */
  Parse(&lem);
  if( lem.errorcnt ) exit(lem.errorcnt);
  if( lem.nrule==0 ){
    fprintf(stderr,"Empty grammar.\n");
    exit(1);
  }

  /* Count and index the symbols of the grammar */
  lem.nsymbol = Symbol_count();
  Symbol_new("{default}");
  lem.symbols = Symbol_arrayof();
  for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
  qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp);
  for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
  /* Count the leading symbols whose names start with an upper-case letter;
  ** these are taken to be the terminals.  The cast is required because
  ** isupper() is undefined for negative values of plain char. */
  for(i=1; isupper((unsigned char)lem.symbols[i]->name[0]); i++);
  lem.nterminal = i;

  /* Generate a reprint of the grammar, if requested on the command line */
  if( rpflag ){
    Reprint(&lem);
  }else{
    /* Initialize the size for all follow and first sets */
    SetSize(lem.nterminal+1);

    /* Find the precedence for every production rule (that has one) */
    FindRulePrecedences(&lem);

    /* Compute the lambda-nonterminals and the first-sets for every
    ** nonterminal */
    FindFirstSets(&lem);

    /* Compute all LR(0) states.  Also record follow-set propagation
    ** links so that the follow-set can be computed later */
    lem.nstate = 0;
    FindStates(&lem);
    lem.sorted = State_arrayof();

    /* Tie up loose ends on the propagation links */
    FindLinks(&lem);

    /* Compute the follow set of every reducible configuration */
    FindFollowSets(&lem);

    /* Compute the action tables */
    FindActions(&lem);

    /* Compress the action tables */
    if( compress==0 ) CompressTables(&lem);

    /* Reorder and renumber the states so that states with fewer choices
    ** occur at the end.  This is an optimization that helps make the
    ** generated parser tables smaller. */
    if( noResort==0 ) ResortStates(&lem);

    /* Generate a report of the parser generated.  (the "y.output" file) */
    if( !quiet ) ReportOutput(&lem);

    /* Generate the source code for the parser */
    ReportTable(&lem, mhflag);

    /* Produce a header file for use by the scanner.  (This step is
    ** omitted if the "-m" option is used because makeheaders will
    ** generate the file for us.) */
    if( !mhflag ) ReportHeader(&lem);
  }
  if( statistics ){
    printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
      lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule);
    printf(" %d states, %d parser table entries, %d conflicts\n",
      lem.nstate, lem.tablesize, lem.nconflict);
  }
  if( lem.nconflict > 0 ){
    fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
  }

  /* return 0 on success, 1 on failure. */
  exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0;
  successful_exit = (exitcode == 0);
  exit(exitcode);
  return (exitcode);
}
/******************** From the file "msort.c" *******************************/
/*
** A generic merge-sort program.
**
** USAGE:
** Let "ptr" be a pointer to some structure which is at the head of
** a null-terminated list.  Then to sort the list call:
**
**     ptr = msort(ptr,&(ptr->next),cmpfnc);
**
** In the above, "cmpfnc" is a pointer to a function which compares
** two instances of the structure and returns an integer, as in
** strcmp.  The second argument is a pointer to the pointer to the
** second element of the linked list.  This address is used to compute
** the offset to the "next" field within the structure.
The offset to 1553 | ** the "next" field must be constant for all structures in the list. 1554 | ** 1555 | ** The function returns a new pointer which is the head of the list 1556 | ** after sorting. 1557 | ** 1558 | ** ALGORITHM: 1559 | ** Merge-sort. 1560 | */ 1561 | 1562 | /* 1563 | ** Return a pointer to the next structure in the linked list. 1564 | */ 1565 | #define NEXT(A) (*(char**)(((unsigned long)A)+offset)) 1566 | 1567 | /* 1568 | ** Inputs: 1569 | ** a: A sorted, null-terminated linked list. (May be null). 1570 | ** b: A sorted, null-terminated linked list. (May be null). 1571 | ** cmp: A pointer to the comparison function. 1572 | ** offset: Offset in the structure to the "next" field. 1573 | ** 1574 | ** Return Value: 1575 | ** A pointer to the head of a sorted list containing the elements 1576 | ** of both a and b. 1577 | ** 1578 | ** Side effects: 1579 | ** The "next" pointers for elements in the lists a and b are 1580 | ** changed. 1581 | */ 1582 | static char *merge( 1583 | char *a, 1584 | char *b, 1585 | int (*cmp)(const char*,const char*), 1586 | int offset 1587 | ){ 1588 | char *ptr, *head; 1589 | 1590 | if( a==0 ){ 1591 | head = b; 1592 | }else if( b==0 ){ 1593 | head = a; 1594 | }else{ 1595 | if( (*cmp)(a,b)<=0 ){ 1596 | ptr = a; 1597 | a = NEXT(a); 1598 | }else{ 1599 | ptr = b; 1600 | b = NEXT(b); 1601 | } 1602 | head = ptr; 1603 | while( a && b ){ 1604 | if( (*cmp)(a,b)<=0 ){ 1605 | NEXT(ptr) = a; 1606 | ptr = a; 1607 | a = NEXT(a); 1608 | }else{ 1609 | NEXT(ptr) = b; 1610 | ptr = b; 1611 | b = NEXT(b); 1612 | } 1613 | } 1614 | if( a ) NEXT(ptr) = a; 1615 | else NEXT(ptr) = b; 1616 | } 1617 | return head; 1618 | } 1619 | 1620 | /* 1621 | ** Inputs: 1622 | ** list: Pointer to a singly-linked list of structures. 1623 | ** next: Pointer to pointer to the second element of the list. 1624 | ** cmp: A comparison function. 
1625 | ** 1626 | ** Return Value: 1627 | ** A pointer to the head of a sorted list containing the elements 1628 | ** orginally in list. 1629 | ** 1630 | ** Side effects: 1631 | ** The "next" pointers for elements in list are changed. 1632 | */ 1633 | #define LISTSIZE 30 1634 | static char *msort( 1635 | char *list, 1636 | char **next, 1637 | int (*cmp)(const char*,const char*) 1638 | ){ 1639 | unsigned long offset; 1640 | char *ep; 1641 | char *set[LISTSIZE]; 1642 | int i; 1643 | offset = (unsigned long)next - (unsigned long)list; 1644 | for(i=0; istate = WAITING_FOR_DECL_KEYWORD; 2004 | }else if( islower(x[0]) ){ 2005 | psp->lhs = Symbol_new(x); 2006 | psp->nrhs = 0; 2007 | psp->lhsalias = 0; 2008 | psp->state = WAITING_FOR_ARROW; 2009 | }else if( x[0]=='{' ){ 2010 | if( psp->prevrule==0 ){ 2011 | ErrorMsg(psp->filename,psp->tokenlineno, 2012 | "There is no prior rule opon which to attach the code \ 2013 | fragment which begins on this line."); 2014 | psp->errorcnt++; 2015 | }else if( psp->prevrule->code!=0 ){ 2016 | ErrorMsg(psp->filename,psp->tokenlineno, 2017 | "Code fragment beginning on this line is not the first \ 2018 | to follow the previous rule."); 2019 | psp->errorcnt++; 2020 | }else{ 2021 | psp->prevrule->line = psp->tokenlineno; 2022 | psp->prevrule->code = &x[1]; 2023 | } 2024 | }else if( x[0]=='[' ){ 2025 | psp->state = PRECEDENCE_MARK_1; 2026 | }else{ 2027 | ErrorMsg(psp->filename,psp->tokenlineno, 2028 | "Token \"%s\" should be either \"%%\" or a nonterminal name.", 2029 | x); 2030 | psp->errorcnt++; 2031 | } 2032 | break; 2033 | case PRECEDENCE_MARK_1: 2034 | if( !isupper(x[0]) ){ 2035 | ErrorMsg(psp->filename,psp->tokenlineno, 2036 | "The precedence symbol must be a terminal."); 2037 | psp->errorcnt++; 2038 | }else if( psp->prevrule==0 ){ 2039 | ErrorMsg(psp->filename,psp->tokenlineno, 2040 | "There is no prior rule to assign precedence \"[%s]\".",x); 2041 | psp->errorcnt++; 2042 | }else if( psp->prevrule->precsym!=0 ){ 2043 | 
ErrorMsg(psp->filename,psp->tokenlineno, 2044 | "Precedence mark on this line is not the first \ 2045 | to follow the previous rule."); 2046 | psp->errorcnt++; 2047 | }else{ 2048 | psp->prevrule->precsym = Symbol_new(x); 2049 | } 2050 | psp->state = PRECEDENCE_MARK_2; 2051 | break; 2052 | case PRECEDENCE_MARK_2: 2053 | if( x[0]!=']' ){ 2054 | ErrorMsg(psp->filename,psp->tokenlineno, 2055 | "Missing \"]\" on precedence mark."); 2056 | psp->errorcnt++; 2057 | } 2058 | psp->state = WAITING_FOR_DECL_OR_RULE; 2059 | break; 2060 | case WAITING_FOR_ARROW: 2061 | if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 2062 | psp->state = IN_RHS; 2063 | }else if( x[0]=='(' ){ 2064 | psp->state = LHS_ALIAS_1; 2065 | }else{ 2066 | ErrorMsg(psp->filename,psp->tokenlineno, 2067 | "Expected to see a \":\" following the LHS symbol \"%s\".", 2068 | psp->lhs->name); 2069 | psp->errorcnt++; 2070 | psp->state = RESYNC_AFTER_RULE_ERROR; 2071 | } 2072 | break; 2073 | case LHS_ALIAS_1: 2074 | if( isalpha(x[0]) ){ 2075 | psp->lhsalias = x; 2076 | psp->state = LHS_ALIAS_2; 2077 | }else{ 2078 | ErrorMsg(psp->filename,psp->tokenlineno, 2079 | "\"%s\" is not a valid alias for the LHS \"%s\"\n", 2080 | x,psp->lhs->name); 2081 | psp->errorcnt++; 2082 | psp->state = RESYNC_AFTER_RULE_ERROR; 2083 | } 2084 | break; 2085 | case LHS_ALIAS_2: 2086 | if( x[0]==')' ){ 2087 | psp->state = LHS_ALIAS_3; 2088 | }else{ 2089 | ErrorMsg(psp->filename,psp->tokenlineno, 2090 | "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 2091 | psp->errorcnt++; 2092 | psp->state = RESYNC_AFTER_RULE_ERROR; 2093 | } 2094 | break; 2095 | case LHS_ALIAS_3: 2096 | if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 2097 | psp->state = IN_RHS; 2098 | }else{ 2099 | ErrorMsg(psp->filename,psp->tokenlineno, 2100 | "Missing \"->\" following: \"%s(%s)\".", 2101 | psp->lhs->name,psp->lhsalias); 2102 | psp->errorcnt++; 2103 | psp->state = RESYNC_AFTER_RULE_ERROR; 2104 | } 2105 | break; 2106 | case IN_RHS: 2107 | if( x[0]=='.' 
){ 2108 | struct rule *rp; 2109 | rp = (struct rule *)calloc( sizeof(struct rule) + 2110 | sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); 2111 | if( rp==0 ){ 2112 | ErrorMsg(psp->filename,psp->tokenlineno, 2113 | "Can't allocate enough memory for this rule."); 2114 | psp->errorcnt++; 2115 | psp->prevrule = 0; 2116 | }else{ 2117 | int i; 2118 | rp->ruleline = psp->tokenlineno; 2119 | rp->rhs = (struct symbol**)&rp[1]; 2120 | rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); 2121 | for(i=0; inrhs; i++){ 2122 | rp->rhs[i] = psp->rhs[i]; 2123 | rp->rhsalias[i] = psp->alias[i]; 2124 | } 2125 | rp->lhs = psp->lhs; 2126 | rp->lhsalias = psp->lhsalias; 2127 | rp->nrhs = psp->nrhs; 2128 | rp->code = 0; 2129 | rp->precsym = 0; 2130 | rp->index = psp->gp->nrule++; 2131 | rp->nextlhs = rp->lhs->rule; 2132 | rp->lhs->rule = rp; 2133 | rp->next = 0; 2134 | if( psp->firstrule==0 ){ 2135 | psp->firstrule = psp->lastrule = rp; 2136 | }else{ 2137 | psp->lastrule->next = rp; 2138 | psp->lastrule = rp; 2139 | } 2140 | psp->prevrule = rp; 2141 | } 2142 | psp->state = WAITING_FOR_DECL_OR_RULE; 2143 | }else if( isalpha(x[0]) ){ 2144 | if( psp->nrhs>=MAXRHS ){ 2145 | ErrorMsg(psp->filename,psp->tokenlineno, 2146 | "Too many symbols on RHS of rule beginning at \"%s\".", 2147 | x); 2148 | psp->errorcnt++; 2149 | psp->state = RESYNC_AFTER_RULE_ERROR; 2150 | }else{ 2151 | psp->rhs[psp->nrhs] = Symbol_new(x); 2152 | psp->alias[psp->nrhs] = 0; 2153 | psp->nrhs++; 2154 | } 2155 | }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){ 2156 | struct symbol *msp = psp->rhs[psp->nrhs-1]; 2157 | if( msp->type!=MULTITERMINAL ){ 2158 | struct symbol *origsp = msp; 2159 | msp = (struct symbol *) calloc(1,sizeof(*msp)); 2160 | memset(msp, 0, sizeof(*msp)); 2161 | msp->type = MULTITERMINAL; 2162 | msp->nsubsym = 1; 2163 | msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); 2164 | msp->subsym[0] = origsp; 2165 | msp->name = origsp->name; 2166 | psp->rhs[psp->nrhs-1] = msp; 
2167 | } 2168 | msp->nsubsym++; 2169 | msp->subsym = (struct symbol **) realloc(msp->subsym, 2170 | sizeof(struct symbol*)*msp->nsubsym); 2171 | msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); 2172 | if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){ 2173 | ErrorMsg(psp->filename,psp->tokenlineno, 2174 | "Cannot form a compound containing a non-terminal"); 2175 | psp->errorcnt++; 2176 | } 2177 | }else if( x[0]=='(' && psp->nrhs>0 ){ 2178 | psp->state = RHS_ALIAS_1; 2179 | }else{ 2180 | ErrorMsg(psp->filename,psp->tokenlineno, 2181 | "Illegal character on RHS of rule: \"%s\".",x); 2182 | psp->errorcnt++; 2183 | psp->state = RESYNC_AFTER_RULE_ERROR; 2184 | } 2185 | break; 2186 | case RHS_ALIAS_1: 2187 | if( isalpha(x[0]) ){ 2188 | psp->alias[psp->nrhs-1] = x; 2189 | psp->state = RHS_ALIAS_2; 2190 | }else{ 2191 | ErrorMsg(psp->filename,psp->tokenlineno, 2192 | "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", 2193 | x,psp->rhs[psp->nrhs-1]->name); 2194 | psp->errorcnt++; 2195 | psp->state = RESYNC_AFTER_RULE_ERROR; 2196 | } 2197 | break; 2198 | case RHS_ALIAS_2: 2199 | if( x[0]==')' ){ 2200 | psp->state = IN_RHS; 2201 | }else{ 2202 | ErrorMsg(psp->filename,psp->tokenlineno, 2203 | "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 2204 | psp->errorcnt++; 2205 | psp->state = RESYNC_AFTER_RULE_ERROR; 2206 | } 2207 | break; 2208 | case WAITING_FOR_DECL_KEYWORD: 2209 | if( isalpha(x[0]) ){ 2210 | psp->declkeyword = x; 2211 | psp->declargslot = 0; 2212 | psp->decllinenoslot = 0; 2213 | psp->insertLineMacro = 1; 2214 | psp->state = WAITING_FOR_DECL_ARG; 2215 | if( strcmp(x,"name")==0 ){ 2216 | psp->declargslot = &(psp->gp->name); 2217 | psp->insertLineMacro = 0; 2218 | }else if( strcmp(x,"include")==0 ){ 2219 | psp->declargslot = &(psp->gp->include); 2220 | }else if( strcmp(x,"code")==0 ){ 2221 | psp->declargslot = &(psp->gp->extracode); 2222 | }else if( strcmp(x,"token_destructor")==0 ){ 2223 | psp->declargslot = &psp->gp->tokendest; 2224 | 
}else if( strcmp(x,"default_destructor")==0 ){ 2225 | psp->declargslot = &psp->gp->vardest; 2226 | }else if( strcmp(x,"token_prefix")==0 ){ 2227 | psp->declargslot = &psp->gp->tokenprefix; 2228 | psp->insertLineMacro = 0; 2229 | }else if( strcmp(x,"syntax_error")==0 ){ 2230 | psp->declargslot = &(psp->gp->error); 2231 | }else if( strcmp(x,"parse_accept")==0 ){ 2232 | psp->declargslot = &(psp->gp->accept); 2233 | }else if( strcmp(x,"parse_failure")==0 ){ 2234 | psp->declargslot = &(psp->gp->failure); 2235 | }else if( strcmp(x,"stack_overflow")==0 ){ 2236 | psp->declargslot = &(psp->gp->overflow); 2237 | }else if( strcmp(x,"extra_argument")==0 ){ 2238 | psp->declargslot = &(psp->gp->arg); 2239 | psp->insertLineMacro = 0; 2240 | }else if( strcmp(x,"token_type")==0 ){ 2241 | psp->declargslot = &(psp->gp->tokentype); 2242 | psp->insertLineMacro = 0; 2243 | }else if( strcmp(x,"default_type")==0 ){ 2244 | psp->declargslot = &(psp->gp->vartype); 2245 | psp->insertLineMacro = 0; 2246 | }else if( strcmp(x,"stack_size")==0 ){ 2247 | psp->declargslot = &(psp->gp->stacksize); 2248 | psp->insertLineMacro = 0; 2249 | }else if( strcmp(x,"start_symbol")==0 ){ 2250 | psp->declargslot = &(psp->gp->start); 2251 | psp->insertLineMacro = 0; 2252 | }else if( strcmp(x,"left")==0 ){ 2253 | psp->preccounter++; 2254 | psp->declassoc = LEFT; 2255 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2256 | }else if( strcmp(x,"right")==0 ){ 2257 | psp->preccounter++; 2258 | psp->declassoc = RIGHT; 2259 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2260 | }else if( strcmp(x,"nonassoc")==0 ){ 2261 | psp->preccounter++; 2262 | psp->declassoc = NONE; 2263 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2264 | }else if( strcmp(x,"destructor")==0 ){ 2265 | psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; 2266 | }else if( strcmp(x,"type")==0 ){ 2267 | psp->state = WAITING_FOR_DATATYPE_SYMBOL; 2268 | }else if( strcmp(x,"fallback")==0 ){ 2269 | psp->fallback = 0; 2270 | psp->state = WAITING_FOR_FALLBACK_ID; 2271 | 
}else if( strcmp(x,"wildcard")==0 ){ 2272 | psp->state = WAITING_FOR_WILDCARD_ID; 2273 | }else{ 2274 | ErrorMsg(psp->filename,psp->tokenlineno, 2275 | "Unknown declaration keyword: \"%%%s\".",x); 2276 | psp->errorcnt++; 2277 | psp->state = RESYNC_AFTER_DECL_ERROR; 2278 | } 2279 | }else{ 2280 | ErrorMsg(psp->filename,psp->tokenlineno, 2281 | "Illegal declaration keyword: \"%s\".",x); 2282 | psp->errorcnt++; 2283 | psp->state = RESYNC_AFTER_DECL_ERROR; 2284 | } 2285 | break; 2286 | case WAITING_FOR_DESTRUCTOR_SYMBOL: 2287 | if( !isalpha(x[0]) ){ 2288 | ErrorMsg(psp->filename,psp->tokenlineno, 2289 | "Symbol name missing after %%destructor keyword"); 2290 | psp->errorcnt++; 2291 | psp->state = RESYNC_AFTER_DECL_ERROR; 2292 | }else{ 2293 | struct symbol *sp = Symbol_new(x); 2294 | psp->declargslot = &sp->destructor; 2295 | psp->decllinenoslot = &sp->destLineno; 2296 | psp->insertLineMacro = 1; 2297 | psp->state = WAITING_FOR_DECL_ARG; 2298 | } 2299 | break; 2300 | case WAITING_FOR_DATATYPE_SYMBOL: 2301 | if( !isalpha(x[0]) ){ 2302 | ErrorMsg(psp->filename,psp->tokenlineno, 2303 | "Symbol name missing after %%type keyword"); 2304 | psp->errorcnt++; 2305 | psp->state = RESYNC_AFTER_DECL_ERROR; 2306 | }else{ 2307 | struct symbol *sp = Symbol_find(x); 2308 | if((sp) && (sp->datatype)){ 2309 | ErrorMsg(psp->filename,psp->tokenlineno, 2310 | "Symbol %%type \"%s\" already defined", x); 2311 | psp->errorcnt++; 2312 | psp->state = RESYNC_AFTER_DECL_ERROR; 2313 | }else{ 2314 | if (!sp){ 2315 | sp = Symbol_new(x); 2316 | } 2317 | psp->declargslot = &sp->datatype; 2318 | psp->insertLineMacro = 0; 2319 | psp->state = WAITING_FOR_DECL_ARG; 2320 | } 2321 | } 2322 | break; 2323 | case WAITING_FOR_PRECEDENCE_SYMBOL: 2324 | if( x[0]=='.' 
){ 2325 | psp->state = WAITING_FOR_DECL_OR_RULE; 2326 | }else if( isupper(x[0]) ){ 2327 | struct symbol *sp; 2328 | sp = Symbol_new(x); 2329 | if( sp->prec>=0 ){ 2330 | ErrorMsg(psp->filename,psp->tokenlineno, 2331 | "Symbol \"%s\" has already be given a precedence.",x); 2332 | psp->errorcnt++; 2333 | }else{ 2334 | sp->prec = psp->preccounter; 2335 | sp->assoc = psp->declassoc; 2336 | } 2337 | }else{ 2338 | ErrorMsg(psp->filename,psp->tokenlineno, 2339 | "Can't assign a precedence to \"%s\".",x); 2340 | psp->errorcnt++; 2341 | } 2342 | break; 2343 | case WAITING_FOR_DECL_ARG: 2344 | if( x[0]=='{' || x[0]=='\"' || isalnum(x[0]) ){ 2345 | const char *zOld, *zNew; 2346 | char *zBuf, *z; 2347 | int nOld, n, nLine, nNew, nBack; 2348 | int addLineMacro; 2349 | char zLine[50]; 2350 | zNew = x; 2351 | if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; 2352 | nNew = lemonStrlen(zNew); 2353 | if( *psp->declargslot ){ 2354 | zOld = *psp->declargslot; 2355 | }else{ 2356 | zOld = ""; 2357 | } 2358 | nOld = lemonStrlen(zOld); 2359 | n = nOld + nNew + 20; 2360 | addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && 2361 | (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); 2362 | if( addLineMacro ){ 2363 | for(z=psp->filename, nBack=0; *z; z++){ 2364 | if( *z=='\\' ) nBack++; 2365 | } 2366 | sprintf(zLine, "//line :%d", psp->tokenlineno); 2367 | nLine = lemonStrlen(zLine); 2368 | n += nLine + lemonStrlen(psp->filename) + nBack; 2369 | } 2370 | *psp->declargslot = (char *) realloc(*psp->declargslot, n); 2371 | zBuf = *psp->declargslot + nOld; 2372 | if( addLineMacro ){ 2373 | if( nOld && zBuf[-1]!='\n' ){ 2374 | *(zBuf++) = '\n'; 2375 | } 2376 | memcpy(zBuf, zLine, 7); // copy '//line ' part 2377 | zBuf += 7; 2378 | //*(zBuf++) = '"'; 2379 | for(z=psp->filename; *z; z++){ 2380 | if( *z=='\\' ){ 2381 | *(zBuf++) = '\\'; 2382 | } 2383 | *(zBuf++) = *z; 2384 | } 2385 | //*(zBuf++) = '"'; 2386 | memcpy(zBuf, zLine+7, nLine-7); // copy ':%d' part 2387 | zBuf += nLine-7; 2388 | 
*(zBuf++) = '\n'; 2389 | } 2390 | if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ 2391 | psp->decllinenoslot[0] = psp->tokenlineno; 2392 | } 2393 | memcpy(zBuf, zNew, nNew); 2394 | zBuf += nNew; 2395 | *zBuf = 0; 2396 | psp->state = WAITING_FOR_DECL_OR_RULE; 2397 | }else{ 2398 | ErrorMsg(psp->filename,psp->tokenlineno, 2399 | "Illegal argument to %%%s: %s",psp->declkeyword,x); 2400 | psp->errorcnt++; 2401 | psp->state = RESYNC_AFTER_DECL_ERROR; 2402 | } 2403 | break; 2404 | case WAITING_FOR_FALLBACK_ID: 2405 | if( x[0]=='.' ){ 2406 | psp->state = WAITING_FOR_DECL_OR_RULE; 2407 | }else if( !isupper(x[0]) ){ 2408 | ErrorMsg(psp->filename, psp->tokenlineno, 2409 | "%%fallback argument \"%s\" should be a token", x); 2410 | psp->errorcnt++; 2411 | }else{ 2412 | struct symbol *sp = Symbol_new(x); 2413 | if( psp->fallback==0 ){ 2414 | psp->fallback = sp; 2415 | }else if( sp->fallback ){ 2416 | ErrorMsg(psp->filename, psp->tokenlineno, 2417 | "More than one fallback assigned to token %s", x); 2418 | psp->errorcnt++; 2419 | }else{ 2420 | sp->fallback = psp->fallback; 2421 | psp->gp->has_fallback = 1; 2422 | } 2423 | } 2424 | break; 2425 | case WAITING_FOR_WILDCARD_ID: 2426 | if( x[0]=='.' ){ 2427 | psp->state = WAITING_FOR_DECL_OR_RULE; 2428 | }else if( !isupper(x[0]) ){ 2429 | ErrorMsg(psp->filename, psp->tokenlineno, 2430 | "%%wildcard argument \"%s\" should be a token", x); 2431 | psp->errorcnt++; 2432 | }else{ 2433 | struct symbol *sp = Symbol_new(x); 2434 | if( psp->gp->wildcard==0 ){ 2435 | psp->gp->wildcard = sp; 2436 | }else{ 2437 | ErrorMsg(psp->filename, psp->tokenlineno, 2438 | "Extra wildcard to token: %s", x); 2439 | psp->errorcnt++; 2440 | } 2441 | } 2442 | break; 2443 | case RESYNC_AFTER_RULE_ERROR: 2444 | /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; 2445 | ** break; */ 2446 | case RESYNC_AFTER_DECL_ERROR: 2447 | if( x[0]=='.' 
) psp->state = WAITING_FOR_DECL_OR_RULE; 2448 | if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; 2449 | break; 2450 | } 2451 | } 2452 | 2453 | /* Run the preprocessor over the input file text. The global variables 2454 | ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined 2455 | ** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and 2456 | ** comments them out. Text in between is also commented out as appropriate. 2457 | */ 2458 | static void preprocess_input(char *z){ 2459 | int i, j, k, n; 2460 | int exclude = 0; 2461 | int start = 0; 2462 | int lineno = 1; 2463 | int start_lineno = 1; 2464 | for(i=0; z[i]; i++){ 2465 | if( z[i]=='\n' ) lineno++; 2466 | if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; 2467 | if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){ 2468 | if( exclude ){ 2469 | exclude--; 2470 | if( exclude==0 ){ 2471 | for(j=start; jfilename; 2523 | ps.errorcnt = 0; 2524 | ps.state = INITIALIZE; 2525 | 2526 | /* Begin by reading the input file */ 2527 | fp = fopen(ps.filename,"rb"); 2528 | if( fp==0 ){ 2529 | ErrorMsg(ps.filename,0,"Can't open this file for reading."); 2530 | gp->errorcnt++; 2531 | return; 2532 | } 2533 | fseek(fp,0,2); 2534 | filesize = ftell(fp); 2535 | rewind(fp); 2536 | filebuf = (char *)malloc( filesize+1 ); 2537 | if( filebuf==0 ){ 2538 | ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.", 2539 | filesize+1); 2540 | gp->errorcnt++; 2541 | return; 2542 | } 2543 | if( fread(filebuf,1,filesize,fp)!=filesize ){ 2544 | ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", 2545 | filesize); 2546 | free(filebuf); 2547 | gp->errorcnt++; 2548 | return; 2549 | } 2550 | fclose(fp); 2551 | filebuf[filesize] = 0; 2552 | 2553 | /* Make an initial pass through the file to handle %ifdef and %ifndef */ 2554 | preprocess_input(filebuf); 2555 | 2556 | /* Now scan the text of the input file */ 2557 | lineno = 1; 2558 | for(cp=filebuf; (c= *cp)!=0; ){ 2559 | if( 
c=='\n' ) lineno++; /* Keep track of the line number */ 2560 | if( isspace(c) ){ cp++; continue; } /* Skip all white space */ 2561 | if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ 2562 | cp+=2; 2563 | while( (c= *cp)!=0 && c!='\n' ) cp++; 2564 | continue; 2565 | } 2566 | if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ 2567 | cp+=2; 2568 | while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ 2569 | if( c=='\n' ) lineno++; 2570 | cp++; 2571 | } 2572 | if( c ) cp++; 2573 | continue; 2574 | } 2575 | ps.tokenstart = cp; /* Mark the beginning of the token */ 2576 | ps.tokenlineno = lineno; /* Linenumber on which token begins */ 2577 | if( c=='\"' ){ /* String literals */ 2578 | cp++; 2579 | while( (c= *cp)!=0 && c!='\"' ){ 2580 | if( c=='\n' ) lineno++; 2581 | cp++; 2582 | } 2583 | if( c==0 ){ 2584 | ErrorMsg(ps.filename,startline, 2585 | "String starting on this line is not terminated before the end of the file."); 2586 | ps.errorcnt++; 2587 | nextcp = cp; 2588 | }else{ 2589 | nextcp = cp+1; 2590 | } 2591 | }else if( c=='{' ){ /* A block of C code */ 2592 | int level; 2593 | cp++; 2594 | for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ 2595 | if( c=='\n' ) lineno++; 2596 | else if( c=='{' ) level++; 2597 | else if( c=='}' ) level--; 2598 | else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ 2599 | int prevc; 2600 | cp = &cp[2]; 2601 | prevc = 0; 2602 | while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ 2603 | if( c=='\n' ) lineno++; 2604 | prevc = c; 2605 | cp++; 2606 | } 2607 | }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ 2608 | cp = &cp[2]; 2609 | while( (c= *cp)!=0 && c!='\n' ) cp++; 2610 | if( c ) lineno++; 2611 | }else if( c=='\'' || c=='\"' ){ /* String a character literals */ 2612 | int startchar, prevc; 2613 | startchar = c; 2614 | prevc = 0; 2615 | for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ 2616 | if( c=='\n' ) lineno++; 2617 | if( prevc=='\\' ) prevc = 0; 2618 | else prevc = c; 2619 | } 2620 | } 
2621 | } 2622 | if( c==0 ){ 2623 | ErrorMsg(ps.filename,ps.tokenlineno, 2624 | "C code starting on this line is not terminated before the end of the file."); 2625 | ps.errorcnt++; 2626 | nextcp = cp; 2627 | }else{ 2628 | nextcp = cp+1; 2629 | } 2630 | }else if( isalnum(c) ){ /* Identifiers */ 2631 | while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++; 2632 | nextcp = cp; 2633 | }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ 2634 | cp += 3; 2635 | nextcp = cp; 2636 | }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){ 2637 | cp += 2; 2638 | while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++; 2639 | nextcp = cp; 2640 | }else{ /* All other (one character) operators */ 2641 | cp++; 2642 | nextcp = cp; 2643 | } 2644 | c = *cp; 2645 | *cp = 0; /* Null terminate the token */ 2646 | parseonetoken(&ps); /* Parse the token */ 2647 | *cp = c; /* Restore the buffer */ 2648 | cp = nextcp; 2649 | } 2650 | free(filebuf); /* Release the buffer after parsing */ 2651 | gp->rule = ps.firstrule; 2652 | gp->errorcnt = ps.errorcnt; 2653 | } 2654 | /*************************** From the file "plink.c" *********************/ 2655 | /* 2656 | ** Routines processing configuration follow-set propagation links 2657 | ** in the LEMON parser generator. 
2658 | */ 2659 | static struct plink *plink_freelist = 0; 2660 | 2661 | /* Allocate a new plink */ 2662 | struct plink *Plink_new(){ 2663 | struct plink *newlink; 2664 | 2665 | if( plink_freelist==0 ){ 2666 | int i; 2667 | int amt = 100; 2668 | plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); 2669 | if( plink_freelist==0 ){ 2670 | fprintf(stderr, 2671 | "Unable to allocate memory for a new follow-set propagation link.\n"); 2672 | exit(1); 2673 | } 2674 | for(i=0; inext; 2679 | return newlink; 2680 | } 2681 | 2682 | /* Add a plink to a plink list */ 2683 | void Plink_add(struct plink **plpp, struct config *cfp) 2684 | { 2685 | struct plink *newlink; 2686 | newlink = Plink_new(); 2687 | newlink->next = *plpp; 2688 | *plpp = newlink; 2689 | newlink->cfp = cfp; 2690 | } 2691 | 2692 | /* Transfer every plink on the list "from" to the list "to" */ 2693 | void Plink_copy(struct plink **to, struct plink *from) 2694 | { 2695 | struct plink *nextpl; 2696 | while( from ){ 2697 | nextpl = from->next; 2698 | from->next = *to; 2699 | *to = from; 2700 | from = nextpl; 2701 | } 2702 | } 2703 | 2704 | /* Delete every plink on the list */ 2705 | void Plink_delete(struct plink *plp) 2706 | { 2707 | struct plink *nextpl; 2708 | 2709 | while( plp ){ 2710 | nextpl = plp->next; 2711 | plp->next = plink_freelist; 2712 | plink_freelist = plp; 2713 | plp = nextpl; 2714 | } 2715 | } 2716 | /*********************** From the file "report.c" **************************/ 2717 | /* 2718 | ** Procedures for generating reports and tables in the LEMON parser generator. 2719 | */ 2720 | 2721 | /* Generate a filename with the given suffix. Space to hold the 2722 | ** name comes from malloc() and must be freed by the calling 2723 | ** function. 
2724 | */ 2725 | PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) 2726 | { 2727 | char *name; 2728 | char *cp; 2729 | 2730 | name = (char*)malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 ); 2731 | if( name==0 ){ 2732 | fprintf(stderr,"Can't allocate space for a filename.\n"); 2733 | exit(1); 2734 | } 2735 | strcpy(name,lemp->filename); 2736 | cp = strrchr(name,'.'); 2737 | if( cp ) *cp = 0; 2738 | strcat(name,suffix); 2739 | return name; 2740 | } 2741 | 2742 | /* Open a file with a name based on the name of the input file, 2743 | ** but with a different (specified) suffix, and return a pointer 2744 | ** to the stream */ 2745 | PRIVATE FILE *file_open( 2746 | struct lemon *lemp, 2747 | const char *suffix, 2748 | const char *mode 2749 | ){ 2750 | FILE *fp; 2751 | 2752 | if( lemp->outname ) free(lemp->outname); 2753 | lemp->outname = file_makename(lemp, suffix); 2754 | fp = fopen(lemp->outname,mode); 2755 | if( fp==0 && *mode=='w' ){ 2756 | fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); 2757 | lemp->errorcnt++; 2758 | return 0; 2759 | } 2760 | 2761 | /* Add files we create to a list, so we can delete them if we fail. This 2762 | ** is to keep makefiles from getting confused. We don't include .out files, 2763 | ** though: this is debug information, and you don't want it deleted if there 2764 | ** was an error you need to track down. 
2765 | */ 2766 | if(( *mode=='w' ) && (strcmp(suffix, ".out") != 0)){ 2767 | const char **ptr = (const char **) 2768 | realloc(made_files, sizeof (const char **) * (made_files_count + 1)); 2769 | const char *fname = Strsafe(lemp->outname); 2770 | if ((ptr == NULL) || (fname == NULL)) { 2771 | free(ptr); 2772 | memory_error(); 2773 | } 2774 | made_files = ptr; 2775 | made_files[made_files_count++] = fname; 2776 | } 2777 | return fp; 2778 | } 2779 | 2780 | /* Duplicate the input file without comments and without actions 2781 | ** on rules */ 2782 | void Reprint(struct lemon *lemp) 2783 | { 2784 | struct rule *rp; 2785 | struct symbol *sp; 2786 | int i, j, maxlen, len, ncolumns, skip; 2787 | printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); 2788 | maxlen = 10; 2789 | for(i=0; insymbol; i++){ 2790 | sp = lemp->symbols[i]; 2791 | len = lemonStrlen(sp->name); 2792 | if( len>maxlen ) maxlen = len; 2793 | } 2794 | ncolumns = 76/(maxlen+5); 2795 | if( ncolumns<1 ) ncolumns = 1; 2796 | skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; 2797 | for(i=0; insymbol; j+=skip){ 2800 | sp = lemp->symbols[j]; 2801 | assert( sp->index==j ); 2802 | printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); 2803 | } 2804 | printf("\n"); 2805 | } 2806 | for(rp=lemp->rule; rp; rp=rp->next){ 2807 | printf("%s",rp->lhs->name); 2808 | /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ 2809 | printf(" ::="); 2810 | for(i=0; inrhs; i++){ 2811 | sp = rp->rhs[i]; 2812 | printf(" %s", sp->name); 2813 | if( sp->type==MULTITERMINAL ){ 2814 | for(j=1; jnsubsym; j++){ 2815 | printf("|%s", sp->subsym[j]->name); 2816 | } 2817 | } 2818 | /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ 2819 | } 2820 | printf("."); 2821 | if( rp->precsym ) printf(" [%s]",rp->precsym->name); 2822 | /* if( rp->code ) printf("\n %s",rp->code); */ 2823 | printf("\n"); 2824 | } 2825 | } 2826 | 2827 | void ConfigPrint(FILE *fp, struct config *cfp) 2828 | { 2829 | struct rule *rp; 2830 | struct symbol 
*sp; 2831 | int i, j; 2832 | rp = cfp->rp; 2833 | fprintf(fp,"%s ::=",rp->lhs->name); 2834 | for(i=0; i<=rp->nrhs; i++){ 2835 | if( i==cfp->dot ) fprintf(fp," *"); 2836 | if( i==rp->nrhs ) break; 2837 | sp = rp->rhs[i]; 2838 | fprintf(fp," %s", sp->name); 2839 | if( sp->type==MULTITERMINAL ){ 2840 | for(j=1; jnsubsym; j++){ 2841 | fprintf(fp,"|%s",sp->subsym[j]->name); 2842 | } 2843 | } 2844 | } 2845 | } 2846 | 2847 | /* #define TEST */ 2848 | #if 0 2849 | /* Print a set */ 2850 | PRIVATE void SetPrint(out,set,lemp) 2851 | FILE *out; 2852 | char *set; 2853 | struct lemon *lemp; 2854 | { 2855 | int i; 2856 | char *spacer; 2857 | spacer = ""; 2858 | fprintf(out,"%12s[",""); 2859 | for(i=0; interminal; i++){ 2860 | if( SetFind(set,i) ){ 2861 | fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); 2862 | spacer = " "; 2863 | } 2864 | } 2865 | fprintf(out,"]\n"); 2866 | } 2867 | 2868 | /* Print a plink chain */ 2869 | PRIVATE void PlinkPrint(out,plp,tag) 2870 | FILE *out; 2871 | struct plink *plp; 2872 | char *tag; 2873 | { 2874 | while( plp ){ 2875 | fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); 2876 | ConfigPrint(out,plp->cfp); 2877 | fprintf(out,"\n"); 2878 | plp = plp->next; 2879 | } 2880 | } 2881 | #endif 2882 | 2883 | /* Print an action to the given file descriptor. Return FALSE if 2884 | ** nothing was actually printed. 
2885 | */ 2886 | int PrintAction(struct action *ap, FILE *fp, int indent){ 2887 | int result = 1; 2888 | switch( ap->type ){ 2889 | case SHIFT: 2890 | fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->statenum); 2891 | break; 2892 | case REDUCE: 2893 | fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index); 2894 | break; 2895 | case ACCEPT: 2896 | fprintf(fp,"%*s accept",indent,ap->sp->name); 2897 | break; 2898 | case ERROR: 2899 | fprintf(fp,"%*s error",indent,ap->sp->name); 2900 | break; 2901 | case SRCONFLICT: 2902 | case RRCONFLICT: 2903 | fprintf(fp,"%*s reduce %-3d ** Parsing conflict **", 2904 | indent,ap->sp->name,ap->x.rp->index); 2905 | break; 2906 | case SSCONFLICT: 2907 | fprintf(fp,"%*s shift %-3d ** Parsing conflict **", 2908 | indent,ap->sp->name,ap->x.stp->statenum); 2909 | break; 2910 | case SH_RESOLVED: 2911 | if( showPrecedenceConflict ){ 2912 | fprintf(fp,"%*s shift %-3d -- dropped by precedence", 2913 | indent,ap->sp->name,ap->x.stp->statenum); 2914 | }else{ 2915 | result = 0; 2916 | } 2917 | break; 2918 | case RD_RESOLVED: 2919 | if( showPrecedenceConflict ){ 2920 | fprintf(fp,"%*s reduce %-3d -- dropped by precedence", 2921 | indent,ap->sp->name,ap->x.rp->index); 2922 | }else{ 2923 | result = 0; 2924 | } 2925 | break; 2926 | case NOT_USED: 2927 | result = 0; 2928 | break; 2929 | } 2930 | return result; 2931 | } 2932 | 2933 | /* Generate the "y.output" log file */ 2934 | void ReportOutput(struct lemon *lemp) 2935 | { 2936 | int i; 2937 | struct state *stp; 2938 | struct config *cfp; 2939 | struct action *ap; 2940 | FILE *fp; 2941 | 2942 | fp = file_open(lemp,".out","wb"); 2943 | if( fp==0 ) return; 2944 | for(i=0; instate; i++){ 2945 | stp = lemp->sorted[i]; 2946 | fprintf(fp,"State %d:\n",stp->statenum); 2947 | if( lemp->basisflag ) cfp=stp->bp; 2948 | else cfp=stp->cfp; 2949 | while( cfp ){ 2950 | char buf[20]; 2951 | if( cfp->dot==cfp->rp->nrhs ){ 2952 | sprintf(buf,"(%d)",cfp->rp->index); 2953 | fprintf(fp," %5s ",buf); 
2954 | }else{ 2955 | fprintf(fp," "); 2956 | } 2957 | ConfigPrint(fp,cfp); 2958 | fprintf(fp,"\n"); 2959 | #if 0 2960 | SetPrint(fp,cfp->fws,lemp); 2961 | PlinkPrint(fp,cfp->fplp,"To "); 2962 | PlinkPrint(fp,cfp->bplp,"From"); 2963 | #endif 2964 | if( lemp->basisflag ) cfp=cfp->bp; 2965 | else cfp=cfp->next; 2966 | } 2967 | fprintf(fp,"\n"); 2968 | for(ap=stp->ap; ap; ap=ap->next){ 2969 | if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); 2970 | } 2971 | fprintf(fp,"\n"); 2972 | } 2973 | fprintf(fp, "----------------------------------------------------\n"); 2974 | fprintf(fp, "Symbols:\n"); 2975 | for(i=0; insymbol; i++){ 2976 | int j; 2977 | struct symbol *sp; 2978 | 2979 | sp = lemp->symbols[i]; 2980 | fprintf(fp, " %3d: %s", i, sp->name); 2981 | if( sp->type==NONTERMINAL ){ 2982 | fprintf(fp, ":"); 2983 | if( sp->lambda ){ 2984 | fprintf(fp, " "); 2985 | } 2986 | for(j=0; jnterminal; j++){ 2987 | if( sp->firstset && SetFind(sp->firstset, j) ){ 2988 | fprintf(fp, " %s", lemp->symbols[j]->name); 2989 | } 2990 | } 2991 | } 2992 | fprintf(fp, "\n"); 2993 | } 2994 | fclose(fp); 2995 | return; 2996 | } 2997 | 2998 | /* Search for the file "name" which is in the same directory as 2999 | ** the exacutable */ 3000 | PRIVATE char *pathsearch(char *argv0, char *name, int modemask) 3001 | { 3002 | const char *pathlist; 3003 | char *pathbufptr; 3004 | char *pathbuf; 3005 | char *path,*cp; 3006 | char c; 3007 | 3008 | #ifdef __WIN32__ 3009 | cp = strrchr(argv0,'\\'); 3010 | #else 3011 | cp = strrchr(argv0,'/'); 3012 | #endif 3013 | if( cp ){ 3014 | c = *cp; 3015 | *cp = 0; 3016 | path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); 3017 | if( path ) sprintf(path,"%s/%s",argv0,name); 3018 | *cp = c; 3019 | }else{ 3020 | pathlist = getenv("PATH"); 3021 | if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; 3022 | pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); 3023 | path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); 3024 | if( (pathbuf != 0) && 
(path!=0) ){ 3025 | pathbufptr = pathbuf; 3026 | strcpy(pathbuf, pathlist); 3027 | while( *pathbuf ){ 3028 | cp = strchr(pathbuf,':'); 3029 | if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; 3030 | c = *cp; 3031 | *cp = 0; 3032 | sprintf(path,"%s/%s",pathbuf,name); 3033 | *cp = c; 3034 | if( c==0 ) pathbuf[0] = 0; 3035 | else pathbuf = &cp[1]; 3036 | if( access(path,modemask)==0 ) break; 3037 | } 3038 | free(pathbufptr); 3039 | } 3040 | } 3041 | return path; 3042 | } 3043 | 3044 | /* Given an action, compute the integer value for that action 3045 | ** which is to be put in the action table of the generated machine. 3046 | ** Return negative if no action should be generated. 3047 | */ 3048 | PRIVATE int compute_action(struct lemon *lemp, struct action *ap) 3049 | { 3050 | int act; 3051 | switch( ap->type ){ 3052 | case SHIFT: act = ap->x.stp->statenum; break; 3053 | case REDUCE: act = ap->x.rp->index + lemp->nstate; break; 3054 | case ERROR: act = lemp->nstate + lemp->nrule; break; 3055 | case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break; 3056 | default: act = -1; break; 3057 | } 3058 | return act; 3059 | } 3060 | 3061 | #define LINESIZE 1000 3062 | /* The next cluster of routines are for reading the template file 3063 | ** and writing the results to the generated parser */ 3064 | /* The first function transfers data from "in" to "out" until 3065 | ** a line is seen which begins with "%%". The line number is 3066 | ** tracked. 3067 | ** 3068 | ** if name!=0, then any word that begin with "Parse" is changed to 3069 | ** begin with *name instead. 
3070 | */ 3071 | PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) 3072 | { 3073 | int i, iStart; 3074 | char line[LINESIZE]; 3075 | while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ 3076 | (*lineno)++; 3077 | iStart = 0; 3078 | if( name ){ 3079 | for(i=0; line[i]; i++){ 3080 | if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 3081 | && (i==0 || !isalpha(line[i-1])) 3082 | ){ 3083 | if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); 3084 | fprintf(out,"%s",name); 3085 | i += 4; 3086 | iStart = i+1; 3087 | } 3088 | } 3089 | } 3090 | fprintf(out,"%s",&line[iStart]); 3091 | } 3092 | } 3093 | 3094 | /* The next function finds the template file and opens it, returning 3095 | ** a pointer to the opened file. */ 3096 | PRIVATE FILE *tplt_open(struct lemon *lemp) 3097 | { 3098 | static char templatename[] = "lempar.go"; 3099 | char buf[1000]; 3100 | FILE *in; 3101 | char *tpltname; 3102 | char *cp; 3103 | 3104 | /* first, see if user specified a template filename on the command line. 
*/ 3105 | if (user_templatename != 0) { 3106 | if( access(user_templatename,004)==-1 ){ 3107 | fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", 3108 | user_templatename); 3109 | lemp->errorcnt++; 3110 | return 0; 3111 | } 3112 | in = fopen(user_templatename,"rb"); 3113 | if( in==0 ){ 3114 | fprintf(stderr,"Can't open the template file \"%s\".\n",user_templatename); 3115 | lemp->errorcnt++; 3116 | return 0; 3117 | } 3118 | return in; 3119 | } 3120 | 3121 | cp = strrchr(lemp->filename,'.'); 3122 | if( cp ){ 3123 | sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); 3124 | }else{ 3125 | sprintf(buf,"%s.lt",lemp->filename); 3126 | } 3127 | if( access(buf,004)==0 ){ 3128 | tpltname = buf; 3129 | }else if( access(templatename,004)==0 ){ 3130 | tpltname = templatename; 3131 | }else{ 3132 | tpltname = pathsearch(lemp->argv0,templatename,0); 3133 | } 3134 | if( tpltname==0 ){ 3135 | fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", 3136 | templatename); 3137 | lemp->errorcnt++; 3138 | return 0; 3139 | } 3140 | in = fopen(tpltname,"rb"); 3141 | if( in==0 ){ 3142 | fprintf(stderr,"Can't open the template file \"%s\".\n",templatename); 3143 | lemp->errorcnt++; 3144 | return 0; 3145 | } 3146 | return in; 3147 | } 3148 | 3149 | /* Print a #line directive line to the output file. 
*/ 3150 | PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) 3151 | { 3152 | fprintf(out,"//line "); 3153 | while( *filename ){ 3154 | if( *filename == '\\' ) putc('\\',out); 3155 | putc(*filename,out); 3156 | filename++; 3157 | } 3158 | fprintf(out,":%d\n", lineno); 3159 | } 3160 | 3161 | /* Print a string to the file and keep the linenumber up to date */ 3162 | PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) 3163 | { 3164 | if( str==0 ) return; 3165 | while( *str ){ 3166 | putc(*str,out); 3167 | if( *str=='\n' ) (*lineno)++; 3168 | str++; 3169 | } 3170 | if( str[-1]!='\n' ){ 3171 | putc('\n',out); 3172 | (*lineno)++; 3173 | } 3174 | return; 3175 | } 3176 | 3177 | /* 3178 | ** The following routine emits code for the destructor for the 3179 | ** symbol sp 3180 | */ 3181 | void emit_destructor_code( 3182 | FILE *out, 3183 | struct symbol *sp, 3184 | struct lemon *lemp, 3185 | int *lineno 3186 | ){ 3187 | char *cp = 0; 3188 | 3189 | if( sp->type==TERMINAL ){ 3190 | cp = lemp->tokendest; 3191 | if( cp==0 ) return; 3192 | fprintf(out,"{\n"); (*lineno)++; 3193 | }else if( sp->destructor ){ 3194 | cp = sp->destructor; 3195 | fprintf(out,"{\n"); (*lineno)++; 3196 | if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); } 3197 | }else if( lemp->vardest ){ 3198 | cp = lemp->vardest; 3199 | if( cp==0 ) return; 3200 | fprintf(out,"{\n"); (*lineno)++; 3201 | }else{ 3202 | assert( 0 ); /* Cannot happen */ 3203 | } 3204 | for(; *cp; cp++){ 3205 | if( *cp=='$' && cp[1]=='$' ){ 3206 | fprintf(out,"(yypminor->yy%d)",sp->dtnum); 3207 | cp++; 3208 | continue; 3209 | } 3210 | if( *cp=='\n' ) (*lineno)++; 3211 | fputc(*cp,out); 3212 | } 3213 | fprintf(out,"\n"); (*lineno)++; 3214 | fprintf(out,"}\n"); (*lineno)++; 3215 | return; 3216 | } 3217 | 3218 | /* 3219 | ** Return TRUE (non-zero) if the given symbol has a destructor. 
3220 | */ 3221 | int has_destructor(struct symbol *sp, struct lemon *lemp) 3222 | { 3223 | int ret; 3224 | if( sp->type==TERMINAL ){ 3225 | ret = lemp->tokendest!=0; 3226 | }else{ 3227 | ret = lemp->vardest!=0 || sp->destructor!=0; 3228 | } 3229 | return ret; 3230 | } 3231 | 3232 | /* 3233 | ** Append text to a dynamically allocated string. If zText is 0 then 3234 | ** reset the string to be empty again. Always return the complete text 3235 | ** of the string (which is overwritten with each call). 3236 | ** 3237 | ** n bytes of zText are stored. If n==0 then all of zText up to the first 3238 | ** \000 terminator is stored. zText can contain up to two instances of 3239 | ** %d. The values of p1 and p2 are written into the first and second 3240 | ** %d. 3241 | ** 3242 | ** If n==-1, then the previous character is overwritten. 3243 | */ 3244 | PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ 3245 | static char empty[1] = { 0 }; 3246 | static char *z = 0; 3247 | static int alloced = 0; 3248 | static int used = 0; 3249 | int c; 3250 | char zInt[40]; 3251 | if( zText==0 ){ 3252 | used = 0; 3253 | return z; 3254 | } 3255 | if( n<=0 ){ 3256 | if( n<0 ){ 3257 | used += n; 3258 | assert( used>=0 ); 3259 | } 3260 | n = lemonStrlen(zText); 3261 | } 3262 | if( n+sizeof(zInt)*2+used >= alloced ){ 3263 | alloced = n + sizeof(zInt)*2 + used + 200; 3264 | z = (char *) realloc(z, alloced); 3265 | } 3266 | if( z==0 ) return empty; 3267 | while( n-- > 0 ){ 3268 | c = *(zText++); 3269 | if( c=='%' && n>0 && zText[0]=='d' ){ 3270 | sprintf(zInt, "%d", p1); 3271 | p1 = p2; 3272 | strcpy(&z[used], zInt); 3273 | used += lemonStrlen(&z[used]); 3274 | zText++; 3275 | n--; 3276 | }else{ 3277 | z[used++] = c; 3278 | } 3279 | } 3280 | z[used] = 0; 3281 | return z; 3282 | } 3283 | 3284 | /* 3285 | ** zCode is a string that is the action associated with a rule. Expand 3286 | ** the symbols in this string so that the refer to elements of the parser 3287 | ** stack. 
3288 | */ 3289 | PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){ 3290 | char *cp, *xp; 3291 | int i; 3292 | char lhsused = 0; /* True if the LHS element has been used */ 3293 | char used[MAXRHS]; /* True for each RHS element which is used */ 3294 | 3295 | for(i=0; inrhs; i++) used[i] = 0; 3296 | lhsused = 0; 3297 | 3298 | if( rp->code==0 ){ 3299 | static char newlinestr[2] = { '\n', '\0' }; 3300 | rp->code = newlinestr; 3301 | rp->line = rp->ruleline; 3302 | } 3303 | 3304 | append_str(0,0,0,0); 3305 | 3306 | /* workaround for absence of unions, we're making a temp variable of the 3307 | * right type, then we use it and then box it back to the interface{} again 3308 | */ 3309 | append_str("__LHS := yy%d_or_default(yygotominor); ",0,rp->lhs->dtnum,0); 3310 | 3311 | /* This const cast is wrong but harmless, if we're careful. */ 3312 | for(cp=(char *)rp->code; *cp; cp++){ 3313 | if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){ 3314 | char saved; 3315 | for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++); 3316 | saved = *xp; 3317 | *xp = 0; 3318 | if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ 3319 | append_str("__LHS",0,0,0); 3320 | cp = xp; 3321 | lhsused = 1; 3322 | }else{ 3323 | for(i=0; inrhs; i++){ 3324 | if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ 3325 | if( cp!=rp->code && cp[-1]=='@' ){ 3326 | /* If the argument is of the form @X then substituted 3327 | ** the token number of X, not the value of X */ 3328 | append_str("yystack[yyidx+(%d)].major",-1,i-rp->nrhs+1,0); 3329 | }else{ 3330 | struct symbol *sp = rp->rhs[i]; 3331 | int dtnum; 3332 | if( sp->type==MULTITERMINAL ){ 3333 | dtnum = sp->subsym[0]->dtnum; 3334 | }else{ 3335 | dtnum = sp->dtnum; 3336 | } 3337 | append_str("yy%d(yystack[yyidx+(%d)].minor)",0,dtnum,i-rp->nrhs+1); 3338 | } 3339 | cp = xp; 3340 | used[i] = 1; 3341 | break; 3342 | } 3343 | } 3344 | } 3345 | *xp = saved; 3346 | } 3347 | append_str(cp, 1, 0, 0); 3348 | } /* End loop */ 3349 | 
append_str("; yygotominor = __LHS; ",0,rp->lhs->dtnum,0); 3350 | 3351 | /* Check to make sure the LHS has been used */ 3352 | if( rp->lhsalias && !lhsused ){ 3353 | ErrorMsg(lemp->filename,rp->ruleline, 3354 | "Label \"%s\" for \"%s(%s)\" is never used.", 3355 | rp->lhsalias,rp->lhs->name,rp->lhsalias); 3356 | lemp->errorcnt++; 3357 | } 3358 | 3359 | /* Generate destructor code for RHS symbols which are not used in the 3360 | ** reduce code */ 3361 | for(i=0; inrhs; i++){ 3362 | if( rp->rhsalias[i] && !used[i] ){ 3363 | ErrorMsg(lemp->filename,rp->ruleline, 3364 | "Label %s for \"%s(%s)\" is never used.", 3365 | rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); 3366 | lemp->errorcnt++; 3367 | }else if( rp->rhsalias[i]==0 ){ 3368 | if( has_destructor(rp->rhs[i],lemp) ){ 3369 | append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, 3370 | rp->rhs[i]->index,i-rp->nrhs+1); 3371 | }else{ 3372 | /* No destructor defined for this term */ 3373 | } 3374 | } 3375 | } 3376 | if( rp->code ){ 3377 | cp = append_str(0,0,0,0); 3378 | rp->code = Strsafe(cp?cp:""); 3379 | } 3380 | } 3381 | 3382 | /* 3383 | ** Generate code which executes when the rule "rp" is reduced. Write 3384 | ** the code to "out". Make sure lineno stays up-to-date. 3385 | */ 3386 | PRIVATE void emit_code( 3387 | FILE *out, 3388 | struct rule *rp, 3389 | struct lemon *lemp, 3390 | int *lineno 3391 | ){ 3392 | const char *cp; 3393 | 3394 | /* Generate code to do the reduce action */ 3395 | if( rp->code ){ 3396 | if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); } 3397 | fprintf(out,"{%s",rp->code); 3398 | for(cp=rp->code; *cp; cp++){ 3399 | if( *cp=='\n' ) (*lineno)++; 3400 | } /* End loop */ 3401 | fprintf(out,"}\n"); (*lineno)++; 3402 | } /* End if( rp->code ) */ 3403 | 3404 | return; 3405 | } 3406 | 3407 | /* 3408 | ** Print the definition of the union used for the parser's data stack. 
3409 | ** This union contains fields for every possible data type for tokens 3410 | ** and nonterminals. In the process of computing and printing this 3411 | ** union, also set the ".dtnum" field of every terminal and nonterminal 3412 | ** symbol. 3413 | */ 3414 | void print_stack_union( 3415 | FILE *out, /* The output stream */ 3416 | struct lemon *lemp, /* The main info structure for this parser */ 3417 | int *plineno, /* Pointer to the line number */ 3418 | int mhflag /* True if generating makeheaders output */ 3419 | ){ 3420 | int lineno = *plineno; /* The line number of the output */ 3421 | char **types; /* A hash table of datatypes */ 3422 | int arraysize; /* Size of the "types" array */ 3423 | int maxdtlength; /* Maximum length of any ".datatype" field. */ 3424 | char *stddt; /* Standardized name for a datatype */ 3425 | int i,j; /* Loop counters */ 3426 | int hash; /* For hashing the name of a type */ 3427 | const char *name; /* Name of the parser */ 3428 | 3429 | /* Allocate and initialize types[] and allocate stddt[] */ 3430 | arraysize = lemp->nsymbol * 2; 3431 | types = (char**)calloc( arraysize, sizeof(char*) ); 3432 | for(i=0; ivartype ){ 3435 | maxdtlength = lemonStrlen(lemp->vartype); 3436 | } 3437 | for(i=0; insymbol; i++){ 3438 | int len; 3439 | struct symbol *sp = lemp->symbols[i]; 3440 | if( sp->datatype==0 ) continue; 3441 | len = lemonStrlen(sp->datatype); 3442 | if( len>maxdtlength ) maxdtlength = len; 3443 | } 3444 | stddt = (char*)malloc( maxdtlength*2 + 1 ); 3445 | if( types==0 || stddt==0 ){ 3446 | fprintf(stderr,"Out of memory.\n"); 3447 | exit(1); 3448 | } 3449 | 3450 | /* Build a hash table of datatypes. The ".dtnum" field of each symbol 3451 | ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is 3452 | ** used for terminal symbols. If there is no %default_type defined then 3453 | ** 0 is also used as the .dtnum value for nonterminals which do not specify 3454 | ** a datatype using the %type directive. 
3455 | */ 3456 | for(i=0; insymbol; i++){ 3457 | struct symbol *sp = lemp->symbols[i]; 3458 | char *cp; 3459 | if( sp==lemp->errsym ){ 3460 | sp->dtnum = arraysize+1; 3461 | continue; 3462 | } 3463 | if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ 3464 | sp->dtnum = 0; 3465 | continue; 3466 | } 3467 | cp = sp->datatype; 3468 | if( cp==0 ) cp = lemp->vartype; 3469 | j = 0; 3470 | while( isspace(*cp) ) cp++; 3471 | while( *cp ) stddt[j++] = *cp++; 3472 | while( j>0 && isspace(stddt[j-1]) ) j--; 3473 | stddt[j] = 0; 3474 | if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ 3475 | sp->dtnum = 0; 3476 | continue; 3477 | } 3478 | hash = 0; 3479 | for(j=0; stddt[j]; j++){ 3480 | hash = hash*53 + stddt[j]; 3481 | } 3482 | hash = (hash & 0x7fffffff)%arraysize; 3483 | while( types[hash] ){ 3484 | if( strcmp(types[hash],stddt)==0 ){ 3485 | sp->dtnum = hash + 1; 3486 | break; 3487 | } 3488 | hash++; 3489 | if( hash>=arraysize ) hash = 0; 3490 | } 3491 | if( types[hash]==0 ){ 3492 | sp->dtnum = hash + 1; 3493 | types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); 3494 | if( types[hash]==0 ){ 3495 | fprintf(stderr,"Out of memory.\n"); 3496 | exit(1); 3497 | } 3498 | strcpy(types[hash],stddt); 3499 | } 3500 | } 3501 | 3502 | /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ 3503 | name = lemp->name ? lemp->name : "Parse"; 3504 | lineno = *plineno; 3505 | fprintf(out,"type %sTOKENTYPE %s\n",name, 3506 | lemp->tokentype?lemp->tokentype:"interface{}"); lineno++; 3507 | /* ok, let's cheat here, instead of defining a union, we define functions for 3508 | * converting from interface{} type to the real type */ 3509 | fprintf(out,"func yy0(what interface{}) %sTOKENTYPE { return what.(%sTOKENTYPE) }\n", name, name); lineno++; 3510 | /* in case if the type is some kind of struct, default initialize it! 
*/ 3511 | fprintf(out,"func yy0_or_default(what interface{}) %sTOKENTYPE { " 3512 | "if what != nil { return what.(%sTOKENTYPE) }; " 3513 | "var def %sTOKENTYPE; return def " 3514 | "}\n",name,name,name); lineno++; 3515 | for(i=0; ierrsym->useCnt ){ 3525 | fprintf(out,"// int yy%d;\n",lemp->errsym->dtnum); lineno++; // TODO(nsf): do I need this? 3526 | } 3527 | free(stddt); 3528 | free(types); 3529 | //fprintf(out,"} YYMINORTYPE;\n"); lineno++; 3530 | fprintf(out, "type YYMINORTYPE interface{}\n"); 3531 | *plineno = lineno; 3532 | /*--------------------- COPY PASTED from ^^^ 3533 | name = lemp->name ? lemp->name : "Parse"; 3534 | lineno = *plineno; 3535 | if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } 3536 | fprintf(out,"#define %sTOKENTYPE %s\n",name, 3537 | lemp->tokentype?lemp->tokentype:"void*"); lineno++; 3538 | if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } 3539 | fprintf(out,"typedef union {\n"); lineno++; 3540 | fprintf(out," int yyinit;\n"); lineno++; 3541 | fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; 3542 | for(i=0; ierrsym->useCnt ){ 3548 | fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; 3549 | } 3550 | free(stddt); 3551 | free(types); 3552 | fprintf(out,"} YYMINORTYPE;\n"); lineno++; 3553 | *plineno = lineno; 3554 | */ 3555 | } 3556 | 3557 | /* 3558 | ** Return the name of a C datatype able to represent values between 3559 | ** lwr and upr, inclusive. 3560 | */ 3561 | static const char *minimum_size_type(int lwr, int upr){ 3562 | if( lwr>=0 ){ 3563 | if( upr<=255 ){ 3564 | return "uint8"; 3565 | }else if( upr<65535 ){ 3566 | return "uint16"; 3567 | }else{ 3568 | return "uint32"; 3569 | } 3570 | }else if( lwr>=-127 && upr<=127 ){ 3571 | return "int8"; 3572 | }else if( lwr>=-32767 && upr<32767 ){ 3573 | return "int16"; 3574 | }else{ 3575 | return "int32"; 3576 | } 3577 | } 3578 | 3579 | /* 3580 | ** Each state contains a set of token transaction and a set of 3581 | ** nonterminal transactions. 
Each of these sets makes an instance 3582 | ** of the following structure. An array of these structures is used 3583 | ** to order the creation of entries in the yy_action[] table. 3584 | */ 3585 | struct axset { 3586 | struct state *stp; /* A pointer to a state */ 3587 | int isTkn; /* True to use tokens. False for non-terminals */ 3588 | int nAction; /* Number of actions */ 3589 | int iOrder; /* Original order of action sets */ 3590 | }; 3591 | 3592 | /* 3593 | ** Compare to axset structures for sorting purposes 3594 | */ 3595 | static int axset_compare(const void *a, const void *b){ 3596 | struct axset *p1 = (struct axset*)a; 3597 | struct axset *p2 = (struct axset*)b; 3598 | int c; 3599 | c = p2->nAction - p1->nAction; 3600 | if( c==0 ){ 3601 | c = p2->iOrder - p1->iOrder; 3602 | } 3603 | assert( c!=0 || p1==p2 ); 3604 | return c; 3605 | } 3606 | 3607 | /* 3608 | ** Write text on "out" that describes the rule "rp". 3609 | */ 3610 | static void writeRuleText(FILE *out, struct rule *rp){ 3611 | int j; 3612 | fprintf(out,"%s ::=", rp->lhs->name); 3613 | for(j=0; jnrhs; j++){ 3614 | struct symbol *sp = rp->rhs[j]; 3615 | fprintf(out," %s", sp->name); 3616 | if( sp->type==MULTITERMINAL ){ 3617 | int k; 3618 | for(k=1; knsubsym; k++){ 3619 | fprintf(out,"|%s",sp->subsym[k]->name); 3620 | } 3621 | } 3622 | } 3623 | } 3624 | 3625 | 3626 | /* Generate C source code for the parser */ 3627 | void ReportTable( 3628 | struct lemon *lemp, 3629 | int mhflag /* Output in makeheaders format if true */ 3630 | ){ 3631 | FILE *out, *in; 3632 | char line[LINESIZE]; 3633 | int lineno; 3634 | struct state *stp; 3635 | struct action *ap; 3636 | struct rule *rp; 3637 | struct acttab *pActtab; 3638 | int i, j, n; 3639 | const char *name; 3640 | int mnTknOfst, mxTknOfst; 3641 | int mnNtOfst, mxNtOfst; 3642 | struct axset *ax; 3643 | 3644 | in = tplt_open(lemp); 3645 | if( in==0 ) return; 3646 | out = file_open(lemp,".go","wb"); 3647 | if( out==0 ){ 3648 | fclose(in); 3649 | return; 3650 | 
} 3651 | lineno = 1; 3652 | tplt_xfer(lemp->name,in,out,&lineno); 3653 | 3654 | /* Generate the include code, if any */ 3655 | tplt_print(out,lemp,lemp->include,&lineno); 3656 | if( mhflag ){ 3657 | char *name = file_makename(lemp, "_tokens.go"); 3658 | fprintf(out,"#include \"%s\"\n", name); lineno++; 3659 | free(name); 3660 | } 3661 | tplt_xfer(lemp->name,in,out,&lineno); 3662 | 3663 | /* Generate #defines for all tokens */ 3664 | if( mhflag ){ 3665 | const char *prefix; 3666 | if( lemp->tokenprefix ) prefix = lemp->tokenprefix; 3667 | else prefix = ""; 3668 | for(i=1; interminal; i++){ 3669 | fprintf(out,"const %s%-30s = %2d\n",prefix,lemp->symbols[i]->name,i); 3670 | lineno++; 3671 | } 3672 | } 3673 | tplt_xfer(lemp->name,in,out,&lineno); 3674 | 3675 | /* Generate the defines */ 3676 | fprintf(out,"type YYCODETYPE %s\n", 3677 | minimum_size_type(0, lemp->nsymbol+1)); lineno++; 3678 | fprintf(out,"const YYNOCODE = %d\n",lemp->nsymbol+1); lineno++; 3679 | fprintf(out,"type YYACTIONTYPE %s\n", 3680 | minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++; 3681 | fprintf(out,"const YYWILDCARD = %d\n", 3682 | lemp->wildcard ? lemp->wildcard->index : -1); lineno++; 3683 | print_stack_union(out,lemp,&lineno,mhflag); 3684 | name = lemp->name ? 
lemp->name : "Parse"; 3685 | /* 3686 | if( lemp->arg && lemp->arg[0] ){ 3687 | int i; 3688 | i = lemonStrlen(lemp->arg); 3689 | while( i>=1 && isspace(lemp->arg[i-1]) ) i--; 3690 | while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; 3691 | fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; 3692 | fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; 3693 | fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", 3694 | name,lemp->arg,&lemp->arg[i]); lineno++; 3695 | fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", 3696 | name,&lemp->arg[i],&lemp->arg[i]); lineno++; 3697 | }else{ 3698 | fprintf(out,"#define %sARG_SDECL\n",name); lineno++; 3699 | fprintf(out,"#define %sARG_PDECL\n",name); lineno++; 3700 | fprintf(out,"#define %sARG_FETCH\n",name); lineno++; 3701 | fprintf(out,"#define %sARG_STORE\n",name); lineno++; 3702 | } 3703 | */ 3704 | fprintf(out,"const YYNSTATE = %d\n",lemp->nstate); lineno++; 3705 | fprintf(out,"const YYNRULE = %d\n",lemp->nrule); lineno++; 3706 | fprintf(out,"const YYERRORSYMBOL = %d\n", 3707 | lemp->errsym->useCnt ? lemp->errsym->index : -1); lineno++; 3708 | tplt_xfer(lemp->name,in,out,&lineno); 3709 | 3710 | /* Generate the action table and its associates: 3711 | ** 3712 | ** yy_action[] A single table containing all actions. 3713 | ** yy_lookahead[] A table containing the lookahead for each entry in 3714 | ** yy_action. Used to detect hash collisions. 3715 | ** yy_shift_ofst[] For each state, the offset into yy_action for 3716 | ** shifting terminals. 3717 | ** yy_reduce_ofst[] For each state, the offset into yy_action for 3718 | ** shifting non-terminals after a reduce. 3719 | ** yy_default[] Default action for each state. 
3720 | */ 3721 | 3722 | /* Compute the actions on all states and count them up */ 3723 | ax = (struct axset *) calloc(lemp->nstate*2, sizeof(ax[0])); 3724 | if( ax==0 ){ 3725 | fprintf(stderr,"malloc failed\n"); 3726 | exit(1); 3727 | } 3728 | for(i=0; instate; i++){ 3729 | stp = lemp->sorted[i]; 3730 | ax[i*2].stp = stp; 3731 | ax[i*2].isTkn = 1; 3732 | ax[i*2].nAction = stp->nTknAct; 3733 | ax[i*2+1].stp = stp; 3734 | ax[i*2+1].isTkn = 0; 3735 | ax[i*2+1].nAction = stp->nNtAct; 3736 | } 3737 | mxTknOfst = mnTknOfst = 0; 3738 | mxNtOfst = mnNtOfst = 0; 3739 | 3740 | /* Compute the action table. In order to try to keep the size of the 3741 | ** action table to a minimum, the heuristic of placing the largest action 3742 | ** sets first is used. 3743 | */ 3744 | for(i=0; instate*2; i++) ax[i].iOrder = i; 3745 | qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare); 3746 | pActtab = acttab_alloc(); 3747 | for(i=0; instate*2 && ax[i].nAction>0; i++){ 3748 | stp = ax[i].stp; 3749 | if( ax[i].isTkn ){ 3750 | for(ap=stp->ap; ap; ap=ap->next){ 3751 | int action; 3752 | if( ap->sp->index>=lemp->nterminal ) continue; 3753 | action = compute_action(lemp, ap); 3754 | if( action<0 ) continue; 3755 | acttab_action(pActtab, ap->sp->index, action); 3756 | } 3757 | stp->iTknOfst = acttab_insert(pActtab); 3758 | if( stp->iTknOfstiTknOfst; 3759 | if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; 3760 | }else{ 3761 | for(ap=stp->ap; ap; ap=ap->next){ 3762 | int action; 3763 | if( ap->sp->indexnterminal ) continue; 3764 | if( ap->sp->index==lemp->nsymbol ) continue; 3765 | action = compute_action(lemp, ap); 3766 | if( action<0 ) continue; 3767 | acttab_action(pActtab, ap->sp->index, action); 3768 | } 3769 | stp->iNtOfst = acttab_insert(pActtab); 3770 | if( stp->iNtOfstiNtOfst; 3771 | if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; 3772 | } 3773 | } 3774 | free(ax); 3775 | 3776 | /* Output the yy_action table */ 3777 | n = acttab_size(pActtab); 3778 | 
//fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; 3779 | fprintf(out,"var yy_action = []YYACTIONTYPE{\n"); lineno++; 3780 | for(i=j=0; instate + lemp->nrule + 2; 3783 | if( j==0 ) fprintf(out," /* %5d */ ", i); 3784 | fprintf(out, " %4d,", action); 3785 | if( j==9 || i==n-1 ){ 3786 | fprintf(out, "\n"); lineno++; 3787 | j = 0; 3788 | }else{ 3789 | j++; 3790 | } 3791 | } 3792 | fprintf(out, "}\n"); lineno++; 3793 | 3794 | /* Output the yy_lookahead table */ 3795 | fprintf(out,"var yy_lookahead = []YYCODETYPE{\n"); lineno++; 3796 | for(i=j=0; insymbol; 3799 | if( j==0 ) fprintf(out," /* %5d */ ", i); 3800 | fprintf(out, " %4d,", la); 3801 | if( j==9 || i==n-1 ){ 3802 | fprintf(out, "\n"); lineno++; 3803 | j = 0; 3804 | }else{ 3805 | j++; 3806 | } 3807 | } 3808 | fprintf(out, "}\n"); lineno++; 3809 | 3810 | /* Output the yy_shift_ofst[] table */ 3811 | fprintf(out, "const YY_SHIFT_USE_DFLT = %d\n", mnTknOfst-1); lineno++; 3812 | n = lemp->nstate; 3813 | while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; 3814 | fprintf(out, "const YY_SHIFT_COUNT = %d\n", n-1); lineno++; 3815 | fprintf(out, "const YY_SHIFT_MIN = %d\n", mnTknOfst); lineno++; 3816 | fprintf(out, "const YY_SHIFT_MAX = %d\n", mxTknOfst); lineno++; 3817 | fprintf(out, "var yy_shift_ofst = []%s{\n", 3818 | minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++; 3819 | for(i=j=0; isorted[i]; 3822 | ofst = stp->iTknOfst; 3823 | if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1; 3824 | if( j==0 ) fprintf(out," /* %5d */ ", i); 3825 | fprintf(out, " %4d,", ofst); 3826 | if( j==9 || i==n-1 ){ 3827 | fprintf(out, "\n"); lineno++; 3828 | j = 0; 3829 | }else{ 3830 | j++; 3831 | } 3832 | } 3833 | fprintf(out, "}\n"); lineno++; 3834 | 3835 | /* Output the yy_reduce_ofst[] table */ 3836 | fprintf(out, "const YY_REDUCE_USE_DFLT = %d\n", mnNtOfst-1); lineno++; 3837 | n = lemp->nstate; 3838 | while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; 3839 | fprintf(out, "const YY_REDUCE_COUNT = %d\n", n-1); 
lineno++; 3840 | fprintf(out, "const YY_REDUCE_MIN = %d\n", mnNtOfst); lineno++; 3841 | fprintf(out, "const YY_REDUCE_MAX = %d\n", mxNtOfst); lineno++; 3842 | fprintf(out, "var yy_reduce_ofst = []%s{\n", 3843 | minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++; 3844 | for(i=j=0; isorted[i]; 3847 | ofst = stp->iNtOfst; 3848 | if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; 3849 | if( j==0 ) fprintf(out," /* %5d */ ", i); 3850 | fprintf(out, " %4d,", ofst); 3851 | if( j==9 || i==n-1 ){ 3852 | fprintf(out, "\n"); lineno++; 3853 | j = 0; 3854 | }else{ 3855 | j++; 3856 | } 3857 | } 3858 | fprintf(out, "}\n"); lineno++; 3859 | 3860 | /* Output the default action table */ 3861 | fprintf(out, "var yy_default = []YYACTIONTYPE{\n"); lineno++; 3862 | n = lemp->nstate; 3863 | for(i=j=0; isorted[i]; 3865 | if( j==0 ) fprintf(out," /* %5d */ ", i); 3866 | fprintf(out, " %4d,", stp->iDflt); 3867 | if( j==9 || i==n-1 ){ 3868 | fprintf(out, "\n"); lineno++; 3869 | j = 0; 3870 | }else{ 3871 | j++; 3872 | } 3873 | } 3874 | fprintf(out, "}\n"); lineno++; 3875 | tplt_xfer(lemp->name,in,out,&lineno); 3876 | 3877 | /* Generate the table of fallback tokens. 
3878 | */ 3879 | if( lemp->has_fallback ){ 3880 | int mx = lemp->nterminal - 1; 3881 | while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } 3882 | for(i=0; i<=mx; i++){ 3883 | struct symbol *p = lemp->symbols[i]; 3884 | if( p->fallback==0 ){ 3885 | fprintf(out, " 0, /* %10s => nothing */\n", p->name); 3886 | }else{ 3887 | fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, 3888 | p->name, p->fallback->name); 3889 | } 3890 | lineno++; 3891 | } 3892 | } 3893 | tplt_xfer(lemp->name, in, out, &lineno); 3894 | 3895 | /* Generate a table containing the symbolic name of every symbol 3896 | */ 3897 | for(i=0; insymbol; i++){ 3898 | sprintf(line,"\"%s\",",lemp->symbols[i]->name); 3899 | fprintf(out," %-15s",line); 3900 | if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } 3901 | } 3902 | if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } 3903 | tplt_xfer(lemp->name,in,out,&lineno); 3904 | 3905 | /* Generate a table containing a text string that describes every 3906 | ** rule in the rule set of the grammar. This information is used 3907 | ** when tracing REDUCE actions. 3908 | */ 3909 | for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ 3910 | assert( rp->index==i ); 3911 | fprintf(out," /* %3d */ \"", i); 3912 | writeRuleText(out, rp); 3913 | fprintf(out,"\",\n"); lineno++; 3914 | } 3915 | tplt_xfer(lemp->name,in,out,&lineno); 3916 | 3917 | /* Generate code which executes every time a symbol is popped from 3918 | ** the stack while processing errors or while destroying the parser. 
3919 | ** (In other words, generate the %destructor actions) 3920 | */ 3921 | if( lemp->tokendest ){ 3922 | int once = 1; 3923 | for(i=0; insymbol; i++){ 3924 | struct symbol *sp = lemp->symbols[i]; 3925 | if( sp==0 || sp->type!=TERMINAL ) continue; 3926 | if( once ){ 3927 | fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; 3928 | once = 0; 3929 | } 3930 | fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3931 | } 3932 | for(i=0; insymbol && lemp->symbols[i]->type!=TERMINAL; i++); 3933 | if( insymbol ){ 3934 | emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); 3935 | } 3936 | } 3937 | if( lemp->vardest ){ 3938 | struct symbol *dflt_sp = 0; 3939 | int once = 1; 3940 | for(i=0; insymbol; i++){ 3941 | struct symbol *sp = lemp->symbols[i]; 3942 | if( sp==0 || sp->type==TERMINAL || 3943 | sp->index<=0 || sp->destructor!=0 ) continue; 3944 | if( once ){ 3945 | fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; 3946 | once = 0; 3947 | } 3948 | fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3949 | dflt_sp = sp; 3950 | } 3951 | if( dflt_sp!=0 ){ 3952 | emit_destructor_code(out,dflt_sp,lemp,&lineno); 3953 | } 3954 | } 3955 | for(i=0; insymbol; i++){ 3956 | struct symbol *sp = lemp->symbols[i]; 3957 | if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; 3958 | fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3959 | 3960 | /* Combine duplicate destructors into a single case */ 3961 | for(j=i+1; jnsymbol; j++){ 3962 | struct symbol *sp2 = lemp->symbols[j]; 3963 | if( sp2 && sp2->type!=TERMINAL && sp2->destructor 3964 | && sp2->dtnum==sp->dtnum 3965 | && strcmp(sp->destructor,sp2->destructor)==0 ){ 3966 | fprintf(out," case %d: /* %s */\n", 3967 | sp2->index, sp2->name); lineno++; 3968 | sp2->destructor = 0; 3969 | } 3970 | } 3971 | 3972 | emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); 3973 | } 3974 | tplt_xfer(lemp->name,in,out,&lineno); 3975 | 3976 | /* Generate code 
which executes whenever the parser stack overflows */ 3977 | tplt_print(out,lemp,lemp->overflow,&lineno); 3978 | tplt_xfer(lemp->name,in,out,&lineno); 3979 | 3980 | /* Generate the table of rule information 3981 | ** 3982 | ** Note: This code depends on the fact that rules are number 3983 | ** sequentually beginning with 0. 3984 | */ 3985 | for(rp=lemp->rule; rp; rp=rp->next){ 3986 | fprintf(out," ruleInfoEntry{ %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; 3987 | } 3988 | tplt_xfer(lemp->name,in,out,&lineno); 3989 | 3990 | /* Generate code which execution during each REDUCE action */ 3991 | for(rp=lemp->rule; rp; rp=rp->next){ 3992 | translate_code(lemp, rp); 3993 | } 3994 | /* First output rules other than the default: rule */ 3995 | for(rp=lemp->rule; rp; rp=rp->next){ 3996 | struct rule *rp2; /* Other rules with the same action */ 3997 | if( rp->code==0 ) continue; 3998 | if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */ 3999 | fprintf(out," case %d: /* ", rp->index); 4000 | writeRuleText(out, rp); 4001 | fprintf(out, " */\n"); lineno++; 4002 | for(rp2=rp->next; rp2; rp2=rp2->next){ 4003 | if( rp2->code==rp->code ){ 4004 | fprintf(out," case %d: /* ", rp2->index); 4005 | writeRuleText(out, rp2); 4006 | fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->index); lineno++; 4007 | rp2->code = 0; 4008 | } 4009 | } 4010 | emit_code(out,rp,lemp,&lineno); 4011 | rp->code = 0; 4012 | } 4013 | /* Finally, output the default: rule. We choose as the default: all 4014 | ** empty actions. 
*/ 4015 | fprintf(out," default:\n"); lineno++; 4016 | for(rp=lemp->rule; rp; rp=rp->next){ 4017 | if( rp->code==0 ) continue; 4018 | assert( rp->code[0]=='\n' && rp->code[1]==0 ); 4019 | fprintf(out," /* (%d) ", rp->index); 4020 | writeRuleText(out, rp); 4021 | fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->index); lineno++; 4022 | } 4023 | tplt_xfer(lemp->name,in,out,&lineno); 4024 | 4025 | /* Generate code which executes if a parse fails */ 4026 | tplt_print(out,lemp,lemp->failure,&lineno); 4027 | tplt_xfer(lemp->name,in,out,&lineno); 4028 | 4029 | /* Generate code which executes when a syntax error occurs */ 4030 | tplt_print(out,lemp,lemp->error,&lineno); 4031 | tplt_xfer(lemp->name,in,out,&lineno); 4032 | 4033 | /* Generate code which executes when the parser accepts its input */ 4034 | tplt_print(out,lemp,lemp->accept,&lineno); 4035 | tplt_xfer(lemp->name,in,out,&lineno); 4036 | 4037 | /* Append any addition code the user desires */ 4038 | tplt_print(out,lemp,lemp->extracode,&lineno); 4039 | 4040 | fclose(in); 4041 | fclose(out); 4042 | return; 4043 | } 4044 | 4045 | /* Generate a header file for the parser */ 4046 | void ReportHeader(struct lemon *lemp) 4047 | { 4048 | FILE *out, *in; 4049 | const char *prefix; 4050 | char line[LINESIZE]; 4051 | char pattern[LINESIZE]; 4052 | int i; 4053 | 4054 | if( lemp->tokenprefix ) prefix = lemp->tokenprefix; 4055 | else prefix = ""; 4056 | in = file_open(lemp,"_tokens.go","rb"); 4057 | if( in ){ 4058 | for(i=1; interminal && fgets(line,LINESIZE,in); i++){ 4059 | sprintf(pattern,"const %s%-30s = %2d\n",prefix,lemp->symbols[i]->name,i); 4060 | if( strcmp(line,pattern) ) break; 4061 | } 4062 | fclose(in); 4063 | if( i==lemp->nterminal ){ 4064 | /* No change in the file. Don't rewrite it. 
*/ 4065 | return; 4066 | } 4067 | } 4068 | out = file_open(lemp,"_tokens.go","wb"); 4069 | if( out ){ 4070 | fprintf(out,"package main\n"); 4071 | for(i=1; interminal; i++){ 4072 | fprintf(out,"const %s%-30s = %2d\n",prefix,lemp->symbols[i]->name,i); 4073 | } 4074 | fclose(out); 4075 | } 4076 | return; 4077 | } 4078 | 4079 | /* Reduce the size of the action tables, if possible, by making use 4080 | ** of defaults. 4081 | ** 4082 | ** In this version, we take the most frequent REDUCE action and make 4083 | ** it the default. Except, there is no default if the wildcard token 4084 | ** is a possible look-ahead. 4085 | */ 4086 | void CompressTables(struct lemon *lemp) 4087 | { 4088 | struct state *stp; 4089 | struct action *ap, *ap2; 4090 | struct rule *rp, *rp2, *rbest; 4091 | int nbest, n; 4092 | int i; 4093 | int usesWildcard; 4094 | 4095 | for(i=0; instate; i++){ 4096 | stp = lemp->sorted[i]; 4097 | nbest = 0; 4098 | rbest = 0; 4099 | usesWildcard = 0; 4100 | 4101 | for(ap=stp->ap; ap; ap=ap->next){ 4102 | if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ 4103 | usesWildcard = 1; 4104 | } 4105 | if( ap->type!=REDUCE ) continue; 4106 | rp = ap->x.rp; 4107 | if( rp->lhsStart ) continue; 4108 | if( rp==rbest ) continue; 4109 | n = 1; 4110 | for(ap2=ap->next; ap2; ap2=ap2->next){ 4111 | if( ap2->type!=REDUCE ) continue; 4112 | rp2 = ap2->x.rp; 4113 | if( rp2==rbest ) continue; 4114 | if( rp2==rp ) n++; 4115 | } 4116 | if( n>nbest ){ 4117 | nbest = n; 4118 | rbest = rp; 4119 | } 4120 | } 4121 | 4122 | /* Do not make a default if the number of rules to default 4123 | ** is not at least 1 or if the wildcard token is a possible 4124 | ** lookahead. 
4125 | */ 4126 | if( nbest<1 || usesWildcard ) continue; 4127 | 4128 | 4129 | /* Combine matching REDUCE actions into a single default */ 4130 | for(ap=stp->ap; ap; ap=ap->next){ 4131 | if( ap->type==REDUCE && ap->x.rp==rbest ) break; 4132 | } 4133 | assert( ap ); 4134 | ap->sp = Symbol_new("{default}"); 4135 | for(ap=ap->next; ap; ap=ap->next){ 4136 | if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; 4137 | } 4138 | stp->ap = Action_sort(stp->ap); 4139 | } 4140 | } 4141 | 4142 | 4143 | /* 4144 | ** Compare two states for sorting purposes. The smaller state is the 4145 | ** one with the most non-terminal actions. If they have the same number 4146 | ** of non-terminal actions, then the smaller is the one with the most 4147 | ** token actions. 4148 | */ 4149 | static int stateResortCompare(const void *a, const void *b){ 4150 | const struct state *pA = *(const struct state**)a; 4151 | const struct state *pB = *(const struct state**)b; 4152 | int n; 4153 | 4154 | n = pB->nNtAct - pA->nNtAct; 4155 | if( n==0 ){ 4156 | n = pB->nTknAct - pA->nTknAct; 4157 | if( n==0 ){ 4158 | n = pB->statenum - pA->statenum; 4159 | } 4160 | } 4161 | assert( n!=0 ); 4162 | return n; 4163 | } 4164 | 4165 | 4166 | /* 4167 | ** Renumber and resort states so that states with fewer choices 4168 | ** occur at the end. Except, keep state 0 as the first state. 
4169 | */ 4170 | void ResortStates(struct lemon *lemp) 4171 | { 4172 | int i; 4173 | struct state *stp; 4174 | struct action *ap; 4175 | 4176 | for(i=0; instate; i++){ 4177 | stp = lemp->sorted[i]; 4178 | stp->nTknAct = stp->nNtAct = 0; 4179 | stp->iDflt = lemp->nstate + lemp->nrule; 4180 | stp->iTknOfst = NO_OFFSET; 4181 | stp->iNtOfst = NO_OFFSET; 4182 | for(ap=stp->ap; ap; ap=ap->next){ 4183 | if( compute_action(lemp,ap)>=0 ){ 4184 | if( ap->sp->indexnterminal ){ 4185 | stp->nTknAct++; 4186 | }else if( ap->sp->indexnsymbol ){ 4187 | stp->nNtAct++; 4188 | }else{ 4189 | stp->iDflt = compute_action(lemp, ap); 4190 | } 4191 | } 4192 | } 4193 | } 4194 | qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), 4195 | stateResortCompare); 4196 | for(i=0; instate; i++){ 4197 | lemp->sorted[i]->statenum = i; 4198 | } 4199 | } 4200 | 4201 | 4202 | /***************** From the file "set.c" ************************************/ 4203 | /* 4204 | ** Set manipulation routines for the LEMON parser generator. 4205 | */ 4206 | 4207 | static int size = 0; 4208 | 4209 | /* Set the set size */ 4210 | void SetSize(int n) 4211 | { 4212 | size = n+1; 4213 | } 4214 | 4215 | /* Allocate a new set */ 4216 | char *SetNew(){ 4217 | char *s; 4218 | s = (char*)calloc( size, 1); 4219 | if( s==0 ){ 4220 | extern void memory_error(); 4221 | memory_error(); 4222 | } 4223 | return s; 4224 | } 4225 | 4226 | /* Deallocate a set */ 4227 | void SetFree(char *s) 4228 | { 4229 | free(s); 4230 | } 4231 | 4232 | /* Add a new element to the set. Return TRUE if the element was added 4233 | ** and FALSE if it was already there. 
*/ 4234 | int SetAdd(char *s, int e) 4235 | { 4236 | int rv; 4237 | assert( e>=0 && esize = 1024; 4327 | x1a->count = 0; 4328 | x1a->tbl = (x1node*)malloc( 4329 | (sizeof(x1node) + sizeof(x1node*))*1024 ); 4330 | if( x1a->tbl==0 ){ 4331 | free(x1a); 4332 | x1a = 0; 4333 | }else{ 4334 | int i; 4335 | x1a->ht = (x1node**)&(x1a->tbl[1024]); 4336 | for(i=0; i<1024; i++) x1a->ht[i] = 0; 4337 | } 4338 | } 4339 | } 4340 | /* Insert a new record into the array. Return TRUE if successful. 4341 | ** Prior data with the same key is NOT overwritten */ 4342 | int Strsafe_insert(const char *data) 4343 | { 4344 | x1node *np; 4345 | int h; 4346 | int ph; 4347 | 4348 | if( x1a==0 ) return 0; 4349 | ph = strhash(data); 4350 | h = ph & (x1a->size-1); 4351 | np = x1a->ht[h]; 4352 | while( np ){ 4353 | if( strcmp(np->data,data)==0 ){ 4354 | /* An existing entry with the same key is found. */ 4355 | /* Fail because overwrite is not allows. */ 4356 | return 0; 4357 | } 4358 | np = np->next; 4359 | } 4360 | if( x1a->count>=x1a->size ){ 4361 | /* Need to make the hash table bigger */ 4362 | int i,size; 4363 | struct s_x1 array; 4364 | array.size = size = x1a->size*2; 4365 | array.count = x1a->count; 4366 | array.tbl = (x1node*)malloc( 4367 | (sizeof(x1node) + sizeof(x1node*))*size ); 4368 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4369 | array.ht = (x1node**)&(array.tbl[size]); 4370 | for(i=0; icount; i++){ 4372 | x1node *oldnp, *newnp; 4373 | oldnp = &(x1a->tbl[i]); 4374 | h = strhash(oldnp->data) & (size-1); 4375 | newnp = &(array.tbl[i]); 4376 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4377 | newnp->next = array.ht[h]; 4378 | newnp->data = oldnp->data; 4379 | newnp->from = &(array.ht[h]); 4380 | array.ht[h] = newnp; 4381 | } 4382 | free(x1a->tbl); 4383 | *x1a = array; 4384 | } 4385 | /* Insert the new data */ 4386 | h = ph & (x1a->size-1); 4387 | np = &(x1a->tbl[x1a->count++]); 4388 | np->data = data; 4389 | if( x1a->ht[h] ) x1a->ht[h]->from = 
&(np->next); 4390 | np->next = x1a->ht[h]; 4391 | x1a->ht[h] = np; 4392 | np->from = &(x1a->ht[h]); 4393 | return 1; 4394 | } 4395 | 4396 | /* Return a pointer to data assigned to the given key. Return NULL 4397 | ** if no such key. */ 4398 | const char *Strsafe_find(const char *key) 4399 | { 4400 | int h; 4401 | x1node *np; 4402 | 4403 | if( x1a==0 ) return 0; 4404 | h = strhash(key) & (x1a->size-1); 4405 | np = x1a->ht[h]; 4406 | while( np ){ 4407 | if( strcmp(np->data,key)==0 ) break; 4408 | np = np->next; 4409 | } 4410 | return np ? np->data : 0; 4411 | } 4412 | 4413 | /* Return a pointer to the (terminal or nonterminal) symbol "x". 4414 | ** Create a new symbol if this is the first time "x" has been seen. 4415 | */ 4416 | struct symbol *Symbol_new(const char *x) 4417 | { 4418 | struct symbol *sp; 4419 | 4420 | sp = Symbol_find(x); 4421 | if( sp==0 ){ 4422 | sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); 4423 | MemoryCheck(sp); 4424 | sp->name = Strsafe(x); 4425 | sp->type = isupper(*x) ? TERMINAL : NONTERMINAL; 4426 | sp->rule = 0; 4427 | sp->fallback = 0; 4428 | sp->prec = -1; 4429 | sp->assoc = UNK; 4430 | sp->firstset = 0; 4431 | sp->lambda = LEMON_FALSE; 4432 | sp->destructor = 0; 4433 | sp->destLineno = 0; 4434 | sp->datatype = 0; 4435 | sp->useCnt = 0; 4436 | Symbol_insert(sp,sp->name); 4437 | } 4438 | sp->useCnt++; 4439 | return sp; 4440 | } 4441 | 4442 | /* Compare two symbols for working purposes 4443 | ** 4444 | ** Symbols that begin with upper case letters (terminals or tokens) 4445 | ** must sort before symbols that begin with lower case letters 4446 | ** (non-terminals). Other than that, the order does not matter. 4447 | ** 4448 | ** We find experimentally that leaving the symbols in their original 4449 | ** order (the order they appeared in the grammar file) gives the 4450 | ** smallest parser tables in SQLite. 
4451 | */ 4452 | int Symbolcmpp(const void *_a, const void *_b) 4453 | { 4454 | const struct symbol **a = (const struct symbol **) _a; 4455 | const struct symbol **b = (const struct symbol **) _b; 4456 | int i1 = (**a).index + 10000000*((**a).name[0]>'Z'); 4457 | int i2 = (**b).index + 10000000*((**b).name[0]>'Z'); 4458 | assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 ); 4459 | return i1-i2; 4460 | } 4461 | 4462 | /* There is one instance of the following structure for each 4463 | ** associative array of type "x2". 4464 | */ 4465 | struct s_x2 { 4466 | int size; /* The number of available slots. */ 4467 | /* Must be a power of 2 greater than or */ 4468 | /* equal to 1 */ 4469 | int count; /* Number of currently slots filled */ 4470 | struct s_x2node *tbl; /* The data stored here */ 4471 | struct s_x2node **ht; /* Hash table for lookups */ 4472 | }; 4473 | 4474 | /* There is one instance of this structure for every data element 4475 | ** in an associative array of type "x2". 4476 | */ 4477 | typedef struct s_x2node { 4478 | struct symbol *data; /* The data */ 4479 | const char *key; /* The key */ 4480 | struct s_x2node *next; /* Next entry with the same hash */ 4481 | struct s_x2node **from; /* Previous link */ 4482 | } x2node; 4483 | 4484 | /* There is only one instance of the array, which is the following */ 4485 | static struct s_x2 *x2a; 4486 | 4487 | /* Allocate a new associative array */ 4488 | void Symbol_init(){ 4489 | if( x2a ) return; 4490 | x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); 4491 | if( x2a ){ 4492 | x2a->size = 128; 4493 | x2a->count = 0; 4494 | x2a->tbl = (x2node*)malloc( 4495 | (sizeof(x2node) + sizeof(x2node*))*128 ); 4496 | if( x2a->tbl==0 ){ 4497 | free(x2a); 4498 | x2a = 0; 4499 | }else{ 4500 | int i; 4501 | x2a->ht = (x2node**)&(x2a->tbl[128]); 4502 | for(i=0; i<128; i++) x2a->ht[i] = 0; 4503 | } 4504 | } 4505 | } 4506 | /* Insert a new record into the array. Return TRUE if successful. 
4507 | ** Prior data with the same key is NOT overwritten */ 4508 | int Symbol_insert(struct symbol *data, const char *key) 4509 | { 4510 | x2node *np; 4511 | int h; 4512 | int ph; 4513 | 4514 | if( x2a==0 ) return 0; 4515 | ph = strhash(key); 4516 | h = ph & (x2a->size-1); 4517 | np = x2a->ht[h]; 4518 | while( np ){ 4519 | if( strcmp(np->key,key)==0 ){ 4520 | /* An existing entry with the same key is found. */ 4521 | /* Fail because overwrite is not allows. */ 4522 | return 0; 4523 | } 4524 | np = np->next; 4525 | } 4526 | if( x2a->count>=x2a->size ){ 4527 | /* Need to make the hash table bigger */ 4528 | int i,size; 4529 | struct s_x2 array; 4530 | array.size = size = x2a->size*2; 4531 | array.count = x2a->count; 4532 | array.tbl = (x2node*)malloc( 4533 | (sizeof(x2node) + sizeof(x2node*))*size ); 4534 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4535 | array.ht = (x2node**)&(array.tbl[size]); 4536 | for(i=0; icount; i++){ 4538 | x2node *oldnp, *newnp; 4539 | oldnp = &(x2a->tbl[i]); 4540 | h = strhash(oldnp->key) & (size-1); 4541 | newnp = &(array.tbl[i]); 4542 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4543 | newnp->next = array.ht[h]; 4544 | newnp->key = oldnp->key; 4545 | newnp->data = oldnp->data; 4546 | newnp->from = &(array.ht[h]); 4547 | array.ht[h] = newnp; 4548 | } 4549 | free(x2a->tbl); 4550 | *x2a = array; 4551 | } 4552 | /* Insert the new data */ 4553 | h = ph & (x2a->size-1); 4554 | np = &(x2a->tbl[x2a->count++]); 4555 | np->key = key; 4556 | np->data = data; 4557 | if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); 4558 | np->next = x2a->ht[h]; 4559 | x2a->ht[h] = np; 4560 | np->from = &(x2a->ht[h]); 4561 | return 1; 4562 | } 4563 | 4564 | /* Return a pointer to data assigned to the given key. Return NULL 4565 | ** if no such key. 
*/ 4566 | struct symbol *Symbol_find(const char *key) 4567 | { 4568 | int h; 4569 | x2node *np; 4570 | 4571 | if( x2a==0 ) return 0; 4572 | h = strhash(key) & (x2a->size-1); 4573 | np = x2a->ht[h]; 4574 | while( np ){ 4575 | if( strcmp(np->key,key)==0 ) break; 4576 | np = np->next; 4577 | } 4578 | return np ? np->data : 0; 4579 | } 4580 | 4581 | /* Return the n-th data. Return NULL if n is out of range. */ 4582 | struct symbol *Symbol_Nth(int n) 4583 | { 4584 | struct symbol *data; 4585 | if( x2a && n>0 && n<=x2a->count ){ 4586 | data = x2a->tbl[n-1].data; 4587 | }else{ 4588 | data = 0; 4589 | } 4590 | return data; 4591 | } 4592 | 4593 | /* Return the size of the array */ 4594 | int Symbol_count() 4595 | { 4596 | return x2a ? x2a->count : 0; 4597 | } 4598 | 4599 | /* Return an array of pointers to all data in the table. 4600 | ** The array is obtained from malloc. Return NULL if memory allocation 4601 | ** problems, or if the array is empty. */ 4602 | struct symbol **Symbol_arrayof() 4603 | { 4604 | struct symbol **array; 4605 | int i,size; 4606 | if( x2a==0 ) return 0; 4607 | size = x2a->count; 4608 | array = (struct symbol **)calloc(size, sizeof(struct symbol *)); 4609 | if( array ){ 4610 | for(i=0; itbl[i].data; 4611 | } 4612 | return array; 4613 | } 4614 | 4615 | /* Compare two configurations */ 4616 | int Configcmp(const char *_a,const char *_b) 4617 | { 4618 | const struct config *a = (struct config *) _a; 4619 | const struct config *b = (struct config *) _b; 4620 | int x; 4621 | x = a->rp->index - b->rp->index; 4622 | if( x==0 ) x = a->dot - b->dot; 4623 | return x; 4624 | } 4625 | 4626 | /* Compare two states */ 4627 | PRIVATE int statecmp(struct config *a, struct config *b) 4628 | { 4629 | int rc; 4630 | for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ 4631 | rc = a->rp->index - b->rp->index; 4632 | if( rc==0 ) rc = a->dot - b->dot; 4633 | } 4634 | if( rc==0 ){ 4635 | if( a ) rc = 1; 4636 | if( b ) rc = -1; 4637 | } 4638 | return rc; 4639 | } 4640 | 4641 | /* 
Hash a state */ 4642 | PRIVATE int statehash(struct config *a) 4643 | { 4644 | int h=0; 4645 | while( a ){ 4646 | h = h*571 + a->rp->index*37 + a->dot; 4647 | a = a->bp; 4648 | } 4649 | return h; 4650 | } 4651 | 4652 | /* Allocate a new state structure */ 4653 | struct state *State_new() 4654 | { 4655 | struct state *newstate; 4656 | newstate = (struct state *)calloc(1, sizeof(struct state) ); 4657 | MemoryCheck(newstate); 4658 | return newstate; 4659 | } 4660 | 4661 | /* There is one instance of the following structure for each 4662 | ** associative array of type "x3". 4663 | */ 4664 | struct s_x3 { 4665 | int size; /* The number of available slots. */ 4666 | /* Must be a power of 2 greater than or */ 4667 | /* equal to 1 */ 4668 | int count; /* Number of currently slots filled */ 4669 | struct s_x3node *tbl; /* The data stored here */ 4670 | struct s_x3node **ht; /* Hash table for lookups */ 4671 | }; 4672 | 4673 | /* There is one instance of this structure for every data element 4674 | ** in an associative array of type "x3". 4675 | */ 4676 | typedef struct s_x3node { 4677 | struct state *data; /* The data */ 4678 | struct config *key; /* The key */ 4679 | struct s_x3node *next; /* Next entry with the same hash */ 4680 | struct s_x3node **from; /* Previous link */ 4681 | } x3node; 4682 | 4683 | /* There is only one instance of the array, which is the following */ 4684 | static struct s_x3 *x3a; 4685 | 4686 | /* Allocate a new associative array */ 4687 | void State_init(){ 4688 | if( x3a ) return; 4689 | x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); 4690 | if( x3a ){ 4691 | x3a->size = 128; 4692 | x3a->count = 0; 4693 | x3a->tbl = (x3node*)malloc( 4694 | (sizeof(x3node) + sizeof(x3node*))*128 ); 4695 | if( x3a->tbl==0 ){ 4696 | free(x3a); 4697 | x3a = 0; 4698 | }else{ 4699 | int i; 4700 | x3a->ht = (x3node**)&(x3a->tbl[128]); 4701 | for(i=0; i<128; i++) x3a->ht[i] = 0; 4702 | } 4703 | } 4704 | } 4705 | /* Insert a new record into the array. 
Return TRUE if successful. 4706 | ** Prior data with the same key is NOT overwritten */ 4707 | int State_insert(struct state *data, struct config *key) 4708 | { 4709 | x3node *np; 4710 | int h; 4711 | int ph; 4712 | 4713 | if( x3a==0 ) return 0; 4714 | ph = statehash(key); 4715 | h = ph & (x3a->size-1); 4716 | np = x3a->ht[h]; 4717 | while( np ){ 4718 | if( statecmp(np->key,key)==0 ){ 4719 | /* An existing entry with the same key is found. */ 4720 | /* Fail because overwrite is not allows. */ 4721 | return 0; 4722 | } 4723 | np = np->next; 4724 | } 4725 | if( x3a->count>=x3a->size ){ 4726 | /* Need to make the hash table bigger */ 4727 | int i,size; 4728 | struct s_x3 array; 4729 | array.size = size = x3a->size*2; 4730 | array.count = x3a->count; 4731 | array.tbl = (x3node*)malloc( 4732 | (sizeof(x3node) + sizeof(x3node*))*size ); 4733 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4734 | array.ht = (x3node**)&(array.tbl[size]); 4735 | for(i=0; icount; i++){ 4737 | x3node *oldnp, *newnp; 4738 | oldnp = &(x3a->tbl[i]); 4739 | h = statehash(oldnp->key) & (size-1); 4740 | newnp = &(array.tbl[i]); 4741 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4742 | newnp->next = array.ht[h]; 4743 | newnp->key = oldnp->key; 4744 | newnp->data = oldnp->data; 4745 | newnp->from = &(array.ht[h]); 4746 | array.ht[h] = newnp; 4747 | } 4748 | free(x3a->tbl); 4749 | *x3a = array; 4750 | } 4751 | /* Insert the new data */ 4752 | h = ph & (x3a->size-1); 4753 | np = &(x3a->tbl[x3a->count++]); 4754 | np->key = key; 4755 | np->data = data; 4756 | if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); 4757 | np->next = x3a->ht[h]; 4758 | x3a->ht[h] = np; 4759 | np->from = &(x3a->ht[h]); 4760 | return 1; 4761 | } 4762 | 4763 | /* Return a pointer to data assigned to the given key. Return NULL 4764 | ** if no such key. 
*/ 4765 | struct state *State_find(struct config *key) 4766 | { 4767 | int h; 4768 | x3node *np; 4769 | 4770 | if( x3a==0 ) return 0; 4771 | h = statehash(key) & (x3a->size-1); 4772 | np = x3a->ht[h]; 4773 | while( np ){ 4774 | if( statecmp(np->key,key)==0 ) break; 4775 | np = np->next; 4776 | } 4777 | return np ? np->data : 0; 4778 | } 4779 | 4780 | /* Return an array of pointers to all data in the table. 4781 | ** The array is obtained from malloc. Return NULL if memory allocation 4782 | ** problems, or if the array is empty. */ 4783 | struct state **State_arrayof() 4784 | { 4785 | struct state **array; 4786 | int i,size; 4787 | if( x3a==0 ) return 0; 4788 | size = x3a->count; 4789 | array = (struct state **)malloc( sizeof(struct state *)*size ); 4790 | if( array ){ 4791 | for(i=0; itbl[i].data; 4792 | } 4793 | return array; 4794 | } 4795 | 4796 | /* Hash a configuration */ 4797 | PRIVATE int confighash(struct config *a) 4798 | { 4799 | int h=0; 4800 | h = h*571 + a->rp->index*37 + a->dot; 4801 | return h; 4802 | } 4803 | 4804 | /* There is one instance of the following structure for each 4805 | ** associative array of type "x4". 4806 | */ 4807 | struct s_x4 { 4808 | int size; /* The number of available slots. */ 4809 | /* Must be a power of 2 greater than or */ 4810 | /* equal to 1 */ 4811 | int count; /* Number of currently slots filled */ 4812 | struct s_x4node *tbl; /* The data stored here */ 4813 | struct s_x4node **ht; /* Hash table for lookups */ 4814 | }; 4815 | 4816 | /* There is one instance of this structure for every data element 4817 | ** in an associative array of type "x4". 
4818 | */ 4819 | typedef struct s_x4node { 4820 | struct config *data; /* The data */ 4821 | struct s_x4node *next; /* Next entry with the same hash */ 4822 | struct s_x4node **from; /* Previous link */ 4823 | } x4node; 4824 | 4825 | /* There is only one instance of the array, which is the following */ 4826 | static struct s_x4 *x4a; 4827 | 4828 | /* Allocate a new associative array */ 4829 | void Configtable_init(){ 4830 | if( x4a ) return; 4831 | x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); 4832 | if( x4a ){ 4833 | x4a->size = 64; 4834 | x4a->count = 0; 4835 | x4a->tbl = (x4node*)malloc( 4836 | (sizeof(x4node) + sizeof(x4node*))*64 ); 4837 | if( x4a->tbl==0 ){ 4838 | free(x4a); 4839 | x4a = 0; 4840 | }else{ 4841 | int i; 4842 | x4a->ht = (x4node**)&(x4a->tbl[64]); 4843 | for(i=0; i<64; i++) x4a->ht[i] = 0; 4844 | } 4845 | } 4846 | } 4847 | /* Insert a new record into the array. Return TRUE if successful. 4848 | ** Prior data with the same key is NOT overwritten */ 4849 | int Configtable_insert(struct config *data) 4850 | { 4851 | x4node *np; 4852 | int h; 4853 | int ph; 4854 | 4855 | if( x4a==0 ) return 0; 4856 | ph = confighash(data); 4857 | h = ph & (x4a->size-1); 4858 | np = x4a->ht[h]; 4859 | while( np ){ 4860 | if( Configcmp((const char *) np->data,(const char *) data)==0 ){ 4861 | /* An existing entry with the same key is found. */ 4862 | /* Fail because overwrite is not allows. 
*/ 4863 | return 0; 4864 | } 4865 | np = np->next; 4866 | } 4867 | if( x4a->count>=x4a->size ){ 4868 | /* Need to make the hash table bigger */ 4869 | int i,size; 4870 | struct s_x4 array; 4871 | array.size = size = x4a->size*2; 4872 | array.count = x4a->count; 4873 | array.tbl = (x4node*)malloc( 4874 | (sizeof(x4node) + sizeof(x4node*))*size ); 4875 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4876 | array.ht = (x4node**)&(array.tbl[size]); 4877 | for(i=0; icount; i++){ 4879 | x4node *oldnp, *newnp; 4880 | oldnp = &(x4a->tbl[i]); 4881 | h = confighash(oldnp->data) & (size-1); 4882 | newnp = &(array.tbl[i]); 4883 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4884 | newnp->next = array.ht[h]; 4885 | newnp->data = oldnp->data; 4886 | newnp->from = &(array.ht[h]); 4887 | array.ht[h] = newnp; 4888 | } 4889 | free(x4a->tbl); 4890 | *x4a = array; 4891 | } 4892 | /* Insert the new data */ 4893 | h = ph & (x4a->size-1); 4894 | np = &(x4a->tbl[x4a->count++]); 4895 | np->data = data; 4896 | if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); 4897 | np->next = x4a->ht[h]; 4898 | x4a->ht[h] = np; 4899 | np->from = &(x4a->ht[h]); 4900 | return 1; 4901 | } 4902 | 4903 | /* Return a pointer to data assigned to the given key. Return NULL 4904 | ** if no such key. */ 4905 | struct config *Configtable_find(struct config *key) 4906 | { 4907 | int h; 4908 | x4node *np; 4909 | 4910 | if( x4a==0 ) return 0; 4911 | h = confighash(key) & (x4a->size-1); 4912 | np = x4a->ht[h]; 4913 | while( np ){ 4914 | if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; 4915 | np = np->next; 4916 | } 4917 | return np ? np->data : 0; 4918 | } 4919 | 4920 | /* Remove all data from the table. Pass each data to the function "f" 4921 | ** as it is removed. ("f" may be null to avoid this step.) 
*/ 4922 | void Configtable_clear(int(*f)(struct config *)) 4923 | { 4924 | int i; 4925 | if( x4a==0 || x4a->count==0 ) return; 4926 | if( f ) for(i=0; icount; i++) (*f)(x4a->tbl[i].data); 4927 | for(i=0; isize; i++) x4a->ht[i] = 0; 4928 | x4a->count = 0; 4929 | return; 4930 | } 4931 | -------------------------------------------------------------------------------- /lempar.go: -------------------------------------------------------------------------------- 1 | /* Driver template for the LEMON parser generator. 2 | ** The author disclaims copyright to this source code. 3 | */ 4 | /* First off, code is included that follows the "include" declaration 5 | ** in the input grammar file. */ 6 | package main 7 | 8 | import ( 9 | "io" 10 | "fmt" 11 | ) 12 | 13 | %% 14 | /* Next is all token values, in a form suitable for use by makeheaders. 15 | ** This section will be null unless lemon is run with the -m switch. 16 | */ 17 | /* 18 | ** These constants (all generated automatically by the parser generator) 19 | ** specify the various kinds of tokens (terminals) that the parser 20 | ** understands. 21 | ** 22 | ** Each symbol here is a terminal symbol in the grammar. 23 | */ 24 | %% 25 | 26 | /* The next thing included is series of defines which control 27 | ** various aspects of the generated parser. 28 | ** YYCODETYPE is the data type used for storing terminal 29 | ** and nonterminal numbers. "unsigned char" is 30 | ** used if there are fewer than 250 terminals 31 | ** and nonterminals. "int" is used otherwise. 32 | ** YYNOCODE is a number of type YYCODETYPE which corresponds 33 | ** to no legal terminal or nonterminal number. This 34 | ** number is used to fill in empty slots of the hash 35 | ** table. 36 | ** YYFALLBACK If defined, this indicates that one or more tokens 37 | ** have fall-back values which should be used if the 38 | ** original value of the token will not parse. 39 | ** YYACTIONTYPE is the data type used for storing terminal 40 | ** and nonterminal numbers. 
"unsigned char" is 41 | ** used if there are fewer than 250 rules and 42 | ** states combined. "int" is used otherwise. 43 | ** ParseTOKENTYPE is the data type used for minor tokens given 44 | ** directly to the parser from the tokenizer. 45 | ** YYMINORTYPE is the data type used for all minor tokens. 46 | ** This is typically a union of many types, one of 47 | ** which is ParseTOKENTYPE. The entry in the union 48 | ** for base tokens is called "yy0". 49 | ** YYSTACKDEPTH is the maximum depth of the parser's stack. If 50 | ** zero the stack is dynamically sized using realloc() 51 | ** ParseARG_SDECL A static variable declaration for the %extra_argument 52 | ** ParseARG_PDECL A parameter declaration for the %extra_argument 53 | ** ParseARG_STORE Code to store %extra_argument into yypParser 54 | ** ParseARG_FETCH Code to extract %extra_argument from yypParser 55 | ** YYNSTATE the combined number of states. 56 | ** YYNRULE the number of rules in the grammar 57 | ** YYERRORSYMBOL is the code number of the error symbol. If not 58 | ** defined, then do no error processing. 59 | */ 60 | %% 61 | const ( 62 | YY_NO_ACTION = YYNSTATE + YYNRULE + 2 63 | YY_ACCEPT_ACTION = YYNSTATE + YYNRULE + 1 64 | YY_ERROR_ACTION = YYNSTATE + YYNRULE 65 | ) 66 | 67 | /* Next are the tables used to determine what action to take based on the 68 | ** current state and lookahead token. These tables are used to implement 69 | ** functions that take a state number and lookahead value and return an 70 | ** action integer. 71 | ** 72 | ** Suppose the action integer is N. Then the action is determined as 73 | ** follows 74 | ** 75 | ** 0 <= N < YYNSTATE Shift N. That is, push the lookahead 76 | ** token onto the stack and goto state N. 77 | ** 78 | ** YYNSTATE <= N < YYNSTATE+YYNRULE Reduce by rule N-YYNSTATE. 79 | ** 80 | ** N == YYNSTATE+YYNRULE A syntax error has occurred. 81 | ** 82 | ** N == YYNSTATE+YYNRULE+1 The parser accepts its input. 83 | ** 84 | ** N == YYNSTATE+YYNRULE+2 No such action. 
Denotes unused 85 | ** slots in the yy_action[] table. 86 | ** 87 | ** The action table is constructed as a single large table named yy_action[]. 88 | ** Given state S and lookahead X, the action is computed as 89 | ** 90 | ** yy_action[ yy_shift_ofst[S] + X ] 91 | ** 92 | ** If the index value yy_shift_ofst[S]+X is out of range or if the value 93 | ** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X or if yy_shift_ofst[S] 94 | ** is equal to YY_SHIFT_USE_DFLT, it means that the action is not in the table 95 | ** and that yy_default[S] should be used instead. 96 | ** 97 | ** The formula above is for computing the action when the lookahead is 98 | ** a terminal symbol. If the lookahead is a non-terminal (as occurs after 99 | ** a reduce action) then the yy_reduce_ofst[] array is used in place of 100 | ** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of 101 | ** YY_SHIFT_USE_DFLT. 102 | ** 103 | ** The following are the tables generated in this section: 104 | ** 105 | ** yy_action[] A single table containing all actions. 106 | ** yy_lookahead[] A table containing the lookahead for each entry in 107 | ** yy_action. Used to detect hash collisions. 108 | ** yy_shift_ofst[] For each state, the offset into yy_action for 109 | ** shifting terminals. 110 | ** yy_reduce_ofst[] For each state, the offset into yy_action for 111 | ** shifting non-terminals after a reduce. 112 | ** yy_default[] Default action for each state. 113 | */ 114 | %% 115 | 116 | /* The next table maps tokens into fallback tokens. If a construct 117 | ** like the following: 118 | ** 119 | ** %fallback ID X Y Z. 120 | ** 121 | ** appears in the grammar, then ID becomes a fallback token for X, Y, 122 | ** and Z. Whenever one of the tokens X, Y, or Z is input to the parser 123 | ** but it does not parse, the type of the token is changed to ID and 124 | ** the parse is retried before an error is thrown. 
125 | */ 126 | var yyFallback = []YYCODETYPE{ 127 | %% 128 | } 129 | 130 | /* The following structure represents a single element of the 131 | ** parser's stack. Information stored includes: 132 | ** 133 | ** + The state number for the parser at this level of the stack. 134 | ** 135 | ** + The value of the token stored at this level of the stack. 136 | ** (In other words, the "major" token.) 137 | ** 138 | ** + The semantic value stored at this level of the stack. This is 139 | ** the information used by the action routines in the grammar. 140 | ** It is sometimes called the "minor" token. 141 | */ 142 | type yyStackEntry struct { 143 | stateno YYACTIONTYPE 144 | major YYCODETYPE 145 | minor YYMINORTYPE 146 | } 147 | 148 | /* The state of the parser is completely contained in an instance of 149 | ** the following structure */ 150 | type yyParser struct { 151 | idx int 152 | errcnt int 153 | stack []yyStackEntry 154 | 155 | traceWriter io.Writer 156 | tracePrompt string 157 | } 158 | 159 | /* 160 | ** Turn parser tracing on by giving a stream to which to write the trace 161 | ** and a prompt to preface each trace message. Tracing is turned off 162 | ** by making either argument NULL 163 | ** 164 | ** Inputs: 165 | **

**   - traceWriter: the io.Writer to which trace output should be
**     written.  If nil, then tracing is turned off.
**   - tracePrompt: a prefix string written at the beginning of every
**     line of trace output.  It is reset to "" when traceWriter is nil.
**
172 | ** 173 | ** Outputs: 174 | ** None. 175 | */ 176 | func (p *yyParser) Trace(traceWriter io.Writer, tracePrompt string) { 177 | p.traceWriter = traceWriter 178 | p.tracePrompt = tracePrompt 179 | if p.traceWriter == nil { 180 | p.tracePrompt = "" 181 | } 182 | } 183 | 184 | /* For tracing shifts, the names of all terminals and nonterminals 185 | ** are required. The following table supplies these names */ 186 | var yyTokenName = []string{ 187 | %% 188 | } 189 | 190 | /* For tracing reduce actions, the names of all rules are required. 191 | */ 192 | var yyRuleName = []string{ 193 | %% 194 | } 195 | 196 | /* 197 | ** Try to increase the size of the parser stack. 198 | */ 199 | func (p *yyParser) growStack() { 200 | n := len(p.stack) 201 | s := make([]yyStackEntry, n*2+100) 202 | copy(s, p.stack) 203 | p.stack = s 204 | 205 | // TODO(nsf): add 'if debug' for dead code elimination 206 | if p.traceWriter != nil { 207 | fmt.Fprintf(p.traceWriter, "%sStack grows to %d entries!\n", 208 | p.tracePrompt, len(p.stack)) 209 | } 210 | } 211 | 212 | /* 213 | ** This function allocates a new parser. 214 | ** The only argument is a pointer to a function which works like 215 | ** malloc. 216 | ** 217 | ** Inputs: 218 | ** A pointer to the function used to allocate memory. 219 | ** 220 | ** Outputs: 221 | ** A pointer to a parser. This pointer is used in subsequent calls 222 | ** to Parse and ParseFree. 223 | */ 224 | func NewParser() *yyParser { 225 | p := new(yyParser) 226 | p.idx = -1 227 | p.errcnt = 0 228 | p.stack = make([]yyStackEntry, 100) 229 | return p 230 | } 231 | 232 | /* The following function deletes the value associated with a 233 | ** symbol. The symbol can be either a terminal or nonterminal. 234 | ** "yymajor" is the symbol code, and "yypminor" is a pointer to 235 | ** the value. 
236 | */ 237 | func (p *yyParser) destructor(major YYCODETYPE, minor *YYMINORTYPE) { 238 | // TODO(nsf): ParseARG_FETCH 239 | switch major { 240 | %% 241 | } 242 | } 243 | 244 | /* 245 | ** Pop the parser's stack once. 246 | ** 247 | ** If there is a destructor routine associated with the token which 248 | ** is popped from the stack, then call it. 249 | ** 250 | ** Return the major token number for the symbol popped. 251 | */ 252 | func (p *yyParser) popParserStack() YYCODETYPE { 253 | if p.idx < 0 { 254 | return 0 255 | } 256 | 257 | top := &p.stack[p.idx] 258 | major := top.major 259 | 260 | // TODO(nsf): add 'if debug' for dead code elimination 261 | if p.traceWriter != nil { 262 | fmt.Fprintf(p.traceWriter, "%sPopping %s\n", 263 | p.tracePrompt, yyTokenName[major]) 264 | } 265 | p.destructor(major, &top.minor) 266 | p.idx-- 267 | return major 268 | } 269 | 270 | /* 271 | ** Deallocate and destroy a parser. Destructors are all called for 272 | ** all stack elements before shutting the parser down. 273 | ** 274 | ** Inputs: 275 | **

**   - The receiver is the parser to shut down.  This should be a
**     pointer obtained from NewParser.
**   - No function to reclaim memory is needed: the Go port leaves the
**     parser's memory to the garbage collector.
**
281 | */ 282 | func (p *yyParser) Dispose() { 283 | for p.idx >= 0 { 284 | p.popParserStack() 285 | } 286 | } 287 | 288 | /* 289 | ** Find the appropriate action for a parser given the terminal 290 | ** look-ahead token iLookAhead. 291 | ** 292 | ** If the look-ahead token is YYNOCODE, then check to see if the action is 293 | ** independent of the look-ahead. If it is, return the action, otherwise 294 | ** return YY_NO_ACTION. 295 | */ 296 | func (p *yyParser) findShiftAction(lookahead YYCODETYPE) YYACTIONTYPE { 297 | stateno := p.stack[p.idx].stateno 298 | 299 | if stateno > YY_SHIFT_COUNT { 300 | return yy_default[stateno] 301 | } 302 | 303 | i := int(yy_shift_ofst[stateno]) 304 | if i == YY_SHIFT_USE_DFLT { 305 | return yy_default[stateno] 306 | } 307 | 308 | // assert(lookahead != YYNOCODE) 309 | i += int(lookahead) 310 | if i >= 0 && i < len(yy_action) && yy_lookahead[i] == lookahead { 311 | return yy_action[i] 312 | } 313 | if lookahead > 0 { 314 | if int(lookahead) < len(yyFallback) { 315 | fallback := yyFallback[lookahead] 316 | if fallback != 0 { 317 | // TODO(nsf): add 'if debug' for dead code elimination 318 | if p.traceWriter != nil { 319 | fmt.Fprintf(p.traceWriter, "%sFALLBACK %s => %s\n", 320 | p.tracePrompt, 321 | yyTokenName[lookahead], 322 | yyTokenName[fallback]) 323 | } 324 | return p.findShiftAction(fallback) 325 | } 326 | } 327 | if YYWILDCARD >= 0 { 328 | var wildcard int = YYWILDCARD 329 | j := i - int(lookahead) + wildcard 330 | 331 | if j >= 0 && j < len(yy_action) && int(yy_lookahead[j]) == wildcard { 332 | // TODO(nsf): add 'if debug' for dead code elimination 333 | if p.traceWriter != nil { 334 | fmt.Fprintf(p.traceWriter, "%sWILDCARD %s => %s\n", 335 | p.tracePrompt, 336 | yyTokenName[lookahead], 337 | yyTokenName[wildcard]) 338 | } 339 | return yy_action[j] 340 | } 341 | } 342 | } 343 | return yy_default[stateno] 344 | } 345 | 346 | /* 347 | ** Find the appropriate action for a parser given the non-terminal 348 | ** look-ahead token 
iLookAhead.
**
** The lookup is bounds-checked.  When 'stateno' has no entry in
** yy_reduce_ofst, or the computed slot lies outside yy_action, or the slot
** belongs to a different look-ahead, the state's default action from
** yy_default is returned instead.  The error recovery loop in the main
** parser routine depends on this: it probes whatever state is on the stack
** with the error symbol, so the table invariants that hold right after a
** reduce are not guaranteed there.
*/
func (p *yyParser) findReduceAction(stateno YYACTIONTYPE, lookahead YYCODETYPE) YYACTIONTYPE {
	// A state beyond the end of the offset table can only take its default.
	if int(stateno) >= len(yy_reduce_ofst) {
		return yy_default[stateno]
	}

	// assert(lookahead != YYNOCODE)
	i := int(yy_reduce_ofst[stateno]) + int(lookahead)
	if i < 0 || i >= len(yy_action) || yy_lookahead[i] != lookahead {
		return yy_default[stateno]
	}
	return yy_action[i]
}

/*
** The following routine is called if the stack overflows.  It drops the
** entry that could not be stored, empties the stack and then runs the code
** the generator splices in at the %% marker (presumably the grammar's
** %stack_overflow block; confirm against lemon.c).
** TODO(nsf): We have a dynamic stack, it can't overflow? remove this method?
*/
func (p *yyParser) stackOverflow(pminor *YYMINORTYPE) {
	// TODO(nsf): ParseARG_FETCH
	p.idx--
	// TODO(nsf): add 'if debug' for dead code elimination
	if p.traceWriter != nil {
		fmt.Fprintf(p.traceWriter, "%sStack Overflow!\n",
			p.tracePrompt)
	}
	for p.idx >= 0 {
		p.popParserStack()
	}
%%
	// TODO(nsf): ParseARG_STORE
}

/*
** Perform a shift action: advance the stack index, growing the stack when
** the index runs past its end, and store (newState, major, *pminor) in the
** new top entry.
*/
func (p *yyParser) shift(newState YYACTIONTYPE, major YYCODETYPE, pminor *YYMINORTYPE) {
	p.idx++
	if p.idx >= len(p.stack) {
		p.growStack()

		// TODO(nsf): can't happen? dynamic stack doesn't overflow
		if p.idx >= len(p.stack) {
			p.stackOverflow(pminor)
			return
		}
	}

	top := &p.stack[p.idx]
	top.stateno = newState
	top.major = major
	top.minor = *pminor

	// TODO(nsf): add 'if debug' for dead code elimination
	if p.traceWriter != nil {
		fmt.Fprintf(p.traceWriter, "%sShift %d\n",
			p.tracePrompt, newState)
		fmt.Fprintf(p.traceWriter, "%sStack:",
			p.tracePrompt)
		// Entry 0 is the bottom-of-stack placeholder, so start at 1.
		for i := 1; i <= p.idx; i++ {
			fmt.Fprintf(p.traceWriter, " %s", yyTokenName[p.stack[i].major])
		}
		fmt.Fprintf(p.traceWriter, "\n")
	}
}

/* The following table contains information about every rule that
** is used during the reduce.
*/
type ruleInfoEntry struct {
	lhs  YYCODETYPE // symbol the rule reduces to; looked up as the goto symbol
	nrhs byte       // number of stack entries the rule removes
}

// The generator fills in one entry per rule at the %% marker.
var yyRuleInfo = []ruleInfoEntry{
%%
}

/*
** Perform a reduce action and the shift that must immediately
** follow the reduce.
** Use 'yyp' instead of 'p', because the namespace of this function is visible
** in rule actions, we don't want clashes.
434 | */ 435 | func (yyp *yyParser) reduce(ruleno YYACTIONTYPE) { 436 | var yygoto YYCODETYPE 437 | var yyact YYACTIONTYPE 438 | var yygotominor YYMINORTYPE 439 | var yysize int 440 | 441 | // TODO(nsf): add 'if debug' for dead code elimination 442 | if yyp.traceWriter != nil && ruleno >= 0 && int(ruleno) < len(yyRuleName) { 443 | fmt.Fprintf(yyp.traceWriter, "%sReduce [%s].\n", 444 | yyp.tracePrompt, yyRuleName[ruleno]) 445 | } 446 | 447 | // these will be used in rule actions 448 | yystack := yyp.stack 449 | yyidx := yyp.idx 450 | 451 | switch ruleno { 452 | %% 453 | } 454 | 455 | yygoto = yyRuleInfo[ruleno].lhs 456 | yysize = int(yyRuleInfo[ruleno].nrhs) 457 | yyp.idx -= yysize 458 | yyact = yyp.findReduceAction(yyp.stack[yyp.idx].stateno, yygoto) 459 | if yyact < YYNSTATE { 460 | // TODO(nsf): add 'if debug' for dead code elimination 461 | yyp.shift(yyact, yygoto, &yygotominor) 462 | /* TODO(nsf): enable this 'if !debug' 463 | if yysize > 0 { 464 | yyp.idx++ 465 | yymsp = &yyp.stack[yyp.idx] 466 | yymsp.stateno = yyact 467 | yymsp.major = yygoto 468 | yymsp.minor = yygotominor 469 | } else { 470 | yyp.shift(yyact, yygoto, &yygotominor) 471 | } 472 | */ 473 | } else { 474 | // assert(yyact == YYNSTATE + YYNRULE + 1) 475 | yyp.accept() 476 | } 477 | } 478 | 479 | /* 480 | ** The following code executes when the parse fails 481 | */ 482 | func (p *yyParser) parseFailed() { 483 | // TODO(nsf): ParseARG_FETCH 484 | 485 | // TODO(nsf): add 'if debug' for dead code elimination 486 | if p.traceWriter != nil { 487 | fmt.Fprintf(p.traceWriter, "%sFail!\n", p.tracePrompt) 488 | } 489 | 490 | for p.idx >= 0 { 491 | p.popParserStack() 492 | } 493 | %% 494 | // TODO(nsf): ParseARG_STORE 495 | } 496 | 497 | /* 498 | ** The following code executes when a syntax error first occurs. 
499 | */ 500 | func (p *yyParser) syntaxError(major YYCODETYPE, minor YYMINORTYPE) { 501 | // TODO(nsf): ParseARG_FETCH 502 | 503 | // TODO(nsf): #define TOKEN (minor.yy0) 504 | %% 505 | // TODO(nsf): ParseARG_STORE 506 | } 507 | 508 | /* 509 | ** The following is executed when the parser accepts 510 | */ 511 | func (p *yyParser) accept() { 512 | // TODO(nsf): ParseARG_FETCH 513 | 514 | // TODO(nsf): add 'if debug' for dead code elimination 515 | if p.traceWriter != nil { 516 | fmt.Fprintf(p.traceWriter, "%sAccept!\n", p.tracePrompt) 517 | } 518 | for p.idx >= 0 { 519 | p.popParserStack() 520 | } 521 | %% 522 | // TODO(nsf): ParseARG_STORE 523 | } 524 | 525 | /* The main parser program. 526 | ** The first argument is a pointer to a structure obtained from 527 | ** "ParseAlloc" which describes the current state of the parser. 528 | ** The second argument is the major token number. The third is 529 | ** the minor token. The fourth optional argument is whatever the 530 | ** user wants (and specified in the grammar) and is available for 531 | ** use by the action routines. 532 | ** 533 | ** Inputs: 534 | **
    535 | **
  • A pointer to the parser (an opaque structure.) 536 | **
  • The major token number. 537 | **
  • The minor token number. 538 | **
  • An option argument of a grammar-specified type. 539 | **
540 | ** 541 | ** Outputs: 542 | ** None. 543 | */ 544 | func (p *yyParser) Parse(major YYCODETYPE, minor ParseTOKENTYPE, arg ...interface{}) { 545 | var ( 546 | minorunion YYMINORTYPE 547 | act YYACTIONTYPE 548 | endofinput bool 549 | errorhit bool 550 | ) 551 | 552 | if p.idx < 0 { 553 | p.idx = 0 554 | p.errcnt = -1 555 | p.stack[0].stateno = 0 556 | p.stack[0].major = 0 557 | } 558 | 559 | minorunion = minor 560 | endofinput = major == 0 561 | 562 | // TODO(nsf): add 'if debug' for dead code elimination 563 | if p.traceWriter != nil { 564 | fmt.Fprintf(p.traceWriter, "%sInput %s\n", 565 | p.tracePrompt, 566 | yyTokenName[major]) 567 | } 568 | for { 569 | act = p.findShiftAction(major) 570 | if act < YYNSTATE { 571 | // assert(!endofinput) 572 | p.shift(act, major, &minorunion) 573 | p.errcnt-- 574 | major = YYNOCODE 575 | } else if act < YYNSTATE + YYNRULE { 576 | p.reduce(act - YYNSTATE) 577 | } else { 578 | // assert(act == YY_ERROR_ACTION) 579 | // TODO(nsf): add 'if debug' for dead code elimination 580 | if p.traceWriter != nil { 581 | fmt.Fprintf(p.traceWriter, "%sSyntax Error!\n", 582 | p.tracePrompt) 583 | } 584 | 585 | if YYERRORSYMBOL >= 0 { 586 | var errsym int = YYERRORSYMBOL 587 | if p.errcnt < 0 { 588 | p.syntaxError(major, minorunion) 589 | } 590 | mx := int(p.stack[p.idx].major) 591 | if mx == errsym || errorhit { 592 | // TODO(nsf): add 'if debug' for dead code elimination 593 | if p.traceWriter != nil { 594 | fmt.Fprintf(p.traceWriter, 595 | "%sDiscard input token %s\n", 596 | p.tracePrompt, 597 | yyTokenName[major]) 598 | } 599 | p.destructor(major, &minorunion) 600 | major = YYNOCODE 601 | } else { 602 | for p.idx >= 0 && mx != errsym { 603 | act = p.findReduceAction(p.stack[p.idx].stateno, 604 | YYCODETYPE(errsym)) 605 | if act < YYNSTATE { 606 | break 607 | } 608 | p.popParserStack() 609 | } 610 | if p.idx < 0 || major == 0 { 611 | p.destructor(major, &minorunion) 612 | p.parseFailed() 613 | major = YYNOCODE 614 | } else if mx != errsym { 
615 | var u2 YYMINORTYPE 616 | //u2.yy0 = ParseTOKENTYPE(nil) 617 | p.shift(act, YYCODETYPE(errsym), &u2) 618 | } 619 | } 620 | p.errcnt = 3 621 | errorhit = true 622 | } else { 623 | if p.errcnt <= 0 { 624 | p.syntaxError(major, minorunion) 625 | } 626 | p.errcnt = 3 627 | p.destructor(major, &minorunion) 628 | if endofinput { 629 | p.parseFailed() 630 | } 631 | major = YYNOCODE 632 | } 633 | } 634 | if !(major != YYNOCODE && p.idx >= 0) { 635 | break 636 | } 637 | } 638 | } 639 | --------------------------------------------------------------------------------