├── lemon ├── sliced │ ├── parse.h │ ├── error.h │ ├── plink.h │ ├── build.h │ ├── report.h │ ├── error.c │ ├── set.h │ ├── option.h │ ├── configlist.h │ ├── set.c │ ├── table.h │ ├── plink.c │ ├── msort.c │ ├── configlist.c │ ├── lemon.c │ ├── option.c │ ├── README.md │ ├── main.c │ ├── struct.h │ ├── action.c │ ├── build.c │ ├── table.c │ └── parse.c ├── examples │ ├── calc │ │ ├── .gitignore │ │ ├── calc.y │ │ ├── main.c │ │ ├── Makefile │ │ └── README.md │ ├── calc2 │ │ ├── .gitignore │ │ ├── token.h │ │ ├── Makefile │ │ ├── main.c │ │ ├── calc2.y │ │ └── README.md │ └── README.md └── README.md ├── .travis.yml ├── .editorconfig ├── .gitattributes ├── .gitignore ├── validate.sh ├── CONTRIBUTING.md └── README.md /lemon/sliced/parse.h: -------------------------------------------------------------------------------- 1 | void Parse(struct lemon *lemp); 2 | 3 | -------------------------------------------------------------------------------- /lemon/examples/calc/.gitignore: -------------------------------------------------------------------------------- 1 | lemon 2 | calc 3 | calc.c 4 | calc.h 5 | *.out 6 | -------------------------------------------------------------------------------- /lemon/sliced/error.h: -------------------------------------------------------------------------------- 1 | void ErrorMsg(const char *, int,const char *, ...); 2 | 3 | -------------------------------------------------------------------------------- /lemon/examples/calc2/.gitignore: -------------------------------------------------------------------------------- 1 | lemon 2 | calc2 3 | calc2.c 4 | calc2.h 5 | *.out 6 | -------------------------------------------------------------------------------- /lemon/sliced/plink.h: -------------------------------------------------------------------------------- 1 | struct plink *Plink_new(void); 2 | void Plink_add(struct plink **, struct config *); 3 | void Plink_copy(struct plink **, struct plink *); 4 | void Plink_delete(struct plink *); 5 | 6 | 
-------------------------------------------------------------------------------- /lemon/sliced/build.h: -------------------------------------------------------------------------------- 1 | void FindRulePrecedences(struct lemon*); 2 | void FindFirstSets(struct lemon*); 3 | void FindStates(struct lemon*); 4 | void FindLinks(struct lemon*); 5 | void FindFollowSets(struct lemon*); 6 | void FindActions(struct lemon*); 7 | 8 | -------------------------------------------------------------------------------- /lemon/sliced/report.h: -------------------------------------------------------------------------------- 1 | void Reprint(struct lemon *); 2 | void ReportOutput(struct lemon *); 3 | void ReportTable(struct lemon *, int, int); 4 | void ReportHeader(struct lemon *); 5 | void CompressTables(struct lemon *); 6 | void ResortStates(struct lemon *); 7 | 8 | -------------------------------------------------------------------------------- /lemon/sliced/error.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Code for printing error message. 3 | */ 4 | 5 | void ErrorMsg(const char *filename, int lineno, const char *format, ...){ 6 | va_list ap; 7 | fprintf(stderr, "%s:%d: ", filename, lineno); 8 | va_start(ap, format); 9 | vfprintf(stderr,format,ap); 10 | va_end(ap); 11 | fprintf(stderr, "\n"); 12 | } 13 | -------------------------------------------------------------------------------- /lemon/examples/calc2/token.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** The author of this program disclaims copyright. 
3 | */ 4 | 5 | #ifndef CALC2_TOKEN_H 6 | #define CALC2_TOKEN_H 1 7 | 8 | typedef struct Token { 9 | int value; 10 | unsigned n; 11 | } Token; 12 | 13 | #endif // CALC2_TOKEN_H 14 | 15 | /* Local Variables: */ 16 | /* c-basic-offset: 4 */ 17 | /* tab-width: 4 */ 18 | /* indent-tabs-mode: t */ 19 | /* End: */ 20 | -------------------------------------------------------------------------------- /lemon/sliced/set.h: -------------------------------------------------------------------------------- 1 | void SetSize(int); /* All sets will be of size N */ 2 | char *SetNew(void); /* A new set for element 0..N */ 3 | void SetFree(char*); /* Deallocate a set */ 4 | int SetAdd(char*,int); /* Add element to a set */ 5 | int SetUnion(char *,char *); /* A <- A U B, thru element N */ 6 | #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ 7 | 8 | -------------------------------------------------------------------------------- /lemon/sliced/option.h: -------------------------------------------------------------------------------- 1 | enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, 2 | OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; 3 | struct s_options { 4 | enum option_type type; 5 | const char *label; 6 | char *arg; 7 | const char *message; 8 | }; 9 | int OptInit(char**,struct s_options*,FILE*); 10 | int OptNArgs(void); 11 | char *OptArg(int); 12 | void OptErr(int); 13 | void OptPrint(void); 14 | 15 | -------------------------------------------------------------------------------- /lemon/sliced/configlist.h: -------------------------------------------------------------------------------- 1 | void Configlist_init(void); 2 | struct config *Configlist_add(struct rule *, int); 3 | struct config *Configlist_addbasis(struct rule *, int); 4 | void Configlist_closure(struct lemon *); 5 | void Configlist_sort(void); 6 | void Configlist_sortbasis(void); 7 | struct config *Configlist_return(void); 8 | struct config *Configlist_basis(void); 9 | void Configlist_eat(struct config *); 10 
| void Configlist_reset(void); 11 | 12 | -------------------------------------------------------------------------------- /lemon/examples/calc/calc.y: -------------------------------------------------------------------------------- 1 | /* 2 | ** The author of this program disclaims copyright. 3 | */ 4 | 5 | %include { 6 | #include 7 | #include /* malloc, free */ 8 | #include "calc.h" 9 | } 10 | 11 | %token_type {int} 12 | 13 | %left PLUS MINUS. 14 | %left DIVIDE TIMES. 15 | 16 | %syntax_error { 17 | printf ("Syntax error!\n"); 18 | } 19 | 20 | program ::= expr(A). { 21 | printf ("Result=%d\n", A); 22 | } 23 | 24 | expr(A) ::= expr(B) MINUS expr(C). { 25 | A = B - C; 26 | } 27 | 28 | expr(A) ::= expr(B) PLUS expr(C). { 29 | A = B + C; 30 | } 31 | 32 | expr(A) ::= expr(B) TIMES expr(C). { 33 | A = B * C; 34 | } 35 | 36 | expr(A) ::= expr(B) DIVIDE expr(C). { 37 | if (C != 0) { 38 | A = B / C; 39 | } else { 40 | printf ("A divide by zero detected\n"); 41 | } 42 | } 43 | 44 | expr(A) ::= INTEGER(B). { 45 | A = B; 46 | } 47 | 48 | /* Local Variables: */ 49 | /* c-basic-offset: 4 */ 50 | /* tab-width: 4 */ 51 | /* indent-tabs-mode: t */ 52 | /* mode: lemon */ 53 | /* End: */ 54 | -------------------------------------------------------------------------------- /lemon/examples/calc/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** The author of this program disclaims copyright. 
3 | */ 4 | 5 | int main() 6 | { 7 | void* pParser = ParseAlloc (malloc); 8 | 9 | /* First input: 10 | ** 15 / 5 11 | */ 12 | Parse (pParser, INTEGER, 15); 13 | Parse (pParser, DIVIDE, 0); 14 | Parse (pParser, INTEGER, 5); 15 | Parse (pParser, 0, 0); 16 | 17 | /* Second input: 18 | ** 50 + 125 19 | */ 20 | Parse (pParser, INTEGER, 50); 21 | Parse (pParser, PLUS, 0); 22 | Parse (pParser, INTEGER, 125); 23 | Parse (pParser, 0, 0); 24 | 25 | /* Third input: 26 | ** 50 * 125 + 125 27 | */ 28 | Parse (pParser, INTEGER, 50); 29 | Parse (pParser, TIMES, 0); 30 | Parse (pParser, INTEGER, 125); 31 | Parse (pParser, PLUS, 0); 32 | Parse (pParser, INTEGER, 125); 33 | Parse (pParser, 0, 0); 34 | 35 | ParseFree(pParser, free); 36 | 37 | return 0; 38 | } 39 | 40 | /* Local Variables: */ 41 | /* c-basic-offset: 4 */ 42 | /* tab-width: 4 */ 43 | /* indent-tabs-mode: t */ 44 | /* End: */ 45 | -------------------------------------------------------------------------------- /lemon/examples/calc/Makefile: -------------------------------------------------------------------------------- 1 | # The author of this program disclaims copyright. 2 | 3 | SHELL := $(shell which bash) 4 | CC := gcc 5 | APP := calc 6 | 7 | srcdir = $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) 8 | lemondir = $(realpath $(srcdir)/../..) 9 | 10 | .SUFFIXES: 11 | .SUFFIXES: .c .h 12 | 13 | CFLAGS := -Wall -W -O2 -s -pipe -std=c99 14 | LFLAGS := -O2 -s -pipe 15 | 16 | all: $(APP) 17 | 18 | lemon: $(lemondir)/lemon.c 19 | $(CC) -I$(lemondir) -o $@ $(LFLAGS) $< 20 | 21 | # Don't use $(APP).h as a target 22 | # as the header file is not regenerated by 23 | # lemon if there was no change in token names. 24 | # Using $(APP).c is sufficient 25 | # because generating .c will always create the .h as well. 26 | $(APP).c: lemon $(APP).y $(lemondir)/lempar.c 27 | ./lemon -T$(lemondir)/lempar.c $(APP).y 28 | 29 | $(APP): $(APP).c $(APP).h main.c 30 | cat main.c >> $(APP).c 31 | $(CC) -I. 
-o $(APP) $(LFLAGS) $< 32 | 33 | test: $(APP) 34 | ./$(APP) 35 | 36 | clean: 37 | rm -f *.o 38 | rm -f *.out 39 | rm -f lemon 40 | rm -f $(APP) $(APP).c $(APP).h 41 | -------------------------------------------------------------------------------- /lemon/examples/calc2/Makefile: -------------------------------------------------------------------------------- 1 | # The author of this program disclaims copyright. 2 | 3 | SHELL := $(shell which bash) 4 | CC := gcc 5 | APP := calc2 6 | 7 | srcdir = $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) 8 | lemondir = $(realpath $(srcdir)/../..) 9 | 10 | .SUFFIXES: 11 | .SUFFIXES: .c .h 12 | 13 | CFLAGS := -Wall -W -O2 -s -pipe -std=c99 14 | LFLAGS := -O2 -s -pipe 15 | 16 | all: $(APP) 17 | 18 | lemon: $(lemondir)/lemon.c 19 | $(CC) -I$(lemondir) -o $@ $(LFLAGS) $< 20 | 21 | # Don't use $(APP).h as a target 22 | # as the header file is not regenerated by 23 | # lemon if there was no change in token names. 24 | # Using $(APP).c is sufficient 25 | # because generating .c will always create the .h as well. 26 | $(APP).c: lemon $(APP).y $(lemondir)/lempar.c 27 | ./lemon -T$(lemondir)/lempar.c $(APP).y 28 | 29 | $(APP): $(APP).c $(APP).h main.c 30 | cat main.c >> $(APP).c 31 | $(CC) -I. -o $(APP) $(LFLAGS) $< 32 | 33 | test: $(APP) 34 | ./$(APP) 35 | 36 | clean: 37 | rm -f *.o 38 | rm -f *.out 39 | rm -f lemon 40 | rm -f $(APP) $(APP).c $(APP).h 41 | -------------------------------------------------------------------------------- /lemon/sliced/set.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Set manipulation routines for the LEMON parser generator. 
3 | */ 4 | 5 | static int size = 0; 6 | 7 | /* Set the set size */ 8 | void SetSize(int n) 9 | { 10 | size = n+1; 11 | } 12 | 13 | /* Allocate a new set */ 14 | char *SetNew(void){ 15 | char *s; 16 | s = (char*)calloc( size, 1); 17 | if( s==0 ){ 18 | memory_error(); 19 | } 20 | return s; 21 | } 22 | 23 | /* Deallocate a set */ 24 | void SetFree(char *s) 25 | { 26 | free(s); 27 | } 28 | 29 | /* Add a new element to the set. Return TRUE if the element was added 30 | ** and FALSE if it was already there. */ 31 | int SetAdd(char *s, int e) 32 | { 33 | int rv; 34 | assert( e>=0 && e 7 | #include /* malloc, free */ 8 | #include "token.h" 9 | #include "calc2.h" 10 | } 11 | 12 | %token_type {Token} 13 | %default_type {Token} 14 | 15 | %type expr {Token} 16 | %type NUM {Token} 17 | 18 | %left PLUS MINUS. 19 | %left DIVIDE TIMES. 20 | 21 | %syntax_error { 22 | printf ("Parse error\n"); 23 | exit(1); 24 | } 25 | 26 | %parse_accept { 27 | printf("Parsing complete\n\n"); 28 | } 29 | 30 | program ::= expr(A). { 31 | printf ("Token.value: %d\nToken.n: %d\n", 32 | A.value, A.n); 33 | } 34 | 35 | expr(A) ::= expr(B) MINUS expr(C). { 36 | A.value = B.value - C.value; 37 | A.n = B.n+1 + C.n+1; 38 | } 39 | 40 | expr(A) ::= expr(B) PLUS expr(C). { 41 | A.value = B.value + C.value; 42 | A.n = B.n+1 + C.n+1; 43 | } 44 | 45 | expr(A) ::= expr(B) TIMES expr(C). { 46 | A.value = B.value * C.value; 47 | A.n = B.n+1 + C.n+1; 48 | } 49 | 50 | expr(A) ::= expr(B) DIVIDE expr(C). { 51 | if (C.value != 0) { 52 | A.value = B.value / C.value; 53 | A.n = B.n+1 + C.n+1; 54 | } else { 55 | printf ("A divide by zero detected\n"); 56 | } 57 | } 58 | 59 | expr(A) ::= NUM(B). 
{ 60 | A.value = B.value; 61 | A.n = B.n+1; 62 | } 63 | 64 | /* Local Variables: */ 65 | /* c-basic-offset: 4 */ 66 | /* tab-width: 4 */ 67 | /* indent-tabs-mode: t */ 68 | /* mode: lemon */ 69 | /* End: */ 70 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # Lemon Grove # 3 | # https://github.com/tajmone/lemon-grove # 4 | ############################################ 5 | 6 | root = true 7 | 8 | ## Repository Configurations 9 | ############################ 10 | [.{git*,editorconfig,*.yml}] 11 | indent_style = space 12 | indent_size = unset 13 | end_of_line = lf 14 | charset = utf-8 15 | trim_trailing_whitespace = true 16 | insert_final_newline = true 17 | 18 | 19 | ## Shell Scripts 20 | ################ 21 | [*.sh] 22 | end_of_line = lf 23 | indent_style = tab 24 | indent_size = unset 25 | charset = utf-8 26 | trim_trailing_whitespace = true 27 | insert_final_newline = true 28 | 29 | 30 | ## Markdown GFM 31 | ############### 32 | [*.md] 33 | indent_style = space 34 | indent_size = unset 35 | end_of_line = unset 36 | charset = utf-8 37 | trim_trailing_whitespace = true 38 | insert_final_newline = true 39 | 40 | 41 | ## Make Files 42 | ############# 43 | [{*[Mm]akefile*,*.mak,*.mk,depend}] 44 | indent_size = 2 45 | indent_style = tab 46 | end_of_line = lf 47 | trim_trailing_whitespace = true 48 | insert_final_newline = true 49 | 50 | 51 | ## C Source Files 52 | ################# 53 | [*.{c,h,y}] 54 | indent_style = space 55 | indent_size = unset 56 | end_of_line = unset 57 | charset = utf-8 58 | trim_trailing_whitespace = true 59 | insert_final_newline = true 60 | 61 | ; C-style doc comments: 62 | block_comment_start = /* 63 | block_comment = * 64 | block_comment_end = */ 65 | 66 | 67 | # EOF # 68 | -------------------------------------------------------------------------------- 
/lemon/sliced/table.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** All code in this file has been automatically generated 3 | ** from a specification in the file 4 | ** "table.q" 5 | ** by the associative array code building program "aagen". 6 | ** Do not edit this file! Instead, edit the specification 7 | ** file, then rerun aagen. 8 | */ 9 | /* 10 | ** Code for processing tables in the LEMON parser generator. 11 | */ 12 | /* Routines for handling a strings */ 13 | 14 | const char *Strsafe(const char *); 15 | 16 | void Strsafe_init(void); 17 | int Strsafe_insert(const char *); 18 | const char *Strsafe_find(const char *); 19 | 20 | /* Routines for handling symbols of the grammar */ 21 | 22 | struct symbol *Symbol_new(const char *); 23 | int Symbolcmpp(const void *, const void *); 24 | void Symbol_init(void); 25 | int Symbol_insert(struct symbol *, const char *); 26 | struct symbol *Symbol_find(const char *); 27 | struct symbol *Symbol_Nth(int); 28 | int Symbol_count(void); 29 | struct symbol **Symbol_arrayof(void); 30 | 31 | /* Routines to manage the state table */ 32 | 33 | int Configcmp(const char *, const char *); 34 | struct state *State_new(void); 35 | void State_init(void); 36 | int State_insert(struct state *, struct config *); 37 | struct state *State_find(struct config *); 38 | struct state **State_arrayof(void); 39 | 40 | /* Routines used for efficiency in Configlist_add */ 41 | 42 | void Configtable_init(void); 43 | int Configtable_insert(struct config *); 44 | struct config *Configtable_find(struct config *); 45 | void Configtable_clear(int(*)(struct config *)); 46 | 47 | -------------------------------------------------------------------------------- /lemon/sliced/plink.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Routines processing configuration follow-set propagation links 3 | ** in the LEMON parser generator. 
4 | */ 5 | static struct plink *plink_freelist = 0; 6 | 7 | /* Allocate a new plink */ 8 | struct plink *Plink_new(void){ 9 | struct plink *newlink; 10 | 11 | if( plink_freelist==0 ){ 12 | int i; 13 | int amt = 100; 14 | plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); 15 | if( plink_freelist==0 ){ 16 | fprintf(stderr, 17 | "Unable to allocate memory for a new follow-set propagation link.\n"); 18 | exit(1); 19 | } 20 | for(i=0; inext; 25 | return newlink; 26 | } 27 | 28 | /* Add a plink to a plink list */ 29 | void Plink_add(struct plink **plpp, struct config *cfp) 30 | { 31 | struct plink *newlink; 32 | newlink = Plink_new(); 33 | newlink->next = *plpp; 34 | *plpp = newlink; 35 | newlink->cfp = cfp; 36 | } 37 | 38 | /* Transfer every plink on the list "from" to the list "to" */ 39 | void Plink_copy(struct plink **to, struct plink *from) 40 | { 41 | struct plink *nextpl; 42 | while( from ){ 43 | nextpl = from->next; 44 | from->next = *to; 45 | *to = from; 46 | from = nextpl; 47 | } 48 | } 49 | 50 | /* Delete every plink on the list */ 51 | void Plink_delete(struct plink *plp) 52 | { 53 | struct plink *nextpl; 54 | 55 | while( plp ){ 56 | nextpl = plp->next; 57 | plp->next = plink_freelist; 58 | plink_freelist = plp; 59 | plp = nextpl; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # Lemon Grove # 3 | # https://github.com/tajmone/lemon-grove # 4 | ############################################ 5 | 6 | * text=auto 7 | 8 | ## Source Files 9 | ################ 10 | *.c text 11 | *.h text 12 | *.y text 13 | 14 | ## Make files 15 | ############# 16 | [Mm]akefile text eol=lf 17 | [Mm]akefile* text eol=lf 18 | [Mm]akefile.* text eol=lf 19 | 20 | ## Shell Scripts 21 | ################ 22 | *.bat text eol=crlf 23 | *.cmd text eol=crlf 24 | *.sh text eol=lf 
25 | *.ps1 text eol=crlf 26 | 27 | ## Documentation Files 28 | ###################### 29 | *.adoc text 30 | *.asciidoc text 31 | *.docx binary 32 | *.markdown text 33 | *.md text 34 | *.odt binary 35 | *.pdf binary 36 | *.txt text 37 | *COPYRIGHT* text 38 | *README* text 39 | AUTHORS text 40 | CHANGELOG text 41 | CHANGES text 42 | CONTRIBUTING text 43 | COPYING text 44 | copyright text 45 | INSTALL text 46 | license text 47 | LICENSE text 48 | NEWS text 49 | readme text 50 | 51 | ## Repository Configuration 52 | ########################### 53 | .editorconfig text eol=lf 54 | .gitlab-ci.yml text eol=lf 55 | .travis.yml text eol=lf 56 | .gitattributes text eol=lf 57 | .gitconfig text eol=lf 58 | .gitignore text eol=lf 59 | .gitmodules text eol=lf 60 | 61 | ## Image & Graphics Files 62 | ######################### 63 | *.bmp binary 64 | *.gif binary 65 | *.ico binary 66 | *.jpeg binary 67 | *.jpg binary 68 | *.png binary 69 | *.svg binary 70 | 71 | # EOF # 72 | -------------------------------------------------------------------------------- /lemon/examples/README.md: -------------------------------------------------------------------------------- 1 | # Lemon Examples 2 | 3 | In this directory tree you'll find some user-contributed examples on using Lemon. All examples herein rely on the Lemon version from the [parent folder]. 4 | 5 | ## Calculator 6 | 7 | - [`/calc/`](./calc) 8 | 9 | A very simple calculator example, contributed by [Serghei Iakovlev], public domain. 10 | 11 | - [`/calc2/`](./calc2) 12 | 13 | A bit more complex calculator example, contributed by [Serghei Iakovlev], public domain. 14 | 15 | # Contributing Guidelines 16 | 17 | Feel free to contribute your own examples. Each example should be in its own folder, to keep the repository well structured. 18 | 19 | All examples in this directory tree must compile against the Lemon version provided in the [parent folder]. 
To allow automation of the build & test process, each example folder should contain a `Makefile` with the following targets: 20 | 21 | - `all` 22 | - `test` 23 | - `clean` 24 | 25 | For a practical example, see [`calc/Makefile`](./calc/Makefile). 26 | 27 | Please, state clearly the license terms of your contributed examples (or the lack thereof), and provide at least a brief description of what the example does. 28 | 29 | See also [`../../CONTRIBUTING.md`](../../CONTRIBUTING.md) 30 | 31 | 32 | 35 | 36 | [parent folder]: ../ 37 | 38 | 39 | 40 | [CONTRIBUTING]: ../../CONTRIBUTING.md "Read the contributors' guidelines" 41 | 42 | 43 | 44 | [Serghei Iakovlev]: https://github.com/sergeyklay "View Serghei Iakovlev's GitHub profile" 45 | 46 | 47 | -------------------------------------------------------------------------------- /lemon/examples/calc/README.md: -------------------------------------------------------------------------------- 1 | # Calculator 2 | 3 | > Contributed by [Serghei Iakovlev], public domain. 4 | 5 | This is a very simple calculator. To compile the program do the following: 6 | 7 | ```sh 8 | $ make 9 | ``` 10 | 11 | Then, to run the example issue the following command: 12 | 13 | ```sh 14 | ./calc 15 | ``` 16 | 17 | Let's explain what is actually happening: Take a look at the file `main.c`, then, take a look at `calc.c`. The `main.c` file is appended to the raw form of `calc.c` in the `Makefile`. lemon does not create a complete program - only the necessary subroutines. So it is necessary to build in the main part of a program. 18 | 19 | If you make your own changes to this example program, you should make the changes to `calc.y` or `main.c`. `calc.c` and `calc.h` are auto-generated files, and they will be over-written every time lemon is run. 20 | 21 | Dissecting `main.c`: These are the essential functions that must be called. Note, this is a stripped down simple version with no error checking or tokenizer. 
The tokens are hardwired in so we can see exactly how lemon operates. 22 | 23 | ```c 24 | void* pParser = ParseAlloc (malloc); 25 | ``` 26 | 27 | The next 4 lines parse the command `15 DIVIDE 5`. 28 | 29 | ```c 30 | Parse (pParser, INTEGER, 15); 31 | Parse (pParser, DIVIDE, 0); 32 | Parse (pParser, INTEGER, 5); 33 | Parse (pParser, 0, 0); 34 | ``` 35 | 36 | `INTEGER` and `DIVIDE` are assigned values in the generated file `calc.h` as follows: 37 | ```c 38 | #define PLUS 1 39 | #define MINUS 2 40 | #define DIVIDE 3 41 | #define TIMES 4 42 | #define INTEGER 5 43 | ``` 44 | 45 | Again, this is a generated file, so if any additions are made to this file, they'll be over-written when re-running lemon. 46 | 47 | 48 | 51 | 52 | [Serghei Iakovlev]: https://github.com/sergeyklay "View Serghei Iakovlev's GitHub profile" 53 | 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # Lemon Grove # 3 | # https://github.com/tajmone/lemon-grove # 4 | ############################################ 5 | 6 | ## ============ 7 | ## Binary files 8 | ## ============ 9 | 10 | ## Object files 11 | ############### 12 | *.elf 13 | *.ko 14 | *.o 15 | *.obj 16 | 17 | ## Static libraries 18 | ################### 19 | *.a 20 | *.la 21 | *.lib 22 | *.lo 23 | 24 | ## Shared objects 25 | ################# 26 | *.dll 27 | *.dylib 28 | *.so 29 | *.so.* 30 | 31 | ## Executables 32 | ############## 33 | *.app 34 | *.exe 35 | *.out 36 | 37 | ## =============== 38 | ## Temp Work Files 39 | ## =============== 40 | ___*.* 41 | *.lnk 42 | *.url 43 | README.html 44 | 45 | ############################ 46 | ## COMMON IGNORE PATTERNS ## 47 | ############################ 48 | # Based on ".gitignore" created by: 49 | # https://www.gitignore.io/api/windows,linux,macos 50 | 51 | ## Linux 52 | ######## 53 | 54 | *~ 55 | 56 | # 
temporary files which can be created if a process still has a handle open of a deleted 57 | # file 58 | .fuse_hidden* 59 | 60 | # KDE directory preferences 61 | .directory 62 | 63 | # Linux trash folder which might appear on any partition or disk 64 | .Trash-* 65 | 66 | # .nfs files are created when an open file is removed but is still being accessed 67 | .nfs* 68 | 69 | ## macOS 70 | ######## 71 | *.DS_Store 72 | .AppleDouble 73 | .LSOverride 74 | 75 | # Icon must end with two \r 76 | Icon 77 | 78 | # Thumbnails 79 | ._* 80 | 81 | # Files that might appear in the root of a volume 82 | .DocumentRevisions-V100 83 | .fseventsd 84 | .Spotlight-V100 85 | .TemporaryItems 86 | .Trashes 87 | .VolumeIcon.icns 88 | .com.apple.timemachine.donotpresent 89 | 90 | # Directories potentially created on remote AFP share 91 | .AppleDB 92 | .AppleDesktop 93 | Network Trash Folder 94 | Temporary Items 95 | .apdisk 96 | 97 | ## Windows 98 | ########## 99 | 100 | # Windows thumbnail cache files 101 | Thumbs.db 102 | ehthumbs.db 103 | ehthumbs_vista.db 104 | 105 | # Folder config file 106 | Desktop.ini 107 | 108 | # Recycle Bin used on file shares 109 | .BIN/ 110 | 111 | # Windows Installer files 112 | *.cab 113 | *.msi 114 | *.msm 115 | *.msp 116 | 117 | # End of https://www.gitignore.io/api/windows,linux,macos,purebasic 118 | 119 | # EOF # 120 | -------------------------------------------------------------------------------- /validate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # "validate.sh" v1.1.1 | 2019/11/20 4 | #------------------------------------------------------------------------------- 5 | # Validate code style consistency in the repository via EditorConfig settings 6 | # and the EClint validator tool: 7 | # https://editorconfig.org 8 | # https://www.npmjs.com/package/eclint 9 | #------------------------------------------------------------------------------- 10 | echo -e 
"\n\033[34;1m================================================" 11 | echo -e "\033[33;1mValidating Code Styles via EditorConfig Settings" 12 | echo -e "\033[34;1m================================================\033[0m" 13 | 14 | # ================== 15 | # Check Dependencies 16 | # ================== 17 | # Since the script might also be run locally by end users, check that EClint is 18 | # installed on the user machine: 19 | 20 | if eclint --version > /dev/null 2>&1 ; then 21 | echo -e "Using:" 22 | echo -e "\033[34;1m*\033[35m Node.js $(node -v)" 23 | echo -e "\033[34;1m*\033[35m EClint v$(eclint --version).\n\033[31;1m" 24 | else 25 | echo -e "\033[31;1m~~~ ERROR! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 26 | echo -e "\033[31;1mIn order to run this script you need to install EClint (Node.js):\n" 27 | echo -e "\033[31;1m\thttps://www.npmjs.com/package/eclint" 28 | echo -e "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\033[0m" 29 | echo -e "If you've already installed Node.js on your machine, type:\n" 30 | echo -e "\033[33;1m\tnpm install -g eclint" 31 | echo -e "\033[31;1m\n/// Aborting All Tests ///\033[0m" 32 | exit 1 33 | fi 34 | 35 | # ============== 36 | # Validate Files 37 | # ============== 38 | # Check that project files meet the code style standards set in `.editorconfig`; 39 | # if not, print only the list of files that failed -- because EClint reports are 40 | # usually too long. 41 | 42 | tmpLog=$(mktemp) 43 | eclint check 2> $tmpLog || { 44 | echo -e "\033[31;1m~~~ ERROR! 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"; 45 | echo -e "\033[31;1mThe following files didn't pass the validation test:\n\033[33;1m"; 46 | cat $tmpLog | grep "^[^ ]"; 47 | echo -e "\033[31;1m\n\033[31;1mRun ECLint locally for detailed information about the problems."; 48 | echo -e "\033[31;1m~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"; 49 | echo -e "\033[31;1m/// Aborting All Tests ///\033[0m"; 50 | rm $tmpLog; 51 | exit 1; 52 | } 53 | rm $tmpLog; 54 | echo -e "\033[32;1m/// Test Passed ///\033[0m" 55 | exit 56 | 57 | # EOF # 58 | -------------------------------------------------------------------------------- /lemon/examples/calc2/README.md: -------------------------------------------------------------------------------- 1 | # Calculator 2 2 | 3 | A slightly more complex calculator version than the one provided in our [previous example](../calc/). 4 | 5 | Contributed by [Serghei Iakovlev], public domain. 6 | 7 | ----- 8 | 9 | **Table of Contents** 10 | 11 | 12 | 13 | - [Project Files](#project-files) 14 | - [About](#about) 15 | - [Building Instructions](#building-instructions) 16 | 17 | 18 | 19 | ----- 20 | 21 | # Project Files 22 | 23 | - [`calc2.y`][calc2.y] — Lemon template for the calculator. 24 | - [`main.c`][main.c] — custom `main()` code, appended to the generated `calc2.c` source by Make. 25 | - [`token.h`][token.h] — defines token type as a structure. 26 | 27 | # About 28 | 29 | This version of our calculator is a more interesting example. 30 | The main difference is the definition of the token type as a structure. 31 | This token type is defined in `token.h`, using the following 4 lines: 32 | 33 | ``` c 34 | typedef struct Token { 35 | const char *op; 36 | int value; 37 | unsigned n; 38 | } Token; 39 | ``` 40 | 41 | Defining a structure for the token type is the most common practice to provide flexibility in semantic action, or the piece of code on the right of the production rule. 
42 | Here is an example: 43 | 44 | ``` c 45 | expr(A) ::= expr(B) MINUS expr(C). { 46 | A.value = B.value - C.value; 47 | A.n = B.n+1 + C.n+1; 48 | } 49 | ``` 50 | 51 | The `%token_type` is defined in `calc2.y`: 52 | 53 | ``` c 54 | %token_type {Token} 55 | ``` 56 | 57 | This structure supports both a value and a count. 58 | You could add many more values, if you wanted to. 59 | 60 | # Building Instructions 61 | 62 | To compile the program, type the following in the terminal: 63 | 64 | ```sh 65 | $ make 66 | ``` 67 | 68 | > **MAKE NOTE** — Requires GNU Make version 4. 69 | 70 | 71 | 72 | > **WINDOWS NOTE** — The above command must be typed in Bash, it won't work in CMD or PowerShell terminals (use the Bash that ships with Git for Windows). 73 | 74 | 75 | 76 | > **MINGW NOTE** — If you're using MinGW to compile, instead of `make` type: 77 | > 78 | > ```sh 79 | > $ mingw32-make 80 | > ``` 81 | 82 | If everything worked as expected, you'll find the following new files in this folder: 83 | 84 | - `calc2.h` — Lemon-generated header file. 85 | - `calc2.c` — Lemon-generated parser code. 86 | - `calc2`/`calc2.exe` — the executable calculator example. 87 | - `calc2.out` — Lemon-generated info on parser states, symbols and rules. 88 | 89 | Then, to run the example issue the following command: 90 | 91 | ```sh 92 | $ ./calc2 93 | ``` 94 | 95 | 98 | 99 | [Serghei Iakovlev]: https://github.com/sergeyklay "View Serghei Iakovlev's GitHub profile" 100 | 101 | 102 | 103 | [main.c]: ./main.c "View source file" 104 | [token.h]: ./token.h "View source file" 105 | [calc2.y]: ./calc2.y "View source file" 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /lemon/sliced/msort.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** A generic merge-sort program. 3 | ** 4 | ** USAGE: 5 | ** Let "ptr" be a pointer to some structure which is at the head of 6 | ** a null-terminated list. 
Then to sort the list call: 7 | ** 8 | ** ptr = msort(ptr,&(ptr->next),cmpfnc); 9 | ** 10 | ** In the above, "cmpfnc" is a pointer to a function which compares 11 | ** two instances of the structure and returns an integer, as in 12 | ** strcmp. The second argument is a pointer to the pointer to the 13 | ** second element of the linked list. This address is used to compute 14 | ** the offset to the "next" field within the structure. The offset to 15 | ** the "next" field must be constant for all structures in the list. 16 | ** 17 | ** The function returns a new pointer which is the head of the list 18 | ** after sorting. 19 | ** 20 | ** ALGORITHM: 21 | ** Merge-sort. 22 | */ 23 | 24 | /* 25 | ** Return a pointer to the next structure in the linked list. 26 | */ 27 | #define NEXT(A) (*(char**)(((char*)A)+offset)) 28 | 29 | /* 30 | ** Inputs: 31 | ** a: A sorted, null-terminated linked list. (May be null). 32 | ** b: A sorted, null-terminated linked list. (May be null). 33 | ** cmp: A pointer to the comparison function. 34 | ** offset: Offset in the structure to the "next" field. 35 | ** 36 | ** Return Value: 37 | ** A pointer to the head of a sorted list containing the elements 38 | ** of both a and b. 39 | ** 40 | ** Side effects: 41 | ** The "next" pointers for elements in the lists a and b are 42 | ** changed. 
43 | */ 44 | static char *merge( 45 | char *a, 46 | char *b, 47 | int (*cmp)(const char*,const char*), 48 | int offset /* Byte offset of the "next" field; read implicitly by NEXT() */ 49 | ){ 50 | char *ptr, *head; /* ptr: last node merged so far; head: first node of the result */ 51 | 52 | if( a==0 ){ 53 | head = b; 54 | }else if( b==0 ){ 55 | head = a; 56 | }else{ 57 | if( (*cmp)(a,b)<=0 ){ /* on a tie the element from a goes first */ 58 | ptr = a; 59 | a = NEXT(a); 60 | }else{ 61 | ptr = b; 62 | b = NEXT(b); 63 | } 64 | head = ptr; 65 | while( a && b ){ 66 | if( (*cmp)(a,b)<=0 ){ 67 | NEXT(ptr) = a; 68 | ptr = a; 69 | a = NEXT(a); 70 | }else{ 71 | NEXT(ptr) = b; 72 | ptr = b; 73 | b = NEXT(b); 74 | } 75 | } 76 | /* One list is exhausted: attach whatever remains of the other */ if( a ) NEXT(ptr) = a; 77 | else NEXT(ptr) = b; 78 | } 79 | return head; 80 | } 81 | 82 | /* 83 | ** Inputs: 84 | ** list: Pointer to a singly-linked list of structures. 85 | ** next: Pointer to pointer to the second element of the list. 86 | ** cmp: A comparison function. 87 | ** 88 | ** Return Value: 89 | ** A pointer to the head of a sorted list containing the elements 90 | ** originally in list. 91 | ** 92 | ** Side effects: 93 | ** The "next" pointers for elements in list are changed. 
94 | */ 95 | #define LISTSIZE 30 96 | static char *msort( 97 | char *list, 98 | char **next, 99 | int (*cmp)(const char*,const char*) 100 | ){ 101 | unsigned long offset; 102 | char *ep; 103 | char *set[LISTSIZE]; 104 | int i; 105 | offset = (unsigned long)((char*)next - (char*)list); 106 | for(i=0; i 48 | 49 | [Unlicense]: https://unlicense.org/ "Visit Unlicense.org" 50 | 51 | [CC0]: https://creativecommons.org/publicdomain/zero/1.0/deed.en "View CC0 1.0 Universal at Creative Commons website" 52 | 53 | [EditorConfig]: https://editorconfig.org/ "Visit EditorConfig website" 54 | [EClint]: https://www.npmjs.com/package/eclint "Visit EClint page at NPM" 55 | [Travis CI]: https://travis-ci.com/ "Visit Travis CI website" 56 | 57 | 58 | [validate.sh]: ./validate.sh "View source script" 59 | [.editorconfig]: ./.editorconfig "View EditorConfig settings file" 60 | 61 | 62 | -------------------------------------------------------------------------------- /lemon/sliced/configlist.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Routines for processing a configuration list and building a state 3 | ** in the LEMON parser generator. 
4 | */ 5 | 6 | static struct config *freelist = 0; /* List of free configurations */ 7 | static struct config *current = 0; /* Top of list of configurations */ 8 | static struct config **currentend = 0; /* Last on list of configs */ 9 | static struct config *basis = 0; /* Top of list of basis configs */ 10 | static struct config **basisend = 0; /* End of list of basis configs */ 11 | 12 | /* Return a pointer to a new, zero-filled configuration. Calls memory_error() if the allocation fails. */ 13 | PRIVATE struct config *newconfig(void){ 14 | struct config *cfp = (struct config*)calloc(1, sizeof(struct config)); if( cfp==0 ) memory_error(); return cfp; 15 | } 16 | 17 | /* The configuration "old" is no longer used; push it onto the free list */ 18 | PRIVATE void deleteconfig(struct config *old) 19 | { 20 | old->next = freelist; 21 | freelist = old; 22 | } 23 | 24 | /* Initialize the configuration list builder */ 25 | void Configlist_init(void){ 26 | current = 0; 27 | currentend = &current; 28 | basis = 0; 29 | basisend = &basis; 30 | Configtable_init(); 31 | return; 32 | } 33 | 34 | /* Reset the configuration list builder and clear the configuration table */ 35 | void Configlist_reset(void){ 36 | current = 0; 37 | currentend = &current; 38 | basis = 0; 39 | basisend = &basis; 40 | Configtable_clear(0); 41 | return; 42 | } 43 | 44 | /* Add another configuration to the configuration list. If an equal configuration (same rule and dot) is already in the table, return it instead of creating a new one. */ 45 | struct config *Configlist_add( 46 | struct rule *rp, /* The rule */ 47 | int dot /* Index into the RHS of the rule where the dot goes */ 48 | ){ 49 | struct config *cfp, model; 50 | 51 | assert( currentend!=0 ); 52 | model.rp = rp; 53 | model.dot = dot; 54 | cfp = Configtable_find(&model); 55 | if( cfp==0 ){ 56 | cfp = newconfig(); 57 | cfp->rp = rp; 58 | cfp->dot = dot; 59 | cfp->fws = SetNew(); 60 | cfp->stp = 0; 61 | cfp->fplp = cfp->bplp = 0; 62 | cfp->next = 0; 63 | cfp->bp = 0; 64 | *currentend = cfp; 65 | currentend = &cfp->next; 66 | Configtable_insert(cfp); 67 | } 68 | return cfp; 69 | } 70 | 71 | /* Add a basis configuration to the configuration list */ 72 | struct config *Configlist_addbasis(struct rule *rp, int dot) 73 | { 74 | struct config 
*cfp, model; 75 | 76 | assert( basisend!=0 ); 77 | assert( currentend!=0 ); 78 | model.rp = rp; 79 | model.dot = dot; 80 | cfp = Configtable_find(&model); 81 | if( cfp==0 ){ 82 | cfp = newconfig(); 83 | cfp->rp = rp; 84 | cfp->dot = dot; 85 | cfp->fws = SetNew(); 86 | cfp->stp = 0; 87 | cfp->fplp = cfp->bplp = 0; 88 | cfp->next = 0; 89 | cfp->bp = 0; 90 | *currentend = cfp; 91 | currentend = &cfp->next; 92 | *basisend = cfp; 93 | basisend = &cfp->bp; 94 | Configtable_insert(cfp); 95 | } 96 | return cfp; 97 | } 98 | 99 | /* Compute the closure of the configuration list. Configurations appended by Configlist_add() during the walk are visited by the same loop. */ 100 | void Configlist_closure(struct lemon *lemp) 101 | { 102 | struct config *cfp, *newcfp; 103 | struct rule *rp, *newrp; 104 | struct symbol *sp, *xsp; 105 | int i, dot; 106 | 107 | assert( currentend!=0 ); 108 | for(cfp=current; cfp; cfp=cfp->next){ 109 | rp = cfp->rp; 110 | dot = cfp->dot; 111 | if( dot>=rp->nrhs ) continue; /* dot is at the end of the rule: nothing to expand */ 112 | sp = rp->rhs[dot]; 113 | if( sp->type==NONTERMINAL ){ 114 | if( sp->rule==0 && sp!=lemp->errsym ){ 115 | ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", 116 | sp->name); 117 | lemp->errorcnt++; 118 | } 119 | for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ 120 | newcfp = Configlist_add(newrp,0); 121 | for(i=dot+1; i<rp->nrhs; i++){ 122 | xsp = rp->rhs[i]; 123 | if( xsp->type==TERMINAL ){ 124 | SetAdd(newcfp->fws,xsp->index); 125 | break; 126 | }else if( xsp->type==MULTITERMINAL ){ 127 | int k; 128 | for(k=0; k<xsp->nsubsym; k++){ 129 | SetAdd(newcfp->fws, xsp->subsym[k]->index); 130 | } 131 | break; 132 | }else{ 133 | SetUnion(newcfp->fws,xsp->firstset); 134 | if( xsp->lambda==LEMON_FALSE ) break; 135 | } 136 | } 137 | /* No break above: every symbol after the dot can be empty (or there is none), so link cfp forward to newcfp */ if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); 138 | } 139 | } 140 | } 141 | return; 142 | } 143 | 144 | /* Sort the configuration list */ 145 | void Configlist_sort(void){ 146 | current = (struct config*)msort((char*)current,(char**)&(current->next), 147 | Configcmp); 148 | currentend = 0; 149 | return; 150 | } 151 | 152 | /* Sort the 
basis configuration list */ 153 | void Configlist_sortbasis(void){ 154 | /* NOTE(review): the sort starts at "current" but follows the bp links; presumably the head of "current" is also the first basis configuration. Confirm against callers. */ basis = (struct config*)msort((char*)current,(char**)&(current->bp), 155 | Configcmp); 156 | basisend = 0; 157 | return; 158 | } 159 | 160 | /* Return a pointer to the head of the configuration list and 161 | ** reset the list */ 162 | struct config *Configlist_return(void){ 163 | struct config *old; 164 | old = current; 165 | current = 0; 166 | currentend = 0; 167 | return old; 168 | } 169 | 170 | /* Return a pointer to the head of the basis configuration list and 171 | ** reset that list */ 172 | struct config *Configlist_basis(void){ 173 | struct config *old; 174 | old = basis; 175 | basis = 0; 176 | basisend = 0; 177 | return old; 178 | } 179 | 180 | /* Free all elements of the given configuration list. Each element's 
follow-set is released and the element is handed to deleteconfig(). */ 181 | void Configlist_eat(struct config *cfp) 182 | { 183 | struct config *nextcfp; 184 | for(; cfp; cfp=nextcfp){ 185 | nextcfp = cfp->next; /* saved first: deleteconfig() overwrites cfp->next */ 186 | assert( cfp->fplp==0 ); 187 | assert( cfp->bplp==0 ); 188 | if( cfp->fws ) SetFree(cfp->fws); 189 | deleteconfig(cfp); 190 | } 191 | return; 192 | } 193 | -------------------------------------------------------------------------------- /lemon/sliced/lemon.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** This file contains all sources (including headers) to the LEMON 3 | ** LALR(1) parser generator. The sources have been combined into a 4 | ** single file to make it easy to include LEMON in the source tree 5 | ** and Makefile of another program. 6 | ** 7 | ** The author of this program disclaims copyright. 
8 | */ 9 | #include <stdio.h> 10 | #include <stdarg.h> 11 | #include <string.h> 12 | #include <ctype.h> 13 | #include <stdlib.h> 14 | #include <assert.h> 15 | 16 | #define ISSPACE(X) isspace((unsigned char)(X)) 17 | #define ISDIGIT(X) isdigit((unsigned char)(X)) 18 | #define ISALNUM(X) isalnum((unsigned char)(X)) 19 | #define ISALPHA(X) isalpha((unsigned char)(X)) 20 | #define ISUPPER(X) isupper((unsigned char)(X)) 21 | #define ISLOWER(X) islower((unsigned char)(X)) 22 | 23 | 24 | #ifndef __WIN32__ 25 | # if defined(_WIN32) || defined(WIN32) 26 | # define __WIN32__ 27 | # endif 28 | #endif 29 | 30 | #ifdef __WIN32__ 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | extern int access(const char *path, int mode); 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | #else 39 | #include <unistd.h> 40 | #endif 41 | 42 | /* #define PRIVATE static */ 43 | #define PRIVATE 44 | 45 | #ifdef TEST 46 | #define MAXRHS 5 /* Set low to exercise exception code */ 47 | #else 48 | #define MAXRHS 1000 49 | #endif 50 | 51 | extern void memory_error(void); 52 | static int showPrecedenceConflict = 0; 53 | static char *msort(char*,char**,int(*)(const char*,const char*)); 54 | 55 | /* 56 | ** Compilers are getting increasingly pedantic about type conversions 57 | ** as C evolves ever closer to Ada.... To work around the latest problems 58 | ** we have to define the following variant of strlen(). 59 | */ 60 | #define lemonStrlen(X) ((int)strlen(X)) 61 | 62 | /* 63 | ** Compilers are starting to complain about the use of sprintf() and strcpy(), 64 | ** saying they are unsafe. So we define our own versions of those routines too. 65 | ** 66 | ** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and 67 | ** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). 68 | ** The third is a helper routine for lemon_vsprintf() that adds text to the end of a 69 | ** buffer, making sure the buffer is always zero-terminated. 
70 | ** 71 | ** The string formatter is a minimal subset of stdlib sprintf() supporting only 72 | ** a few simple conversions: 73 | ** 74 | ** %d 75 | ** %s 76 | ** %.*s 77 | ** 78 | */ 79 | static void lemon_addtext( 80 | char *zBuf, /* The buffer to which text is added */ 81 | int *pnUsed, /* Slots of the buffer used so far */ 82 | const char *zIn, /* Text to add */ 83 | int nIn, /* Bytes of text to add. -1 to use strlen() */ 84 | int iWidth /* Field width. Negative to left justify */ 85 | ){ 86 | if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} 87 | while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; } 88 | if( nIn==0 ){ zBuf[*pnUsed] = 0; return; } /* keep the buffer terminated even when only padding was written */ 89 | memcpy(&zBuf[*pnUsed], zIn, nIn); 90 | *pnUsed += nIn; 91 | while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; } 92 | zBuf[*pnUsed] = 0; 93 | } 94 | static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ 95 | int i, j, k, c; 96 | int nUsed = 0; 97 | const char *z; 98 | char zTemp[50]; 99 | str[0] = 0; 100 | for(i=j=0; (c = zFormat[i])!=0; i++){ 101 | if( c=='%' ){ 102 | int iWidth = 0; 103 | lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); 104 | c = zFormat[++i]; 105 | if( ISDIGIT(c) || (c=='-' && ISDIGIT(zFormat[i+1])) ){ 106 | if( c=='-' ) i++; 107 | while( ISDIGIT(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; 108 | if( c=='-' ) iWidth = -iWidth; 109 | c = zFormat[i]; 110 | } 111 | if( c=='d' ){ 112 | int v = va_arg(ap, int); 113 | /* Work on the magnitude as unsigned so that negating INT_MIN cannot overflow */ 114 | unsigned int u = v<0 ? 0u-(unsigned int)v : (unsigned int)v; 115 | k = 0; 116 | do{ 117 | k++; 118 | zTemp[sizeof(zTemp)-k] = (char)('0' + u%10); 119 | u /= 10; 120 | }while( u>0 ); 121 | /* Sign and digits go out in a single call so the field width is applied exactly once */ 122 | if( v<0 ){ 123 | k++; zTemp[sizeof(zTemp)-k] = '-'; 124 | } 125 | lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); 126 | }else if( c=='s' ){ 127 | z = va_arg(ap, const char*); 128 | lemon_addtext(str, &nUsed, z, -1, iWidth); 129 | }else if( c=='.' 
&& memcmp(&zFormat[i], ".*s", 3)==0 ){ 130 | i += 2; 131 | k = va_arg(ap, int); 132 | z = va_arg(ap, const char*); 133 | lemon_addtext(str, &nUsed, z, k, iWidth); 134 | }else if( c=='%' ){ 135 | lemon_addtext(str, &nUsed, "%", 1, 0); 136 | }else{ 137 | fprintf(stderr, "illegal format\n"); 138 | exit(1); 139 | } 140 | j = i+1; 141 | } 142 | } 143 | /* Flush the literal text that follows the last conversion */ lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); 144 | return nUsed; 145 | } 146 | /* sprintf() replacement: format into str through lemon_vsprintf() and return its result (the number of buffer slots used). No buffer size is passed, so the caller must provide enough room. */ static int lemon_sprintf(char *str, const char *format, ...){ 147 | va_list ap; 148 | int rc; 149 | va_start(ap, format); 150 | rc = lemon_vsprintf(str, format, ap); 151 | va_end(ap); 152 | return rc; 153 | } 154 | /* Copy src, including its zero terminator, to dest. No bounds check is made. */ static void lemon_strcpy(char *dest, const char *src){ 155 | while( (*(dest++) = *(src++))!=0 ){} 156 | } 157 | /* Append src to the zero-terminated string already in dest. No bounds check is made. */ static void lemon_strcat(char *dest, const char *src){ 158 | while( *dest ) dest++; 159 | lemon_strcpy(dest, src); 160 | } 161 | 162 | 163 | /* a few forward declarations... */ 164 | struct rule; 165 | struct lemon; 166 | struct action; 167 | 168 | static struct action *Action_new(void); 169 | static struct action *Action_sort(struct action *); 170 | 171 | #include "build.h" 172 | #include "configlist.h" 173 | #include "error.h" 174 | #include "option.h" 175 | #include "parse.h" 176 | #include "plink.h" 177 | #include "report.h" 178 | #include "set.h" 179 | #include "struct.h" 180 | #include "table.h" 181 | #include "action.c" 182 | #include "build.c" 183 | #include "configlist.c" 184 | #include "error.c" 185 | #include "main.c" 186 | #include "msort.c" 187 | #include "option.c" 188 | #include "parse.c" 189 | #include "plink.c" 190 | #include "report.c" 191 | #include "set.c" 192 | #include "table.c" 193 | -------------------------------------------------------------------------------- /lemon/sliced/option.c: -------------------------------------------------------------------------------- 1 | static char **g_argv; 2 | static struct s_options *op; 3 | static FILE *errstream; 4 | 5 | #define ISOPT(X) 
((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0) 6 | 7 | /* 8 | ** Print the command line with a caret pointing to the k-th character 9 | ** of the n-th field. 10 | */ 11 | static void errline(int n, int k, FILE *err) 12 | { 13 | int spcnt, i; 14 | if( g_argv[0] ){ 15 | fprintf(err,"%s",g_argv[0]); 16 | spcnt = lemonStrlen(g_argv[0]) + 1; 17 | }else{ 18 | spcnt = 0; 19 | } 20 | for(i=1; i 17 | 18 | - [Introduction](#introduction) 19 | - [DIY Slicing](#diy-slicing) 20 | - [External Links](#external-links) 21 | - [Changelog](#changelog) 22 | 23 | 24 | 25 | ----- 26 | 27 | # Introduction 28 | 29 | The Lemon source file `lemon.c`, like most tools from the [SQLite] project, was created by merging multiple C sources into a single file via a technique called ["amalgamation"], in order to reduce the number of file dependencies and improve performance (5-10% speed gain). 30 | Except that today only the single source file of Lemon survives in the SQLite project — and code maintenance and updates are done directly in the amalgamated single source file. 31 | 32 | I wanted to include in the Lemon Grove project a version of the Lemon source split into modules, to simplify studying its code and working on derivative versions and ports. 33 | Reversing the amalgamation is not a hard task in itself, for the amalgamator adds some comment lines indicating the name of the original file from which the code was taken: 34 | 35 | ```c 36 | void Configlist_reset(void); 37 | 38 | /********* From the file "error.h" ***************************************/ 39 | void ErrorMsg(const char *, int,const char *, ...); 40 | 41 | /****** From the file "option.h" ******************************************/ 42 | enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, 43 | OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; 44 | ``` 45 | 46 | So, splitting the "`lemon.c`" file manually is neither a huge nor hard task. 
47 | But keeping the split sources always up to date with the latest upstream sources from the [SQLite repository] (which are updated quite often) is another matter altogether, and definitely a daunting task. 48 | 49 | Therefore, I've created __[Lemon Slicer]__, a small tool to automate the de-amalgamation process, so that whenever I update the Lemon sources in the parent folder I can update with a single click their de-amalgamated counterparts in this folder. 50 | 51 | Besides automating the task of splitting "`lemon.c`" into its original modules, Lemon Slicer also injects at the end of the left-over contents of the "`lemon.c`" file all the required `#include` directives to ensure that the split contents are loaded back in the correct order, so that the Lemon parser remains buildable by compiling "`lemon.c`". 52 | 53 | # DIY Slicing 54 | 55 | If you need to de-amalgamate the Lemon sources in this project yourself, it can be easily done on Windows OS x64 with the following steps. 56 | 57 | To download (or update) the __Lemon Slicer__ de-amalgamator tool, open the CMD in the parent folder and type: 58 | 59 | ``` 60 | curl -LJO https://github.com/tajmone/lemon-slicer/raw/master/lemon-slicer.exe 61 | ``` 62 | 63 | You'll then find in the parent `lemon/` folder the `lemon-slicer.exe` executable tool. 64 | Just invoke it from the CMD (or double-click on it) and in a few seconds it will update/recreate the de-amalgamated contents in `lemon/sliced/`. 65 | 66 | If you're working on Linux, macOS or 32-bit Windows, you'll need to obtain a precompiled binary of Lemon Slicer matching your OS. 
67 | Lemon Slicer is a cross-platform tool, so visit its repository for more info on how to obtain or compile it: 68 | 69 | - https://github.com/tajmone/lemon-slicer 70 | 71 | Of course, you can use Lemon Slicer to de-amalgamate any version of Lemon outside this project, as long as you execute it in a folder containing the "`lemon.c`" and "`lempar.c`" files (and on the condition that "`lemon.c`" still contains the original comment lines created by the SQLite amalgamator). 72 | 73 | # External Links 74 | 75 | - [Lemon Slicer] — GitHub repository. 76 | - [The SQLite Amalgamation] — for more info about amalgamation. 77 | 78 | # Changelog 79 | 80 | The following changelog lists which versions of the Lemon sources were used to create the de-amalgamated files found in this folder — where "upstream" refers to the [SQLite] project hosting the original Lemon sources. 81 | 82 | - **2021-12-16** 83 | + `lemon.c` sliced from upstream check-in [`f2f279b2`][f2f279b2] (2021-10-04) — fix harmless static analyzer warnings. 84 | + `lempar.c` updated to upstream check-in [`ba4fb518`][ba4fb518] (2021-11-09) — fix so that Lemon can compile with `NDEBUG`. 85 | - **2021-02-10** 86 | + `lemon.c` sliced from upstream check-in [`d1e22e2f`][d1e22e2f] (2021-01-07) — fix compiler warnings and typos. 87 | + `lempar.c` updated to upstream check-in [`203c049c`][203c049c] (2021-01-02) — improved. 88 | - **2020-09-12** 89 | + `lemon.c` sliced from upstream check-in [`430c5d1d`][430c5d1d] (2020-09-05) — bug fix. 
90 | + `lempar.c` updated to upstream check-in [`84d54eb3`][84d54eb3] (2020-09-01) 91 | - **2020-01-05** 92 | + `lemon.c` sliced from upstream check-in [`fccfb8a9`][fccfb8a9] (2019-12-19) 93 | + `lempar.c` updated to upstream check-in [`4d6d2fc0`][4d6d2fc0] (2019-12-11) 94 | - **2019-08-13** 95 | + `lemon.c` sliced from upstream check-in [`2da0eea0`][2da0eea0] (2019-06-03) 96 | + `lempar.c` updated to upstream check-in [`9e664585`][9e664585] (2019-07-16) 97 | 98 | 101 | 102 | [de-amalgamated]: https://www.sqlite.org/amalgamation.html "Learn about amalgamation in the SQLite project" 103 | ["amalgamation"]: https://www.sqlite.org/amalgamation.html "Learn about amalgamation in the SQLite project" 104 | [Lemon Slicer]: https://github.com/tajmone/lemon-slicer "Visit the Lemon Slicer repository on GitHub" 105 | 106 | 107 | 108 | [SQLite]: http://www.sqlite.org/ "Visit SQLite website" 109 | [SQLite repository]: https://sqlite.org/src/doc/trunk/README.md "Visit the SQLite source repository" 110 | [The SQLite Amalgamation]: https://www.sqlite.org/amalgamation.html "Learn about amalgamation in the SQLite project" 111 | 112 | 113 | 114 | [us lemon.c]: https://www.sqlite.org/src/file/tool/lemon.c "View upstream source file" 115 | [us lempar.c]: https://www.sqlite.org/src/file/tool/lempar.c "View upstream source file" 116 | 117 | 118 | 119 | [203c049c]: https://www.sqlite.org/src/info/203c049c66238041 "View upstream check-in 203c049c (2021-01-02)" 120 | [2da0eea0]: https://www.sqlite.org/src/info/2da0eea02d128c37 "View upstream check-in 2da0eea0 (2019-06-03)" 121 | [430c5d1d]: https://www.sqlite.org/src/info/430c5d1da57af452 "View upstream check-in 430c5d1d (2020-09-05)" 122 | [4d6d2fc0]: https://www.sqlite.org/src/info/4d6d2fc046d586a1 "View upstream check-in 4d6d2fc0 (2019-12-11)" 123 | [84d54eb3]: https://www.sqlite.org/src/info/84d54eb357161741 "View upstream check-in 84d54eb3 (2020-09-01)" 124 | [9e664585]: https://www.sqlite.org/src/info/9e66458592d40fbd "View upstream 
check-in 9e664585 (2019-07-16)" 125 | [ba4fb518]: https://www.sqlite.org/src/info/ba4fb51853fbcb8c "View upstream check-in ba4fb518 (2021-11-09)" 126 | [d1e22e2f]: https://www.sqlite.org/src/info/d1e22e2f76cce7eb "View upstream check-in d1e22e2f (2021-01-07)" 127 | [f2f279b2]: https://www.sqlite.org/src/info/f2f279b2cc1c8b3b "View upstream check-in f2f279b2 (2021-10-04)" 128 | [fccfb8a9]: https://www.sqlite.org/src/info/fccfb8a9ed3c1df9 "View upstream check-in fccfb8a9 (2019-12-19)" 129 | 130 | 131 | -------------------------------------------------------------------------------- /lemon/sliced/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Main program file for the LEMON parser generator. 3 | */ 4 | 5 | /* Report an out-of-memory condition and abort. This function 6 | ** is used mostly by the "MemoryCheck" macro in struct.h 7 | */ 8 | void memory_error(void){ 9 | fprintf(stderr,"Out of memory. Aborting...\n"); 10 | exit(1); 11 | } 12 | 13 | static int nDefine = 0; /* Number of -D options on the command line */ 14 | static char **azDefine = 0; /* Name of the -D macros */ 15 | 16 | /* This routine is called with the argument to each -D command-line option. 17 | ** Add the macro defined to the azDefine array. 
18 | */ 19 | static void handle_D_option(char *z){ 20 | char **paz; 21 | nDefine++; 22 | azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); 23 | if( azDefine==0 ){ 24 | fprintf(stderr,"out of memory\n"); 25 | exit(1); 26 | } 27 | paz = &azDefine[nDefine-1]; 28 | *paz = (char *) malloc( lemonStrlen(z)+1 ); 29 | if( *paz==0 ){ 30 | fprintf(stderr,"out of memory\n"); 31 | exit(1); 32 | } 33 | lemon_strcpy(*paz, z); 34 | /* Keep only the macro name: cut the private copy at the first '=' */ for(z=*paz; *z && *z!='='; z++){} 35 | *z = 0; 36 | } 37 | 38 | /* Remember the name of the output directory 39 | */ 40 | static char *outputDir = NULL; 41 | static void handle_d_option(char *z){ 42 | outputDir = (char *) malloc( lemonStrlen(z)+1 ); 43 | if( outputDir==0 ){ 44 | fprintf(stderr,"out of memory\n"); 45 | exit(1); 46 | } 47 | lemon_strcpy(outputDir, z); 48 | } 49 | 50 | /* Remember the name of the user-supplied template file */ static char *user_templatename = NULL; 51 | static void handle_T_option(char *z){ 52 | user_templatename = (char *) malloc( lemonStrlen(z)+1 ); 53 | if( user_templatename==0 ){ 54 | memory_error(); 55 | } 56 | lemon_strcpy(user_templatename, z); 57 | } 58 | 59 | /* Merge together two lists of rules ordered by rule.iRule. When the iRule values are equal the rule from pB is taken first. */ 60 | static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ 61 | struct rule *pFirst = 0; 62 | struct rule **ppPrev = &pFirst; 63 | while( pA && pB ){ 64 | if( pA->iRule<pB->iRule ){ 65 | *ppPrev = pA; 66 | ppPrev = &pA->next; 67 | pA = pA->next; 68 | }else{ 69 | *ppPrev = pB; 70 | ppPrev = &pB->next; 71 | pB = pB->next; 72 | } 73 | } 74 | if( pA ){ 75 | *ppPrev = pA; 76 | }else{ 77 | *ppPrev = pB; 78 | } 79 | return pFirst; 80 | } 81 | 82 | /* 83 | ** Sort a list of rules in order of increasing iRule value 84 | */ 85 | static struct rule *Rule_sort(struct rule *rp){ 86 | unsigned int i; 87 | struct rule *pNext; 88 | struct rule *x[32]; 89 | memset(x, 0, sizeof(x)); 90 | while( rp ){ 91 | pNext = rp->next; 92 | rp->next = 0; 93 | for(i=0; iindex = i; 201 | qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); 202 | 
for(i=0; iindex = i; 203 | while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } 204 | assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); 205 | lem.nsymbol = i - 1; 206 | for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); 207 | lem.nterminal = i; 208 | 209 | /* Assign sequential rule numbers. Start with 0. Put rules that have no 210 | ** reduce action C-code associated with them last, so that the switch() 211 | ** statement that selects reduction actions will have a smaller jump table. 212 | */ 213 | for(i=0, rp=lem.rule; rp; rp=rp->next){ 214 | rp->iRule = rp->code ? i++ : -1; 215 | } 216 | lem.nruleWithAction = i; 217 | for(rp=lem.rule; rp; rp=rp->next){ 218 | if( rp->iRule<0 ) rp->iRule = i++; 219 | } 220 | lem.startRule = lem.rule; 221 | lem.rule = Rule_sort(lem.rule); 222 | 223 | /* Generate a reprint of the grammar, if requested on the command line */ 224 | if( rpflag ){ 225 | Reprint(&lem); 226 | }else{ 227 | /* Initialize the size for all follow and first sets */ 228 | SetSize(lem.nterminal+1); 229 | 230 | /* Find the precedence for every production rule (that has one) */ 231 | FindRulePrecedences(&lem); 232 | 233 | /* Compute the lambda-nonterminals and the first-sets for every 234 | ** nonterminal */ 235 | FindFirstSets(&lem); 236 | 237 | /* Compute all LR(0) states. Also record follow-set propagation 238 | ** links so that the follow-set can be computed later */ 239 | lem.nstate = 0; 240 | FindStates(&lem); 241 | lem.sorted = State_arrayof(); 242 | 243 | /* Tie up loose ends on the propagation links */ 244 | FindLinks(&lem); 245 | 246 | /* Compute the follow set of every reducible configuration */ 247 | FindFollowSets(&lem); 248 | 249 | /* Compute the action tables */ 250 | FindActions(&lem); 251 | 252 | /* Compress the action tables */ 253 | if( compress==0 ) CompressTables(&lem); 254 | 255 | /* Reorder and renumber the states so that states with fewer choices 256 | ** occur at the end. 
This is an optimization that helps make the 257 | ** generated parser tables smaller. */ 258 | if( noResort==0 ) ResortStates(&lem); 259 | 260 | /* Generate a report of the parser generated. (the "y.output" file) */ 261 | if( !quiet ) ReportOutput(&lem); 262 | 263 | /* Generate the source code for the parser */ 264 | ReportTable(&lem, mhflag, sqlFlag); 265 | 266 | /* Produce a header file for use by the scanner. (This step is 267 | ** omitted if the "-m" option is used because makeheaders will 268 | ** generate the file for us.) */ 269 | if( !mhflag ) ReportHeader(&lem); 270 | } 271 | if( statistics ){ 272 | printf("Parser statistics:\n"); 273 | stats_line("terminal symbols", lem.nterminal); 274 | stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); 275 | stats_line("total symbols", lem.nsymbol); 276 | stats_line("rules", lem.nrule); 277 | stats_line("states", lem.nxstate); 278 | stats_line("conflicts", lem.nconflict); 279 | stats_line("action table entries", lem.nactiontab); 280 | stats_line("lookahead table entries", lem.nlookaheadtab); 281 | stats_line("total table size (bytes)", lem.tablesize); 282 | } 283 | if( lem.nconflict > 0 ){ 284 | fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); 285 | } 286 | 287 | /* return 0 on success, 1 on failure. */ 288 | exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; 289 | exit(exitcode); 290 | return (exitcode); 291 | } 292 | -------------------------------------------------------------------------------- /lemon/sliced/struct.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** Principal data structures for the LEMON parser generator. 
3 | */ 4 | 5 | typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; 6 | 7 | /* Symbols (terminals and nonterminals) of the grammar are stored 8 | ** in the following: */ 9 | enum symbol_type { 10 | TERMINAL, 11 | NONTERMINAL, 12 | MULTITERMINAL 13 | }; 14 | enum e_assoc { 15 | LEFT, 16 | RIGHT, 17 | NONE, 18 | UNK 19 | }; 20 | struct symbol { 21 | const char *name; /* Name of the symbol */ 22 | int index; /* Index number for this symbol */ 23 | enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ 24 | struct rule *rule; /* Linked list of rules of this (if an NT) */ 25 | struct symbol *fallback; /* fallback token in case this token doesn't parse */ 26 | int prec; /* Precedence if defined (-1 otherwise) */ 27 | enum e_assoc assoc; /* Associativity if precedence is defined */ 28 | char *firstset; /* First-set for all rules of this symbol */ 29 | Boolean lambda; /* True if NT and can generate an empty string */ 30 | int useCnt; /* Number of times used */ 31 | char *destructor; /* Code which executes whenever this symbol is 32 | ** popped from the stack during error processing */ 33 | int destLineno; /* Line number for start of destructor. Set to 34 | ** -1 for duplicate destructors. */ 35 | char *datatype; /* The data type of information held by this 36 | ** object. Only used if type==NONTERMINAL */ 37 | int dtnum; /* The data type number. In the parser, the value 38 | ** stack is a union. The .yy%d element of this 39 | ** union is the correct data type for this object */ 40 | int bContent; /* True if this symbol ever carries content - if 41 | ** it is ever more than just syntax */ 42 | /* The following fields are used by MULTITERMINALs only */ 43 | int nsubsym; /* Number of constituent symbols in the MULTI */ 44 | struct symbol **subsym; /* Array of constituent symbols */ 45 | }; 46 | 47 | /* Each production rule in the grammar is stored in the following 48 | ** structure. 
*/ 49 | struct rule { 50 | struct symbol *lhs; /* Left-hand side of the rule */ 51 | const char *lhsalias; /* Alias for the LHS (NULL if none) */ 52 | int lhsStart; /* True if left-hand side is the start symbol */ 53 | int ruleline; /* Line number for the rule */ 54 | int nrhs; /* Number of RHS symbols */ 55 | struct symbol **rhs; /* The RHS symbols */ 56 | const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ 57 | int line; /* Line number at which code begins */ 58 | const char *code; /* The code executed when this rule is reduced */ 59 | const char *codePrefix; /* Setup code before code[] above */ 60 | const char *codeSuffix; /* Breakdown code after code[] above */ 61 | struct symbol *precsym; /* Precedence symbol for this rule */ 62 | int index; /* An index number for this rule */ 63 | int iRule; /* Rule number as used in the generated tables */ 64 | Boolean noCode; /* True if this rule has no associated C code */ 65 | Boolean codeEmitted; /* True if the code has been emitted already */ 66 | Boolean canReduce; /* True if this rule is ever reduced */ 67 | Boolean doesReduce; /* Reduce actions occur after optimization */ 68 | Boolean neverReduce; /* Reduce is theoretically possible, but prevented 69 | ** by actions or other outside implementation */ 70 | struct rule *nextlhs; /* Next rule with the same LHS */ 71 | struct rule *next; /* Next rule in the global list */ 72 | }; 73 | 74 | /* A configuration is a production rule of the grammar together with 75 | ** a mark (dot) showing how much of that rule has been processed so far. 76 | ** Configurations also contain a follow-set which is a list of terminal 77 | ** symbols which are allowed to immediately follow the end of the rule. 
78 | ** Every configuration is recorded as an instance of the following: */ 79 | enum cfgstatus { 80 | COMPLETE, 81 | INCOMPLETE 82 | }; 83 | struct config { 84 | struct rule *rp; /* The rule upon which the configuration is based */ 85 | int dot; /* The parse point */ 86 | char *fws; /* Follow-set for this configuration only */ 87 | struct plink *fplp; /* Follow-set forward propagation links */ 88 | struct plink *bplp; /* Follow-set backwards propagation links */ 89 | struct state *stp; /* Pointer to state which contains this */ 90 | enum cfgstatus status; /* used during followset and shift computations */ 91 | struct config *next; /* Next configuration in the state */ 92 | struct config *bp; /* The next basis configuration */ 93 | }; 94 | 95 | enum e_action { 96 | SHIFT, 97 | ACCEPT, 98 | REDUCE, 99 | ERROR, 100 | SSCONFLICT, /* A shift/shift conflict */ 101 | SRCONFLICT, /* Was a reduce, but part of a conflict */ 102 | RRCONFLICT, /* Was a reduce, but part of a conflict */ 103 | SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ 104 | RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ 105 | NOT_USED, /* Deleted by compression */ 106 | SHIFTREDUCE /* Shift first, then reduce */ 107 | }; 108 | 109 | /* Every shift or reduce operation is stored as one of the following */ 110 | struct action { 111 | struct symbol *sp; /* The look-ahead symbol */ 112 | enum e_action type; 113 | union { 114 | struct state *stp; /* The new state, if a shift */ 115 | struct rule *rp; /* The rule, if a reduce */ 116 | } x; 117 | struct symbol *spOpt; /* SHIFTREDUCE optimization to this symbol */ 118 | struct action *next; /* Next action for this state */ 119 | struct action *collide; /* Next action with the same hash */ 120 | }; 121 | 122 | /* Each state of the generated parser's finite state machine 123 | ** is encoded as an instance of the following structure. 
*/ 124 | struct state { 125 | struct config *bp; /* The basis configurations for this state */ 126 | struct config *cfp; /* All configurations in this set */ 127 | int statenum; /* Sequential number for this state */ 128 | struct action *ap; /* List of actions for this state */ 129 | int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ 130 | int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ 131 | int iDfltReduce; /* Default action is to REDUCE by this rule */ 132 | struct rule *pDfltReduce;/* The default REDUCE rule. */ 133 | int autoReduce; /* True if this is an auto-reduce state */ 134 | }; 135 | #define NO_OFFSET (-2147483647) 136 | 137 | /* A followset propagation link indicates that the contents of one 138 | ** configuration followset should be propagated to another whenever 139 | ** the first changes. */ 140 | struct plink { 141 | struct config *cfp; /* The configuration to which linked */ 142 | struct plink *next; /* The next propagate link */ 143 | }; 144 | 145 | /* The state vector for the entire parser generator is recorded as 146 | ** follows. (LEMON uses no global variables and makes little use of 147 | ** static variables. Fields in the following structure can be thought 148 | ** of as being global variables in the program.) 
*/ 149 | struct lemon { 150 | struct state **sorted; /* Table of states sorted by state number */ 151 | struct rule *rule; /* List of all rules */ 152 | struct rule *startRule; /* First rule */ 153 | int nstate; /* Number of states */ 154 | int nxstate; /* nstate with tail degenerate states removed */ 155 | int nrule; /* Number of rules */ 156 | int nruleWithAction; /* Number of rules with actions */ 157 | int nsymbol; /* Number of terminal and nonterminal symbols */ 158 | int nterminal; /* Number of terminal symbols */ 159 | int minShiftReduce; /* Minimum shift-reduce action value */ 160 | int errAction; /* Error action value */ 161 | int accAction; /* Accept action value */ 162 | int noAction; /* No-op action value */ 163 | int minReduce; /* Minimum reduce action */ 164 | int maxAction; /* Maximum action value of any kind */ 165 | struct symbol **symbols; /* Sorted array of pointers to symbols */ 166 | int errorcnt; /* Number of errors */ 167 | struct symbol *errsym; /* The error symbol */ 168 | struct symbol *wildcard; /* Token that matches anything */ 169 | char *name; /* Name of the generated parser */ 170 | char *arg; /* Declaration of the 3rd argument to parser */ 171 | char *ctx; /* Declaration of 2nd argument to constructor */ 172 | char *tokentype; /* Type of terminal symbols in the parser stack */ 173 | char *vartype; /* The default type of non-terminal symbols */ 174 | char *start; /* Name of the start symbol for the grammar */ 175 | char *stacksize; /* Size of the parser stack */ 176 | char *include; /* Code to put at the start of the C file */ 177 | char *error; /* Code to execute when an error is seen */ 178 | char *overflow; /* Code to execute on a stack overflow */ 179 | char *failure; /* Code to execute on parser failure */ 180 | char *accept; /* Code to execute when the parser accepts */ 181 | char *extracode; /* Code appended to the generated file */ 182 | char *tokendest; /* Code to execute to destroy token data */ 183 | char *vardest; /* Code 
for the default non-terminal destructor */ 184 | char *filename; /* Name of the input file */ 185 | char *outname; /* Name of the current output file */ 186 | char *tokenprefix; /* A prefix added to token names in the .h file */ 187 | int nconflict; /* Number of parsing conflicts */ 188 | int nactiontab; /* Number of entries in the yy_action[] table */ 189 | int nlookaheadtab; /* Number of entries in yy_lookahead[] */ 190 | int tablesize; /* Total table size of all tables in bytes */ 191 | int basisflag; /* Print only basis configurations */ 192 | int printPreprocessed; /* Show preprocessor output on stdout */ 193 | int has_fallback; /* True if any %fallback is seen in the grammar */ 194 | int nolinenosflag; /* True if #line statements should not be printed */ 195 | char *argv0; /* Name of the program */ 196 | }; 197 | 198 | #define MemoryCheck(X) if((X)==0){ \ 199 | extern void memory_error(); \ 200 | memory_error(); \ 201 | } 202 | 203 | -------------------------------------------------------------------------------- /lemon/README.md: -------------------------------------------------------------------------------- 1 | # The Lemon Parser Generator 2 | 3 | The Lemon parser generator, created by [D. Richard Hipp], public domain. 4 | 5 | - [`/examples/`][examples] — user contributed examples. 6 | - [`/sliced/`][sliced] — [de-amalgamated] Lemon (split sources) to easen code study and porting. 7 | - [`lemon.c`][lemon.c] — the parser generator program. 8 | - [`lempar.c`][lempar.c] — the template for the parser generator. 9 | - [`lemon.md`][lemon.md] — Lemon documentation. 
10 | 11 | ----- 12 | 13 | **Table of Contents** 14 | 15 | 16 | 17 | - [Meet Lemon](#meet-lemon) 18 | - [Lemon and SQLite](#lemon-and-sqlite) 19 | - [History of Lemon](#history-of-lemon) 20 | - [Files History and Sources](#files-history-and-sources) 21 | - [Changelog](#changelog) 22 | 23 | 24 | 25 | ----- 26 | 27 | # Meet Lemon 28 | 29 | - http://www.hwaci.com/sw/lemon/ 30 | 31 | The Lemon program is an [LALR(1)] [parser generator]. 32 | It takes a context free grammar and converts it into a subroutine that will parse a file using that grammar. 33 | Lemon is similar to the much more famous programs "[YACC]" and "[BISON]". 34 | But lemon is not compatible with either yacc or bison. 35 | There are several important differences: 36 | 37 | * Lemon uses a different grammar syntax which is less prone to programming errors. 38 | * The parser generated by Lemon is both re-entrant and thread-safe. 39 | * Lemon includes the concept of a non-terminal destructor, which makes it much easier to write a parser that does not leak memory. 40 | 41 | The complete source code to the lemon parser generator is contained in two files. 42 | The file [`lemon.c`][lemon.c] is the parser generator program itself. 43 | A separate file [`lempar.c`][lempar.c] is the template for the parser subroutine that lemon generates. 44 | Documentation on lemon is also available. 45 | 46 | Both the source code to lemon itself and the code that lemon generates are in the public domain. 47 | 48 | To see an example of how to use lemon, see the source code to the [SQLite] database engine. 49 | Lemon is maintained as part of the SQLite project. 50 | 51 | 52 | # Lemon and SQLite 53 | 54 | - https://www.sqlite.org/lemon.html 55 | 56 | One of the advantages of hosting code generator tools as part of the project is that the tools can be optimized to serve specific needs of the overall project. 57 | Lemon has benefited from this effect. 
58 | Over the years, the Lemon parser generator has been extended and enhanced to provide new capabilities and improved performance to SQLite. [...] 59 | 60 | The parsing of SQL statements is a significant consumer of CPU cycles in any SQL database engine. 61 | On-going efforts to optimize SQLite have caused the developers to spend a lot of time tweaking Lemon to generate faster parsers. 62 | These efforts have benefited all users of the Lemon parser generator, not just SQLite. 63 | But if Lemon had been a separately maintained tool, it would have been more difficult to make coordinated changes to both SQLite and Lemon, and as a result not as much optimization would have been accomplished. 64 | Hence, the fact that the parser generator tool is included in the source tree for SQLite has turned out to be a net benefit for both the tool itself and for SQLite. 65 | 66 | 67 | # History of Lemon 68 | 69 | Lemon was originally written by [D. Richard Hipp] (also the creator of SQLite) while he was in graduate school at Duke University between 1987 and 1992. 70 | The original creation date of Lemon has been lost, but was probably sometime around 1990. 71 | Lemon generates an LALR(1) parser. 72 | There was a companion LL(1) parser generator tool named "Lime", but the source code for Lime has been lost. 73 | 74 | The Lemon source code was originally written as separate source files, and only later merged into a single "`lemon.c`" source file. 75 | 76 | The author of Lemon and SQLite (Hipp) reports that his C programming skills were greatly enhanced by studying [John Ousterhout]’s original source code to [Tcl]. 77 | Hipp discovered and studied Tcl in 1993. 78 | Lemon was written before then, and SQLite afterwards. 79 | There is a clear difference in the coding styles of these two products, with SQLite seeming to be cleaner, more readable, and easier to maintain. 
80 | 81 | 82 | # Files History and Sources 83 | 84 | This folder contains the original Lemon source files taken from the [SQLite] project: 85 | 86 | - [`lemon.c`][lemon.c] — Taken from SQLite [`tool/lemon.c`][us lemon.c], check-in [`f2f279b2`][f2f279b2] (2021-10-04) 87 | - [`lempar.c`][lempar.c] — Taken from SQLite [`tool/lempar.c`][us lempar.c], check-in [`ba4fb518`][ba4fb518] (2021-11-09) 88 | 89 | And the [official Lemon documentation], ported to markdown by Tristano Ajmone: 90 | 91 | - [`lemon.md`][lemon.md] — Ported from SQLite [`doc/lemon.html`][us lemon.html], updated to check-in [`36624d37`][36624d37] (2021-03-28) 92 | 93 | > **NOTE** — The C sources are unaltered, except for minor whitespace modifications to pass our [EditorConfig] code-styles validation on Travis CI — i.e., stripping trailing whitespace, and other minor aesthetic tweaks that don't involve actual changes to the code. 94 | 95 | # Changelog 96 | 97 | In the following changelog, "upstream" refers to the [SQLite] project hosting the original Lemon sources. 98 | 99 | - **2021-12-16** 100 | + `lemon.c` updated to upstream check-in [`f2f279b2`][f2f279b2] (2021-10-04) — fix harmless static analyzer warnings. 101 | + `lempar.c` updated to upstream check-in [`ba4fb518`][ba4fb518] (2021-11-09) — fix so that Lemon can compile with `NDEBUG`. 102 | - **2021-07-14** 103 | + `lemon.c` compared to upstream check-in [`36624d37`][36624d37] (2021-03-28) — only whitespace noise changes. 104 | + `lempar.c` unchanged. 105 | + `lemon.md` updated to upstream check-in [`36624d37`][36624d37] (2021-03-28) — documents the `%token` directive. 106 | - **2021-02-10** 107 | + `lemon.c` updated to upstream check-in [`d1e22e2f`][d1e22e2f] (2021-01-07) — fix compiler warnings and typos. 108 | + `lempar.c` updated to upstream check-in [`203c049c`][203c049c] (2021-01-02) — improved. 109 | + `lemon.md` compared to upstream check-in [`2ffb2ffa`][2ffb2ffa] (2021-01-16) — no changes affecting markdown version. 
110 | - **2020-09-12** 111 | + `lemon.c` updated to upstream check-in [`430c5d1d`][430c5d1d] (2020-09-05) — bug fix. 112 | + `lempar.c` updated to upstream check-in [`84d54eb3`][84d54eb3] (2020-09-01) — improved. 113 | + `lemon.md` updated to upstream check-in [`84d54eb3`][84d54eb3] (2020-09-01) — new contents. 114 | - **2020-01-05** 115 | + `lemon.c` updated to upstream check-in [`fccfb8a9`][fccfb8a9] (2019-12-19) 116 | + `lempar.c` updated to upstream check-in [`4d6d2fc0`][4d6d2fc0] (2019-12-11) 117 | - **2019-08-11** 118 | + `lemon.c` updated to upstream check-in [`2da0eea0`][2da0eea0] (2019-06-03) 119 | + `lempar.c` updated to upstream check-in [`9e664585`][9e664585] (2019-07-16) 120 | - **2019-05-31** 121 | + `lemon.c` updated to upstream check-in [`ca068d82`][ca068d82] (2019-05-10) 122 | - **2019-04-24** 123 | + `lemon.c` taken from upstream check-in [`1caff0fb`][1caff0fb] (2019-01-15) 124 | + `lempar.c` taken from upstream check-in [`70fe8ec2`][70fe8ec2] (2018-12-03) 125 | + `lemon.md` ported from upstream check-in [`9c9c46dc`][9c9c46dc] (2018-11-27) 126 | 127 | 130 | 131 | [de-amalgamated]: https://www.sqlite.org/amalgamation.html "Learn about amalgamation in the SQLite project" 132 | [LALR(1)]: https://en.wikipedia.org/wiki/LALR_parser "See Wikipedia page on LALR parser" 133 | [parser generator]: https://en.wikipedia.org/wiki/Compiler-compiler "See Wikipedia page on Compiler-compiler" 134 | 135 | 136 | 137 | [lemon.c]: ./lemon.c "View source" 138 | [lempar.c]: ./lempar.c "View source" 139 | [lemon.md]: ./lemon.md "View source" 140 | 141 | 142 | 143 | [examples]: ./examples/ "Navigate folder" 144 | [sliced]: ./sliced/ "Navigate folder" 145 | 146 | 147 | 148 | [us lemon.c]: https://www.sqlite.org/src/file/tool/lemon.c "View latest upstream source file version" 149 | [us lempar.c]: https://www.sqlite.org/src/file/tool/lempar.c "View latest upstream source file version" 150 | [us lemon.html]: https://www.sqlite.org/src/file/doc/lemon.html "View latest upstream 
source file version" 151 | [official Lemon documentation]: https://sqlite.org/src/doc/trunk/doc/lemon.html "View original HTML documentation" 152 | 153 | 154 | 155 | [1caff0fb]: https://www.sqlite.org/src/info/1caff0fb0b2051e2 "View upstream check-in 1caff0fb (2019-01-15)" 156 | [203c049c]: https://www.sqlite.org/src/info/203c049c66238041 "View upstream check-in 203c049c (2021-01-02)" 157 | [2da0eea0]: https://www.sqlite.org/src/info/2da0eea02d128c37 "View upstream check-in 2da0eea0 (2019-06-03)" 158 | [2ffb2ffa]: https://www.sqlite.org/src/info/2ffb2ffa0ea147ed "View upstream check-in 2ffb2ffa (2021-01-16)" 159 | [36624d37]: https://www.sqlite.org/src/info/36624d3740a8d095 "View upstream check-in 36624d37 (2021-03-28)" 160 | [430c5d1d]: https://www.sqlite.org/src/info/430c5d1da57af452 "View upstream check-in 430c5d1d (2020-09-05)" 161 | [4d6d2fc0]: https://www.sqlite.org/src/info/4d6d2fc046d586a1 "View upstream check-in 4d6d2fc0 (2019-12-11)" 162 | [70fe8ec2]: https://www.sqlite.org/src/info/70fe8ec2ae3099b8 "View upstream check-in 70fe8ec2 (2018-12-03)" 163 | [84d54eb3]: https://www.sqlite.org/src/info/84d54eb357161741 "View upstream check-in 84d54eb3 (2020-09-01)" 164 | [9c9c46dc]: https://www.sqlite.org/src/info/9c9c46dcbe92aeab "View upstream check-in 9c9c46dc (2018-11-27)" 165 | [9e664585]: https://www.sqlite.org/src/info/9e66458592d40fbd "View upstream check-in 9e664585 (2019-07-16)" 166 | [ba4fb518]: https://www.sqlite.org/src/info/ba4fb51853fbcb8c "View upstream check-in ba4fb518 (2021-11-09)" 167 | [ca068d82]: https://www.sqlite.org/src/info/ca068d82387fc3cd "View upstream check-in ca068d82 (2019-05-10)" 168 | [d1e22e2f]: https://www.sqlite.org/src/info/d1e22e2f76cce7eb "View upstream check-in d1e22e2f (2021-01-07)" 169 | [f2f279b2]: https://www.sqlite.org/src/info/f2f279b2cc1c8b3b "View upstream check-in f2f279b2 (2021-10-04)" 170 | [fccfb8a9]: https://www.sqlite.org/src/info/fccfb8a9ed3c1df9 "View upstream check-in fccfb8a9 (2019-12-19)" 171 | 172 | 173 
| 174 | 175 | [SQLite]: http://www.sqlite.org/ "Visit SQLite website" 176 | [Bison]: https://www.gnu.org/software/bison/ "Visit GNU Bison website" 177 | [Yacc]: https://en.wikipedia.org/wiki/Yacc "Wikipedia page on Yacc" 178 | [Tcl]: https://www.tcl.tk/ "Visit Tcl website" 179 | [EditorConfig]: https://editorconfig.org/ "Visit EditorConfig website" 180 | 181 | 182 | 183 | [D. Richard Hipp]: http://www.hwaci.com/drh/ "Visit D. Richard Hipp's website" 184 | [John Ousterhout]: https://web.stanford.edu/~ouster/cgi-bin/home.php "Visit John Ousterhout's web page at Stanford University" 185 | 186 | 187 | -------------------------------------------------------------------------------- /lemon/sliced/action.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Routines processing parser actions in the LEMON parser generator. 3 | */ 4 | 5 | /* Allocate a new parser action. Actions are handed out from a free list that is refilled in batches of 100 (amt) with calloc(). */ 6 | static struct action *Action_new(void){ 7 | static struct action *actionfreelist = 0; 8 | struct action *newaction; 9 | 10 | if( actionfreelist==0 ){ 11 | int i; 12 | int amt = 100; 13 | actionfreelist = (struct action *)calloc(amt, sizeof(struct action)); 14 | if( actionfreelist==0 ){ 15 | fprintf(stderr,"Unable to allocate memory for a new parser action."); 16 | exit(1); 17 | } 18 | for(i=0; i<amt-1; i++) actionfreelist[i].next = &actionfreelist[i+1]; 19 | actionfreelist[amt-1].next = 0; 20 | } 21 | newaction = actionfreelist; 22 | actionfreelist = actionfreelist->next; 23 | return newaction; 24 | } 25 | 26 | /* Compare two actions for sorting purposes. 
Return negative, zero, or 27 | ** positive if the first action is less than, equal to, or greater than 28 | ** the second 29 | */ 30 | static int actioncmp( 31 | struct action *ap1, 32 | struct action *ap2 33 | ){ 34 | int rc; 35 | rc = ap1->sp->index - ap2->sp->index; 36 | if( rc==0 ){ 37 | rc = (int)ap1->type - (int)ap2->type; 38 | } 39 | if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){ 40 | rc = ap1->x.rp->index - ap2->x.rp->index; 41 | } 42 | if( rc==0 ){ 43 | rc = (int) (ap2 - ap1); /* last resort: order by address */ 44 | } 45 | return rc; 46 | } 47 | 48 | /* Sort parser actions */ 49 | static struct action *Action_sort( 50 | struct action *ap 51 | ){ 52 | ap = (struct action *)msort((char *)ap,(char **)&ap->next, 53 | (int(*)(const char*,const char*))actioncmp); 54 | return ap; 55 | } 56 | /* Prepend a new action of the given type and lookahead symbol sp to the list *app. arg is stored as the target state when type is SHIFT, and as a rule otherwise. */ 57 | void Action_add( 58 | struct action **app, 59 | enum e_action type, 60 | struct symbol *sp, 61 | char *arg 62 | ){ 63 | struct action *newaction; 64 | newaction = Action_new(); 65 | newaction->next = *app; 66 | *app = newaction; 67 | newaction->type = type; 68 | newaction->sp = sp; 69 | newaction->spOpt = 0; 70 | if( type==SHIFT ){ 71 | newaction->x.stp = (struct state *)arg; 72 | }else{ 73 | newaction->x.rp = (struct rule *)arg; 74 | } 75 | } 76 | /********************** New code to implement the "acttab" module ***********/ 77 | /* 78 | ** This module implements routines used to construct the yy_action[] table. 79 | */ 80 | 81 | /* 82 | ** The state of the yy_action table under construction is an instance of 83 | ** the following structure. 84 | ** 85 | ** The yy_action table maps the pair (state_number, lookahead) into an 86 | ** action_number. The table is an array of integer pairs. The state_number 87 | ** determines an initial offset into the yy_action array. The lookahead 88 | ** value is then added to this initial offset to get an index X into the 89 | ** yy_action array. 
If the aAction[X].lookahead equals the value of the 90 | ** lookahead input, then the value of the action_number output is 91 | ** aAction[X].action. If the lookaheads do not match then the 92 | ** default action for the state_number is returned. 93 | ** 94 | ** All actions associated with a single state_number are first entered 95 | ** into aLookahead[] using multiple calls to acttab_action(). Then the 96 | ** actions for that single state_number are placed into the aAction[] 97 | ** array with a single call to acttab_insert(). The acttab_insert() call 98 | ** also resets the aLookahead[] array in preparation for the next 99 | ** state number. 100 | */ 101 | struct lookahead_action { 102 | int lookahead; /* Value of the lookahead token */ 103 | int action; /* Action to take on the given lookahead */ 104 | }; 105 | typedef struct acttab acttab; 106 | struct acttab { 107 | int nAction; /* Number of used slots in aAction[] */ 108 | int nActionAlloc; /* Slots allocated for aAction[] */ 109 | struct lookahead_action 110 | *aAction, /* The yy_action[] table under construction */ 111 | *aLookahead; /* A single new transaction set */ 112 | int mnLookahead; /* Minimum aLookahead[].lookahead */ 113 | int mnAction; /* Action associated with mnLookahead */ 114 | int mxLookahead; /* Maximum aLookahead[].lookahead */ 115 | int nLookahead; /* Used slots in aLookahead[] */ 116 | int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ 117 | int nterminal; /* Number of terminal symbols */ 118 | int nsymbol; /* total number of symbols */ 119 | }; 120 | 121 | /* Return the number of entries in the yy_lookahead table */ 122 | #define acttab_lookahead_size(X) ((X)->nAction) 123 | 124 | /* The value for the N-th entry in yy_action */ 125 | #define acttab_yyaction(X,N) ((X)->aAction[N].action) 126 | 127 | /* The value for the N-th entry in yy_lookahead */ 128 | #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) 129 | 130 | /* Free all memory associated with the given 
acttab */ 131 | void acttab_free(acttab *p){ 132 | free( p->aAction ); 133 | free( p->aLookahead ); 134 | free( p ); 135 | } 136 | 137 | /* Allocate a new acttab structure */ 138 | acttab *acttab_alloc(int nsymbol, int nterminal){ 139 | acttab *p = (acttab *) calloc( 1, sizeof(*p) ); 140 | if( p==0 ){ 141 | fprintf(stderr,"Unable to allocate memory for a new acttab."); 142 | exit(1); 143 | } 144 | memset(p, 0, sizeof(*p)); 145 | p->nsymbol = nsymbol; 146 | p->nterminal = nterminal; 147 | return p; 148 | } 149 | 150 | /* Add a new action to the current transaction set. 151 | ** 152 | ** This routine is called once for each lookahead for a particular 153 | ** state. It also keeps mnLookahead/mnAction and mxLookahead current for the set. 154 | */ 155 | void acttab_action(acttab *p, int lookahead, int action){ 156 | if( p->nLookahead>=p->nLookaheadAlloc ){ 157 | p->nLookaheadAlloc += 25; 158 | p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, 159 | sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); 160 | if( p->aLookahead==0 ){ 161 | fprintf(stderr,"malloc failed\n"); 162 | exit(1); 163 | } 164 | } 165 | if( p->nLookahead==0 ){ 166 | p->mxLookahead = lookahead; 167 | p->mnLookahead = lookahead; 168 | p->mnAction = action; 169 | }else{ 170 | if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead; 171 | if( p->mnLookahead>lookahead ){ 172 | p->mnLookahead = lookahead; 173 | p->mnAction = action; 174 | } 175 | } 176 | p->aLookahead[p->nLookahead].lookahead = lookahead; 177 | p->aLookahead[p->nLookahead].action = action; 178 | p->nLookahead++; 179 | } 180 | 181 | /* 182 | ** Add the transaction set built up with prior calls to acttab_action() 183 | ** into the current action table. Then reset the transaction set back 184 | ** to an empty set in preparation for a new round of acttab_action() calls. 185 | ** 186 | ** Return the offset into the action table of the new transaction. 
187 | ** 188 | ** If the makeItSafe parameter is true, then the offset is chosen so that 189 | ** it is impossible to overread the yy_lookaside[] table regardless of 190 | ** the lookaside token. This is done for the terminal symbols, as they 191 | ** come from external inputs and can contain syntax errors. When makeItSafe 192 | ** is false, there is more flexibility in selecting offsets, resulting in 193 | ** a smaller table. For non-terminal symbols, which are never syntax errors, 194 | ** makeItSafe can be false. 195 | */ 196 | int acttab_insert(acttab *p, int makeItSafe){ 197 | int i, j, k, n, end; 198 | assert( p->nLookahead>0 ); 199 | 200 | /* Make sure we have enough space to hold the expanded action table 201 | ** in the worst case. The worst case occurs if the transaction set 202 | ** must be appended to the current action table 203 | */ 204 | n = p->nsymbol + 1; 205 | if( p->nAction + n >= p->nActionAlloc ){ 206 | int oldAlloc = p->nActionAlloc; 207 | p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; 208 | p->aAction = (struct lookahead_action *) realloc( p->aAction, 209 | sizeof(p->aAction[0])*p->nActionAlloc); 210 | if( p->aAction==0 ){ 211 | fprintf(stderr,"malloc failed\n"); 212 | exit(1); 213 | } 214 | for(i=oldAlloc; inActionAlloc; i++){ 215 | p->aAction[i].lookahead = -1; 216 | p->aAction[i].action = -1; 217 | } 218 | } 219 | 220 | /* Scan the existing action table looking for an offset that is a 221 | ** duplicate of the current transaction set. Fall out of the loop 222 | ** if and when the duplicate is found. 223 | ** 224 | ** i is the index in p->aAction[] where p->mnLookahead is inserted. 225 | */ 226 | end = makeItSafe ? p->mnLookahead : 0; 227 | for(i=p->nAction-1; i>=end; i--){ 228 | if( p->aAction[i].lookahead==p->mnLookahead ){ 229 | /* All lookaheads and actions in the aLookahead[] transaction 230 | ** must match against the candidate aAction[i] entry. 
*/ 231 | if( p->aAction[i].action!=p->mnAction ) continue; 232 | for(j=0; jnLookahead; j++){ 233 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; 234 | if( k<0 || k>=p->nAction ) break; 235 | if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; 236 | if( p->aLookahead[j].action!=p->aAction[k].action ) break; 237 | } 238 | if( jnLookahead ) continue; 239 | 240 | /* No possible lookahead value that is not in the aLookahead[] 241 | ** transaction is allowed to match aAction[i] */ 242 | n = 0; 243 | for(j=0; jnAction; j++){ 244 | if( p->aAction[j].lookahead<0 ) continue; 245 | if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; 246 | } 247 | if( n==p->nLookahead ){ 248 | break; /* An exact match is found at offset i */ 249 | } 250 | } 251 | } 252 | 253 | /* If no existing offsets exactly match the current transaction, find an 254 | ** an empty offset in the aAction[] table in which we can add the 255 | ** aLookahead[] transaction. 256 | */ 257 | if( inAction, which means the 261 | ** transaction will be appended. */ 262 | i = makeItSafe ? p->mnLookahead : 0; 263 | for(; inActionAlloc - p->mxLookahead; i++){ 264 | if( p->aAction[i].lookahead<0 ){ 265 | for(j=0; jnLookahead; j++){ 266 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; 267 | if( k<0 ) break; 268 | if( p->aAction[k].lookahead>=0 ) break; 269 | } 270 | if( jnLookahead ) continue; 271 | for(j=0; jnAction; j++){ 272 | if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; 273 | } 274 | if( j==p->nAction ){ 275 | break; /* Fits in empty slots */ 276 | } 277 | } 278 | } 279 | } 280 | /* Insert transaction set at index i. 
*/ 281 | #if 0 282 | printf("Acttab:"); 283 | for(j=0; jnLookahead; j++){ 284 | printf(" %d", p->aLookahead[j].lookahead); 285 | } 286 | printf(" inserted at %d\n", i); 287 | #endif 288 | for(j=0; jnLookahead; j++){ 289 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; 290 | p->aAction[k] = p->aLookahead[j]; 291 | if( k>=p->nAction ) p->nAction = k+1; 292 | } 293 | if( makeItSafe && i+p->nterminal>=p->nAction ) p->nAction = i+p->nterminal+1; 294 | p->nLookahead = 0; 295 | 296 | /* Return the offset that is added to the lookahead in order to get the 297 | ** index into yy_action of the action */ 298 | return i - p->mnLookahead; 299 | } 300 | 301 | /* 302 | ** Return the size of the action table without the trailing syntax error 303 | ** entries. 304 | */ 305 | int acttab_action_size(acttab *p){ 306 | int n = p->nAction; 307 | while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; } 308 | return n; 309 | } 310 | 311 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lemon Grove 2 | [![Build Status][badge-travis]][project-travis] 3 | 4 | - https://github.com/tajmone/lemon-grove 5 | 6 | The original Lemon parser generator, along with sibling projects, forks and assets — all in one place. 7 | 8 | Project maintained by [Tristano Ajmone] since 2019/04/23. 
9 | 10 | 11 | ----- 12 | 13 | **Table of Contents** 14 | 15 | 16 | 17 | - [About Lemon](#about-lemon) 18 | - [Project Contents](#project-contents) 19 | - [Lemon Links](#lemon-links) 20 | - [Official Lemon](#official-lemon) 21 | - [Lemon Forks](#lemon-forks) 22 | - [Lemonex](#lemonex) 23 | - [Cross-Language Lemons](#cross-language-lemons) 24 | - [Citron \(Swift\)](#citron-swift) 25 | - [Golemon \(Go\)](#golemon-go) 26 | - [jlemon \(Java\)](#jlemon-java) 27 | - [Lemon.JS](#lemonjs) 28 | - [Lemon PHP](#lemon-php) 29 | - [Lemon Rust](#lemon-rust) 30 | - [Lemon Ports](#lemon-ports) 31 | - [lemon-mint \(Rust\)](#lemon-mint-rust) 32 | - [pomelo \(Rust\)](#pomelo-rust) 33 | - [Lemon Inspired](#lemon-inspired) 34 | - [limecc \(Python\)](#limecc-python) 35 | - [Editors Support for Lemon](#editors-support-for-lemon) 36 | - [Atom](#atom) 37 | - [Emacs](#emacs) 38 | - [Sublime Text](#sublime-text) 39 | - [VSCode](#vscode) 40 | - [Vim](#vim) 41 | - [Lemon Tools](#lemon-tools) 42 | - [Lemon Slicer](#lemon-slicer) 43 | - [Good Reads](#good-reads) 44 | - [Compiler Design in C](#compiler-design-in-c) 45 | 46 | 47 | 48 | ----- 49 | 50 | 51 | # About Lemon 52 | 53 | - http://www.hwaci.com/sw/lemon/ 54 | 55 | The Lemon program is an [LALR(1)] [parser generator], written in C89 by [Richard Hipp] during the late 1980s. 56 | It takes a context free grammar and converts it into a subroutine that will parse a file using that grammar. 57 | Lemon is similar to the much more famous programs "YACC" and "BISON", but it's not compatible with them. 58 | There are several important differences that distinguish Lemon from Yacc and Bison: 59 | 60 | - Lemon uses a different context free grammar syntax, which is less prone to programming errors. 61 | - The parser generated by Lemon is both re-entrant and thread-safe. 62 | - In Yacc and Bison, the parser calls the tokenizer (aka _pull parsing_ model). 63 | In Lemon, it's the tokenizer that calls the parser (aka _push parsing_ model). 
64 | - Lemon includes the concept of a _non-terminal destructor_, which simplifies writing parsers that don't leak memory. 65 | - Lemon doesn't use global variables. 66 | Yacc and Bison use global variables to pass information between the tokenizer and the parser. 67 | - Lemon allows multiple parsers to be running simultaneously; Yacc and Bison do not. 68 | 69 | Lemon's innovative design inspired Bison to embody some of these features in the course of time. 70 | For a detailed history of Lemon, see the _[History of Lemon]_ section in [`lemon/lemon.md`][lemon.md]. 71 | 72 | The complete source code to the Lemon parser generator is contained in two files: [`lemon.c`][lemon.c] and [`lempar.c`][lempar.c]. 73 | 74 | 75 | # Project Contents 76 | 77 | Currently, this repository contains only the official version of Lemon (public domain) taken from the [SQLite] project: 78 | 79 | - [`/lemon/`](./lemon/) — original Lemon sources from [SQLite]: 80 | + [`/examples/`](./lemon/examples) — user-contributed examples. 81 | + [`/sliced/`][sliced/] — [de-amalgamated] Lemon (split sources). 82 | + [`lemon.c`][lemon.c] — parser generator program. 83 | + [`lempar.c`][lempar.c] — template for the parser generator. 84 | + [`lemon.md`][lemon.md] — documentation. 85 | 86 | The `examples/` subfolder contains third-party examples based on this version of Lemon (the examples are not from the SQLite project). 87 | 88 | There are many variations of the original Lemon code circulating over the Internet, and it's quite common to find tutorials and examples that rely on tweaked versions of Lemon, often in conjunction with other tools (like [re2c]). 89 | 90 | To avoid confusion, in this project all examples are kept together with the Lemon version for which they were designed. Different versions of Lemon (forks, ports, etc.) will be kept in separate folders, and each version will have its own `examples/` subfolder. 
91 | The idea is to keep the Lemon Grove tidy and well structured, so that its users can easily distinguish which version of Lemon is where, and how to find examples for that specific version. 92 | 93 | The [`sliced/` subfolder][sliced/] contains a [de-amalgamated] version of Lemon, i.e. the single `lemon.c` source file is split into separate modules (23 files), like it used to be originally. 94 | Anyone interested in studying or porting the Lemon source code will find it easier to work on these individual modules, instead of the official single-file source (>5600 lines). 95 | 96 | Since de-amalgamation is done via a custom tool ([Lemon Slicer]) and derived from the code found in the `lemon/` folder, the Lemon sources in the `sliced/` folder will always mirror the code from the `lemon/` folder. 97 | Whenever the latter is updated, the former gets updated too. 98 | 99 | # Lemon Links 100 | 101 | External links to useful third party Lemon-assets. 102 | 103 | - [Lemon on Wikipedia] 104 | 105 | ## Official Lemon 106 | 107 | The official Lemon code, actively maintained as part of the [SQLite] project. 108 | 109 | - [Lemon homepage] 110 | - [Lemon documentation] 111 | - Upstream Lemon sources on [SQLite] repository ([Fossil]): 112 | + [`lemon.c`](https://www.sqlite.org/src/file/tool/lemon.c) 113 | + [`lempar.c`](https://www.sqlite.org/src/file/tool/lempar.c) 114 | + [`lemon.html`](https://sqlite.org/src/doc/trunk/doc/lemon.html) 115 | 116 | ## Lemon Forks 117 | 118 | Adaptations of Lemon that add new functionality or provide native integration with other tools. 119 | 120 | ### Lemonex 121 | 122 | - https://github.com/renjipanicker/lemonex 123 | 124 | Created by [@renjipanicker] in 2015, public domain. 125 | 126 | Lemonex is an extension to the Lemon parser, developed to include a built-in lexer. 
127 | 128 | ## Cross-Language Lemons 129 | 130 | Lemon has been adapted to produce parser generators in other programming languages too, by tweaking its C source code and creating an _ad hoc_ version of `lempar.c` in the target language. 131 | 132 | ### Citron (Swift) 133 | 134 | - https://github.com/roop/citron 135 | - https://roopc.net/citron/ 136 | 137 | By [Roopesh Chander] in 2017, MIT licensed. 138 | 139 | Citron is an LALR parser generator for Swift, based on the Lemon engine. 140 | 141 | ### Golemon (Go) 142 | 143 | - https://github.com/nsf/golemon 144 | 145 | Created by [@nsf] in 2010, public domain. 146 | 147 | Outdated and no longer maintained, but a good starting point for anyone willing to revive the project. 148 | 149 | ### jlemon (Java) 150 | 151 | - https://github.com/gwenn/jlemon 152 | 153 | Created by [@gwenn] in 2017, released into the public domain via [Unlicense]. 154 | 155 | A fork of the LEMON parser generator that generates Java code and the associated SQL parser. 156 | 157 | ### Lemon.JS 158 | 159 | - https://github.com/sormy/lemon-js 160 | 161 | Created by [Artem Butusov] in 2017, public domain. 162 | 163 | ### Lemon PHP 164 | 165 | - https://github.com/wez/lemon-php 166 | 167 | Created by [Wez Furlong] in 2006, last updated in 2012, BSD-like license. 168 | 169 | ### Lemon Rust 170 | 171 | - https://github.com/rodrigorc/lemon_rust 172 | 173 | Created by [Rodrigo Rivas Costa] in 2015, released under Apache-2.0 license. 174 | 175 | Lemon Rust is now deprecated in favour of [pomelo], by the same author. 176 | 177 | 178 | ## Lemon Ports 179 | 180 | Lemon implementations in other programming languages. 181 | 182 | ### lemon-mint (Rust) 183 | 184 | - https://github.com/jeremiah-shaulov/lemon-mint 185 | - https://lib.rs/crates/lemon-mint 186 | 187 | By [Jeremiah Shaulov], released under MIT License. 188 | 189 | Implementation of the Lemon parser generator as a Rust library with API. 
190 | 191 | ### pomelo (Rust) 192 | 193 | - https://github.com/rodrigorc/pomelo 194 | - https://crates.io/crates/pomelo 195 | 196 | By [Rodrigo Rivas Costa], released under dual license — MIT or Apache-2.0. 197 | 198 | Implementation of the Lemon parser generator as a Rust procedural macro. 199 | 200 | ## Lemon Inspired 201 | 202 | Lemon has played an influential role in the development of parser generators, serving as a model for the creation of similar tools. Here are links to some of them. 203 | 204 | ### limecc (Python) 205 | 206 | - https://github.com/avakar/limecc 207 | 208 | By [Martin Vejnár], Boost Software License 1.0. 209 | 210 | limecc is a lexer and parser generator similar to other tools like Yacc, Bison and especially Lemon from which limecc sources inspiration. Grammars are written in a language called Lime, which describes lexical tokens, grammar productions, and semantic actions. The generator produces C++ code for the corresponding lexer and parser. 211 | 212 | ## Editors Support for Lemon 213 | 214 | Packages/plugins for adding support for Lemon grammar files to various editors. 215 | 216 | Missing packages? Please add them via pull request, or [open an issue] and provide a link. 217 | 218 | ### Atom 219 | 220 | - https://github.com/kyursen/language-lemon 221 | 222 | By [Yursen Kim], MIT License. 223 | 224 | > Adds syntax highlighting to Lemon Parser Generator files in Atom. 225 | 226 | ### Emacs 227 | 228 | - https://github.com/mooz/lemon-mode 229 | 230 | By [Masafumi Oyamada], GNU GPL v3. 231 | 232 | > Emacs major mode for editing LEMON grammar files. 233 | 234 | ### Sublime Text 235 | 236 | - https://github.com/ksherlock/sublime-lemon 237 | 238 | By [@ksherlock], CC0 1.0 Universal. 239 | 240 | > Sublime Text 3 syntax file for the Lemon parser generator. 241 | 242 | ### VSCode 243 | 244 | - https://github.com/sergeyklay/lemon-vscode 245 | 246 | By [Serghei Iakovlev], MIT License. 
247 | 248 | > Lemon Parser Generator syntax highlighting for VSCode. 249 | 250 | ### Vim 251 | 252 | - https://github.com/dccmx/vim-lemon-syntax 253 | - https://github.com/vim-scripts/lemon.vim 254 | 255 | By [@dccmx], MIT License. 256 | 257 | > Crappy syntax highlighting in Vim for Lemon Parser Generator grammars. 258 | 259 | ## Lemon Tools 260 | 261 | Links to some Lemon-related tools. 262 | 263 | ### Lemon Slicer 264 | 265 | - https://github.com/tajmone/lemon-slicer 266 | 267 | Lemon Slicer is a dedicated tool to de-amalgamate the "`lemon.c`" source file into separate modules. 268 | It was created specifically for the Lemon Grove project. 269 | 270 | By [Tristano Ajmone], MIT License. 271 | 272 | # Good Reads 273 | 274 | Links to useful books, articles and tutorials on the topics of lexing and parsing. 275 | 276 | ## Compiler Design in C 277 | 278 | Free PDF book + sources, 984 pp. 279 | 280 | - https://holub.com/compiler/ 281 | 282 | Originally published in 1990 by [Prentice-Hall Inc.], _Compiler Design in C_, by [Allen Holub], is an excellent book on the topic. Written in plain language, this book takes the reader through a 984-page journey on how to build a compiler, step by step, introducing and explaining each compiler component in detail, and providing source code examples of each implementation step. 283 | 284 | Since the book is now out of print, the author has generously made it available for free download on his website, in PDF format, including all the source code files and the release notes. 285 | 286 | [Prentice-Hall Inc.]: http://www.prenticehall.com/ 287 | 288 | 291 | 292 | [de-amalgamated]: https://www.sqlite.org/amalgamation.html "Learn about amalgamation in the SQLite project" 293 | 294 | 295 | 296 | [open an issue]: https://github.com/tajmone/lemon-grove/issues/new "Click to open a new issue..." 
297 | [badge-travis]: https://travis-ci.com/tajmone/lemon-grove.svg?branch=master 298 | [project-travis]: https://travis-ci.com/tajmone/lemon-grove 299 | 300 | [History of Lemon]: ./lemon/lemon.md#history-of-lemon "Learn more about the history of Lemon" 301 | 302 | 303 | 304 | [sliced/]: ./lemon/sliced/ "Go to the de-amalgamated Lemon source code" 305 | 306 | [lemon.md]: ./lemon/lemon.md "Lemon documentation" 307 | [lemon.c]: ./lemon/lemon.c "View source file" 308 | [lempar.c]: ./lemon/lempar.c "View source file" 309 | 310 | 311 | 312 | [pomelo]: #pomelo-rust "Jump to pomelo review" 313 | [Lemon Slicer]: #lemon-slicer "Jump to Lemon Slicer review" 314 | 315 | 316 | 317 | 318 | [LALR(1)]: https://en.wikipedia.org/wiki/LALR_parser "See Wikipedia page on LALR parser" 319 | [parser generator]: https://en.wikipedia.org/wiki/Compiler-compiler "See Wikipedia page on Compiler-compiler" 320 | 321 | 322 | 323 | [Lemon homepage]: http://www.hwaci.com/sw/lemon/ "Visit the official Lemon homepage" 324 | [Lemon documentation]: https://sqlite.org/src/doc/trunk/doc/lemon.html "Read the official Lemon documentation" 325 | [Lemon on Wikipedia]: https://en.wikipedia.org/wiki/Lemon_Parser_Generator "Read the Wikipedia page for Lemon Parser Generator" 326 | 327 | 328 | 329 | [Fossil]: https://www.fossil-scm.org/ "Visit Fossil website" 330 | [re2c]: http://re2c.org/ "Visit re2c website" 331 | [SQLite]: https://www.sqlite.org/index.html "Visit SQLite website" 332 | 333 | 334 | 335 | [Unlicense]: https://unlicense.org/ "Visit unlicense.org" 336 | 337 | 338 | 339 | [@dccmx]: https://github.com/dccmx "View @dccmx's GitHub profile" 340 | [@gwenn]: https://github.com/gwenn "View @gwenn's GitHub profile" 341 | [@ksherlock]: https://github.com/ksherlock "View @ksherlock's GitHub profile" 342 | [@nsf]: https://github.com/nsf "View @nsf's GitHub profile" 343 | [@renjipanicker]: https://github.com/renjipanicker "View @renjipanicker's GitHub profile" 344 | [@sergeyklay]: https://github.com/sergeyklay 
"View Serghei Iakovlev's GitHub profile" 345 | [Allen Holub]: https://holub.com "Visit Allen Holub's website" 346 | [Artem Butusov]: https://github.com/sormy "View Artem Butusov's GitHub profile" 347 | [Jeremiah Shaulov]: https://github.com/jeremiah-shaulov "View Jeremiah Shaulov's GitHub profile" 348 | [Martin Vejnár]: https://github.com/avakar "View Martin Vejnár's GitHub profile" 349 | [Masafumi Oyamada]: http://mooz.github.io/ "Visit Masafumi Oyamada's website" 350 | [Richard Hipp]: http://www.hwaci.com/drh/ "Visit D. Richard Hipp's website" 351 | [Rodrigo Rivas Costa]: https://github.com/rodrigorc "View Rodrigo Rivas Costa's GitHub profile" 352 | [Roopesh Chander]: https://github.com/roop "View Roopesh Chander's GitHub profile" 353 | [Serghei Iakovlev]: https://github.com/sergeyklay "View Serghei Iakovlev's GitHub profile" 354 | [Tristano Ajmone]: https://github.com/tajmone "View Tristano Ajmone's GitHub profile" 355 | [Wez Furlong]: https://github.com/wez "View Wez Furlong's GitHub profile" 356 | [Yursen Kim]: https://github.com/kyursen "View Yursen Kim's GitHub profile" 357 | 358 | 359 | 360 | -------------------------------------------------------------------------------- /lemon/sliced/build.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Routines to construct the finite state machine for the LEMON 3 | ** parser generator. 4 | */ 5 | 6 | /* Find a precedence symbol of every rule in the grammar. 7 | ** 8 | ** Those rules which have a precedence symbol coded in the input 9 | ** grammar using the "[symbol]" construct will already have the 10 | ** rp->precsym field filled. Other rules take as their precedence 11 | ** symbol the first RHS symbol with a defined precedence. If there 12 | ** are no RHS symbols with a defined precedence, the precedence 13 | ** symbol field is left blank. 
14 | */ 15 | void FindRulePrecedences(struct lemon *xp) 16 | { 17 | struct rule *rp; 18 | for(rp=xp->rule; rp; rp=rp->next){ 19 | if( rp->precsym==0 ){ 20 | int i, j; 21 | for(i=0; inrhs && rp->precsym==0; i++){ 22 | struct symbol *sp = rp->rhs[i]; 23 | if( sp->type==MULTITERMINAL ){ 24 | for(j=0; jnsubsym; j++){ 25 | if( sp->subsym[j]->prec>=0 ){ 26 | rp->precsym = sp->subsym[j]; 27 | break; 28 | } 29 | } 30 | }else if( sp->prec>=0 ){ 31 | rp->precsym = rp->rhs[i]; 32 | } 33 | } 34 | } 35 | } 36 | return; 37 | } 38 | 39 | /* Find all nonterminals which will generate the empty string. 40 | ** Then go back and compute the first sets of every nonterminal. 41 | ** The first set is the set of all terminal symbols which can begin 42 | ** a string generated by that nonterminal. 43 | */ 44 | void FindFirstSets(struct lemon *lemp) 45 | { 46 | int i, j; 47 | struct rule *rp; 48 | int progress; 49 | 50 | for(i=0; insymbol; i++){ 51 | lemp->symbols[i]->lambda = LEMON_FALSE; 52 | } 53 | for(i=lemp->nterminal; insymbol; i++){ 54 | lemp->symbols[i]->firstset = SetNew(); 55 | } 56 | 57 | /* First compute all lambdas */ 58 | do{ 59 | progress = 0; 60 | for(rp=lemp->rule; rp; rp=rp->next){ 61 | if( rp->lhs->lambda ) continue; 62 | for(i=0; inrhs; i++){ 63 | struct symbol *sp = rp->rhs[i]; 64 | assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); 65 | if( sp->lambda==LEMON_FALSE ) break; 66 | } 67 | if( i==rp->nrhs ){ 68 | rp->lhs->lambda = LEMON_TRUE; 69 | progress = 1; 70 | } 71 | } 72 | }while( progress ); 73 | 74 | /* Now compute all first sets */ 75 | do{ 76 | struct symbol *s1, *s2; 77 | progress = 0; 78 | for(rp=lemp->rule; rp; rp=rp->next){ 79 | s1 = rp->lhs; 80 | for(i=0; inrhs; i++){ 81 | s2 = rp->rhs[i]; 82 | if( s2->type==TERMINAL ){ 83 | progress += SetAdd(s1->firstset,s2->index); 84 | break; 85 | }else if( s2->type==MULTITERMINAL ){ 86 | for(j=0; jnsubsym; j++){ 87 | progress += SetAdd(s1->firstset,s2->subsym[j]->index); 88 | } 89 | break; 90 | }else if( s1==s2 
){ 91 | if( s1->lambda==LEMON_FALSE ) break; 92 | }else{ 93 | progress += SetUnion(s1->firstset,s2->firstset); 94 | if( s2->lambda==LEMON_FALSE ) break; 95 | } 96 | } 97 | } 98 | }while( progress ); 99 | return; 100 | } 101 | 102 | /* Compute all LR(0) states for the grammar. Links 103 | ** are added to between some states so that the LR(1) follow sets 104 | ** can be computed later. 105 | */ 106 | PRIVATE struct state *getstate(struct lemon *); /* forward reference */ 107 | void FindStates(struct lemon *lemp) 108 | { 109 | struct symbol *sp; 110 | struct rule *rp; 111 | 112 | Configlist_init(); 113 | 114 | /* Find the start symbol */ 115 | if( lemp->start ){ 116 | sp = Symbol_find(lemp->start); 117 | if( sp==0 ){ 118 | ErrorMsg(lemp->filename,0, 119 | "The specified start symbol \"%s\" is not " 120 | "in a nonterminal of the grammar. \"%s\" will be used as the start " 121 | "symbol instead.",lemp->start,lemp->startRule->lhs->name); 122 | lemp->errorcnt++; 123 | sp = lemp->startRule->lhs; 124 | } 125 | }else if( lemp->startRule ){ 126 | sp = lemp->startRule->lhs; 127 | }else{ 128 | ErrorMsg(lemp->filename,0,"Internal error - no start rule\n"); 129 | exit(1); 130 | } 131 | 132 | /* Make sure the start symbol doesn't occur on the right-hand side of 133 | ** any rule. Report an error if it does. (YACC would generate a new 134 | ** start symbol in this case.) */ 135 | for(rp=lemp->rule; rp; rp=rp->next){ 136 | int i; 137 | for(i=0; inrhs; i++){ 138 | if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ 139 | ErrorMsg(lemp->filename,0, 140 | "The start symbol \"%s\" occurs on the " 141 | "right-hand side of a rule. 
This will result in a parser which " 142 | "does not work properly.",sp->name); 143 | lemp->errorcnt++; 144 | } 145 | } 146 | } 147 | 148 | /* The basis configuration set for the first state 149 | ** is all rules which have the start symbol as their 150 | ** left-hand side */ 151 | for(rp=sp->rule; rp; rp=rp->nextlhs){ 152 | struct config *newcfp; 153 | rp->lhsStart = 1; 154 | newcfp = Configlist_addbasis(rp,0); 155 | SetAdd(newcfp->fws,0); 156 | } 157 | 158 | /* Compute the first state. All other states will be 159 | ** computed automatically during the computation of the first one. 160 | ** The returned pointer to the first state is not used. */ 161 | (void)getstate(lemp); 162 | return; 163 | } 164 | 165 | /* Return a pointer to a state which is described by the configuration 166 | ** list which has been built from calls to Configlist_add. 167 | */ 168 | PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ 169 | PRIVATE struct state *getstate(struct lemon *lemp) 170 | { 171 | struct config *cfp, *bp; 172 | struct state *stp; 173 | 174 | /* Extract the sorted basis of the new state. The basis was constructed 175 | ** by prior calls to "Configlist_addbasis()". */ 176 | Configlist_sortbasis(); 177 | bp = Configlist_basis(); 178 | 179 | /* Get a state with the same basis */ 180 | stp = State_find(bp); 181 | if( stp ){ 182 | /* A state with the same basis already exists! Copy all the follow-set 183 | ** propagation links from the state under construction into the 184 | ** preexisting state, then return a pointer to the preexisting state */ 185 | struct config *x, *y; 186 | for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ 187 | Plink_copy(&y->bplp,x->bplp); 188 | Plink_delete(x->fplp); 189 | x->fplp = x->bplp = 0; 190 | } 191 | cfp = Configlist_return(); 192 | Configlist_eat(cfp); 193 | }else{ 194 | /* This really is a new state. 
Construct all the details */ 195 | Configlist_closure(lemp); /* Compute the configuration closure */ 196 | Configlist_sort(); /* Sort the configuration closure */ 197 | cfp = Configlist_return(); /* Get a pointer to the config list */ 198 | stp = State_new(); /* A new state structure */ 199 | MemoryCheck(stp); 200 | stp->bp = bp; /* Remember the configuration basis */ 201 | stp->cfp = cfp; /* Remember the configuration closure */ 202 | stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ 203 | stp->ap = 0; /* No actions, yet. */ 204 | State_insert(stp,stp->bp); /* Add to the state table */ 205 | buildshifts(lemp,stp); /* Recursively compute successor states */ 206 | } 207 | return stp; 208 | } 209 | 210 | /* 211 | ** Return true if two symbols are the same. 212 | */ 213 | int same_symbol(struct symbol *a, struct symbol *b) 214 | { 215 | int i; 216 | if( a==b ) return 1; 217 | if( a->type!=MULTITERMINAL ) return 0; 218 | if( b->type!=MULTITERMINAL ) return 0; 219 | if( a->nsubsym!=b->nsubsym ) return 0; 220 | for(i=0; insubsym; i++){ 221 | if( a->subsym[i]!=b->subsym[i] ) return 0; 222 | } 223 | return 1; 224 | } 225 | 226 | /* Construct all successor states to the given state. A "successor" 227 | ** state is any state which can be reached by a shift action. 228 | */ 229 | PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) 230 | { 231 | struct config *cfp; /* For looping thru the config closure of "stp" */ 232 | struct config *bcfp; /* For the inner loop on config closure of "stp" */ 233 | struct config *newcfg; /* */ 234 | struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ 235 | struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ 236 | struct state *newstp; /* A pointer to a successor state */ 237 | 238 | /* Each configuration becomes complete after it contributes to a successor 239 | ** state. 
Initially, all configurations are incomplete */ 240 | for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; 241 | 242 | /* Loop through all configurations of the state "stp" */ 243 | for(cfp=stp->cfp; cfp; cfp=cfp->next){ 244 | if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ 245 | if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ 246 | Configlist_reset(); /* Reset the new config set */ 247 | sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ 248 | 249 | /* For every configuration in the state "stp" which has the symbol "sp" 250 | ** following its dot, add the same configuration to the basis set under 251 | ** construction but with the dot shifted one symbol to the right. */ 252 | for(bcfp=cfp; bcfp; bcfp=bcfp->next){ 253 | if( bcfp->status==COMPLETE ) continue; /* Already used */ 254 | if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ 255 | bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ 256 | if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ 257 | bcfp->status = COMPLETE; /* Mark this config as used */ 258 | newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); 259 | Plink_add(&newcfg->bplp,bcfp); 260 | } 261 | 262 | /* Get a pointer to the state described by the basis configuration set 263 | ** constructed in the preceding loop */ 264 | newstp = getstate(lemp); 265 | 266 | /* The state "newstp" is reached from the state "stp" by a shift action 267 | ** on the symbol "sp" */ 268 | if( sp->type==MULTITERMINAL ){ 269 | int i; 270 | for(i=0; insubsym; i++){ 271 | Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); 272 | } 273 | }else{ 274 | Action_add(&stp->ap,SHIFT,sp,(char *)newstp); 275 | } 276 | } 277 | } 278 | 279 | /* 280 | ** Construct the propagation links 281 | */ 282 | void FindLinks(struct lemon *lemp) 283 | { 284 | int i; 285 | struct config *cfp, *other; 286 | struct state *stp; 287 | struct plink *plp; 288 | 289 | /* Housekeeping detail: 290 | ** 
Add to every propagate link a pointer back to the state to 291 | ** which the link is attached. */ 292 | for(i=0; instate; i++){ 293 | stp = lemp->sorted[i]; 294 | for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ 295 | cfp->stp = stp; 296 | } 297 | } 298 | 299 | /* Convert all backlinks into forward links. Only the forward 300 | ** links are used in the follow-set computation. */ 301 | for(i=0; instate; i++){ 302 | stp = lemp->sorted[i]; 303 | for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ 304 | for(plp=cfp->bplp; plp; plp=plp->next){ 305 | other = plp->cfp; 306 | Plink_add(&other->fplp,cfp); 307 | } 308 | } 309 | } 310 | } 311 | 312 | /* Compute all followsets. 313 | ** 314 | ** A followset is the set of all symbols which can come immediately 315 | ** after a configuration. 316 | */ 317 | void FindFollowSets(struct lemon *lemp) 318 | { 319 | int i; 320 | struct config *cfp; 321 | struct plink *plp; 322 | int progress; 323 | int change; 324 | 325 | for(i=0; instate; i++){ 326 | assert( lemp->sorted[i]!=0 ); 327 | for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 328 | cfp->status = INCOMPLETE; 329 | } 330 | } 331 | 332 | do{ 333 | progress = 0; 334 | for(i=0; instate; i++){ 335 | assert( lemp->sorted[i]!=0 ); 336 | for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 337 | if( cfp->status==COMPLETE ) continue; 338 | for(plp=cfp->fplp; plp; plp=plp->next){ 339 | change = SetUnion(plp->cfp->fws,cfp->fws); 340 | if( change ){ 341 | plp->cfp->status = INCOMPLETE; 342 | progress = 1; 343 | } 344 | } 345 | cfp->status = COMPLETE; 346 | } 347 | } 348 | }while( progress ); 349 | } 350 | 351 | static int resolve_conflict(struct action *,struct action *); 352 | 353 | /* Compute the reduce actions, and resolve conflicts. 
354 | */ 355 | void FindActions(struct lemon *lemp) 356 | { 357 | int i,j; 358 | struct config *cfp; 359 | struct state *stp; 360 | struct symbol *sp; 361 | struct rule *rp; 362 | 363 | /* Add all of the reduce actions 364 | ** A reduce action is added for each element of the followset of 365 | ** a configuration which has its dot at the extreme right. 366 | */ 367 | for(i=0; instate; i++){ /* Loop over all states */ 368 | stp = lemp->sorted[i]; 369 | for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ 370 | if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ 371 | for(j=0; jnterminal; j++){ 372 | if( SetFind(cfp->fws,j) ){ 373 | /* Add a reduce action to the state "stp" which will reduce by the 374 | ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ 375 | Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); 376 | } 377 | } 378 | } 379 | } 380 | } 381 | 382 | /* Add the accepting token */ 383 | if( lemp->start ){ 384 | sp = Symbol_find(lemp->start); 385 | if( sp==0 ){ 386 | if( lemp->startRule==0 ){ 387 | fprintf(stderr, "internal error on source line %d: no start rule\n", 388 | __LINE__); 389 | exit(1); 390 | } 391 | sp = lemp->startRule->lhs; 392 | } 393 | }else{ 394 | sp = lemp->startRule->lhs; 395 | } 396 | /* Add to the first state (which is always the starting state of the 397 | ** finite state machine) an action to ACCEPT if the lookahead is the 398 | ** start nonterminal. */ 399 | Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); 400 | 401 | /* Resolve conflicts */ 402 | for(i=0; instate; i++){ 403 | struct action *ap, *nap; 404 | stp = lemp->sorted[i]; 405 | /* assert( stp->ap ); */ 406 | stp->ap = Action_sort(stp->ap); 407 | for(ap=stp->ap; ap && ap->next; ap=ap->next){ 408 | for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ 409 | /* The two actions "ap" and "nap" have the same lookahead. 
410 | ** Figure out which one should be used */ 411 | lemp->nconflict += resolve_conflict(ap,nap); 412 | } 413 | } 414 | } 415 | 416 | /* Report an error for each rule that can never be reduced. */ 417 | for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; 418 | for(i=0; instate; i++){ 419 | struct action *ap; 420 | for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ 421 | if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; 422 | } 423 | } 424 | for(rp=lemp->rule; rp; rp=rp->next){ 425 | if( rp->canReduce ) continue; 426 | ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); 427 | lemp->errorcnt++; 428 | } 429 | } 430 | 431 | /* Resolve a conflict between the two given actions. If the 432 | ** conflict can't be resolved, return non-zero. 433 | ** 434 | ** NO LONGER TRUE: 435 | ** To resolve a conflict, first look to see if either action 436 | ** is on an error rule. In that case, take the action which 437 | ** is not associated with the error rule. If neither or both 438 | ** actions are associated with an error rule, then try to 439 | ** use precedence to resolve the conflict. 440 | ** 441 | ** If either action is a SHIFT, then it must be apx. This 442 | ** function won't work if apx->type==REDUCE and apy->type==SHIFT. 443 | */ 444 | static int resolve_conflict( 445 | struct action *apx, 446 | struct action *apy 447 | ){ 448 | struct symbol *spx, *spy; 449 | int errcnt = 0; 450 | assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ 451 | if( apx->type==SHIFT && apy->type==SHIFT ){ 452 | apy->type = SSCONFLICT; 453 | errcnt++; 454 | } 455 | if( apx->type==SHIFT && apy->type==REDUCE ){ 456 | spx = apx->sp; 457 | spy = apy->x.rp->precsym; 458 | if( spy==0 || spx->prec<0 || spy->prec<0 ){ 459 | /* Not enough precedence information. 
*/ 460 | apy->type = SRCONFLICT; 461 | errcnt++; 462 | }else if( spx->prec>spy->prec ){ /* higher precedence wins */ 463 | apy->type = RD_RESOLVED; 464 | }else if( spx->precprec ){ 465 | apx->type = SH_RESOLVED; 466 | }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ 467 | apy->type = RD_RESOLVED; /* associativity */ 468 | }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ 469 | apx->type = SH_RESOLVED; 470 | }else{ 471 | assert( spx->prec==spy->prec && spx->assoc==NONE ); 472 | apx->type = ERROR; 473 | } 474 | }else if( apx->type==REDUCE && apy->type==REDUCE ){ 475 | spx = apx->x.rp->precsym; 476 | spy = apy->x.rp->precsym; 477 | if( spx==0 || spy==0 || spx->prec<0 || 478 | spy->prec<0 || spx->prec==spy->prec ){ 479 | apy->type = RRCONFLICT; 480 | errcnt++; 481 | }else if( spx->prec>spy->prec ){ 482 | apy->type = RD_RESOLVED; 483 | }else if( spx->precprec ){ 484 | apx->type = RD_RESOLVED; 485 | } 486 | }else{ 487 | assert( 488 | apx->type==SH_RESOLVED || 489 | apx->type==RD_RESOLVED || 490 | apx->type==SSCONFLICT || 491 | apx->type==SRCONFLICT || 492 | apx->type==RRCONFLICT || 493 | apy->type==SH_RESOLVED || 494 | apy->type==RD_RESOLVED || 495 | apy->type==SSCONFLICT || 496 | apy->type==SRCONFLICT || 497 | apy->type==RRCONFLICT 498 | ); 499 | /* The REDUCE/SHIFT case cannot happen because SHIFTs come before 500 | ** REDUCEs on the list. If we reach this point it must be because 501 | ** the parser conflict had already been resolved. */ 502 | } 503 | return errcnt; 504 | } 505 | -------------------------------------------------------------------------------- /lemon/sliced/table.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** All code in this file has been automatically generated 3 | ** from a specification in the file 4 | ** "table.q" 5 | ** by the associative array code building program "aagen". 6 | ** Do not edit this file! 
Instead, edit the specification 7 | ** file, then rerun aagen. 8 | */ 9 | /* 10 | ** Code for processing tables in the LEMON parser generator. 11 | */ 12 | /* Hash a NUL-terminated string: for each character c, h = h*13 + c, starting from 0. */ 13 | PRIVATE unsigned strhash(const char *x) 14 | { 15 | unsigned h = 0; 16 | while( *x ) h = h*13 + *(x++); 17 | return h; 18 | } 19 | 20 | /* Works like strdup, sort of. Save a string in malloced memory, but 21 | ** keep strings in a table so that the same string is not in more 22 | ** than one place. Returns 0 when y is 0. 23 | */ 24 | const char *Strsafe(const char *y) 25 | { 26 | const char *z; 27 | char *cpy; 28 | 29 | if( y==0 ) return 0; 30 | z = Strsafe_find(y); 31 | if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){ 32 | lemon_strcpy(cpy,y); 33 | z = cpy; 34 | Strsafe_insert(z); 35 | } 36 | MemoryCheck(z); 37 | return z; 38 | } 39 | 40 | /* There is one instance of the following structure for each 41 | ** associative array of type "x1". 42 | */ 43 | struct s_x1 { 44 | int size; /* The number of available slots. */ 45 | /* Must be a power of 2 greater than or */ 46 | /* equal to 1 */ 47 | int count; /* Number of slots currently filled */ 48 | struct s_x1node *tbl; /* The data stored here */ 49 | struct s_x1node **ht; /* Hash table for lookups */ 50 | }; 51 | 52 | /* There is one instance of this structure for every data element 53 | ** in an associative array of type "x1". 
54 | */ 55 | typedef struct s_x1node { 56 | const char *data; /* The data */ 57 | struct s_x1node *next; /* Next entry with the same hash */ 58 | struct s_x1node **from; /* Previous link */ 59 | } x1node; 60 | 61 | /* There is only one instance of the array, which is the following */ 62 | static struct s_x1 *x1a; 63 | 64 | /* Allocate a new associative array */ 65 | void Strsafe_init(void){ 66 | if( x1a ) return; 67 | x1a = (struct s_x1*)malloc( sizeof(struct s_x1) ); 68 | if( x1a ){ 69 | x1a->size = 1024; 70 | x1a->count = 0; 71 | x1a->tbl = (x1node*)calloc(1024, sizeof(x1node) + sizeof(x1node*)); 72 | if( x1a->tbl==0 ){ 73 | free(x1a); 74 | x1a = 0; 75 | }else{ 76 | int i; 77 | x1a->ht = (x1node**)&(x1a->tbl[1024]); 78 | for(i=0; i<1024; i++) x1a->ht[i] = 0; 79 | } 80 | } 81 | } 82 | /* Insert a new record into the array. Return TRUE if successful. 83 | ** Prior data with the same key is NOT overwritten */ 84 | int Strsafe_insert(const char *data) 85 | { 86 | x1node *np; 87 | unsigned h; 88 | unsigned ph; 89 | 90 | if( x1a==0 ) return 0; 91 | ph = strhash(data); 92 | h = ph & (x1a->size-1); 93 | np = x1a->ht[h]; 94 | while( np ){ 95 | if( strcmp(np->data,data)==0 ){ 96 | /* An existing entry with the same key is found. */ 97 | /* Fail because overwrite is not allowed. 
*/ 98 | return 0; 99 | } 100 | np = np->next; 101 | } 102 | if( x1a->count>=x1a->size ){ 103 | /* Need to make the hash table bigger */ 104 | int i,arrSize; 105 | struct s_x1 array; 106 | array.size = arrSize = x1a->size*2; 107 | array.count = x1a->count; 108 | array.tbl = (x1node*)calloc(arrSize, sizeof(x1node) + sizeof(x1node*)); 109 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 110 | array.ht = (x1node**)&(array.tbl[arrSize]); 111 | for(i=0; i<arrSize; i++) array.ht[i] = 0; 112 | for(i=0; i<x1a->count; i++){ 113 | x1node *oldnp, *newnp; 114 | oldnp = &(x1a->tbl[i]); 115 | h = strhash(oldnp->data) & (arrSize-1); 116 | newnp = &(array.tbl[i]); 117 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 118 | newnp->next = array.ht[h]; 119 | newnp->data = oldnp->data; 120 | newnp->from = &(array.ht[h]); 121 | array.ht[h] = newnp; 122 | } 123 | /* free(x1a->tbl); // This program was originally for 16-bit machines. 124 | ** Don't worry about freeing memory on modern platforms. */ 125 | *x1a = array; 126 | } 127 | /* Insert the new data */ 128 | h = ph & (x1a->size-1); 129 | np = &(x1a->tbl[x1a->count++]); 130 | np->data = data; 131 | if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); 132 | np->next = x1a->ht[h]; 133 | x1a->ht[h] = np; 134 | np->from = &(x1a->ht[h]); 135 | return 1; 136 | } 137 | 138 | /* Return a pointer to data assigned to the given key. Return NULL 139 | ** if no such key. */ 140 | const char *Strsafe_find(const char *key) 141 | { 142 | unsigned h; 143 | x1node *np; 144 | 145 | if( x1a==0 ) return 0; 146 | h = strhash(key) & (x1a->size-1); 147 | np = x1a->ht[h]; 148 | while( np ){ 149 | if( strcmp(np->data,key)==0 ) break; 150 | np = np->next; 151 | } 152 | return np ? np->data : 0; 153 | } 154 | 155 | /* Return a pointer to the (terminal or nonterminal) symbol "x". 156 | ** Create a new symbol if this is the first time "x" has been seen. 
157 | */ 158 | struct symbol *Symbol_new(const char *x) 159 | { 160 | struct symbol *sp; 161 | 162 | sp = Symbol_find(x); 163 | if( sp==0 ){ 164 | sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); 165 | MemoryCheck(sp); 166 | sp->name = Strsafe(x); 167 | sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; 168 | sp->rule = 0; 169 | sp->fallback = 0; 170 | sp->prec = -1; 171 | sp->assoc = UNK; 172 | sp->firstset = 0; 173 | sp->lambda = LEMON_FALSE; 174 | sp->destructor = 0; 175 | sp->destLineno = 0; 176 | sp->datatype = 0; 177 | sp->useCnt = 0; 178 | Symbol_insert(sp,sp->name); 179 | } 180 | sp->useCnt++; 181 | return sp; 182 | } 183 | 184 | /* Compare two symbols for sorting purposes. Return negative, 185 | ** zero, or positive if a is less than, equal to, or greater 186 | ** than b. 187 | ** 188 | ** Symbols that begin with upper case letters (terminals or tokens) 189 | ** must sort before symbols that begin with lower case letters 190 | ** (non-terminals). And MULTITERMINAL symbols (created using the 191 | ** %token_class directive) must sort at the very end. Other than 192 | ** that, the order does not matter. 193 | ** 194 | ** We find experimentally that leaving the symbols in their original 195 | ** order (the order they appeared in the grammar file) gives the 196 | ** smallest parser tables in SQLite. 197 | */ 198 | int Symbolcmpp(const void *_a, const void *_b) 199 | { 200 | const struct symbol *a = *(const struct symbol **) _a; 201 | const struct symbol *b = *(const struct symbol **) _b; 202 | int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; 203 | int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; 204 | return i1==i2 ? a->index - b->index : i1 - i2; 205 | } 206 | 207 | /* There is one instance of the following structure for each 208 | ** associative array of type "x2". 209 | */ 210 | struct s_x2 { 211 | int size; /* The number of available slots. 
*/ 212 | /* Must be a power of 2 greater than or */ 213 | /* equal to 1 */ 214 | int count; /* Number of slots currently filled */ 215 | struct s_x2node *tbl; /* The data stored here */ 216 | struct s_x2node **ht; /* Hash table for lookups */ 217 | }; 218 | 219 | /* There is one instance of this structure for every data element 220 | ** in an associative array of type "x2". 221 | */ 222 | typedef struct s_x2node { 223 | struct symbol *data; /* The data */ 224 | const char *key; /* The key */ 225 | struct s_x2node *next; /* Next entry with the same hash */ 226 | struct s_x2node **from; /* Previous link */ 227 | } x2node; 228 | 229 | /* There is only one instance of the array, which is the following */ 230 | static struct s_x2 *x2a; 231 | 232 | /* Allocate a new associative array */ 233 | void Symbol_init(void){ 234 | if( x2a ) return; 235 | x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); 236 | if( x2a ){ 237 | x2a->size = 128; 238 | x2a->count = 0; 239 | x2a->tbl = (x2node*)calloc(128, sizeof(x2node) + sizeof(x2node*)); 240 | if( x2a->tbl==0 ){ 241 | free(x2a); 242 | x2a = 0; 243 | }else{ 244 | int i; 245 | x2a->ht = (x2node**)&(x2a->tbl[128]); 246 | for(i=0; i<128; i++) x2a->ht[i] = 0; 247 | } 248 | } 249 | } 250 | /* Insert a new record into the array. Return TRUE if successful. 251 | ** Prior data with the same key is NOT overwritten */ 252 | int Symbol_insert(struct symbol *data, const char *key) 253 | { 254 | x2node *np; 255 | unsigned h; 256 | unsigned ph; 257 | 258 | if( x2a==0 ) return 0; 259 | ph = strhash(key); 260 | h = ph & (x2a->size-1); 261 | np = x2a->ht[h]; 262 | while( np ){ 263 | if( strcmp(np->key,key)==0 ){ 264 | /* An existing entry with the same key is found. */ 265 | /* Fail because overwrite is not allowed. 
*/ 266 | return 0; 267 | } 268 | np = np->next; 269 | } 270 | if( x2a->count>=x2a->size ){ 271 | /* Need to make the hash table bigger */ 272 | int i,arrSize; 273 | struct s_x2 array; 274 | array.size = arrSize = x2a->size*2; 275 | array.count = x2a->count; 276 | array.tbl = (x2node*)calloc(arrSize, sizeof(x2node) + sizeof(x2node*)); 277 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 278 | array.ht = (x2node**)&(array.tbl[arrSize]); 279 | for(i=0; i<arrSize; i++) array.ht[i] = 0; 280 | for(i=0; i<x2a->count; i++){ 281 | x2node *oldnp, *newnp; 282 | oldnp = &(x2a->tbl[i]); 283 | h = strhash(oldnp->key) & (arrSize-1); 284 | newnp = &(array.tbl[i]); 285 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 286 | newnp->next = array.ht[h]; 287 | newnp->key = oldnp->key; 288 | newnp->data = oldnp->data; 289 | newnp->from = &(array.ht[h]); 290 | array.ht[h] = newnp; 291 | } 292 | /* free(x2a->tbl); // This program was originally written for 16-bit 293 | ** machines. Don't worry about freeing this trivial amount of memory 294 | ** on modern platforms. Just leak it. */ 295 | *x2a = array; 296 | } 297 | /* Insert the new data */ 298 | h = ph & (x2a->size-1); 299 | np = &(x2a->tbl[x2a->count++]); 300 | np->key = key; 301 | np->data = data; 302 | if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); 303 | np->next = x2a->ht[h]; 304 | x2a->ht[h] = np; 305 | np->from = &(x2a->ht[h]); 306 | return 1; 307 | } 308 | 309 | /* Return a pointer to data assigned to the given key. Return NULL 310 | ** if no such key. */ 311 | struct symbol *Symbol_find(const char *key) 312 | { 313 | unsigned h; 314 | x2node *np; 315 | 316 | if( x2a==0 ) return 0; 317 | h = strhash(key) & (x2a->size-1); 318 | np = x2a->ht[h]; 319 | while( np ){ 320 | if( strcmp(np->key,key)==0 ) break; 321 | np = np->next; 322 | } 323 | return np ? np->data : 0; 324 | } 325 | 326 | /* Return the n-th data. Return NULL if n is out of range.
*/ 327 | struct symbol *Symbol_Nth(int n) 328 | { 329 | struct symbol *data; 330 | if( x2a && n>0 && n<=x2a->count ){ 331 | data = x2a->tbl[n-1].data; 332 | }else{ 333 | data = 0; 334 | } 335 | return data; 336 | } 337 | 338 | /* Return the size of the array */ 339 | int Symbol_count() 340 | { 341 | return x2a ? x2a->count : 0; 342 | } 343 | 344 | /* Return an array of pointers to all data in the table. 345 | ** The array is obtained from malloc. Return NULL if memory allocation 346 | ** problems, or if the array is empty. */ 347 | struct symbol **Symbol_arrayof() 348 | { 349 | struct symbol **array; 350 | int i,arrSize; 351 | if( x2a==0 ) return 0; 352 | arrSize = x2a->count; 353 | array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *)); 354 | if( array ){ 355 | for(i=0; itbl[i].data; 356 | } 357 | return array; 358 | } 359 | 360 | /* Compare two configurations */ 361 | int Configcmp(const char *_a,const char *_b) 362 | { 363 | const struct config *a = (struct config *) _a; 364 | const struct config *b = (struct config *) _b; 365 | int x; 366 | x = a->rp->index - b->rp->index; 367 | if( x==0 ) x = a->dot - b->dot; 368 | return x; 369 | } 370 | 371 | /* Compare two states */ 372 | PRIVATE int statecmp(struct config *a, struct config *b) 373 | { 374 | int rc; 375 | for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ 376 | rc = a->rp->index - b->rp->index; 377 | if( rc==0 ) rc = a->dot - b->dot; 378 | } 379 | if( rc==0 ){ 380 | if( a ) rc = 1; 381 | if( b ) rc = -1; 382 | } 383 | return rc; 384 | } 385 | 386 | /* Hash a state */ 387 | PRIVATE unsigned statehash(struct config *a) 388 | { 389 | unsigned h=0; 390 | while( a ){ 391 | h = h*571 + a->rp->index*37 + a->dot; 392 | a = a->bp; 393 | } 394 | return h; 395 | } 396 | 397 | /* Allocate a new state structure */ 398 | struct state *State_new() 399 | { 400 | struct state *newstate; 401 | newstate = (struct state *)calloc(1, sizeof(struct state) ); 402 | MemoryCheck(newstate); 403 | return newstate; 404 | } 405 | 
406 | /* There is one instance of the following structure for each 407 | ** associative array of type "x3". 408 | */ 409 | struct s_x3 { 410 | int size; /* The number of available slots. */ 411 | /* Must be a power of 2 greater than or */ 412 | /* equal to 1 */ 413 | int count; /* Number of currently slots filled */ 414 | struct s_x3node *tbl; /* The data stored here */ 415 | struct s_x3node **ht; /* Hash table for lookups */ 416 | }; 417 | 418 | /* There is one instance of this structure for every data element 419 | ** in an associative array of type "x3". 420 | */ 421 | typedef struct s_x3node { 422 | struct state *data; /* The data */ 423 | struct config *key; /* The key */ 424 | struct s_x3node *next; /* Next entry with the same hash */ 425 | struct s_x3node **from; /* Previous link */ 426 | } x3node; 427 | 428 | /* There is only one instance of the array, which is the following */ 429 | static struct s_x3 *x3a; 430 | 431 | /* Allocate a new associative array */ 432 | void State_init(void){ 433 | if( x3a ) return; 434 | x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); 435 | if( x3a ){ 436 | x3a->size = 128; 437 | x3a->count = 0; 438 | x3a->tbl = (x3node*)calloc(128, sizeof(x3node) + sizeof(x3node*)); 439 | if( x3a->tbl==0 ){ 440 | free(x3a); 441 | x3a = 0; 442 | }else{ 443 | int i; 444 | x3a->ht = (x3node**)&(x3a->tbl[128]); 445 | for(i=0; i<128; i++) x3a->ht[i] = 0; 446 | } 447 | } 448 | } 449 | /* Insert a new record into the array. Return TRUE if successful. 450 | ** Prior data with the same key is NOT overwritten */ 451 | int State_insert(struct state *data, struct config *key) 452 | { 453 | x3node *np; 454 | unsigned h; 455 | unsigned ph; 456 | 457 | if( x3a==0 ) return 0; 458 | ph = statehash(key); 459 | h = ph & (x3a->size-1); 460 | np = x3a->ht[h]; 461 | while( np ){ 462 | if( statecmp(np->key,key)==0 ){ 463 | /* An existing entry with the same key is found. */ 464 | /* Fail because overwrite is not allows. 
*/ 465 | return 0; 466 | } 467 | np = np->next; 468 | } 469 | if( x3a->count>=x3a->size ){ 470 | /* Need to make the hash table bigger */ 471 | int i,arrSize; 472 | struct s_x3 array; 473 | array.size = arrSize = x3a->size*2; 474 | array.count = x3a->count; 475 | array.tbl = (x3node*)calloc(arrSize, sizeof(x3node) + sizeof(x3node*)); 476 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 477 | array.ht = (x3node**)&(array.tbl[arrSize]); 478 | for(i=0; icount; i++){ 480 | x3node *oldnp, *newnp; 481 | oldnp = &(x3a->tbl[i]); 482 | h = statehash(oldnp->key) & (arrSize-1); 483 | newnp = &(array.tbl[i]); 484 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 485 | newnp->next = array.ht[h]; 486 | newnp->key = oldnp->key; 487 | newnp->data = oldnp->data; 488 | newnp->from = &(array.ht[h]); 489 | array.ht[h] = newnp; 490 | } 491 | free(x3a->tbl); 492 | *x3a = array; 493 | } 494 | /* Insert the new data */ 495 | h = ph & (x3a->size-1); 496 | np = &(x3a->tbl[x3a->count++]); 497 | np->key = key; 498 | np->data = data; 499 | if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); 500 | np->next = x3a->ht[h]; 501 | x3a->ht[h] = np; 502 | np->from = &(x3a->ht[h]); 503 | return 1; 504 | } 505 | 506 | /* Return a pointer to data assigned to the given key. Return NULL 507 | ** if no such key. */ 508 | struct state *State_find(struct config *key) 509 | { 510 | unsigned h; 511 | x3node *np; 512 | 513 | if( x3a==0 ) return 0; 514 | h = statehash(key) & (x3a->size-1); 515 | np = x3a->ht[h]; 516 | while( np ){ 517 | if( statecmp(np->key,key)==0 ) break; 518 | np = np->next; 519 | } 520 | return np ? np->data : 0; 521 | } 522 | 523 | /* Return an array of pointers to all data in the table. 524 | ** The array is obtained from malloc. Return NULL if memory allocation 525 | ** problems, or if the array is empty. 
*/ 526 | struct state **State_arrayof(void) 527 | { 528 | struct state **array; 529 | int i,arrSize; 530 | if( x3a==0 ) return 0; 531 | arrSize = x3a->count; 532 | array = (struct state **)calloc(arrSize, sizeof(struct state *)); 533 | if( array ){ 534 | for(i=0; itbl[i].data; 535 | } 536 | return array; 537 | } 538 | 539 | /* Hash a configuration */ 540 | PRIVATE unsigned confighash(struct config *a) 541 | { 542 | unsigned h=0; 543 | h = h*571 + a->rp->index*37 + a->dot; 544 | return h; 545 | } 546 | 547 | /* There is one instance of the following structure for each 548 | ** associative array of type "x4". 549 | */ 550 | struct s_x4 { 551 | int size; /* The number of available slots. */ 552 | /* Must be a power of 2 greater than or */ 553 | /* equal to 1 */ 554 | int count; /* Number of currently slots filled */ 555 | struct s_x4node *tbl; /* The data stored here */ 556 | struct s_x4node **ht; /* Hash table for lookups */ 557 | }; 558 | 559 | /* There is one instance of this structure for every data element 560 | ** in an associative array of type "x4". 561 | */ 562 | typedef struct s_x4node { 563 | struct config *data; /* The data */ 564 | struct s_x4node *next; /* Next entry with the same hash */ 565 | struct s_x4node **from; /* Previous link */ 566 | } x4node; 567 | 568 | /* There is only one instance of the array, which is the following */ 569 | static struct s_x4 *x4a; 570 | 571 | /* Allocate a new associative array */ 572 | void Configtable_init(void){ 573 | if( x4a ) return; 574 | x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); 575 | if( x4a ){ 576 | x4a->size = 64; 577 | x4a->count = 0; 578 | x4a->tbl = (x4node*)calloc(64, sizeof(x4node) + sizeof(x4node*)); 579 | if( x4a->tbl==0 ){ 580 | free(x4a); 581 | x4a = 0; 582 | }else{ 583 | int i; 584 | x4a->ht = (x4node**)&(x4a->tbl[64]); 585 | for(i=0; i<64; i++) x4a->ht[i] = 0; 586 | } 587 | } 588 | } 589 | /* Insert a new record into the array. Return TRUE if successful. 
590 | ** Prior data with the same key is NOT overwritten */ 591 | int Configtable_insert(struct config *data) 592 | { 593 | x4node *np; 594 | unsigned h; 595 | unsigned ph; 596 | 597 | if( x4a==0 ) return 0; 598 | ph = confighash(data); 599 | h = ph & (x4a->size-1); 600 | np = x4a->ht[h]; 601 | while( np ){ 602 | if( Configcmp((const char *) np->data,(const char *) data)==0 ){ 603 | /* An existing entry with the same key is found. */ 604 | /* Fail because overwrite is not allows. */ 605 | return 0; 606 | } 607 | np = np->next; 608 | } 609 | if( x4a->count>=x4a->size ){ 610 | /* Need to make the hash table bigger */ 611 | int i,arrSize; 612 | struct s_x4 array; 613 | array.size = arrSize = x4a->size*2; 614 | array.count = x4a->count; 615 | array.tbl = (x4node*)calloc(arrSize, sizeof(x4node) + sizeof(x4node*)); 616 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 617 | array.ht = (x4node**)&(array.tbl[arrSize]); 618 | for(i=0; icount; i++){ 620 | x4node *oldnp, *newnp; 621 | oldnp = &(x4a->tbl[i]); 622 | h = confighash(oldnp->data) & (arrSize-1); 623 | newnp = &(array.tbl[i]); 624 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 625 | newnp->next = array.ht[h]; 626 | newnp->data = oldnp->data; 627 | newnp->from = &(array.ht[h]); 628 | array.ht[h] = newnp; 629 | } 630 | /* free(x4a->tbl); // This code was originall written for 16-bit machines. 631 | ** on modern machines, don't worry about freeing this trival amount of 632 | ** memory. */ 633 | *x4a = array; 634 | } 635 | /* Insert the new data */ 636 | h = ph & (x4a->size-1); 637 | np = &(x4a->tbl[x4a->count++]); 638 | np->data = data; 639 | if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); 640 | np->next = x4a->ht[h]; 641 | x4a->ht[h] = np; 642 | np->from = &(x4a->ht[h]); 643 | return 1; 644 | } 645 | 646 | /* Return a pointer to data assigned to the given key. Return NULL 647 | ** if no such key. 
*/ 648 | struct config *Configtable_find(struct config *key) 649 | { 650 | int h; 651 | x4node *np; 652 | 653 | if( x4a==0 ) return 0; 654 | h = confighash(key) & (x4a->size-1); 655 | np = x4a->ht[h]; 656 | while( np ){ 657 | if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; 658 | np = np->next; 659 | } 660 | return np ? np->data : 0; 661 | } 662 | 663 | /* Remove all data from the table. Pass each data to the function "f" 664 | ** as it is removed. ("f" may be null to avoid this step.) */ 665 | void Configtable_clear(int(*f)(struct config *)) 666 | { 667 | int i; 668 | if( x4a==0 || x4a->count==0 ) return; 669 | if( f ) for(i=0; icount; i++) (*f)(x4a->tbl[i].data); 670 | for(i=0; isize; i++) x4a->ht[i] = 0; 671 | x4a->count = 0; 672 | return; 673 | } 674 | -------------------------------------------------------------------------------- /lemon/sliced/parse.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Input file parser for the LEMON parser generator. 
3 | */ 4 | 5 | /* The state of the parser */ 6 | enum e_state { 7 | INITIALIZE, 8 | WAITING_FOR_DECL_OR_RULE, 9 | WAITING_FOR_DECL_KEYWORD, 10 | WAITING_FOR_DECL_ARG, 11 | WAITING_FOR_PRECEDENCE_SYMBOL, 12 | WAITING_FOR_ARROW, 13 | IN_RHS, 14 | LHS_ALIAS_1, 15 | LHS_ALIAS_2, 16 | LHS_ALIAS_3, 17 | RHS_ALIAS_1, 18 | RHS_ALIAS_2, 19 | PRECEDENCE_MARK_1, 20 | PRECEDENCE_MARK_2, 21 | RESYNC_AFTER_RULE_ERROR, 22 | RESYNC_AFTER_DECL_ERROR, 23 | WAITING_FOR_DESTRUCTOR_SYMBOL, 24 | WAITING_FOR_DATATYPE_SYMBOL, 25 | WAITING_FOR_FALLBACK_ID, 26 | WAITING_FOR_WILDCARD_ID, 27 | WAITING_FOR_CLASS_ID, 28 | WAITING_FOR_CLASS_TOKEN, 29 | WAITING_FOR_TOKEN_NAME 30 | }; 31 | struct pstate { 32 | char *filename; /* Name of the input file */ 33 | int tokenlineno; /* Linenumber at which current token starts */ 34 | int errorcnt; /* Number of errors so far */ 35 | char *tokenstart; /* Text of current token */ 36 | struct lemon *gp; /* Global state vector */ 37 | enum e_state state; /* The state of the parser */ 38 | struct symbol *fallback; /* The fallback token */ 39 | struct symbol *tkclass; /* Token class symbol */ 40 | struct symbol *lhs; /* Left-hand side of current rule */ 41 | const char *lhsalias; /* Alias for the LHS */ 42 | int nrhs; /* Number of right-hand side symbols seen */ 43 | struct symbol *rhs[MAXRHS]; /* RHS symbols */ 44 | const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ 45 | struct rule *prevrule; /* Previous rule parsed */ 46 | const char *declkeyword; /* Keyword of a declaration */ 47 | char **declargslot; /* Where the declaration argument should be put */ 48 | int insertLineMacro; /* Add #line before declaration insert */ 49 | int *decllinenoslot; /* Where to write declaration line number */ 50 | enum e_assoc declassoc; /* Assign this association to decl arguments */ 51 | int preccounter; /* Assign this precedence to decl arguments */ 52 | struct rule *firstrule; /* Pointer to first rule in the grammar */ 53 | struct rule *lastrule; /* 
Pointer to the most recently parsed rule */ 54 | }; 55 | 56 | /* Parse a single token */ 57 | static void parseonetoken(struct pstate *psp) 58 | { 59 | const char *x; 60 | x = Strsafe(psp->tokenstart); /* Save the token permanently */ 61 | #if 0 62 | printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno, 63 | x,psp->state); 64 | #endif 65 | switch( psp->state ){ 66 | case INITIALIZE: 67 | psp->prevrule = 0; 68 | psp->preccounter = 0; 69 | psp->firstrule = psp->lastrule = 0; 70 | psp->gp->nrule = 0; 71 | /* fall through */ 72 | case WAITING_FOR_DECL_OR_RULE: 73 | if( x[0]=='%' ){ 74 | psp->state = WAITING_FOR_DECL_KEYWORD; 75 | }else if( ISLOWER(x[0]) ){ 76 | psp->lhs = Symbol_new(x); 77 | psp->nrhs = 0; 78 | psp->lhsalias = 0; 79 | psp->state = WAITING_FOR_ARROW; 80 | }else if( x[0]=='{' ){ 81 | if( psp->prevrule==0 ){ 82 | ErrorMsg(psp->filename,psp->tokenlineno, 83 | "There is no prior rule upon which to attach the code " 84 | "fragment which begins on this line."); 85 | psp->errorcnt++; 86 | }else if( psp->prevrule->code!=0 ){ 87 | ErrorMsg(psp->filename,psp->tokenlineno, 88 | "Code fragment beginning on this line is not the first " 89 | "to follow the previous rule."); 90 | psp->errorcnt++; 91 | }else if( strcmp(x, "{NEVER-REDUCE")==0 ){ 92 | psp->prevrule->neverReduce = 1; 93 | }else{ 94 | psp->prevrule->line = psp->tokenlineno; 95 | psp->prevrule->code = &x[1]; 96 | psp->prevrule->noCode = 0; 97 | } 98 | }else if( x[0]=='[' ){ 99 | psp->state = PRECEDENCE_MARK_1; 100 | }else{ 101 | ErrorMsg(psp->filename,psp->tokenlineno, 102 | "Token \"%s\" should be either \"%%\" or a nonterminal name.", 103 | x); 104 | psp->errorcnt++; 105 | } 106 | break; 107 | case PRECEDENCE_MARK_1: 108 | if( !ISUPPER(x[0]) ){ 109 | ErrorMsg(psp->filename,psp->tokenlineno, 110 | "The precedence symbol must be a terminal."); 111 | psp->errorcnt++; 112 | }else if( psp->prevrule==0 ){ 113 | ErrorMsg(psp->filename,psp->tokenlineno, 114 | "There is no prior rule to assign 
precedence \"[%s]\".",x); 115 | psp->errorcnt++; 116 | }else if( psp->prevrule->precsym!=0 ){ 117 | ErrorMsg(psp->filename,psp->tokenlineno, 118 | "Precedence mark on this line is not the first " 119 | "to follow the previous rule."); 120 | psp->errorcnt++; 121 | }else{ 122 | psp->prevrule->precsym = Symbol_new(x); 123 | } 124 | psp->state = PRECEDENCE_MARK_2; 125 | break; 126 | case PRECEDENCE_MARK_2: 127 | if( x[0]!=']' ){ 128 | ErrorMsg(psp->filename,psp->tokenlineno, 129 | "Missing \"]\" on precedence mark."); 130 | psp->errorcnt++; 131 | } 132 | psp->state = WAITING_FOR_DECL_OR_RULE; 133 | break; 134 | case WAITING_FOR_ARROW: 135 | if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 136 | psp->state = IN_RHS; 137 | }else if( x[0]=='(' ){ 138 | psp->state = LHS_ALIAS_1; 139 | }else{ 140 | ErrorMsg(psp->filename,psp->tokenlineno, 141 | "Expected to see a \":\" following the LHS symbol \"%s\".", 142 | psp->lhs->name); 143 | psp->errorcnt++; 144 | psp->state = RESYNC_AFTER_RULE_ERROR; 145 | } 146 | break; 147 | case LHS_ALIAS_1: 148 | if( ISALPHA(x[0]) ){ 149 | psp->lhsalias = x; 150 | psp->state = LHS_ALIAS_2; 151 | }else{ 152 | ErrorMsg(psp->filename,psp->tokenlineno, 153 | "\"%s\" is not a valid alias for the LHS \"%s\"\n", 154 | x,psp->lhs->name); 155 | psp->errorcnt++; 156 | psp->state = RESYNC_AFTER_RULE_ERROR; 157 | } 158 | break; 159 | case LHS_ALIAS_2: 160 | if( x[0]==')' ){ 161 | psp->state = LHS_ALIAS_3; 162 | }else{ 163 | ErrorMsg(psp->filename,psp->tokenlineno, 164 | "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 165 | psp->errorcnt++; 166 | psp->state = RESYNC_AFTER_RULE_ERROR; 167 | } 168 | break; 169 | case LHS_ALIAS_3: 170 | if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 171 | psp->state = IN_RHS; 172 | }else{ 173 | ErrorMsg(psp->filename,psp->tokenlineno, 174 | "Missing \"->\" following: \"%s(%s)\".", 175 | psp->lhs->name,psp->lhsalias); 176 | psp->errorcnt++; 177 | psp->state = RESYNC_AFTER_RULE_ERROR; 178 | } 179 | break; 180 | case 
IN_RHS: 181 | if( x[0]=='.' ){ 182 | struct rule *rp; 183 | rp = (struct rule *)calloc( sizeof(struct rule) + 184 | sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); 185 | if( rp==0 ){ 186 | ErrorMsg(psp->filename,psp->tokenlineno, 187 | "Can't allocate enough memory for this rule."); 188 | psp->errorcnt++; 189 | psp->prevrule = 0; 190 | }else{ 191 | int i; 192 | rp->ruleline = psp->tokenlineno; 193 | rp->rhs = (struct symbol**)&rp[1]; 194 | rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); 195 | for(i=0; inrhs; i++){ 196 | rp->rhs[i] = psp->rhs[i]; 197 | rp->rhsalias[i] = psp->alias[i]; 198 | if( rp->rhsalias[i]!=0 ){ rp->rhs[i]->bContent = 1; } 199 | } 200 | rp->lhs = psp->lhs; 201 | rp->lhsalias = psp->lhsalias; 202 | rp->nrhs = psp->nrhs; 203 | rp->code = 0; 204 | rp->noCode = 1; 205 | rp->precsym = 0; 206 | rp->index = psp->gp->nrule++; 207 | rp->nextlhs = rp->lhs->rule; 208 | rp->lhs->rule = rp; 209 | rp->next = 0; 210 | if( psp->firstrule==0 ){ 211 | psp->firstrule = psp->lastrule = rp; 212 | }else{ 213 | psp->lastrule->next = rp; 214 | psp->lastrule = rp; 215 | } 216 | psp->prevrule = rp; 217 | } 218 | psp->state = WAITING_FOR_DECL_OR_RULE; 219 | }else if( ISALPHA(x[0]) ){ 220 | if( psp->nrhs>=MAXRHS ){ 221 | ErrorMsg(psp->filename,psp->tokenlineno, 222 | "Too many symbols on RHS of rule beginning at \"%s\".", 223 | x); 224 | psp->errorcnt++; 225 | psp->state = RESYNC_AFTER_RULE_ERROR; 226 | }else{ 227 | psp->rhs[psp->nrhs] = Symbol_new(x); 228 | psp->alias[psp->nrhs] = 0; 229 | psp->nrhs++; 230 | } 231 | }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 && ISUPPER(x[1]) ){ 232 | struct symbol *msp = psp->rhs[psp->nrhs-1]; 233 | if( msp->type!=MULTITERMINAL ){ 234 | struct symbol *origsp = msp; 235 | msp = (struct symbol *) calloc(1,sizeof(*msp)); 236 | memset(msp, 0, sizeof(*msp)); 237 | msp->type = MULTITERMINAL; 238 | msp->nsubsym = 1; 239 | msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); 240 | msp->subsym[0] = origsp; 
241 | msp->name = origsp->name; 242 | psp->rhs[psp->nrhs-1] = msp; 243 | } 244 | msp->nsubsym++; 245 | msp->subsym = (struct symbol **) realloc(msp->subsym, 246 | sizeof(struct symbol*)*msp->nsubsym); 247 | msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); 248 | if( ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ){ 249 | ErrorMsg(psp->filename,psp->tokenlineno, 250 | "Cannot form a compound containing a non-terminal"); 251 | psp->errorcnt++; 252 | } 253 | }else if( x[0]=='(' && psp->nrhs>0 ){ 254 | psp->state = RHS_ALIAS_1; 255 | }else{ 256 | ErrorMsg(psp->filename,psp->tokenlineno, 257 | "Illegal character on RHS of rule: \"%s\".",x); 258 | psp->errorcnt++; 259 | psp->state = RESYNC_AFTER_RULE_ERROR; 260 | } 261 | break; 262 | case RHS_ALIAS_1: 263 | if( ISALPHA(x[0]) ){ 264 | psp->alias[psp->nrhs-1] = x; 265 | psp->state = RHS_ALIAS_2; 266 | }else{ 267 | ErrorMsg(psp->filename,psp->tokenlineno, 268 | "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", 269 | x,psp->rhs[psp->nrhs-1]->name); 270 | psp->errorcnt++; 271 | psp->state = RESYNC_AFTER_RULE_ERROR; 272 | } 273 | break; 274 | case RHS_ALIAS_2: 275 | if( x[0]==')' ){ 276 | psp->state = IN_RHS; 277 | }else{ 278 | ErrorMsg(psp->filename,psp->tokenlineno, 279 | "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 280 | psp->errorcnt++; 281 | psp->state = RESYNC_AFTER_RULE_ERROR; 282 | } 283 | break; 284 | case WAITING_FOR_DECL_KEYWORD: 285 | if( ISALPHA(x[0]) ){ 286 | psp->declkeyword = x; 287 | psp->declargslot = 0; 288 | psp->decllinenoslot = 0; 289 | psp->insertLineMacro = 1; 290 | psp->state = WAITING_FOR_DECL_ARG; 291 | if( strcmp(x,"name")==0 ){ 292 | psp->declargslot = &(psp->gp->name); 293 | psp->insertLineMacro = 0; 294 | }else if( strcmp(x,"include")==0 ){ 295 | psp->declargslot = &(psp->gp->include); 296 | }else if( strcmp(x,"code")==0 ){ 297 | psp->declargslot = &(psp->gp->extracode); 298 | }else if( strcmp(x,"token_destructor")==0 ){ 299 | psp->declargslot = &psp->gp->tokendest; 
300 | }else if( strcmp(x,"default_destructor")==0 ){ 301 | psp->declargslot = &psp->gp->vardest; 302 | }else if( strcmp(x,"token_prefix")==0 ){ 303 | psp->declargslot = &psp->gp->tokenprefix; 304 | psp->insertLineMacro = 0; 305 | }else if( strcmp(x,"syntax_error")==0 ){ 306 | psp->declargslot = &(psp->gp->error); 307 | }else if( strcmp(x,"parse_accept")==0 ){ 308 | psp->declargslot = &(psp->gp->accept); 309 | }else if( strcmp(x,"parse_failure")==0 ){ 310 | psp->declargslot = &(psp->gp->failure); 311 | }else if( strcmp(x,"stack_overflow")==0 ){ 312 | psp->declargslot = &(psp->gp->overflow); 313 | }else if( strcmp(x,"extra_argument")==0 ){ 314 | psp->declargslot = &(psp->gp->arg); 315 | psp->insertLineMacro = 0; 316 | }else if( strcmp(x,"extra_context")==0 ){ 317 | psp->declargslot = &(psp->gp->ctx); 318 | psp->insertLineMacro = 0; 319 | }else if( strcmp(x,"token_type")==0 ){ 320 | psp->declargslot = &(psp->gp->tokentype); 321 | psp->insertLineMacro = 0; 322 | }else if( strcmp(x,"default_type")==0 ){ 323 | psp->declargslot = &(psp->gp->vartype); 324 | psp->insertLineMacro = 0; 325 | }else if( strcmp(x,"stack_size")==0 ){ 326 | psp->declargslot = &(psp->gp->stacksize); 327 | psp->insertLineMacro = 0; 328 | }else if( strcmp(x,"start_symbol")==0 ){ 329 | psp->declargslot = &(psp->gp->start); 330 | psp->insertLineMacro = 0; 331 | }else if( strcmp(x,"left")==0 ){ 332 | psp->preccounter++; 333 | psp->declassoc = LEFT; 334 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 335 | }else if( strcmp(x,"right")==0 ){ 336 | psp->preccounter++; 337 | psp->declassoc = RIGHT; 338 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 339 | }else if( strcmp(x,"nonassoc")==0 ){ 340 | psp->preccounter++; 341 | psp->declassoc = NONE; 342 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 343 | }else if( strcmp(x,"destructor")==0 ){ 344 | psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; 345 | }else if( strcmp(x,"type")==0 ){ 346 | psp->state = WAITING_FOR_DATATYPE_SYMBOL; 347 | }else if( strcmp(x,"fallback")==0 
){ 348 | psp->fallback = 0; 349 | psp->state = WAITING_FOR_FALLBACK_ID; 350 | }else if( strcmp(x,"token")==0 ){ 351 | psp->state = WAITING_FOR_TOKEN_NAME; 352 | }else if( strcmp(x,"wildcard")==0 ){ 353 | psp->state = WAITING_FOR_WILDCARD_ID; 354 | }else if( strcmp(x,"token_class")==0 ){ 355 | psp->state = WAITING_FOR_CLASS_ID; 356 | }else{ 357 | ErrorMsg(psp->filename,psp->tokenlineno, 358 | "Unknown declaration keyword: \"%%%s\".",x); 359 | psp->errorcnt++; 360 | psp->state = RESYNC_AFTER_DECL_ERROR; 361 | } 362 | }else{ 363 | ErrorMsg(psp->filename,psp->tokenlineno, 364 | "Illegal declaration keyword: \"%s\".",x); 365 | psp->errorcnt++; 366 | psp->state = RESYNC_AFTER_DECL_ERROR; 367 | } 368 | break; 369 | case WAITING_FOR_DESTRUCTOR_SYMBOL: 370 | if( !ISALPHA(x[0]) ){ 371 | ErrorMsg(psp->filename,psp->tokenlineno, 372 | "Symbol name missing after %%destructor keyword"); 373 | psp->errorcnt++; 374 | psp->state = RESYNC_AFTER_DECL_ERROR; 375 | }else{ 376 | struct symbol *sp = Symbol_new(x); 377 | psp->declargslot = &sp->destructor; 378 | psp->decllinenoslot = &sp->destLineno; 379 | psp->insertLineMacro = 1; 380 | psp->state = WAITING_FOR_DECL_ARG; 381 | } 382 | break; 383 | case WAITING_FOR_DATATYPE_SYMBOL: 384 | if( !ISALPHA(x[0]) ){ 385 | ErrorMsg(psp->filename,psp->tokenlineno, 386 | "Symbol name missing after %%type keyword"); 387 | psp->errorcnt++; 388 | psp->state = RESYNC_AFTER_DECL_ERROR; 389 | }else{ 390 | struct symbol *sp = Symbol_find(x); 391 | if((sp) && (sp->datatype)){ 392 | ErrorMsg(psp->filename,psp->tokenlineno, 393 | "Symbol %%type \"%s\" already defined", x); 394 | psp->errorcnt++; 395 | psp->state = RESYNC_AFTER_DECL_ERROR; 396 | }else{ 397 | if (!sp){ 398 | sp = Symbol_new(x); 399 | } 400 | psp->declargslot = &sp->datatype; 401 | psp->insertLineMacro = 0; 402 | psp->state = WAITING_FOR_DECL_ARG; 403 | } 404 | } 405 | break; 406 | case WAITING_FOR_PRECEDENCE_SYMBOL: 407 | if( x[0]=='.' 
){ 408 | psp->state = WAITING_FOR_DECL_OR_RULE; 409 | }else if( ISUPPER(x[0]) ){ 410 | struct symbol *sp; 411 | sp = Symbol_new(x); 412 | if( sp->prec>=0 ){ 413 | ErrorMsg(psp->filename,psp->tokenlineno, 414 | "Symbol \"%s\" has already be given a precedence.",x); 415 | psp->errorcnt++; 416 | }else{ 417 | sp->prec = psp->preccounter; 418 | sp->assoc = psp->declassoc; 419 | } 420 | }else{ 421 | ErrorMsg(psp->filename,psp->tokenlineno, 422 | "Can't assign a precedence to \"%s\".",x); 423 | psp->errorcnt++; 424 | } 425 | break; 426 | case WAITING_FOR_DECL_ARG: 427 | if( x[0]=='{' || x[0]=='\"' || ISALNUM(x[0]) ){ 428 | const char *zOld, *zNew; 429 | char *zBuf, *z; 430 | int nOld, n, nLine = 0, nNew, nBack; 431 | int addLineMacro; 432 | char zLine[50]; 433 | zNew = x; 434 | if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; 435 | nNew = lemonStrlen(zNew); 436 | if( *psp->declargslot ){ 437 | zOld = *psp->declargslot; 438 | }else{ 439 | zOld = ""; 440 | } 441 | nOld = lemonStrlen(zOld); 442 | n = nOld + nNew + 20; 443 | addLineMacro = !psp->gp->nolinenosflag 444 | && psp->insertLineMacro 445 | && psp->tokenlineno>1 446 | && (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); 447 | if( addLineMacro ){ 448 | for(z=psp->filename, nBack=0; *z; z++){ 449 | if( *z=='\\' ) nBack++; 450 | } 451 | lemon_sprintf(zLine, "#line %d ", psp->tokenlineno); 452 | nLine = lemonStrlen(zLine); 453 | n += nLine + lemonStrlen(psp->filename) + nBack; 454 | } 455 | *psp->declargslot = (char *) realloc(*psp->declargslot, n); 456 | zBuf = *psp->declargslot + nOld; 457 | if( addLineMacro ){ 458 | if( nOld && zBuf[-1]!='\n' ){ 459 | *(zBuf++) = '\n'; 460 | } 461 | memcpy(zBuf, zLine, nLine); 462 | zBuf += nLine; 463 | *(zBuf++) = '"'; 464 | for(z=psp->filename; *z; z++){ 465 | if( *z=='\\' ){ 466 | *(zBuf++) = '\\'; 467 | } 468 | *(zBuf++) = *z; 469 | } 470 | *(zBuf++) = '"'; 471 | *(zBuf++) = '\n'; 472 | } 473 | if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ 474 | psp->decllinenoslot[0] = 
psp->tokenlineno; 475 | } 476 | memcpy(zBuf, zNew, nNew); 477 | zBuf += nNew; 478 | *zBuf = 0; 479 | psp->state = WAITING_FOR_DECL_OR_RULE; 480 | }else{ 481 | ErrorMsg(psp->filename,psp->tokenlineno, 482 | "Illegal argument to %%%s: %s",psp->declkeyword,x); 483 | psp->errorcnt++; 484 | psp->state = RESYNC_AFTER_DECL_ERROR; 485 | } 486 | break; 487 | case WAITING_FOR_FALLBACK_ID: 488 | if( x[0]=='.' ){ 489 | psp->state = WAITING_FOR_DECL_OR_RULE; 490 | }else if( !ISUPPER(x[0]) ){ 491 | ErrorMsg(psp->filename, psp->tokenlineno, 492 | "%%fallback argument \"%s\" should be a token", x); 493 | psp->errorcnt++; 494 | }else{ 495 | struct symbol *sp = Symbol_new(x); 496 | if( psp->fallback==0 ){ 497 | psp->fallback = sp; 498 | }else if( sp->fallback ){ 499 | ErrorMsg(psp->filename, psp->tokenlineno, 500 | "More than one fallback assigned to token %s", x); 501 | psp->errorcnt++; 502 | }else{ 503 | sp->fallback = psp->fallback; 504 | psp->gp->has_fallback = 1; 505 | } 506 | } 507 | break; 508 | case WAITING_FOR_TOKEN_NAME: 509 | /* Tokens do not have to be declared before use. But they can be 510 | ** in order to control their assigned integer number. The number for 511 | ** each token is assigned when it is first seen. So by including 512 | ** 513 | ** %token ONE TWO THREE. 514 | ** 515 | ** early in the grammar file, that assigns small consecutive values 516 | ** to each of the tokens ONE TWO and THREE. 517 | */ 518 | if( x[0]=='.' ){ 519 | psp->state = WAITING_FOR_DECL_OR_RULE; 520 | }else if( !ISUPPER(x[0]) ){ 521 | ErrorMsg(psp->filename, psp->tokenlineno, 522 | "%%token argument \"%s\" should be a token", x); 523 | psp->errorcnt++; 524 | }else{ 525 | (void)Symbol_new(x); 526 | } 527 | break; 528 | case WAITING_FOR_WILDCARD_ID: 529 | if( x[0]=='.' 
){ 530 | psp->state = WAITING_FOR_DECL_OR_RULE; 531 | }else if( !ISUPPER(x[0]) ){ 532 | ErrorMsg(psp->filename, psp->tokenlineno, 533 | "%%wildcard argument \"%s\" should be a token", x); 534 | psp->errorcnt++; 535 | }else{ 536 | struct symbol *sp = Symbol_new(x); 537 | if( psp->gp->wildcard==0 ){ 538 | psp->gp->wildcard = sp; 539 | }else{ 540 | ErrorMsg(psp->filename, psp->tokenlineno, 541 | "Extra wildcard to token: %s", x); 542 | psp->errorcnt++; 543 | } 544 | } 545 | break; 546 | case WAITING_FOR_CLASS_ID: 547 | if( !ISLOWER(x[0]) ){ 548 | ErrorMsg(psp->filename, psp->tokenlineno, 549 | "%%token_class must be followed by an identifier: %s", x); 550 | psp->errorcnt++; 551 | psp->state = RESYNC_AFTER_DECL_ERROR; 552 | }else if( Symbol_find(x) ){ 553 | ErrorMsg(psp->filename, psp->tokenlineno, 554 | "Symbol \"%s\" already used", x); 555 | psp->errorcnt++; 556 | psp->state = RESYNC_AFTER_DECL_ERROR; 557 | }else{ 558 | psp->tkclass = Symbol_new(x); 559 | psp->tkclass->type = MULTITERMINAL; 560 | psp->state = WAITING_FOR_CLASS_TOKEN; 561 | } 562 | break; 563 | case WAITING_FOR_CLASS_TOKEN: 564 | if( x[0]=='.' ){ 565 | psp->state = WAITING_FOR_DECL_OR_RULE; 566 | }else if( ISUPPER(x[0]) || ((x[0]=='|' || x[0]=='/') && ISUPPER(x[1])) ){ 567 | struct symbol *msp = psp->tkclass; 568 | msp->nsubsym++; 569 | msp->subsym = (struct symbol **) realloc(msp->subsym, 570 | sizeof(struct symbol*)*msp->nsubsym); 571 | if( !ISUPPER(x[0]) ) x++; 572 | msp->subsym[msp->nsubsym-1] = Symbol_new(x); 573 | }else{ 574 | ErrorMsg(psp->filename, psp->tokenlineno, 575 | "%%token_class argument \"%s\" should be a token", x); 576 | psp->errorcnt++; 577 | psp->state = RESYNC_AFTER_DECL_ERROR; 578 | } 579 | break; 580 | case RESYNC_AFTER_RULE_ERROR: 581 | /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; 582 | ** break; */ 583 | case RESYNC_AFTER_DECL_ERROR: 584 | if( x[0]=='.' 
) psp->state = WAITING_FOR_DECL_OR_RULE; 585 | if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; 586 | break; 587 | } 588 | } 589 | 590 | /* The text in the input is part of the argument to an %ifdef or %ifndef. 591 | ** Evaluate the text as a boolean expression. Return true or false. 592 | */ 593 | static int eval_preprocessor_boolean(char *z, int lineno){ 594 | int neg = 0; 595 | int res = 0; 596 | int okTerm = 1; 597 | int i; 598 | for(i=0; z[i]!=0; i++){ 599 | if( ISSPACE(z[i]) ) continue; 600 | if( z[i]=='!' ){ 601 | if( !okTerm ) goto pp_syntax_error; 602 | neg = !neg; 603 | continue; 604 | } 605 | if( z[i]=='|' && z[i+1]=='|' ){ 606 | if( okTerm ) goto pp_syntax_error; 607 | if( res ) return 1; 608 | i++; 609 | okTerm = 1; 610 | continue; 611 | } 612 | if( z[i]=='&' && z[i+1]=='&' ){ 613 | if( okTerm ) goto pp_syntax_error; 614 | if( !res ) return 0; 615 | i++; 616 | okTerm = 1; 617 | continue; 618 | } 619 | if( z[i]=='(' ){ 620 | int k; 621 | int n = 1; 622 | if( !okTerm ) goto pp_syntax_error; 623 | for(k=i+1; z[k]; k++){ 624 | if( z[k]==')' ){ 625 | n--; 626 | if( n==0 ){ 627 | z[k] = 0; 628 | res = eval_preprocessor_boolean(&z[i+1], -1); 629 | z[k] = ')'; 630 | if( res<0 ){ 631 | i = i-res; 632 | goto pp_syntax_error; 633 | } 634 | i = k; 635 | break; 636 | } 637 | }else if( z[k]=='(' ){ 638 | n++; 639 | }else if( z[k]==0 ){ 640 | i = k; 641 | goto pp_syntax_error; 642 | } 643 | } 644 | if( neg ){ 645 | res = !res; 646 | neg = 0; 647 | } 648 | okTerm = 0; 649 | continue; 650 | } 651 | if( ISALPHA(z[i]) ){ 652 | int j, k, n; 653 | if( !okTerm ) goto pp_syntax_error; 654 | for(k=i+1; ISALNUM(z[k]) || z[k]=='_'; k++){} 655 | n = k - i; 656 | res = 0; 657 | for(j=0; j0 ){ 677 | fprintf(stderr, "%%if syntax error on line %d.\n", lineno); 678 | fprintf(stderr, " %.*s <-- syntax error here\n", i+1, z); 679 | exit(1); 680 | }else{ 681 | return -(i+1); 682 | } 683 | } 684 | 685 | /* Run the preprocessor over the input file text. 
The global variables 686 | ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined 687 | ** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and 688 | ** comments them out. Text in between is also commented out as appropriate. 689 | */ 690 | static void preprocess_input(char *z){ 691 | int i, j, k; 692 | int exclude = 0; 693 | int start = 0; 694 | int lineno = 1; 695 | int start_lineno = 1; 696 | for(i=0; z[i]; i++){ 697 | if( z[i]=='\n' ) lineno++; 698 | if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; 699 | if( strncmp(&z[i],"%endif",6)==0 && ISSPACE(z[i+6]) ){ 700 | if( exclude ){ 701 | exclude--; 702 | if( exclude==0 ){ 703 | for(j=start; jfilename; 767 | ps.errorcnt = 0; 768 | ps.state = INITIALIZE; 769 | 770 | /* Begin by reading the input file */ 771 | fp = fopen(ps.filename,"rb"); 772 | if( fp==0 ){ 773 | ErrorMsg(ps.filename,0,"Can't open this file for reading."); 774 | gp->errorcnt++; 775 | return; 776 | } 777 | fseek(fp,0,2); 778 | filesize = ftell(fp); 779 | rewind(fp); 780 | filebuf = (char *)malloc( filesize+1 ); 781 | if( filesize>100000000 || filebuf==0 ){ 782 | ErrorMsg(ps.filename,0,"Input file too large."); 783 | free(filebuf); 784 | gp->errorcnt++; 785 | fclose(fp); 786 | return; 787 | } 788 | if( fread(filebuf,1,filesize,fp)!=filesize ){ 789 | ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", 790 | filesize); 791 | free(filebuf); 792 | gp->errorcnt++; 793 | fclose(fp); 794 | return; 795 | } 796 | fclose(fp); 797 | filebuf[filesize] = 0; 798 | 799 | /* Make an initial pass through the file to handle %ifdef and %ifndef */ 800 | preprocess_input(filebuf); 801 | if( gp->printPreprocessed ){ 802 | printf("%s\n", filebuf); 803 | return; 804 | } 805 | 806 | /* Now scan the text of the input file */ 807 | lineno = 1; 808 | for(cp=filebuf; (c= *cp)!=0; ){ 809 | if( c=='\n' ) lineno++; /* Keep track of the line number */ 810 | if( ISSPACE(c) ){ cp++; continue; } /* Skip all white space */ 811 | 
if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ 812 | cp+=2; 813 | while( (c= *cp)!=0 && c!='\n' ) cp++; 814 | continue; 815 | } 816 | if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ 817 | cp+=2; 818 | while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ 819 | if( c=='\n' ) lineno++; 820 | cp++; 821 | } 822 | if( c ) cp++; 823 | continue; 824 | } 825 | ps.tokenstart = cp; /* Mark the beginning of the token */ 826 | ps.tokenlineno = lineno; /* Linenumber on which token begins */ 827 | if( c=='\"' ){ /* String literals */ 828 | cp++; 829 | while( (c= *cp)!=0 && c!='\"' ){ 830 | if( c=='\n' ) lineno++; 831 | cp++; 832 | } 833 | if( c==0 ){ 834 | ErrorMsg(ps.filename,startline, 835 | "String starting on this line is not terminated before " 836 | "the end of the file."); 837 | ps.errorcnt++; 838 | nextcp = cp; 839 | }else{ 840 | nextcp = cp+1; 841 | } 842 | }else if( c=='{' ){ /* A block of C code */ 843 | int level; 844 | cp++; 845 | for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ 846 | if( c=='\n' ) lineno++; 847 | else if( c=='{' ) level++; 848 | else if( c=='}' ) level--; 849 | else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ 850 | int prevc; 851 | cp = &cp[2]; 852 | prevc = 0; 853 | while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ 854 | if( c=='\n' ) lineno++; 855 | prevc = c; 856 | cp++; 857 | } 858 | }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ 859 | cp = &cp[2]; 860 | while( (c= *cp)!=0 && c!='\n' ) cp++; 861 | if( c ) lineno++; 862 | }else if( c=='\'' || c=='\"' ){ /* String a character literals */ 863 | int startchar, prevc; 864 | startchar = c; 865 | prevc = 0; 866 | for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ 867 | if( c=='\n' ) lineno++; 868 | if( prevc=='\\' ) prevc = 0; 869 | else prevc = c; 870 | } 871 | } 872 | } 873 | if( c==0 ){ 874 | ErrorMsg(ps.filename,ps.tokenlineno, 875 | "C code starting on this line is not terminated before " 876 | "the end of the file."); 877 | ps.errorcnt++; 
878 | nextcp = cp; 879 | }else{ 880 | nextcp = cp+1; 881 | } 882 | }else if( ISALNUM(c) ){ /* Identifiers */ 883 | while( (c= *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; 884 | nextcp = cp; 885 | }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ 886 | cp += 3; 887 | nextcp = cp; 888 | }else if( (c=='/' || c=='|') && ISALPHA(cp[1]) ){ 889 | cp += 2; 890 | while( (c = *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; 891 | nextcp = cp; 892 | }else{ /* All other (one character) operators */ 893 | cp++; 894 | nextcp = cp; 895 | } 896 | c = *cp; 897 | *cp = 0; /* Null terminate the token */ 898 | parseonetoken(&ps); /* Parse the token */ 899 | *cp = (char)c; /* Restore the buffer */ 900 | cp = nextcp; 901 | } 902 | free(filebuf); /* Release the buffer after parsing */ 903 | gp->rule = ps.firstrule; 904 | gp->errorcnt = ps.errorcnt; 905 | } 906 | --------------------------------------------------------------------------------