├── .gitignore ├── README.md ├── makefile ├── scanner.l └── parser.y /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | 6 | # Compiled Dynamic libraries 7 | *.so 8 | 9 | # Compiled Static libraries 10 | *.lai 11 | *.la 12 | *.a 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | example-parser 2 | ============== 3 | 4 | This is an example parser written in flex and bison. 5 | 6 | Layout 7 | ====== 8 | 9 | The `main` function is located in `parser.y` and starts the whole thing. It uses GNU getopt to parse command-line switches, and then sets up the input and output files. Then the parser is called (`yyparse()`) on `line 379`, which begins looking for the first rule. 10 | 11 | The first rule (`statements`) is on `line 85` and defines an entire input set. Lower case indicates a rule, curly brackets enclose C code to run at the specified point during matching, and upper case is a token. 12 | 13 | Each time the parser needs a new token, `yylex` is called, and it uses the rules defined in `scanner.l` to determine what token is next. Since an unknown amount of input had to be stored for this job, I'm doing some string buffer manipulation that is unnecessary for most projects. 14 | 15 | Any C code can be run inside the curly brackets, so whatever needs to be done at a given point is possible. 
16 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 
# Builds the example flex/bison parser.
#
#   make            cross-compile $(TARGET) with the MinGW compiler in $(CC)
#   make linux      native build of $(TESTBUILD) with $(NATIVE_CC)
#   make build      regenerate everything and rebuild $(TARGET)
#   make test       regenerate everything and build the native $(TESTBIN)
#   make install    copy $(TARGET) into $(INSTALLDIR)

TARGET = example.exe
TESTBUILD = example
TESTBIN = rvtest
LEX = flex
LFLAGS = -Cf
YACC = bison
YFLAGS = -d
CC = i586-mingw32msvc-gcc
NATIVE_CC = gcc
CFLAGS = -O -Wall
INSTALLDIR = $(HOME)/bin

# Path of this file, so the forced rebuilds below re-enter the same makefile
# even when it was selected with `make -f`.
THIS_MAKEFILE := $(firstword $(MAKEFILE_LIST))

.PHONY: default all clean install uninstall cleanall linux build test

# Delete a half-written target when its recipe fails, so a broken file is
# never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

default: $(TARGET)

all: default install

FLEXFILES := $(wildcard *.l)
BISONFILES := $(wildcard *.y)
CFLEX := $(patsubst %.l,%.c,$(FLEXFILES))
CBISON := $(patsubst %.y,%.c,$(BISONFILES))
# Token headers that `bison -d` writes next to each generated parser.
BISONHEADERS := $(patsubst %.y,%.h,$(BISONFILES))
# Historical name: these are the generated C sources, which are compiled and
# linked in a single compiler invocation (no separate .o files).
OBJECTS := $(CFLEX) $(CBISON)

%.c: %.l
	$(LEX) $(LFLAGS) -o $@ $<

# One bison run writes both the parser and its header. $@ may be either of
# the two targets, so the output name is spelled from the stem instead.
%.c %.h: %.y
	$(YACC) $(YFLAGS) -o $*.c $<

# The scanner includes the bison header, so the headers are prerequisites of
# every link step even though they are not passed to the compiler.
$(TARGET): $(OBJECTS) $(BISONHEADERS)
	$(CC) -g $(OBJECTS) $(CFLAGS) -o $@

$(TESTBUILD) $(TESTBIN): $(OBJECTS) $(BISONHEADERS)
	$(NATIVE_CC) -g $(OBJECTS) $(CFLAGS) -o $@

linux: $(TESTBUILD)

cleanall: clean uninstall

# Removes only what this makefile generates; hand-written C files are kept.
clean:
	$(RM) $(OBJECTS) $(BISONHEADERS)
	$(RM) $(TARGET) $(TESTBUILD) $(TESTBIN)

uninstall:
	$(RM) $(INSTALLDIR)/$(TARGET)

install: $(TARGET)
	mkdir -p $(INSTALLDIR)
	cp -f $(TARGET) $(INSTALLDIR)

# build and test always regenerate the scanner and parser before compiling.
build:
	$(MAKE) -f $(THIS_MAKEFILE) --always-make $(TARGET)

test:
	$(MAKE) -f $(THIS_MAKEFILE) --always-make $(TESTBIN)
-------------------------------------------------------------------------------- /scanner.l: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of flex. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. 
Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 14 | * Neither the name of the University nor the names of its contributors 15 | * may be used to endorse or promote products derived from this software 16 | * without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 19 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 | * PURPOSE. 22 | */ 23 | 24 | /************************************************** 25 | start of definitions section 26 | 27 | ***************************************************/ 28 | 29 | %{ 30 | /* A template scanner file to build "scanner.c". 
*/
/* NOTE(review): the names of the system headers were missing from this copy
   of the file. The three below cover what the rules use (fprintf, strdup);
   confirm against the original list. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.h"

/* put your variables here */

%}


%option 8bit
%option nounput nomain noyywrap
%option warn

DATE     [0-9]{1,2}\/[0-9]{1,2}\/[0-9]{1,4}
PAGENUM  PAGE[ ]+[0-9]+
VALUE    [0-9,.%CR]+
ACCT     [0-9]+
BARCODE  [FTAD]{50,74}
INFO     [a-zA-Z()/#]+([a-zA-Z0-9()/#-]+|[ \t][a-zA-Z()/#-]+)+

%x startAddress

%%
    /************************************************
        start of rules section

    *************************************************/


    /* Address Block Rules.
       Active only in the exclusive startAddress condition, which the parser
       enters through startOfAddress(). Every piece of the block is handed
       to the parser as a BARCODE or ADDRESS token. */
<startAddress>^[ \t]+                     { }
<startAddress>[FTAD]{50,74}               { yylval.string = strdup(yytext); return BARCODE; }
<startAddress>[0-9]{24,35}[ \t]+\/BC\*    { yylval.string = strdup(yytext); return BARCODE; }
    /* A number followed by a date is not address text: push it back, return
       to INITIAL and report an end of line instead. */
<startAddress>[0-9]+[ \t]+{DATE}          { yyless(0); BEGIN INITIAL; return '\n'; }
    /* A line ending in a 5+4 or 5 digit number closes the address block. */
<startAddress>[^\r\n]+[0-9]{5}-[0-9]{4}$  { BEGIN INITIAL; yylval.string = strdup(yytext); return ADDRESS; }
<startAddress>[^\r\n]+[0-9]{5}$           { BEGIN INITIAL; yylval.string = strdup(yytext); return ADDRESS; }
<startAddress>[^ \t\r\n]+                 { yylval.string = strdup(yytext); return ADDRESS; }
<startAddress>[ \t]+                      { yylval.string = strdup(yytext); return ADDRESS; }
    /* Line breaks inside the block are passed on as a tab. */
<startAddress>\r?\n                       { yylval.string = strdup("\t"); return ADDRESS; }

    /* INITIAL Rules */
^[ \t]+     { }

{DATE}      { yylval.string = strdup(yytext); return DATE; }
{PAGENUM}   { yylval.string = strdup(yytext); return PAGENUM; }
{VALUE}     { yylval.string = strdup(yytext); return VALUE; }
{INFO}      { yylval.string = strdup(yytext); return INFO; }
[ \t]+      { return '\t'; }

.           { fprintf(stderr, "PROBLEM: %s\n", yytext); }
\r?\n       { return '\n'; }

%%

/* Switch the scanner back to the INITIAL rules. */
void endAddress(void) {
    BEGIN(INITIAL);
}

/* Switch the scanner to the address-block rules. */
void startOfAddress(void) {
    BEGIN(startAddress);
}

-------------------------------------------------------------------------------- /parser.y: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of flex. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 14 | * Neither the name of the University nor the names of its contributors 15 | * may be used to endorse or promote products derived from this software 16 | * without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 19 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 | * PURPOSE. 22 | */ 23 | 
%{
/* NOTE(review): the header names were missing from this copy of the file.
   These five are the headers for what this file uses (printf/FILE,
   exit/abort, strerror/strcpy, errno, getopt_long); confirm against the
   original list. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>
/* #include "config.h" */

#define YYERROR_VERBOSE 1 /* For debugging. */
/* #define YYPARSE_PARAM scanner */ /* For pure bison parser. */
/* #define YYLEX_PARAM scanner */ /* For reentrant flex.
*/


int yyerror(char* msg);
extern int yylex();
extern FILE *yyin;
extern FILE *yyout;

char FileName[256];
FILE *outfile;

char BadFileName[256];
FILE *badStatements;
char inputName[256];
char *tmp;
char *badRec;

#define MAX_LINES 100
#define MAX_LINE_LENGTH 1000
char savebuf[MAX_LINES][MAX_LINE_LENGTH]; /* buffer to hold current statement */
char line[MAX_LINE_LENGTH];               /* record currently being assembled */
int currLine = 0;
int pagesInStatement = 0;

void startOfAddress(void);
void endAddress(void);
void printStatement(void);
void save_line(char *);

/* flags for command line options */
static int output_flag = 0;
static int help_flag = 0;

/* Put prefix in front of the text held in line. Text that does not fit in
   the buffer is cut off instead of being written past its end. */
static void prepend_to_line(const char *prefix) {
    char rest[MAX_LINE_LENGTH];

    snprintf(rest, sizeof rest, "%s", line);
    snprintf(line, sizeof line, "%s%s", prefix, rest);
}

/* Add a tab and one more field to the end of line (bounded like above). */
static void append_field(const char *field) {
    size_t used = strlen(line);

    snprintf(line + used, sizeof line - used, "\t%s", field);
}

/* Close an address block: switch the scanner back, tag the collected text,
   send the statement to the bad-statements file when the address carries one
   of the two marker phrases, and store the record. */
static void finish_address(const char *tag) {
    endAddress();
    prepend_to_line(tag);
    badRec = strstr(line, "HOLD STATEMENT");
    if (badRec) {
        yyout = badStatements;
    }
    badRec = strstr(line, "INSUFFICIENT ADDRESS");
    if (badRec) {
        yyout = badStatements;
    }
    save_line(line);
}
%}

%union {
    char *string;
}

/* Every token carries the matched text.
   NOTE(review): the scanner allocates that text with strdup and nothing in
   the actions below frees it. */
%token <string> BARCODE
%token <string> DATE
%token <string> PAGENUM
%token <string> VALUE
%token <string> ACCT
%token <string> ADDRESS
%token <string> INFO

%type <string> dh h1 bc ad h2 pb tr td f1 f2

%debug
%%

statements      : statement '\n'
                | statement '\n' statements
                ;

statement       : dh '\n' h1 '\n' { startOfAddress(); } addressBlock '\n' statement
                | h2 '\n' pb '\n' statement
                | h2 '\n' statement
                | tr '\n' statement
                | td {
                      /* line holds the tab-separated fields gathered by td */
                      prepend_to_line("td\t");
                      save_line(line);
                  } '\n' statement
                | f1 '\n' f2 { printStatement(); }
                ;

                /* The tag has no tab when a barcode precedes the address.
                   NOTE(review): presumably because the collected text then
                   already begins with the tab returned for the line break
                   after the barcode; confirm. */
addressBlock    : bc ad { finish_address("ad"); }
                | ad    { finish_address("ad\t"); }
                ;

dh              : DATE '\t' PAGENUM {
                      snprintf(line, sizeof line, "dh\t%s\t%s", $1, $3);
                      pagesInStatement += 1;
                      if (pagesInStatement > 1) {
                          save_line("np");
                      }
                      save_line(line);
                  }
                ;

h1              : VALUE '\t' DATE '\t' VALUE '\t' VALUE '\t' VALUE '\t' VALUE {
                      snprintf(line, sizeof line, "h1\t%s\t%s\t%s\t%s\t%s\t%s", $1, $3, $5, $7, $9, $11); save_line(line); }
                ;

bc              : BARCODE { snprintf(line, sizeof line, "bc\t%s", $1); save_line(line); }
                ;

                /* Right recursive: the last piece is reduced first, and each
                   earlier piece is then put in front of it. */
ad              : ADDRESS    { snprintf(line, sizeof line, "%s", $1); }
                | ADDRESS ad { prepend_to_line($1); }
                ;

h2              : VALUE '\t' DATE '\t' VALUE '\t' VALUE '\t' DATE '\t' VALUE {
                      snprintf(line, sizeof line, "h2\t%s\t%s\t%s\t%s\t%s\t%s", $1, $3, $5, $7, $9, $11); save_line(line); }
                | VALUE '\t' DATE '\t' VALUE '\t' INFO '\t' DATE '\t' VALUE {
                      snprintf(line, sizeof line, "h2\t%s\t%s\t%s\t%s\t%s\t%s", $1, $3, $5, $7, $9, $11); save_line(line); }
                ;

pb              : DATE '\t' INFO '\t' VALUE { snprintf(line, sizeof line, "pb\t%s\t%s\t%s", $1, $3, $5); save_line(line); }
                ;

tr              : DATE '\t' INFO '\t' VALUE '\t' VALUE {
                      snprintf(line, sizeof line, "tr\t%s\t%s\t%s\t%s", $1, $3, $5, $7); save_line(line); }
                | DATE '\t' VALUE '\t' INFO '\t' VALUE '\t' VALUE {
                      snprintf(line, sizeof line, "tr\t%s\t%s\t%s\t%s\t%s", $1, $3, $5, $7, $9); save_line(line); }
                ;

                /* Fields are accumulated in line itself, so every field of a
                   detail line is kept; the "td" tag is added once, by the
                   statement rule. */
td              : INFO          { snprintf(line, sizeof line, "%s", $1); }
                | td '\t' INFO  { append_field($3); }
                | td '\t' DATE  { append_field($3); }
                | td '\t' VALUE { append_field($3); }
                ;

f1              : VALUE '\t' VALUE '\t' VALUE '\t' VALUE '\t' VALUE '\t' VALUE {
                      snprintf(line, sizeof line, "f1\t%s\t%s\t%s\t%s\t%s\t%s", $1, $3, $5, $7, $9,
$11); save_line(line); }
                ;

f2              : VALUE '\t' VALUE '\t' VALUE '\t' VALUE {
                      snprintf(line, sizeof line, "f2\t%s\t%s\t%s\t%s", $1, $3, $5, $7); save_line(line); }
                ;

%%

/****************************************************
    start of code section


*****************************************************/

/* Error callback used by the generated parser: prints msg on stderr. */
int yyerror(char* msg) {
    fprintf(stderr,"%s\n",msg);
    return 0;
}

/* Write the saved records of the current statement to yyout, one per line
   with CRLF endings. The first record also gets the statement's page count
   as an extra field. Afterwards the statement buffer and the page counter
   are reset and output is pointed back at the regular output file. */
void printStatement() {
    int i;
    for(i = 0; i < currLine; i++) {
        if (i == 0) {
            fprintf(yyout, "%s\t%d\r\n", savebuf[i], pagesInStatement);
        }
        else {
            fprintf(yyout, "%s\r\n", savebuf[i]);
        }
    }
    currLine = 0;
    pagesInStatement = 0;
    yyout = outfile;
}

/* Store a copy of s as the next record of the current statement.
   A record longer than MAX_LINE_LENGTH - 1 characters is truncated, and a
   record beyond MAX_LINES is reported and dropped, so savebuf is never
   written out of bounds. */
void save_line(char *s) {
    if (currLine >= MAX_LINES) {
        fprintf(stderr, "statement has more than %d lines, dropping: %s\n", MAX_LINES, s);
        return;
    }
    strncpy(savebuf[currLine], s, MAX_LINE_LENGTH - 1);
    savebuf[currLine][MAX_LINE_LENGTH - 1] = '\0';
    currLine += 1;
}

/* Open path with the given mode. On failure, report it on stderr and exit
   with the errno value of the failed fopen. */
static FILE *open_or_exit(const char *prog, const char *path, const char *mode) {
    FILE *fp = fopen(path, mode);

    if (!fp) {
        int err = errno;    /* fprintf may change errno */
        fprintf (stderr, "%s: Couldn't open file %s; %s\n", prog, path, strerror (err));
        exit(err);
    }
    return fp;
}

/* Build "<base without its last four characters><suffix>" in dst.
   A base shorter than four characters is used whole, and the result is
   truncated to fit dstsize. */
static void derive_name(char *dst, size_t dstsize, const char *base, const char *suffix) {
    size_t keep = strlen(base);

    if (keep >= 4) {
        keep -= 4;
    }
    snprintf(dst, dstsize, "%.*s%s", (int)keep, base, suffix);
}

/****************************************************
    The main function drives the program. It gets the file names from the
    command line and opens the files to read from and write to. Then it
    calls the parser. After the parser returns, main closes the files it
    opened and prints a message to let the user know it is finished.
****************************************************/
int main(int argc, char **argv)
{
    int c;
    yydebug = 0;

    /* the gnu getopt library is used to parse the command line for flags
       afterwards, the remaining arguments are the input and output files */

    while (1) {
        static struct option long_options[] = {
            /* These options set a flag. */
            {"help", no_argument, &help_flag, 1},
            /* These options don't set a flag. We distinguish them by their indices. */

            {"useStdOut", no_argument, 0, 'o'},
            {0, 0, 0, 0}
        };
        /* getopt_long stores the option index here. */
        int option_index = 0;
        c = getopt_long (argc, argv, "hod",
                         long_options, &option_index);

        /* Detect the end of the options. */
        if (c == -1)
            break;

        switch (c) {
            case 0:
                /* If this option set a flag, do nothing else now. */
                if (long_options[option_index].flag != 0)
                    break;
                printf ("option %s", long_options[option_index].name);
                if (optarg)
                    printf (" with arg %s", optarg);
                printf ("\n");
                break;

            case 'h':
                help_flag = 1;
                break;

            case 'd':
                yydebug = 1;
                break;

            case 'o':
                output_flag = 1;
                break;

            case '?':
                /* getopt_long already printed an error message. */
                break;

            default:
                abort ();
        }
    }

    if (help_flag == 1) {
        printf("proper syntax is: example [OPTIONS]... INFILE OUTFILE\n");
        printf("Parses input file to collect and mark 'statements'.\n\n");
        printf("Option list: \n");
        printf("-d turns on debug info\n");
        printf("-o sets output to stdout\n");
        printf("--help print help to screen\n");
        printf("\n");
        printf("If infile is left out, then stdin is used for input.\n");
        printf("If outfile is a filename, then that file is used for output.\n");
        printf("If there is no outfile, then infile-EDIT is used.\n");
        printf("There cannot be an outfile without an infile.\n");
        return 0;
    }

    /* get the filename off the command line and redirect it to input
       if there is no filename then use stdin */

    if (optind < argc) {
        yyin = open_or_exit(argv[0], argv[optind], "rb");
        snprintf(inputName, sizeof inputName, "%s", argv[optind]);
    }
    else {
        printf("no input file set, using stdin. Press ctrl-c to quit");
        yyin = stdin;
        strcpy(inputName, "\b\b\b\b\bagainst stdin");
    }

    /* increment current place in argument list */
    optind++;


    /********************************************
        if no input name, or -o was given, then output goes to stdout
        if no output name then copy input name and add -EDIT.dat
        otherwise use output name

    *********************************************/
    if (optind > argc || output_flag == 1) {
        outfile = stdout;
        badStatements = stderr;
    }
    else if (optind < argc) {
        outfile = open_or_exit(argv[0], argv[optind], "wb");

        derive_name(BadFileName, sizeof BadFileName, argv[optind], "-BadStatements.dat");
        badStatements = open_or_exit(argv[0], BadFileName, "wb");
    }
    else {
        derive_name(FileName, sizeof FileName, argv[optind-1], "-EDIT.dat");
        outfile = open_or_exit(argv[0], FileName, "wb");

        derive_name(BadFileName, sizeof BadFileName, argv[optind-1], "-BadStatements.dat");
        badStatements = open_or_exit(argv[0], BadFileName, "wb");
    }

    yyout = outfile;
    fprintf(yyout, "header\tfield2\tfield3\tfield4\tfield5\tfield6\tfield7\tfield8\tfield9\tfield10\tfield11\tfield12\tfield13\tfield14\tfield15\r\n");
    yyparse();

    /* Close through outfile rather than yyout: yyout may still point at
       badStatements if the input ended inside a flagged statement. The
       standard streams are left open for the message below. */
    if (outfile != stdout) {
        fclose(outfile);
    }
    if (badStatements != stderr) {
        fclose(badStatements);
    }
    printf("Flex program finished running file %s\n", inputName);
    return 0;
}

--------------------------------------------------------------------------------