├── Makefile ├── README ├── calc.c ├── combinator.c └── combinator.h /Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | CFLAGS=-O2 3 | 4 | all: combinator.o calc.c 5 | $(CC) $(CFLAGS) calc.c combinator.o -o calc 6 | 7 | combinator.o: combinator.c combinator.h 8 | $(CC) $(CFLAGS) -c combinator.c -o combinator.o 9 | 10 | clean: 11 | rm -f combinator.o calc 12 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Combinators in C 2 | ================ 3 | 4 | This is a sketch of parser combinators in C (plus some hacks). 5 | 6 | The idea is that we build up a function that parses a given grammar by putting "combinators" together. 7 | 8 | We only define a few combinators here: 9 | 10 | * And( comb1, comb2 ) : constructs the combinator which parses comb1 followed by comb2 11 | 12 | * Or ( comb1, comb2 ) : constructs the combinator which parses comb1 or comb2 13 | 14 | * Lloop ( comb1, comb2 ) : constructs the combinator which parses a list of comb1's separated by comb2's; this combinator is for use when comb2 parses a left associative operator (such as '+') 15 | 16 | We also have some functions for parsing literals: 17 | 18 | * Integer() : constructs a combinator which parses an integer 19 | 20 | * Match( str ) : constructs the combinator which accepts the string str 21 | 22 | * Expect( str, err ) : constructs the combinator which accepts the string, str, if present, else it prints the given error message, err, and aborts 23 | 24 | The combinators are themselves defined using closures, i.e. structs which contain a function to call, plus data which is to be passed to that function (in this case, other combinators). 25 | 26 | Warning: there are many things missing in this code. For example there is no garbage collection, so the code leaks memory. 
Also combinators for options and lists are not present. 27 | 28 | Calc 29 | ==== 30 | 31 | The Calc program is a very simple example of how to use the combinators. 32 | 33 | To build it, simply type make. To run it, type ./calc. 34 | 35 | You can evaluate simple expressions involving: 36 | 37 | * Integers (unsigned at present) 38 | 39 | * +, -, *, / 40 | 41 | * parentheses () 42 | 43 | CTRL-D to quit. 44 | 45 | E.g. 46 | 47 | > 1 + (2*3-4+5)*7; 48 | 50 49 | 50 | -------------------------------------------------------------------------------- /calc.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | */ 26 | 27 | #include 28 | #include "combinator.h" 29 | 30 | ast_t * eval(ast_t * a) 31 | { 32 | if (a == NULL) 33 | exception("missing operand\n"); 34 | 35 | switch (a->typ) 36 | { 37 | case T_INT: 38 | break; 39 | case T_ADD: 40 | eval(a->child); 41 | eval(a->child->next); 42 | a->l = a->child->l + a->child->next->l; 43 | break; 44 | case T_SUB: 45 | eval(a->child); 46 | eval(a->child->next); 47 | a->l = a->child->l - a->child->next->l; 48 | break; 49 | case T_MUL: 50 | eval(a->child); 51 | eval(a->child->next); 52 | a->l = a->child->l * a->child->next->l; 53 | break; 54 | case T_DIV: 55 | eval(a->child); 56 | eval(a->child->next); 57 | a->l = a->child->l / a->child->next->l; 58 | break; 59 | default: 60 | exception("Unknown tag in AST node\n"); 61 | } 62 | 63 | return a; 64 | } 65 | 66 | int main(void) 67 | { 68 | ast_t * a; 69 | 70 | input_t in = { NULL, 0, 0, 0 }; 71 | 72 | Combinator(Expr); 73 | Combinator(Term); 74 | Combinator(Factor); 75 | Combinator(Stmt); 76 | 77 | char * msg1 = "Error: \")\" expected!\n"; 78 | 79 | Factor = Or( 80 | Integer(), 81 | And( 82 | And( 83 | Match(T_NULL, "("), 84 | Expr 85 | ), 86 | Expect(T_NULL, ")", msg1) 87 | ) 88 | ); 89 | 90 | Term = Lloop( 91 | Factor, 92 | Or( 93 | Match(T_MUL, "*"), 94 | Match(T_DIV, "/") 95 | ) 96 | ); 97 | 98 | Expr = Lloop( 99 | Term, 100 | Or( 101 | Match(T_ADD, "+"), 102 | Match(T_SUB, "-") 103 | ) 104 | ); 105 | 106 | Stmt = And( 107 | Expr, 108 | Match(T_NULL, ";") 109 | ); 110 | 111 | printf("Welcome to Calc v1.0\n\n"); 112 | printf("> "); 113 | 114 | while (a = parse(&in, Stmt)) 115 | { 116 | printf("%ld\n", eval(a)->l); 117 | printf("\n> "); 118 | } 119 | 120 | return 0; 121 | } 122 | 123 | -------------------------------------------------------------------------------- /combinator.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include "combinator.h" 31 | 32 | void exception(char * err) 33 | { 34 | fprintf(stderr, err); 35 | abort(); 36 | } 37 | 38 | ast_t * new_ast(tag_t typ, ast_t * child, ast_t * next) 39 | { 40 | ast_t * ast = malloc(sizeof(ast_t)); 41 | 42 | ast->typ = typ; 43 | ast->child = child; 44 | ast->next = next; 45 | 46 | return ast; 47 | } 48 | 49 | char read1(input_t * in) 50 | { 51 | if (in->start < in->length) 52 | return in->input[in->start++]; 53 | 54 | if (in->alloc == in->length) 55 | { 56 | in->input = realloc(in->input, in->alloc + 50); 57 | in->alloc += 50; 58 | } 59 | 60 | in->start++; 61 | return in->input[in->length++] = getchar(); 62 | } 63 | 64 | void skip_whitespace(input_t * in) 65 | { 66 | char c; 67 | 68 | while ((c = read1(in)) == ' ' || c == '\n' || c == '\t') ; 69 | 70 | in->start--; 71 | } 72 | 73 | ast_t * match(input_t * in, tag_t tag, char * str) 74 | { 75 | int start = in->start; 76 | int i = 0, len = strlen(str); 77 | 78 | skip_whitespace(in); 79 | 80 | while (i < len && str[i] == read1(in)) i++; 81 | 82 | if (i != len) 83 | { 84 | in->start = start; 85 | return NULL; 86 | } 87 | 88 | return new_ast(tag, NULL, NULL); 89 | } 90 | 91 | ast_t * expect(input_t * in, tag_t tag, char * str, char * err) 92 | { 93 | ast_t * ast; 94 | 95 | if (!(ast = match(in, tag, str))) 96 | exception(err); 97 | 98 | return ast; 99 | } 100 | 101 | ast_t * parse(input_t * in, comb_t * c) 102 | { 103 | switch (c->type) 104 | { 105 | case C_COMB: 106 | return c->clos->comb(in, c->clos->cl1, c->clos->cl2); 107 | case C_MATCH: 108 | return match(in, c->tag, c->str); 109 | case C_EXPECT: 110 | return expect(in, c->tag, c->s2.str, c->s2.err); 111 | case C_LIT: 112 | return c->lit(in); 113 | case C_FORWARD: 114 | return parse(in, *c->forward); 115 | default: 116 | printf("tag = %d\n", c->type); 117 | exception("Unknown tag in parse()\n"); 118 | } 119 | } 120 | 121 | ast_t * comb_and(input_t * in, comb_t * c1, 
comb_t * c2) 122 | { 123 | ast_t * a1, * a2; 124 | int start = in->start; 125 | 126 | if ((a1 = parse(in, c1)) && (a2 = parse(in, c2))) 127 | { 128 | if (c1->tag != T_NULL) 129 | { 130 | if (c2->tag != T_NULL) 131 | { 132 | a1->next = a2; 133 | return a1; 134 | } else 135 | return a1; 136 | } 137 | if (c2->tag != T_NULL) 138 | return a2; 139 | } 140 | 141 | in->start = start; 142 | return NULL; 143 | } 144 | 145 | ast_t * comb_or(input_t * in, comb_t * c1, comb_t * c2) 146 | { 147 | ast_t * a; 148 | int start = in->start; 149 | 150 | if (a = parse(in, c1)) 151 | return a; 152 | 153 | in->start = start; 154 | 155 | if (a = parse(in, c2)) 156 | return a; 157 | 158 | in->start = start; 159 | return NULL; 160 | } 161 | 162 | ast_t * comb_laloop(input_t * in, comb_t * c1, comb_t * c2) 163 | { 164 | ast_t * a, * b, * t, *op; 165 | int start = in->start; 166 | 167 | if (a = parse(in, c1)) 168 | { 169 | start = in->start; 170 | while (op = parse(in, c2)) 171 | { 172 | b = parse(in, c1); 173 | a->next = b; 174 | op->child = a; 175 | a = op; 176 | start = in->start; 177 | } 178 | 179 | in->start = start; 180 | return a; 181 | } 182 | 183 | in->start = start; 184 | return NULL; 185 | } 186 | 187 | comb_t * Match(tag_t tag, char * str) 188 | { 189 | comb_t * c = malloc(sizeof(comb_t)); 190 | 191 | c->type = C_MATCH; 192 | c->tag = tag; 193 | c->str = str; 194 | 195 | return c; 196 | } 197 | 198 | comb_t * Expect(tag_t tag, char * str, char * err) 199 | { 200 | comb_t * c = malloc(sizeof(comb_t)); 201 | 202 | c->type = C_EXPECT; 203 | c->tag = tag; 204 | c->s2.str = str; 205 | c->s2.err = err; 206 | 207 | return c; 208 | } 209 | 210 | comb_t * Comb(ast_t * (*comb)(input_t *, struct comb_t *, struct comb_t *), 211 | comb_t * cl1, comb_t * cl2) 212 | { 213 | closure_t * clos = malloc(sizeof(closure_t)); 214 | 215 | clos->comb = comb; 216 | clos->cl1 = cl1; 217 | clos->cl2 = cl2; 218 | 219 | comb_t * c = malloc(sizeof(comb_t)); 220 | 221 | c->type = C_COMB; 222 | c->tag = T_COMB; 
223 | c->clos = clos; 224 | 225 | return c; 226 | } 227 | 228 | #define str_insert(s, c) \ 229 | do { \ 230 | if (i == alloc) { \ 231 | s = realloc(s, alloc + 10); \ 232 | alloc += 10; \ 233 | } \ 234 | str[i++] = c; \ 235 | } while (0) 236 | 237 | ast_t * integer(input_t * in) 238 | { 239 | int start = in->start; 240 | char c; 241 | int i = 0, alloc = 0; 242 | char * str = NULL; 243 | ast_t * a; 244 | 245 | skip_whitespace(in); 246 | 247 | if ((c = read1(in)) >= '1' && c <= '9') 248 | { 249 | str_insert(str, c); 250 | 251 | start = in->start; 252 | while (isdigit(c = read1(in))) 253 | { 254 | str_insert(str, c); 255 | start = in->start; 256 | } 257 | in->start = start; 258 | 259 | str_insert(str, '\0'); 260 | 261 | a = new_ast(T_INT, NULL, NULL); 262 | a->l = atol(str); 263 | free(str); 264 | 265 | return a; 266 | } else if (c == '0') 267 | { 268 | str_insert(str, c); 269 | 270 | str_insert(str, '\0'); 271 | a = new_ast(T_INT, NULL, NULL); 272 | a->l = atol(str); 273 | free(str); 274 | 275 | return a; 276 | } 277 | 278 | in->start = start; 279 | return NULL; 280 | } 281 | 282 | comb_t * Integer() 283 | { 284 | comb_t * c = malloc(sizeof(comb_t)); 285 | 286 | c->type = C_LIT; 287 | c->tag = T_INT; 288 | c->lit = integer; 289 | 290 | return c; 291 | } 292 | 293 | comb_t * forward(comb_t ** comb) 294 | { 295 | comb_t * c = malloc(sizeof(comb_t)); 296 | 297 | c->type = C_FORWARD; 298 | c->tag = T_FORWARD; 299 | c->forward = comb; 300 | 301 | return c; 302 | } 303 | -------------------------------------------------------------------------------- /combinator.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. 
Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/* (The license header ends here.) */

#ifndef COMBINATOR_H
#define COMBINATOR_H

/*
   Input buffer shared by all combinators.

   input  : characters read so far
   alloc  : number of bytes allocated for input
   length : number of characters stored in input
   start  : index of the next character to hand out; combinators rewind it
            to backtrack
*/
typedef struct
{
    char * input;
    int alloc;
    int length;
    int start;
} input_t;

/* Kind of a combinator; selects the active member of the union in comb_t. */
typedef enum
{
    C_COMB, C_MATCH, C_EXPECT, C_LIT, C_FORWARD
} ctag_t;

/*
   Tag of an AST node, and of the combinator that produces it. A combinator
   tagged T_NULL contributes no node to the result of comb_and().
*/
typedef enum
{
    T_NULL, T_INT, T_MUL, T_ADD, T_SUB, T_DIV, T_FORWARD, T_COMB
} tag_t;

/*
   AST node. child is the first child, next the following sibling.
   l holds the value of a T_INT node.
*/
typedef struct ast_t
{
    tag_t typ;
    struct ast_t * child;
    struct ast_t * next;
    char * str;
    long l;
} ast_t;

struct comb_t;

/* A combinator function together with the two combinators it is applied to. */
typedef struct
{
    ast_t * (*comb)(input_t *, struct comb_t *, struct comb_t *);
    struct comb_t * cl1;
    struct comb_t * cl2;
} closure_t;

/* Literal to accept plus the error message to print when it is missing. */
typedef struct
{
    char * str;
    char * err;
} str2_t;

/*
   A combinator. type says which union member is in use:

      C_COMB    -> clos
      C_MATCH   -> str
      C_EXPECT  -> s2
      C_LIT     -> lit
      C_FORWARD -> forward
*/
typedef struct comb_t
{
    ctag_t type;
    tag_t tag;
    union
    {
        closure_t * clos;
        char * str;
        str2_t s2;
        ast_t * (*lit)(input_t *);
        struct comb_t ** forward;
    };
} comb_t;

/* Print err to stderr and abort the process. */
void exception(char * err);

/* Allocate an AST node with the given tag, first child and next sibling. */
ast_t * new_ast(tag_t typ, ast_t * child, ast_t * next);

/* Combinator that defers to whatever *comb points to at parse time. */
comb_t * forward(comb_t ** comb);

/* Combinator that parses an integer literal. */
comb_t * Integer(void);

/* Try to read str; returns a node tagged tag, or NULL with the input rewound. */
ast_t * match(input_t * in, tag_t tag, char * str);

/* Like match(), but prints err and aborts when str is not present. */
ast_t * expect(input_t * in, tag_t tag, char * str, char * err);

/* Parse c1, or failing that c2. */
ast_t * comb_or(input_t * in, comb_t * c1, comb_t * c2);

/* Parse c1 followed by c2. */
ast_t * comb_and(input_t * in, comb_t * c1, comb_t * c2);

/* Parse a list of c1's separated by the left-associative operator c2. */
ast_t * comb_laloop(input_t * in, comb_t * c1, comb_t * c2);

/* Combinator that accepts the literal str. */
comb_t * Match(tag_t tag, char * str);

/* Combinator that accepts the literal str, or prints err and aborts. */
comb_t * Expect(tag_t tag, char * str, char * err);

/* Combinator that applies the function comb to cl1 and cl2. */
comb_t * Comb(ast_t * (*comb)(input_t *, struct comb_t *, struct comb_t *),
              comb_t * cl1, comb_t * cl2);

/* Run the combinator c on the input; returns the AST, or NULL on no match. */
ast_t * parse(input_t * in, comb_t * c);

#define And(x, y) Comb(comb_and, x, y)
#define Or(x, y) Comb(comb_or, x, y)
#define Lloop(x, y) Comb(comb_laloop, x, y)

/*
   Declare the combinator variable x and initialise it to a forward
   reference to itself, so x can be used in other rules before the real
   definition is assigned to it.
*/
#define Combinator(x) \
    comb_t * x; \
    x = forward(&x);

#endif /* COMBINATOR_H */
--------------------------------------------------------------------------------