├── LICENSE ├── Makefile ├── README ├── ast.c ├── ast.h ├── backend.c ├── backend.h ├── cesium.c ├── exception.c ├── exception.h ├── input.c ├── input.h ├── parser.c ├── parser.h ├── symbol.c ├── symbol.h ├── types.c └── types.h /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2012 William Hart. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are 4 | permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of 7 | conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | of conditions and the following disclaimer in the documentation and/or other materials 11 | provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 16 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
22 | 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | INC=-I/home/wbhart/gc/include 2 | LIB=-L/home/wbhart/gc/lib 3 | OBJS=backend.o types.o symbol.o input.o ast.o exception.o parser.o 4 | HEADERS=ast.h exception.h parser.h input.h symbol.h types.h backend.h 5 | 6 | cesium: cesium.c $(HEADERS) $(OBJS) 7 | gcc -O2 -o cesium cesium.c $(INC) $(OBJS) $(LIB) -lgc 8 | 9 | ast.o: ast.c $(HEADERS) 10 | gcc -c -O2 -o ast.o ast.c $(INC) 11 | 12 | exception.o: exception.c $(HEADERS) 13 | gcc -c -O2 -o exception.o exception.c $(INC) 14 | 15 | parser.o: parser.c $(HEADERS) 16 | gcc -c -O2 -o parser.o parser.c $(INC) 17 | 18 | input.o: input.c $(HEADERS) 19 | gcc -c -O2 -o input.o input.c $(INC) 20 | 21 | symbol.o: symbol.c $(HEADERS) 22 | gcc -c -O2 -o symbol.o symbol.c $(INC) 23 | 24 | types.o: types.c $(HEADERS) 25 | gcc -c -O2 -o types.o types.c $(INC) 26 | 27 | backend.o: backend.c $(HEADERS) 28 | gcc -c -O2 -o backend.o backend.c $(INC) 29 | 30 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Cesium v 0.3: 2 | ============= 3 | 4 | Dependencies: 5 | ------------- 6 | 7 | * Boehm-Demers-Weiser Garbage Collection 8 | 9 | Build: 10 | ------ 11 | 12 | Update directory paths at the top of the Makefile then type: 13 | 14 | make 15 | 16 | To run, simply type: 17 | 18 | ./cesium 19 | 20 | Introduction: 21 | ------------- 22 | 23 | Cesium is a language with the following features: 24 | 25 | * LLVMJit backend for performance 26 | * Local type inference 27 | * C foreign function interface 28 | * Syntax and expression macros 29 | * Garbage collection 30 | * Imperative and functional styles 31 | * Parameterised types 32 | * Ad hoc and parametric polymorphism 33 | 34 | Progress: 35 | --------- 36 | 37 | * Parser combinators - 
almost done 38 | * Symbol Hash table - not done 39 | * Parser - not done 40 | * Environments - not done 41 | * Type system - not done 42 | * Back end - not done 43 | * Closures - not done 44 | * Type inference - not done 45 | * FFI - not done 46 | * Module system - not done 47 | 48 | -------------------------------------------------------------------------------- /ast.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | */ 26 | 27 | #include "ast.h" 28 | 29 | ast_t * ast_nil; 30 | 31 | ast_t * new_ast() 32 | { 33 | ast_t * ast = GC_MALLOC(sizeof(ast_t)); 34 | 35 | return ast; 36 | } 37 | 38 | void ast_init() 39 | { 40 | ast_nil = new_ast(); 41 | ast_nil->typ = T_NONE; 42 | } 43 | 44 | ast_t * ast1(tag_t typ, ast_t * a1) 45 | { 46 | ast_t * ast = new_ast(); 47 | ast->typ = typ; 48 | ast->child = a1; 49 | return ast; 50 | } 51 | 52 | ast_t * ast2(tag_t typ, ast_t * a1, ast_t * a2) 53 | { 54 | ast_t * ast = new_ast(); 55 | ast->typ = typ; 56 | ast->child = a1; 57 | ast->child->next = a2; 58 | return ast; 59 | } -------------------------------------------------------------------------------- /ast.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include "gc.h" 28 | #include "symbol.h" 29 | 30 | #ifndef AST_H 31 | #define AST_H 32 | 33 | typedef enum 34 | { 35 | T_NONE, T_LIST, T_INT, T_ADD, T_SUB, T_MUL, T_DIV, T_REM, T_IDENT 36 | } tag_t; 37 | 38 | typedef struct ast_t 39 | { 40 | tag_t typ; 41 | struct ast_t * child; 42 | struct ast_t * next; 43 | sym_t * sym; 44 | } ast_t; 45 | 46 | ast_t * new_ast(); 47 | 48 | ast_t * ast1(tag_t typ, ast_t * a1); 49 | 50 | ast_t * ast2(tag_t typ, ast_t * a1, ast_t * a2); 51 | 52 | #endif -------------------------------------------------------------------------------- /backend.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include "backend.h" 28 | 29 | 30 | -------------------------------------------------------------------------------- /backend.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include 28 | #include 29 | #include "gc.h" 30 | 31 | #ifndef BACKEND_H 32 | #define BACKEND_H 33 | 34 | 35 | 36 | #endif 37 | 38 | -------------------------------------------------------------------------------- /cesium.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include 28 | #include "parser.h" 29 | #include "types.h" 30 | #include "backend.h" 31 | 32 | extern jmp_buf exc; 33 | 34 | int main(void) 35 | { 36 | ast_t * a; 37 | input_t * in = new_input(); 38 | int jval; 39 | 40 | ast_init(); 41 | sym_tab_init(); 42 | types_init(); 43 | 44 | printf("Welcome to Cesium v0.3\n\n"); 45 | printf("> "); 46 | 47 | combinator_t * stmt = new_combinator(); 48 | combinator_t * exp = new_combinator(); 49 | combinator_t * paren = new_combinator(); 50 | combinator_t * base = new_combinator(); 51 | 52 | seq(paren, T_LIST, 53 | match("("), 54 | exp, 55 | match(")"), 56 | NULL); 57 | 58 | multi(base, T_NONE, 59 | capture(T_INT, integer()), 60 | paren, 61 | NULL); 62 | 63 | expr(exp, base); 64 | 65 | expr_insert(exp, 0, T_ADD, EXPR_INFIX, ASSOC_LEFT, match("+")); 66 | expr_altern(exp, 0, T_SUB, match("-")); 67 | 68 | expr_insert(exp, 1, T_MUL, EXPR_INFIX, ASSOC_LEFT, match("*")); 69 | expr_altern(exp, 1, T_DIV, match("/")); 70 | expr_altern(exp, 1, T_REM, match("%")); 71 | 72 | seq(stmt, T_NONE, 73 | exp, 74 | match(";"), 75 | NULL); 76 | 77 | while (1) 78 | { 79 | if (!(jval = setjmp(exc))) 80 | { 81 | a = parse(in, stmt); 82 | if (!a) break; 83 | } else 84 | { 85 | while (read1(in) != '\n') ; 86 | } 87 | 88 | printf("\n> "); 89 | in->start = 0; 90 | in->length = 0; 91 | } 92 | 93 | printf("\n"); 94 | 95 | return 0; 96 | 97 | } 98 | 
-------------------------------------------------------------------------------- /exception.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include "exception.h" 28 | 29 | jmp_buf exc; 30 | 31 | void exception(char * err) 32 | { 33 | fprintf(stderr, err); 34 | 35 | longjmp(exc, 1); 36 | } -------------------------------------------------------------------------------- /exception.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include 28 | #include 29 | 30 | #ifndef EXCEPTION_H 31 | #define EXCEPTION_H 32 | 33 | void exception(char * err); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /input.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 
10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include "input.h" 28 | 29 | input_t * new_input() 30 | { 31 | input_t * in = GC_MALLOC(sizeof(input_t)); 32 | 33 | in->input = NULL; 34 | in->alloc = 0; 35 | in->length = 0; 36 | in->start = 0; 37 | 38 | return in; 39 | } 40 | 41 | char read1(input_t * in) 42 | { 43 | if (in->start < in->length) 44 | return in->input[in->start++]; 45 | 46 | if (in->alloc == in->length) 47 | { 48 | in->input = realloc(in->input, in->alloc + 50); 49 | in->alloc += 50; 50 | } 51 | 52 | in->start++; 53 | return in->input[in->length++] = getchar(); 54 | } 55 | 56 | void skip_whitespace(input_t * in) 57 | { 58 | char c; 59 | 60 | while ((c = read1(in)) == ' ' || c == '\n' || c == '\t') ; 61 | 62 | in->start--; 63 | } 64 | 65 | -------------------------------------------------------------------------------- /input.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include 28 | #include "gc.h" 29 | 30 | #ifndef INPUT_H 31 | #define INPUT_H 32 | 33 | typedef struct 34 | { 35 | char * input; 36 | int alloc; 37 | int length; 38 | int start; 39 | } input_t; 40 | 41 | input_t * new_input(); 42 | 43 | char read1(input_t * in); 44 | 45 | void skip_whitespace(input_t * in); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /parser.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | */ 26 | 27 | #include 28 | #include "parser.h" 29 | 30 | extern ast_t * ast_nil; 31 | 32 | combinator_t * new_combinator() 33 | { 34 | combinator_t * c = GC_MALLOC(sizeof(combinator_t)); 35 | 36 | c->fn = NULL; 37 | c->args = NULL; 38 | 39 | return c; 40 | } 41 | 42 | ast_t * match_fn(input_t * in, void * args) 43 | { 44 | char * str = ((match_args *) args)->str; 45 | 46 | int start = in->start; 47 | int i = 0, len = strlen(str); 48 | 49 | skip_whitespace(in); 50 | 51 | while (i < len && str[i] == read1(in)) i++; 52 | 53 | if (i != len) 54 | { 55 | in->start = start; 56 | return NULL; 57 | } 58 | 59 | return ast_nil; 60 | } 61 | 62 | combinator_t * match(char * str) 63 | { 64 | match_args * args = GC_MALLOC(sizeof(match_args)); 65 | args->str = str; 66 | 67 | combinator_t * comb = new_combinator(); 68 | comb->fn = match_fn; 69 | comb->args = args; 70 | 71 | return comb; 72 | } 73 | 74 | ast_t * expect_fn(input_t * in, void * args) 75 | { 76 | expect_args * eargs = (expect_args *) args; 77 | ast_t * ast; 78 | 79 | combinator_t * comb = eargs->comb; 80 | 81 | if (ast = parse(in, comb)) 82 | return ast; 83 | else 84 | exception(eargs->msg); 85 | 86 | return NULL; 87 | } 88 | 89 | combinator_t * expect(combinator_t * c, char * msg) 90 | { 91 | expect_args * args = GC_MALLOC(sizeof(expect_args)); 92 | args->msg = msg; 93 | args->comb = c; 94 | 95 | combinator_t * comb = new_combinator(); 96 | comb->fn = expect_fn; 97 | comb->args = (void *) args; 98 | 99 | return comb; 100 | } 101 | 102 | ast_t * exact_fn(input_t * in, void * args) 103 | { 104 | char * str = ((match_args *) args)->str; 105 | 106 | int start = in->start; 107 | int i = 0, len = strlen(str); 108 | 109 | while (i < len && str[i] == read1(in)) i++; 110 | 111 | if (i != len) 112 | { 113 | in->start = start; 114 | return NULL; 115 | } 116 | 117 | return ast_nil; 118 | } 119 | 120 | combinator_t * exact(char * str) 121 | { 122 | match_args * args = GC_MALLOC(sizeof(match_args)); 123 | args->str = str; 
124 | 125 | combinator_t * comb = new_combinator(); 126 | comb->fn = exact_fn; 127 | comb->args = args; 128 | 129 | return comb; 130 | } 131 | 132 | ast_t * range_fn(input_t * in, void * args) 133 | { 134 | char * str = ((match_args *) args)->str; 135 | int start = in->start; 136 | 137 | char c = read1(in); 138 | 139 | if (str[0] <= c && str[1] >= c) 140 | return ast_nil; 141 | else 142 | { 143 | in->start = start; 144 | return NULL; 145 | } 146 | } 147 | 148 | combinator_t * range(char * str) 149 | { 150 | match_args * args = GC_MALLOC(sizeof(match_args)); 151 | args->str = str; 152 | 153 | if (strlen(str) != 2) 154 | exception("String not of length 2 in range\n"); 155 | 156 | combinator_t * comb = new_combinator(); 157 | comb->fn = range_fn; 158 | comb->args = args; 159 | 160 | return comb; 161 | } 162 | 163 | ast_t * alpha_fn(input_t * in, void * args) 164 | { 165 | int start = in->start; 166 | 167 | char c = read1(in); 168 | 169 | if (isalpha(c)) 170 | return ast_nil; 171 | else 172 | { 173 | in->start = start; 174 | return NULL; 175 | } 176 | } 177 | 178 | combinator_t * alpha() 179 | { 180 | combinator_t * comb = new_combinator(); 181 | comb->fn = alpha_fn; 182 | comb->args = NULL; 183 | 184 | return comb; 185 | } 186 | 187 | ast_t * digit_fn(input_t * in, void * args) 188 | { 189 | int start = in->start; 190 | 191 | char c = read1(in); 192 | 193 | if (isdigit(c)) 194 | return ast_nil; 195 | else 196 | { 197 | in->start = start; 198 | return NULL; 199 | } 200 | } 201 | 202 | combinator_t * digit() 203 | { 204 | combinator_t * comb = new_combinator(); 205 | comb->fn = digit_fn; 206 | comb->args = NULL; 207 | 208 | return comb; 209 | } 210 | 211 | ast_t * anything_fn(input_t * in, void * args) 212 | { 213 | int start = in->start; 214 | 215 | char c = read1(in); 216 | 217 | return ast_nil; 218 | } 219 | 220 | combinator_t * anything() 221 | { 222 | combinator_t * comb = new_combinator(); 223 | comb->fn = anything_fn; 224 | comb->args = NULL; 225 | 226 | return 
comb; 227 | } 228 | 229 | ast_t * integer_fn(input_t * in, void * args) 230 | { 231 | int start, len; 232 | char c, * text; 233 | 234 | ast_t * ast = new_ast(); 235 | 236 | skip_whitespace(in); 237 | 238 | start = in->start; 239 | 240 | c = read1(in); 241 | 242 | if (!isdigit(c)) 243 | { 244 | in->start = start; 245 | return NULL; 246 | } 247 | 248 | if (c == '0') 249 | { 250 | ast->typ = T_INT; 251 | 252 | ast->sym = sym_lookup("0"); 253 | 254 | return ast; 255 | } 256 | 257 | while (isdigit(c = read1(in))) ; 258 | in->start--; 259 | 260 | ast->typ = T_INT; 261 | 262 | len = in->start - start; 263 | text = GC_MALLOC(len + 1); 264 | 265 | strncpy(text, in->input + start, len); 266 | text[len] = '\0'; 267 | 268 | ast->sym = sym_lookup(text); 269 | 270 | return ast; 271 | } 272 | 273 | combinator_t * integer() 274 | { 275 | combinator_t * comb = new_combinator(); 276 | comb->fn = integer_fn; 277 | comb->args = NULL; 278 | 279 | return comb; 280 | } 281 | 282 | ast_t * cident_fn(input_t * in, void * args) 283 | { 284 | int start, len; 285 | char c, * text; 286 | 287 | ast_t * ast = new_ast(); 288 | 289 | skip_whitespace(in); 290 | 291 | start = in->start; 292 | 293 | c = read1(in); 294 | 295 | if (c != '_' && !isalpha(c)) 296 | { 297 | in->start = start; 298 | return NULL; 299 | } 300 | 301 | while ((c = read1(in)) == '_' || isalpha(c) || isdigit(c)) ; 302 | in->start--; 303 | 304 | ast->typ = T_IDENT; 305 | 306 | len = in->start - start; 307 | 308 | text = GC_MALLOC(len + 1); 309 | 310 | strncpy(text, in->input + start, len); 311 | text[len] = '\0'; 312 | 313 | ast->sym = sym_lookup(text); 314 | 315 | return ast; 316 | } 317 | 318 | combinator_t * cident() 319 | { 320 | combinator_t * comb = new_combinator(); 321 | comb->fn = cident_fn; 322 | comb->args = NULL; 323 | 324 | return comb; 325 | } 326 | 327 | seq_list * new_seq() 328 | { 329 | return GC_MALLOC(sizeof(seq_list)); 330 | } 331 | 332 | ast_t * seq_fn(input_t * in, void * args) 333 | { 334 | int start = 
in->start; 335 | seq_args * sa = (seq_args *) args; 336 | seq_list * seq = sa->list; 337 | 338 | ast_t * ret = new_ast(); 339 | ret->typ = sa->typ; 340 | 341 | ast_t * ptr = ret; 342 | 343 | while (seq != NULL) 344 | { 345 | ast_t * a = parse(in, seq->comb); 346 | if (a == NULL) 347 | { 348 | in->start = start; 349 | return NULL; 350 | } 351 | 352 | if (a != ast_nil) 353 | { 354 | ptr->next = a; 355 | ptr = ptr->next; 356 | } 357 | 358 | seq = seq->next; 359 | } 360 | 361 | if (sa->typ == T_NONE) 362 | return ret->next; 363 | else 364 | { 365 | ret->child = ret->next; 366 | ret->next = NULL; 367 | return ret; 368 | } 369 | } 370 |
/*
   Turn ret into a sequence combinator: c1 and every further combinator in
   the NULL-terminated argument list must match one after another. typ is
   the tag handed to seq_fn for the resulting node. Returns ret.
*/
combinator_t * seq(combinator_t * ret, tag_t typ, combinator_t * c1, ...)
{
   seq_list * tail;
   seq_args * sa;
   combinator_t * item;
   va_list ap;

   va_start(ap, c1);

   /* the list always has at least the c1 link */
   tail = new_seq();
   tail->comb = c1;

   sa = GC_MALLOC(sizeof(seq_args));
   sa->typ = typ;
   sa->list = tail;

   ret->args = (void *) sa;
   ret->fn = seq_fn;

   for (;;)
   {
      item = va_arg(ap, combinator_t *);
      if (item == NULL)
         break;

      tail->next = new_seq();
      tail = tail->next;
      tail->comb = item;
   }

   va_end(ap);

   tail->next = NULL;

   return ret;
}

/*
   Parse function behind multi(): try each alternative in order and stop at
   the first one that succeeds. With typ T_NONE that alternative's result is
   returned as is; otherwise it becomes the child of a new node tagged typ.
   Returns NULL when every alternative fails.
*/
ast_t * multi_fn(input_t * in, void * args)
{
   seq_args * ma = (seq_args *) args;
   tag_t typ = ma->typ;
   seq_list * alt;

   for (alt = ma->list; alt != NULL; alt = alt->next)
   {
      ast_t * found = parse(in, alt->comb);
      ast_t * wrapper;

      if (found == NULL)
         continue;

      if (typ == T_NONE)
         return found;

      wrapper = new_ast();
      wrapper->typ = typ;
      wrapper->child = found;
      return wrapper;
   }

   return NULL;
}

428 | combinator_t * multi(combinator_t * ret, tag_t typ, combinator_t * c1, ...)
429 | { 430 | combinator_t * comb; 431 | seq_list * seq; 432 | seq_args * args; 433 | 434 | va_list ap; 435 | va_start(ap, c1); 436 | 437 | seq = new_seq(); 438 | seq->comb = c1; 439 | 440 | args = GC_MALLOC(sizeof(seq_args)); 441 | args->typ = typ; 442 | args->list = seq; 443 | 444 | ret->args = (void *) args; 445 | ret->fn = multi_fn; 446 | 447 | while ((comb = va_arg(ap, combinator_t *)) != NULL) 448 | { 449 | seq->next = new_seq(); 450 | seq = seq->next; 451 | seq->comb = comb; 452 | } 453 | 454 | va_end(ap); 455 | 456 | seq->next = NULL; 457 | 458 | return ret; 459 | } 460 | 461 | ast_t * capture_fn(input_t * in, void * args) 462 | { 463 | capture_args * cap = (capture_args *) args; 464 | 465 | int start; 466 | 467 | skip_whitespace(in); 468 | 469 | start = in->start; 470 | if (parse(in, cap->comb)) 471 | { 472 | ast_t * a = new_ast(); 473 | int len = in->start - start; 474 | char * text = GC_MALLOC(len + 1); 475 | 476 | strncpy(text, in->input + start, len); 477 | text[len] = '\0'; 478 | 479 | a->typ = cap->typ; 480 | a->sym = sym_lookup(text); 481 | 482 | return a; 483 | } 484 | 485 | return NULL; 486 | } 487 | 488 | combinator_t * capture(tag_t typ, combinator_t * c) 489 | { 490 | capture_args * args = GC_MALLOC(sizeof(capture_args)); 491 | args->typ = typ; 492 | args->comb = c; 493 | 494 | combinator_t * comb = new_combinator(); 495 | comb->fn = capture_fn; 496 | comb->args = args; 497 | 498 | return comb; 499 | } 500 | 501 | ast_t * not_fn(input_t * in, void * args) 502 | { 503 | combinator_t * comb = (combinator_t *) args; 504 | int start = in->start; 505 | 506 | if (parse(in, comb)) 507 | { 508 | in->start = start; 509 | return NULL; 510 | } else 511 | return ast_nil; 512 | } 513 | 514 | combinator_t * not(combinator_t * c) 515 | { 516 | combinator_t * comb = new_combinator(); 517 | comb->fn = not_fn; 518 | comb->args = (void *) c; 519 | 520 | return comb; 521 | } 522 | 523 | ast_t * option_fn(input_t * in, void * args) 524 | { 525 | combinator_t * comb 
= (combinator_t *) args; 526 | ast_t * ast; 527 | int start = in->start; 528 | 529 | if (ast = parse(in, comb)) 530 | return ast; 531 | else 532 | return ast_nil; 533 | } 534 | 535 | combinator_t * option(combinator_t * c) 536 | { 537 | combinator_t * comb = new_combinator(); 538 | comb->fn = option_fn; 539 | comb->args = (void *) c; 540 | 541 | return comb; 542 | } 543 | 544 | ast_t * zeroplus_fn(input_t * in, void * args) 545 | { 546 | capture_args * cap = (capture_args *) args; 547 | combinator_t * comb = cap->comb; 548 | 549 | ast_t * ast; 550 | ast_t ** ptr = * 551 | 552 | while ((*ptr) = parse(in, comb)) 553 | ptr = &((*ptr)->next); 554 | 555 | if (ast == NULL) 556 | return ast_nil; 557 | else if (cap->typ == T_NONE) 558 | return ast; 559 | else 560 | { 561 | ast_t * res = new_ast(); 562 | res->typ = cap->typ; 563 | res->child = ast; 564 | return res; 565 | } 566 | } 567 | 568 | combinator_t * zeroplus(tag_t typ, combinator_t * c) 569 | { 570 | capture_args * args = GC_MALLOC(sizeof(capture_args)); 571 | args->typ = typ; 572 | args->comb = c; 573 | 574 | combinator_t * comb = new_combinator(); 575 | comb->fn = zeroplus_fn; 576 | comb->args = args; 577 | 578 | return comb; 579 | } 580 | 581 | ast_t * oneplus_fn(input_t * in, void * args) 582 | { 583 | capture_args * cap = (capture_args *) args; 584 | combinator_t * comb = cap->comb; 585 | 586 | ast_t * ast; 587 | ast_t ** ptr = * 588 | 589 | ast = parse(in, comb); 590 | if (!ast) 591 | return ast_nil; 592 | ptr = &(ast->next); 593 | 594 | while ((*ptr) = parse(in, comb)) 595 | ptr = &((*ptr)->next); 596 | 597 | if (cap->typ == T_NONE) 598 | return ast; 599 | else 600 | { 601 | ast_t * res = new_ast(); 602 | res->typ = cap->typ; 603 | res->child = ast; 604 | return res; 605 | } 606 | } 607 | 608 | combinator_t * oneplus(tag_t typ, combinator_t * c) 609 | { 610 | capture_args * args = GC_MALLOC(sizeof(capture_args)); 611 | args->typ = typ; 612 | args->comb = c; 613 | 614 | combinator_t * comb = new_combinator(); 
615 | comb->fn = oneplus_fn; 616 | comb->args = args; 617 | 618 | return comb; 619 | } 620 | 621 | ast_t * expr_fn(input_t * in, void * args) 622 | { 623 | int alt; 624 | tag_t tag; 625 | op_t * op; 626 | expr_list * list = (expr_list *) args; 627 | 628 | if (list->fix == EXPR_BASE) 629 | return parse(in, list->comb); 630 | 631 | if (list->fix == EXPR_INFIX) 632 | { 633 | if (list->assoc == ASSOC_LEFT) 634 | { 635 | ast_t * lhs = expr_fn(in, (void *) list->next); 636 | if (!lhs) 637 | return NULL; 638 | 639 | while (1) 640 | { 641 | ast_t * rhs; 642 | 643 | op = list->op; 644 | while (op) 645 | { 646 | if (parse(in, op->comb)) 647 | break; 648 | op = op->next; 649 | } 650 | if (!op) break; 651 | 652 | rhs = expr_fn(in, (void *) list->next); 653 | if (!rhs) 654 | exception("Expression expected!\n"); 655 | 656 | lhs = ast2(op->tag, lhs, rhs); 657 | } 658 | 659 | return lhs; 660 | } else if (list->assoc == ASSOC_RIGHT) 661 | { 662 | ast_t * lhs = expr_fn(in, (void *) list->next); 663 | ast_t ** ptr; 664 | 665 | if (!lhs) 666 | return NULL; 667 | 668 | ptr = &lhs; 669 | 670 | while (1) 671 | { 672 | ast_t * rhs; 673 | 674 | op = list->op; 675 | while (op) 676 | { 677 | if (parse(in, op->comb)) 678 | break; 679 | op = op->next; 680 | } 681 | if (!op) break; 682 | 683 | rhs = expr_fn(in, (void *) list->next); 684 | if (!rhs) 685 | exception("Expression expected!\n"); 686 | 687 | (*ptr) = ast2(op->tag, *ptr, rhs); 688 | ptr = &((*ptr)->child->next); 689 | } 690 | 691 | return lhs; 692 | } else 693 | exception("Invalid associativity for infix operator\n"); 694 | } else if (list->fix == EXPR_PREFIX) 695 | { 696 | ast_t * rhs; 697 | 698 | op = list->op; 699 | while (op) 700 | { 701 | if (parse(in, op->comb)) 702 | break; 703 | op = op->next; 704 | } 705 | 706 | rhs = expr_fn(in, (void *) list->next); 707 | if (op && !rhs) 708 | exception("Expression expected!\n"); 709 | 710 | if (op) 711 | return ast1(op->tag, rhs); 712 | else 713 | return rhs; 714 | } else if (list->fix == 
EXPR_POSTFIX) 715 | { 716 | ast_t * lhs = expr_fn(in, (void *) list->next); 717 | if (!lhs) 718 | return NULL; 719 | 720 | op = list->op; 721 | while (op) 722 | { 723 | if (parse(in, op->comb)) 724 | break; 725 | op = op->next; 726 | } 727 | 728 | if (op) 729 | return ast1(op->tag, lhs); 730 | else 731 | return lhs; 732 | } 733 | } 734 | 735 | combinator_t * expr(combinator_t * exp, combinator_t * base) 736 | { 737 | expr_list * args = GC_MALLOC(sizeof(expr_list)); 738 | args->next = NULL; 739 | args->fix = EXPR_BASE; 740 | args->comb = base; 741 | 742 | exp->fn = expr_fn; 743 | exp->args = args; 744 | 745 | return exp; 746 | } 747 | 748 | void expr_insert(combinator_t * expr, int prec, tag_t tag, expr_fix fix, 749 | expr_assoc assoc, combinator_t * comb) 750 | { 751 | expr_list * list = (expr_list *) expr->args; 752 | int i; 753 | 754 | expr_list * node = GC_MALLOC(sizeof(expr_list)); 755 | op_t * op = GC_MALLOC(sizeof(op_t)); 756 | 757 | op->tag = tag; 758 | op->comb = comb; 759 | node->op = op; 760 | node->fix = fix; 761 | node->assoc = assoc; 762 | 763 | if (prec == 0) 764 | { 765 | node->next = list; 766 | expr->args = (void *) node; 767 | return; 768 | } 769 | 770 | for (i = 0; list != NULL && i < prec - 1; i++) 771 | list = list->next; 772 | 773 | if (list->fix == EXPR_BASE || list == NULL) 774 | exception("Invalid precedence for expression\n"); 775 | 776 | node->next = list->next; 777 | list->next = node; 778 | } 779 | 780 | void expr_altern(combinator_t * expr, int prec, tag_t tag, combinator_t * comb) 781 | { 782 | op_t * op = GC_MALLOC(sizeof(op_t)); 783 | expr_list * list = (expr_list *) expr->args; 784 | int i; 785 | 786 | for (i = 0; list != NULL && i < prec; i++) 787 | list = list->next; 788 | 789 | if (list->fix == EXPR_BASE || list == NULL) 790 | exception("Invalid precedence for expression\n"); 791 | 792 | op->tag = tag; 793 | op->comb = comb; 794 | op->next = list->op; 795 | list->op = op; 796 | } 797 | 798 | ast_t * parse(input_t * in, 
combinator_t * comb) 799 | { 800 | return comb->fn(in, (void *)comb->args); 801 | } 802 | -------------------------------------------------------------------------------- /parser.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | */ 26 | 27 | #include 28 | #include "input.h" 29 | #include "exception.h" 30 | #include "symbol.h" 31 | #include "ast.h" 32 | #include "gc.h" 33 | 34 | #ifndef PARSER_H 35 | #define PARSER_H 36 | 37 | typedef ast_t * (*comb_fn)(input_t *, void *); 38 | 39 | typedef struct 40 | { 41 | comb_fn fn; 42 | void * args; 43 | } combinator_t; 44 | 45 | typedef struct 46 | { 47 | char * str; 48 | } match_args; 49 | 50 | typedef struct 51 | { 52 | combinator_t * comb; 53 | char * msg; 54 | } expect_args; 55 | 56 | typedef struct seq_list 57 | { 58 | combinator_t * comb; 59 | struct seq_list * next; 60 | } seq_list; 61 | 62 | typedef struct 63 | { 64 | tag_t typ; 65 | seq_list * list; 66 | } seq_args; 67 | 68 | typedef struct 69 | { 70 | tag_t typ; 71 | combinator_t * comb; 72 | } capture_args; 73 | 74 | typedef enum 75 | { 76 | EXPR_BASE, EXPR_INFIX, EXPR_PREFIX, EXPR_POSTFIX 77 | } expr_fix; 78 | 79 | typedef enum 80 | { 81 | ASSOC_LEFT, ASSOC_RIGHT, ASSOC_NONE 82 | } expr_assoc; 83 | 84 | typedef struct op_t 85 | { 86 | tag_t tag; 87 | combinator_t * comb; 88 | struct op_t * next; 89 | } op_t; 90 | 91 | typedef struct expr_list 92 | { 93 | op_t * op; 94 | expr_fix fix; 95 | expr_assoc assoc; 96 | combinator_t * comb; 97 | 98 | struct expr_list * next; 99 | } expr_list; 100 | 101 | combinator_t * new_combinator(); 102 | 103 | combinator_t * match(char * str); 104 | 105 | combinator_t * exact(char * str); 106 | 107 | combinator_t * integer(); 108 | 109 | combinator_t * cident(); 110 | 111 | combinator_t * range(char * str); 112 | 113 | combinator_t * alpha(); 114 | 115 | combinator_t * digit(); 116 | 117 | combinator_t * anything(); 118 | 119 | combinator_t * expect(combinator_t * comb, char * msg); 120 | 121 | combinator_t * seq(combinator_t * ret, tag_t typ, combinator_t * c1, ...); 122 | 123 | combinator_t * multi(combinator_t * ret, tag_t typ, combinator_t * c1, ...); 124 | 125 | combinator_t * capture(tag_t typ, combinator_t * comb); 126 | 127 | combinator_t * 
not(combinator_t * c); 128 | 129 | combinator_t * option(combinator_t * c); 130 | 131 | combinator_t * zeroplus(tag_t typ, combinator_t * c); 132 | 133 | combinator_t * oneplus(tag_t typ, combinator_t * c); 134 | 135 | combinator_t * expr(combinator_t * exp, combinator_t * base); 136 | 137 | ast_t * parse(input_t * in, combinator_t * comb); 138 | 139 | #endif 140 | -------------------------------------------------------------------------------- /symbol.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | */ 26 | 27 | #include "symbol.h" 28 | 29 | sym_t ** sym_tab; 30 | 31 | void sym_tab_init(void) 32 | { 33 | sym_tab = (sym_t **) GC_MALLOC(SYM_TAB_SIZE*sizeof(sym_t *)); 34 | } 35 | 36 | sym_t * new_symbol(const char * name, int length) 37 | { 38 | sym_t * sym = (sym_t *) GC_MALLOC(sizeof(sym_t)); 39 | sym->name = (char *) GC_MALLOC(length + 1); 40 | strcpy(sym->name, name); 41 | return sym; 42 | } 43 | 44 | void print_sym_tab(void) 45 | { 46 | int i; 47 | for (i = 0; i < SYM_TAB_SIZE; i++) 48 | if (sym_tab[i]) 49 | printf("%s\n", sym_tab[i]->name); 50 | } 51 | 52 | int sym_hash(const char * name, int length) 53 | { 54 | int hash = (int) name[0]; 55 | int i; 56 | for (i = 1; i < length; i++) 57 | hash += (name[i] << ((3*i) % 15)); 58 | return hash % SYM_TAB_SIZE; 59 | } 60 | 61 | sym_t * sym_lookup(const char * name) 62 | { 63 | int length = strlen(name); 64 | int hash = sym_hash(name, length); 65 | sym_t * sym; 66 | 67 | while (sym_tab[hash]) 68 | { 69 | if (strcmp(sym_tab[hash]->name, name) == 0) 70 | return sym_tab[hash]; 71 | hash++; 72 | if (hash == SYM_TAB_SIZE) 73 | hash = 0; 74 | } 75 | 76 | sym = new_symbol(name, length); 77 | sym_tab[hash] = sym; 78 | return sym; 79 | } 80 | 81 | -------------------------------------------------------------------------------- /symbol.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include 28 | #include 29 | #include "gc.h" 30 | 31 | #ifndef SYMBOL_H 32 | #define SYMBOL_H 33 | 34 | #define SYM_TAB_SIZE 10000 35 | 36 | typedef struct sym_t { 37 | char * name; 38 | } sym_t; 39 | 40 | void sym_tab_init(void); 41 | 42 | void print_sym_tab(void); 43 | 44 | sym_t * sym_lookup(const char * name); 45 | 46 | #endif 47 | 48 | -------------------------------------------------------------------------------- /types.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include "types.h" 28 | 29 | type_t * t_nil; 30 | type_t * t_int; 31 | type_t * t_bool; 32 | type_t * t_double; 33 | type_t * t_string; 34 | type_t * t_char; 35 | 36 | type_t * new_type(typ_t typ) 37 | { 38 | type_t * t = (type_t *) GC_MALLOC(sizeof(type_t)); 39 | t->typ = typ; 40 | return t; 41 | } 42 | 43 | void types_init(void) 44 | { 45 | t_nil = new_type(NIL); 46 | t_int = new_type(INT); 47 | t_bool = new_type(BOOL); 48 | t_double = new_type(DOUBLE); 49 | t_string = new_type(STRING); 50 | t_char = new_type(CHAR); 51 | } 52 | 53 | type_t * fn_type(type_t * ret, int arity, type_t ** args) 54 | { 55 | int i; 56 | 57 | type_t * t = (type_t *) GC_MALLOC(sizeof(type_t)); 58 | t->typ = FN; 59 | t->args = (type_t **) GC_MALLOC(sizeof(type_t *)*arity); 60 | t->ret = ret; 61 | t->arity = arity; 62 | 63 | for (i = 0; i < arity; i++) 64 | t->args[i] = args[i]; 65 | 66 | return t; 67 | } 68 | 69 | type_t * tuple_type(int arity, type_t ** args) 70 | { 71 | int i; 72 | 73 | type_t * t = (type_t *) GC_MALLOC(sizeof(type_t)); 74 | t->typ = TUPLE; 75 | t->args = (type_t **) GC_MALLOC(sizeof(type_t *)*arity); 76 | t->arity = arity; 77 | 78 | for (i = 0; i < arity; i++) 79 | t->args[i] = args[i]; 
80 | 81 | return t; 82 | } 83 | 84 | type_t * data_type(int arity, type_t ** args, sym_t * sym, 85 | sym_t ** slots, int num_params, sym_t ** params) 86 | { 87 | int i; 88 | 89 | type_t * t = (type_t *) GC_MALLOC(sizeof(type_t)); 90 | t->typ = DATATYPE; 91 | t->args = (type_t **) GC_MALLOC(sizeof(type_t *)*arity); 92 | t->slots = (sym_t **) GC_MALLOC(sizeof(sym_t *)*arity); 93 | t->arity = arity; 94 | t->num_params = num_params; 95 | t->params = params; 96 | 97 | for (i = 0; i < arity; i++) 98 | { 99 | t->args[i] = args[i]; 100 | t->slots[i] = slots[i]; 101 | } 102 | 103 | t->sym = sym; 104 | 105 | return t; 106 | } 107 | 108 | type_t * array_type(type_t * el_type) 109 | { 110 | type_t * t = (type_t *) GC_MALLOC(sizeof(type_t)); 111 | t->typ = ARRAY; 112 | t->ret = el_type; 113 | 114 | return t; 115 | } 116 | 117 | type_t * fn_to_lambda_type(type_t * type) 118 | { 119 | type = fn_type(type->ret, type->arity, type->args); 120 | type->typ = LAMBDA; 121 | return type; 122 | } 123 | 124 | type_t * new_typevar(void) 125 | { 126 | static long typevarnum = 0; 127 | type_t * t = new_type(TYPEVAR); 128 | t->arity = typevarnum++; 129 | return t; 130 | } 131 | -------------------------------------------------------------------------------- /types.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright 2012 William Hart. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of 9 | conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | of conditions and the following disclaimer in the documentation and/or other materials 13 | provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | */ 26 | 27 | #include 28 | #include 29 | #include "symbol.h" 30 | #include "gc.h" 31 | 32 | #ifndef TYPES_H 33 | #define TYPES_H 34 | 35 | typedef enum 36 | { 37 | NIL, BOOL, INT, DOUBLE, STRING, CHAR, 38 | FN, LAMBDA, GENERIC, ARRAY, TUPLE, DATATYPE, TYPEVAR 39 | } typ_t; 40 | 41 | typedef struct type_t 42 | { 43 | typ_t typ; /* kind of type */ 44 | int arity; /* number of args */ 45 | int num_params; /* number of type parameters */ 46 | struct type_t ** args; /* arguments */ 47 | struct type_t * ret; /* return type, for functions */ 48 | sym_t ** params; /* type parameters */ 49 | struct sym_t * sym; /* name of type */ 50 | struct sym_t ** slots; /* names of type args/slots */ 51 | } type_t; 52 | 53 | extern type_t * t_nil; 54 | extern type_t * t_int; 55 | extern type_t * t_bool; 56 | extern type_t * t_double; 57 | extern type_t * t_string; 58 | extern type_t * t_char; 59 | 60 | type_t * new_type(typ_t typ); 61 | 62 | void types_init(void); 63 | 64 | type_t * fn_type(type_t * ret, int arity, type_t ** args); 65 | 66 | type_t * tuple_type(int arity, type_t ** args); 67 | 68 | type_t * data_type(int arity, type_t ** args, sym_t * sym, 69 | sym_t ** slots, int num_params, sym_t ** params); 70 | 71 
| type_t * array_type(type_t * el_type); 72 | 73 | type_t * fn_to_lambda_type(type_t * type); 74 | 75 | type_t * new_typevar(void); 76 | 77 | #endif 78 | 79 | --------------------------------------------------------------------------------