├── .travis.yml ├── .gitignore ├── tests ├── data │ ├── inheritance-depth-2 │ │ ├── two.tmpl │ │ ├── base.tmpl │ │ └── one.tmpl │ ├── inheritance-depth-1 │ │ ├── one.tmpl │ │ └── base.tmpl │ └── template-with-logic │ │ ├── base.tmpl │ │ └── child.tmpl ├── test.h ├── test_hashmap.c └── test_template.c ├── src ├── vector.h ├── template.h ├── hashmap.h ├── vector.c ├── hashmap.c └── template.c ├── .github └── workflows │ └── c.yml ├── Makefile ├── LICENSE ├── README.md ├── Vagrantfile └── vendor ├── mpc.h └── mpc.c /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | compiler: 3 | - clang 4 | - gcc 5 | script: 6 | - make check -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | hyde 2 | *.dSYM 3 | bin/ 4 | .vscode 5 | vgcore.* 6 | valgrind-out.txt 7 | .vagrant -------------------------------------------------------------------------------- /tests/data/inheritance-depth-2/two.tmpl: -------------------------------------------------------------------------------- 1 | {% extends "one.tmpl" %} 2 | 3 | {% block footer %} 4 | 2 5 | {% endblock %} -------------------------------------------------------------------------------- /tests/data/inheritance-depth-1/one.tmpl: -------------------------------------------------------------------------------- 1 | {% extends "base.tmpl" %} 2 | 3 | {% block content %} 4 | Child content 5 | {% endblock %} 6 | -------------------------------------------------------------------------------- /tests/data/inheritance-depth-2/base.tmpl: -------------------------------------------------------------------------------- 1 | 0 2 | 3 | {%- block content %} 4 | 0 5 | {%- endblock %} 6 | 7 | {%- block footer %} 8 | 0 9 | {%- endblock -%} -------------------------------------------------------------------------------- /tests/data/inheritance-depth-1/base.tmpl: 
-------------------------------------------------------------------------------- 1 | Header 2 | 3 | {%- block content %} 4 | Content 5 | {%- endblock %} 6 | 7 | {%- block footer %} 8 | Footer 9 | {%- endblock -%} -------------------------------------------------------------------------------- /tests/data/inheritance-depth-2/one.tmpl: -------------------------------------------------------------------------------- 1 | {% extends "base.tmpl" %} 2 | 3 | {% block content %} 4 | 1 5 | {% endblock %} 6 | 7 | {% block footer %} 8 | 1 9 | {% endblock %} -------------------------------------------------------------------------------- /tests/data/template-with-logic/base.tmpl: -------------------------------------------------------------------------------- 1 | Header 2 | 3 | {%- block content %} 4 | Content 5 | {%- endblock %} 6 | 7 | {%- block footer %} 8 | Footer 9 | {%- endblock -%} -------------------------------------------------------------------------------- /tests/data/template-with-logic/child.tmpl: -------------------------------------------------------------------------------- 1 | {% extends "base.tmpl" %} 2 | 3 | {% block content -%} 4 | {{ "Hello World" | lower }} 5 | {% if 2 < 1 -%} 6 | 2 is less than 1. 7 | {%- else -%} 8 | 2 is more than 1. 
9 | {%- endif %} 10 | {%- endblock %} -------------------------------------------------------------------------------- /src/vector.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct vector { 4 | void **values; 5 | int size; 6 | int cap; 7 | }; 8 | 9 | struct vector* vector_new(int cap); 10 | int vector_push(struct vector *vec, void *value); 11 | void vector_free(struct vector *vec); -------------------------------------------------------------------------------- /src/template.h: -------------------------------------------------------------------------------- 1 | #include "hashmap.h" 2 | #include "vector.h" 3 | 4 | 5 | struct env; 6 | struct env *env_new(); 7 | void env_free(struct env *env); 8 | char *template(struct env *env, char *template_name, struct hashmap *ctx); 9 | char *template_string(char *tmpl, struct hashmap *ctx); 10 | char *read_file(char *filename); -------------------------------------------------------------------------------- /.github/workflows/c.yml: -------------------------------------------------------------------------------- 1 | name: C/C++ CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: make check 17 | run: make check 18 | -------------------------------------------------------------------------------- /src/hashmap.h: -------------------------------------------------------------------------------- 1 | #define HASHMAP_CAP 26 2 | 3 | struct hashmap { 4 | struct node *buckets[HASHMAP_CAP]; 5 | }; 6 | 7 | struct hashmap *hashmap_new(); 8 | void *hashmap_insert(struct hashmap *hm, char *key, void *value); 9 | void *hashmap_get(struct hashmap *hm, char *key); 10 | void *hashmap_resolve(struct hashmap *hm, char *key); 11 | void *hashmap_remove(struct hashmap *hm, char *key); 12 | void hashmap_free(struct hashmap *hm); 13 | 
void hashmap_walk(struct hashmap *hm, void (*fn)(void *value)); -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS= -g -Wall -std=c99 -I. 2 | LDLIBS= 3 | TESTFLAGS= $(CFLAGS) -Isrc/ 4 | ifdef debug 5 | CFLAGS+=-DDEBUG 6 | endif 7 | 8 | all: check 9 | bin:; mkdir -p bin/ 10 | 11 | bin/test_hashmap: src/hashmap.c tests/test_hashmap.c | bin 12 | $(CC) $(TESTFLAGS) $^ -o $@ 13 | 14 | bin/test_template: src/template.c src/hashmap.c src/vector.c tests/test_template.c vendor/mpc.c | bin 15 | $(CC) $(TESTFLAGS) $^ -o $@ 16 | 17 | .PHONY: check 18 | check: bin/test_hashmap bin/test_template 19 | for test in $^; do $$test || exit 1; done 20 | 21 | .PHONY: clean 22 | clean:; rm -r bin/ -------------------------------------------------------------------------------- /src/vector.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "vector.h" 3 | 4 | /* create a new vector of the given capacity; a capacity of zero or less starts without storage. Returns NULL if allocation fails. */ 5 | struct vector* vector_new(int cap) { 6 | struct vector *l = malloc(sizeof *l); 7 | if (!l) return NULL; 8 | l->size = 0; 9 | l->cap = cap > 0 ? cap : 0; 10 | l->values = l->cap ? malloc(l->cap * sizeof *l->values) : NULL; 11 | if (l->cap && !l->values) { 12 | free(l); 13 | return NULL; 14 | } 15 | return l; 16 | } 17 | 18 | /* push a new value to the end of the vector, doubling the storage when it is full. Returns the index of the pushed value, or -1 (vector unchanged) if the storage could not be grown. */ 19 | int vector_push(struct vector *vec, void *value) { 20 | if (vec->size >= vec->cap) { 21 | int new_cap = vec->cap > 0 ? vec->cap * 2 : 1; 22 | void **grown = realloc(vec->values, new_cap * sizeof *grown); /* keep the old pointer until realloc succeeds */ 23 | if (!grown) return -1; 24 | vec->values = grown; 25 | vec->cap = new_cap; 26 | } 27 | vec->values[vec->size++] = value; 28 | return vec->size - 1; 29 | } 30 | 31 | /* free vector related memory; the stored values themselves are not freed */ 32 | void vector_free(struct vector *l) { 33 | free(l->values); 34 | free(l); 35 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Danny van Kooten 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to
deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/test.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define START_TESTS int main() { 7 | #define END_TESTS } 8 | #define TEST(name) strcpy(current_test, #name); 9 | #define assert_null(actual) _assert(actual == NULL, __FILE__, __LINE__, "invalid value: expected NULL, got %s", actual) 10 | #define assert_str(actual, expected) _assert(actual != NULL && strcmp(actual, expected) == 0, __FILE__, __LINE__, "invalid string: expected \"%s\", got \"%s\"", expected, actual) 11 | #define assert(assertion, format, ...) _assert(assertion, __FILE__, __LINE__, format, ##__VA_ARGS__) 12 | #define ARRAY_SIZE(arr) sizeof arr / sizeof arr[0] 13 | 14 | /* used to store the running test name */ 15 | char current_test[256] = {'\0'}; 16 | 17 | static void _assert(int assertion, const char filename[64], const int line, char *format, ...) 
18 | { 19 | if (assertion) 20 | { 21 | return; 22 | } 23 | 24 | va_list args; 25 | va_start(args, format); 26 | printf("%s:%d:%s failed: ", filename, line, current_test); 27 | vprintf(format, args); 28 | va_end(args); 29 | printf("\n"); 30 | exit(1); 31 | } -------------------------------------------------------------------------------- /tests/test_hashmap.c: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | #include "hashmap.h" 3 | 4 | START_TESTS 5 | 6 | TEST(hashmap) { 7 | struct hashmap *hm = hashmap_new(); 8 | char *value = hashmap_get(hm, "foo"); 9 | assert_null(value); 10 | value = hashmap_insert(hm, "foo", "bar"); 11 | assert_null(value); 12 | value = hashmap_get(hm, "foo"); 13 | assert_str(value, "bar"); 14 | hashmap_free(hm); 15 | } 16 | 17 | TEST(dot_notation) { 18 | void *val; 19 | struct hashmap *user = hashmap_new(); 20 | val = hashmap_insert(user, "name", "Danny"); 21 | assert_null(val); 22 | struct hashmap *hm = hashmap_new(); 23 | val = hashmap_insert(hm, "user", user); 24 | assert_null(val); 25 | assert(hashmap_resolve(hm, "user") == user, "expected user hashmap, got something else"); 26 | val = hashmap_resolve(hm, "user.name"); 27 | assert_str(val, "Danny"); 28 | hashmap_free(user); 29 | hashmap_free(hm); 30 | } 31 | 32 | TEST(hashmap_remove) { 33 | struct hashmap *hm = hashmap_new(); 34 | hashmap_insert(hm, "foo", "bar"); 35 | char *value = hashmap_get(hm, "foo"); 36 | assert_str(value, "bar"); 37 | 38 | // remove once 39 | value = hashmap_remove(hm, "foo"); 40 | assert_str(value, "bar"); 41 | value = hashmap_get(hm, "foo"); 42 | assert_null(value); 43 | 44 | // remove again (should no-op) 45 | value = hashmap_remove(hm, "foo"); 46 | assert_null(value); 47 | value = hashmap_get(hm, "foo"); 48 | assert_null(value); 49 | 50 | hashmap_free(hm); 51 | } 52 | 53 | END_TESTS -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | Unja [![Build Status](https://img.shields.io/travis/dannyvankooten/unja/master)](https://travis-ci.org/dannyvankooten/unja) 2 | [![License: MIT](https://img.shields.io/github/license/dannyvankooten/unja)](https://github.com/dannyvankooten/unja/blob/master/LICENSE) 3 | ========== 4 | 5 | A template engine for C, inspired by Jinja and Liquid. 6 | 7 | This is a work in progress and definitely not usable as it stands. 8 | 9 | ### Example 10 | 11 | File `base.tmpl`: 12 | ```html+jinja 13 | 14 | {% block title %}Default title{% endblock %} 15 | 16 | {% block content %} 17 | {% endblock %} 18 | 19 | 20 | ``` 21 | 22 | File `child.tmpl`: 23 | ```html+jinja 24 | {% extends "base.tmpl" %} 25 | 26 | {% block title %}Users{% endblock %} 27 | 28 | {% block content %} 29 | 34 | {% endblock %} 35 | ``` 36 | 37 | File `example.c`: 38 | ```c 39 | int main() { 40 | // parse all templates in the given directory 41 | struct env *env = env_new("./"); 42 | 43 | // create a hashmap for storing template variables 44 | struct hashmap *vars = hashmap_new(); 45 | hashmap_insert(vars, "name", "John Doe"); 46 | 47 | // execute the template 48 | char *output = template(env, "child.tmpl", vars); 49 | printf("%s", output); 50 | 51 | // clean-up allocated memory 52 | free(output); 53 | hashmap_free(vars); 54 | env_free(env); 55 | } 56 | ``` 57 | 58 | ### License 59 | 60 | MIT 61 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # A basic Vagrant box to be able to use GDB and Valgrind for debugging 2 | # Usage: 3 | # 1. Start the machine: `vagrant up` 4 | # 2. SSH into the machine: `vagrant ssh` 5 | # 3. Move into the project directory: `cd /vagrant` 6 | # 4. 
Run find_memleaks on the binary you want to check: `find_memleaks bin/test_template` 7 | # 8 | # 9 | # -*- mode: ruby -*- 10 | # vi: set ft=ruby : 11 | 12 | # All Vagrant configuration is done below. The "2" in Vagrant.configure 13 | # configures the configuration version (we support older styles for 14 | # backwards compatibility). Please don't change it unless you know what 15 | # you're doing. 16 | Vagrant.configure("2") do |config| 17 | # The most common configuration options are documented and commented below. 18 | # For a complete reference, please see the online documentation at 19 | # https://docs.vagrantup.com. 20 | 21 | # Every Vagrant development environment requires a box. You can search for 22 | # boxes at https://vagrantcloud.com/search. 23 | config.vm.box = "hashicorp/bionic64" 24 | 25 | # Disable automatic box update checking. If you disable this, then 26 | # boxes will only be checked for updates when the user runs 27 | # `vagrant box outdated`. This is not recommended. 28 | # config.vm.box_check_update = false 29 | 30 | # Create a forwarded port mapping which allows access to a specific port 31 | # within the machine from a port on the host machine. In the example below, 32 | # accessing "localhost:8080" will access port 80 on the guest machine. 33 | # NOTE: This will enable public access to the opened port 34 | # config.vm.network "forwarded_port", guest: 80, host: 8080 35 | 36 | # Create a forwarded port mapping which allows access to a specific port 37 | # within the machine from a port on the host machine and only allow access 38 | # via 127.0.0.1 to disable public access 39 | # config.vm.network "forwarded_port", guest: 80, host: 8080, host_ip: "127.0.0.1" 40 | 41 | # Create a private network, which allows host-only access to the machine 42 | # using a specific IP. 43 | # config.vm.network "private_network", ip: "192.168.33.10" 44 | 45 | # Create a public network, which generally matched to bridged network. 
46 | # Bridged networks make the machine appear as another physical device on 47 | # your network. 48 | # config.vm.network "public_network" 49 | 50 | # Share an additional folder to the guest VM. The first argument is 51 | # the path on the host to the actual folder. The second argument is 52 | # the path on the guest to mount the folder. And the optional third 53 | # argument is a set of non-required options. 54 | # config.vm.synced_folder ".", "/vagrant_data" 55 | 56 | # Provider-specific configuration so you can fine-tune various 57 | # backing providers for Vagrant. These expose provider-specific options. 58 | # Example for VirtualBox: 59 | # 60 | # config.vm.provider "virtualbox" do |vb| 61 | # # Display the VirtualBox GUI when booting the machine 62 | # vb.gui = true 63 | # 64 | # # Customize the amount of memory on the VM: 65 | # vb.memory = "1024" 66 | # end 67 | # 68 | # View the documentation for the provider you are using for more 69 | # information on available options. 70 | 71 | # Enable provisioning with a shell script. Additional provisioners such as 72 | # Ansible, Chef, Docker, Puppet and Salt are also available. Please see the 73 | # documentation for more information about their specific syntax and use. 
74 | config.vm.provision "shell", inline: <<-SHELL 75 | apt-get update 76 | apt-get install -y gcc gdb valgrind 77 | echo 'find_memleaks() { \n\tmake $1 && valgrind --leak-check=full --show-leak-kinds=all --verbose --track-origins=yes --log-file=valgrind-out.txt $1 && cat valgrind-out.txt \n}' >> /home/vagrant/.bash_profile 78 | SHELL 79 | end 80 | -------------------------------------------------------------------------------- /src/hashmap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "hashmap.h" 5 | 6 | struct node { 7 | char *key; 8 | void *value; 9 | struct node *next; 10 | }; 11 | 12 | unsigned long 13 | hash(char *str) 14 | { 15 | unsigned long hash = 5381; 16 | int c; 17 | 18 | while ((c = *str++)) { 19 | hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ 20 | } 21 | 22 | return hash; 23 | } 24 | 25 | /* allocate a new hashmap */ 26 | struct hashmap *hashmap_new() { 27 | struct hashmap *hm = malloc(sizeof *hm); 28 | if (!hm) err(EXIT_FAILURE, "out of memory"); 29 | for (int i=0; i < HASHMAP_CAP; i++) { 30 | hm->buckets[i] = NULL; 31 | } 32 | 33 | return hm; 34 | } 35 | 36 | /* Inserts a key-value pair into the map. Returns NULL if map did not have key, old value if it did. */ 37 | void *hashmap_insert(struct hashmap *hm, char *key, void *value) { 38 | int pos = hash(key) % HASHMAP_CAP; 39 | struct node *head = hm->buckets[pos]; 40 | struct node *node = head; 41 | void *old_value; 42 | 43 | while (node) { 44 | if (strcmp(node->key, key) == 0) { 45 | old_value = node->value; 46 | node->value = value; 47 | return old_value; 48 | } 49 | node = node->next; 50 | } 51 | 52 | node = malloc(sizeof *node); 53 | node->key = key; 54 | node->value = value; 55 | node->next = head; 56 | hm->buckets[pos] = node; 57 | return NULL; 58 | } 59 | 60 | /* Returns a pointer to the value corresponding to the key. 
*/ 61 | void *hashmap_get(struct hashmap *hm, char *key) { 62 | unsigned int pos = hash(key) % HASHMAP_CAP; 63 | struct node *node = hm->buckets[pos]; 64 | while (node != NULL) { 65 | if (strcmp(node->key, key) == 0) { 66 | return node->value; 67 | } 68 | 69 | node = node->next; 70 | } 71 | 72 | return NULL; 73 | } 74 | 75 | /* Retrieve pointer to value by key, handles dot notation for nested hashmaps. Returns NULL when hm is NULL, when any path segment is missing, or when a segment does not fit in tmp_key. Every non-final segment must map to a struct hashmap. */ 76 | void *hashmap_resolve(struct hashmap *hm, char *key) { 77 | char tmp_key[64]; 78 | int i = 0; 79 | int j = 0; 80 | 81 | while (hm != NULL) { 82 | for (j=0; key[i] != '.' && key[i] != '\0'; i++, j++) { 83 | if (j >= (int) sizeof tmp_key - 1) return NULL; /* segment too long for tmp_key */ 84 | tmp_key[j] = key[i]; 85 | } 86 | tmp_key[j] = '\0'; 87 | hm = hashmap_get(hm, tmp_key); 88 | // stop if we read key to end of string 89 | if (key[i] == '\0') { 90 | break; 91 | } 92 | 93 | // otherwise, continue reading keys 94 | i++; 95 | } 96 | 97 | return hm; 98 | } 99 | 100 | /* Removes a key from the map, returning the value at the key if the key was previously in the map. */ 101 | void *hashmap_remove(struct hashmap *hm, char *key) { 102 | int pos = hash(key) % HASHMAP_CAP; 103 | struct node *node = hm->buckets[pos]; 104 | struct node *prev = NULL; 105 | void *old_value; 106 | 107 | while (node) { 108 | if (strcmp(node->key, key) == 0) { 109 | if (prev) { 110 | prev->next = node->next; 111 | } else { 112 | hm->buckets[pos] = node->next; 113 | } 114 | old_value = node->value; 115 | free(node); 116 | return old_value; 117 | } 118 | 119 | prev = node; /* record the predecessor before advancing, so a non-head node is really unlinked */ 120 | node = node->next; 121 | } 122 | 123 | return NULL; 124 | } 125 | /* Calls fn with the value of every entry in the map. */ 126 | void hashmap_walk(struct hashmap *hm, void (*fn)(void *value)) { 127 | struct node *node; 128 | struct node *next; 129 | 130 | for (int i=0; i < HASHMAP_CAP; i++) { 131 | node = hm->buckets[i]; 132 | while (node) { 133 | next = node->next; 134 | fn(node->value); 135 | node = next; 136 | } 137 | } 138 | } 139 | 140 | /* free hashmap related memory */ 141 | void hashmap_free(struct hashmap *hm) { 142 | struct node *node; 143 | struct
node *next; 144 | 145 | for (int i=0; i < HASHMAP_CAP; i++) { 146 | node = hm->buckets[i]; 147 | while (node) { 148 | next = node->next; 149 | free(node); 150 | node = next; 151 | } 152 | } 153 | 154 | free(hm); 155 | } -------------------------------------------------------------------------------- /vendor/mpc.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** mpc - Micro Parser Combinator library for C 3 | ** 4 | ** https://github.com/orangeduck/mpc 5 | ** 6 | ** Daniel Holden - contact@daniel-holden.com 7 | ** Licensed under BSD3 8 | */ 9 | 10 | #ifndef mpc_h 11 | #define mpc_h 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | /* 26 | ** State Type 27 | */ 28 | 29 | typedef struct { 30 | long pos; 31 | long row; 32 | long col; 33 | int term; 34 | } mpc_state_t; 35 | 36 | /* 37 | ** Error Type 38 | */ 39 | 40 | typedef struct { 41 | mpc_state_t state; 42 | int expected_num; 43 | char *filename; 44 | char *failure; 45 | char **expected; 46 | char received; 47 | } mpc_err_t; 48 | 49 | void mpc_err_delete(mpc_err_t *e); 50 | char *mpc_err_string(mpc_err_t *e); 51 | void mpc_err_print(mpc_err_t *e); 52 | void mpc_err_print_to(mpc_err_t *e, FILE *f); 53 | 54 | /* 55 | ** Parsing 56 | */ 57 | 58 | typedef void mpc_val_t; 59 | 60 | typedef union { 61 | mpc_err_t *error; 62 | mpc_val_t *output; 63 | } mpc_result_t; 64 | 65 | struct mpc_parser_t; 66 | typedef struct mpc_parser_t mpc_parser_t; 67 | 68 | int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); 69 | int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r); 70 | int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); 71 | int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); 72 | int 
mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); 73 | 74 | /* 75 | ** Function Types 76 | */ 77 | 78 | typedef void(*mpc_dtor_t)(mpc_val_t*); 79 | typedef mpc_val_t*(*mpc_ctor_t)(void); 80 | 81 | typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); 82 | typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); 83 | typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); 84 | 85 | typedef int(*mpc_check_t)(mpc_val_t**); 86 | typedef int(*mpc_check_with_t)(mpc_val_t**,void*); 87 | 88 | /* 89 | ** Building a Parser 90 | */ 91 | 92 | mpc_parser_t *mpc_new(const char *name); 93 | mpc_parser_t *mpc_copy(mpc_parser_t *a); 94 | mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); 95 | mpc_parser_t *mpc_undefine(mpc_parser_t *p); 96 | 97 | void mpc_delete(mpc_parser_t *p); 98 | void mpc_cleanup(int n, ...); 99 | 100 | /* 101 | ** Basic Parsers 102 | */ 103 | 104 | mpc_parser_t *mpc_any(void); 105 | mpc_parser_t *mpc_char(char c); 106 | mpc_parser_t *mpc_range(char s, char e); 107 | mpc_parser_t *mpc_oneof(const char *s); 108 | mpc_parser_t *mpc_noneof(const char *s); 109 | mpc_parser_t *mpc_satisfy(int(*f)(char)); 110 | mpc_parser_t *mpc_string(const char *s); 111 | 112 | /* 113 | ** Other Parsers 114 | */ 115 | 116 | mpc_parser_t *mpc_pass(void); 117 | mpc_parser_t *mpc_fail(const char *m); 118 | mpc_parser_t *mpc_failf(const char *fmt, ...); 119 | mpc_parser_t *mpc_lift(mpc_ctor_t f); 120 | mpc_parser_t *mpc_lift_val(mpc_val_t *x); 121 | mpc_parser_t *mpc_anchor(int(*f)(char,char)); 122 | mpc_parser_t *mpc_state(void); 123 | 124 | /* 125 | ** Combinator Parsers 126 | */ 127 | 128 | mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); 129 | mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); 130 | mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f); 131 | mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); 132 | mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e); 133 | 
mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e); 134 | mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...); 135 | mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...); 136 | 137 | mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); 138 | mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); 139 | mpc_parser_t *mpc_maybe(mpc_parser_t *a); 140 | mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); 141 | 142 | mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); 143 | mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); 144 | mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); 145 | 146 | mpc_parser_t *mpc_or(int n, ...); 147 | mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); 148 | 149 | mpc_parser_t *mpc_predictive(mpc_parser_t *a); 150 | 151 | /* 152 | ** Common Parsers 153 | */ 154 | 155 | mpc_parser_t *mpc_eoi(void); 156 | mpc_parser_t *mpc_soi(void); 157 | 158 | mpc_parser_t *mpc_boundary(void); 159 | mpc_parser_t *mpc_boundary_newline(void); 160 | 161 | mpc_parser_t *mpc_whitespace(void); 162 | mpc_parser_t *mpc_whitespaces(void); 163 | mpc_parser_t *mpc_blank(void); 164 | 165 | mpc_parser_t *mpc_newline(void); 166 | mpc_parser_t *mpc_tab(void); 167 | mpc_parser_t *mpc_escape(void); 168 | 169 | mpc_parser_t *mpc_digit(void); 170 | mpc_parser_t *mpc_hexdigit(void); 171 | mpc_parser_t *mpc_octdigit(void); 172 | mpc_parser_t *mpc_digits(void); 173 | mpc_parser_t *mpc_hexdigits(void); 174 | mpc_parser_t *mpc_octdigits(void); 175 | 176 | mpc_parser_t *mpc_lower(void); 177 | mpc_parser_t *mpc_upper(void); 178 | mpc_parser_t *mpc_alpha(void); 179 | mpc_parser_t *mpc_underscore(void); 180 | mpc_parser_t *mpc_alphanum(void); 181 | 182 | mpc_parser_t *mpc_int(void); 183 | mpc_parser_t *mpc_hex(void); 184 | mpc_parser_t *mpc_oct(void); 185 | mpc_parser_t 
*mpc_number(void); 186 | 187 | mpc_parser_t *mpc_real(void); 188 | mpc_parser_t *mpc_float(void); 189 | 190 | mpc_parser_t *mpc_char_lit(void); 191 | mpc_parser_t *mpc_string_lit(void); 192 | mpc_parser_t *mpc_regex_lit(void); 193 | 194 | mpc_parser_t *mpc_ident(void); 195 | 196 | /* 197 | ** Useful Parsers 198 | */ 199 | 200 | mpc_parser_t *mpc_startwith(mpc_parser_t *a); 201 | mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da); 202 | mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da); 203 | 204 | mpc_parser_t *mpc_stripl(mpc_parser_t *a); 205 | mpc_parser_t *mpc_stripr(mpc_parser_t *a); 206 | mpc_parser_t *mpc_strip(mpc_parser_t *a); 207 | mpc_parser_t *mpc_tok(mpc_parser_t *a); 208 | mpc_parser_t *mpc_sym(const char *s); 209 | mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da); 210 | 211 | mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); 212 | mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad); 213 | mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad); 214 | mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad); 215 | mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad); 216 | 217 | mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); 218 | mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad); 219 | mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad); 220 | mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad); 221 | mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad); 222 | 223 | /* 224 | ** Common Function Parameters 225 | */ 226 | 227 | void mpcf_dtor_null(mpc_val_t *x); 228 | 229 | mpc_val_t *mpcf_ctor_null(void); 230 | mpc_val_t *mpcf_ctor_str(void); 231 | 232 | mpc_val_t *mpcf_free(mpc_val_t *x); 233 | mpc_val_t *mpcf_int(mpc_val_t *x); 234 | mpc_val_t *mpcf_hex(mpc_val_t *x); 235 | mpc_val_t *mpcf_oct(mpc_val_t *x); 236 | mpc_val_t *mpcf_float(mpc_val_t *x); 237 | mpc_val_t *mpcf_strtriml(mpc_val_t *x); 
238 | mpc_val_t *mpcf_strtrimr(mpc_val_t *x); 239 | mpc_val_t *mpcf_strtrim(mpc_val_t *x); 240 | 241 | mpc_val_t *mpcf_escape(mpc_val_t *x); 242 | mpc_val_t *mpcf_escape_regex(mpc_val_t *x); 243 | mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x); 244 | mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x); 245 | 246 | mpc_val_t *mpcf_unescape(mpc_val_t *x); 247 | mpc_val_t *mpcf_unescape_regex(mpc_val_t *x); 248 | mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x); 249 | mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x); 250 | 251 | mpc_val_t *mpcf_null(int n, mpc_val_t** xs); 252 | mpc_val_t *mpcf_fst(int n, mpc_val_t** xs); 253 | mpc_val_t *mpcf_snd(int n, mpc_val_t** xs); 254 | mpc_val_t *mpcf_trd(int n, mpc_val_t** xs); 255 | 256 | mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs); 257 | mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs); 258 | mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs); 259 | mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs); 260 | 261 | mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs); 262 | mpc_val_t *mpcf_maths(int n, mpc_val_t** xs); 263 | 264 | /* 265 | ** Regular Expression Parsers 266 | */ 267 | 268 | enum { 269 | MPC_RE_DEFAULT = 0, 270 | MPC_RE_M = 1, 271 | MPC_RE_S = 2, 272 | MPC_RE_MULTILINE = 1, 273 | MPC_RE_DOTALL = 2 274 | }; 275 | 276 | mpc_parser_t *mpc_re(const char *re); 277 | mpc_parser_t *mpc_re_mode(const char *re, int mode); 278 | 279 | /* 280 | ** AST 281 | */ 282 | 283 | typedef struct mpc_ast_t { 284 | char *tag; 285 | char *contents; 286 | mpc_state_t state; 287 | int children_num; 288 | struct mpc_ast_t** children; 289 | } mpc_ast_t; 290 | 291 | mpc_ast_t *mpc_ast_new(const char *tag, const char *contents); 292 | mpc_ast_t *mpc_ast_build(int n, const char *tag, ...); 293 | mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a); 294 | mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a); 295 | mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t); 296 | mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t); 297 | mpc_ast_t 
*mpc_ast_tag(mpc_ast_t *a, const char *t); 298 | mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s); 299 | 300 | void mpc_ast_delete(mpc_ast_t *a); 301 | void mpc_ast_print(mpc_ast_t *a); 302 | void mpc_ast_print_to(mpc_ast_t *a, FILE *fp); 303 | 304 | int mpc_ast_get_index(mpc_ast_t *ast, const char *tag); 305 | int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb); 306 | mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag); 307 | mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb); 308 | 309 | typedef enum { 310 | mpc_ast_trav_order_pre, 311 | mpc_ast_trav_order_post 312 | } mpc_ast_trav_order_t; 313 | 314 | typedef struct mpc_ast_trav_t { 315 | mpc_ast_t *curr_node; 316 | struct mpc_ast_trav_t *parent; 317 | int curr_child; 318 | mpc_ast_trav_order_t order; 319 | } mpc_ast_trav_t; 320 | 321 | mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, 322 | mpc_ast_trav_order_t order); 323 | 324 | mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav); 325 | 326 | void mpc_ast_traverse_free(mpc_ast_trav_t **trav); 327 | 328 | /* 329 | ** Warning: This function currently doesn't test for equality of the `state` member! 
330 | */ 331 | int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b); 332 | 333 | mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **as); 334 | mpc_val_t *mpcf_str_ast(mpc_val_t *c); 335 | mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs); 336 | 337 | mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t); 338 | mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t); 339 | mpc_parser_t *mpca_root(mpc_parser_t *a); 340 | mpc_parser_t *mpca_state(mpc_parser_t *a); 341 | mpc_parser_t *mpca_total(mpc_parser_t *a); 342 | 343 | mpc_parser_t *mpca_not(mpc_parser_t *a); 344 | mpc_parser_t *mpca_maybe(mpc_parser_t *a); 345 | 346 | mpc_parser_t *mpca_many(mpc_parser_t *a); 347 | mpc_parser_t *mpca_many1(mpc_parser_t *a); 348 | mpc_parser_t *mpca_count(int n, mpc_parser_t *a); 349 | 350 | mpc_parser_t *mpca_or(int n, ...); 351 | mpc_parser_t *mpca_and(int n, ...); 352 | 353 | enum { 354 | MPCA_LANG_DEFAULT = 0, 355 | MPCA_LANG_PREDICTIVE = 1, 356 | MPCA_LANG_WHITESPACE_SENSITIVE = 2 357 | }; 358 | 359 | mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); 360 | 361 | mpc_err_t *mpca_lang(int flags, const char *language, ...); 362 | mpc_err_t *mpca_lang_file(int flags, FILE *f, ...); 363 | mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...); 364 | mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); 365 | 366 | /* 367 | ** Misc 368 | */ 369 | 370 | 371 | void mpc_print(mpc_parser_t *p); 372 | void mpc_optimise(mpc_parser_t *p); 373 | void mpc_stats(mpc_parser_t *p); 374 | 375 | int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, 376 | int(*tester)(const void*, const void*), 377 | mpc_dtor_t destructor, 378 | void(*printer)(const void*)); 379 | 380 | int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, 381 | int(*tester)(const void*, const void*), 382 | mpc_dtor_t destructor, 383 | void(*printer)(const void*)); 384 | 385 | #ifdef __cplusplus 386 | } 387 | #endif 388 | 389 | #endif 390 | 
-------------------------------------------------------------------------------- /tests/test_template.c: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | #include "template.h" 3 | 4 | START_TESTS 5 | 6 | TEST(textvc_only) { 7 | char *input = "Hello world."; 8 | char *output = template_string(input, NULL); 9 | assert_str(output, "Hello world."); 10 | free(output); 11 | } 12 | 13 | TEST(expr_number) { 14 | char *input = "Hello {{ 5 }}."; 15 | char *output = template_string(input, NULL); 16 | assert_str(output, "Hello 5."); 17 | free(output); 18 | } 19 | 20 | TEST(expr_number_no_spaces) { 21 | char *input = "Hello {{5}}."; 22 | char *output = template_string(input, NULL); 23 | assert_str(output, "Hello 5."); 24 | free(output); 25 | } 26 | 27 | TEST(expr_string) { 28 | char *input = "Hello {{ \"world\" }}."; 29 | char *output = template_string(input, NULL); 30 | assert_str(output, "Hello world."); 31 | free(output); 32 | } 33 | 34 | TEST(expr_string_no_spaces) { 35 | char *input = "Hello {{\"world\"}}."; 36 | char *output = template_string(input, NULL); 37 | assert_str(output, "Hello world."); 38 | free(output); 39 | } 40 | 41 | TEST(expr_symbol) { 42 | char *input = "Hello {{ name }}."; 43 | struct hashmap *ctx = hashmap_new(); 44 | hashmap_insert(ctx, "name", "world"); 45 | char *output = template_string(input, ctx); 46 | assert_str(output, "Hello world."); 47 | hashmap_free(ctx); 48 | free(output); 49 | } 50 | 51 | TEST(expr_symbol_no_spaces) { 52 | char *input = "Hello {{name}}."; 53 | struct hashmap *ctx = hashmap_new(); 54 | hashmap_insert(ctx, "name", "world"); 55 | char *output = template_string(input, ctx); 56 | assert_str(output, "Hello world."); 57 | hashmap_free(ctx); 58 | free(output); 59 | } 60 | 61 | TEST(expr_add) { 62 | struct { 63 | char *input; 64 | char *expected_output; 65 | } tests[] = { 66 | {"{{ 5 + 5 }}.", "10."}, 67 | {"{{ 5 + foo }}.", "15."}, 68 | {"{{ \"foo\" + \"bar\" }}", 
"foobar"}, 69 | {"{{ \"Hello \" + name }}", "Hello Danny"}, 70 | {"{{ 5 + 5 + 5 }}.", "15."}, 71 | {"{{5+5}}", "10"}, 72 | {"{{ 5+5}}", "10"}, 73 | {"{{ 5 +5}}", "10"}, 74 | {"{{ 5 +5}}", "10"}, 75 | {"{{ 5 + 5}}", "10"}, 76 | {"{{ 5 + 5 }}", "10"}, 77 | }; 78 | 79 | struct hashmap *ctx = hashmap_new(); 80 | hashmap_insert(ctx, "foo", "10"); 81 | hashmap_insert(ctx, "name", "Danny"); 82 | 83 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 84 | char *output = template_string(tests[i].input, ctx); 85 | assert_str(output, tests[i].expected_output); 86 | free(output); 87 | } 88 | 89 | hashmap_free(ctx); 90 | } 91 | 92 | TEST(expr_op_precedence) { 93 | struct { 94 | char *input; 95 | char *expected_output; 96 | } tests[] = { 97 | {"{{ 5 * 2 + 1 }}.", "11."}, 98 | {"{{ 1 + 5 * 2 }}.", "11."}, 99 | {"{{ 10 / 2 + 1 }}.", "6."}, 100 | {"{{ 1 + 10 / 2 }}.", "6."}, 101 | }; 102 | 103 | struct hashmap *ctx = hashmap_new(); 104 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 105 | char *output = template_string(tests[i].input, ctx); 106 | assert_str(output, tests[i].expected_output); 107 | free(output); 108 | } 109 | hashmap_free(ctx); 110 | } 111 | 112 | TEST(expr_subtract) { 113 | char *input = "{{ 5 - 5 }}"; 114 | char *output = template_string(input, NULL); 115 | assert_str(output, "0"); 116 | free(output); 117 | } 118 | 119 | TEST(expr_divide) { 120 | char *input = "{{ 5 / 5 }}"; 121 | char *output = template_string(input, NULL); 122 | assert_str(output, "1"); 123 | free(output); 124 | } 125 | 126 | TEST(expr_multiply) { 127 | char *input = "{{ 5 * 5 }}"; 128 | char *output = template_string(input, NULL); 129 | assert_str(output, "25"); 130 | free(output); 131 | } 132 | 133 | TEST(expr_modulo) { 134 | char *input = "{{ 5 % 4 }}"; 135 | char *output = template_string(input, NULL); 136 | assert_str(output, "1"); 137 | free(output); 138 | } 139 | 140 | TEST(expr_whitespace) { 141 | char *input = "Hello \n{{- \"world\" -}}\n."; 142 | char *output = template_string(input, NULL); 
143 | assert_str(output, "Helloworld."); 144 | free(output); 145 | } 146 | 147 | TEST(expr_op_whitespace) { 148 | char *input = "\n{{- 5 + 5 -}}\n"; 149 | char *output = template_string(input, NULL); 150 | assert_str(output, "10"); 151 | free(output); 152 | } 153 | 154 | TEST(for_block) { 155 | char *input = "{% for n in names %}{{ n }}, {% endfor %}"; 156 | struct hashmap *ctx = hashmap_new(); 157 | 158 | struct vector *names = vector_new(9); 159 | vector_push(names, "John"); 160 | vector_push(names, "Sally"); 161 | vector_push(names, "Eric"); 162 | hashmap_insert(ctx, "names", names); 163 | 164 | char *output = template_string(input, ctx); 165 | assert_str(output, "John, Sally, Eric, "); 166 | vector_free(names); 167 | hashmap_free(ctx); 168 | free(output); 169 | } 170 | 171 | 172 | TEST(for_block_vars) { 173 | char *input = "{% for n in names %}" 174 | "{{loop.index + 1}}: {{ n }}" 175 | "{% if loop.first %} <--{% endif %}" 176 | "{% if not loop.last %}\n{% endif %}" 177 | "{% endfor %}"; 178 | struct hashmap *ctx = hashmap_new(); 179 | 180 | struct vector *names = vector_new(9); 181 | vector_push(names, "John"); 182 | vector_push(names, "Sally"); 183 | vector_push(names, "Eric"); 184 | hashmap_insert(ctx, "names", names); 185 | 186 | char *output = template_string(input, ctx); 187 | assert_str(output, "1: John <--\n2: Sally\n3: Eric"); 188 | vector_free(names); 189 | hashmap_free(ctx); 190 | free(output); 191 | } 192 | 193 | TEST(for_block_whitespace) { 194 | char *input = "\n{%- for n in names -%}\n{{ n }}\n{%- endfor -%}\n"; 195 | struct hashmap *ctx = hashmap_new(); 196 | 197 | struct vector *names = vector_new(2); 198 | vector_push(names, "John"); 199 | vector_push(names, "Sally"); 200 | hashmap_insert(ctx, "names", names); 201 | 202 | char *output = template_string(input, ctx); 203 | assert_str(output, "John\nSally"); 204 | vector_free(names); 205 | hashmap_free(ctx); 206 | free(output); 207 | } 208 | 209 | TEST(var_dot_notation) { 210 | char *input = 
"Hello {{user.name}}!"; 211 | struct hashmap *ctx = hashmap_new(); 212 | struct hashmap *user = hashmap_new(); 213 | hashmap_insert(user, "name", "Danny"); 214 | hashmap_insert(ctx, "user", user); 215 | char *output = template_string(input, ctx); 216 | assert_str(output, "Hello Danny!"); 217 | hashmap_free(user); 218 | hashmap_free(ctx); 219 | free(output); 220 | } 221 | 222 | TEST(comment) { 223 | char *input = "Hello {# comment here #}world."; 224 | char *output = template_string(input, NULL); 225 | assert_str(output, "Hello world."); 226 | free(output); 227 | } 228 | 229 | TEST(if_block) { 230 | struct { 231 | char *input; 232 | char *expected_output; 233 | } tests[] = { 234 | {"{% if 5 > 10 %}1{% endif %}", ""}, 235 | {"{% if 10 > 5 %}1{% endif %}", "1"}, 236 | {"{% if foobar %}1{% endif %}", ""}, 237 | {"{% if name %}1{% endif %}", "1"}, 238 | {"{% if age > 10 %}1{% endif %}", "1"}, 239 | {"{% if 10 + 1 > 10 %}1{% endif %}", "1"}, 240 | {"{% if 6 > 10 - 5 %}1{% endif %}", "1"}, 241 | }; 242 | 243 | struct hashmap *ctx = hashmap_new(); 244 | hashmap_insert(ctx, "name", "Danny"); 245 | hashmap_insert(ctx, "age", "29"); 246 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 247 | char *output = template_string(tests[i].input, ctx); 248 | assert_str(output, tests[i].expected_output); 249 | free(output); 250 | } 251 | 252 | hashmap_free(ctx); 253 | } 254 | 255 | TEST(expr_gt) { 256 | struct { 257 | char *input; 258 | char *expected_output; 259 | } tests[] = { 260 | {"{% if 10 > 9 %}1{% endif %}", "1"}, 261 | {"{% if 10 > 10 %}1{% endif %}", ""}, 262 | {"{% if 10 > 11 %}1{% endif %}", ""}, 263 | }; 264 | 265 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 266 | char *output = template_string(tests[i].input, NULL); 267 | assert_str(output, tests[i].expected_output); 268 | free(output); 269 | } 270 | } 271 | 272 | TEST(expr_gte) { 273 | struct { 274 | char *input; 275 | char *expected_output; 276 | } tests[] = { 277 | {"{% if 10 >= 9 %}1{% endif %}", "1"}, 278 | {"{% if 10 
>= 10 %}1{% endif %}", "1"}, 279 | {"{% if 10 >= 11 %}1{% endif %}", ""}, 280 | }; 281 | 282 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 283 | char *output = template_string(tests[i].input, NULL); 284 | assert_str(output, tests[i].expected_output); 285 | free(output); 286 | } 287 | } 288 | 289 | TEST(expr_lt) { 290 | struct { 291 | char *input; 292 | char *expected_output; 293 | } tests[] = { 294 | {"{% if 10 < 9 %}1{% endif %}", ""}, 295 | {"{% if 10 < 10 %}1{% endif %}", ""}, 296 | {"{% if 10 < 11 %}1{% endif %}", "1"}, 297 | }; 298 | 299 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 300 | char *output = template_string(tests[i].input, NULL); 301 | assert_str(output, tests[i].expected_output); 302 | free(output); 303 | } 304 | } 305 | 306 | TEST(expr_lte) { 307 | struct { 308 | char *input; 309 | char *expected_output; 310 | } tests[] = { 311 | {"{% if 10 <= 9 %}1{% endif %}", ""}, 312 | {"{% if 10 <= 10 %}1{% endif %}", "1"}, 313 | {"{% if 10 <= 11 %}1{% endif %}", "1"}, 314 | }; 315 | 316 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 317 | char *output = template_string(tests[i].input, NULL); 318 | assert_str(output, tests[i].expected_output); 319 | free(output); 320 | } 321 | } 322 | 323 | TEST(expr_eq) { 324 | struct { 325 | char *input; 326 | char *expected_output; 327 | } tests[] = { 328 | {"{% if 10 == 20 %}1{% endif %}", ""}, 329 | {"{% if 10 == 10 %}1{% endif %}", "1"}, 330 | }; 331 | 332 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 333 | char *output = template_string(tests[i].input, NULL); 334 | assert_str(output, tests[i].expected_output); 335 | free(output); 336 | } 337 | } 338 | 339 | TEST(expr_string_eq) { 340 | struct { 341 | char *input; 342 | char *expected_output; 343 | } tests[] = { 344 | {"{% if \"foo\" == \"bar\" %}1{% endif %}", ""}, 345 | {"{% if \"foo\" == \"foo\" %}1{% endif %}", "1"}, 346 | }; 347 | 348 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 349 | char *output = template_string(tests[i].input, NULL); 350 | assert_str(output, 
tests[i].expected_output); 351 | free(output); 352 | } 353 | } 354 | 355 | TEST(expr_not_eq) { 356 | struct { 357 | char *input; 358 | char *expected_output; 359 | } tests[] = { 360 | {"{% if 10 != 20 %}1{% endif %}", "1"}, 361 | {"{% if 10 != 10 %}1{% endif %}", ""}, 362 | }; 363 | 364 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 365 | char *output = template_string(tests[i].input, NULL); 366 | assert_str(output, tests[i].expected_output); 367 | free(output); 368 | } 369 | } 370 | 371 | TEST(expr_string_not_eq) { 372 | struct { 373 | char *input; 374 | char *expected_output; 375 | } tests[] = { 376 | {"{% if \"foo\" != \"bar\" %}1{% endif %}", "1"}, 377 | {"{% if \"foo\" != \"foo\" %}1{% endif %}", ""}, 378 | }; 379 | 380 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 381 | char *output = template_string(tests[i].input, NULL); 382 | assert_str(output, tests[i].expected_output); 383 | free(output); 384 | } 385 | } 386 | 387 | TEST(if_block_whitespace) { 388 | char *input = "\n{%- if 10 > 5 -%}\nOK\n{%- endif -%}\n"; 389 | char *output = template_string(input, NULL); 390 | assert_str(output, "OK"); 391 | free(output); 392 | } 393 | 394 | TEST(if_else_block) { 395 | struct { 396 | char *input; 397 | char *expected_output; 398 | } tests[] = { 399 | {"{% if 5 > 10 %}1{% else %}2{% endif %}", "2"}, 400 | {"{% if 10 > 5 %}1{% else %}2{% endif %}", "1"}, 401 | {"{% if foobar %}1{% else %}2{% endif %}", "2"}, 402 | {"{% if name %}1{% else %}2{% endif %}", "1"}, 403 | {"{% if age < 10 %}1{% else %}2{% endif %}", "2"}, 404 | {"{% if age + 5 < 10 %}1{% else %}2{% endif %}", "2"}, 405 | {"{% if age + 5 > 29 %}1{% else %}2{% endif %}", "1"}, 406 | }; 407 | 408 | struct hashmap *ctx = hashmap_new(); 409 | hashmap_insert(ctx, "name", "Danny"); 410 | hashmap_insert(ctx, "age", "29"); 411 | for (int i=0; i < ARRAY_SIZE(tests); i++) { 412 | char *output = template_string(tests[i].input, ctx); 413 | assert_str(output, tests[i].expected_output); 414 | free(output); 415 | } 416 | 417 | 
hashmap_free(ctx); 418 | } 419 | 420 | TEST(if_else_block_whitespace) { 421 | char *input = "\n{%- if 5 > 10 -%}NOT OK{% else -%}\nOK\n{%- endif -%}\n"; 422 | char *output = template_string(input, NULL); 423 | assert_str(output, "OK"); 424 | free(output); 425 | } 426 | 427 | TEST(buffer_alloc) { 428 | /* Output a string so that output buffer is longer than template buffer, 429 | to test dynamic allocation */ 430 | char *input = "{{ n }}"; 431 | struct hashmap *ctx = hashmap_new(); 432 | char *text = "Lorem ipsum dolor sit amet."; 433 | hashmap_insert(ctx, "n", text); 434 | 435 | char *output = template_string(input, ctx); 436 | assert_str(output, text); 437 | hashmap_free(ctx); 438 | free(output); 439 | } 440 | 441 | TEST(inheritance_depth_1) { 442 | struct env *env = env_new("./tests/data/inheritance-depth-1/"); 443 | char *output = template(env, "one.tmpl", NULL); 444 | assert_str(output, "Header\nChild content\nFooter\n"); 445 | free(output); 446 | env_free(env); 447 | } 448 | 449 | TEST(inheritance_depth_2) { 450 | struct env *env = env_new("./tests/data/inheritance-depth-2/"); 451 | char *output = template(env, "two.tmpl", NULL); 452 | assert_str(output, "0\n1\n2\n"); 453 | free(output); 454 | env_free(env); 455 | } 456 | 457 | TEST(filter_trim) { 458 | char *input = "{{ text | trim }}"; 459 | struct hashmap *ctx = hashmap_new(); 460 | char *text = "\nHello world\n"; 461 | hashmap_insert(ctx, "text", text); 462 | char *output = template_string(input, ctx); 463 | assert_str(output, "Hello world"); 464 | hashmap_free(ctx); 465 | free(output); 466 | } 467 | 468 | TEST(filter_lower) { 469 | char *input = "{{ \"Hello World\" | lower }}"; 470 | char *output = template_string(input, NULL); 471 | assert_str(output, "hello world"); 472 | free(output); 473 | } 474 | 475 | TEST(filter_wordcount) { 476 | char *input = "{{ \"Hello World. 
How are we?\" | wordcount }}"; 477 | char *output = template_string(input, NULL); 478 | assert_str(output, "5"); 479 | free(output); 480 | 481 | input = "{% if \"Hello World. How are we?\" | wordcount > 4 %}1{% endif %}"; 482 | output = template_string(input, NULL); 483 | assert_str(output, "1"); 484 | free(output); 485 | } 486 | 487 | TEST(filter_title) { 488 | char *input = "{{ \"Hello world\" | length }}"; 489 | char *output = template_string(input, NULL); 490 | assert_str(output, "11"); 491 | free(output); 492 | } 493 | 494 | TEST(inheritance_depth_2) { 495 | struct env *env = env_new("./tests/data/template-with-logic/"); 496 | char *output = template(env, "child.tmpl", NULL); 497 | assert_str(output, "Header\n\thello world\n\t2 is more than 1.\nFooter\n"); 498 | free(output); 499 | env_free(env); 500 | } 501 | 502 | END_TESTS -------------------------------------------------------------------------------- /src/template.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "vendor/mpc.h" 10 | #include "template.h" 11 | 12 | enum unja_object_type { 13 | OBJ_NULL, 14 | OBJ_INT, 15 | OBJ_STRING, 16 | }; 17 | 18 | struct unja_object { 19 | enum unja_object_type type; 20 | int integer; 21 | char *string; 22 | char *string_ptr; 23 | }; 24 | 25 | struct unja_object null_object = { 26 | .type = OBJ_NULL, 27 | }; 28 | 29 | struct buffer { 30 | int size; 31 | int cap; 32 | char *string; 33 | }; 34 | 35 | struct env { 36 | struct hashmap *templates; 37 | }; 38 | 39 | struct template { 40 | char *name; 41 | mpc_ast_t *ast; 42 | struct hashmap *blocks; 43 | char *parent; 44 | }; 45 | 46 | /* ensure buffer has room for a string sized l, grows buffer capacity if needed */ 47 | void buffer_reserve(struct buffer *buf, int l) { 48 | int req_size = buf->size + l; 49 | if (req_size >= buf->cap) { 50 | while (req_size >= buf->cap) { 51 | buf->cap 
*= 2; 52 | } 53 | 54 | buf->string = realloc(buf->string, buf->cap * sizeof *buf->string); 55 | if (!buf->string) { 56 | errx(EXIT_FAILURE, "out of memory"); 57 | } 58 | } 59 | } 60 | 61 | 62 | mpc_parser_t *parser_init() { 63 | static mpc_parser_t *template; 64 | if (template != NULL) { 65 | return template; 66 | } 67 | 68 | mpc_parser_t *spaces = mpc_new("spaces"); 69 | mpc_parser_t *symbol = mpc_new("symbol"); 70 | mpc_parser_t *number = mpc_new("number"); 71 | mpc_parser_t *string = mpc_new("string"); 72 | mpc_parser_t *text = mpc_new("text"); 73 | mpc_parser_t *print = mpc_new("print"); 74 | mpc_parser_t *lexp = mpc_new("lexp"); 75 | mpc_parser_t *exp = mpc_new("expression"); 76 | mpc_parser_t *comment = mpc_new("comment"); 77 | mpc_parser_t *statement = mpc_new("statement"); 78 | mpc_parser_t *statement_open = mpc_new("statement_open"); 79 | mpc_parser_t *statement_close = mpc_new("statement_close"); 80 | mpc_parser_t *statement_for = mpc_new("for"); 81 | mpc_parser_t *statement_if = mpc_new("if"); 82 | mpc_parser_t *statement_block = mpc_new("block"); 83 | mpc_parser_t *statement_extends = mpc_new("extends"); 84 | mpc_parser_t *body = mpc_new("body"); 85 | mpc_parser_t *content = mpc_new("content"); 86 | mpc_parser_t *factor = mpc_new("factor"); 87 | mpc_parser_t *term = mpc_new("term"); 88 | mpc_parser_t *filter = mpc_new("filter"); 89 | 90 | template = mpc_new("template"); 91 | mpca_lang(MPCA_LANG_WHITESPACE_SENSITIVE, 92 | " spaces : / */ ;" 93 | " symbol : /[a-zA-Z][a-zA-Z0-9_.]*/ ;" 94 | " number : /[0-9]+/ ;" 95 | " text : /[^{][^{%#]*/;" 96 | " string : '\"' /([^\"])*/ '\"' ;" 97 | " factor : | | ;" 98 | " filter : '|' ; " 99 | " term : ( ('*' | '/' | '%') )* ?;" 100 | " lexp : ( ('+' | '-') )* ;" 101 | " expression: '>' " 102 | " | '<' " 103 | " | \">=\" " 104 | " | \"<=\" " 105 | " | \"!=\" " 106 | " | \"==\" " 107 | " | \"not\" " 108 | " | ;" 109 | " print : /{{2}-? 
*/ / *-?}}/ ;" 110 | " comment : \"{#\" /[^#][^#}]*/ \"#}\" ;" 111 | " statement_open: /{\%-? */;" 112 | " statement_close: / *-?\%}/;" 113 | " for : \"for \" \" in \" \"endfor\" ;" 114 | " block : \"block \" \"endblock\" ;" 115 | " extends : \"extends \" ;" 116 | " if : \"if \" ( \"else\" )? \"endif\" ;" 117 | " statement : | | | ;" 118 | " content : | | | ;" 119 | " body : * ;" 120 | " template : /^/ /$/ ;", 121 | spaces, 122 | filter, 123 | factor, 124 | term, 125 | symbol, 126 | text, 127 | number, 128 | string, 129 | print, 130 | lexp, 131 | exp, 132 | comment, 133 | statement_open, 134 | statement_close, 135 | statement, 136 | statement_if, 137 | statement_for, 138 | statement_block, 139 | statement_extends, 140 | content, 141 | body, 142 | template); 143 | 144 | return template; 145 | } 146 | 147 | mpc_ast_t *parse(char *tmpl) { 148 | mpc_parser_t *parser = parser_init(); 149 | mpc_result_t r; 150 | 151 | if (!mpc_parse("input", tmpl, parser, &r)) { 152 | puts(tmpl); 153 | mpc_err_print(r.error); 154 | mpc_err_delete(r.error); 155 | return NULL; 156 | } 157 | 158 | return r.output; 159 | } 160 | 161 | struct hashmap *find_blocks_in_ast(mpc_ast_t *node, struct hashmap *map) { 162 | if (strstr(node->tag, "content|statement|block")) { 163 | char *name = node->children[2]->contents; 164 | hashmap_insert(map, name, node); 165 | } 166 | 167 | for (int i=0; i < node->children_num; i++) { 168 | find_blocks_in_ast(node->children[i], map); 169 | } 170 | 171 | return map; 172 | } 173 | 174 | struct env *env_new(char *dirname) { 175 | /* store current working dir so we can revert to it after reading templates */ 176 | char working_dir[256]; 177 | getcwd(working_dir, 255); 178 | 179 | DIR *dr = opendir(dirname); 180 | if (dr == NULL) { 181 | errx(EXIT_FAILURE, "could not open directory \"%s\"", dirname); 182 | } 183 | 184 | struct env *env = malloc(sizeof *env); 185 | env->templates = hashmap_new(); 186 | chdir(dirname); 187 | 188 | struct dirent *de; 189 | while ((de = 
readdir(dr)) != NULL) { 190 | // skip files starting with a dot 191 | if (de->d_name[0] == '.') { 192 | continue; 193 | } 194 | 195 | // copy template name as closedir free's it otherwise 196 | char *name = malloc(strlen(de->d_name) + 1); 197 | strcpy(name, de->d_name); 198 | 199 | char *tmpl = read_file(name); 200 | #if DEBUG 201 | printf("Parsing template from file %s: %s\n", name, tmpl); 202 | #endif 203 | mpc_ast_t *ast = parse(tmpl); 204 | free(tmpl); 205 | 206 | struct template *t = malloc(sizeof *t); 207 | t->ast = ast; 208 | t->name = name; 209 | t->blocks = find_blocks_in_ast(ast, hashmap_new()); 210 | t->parent = NULL; 211 | 212 | if (ast->children_num > 1 && ast->children[1]->children_num > 0 && strstr(ast->children[1]->children[0]->tag, "content|statement|extends")) { 213 | t->parent = ast->children[1]->children[0]->children[2]->children[1]->contents; 214 | } 215 | 216 | hashmap_insert(env->templates, name, t); 217 | } 218 | 219 | closedir(dr); 220 | chdir(working_dir); 221 | return env; 222 | } 223 | 224 | void template_free(void *v) { 225 | struct template *t = (struct template *)v; 226 | hashmap_free(t->blocks); 227 | mpc_ast_delete(t->ast); 228 | free(t->name); 229 | free(t); 230 | } 231 | 232 | void env_free(struct env *env) { 233 | hashmap_walk(env->templates, template_free); 234 | hashmap_free(env->templates); 235 | free(env); 236 | } 237 | 238 | char *read_file(char *filename) { 239 | char *input = malloc(BUFSIZ); 240 | unsigned int size = 0; 241 | 242 | FILE *f = fopen(filename, "r"); 243 | if (!f) { 244 | errx(EXIT_FAILURE, "could not open \"%s\" for reading", filename); 245 | } 246 | 247 | unsigned int read = 0; 248 | while ( (read = fread(input + read, 1, BUFSIZ, f)) > 0) { 249 | size += read; 250 | input = realloc(input, size + BUFSIZ); 251 | } 252 | 253 | fclose(f); 254 | 255 | input[size] = '\0'; 256 | return input; 257 | } 258 | 259 | char *trim_trailing_whitespace(char *str) { 260 | for (int i=strlen(str)-1; i >= 0 && isspace(str[i]); 
i--) { 261 | str[i] = '\0'; 262 | } 263 | return str; 264 | } 265 | 266 | char *trim_leading_whitespace(char *str) { 267 | while (isspace(*str)) { 268 | str++; 269 | } 270 | 271 | return str; 272 | } 273 | 274 | struct unja_object *make_string_object(char *value, char *value2) { 275 | struct unja_object *obj = malloc(sizeof *obj); 276 | obj->type = OBJ_STRING; 277 | int l = strlen(value) + 1; 278 | if (value2) { 279 | l += strlen(value2); 280 | } 281 | 282 | 283 | obj->string = malloc(l); 284 | obj->string_ptr = obj->string; 285 | strcpy(obj->string, value); 286 | 287 | if(value2) { 288 | strcat(obj->string, value2); 289 | } 290 | return obj; 291 | } 292 | 293 | struct unja_object *make_int_object(int value) { 294 | struct unja_object *obj = malloc(sizeof *obj); 295 | obj->type = OBJ_INT; 296 | obj->integer = value; 297 | return obj; 298 | } 299 | 300 | 301 | void eval_object(struct buffer *buf, struct unja_object *obj) { 302 | char tmp[64]; 303 | 304 | switch (obj->type) { 305 | case OBJ_NULL: 306 | break; 307 | case OBJ_STRING: 308 | buffer_reserve(buf, strlen(obj->string)); 309 | strcat(buf->string, obj->string); 310 | break; 311 | case OBJ_INT: 312 | sprintf(tmp, "%d", obj->integer); 313 | buffer_reserve(buf, strlen(tmp)); 314 | strcat(buf->string, tmp); 315 | break; 316 | } 317 | } 318 | 319 | int object_to_int(struct unja_object *obj) { 320 | switch (obj->type) { 321 | case OBJ_NULL: return 0; 322 | case OBJ_STRING: return atoi(obj->string); 323 | case OBJ_INT: return obj->integer; 324 | } 325 | 326 | return 0; 327 | } 328 | 329 | void object_free(struct unja_object *obj) { 330 | switch(obj->type) { 331 | case OBJ_NULL: return; 332 | case OBJ_STRING: 333 | free(obj->string_ptr); 334 | break; 335 | case OBJ_INT: break; 336 | } 337 | 338 | free(obj); 339 | } 340 | 341 | int object_is_truthy(struct unja_object *obj) { 342 | switch (obj->type) { 343 | case OBJ_NULL: return 0; 344 | case OBJ_STRING: return strlen(obj->string) > 0 && strcmp(obj->string, "0") != 0; 
345 | case OBJ_INT: return obj->integer > 0; 346 | } 347 | 348 | return 0; 349 | } 350 | 351 | struct context { 352 | struct hashmap *vars; 353 | struct hashmap *filters; 354 | struct env *env; 355 | struct template *current_template; 356 | }; 357 | 358 | struct unja_object *eval_expression_value(mpc_ast_t* node, struct context *ctx) { 359 | if (strstr(node->tag, "symbol|")) { 360 | /* Return empty string if no vars were passed. Should probably signal error here. */ 361 | if (ctx->vars == NULL) { 362 | return &null_object; 363 | } 364 | 365 | char *key = node->contents; 366 | char *value = hashmap_resolve(ctx->vars, key); 367 | 368 | /* TODO: Handle unexisting symbols (returns NULL currently) */ 369 | if (value == NULL) { 370 | return &null_object; 371 | } 372 | 373 | return make_string_object(value, NULL); 374 | } else if(strstr(node->tag, "number|")) { 375 | return make_int_object(atoi(node->contents)); 376 | } else if(strstr(node->tag, "string|")) { 377 | return make_string_object(node->children[1]->contents, NULL); 378 | } 379 | 380 | return &null_object; 381 | } 382 | 383 | struct unja_object *eval_string_infix_expression(struct unja_object *left, char *op, struct unja_object *right) { 384 | struct unja_object *result; 385 | 386 | if (strcmp(op, "+") == 0) { 387 | result = make_string_object(left->string, right->string); 388 | } else if (strcmp(op, "==") == 0) { 389 | result = make_int_object(strcmp(left->string, right->string) == 0); 390 | } else if(strcmp(op, "!=") == 0) { 391 | result = make_int_object(strcmp(left->string, right->string) != 0); 392 | } else { 393 | errx(EXIT_FAILURE, "invalid string operator: %s", op); 394 | } 395 | 396 | object_free(left); 397 | object_free(right); 398 | return result; 399 | } 400 | 401 | struct unja_object *eval_infix_expression(struct unja_object *left, char *op, struct unja_object *right) { 402 | /* if operator is + and either left or right node is of type string: concat */ 403 | if (left->type == OBJ_STRING && 
right->type == OBJ_STRING) { 404 | return eval_string_infix_expression(left, op, right); 405 | } 406 | 407 | int result; 408 | switch (op[0]) { 409 | case '+': result = object_to_int(left) + object_to_int(right); break; 410 | case '-': result = object_to_int(left) - object_to_int(right); break; 411 | case '/': result = object_to_int(left) / object_to_int(right); break; 412 | case '*': result = object_to_int(left) * object_to_int(right); break; 413 | case '%': result = object_to_int(left) % object_to_int(right); break; 414 | case '>': 415 | if (op[1] == '=') { 416 | result = object_to_int(left) >= object_to_int(right); 417 | } else { 418 | result = object_to_int(left) > object_to_int(right); 419 | } 420 | break; 421 | case '<': 422 | if (op[1] == '=') { 423 | result = object_to_int(left) <= object_to_int(right); 424 | } else { 425 | result = object_to_int(left) < object_to_int(right); 426 | } 427 | break; 428 | case '!': 429 | if (op[1] == '=') { 430 | result = object_to_int(left) != object_to_int(right); 431 | } else { 432 | errx(EXIT_FAILURE, "invalid int operator: %s", op); 433 | } 434 | break; 435 | case '=': 436 | if (op[1] == '=') { 437 | result = object_to_int(left) == object_to_int(right); 438 | } else { 439 | errx(EXIT_FAILURE, "invalid int operator: %s", op); 440 | } 441 | break; 442 | } 443 | 444 | object_free(left); 445 | object_free(right); 446 | return make_int_object(result); 447 | } 448 | 449 | struct unja_object *eval_expression(mpc_ast_t* expr, struct context *ctx) { 450 | 451 | /* singular term */ 452 | if (expr->children_num == 0 || strstr(expr->tag, "string|")) { 453 | return eval_expression_value(expr, ctx); 454 | } 455 | 456 | struct unja_object *result; 457 | 458 | /* singular negated term */ 459 | if (strcmp(expr->children[0]->contents, "not") == 0) { 460 | result = eval_expression_value(expr->children[2], ctx); 461 | struct unja_object *negated = make_int_object(!object_to_int(result)); 462 | object_free(result); 463 | return negated; 464 | 
} 465 | 466 | /* otherwise: with operator */ 467 | unsigned int offset = 0; 468 | mpc_ast_t *left_node = expr->children[0]; 469 | struct unja_object *left = eval_expression(left_node, ctx); 470 | result = left; 471 | 472 | while (offset < (expr->children_num - 1)) { 473 | /* Check if we arrived at a filter (guaranteed to be last in expression list) */ 474 | if (strstr(expr->children[offset+1]->tag, "filter")) { 475 | char *filter_name = expr->children[offset+1]->children[3]->contents; 476 | struct unja_object *(*filter_fn)(struct unja_object *) = hashmap_get(ctx->filters, filter_name); 477 | if (NULL == filter_fn) { 478 | errx(EXIT_FAILURE, "unknown filter: %s", filter_name); 479 | } 480 | result = filter_fn(result); 481 | break; 482 | } 483 | 484 | 485 | mpc_ast_t *op = expr->children[offset+2]; 486 | mpc_ast_t *right_node = expr->children[offset+4]; 487 | struct unja_object *right = eval_expression(right_node, ctx); 488 | result = eval_infix_expression(left, op->contents, right); 489 | 490 | left = result; 491 | offset += 4; 492 | } 493 | 494 | return result; 495 | 496 | } 497 | 498 | int eval(struct buffer *buf, mpc_ast_t* t, struct context *ctx) { 499 | static int trim_whitespace = 0; 500 | 501 | // maybe eat whitespace going backward 502 | if (t->children_num > 0 && strstr(t->children[0]->contents, "-")) { 503 | buf->string = trim_trailing_whitespace(buf->string); 504 | } 505 | 506 | if (strstr(t->tag, "content|text")) { 507 | char *str = t->contents; 508 | if (trim_whitespace) { 509 | str = trim_leading_whitespace(str); 510 | trim_whitespace = 0; 511 | } 512 | 513 | buffer_reserve(buf, strlen(str)); 514 | strcat(buf->string, str); 515 | return 0; 516 | } 517 | 518 | if (strstr(t->tag, "content|statement|block")) { 519 | trim_whitespace = strstr(t->children[3]->contents, "-") ? 
1 : 0; 520 | char *block_name = t->children[2]->contents; 521 | 522 | // find block in "lowest" template 523 | struct template *templ = ctx->current_template; 524 | mpc_ast_t *block = hashmap_get(templ->blocks, block_name); 525 | while (templ != NULL && block == NULL) { 526 | templ = hashmap_get(ctx->env->templates, templ->parent); 527 | block = hashmap_get(templ->blocks, block_name); 528 | } 529 | 530 | if (block) { 531 | eval(buf, block->children[4], ctx); 532 | } else { 533 | /* block not found in any lower template, so just render the one we got */ 534 | eval(buf, t->children[4], ctx); 535 | } 536 | 537 | trim_whitespace = strstr(t->children[7]->contents, "-") ? 1 : 0; 538 | return 0; 539 | } 540 | 541 | // eval print statement 542 | if (strstr(t->tag, "content|print")) { 543 | trim_whitespace = strstr(t->children[2]->contents, "-") ? 1 : 0; 544 | mpc_ast_t *expr = t->children[1]; 545 | struct unja_object *obj = eval_expression(expr, ctx); 546 | eval_object(buf, obj); 547 | object_free(obj); 548 | return 0; 549 | } 550 | 551 | if (strstr(t->tag, "content|statement|for")) { 552 | char *tmp_key = t->children[2]->contents; 553 | char *iterator_key = t->children[4]->contents; 554 | struct vector *list = hashmap_resolve(ctx->vars, iterator_key); 555 | 556 | /* add "loop" variable to context */ 557 | struct hashmap *loop = hashmap_new(); 558 | char index[8], first[2], last[2]; 559 | hashmap_insert(loop, "index", index); 560 | hashmap_insert(loop, "first", first); 561 | hashmap_insert(loop, "last", last); 562 | hashmap_insert(ctx->vars, "loop", loop); 563 | 564 | /* loop over values in vector */ 565 | for (int i=0; i < list->size; i++) { 566 | /* set loop variable values */ 567 | sprintf(index, "%d", i); 568 | sprintf(first, "%d", i == 0); 569 | sprintf(last, "%d", i == (list->size - 1)); 570 | hashmap_insert(ctx->vars, tmp_key, list->values[i]); 571 | trim_whitespace = strstr(t->children[5]->contents, "-") ? 
1 : 0; 572 | 573 | /* evaluate body */ 574 | eval(buf, t->children[6], ctx); 575 | } 576 | 577 | /* remove "loop" variable from context */ 578 | hashmap_remove(ctx->vars, "loop"); 579 | hashmap_free(loop); 580 | 581 | /* trim trailing whitespace if closing tag has minus sign */ 582 | if (strstr(t->children[7]->contents, "-")) { 583 | buf->string = trim_trailing_whitespace(buf->string); 584 | } 585 | 586 | trim_whitespace = strstr(t->children[9]->contents, "-") ? 1 : 0; 587 | return 0; 588 | } 589 | 590 | if (strstr(t->tag, "content|statement|if")) { 591 | mpc_ast_t *expr = t->children[2]; 592 | struct unja_object *result = eval_expression(expr, ctx); 593 | 594 | if (object_is_truthy(result)) { 595 | trim_whitespace = strstr(t->children[3]->contents, "-") ? 1 : 0; 596 | 597 | eval(buf, t->children[4], ctx); 598 | 599 | /* trim trailing whitespace if closing tag has minus sign */ 600 | if (strstr(t->children[5]->contents, "-")) { 601 | buf->string = trim_trailing_whitespace(buf->string); 602 | } 603 | 604 | trim_whitespace = strstr(t->children[7]->contents, "-") ? 1 : 0; 605 | } else { 606 | if (t->children_num > 8) { 607 | trim_whitespace = strstr(t->children[7]->contents, "-") ? 1 : 0; 608 | eval(buf, t->children[8], ctx); 609 | 610 | /* trim trailing whitespace if closing tag has minus sign */ 611 | if (strstr(t->children[9]->contents, "-")) { 612 | buf->string = trim_trailing_whitespace(buf->string); 613 | } 614 | 615 | trim_whitespace = strstr(t->children[11]->contents, "-") ? 
1 : 0; 616 | } 617 | } 618 | 619 | object_free(result); 620 | return 0; 621 | } 622 | 623 | for (int i=0; i < t->children_num; i++) { 624 | eval(buf, t->children[i], ctx); 625 | } 626 | 627 | return 0; 628 | } 629 | 630 | char *render_ast(mpc_ast_t *ast, struct context *ctx) { 631 | #if DEBUG 632 | printf("AST: \n"); 633 | mpc_ast_print(ast); 634 | printf("\n"); 635 | #endif 636 | 637 | struct buffer buf; 638 | buf.size = 0; 639 | buf.cap = 256; 640 | buf.string = malloc(buf.cap); 641 | buf.string[0] = '\0'; 642 | eval(&buf, ast, ctx); 643 | return buf.string; 644 | } 645 | 646 | struct unja_object *filter_trim(struct unja_object *obj) { 647 | assert(obj->type == OBJ_STRING); 648 | obj->string = trim_leading_whitespace(obj->string); 649 | trim_trailing_whitespace(obj->string); 650 | return obj; 651 | } 652 | 653 | struct unja_object *filter_lower(struct unja_object *obj) { 654 | assert(obj->type == OBJ_STRING); 655 | int len = strlen(obj->string); 656 | for (int i=0; i < len; i++) { 657 | obj->string[i] = tolower(obj->string[i]); 658 | } 659 | return obj; 660 | } 661 | 662 | struct unja_object *filter_wordcount(struct unja_object *obj) { 663 | assert(obj->type == OBJ_STRING); 664 | int len = strlen(obj->string); 665 | int word_count = 1; 666 | for (int i=0; i < len; i++) { 667 | if (isspace(obj->string[i])) { 668 | word_count++; 669 | } 670 | } 671 | 672 | object_free(obj); 673 | return make_int_object(word_count); 674 | } 675 | 676 | 677 | struct unja_object *filter_length(struct unja_object *obj) { 678 | assert(obj->type == OBJ_STRING); 679 | int len = strlen(obj->string); 680 | object_free(obj); 681 | return make_int_object(len); 682 | } 683 | 684 | struct hashmap *default_filters() { 685 | struct hashmap *filters = hashmap_new(); 686 | hashmap_insert(filters, "trim", filter_trim); 687 | hashmap_insert(filters, "lower", filter_lower); 688 | hashmap_insert(filters, "wordcount", filter_wordcount); 689 | hashmap_insert(filters, "length", filter_length); 690 | 
return filters; 691 | } 692 | 693 | struct context context_new(struct hashmap *vars, struct env *env, struct template *current_tmpl) { 694 | struct context ctx; 695 | ctx.filters = default_filters(); 696 | ctx.vars = vars; 697 | ctx.env = env; 698 | ctx.current_template = current_tmpl; 699 | return ctx; 700 | } 701 | 702 | void context_free(struct context ctx) { 703 | hashmap_free(ctx.filters); 704 | } 705 | 706 | char *template_string(char *tmpl, struct hashmap *vars) { 707 | #if DEBUG 708 | printf("Template: %s\n", tmpl); 709 | #endif 710 | struct mpc_ast_t *ast = parse(tmpl); 711 | struct context ctx = context_new(vars, NULL, NULL); 712 | char *output = render_ast(ast, &ctx); 713 | mpc_ast_delete(ast); 714 | context_free(ctx); 715 | return output; 716 | } 717 | 718 | char *template(struct env *env, char *template_name, struct hashmap *vars) { 719 | struct template *t = hashmap_get(env->templates, template_name); 720 | #if DEBUG 721 | printf("Template name: %s\n", t->name); 722 | printf("Parent: %s\n", t->parent ? 
t->parent : "None"); 723 | #endif 724 | 725 | struct context ctx = context_new(vars, env, t); 726 | 727 | // find root template 728 | while (t->parent != NULL) { 729 | char *parent_name = t->parent; 730 | t = hashmap_get(env->templates, parent_name); 731 | 732 | if (t == NULL) { 733 | errx(EXIT_FAILURE, "template tried to extend unexisting parent \"%s\"", parent_name); 734 | break; 735 | } 736 | } 737 | 738 | 739 | char *output = render_ast(t->ast, &ctx); 740 | context_free(ctx); 741 | return output; 742 | } -------------------------------------------------------------------------------- /vendor/mpc.c: -------------------------------------------------------------------------------- 1 | #include "mpc.h" 2 | 3 | /* 4 | ** State Type 5 | */ 6 | 7 | static mpc_state_t mpc_state_invalid(void) { 8 | mpc_state_t s; 9 | s.pos = -1; 10 | s.row = -1; 11 | s.col = -1; 12 | s.term = 0; 13 | return s; 14 | } 15 | 16 | static mpc_state_t mpc_state_new(void) { 17 | mpc_state_t s; 18 | s.pos = 0; 19 | s.row = 0; 20 | s.col = 0; 21 | s.term = 0; 22 | return s; 23 | } 24 | 25 | /* 26 | ** Input Type 27 | */ 28 | 29 | /* 30 | ** In mpc the input type has three modes of 31 | ** operation: String, File and Pipe. 32 | ** 33 | ** String is easy. The whole contents are 34 | ** loaded into a buffer and scanned through. 35 | ** The cursor can jump around at will making 36 | ** backtracking easy. 37 | ** 38 | ** The second is a File which is also somewhat 39 | ** easy. The contents are never loaded into 40 | ** memory but backtracking can still be achieved 41 | ** by seeking in the file at different positions. 42 | ** 43 | ** The final mode is Pipe. This is the difficult 44 | ** one. As we assume pipes cannot be seeked - and 45 | ** only support a single character lookahead at 46 | ** any point, when the input is marked for a 47 | ** potential backtracking we start buffering any 48 | ** input. 
49 | ** 50 | ** This means that if we are requested to seek 51 | ** back we can simply start reading from the 52 | ** buffer instead of the input. 53 | ** 54 | ** Of course using `mpc_predictive` will disable 55 | ** backtracking and make LL(1) grammars easy 56 | ** to parse for all input methods. 57 | ** 58 | */ 59 | 60 | enum { 61 | MPC_INPUT_STRING = 0, 62 | MPC_INPUT_FILE = 1, 63 | MPC_INPUT_PIPE = 2 64 | }; 65 | 66 | enum { 67 | MPC_INPUT_MARKS_MIN = 32 68 | }; 69 | 70 | enum { 71 | MPC_INPUT_MEM_NUM = 512 72 | }; 73 | 74 | typedef struct { 75 | char mem[64]; 76 | } mpc_mem_t; 77 | 78 | typedef struct { 79 | 80 | int type; 81 | char *filename; 82 | mpc_state_t state; 83 | 84 | char *string; 85 | char *buffer; 86 | FILE *file; 87 | 88 | int suppress; 89 | int backtrack; 90 | int marks_slots; 91 | int marks_num; 92 | mpc_state_t *marks; 93 | 94 | char *lasts; 95 | char last; 96 | 97 | size_t mem_index; 98 | char mem_full[MPC_INPUT_MEM_NUM]; 99 | mpc_mem_t mem[MPC_INPUT_MEM_NUM]; 100 | 101 | } mpc_input_t; 102 | 103 | static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) { 104 | 105 | mpc_input_t *i = malloc(sizeof(mpc_input_t)); 106 | 107 | i->filename = malloc(strlen(filename) + 1); 108 | strcpy(i->filename, filename); 109 | i->type = MPC_INPUT_STRING; 110 | 111 | i->state = mpc_state_new(); 112 | 113 | i->string = malloc(strlen(string) + 1); 114 | strcpy(i->string, string); 115 | i->buffer = NULL; 116 | i->file = NULL; 117 | 118 | i->suppress = 0; 119 | i->backtrack = 1; 120 | i->marks_num = 0; 121 | i->marks_slots = MPC_INPUT_MARKS_MIN; 122 | i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); 123 | i->lasts = malloc(sizeof(char) * i->marks_slots); 124 | i->last = '\0'; 125 | 126 | i->mem_index = 0; 127 | memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 128 | 129 | return i; 130 | } 131 | 132 | static mpc_input_t *mpc_input_new_nstring(const char *filename, const char *string, size_t length) { 133 | 134 | mpc_input_t 
*i = malloc(sizeof(mpc_input_t)); 135 | 136 | i->filename = malloc(strlen(filename) + 1); 137 | strcpy(i->filename, filename); 138 | i->type = MPC_INPUT_STRING; 139 | 140 | i->state = mpc_state_new(); 141 | 142 | i->string = malloc(length + 1); 143 | strncpy(i->string, string, length); 144 | i->string[length] = '\0'; 145 | i->buffer = NULL; 146 | i->file = NULL; 147 | 148 | i->suppress = 0; 149 | i->backtrack = 1; 150 | i->marks_num = 0; 151 | i->marks_slots = MPC_INPUT_MARKS_MIN; 152 | i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); 153 | i->lasts = malloc(sizeof(char) * i->marks_slots); 154 | i->last = '\0'; 155 | 156 | i->mem_index = 0; 157 | memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 158 | 159 | return i; 160 | 161 | } 162 | 163 | static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) { 164 | 165 | mpc_input_t *i = malloc(sizeof(mpc_input_t)); 166 | 167 | i->filename = malloc(strlen(filename) + 1); 168 | strcpy(i->filename, filename); 169 | 170 | i->type = MPC_INPUT_PIPE; 171 | i->state = mpc_state_new(); 172 | 173 | i->string = NULL; 174 | i->buffer = NULL; 175 | i->file = pipe; 176 | 177 | i->suppress = 0; 178 | i->backtrack = 1; 179 | i->marks_num = 0; 180 | i->marks_slots = MPC_INPUT_MARKS_MIN; 181 | i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); 182 | i->lasts = malloc(sizeof(char) * i->marks_slots); 183 | i->last = '\0'; 184 | 185 | i->mem_index = 0; 186 | memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 187 | 188 | return i; 189 | 190 | } 191 | 192 | static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) { 193 | 194 | mpc_input_t *i = malloc(sizeof(mpc_input_t)); 195 | 196 | i->filename = malloc(strlen(filename) + 1); 197 | strcpy(i->filename, filename); 198 | i->type = MPC_INPUT_FILE; 199 | i->state = mpc_state_new(); 200 | 201 | i->string = NULL; 202 | i->buffer = NULL; 203 | i->file = file; 204 | 205 | i->suppress = 0; 206 | i->backtrack = 1; 207 | i->marks_num = 0; 208 | 
i->marks_slots = MPC_INPUT_MARKS_MIN; 209 | i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); 210 | i->lasts = malloc(sizeof(char) * i->marks_slots); 211 | i->last = '\0'; 212 | 213 | i->mem_index = 0; 214 | memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 215 | 216 | return i; 217 | } 218 | 219 | static void mpc_input_delete(mpc_input_t *i) { 220 | 221 | free(i->filename); 222 | 223 | if (i->type == MPC_INPUT_STRING) { free(i->string); } 224 | if (i->type == MPC_INPUT_PIPE) { free(i->buffer); } 225 | 226 | free(i->marks); 227 | free(i->lasts); 228 | free(i); 229 | } 230 | 231 | static int mpc_mem_ptr(mpc_input_t *i, void *p) { 232 | return 233 | (char*)p >= (char*)(i->mem) && 234 | (char*)p < (char*)(i->mem) + (MPC_INPUT_MEM_NUM * sizeof(mpc_mem_t)); 235 | } 236 | 237 | static void *mpc_malloc(mpc_input_t *i, size_t n) { 238 | size_t j; 239 | char *p; 240 | 241 | if (n > sizeof(mpc_mem_t)) { return malloc(n); } 242 | 243 | j = i->mem_index; 244 | do { 245 | if (!i->mem_full[i->mem_index]) { 246 | p = (void*)(i->mem + i->mem_index); 247 | i->mem_full[i->mem_index] = 1; 248 | i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; 249 | return p; 250 | } 251 | i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; 252 | } while (j != i->mem_index); 253 | 254 | return malloc(n); 255 | } 256 | 257 | static void *mpc_calloc(mpc_input_t *i, size_t n, size_t m) { 258 | char *x = mpc_malloc(i, n * m); 259 | memset(x, 0, n * m); 260 | return x; 261 | } 262 | 263 | static void mpc_free(mpc_input_t *i, void *p) { 264 | size_t j; 265 | if (!mpc_mem_ptr(i, p)) { free(p); return; } 266 | j = ((size_t)(((char*)p) - ((char*)i->mem))) / sizeof(mpc_mem_t); 267 | i->mem_full[j] = 0; 268 | } 269 | 270 | static void *mpc_realloc(mpc_input_t *i, void *p, size_t n) { 271 | 272 | char *q = NULL; 273 | 274 | if (!mpc_mem_ptr(i, p)) { return realloc(p, n); } 275 | 276 | if (n > sizeof(mpc_mem_t)) { 277 | q = malloc(n); 278 | memcpy(q, p, sizeof(mpc_mem_t)); 279 | mpc_free(i, p); 
280 | return q; 281 | } 282 | 283 | return p; 284 | } 285 | 286 | static void *mpc_export(mpc_input_t *i, void *p) { 287 | char *q = NULL; 288 | if (!mpc_mem_ptr(i, p)) { return p; } 289 | q = malloc(sizeof(mpc_mem_t)); 290 | memcpy(q, p, sizeof(mpc_mem_t)); 291 | mpc_free(i, p); 292 | return q; 293 | } 294 | 295 | static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; } 296 | static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; } 297 | 298 | static void mpc_input_suppress_disable(mpc_input_t *i) { i->suppress--; } 299 | static void mpc_input_suppress_enable(mpc_input_t *i) { i->suppress++; } 300 | 301 | static void mpc_input_mark(mpc_input_t *i) { 302 | 303 | if (i->backtrack < 1) { return; } 304 | 305 | i->marks_num++; 306 | 307 | if (i->marks_num > i->marks_slots) { 308 | i->marks_slots = i->marks_num + i->marks_num / 2; 309 | i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); 310 | i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); 311 | } 312 | 313 | i->marks[i->marks_num-1] = i->state; 314 | i->lasts[i->marks_num-1] = i->last; 315 | 316 | if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) { 317 | i->buffer = calloc(1, 1); 318 | } 319 | 320 | } 321 | 322 | static void mpc_input_unmark(mpc_input_t *i) { 323 | int j; 324 | 325 | if (i->backtrack < 1) { return; } 326 | 327 | i->marks_num--; 328 | 329 | if (i->marks_slots > i->marks_num + i->marks_num / 2 330 | && i->marks_slots > MPC_INPUT_MARKS_MIN) { 331 | i->marks_slots = 332 | i->marks_num > MPC_INPUT_MARKS_MIN ? 
333 | i->marks_num : MPC_INPUT_MARKS_MIN; 334 | i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); 335 | i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); 336 | } 337 | 338 | if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) { 339 | for (j = strlen(i->buffer) - 1; j >= 0; j--) 340 | ungetc(i->buffer[j], i->file); 341 | 342 | free(i->buffer); 343 | i->buffer = NULL; 344 | } 345 | 346 | } 347 | 348 | static void mpc_input_rewind(mpc_input_t *i) { 349 | 350 | if (i->backtrack < 1) { return; } 351 | 352 | i->state = i->marks[i->marks_num-1]; 353 | i->last = i->lasts[i->marks_num-1]; 354 | 355 | if (i->type == MPC_INPUT_FILE) { 356 | fseek(i->file, i->state.pos, SEEK_SET); 357 | } 358 | 359 | mpc_input_unmark(i); 360 | } 361 | 362 | static int mpc_input_buffer_in_range(mpc_input_t *i) { 363 | return i->state.pos < (long)(strlen(i->buffer) + i->marks[0].pos); 364 | } 365 | 366 | static char mpc_input_buffer_get(mpc_input_t *i) { 367 | return i->buffer[i->state.pos - i->marks[0].pos]; 368 | } 369 | 370 | static char mpc_input_getc(mpc_input_t *i) { 371 | 372 | char c = '\0'; 373 | 374 | switch (i->type) { 375 | 376 | case MPC_INPUT_STRING: return i->string[i->state.pos]; 377 | case MPC_INPUT_FILE: c = fgetc(i->file); return c; 378 | case MPC_INPUT_PIPE: 379 | 380 | if (!i->buffer) { c = getc(i->file); return c; } 381 | 382 | if (i->buffer && mpc_input_buffer_in_range(i)) { 383 | c = mpc_input_buffer_get(i); 384 | return c; 385 | } else { 386 | c = getc(i->file); 387 | return c; 388 | } 389 | 390 | default: return c; 391 | } 392 | } 393 | 394 | static char mpc_input_peekc(mpc_input_t *i) { 395 | 396 | char c = '\0'; 397 | 398 | switch (i->type) { 399 | case MPC_INPUT_STRING: return i->string[i->state.pos]; 400 | case MPC_INPUT_FILE: 401 | 402 | c = fgetc(i->file); 403 | if (feof(i->file)) { return '\0'; } 404 | 405 | fseek(i->file, -1, SEEK_CUR); 406 | return c; 407 | 408 | case MPC_INPUT_PIPE: 409 | 410 | if (!i->buffer) { 411 | c = 
getc(i->file); 412 | if (feof(i->file)) { return '\0'; } 413 | ungetc(c, i->file); 414 | return c; 415 | } 416 | 417 | if (i->buffer && mpc_input_buffer_in_range(i)) { 418 | return mpc_input_buffer_get(i); 419 | } else { 420 | c = getc(i->file); 421 | if (feof(i->file)) { return '\0'; } 422 | ungetc(c, i->file); 423 | return c; 424 | } 425 | 426 | default: return c; 427 | } 428 | 429 | } 430 | 431 | static int mpc_input_terminated(mpc_input_t *i) { 432 | return mpc_input_peekc(i) == '\0'; 433 | } 434 | 435 | static int mpc_input_failure(mpc_input_t *i, char c) { 436 | 437 | switch (i->type) { 438 | case MPC_INPUT_STRING: { break; } 439 | case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); { break; } 440 | case MPC_INPUT_PIPE: { 441 | 442 | if (!i->buffer) { ungetc(c, i->file); break; } 443 | 444 | if (i->buffer && mpc_input_buffer_in_range(i)) { 445 | break; 446 | } else { 447 | ungetc(c, i->file); 448 | } 449 | } 450 | default: { break; } 451 | } 452 | return 0; 453 | } 454 | 455 | static int mpc_input_success(mpc_input_t *i, char c, char **o) { 456 | 457 | if (i->type == MPC_INPUT_PIPE 458 | && i->buffer && !mpc_input_buffer_in_range(i)) { 459 | i->buffer = realloc(i->buffer, strlen(i->buffer) + 2); 460 | i->buffer[strlen(i->buffer) + 1] = '\0'; 461 | i->buffer[strlen(i->buffer) + 0] = c; 462 | } 463 | 464 | i->last = c; 465 | i->state.pos++; 466 | i->state.col++; 467 | 468 | if (c == '\n') { 469 | i->state.col = 0; 470 | i->state.row++; 471 | } 472 | 473 | if (o) { 474 | (*o) = mpc_malloc(i, 2); 475 | (*o)[0] = c; 476 | (*o)[1] = '\0'; 477 | } 478 | 479 | return 1; 480 | } 481 | 482 | static int mpc_input_any(mpc_input_t *i, char **o) { 483 | char x; 484 | if (mpc_input_terminated(i)) { return 0; } 485 | x = mpc_input_getc(i); 486 | return mpc_input_success(i, x, o); 487 | } 488 | 489 | static int mpc_input_char(mpc_input_t *i, char c, char **o) { 490 | char x; 491 | if (mpc_input_terminated(i)) { return 0; } 492 | x = mpc_input_getc(i); 493 | return x == c ? 
mpc_input_success(i, x, o) : mpc_input_failure(i, x); 494 | } 495 | 496 | static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) { 497 | char x; 498 | if (mpc_input_terminated(i)) { return 0; } 499 | x = mpc_input_getc(i); 500 | return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 501 | } 502 | 503 | static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) { 504 | char x; 505 | if (mpc_input_terminated(i)) { return 0; } 506 | x = mpc_input_getc(i); 507 | return strchr(c, x) != 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 508 | } 509 | 510 | static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) { 511 | char x; 512 | if (mpc_input_terminated(i)) { return 0; } 513 | x = mpc_input_getc(i); 514 | return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 515 | } 516 | 517 | static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) { 518 | char x; 519 | if (mpc_input_terminated(i)) { return 0; } 520 | x = mpc_input_getc(i); 521 | return cond(x) ? 
mpc_input_success(i, x, o) : mpc_input_failure(i, x); 522 | } 523 | 524 | static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { 525 | 526 | const char *x = c; 527 | 528 | mpc_input_mark(i); 529 | while (*x) { 530 | if (!mpc_input_char(i, *x, NULL)) { 531 | mpc_input_rewind(i); 532 | return 0; 533 | } 534 | x++; 535 | } 536 | mpc_input_unmark(i); 537 | 538 | *o = mpc_malloc(i, strlen(c) + 1); 539 | strcpy(*o, c); 540 | return 1; 541 | } 542 | 543 | static int mpc_input_anchor(mpc_input_t* i, int(*f)(char,char), char **o) { 544 | *o = NULL; 545 | return f(i->last, mpc_input_peekc(i)); 546 | } 547 | 548 | static int mpc_input_soi(mpc_input_t* i, char **o) { 549 | *o = NULL; 550 | return i->last == '\0'; 551 | } 552 | 553 | static int mpc_input_eoi(mpc_input_t* i, char **o) { 554 | *o = NULL; 555 | if (i->state.term) { 556 | return 0; 557 | } else if (mpc_input_terminated(i)) { 558 | i->state.term = 1; 559 | return 1; 560 | } else { 561 | return 0; 562 | } 563 | } 564 | 565 | static mpc_state_t *mpc_input_state_copy(mpc_input_t *i) { 566 | mpc_state_t *r = mpc_malloc(i, sizeof(mpc_state_t)); 567 | memcpy(r, &i->state, sizeof(mpc_state_t)); 568 | return r; 569 | } 570 | 571 | /* 572 | ** Error Type 573 | */ 574 | 575 | void mpc_err_delete(mpc_err_t *x) { 576 | int i; 577 | for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); } 578 | free(x->expected); 579 | free(x->filename); 580 | free(x->failure); 581 | free(x); 582 | } 583 | 584 | void mpc_err_print(mpc_err_t *x) { 585 | mpc_err_print_to(x, stdout); 586 | } 587 | 588 | void mpc_err_print_to(mpc_err_t *x, FILE *f) { 589 | char *str = mpc_err_string(x); 590 | fprintf(f, "%s", str); 591 | free(str); 592 | } 593 | 594 | static void mpc_err_string_cat(char *buffer, int *pos, int *max, char const *fmt, ...) 
{ 595 | /* TODO: Error Checking on Length */ 596 | int left = ((*max) - (*pos)); 597 | va_list va; 598 | va_start(va, fmt); 599 | if (left < 0) { left = 0;} 600 | (*pos) += vsprintf(buffer + (*pos), fmt, va); 601 | va_end(va); 602 | } 603 | 604 | static char char_unescape_buffer[4]; 605 | 606 | static const char *mpc_err_char_unescape(char c) { 607 | 608 | char_unescape_buffer[0] = '\''; 609 | char_unescape_buffer[1] = ' '; 610 | char_unescape_buffer[2] = '\''; 611 | char_unescape_buffer[3] = '\0'; 612 | 613 | switch (c) { 614 | case '\a': return "bell"; 615 | case '\b': return "backspace"; 616 | case '\f': return "formfeed"; 617 | case '\r': return "carriage return"; 618 | case '\v': return "vertical tab"; 619 | case '\0': return "end of input"; 620 | case '\n': return "newline"; 621 | case '\t': return "tab"; 622 | case ' ' : return "space"; 623 | default: 624 | char_unescape_buffer[1] = c; 625 | return char_unescape_buffer; 626 | } 627 | 628 | } 629 | 630 | char *mpc_err_string(mpc_err_t *x) { 631 | 632 | int i; 633 | int pos = 0; 634 | int max = 1023; 635 | char *buffer = calloc(1, 1024); 636 | 637 | if (x->failure) { 638 | mpc_err_string_cat(buffer, &pos, &max, 639 | "%s: error: %s\n", x->filename, x->failure); 640 | return buffer; 641 | } 642 | 643 | mpc_err_string_cat(buffer, &pos, &max, 644 | "%s:%i:%i: error: expected ", x->filename, x->state.row+1, x->state.col+1); 645 | 646 | if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); } 647 | if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); } 648 | if (x->expected_num >= 2) { 649 | 650 | for (i = 0; i < x->expected_num-2; i++) { 651 | mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]); 652 | } 653 | 654 | mpc_err_string_cat(buffer, &pos, &max, "%s or %s", 655 | x->expected[x->expected_num-2], 656 | x->expected[x->expected_num-1]); 657 | } 658 | 659 | mpc_err_string_cat(buffer, &pos, &max, " at "); 660 | 
mpc_err_string_cat(buffer, &pos, &max, mpc_err_char_unescape(x->received)); 661 | mpc_err_string_cat(buffer, &pos, &max, "\n"); 662 | 663 | return realloc(buffer, strlen(buffer) + 1); 664 | } 665 | 666 | static mpc_err_t *mpc_err_new(mpc_input_t *i, const char *expected) { 667 | mpc_err_t *x; 668 | if (i->suppress) { return NULL; } 669 | x = mpc_malloc(i, sizeof(mpc_err_t)); 670 | x->filename = mpc_malloc(i, strlen(i->filename) + 1); 671 | strcpy(x->filename, i->filename); 672 | x->state = i->state; 673 | x->expected_num = 1; 674 | x->expected = mpc_malloc(i, sizeof(char*)); 675 | x->expected[0] = mpc_malloc(i, strlen(expected) + 1); 676 | strcpy(x->expected[0], expected); 677 | x->failure = NULL; 678 | x->received = mpc_input_peekc(i); 679 | return x; 680 | } 681 | 682 | static mpc_err_t *mpc_err_fail(mpc_input_t *i, const char *failure) { 683 | mpc_err_t *x; 684 | if (i->suppress) { return NULL; } 685 | x = mpc_malloc(i, sizeof(mpc_err_t)); 686 | x->filename = mpc_malloc(i, strlen(i->filename) + 1); 687 | strcpy(x->filename, i->filename); 688 | x->state = i->state; 689 | x->expected_num = 0; 690 | x->expected = NULL; 691 | x->failure = mpc_malloc(i, strlen(failure) + 1); 692 | strcpy(x->failure, failure); 693 | x->received = ' '; 694 | return x; 695 | } 696 | 697 | static mpc_err_t *mpc_err_file(const char *filename, const char *failure) { 698 | mpc_err_t *x; 699 | x = malloc(sizeof(mpc_err_t)); 700 | x->filename = malloc(strlen(filename) + 1); 701 | strcpy(x->filename, filename); 702 | x->state = mpc_state_new(); 703 | x->expected_num = 0; 704 | x->expected = NULL; 705 | x->failure = malloc(strlen(failure) + 1); 706 | strcpy(x->failure, failure); 707 | x->received = ' '; 708 | return x; 709 | } 710 | 711 | static void mpc_err_delete_internal(mpc_input_t *i, mpc_err_t *x) { 712 | int j; 713 | if (x == NULL) { return; } 714 | for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } 715 | mpc_free(i, x->expected); 716 | mpc_free(i, x->filename); 717 | 
mpc_free(i, x->failure); 718 | mpc_free(i, x); 719 | } 720 | 721 | static mpc_err_t *mpc_err_export(mpc_input_t *i, mpc_err_t *x) { 722 | int j; 723 | for (j = 0; j < x->expected_num; j++) { 724 | x->expected[j] = mpc_export(i, x->expected[j]); 725 | } 726 | x->expected = mpc_export(i, x->expected); 727 | x->filename = mpc_export(i, x->filename); 728 | x->failure = mpc_export(i, x->failure); 729 | return mpc_export(i, x); 730 | } 731 | 732 | static int mpc_err_contains_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { 733 | int j; 734 | (void)i; 735 | for (j = 0; j < x->expected_num; j++) { 736 | if (strcmp(x->expected[j], expected) == 0) { return 1; } 737 | } 738 | return 0; 739 | } 740 | 741 | static void mpc_err_add_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { 742 | (void)i; 743 | x->expected_num++; 744 | x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); 745 | x->expected[x->expected_num-1] = mpc_malloc(i, strlen(expected) + 1); 746 | strcpy(x->expected[x->expected_num-1], expected); 747 | } 748 | 749 | static mpc_err_t *mpc_err_or(mpc_input_t *i, mpc_err_t** x, int n) { 750 | 751 | int j, k, fst; 752 | mpc_err_t *e; 753 | 754 | fst = -1; 755 | for (j = 0; j < n; j++) { 756 | if (x[j] != NULL) { fst = j; } 757 | } 758 | 759 | if (fst == -1) { return NULL; } 760 | 761 | e = mpc_malloc(i, sizeof(mpc_err_t)); 762 | e->state = mpc_state_invalid(); 763 | e->expected_num = 0; 764 | e->expected = NULL; 765 | e->failure = NULL; 766 | e->filename = mpc_malloc(i, strlen(x[fst]->filename)+1); 767 | strcpy(e->filename, x[fst]->filename); 768 | 769 | for (j = 0; j < n; j++) { 770 | if (x[j] == NULL) { continue; } 771 | if (x[j]->state.pos > e->state.pos) { e->state = x[j]->state; } 772 | } 773 | 774 | for (j = 0; j < n; j++) { 775 | if (x[j] == NULL) { continue; } 776 | if (x[j]->state.pos < e->state.pos) { continue; } 777 | 778 | if (x[j]->failure) { 779 | e->failure = mpc_malloc(i, strlen(x[j]->failure)+1); 780 | 
strcpy(e->failure, x[j]->failure); 781 | break; 782 | } 783 | 784 | e->received = x[j]->received; 785 | 786 | for (k = 0; k < x[j]->expected_num; k++) { 787 | if (!mpc_err_contains_expected(i, e, x[j]->expected[k])) { 788 | mpc_err_add_expected(i, e, x[j]->expected[k]); 789 | } 790 | } 791 | } 792 | 793 | for (j = 0; j < n; j++) { 794 | if (x[j] == NULL) { continue; } 795 | mpc_err_delete_internal(i, x[j]); 796 | } 797 | 798 | return e; 799 | } 800 | 801 | static mpc_err_t *mpc_err_repeat(mpc_input_t *i, mpc_err_t *x, const char *prefix) { 802 | 803 | int j = 0; 804 | size_t l = 0; 805 | char *expect = NULL; 806 | 807 | if (x == NULL) { return NULL; } 808 | 809 | if (x->expected_num == 0) { 810 | expect = mpc_calloc(i, 1, 1); 811 | x->expected_num = 1; 812 | x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); 813 | x->expected[0] = expect; 814 | return x; 815 | } 816 | 817 | else if (x->expected_num == 1) { 818 | expect = mpc_malloc(i, strlen(prefix) + strlen(x->expected[0]) + 1); 819 | strcpy(expect, prefix); 820 | strcat(expect, x->expected[0]); 821 | mpc_free(i, x->expected[0]); 822 | x->expected[0] = expect; 823 | return x; 824 | } 825 | 826 | else if (x->expected_num > 1) { 827 | 828 | l += strlen(prefix); 829 | for (j = 0; j < x->expected_num-2; j++) { 830 | l += strlen(x->expected[j]) + strlen(", "); 831 | } 832 | l += strlen(x->expected[x->expected_num-2]); 833 | l += strlen(" or "); 834 | l += strlen(x->expected[x->expected_num-1]); 835 | 836 | expect = mpc_malloc(i, l + 1); 837 | 838 | strcpy(expect, prefix); 839 | for (j = 0; j < x->expected_num-2; j++) { 840 | strcat(expect, x->expected[j]); strcat(expect, ", "); 841 | } 842 | strcat(expect, x->expected[x->expected_num-2]); 843 | strcat(expect, " or "); 844 | strcat(expect, x->expected[x->expected_num-1]); 845 | 846 | for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } 847 | 848 | x->expected_num = 1; 849 | x->expected = mpc_realloc(i, x->expected, 
sizeof(char*) * x->expected_num); 850 | x->expected[0] = expect; 851 | return x; 852 | } 853 | 854 | return NULL; 855 | } 856 | 857 | static mpc_err_t *mpc_err_many1(mpc_input_t *i, mpc_err_t *x) { 858 | return mpc_err_repeat(i, x, "one or more of "); 859 | } 860 | 861 | static mpc_err_t *mpc_err_count(mpc_input_t *i, mpc_err_t *x, int n) { 862 | mpc_err_t *y; 863 | int digits = n/10 + 1; 864 | char *prefix; 865 | prefix = mpc_malloc(i, digits + strlen(" of ") + 1); 866 | sprintf(prefix, "%i of ", n); 867 | y = mpc_err_repeat(i, x, prefix); 868 | mpc_free(i, prefix); 869 | return y; 870 | } 871 | 872 | static mpc_err_t *mpc_err_merge(mpc_input_t *i, mpc_err_t *x, mpc_err_t *y) { 873 | mpc_err_t *errs[2]; 874 | errs[0] = x; 875 | errs[1] = y; 876 | return mpc_err_or(i, errs, 2); 877 | } 878 | 879 | /* 880 | ** Parser Type 881 | */ 882 | 883 | enum { 884 | MPC_TYPE_UNDEFINED = 0, 885 | MPC_TYPE_PASS = 1, 886 | MPC_TYPE_FAIL = 2, 887 | MPC_TYPE_LIFT = 3, 888 | MPC_TYPE_LIFT_VAL = 4, 889 | MPC_TYPE_EXPECT = 5, 890 | MPC_TYPE_ANCHOR = 6, 891 | MPC_TYPE_STATE = 7, 892 | 893 | MPC_TYPE_ANY = 8, 894 | MPC_TYPE_SINGLE = 9, 895 | MPC_TYPE_ONEOF = 10, 896 | MPC_TYPE_NONEOF = 11, 897 | MPC_TYPE_RANGE = 12, 898 | MPC_TYPE_SATISFY = 13, 899 | MPC_TYPE_STRING = 14, 900 | 901 | MPC_TYPE_APPLY = 15, 902 | MPC_TYPE_APPLY_TO = 16, 903 | MPC_TYPE_PREDICT = 17, 904 | MPC_TYPE_NOT = 18, 905 | MPC_TYPE_MAYBE = 19, 906 | MPC_TYPE_MANY = 20, 907 | MPC_TYPE_MANY1 = 21, 908 | MPC_TYPE_COUNT = 22, 909 | 910 | MPC_TYPE_OR = 23, 911 | MPC_TYPE_AND = 24, 912 | 913 | MPC_TYPE_CHECK = 25, 914 | MPC_TYPE_CHECK_WITH = 26, 915 | 916 | MPC_TYPE_SOI = 27, 917 | MPC_TYPE_EOI = 28 918 | }; 919 | 920 | typedef struct { char *m; } mpc_pdata_fail_t; 921 | typedef struct { mpc_ctor_t lf; void *x; } mpc_pdata_lift_t; 922 | typedef struct { mpc_parser_t *x; char *m; } mpc_pdata_expect_t; 923 | typedef struct { int(*f)(char,char); } mpc_pdata_anchor_t; 924 | typedef struct { char x; } mpc_pdata_single_t; 925 | 
typedef struct { char x; char y; } mpc_pdata_range_t; 926 | typedef struct { int(*f)(char); } mpc_pdata_satisfy_t; 927 | typedef struct { char *x; } mpc_pdata_string_t; 928 | typedef struct { mpc_parser_t *x; mpc_apply_t f; } mpc_pdata_apply_t; 929 | typedef struct { mpc_parser_t *x; mpc_apply_to_t f; void *d; } mpc_pdata_apply_to_t; 930 | typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_t f; char *e; } mpc_pdata_check_t; 931 | typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_with_t f; void *d; char *e; } mpc_pdata_check_with_t; 932 | typedef struct { mpc_parser_t *x; } mpc_pdata_predict_t; 933 | typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_ctor_t lf; } mpc_pdata_not_t; 934 | typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_dtor_t dx; } mpc_pdata_repeat_t; 935 | typedef struct { int n; mpc_parser_t **xs; } mpc_pdata_or_t; 936 | typedef struct { int n; mpc_fold_t f; mpc_parser_t **xs; mpc_dtor_t *dxs; } mpc_pdata_and_t; 937 | 938 | typedef union { 939 | mpc_pdata_fail_t fail; 940 | mpc_pdata_lift_t lift; 941 | mpc_pdata_expect_t expect; 942 | mpc_pdata_anchor_t anchor; 943 | mpc_pdata_single_t single; 944 | mpc_pdata_range_t range; 945 | mpc_pdata_satisfy_t satisfy; 946 | mpc_pdata_string_t string; 947 | mpc_pdata_apply_t apply; 948 | mpc_pdata_apply_to_t apply_to; 949 | mpc_pdata_check_t check; 950 | mpc_pdata_check_with_t check_with; 951 | mpc_pdata_predict_t predict; 952 | mpc_pdata_not_t not; 953 | mpc_pdata_repeat_t repeat; 954 | mpc_pdata_and_t and; 955 | mpc_pdata_or_t or; 956 | } mpc_pdata_t; 957 | 958 | struct mpc_parser_t { 959 | char *name; 960 | mpc_pdata_t data; 961 | char type; 962 | char retained; 963 | }; 964 | 965 | static mpc_val_t *mpcf_input_nth_free(mpc_input_t *i, int n, mpc_val_t **xs, int x) { 966 | int j; 967 | for (j = 0; j < n; j++) { if (j != x) { mpc_free(i, xs[j]); } } 968 | return xs[x]; 969 | } 970 | 971 | static mpc_val_t *mpcf_input_fst_free(mpc_input_t *i, int n, mpc_val_t **xs) { return 
mpcf_input_nth_free(i, n, xs, 0); } 972 | static mpc_val_t *mpcf_input_snd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 1); } 973 | static mpc_val_t *mpcf_input_trd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 2); } 974 | 975 | static mpc_val_t *mpcf_input_strfold(mpc_input_t *i, int n, mpc_val_t **xs) { 976 | int j; 977 | size_t l = 0; 978 | if (n == 0) { return mpc_calloc(i, 1, 1); } 979 | for (j = 0; j < n; j++) { l += strlen(xs[j]); } 980 | xs[0] = mpc_realloc(i, xs[0], l + 1); 981 | for (j = 1; j < n; j++) { strcat(xs[0], xs[j]); mpc_free(i, xs[j]); } 982 | return xs[0]; 983 | } 984 | 985 | static mpc_val_t *mpcf_input_state_ast(mpc_input_t *i, int n, mpc_val_t **xs) { 986 | mpc_state_t *s = ((mpc_state_t**)xs)[0]; 987 | mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; 988 | a = mpc_ast_state(a, *s); 989 | mpc_free(i, s); 990 | (void) n; 991 | return a; 992 | } 993 | 994 | static mpc_val_t *mpc_parse_fold(mpc_input_t *i, mpc_fold_t f, int n, mpc_val_t **xs) { 995 | int j; 996 | if (f == mpcf_null) { return mpcf_null(n, xs); } 997 | if (f == mpcf_fst) { return mpcf_fst(n, xs); } 998 | if (f == mpcf_snd) { return mpcf_snd(n, xs); } 999 | if (f == mpcf_trd) { return mpcf_trd(n, xs); } 1000 | if (f == mpcf_fst_free) { return mpcf_input_fst_free(i, n, xs); } 1001 | if (f == mpcf_snd_free) { return mpcf_input_snd_free(i, n, xs); } 1002 | if (f == mpcf_trd_free) { return mpcf_input_trd_free(i, n, xs); } 1003 | if (f == mpcf_strfold) { return mpcf_input_strfold(i, n, xs); } 1004 | if (f == mpcf_state_ast) { return mpcf_input_state_ast(i, n, xs); } 1005 | for (j = 0; j < n; j++) { xs[j] = mpc_export(i, xs[j]); } 1006 | return f(j, xs); 1007 | } 1008 | 1009 | static mpc_val_t *mpcf_input_free(mpc_input_t *i, mpc_val_t *x) { 1010 | mpc_free(i, x); 1011 | return NULL; 1012 | } 1013 | 1014 | static mpc_val_t *mpcf_input_str_ast(mpc_input_t *i, mpc_val_t *c) { 1015 | mpc_ast_t *a = mpc_ast_new("", c); 1016 | 
mpc_free(i, c); 1017 | return a; 1018 | } 1019 | 1020 | static mpc_val_t *mpc_parse_apply(mpc_input_t *i, mpc_apply_t f, mpc_val_t *x) { 1021 | if (f == mpcf_free) { return mpcf_input_free(i, x); } 1022 | if (f == mpcf_str_ast) { return mpcf_input_str_ast(i, x); } 1023 | return f(mpc_export(i, x)); 1024 | } 1025 | 1026 | static mpc_val_t *mpc_parse_apply_to(mpc_input_t *i, mpc_apply_to_t f, mpc_val_t *x, mpc_val_t *d) { 1027 | return f(mpc_export(i, x), d); 1028 | } 1029 | 1030 | static void mpc_parse_dtor(mpc_input_t *i, mpc_dtor_t d, mpc_val_t *x) { 1031 | if (d == free) { mpc_free(i, x); return; } 1032 | d(mpc_export(i, x)); 1033 | } 1034 | 1035 | enum { 1036 | MPC_PARSE_STACK_MIN = 4 1037 | }; 1038 | 1039 | #define MPC_SUCCESS(x) r->output = x; return 1 1040 | #define MPC_FAILURE(x) r->error = x; return 0 1041 | #define MPC_PRIMITIVE(x) \ 1042 | if (x) { MPC_SUCCESS(r->output); } \ 1043 | else { MPC_FAILURE(NULL); } 1044 | 1045 | #define MPC_MAX_RECURSION_DEPTH 1000 1046 | 1047 | static int mpc_parse_run(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r, mpc_err_t **e, int depth) { 1048 | 1049 | int j = 0, k = 0; 1050 | mpc_result_t results_stk[MPC_PARSE_STACK_MIN]; 1051 | mpc_result_t *results; 1052 | int results_slots = MPC_PARSE_STACK_MIN; 1053 | 1054 | if (depth == MPC_MAX_RECURSION_DEPTH) 1055 | { 1056 | MPC_FAILURE(mpc_err_fail(i, "Maximum recursion depth exceeded!")); 1057 | } 1058 | 1059 | switch (p->type) { 1060 | 1061 | /* Basic Parsers */ 1062 | 1063 | case MPC_TYPE_ANY: MPC_PRIMITIVE(mpc_input_any(i, (char**)&r->output)); 1064 | case MPC_TYPE_SINGLE: MPC_PRIMITIVE(mpc_input_char(i, p->data.single.x, (char**)&r->output)); 1065 | case MPC_TYPE_RANGE: MPC_PRIMITIVE(mpc_input_range(i, p->data.range.x, p->data.range.y, (char**)&r->output)); 1066 | case MPC_TYPE_ONEOF: MPC_PRIMITIVE(mpc_input_oneof(i, p->data.string.x, (char**)&r->output)); 1067 | case MPC_TYPE_NONEOF: MPC_PRIMITIVE(mpc_input_noneof(i, p->data.string.x, (char**)&r->output)); 1068 | case 
MPC_TYPE_SATISFY: MPC_PRIMITIVE(mpc_input_satisfy(i, p->data.satisfy.f, (char**)&r->output)); 1069 | case MPC_TYPE_STRING: MPC_PRIMITIVE(mpc_input_string(i, p->data.string.x, (char**)&r->output)); 1070 | case MPC_TYPE_ANCHOR: MPC_PRIMITIVE(mpc_input_anchor(i, p->data.anchor.f, (char**)&r->output)); 1071 | case MPC_TYPE_SOI: MPC_PRIMITIVE(mpc_input_soi(i, (char**)&r->output)); 1072 | case MPC_TYPE_EOI: MPC_PRIMITIVE(mpc_input_eoi(i, (char**)&r->output)); 1073 | 1074 | /* Other parsers */ 1075 | 1076 | case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i, "Parser Undefined!")); 1077 | case MPC_TYPE_PASS: MPC_SUCCESS(NULL); 1078 | case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i, p->data.fail.m)); 1079 | case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); 1080 | case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); 1081 | case MPC_TYPE_STATE: MPC_SUCCESS(mpc_input_state_copy(i)); 1082 | 1083 | /* Application Parsers */ 1084 | 1085 | case MPC_TYPE_APPLY: 1086 | if (mpc_parse_run(i, p->data.apply.x, r, e, depth+1)) { 1087 | MPC_SUCCESS(mpc_parse_apply(i, p->data.apply.f, r->output)); 1088 | } else { 1089 | MPC_FAILURE(r->output); 1090 | } 1091 | 1092 | case MPC_TYPE_APPLY_TO: 1093 | if (mpc_parse_run(i, p->data.apply_to.x, r, e, depth+1)) { 1094 | MPC_SUCCESS(mpc_parse_apply_to(i, p->data.apply_to.f, r->output, p->data.apply_to.d)); 1095 | } else { 1096 | MPC_FAILURE(r->error); 1097 | } 1098 | 1099 | case MPC_TYPE_CHECK: 1100 | if (mpc_parse_run(i, p->data.check.x, r, e, depth+1)) { 1101 | if (p->data.check.f(&r->output)) { 1102 | MPC_SUCCESS(r->output); 1103 | } else { 1104 | mpc_parse_dtor(i, p->data.check.dx, r->output); 1105 | MPC_FAILURE(mpc_err_fail(i, p->data.check.e)); 1106 | } 1107 | } else { 1108 | MPC_FAILURE(r->error); 1109 | } 1110 | 1111 | case MPC_TYPE_CHECK_WITH: 1112 | if (mpc_parse_run(i, p->data.check_with.x, r, e, depth+1)) { 1113 | if (p->data.check_with.f(&r->output, p->data.check_with.d)) { 1114 | MPC_SUCCESS(r->output); 1115 | } else { 1116 | 
mpc_parse_dtor(i, p->data.check.dx, r->output); 1117 | MPC_FAILURE(mpc_err_fail(i, p->data.check_with.e)); 1118 | } 1119 | } else { 1120 | MPC_FAILURE(r->error); 1121 | } 1122 | 1123 | case MPC_TYPE_EXPECT: 1124 | mpc_input_suppress_enable(i); 1125 | if (mpc_parse_run(i, p->data.expect.x, r, e, depth+1)) { 1126 | mpc_input_suppress_disable(i); 1127 | MPC_SUCCESS(r->output); 1128 | } else { 1129 | mpc_input_suppress_disable(i); 1130 | MPC_FAILURE(mpc_err_new(i, p->data.expect.m)); 1131 | } 1132 | 1133 | case MPC_TYPE_PREDICT: 1134 | mpc_input_backtrack_disable(i); 1135 | if (mpc_parse_run(i, p->data.predict.x, r, e, depth+1)) { 1136 | mpc_input_backtrack_enable(i); 1137 | MPC_SUCCESS(r->output); 1138 | } else { 1139 | mpc_input_backtrack_enable(i); 1140 | MPC_FAILURE(r->error); 1141 | } 1142 | 1143 | /* Optional Parsers */ 1144 | 1145 | /* TODO: Update Not Error Message */ 1146 | 1147 | case MPC_TYPE_NOT: 1148 | mpc_input_mark(i); 1149 | mpc_input_suppress_enable(i); 1150 | if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { 1151 | mpc_input_rewind(i); 1152 | mpc_input_suppress_disable(i); 1153 | mpc_parse_dtor(i, p->data.not.dx, r->output); 1154 | MPC_FAILURE(mpc_err_new(i, "opposite")); 1155 | } else { 1156 | mpc_input_unmark(i); 1157 | mpc_input_suppress_disable(i); 1158 | MPC_SUCCESS(p->data.not.lf()); 1159 | } 1160 | 1161 | case MPC_TYPE_MAYBE: 1162 | if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { 1163 | MPC_SUCCESS(r->output); 1164 | } else { 1165 | *e = mpc_err_merge(i, *e, r->error); 1166 | MPC_SUCCESS(p->data.not.lf()); 1167 | } 1168 | 1169 | /* Repeat Parsers */ 1170 | 1171 | case MPC_TYPE_MANY: 1172 | 1173 | results = results_stk; 1174 | 1175 | while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { 1176 | j++; 1177 | if (j == MPC_PARSE_STACK_MIN) { 1178 | results_slots = j + j / 2; 1179 | results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); 1180 | memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); 
1181 | } else if (j >= results_slots) { 1182 | results_slots = j + j / 2; 1183 | results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); 1184 | } 1185 | } 1186 | 1187 | *e = mpc_err_merge(i, *e, results[j].error); 1188 | 1189 | MPC_SUCCESS( 1190 | mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1191 | if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1192 | 1193 | case MPC_TYPE_MANY1: 1194 | 1195 | results = results_stk; 1196 | 1197 | while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { 1198 | j++; 1199 | if (j == MPC_PARSE_STACK_MIN) { 1200 | results_slots = j + j / 2; 1201 | results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); 1202 | memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); 1203 | } else if (j >= results_slots) { 1204 | results_slots = j + j / 2; 1205 | results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); 1206 | } 1207 | } 1208 | 1209 | if (j == 0) { 1210 | MPC_FAILURE( 1211 | mpc_err_many1(i, results[j].error); 1212 | if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1213 | } else { 1214 | 1215 | *e = mpc_err_merge(i, *e, results[j].error); 1216 | 1217 | MPC_SUCCESS( 1218 | mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1219 | if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1220 | } 1221 | 1222 | case MPC_TYPE_COUNT: 1223 | 1224 | results = p->data.repeat.n > MPC_PARSE_STACK_MIN 1225 | ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.repeat.n) 1226 | : results_stk; 1227 | 1228 | while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { 1229 | j++; 1230 | if (j == p->data.repeat.n) { break; } 1231 | } 1232 | 1233 | if (j == p->data.repeat.n) { 1234 | MPC_SUCCESS( 1235 | mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1236 | if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1237 | } else { 1238 | for (k = 0; k < j; k++) { 1239 | mpc_parse_dtor(i, p->data.repeat.dx, results[k].output); 1240 | } 1241 | MPC_FAILURE( 1242 | mpc_err_count(i, results[j].error, p->data.repeat.n); 1243 | if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1244 | } 1245 | 1246 | /* Combinatory Parsers */ 1247 | 1248 | case MPC_TYPE_OR: 1249 | 1250 | if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } 1251 | 1252 | results = p->data.or.n > MPC_PARSE_STACK_MIN 1253 | ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) 1254 | : results_stk; 1255 | 1256 | for (j = 0; j < p->data.or.n; j++) { 1257 | if (mpc_parse_run(i, p->data.or.xs[j], &results[j], e, depth+1)) { 1258 | MPC_SUCCESS(results[j].output; 1259 | if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1260 | } else { 1261 | *e = mpc_err_merge(i, *e, results[j].error); 1262 | } 1263 | } 1264 | 1265 | MPC_FAILURE(NULL; 1266 | if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1267 | 1268 | case MPC_TYPE_AND: 1269 | 1270 | if (p->data.and.n == 0) { MPC_SUCCESS(NULL); } 1271 | 1272 | results = p->data.or.n > MPC_PARSE_STACK_MIN 1273 | ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) 1274 | : results_stk; 1275 | 1276 | mpc_input_mark(i); 1277 | for (j = 0; j < p->data.and.n; j++) { 1278 | if (!mpc_parse_run(i, p->data.and.xs[j], &results[j], e, depth+1)) { 1279 | mpc_input_rewind(i); 1280 | for (k = 0; k < j; k++) { 1281 | mpc_parse_dtor(i, p->data.and.dxs[k], results[k].output); 1282 | } 1283 | MPC_FAILURE(results[j].error; 1284 | if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1285 | } 1286 | } 1287 | mpc_input_unmark(i); 1288 | MPC_SUCCESS( 1289 | mpc_parse_fold(i, p->data.and.f, j, (mpc_val_t**)results); 1290 | if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1291 | 1292 | /* End */ 1293 | 1294 | default: 1295 | 1296 | MPC_FAILURE(mpc_err_fail(i, "Unknown Parser Type Id!")); 1297 | } 1298 | 1299 | return 0; 1300 | 1301 | } 1302 | 1303 | #undef MPC_SUCCESS 1304 | #undef MPC_FAILURE 1305 | #undef MPC_PRIMITIVE 1306 | 1307 | int mpc_parse_input(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r) { 1308 | int x; 1309 | mpc_err_t *e = mpc_err_fail(i, "Unknown Error"); 1310 | e->state = mpc_state_invalid(); 1311 | x = mpc_parse_run(i, p, r, &e, 0); 1312 | if (x) { 1313 | mpc_err_delete_internal(i, e); 1314 | r->output = mpc_export(i, r->output); 1315 | } else { 1316 | r->error = mpc_err_export(i, mpc_err_merge(i, e, r->error)); 1317 | } 1318 | return x; 1319 | } 1320 | 1321 | int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r) { 1322 | int x; 1323 | mpc_input_t *i = mpc_input_new_string(filename, string); 1324 | x = mpc_parse_input(i, p, r); 1325 | mpc_input_delete(i); 1326 | return x; 1327 | } 1328 | 1329 | int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r) { 1330 | int x; 1331 | mpc_input_t *i = mpc_input_new_nstring(filename, string, length); 1332 | x = mpc_parse_input(i, p, r); 1333 | mpc_input_delete(i); 1334 | return x; 1335 | } 1336 | 1337 | int 
mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r) { 1338 | int x; 1339 | mpc_input_t *i = mpc_input_new_file(filename, file); 1340 | x = mpc_parse_input(i, p, r); 1341 | mpc_input_delete(i); 1342 | return x; 1343 | } 1344 | 1345 | int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r) { 1346 | int x; 1347 | mpc_input_t *i = mpc_input_new_pipe(filename, pipe); 1348 | x = mpc_parse_input(i, p, r); 1349 | mpc_input_delete(i); 1350 | return x; 1351 | } 1352 | 1353 | int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { 1354 | 1355 | FILE *f = fopen(filename, "rb"); 1356 | int res; 1357 | 1358 | if (f == NULL) { 1359 | r->output = NULL; 1360 | r->error = mpc_err_file(filename, "Unable to open file!"); 1361 | return 0; 1362 | } 1363 | 1364 | res = mpc_parse_file(filename, f, p, r); 1365 | fclose(f); 1366 | return res; 1367 | } 1368 | 1369 | /* 1370 | ** Building a Parser 1371 | */ 1372 | 1373 | static void mpc_undefine_unretained(mpc_parser_t *p, int force); 1374 | 1375 | static void mpc_undefine_or(mpc_parser_t *p) { 1376 | 1377 | int i; 1378 | for (i = 0; i < p->data.or.n; i++) { 1379 | mpc_undefine_unretained(p->data.or.xs[i], 0); 1380 | } 1381 | free(p->data.or.xs); 1382 | 1383 | } 1384 | 1385 | static void mpc_undefine_and(mpc_parser_t *p) { 1386 | 1387 | int i; 1388 | for (i = 0; i < p->data.and.n; i++) { 1389 | mpc_undefine_unretained(p->data.and.xs[i], 0); 1390 | } 1391 | free(p->data.and.xs); 1392 | free(p->data.and.dxs); 1393 | 1394 | } 1395 | 1396 | static void mpc_undefine_unretained(mpc_parser_t *p, int force) { 1397 | 1398 | if (p->retained && !force) { return; } 1399 | 1400 | switch (p->type) { 1401 | 1402 | case MPC_TYPE_FAIL: free(p->data.fail.m); break; 1403 | 1404 | case MPC_TYPE_ONEOF: 1405 | case MPC_TYPE_NONEOF: 1406 | case MPC_TYPE_STRING: 1407 | free(p->data.string.x); 1408 | break; 1409 | 1410 | case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 
0); break; 1411 | case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; 1412 | case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 0); break; 1413 | 1414 | case MPC_TYPE_MAYBE: 1415 | case MPC_TYPE_NOT: 1416 | mpc_undefine_unretained(p->data.not.x, 0); 1417 | break; 1418 | 1419 | case MPC_TYPE_EXPECT: 1420 | mpc_undefine_unretained(p->data.expect.x, 0); 1421 | free(p->data.expect.m); 1422 | break; 1423 | 1424 | case MPC_TYPE_MANY: 1425 | case MPC_TYPE_MANY1: 1426 | case MPC_TYPE_COUNT: 1427 | mpc_undefine_unretained(p->data.repeat.x, 0); 1428 | break; 1429 | 1430 | case MPC_TYPE_OR: mpc_undefine_or(p); break; 1431 | case MPC_TYPE_AND: mpc_undefine_and(p); break; 1432 | 1433 | case MPC_TYPE_CHECK: 1434 | mpc_undefine_unretained(p->data.check.x, 0); 1435 | free(p->data.check.e); 1436 | break; 1437 | 1438 | case MPC_TYPE_CHECK_WITH: 1439 | mpc_undefine_unretained(p->data.check_with.x, 0); 1440 | free(p->data.check_with.e); 1441 | break; 1442 | 1443 | default: break; 1444 | } 1445 | 1446 | if (!force) { 1447 | free(p->name); 1448 | free(p); 1449 | } 1450 | 1451 | } 1452 | 1453 | void mpc_delete(mpc_parser_t *p) { 1454 | if (p->retained) { 1455 | 1456 | if (p->type != MPC_TYPE_UNDEFINED) { 1457 | mpc_undefine_unretained(p, 0); 1458 | } 1459 | 1460 | free(p->name); 1461 | free(p); 1462 | 1463 | } else { 1464 | mpc_undefine_unretained(p, 0); 1465 | } 1466 | } 1467 | 1468 | static void mpc_soft_delete(mpc_val_t *x) { 1469 | mpc_undefine_unretained(x, 0); 1470 | } 1471 | 1472 | static mpc_parser_t *mpc_undefined(void) { 1473 | mpc_parser_t *p = calloc(1, sizeof(mpc_parser_t)); 1474 | p->retained = 0; 1475 | p->type = MPC_TYPE_UNDEFINED; 1476 | p->name = NULL; 1477 | return p; 1478 | } 1479 | 1480 | mpc_parser_t *mpc_new(const char *name) { 1481 | mpc_parser_t *p = mpc_undefined(); 1482 | p->retained = 1; 1483 | p->name = realloc(p->name, strlen(name) + 1); 1484 | strcpy(p->name, name); 1485 | return p; 1486 | } 1487 | 1488 | 
mpc_parser_t *mpc_copy(mpc_parser_t *a) { 1489 | int i = 0; 1490 | mpc_parser_t *p; 1491 | 1492 | if (a->retained) { return a; } 1493 | 1494 | p = mpc_undefined(); 1495 | p->retained = a->retained; 1496 | p->type = a->type; 1497 | p->data = a->data; 1498 | 1499 | if (a->name) { 1500 | p->name = malloc(strlen(a->name)+1); 1501 | strcpy(p->name, a->name); 1502 | } 1503 | 1504 | switch (a->type) { 1505 | 1506 | case MPC_TYPE_FAIL: 1507 | p->data.fail.m = malloc(strlen(a->data.fail.m)+1); 1508 | strcpy(p->data.fail.m, a->data.fail.m); 1509 | break; 1510 | 1511 | case MPC_TYPE_ONEOF: 1512 | case MPC_TYPE_NONEOF: 1513 | case MPC_TYPE_STRING: 1514 | p->data.string.x = malloc(strlen(a->data.string.x)+1); 1515 | strcpy(p->data.string.x, a->data.string.x); 1516 | break; 1517 | 1518 | case MPC_TYPE_APPLY: p->data.apply.x = mpc_copy(a->data.apply.x); break; 1519 | case MPC_TYPE_APPLY_TO: p->data.apply_to.x = mpc_copy(a->data.apply_to.x); break; 1520 | case MPC_TYPE_PREDICT: p->data.predict.x = mpc_copy(a->data.predict.x); break; 1521 | 1522 | case MPC_TYPE_MAYBE: 1523 | case MPC_TYPE_NOT: 1524 | p->data.not.x = mpc_copy(a->data.not.x); 1525 | break; 1526 | 1527 | case MPC_TYPE_EXPECT: 1528 | p->data.expect.x = mpc_copy(a->data.expect.x); 1529 | p->data.expect.m = malloc(strlen(a->data.expect.m)+1); 1530 | strcpy(p->data.expect.m, a->data.expect.m); 1531 | break; 1532 | 1533 | case MPC_TYPE_MANY: 1534 | case MPC_TYPE_MANY1: 1535 | case MPC_TYPE_COUNT: 1536 | p->data.repeat.x = mpc_copy(a->data.repeat.x); 1537 | break; 1538 | 1539 | case MPC_TYPE_OR: 1540 | p->data.or.xs = malloc(a->data.or.n * sizeof(mpc_parser_t*)); 1541 | for (i = 0; i < a->data.or.n; i++) { 1542 | p->data.or.xs[i] = mpc_copy(a->data.or.xs[i]); 1543 | } 1544 | break; 1545 | case MPC_TYPE_AND: 1546 | p->data.and.xs = malloc(a->data.and.n * sizeof(mpc_parser_t*)); 1547 | for (i = 0; i < a->data.and.n; i++) { 1548 | p->data.and.xs[i] = mpc_copy(a->data.and.xs[i]); 1549 | } 1550 | p->data.and.dxs = 
malloc((a->data.and.n-1) * sizeof(mpc_dtor_t)); 1551 | for (i = 0; i < a->data.and.n-1; i++) { 1552 | p->data.and.dxs[i] = a->data.and.dxs[i]; 1553 | } 1554 | break; 1555 | 1556 | case MPC_TYPE_CHECK: 1557 | p->data.check.x = mpc_copy(a->data.check.x); 1558 | p->data.check.e = malloc(strlen(a->data.check.e)+1); 1559 | strcpy(p->data.check.e, a->data.check.e); 1560 | break; 1561 | case MPC_TYPE_CHECK_WITH: 1562 | p->data.check_with.x = mpc_copy(a->data.check_with.x); 1563 | p->data.check_with.e = malloc(strlen(a->data.check_with.e)+1); 1564 | strcpy(p->data.check_with.e, a->data.check_with.e); 1565 | break; 1566 | 1567 | default: break; 1568 | } 1569 | 1570 | 1571 | return p; 1572 | } 1573 | 1574 | mpc_parser_t *mpc_undefine(mpc_parser_t *p) { 1575 | mpc_undefine_unretained(p, 1); 1576 | p->type = MPC_TYPE_UNDEFINED; 1577 | return p; 1578 | } 1579 | 1580 | mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { 1581 | 1582 | if (p->retained) { 1583 | p->type = a->type; 1584 | p->data = a->data; 1585 | } else { 1586 | mpc_parser_t *a2 = mpc_failf("Attempt to assign to Unretained Parser!"); 1587 | p->type = a2->type; 1588 | p->data = a2->data; 1589 | free(a2); 1590 | } 1591 | 1592 | free(a); 1593 | return p; 1594 | } 1595 | 1596 | void mpc_cleanup(int n, ...) 
{ 1597 | int i; 1598 | mpc_parser_t **list = malloc(sizeof(mpc_parser_t*) * n); 1599 | 1600 | va_list va; 1601 | va_start(va, n); 1602 | for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } 1603 | for (i = 0; i < n; i++) { mpc_undefine(list[i]); } 1604 | for (i = 0; i < n; i++) { mpc_delete(list[i]); } 1605 | va_end(va); 1606 | 1607 | free(list); 1608 | } 1609 | 1610 | mpc_parser_t *mpc_pass(void) { 1611 | mpc_parser_t *p = mpc_undefined(); 1612 | p->type = MPC_TYPE_PASS; 1613 | return p; 1614 | } 1615 | 1616 | mpc_parser_t *mpc_fail(const char *m) { 1617 | mpc_parser_t *p = mpc_undefined(); 1618 | p->type = MPC_TYPE_FAIL; 1619 | p->data.fail.m = malloc(strlen(m) + 1); 1620 | strcpy(p->data.fail.m, m); 1621 | return p; 1622 | } 1623 | 1624 | /* 1625 | ** As `snprintf` is not ANSI standard this 1626 | ** function `mpc_failf` should be considered 1627 | ** unsafe. 1628 | ** 1629 | ** You have a few options if this is going to be 1630 | ** trouble. 1631 | ** 1632 | ** - Ensure the format string does not exceed 1633 | ** the buffer length using precision specifiers 1634 | ** such as `%.512s`. 1635 | ** 1636 | ** - Patch this function in your code base to 1637 | ** use `snprintf` or whatever variant your 1638 | ** system supports. 1639 | ** 1640 | ** - Avoid it altogether. 1641 | ** 1642 | */ 1643 | 1644 | mpc_parser_t *mpc_failf(const char *fmt, ...) 
{ 1645 | 1646 | va_list va; 1647 | char *buffer; 1648 | 1649 | mpc_parser_t *p = mpc_undefined(); 1650 | p->type = MPC_TYPE_FAIL; 1651 | 1652 | va_start(va, fmt); 1653 | buffer = malloc(2048); 1654 | vsprintf(buffer, fmt, va); 1655 | va_end(va); 1656 | 1657 | buffer = realloc(buffer, strlen(buffer) + 1); 1658 | p->data.fail.m = buffer; 1659 | return p; 1660 | 1661 | } 1662 | 1663 | mpc_parser_t *mpc_lift_val(mpc_val_t *x) { 1664 | mpc_parser_t *p = mpc_undefined(); 1665 | p->type = MPC_TYPE_LIFT_VAL; 1666 | p->data.lift.x = x; 1667 | return p; 1668 | } 1669 | 1670 | mpc_parser_t *mpc_lift(mpc_ctor_t lf) { 1671 | mpc_parser_t *p = mpc_undefined(); 1672 | p->type = MPC_TYPE_LIFT; 1673 | p->data.lift.lf = lf; 1674 | return p; 1675 | } 1676 | 1677 | mpc_parser_t *mpc_anchor(int(*f)(char,char)) { 1678 | mpc_parser_t *p = mpc_undefined(); 1679 | p->type = MPC_TYPE_ANCHOR; 1680 | p->data.anchor.f = f; 1681 | return mpc_expect(p, "anchor"); 1682 | } 1683 | 1684 | mpc_parser_t *mpc_state(void) { 1685 | mpc_parser_t *p = mpc_undefined(); 1686 | p->type = MPC_TYPE_STATE; 1687 | return p; 1688 | } 1689 | 1690 | mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { 1691 | mpc_parser_t *p = mpc_undefined(); 1692 | p->type = MPC_TYPE_EXPECT; 1693 | p->data.expect.x = a; 1694 | p->data.expect.m = malloc(strlen(expected) + 1); 1695 | strcpy(p->data.expect.m, expected); 1696 | return p; 1697 | } 1698 | 1699 | /* 1700 | ** As `snprintf` is not ANSI standard this 1701 | ** function `mpc_expectf` should be considered 1702 | ** unsafe. 1703 | ** 1704 | ** You have a few options if this is going to be 1705 | ** trouble. 1706 | ** 1707 | ** - Ensure the format string does not exceed 1708 | ** the buffer length using precision specifiers 1709 | ** such as `%.512s`. 1710 | ** 1711 | ** - Patch this function in your code base to 1712 | ** use `snprintf` or whatever variant your 1713 | ** system supports. 1714 | ** 1715 | ** - Avoid it altogether. 
1716 | ** 1717 | */ 1718 | 1719 | mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...) { 1720 | va_list va; 1721 | char *buffer; 1722 | 1723 | mpc_parser_t *p = mpc_undefined(); 1724 | p->type = MPC_TYPE_EXPECT; 1725 | 1726 | va_start(va, fmt); 1727 | buffer = malloc(2048); 1728 | vsprintf(buffer, fmt, va); 1729 | va_end(va); 1730 | 1731 | buffer = realloc(buffer, strlen(buffer) + 1); 1732 | p->data.expect.x = a; 1733 | p->data.expect.m = buffer; 1734 | return p; 1735 | } 1736 | 1737 | /* 1738 | ** Basic Parsers 1739 | */ 1740 | 1741 | mpc_parser_t *mpc_any(void) { 1742 | mpc_parser_t *p = mpc_undefined(); 1743 | p->type = MPC_TYPE_ANY; 1744 | return mpc_expect(p, "any character"); 1745 | } 1746 | 1747 | mpc_parser_t *mpc_char(char c) { 1748 | mpc_parser_t *p = mpc_undefined(); 1749 | p->type = MPC_TYPE_SINGLE; 1750 | p->data.single.x = c; 1751 | return mpc_expectf(p, "'%c'", c); 1752 | } 1753 | 1754 | mpc_parser_t *mpc_range(char s, char e) { 1755 | mpc_parser_t *p = mpc_undefined(); 1756 | p->type = MPC_TYPE_RANGE; 1757 | p->data.range.x = s; 1758 | p->data.range.y = e; 1759 | return mpc_expectf(p, "character between '%c' and '%c'", s, e); 1760 | } 1761 | 1762 | mpc_parser_t *mpc_oneof(const char *s) { 1763 | mpc_parser_t *p = mpc_undefined(); 1764 | p->type = MPC_TYPE_ONEOF; 1765 | p->data.string.x = malloc(strlen(s) + 1); 1766 | strcpy(p->data.string.x, s); 1767 | return mpc_expectf(p, "one of '%s'", s); 1768 | } 1769 | 1770 | mpc_parser_t *mpc_noneof(const char *s) { 1771 | mpc_parser_t *p = mpc_undefined(); 1772 | p->type = MPC_TYPE_NONEOF; 1773 | p->data.string.x = malloc(strlen(s) + 1); 1774 | strcpy(p->data.string.x, s); 1775 | return mpc_expectf(p, "none of '%s'", s); 1776 | 1777 | } 1778 | 1779 | mpc_parser_t *mpc_satisfy(int(*f)(char)) { 1780 | mpc_parser_t *p = mpc_undefined(); 1781 | p->type = MPC_TYPE_SATISFY; 1782 | p->data.satisfy.f = f; 1783 | return mpc_expectf(p, "character satisfying function %p", f); 1784 | } 1785 | 1786 | 
mpc_parser_t *mpc_string(const char *s) { 1787 | mpc_parser_t *p = mpc_undefined(); 1788 | p->type = MPC_TYPE_STRING; 1789 | p->data.string.x = malloc(strlen(s) + 1); 1790 | strcpy(p->data.string.x, s); 1791 | return mpc_expectf(p, "\"%s\"", s); 1792 | } 1793 | 1794 | /* 1795 | ** Core Parsers 1796 | */ 1797 | 1798 | mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f) { 1799 | mpc_parser_t *p = mpc_undefined(); 1800 | p->type = MPC_TYPE_APPLY; 1801 | p->data.apply.x = a; 1802 | p->data.apply.f = f; 1803 | return p; 1804 | } 1805 | 1806 | mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x) { 1807 | mpc_parser_t *p = mpc_undefined(); 1808 | p->type = MPC_TYPE_APPLY_TO; 1809 | p->data.apply_to.x = a; 1810 | p->data.apply_to.f = f; 1811 | p->data.apply_to.d = x; 1812 | return p; 1813 | } 1814 | 1815 | mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e) { 1816 | mpc_parser_t *p = mpc_undefined(); 1817 | p->type = MPC_TYPE_CHECK; 1818 | p->data.check.x = a; 1819 | p->data.check.dx = da; 1820 | p->data.check.f = f; 1821 | p->data.check.e = malloc(strlen(e) + 1); 1822 | strcpy(p->data.check.e, e); 1823 | return p; 1824 | } 1825 | 1826 | mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e) { 1827 | mpc_parser_t *p = mpc_undefined(); 1828 | p->type = MPC_TYPE_CHECK_WITH; 1829 | p->data.check_with.x = a; 1830 | p->data.check_with.dx = da; 1831 | p->data.check_with.f = f; 1832 | p->data.check_with.d = x; 1833 | p->data.check_with.e = malloc(strlen(e) + 1); 1834 | strcpy(p->data.check_with.e, e); 1835 | return p; 1836 | } 1837 | 1838 | mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...) 
{ 1839 | va_list va; 1840 | char *buffer; 1841 | mpc_parser_t *p; 1842 | 1843 | va_start(va, fmt); 1844 | buffer = malloc(2048); 1845 | vsprintf(buffer, fmt, va); 1846 | va_end(va); 1847 | 1848 | p = mpc_check(a, da, f, buffer); 1849 | free(buffer); 1850 | 1851 | return p; 1852 | } 1853 | 1854 | mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...) { 1855 | va_list va; 1856 | char *buffer; 1857 | mpc_parser_t *p; 1858 | 1859 | va_start(va, fmt); 1860 | buffer = malloc(2048); 1861 | vsprintf(buffer, fmt, va); 1862 | va_end(va); 1863 | 1864 | p = mpc_check_with(a, da, f, x, buffer); 1865 | free(buffer); 1866 | 1867 | return p; 1868 | } 1869 | 1870 | mpc_parser_t *mpc_predictive(mpc_parser_t *a) { 1871 | mpc_parser_t *p = mpc_undefined(); 1872 | p->type = MPC_TYPE_PREDICT; 1873 | p->data.predict.x = a; 1874 | return p; 1875 | } 1876 | 1877 | mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf) { 1878 | mpc_parser_t *p = mpc_undefined(); 1879 | p->type = MPC_TYPE_NOT; 1880 | p->data.not.x = a; 1881 | p->data.not.dx = da; 1882 | p->data.not.lf = lf; 1883 | return p; 1884 | } 1885 | 1886 | mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da) { 1887 | return mpc_not_lift(a, da, mpcf_ctor_null); 1888 | } 1889 | 1890 | mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf) { 1891 | mpc_parser_t *p = mpc_undefined(); 1892 | p->type = MPC_TYPE_MAYBE; 1893 | p->data.not.x = a; 1894 | p->data.not.lf = lf; 1895 | return p; 1896 | } 1897 | 1898 | mpc_parser_t *mpc_maybe(mpc_parser_t *a) { 1899 | return mpc_maybe_lift(a, mpcf_ctor_null); 1900 | } 1901 | 1902 | mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a) { 1903 | mpc_parser_t *p = mpc_undefined(); 1904 | p->type = MPC_TYPE_MANY; 1905 | p->data.repeat.x = a; 1906 | p->data.repeat.f = f; 1907 | return p; 1908 | } 1909 | 1910 | mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a) { 1911 | mpc_parser_t *p = mpc_undefined(); 1912 | p->type 
= MPC_TYPE_MANY1; 1913 | p->data.repeat.x = a; 1914 | p->data.repeat.f = f; 1915 | return p; 1916 | } 1917 | 1918 | mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da) { 1919 | mpc_parser_t *p = mpc_undefined(); 1920 | p->type = MPC_TYPE_COUNT; 1921 | p->data.repeat.n = n; 1922 | p->data.repeat.f = f; 1923 | p->data.repeat.x = a; 1924 | p->data.repeat.dx = da; 1925 | return p; 1926 | } 1927 | 1928 | mpc_parser_t *mpc_or(int n, ...) { 1929 | 1930 | int i; 1931 | va_list va; 1932 | 1933 | mpc_parser_t *p = mpc_undefined(); 1934 | 1935 | p->type = MPC_TYPE_OR; 1936 | p->data.or.n = n; 1937 | p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); 1938 | 1939 | va_start(va, n); 1940 | for (i = 0; i < n; i++) { 1941 | p->data.or.xs[i] = va_arg(va, mpc_parser_t*); 1942 | } 1943 | va_end(va); 1944 | 1945 | return p; 1946 | } 1947 | 1948 | mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) { 1949 | 1950 | int i; 1951 | va_list va; 1952 | 1953 | mpc_parser_t *p = mpc_undefined(); 1954 | 1955 | p->type = MPC_TYPE_AND; 1956 | p->data.and.n = n; 1957 | p->data.and.f = f; 1958 | p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); 1959 | p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); 1960 | 1961 | va_start(va, f); 1962 | for (i = 0; i < n; i++) { 1963 | p->data.and.xs[i] = va_arg(va, mpc_parser_t*); 1964 | } 1965 | for (i = 0; i < (n-1); i++) { 1966 | p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); 1967 | } 1968 | va_end(va); 1969 | 1970 | return p; 1971 | } 1972 | 1973 | /* 1974 | ** Common Parsers 1975 | */ 1976 | 1977 | mpc_parser_t *mpc_soi(void) { 1978 | mpc_parser_t *p = mpc_undefined(); 1979 | p->type = MPC_TYPE_SOI; 1980 | return mpc_expect(p, "start of input"); 1981 | } 1982 | 1983 | mpc_parser_t *mpc_eoi(void) { 1984 | mpc_parser_t *p = mpc_undefined(); 1985 | p->type = MPC_TYPE_EOI; 1986 | return mpc_expect(p, "end of input"); 1987 | } 1988 | 1989 | static int mpc_boundary_anchor(char prev, char next) { 1990 | const char* word = 
"abcdefghijklmnopqrstuvwxyz" 1991 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 1992 | "0123456789_"; 1993 | if ( strchr(word, next) && prev == '\0') { return 1; } 1994 | if ( strchr(word, prev) && next == '\0') { return 1; } 1995 | if ( strchr(word, next) && !strchr(word, prev)) { return 1; } 1996 | if (!strchr(word, next) && strchr(word, prev)) { return 1; } 1997 | return 0; 1998 | } 1999 | 2000 | static int mpc_boundary_newline_anchor(char prev, char next) { 2001 | (void)next; 2002 | return prev == '\n'; 2003 | } 2004 | 2005 | mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "word boundary"); } 2006 | mpc_parser_t *mpc_boundary_newline(void) { return mpc_expect(mpc_anchor(mpc_boundary_newline_anchor), "start of newline"); } 2007 | 2008 | mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); } 2009 | mpc_parser_t *mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); } 2010 | mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); } 2011 | 2012 | mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } 2013 | mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } 2014 | mpc_parser_t *mpc_escape(void) { return mpc_and(2, mpcf_strfold, mpc_char('\\'), mpc_any(), free); } 2015 | 2016 | mpc_parser_t *mpc_digit(void) { return mpc_expect(mpc_oneof("0123456789"), "digit"); } 2017 | mpc_parser_t *mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } 2018 | mpc_parser_t *mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } 2019 | mpc_parser_t *mpc_digits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_digit()), "digits"); } 2020 | mpc_parser_t *mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_hexdigit()), "hex digits"); } 2021 | mpc_parser_t *mpc_octdigits(void) { return 
mpc_expect(mpc_many1(mpcf_strfold, mpc_octdigit()), "oct digits"); } 2022 | 2023 | mpc_parser_t *mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } 2024 | mpc_parser_t *mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } 2025 | mpc_parser_t *mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } 2026 | mpc_parser_t *mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } 2027 | mpc_parser_t *mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } 2028 | 2029 | mpc_parser_t *mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } 2030 | mpc_parser_t *mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } 2031 | mpc_parser_t *mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } 2032 | mpc_parser_t *mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } 2033 | 2034 | mpc_parser_t *mpc_real(void) { 2035 | 2036 | /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? 
*/ 2037 | 2038 | mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; 2039 | 2040 | p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); 2041 | p1 = mpc_digits(); 2042 | p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), free), mpcf_ctor_str); 2043 | p30 = mpc_oneof("eE"); 2044 | p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); 2045 | p32 = mpc_digits(); 2046 | p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, free, free), mpcf_ctor_str); 2047 | 2048 | return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, free, free, free), "real"); 2049 | 2050 | } 2051 | 2052 | mpc_parser_t *mpc_float(void) { 2053 | return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); 2054 | } 2055 | 2056 | mpc_parser_t *mpc_char_lit(void) { 2057 | return mpc_expect(mpc_between(mpc_or(2, mpc_escape(), mpc_any()), free, "'", "'"), "char"); 2058 | } 2059 | 2060 | mpc_parser_t *mpc_string_lit(void) { 2061 | mpc_parser_t *strchar = mpc_or(2, mpc_escape(), mpc_noneof("\"")); 2062 | return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), free, "\"", "\""), "string"); 2063 | } 2064 | 2065 | mpc_parser_t *mpc_regex_lit(void) { 2066 | mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); 2067 | return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), free, "/", "/"), "regex"); 2068 | } 2069 | 2070 | mpc_parser_t *mpc_ident(void) { 2071 | mpc_parser_t *p0, *p1; 2072 | p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); 2073 | p1 = mpc_many(mpcf_strfold, mpc_alphanum()); 2074 | return mpc_and(2, mpcf_strfold, p0, p1, free); 2075 | } 2076 | 2077 | /* 2078 | ** Useful Parsers 2079 | */ 2080 | 2081 | mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); } 2082 | mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); } 2083 | mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), 
mpcf_dtor_null, da); } 2084 | 2085 | mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); } 2086 | mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } 2087 | mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); } 2088 | mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } 2089 | mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); } 2090 | 2091 | mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); } 2092 | 2093 | mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { 2094 | return mpc_and(3, mpcf_snd_free, 2095 | mpc_string(o), a, mpc_string(c), 2096 | free, ad); 2097 | } 2098 | 2099 | mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } 2100 | mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } 2101 | mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } 2102 | mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } 2103 | 2104 | mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { 2105 | return mpc_and(3, mpcf_snd_free, 2106 | mpc_sym(o), mpc_tok(a), mpc_sym(c), 2107 | free, ad); 2108 | } 2109 | 2110 | mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } 2111 | mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } 2112 | mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "{", "}"); } 2113 | mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return 
mpc_tok_between(a, ad, "[", "]"); } 2114 | 2115 | /* 2116 | ** Regular Expression Parsers 2117 | */ 2118 | 2119 | /* 2120 | ** So here is a cute bootstrapping. 2121 | ** 2122 | ** I'm using the previously defined 2123 | ** mpc constructs and functions to 2124 | ** parse the user regex string and 2125 | ** construct a parser from it. 2126 | ** 2127 | ** As it turns out lots of the standard 2128 | ** mpc functions look a lot like `fold` 2129 | ** functions and so can be used indirectly 2130 | ** by many of the parsing functions to build 2131 | ** a parser directly - as we are parsing. 2132 | ** 2133 | ** This is certainly something that 2134 | ** would be less elegant/interesting 2135 | ** in a two-phase parser which first 2136 | ** builds an AST and then traverses it 2137 | ** to generate the object. 2138 | ** 2139 | ** This whole thing acts as a great 2140 | ** case study for how trivial it can be 2141 | ** to write a great parser in a few 2142 | ** lines of code using mpc. 2143 | */ 2144 | 2145 | /* 2146 | ** 2147 | ** ### Regular Expression Grammar 2148 | ** 2149 | ** : | ( "|" ) 2150 | ** 2151 | ** : * 2152 | ** 2153 | ** : 2154 | ** | "*" 2155 | ** | "+" 2156 | ** | "?" 
2157 | ** | "{" "}" 2158 | ** 2159 | ** : 2160 | ** | "\" 2161 | ** | "(" ")" 2162 | ** | "[" "]" 2163 | */ 2164 | 2165 | static mpc_val_t *mpcf_re_or(int n, mpc_val_t **xs) { 2166 | (void) n; 2167 | if (xs[1] == NULL) { return xs[0]; } 2168 | else { return mpc_or(2, xs[0], xs[1]); } 2169 | } 2170 | 2171 | static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { 2172 | int i; 2173 | mpc_parser_t *p = mpc_lift(mpcf_ctor_str); 2174 | for (i = 0; i < n; i++) { 2175 | p = mpc_and(2, mpcf_strfold, p, xs[i], free); 2176 | } 2177 | return p; 2178 | } 2179 | 2180 | static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { 2181 | int num; 2182 | (void) n; 2183 | if (xs[1] == NULL) { return xs[0]; } 2184 | switch(((char*)xs[1])[0]) 2185 | { 2186 | case '*': { free(xs[1]); return mpc_many(mpcf_strfold, xs[0]); }; break; 2187 | case '+': { free(xs[1]); return mpc_many1(mpcf_strfold, xs[0]); }; break; 2188 | case '?': { free(xs[1]); return mpc_maybe_lift(xs[0], mpcf_ctor_str); }; break; 2189 | default: 2190 | num = *(int*)xs[1]; 2191 | free(xs[1]); 2192 | } 2193 | 2194 | return mpc_count(num, mpcf_strfold, xs[0], free); 2195 | } 2196 | 2197 | static mpc_parser_t *mpc_re_escape_char(char c) { 2198 | switch (c) { 2199 | case 'a': return mpc_char('\a'); 2200 | case 'f': return mpc_char('\f'); 2201 | case 'n': return mpc_char('\n'); 2202 | case 'r': return mpc_char('\r'); 2203 | case 't': return mpc_char('\t'); 2204 | case 'v': return mpc_char('\v'); 2205 | case 'b': return mpc_and(2, mpcf_snd, mpc_boundary(), mpc_lift(mpcf_ctor_str), free); 2206 | case 'B': return mpc_not_lift(mpc_boundary(), free, mpcf_ctor_str); 2207 | case 'A': return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); 2208 | case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); 2209 | case 'd': return mpc_digit(); 2210 | case 'D': return mpc_not_lift(mpc_digit(), free, mpcf_ctor_str); 2211 | case 's': return mpc_whitespace(); 2212 | case 'S': return 
mpc_not_lift(mpc_whitespace(), free, mpcf_ctor_str); 2213 | case 'w': return mpc_alphanum(); 2214 | case 'W': return mpc_not_lift(mpc_alphanum(), free, mpcf_ctor_str); 2215 | default: return NULL; 2216 | } 2217 | } 2218 | 2219 | static mpc_val_t *mpcf_re_escape(mpc_val_t *x, void* data) { 2220 | 2221 | int mode = *((int*)data); 2222 | char *s = x; 2223 | mpc_parser_t *p; 2224 | 2225 | /* Any Character */ 2226 | if (s[0] == '.') { 2227 | free(s); 2228 | if (mode & MPC_RE_DOTALL) { 2229 | return mpc_any(); 2230 | } else { 2231 | return mpc_expect(mpc_noneof("\n"), "any character except a newline"); 2232 | } 2233 | } 2234 | 2235 | /* Start of Input */ 2236 | if (s[0] == '^') { 2237 | free(s); 2238 | if (mode & MPC_RE_MULTILINE) { 2239 | return mpc_and(2, mpcf_snd, mpc_or(2, mpc_soi(), mpc_boundary_newline()), mpc_lift(mpcf_ctor_str), free); 2240 | } else { 2241 | return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); 2242 | } 2243 | } 2244 | 2245 | /* End of Input */ 2246 | if (s[0] == '$') { 2247 | free(s); 2248 | if (mode & MPC_RE_MULTILINE) { 2249 | return mpc_or(2, 2250 | mpc_newline(), 2251 | mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); 2252 | } else { 2253 | return mpc_or(2, 2254 | mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free), 2255 | mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); 2256 | } 2257 | } 2258 | 2259 | /* Regex Escape */ 2260 | if (s[0] == '\\') { 2261 | p = mpc_re_escape_char(s[1]); 2262 | p = (p == NULL) ? 
mpc_char(s[1]) : p; 2263 | free(s); 2264 | return p; 2265 | } 2266 | 2267 | /* Regex Standard */ 2268 | p = mpc_char(s[0]); 2269 | free(s); 2270 | return p; 2271 | } 2272 | 2273 | static const char *mpc_re_range_escape_char(char c) { 2274 | switch (c) { 2275 | case '-': return "-"; 2276 | case 'a': return "\a"; 2277 | case 'f': return "\f"; 2278 | case 'n': return "\n"; 2279 | case 'r': return "\r"; 2280 | case 't': return "\t"; 2281 | case 'v': return "\v"; 2282 | case 'b': return "\b"; 2283 | case 'd': return "0123456789"; 2284 | case 's': return " \f\n\r\t\v"; 2285 | case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; 2286 | default: return NULL; 2287 | } 2288 | } 2289 | 2290 | static mpc_val_t *mpcf_re_range(mpc_val_t *x) { 2291 | 2292 | mpc_parser_t *out; 2293 | size_t i, j; 2294 | size_t start, end; 2295 | const char *tmp = NULL; 2296 | const char *s = x; 2297 | int comp = s[0] == '^' ? 1 : 0; 2298 | char *range = calloc(1,1); 2299 | 2300 | if (s[0] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } 2301 | if (s[0] == '^' && 2302 | s[1] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } 2303 | 2304 | for (i = comp; i < strlen(s); i++){ 2305 | 2306 | /* Regex Range Escape */ 2307 | if (s[i] == '\\') { 2308 | tmp = mpc_re_range_escape_char(s[i+1]); 2309 | if (tmp != NULL) { 2310 | range = realloc(range, strlen(range) + strlen(tmp) + 1); 2311 | strcat(range, tmp); 2312 | } else { 2313 | range = realloc(range, strlen(range) + 1 + 1); 2314 | range[strlen(range) + 1] = '\0'; 2315 | range[strlen(range) + 0] = s[i+1]; 2316 | } 2317 | i++; 2318 | } 2319 | 2320 | /* Regex Range...Range */ 2321 | else if (s[i] == '-') { 2322 | if (s[i+1] == '\0' || i == 0) { 2323 | range = realloc(range, strlen(range) + strlen("-") + 1); 2324 | strcat(range, "-"); 2325 | } else { 2326 | start = s[i-1]+1; 2327 | end = s[i+1]-1; 2328 | for (j = start; j <= end; j++) { 2329 | range = 
realloc(range, strlen(range) + 1 + 1 + 1); 2330 | range[strlen(range) + 1] = '\0'; 2331 | range[strlen(range) + 0] = (char)j; 2332 | } 2333 | } 2334 | } 2335 | 2336 | /* Regex Range Normal */ 2337 | else { 2338 | range = realloc(range, strlen(range) + 1 + 1); 2339 | range[strlen(range) + 1] = '\0'; 2340 | range[strlen(range) + 0] = s[i]; 2341 | } 2342 | 2343 | } 2344 | 2345 | out = comp == 1 ? mpc_noneof(range) : mpc_oneof(range); 2346 | 2347 | free(x); 2348 | free(range); 2349 | 2350 | return out; 2351 | } 2352 | 2353 | mpc_parser_t *mpc_re(const char *re) { 2354 | return mpc_re_mode(re, MPC_RE_DEFAULT); 2355 | } 2356 | 2357 | mpc_parser_t *mpc_re_mode(const char *re, int mode) { 2358 | 2359 | char *err_msg; 2360 | mpc_parser_t *err_out; 2361 | mpc_result_t r; 2362 | mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; 2363 | 2364 | Regex = mpc_new("regex"); 2365 | Term = mpc_new("term"); 2366 | Factor = mpc_new("factor"); 2367 | Base = mpc_new("base"); 2368 | Range = mpc_new("range"); 2369 | 2370 | mpc_define(Regex, mpc_and(2, mpcf_re_or, 2371 | Term, 2372 | mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, free)), 2373 | (mpc_dtor_t)mpc_delete 2374 | )); 2375 | 2376 | mpc_define(Term, mpc_many(mpcf_re_and, Factor)); 2377 | 2378 | mpc_define(Factor, mpc_and(2, mpcf_re_repeat, 2379 | Base, 2380 | mpc_or(5, 2381 | mpc_char('*'), mpc_char('+'), mpc_char('?'), 2382 | mpc_brackets(mpc_int(), free), 2383 | mpc_pass()), 2384 | (mpc_dtor_t)mpc_delete 2385 | )); 2386 | 2387 | mpc_define(Base, mpc_or(4, 2388 | mpc_parens(Regex, (mpc_dtor_t)mpc_delete), 2389 | mpc_squares(Range, (mpc_dtor_t)mpc_delete), 2390 | mpc_apply_to(mpc_escape(), mpcf_re_escape, &mode), 2391 | mpc_apply_to(mpc_noneof(")|"), mpcf_re_escape, &mode) 2392 | )); 2393 | 2394 | mpc_define(Range, mpc_apply( 2395 | mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), 2396 | mpcf_re_range 2397 | )); 2398 | 2399 | RegexEnclose = mpc_whole(mpc_predictive(Regex), 
(mpc_dtor_t)mpc_delete); 2400 | 2401 | mpc_optimise(RegexEnclose); 2402 | mpc_optimise(Regex); 2403 | mpc_optimise(Term); 2404 | mpc_optimise(Factor); 2405 | mpc_optimise(Base); 2406 | mpc_optimise(Range); 2407 | 2408 | if(!mpc_parse("", re, RegexEnclose, &r)) { 2409 | err_msg = mpc_err_string(r.error); 2410 | err_out = mpc_failf("Invalid Regex: %s", err_msg); 2411 | mpc_err_delete(r.error); 2412 | free(err_msg); 2413 | r.output = err_out; 2414 | } 2415 | 2416 | mpc_cleanup(6, RegexEnclose, Regex, Term, Factor, Base, Range); 2417 | 2418 | mpc_optimise(r.output); 2419 | 2420 | return r.output; 2421 | 2422 | } 2423 | 2424 | /* 2425 | ** Common Fold Functions 2426 | */ 2427 | 2428 | void mpcf_dtor_null(mpc_val_t *x) { (void) x; return; } 2429 | 2430 | mpc_val_t *mpcf_ctor_null(void) { return NULL; } 2431 | mpc_val_t *mpcf_ctor_str(void) { return calloc(1, 1); } 2432 | mpc_val_t *mpcf_free(mpc_val_t *x) { free(x); return NULL; } 2433 | 2434 | mpc_val_t *mpcf_int(mpc_val_t *x) { 2435 | int *y = malloc(sizeof(int)); 2436 | *y = strtol(x, NULL, 10); 2437 | free(x); 2438 | return y; 2439 | } 2440 | 2441 | mpc_val_t *mpcf_hex(mpc_val_t *x) { 2442 | int *y = malloc(sizeof(int)); 2443 | *y = strtol(x, NULL, 16); 2444 | free(x); 2445 | return y; 2446 | } 2447 | 2448 | mpc_val_t *mpcf_oct(mpc_val_t *x) { 2449 | int *y = malloc(sizeof(int)); 2450 | *y = strtol(x, NULL, 8); 2451 | free(x); 2452 | return y; 2453 | } 2454 | 2455 | mpc_val_t *mpcf_float(mpc_val_t *x) { 2456 | float *y = malloc(sizeof(float)); 2457 | *y = strtod(x, NULL); 2458 | free(x); 2459 | return y; 2460 | } 2461 | 2462 | mpc_val_t *mpcf_strtriml(mpc_val_t *x) { 2463 | char *s = x; 2464 | while (isspace((unsigned char)*s)) { 2465 | memmove(s, s+1, strlen(s)); 2466 | } 2467 | return s; 2468 | } 2469 | 2470 | mpc_val_t *mpcf_strtrimr(mpc_val_t *x) { 2471 | char *s = x; 2472 | size_t l = strlen(s); 2473 | while (l > 0 && isspace((unsigned char)s[l-1])) { 2474 | s[l-1] = '\0'; l--; 2475 | } 2476 | return s; 2477 | } 
2478 | 2479 | mpc_val_t *mpcf_strtrim(mpc_val_t *x) { 2480 | return mpcf_strtriml(mpcf_strtrimr(x)); 2481 | } 2482 | 2483 | static const char mpc_escape_input_c[] = { 2484 | '\a', '\b', '\f', '\n', '\r', 2485 | '\t', '\v', '\\', '\'', '\"', '\0'}; 2486 | 2487 | static const char *mpc_escape_output_c[] = { 2488 | "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", 2489 | "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; 2490 | 2491 | static const char mpc_escape_input_raw_re[] = { '/' }; 2492 | static const char *mpc_escape_output_raw_re[] = { "\\/", NULL }; 2493 | 2494 | static const char mpc_escape_input_raw_cstr[] = { '"' }; 2495 | static const char *mpc_escape_output_raw_cstr[] = { "\\\"", NULL }; 2496 | 2497 | static const char mpc_escape_input_raw_cchar[] = { '\'' }; 2498 | static const char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; 2499 | 2500 | static mpc_val_t *mpcf_escape_new(mpc_val_t *x, const char *input, const char **output) { 2501 | 2502 | int i; 2503 | int found; 2504 | char buff[2]; 2505 | char *s = x; 2506 | char *y = calloc(1, 1); 2507 | 2508 | while (*s) { 2509 | 2510 | i = 0; 2511 | found = 0; 2512 | 2513 | while (output[i]) { 2514 | if (*s == input[i]) { 2515 | y = realloc(y, strlen(y) + strlen(output[i]) + 1); 2516 | strcat(y, output[i]); 2517 | found = 1; 2518 | break; 2519 | } 2520 | i++; 2521 | } 2522 | 2523 | if (!found) { 2524 | y = realloc(y, strlen(y) + 2); 2525 | buff[0] = *s; buff[1] = '\0'; 2526 | strcat(y, buff); 2527 | } 2528 | 2529 | s++; 2530 | } 2531 | 2532 | 2533 | return y; 2534 | } 2535 | 2536 | static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, const char *input, const char **output) { 2537 | 2538 | int i; 2539 | int found = 0; 2540 | char buff[2]; 2541 | char *s = x; 2542 | char *y = calloc(1, 1); 2543 | 2544 | while (*s) { 2545 | 2546 | i = 0; 2547 | found = 0; 2548 | 2549 | while (output[i]) { 2550 | if ((*(s+0)) == output[i][0] && 2551 | (*(s+1)) == output[i][1]) { 2552 | y = realloc(y, strlen(y) + 1 + 1); 2553 | buff[0] = 
input[i]; buff[1] = '\0'; 2554 | strcat(y, buff); 2555 | found = 1; 2556 | s++; 2557 | break; 2558 | } 2559 | i++; 2560 | } 2561 | 2562 | if (!found) { 2563 | y = realloc(y, strlen(y) + 1 + 1); 2564 | buff[0] = *s; buff[1] = '\0'; 2565 | strcat(y, buff); 2566 | } 2567 | 2568 | if (*s == '\0') { break; } 2569 | else { s++; } 2570 | } 2571 | 2572 | return y; 2573 | 2574 | } 2575 | 2576 | mpc_val_t *mpcf_escape(mpc_val_t *x) { 2577 | mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c); 2578 | free(x); 2579 | return y; 2580 | } 2581 | 2582 | mpc_val_t *mpcf_unescape(mpc_val_t *x) { 2583 | mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c); 2584 | free(x); 2585 | return y; 2586 | } 2587 | 2588 | mpc_val_t *mpcf_escape_regex(mpc_val_t *x) { 2589 | mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); 2590 | free(x); 2591 | return y; 2592 | } 2593 | 2594 | mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { 2595 | mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); 2596 | free(x); 2597 | return y; 2598 | } 2599 | 2600 | mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { 2601 | mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); 2602 | free(x); 2603 | return y; 2604 | } 2605 | 2606 | mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x) { 2607 | mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); 2608 | free(x); 2609 | return y; 2610 | } 2611 | 2612 | mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x) { 2613 | mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); 2614 | free(x); 2615 | return y; 2616 | } 2617 | 2618 | mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) { 2619 | mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); 2620 | free(x); 2621 | return y; 2622 | } 2623 | 2624 | mpc_val_t *mpcf_null(int n, 
mpc_val_t** xs) { (void) n; (void) xs; return NULL; } 2625 | mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { (void) n; return xs[0]; } 2626 | mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { (void) n; return xs[1]; } 2627 | mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { (void) n; return xs[2]; } 2628 | 2629 | static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { 2630 | int i; 2631 | for (i = 0; i < n; i++) { 2632 | if (i != x) { free(xs[i]); } 2633 | } 2634 | return xs[x]; 2635 | } 2636 | 2637 | mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } 2638 | mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } 2639 | mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } 2640 | 2641 | mpc_val_t *mpcf_freefold(int n, mpc_val_t **xs) { 2642 | int i; 2643 | for (i = 0; i < n; i++) { 2644 | free(xs[i]); 2645 | } 2646 | return NULL; 2647 | } 2648 | 2649 | mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { 2650 | int i; 2651 | size_t l = 0; 2652 | 2653 | if (n == 0) { return calloc(1, 1); } 2654 | 2655 | for (i = 0; i < n; i++) { l += strlen(xs[i]); } 2656 | 2657 | xs[0] = realloc(xs[0], l + 1); 2658 | 2659 | for (i = 1; i < n; i++) { 2660 | strcat(xs[0], xs[i]); free(xs[i]); 2661 | } 2662 | 2663 | return xs[0]; 2664 | } 2665 | 2666 | mpc_val_t *mpcf_maths(int n, mpc_val_t **xs) { 2667 | int **vs = (int**)xs; 2668 | (void) n; 2669 | 2670 | switch(((char*)xs[1])[0]) 2671 | { 2672 | case '*': { *vs[0] *= *vs[2]; }; break; 2673 | case '/': { *vs[0] /= *vs[2]; }; break; 2674 | case '%': { *vs[0] %= *vs[2]; }; break; 2675 | case '+': { *vs[0] += *vs[2]; }; break; 2676 | case '-': { *vs[0] -= *vs[2]; }; break; 2677 | default: break; 2678 | } 2679 | 2680 | free(xs[1]); free(xs[2]); 2681 | 2682 | return xs[0]; 2683 | } 2684 | 2685 | /* 2686 | ** Printing 2687 | */ 2688 | 2689 | static void mpc_print_unretained(mpc_parser_t *p, int force) { 2690 | 2691 | /* TODO: Print Everything Escaped */ 2692 
| 2693 | int i; 2694 | char *s, *e; 2695 | char buff[2]; 2696 | 2697 | if (p->retained && !force) {; 2698 | if (p->name) { printf("<%s>", p->name); } 2699 | else { printf(""); } 2700 | return; 2701 | } 2702 | 2703 | if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } 2704 | if (p->type == MPC_TYPE_PASS) { printf("<:>"); } 2705 | if (p->type == MPC_TYPE_FAIL) { printf(""); } 2706 | if (p->type == MPC_TYPE_LIFT) { printf("<#>"); } 2707 | if (p->type == MPC_TYPE_STATE) { printf(""); } 2708 | if (p->type == MPC_TYPE_ANCHOR) { printf("<@>"); } 2709 | if (p->type == MPC_TYPE_EXPECT) { 2710 | printf("%s", p->data.expect.m); 2711 | /*mpc_print_unretained(p->data.expect.x, 0);*/ 2712 | } 2713 | 2714 | if (p->type == MPC_TYPE_ANY) { printf("<.>"); } 2715 | if (p->type == MPC_TYPE_SATISFY) { printf(""); } 2716 | 2717 | if (p->type == MPC_TYPE_SINGLE) { 2718 | buff[0] = p->data.single.x; buff[1] = '\0'; 2719 | s = mpcf_escape_new( 2720 | buff, 2721 | mpc_escape_input_c, 2722 | mpc_escape_output_c); 2723 | printf("'%s'", s); 2724 | free(s); 2725 | } 2726 | 2727 | if (p->type == MPC_TYPE_RANGE) { 2728 | buff[0] = p->data.range.x; buff[1] = '\0'; 2729 | s = mpcf_escape_new( 2730 | buff, 2731 | mpc_escape_input_c, 2732 | mpc_escape_output_c); 2733 | buff[0] = p->data.range.y; buff[1] = '\0'; 2734 | e = mpcf_escape_new( 2735 | buff, 2736 | mpc_escape_input_c, 2737 | mpc_escape_output_c); 2738 | printf("[%s-%s]", s, e); 2739 | free(s); 2740 | free(e); 2741 | } 2742 | 2743 | if (p->type == MPC_TYPE_ONEOF) { 2744 | s = mpcf_escape_new( 2745 | p->data.string.x, 2746 | mpc_escape_input_c, 2747 | mpc_escape_output_c); 2748 | printf("[%s]", s); 2749 | free(s); 2750 | } 2751 | 2752 | if (p->type == MPC_TYPE_NONEOF) { 2753 | s = mpcf_escape_new( 2754 | p->data.string.x, 2755 | mpc_escape_input_c, 2756 | mpc_escape_output_c); 2757 | printf("[^%s]", s); 2758 | free(s); 2759 | } 2760 | 2761 | if (p->type == MPC_TYPE_STRING) { 2762 | s = mpcf_escape_new( 2763 | p->data.string.x, 2764 | 
mpc_escape_input_c, 2765 | mpc_escape_output_c); 2766 | printf("\"%s\"", s); 2767 | free(s); 2768 | } 2769 | 2770 | if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } 2771 | if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } 2772 | if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } 2773 | 2774 | if (p->type == MPC_TYPE_NOT) { mpc_print_unretained(p->data.not.x, 0); printf("!"); } 2775 | if (p->type == MPC_TYPE_MAYBE) { mpc_print_unretained(p->data.not.x, 0); printf("?"); } 2776 | 2777 | if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 0); printf("*"); } 2778 | if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } 2779 | if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } 2780 | 2781 | if (p->type == MPC_TYPE_OR) { 2782 | printf("("); 2783 | for(i = 0; i < p->data.or.n-1; i++) { 2784 | mpc_print_unretained(p->data.or.xs[i], 0); 2785 | printf(" | "); 2786 | } 2787 | mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); 2788 | printf(")"); 2789 | } 2790 | 2791 | if (p->type == MPC_TYPE_AND) { 2792 | printf("("); 2793 | for(i = 0; i < p->data.and.n-1; i++) { 2794 | mpc_print_unretained(p->data.and.xs[i], 0); 2795 | printf(" "); 2796 | } 2797 | mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); 2798 | printf(")"); 2799 | } 2800 | 2801 | if (p->type == MPC_TYPE_CHECK) { 2802 | mpc_print_unretained(p->data.check.x, 0); 2803 | printf("->?"); 2804 | } 2805 | if (p->type == MPC_TYPE_CHECK_WITH) { 2806 | mpc_print_unretained(p->data.check_with.x, 0); 2807 | printf("->?"); 2808 | } 2809 | 2810 | } 2811 | 2812 | void mpc_print(mpc_parser_t *p) { 2813 | mpc_print_unretained(p, 1); 2814 | printf("\n"); 2815 | } 2816 | 2817 | /* 2818 | ** Testing 2819 | */ 2820 | 2821 | /* 2822 | ** These functions are slightly unwieldy and 2823 | ** also the whole of the testing suite 
for mpc 2824 | ** mpc is pretty shaky. 2825 | ** 2826 | ** It could do with a lot more tests and more 2827 | ** precision. Currently I am only really testing 2828 | ** changes off of the examples. 2829 | ** 2830 | */ 2831 | 2832 | int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, 2833 | int(*tester)(const void*, const void*), 2834 | mpc_dtor_t destructor, 2835 | void(*printer)(const void*)) { 2836 | mpc_result_t r; 2837 | (void) printer; 2838 | if (mpc_parse("", s, p, &r)) { 2839 | 2840 | if (tester(r.output, d)) { 2841 | destructor(r.output); 2842 | return 0; 2843 | } else { 2844 | destructor(r.output); 2845 | return 1; 2846 | } 2847 | 2848 | } else { 2849 | mpc_err_delete(r.error); 2850 | return 1; 2851 | } 2852 | 2853 | } 2854 | 2855 | int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, 2856 | int(*tester)(const void*, const void*), 2857 | mpc_dtor_t destructor, 2858 | void(*printer)(const void*)) { 2859 | 2860 | mpc_result_t r; 2861 | if (mpc_parse("", s, p, &r)) { 2862 | 2863 | if (tester(r.output, d)) { 2864 | destructor(r.output); 2865 | return 1; 2866 | } else { 2867 | printf("Got "); printer(r.output); printf("\n"); 2868 | printf("Expected "); printer(d); printf("\n"); 2869 | destructor(r.output); 2870 | return 0; 2871 | } 2872 | 2873 | } else { 2874 | mpc_err_print(r.error); 2875 | mpc_err_delete(r.error); 2876 | return 0; 2877 | 2878 | } 2879 | 2880 | } 2881 | 2882 | 2883 | /* 2884 | ** AST 2885 | */ 2886 | 2887 | void mpc_ast_delete(mpc_ast_t *a) { 2888 | 2889 | int i; 2890 | 2891 | if (a == NULL) { return; } 2892 | 2893 | for (i = 0; i < a->children_num; i++) { 2894 | mpc_ast_delete(a->children[i]); 2895 | } 2896 | 2897 | free(a->children); 2898 | free(a->tag); 2899 | free(a->contents); 2900 | free(a); 2901 | 2902 | } 2903 | 2904 | static void mpc_ast_delete_no_children(mpc_ast_t *a) { 2905 | free(a->children); 2906 | free(a->tag); 2907 | free(a->contents); 2908 | free(a); 2909 | } 2910 | 2911 | mpc_ast_t *mpc_ast_new(const 
char *tag, const char *contents) { 2912 | 2913 | mpc_ast_t *a = malloc(sizeof(mpc_ast_t)); 2914 | 2915 | a->tag = malloc(strlen(tag) + 1); 2916 | strcpy(a->tag, tag); 2917 | 2918 | a->contents = malloc(strlen(contents) + 1); 2919 | strcpy(a->contents, contents); 2920 | 2921 | a->state = mpc_state_new(); 2922 | 2923 | a->children_num = 0; 2924 | a->children = NULL; 2925 | return a; 2926 | 2927 | } 2928 | 2929 | mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) { 2930 | 2931 | mpc_ast_t *a = mpc_ast_new(tag, ""); 2932 | 2933 | int i; 2934 | va_list va; 2935 | va_start(va, tag); 2936 | 2937 | for (i = 0; i < n; i++) { 2938 | mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); 2939 | } 2940 | 2941 | va_end(va); 2942 | 2943 | return a; 2944 | 2945 | } 2946 | 2947 | mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { 2948 | 2949 | mpc_ast_t *r; 2950 | 2951 | if (a == NULL) { return a; } 2952 | if (a->children_num == 0) { return a; } 2953 | if (a->children_num == 1) { return a; } 2954 | 2955 | r = mpc_ast_new(">", ""); 2956 | mpc_ast_add_child(r, a); 2957 | return r; 2958 | } 2959 | 2960 | int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { 2961 | 2962 | int i; 2963 | 2964 | if (strcmp(a->tag, b->tag) != 0) { return 0; } 2965 | if (strcmp(a->contents, b->contents) != 0) { return 0; } 2966 | if (a->children_num != b->children_num) { return 0; } 2967 | 2968 | for (i = 0; i < a->children_num; i++) { 2969 | if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } 2970 | } 2971 | 2972 | return 1; 2973 | } 2974 | 2975 | mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) { 2976 | r->children_num++; 2977 | r->children = realloc(r->children, sizeof(mpc_ast_t*) * r->children_num); 2978 | r->children[r->children_num-1] = a; 2979 | return r; 2980 | } 2981 | 2982 | mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) { 2983 | if (a == NULL) { return a; } 2984 | a->tag = realloc(a->tag, strlen(t) + 1 + strlen(a->tag) + 1); 2985 | memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1); 
2986 | memmove(a->tag, t, strlen(t)); 2987 | memmove(a->tag + strlen(t), "|", 1); 2988 | return a; 2989 | } 2990 | 2991 | mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t) { 2992 | if (a == NULL) { return a; } 2993 | a->tag = realloc(a->tag, (strlen(t)-1) + strlen(a->tag) + 1); 2994 | memmove(a->tag + (strlen(t)-1), a->tag, strlen(a->tag)+1); 2995 | memmove(a->tag, t, (strlen(t)-1)); 2996 | return a; 2997 | } 2998 | 2999 | mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t) { 3000 | a->tag = realloc(a->tag, strlen(t) + 1); 3001 | strcpy(a->tag, t); 3002 | return a; 3003 | } 3004 | 3005 | mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { 3006 | if (a == NULL) { return a; } 3007 | a->state = s; 3008 | return a; 3009 | } 3010 | 3011 | static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { 3012 | 3013 | int i; 3014 | 3015 | if (a == NULL) { 3016 | fprintf(fp, "NULL\n"); 3017 | return; 3018 | } 3019 | 3020 | for (i = 0; i < d; i++) { fprintf(fp, " "); } 3021 | 3022 | if (strlen(a->contents)) { 3023 | fprintf(fp, "%s:%lu:%lu '%s'\n", a->tag, 3024 | (long unsigned int)(a->state.row+1), 3025 | (long unsigned int)(a->state.col+1), 3026 | a->contents); 3027 | } else { 3028 | fprintf(fp, "%s \n", a->tag); 3029 | } 3030 | 3031 | for (i = 0; i < a->children_num; i++) { 3032 | mpc_ast_print_depth(a->children[i], d+1, fp); 3033 | } 3034 | 3035 | } 3036 | 3037 | void mpc_ast_print(mpc_ast_t *a) { 3038 | mpc_ast_print_depth(a, 0, stdout); 3039 | } 3040 | 3041 | void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { 3042 | mpc_ast_print_depth(a, 0, fp); 3043 | } 3044 | 3045 | int mpc_ast_get_index(mpc_ast_t *ast, const char *tag) { 3046 | return mpc_ast_get_index_lb(ast, tag, 0); 3047 | } 3048 | 3049 | int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb) { 3050 | int i; 3051 | 3052 | for(i=lb; ichildren_num; i++) { 3053 | if(strcmp(ast->children[i]->tag, tag) == 0) { 3054 | return i; 3055 | } 3056 | } 3057 | 3058 | return -1; 3059 | } 3060 | 3061 | 
mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag) { 3062 | return mpc_ast_get_child_lb(ast, tag, 0); 3063 | } 3064 | 3065 | mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb) { 3066 | int i; 3067 | 3068 | for(i=lb; ichildren_num; i++) { 3069 | if(strcmp(ast->children[i]->tag, tag) == 0) { 3070 | return ast->children[i]; 3071 | } 3072 | } 3073 | 3074 | return NULL; 3075 | } 3076 | 3077 | mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, 3078 | mpc_ast_trav_order_t order) 3079 | { 3080 | mpc_ast_trav_t *trav, *n_trav; 3081 | mpc_ast_t *cnode = ast; 3082 | 3083 | /* Create the traversal structure */ 3084 | trav = malloc(sizeof(mpc_ast_trav_t)); 3085 | trav->curr_node = cnode; 3086 | trav->parent = NULL; 3087 | trav->curr_child = 0; 3088 | trav->order = order; 3089 | 3090 | /* Get start node */ 3091 | switch(order) { 3092 | case mpc_ast_trav_order_pre: 3093 | /* Nothing else is needed for pre order start */ 3094 | break; 3095 | 3096 | case mpc_ast_trav_order_post: 3097 | while(cnode->children_num > 0) { 3098 | cnode = cnode->children[0]; 3099 | 3100 | n_trav = malloc(sizeof(mpc_ast_trav_t)); 3101 | n_trav->curr_node = cnode; 3102 | n_trav->parent = trav; 3103 | n_trav->curr_child = 0; 3104 | n_trav->order = order; 3105 | 3106 | trav = n_trav; 3107 | } 3108 | 3109 | break; 3110 | 3111 | default: 3112 | /* Unreachable, but compiler complaints */ 3113 | break; 3114 | } 3115 | 3116 | return trav; 3117 | } 3118 | 3119 | mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav) { 3120 | mpc_ast_trav_t *n_trav, *to_free; 3121 | mpc_ast_t *ret = NULL; 3122 | int cchild; 3123 | 3124 | /* The end of traversal was reached */ 3125 | if(*trav == NULL) return NULL; 3126 | 3127 | switch((*trav)->order) { 3128 | case mpc_ast_trav_order_pre: 3129 | ret = (*trav)->curr_node; 3130 | 3131 | /* If there aren't any more children, go up */ 3132 | while(*trav != NULL && 3133 | (*trav)->curr_child >= (*trav)->curr_node->children_num) 3134 | { 3135 | to_free 
= *trav; 3136 | *trav = (*trav)->parent; 3137 | free(to_free); 3138 | } 3139 | 3140 | /* If trav is NULL, the end was reached */ 3141 | if(*trav == NULL) { 3142 | break; 3143 | } 3144 | 3145 | /* Go to next child */ 3146 | n_trav = malloc(sizeof(mpc_ast_trav_t)); 3147 | 3148 | cchild = (*trav)->curr_child; 3149 | n_trav->curr_node = (*trav)->curr_node->children[cchild]; 3150 | n_trav->parent = *trav; 3151 | n_trav->curr_child = 0; 3152 | n_trav->order = (*trav)->order; 3153 | 3154 | (*trav)->curr_child++; 3155 | *trav = n_trav; 3156 | 3157 | break; 3158 | 3159 | case mpc_ast_trav_order_post: 3160 | ret = (*trav)->curr_node; 3161 | 3162 | /* Move up tree to the parent If the parent doesn't have any more nodes, 3163 | * then this is the current node. If it does, move down to its left most 3164 | * child. Also, free the previous traversal node */ 3165 | to_free = *trav; 3166 | *trav = (*trav)->parent; 3167 | free(to_free); 3168 | 3169 | if(*trav == NULL) 3170 | break; 3171 | 3172 | /* Next child */ 3173 | (*trav)->curr_child++; 3174 | 3175 | /* If there aren't any more children, this is the next node */ 3176 | if((*trav)->curr_child >= (*trav)->curr_node->children_num) { 3177 | break; 3178 | } 3179 | 3180 | /* If there are still more children, find the leftmost child from this 3181 | * node */ 3182 | while((*trav)->curr_node->children_num > 0) { 3183 | n_trav = malloc(sizeof(mpc_ast_trav_t)); 3184 | 3185 | cchild = (*trav)->curr_child; 3186 | n_trav->curr_node = (*trav)->curr_node->children[cchild]; 3187 | n_trav->parent = *trav; 3188 | n_trav->curr_child = 0; 3189 | n_trav->order = (*trav)->order; 3190 | 3191 | *trav = n_trav; 3192 | } 3193 | 3194 | default: 3195 | /* Unreachable, but compiler complaints */ 3196 | break; 3197 | } 3198 | 3199 | return ret; 3200 | } 3201 | 3202 | void mpc_ast_traverse_free(mpc_ast_trav_t **trav) { 3203 | mpc_ast_trav_t *n_trav; 3204 | 3205 | /* Go through parents until all are free */ 3206 | while(*trav != NULL) { 3207 | n_trav = 
(*trav)->parent; 3208 | free(*trav); 3209 | *trav = n_trav; 3210 | } 3211 | } 3212 | 3213 | mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) { 3214 | 3215 | int i, j; 3216 | mpc_ast_t** as = (mpc_ast_t**)xs; 3217 | mpc_ast_t *r; 3218 | 3219 | if (n == 0) { return NULL; } 3220 | if (n == 1) { return xs[0]; } 3221 | if (n == 2 && xs[1] == NULL) { return xs[0]; } 3222 | if (n == 2 && xs[0] == NULL) { return xs[1]; } 3223 | 3224 | r = mpc_ast_new(">", ""); 3225 | 3226 | for (i = 0; i < n; i++) { 3227 | 3228 | if (as[i] == NULL) { continue; } 3229 | 3230 | if (as[i] && as[i]->children_num == 0) { 3231 | mpc_ast_add_child(r, as[i]); 3232 | } else if (as[i] && as[i]->children_num == 1) { 3233 | mpc_ast_add_child(r, mpc_ast_add_root_tag(as[i]->children[0], as[i]->tag)); 3234 | mpc_ast_delete_no_children(as[i]); 3235 | } else if (as[i] && as[i]->children_num >= 2) { 3236 | for (j = 0; j < as[i]->children_num; j++) { 3237 | mpc_ast_add_child(r, as[i]->children[j]); 3238 | } 3239 | mpc_ast_delete_no_children(as[i]); 3240 | } 3241 | 3242 | } 3243 | 3244 | if (r->children_num) { 3245 | r->state = r->children[0]->state; 3246 | } 3247 | 3248 | return r; 3249 | } 3250 | 3251 | mpc_val_t *mpcf_str_ast(mpc_val_t *c) { 3252 | mpc_ast_t *a = mpc_ast_new("", c); 3253 | free(c); 3254 | return a; 3255 | } 3256 | 3257 | mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs) { 3258 | mpc_state_t *s = ((mpc_state_t**)xs)[0]; 3259 | mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; 3260 | (void)n; 3261 | a = mpc_ast_state(a, *s); 3262 | free(s); 3263 | return a; 3264 | } 3265 | 3266 | mpc_parser_t *mpca_state(mpc_parser_t *a) { 3267 | return mpc_and(2, mpcf_state_ast, mpc_state(), a, free); 3268 | } 3269 | 3270 | mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t) { 3271 | return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_tag, (void*)t); 3272 | } 3273 | 3274 | mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t) { 3275 | return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_add_tag, (void*)t); 3276 | } 3277 | 
3278 | mpc_parser_t *mpca_root(mpc_parser_t *a) { 3279 | return mpc_apply(a, (mpc_apply_t)mpc_ast_add_root); 3280 | } 3281 | 3282 | mpc_parser_t *mpca_not(mpc_parser_t *a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); } 3283 | mpc_parser_t *mpca_maybe(mpc_parser_t *a) { return mpc_maybe(a); } 3284 | mpc_parser_t *mpca_many(mpc_parser_t *a) { return mpc_many(mpcf_fold_ast, a); } 3285 | mpc_parser_t *mpca_many1(mpc_parser_t *a) { return mpc_many1(mpcf_fold_ast, a); } 3286 | mpc_parser_t *mpca_count(int n, mpc_parser_t *a) { return mpc_count(n, mpcf_fold_ast, a, (mpc_dtor_t)mpc_ast_delete); } 3287 | 3288 | mpc_parser_t *mpca_or(int n, ...) { 3289 | 3290 | int i; 3291 | va_list va; 3292 | 3293 | mpc_parser_t *p = mpc_undefined(); 3294 | 3295 | p->type = MPC_TYPE_OR; 3296 | p->data.or.n = n; 3297 | p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); 3298 | 3299 | va_start(va, n); 3300 | for (i = 0; i < n; i++) { 3301 | p->data.or.xs[i] = va_arg(va, mpc_parser_t*); 3302 | } 3303 | va_end(va); 3304 | 3305 | return p; 3306 | 3307 | } 3308 | 3309 | mpc_parser_t *mpca_and(int n, ...) { 3310 | 3311 | int i; 3312 | va_list va; 3313 | 3314 | mpc_parser_t *p = mpc_undefined(); 3315 | 3316 | p->type = MPC_TYPE_AND; 3317 | p->data.and.n = n; 3318 | p->data.and.f = mpcf_fold_ast; 3319 | p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); 3320 | p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); 3321 | 3322 | va_start(va, n); 3323 | for (i = 0; i < n; i++) { 3324 | p->data.and.xs[i] = va_arg(va, mpc_parser_t*); 3325 | } 3326 | for (i = 0; i < (n-1); i++) { 3327 | p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; 3328 | } 3329 | va_end(va); 3330 | 3331 | return p; 3332 | } 3333 | 3334 | mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } 3335 | 3336 | /* 3337 | ** Grammar Parser 3338 | */ 3339 | 3340 | /* 3341 | ** This is another interesting bootstrapping. 
3342 | ** 3343 | ** Having a general purpose AST type allows 3344 | ** users to specify the grammar alone and 3345 | ** let all fold rules be automatically taken 3346 | ** care of by existing functions. 3347 | ** 3348 | ** You don't get to control the type spat 3349 | ** out but this means you can make a nice 3350 | ** parser to take in some grammar in nice 3351 | ** syntax and spit out a parser that works. 3352 | ** 3353 | ** The grammar for this looks surprisingly 3354 | ** like regex but the main difference is that 3355 | ** it is now whitespace insensitive and the 3356 | ** base type takes literals of some form. 3357 | */ 3358 | 3359 | /* 3360 | ** 3361 | ** ### Grammar Grammar 3362 | ** 3363 | ** : ( "|" ) | 3364 | ** 3365 | ** : * 3366 | ** 3367 | ** : 3368 | ** | "*" 3369 | ** | "+" 3370 | ** | "?" 3371 | ** | "{" "}" 3372 | ** 3373 | ** : "<" ( | ) ">" 3374 | ** | 3375 | ** | 3376 | ** | 3377 | ** | "(" ")" 3378 | */ 3379 | 3380 | typedef struct { 3381 | va_list *va; 3382 | int parsers_num; 3383 | mpc_parser_t **parsers; 3384 | int flags; 3385 | } mpca_grammar_st_t; 3386 | 3387 | static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) { 3388 | (void) n; 3389 | if (xs[1] == NULL) { return xs[0]; } 3390 | else { return mpca_or(2, xs[0], xs[1]); } 3391 | } 3392 | 3393 | static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { 3394 | int i; 3395 | mpc_parser_t *p = mpc_pass(); 3396 | for (i = 0; i < n; i++) { 3397 | if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); } 3398 | } 3399 | return p; 3400 | } 3401 | 3402 | static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) { 3403 | int num; 3404 | (void) n; 3405 | if (xs[1] == NULL) { return xs[0]; } 3406 | switch(((char*)xs[1])[0]) 3407 | { 3408 | case '*': { free(xs[1]); return mpca_many(xs[0]); }; break; 3409 | case '+': { free(xs[1]); return mpca_many1(xs[0]); }; break; 3410 | case '?': { free(xs[1]); return mpca_maybe(xs[0]); }; break; 3411 | case '!': { free(xs[1]); return mpca_not(xs[0]); }; 
break; 3412 | default: 3413 | num = *((int*)xs[1]); 3414 | free(xs[1]); 3415 | } 3416 | return mpca_count(num, xs[0]); 3417 | } 3418 | 3419 | static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) { 3420 | mpca_grammar_st_t *st = s; 3421 | char *y = mpcf_unescape(x); 3422 | mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_string(y) : mpc_tok(mpc_string(y)); 3423 | free(y); 3424 | return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "string")); 3425 | } 3426 | 3427 | static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) { 3428 | mpca_grammar_st_t *st = s; 3429 | char *y = mpcf_unescape(x); 3430 | mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_char(y[0]) : mpc_tok(mpc_char(y[0])); 3431 | free(y); 3432 | return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char")); 3433 | } 3434 | 3435 | static mpc_val_t *mpcaf_fold_regex(int n, mpc_val_t **xs) { 3436 | char *y = xs[0]; 3437 | char *m = xs[1]; 3438 | mpca_grammar_st_t *st = xs[2]; 3439 | mpc_parser_t *p; 3440 | int mode = MPC_RE_DEFAULT; 3441 | 3442 | (void)n; 3443 | if (strchr(m, 'm')) { mode |= MPC_RE_MULTILINE; } 3444 | if (strchr(m, 's')) { mode |= MPC_RE_DOTALL; } 3445 | y = mpcf_unescape_regex(y); 3446 | p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re_mode(y, mode) : mpc_tok(mpc_re_mode(y, mode)); 3447 | free(y); 3448 | free(m); 3449 | 3450 | return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex")); 3451 | } 3452 | 3453 | /* Should this just use `isdigit` instead? 
*/ 3454 | static int is_number(const char* s) { 3455 | size_t i; 3456 | for (i = 0; i < strlen(s); i++) { if (!strchr("0123456789", s[i])) { return 0; } } 3457 | return 1; 3458 | } 3459 | 3460 | static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { 3461 | 3462 | int i; 3463 | mpc_parser_t *p; 3464 | 3465 | /* Case of Number */ 3466 | if (is_number(x)) { 3467 | 3468 | i = strtol(x, NULL, 10); 3469 | 3470 | while (st->parsers_num <= i) { 3471 | st->parsers_num++; 3472 | st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); 3473 | st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); 3474 | if (st->parsers[st->parsers_num-1] == NULL) { 3475 | return mpc_failf("No Parser in position %i! Only supplied %i Parsers!", i, st->parsers_num); 3476 | } 3477 | } 3478 | 3479 | return st->parsers[st->parsers_num-1]; 3480 | 3481 | /* Case of Identifier */ 3482 | } else { 3483 | 3484 | /* Search Existing Parsers */ 3485 | for (i = 0; i < st->parsers_num; i++) { 3486 | mpc_parser_t *q = st->parsers[i]; 3487 | if (q == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } 3488 | if (q->name && strcmp(q->name, x) == 0) { return q; } 3489 | } 3490 | 3491 | /* Search New Parsers */ 3492 | while (1) { 3493 | 3494 | p = va_arg(*st->va, mpc_parser_t*); 3495 | 3496 | st->parsers_num++; 3497 | st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); 3498 | st->parsers[st->parsers_num-1] = p; 3499 | 3500 | if (p == NULL || p->name == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } 3501 | if (p->name && strcmp(p->name, x) == 0) { return p; } 3502 | 3503 | } 3504 | 3505 | } 3506 | 3507 | } 3508 | 3509 | static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { 3510 | 3511 | mpca_grammar_st_t *st = s; 3512 | mpc_parser_t *p = mpca_grammar_find_parser(x, st); 3513 | free(x); 3514 | 3515 | if (p->name) { 3516 | return mpca_state(mpca_root(mpca_add_tag(p, p->name))); 3517 | } else { 3518 | return 
mpca_state(mpca_root(p)); 3519 | } 3520 | } 3521 | 3522 | mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { 3523 | 3524 | char *err_msg; 3525 | mpc_parser_t *err_out; 3526 | mpc_result_t r; 3527 | mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; 3528 | 3529 | GrammarTotal = mpc_new("grammar_total"); 3530 | Grammar = mpc_new("grammar"); 3531 | Term = mpc_new("term"); 3532 | Factor = mpc_new("factor"); 3533 | Base = mpc_new("base"); 3534 | 3535 | mpc_define(GrammarTotal, 3536 | mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) 3537 | ); 3538 | 3539 | mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, 3540 | Term, 3541 | mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), 3542 | mpc_soft_delete 3543 | )); 3544 | 3545 | mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); 3546 | 3547 | mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, 3548 | Base, 3549 | mpc_or(6, 3550 | mpc_sym("*"), 3551 | mpc_sym("+"), 3552 | mpc_sym("?"), 3553 | mpc_sym("!"), 3554 | mpc_tok_brackets(mpc_int(), free), 3555 | mpc_pass()), 3556 | mpc_soft_delete 3557 | )); 3558 | 3559 | mpc_define(Base, mpc_or(5, 3560 | mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), 3561 | mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), 3562 | mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), 3563 | mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), 3564 | mpc_tok_parens(Grammar, mpc_soft_delete) 3565 | )); 3566 | 3567 | mpc_optimise(GrammarTotal); 3568 | mpc_optimise(Grammar); 3569 | mpc_optimise(Factor); 3570 | mpc_optimise(Term); 3571 | mpc_optimise(Base); 3572 | 3573 | if(!mpc_parse("", grammar, GrammarTotal, &r)) { 3574 | err_msg = mpc_err_string(r.error); 3575 | err_out = mpc_failf("Invalid Grammar: %s", err_msg); 3576 | mpc_err_delete(r.error); 3577 | free(err_msg); 3578 | r.output = err_out; 3579 | 
} 3580 | 3581 | mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base); 3582 | 3583 | mpc_optimise(r.output); 3584 | 3585 | return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output; 3586 | 3587 | } 3588 | 3589 | mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) { 3590 | mpca_grammar_st_t st; 3591 | mpc_parser_t *res; 3592 | va_list va; 3593 | va_start(va, grammar); 3594 | 3595 | st.va = &va; 3596 | st.parsers_num = 0; 3597 | st.parsers = NULL; 3598 | st.flags = flags; 3599 | 3600 | res = mpca_grammar_st(grammar, &st); 3601 | free(st.parsers); 3602 | va_end(va); 3603 | return res; 3604 | } 3605 | 3606 | typedef struct { 3607 | char *ident; 3608 | char *name; 3609 | mpc_parser_t *grammar; 3610 | } mpca_stmt_t; 3611 | 3612 | static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) { 3613 | mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t)); 3614 | stmt->ident = ((char**)xs)[0]; 3615 | stmt->name = ((char**)xs)[1]; 3616 | stmt->grammar = ((mpc_parser_t**)xs)[3]; 3617 | (void) n; 3618 | free(((char**)xs)[2]); 3619 | free(((char**)xs)[4]); 3620 | 3621 | return stmt; 3622 | } 3623 | 3624 | static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) { 3625 | 3626 | int i; 3627 | mpca_stmt_t **stmts = malloc(sizeof(mpca_stmt_t*) * (n+1)); 3628 | 3629 | for (i = 0; i < n; i++) { 3630 | stmts[i] = xs[i]; 3631 | } 3632 | stmts[n] = NULL; 3633 | 3634 | return stmts; 3635 | } 3636 | 3637 | static void mpca_stmt_list_delete(mpc_val_t *x) { 3638 | 3639 | mpca_stmt_t **stmts = x; 3640 | 3641 | while(*stmts) { 3642 | mpca_stmt_t *stmt = *stmts; 3643 | free(stmt->ident); 3644 | free(stmt->name); 3645 | mpc_soft_delete(stmt->grammar); 3646 | free(stmt); 3647 | stmts++; 3648 | } 3649 | free(x); 3650 | 3651 | } 3652 | 3653 | static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) { 3654 | 3655 | mpca_grammar_st_t *st = s; 3656 | mpca_stmt_t *stmt; 3657 | mpca_stmt_t **stmts = x; 3658 | mpc_parser_t *left; 3659 | 3660 | while(*stmts) { 3661 | 
stmt = *stmts; 3662 | left = mpca_grammar_find_parser(stmt->ident, st); 3663 | if (st->flags & MPCA_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); } 3664 | if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); } 3665 | mpc_optimise(stmt->grammar); 3666 | mpc_define(left, stmt->grammar); 3667 | free(stmt->ident); 3668 | free(stmt->name); 3669 | free(stmt); 3670 | stmts++; 3671 | } 3672 | 3673 | free(x); 3674 | 3675 | return NULL; 3676 | } 3677 | 3678 | static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { 3679 | 3680 | mpc_result_t r; 3681 | mpc_err_t *e; 3682 | mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; 3683 | 3684 | Lang = mpc_new("lang"); 3685 | Stmt = mpc_new("stmt"); 3686 | Grammar = mpc_new("grammar"); 3687 | Term = mpc_new("term"); 3688 | Factor = mpc_new("factor"); 3689 | Base = mpc_new("base"); 3690 | 3691 | mpc_define(Lang, mpc_apply_to( 3692 | mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete), 3693 | mpca_stmt_list_apply_to, st 3694 | )); 3695 | 3696 | mpc_define(Stmt, mpc_and(5, mpca_stmt_afold, 3697 | mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"), 3698 | free, free, free, mpc_soft_delete 3699 | )); 3700 | 3701 | mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, 3702 | Term, 3703 | mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), 3704 | mpc_soft_delete 3705 | )); 3706 | 3707 | mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); 3708 | 3709 | mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, 3710 | Base, 3711 | mpc_or(6, 3712 | mpc_sym("*"), 3713 | mpc_sym("+"), 3714 | mpc_sym("?"), 3715 | mpc_sym("!"), 3716 | mpc_tok_brackets(mpc_int(), free), 3717 | mpc_pass()), 3718 | mpc_soft_delete 3719 | )); 3720 | 3721 | mpc_define(Base, mpc_or(5, 3722 | mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), 3723 | mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), 
3724 | mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), 3725 | mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), 3726 | mpc_tok_parens(Grammar, mpc_soft_delete) 3727 | )); 3728 | 3729 | mpc_optimise(Lang); 3730 | mpc_optimise(Stmt); 3731 | mpc_optimise(Grammar); 3732 | mpc_optimise(Term); 3733 | mpc_optimise(Factor); 3734 | mpc_optimise(Base); 3735 | 3736 | if (!mpc_parse_input(i, Lang, &r)) { 3737 | e = r.error; 3738 | } else { 3739 | e = NULL; 3740 | } 3741 | 3742 | mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base); 3743 | 3744 | return e; 3745 | } 3746 | 3747 | mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) { 3748 | mpca_grammar_st_t st; 3749 | mpc_input_t *i; 3750 | mpc_err_t *err; 3751 | 3752 | va_list va; 3753 | va_start(va, f); 3754 | 3755 | st.va = &va; 3756 | st.parsers_num = 0; 3757 | st.parsers = NULL; 3758 | st.flags = flags; 3759 | 3760 | i = mpc_input_new_file("", f); 3761 | err = mpca_lang_st(i, &st); 3762 | mpc_input_delete(i); 3763 | 3764 | free(st.parsers); 3765 | va_end(va); 3766 | return err; 3767 | } 3768 | 3769 | mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) { 3770 | mpca_grammar_st_t st; 3771 | mpc_input_t *i; 3772 | mpc_err_t *err; 3773 | 3774 | va_list va; 3775 | va_start(va, p); 3776 | 3777 | st.va = &va; 3778 | st.parsers_num = 0; 3779 | st.parsers = NULL; 3780 | st.flags = flags; 3781 | 3782 | i = mpc_input_new_pipe("", p); 3783 | err = mpca_lang_st(i, &st); 3784 | mpc_input_delete(i); 3785 | 3786 | free(st.parsers); 3787 | va_end(va); 3788 | return err; 3789 | } 3790 | 3791 | mpc_err_t *mpca_lang(int flags, const char *language, ...) 
{ 3792 | 3793 | mpca_grammar_st_t st; 3794 | mpc_input_t *i; 3795 | mpc_err_t *err; 3796 | 3797 | va_list va; 3798 | va_start(va, language); 3799 | 3800 | st.va = &va; 3801 | st.parsers_num = 0; 3802 | st.parsers = NULL; 3803 | st.flags = flags; 3804 | 3805 | i = mpc_input_new_string("", language); 3806 | err = mpca_lang_st(i, &st); 3807 | mpc_input_delete(i); 3808 | 3809 | free(st.parsers); 3810 | va_end(va); 3811 | return err; 3812 | } 3813 | 3814 | mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) { 3815 | 3816 | mpca_grammar_st_t st; 3817 | mpc_input_t *i; 3818 | mpc_err_t *err; 3819 | 3820 | va_list va; 3821 | 3822 | FILE *f = fopen(filename, "rb"); 3823 | 3824 | if (f == NULL) { 3825 | err = mpc_err_file(filename, "Unable to open file!"); 3826 | return err; 3827 | } 3828 | 3829 | va_start(va, filename); 3830 | 3831 | st.va = &va; 3832 | st.parsers_num = 0; 3833 | st.parsers = NULL; 3834 | st.flags = flags; 3835 | 3836 | i = mpc_input_new_file(filename, f); 3837 | err = mpca_lang_st(i, &st); 3838 | mpc_input_delete(i); 3839 | 3840 | free(st.parsers); 3841 | va_end(va); 3842 | 3843 | fclose(f); 3844 | 3845 | return err; 3846 | } 3847 | 3848 | static int mpc_nodecount_unretained(mpc_parser_t* p, int force) { 3849 | 3850 | int i, total; 3851 | 3852 | if (p->retained && !force) { return 0; } 3853 | 3854 | if (p->type == MPC_TYPE_EXPECT) { return 1 + mpc_nodecount_unretained(p->data.expect.x, 0); } 3855 | 3856 | if (p->type == MPC_TYPE_APPLY) { return 1 + mpc_nodecount_unretained(p->data.apply.x, 0); } 3857 | if (p->type == MPC_TYPE_APPLY_TO) { return 1 + mpc_nodecount_unretained(p->data.apply_to.x, 0); } 3858 | if (p->type == MPC_TYPE_PREDICT) { return 1 + mpc_nodecount_unretained(p->data.predict.x, 0); } 3859 | 3860 | if (p->type == MPC_TYPE_CHECK) { return 1 + mpc_nodecount_unretained(p->data.check.x, 0); } 3861 | if (p->type == MPC_TYPE_CHECK_WITH) { return 1 + mpc_nodecount_unretained(p->data.check_with.x, 0); } 3862 | 3863 | if (p->type == 
MPC_TYPE_NOT) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } 3864 | if (p->type == MPC_TYPE_MAYBE) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } 3865 | 3866 | if (p->type == MPC_TYPE_MANY) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3867 | if (p->type == MPC_TYPE_MANY1) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3868 | if (p->type == MPC_TYPE_COUNT) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3869 | 3870 | if (p->type == MPC_TYPE_OR) { 3871 | total = 1; 3872 | for(i = 0; i < p->data.or.n; i++) { 3873 | total += mpc_nodecount_unretained(p->data.or.xs[i], 0); 3874 | } 3875 | return total; 3876 | } 3877 | 3878 | if (p->type == MPC_TYPE_AND) { 3879 | total = 1; 3880 | for(i = 0; i < p->data.and.n; i++) { 3881 | total += mpc_nodecount_unretained(p->data.and.xs[i], 0); 3882 | } 3883 | return total; 3884 | } 3885 | 3886 | return 1; 3887 | 3888 | } 3889 | 3890 | void mpc_stats(mpc_parser_t* p) { 3891 | printf("Stats\n"); 3892 | printf("=====\n"); 3893 | printf("Node Count: %i\n", mpc_nodecount_unretained(p, 1)); 3894 | } 3895 | 3896 | static void mpc_optimise_unretained(mpc_parser_t *p, int force) { 3897 | 3898 | int i, n, m; 3899 | mpc_parser_t *t; 3900 | 3901 | if (p->retained && !force) { return; } 3902 | 3903 | /* Optimise Subexpressions */ 3904 | 3905 | if (p->type == MPC_TYPE_EXPECT) { mpc_optimise_unretained(p->data.expect.x, 0); } 3906 | if (p->type == MPC_TYPE_APPLY) { mpc_optimise_unretained(p->data.apply.x, 0); } 3907 | if (p->type == MPC_TYPE_APPLY_TO) { mpc_optimise_unretained(p->data.apply_to.x, 0); } 3908 | if (p->type == MPC_TYPE_CHECK) { mpc_optimise_unretained(p->data.check.x, 0); } 3909 | if (p->type == MPC_TYPE_CHECK_WITH) { mpc_optimise_unretained(p->data.check_with.x, 0); } 3910 | if (p->type == MPC_TYPE_PREDICT) { mpc_optimise_unretained(p->data.predict.x, 0); } 3911 | if (p->type == MPC_TYPE_NOT) { mpc_optimise_unretained(p->data.not.x, 0); } 3912 | if (p->type == 
MPC_TYPE_MAYBE) { mpc_optimise_unretained(p->data.not.x, 0); } 3913 | if (p->type == MPC_TYPE_MANY) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3914 | if (p->type == MPC_TYPE_MANY1) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3915 | if (p->type == MPC_TYPE_COUNT) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3916 | 3917 | if (p->type == MPC_TYPE_OR) { 3918 | for(i = 0; i < p->data.or.n; i++) { 3919 | mpc_optimise_unretained(p->data.or.xs[i], 0); 3920 | } 3921 | } 3922 | 3923 | if (p->type == MPC_TYPE_AND) { 3924 | for(i = 0; i < p->data.and.n; i++) { 3925 | mpc_optimise_unretained(p->data.and.xs[i], 0); 3926 | } 3927 | } 3928 | 3929 | /* Perform optimisations */ 3930 | 3931 | while (1) { 3932 | 3933 | /* Merge rhs `or` */ 3934 | if (p->type == MPC_TYPE_OR 3935 | && p->data.or.xs[p->data.or.n-1]->type == MPC_TYPE_OR 3936 | && !p->data.or.xs[p->data.or.n-1]->retained) { 3937 | t = p->data.or.xs[p->data.or.n-1]; 3938 | n = p->data.or.n; m = t->data.or.n; 3939 | p->data.or.n = n + m - 1; 3940 | p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3941 | memmove(p->data.or.xs + n - 1, t->data.or.xs, m * sizeof(mpc_parser_t*)); 3942 | free(t->data.or.xs); free(t->name); free(t); 3943 | continue; 3944 | } 3945 | 3946 | /* Merge lhs `or` */ 3947 | if (p->type == MPC_TYPE_OR 3948 | && p->data.or.xs[0]->type == MPC_TYPE_OR 3949 | && !p->data.or.xs[0]->retained) { 3950 | t = p->data.or.xs[0]; 3951 | n = p->data.or.n; m = t->data.or.n; 3952 | p->data.or.n = n + m - 1; 3953 | p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3954 | memmove(p->data.or.xs + m, p->data.or.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); 3955 | memmove(p->data.or.xs, t->data.or.xs, m * sizeof(mpc_parser_t*)); 3956 | free(t->data.or.xs); free(t->name); free(t); 3957 | continue; 3958 | } 3959 | 3960 | /* Remove ast `pass` */ 3961 | if (p->type == MPC_TYPE_AND 3962 | && p->data.and.n == 2 3963 | && p->data.and.xs[0]->type == MPC_TYPE_PASS 3964 
| && !p->data.and.xs[0]->retained 3965 | && p->data.and.f == mpcf_fold_ast) { 3966 | t = p->data.and.xs[1]; 3967 | mpc_delete(p->data.and.xs[0]); 3968 | free(p->data.and.xs); free(p->data.and.dxs); free(p->name); 3969 | memcpy(p, t, sizeof(mpc_parser_t)); 3970 | free(t); 3971 | continue; 3972 | } 3973 | 3974 | /* Merge ast lhs `and` */ 3975 | if (p->type == MPC_TYPE_AND 3976 | && p->data.and.f == mpcf_fold_ast 3977 | && p->data.and.xs[0]->type == MPC_TYPE_AND 3978 | && !p->data.and.xs[0]->retained 3979 | && p->data.and.xs[0]->data.and.f == mpcf_fold_ast) { 3980 | t = p->data.and.xs[0]; 3981 | n = p->data.and.n; m = t->data.and.n; 3982 | p->data.and.n = n + m - 1; 3983 | p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); 3984 | p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 3985 | memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); 3986 | memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); 3987 | for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } 3988 | free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 3989 | continue; 3990 | } 3991 | 3992 | /* Merge ast rhs `and` */ 3993 | if (p->type == MPC_TYPE_AND 3994 | && p->data.and.f == mpcf_fold_ast 3995 | && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND 3996 | && !p->data.and.xs[p->data.and.n-1]->retained 3997 | && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_fold_ast) { 3998 | t = p->data.and.xs[p->data.and.n-1]; 3999 | n = p->data.and.n; m = t->data.and.n; 4000 | p->data.and.n = n + m - 1; 4001 | p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); 4002 | p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 4003 | memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); 4004 | for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } 
4005 | free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 4006 | continue; 4007 | } 4008 | 4009 | /* Remove re `lift` */ 4010 | if (p->type == MPC_TYPE_AND 4011 | && p->data.and.n == 2 4012 | && p->data.and.xs[0]->type == MPC_TYPE_LIFT 4013 | && p->data.and.xs[0]->data.lift.lf == mpcf_ctor_str 4014 | && !p->data.and.xs[0]->retained 4015 | && p->data.and.f == mpcf_strfold) { 4016 | t = p->data.and.xs[1]; 4017 | mpc_delete(p->data.and.xs[0]); 4018 | free(p->data.and.xs); free(p->data.and.dxs); free(p->name); 4019 | memcpy(p, t, sizeof(mpc_parser_t)); 4020 | free(t); 4021 | continue; 4022 | } 4023 | 4024 | /* Merge re lhs `and` */ 4025 | if (p->type == MPC_TYPE_AND 4026 | && p->data.and.f == mpcf_strfold 4027 | && p->data.and.xs[0]->type == MPC_TYPE_AND 4028 | && !p->data.and.xs[0]->retained 4029 | && p->data.and.xs[0]->data.and.f == mpcf_strfold) { 4030 | t = p->data.and.xs[0]; 4031 | n = p->data.and.n; m = t->data.and.n; 4032 | p->data.and.n = n + m - 1; 4033 | p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); 4034 | p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 4035 | memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); 4036 | memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); 4037 | for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } 4038 | free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 4039 | continue; 4040 | } 4041 | 4042 | /* Merge re rhs `and` */ 4043 | if (p->type == MPC_TYPE_AND 4044 | && p->data.and.f == mpcf_strfold 4045 | && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND 4046 | && !p->data.and.xs[p->data.and.n-1]->retained 4047 | && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_strfold) { 4048 | t = p->data.and.xs[p->data.and.n-1]; 4049 | n = p->data.and.n; m = t->data.and.n; 4050 | p->data.and.n = n + m - 1; 4051 | p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) 
* (n + m -1)); 4052 | p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 4053 | memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); 4054 | for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } 4055 | free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 4056 | continue; 4057 | } 4058 | 4059 | return; 4060 | 4061 | } 4062 | 4063 | } 4064 | /* Optimise the parser tree rooted at p, in place. This is a thin wrapper: it only calls mpc_optimise_unretained(p, 1) and returns nothing. The recursive calls that mpc_optimise_unretained makes on sub-parsers pass 0 as the second argument, whereas this entry point passes 1. NOTE(review): the meaning of that second argument is established at the top of mpc_optimise_unretained, which is not shown here - presumably it lets the top-level parser be processed regardless of its retained state; confirm against that definition. */ 4065 | void mpc_optimise(mpc_parser_t *p) { 4066 | mpc_optimise_unretained(p, 1); 4067 | } 4068 | 4069 | --------------------------------------------------------------------------------