├── .gitattributes ├── .gitignore ├── LICENSE.md ├── Makefile ├── README.md ├── examples ├── Lenna.png ├── advanced.c ├── basic.c ├── bzip2.c ├── jfk.wav ├── mpc.c ├── oggenc.c └── prelude.lspy ├── package.json ├── tgc.c └── tgc.h /.gitattributes: -------------------------------------------------------------------------------- 1 | "* text=auto" 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.a 3 | *.o 4 | *.so 5 | *.exe 6 | *.dSYM 7 | examples/mpc 8 | examples/bzip2 9 | examples/basic 10 | examples/advanced 11 | examples/oggenc 12 | examples/jfk.ogg 13 | examples/Lenna.png.bz 14 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Licensed Under BSD 2 | 3 | Copyright (c) 2013, Daniel Holden 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 19 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | The views and conclusions contained in the software and documentation are those 27 | of the authors and should not be interpreted as representing official policies, 28 | either expressed or implied, of the FreeBSD Project. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC ?= gcc 2 | AR ?= ar 3 | CFLAGS = -ansi -O3 -fpic -pedantic -g -Wall -Wno-unused 4 | LFLAGS = -fpic 5 | ECFLAGS = -std=c99 -O3 -g 6 | 7 | INCDIR = $(PREFIX)/include 8 | LIBDIR = $(PREFIX)/lib 9 | 10 | OBJECT = tgc.o 11 | STATIC = libtgc.a 12 | DYNAMIC = libtgc.so 13 | 14 | ifeq ($(findstring MINGW,$(shell uname)),MINGW) 15 | CHECKER = 16 | else 17 | CHECKER = valgrind --undef-value-errors=no --leak-check=full 18 | endif 19 | 20 | all: $(STATIC) $(DYNAMIC) 21 | 22 | $(OBJECT): tgc.c tgc.h 23 | $(CC) -c $(CFLAGS) tgc.c 24 | 25 | $(DYNAMIC): $(OBJECT) 26 | $(CC) -shared -o $@ $^ 27 | 28 | $(STATIC): $(OBJECT) 29 | $(AR) rcs $@ $^ 30 | 31 | install: 32 | cp -f $(STATIC) $(LIBDIR) 33 | cp -f tgc.h $(INCDIR) 34 | 35 | check: 36 | $(CC) $(ECFLAGS) examples/basic.c tgc.c -o ./examples/basic && \ 37 | $(CHECKER) ./examples/basic 38 | $(CC) $(ECFLAGS) examples/advanced.c tgc.c -o ./examples/advanced && \ 39 | $(CHECKER) ./examples/advanced 40 | $(CC) $(ECFLAGS) examples/bzip2.c tgc.c -o ./examples/bzip2 && \ 41 | $(CHECKER) 
./examples/bzip2 -c ./examples/Lenna.png -9 > ./examples/Lenna.png.bz 42 | $(CC) $(ECFLAGS) examples/mpc.c tgc.c -o ./examples/mpc && \ 43 | $(CHECKER) ./examples/mpc ./examples/prelude.lspy 44 | $(CC) $(ECFLAGS) examples/oggenc.c tgc.c -lm -o ./examples/oggenc && \ 45 | $(CHECKER) ./examples/oggenc ./examples/jfk.wav 46 | 47 | clean: 48 | rm -rf $(STATIC) $(DYNAMIC) $(OBJECT) 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Tiny Garbage Collector 2 | ====================== 3 | 4 | About 5 | ----- 6 | 7 | `tgc` is a tiny garbage collector for C written in ~500 lines of code and based 8 | on the [Cello Garbage Collector](http://libcello.org/learn/garbage-collection). 9 | 10 | ```c 11 | #include "tgc.h" 12 | 13 | static tgc_t gc; 14 | 15 | static void example_function() { 16 | char *message = tgc_alloc(&gc, 64); 17 | strcpy(message, "No More Memory Leaks!"); 18 | } 19 | 20 | int main(int argc, char **argv) { 21 | tgc_start(&gc, &argc); 22 | 23 | example_function(); 24 | 25 | tgc_stop(&gc); 26 | } 27 | ``` 28 | 29 | Usage 30 | ----- 31 | 32 | `tgc` is a conservative, thread local, mark and sweep garbage collector, 33 | which supports destructors, and automatically frees memory allocated by 34 | `tgc_alloc` and friends after it becomes _unreachable_. 35 | 36 | A memory allocation is considered _reachable_ by `tgc` if... 37 | 38 | * a pointer points to it, located on the stack at least one function call 39 | deeper than the call to `tgc_start`, or, 40 | * a pointer points to it, inside memory allocated by `tgc_alloc` 41 | and friends. 42 | 43 | Otherwise a memory allocation is considered _unreachable_. 44 | 45 | Therefore some things that _don't_ qualify an allocation as _reachable_ are, 46 | if... 
47 | 48 | * a pointer points to an address inside of it, but not at the start of it, or, 49 | * a pointer points to it from inside the `static` data segment, or, 50 | * a pointer points to it from memory allocated by `malloc`, 51 | `calloc`, `realloc` or any other non-`tgc` allocation methods, or, 52 | * a pointer points to it from a different thread, or, 53 | * a pointer points to it from any other unreachable location. 54 | 55 | Given these conditions, `tgc` will free memory allocations some time after 56 | they become _unreachable_. To do this it performs an iteration of _mark and 57 | sweep_ when `tgc_alloc` is called and the number of memory allocations exceeds 58 | some threshold. It can also be run manually with `tgc_run`. 59 | 60 | Memory allocated by `tgc_alloc` can be manually freed with `tgc_free`, and 61 | destructors (functions to be run just before memory is freed), can be 62 | registered with `tgc_set_dtor`. 63 | 64 | 65 | Reference 66 | --------- 67 | 68 | ```c 69 | void tgc_start(tgc_t *gc, void *stk); 70 | ``` 71 | 72 | Start the garbage collector on the current thread, beginning at the stack 73 | location given by the `stk` variable. Usually this can be found using the 74 | address of any local variable, and then the garbage collector will cover all 75 | memory at least one function call deeper. 76 | 77 | * * * 78 | 79 | ```c 80 | void tgc_stop(tgc_t *gc); 81 | ``` 82 | 83 | Stop the garbage collector and free its internal memory. 84 | 85 | * * * 86 | 87 | ```c 88 | void tgc_run(tgc_t *gc); 89 | ``` 90 | 91 | Run an iteration of the garbage collector, freeing any unreachable memory. 92 | 93 | * * * 94 | 95 | ```c 96 | void tgc_pause(tgc_t *gc); 97 | void tgc_resume(tgc_t *gc); 98 | ``` 99 | 100 | Pause or resume the garbage collector. While paused the garbage collector will 101 | not run during any allocations made. 
102 | 103 | * * * 104 | 105 | ```c 106 | void *tgc_alloc(tgc_t *gc, size_t size); 107 | ``` 108 | 109 | Allocate memory via the garbage collector to be automatically freed once it 110 | becomes unreachable. 111 | 112 | * * * 113 | 114 | ```c 115 | void *tgc_calloc(tgc_t *gc, size_t num, size_t size); 116 | ``` 117 | 118 | Allocate memory via the garbage collector and initialise it to zero. 119 | 120 | * * * 121 | 122 | ```c 123 | void *tgc_realloc(tgc_t *gc, void *ptr, size_t size); 124 | ``` 125 | 126 | Reallocate memory allocated by the garbage collector. 127 | 128 | * * * 129 | 130 | ```c 131 | void tgc_free(tgc_t *gc, void *ptr); 132 | ``` 133 | 134 | Manually free an allocation made by the garbage collector. Runs any destructor 135 | if registered. 136 | 137 | * * * 138 | 139 | ```c 140 | void *tgc_alloc_opt(tgc_t *gc, size_t size, int flags, void(*dtor)(void*)); 141 | ``` 142 | 143 | Allocate memory via the garbage collector with the given flags and destructor. 144 | 145 | For the `flags` argument, the flag `TGC_ROOT` may be specified to indicate that 146 | the allocation is a garbage collection _root_ and so should not be 147 | automatically freed and instead will be manually freed by the user with 148 | `tgc_free`. Because roots are not automatically freed, they can exist in 149 | normally unreachable locations such as in the `static` data segment or in 150 | memory allocated by `malloc`. 151 | 152 | The flag `TGC_LEAF` may be specified to indicate that the allocation is a 153 | garbage collection _leaf_ and so contains no pointers to other allocations 154 | inside. This can benefit performance in many cases. For example, when 155 | allocating a large string there is no point the garbage collector scanning 156 | this allocation - it can take a long time and doesn't contain any pointers. 157 | 158 | Otherwise the `flags` argument can be set to zero.
159 | 160 | The `dtor` argument lets the user specify a _destructor_ function to be run 161 | just before the memory is freed. Destructors have many uses, for example they 162 | are often used to automatically release system resources (such as file handles) 163 | when a data structure is finished with them. For no destructor the value `NULL` 164 | can be used. 165 | 166 | * * * 167 | 168 | ```c 169 | void *tgc_calloc_opt(tgc_t *gc, size_t num, size_t size, int flags, void(*dtor)(void*)); 170 | ``` 171 | 172 | Allocate memory via the garbage collector with the given flags and destructor 173 | and initialise it to zero. 174 | 175 | * * * 176 | 177 | ```c 178 | void tgc_set_dtor(tgc_t *gc, void *ptr, void(*dtor)(void*)); 179 | ``` 180 | 181 | Register a destructor function to be called after the memory allocation `ptr` 182 | becomes unreachable, and just before it is freed by the garbage collector. 183 | 184 | * * * 185 | 186 | ```c 187 | void tgc_set_flags(tgc_t *gc, void *ptr, int flags); 188 | ``` 189 | 190 | Set the flags associated with a memory allocation, for example the value 191 | `TGC_ROOT` can be used to specify that an allocation is a garbage collection 192 | root. 193 | 194 | * * * 195 | 196 | ```c 197 | int tgc_get_flags(tgc_t *gc, void *ptr); 198 | ``` 199 | 200 | Get the flags associated with a memory allocation. 201 | 202 | * * * 203 | 204 | ```c 205 | void(*tgc_get_dtor(tgc_t *gc, void *ptr))(void*); 206 | ``` 207 | 208 | Get the destructor associated with a memory allocation. 209 | 210 | * * * 211 | 212 | ```c 213 | size_t tgc_get_size(tgc_t *gc, void *ptr); 214 | ``` 215 | 216 | Get the size of a memory allocation. 217 | 218 | F.A.Q 219 | ----- 220 | 221 | ### Is this real/safe/portable? 222 | 223 | Definitely!
While there is no way to create a _completely_ safe/portable 224 | garbage collector in C this collector doesn't use any platform specific tricks 225 | and only makes the most basic assumptions about the platform, such as that the 226 | architecture uses a contiguous call stack to implement function frames. 227 | 228 | It _should_ be safe to use for more or less all reasonable architectures found 229 | in the wild and has been tested on Linux, Windows, and OSX, where it was easily 230 | integrated into several large real world programs (see `examples`) such as 231 | `bzip2` and `oggenc` without issue. 232 | 233 | Saying all of that, there are the normal warnings - this library performs 234 | _undefined behaviour_ as specified by the C standard and so you use it at your 235 | own risk - there is no guarantee that something like a compiler or OS update 236 | won't mysteriously break it. 237 | 238 | 239 | ### What happens when some data just happens to look like a pointer? 240 | 241 | In this unlikely case `tgc` will treat the data as a pointer and assume that 242 | the memory allocation it points to is still reachable. If this is causing your 243 | application trouble by not allowing a large memory allocation to be freed 244 | consider freeing it manually with `tgc_free`. 245 | 246 | 247 | ### `tgc` isn't working when I increment pointers! 248 | 249 | Due to the way `tgc` works, it always needs a pointer to the start of each 250 | memory allocation to be reachable. This can break algorithms such as the 251 | following, which work by incrementing a pointer. 252 | 253 | ```c 254 | void bad_function(char *y) { 255 | char *x = tgc_alloc(&gc, strlen(y) + 1); 256 | strcpy(x, y); 257 | while (*x) { 258 | do_some_processing(x); 259 | x++; 260 | } 261 | } 262 | ``` 263 | 264 | Here, when `x` is incremented, it no longer points to the start of the memory 265 | allocation made by `tgc_alloc`.
Then during `do_some_processing`, if a sweep 266 | is performed, `x` will be declared as unreachable and the memory freed. 267 | 268 | If the pointer `x` is also stored elsewhere such as inside a heap structure 269 | there is no issue with incrementing a copy of it - so most of the time you 270 | don't need to worry, but occasionally you may need to adjust algorithms which 271 | do significant pointer arithmetic. For example, in this case the pointer can be 272 | left as-is and an integer used to index it instead: 273 | 274 | ```c 275 | void good_function(char *y) { 276 | int i; 277 | char *x = tgc_alloc(&gc, strlen(y) + 1); 278 | strcpy(x, y); 279 | for (i = 0; i < strlen(x); i++) { 280 | do_some_processing(&x[i]); 281 | } 282 | } 283 | ``` 284 | 285 | For now this is the behaviour of `tgc` until I think of a way to 286 | deal with offset pointers nicely. 287 | 288 | 289 | ### `tgc` isn't working when optimisations are enabled! 290 | 291 | Variables are only considered reachable if they are at least one function call deeper 292 | than the call to `tgc_start`. If optimisations are enabled sometimes the 293 | compiler will inline functions which removes this one level of indirection. 294 | 295 | The most portable way to get compilers not to inline functions is to call them 296 | through `volatile` function pointers. 297 | 298 | ```c 299 | static tgc_t gc; 300 | 301 | void please_dont_inline(void) { 302 | ... 303 | } 304 | 305 | int main(int argc, char **argv) { 306 | 307 | tgc_start(&gc, &argc); 308 | 309 | void (*volatile func)(void) = please_dont_inline; 310 | func(); 311 | 312 | tgc_stop(&gc); 313 | 314 | return 1; 315 | } 316 | ``` 317 | 318 | ### `tgc` isn't working with `setjmp` and `longjmp`! 319 | 320 | Unfortunately `tgc` doesn't work properly with `setjmp` and `longjmp` since 321 | these functions can cause complex stack behaviour. One simple option is to 322 | disable the garbage collector while using these functions and to re-enable 323 | it afterwards.
324 | 325 | ### Why do I get _uninitialised values_ warnings with Valgrind? 326 | 327 | The garbage collector scans the stack memory and this naturally contains 328 | uninitialised values. It scans memory safely, but if you are running through 329 | Valgrind these accesses will be reported as warnings/errors. Other than this 330 | `tgc` shouldn't have any memory errors in Valgrind, so the easiest way to 331 | disable these to examine any real problems is to run Valgrind with the option 332 | `--undef-value-errors=no`. 333 | 334 | ### Is `tgc` fast? 335 | 336 | At the moment `tgc` has decent performance - it is competitive with many 337 | existing memory management systems - but definitely can't claim to be the 338 | fastest garbage collector on the market. Saying that, there is a fair amount of 339 | low hanging fruit for anyone interested in optimising it - so some potential to 340 | be faster exists. 341 | 342 | 343 | How it Works 344 | ------------ 345 | 346 | For a basic _mark and sweep_ garbage collector two things are required. The 347 | first thing is a list of all of the allocations made by the program. The second 348 | is a list of all the allocations _in use_ by the program at any given time. 349 | With these two things the algorithm is simple - compare the two lists and free 350 | any allocations which are in the first list, but not in the second - exactly 351 | those allocations which are no longer in use. 352 | 353 | Getting a list of all the allocations made by the program is relatively 354 | simple. We make the programmer use a special function we've prepared (in this 355 | case `tgc_alloc`) which allocates memory, and then adds a pointer to that 356 | memory to an internal list. If at any point this allocation is freed (such as 357 | by `tgc_free`), it is removed from the list. 358 | 359 | The second list is the difficult one - the list of allocations _in use_ by the 360 | program.
At first, with C's semantics, pointer arithmetic, and all the crazy 361 | flexibility that comes with it, it might seem like finding all the allocations 362 | in use by the program at any point in time is impossible, and to some extent 363 | you'd be right. It can actually be shown that this problem reduces to the 364 | halting problem in the most general case - even for languages saner than C - 365 | but by slightly adjusting our problem statement, and assuming we are only 366 | dealing with a set of _well behaved_ C programs of some form, we can come up 367 | with something that works. 368 | 369 | First we have to relax our goal a little. Instead of trying to find all of 370 | the memory allocations _in use_ by a program, we can instead try to find all 371 | the _reachable_ memory allocations - those allocations which have a pointer 372 | pointing to them somewhere in the program's memory. The distinction here is 373 | subtle but important. For example, I _could_ write a C program which makes an 374 | allocation, encodes the returned pointer as a string, and performs `rot13` on 375 | that string, later on decoding the string, casting it back to a pointer, 376 | and using the memory as if nothing had happened. This is a perfectly valid C 377 | program, and the crazy memory allocation is _in use_ throughout. It is just 378 | that during the pointer's `rot13` encoding there is no practical way to know 379 | that this memory allocation is still going to be used later on. 380 | 381 | So instead we want to make a list of all memory allocations which are pointed 382 | to by pointers in the program's memory. For most _well behaved_ C programs this 383 | is enough to tell if an allocation is in use. 384 | 385 | In general, memory in C exists in three different segments. We have the stack, 386 | the heap, and the data segment. This means - if a pointer to a certain 387 | allocation exists in the program's memory it must be in one of these locations.
388 | Now the challenge is to find these locations, and scan them for pointers. 389 | 390 | The data segment is the most difficult - there is no portable way to get the 391 | bounds of this segment. But because the data segment is somewhat limited in use 392 | we can choose to ignore it - we tell users that allocations only pointed to 393 | from the data segment are not considered reachable. 394 | 395 | As an aside, for programmers coming from other languages, this might seem like 396 | a poor solution - to simply ask the programmer not to store pointers to 397 | allocations in this segment - and in many ways it is. It is never a good 398 | interface to _request_ the programmer do something in the documentation - 399 | instead it is better to handle every edge case to make it impossible for them 400 | to create an error. But this is C - in C programmers are constantly asked _not_ 401 | to do things which are perfectly possible. In fact - one of the very things 402 | this library is trying to deal with is the fact that programmers are only 403 | _asked_ to make sure they free dynamically allocated memory - there is no 404 | system in place to enforce this. So _for C_ this is a perfectly reasonable 405 | interface. And there is an added advantage - it makes the implementation far 406 | more simple - far more adaptable. In other words - [Worse Is Better](https://en.wikipedia.org/wiki/Worse_is_better). 407 | 408 | With the data segment covered we have the heap and the stack. If we consider 409 | only the heap allocations which have been made via `tgc_alloc` and friends then 410 | our job is again made easy - in our list of all allocations we also store the 411 | size of each allocation. Then, if we need to scan one of the memory regions 412 | we've allocated, the task is made easy. 413 | 414 | With the heap and the data segment covered, this leaves us with the stack - 415 | this is the most tricky segment. 
The stack is something we don't have any 416 | control over, but we do know that for most reasonable implementations of C, the 417 | stack is a contiguous area of memory that is expanded downwards (or for some 418 | implementations upwards, but it doesn't matter) for each function call. It 419 | contains the most important memory in regards to reachability - all of the 420 | local variables used in functions. 421 | 422 | If we can get the memory addresses of the top and the bottom of the stack we 423 | can scan the memory in between as if it were heap memory, and add to our list of 424 | reachable pointers all those found in between. 425 | 426 | Assuming the stack grows from top to bottom we can get a conservative 427 | approximation of the bottom of the stack by just taking the address of some 428 | local variable. 429 | 430 | ```c 431 | void *stack_bottom(void) { 432 | int x; 433 | return &x; 434 | } 435 | ``` 436 | 437 | This address should cover the memory of all the local variables for whichever 438 | function calls it. For this reason we need to ensure two things before we 439 | actually do call it. First we want to make sure we flush all of the values in 440 | the registers onto the stack so that we don't miss a pointer hiding in a 441 | register, and secondly we want to make sure the call to `stack_bottom` isn't 442 | inlined by the compiler. 443 | 444 | We can spill the registers into stack memory in a somewhat portable way with 445 | `setjmp` - which puts the registers into a `jmp_buf` variable. And we can 446 | ensure that the function is not inlined by only calling it via a volatile 447 | function pointer. The `volatile` keyword forces the compiler to always manually 448 | read the pointer value from memory before calling the function, ensuring it 449 | cannot be inlined.
450 | 451 | ```c 452 | void *get_stack_bottom(void) { 453 | jmp_buf env; 454 | setjmp(env); 455 | void *(*volatile f)(void) = stack_bottom; 456 | return f(); 457 | } 458 | ``` 459 | 460 | To get the top of the stack we can again get the address of a local variable. 461 | This time it is easier if we simply ask the programmer to supply us with one. 462 | If the programmer wishes for the garbage collector to scan the whole stack they 463 | can give the address of a local variable in `main`. This address should cover 464 | all function calls one deeper than `main`. This we can store in some global 465 | (or local) variable. 466 | 467 | 468 | ```c 469 | static void *stack_top = NULL; 470 | 471 | int main(int argc, char **argv) { 472 | stack_top = &argc; 473 | run_program(argc, argv); 474 | return 1; 475 | } 476 | ``` 477 | 478 | Now, at any point we can get a safe approximate upper and lower bound of the 479 | stack memory, allowing us to scan it for pointers. We interpret each bound as a 480 | `void **` - a pointer to an array of pointers, and iterate, interpreting the 481 | memory in between as pointers.
482 | 483 | ```c 484 | void mark(void) { 485 | void **p; 486 | void **t = stack_top; 487 | void **b = get_stack_bottom(); 488 | 489 | for (p = b; p < t; p++) { 490 | scan(*p); 491 | } 492 | } 493 | ``` 494 | 495 | 496 | 497 | 498 | -------------------------------------------------------------------------------- /examples/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orangeduck/tgc/81ce102bfd52ee7dba53ec69b4d6a81b0e58c8d4/examples/Lenna.png -------------------------------------------------------------------------------- /examples/advanced.c: -------------------------------------------------------------------------------- 1 | #include "../tgc.h" 2 | 3 | typedef struct object { 4 | char *b; 5 | int a; 6 | struct object *first; 7 | struct object *second; 8 | struct object **others; 9 | } object; 10 | 11 | static tgc_t gc; 12 | 13 | static void waste(void) { 14 | object *x = tgc_alloc(&gc, sizeof(object)); 15 | } 16 | 17 | static object *object_new(void) { 18 | object *x = tgc_alloc(&gc, sizeof(object)); 19 | waste(); 20 | x->a = 1; 21 | x->b = tgc_calloc(&gc, 1, 100); 22 | x->b[0] = 'a'; 23 | x->first = tgc_alloc(&gc, sizeof(object)); 24 | waste(); 25 | x->second = tgc_alloc(&gc, sizeof(object)); 26 | waste(); 27 | x->others = NULL; 28 | waste(); 29 | return x; 30 | } 31 | 32 | static void object_resize(object *x, int num) { 33 | x->others = tgc_realloc(&gc, x->others, sizeof(object*) * num); 34 | waste(); 35 | } 36 | 37 | static void example_function(int depth) { 38 | object *x = object_new(); 39 | object *y = object_new(); 40 | 41 | object_resize(x, 100); 42 | object_resize(y, 50); 43 | 44 | object_resize(x, 75); 45 | object_resize(y, 75); 46 | 47 | if (depth < 10) {example_function(depth+1); } 48 | 49 | x->others[10] = object_new(); 50 | y->others[10] = object_new(); 51 | 52 | x->others[25] = object_new(); 53 | object_resize(x->others[25], 30); 54 | 55 | } 56 | 57 | int main(int argc,
char **argv) { 58 | 59 | tgc_start(&gc, &argc); 60 | 61 | example_function(0); 62 | 63 | tgc_stop(&gc); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /examples/basic.c: -------------------------------------------------------------------------------- 1 | #include "../tgc.h" 2 | 3 | static tgc_t gc; 4 | 5 | static void example_function() { 6 | void *memory = tgc_alloc(&gc, 1024); 7 | } 8 | 9 | int main(int argc, char **argv) { 10 | 11 | tgc_start(&gc, &argc); 12 | 13 | example_function(); 14 | 15 | tgc_stop(&gc); 16 | 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /examples/jfk.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orangeduck/tgc/81ce102bfd52ee7dba53ec69b4d6a81b0e58c8d4/examples/jfk.wav -------------------------------------------------------------------------------- /examples/mpc.c: -------------------------------------------------------------------------------- 1 | #include "../tgc.h" 2 | 3 | static tgc_t gc; 4 | static void nothing(void *x) {} 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | /* 15 | ** State Type 16 | */ 17 | 18 | typedef struct { 19 | long pos; 20 | long row; 21 | long col; 22 | } mpc_state_t; 23 | 24 | /* 25 | ** Error Type 26 | */ 27 | 28 | typedef struct { 29 | mpc_state_t state; 30 | int expected_num; 31 | char *filename; 32 | char *failure; 33 | char **expected; 34 | char recieved; 35 | } mpc_err_t; 36 | 37 | void mpc_err_delete(mpc_err_t *e); 38 | char *mpc_err_string(mpc_err_t *e); 39 | void mpc_err_print(mpc_err_t *e); 40 | void mpc_err_print_to(mpc_err_t *e, FILE *f); 41 | 42 | /* 43 | ** Parsing 44 | */ 45 | 46 | typedef void mpc_val_t; 47 | 48 | typedef union { 49 | mpc_err_t *error; 50 | mpc_val_t *output; 51 | } mpc_result_t; 52 | 53 | struct mpc_parser_t; 54 | typedef 
struct mpc_parser_t mpc_parser_t; 55 | 56 | int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); 57 | int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); 58 | int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); 59 | int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); 60 | 61 | /* 62 | ** Function Types 63 | */ 64 | 65 | typedef void(*mpc_dtor_t)(mpc_val_t*); 66 | typedef mpc_val_t*(*mpc_ctor_t)(void); 67 | 68 | typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); 69 | typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); 70 | typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); 71 | 72 | /* 73 | ** Building a Parser 74 | */ 75 | 76 | mpc_parser_t *mpc_new(const char *name); 77 | mpc_parser_t *mpc_copy(mpc_parser_t *a); 78 | mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); 79 | mpc_parser_t *mpc_undefine(mpc_parser_t *p); 80 | 81 | void mpc_delete(mpc_parser_t *p); 82 | void mpc_cleanup(int n, ...); 83 | 84 | /* 85 | ** Basic Parsers 86 | */ 87 | 88 | mpc_parser_t *mpc_any(void); 89 | mpc_parser_t *mpc_char(char c); 90 | mpc_parser_t *mpc_range(char s, char e); 91 | mpc_parser_t *mpc_oneof(const char *s); 92 | mpc_parser_t *mpc_noneof(const char *s); 93 | mpc_parser_t *mpc_satisfy(int(*f)(char)); 94 | mpc_parser_t *mpc_string(const char *s); 95 | 96 | /* 97 | ** Other Parsers 98 | */ 99 | 100 | mpc_parser_t *mpc_pass(void); 101 | mpc_parser_t *mpc_fail(const char *m); 102 | mpc_parser_t *mpc_failf(const char *fmt, ...); 103 | mpc_parser_t *mpc_lift(mpc_ctor_t f); 104 | mpc_parser_t *mpc_lift_val(mpc_val_t *x); 105 | mpc_parser_t *mpc_anchor(int(*f)(char,char)); 106 | mpc_parser_t *mpc_state(void); 107 | 108 | /* 109 | ** Combinator Parsers 110 | */ 111 | 112 | mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); 113 | mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); 114 | mpc_parser_t 
*mpc_apply(mpc_parser_t *a, mpc_apply_t f); 115 | mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); 116 | 117 | mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); 118 | mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); 119 | mpc_parser_t *mpc_maybe(mpc_parser_t *a); 120 | mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); 121 | 122 | mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); 123 | mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); 124 | mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); 125 | 126 | mpc_parser_t *mpc_or(int n, ...); 127 | mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); 128 | 129 | mpc_parser_t *mpc_predictive(mpc_parser_t *a); 130 | 131 | /* 132 | ** Common Parsers 133 | */ 134 | 135 | mpc_parser_t *mpc_eoi(void); 136 | mpc_parser_t *mpc_soi(void); 137 | 138 | mpc_parser_t *mpc_boundary(void); 139 | 140 | mpc_parser_t *mpc_whitespace(void); 141 | mpc_parser_t *mpc_whitespaces(void); 142 | mpc_parser_t *mpc_blank(void); 143 | 144 | mpc_parser_t *mpc_newline(void); 145 | mpc_parser_t *mpc_tab(void); 146 | mpc_parser_t *mpc_escape(void); 147 | 148 | mpc_parser_t *mpc_digit(void); 149 | mpc_parser_t *mpc_hexdigit(void); 150 | mpc_parser_t *mpc_octdigit(void); 151 | mpc_parser_t *mpc_digits(void); 152 | mpc_parser_t *mpc_hexdigits(void); 153 | mpc_parser_t *mpc_octdigits(void); 154 | 155 | mpc_parser_t *mpc_lower(void); 156 | mpc_parser_t *mpc_upper(void); 157 | mpc_parser_t *mpc_alpha(void); 158 | mpc_parser_t *mpc_underscore(void); 159 | mpc_parser_t *mpc_alphanum(void); 160 | 161 | mpc_parser_t *mpc_int(void); 162 | mpc_parser_t *mpc_hex(void); 163 | mpc_parser_t *mpc_oct(void); 164 | mpc_parser_t *mpc_number(void); 165 | 166 | mpc_parser_t *mpc_real(void); 167 | mpc_parser_t *mpc_float(void); 168 | 169 | mpc_parser_t *mpc_char_lit(void); 170 | mpc_parser_t *mpc_string_lit(void); 171 | mpc_parser_t *mpc_regex_lit(void); 172 | 173 | mpc_parser_t 
*mpc_ident(void); 174 | 175 | /* 176 | ** Useful Parsers 177 | */ 178 | 179 | mpc_parser_t *mpc_startwith(mpc_parser_t *a); 180 | mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da); 181 | mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da); 182 | 183 | mpc_parser_t *mpc_stripl(mpc_parser_t *a); 184 | mpc_parser_t *mpc_stripr(mpc_parser_t *a); 185 | mpc_parser_t *mpc_strip(mpc_parser_t *a); 186 | mpc_parser_t *mpc_tok(mpc_parser_t *a); 187 | mpc_parser_t *mpc_sym(const char *s); 188 | mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da); 189 | 190 | mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); 191 | mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad); 192 | mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad); 193 | mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad); 194 | mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad); 195 | 196 | mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); 197 | mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad); 198 | mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad); 199 | mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad); 200 | mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad); 201 | 202 | /* 203 | ** Common Function Parameters 204 | */ 205 | 206 | void mpcf_dtor_null(mpc_val_t *x); 207 | 208 | mpc_val_t *mpcf_ctor_null(void); 209 | mpc_val_t *mpcf_ctor_str(void); 210 | 211 | mpc_val_t *mpcf_free(mpc_val_t *x); 212 | mpc_val_t *mpcf_int(mpc_val_t *x); 213 | mpc_val_t *mpcf_hex(mpc_val_t *x); 214 | mpc_val_t *mpcf_oct(mpc_val_t *x); 215 | mpc_val_t *mpcf_float(mpc_val_t *x); 216 | mpc_val_t *mpcf_strtriml(mpc_val_t *x); 217 | mpc_val_t *mpcf_strtrimr(mpc_val_t *x); 218 | mpc_val_t *mpcf_strtrim(mpc_val_t *x); 219 | 220 | mpc_val_t *mpcf_escape(mpc_val_t *x); 221 | mpc_val_t *mpcf_escape_regex(mpc_val_t *x); 222 | mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x); 223 | 
mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x); 224 | 225 | mpc_val_t *mpcf_unescape(mpc_val_t *x); 226 | mpc_val_t *mpcf_unescape_regex(mpc_val_t *x); 227 | mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x); 228 | mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x); 229 | 230 | mpc_val_t *mpcf_null(int n, mpc_val_t** xs); 231 | mpc_val_t *mpcf_fst(int n, mpc_val_t** xs); 232 | mpc_val_t *mpcf_snd(int n, mpc_val_t** xs); 233 | mpc_val_t *mpcf_trd(int n, mpc_val_t** xs); 234 | 235 | mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs); 236 | mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs); 237 | mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs); 238 | 239 | mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs); 240 | mpc_val_t *mpcf_maths(int n, mpc_val_t** xs); 241 | 242 | /* 243 | ** Regular Expression Parsers 244 | */ 245 | 246 | mpc_parser_t *mpc_re(const char *re); 247 | 248 | /* 249 | ** AST 250 | */ 251 | 252 | typedef struct mpc_ast_t { 253 | char *tag; 254 | char *contents; 255 | mpc_state_t state; 256 | int children_num; 257 | struct mpc_ast_t** children; 258 | } mpc_ast_t; 259 | 260 | mpc_ast_t *mpc_ast_new(const char *tag, const char *contents); 261 | mpc_ast_t *mpc_ast_build(int n, const char *tag, ...); 262 | mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a); 263 | mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a); 264 | mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t); 265 | mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t); 266 | mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s); 267 | 268 | void mpc_ast_delete(mpc_ast_t *a); 269 | void mpc_ast_print(mpc_ast_t *a); 270 | void mpc_ast_print_to(mpc_ast_t *a, FILE *fp); 271 | 272 | /* 273 | ** Warning: This function currently doesn't test for equality of the `state` member! 
274 | */ 275 | int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b); 276 | 277 | mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **as); 278 | mpc_val_t *mpcf_str_ast(mpc_val_t *c); 279 | mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs); 280 | 281 | mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t); 282 | mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t); 283 | mpc_parser_t *mpca_root(mpc_parser_t *a); 284 | mpc_parser_t *mpca_state(mpc_parser_t *a); 285 | mpc_parser_t *mpca_total(mpc_parser_t *a); 286 | 287 | mpc_parser_t *mpca_not(mpc_parser_t *a); 288 | mpc_parser_t *mpca_maybe(mpc_parser_t *a); 289 | 290 | mpc_parser_t *mpca_many(mpc_parser_t *a); 291 | mpc_parser_t *mpca_many1(mpc_parser_t *a); 292 | mpc_parser_t *mpca_count(int n, mpc_parser_t *a); 293 | 294 | mpc_parser_t *mpca_or(int n, ...); 295 | mpc_parser_t *mpca_and(int n, ...); 296 | 297 | enum { 298 | MPCA_LANG_DEFAULT = 0, 299 | MPCA_LANG_PREDICTIVE = 1, 300 | MPCA_LANG_WHITESPACE_SENSITIVE = 2 301 | }; 302 | 303 | mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); 304 | 305 | mpc_err_t *mpca_lang(int flags, const char *language, ...); 306 | mpc_err_t *mpca_lang_file(int flags, FILE *f, ...); 307 | mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...); 308 | mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); 309 | 310 | /* 311 | ** Misc 312 | */ 313 | 314 | 315 | void mpc_print(mpc_parser_t *p); 316 | void mpc_optimise(mpc_parser_t *p); 317 | void mpc_stats(mpc_parser_t *p); 318 | 319 | int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, 320 | int(*tester)(const void*, const void*), 321 | mpc_dtor_t destructor, 322 | void(*printer)(const void*)); 323 | 324 | int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, 325 | int(*tester)(const void*, const void*), 326 | mpc_dtor_t destructor, 327 | void(*printer)(const void*)); 328 | 329 | /* 330 | ** State Type 331 | */ 332 | 333 | static mpc_state_t mpc_state_invalid(void) { 334 | mpc_state_t s; 335 | 
s.pos = -1; 336 | s.row = -1; 337 | s.col = -1; 338 | return s; 339 | } 340 | 341 | static mpc_state_t mpc_state_new(void) { 342 | mpc_state_t s; 343 | s.pos = 0; 344 | s.row = 0; 345 | s.col = 0; 346 | return s; 347 | } 348 | 349 | /* 350 | ** Input Type 351 | */ 352 | 353 | /* 354 | ** In mpc the input type has three modes of 355 | ** operation: String, File and Pipe. 356 | ** 357 | ** String is easy. The whole contents are 358 | ** loaded into a buffer and scanned through. 359 | ** The cursor can jump around at will making 360 | ** backtracking easy. 361 | ** 362 | ** The second is a File which is also somewhat 363 | ** easy. The contents are never loaded into 364 | ** memory but backtracking can still be achieved 365 | ** by seeking in the file at different positions. 366 | ** 367 | ** The final mode is Pipe. This is the difficult 368 | ** one. As we assume pipes cannot be seeked - and 369 | ** only support a single character lookahead at 370 | ** any point, when the input is marked for a 371 | ** potential backtracking we start buffering any 372 | ** input. 373 | ** 374 | ** This means that if we are requested to seek 375 | ** back we can simply start reading from the 376 | ** buffer instead of the input. 377 | ** 378 | ** Of course using `mpc_predictive` will disable 379 | ** backtracking and make LL(1) grammars easy 380 | ** to parse for all input methods. 
381 | ** 382 | */ 383 | 384 | enum { 385 | MPC_INPUT_STRING = 0, 386 | MPC_INPUT_FILE = 1, 387 | MPC_INPUT_PIPE = 2 388 | }; 389 | 390 | enum { 391 | MPC_INPUT_MARKS_MIN = 32 392 | }; 393 | 394 | enum { 395 | MPC_INPUT_MEM_NUM = 512 396 | }; 397 | 398 | typedef struct { 399 | char mem[64]; 400 | } mpc_mem_t; 401 | 402 | typedef struct { 403 | 404 | int type; 405 | char *filename; 406 | mpc_state_t state; 407 | 408 | char *string; 409 | char *buffer; 410 | FILE *file; 411 | 412 | int suppress; 413 | int backtrack; 414 | int marks_slots; 415 | int marks_num; 416 | mpc_state_t *marks; 417 | 418 | char *lasts; 419 | char last; 420 | 421 | size_t mem_index; 422 | char mem_full[MPC_INPUT_MEM_NUM]; 423 | mpc_mem_t mem[MPC_INPUT_MEM_NUM]; 424 | 425 | } mpc_input_t; 426 | 427 | static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) { 428 | 429 | mpc_input_t *i = tgc_alloc(&gc, sizeof(mpc_input_t)); 430 | 431 | i->filename = tgc_alloc(&gc, strlen(filename) + 1); 432 | strcpy(i->filename, filename); 433 | i->type = MPC_INPUT_STRING; 434 | 435 | i->state = mpc_state_new(); 436 | 437 | i->string = tgc_alloc(&gc, strlen(string) + 1); 438 | strcpy(i->string, string); 439 | i->buffer = NULL; 440 | i->file = NULL; 441 | 442 | i->suppress = 0; 443 | i->backtrack = 1; 444 | i->marks_num = 0; 445 | i->marks_slots = MPC_INPUT_MARKS_MIN; 446 | i->marks = tgc_alloc(&gc, sizeof(mpc_state_t) * i->marks_slots); 447 | i->lasts = tgc_alloc(&gc, sizeof(char) * i->marks_slots); 448 | i->last = '\0'; 449 | 450 | i->mem_index = 0; 451 | memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 452 | 453 | return i; 454 | } 455 | 456 | static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) { 457 | 458 | mpc_input_t *i = tgc_alloc(&gc, sizeof(mpc_input_t)); 459 | 460 | i->filename = tgc_alloc(&gc, strlen(filename) + 1); 461 | strcpy(i->filename, filename); 462 | 463 | i->type = MPC_INPUT_PIPE; 464 | i->state = mpc_state_new(); 465 | 466 | i->string = 
NULL; 467 | i->buffer = NULL; 468 | i->file = pipe; 469 | 470 | i->suppress = 0; 471 | i->backtrack = 1; 472 | i->marks_num = 0; 473 | i->marks_slots = MPC_INPUT_MARKS_MIN; 474 | i->marks = tgc_alloc(&gc, sizeof(mpc_state_t) * i->marks_slots); 475 | i->lasts = tgc_alloc(&gc, sizeof(char) * i->marks_slots); 476 | i->last = '\0'; 477 | 478 | i->mem_index = 0; 479 | memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 480 | 481 | return i; 482 | 483 | } 484 | 485 | static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) { 486 | 487 | mpc_input_t *i = tgc_alloc(&gc, sizeof(mpc_input_t)); 488 | 489 | i->filename = tgc_alloc(&gc, strlen(filename) + 1); 490 | strcpy(i->filename, filename); 491 | i->type = MPC_INPUT_FILE; 492 | i->state = mpc_state_new(); 493 | 494 | i->string = NULL; 495 | i->buffer = NULL; 496 | i->file = file; 497 | 498 | i->suppress = 0; 499 | i->backtrack = 1; 500 | i->marks_num = 0; 501 | i->marks_slots = MPC_INPUT_MARKS_MIN; 502 | i->marks = tgc_alloc(&gc, sizeof(mpc_state_t) * i->marks_slots); 503 | i->lasts = tgc_alloc(&gc, sizeof(char) * i->marks_slots); 504 | i->last = '\0'; 505 | 506 | i->mem_index = 0; 507 | memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 508 | 509 | return i; 510 | } 511 | 512 | static void mpc_input_delete(mpc_input_t *i) { 513 | 514 | //free(i->filename); 515 | 516 | if (i->type == MPC_INPUT_STRING) { 517 | //free(i->string); 518 | } 519 | if (i->type == MPC_INPUT_PIPE) { 520 | //free(i->buffer); 521 | } 522 | 523 | //free(i->marks); 524 | //free(i->lasts); 525 | //free(i); 526 | } 527 | 528 | static int mpc_mem_ptr(mpc_input_t *i, void *p) { 529 | return 530 | (char*)p >= (char*)(i->mem) && 531 | (char*)p < (char*)(i->mem) + (MPC_INPUT_MEM_NUM * sizeof(mpc_mem_t)); 532 | } 533 | 534 | static void *mpc_malloc(mpc_input_t *i, size_t n) { 535 | size_t j; 536 | char *p; 537 | 538 | if (n > sizeof(mpc_mem_t)) { return tgc_alloc(&gc, n); } 539 | 540 | j = i->mem_index; 541 | do { 542 | if 
(!i->mem_full[i->mem_index]) { 543 | p = (void*)(i->mem + i->mem_index); 544 | i->mem_full[i->mem_index] = 1; 545 | i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; 546 | return p; 547 | } 548 | i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; 549 | } while (j != i->mem_index); 550 | 551 | return tgc_alloc(&gc, n); 552 | } 553 | 554 | static void *mpc_calloc(mpc_input_t *i, size_t n, size_t m) { 555 | char *x = mpc_malloc(i, n * m); 556 | memset(x, 0, n * m); 557 | return x; 558 | } 559 | 560 | static void mpc_free(mpc_input_t *i, void *p) { 561 | size_t j; 562 | if (!mpc_mem_ptr(i, p)) { 563 | //free(p); 564 | return; 565 | } 566 | j = ((size_t)(((char*)p) - ((char*)i->mem))) / sizeof(mpc_mem_t); 567 | i->mem_full[j] = 0; 568 | } 569 | 570 | static void *mpc_realloc(mpc_input_t *i, void *p, size_t n) { 571 | 572 | char *q = NULL; 573 | 574 | if (!mpc_mem_ptr(i, p)) { return tgc_realloc(&gc, p, n); } 575 | 576 | if (n > sizeof(mpc_mem_t)) { 577 | q = tgc_alloc(&gc, n); 578 | memcpy(q, p, sizeof(mpc_mem_t)); 579 | mpc_free(i, p); 580 | return q; 581 | } 582 | 583 | return p; 584 | } 585 | 586 | static void *mpc_export(mpc_input_t *i, void *p) { 587 | char *q = NULL; 588 | if (!mpc_mem_ptr(i, p)) { return p; } 589 | q = tgc_alloc(&gc, sizeof(mpc_mem_t)); 590 | memcpy(q, p, sizeof(mpc_mem_t)); 591 | mpc_free(i, p); 592 | return q; 593 | } 594 | 595 | static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; } 596 | static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; } 597 | 598 | static void mpc_input_suppress_disable(mpc_input_t *i) { i->suppress--; } 599 | static void mpc_input_suppress_enable(mpc_input_t *i) { i->suppress++; } 600 | 601 | static void mpc_input_mark(mpc_input_t *i) { 602 | 603 | if (i->backtrack < 1) { return; } 604 | 605 | i->marks_num++; 606 | 607 | if (i->marks_num > i->marks_slots) { 608 | i->marks_slots = i->marks_num + i->marks_num / 2; 609 | i->marks = tgc_realloc(&gc, i->marks, sizeof(mpc_state_t) 
* i->marks_slots); 610 | i->lasts = tgc_realloc(&gc, i->lasts, sizeof(char) * i->marks_slots); 611 | } 612 | 613 | i->marks[i->marks_num-1] = i->state; 614 | i->lasts[i->marks_num-1] = i->last; 615 | 616 | if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) { 617 | i->buffer = tgc_calloc(&gc, 1, 1); 618 | } 619 | 620 | } 621 | 622 | static void mpc_input_unmark(mpc_input_t *i) { 623 | 624 | if (i->backtrack < 1) { return; } 625 | 626 | i->marks_num--; 627 | 628 | if (i->marks_slots > i->marks_num + i->marks_num / 2 629 | && i->marks_slots > MPC_INPUT_MARKS_MIN) { 630 | i->marks_slots = 631 | i->marks_num > MPC_INPUT_MARKS_MIN ? 632 | i->marks_num : MPC_INPUT_MARKS_MIN; 633 | i->marks = tgc_realloc(&gc, i->marks, sizeof(mpc_state_t) * i->marks_slots); 634 | i->lasts = tgc_realloc(&gc, i->lasts, sizeof(char) * i->marks_slots); 635 | } 636 | 637 | if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) { 638 | //free(i->buffer); 639 | i->buffer = NULL; 640 | } 641 | 642 | } 643 | 644 | static void mpc_input_rewind(mpc_input_t *i) { 645 | 646 | if (i->backtrack < 1) { return; } 647 | 648 | i->state = i->marks[i->marks_num-1]; 649 | i->last = i->lasts[i->marks_num-1]; 650 | 651 | if (i->type == MPC_INPUT_FILE) { 652 | fseek(i->file, i->state.pos, SEEK_SET); 653 | } 654 | 655 | mpc_input_unmark(i); 656 | } 657 | 658 | static int mpc_input_buffer_in_range(mpc_input_t *i) { 659 | return i->state.pos < (long)(strlen(i->buffer) + i->marks[0].pos); 660 | } 661 | 662 | static char mpc_input_buffer_get(mpc_input_t *i) { 663 | return i->buffer[i->state.pos - i->marks[0].pos]; 664 | } 665 | 666 | static int mpc_input_terminated(mpc_input_t *i) { 667 | if (i->type == MPC_INPUT_STRING && i->state.pos == (long)strlen(i->string)) { return 1; } 668 | if (i->type == MPC_INPUT_FILE && feof(i->file)) { return 1; } 669 | if (i->type == MPC_INPUT_PIPE && feof(i->file)) { return 1; } 670 | return 0; 671 | } 672 | 673 | static char mpc_input_getc(mpc_input_t *i) { 674 | 675 | char c = '\0'; 676 
| 677 | switch (i->type) { 678 | 679 | case MPC_INPUT_STRING: return i->string[i->state.pos]; 680 | case MPC_INPUT_FILE: c = fgetc(i->file); return c; 681 | case MPC_INPUT_PIPE: 682 | 683 | if (!i->buffer) { c = getc(i->file); return c; } 684 | 685 | if (i->buffer && mpc_input_buffer_in_range(i)) { 686 | c = mpc_input_buffer_get(i); 687 | return c; 688 | } else { 689 | c = getc(i->file); 690 | return c; 691 | } 692 | 693 | default: return c; 694 | } 695 | } 696 | 697 | static char mpc_input_peekc(mpc_input_t *i) { 698 | 699 | char c = '\0'; 700 | 701 | switch (i->type) { 702 | case MPC_INPUT_STRING: return i->string[i->state.pos]; 703 | case MPC_INPUT_FILE: 704 | 705 | c = fgetc(i->file); 706 | if (feof(i->file)) { return '\0'; } 707 | 708 | fseek(i->file, -1, SEEK_CUR); 709 | return c; 710 | 711 | case MPC_INPUT_PIPE: 712 | 713 | if (!i->buffer) { 714 | c = getc(i->file); 715 | if (feof(i->file)) { return '\0'; } 716 | ungetc(c, i->file); 717 | return c; 718 | } 719 | 720 | if (i->buffer && mpc_input_buffer_in_range(i)) { 721 | return mpc_input_buffer_get(i); 722 | } else { 723 | c = getc(i->file); 724 | if (feof(i->file)) { return '\0'; } 725 | ungetc(c, i->file); 726 | return c; 727 | } 728 | 729 | default: return c; 730 | } 731 | 732 | } 733 | 734 | static int mpc_input_failure(mpc_input_t *i, char c) { 735 | 736 | switch (i->type) { 737 | case MPC_INPUT_STRING: { break; } 738 | case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); { break; } 739 | case MPC_INPUT_PIPE: { 740 | 741 | if (!i->buffer) { ungetc(c, i->file); break; } 742 | 743 | if (i->buffer && mpc_input_buffer_in_range(i)) { 744 | break; 745 | } else { 746 | ungetc(c, i->file); 747 | } 748 | } 749 | default: { break; } 750 | } 751 | return 0; 752 | } 753 | 754 | static int mpc_input_success(mpc_input_t *i, char c, char **o) { 755 | 756 | if (i->type == MPC_INPUT_PIPE 757 | && i->buffer && !mpc_input_buffer_in_range(i)) { 758 | i->buffer = tgc_realloc(&gc, i->buffer, strlen(i->buffer) + 2); 759 | 
i->buffer[strlen(i->buffer) + 1] = '\0'; 760 | i->buffer[strlen(i->buffer) + 0] = c; 761 | } 762 | 763 | i->last = c; 764 | i->state.pos++; 765 | i->state.col++; 766 | 767 | if (c == '\n') { 768 | i->state.col = 0; 769 | i->state.row++; 770 | } 771 | 772 | if (o) { 773 | (*o) = mpc_malloc(i, 2); 774 | (*o)[0] = c; 775 | (*o)[1] = '\0'; 776 | } 777 | 778 | return 1; 779 | } 780 | 781 | static int mpc_input_any(mpc_input_t *i, char **o) { 782 | char x = mpc_input_getc(i); 783 | if (mpc_input_terminated(i)) { return 0; } 784 | return mpc_input_success(i, x, o); 785 | } 786 | 787 | static int mpc_input_char(mpc_input_t *i, char c, char **o) { 788 | char x = mpc_input_getc(i); 789 | if (mpc_input_terminated(i)) { return 0; } 790 | return x == c ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 791 | } 792 | 793 | static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) { 794 | char x = mpc_input_getc(i); 795 | if (mpc_input_terminated(i)) { return 0; } 796 | return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 797 | } 798 | 799 | static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) { 800 | char x = mpc_input_getc(i); 801 | if (mpc_input_terminated(i)) { return 0; } 802 | return strchr(c, x) != 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 803 | } 804 | 805 | static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) { 806 | char x = mpc_input_getc(i); 807 | if (mpc_input_terminated(i)) { return 0; } 808 | return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 809 | } 810 | 811 | static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) { 812 | char x = mpc_input_getc(i); 813 | if (mpc_input_terminated(i)) { return 0; } 814 | return cond(x) ? 
mpc_input_success(i, x, o) : mpc_input_failure(i, x); 815 | } 816 | 817 | static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { 818 | 819 | const char *x = c; 820 | 821 | mpc_input_mark(i); 822 | while (*x) { 823 | if (!mpc_input_char(i, *x, NULL)) { 824 | mpc_input_rewind(i); 825 | return 0; 826 | } 827 | x++; 828 | } 829 | mpc_input_unmark(i); 830 | 831 | *o = mpc_malloc(i, strlen(c) + 1); 832 | strcpy(*o, c); 833 | return 1; 834 | } 835 | 836 | static int mpc_input_anchor(mpc_input_t* i, int(*f)(char,char), char **o) { 837 | *o = NULL; 838 | return f(i->last, mpc_input_peekc(i)); 839 | } 840 | 841 | static mpc_state_t *mpc_input_state_copy(mpc_input_t *i) { 842 | mpc_state_t *r = mpc_malloc(i, sizeof(mpc_state_t)); 843 | memcpy(r, &i->state, sizeof(mpc_state_t)); 844 | return r; 845 | } 846 | 847 | /* 848 | ** Error Type 849 | */ 850 | 851 | void mpc_err_delete(mpc_err_t *x) { 852 | int i; 853 | for (i = 0; i < x->expected_num; i++) { 854 | //free(x->expected[i]); 855 | } 856 | //free(x->expected); 857 | //free(x->filename); 858 | //free(x->failure); 859 | //free(x); 860 | } 861 | 862 | void mpc_err_print(mpc_err_t *x) { 863 | mpc_err_print_to(x, stdout); 864 | } 865 | 866 | void mpc_err_print_to(mpc_err_t *x, FILE *f) { 867 | char *str = mpc_err_string(x); 868 | fprintf(f, "%s", str); 869 | //free(str); 870 | } 871 | 872 | static void mpc_err_string_cat(char *buffer, int *pos, int *max, char const *fmt, ...) 
{ 873 | /* TODO: Error Checking on Length */ 874 | int left = ((*max) - (*pos)); 875 | va_list va; 876 | va_start(va, fmt); 877 | if (left < 0) { left = 0;} 878 | (*pos) += vsprintf(buffer + (*pos), fmt, va); 879 | va_end(va); 880 | } 881 | 882 | static char char_unescape_buffer[4]; 883 | 884 | static const char *mpc_err_char_unescape(char c) { 885 | 886 | char_unescape_buffer[0] = '\''; 887 | char_unescape_buffer[1] = ' '; 888 | char_unescape_buffer[2] = '\''; 889 | char_unescape_buffer[3] = '\0'; 890 | 891 | switch (c) { 892 | case '\a': return "bell"; 893 | case '\b': return "backspace"; 894 | case '\f': return "formfeed"; 895 | case '\r': return "carriage return"; 896 | case '\v': return "vertical tab"; 897 | case '\0': return "end of input"; 898 | case '\n': return "newline"; 899 | case '\t': return "tab"; 900 | case ' ' : return "space"; 901 | default: 902 | char_unescape_buffer[1] = c; 903 | return char_unescape_buffer; 904 | } 905 | 906 | } 907 | 908 | char *mpc_err_string(mpc_err_t *x) { 909 | 910 | int i; 911 | int pos = 0; 912 | int max = 1023; 913 | char *buffer = tgc_calloc(&gc, 1, 1024); 914 | 915 | if (x->failure) { 916 | mpc_err_string_cat(buffer, &pos, &max, 917 | "%s: error: %s\n", x->filename, x->failure); 918 | return buffer; 919 | } 920 | 921 | mpc_err_string_cat(buffer, &pos, &max, 922 | "%s:%i:%i: error: expected ", x->filename, x->state.row+1, x->state.col+1); 923 | 924 | if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); } 925 | if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); } 926 | if (x->expected_num >= 2) { 927 | 928 | for (i = 0; i < x->expected_num-2; i++) { 929 | mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]); 930 | } 931 | 932 | mpc_err_string_cat(buffer, &pos, &max, "%s or %s", 933 | x->expected[x->expected_num-2], 934 | x->expected[x->expected_num-1]); 935 | } 936 | 937 | mpc_err_string_cat(buffer, &pos, &max, " at "); 938 | 
/* The unescaped character is passed as data, never as the format string, so a received '%' cannot be parsed as a conversion. */ mpc_err_string_cat(buffer, &pos, &max, "%s", mpc_err_char_unescape(x->recieved)); 939 | mpc_err_string_cat(buffer, &pos, &max, "\n"); 940 | 941 | return tgc_realloc(&gc, buffer, strlen(buffer) + 1); 942 | } 943 | 944 | static mpc_err_t *mpc_err_new(mpc_input_t *i, const char *expected) { 945 | mpc_err_t *x; 946 | if (i->suppress) { return NULL; } 947 | x = mpc_malloc(i, sizeof(mpc_err_t)); 948 | x->filename = mpc_malloc(i, strlen(i->filename) + 1); 949 | strcpy(x->filename, i->filename); 950 | x->state = i->state; 951 | x->expected_num = 1; 952 | x->expected = mpc_malloc(i, sizeof(char*)); 953 | x->expected[0] = mpc_malloc(i, strlen(expected) + 1); 954 | strcpy(x->expected[0], expected); 955 | x->failure = NULL; 956 | x->recieved = mpc_input_peekc(i); 957 | return x; 958 | } 959 | 960 | static mpc_err_t *mpc_err_fail(mpc_input_t *i, const char *failure) { 961 | mpc_err_t *x; 962 | if (i->suppress) { return NULL; } 963 | x = mpc_malloc(i, sizeof(mpc_err_t)); 964 | x->filename = mpc_malloc(i, strlen(i->filename) + 1); 965 | strcpy(x->filename, i->filename); 966 | x->state = i->state; 967 | x->expected_num = 0; 968 | x->expected = NULL; 969 | x->failure = mpc_malloc(i, strlen(failure) + 1); 970 | strcpy(x->failure, failure); 971 | x->recieved = ' '; 972 | return x; 973 | } 974 | 975 | static mpc_err_t *mpc_err_file(const char *filename, const char *failure) { 976 | mpc_err_t *x; 977 | x = tgc_alloc(&gc, sizeof(mpc_err_t)); 978 | x->filename = tgc_alloc(&gc, strlen(filename) + 1); 979 | strcpy(x->filename, filename); 980 | x->state = mpc_state_new(); 981 | x->expected_num = 0; 982 | x->expected = NULL; 983 | x->failure = tgc_alloc(&gc, strlen(failure) + 1); 984 | strcpy(x->failure, failure); 985 | x->recieved = ' '; 986 | return x; 987 | } 988 | 989 | static void mpc_err_delete_internal(mpc_input_t *i, mpc_err_t *x) { 990 | int j; 991 | if (x == NULL) { return; } 992 | for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } 993 | mpc_free(i, x->expected); 994 
| mpc_free(i, x->filename); 995 | mpc_free(i, x->failure); 996 | mpc_free(i, x); 997 | } 998 | 999 | static mpc_err_t *mpc_err_export(mpc_input_t *i, mpc_err_t *x) { 1000 | int j; 1001 | for (j = 0; j < x->expected_num; j++) { 1002 | x->expected[j] = mpc_export(i, x->expected[j]); 1003 | } 1004 | x->expected = mpc_export(i, x->expected); 1005 | x->filename = mpc_export(i, x->filename); 1006 | x->failure = mpc_export(i, x->failure); 1007 | return mpc_export(i, x); 1008 | } 1009 | 1010 | static int mpc_err_contains_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { 1011 | int j; 1012 | (void)i; 1013 | for (j = 0; j < x->expected_num; j++) { 1014 | if (strcmp(x->expected[j], expected) == 0) { return 1; } 1015 | } 1016 | return 0; 1017 | } 1018 | 1019 | static void mpc_err_add_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { 1020 | (void)i; 1021 | x->expected_num++; 1022 | x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); 1023 | x->expected[x->expected_num-1] = mpc_malloc(i, strlen(expected) + 1); 1024 | strcpy(x->expected[x->expected_num-1], expected); 1025 | } 1026 | 1027 | static mpc_err_t *mpc_err_or(mpc_input_t *i, mpc_err_t** x, int n) { 1028 | 1029 | int j, k, fst; 1030 | mpc_err_t *e; 1031 | 1032 | fst = -1; 1033 | for (j = 0; j < n; j++) { 1034 | if (x[j] != NULL) { fst = j; } 1035 | } 1036 | 1037 | if (fst == -1) { return NULL; } 1038 | 1039 | e = mpc_malloc(i, sizeof(mpc_err_t)); 1040 | e->state = mpc_state_invalid(); 1041 | e->expected_num = 0; 1042 | e->expected = NULL; 1043 | e->failure = NULL; 1044 | e->filename = mpc_malloc(i, strlen(x[fst]->filename)+1); 1045 | strcpy(e->filename, x[fst]->filename); 1046 | 1047 | for (j = 0; j < n; j++) { 1048 | if (x[j] == NULL) { continue; } 1049 | if (x[j]->state.pos > e->state.pos) { e->state = x[j]->state; } 1050 | } 1051 | 1052 | for (j = 0; j < n; j++) { 1053 | if (x[j] == NULL) { continue; } 1054 | if (x[j]->state.pos < e->state.pos) { continue; } 1055 | 1056 | if 
(x[j]->failure) { 1057 | e->failure = mpc_malloc(i, strlen(x[j]->failure)+1); 1058 | strcpy(e->failure, x[j]->failure); 1059 | break; 1060 | } 1061 | 1062 | e->recieved = x[j]->recieved; 1063 | 1064 | for (k = 0; k < x[j]->expected_num; k++) { 1065 | if (!mpc_err_contains_expected(i, e, x[j]->expected[k])) { 1066 | mpc_err_add_expected(i, e, x[j]->expected[k]); 1067 | } 1068 | } 1069 | } 1070 | 1071 | for (j = 0; j < n; j++) { 1072 | if (x[j] == NULL) { continue; } 1073 | mpc_err_delete_internal(i, x[j]); 1074 | } 1075 | 1076 | return e; 1077 | } 1078 | 1079 | static mpc_err_t *mpc_err_repeat(mpc_input_t *i, mpc_err_t *x, const char *prefix) { 1080 | 1081 | int j = 0; 1082 | size_t l = 0; 1083 | char *expect = NULL; 1084 | 1085 | if (x == NULL) { return NULL; } 1086 | 1087 | if (x->expected_num == 0) { 1088 | expect = mpc_calloc(i, 1, 1); 1089 | x->expected_num = 1; 1090 | x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); 1091 | x->expected[0] = expect; 1092 | return x; 1093 | } 1094 | 1095 | else if (x->expected_num == 1) { 1096 | expect = mpc_malloc(i, strlen(prefix) + strlen(x->expected[0]) + 1); 1097 | strcpy(expect, prefix); 1098 | strcat(expect, x->expected[0]); 1099 | mpc_free(i, x->expected[0]); 1100 | x->expected[0] = expect; 1101 | return x; 1102 | } 1103 | 1104 | else if (x->expected_num > 1) { 1105 | 1106 | l += strlen(prefix); 1107 | for (j = 0; j < x->expected_num-2; j++) { 1108 | l += strlen(x->expected[j]) + strlen(", "); 1109 | } 1110 | l += strlen(x->expected[x->expected_num-2]); 1111 | l += strlen(" or "); 1112 | l += strlen(x->expected[x->expected_num-1]); 1113 | 1114 | expect = mpc_malloc(i, l + 1); 1115 | 1116 | strcpy(expect, prefix); 1117 | for (j = 0; j < x->expected_num-2; j++) { 1118 | strcat(expect, x->expected[j]); strcat(expect, ", "); 1119 | } 1120 | strcat(expect, x->expected[x->expected_num-2]); 1121 | strcat(expect, " or "); 1122 | strcat(expect, x->expected[x->expected_num-1]); 1123 | 1124 | for (j = 0; j 
< x->expected_num; j++) { mpc_free(i, x->expected[j]); } 1125 | 1126 | x->expected_num = 1; 1127 | x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); 1128 | x->expected[0] = expect; 1129 | return x; 1130 | } 1131 | 1132 | return NULL; 1133 | } 1134 | 1135 | static mpc_err_t *mpc_err_many1(mpc_input_t *i, mpc_err_t *x) { 1136 | return mpc_err_repeat(i, x, "one or more of "); 1137 | } 1138 | 1139 | static mpc_err_t *mpc_err_count(mpc_input_t *i, mpc_err_t *x, int n) { 1140 | mpc_err_t *y; 1141 | int digits = n/10 + 1; 1142 | char *prefix; 1143 | prefix = mpc_malloc(i, digits + strlen(" of ") + 1); 1144 | sprintf(prefix, "%i of ", n); 1145 | y = mpc_err_repeat(i, x, prefix); 1146 | mpc_free(i, prefix); 1147 | return y; 1148 | } 1149 | 1150 | static mpc_err_t *mpc_err_merge(mpc_input_t *i, mpc_err_t *x, mpc_err_t *y) { 1151 | mpc_err_t *errs[2]; 1152 | errs[0] = x; 1153 | errs[1] = y; 1154 | return mpc_err_or(i, errs, 2); 1155 | } 1156 | 1157 | /* 1158 | ** Parser Type 1159 | */ 1160 | 1161 | enum { 1162 | MPC_TYPE_UNDEFINED = 0, 1163 | MPC_TYPE_PASS = 1, 1164 | MPC_TYPE_FAIL = 2, 1165 | MPC_TYPE_LIFT = 3, 1166 | MPC_TYPE_LIFT_VAL = 4, 1167 | MPC_TYPE_EXPECT = 5, 1168 | MPC_TYPE_ANCHOR = 6, 1169 | MPC_TYPE_STATE = 7, 1170 | 1171 | MPC_TYPE_ANY = 8, 1172 | MPC_TYPE_SINGLE = 9, 1173 | MPC_TYPE_ONEOF = 10, 1174 | MPC_TYPE_NONEOF = 11, 1175 | MPC_TYPE_RANGE = 12, 1176 | MPC_TYPE_SATISFY = 13, 1177 | MPC_TYPE_STRING = 14, 1178 | 1179 | MPC_TYPE_APPLY = 15, 1180 | MPC_TYPE_APPLY_TO = 16, 1181 | MPC_TYPE_PREDICT = 17, 1182 | MPC_TYPE_NOT = 18, 1183 | MPC_TYPE_MAYBE = 19, 1184 | MPC_TYPE_MANY = 20, 1185 | MPC_TYPE_MANY1 = 21, 1186 | MPC_TYPE_COUNT = 22, 1187 | 1188 | MPC_TYPE_OR = 23, 1189 | MPC_TYPE_AND = 24 1190 | }; 1191 | 1192 | typedef struct { char *m; } mpc_pdata_fail_t; 1193 | typedef struct { mpc_ctor_t lf; void *x; } mpc_pdata_lift_t; 1194 | typedef struct { mpc_parser_t *x; char *m; } mpc_pdata_expect_t; 1195 | typedef struct { 
int(*f)(char,char); } mpc_pdata_anchor_t; 1196 | typedef struct { char x; } mpc_pdata_single_t; 1197 | typedef struct { char x; char y; } mpc_pdata_range_t; 1198 | typedef struct { int(*f)(char); } mpc_pdata_satisfy_t; 1199 | typedef struct { char *x; } mpc_pdata_string_t; 1200 | typedef struct { mpc_parser_t *x; mpc_apply_t f; } mpc_pdata_apply_t; 1201 | typedef struct { mpc_parser_t *x; mpc_apply_to_t f; void *d; } mpc_pdata_apply_to_t; 1202 | typedef struct { mpc_parser_t *x; } mpc_pdata_predict_t; 1203 | typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_ctor_t lf; } mpc_pdata_not_t; 1204 | typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_dtor_t dx; } mpc_pdata_repeat_t; 1205 | typedef struct { int n; mpc_parser_t **xs; } mpc_pdata_or_t; 1206 | typedef struct { int n; mpc_fold_t f; mpc_parser_t **xs; mpc_dtor_t *dxs; } mpc_pdata_and_t; 1207 | 1208 | typedef union { 1209 | mpc_pdata_fail_t fail; 1210 | mpc_pdata_lift_t lift; 1211 | mpc_pdata_expect_t expect; 1212 | mpc_pdata_anchor_t anchor; 1213 | mpc_pdata_single_t single; 1214 | mpc_pdata_range_t range; 1215 | mpc_pdata_satisfy_t satisfy; 1216 | mpc_pdata_string_t string; 1217 | mpc_pdata_apply_t apply; 1218 | mpc_pdata_apply_to_t apply_to; 1219 | mpc_pdata_predict_t predict; 1220 | mpc_pdata_not_t not; 1221 | mpc_pdata_repeat_t repeat; 1222 | mpc_pdata_and_t and; 1223 | mpc_pdata_or_t or; 1224 | } mpc_pdata_t; 1225 | 1226 | struct mpc_parser_t { 1227 | char retained; 1228 | char *name; 1229 | char type; 1230 | mpc_pdata_t data; 1231 | }; 1232 | 1233 | static mpc_val_t *mpcf_input_nth_free(mpc_input_t *i, int n, mpc_val_t **xs, int x) { 1234 | int j; 1235 | for (j = 0; j < n; j++) { if (j != x) { mpc_free(i, xs[j]); } } 1236 | return xs[x]; 1237 | } 1238 | 1239 | static mpc_val_t *mpcf_input_fst_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 0); } 1240 | static mpc_val_t *mpcf_input_snd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, 
n, xs, 1); } 1241 | static mpc_val_t *mpcf_input_trd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 2); } 1242 | 1243 | static mpc_val_t *mpcf_input_strfold(mpc_input_t *i, int n, mpc_val_t **xs) { 1244 | int j; 1245 | size_t l = 0; 1246 | if (n == 0) { return mpc_calloc(i, 1, 1); } 1247 | for (j = 0; j < n; j++) { l += strlen(xs[j]); } 1248 | xs[0] = mpc_realloc(i, xs[0], l + 1); 1249 | for (j = 1; j < n; j++) { strcat(xs[0], xs[j]); mpc_free(i, xs[j]); } 1250 | return xs[0]; 1251 | } 1252 | 1253 | static mpc_val_t *mpcf_input_state_ast(mpc_input_t *i, int n, mpc_val_t **xs) { 1254 | mpc_state_t *s = ((mpc_state_t**)xs)[0]; 1255 | mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; 1256 | a = mpc_ast_state(a, *s); 1257 | mpc_free(i, s); 1258 | (void) n; 1259 | return a; 1260 | } 1261 | 1262 | static mpc_val_t *mpc_parse_fold(mpc_input_t *i, mpc_fold_t f, int n, mpc_val_t **xs) { 1263 | int j; 1264 | if (f == mpcf_null) { return mpcf_null(n, xs); } 1265 | if (f == mpcf_fst) { return mpcf_fst(n, xs); } 1266 | if (f == mpcf_snd) { return mpcf_snd(n, xs); } 1267 | if (f == mpcf_trd) { return mpcf_trd(n, xs); } 1268 | if (f == mpcf_fst_free) { return mpcf_input_fst_free(i, n, xs); } 1269 | if (f == mpcf_snd_free) { return mpcf_input_snd_free(i, n, xs); } 1270 | if (f == mpcf_trd_free) { return mpcf_input_trd_free(i, n, xs); } 1271 | if (f == mpcf_strfold) { return mpcf_input_strfold(i, n, xs); } 1272 | if (f == mpcf_state_ast) { return mpcf_input_state_ast(i, n, xs); } 1273 | for (j = 0; j < n; j++) { xs[j] = mpc_export(i, xs[j]); } 1274 | return f(j, xs); 1275 | } 1276 | 1277 | static mpc_val_t *mpcf_input_free(mpc_input_t *i, mpc_val_t *x) { 1278 | mpc_free(i, x); 1279 | return NULL; 1280 | } 1281 | 1282 | static mpc_val_t *mpcf_input_str_ast(mpc_input_t *i, mpc_val_t *c) { 1283 | mpc_ast_t *a = mpc_ast_new("", c); 1284 | mpc_free(i, c); 1285 | return a; 1286 | } 1287 | 1288 | static mpc_val_t *mpc_parse_apply(mpc_input_t *i, mpc_apply_t f, 
mpc_val_t *x) { 1289 | if (f == mpcf_free) { return mpcf_input_free(i, x); } 1290 | if (f == mpcf_str_ast) { return mpcf_input_str_ast(i, x); } 1291 | return f(mpc_export(i, x)); 1292 | } 1293 | 1294 | static mpc_val_t *mpc_parse_apply_to(mpc_input_t *i, mpc_apply_to_t f, mpc_val_t *x, mpc_val_t *d) { 1295 | return f(mpc_export(i, x), d); 1296 | } 1297 | 1298 | static void mpc_parse_dtor(mpc_input_t *i, mpc_dtor_t d, mpc_val_t *x) { 1299 | if (d == free || d == nothing) { mpc_free(i, x); return; } 1300 | d(mpc_export(i, x)); 1301 | } 1302 | 1303 | enum { 1304 | MPC_PARSE_STACK_MIN = 4 1305 | }; 1306 | 1307 | #define MPC_SUCCESS(x) r->output = x; return 1 1308 | #define MPC_FAILURE(x) r->error = x; return 0 1309 | #define MPC_PRIMITIVE(x) \ 1310 | if (x) { MPC_SUCCESS(r->output); } \ 1311 | else { MPC_FAILURE(NULL); } 1312 | 1313 | static int mpc_parse_run(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r, mpc_err_t **e) { 1314 | 1315 | int j = 0, k = 0; 1316 | mpc_result_t results_stk[MPC_PARSE_STACK_MIN]; 1317 | mpc_result_t *results; 1318 | int results_slots = MPC_PARSE_STACK_MIN; 1319 | 1320 | switch (p->type) { 1321 | 1322 | /* Basic Parsers */ 1323 | 1324 | case MPC_TYPE_ANY: MPC_PRIMITIVE(mpc_input_any(i, (char**)&r->output)); 1325 | case MPC_TYPE_SINGLE: MPC_PRIMITIVE(mpc_input_char(i, p->data.single.x, (char**)&r->output)); 1326 | case MPC_TYPE_RANGE: MPC_PRIMITIVE(mpc_input_range(i, p->data.range.x, p->data.range.y, (char**)&r->output)); 1327 | case MPC_TYPE_ONEOF: MPC_PRIMITIVE(mpc_input_oneof(i, p->data.string.x, (char**)&r->output)); 1328 | case MPC_TYPE_NONEOF: MPC_PRIMITIVE(mpc_input_noneof(i, p->data.string.x, (char**)&r->output)); 1329 | case MPC_TYPE_SATISFY: MPC_PRIMITIVE(mpc_input_satisfy(i, p->data.satisfy.f, (char**)&r->output)); 1330 | case MPC_TYPE_STRING: MPC_PRIMITIVE(mpc_input_string(i, p->data.string.x, (char**)&r->output)); 1331 | case MPC_TYPE_ANCHOR: MPC_PRIMITIVE(mpc_input_anchor(i, p->data.anchor.f, (char**)&r->output)); 1332 | 1333 | 
/* Other parsers */ 1334 | 1335 | case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i, "Parser Undefined!")); 1336 | case MPC_TYPE_PASS: MPC_SUCCESS(NULL); 1337 | case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i, p->data.fail.m)); 1338 | case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); 1339 | case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); 1340 | case MPC_TYPE_STATE: MPC_SUCCESS(mpc_input_state_copy(i)); 1341 | 1342 | /* Application Parsers */ 1343 | 1344 | case MPC_TYPE_APPLY: 1345 | if (mpc_parse_run(i, p->data.apply.x, r, e)) { 1346 | MPC_SUCCESS(mpc_parse_apply(i, p->data.apply.f, r->output)); 1347 | } else { 1348 | MPC_FAILURE(r->output); 1349 | } 1350 | 1351 | case MPC_TYPE_APPLY_TO: 1352 | if (mpc_parse_run(i, p->data.apply_to.x, r, e)) { 1353 | MPC_SUCCESS(mpc_parse_apply_to(i, p->data.apply_to.f, r->output, p->data.apply_to.d)); 1354 | } else { 1355 | MPC_FAILURE(r->error); 1356 | } 1357 | 1358 | case MPC_TYPE_EXPECT: 1359 | mpc_input_suppress_enable(i); 1360 | if (mpc_parse_run(i, p->data.expect.x, r, e)) { 1361 | mpc_input_suppress_disable(i); 1362 | MPC_SUCCESS(r->output); 1363 | } else { 1364 | mpc_input_suppress_disable(i); 1365 | MPC_FAILURE(mpc_err_new(i, p->data.expect.m)); 1366 | } 1367 | 1368 | case MPC_TYPE_PREDICT: 1369 | mpc_input_backtrack_disable(i); 1370 | if (mpc_parse_run(i, p->data.predict.x, r, e)) { 1371 | mpc_input_backtrack_enable(i); 1372 | MPC_SUCCESS(r->output); 1373 | } else { 1374 | mpc_input_backtrack_enable(i); 1375 | MPC_FAILURE(r->error); 1376 | } 1377 | 1378 | /* Optional Parsers */ 1379 | 1380 | /* TODO: Update Not Error Message */ 1381 | 1382 | case MPC_TYPE_NOT: 1383 | mpc_input_mark(i); 1384 | mpc_input_suppress_enable(i); 1385 | if (mpc_parse_run(i, p->data.not.x, r, e)) { 1386 | mpc_input_rewind(i); 1387 | mpc_input_suppress_disable(i); 1388 | mpc_parse_dtor(i, p->data.not.dx, r->output); 1389 | MPC_FAILURE(mpc_err_new(i, "opposite")); 1390 | } else { 1391 | mpc_input_unmark(i); 1392 | 
mpc_input_suppress_disable(i); 1393 | MPC_SUCCESS(p->data.not.lf()); 1394 | } 1395 | 1396 | case MPC_TYPE_MAYBE: 1397 | if (mpc_parse_run(i, p->data.not.x, r, e)) { 1398 | MPC_SUCCESS(r->output); 1399 | } else { 1400 | *e = mpc_err_merge(i, *e, r->error); 1401 | MPC_SUCCESS(p->data.not.lf()); 1402 | } 1403 | 1404 | /* Repeat Parsers */ 1405 | 1406 | case MPC_TYPE_MANY: 1407 | 1408 | results = results_stk; 1409 | 1410 | while (mpc_parse_run(i, p->data.repeat.x, &results[j], e)) { 1411 | j++; 1412 | if (j == MPC_PARSE_STACK_MIN) { 1413 | results_slots = j + j / 2; 1414 | results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); 1415 | memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); 1416 | } else if (j >= results_slots) { 1417 | results_slots = j + j / 2; 1418 | results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); 1419 | } 1420 | } 1421 | 1422 | *e = mpc_err_merge(i, *e, results[j].error); 1423 | MPC_SUCCESS( 1424 | mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1425 | if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1426 | 1427 | case MPC_TYPE_MANY1: 1428 | 1429 | results = results_stk; 1430 | 1431 | while (mpc_parse_run(i, p->data.repeat.x, &results[j], e)) { 1432 | j++; 1433 | if (j == MPC_PARSE_STACK_MIN) { 1434 | results_slots = j + j / 2; 1435 | results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); 1436 | memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); 1437 | } else if (j >= results_slots) { 1438 | results_slots = j + j / 2; 1439 | results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); 1440 | } 1441 | } 1442 | 1443 | if (j == 0) { 1444 | MPC_FAILURE( 1445 | mpc_err_many1(i, results[j].error); 1446 | if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1447 | } else { 1448 | *e = mpc_err_merge(i, *e, results[j].error); 1449 | MPC_SUCCESS( 1450 | mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1451 | if (j >= 
MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1452 | } 1453 | 1454 | case MPC_TYPE_COUNT: 1455 | 1456 | results = p->data.repeat.n > MPC_PARSE_STACK_MIN 1457 | ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.repeat.n) 1458 | : results_stk; 1459 | 1460 | while (mpc_parse_run(i, p->data.repeat.x, &results[j], e)) { 1461 | j++; 1462 | if (j == p->data.repeat.n) { break; } 1463 | } 1464 | 1465 | if (j == p->data.repeat.n) { 1466 | MPC_SUCCESS( 1467 | mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1468 | if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1469 | } else { 1470 | for (k = 0; k < j; k++) { 1471 | mpc_parse_dtor(i, p->data.repeat.dx, results[k].output); 1472 | } 1473 | MPC_FAILURE( 1474 | mpc_err_count(i, results[j].error, p->data.repeat.n); 1475 | if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1476 | } 1477 | 1478 | /* Combinatory Parsers */ 1479 | 1480 | case MPC_TYPE_OR: 1481 | 1482 | if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } 1483 | 1484 | results = p->data.or.n > MPC_PARSE_STACK_MIN 1485 | ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) 1486 | : results_stk; 1487 | 1488 | for (j = 0; j < p->data.or.n; j++) { 1489 | if (mpc_parse_run(i, p->data.or.xs[j], &results[j], e)) { 1490 | MPC_SUCCESS(results[j].output; 1491 | if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1492 | } else { 1493 | *e = mpc_err_merge(i, *e, results[j].error); 1494 | } 1495 | } 1496 | 1497 | MPC_FAILURE(NULL; 1498 | if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1499 | 1500 | case MPC_TYPE_AND: 1501 | 1502 | if (p->data.and.n == 0) { MPC_SUCCESS(NULL); } 1503 | 1504 | results = p->data.or.n > MPC_PARSE_STACK_MIN 1505 | ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) 1506 | : results_stk; 1507 | 1508 | mpc_input_mark(i); 1509 | for (j = 0; j < p->data.and.n; j++) { 1510 | if (!mpc_parse_run(i, p->data.and.xs[j], &results[j], e)) { 1511 | mpc_input_rewind(i); 1512 | for (k = 0; k < j; k++) { 1513 | mpc_parse_dtor(i, p->data.and.dxs[k], results[k].output); 1514 | } 1515 | MPC_FAILURE(results[j].error; 1516 | if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1517 | } 1518 | } 1519 | mpc_input_unmark(i); 1520 | MPC_SUCCESS( 1521 | mpc_parse_fold(i, p->data.and.f, j, (mpc_val_t**)results); 1522 | if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1523 | 1524 | /* End */ 1525 | 1526 | default: 1527 | 1528 | MPC_FAILURE(mpc_err_fail(i, "Unknown Parser Type Id!")); 1529 | } 1530 | 1531 | return 0; 1532 | 1533 | } 1534 | 1535 | #undef MPC_SUCCESS 1536 | #undef MPC_FAILURE 1537 | #undef MPC_PRIMITIVE 1538 | 1539 | int mpc_parse_input(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r) { 1540 | int x; 1541 | mpc_err_t *e = mpc_err_fail(i, "Unknown Error"); 1542 | e->state = mpc_state_invalid(); 1543 | x = mpc_parse_run(i, p, r, &e); 1544 | if (x) { 1545 | mpc_err_delete_internal(i, e); 1546 | r->output = mpc_export(i, r->output); 1547 | } else { 1548 | r->error = mpc_err_export(i, mpc_err_merge(i, e, r->error)); 1549 | } 1550 | return x; 1551 | } 1552 | 1553 | int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r) { 1554 | int x; 1555 | mpc_input_t *i = mpc_input_new_string(filename, string); 1556 | x = mpc_parse_input(i, p, r); 1557 | mpc_input_delete(i); 1558 | return x; 1559 | } 1560 | 1561 | int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r) { 1562 | int x; 1563 | mpc_input_t *i = mpc_input_new_file(filename, file); 1564 | x = mpc_parse_input(i, p, r); 1565 | mpc_input_delete(i); 1566 | return x; 1567 | } 1568 | 1569 | int mpc_parse_pipe(const char *filename, FILE *pipe, 
mpc_parser_t *p, mpc_result_t *r) { 1570 | int x; 1571 | mpc_input_t *i = mpc_input_new_pipe(filename, pipe); 1572 | x = mpc_parse_input(i, p, r); 1573 | mpc_input_delete(i); 1574 | return x; 1575 | } 1576 | 1577 | int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { 1578 | 1579 | FILE *f = fopen(filename, "rb"); 1580 | int res; 1581 | 1582 | if (f == NULL) { 1583 | r->output = NULL; 1584 | r->error = mpc_err_file(filename, "Unable to open file!"); 1585 | return 0; 1586 | } 1587 | 1588 | res = mpc_parse_file(filename, f, p, r); 1589 | fclose(f); 1590 | return res; 1591 | } 1592 | 1593 | /* 1594 | ** Building a Parser 1595 | */ 1596 | 1597 | static void mpc_undefine_unretained(mpc_parser_t *p, int force); 1598 | 1599 | static void mpc_undefine_or(mpc_parser_t *p) { 1600 | 1601 | int i; 1602 | for (i = 0; i < p->data.or.n; i++) { 1603 | mpc_undefine_unretained(p->data.or.xs[i], 0); 1604 | } 1605 | //free(p->data.or.xs); 1606 | 1607 | } 1608 | 1609 | static void mpc_undefine_and(mpc_parser_t *p) { 1610 | 1611 | int i; 1612 | for (i = 0; i < p->data.and.n; i++) { 1613 | mpc_undefine_unretained(p->data.and.xs[i], 0); 1614 | } 1615 | //free(p->data.and.xs); 1616 | //free(p->data.and.dxs); 1617 | 1618 | } 1619 | 1620 | static void mpc_undefine_unretained(mpc_parser_t *p, int force) { 1621 | 1622 | if (p->retained && !force) { return; } 1623 | 1624 | switch (p->type) { 1625 | 1626 | case MPC_TYPE_FAIL: //free(p->data.fail.m); 1627 | break; 1628 | 1629 | case MPC_TYPE_ONEOF: 1630 | case MPC_TYPE_NONEOF: 1631 | case MPC_TYPE_STRING: 1632 | //free(p->data.string.x); 1633 | break; 1634 | 1635 | case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 0); break; 1636 | case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; 1637 | case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 0); break; 1638 | 1639 | case MPC_TYPE_MAYBE: 1640 | case MPC_TYPE_NOT: 1641 | mpc_undefine_unretained(p->data.not.x, 0); 1642 | 
break; 1643 | 1644 | case MPC_TYPE_EXPECT: 1645 | mpc_undefine_unretained(p->data.expect.x, 0); 1646 | //free(p->data.expect.m); 1647 | break; 1648 | 1649 | case MPC_TYPE_MANY: 1650 | case MPC_TYPE_MANY1: 1651 | case MPC_TYPE_COUNT: 1652 | mpc_undefine_unretained(p->data.repeat.x, 0); 1653 | break; 1654 | 1655 | case MPC_TYPE_OR: mpc_undefine_or(p); break; 1656 | case MPC_TYPE_AND: mpc_undefine_and(p); break; 1657 | 1658 | default: break; 1659 | } 1660 | 1661 | if (!force) { 1662 | //free(p->name); 1663 | //free(p); 1664 | } 1665 | 1666 | } 1667 | 1668 | void mpc_delete(mpc_parser_t *p) { 1669 | if (p->retained) { 1670 | 1671 | if (p->type != MPC_TYPE_UNDEFINED) { 1672 | mpc_undefine_unretained(p, 0); 1673 | } 1674 | 1675 | //free(p->name); 1676 | //free(p); 1677 | 1678 | } else { 1679 | mpc_undefine_unretained(p, 0); 1680 | } 1681 | } 1682 | 1683 | static void mpc_soft_delete(mpc_val_t *x) { 1684 | mpc_undefine_unretained(x, 0); 1685 | } 1686 | 1687 | static mpc_parser_t *mpc_undefined(void) { 1688 | mpc_parser_t *p = tgc_calloc(&gc, 1, sizeof(mpc_parser_t)); 1689 | p->retained = 0; 1690 | p->type = MPC_TYPE_UNDEFINED; 1691 | p->name = NULL; 1692 | return p; 1693 | } 1694 | 1695 | mpc_parser_t *mpc_new(const char *name) { 1696 | mpc_parser_t *p = mpc_undefined(); 1697 | p->retained = 1; 1698 | p->name = tgc_realloc(&gc, p->name, strlen(name) + 1); 1699 | strcpy(p->name, name); 1700 | return p; 1701 | } 1702 | 1703 | mpc_parser_t *mpc_copy(mpc_parser_t *a) { 1704 | int i = 0; 1705 | mpc_parser_t *p; 1706 | 1707 | if (a->retained) { return a; } 1708 | 1709 | p = mpc_undefined(); 1710 | p->retained = a->retained; 1711 | p->type = a->type; 1712 | p->data = a->data; 1713 | 1714 | if (a->name) { 1715 | p->name = tgc_alloc(&gc, strlen(a->name)+1); 1716 | strcpy(p->name, a->name); 1717 | } 1718 | 1719 | switch (a->type) { 1720 | 1721 | case MPC_TYPE_FAIL: 1722 | p->data.fail.m = tgc_alloc(&gc, strlen(a->data.fail.m)+1); 1723 | strcpy(p->data.fail.m, a->data.fail.m); 1724 
| break; 1725 | 1726 | case MPC_TYPE_ONEOF: 1727 | case MPC_TYPE_NONEOF: 1728 | case MPC_TYPE_STRING: 1729 | p->data.string.x = tgc_alloc(&gc, strlen(a->data.string.x)+1); 1730 | strcpy(p->data.string.x, a->data.string.x); 1731 | break; 1732 | 1733 | case MPC_TYPE_APPLY: p->data.apply.x = mpc_copy(a->data.apply.x); break; 1734 | case MPC_TYPE_APPLY_TO: p->data.apply_to.x = mpc_copy(a->data.apply_to.x); break; 1735 | case MPC_TYPE_PREDICT: p->data.predict.x = mpc_copy(a->data.predict.x); break; 1736 | 1737 | case MPC_TYPE_MAYBE: 1738 | case MPC_TYPE_NOT: 1739 | p->data.not.x = mpc_copy(a->data.not.x); 1740 | break; 1741 | 1742 | case MPC_TYPE_EXPECT: 1743 | p->data.expect.x = mpc_copy(a->data.expect.x); 1744 | p->data.expect.m = tgc_alloc(&gc, strlen(a->data.expect.m)+1); 1745 | strcpy(p->data.expect.m, a->data.expect.m); 1746 | break; 1747 | 1748 | case MPC_TYPE_MANY: 1749 | case MPC_TYPE_MANY1: 1750 | case MPC_TYPE_COUNT: 1751 | p->data.repeat.x = mpc_copy(a->data.repeat.x); 1752 | break; 1753 | 1754 | case MPC_TYPE_OR: 1755 | p->data.or.xs = tgc_alloc(&gc, a->data.or.n * sizeof(mpc_parser_t*)); 1756 | for (i = 0; i < a->data.or.n; i++) { 1757 | p->data.or.xs[i] = mpc_copy(a->data.or.xs[i]); 1758 | } 1759 | break; 1760 | case MPC_TYPE_AND: 1761 | p->data.and.xs = tgc_alloc(&gc, a->data.and.n * sizeof(mpc_parser_t*)); 1762 | for (i = 0; i < a->data.and.n; i++) { 1763 | p->data.and.xs[i] = mpc_copy(a->data.and.xs[i]); 1764 | } 1765 | p->data.and.dxs = tgc_alloc(&gc, (a->data.and.n-1) * sizeof(mpc_dtor_t)); 1766 | for (i = 0; i < a->data.and.n-1; i++) { 1767 | p->data.and.dxs[i] = a->data.and.dxs[i]; 1768 | } 1769 | break; 1770 | 1771 | default: break; 1772 | } 1773 | 1774 | 1775 | return p; 1776 | } 1777 | 1778 | mpc_parser_t *mpc_undefine(mpc_parser_t *p) { 1779 | mpc_undefine_unretained(p, 1); 1780 | p->type = MPC_TYPE_UNDEFINED; 1781 | return p; 1782 | } 1783 | 1784 | mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { 1785 | 1786 | if (p->retained) { 
1787 | p->type = a->type; 1788 | p->data = a->data; 1789 | } else { 1790 | mpc_parser_t *a2 = mpc_failf("Attempt to assign to Unretained Parser!"); 1791 | p->type = a2->type; 1792 | p->data = a2->data; 1793 | //free(a2); 1794 | } 1795 | 1796 | //free(a); 1797 | return p; 1798 | } 1799 | 1800 | void mpc_cleanup(int n, ...) { 1801 | int i; 1802 | mpc_parser_t **list = tgc_alloc(&gc, sizeof(mpc_parser_t*) * n); 1803 | 1804 | va_list va; 1805 | va_start(va, n); 1806 | for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } 1807 | for (i = 0; i < n; i++) { mpc_undefine(list[i]); } 1808 | for (i = 0; i < n; i++) { mpc_delete(list[i]); } 1809 | va_end(va); 1810 | 1811 | //free(list); 1812 | } 1813 | 1814 | mpc_parser_t *mpc_pass(void) { 1815 | mpc_parser_t *p = mpc_undefined(); 1816 | p->type = MPC_TYPE_PASS; 1817 | return p; 1818 | } 1819 | 1820 | mpc_parser_t *mpc_fail(const char *m) { 1821 | mpc_parser_t *p = mpc_undefined(); 1822 | p->type = MPC_TYPE_FAIL; 1823 | p->data.fail.m = tgc_alloc(&gc, strlen(m) + 1); 1824 | strcpy(p->data.fail.m, m); 1825 | return p; 1826 | } 1827 | 1828 | /* 1829 | ** As `snprintf` is not ANSI standard this 1830 | ** function `mpc_failf` should be considered 1831 | ** unsafe. 1832 | ** 1833 | ** You have a few options if this is going to be 1834 | ** trouble. 1835 | ** 1836 | ** - Ensure the format string does not exceed 1837 | ** the buffer length using precision specifiers 1838 | ** such as `%.512s`. 1839 | ** 1840 | ** - Patch this function in your code base to 1841 | ** use `snprintf` or whatever variant your 1842 | ** system supports. 1843 | ** 1844 | ** - Avoid it altogether. 1845 | ** 1846 | */ 1847 | 1848 | mpc_parser_t *mpc_failf(const char *fmt, ...) 
{ 1849 | 1850 | va_list va; 1851 | char *buffer; 1852 | 1853 | mpc_parser_t *p = mpc_undefined(); 1854 | p->type = MPC_TYPE_FAIL; 1855 | 1856 | va_start(va, fmt); 1857 | buffer = tgc_alloc(&gc, 2048); 1858 | vsprintf(buffer, fmt, va); 1859 | va_end(va); 1860 | 1861 | buffer = tgc_realloc(&gc, buffer, strlen(buffer) + 1); 1862 | p->data.fail.m = buffer; 1863 | return p; 1864 | 1865 | } 1866 | 1867 | mpc_parser_t *mpc_lift_val(mpc_val_t *x) { 1868 | mpc_parser_t *p = mpc_undefined(); 1869 | p->type = MPC_TYPE_LIFT_VAL; 1870 | p->data.lift.x = x; 1871 | return p; 1872 | } 1873 | 1874 | mpc_parser_t *mpc_lift(mpc_ctor_t lf) { 1875 | mpc_parser_t *p = mpc_undefined(); 1876 | p->type = MPC_TYPE_LIFT; 1877 | p->data.lift.lf = lf; 1878 | return p; 1879 | } 1880 | 1881 | mpc_parser_t *mpc_anchor(int(*f)(char,char)) { 1882 | mpc_parser_t *p = mpc_undefined(); 1883 | p->type = MPC_TYPE_ANCHOR; 1884 | p->data.anchor.f = f; 1885 | return mpc_expect(p, "anchor"); 1886 | } 1887 | 1888 | mpc_parser_t *mpc_state(void) { 1889 | mpc_parser_t *p = mpc_undefined(); 1890 | p->type = MPC_TYPE_STATE; 1891 | return p; 1892 | } 1893 | 1894 | mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { 1895 | mpc_parser_t *p = mpc_undefined(); 1896 | p->type = MPC_TYPE_EXPECT; 1897 | p->data.expect.x = a; 1898 | p->data.expect.m = tgc_alloc(&gc, strlen(expected) + 1); 1899 | strcpy(p->data.expect.m, expected); 1900 | return p; 1901 | } 1902 | 1903 | /* 1904 | ** As `snprintf` is not ANSI standard this 1905 | ** function `mpc_expectf` should be considered 1906 | ** unsafe. 1907 | ** 1908 | ** You have a few options if this is going to be 1909 | ** trouble. 1910 | ** 1911 | ** - Ensure the format string does not exceed 1912 | ** the buffer length using precision specifiers 1913 | ** such as `%.512s`. 1914 | ** 1915 | ** - Patch this function in your code base to 1916 | ** use `snprintf` or whatever variant your 1917 | ** system supports. 1918 | ** 1919 | ** - Avoid it altogether. 
1920 | ** 1921 | */ 1922 | 1923 | mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...) { 1924 | va_list va; 1925 | char *buffer; 1926 | 1927 | mpc_parser_t *p = mpc_undefined(); 1928 | p->type = MPC_TYPE_EXPECT; 1929 | 1930 | va_start(va, fmt); 1931 | buffer = tgc_alloc(&gc, 2048); 1932 | vsprintf(buffer, fmt, va); 1933 | va_end(va); 1934 | 1935 | buffer = tgc_realloc(&gc, buffer, strlen(buffer) + 1); 1936 | p->data.expect.x = a; 1937 | p->data.expect.m = buffer; 1938 | return p; 1939 | } 1940 | 1941 | /* 1942 | ** Basic Parsers 1943 | */ 1944 | 1945 | mpc_parser_t *mpc_any(void) { 1946 | mpc_parser_t *p = mpc_undefined(); 1947 | p->type = MPC_TYPE_ANY; 1948 | return mpc_expect(p, "any character"); 1949 | } 1950 | 1951 | mpc_parser_t *mpc_char(char c) { 1952 | mpc_parser_t *p = mpc_undefined(); 1953 | p->type = MPC_TYPE_SINGLE; 1954 | p->data.single.x = c; 1955 | return mpc_expectf(p, "'%c'", c); 1956 | } 1957 | 1958 | mpc_parser_t *mpc_range(char s, char e) { 1959 | mpc_parser_t *p = mpc_undefined(); 1960 | p->type = MPC_TYPE_RANGE; 1961 | p->data.range.x = s; 1962 | p->data.range.y = e; 1963 | return mpc_expectf(p, "character between '%c' and '%c'", s, e); 1964 | } 1965 | 1966 | mpc_parser_t *mpc_oneof(const char *s) { 1967 | mpc_parser_t *p = mpc_undefined(); 1968 | p->type = MPC_TYPE_ONEOF; 1969 | p->data.string.x = tgc_alloc(&gc, strlen(s) + 1); 1970 | strcpy(p->data.string.x, s); 1971 | return mpc_expectf(p, "one of '%s'", s); 1972 | } 1973 | 1974 | mpc_parser_t *mpc_noneof(const char *s) { 1975 | mpc_parser_t *p = mpc_undefined(); 1976 | p->type = MPC_TYPE_NONEOF; 1977 | p->data.string.x = tgc_alloc(&gc, strlen(s) + 1); 1978 | strcpy(p->data.string.x, s); 1979 | return mpc_expectf(p, "none of '%s'", s); 1980 | 1981 | } 1982 | 1983 | mpc_parser_t *mpc_satisfy(int(*f)(char)) { 1984 | mpc_parser_t *p = mpc_undefined(); 1985 | p->type = MPC_TYPE_SATISFY; 1986 | p->data.satisfy.f = f; 1987 | return mpc_expectf(p, "character satisfying function %p", 
f); 1988 | } 1989 | 1990 | mpc_parser_t *mpc_string(const char *s) { 1991 | mpc_parser_t *p = mpc_undefined(); 1992 | p->type = MPC_TYPE_STRING; 1993 | p->data.string.x = tgc_alloc(&gc, strlen(s) + 1); 1994 | strcpy(p->data.string.x, s); 1995 | return mpc_expectf(p, "\"%s\"", s); 1996 | } 1997 | 1998 | /* 1999 | ** Core Parsers 2000 | */ 2001 | 2002 | mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f) { 2003 | mpc_parser_t *p = mpc_undefined(); 2004 | p->type = MPC_TYPE_APPLY; 2005 | p->data.apply.x = a; 2006 | p->data.apply.f = f; 2007 | return p; 2008 | } 2009 | 2010 | mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x) { 2011 | mpc_parser_t *p = mpc_undefined(); 2012 | p->type = MPC_TYPE_APPLY_TO; 2013 | p->data.apply_to.x = a; 2014 | p->data.apply_to.f = f; 2015 | p->data.apply_to.d = x; 2016 | return p; 2017 | } 2018 | 2019 | mpc_parser_t *mpc_predictive(mpc_parser_t *a) { 2020 | mpc_parser_t *p = mpc_undefined(); 2021 | p->type = MPC_TYPE_PREDICT; 2022 | p->data.predict.x = a; 2023 | return p; 2024 | } 2025 | 2026 | mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf) { 2027 | mpc_parser_t *p = mpc_undefined(); 2028 | p->type = MPC_TYPE_NOT; 2029 | p->data.not.x = a; 2030 | p->data.not.dx = da; 2031 | p->data.not.lf = lf; 2032 | return p; 2033 | } 2034 | 2035 | mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da) { 2036 | return mpc_not_lift(a, da, mpcf_ctor_null); 2037 | } 2038 | 2039 | mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf) { 2040 | mpc_parser_t *p = mpc_undefined(); 2041 | p->type = MPC_TYPE_MAYBE; 2042 | p->data.not.x = a; 2043 | p->data.not.lf = lf; 2044 | return p; 2045 | } 2046 | 2047 | mpc_parser_t *mpc_maybe(mpc_parser_t *a) { 2048 | return mpc_maybe_lift(a, mpcf_ctor_null); 2049 | } 2050 | 2051 | mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a) { 2052 | mpc_parser_t *p = mpc_undefined(); 2053 | p->type = MPC_TYPE_MANY; 2054 | p->data.repeat.x = a; 2055 | p->data.repeat.f = f; 
2056 | return p; 2057 | } 2058 | 2059 | mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a) { 2060 | mpc_parser_t *p = mpc_undefined(); 2061 | p->type = MPC_TYPE_MANY1; 2062 | p->data.repeat.x = a; 2063 | p->data.repeat.f = f; 2064 | return p; 2065 | } 2066 | 2067 | mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da) { 2068 | mpc_parser_t *p = mpc_undefined(); 2069 | p->type = MPC_TYPE_COUNT; 2070 | p->data.repeat.n = n; 2071 | p->data.repeat.f = f; 2072 | p->data.repeat.x = a; 2073 | p->data.repeat.dx = da; 2074 | return p; 2075 | } 2076 | 2077 | mpc_parser_t *mpc_or(int n, ...) { 2078 | 2079 | int i; 2080 | va_list va; 2081 | 2082 | mpc_parser_t *p = mpc_undefined(); 2083 | 2084 | p->type = MPC_TYPE_OR; 2085 | p->data.or.n = n; 2086 | p->data.or.xs = tgc_alloc(&gc, sizeof(mpc_parser_t*) * n); 2087 | 2088 | va_start(va, n); 2089 | for (i = 0; i < n; i++) { 2090 | p->data.or.xs[i] = va_arg(va, mpc_parser_t*); 2091 | } 2092 | va_end(va); 2093 | 2094 | return p; 2095 | } 2096 | 2097 | mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) 
{ 2098 | 2099 | int i; 2100 | va_list va; 2101 | 2102 | mpc_parser_t *p = mpc_undefined(); 2103 | 2104 | p->type = MPC_TYPE_AND; 2105 | p->data.and.n = n; 2106 | p->data.and.f = f; 2107 | p->data.and.xs = tgc_alloc(&gc, sizeof(mpc_parser_t*) * n); 2108 | p->data.and.dxs = tgc_alloc(&gc, sizeof(mpc_dtor_t) * (n-1)); 2109 | 2110 | va_start(va, f); 2111 | for (i = 0; i < n; i++) { 2112 | p->data.and.xs[i] = va_arg(va, mpc_parser_t*); 2113 | } 2114 | for (i = 0; i < (n-1); i++) { 2115 | p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); 2116 | } 2117 | va_end(va); 2118 | 2119 | return p; 2120 | } 2121 | 2122 | /* 2123 | ** Common Parsers 2124 | */ 2125 | 2126 | static int mpc_soi_anchor(char prev, char next) { (void) next; return (prev == '\0'); } 2127 | static int mpc_eoi_anchor(char prev, char next) { (void) prev; return (next == '\0'); } 2128 | 2129 | mpc_parser_t *mpc_soi(void) { return mpc_expect(mpc_anchor(mpc_soi_anchor), "start of input"); } 2130 | mpc_parser_t *mpc_eoi(void) { return mpc_expect(mpc_anchor(mpc_eoi_anchor), "end of input"); } 2131 | 2132 | static int mpc_boundary_anchor(char prev, char next) { 2133 | const char* word = "abcdefghijklmnopqrstuvwxyz" 2134 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 2135 | "0123456789_"; 2136 | if ( strchr(word, next) && prev == '\0') { return 1; } 2137 | if ( strchr(word, prev) && next == '\0') { return 1; } 2138 | if ( strchr(word, next) && !strchr(word, prev)) { return 1; } 2139 | if (!strchr(word, next) && strchr(word, prev)) { return 1; } 2140 | return 0; 2141 | } 2142 | 2143 | mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "boundary"); } 2144 | 2145 | mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); } 2146 | mpc_parser_t *mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); } 2147 | mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); } 2148 | 2149 | 
mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } 2150 | mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } 2151 | mpc_parser_t *mpc_escape(void) { return mpc_and(2, mpcf_strfold, mpc_char('\\'), mpc_any(), nothing); } 2152 | 2153 | mpc_parser_t *mpc_digit(void) { return mpc_expect(mpc_oneof("0123456789"), "digit"); } 2154 | mpc_parser_t *mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } 2155 | mpc_parser_t *mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } 2156 | mpc_parser_t *mpc_digits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_digit()), "digits"); } 2157 | mpc_parser_t *mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_hexdigit()), "hex digits"); } 2158 | mpc_parser_t *mpc_octdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_octdigit()), "oct digits"); } 2159 | 2160 | mpc_parser_t *mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } 2161 | mpc_parser_t *mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } 2162 | mpc_parser_t *mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } 2163 | mpc_parser_t *mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } 2164 | mpc_parser_t *mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } 2165 | 2166 | mpc_parser_t *mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } 2167 | mpc_parser_t *mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } 2168 | mpc_parser_t *mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } 2169 | mpc_parser_t *mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } 
2170 | 2171 | mpc_parser_t *mpc_real(void) { 2172 | 2173 | /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? */ 2174 | 2175 | mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; 2176 | 2177 | p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); 2178 | p1 = mpc_digits(); 2179 | p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), nothing), mpcf_ctor_str); 2180 | p30 = mpc_oneof("eE"); 2181 | p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); 2182 | p32 = mpc_digits(); 2183 | p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, nothing, nothing), mpcf_ctor_str); 2184 | 2185 | return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, nothing, nothing, nothing), "real"); 2186 | 2187 | } 2188 | 2189 | mpc_parser_t *mpc_float(void) { 2190 | return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); 2191 | } 2192 | 2193 | mpc_parser_t *mpc_char_lit(void) { 2194 | return mpc_expect(mpc_between(mpc_or(2, mpc_escape(), mpc_any()), nothing, "'", "'"), "char"); 2195 | } 2196 | 2197 | mpc_parser_t *mpc_string_lit(void) { 2198 | mpc_parser_t *strchar = mpc_or(2, mpc_escape(), mpc_noneof("\"")); 2199 | return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), nothing, "\"", "\""), "string"); 2200 | } 2201 | 2202 | mpc_parser_t *mpc_regex_lit(void) { 2203 | mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); 2204 | return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), nothing, "/", "/"), "regex"); 2205 | } 2206 | 2207 | mpc_parser_t *mpc_ident(void) { 2208 | mpc_parser_t *p0, *p1; 2209 | p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); 2210 | p1 = mpc_many(mpcf_strfold, mpc_alphanum()); 2211 | return mpc_and(2, mpcf_strfold, p0, p1, nothing); 2212 | } 2213 | 2214 | /* 2215 | ** Useful Parsers 2216 | */ 2217 | 2218 | mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); } 2219 | mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), 
da); } 2220 | mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); } 2221 | 2222 | mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); } 2223 | mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } 2224 | mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); } 2225 | mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } 2226 | mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); } 2227 | 2228 | mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); } 2229 | 2230 | mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { 2231 | return mpc_and(3, mpcf_snd_free, 2232 | mpc_string(o), a, mpc_string(c), 2233 | nothing, ad); 2234 | } 2235 | 2236 | mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } 2237 | mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } 2238 | mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } 2239 | mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } 2240 | 2241 | mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { 2242 | return mpc_and(3, mpcf_snd_free, 2243 | mpc_sym(o), mpc_tok(a), mpc_sym(c), 2244 | nothing, ad); 2245 | } 2246 | 2247 | mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } 2248 | mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } 2249 | mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad) { 
return mpc_tok_between(a, ad, "{", "}"); } 2250 | mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "[", "]"); } 2251 | 2252 | /* 2253 | ** Regular Expression Parsers 2254 | */ 2255 | 2256 | /* 2257 | ** So here is a cute bootstrapping. 2258 | ** 2259 | ** I'm using the previously defined 2260 | ** mpc constructs and functions to 2261 | ** parse the user regex string and 2262 | ** construct a parser from it. 2263 | ** 2264 | ** As it turns out lots of the standard 2265 | ** mpc functions look a lot like `fold` 2266 | ** functions and so can be used indirectly 2267 | ** by many of the parsing functions to build 2268 | ** a parser directly - as we are parsing. 2269 | ** 2270 | ** This is certainly something that 2271 | ** would be less elegant/interesting 2272 | ** in a two-phase parser which first 2273 | ** builds an AST and then traverses it 2274 | ** to generate the object. 2275 | ** 2276 | ** This whole thing acts as a great 2277 | ** case study for how trivial it can be 2278 | ** to write a great parser in a few 2279 | ** lines of code using mpc. 2280 | */ 2281 | 2282 | /* 2283 | ** 2284 | ** ### Regular Expression Grammar 2285 | ** 2286 | ** : | ( "|" ) 2287 | ** 2288 | ** : * 2289 | ** 2290 | ** : 2291 | ** | "*" 2292 | ** | "+" 2293 | ** | "?" 
2294 | ** | "{" "}" 2295 | ** 2296 | ** : 2297 | ** | "\" 2298 | ** | "(" ")" 2299 | ** | "[" "]" 2300 | */ 2301 | 2302 | static mpc_val_t *mpcf_re_or(int n, mpc_val_t **xs) { 2303 | (void) n; 2304 | if (xs[1] == NULL) { return xs[0]; } 2305 | else { return mpc_or(2, xs[0], xs[1]); } 2306 | } 2307 | 2308 | static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { 2309 | int i; 2310 | mpc_parser_t *p = mpc_lift(mpcf_ctor_str); 2311 | for (i = 0; i < n; i++) { 2312 | p = mpc_and(2, mpcf_strfold, p, xs[i], nothing); 2313 | } 2314 | return p; 2315 | } 2316 | 2317 | static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { 2318 | int num; 2319 | (void) n; 2320 | if (xs[1] == NULL) { return xs[0]; } 2321 | if (strcmp(xs[1], "*") == 0) { //free(xs[1]); 2322 | return mpc_many(mpcf_strfold, xs[0]); } 2323 | if (strcmp(xs[1], "+") == 0) { //free(xs[1]); 2324 | return mpc_many1(mpcf_strfold, xs[0]); } 2325 | if (strcmp(xs[1], "?") == 0) { //free(xs[1]); 2326 | return mpc_maybe_lift(xs[0], mpcf_ctor_str); } 2327 | num = *(int*)xs[1]; 2328 | //free(xs[1]); 2329 | 2330 | return mpc_count(num, mpcf_strfold, xs[0], nothing); 2331 | } 2332 | 2333 | static mpc_parser_t *mpc_re_escape_char(char c) { 2334 | switch (c) { 2335 | case 'a': return mpc_char('\a'); 2336 | case 'f': return mpc_char('\f'); 2337 | case 'n': return mpc_char('\n'); 2338 | case 'r': return mpc_char('\r'); 2339 | case 't': return mpc_char('\t'); 2340 | case 'v': return mpc_char('\v'); 2341 | case 'b': return mpc_and(2, mpcf_snd, mpc_boundary(), mpc_lift(mpcf_ctor_str), nothing); 2342 | case 'B': return mpc_not_lift(mpc_boundary(), nothing, mpcf_ctor_str); 2343 | case 'A': return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), nothing); 2344 | case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), nothing); 2345 | case 'd': return mpc_digit(); 2346 | case 'D': return mpc_not_lift(mpc_digit(), nothing, mpcf_ctor_str); 2347 | case 's': return mpc_whitespace(); 2348 | case 'S': return 
mpc_not_lift(mpc_whitespace(), nothing, mpcf_ctor_str); 2349 | case 'w': return mpc_alphanum(); 2350 | case 'W': return mpc_not_lift(mpc_alphanum(), nothing, mpcf_ctor_str); 2351 | default: return NULL; 2352 | } 2353 | } 2354 | 2355 | static mpc_val_t *mpcf_re_escape(mpc_val_t *x) { 2356 | 2357 | char *s = x; 2358 | mpc_parser_t *p; 2359 | 2360 | /* Regex Special Characters */ 2361 | if (s[0] == '.') { //free(s); 2362 | return mpc_any(); } 2363 | if (s[0] == '^') { //free(s); 2364 | return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), nothing); } 2365 | if (s[0] == '$') { //free(s); 2366 | return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), nothing); } 2367 | 2368 | /* Regex Escape */ 2369 | if (s[0] == '\\') { 2370 | p = mpc_re_escape_char(s[1]); 2371 | p = (p == NULL) ? mpc_char(s[1]) : p; 2372 | //free(s); 2373 | return p; 2374 | } 2375 | 2376 | /* Regex Standard */ 2377 | p = mpc_char(s[0]); 2378 | //free(s); 2379 | return p; 2380 | } 2381 | 2382 | static const char *mpc_re_range_escape_char(char c) { 2383 | switch (c) { 2384 | case '-': return "-"; 2385 | case 'a': return "\a"; 2386 | case 'f': return "\f"; 2387 | case 'n': return "\n"; 2388 | case 'r': return "\r"; 2389 | case 't': return "\t"; 2390 | case 'v': return "\v"; 2391 | case 'b': return "\b"; 2392 | case 'd': return "0123456789"; 2393 | case 's': return " \f\n\r\t\v"; 2394 | case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; 2395 | default: return NULL; 2396 | } 2397 | } 2398 | 2399 | static mpc_val_t *mpcf_re_range(mpc_val_t *x) { 2400 | 2401 | mpc_parser_t *out; 2402 | size_t i, j; 2403 | size_t start, end; 2404 | const char *tmp = NULL; 2405 | const char *s = x; 2406 | int comp = s[0] == '^' ? 
1 : 0; 2407 | char *range = tgc_calloc(&gc, 1,1); 2408 | 2409 | if (s[0] == '\0') { //free(x); 2410 | return mpc_fail("Invalid Regex Range Expression"); } 2411 | if (s[0] == '^' && 2412 | s[1] == '\0') { //free(x); 2413 | return mpc_fail("Invalid Regex Range Expression"); } 2414 | 2415 | for (i = comp; i < strlen(s); i++){ 2416 | 2417 | /* Regex Range Escape */ 2418 | if (s[i] == '\\') { 2419 | tmp = mpc_re_range_escape_char(s[i+1]); 2420 | if (tmp != NULL) { 2421 | range = tgc_realloc(&gc, range, strlen(range) + strlen(tmp) + 1); 2422 | strcat(range, tmp); 2423 | } else { 2424 | range = tgc_realloc(&gc, range, strlen(range) + 1 + 1); 2425 | range[strlen(range) + 1] = '\0'; 2426 | range[strlen(range) + 0] = s[i+1]; 2427 | } 2428 | i++; 2429 | } 2430 | 2431 | /* Regex Range...Range */ 2432 | else if (s[i] == '-') { 2433 | if (s[i+1] == '\0' || i == 0) { 2434 | range = tgc_realloc(&gc, range, strlen(range) + strlen("-") + 1); 2435 | strcat(range, "-"); 2436 | } else { 2437 | start = s[i-1]+1; 2438 | end = s[i+1]-1; 2439 | for (j = start; j <= end; j++) { 2440 | range = tgc_realloc(&gc, range, strlen(range) + 1 + 1 + 1); 2441 | range[strlen(range) + 1] = '\0'; 2442 | range[strlen(range) + 0] = j; 2443 | } 2444 | } 2445 | } 2446 | 2447 | /* Regex Range Normal */ 2448 | else { 2449 | range = tgc_realloc(&gc, range, strlen(range) + 1 + 1); 2450 | range[strlen(range) + 1] = '\0'; 2451 | range[strlen(range) + 0] = s[i]; 2452 | } 2453 | 2454 | } 2455 | 2456 | out = comp == 1 ? 
mpc_noneof(range) : mpc_oneof(range); 2457 | 2458 | //free(x); 2459 | //free(range); 2460 | 2461 | return out; 2462 | } 2463 | 2464 | mpc_parser_t *mpc_re(const char *re) { 2465 | 2466 | char *err_msg; 2467 | mpc_parser_t *err_out; 2468 | mpc_result_t r; 2469 | mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; 2470 | 2471 | Regex = mpc_new("regex"); 2472 | Term = mpc_new("term"); 2473 | Factor = mpc_new("factor"); 2474 | Base = mpc_new("base"); 2475 | Range = mpc_new("range"); 2476 | 2477 | mpc_define(Regex, mpc_and(2, mpcf_re_or, 2478 | Term, 2479 | mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, nothing)), 2480 | (mpc_dtor_t)mpc_delete 2481 | )); 2482 | 2483 | mpc_define(Term, mpc_many(mpcf_re_and, Factor)); 2484 | 2485 | mpc_define(Factor, mpc_and(2, mpcf_re_repeat, 2486 | Base, 2487 | mpc_or(5, 2488 | mpc_char('*'), mpc_char('+'), mpc_char('?'), 2489 | mpc_brackets(mpc_int(), nothing), 2490 | mpc_pass()), 2491 | (mpc_dtor_t)mpc_delete 2492 | )); 2493 | 2494 | mpc_define(Base, mpc_or(4, 2495 | mpc_parens(Regex, (mpc_dtor_t)mpc_delete), 2496 | mpc_squares(Range, (mpc_dtor_t)mpc_delete), 2497 | mpc_apply(mpc_escape(), mpcf_re_escape), 2498 | mpc_apply(mpc_noneof(")|"), mpcf_re_escape) 2499 | )); 2500 | 2501 | mpc_define(Range, mpc_apply( 2502 | mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), 2503 | mpcf_re_range 2504 | )); 2505 | 2506 | RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete); 2507 | 2508 | mpc_optimise(RegexEnclose); 2509 | mpc_optimise(Regex); 2510 | mpc_optimise(Term); 2511 | mpc_optimise(Factor); 2512 | mpc_optimise(Base); 2513 | mpc_optimise(Range); 2514 | 2515 | if(!mpc_parse("", re, RegexEnclose, &r)) { 2516 | err_msg = mpc_err_string(r.error); 2517 | err_out = mpc_failf("Invalid Regex: %s", err_msg); 2518 | mpc_err_delete(r.error); 2519 | //free(err_msg); 2520 | r.output = err_out; 2521 | } 2522 | 2523 | mpc_cleanup(6, RegexEnclose, Regex, Term, Factor, Base, Range); 2524 | 2525 | 
mpc_optimise(r.output); 2526 | 2527 | return r.output; 2528 | 2529 | } 2530 | 2531 | /* 2532 | ** Common Fold Functions 2533 | */ 2534 | 2535 | void mpcf_dtor_null(mpc_val_t *x) { (void) x; return; } 2536 | 2537 | mpc_val_t *mpcf_ctor_null(void) { return NULL; } 2538 | mpc_val_t *mpcf_ctor_str(void) { return tgc_calloc(&gc, 1, 1); } 2539 | mpc_val_t *mpcf_free(mpc_val_t *x) { //free(x); 2540 | return NULL; } 2541 | 2542 | mpc_val_t *mpcf_int(mpc_val_t *x) { 2543 | int *y = tgc_alloc(&gc, sizeof(int)); 2544 | *y = strtol(x, NULL, 10); 2545 | //free(x); 2546 | return y; 2547 | } 2548 | 2549 | mpc_val_t *mpcf_hex(mpc_val_t *x) { 2550 | int *y = tgc_alloc(&gc, sizeof(int)); 2551 | *y = strtol(x, NULL, 16); 2552 | //free(x); 2553 | return y; 2554 | } 2555 | 2556 | mpc_val_t *mpcf_oct(mpc_val_t *x) { 2557 | int *y = tgc_alloc(&gc, sizeof(int)); 2558 | *y = strtol(x, NULL, 8); 2559 | //free(x); 2560 | return y; 2561 | } 2562 | 2563 | mpc_val_t *mpcf_float(mpc_val_t *x) { 2564 | float *y = tgc_alloc(&gc, sizeof(float)); 2565 | *y = strtod(x, NULL); 2566 | //free(x); 2567 | return y; 2568 | } 2569 | 2570 | mpc_val_t *mpcf_strtriml(mpc_val_t *x) { 2571 | char *s = x; 2572 | while (isspace(*s)) { 2573 | memmove(s, s+1, strlen(s)); 2574 | } 2575 | return s; 2576 | } 2577 | 2578 | mpc_val_t *mpcf_strtrimr(mpc_val_t *x) { 2579 | char *s = x; 2580 | size_t l = strlen(s); 2581 | while (isspace(s[l-1])) { 2582 | s[l-1] = '\0'; l--; 2583 | } 2584 | return s; 2585 | } 2586 | 2587 | mpc_val_t *mpcf_strtrim(mpc_val_t *x) { 2588 | return mpcf_strtriml(mpcf_strtrimr(x)); 2589 | } 2590 | 2591 | static const char mpc_escape_input_c[] = { 2592 | '\a', '\b', '\f', '\n', '\r', 2593 | '\t', '\v', '\\', '\'', '\"', '\0'}; 2594 | 2595 | static const char *mpc_escape_output_c[] = { 2596 | "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", 2597 | "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; 2598 | 2599 | static const char mpc_escape_input_raw_re[] = { '/' }; 2600 | static const char 
*mpc_escape_output_raw_re[] = { "\\/", NULL }; 2601 | 2602 | static const char mpc_escape_input_raw_cstr[] = { '"' }; 2603 | static const char *mpc_escape_output_raw_cstr[] = { "\\\"", NULL }; 2604 | 2605 | static const char mpc_escape_input_raw_cchar[] = { '\'' }; 2606 | static const char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; 2607 | 2608 | static mpc_val_t *mpcf_escape_new(mpc_val_t *x, const char *input, const char **output) { 2609 | 2610 | int i; 2611 | int found; 2612 | char buff[2]; 2613 | char *s = x; 2614 | char *y = tgc_calloc(&gc, 1, 1); 2615 | 2616 | while (*s) { 2617 | 2618 | i = 0; 2619 | found = 0; 2620 | 2621 | while (output[i]) { 2622 | if (*s == input[i]) { 2623 | y = tgc_realloc(&gc, y, strlen(y) + strlen(output[i]) + 1); 2624 | strcat(y, output[i]); 2625 | found = 1; 2626 | break; 2627 | } 2628 | i++; 2629 | } 2630 | 2631 | if (!found) { 2632 | y = tgc_realloc(&gc, y, strlen(y) + 2); 2633 | buff[0] = *s; buff[1] = '\0'; 2634 | strcat(y, buff); 2635 | } 2636 | 2637 | s++; 2638 | } 2639 | 2640 | 2641 | return y; 2642 | } 2643 | 2644 | static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, const char *input, const char **output) { 2645 | 2646 | int i, j; 2647 | int found = 0; 2648 | char buff[2]; 2649 | char *s = x; 2650 | char *y = tgc_calloc(&gc, 1, 1); 2651 | 2652 | while (s[j]) { 2653 | 2654 | i = 0; 2655 | found = 0; 2656 | 2657 | while (output[i]) { 2658 | if (s[j+0] == output[i][0] 2659 | && s[j+1] == output[i][1]) { 2660 | y = tgc_realloc(&gc, y, strlen(y) + 1 + 1); 2661 | buff[0] = input[i]; buff[1] = '\0'; 2662 | strcat(y, buff); 2663 | found = 1; 2664 | j++; 2665 | break; 2666 | } 2667 | i++; 2668 | } 2669 | 2670 | if (!found) { 2671 | y = tgc_realloc(&gc, y, strlen(y) + 1 + 1); 2672 | buff[0] = s[j]; 2673 | buff[1] = '\0'; 2674 | strcat(y, buff); 2675 | } 2676 | 2677 | if (s[j] == '\0') { break; } 2678 | else { j++; } 2679 | } 2680 | 2681 | return y; 2682 | 2683 | } 2684 | 2685 | mpc_val_t *mpcf_escape(mpc_val_t *x) { 2686 | 
mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c); 2687 | //free(x); 2688 | return y; 2689 | } 2690 | 2691 | mpc_val_t *mpcf_unescape(mpc_val_t *x) { 2692 | mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c); 2693 | //free(x); 2694 | return y; 2695 | } 2696 | 2697 | mpc_val_t *mpcf_escape_regex(mpc_val_t *x) { 2698 | mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); 2699 | //free(x); 2700 | return y; 2701 | } 2702 | 2703 | mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { 2704 | mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); 2705 | //free(x); 2706 | return y; 2707 | } 2708 | 2709 | mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { 2710 | mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); 2711 | //free(x); 2712 | return y; 2713 | } 2714 | 2715 | mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x) { 2716 | mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); 2717 | //free(x); 2718 | return y; 2719 | } 2720 | 2721 | mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x) { 2722 | mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); 2723 | //free(x); 2724 | return y; 2725 | } 2726 | 2727 | mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) { 2728 | mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); 2729 | //free(x); 2730 | return y; 2731 | } 2732 | 2733 | mpc_val_t *mpcf_null(int n, mpc_val_t** xs) { (void) n; (void) xs; return NULL; } 2734 | mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { (void) n; return xs[0]; } 2735 | mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { (void) n; return xs[1]; } 2736 | mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { (void) n; return xs[2]; } 2737 | 2738 | static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { 2739 | int i; 2740 | for (i = 0; i < n; i++) { 2741 | if (i != 
x) { 2742 | //free(xs[i]); 2743 | } 2744 | } 2745 | return xs[x]; 2746 | } 2747 | 2748 | mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } 2749 | mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } 2750 | mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } 2751 | 2752 | mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { 2753 | int i; 2754 | size_t l = 0; 2755 | 2756 | if (n == 0) { return tgc_calloc(&gc, 1, 1); } 2757 | 2758 | for (i = 0; i < n; i++) { l += strlen(xs[i]); } 2759 | 2760 | xs[0] = tgc_realloc(&gc, xs[0], l + 1); 2761 | 2762 | for (i = 1; i < n; i++) { 2763 | strcat(xs[0], xs[i]); //free(xs[i]); 2764 | } 2765 | 2766 | return xs[0]; 2767 | } 2768 | 2769 | mpc_val_t *mpcf_maths(int n, mpc_val_t **xs) { 2770 | int **vs = (int**)xs; 2771 | (void) n; 2772 | 2773 | if (strcmp(xs[1], "*") == 0) { *vs[0] *= *vs[2]; } 2774 | if (strcmp(xs[1], "/") == 0) { *vs[0] /= *vs[2]; } 2775 | if (strcmp(xs[1], "%") == 0) { *vs[0] %= *vs[2]; } 2776 | if (strcmp(xs[1], "+") == 0) { *vs[0] += *vs[2]; } 2777 | if (strcmp(xs[1], "-") == 0) { *vs[0] -= *vs[2]; } 2778 | 2779 | //free(xs[1]); free(xs[2]); 2780 | 2781 | return xs[0]; 2782 | } 2783 | 2784 | /* 2785 | ** Printing 2786 | */ 2787 | 2788 | static void mpc_print_unretained(mpc_parser_t *p, int force) { 2789 | 2790 | /* TODO: Print Everything Escaped */ 2791 | 2792 | int i; 2793 | char *s, *e; 2794 | char buff[2]; 2795 | 2796 | if (p->retained && !force) {; 2797 | if (p->name) { printf("<%s>", p->name); } 2798 | else { printf(""); } 2799 | return; 2800 | } 2801 | 2802 | if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } 2803 | if (p->type == MPC_TYPE_PASS) { printf("<:>"); } 2804 | if (p->type == MPC_TYPE_FAIL) { printf(""); } 2805 | if (p->type == MPC_TYPE_LIFT) { printf("<#>"); } 2806 | if (p->type == MPC_TYPE_STATE) { printf(""); } 2807 | if (p->type == MPC_TYPE_ANCHOR) { printf("<@>"); } 2808 | if (p->type == 
MPC_TYPE_EXPECT) { 2809 | printf("%s", p->data.expect.m); 2810 | /*mpc_print_unretained(p->data.expect.x, 0);*/ 2811 | } 2812 | 2813 | if (p->type == MPC_TYPE_ANY) { printf("<.>"); } 2814 | if (p->type == MPC_TYPE_SATISFY) { printf(""); } 2815 | 2816 | if (p->type == MPC_TYPE_SINGLE) { 2817 | buff[0] = p->data.single.x; buff[1] = '\0'; 2818 | s = mpcf_escape_new( 2819 | buff, 2820 | mpc_escape_input_c, 2821 | mpc_escape_output_c); 2822 | printf("'%s'", s); 2823 | //free(s); 2824 | } 2825 | 2826 | if (p->type == MPC_TYPE_RANGE) { 2827 | buff[0] = p->data.range.x; buff[1] = '\0'; 2828 | s = mpcf_escape_new( 2829 | buff, 2830 | mpc_escape_input_c, 2831 | mpc_escape_output_c); 2832 | buff[0] = p->data.range.y; buff[1] = '\0'; 2833 | e = mpcf_escape_new( 2834 | buff, 2835 | mpc_escape_input_c, 2836 | mpc_escape_output_c); 2837 | printf("[%s-%s]", s, e); 2838 | //free(s); 2839 | //free(e); 2840 | } 2841 | 2842 | if (p->type == MPC_TYPE_ONEOF) { 2843 | s = mpcf_escape_new( 2844 | p->data.string.x, 2845 | mpc_escape_input_c, 2846 | mpc_escape_output_c); 2847 | printf("[%s]", s); 2848 | //free(s); 2849 | } 2850 | 2851 | if (p->type == MPC_TYPE_NONEOF) { 2852 | s = mpcf_escape_new( 2853 | p->data.string.x, 2854 | mpc_escape_input_c, 2855 | mpc_escape_output_c); 2856 | printf("[^%s]", s); 2857 | //free(s); 2858 | } 2859 | 2860 | if (p->type == MPC_TYPE_STRING) { 2861 | s = mpcf_escape_new( 2862 | p->data.string.x, 2863 | mpc_escape_input_c, 2864 | mpc_escape_output_c); 2865 | printf("\"%s\"", s); 2866 | //free(s); 2867 | } 2868 | 2869 | if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } 2870 | if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } 2871 | if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } 2872 | 2873 | if (p->type == MPC_TYPE_NOT) { mpc_print_unretained(p->data.not.x, 0); printf("!"); } 2874 | if (p->type == MPC_TYPE_MAYBE) { mpc_print_unretained(p->data.not.x, 0); printf("?"); 
} 2875 | 2876 | if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 0); printf("*"); } 2877 | if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } 2878 | if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } 2879 | 2880 | if (p->type == MPC_TYPE_OR) { 2881 | printf("("); 2882 | for(i = 0; i < p->data.or.n-1; i++) { 2883 | mpc_print_unretained(p->data.or.xs[i], 0); 2884 | printf(" | "); 2885 | } 2886 | mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); 2887 | printf(")"); 2888 | } 2889 | 2890 | if (p->type == MPC_TYPE_AND) { 2891 | printf("("); 2892 | for(i = 0; i < p->data.and.n-1; i++) { 2893 | mpc_print_unretained(p->data.and.xs[i], 0); 2894 | printf(" "); 2895 | } 2896 | mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); 2897 | printf(")"); 2898 | } 2899 | 2900 | } 2901 | 2902 | void mpc_print(mpc_parser_t *p) { 2903 | mpc_print_unretained(p, 1); 2904 | printf("\n"); 2905 | } 2906 | 2907 | /* 2908 | ** Testing 2909 | */ 2910 | 2911 | /* 2912 | ** These functions are slightly unwieldy and 2913 | ** also the whole of the testing suite for mpc 2914 | ** mpc is pretty shaky. 2915 | ** 2916 | ** It could do with a lot more tests and more 2917 | ** precision. Currently I am only really testing 2918 | ** changes off of the examples. 
2919 | ** 2920 | */ 2921 | 2922 | int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, 2923 | int(*tester)(const void*, const void*), 2924 | mpc_dtor_t destructor, 2925 | void(*printer)(const void*)) { 2926 | mpc_result_t r; 2927 | (void) printer; 2928 | if (mpc_parse("", s, p, &r)) { 2929 | 2930 | if (tester(r.output, d)) { 2931 | destructor(r.output); 2932 | return 0; 2933 | } else { 2934 | destructor(r.output); 2935 | return 1; 2936 | } 2937 | 2938 | } else { 2939 | mpc_err_delete(r.error); 2940 | return 1; 2941 | } 2942 | 2943 | } 2944 | 2945 | int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, 2946 | int(*tester)(const void*, const void*), 2947 | mpc_dtor_t destructor, 2948 | void(*printer)(const void*)) { 2949 | 2950 | mpc_result_t r; 2951 | if (mpc_parse("", s, p, &r)) { 2952 | 2953 | if (tester(r.output, d)) { 2954 | destructor(r.output); 2955 | return 1; 2956 | } else { 2957 | printf("Got "); printer(r.output); printf("\n"); 2958 | printf("Expected "); printer(d); printf("\n"); 2959 | destructor(r.output); 2960 | return 0; 2961 | } 2962 | 2963 | } else { 2964 | mpc_err_print(r.error); 2965 | mpc_err_delete(r.error); 2966 | return 0; 2967 | 2968 | } 2969 | 2970 | } 2971 | 2972 | 2973 | /* 2974 | ** AST 2975 | */ 2976 | 2977 | void mpc_ast_delete(mpc_ast_t *a) { 2978 | 2979 | int i; 2980 | 2981 | if (a == NULL) { return; } 2982 | 2983 | for (i = 0; i < a->children_num; i++) { 2984 | mpc_ast_delete(a->children[i]); 2985 | } 2986 | 2987 | //free(a->children); 2988 | //free(a->tag); 2989 | //free(a->contents); 2990 | //free(a); 2991 | 2992 | } 2993 | 2994 | static void mpc_ast_delete_no_children(mpc_ast_t *a) { 2995 | //free(a->children); 2996 | //free(a->tag); 2997 | //free(a->contents); 2998 | //free(a); 2999 | } 3000 | 3001 | mpc_ast_t *mpc_ast_new(const char *tag, const char *contents) { 3002 | 3003 | mpc_ast_t *a = tgc_alloc(&gc, sizeof(mpc_ast_t)); 3004 | 3005 | a->tag = tgc_alloc(&gc, strlen(tag) + 1); 3006 | strcpy(a->tag, 
tag); 3007 | 3008 | a->contents = tgc_alloc(&gc, strlen(contents) + 1); 3009 | strcpy(a->contents, contents); 3010 | 3011 | a->state = mpc_state_new(); 3012 | 3013 | a->children_num = 0; 3014 | a->children = NULL; 3015 | return a; 3016 | 3017 | } 3018 | 3019 | mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) { 3020 | 3021 | mpc_ast_t *a = mpc_ast_new(tag, ""); 3022 | 3023 | int i; 3024 | va_list va; 3025 | va_start(va, tag); 3026 | 3027 | for (i = 0; i < n; i++) { 3028 | mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); 3029 | } 3030 | 3031 | va_end(va); 3032 | 3033 | return a; 3034 | 3035 | } 3036 | 3037 | mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { 3038 | 3039 | mpc_ast_t *r; 3040 | 3041 | if (a == NULL) { return a; } 3042 | if (a->children_num == 0) { return a; } 3043 | if (a->children_num == 1) { return a; } 3044 | 3045 | r = mpc_ast_new(">", ""); 3046 | mpc_ast_add_child(r, a); 3047 | return r; 3048 | } 3049 | 3050 | int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { 3051 | 3052 | int i; 3053 | 3054 | if (strcmp(a->tag, b->tag) != 0) { return 0; } 3055 | if (strcmp(a->contents, b->contents) != 0) { return 0; } 3056 | if (a->children_num != b->children_num) { return 0; } 3057 | 3058 | for (i = 0; i < a->children_num; i++) { 3059 | if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } 3060 | } 3061 | 3062 | return 1; 3063 | } 3064 | 3065 | mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) { 3066 | r->children_num++; 3067 | r->children = tgc_realloc(&gc, r->children, sizeof(mpc_ast_t*) * r->children_num); 3068 | r->children[r->children_num-1] = a; 3069 | return r; 3070 | } 3071 | 3072 | mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) { 3073 | if (a == NULL) { return a; } 3074 | a->tag = tgc_realloc(&gc, a->tag, strlen(t) + 1 + strlen(a->tag) + 1); 3075 | memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1); 3076 | memmove(a->tag, t, strlen(t)); 3077 | memmove(a->tag + strlen(t), "|", 1); 3078 | return a; 3079 | } 3080 | 3081 | mpc_ast_t 
*mpc_ast_tag(mpc_ast_t *a, const char *t) { 3082 | a->tag = tgc_realloc(&gc, a->tag, strlen(t) + 1); 3083 | strcpy(a->tag, t); 3084 | return a; 3085 | } 3086 | 3087 | mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { 3088 | if (a == NULL) { return a; } 3089 | a->state = s; 3090 | return a; 3091 | } 3092 | 3093 | static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { 3094 | 3095 | int i; 3096 | 3097 | if (a == NULL) { 3098 | fprintf(fp, "NULL\n"); 3099 | return; 3100 | } 3101 | 3102 | for (i = 0; i < d; i++) { fprintf(fp, " "); } 3103 | 3104 | if (strlen(a->contents)) { 3105 | fprintf(fp, "%s:%lu:%lu '%s'\n", a->tag, 3106 | (long unsigned int)(a->state.row+1), 3107 | (long unsigned int)(a->state.col+1), 3108 | a->contents); 3109 | } else { 3110 | fprintf(fp, "%s \n", a->tag); 3111 | } 3112 | 3113 | for (i = 0; i < a->children_num; i++) { 3114 | mpc_ast_print_depth(a->children[i], d+1, fp); 3115 | } 3116 | 3117 | } 3118 | 3119 | void mpc_ast_print(mpc_ast_t *a) { 3120 | mpc_ast_print_depth(a, 0, stdout); 3121 | } 3122 | 3123 | void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { 3124 | mpc_ast_print_depth(a, 0, fp); 3125 | } 3126 | 3127 | mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) { 3128 | 3129 | int i, j; 3130 | mpc_ast_t** as = (mpc_ast_t**)xs; 3131 | mpc_ast_t *r; 3132 | 3133 | if (n == 0) { return NULL; } 3134 | if (n == 1) { return xs[0]; } 3135 | if (n == 2 && xs[1] == NULL) { return xs[0]; } 3136 | if (n == 2 && xs[0] == NULL) { return xs[1]; } 3137 | 3138 | r = mpc_ast_new(">", ""); 3139 | 3140 | for (i = 0; i < n; i++) { 3141 | 3142 | if (as[i] == NULL) { continue; } 3143 | 3144 | if (as[i] && as[i]->children_num > 0) { 3145 | 3146 | for (j = 0; j < as[i]->children_num; j++) { 3147 | mpc_ast_add_child(r, as[i]->children[j]); 3148 | } 3149 | 3150 | mpc_ast_delete_no_children(as[i]); 3151 | 3152 | } else if (as[i] && as[i]->children_num == 0) { 3153 | mpc_ast_add_child(r, as[i]); 3154 | } 3155 | 3156 | } 3157 | 3158 | if (r->children_num) { 
3159 | r->state = r->children[0]->state; 3160 | } 3161 | 3162 | return r; 3163 | } 3164 | 3165 | mpc_val_t *mpcf_str_ast(mpc_val_t *c) { 3166 | mpc_ast_t *a = mpc_ast_new("", c); 3167 | //free(c); 3168 | return a; 3169 | } 3170 | 3171 | mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs) { 3172 | mpc_state_t *s = ((mpc_state_t**)xs)[0]; 3173 | mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; 3174 | (void)n; 3175 | a = mpc_ast_state(a, *s); 3176 | //free(s); 3177 | return a; 3178 | } 3179 | 3180 | mpc_parser_t *mpca_state(mpc_parser_t *a) { 3181 | return mpc_and(2, mpcf_state_ast, mpc_state(), a, nothing); 3182 | } 3183 | 3184 | mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t) { 3185 | return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_tag, (void*)t); 3186 | } 3187 | 3188 | mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t) { 3189 | return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_add_tag, (void*)t); 3190 | } 3191 | 3192 | mpc_parser_t *mpca_root(mpc_parser_t *a) { 3193 | return mpc_apply(a, (mpc_apply_t)mpc_ast_add_root); 3194 | } 3195 | 3196 | mpc_parser_t *mpca_not(mpc_parser_t *a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); } 3197 | mpc_parser_t *mpca_maybe(mpc_parser_t *a) { return mpc_maybe(a); } 3198 | mpc_parser_t *mpca_many(mpc_parser_t *a) { return mpc_many(mpcf_fold_ast, a); } 3199 | mpc_parser_t *mpca_many1(mpc_parser_t *a) { return mpc_many1(mpcf_fold_ast, a); } 3200 | mpc_parser_t *mpca_count(int n, mpc_parser_t *a) { return mpc_count(n, mpcf_fold_ast, a, (mpc_dtor_t)mpc_ast_delete); } 3201 | 3202 | mpc_parser_t *mpca_or(int n, ...) 
{ 3203 | 3204 | int i; 3205 | va_list va; 3206 | 3207 | mpc_parser_t *p = mpc_undefined(); 3208 | 3209 | p->type = MPC_TYPE_OR; 3210 | p->data.or.n = n; 3211 | p->data.or.xs = tgc_alloc(&gc, sizeof(mpc_parser_t*) * n); 3212 | 3213 | va_start(va, n); 3214 | for (i = 0; i < n; i++) { 3215 | p->data.or.xs[i] = va_arg(va, mpc_parser_t*); 3216 | } 3217 | va_end(va); 3218 | 3219 | return p; 3220 | 3221 | } 3222 | 3223 | mpc_parser_t *mpca_and(int n, ...) { 3224 | 3225 | int i; 3226 | va_list va; 3227 | 3228 | mpc_parser_t *p = mpc_undefined(); 3229 | 3230 | p->type = MPC_TYPE_AND; 3231 | p->data.and.n = n; 3232 | p->data.and.f = mpcf_fold_ast; 3233 | p->data.and.xs = tgc_alloc(&gc, sizeof(mpc_parser_t*) * n); 3234 | p->data.and.dxs = tgc_alloc(&gc, sizeof(mpc_dtor_t) * (n-1)); 3235 | 3236 | va_start(va, n); 3237 | for (i = 0; i < n; i++) { 3238 | p->data.and.xs[i] = va_arg(va, mpc_parser_t*); 3239 | } 3240 | for (i = 0; i < (n-1); i++) { 3241 | p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; 3242 | } 3243 | va_end(va); 3244 | 3245 | return p; 3246 | } 3247 | 3248 | mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } 3249 | 3250 | /* 3251 | ** Grammar Parser 3252 | */ 3253 | 3254 | /* 3255 | ** This is another interesting bootstrapping. 3256 | ** 3257 | ** Having a general purpose AST type allows 3258 | ** users to specify the grammar alone and 3259 | ** let all fold rules be automatically taken 3260 | ** care of by existing functions. 3261 | ** 3262 | ** You don't get to control the type spat 3263 | ** out but this means you can make a nice 3264 | ** parser to take in some grammar in nice 3265 | ** syntax and spit out a parser that works. 3266 | ** 3267 | ** The grammar for this looks surprisingly 3268 | ** like regex but the main difference is that 3269 | ** it is now whitespace insensitive and the 3270 | ** base type takes literals of some form. 
3271 | */ 3272 | 3273 | /* 3274 | ** 3275 | ** ### Grammar Grammar 3276 | ** 3277 | ** <grammar> : (<term> "|" <grammar>) | <term> 3278 | ** 3279 | ** <term> : <factor>* 3280 | ** 3281 | ** <factor> : <base> 3282 | ** | <base> "*" 3283 | ** | <base> "+" 3284 | ** | <base> "?" 3285 | ** | <base> "{" <digits> "}" 3286 | ** 3287 | ** <base> : "<" (<digits> | <ident>) ">" 3288 | ** | <string_lit> 3289 | ** | <char_lit> 3290 | ** | <regex_lit> 3291 | ** | "(" <grammar> ")" 3292 | */ 3293 | 3294 | typedef struct { 3295 | va_list *va; 3296 | int parsers_num; 3297 | mpc_parser_t **parsers; 3298 | int flags; 3299 | } mpca_grammar_st_t; 3300 | 3301 | static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) { 3302 | (void) n; 3303 | if (xs[1] == NULL) { return xs[0]; } 3304 | else { return mpca_or(2, xs[0], xs[1]); } 3305 | } 3306 | 3307 | static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { 3308 | int i; 3309 | mpc_parser_t *p = mpc_pass(); 3310 | for (i = 0; i < n; i++) { 3311 | if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); } 3312 | } 3313 | return p; 3314 | } 3315 | 3316 | static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) { 3317 | int num; 3318 | (void) n; 3319 | if (xs[1] == NULL) { return xs[0]; } 3320 | if (strcmp(xs[1], "*") == 0) { //free(xs[1]); 3321 | return mpca_many(xs[0]); } 3322 | if (strcmp(xs[1], "+") == 0) { //free(xs[1]); 3323 | return mpca_many1(xs[0]); } 3324 | if (strcmp(xs[1], "?") == 0) { //free(xs[1]); 3325 | return mpca_maybe(xs[0]); } 3326 | if (strcmp(xs[1], "!") == 0) { //free(xs[1]); 3327 | return mpca_not(xs[0]); } 3328 | num = *((int*)xs[1]); 3329 | //free(xs[1]); 3330 | return mpca_count(num, xs[0]); 3331 | } 3332 | 3333 | static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) { 3334 | mpca_grammar_st_t *st = s; 3335 | char *y = mpcf_unescape(x); 3336 | mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? 
mpc_string(y) : mpc_tok(mpc_string(y)); 3337 | //free(y); 3338 | return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "string")); 3339 | } 3340 | 3341 | static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) { 3342 | mpca_grammar_st_t *st = s; 3343 | char *y = mpcf_unescape(x); 3344 | mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_char(y[0]) : mpc_tok(mpc_char(y[0])); 3345 | //free(y); 3346 | return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char")); 3347 | } 3348 | 3349 | static mpc_val_t *mpcaf_grammar_regex(mpc_val_t *x, void *s) { 3350 | mpca_grammar_st_t *st = s; 3351 | char *y = mpcf_unescape_regex(x); 3352 | mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re(y) : mpc_tok(mpc_re(y)); 3353 | //free(y); 3354 | return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex")); 3355 | } 3356 | 3357 | /* Should this just use `isdigit` instead? */ 3358 | static int is_number(const char* s) { 3359 | size_t i; 3360 | for (i = 0; i < strlen(s); i++) { if (!strchr("0123456789", s[i])) { return 0; } } 3361 | return 1; 3362 | } 3363 | 3364 | static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { 3365 | 3366 | int i; 3367 | mpc_parser_t *p; 3368 | 3369 | /* Case of Number */ 3370 | if (is_number(x)) { 3371 | 3372 | i = strtol(x, NULL, 10); 3373 | 3374 | while (st->parsers_num <= i) { 3375 | st->parsers_num++; 3376 | st->parsers = tgc_realloc(&gc, st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); 3377 | st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); 3378 | if (st->parsers[st->parsers_num-1] == NULL) { 3379 | return mpc_failf("No Parser in position %i! 
Only supplied %i Parsers!", i, st->parsers_num); 3380 | } 3381 | } 3382 | 3383 | return st->parsers[st->parsers_num-1]; 3384 | 3385 | /* Case of Identifier */ 3386 | } else { 3387 | 3388 | /* Search Existing Parsers */ 3389 | for (i = 0; i < st->parsers_num; i++) { 3390 | mpc_parser_t *q = st->parsers[i]; 3391 | if (q == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } 3392 | if (q->name && strcmp(q->name, x) == 0) { return q; } 3393 | } 3394 | 3395 | /* Search New Parsers */ 3396 | while (1) { 3397 | 3398 | p = va_arg(*st->va, mpc_parser_t*); 3399 | 3400 | st->parsers_num++; 3401 | st->parsers = tgc_realloc(&gc, st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); 3402 | st->parsers[st->parsers_num-1] = p; 3403 | 3404 | if (p == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } 3405 | if (p->name 3406 | && strcmp(p->name, x) == 0) { return p; } 3407 | 3408 | } 3409 | 3410 | } 3411 | 3412 | } 3413 | 3414 | static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { 3415 | 3416 | mpca_grammar_st_t *st = s; 3417 | mpc_parser_t *p = mpca_grammar_find_parser(x, st); 3418 | //free(x); 3419 | 3420 | if (p->name) { 3421 | return mpca_state(mpca_root(mpca_add_tag(p, p->name))); 3422 | } else { 3423 | return mpca_state(mpca_root(p)); 3424 | } 3425 | } 3426 | 3427 | mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { 3428 | 3429 | char *err_msg; 3430 | mpc_parser_t *err_out; 3431 | mpc_result_t r; 3432 | mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; 3433 | 3434 | GrammarTotal = mpc_new("grammar_total"); 3435 | Grammar = mpc_new("grammar"); 3436 | Term = mpc_new("term"); 3437 | Factor = mpc_new("factor"); 3438 | Base = mpc_new("base"); 3439 | 3440 | mpc_define(GrammarTotal, 3441 | mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) 3442 | ); 3443 | 3444 | mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, 3445 | Term, 3446 | mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, nothing)), 3447 | mpc_soft_delete 3448 | )); 
3449 | 3450 | mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); 3451 | 3452 | mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, 3453 | Base, 3454 | mpc_or(6, 3455 | mpc_sym("*"), 3456 | mpc_sym("+"), 3457 | mpc_sym("?"), 3458 | mpc_sym("!"), 3459 | mpc_tok_brackets(mpc_int(), nothing), 3460 | mpc_pass()), 3461 | mpc_soft_delete 3462 | )); 3463 | 3464 | mpc_define(Base, mpc_or(5, 3465 | mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), 3466 | mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), 3467 | mpc_apply_to(mpc_tok(mpc_regex_lit()), mpcaf_grammar_regex, st), 3468 | mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), nothing), mpcaf_grammar_id, st), 3469 | mpc_tok_parens(Grammar, mpc_soft_delete) 3470 | )); 3471 | 3472 | mpc_optimise(GrammarTotal); 3473 | mpc_optimise(Grammar); 3474 | mpc_optimise(Factor); 3475 | mpc_optimise(Term); 3476 | mpc_optimise(Base); 3477 | 3478 | if(!mpc_parse("", grammar, GrammarTotal, &r)) { 3479 | err_msg = mpc_err_string(r.error); 3480 | err_out = mpc_failf("Invalid Grammar: %s", err_msg); 3481 | mpc_err_delete(r.error); 3482 | //free(err_msg); 3483 | r.output = err_out; 3484 | } 3485 | 3486 | mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base); 3487 | 3488 | mpc_optimise(r.output); 3489 | 3490 | return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output; 3491 | 3492 | } 3493 | 3494 | mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) 
{ 3495 | mpca_grammar_st_t st; 3496 | mpc_parser_t *res; 3497 | va_list va; 3498 | va_start(va, grammar); 3499 | 3500 | st.va = &va; 3501 | st.parsers_num = 0; 3502 | st.parsers = NULL; 3503 | st.flags = flags; 3504 | 3505 | res = mpca_grammar_st(grammar, &st); 3506 | //free(st.parsers); 3507 | va_end(va); 3508 | return res; 3509 | } 3510 | 3511 | typedef struct { 3512 | char *ident; 3513 | char *name; 3514 | mpc_parser_t *grammar; 3515 | } mpca_stmt_t; 3516 | 3517 | static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) { 3518 | mpca_stmt_t *stmt = tgc_alloc(&gc, sizeof(mpca_stmt_t)); 3519 | stmt->ident = ((char**)xs)[0]; 3520 | stmt->name = ((char**)xs)[1]; 3521 | stmt->grammar = ((mpc_parser_t**)xs)[3]; 3522 | (void) n; 3523 | //free(((char**)xs)[2]); 3524 | //free(((char**)xs)[4]); 3525 | 3526 | return stmt; 3527 | } 3528 | 3529 | static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) { 3530 | 3531 | int i; 3532 | mpca_stmt_t **stmts = tgc_alloc(&gc, sizeof(mpca_stmt_t*) * (n+1)); 3533 | 3534 | for (i = 0; i < n; i++) { 3535 | stmts[i] = xs[i]; 3536 | } 3537 | stmts[n] = NULL; 3538 | 3539 | return stmts; 3540 | } 3541 | 3542 | static void mpca_stmt_list_delete(mpc_val_t *x) { 3543 | 3544 | mpca_stmt_t **stmts = x; 3545 | 3546 | while(*stmts) { 3547 | mpca_stmt_t *stmt = *stmts; 3548 | //free(stmt->ident); 3549 | //free(stmt->name); 3550 | mpc_soft_delete(stmt->grammar); 3551 | //free(stmt); 3552 | stmts++; 3553 | } 3554 | //free(x); 3555 | 3556 | } 3557 | 3558 | static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) { 3559 | 3560 | mpca_grammar_st_t *st = s; 3561 | mpca_stmt_t *stmt; 3562 | mpca_stmt_t **stmts = x; 3563 | mpc_parser_t *left; 3564 | 3565 | while(*stmts) { 3566 | stmt = *stmts; 3567 | left = mpca_grammar_find_parser(stmt->ident, st); 3568 | if (st->flags & MPCA_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); } 3569 | if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); } 3570 | 
mpc_optimise(stmt->grammar); 3571 | mpc_define(left, stmt->grammar); 3572 | //free(stmt->ident); 3573 | //free(stmt->name); 3574 | //free(stmt); 3575 | stmts++; 3576 | } 3577 | 3578 | //free(x); 3579 | 3580 | return NULL; 3581 | } 3582 | 3583 | static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { 3584 | 3585 | mpc_result_t r; 3586 | mpc_err_t *e; 3587 | mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; 3588 | 3589 | Lang = mpc_new("lang"); 3590 | Stmt = mpc_new("stmt"); 3591 | Grammar = mpc_new("grammar"); 3592 | Term = mpc_new("term"); 3593 | Factor = mpc_new("factor"); 3594 | Base = mpc_new("base"); 3595 | 3596 | mpc_define(Lang, mpc_apply_to( 3597 | mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete), 3598 | mpca_stmt_list_apply_to, st 3599 | )); 3600 | 3601 | mpc_define(Stmt, mpc_and(5, mpca_stmt_afold, 3602 | mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"), 3603 | nothing, nothing, nothing, mpc_soft_delete 3604 | )); 3605 | 3606 | mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, 3607 | Term, 3608 | mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, nothing)), 3609 | mpc_soft_delete 3610 | )); 3611 | 3612 | mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); 3613 | 3614 | mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, 3615 | Base, 3616 | mpc_or(6, 3617 | mpc_sym("*"), 3618 | mpc_sym("+"), 3619 | mpc_sym("?"), 3620 | mpc_sym("!"), 3621 | mpc_tok_brackets(mpc_int(), nothing), 3622 | mpc_pass()), 3623 | mpc_soft_delete 3624 | )); 3625 | 3626 | mpc_define(Base, mpc_or(5, 3627 | mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), 3628 | mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), 3629 | mpc_apply_to(mpc_tok(mpc_regex_lit()), mpcaf_grammar_regex, st), 3630 | mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), nothing), mpcaf_grammar_id, st), 3631 | mpc_tok_parens(Grammar, mpc_soft_delete) 3632 | )); 
3633 | 3634 | mpc_optimise(Lang); 3635 | mpc_optimise(Stmt); 3636 | mpc_optimise(Grammar); 3637 | mpc_optimise(Term); 3638 | mpc_optimise(Factor); 3639 | mpc_optimise(Base); 3640 | 3641 | if (!mpc_parse_input(i, Lang, &r)) { 3642 | e = r.error; 3643 | } else { 3644 | e = NULL; 3645 | } 3646 | 3647 | mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base); 3648 | 3649 | return e; 3650 | } 3651 | 3652 | mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) { 3653 | mpca_grammar_st_t st; 3654 | mpc_input_t *i; 3655 | mpc_err_t *err; 3656 | 3657 | va_list va; 3658 | va_start(va, f); 3659 | 3660 | st.va = &va; 3661 | st.parsers_num = 0; 3662 | st.parsers = NULL; 3663 | st.flags = flags; 3664 | 3665 | i = mpc_input_new_file("", f); 3666 | err = mpca_lang_st(i, &st); 3667 | mpc_input_delete(i); 3668 | 3669 | //free(st.parsers); 3670 | va_end(va); 3671 | return err; 3672 | } 3673 | 3674 | mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) { 3675 | mpca_grammar_st_t st; 3676 | mpc_input_t *i; 3677 | mpc_err_t *err; 3678 | 3679 | va_list va; 3680 | va_start(va, p); 3681 | 3682 | st.va = &va; 3683 | st.parsers_num = 0; 3684 | st.parsers = NULL; 3685 | st.flags = flags; 3686 | 3687 | i = mpc_input_new_pipe("", p); 3688 | err = mpca_lang_st(i, &st); 3689 | mpc_input_delete(i); 3690 | 3691 | //free(st.parsers); 3692 | va_end(va); 3693 | return err; 3694 | } 3695 | 3696 | mpc_err_t *mpca_lang(int flags, const char *language, ...) { 3697 | 3698 | mpca_grammar_st_t st; 3699 | mpc_input_t *i; 3700 | mpc_err_t *err; 3701 | 3702 | va_list va; 3703 | va_start(va, language); 3704 | 3705 | st.va = &va; 3706 | st.parsers_num = 0; 3707 | st.parsers = NULL; 3708 | st.flags = flags; 3709 | 3710 | i = mpc_input_new_string("", language); 3711 | err = mpca_lang_st(i, &st); 3712 | mpc_input_delete(i); 3713 | 3714 | //free(st.parsers); 3715 | va_end(va); 3716 | return err; 3717 | } 3718 | 3719 | mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) 
{ 3720 | 3721 | mpca_grammar_st_t st; 3722 | mpc_input_t *i; 3723 | mpc_err_t *err; 3724 | 3725 | va_list va; 3726 | 3727 | FILE *f = fopen(filename, "rb"); 3728 | 3729 | if (f == NULL) { 3730 | err = mpc_err_file(filename, "Unable to open file!"); 3731 | return err; 3732 | } 3733 | 3734 | va_start(va, filename); 3735 | 3736 | st.va = &va; 3737 | st.parsers_num = 0; 3738 | st.parsers = NULL; 3739 | st.flags = flags; 3740 | 3741 | i = mpc_input_new_file(filename, f); 3742 | err = mpca_lang_st(i, &st); 3743 | mpc_input_delete(i); 3744 | 3745 | //free(st.parsers); 3746 | va_end(va); 3747 | 3748 | fclose(f); 3749 | 3750 | return err; 3751 | } 3752 | 3753 | static int mpc_nodecount_unretained(mpc_parser_t* p, int force) { 3754 | 3755 | int i, total; 3756 | 3757 | if (p->retained && !force) { return 0; } 3758 | 3759 | if (p->type == MPC_TYPE_EXPECT) { return 1 + mpc_nodecount_unretained(p->data.expect.x, 0); } 3760 | 3761 | if (p->type == MPC_TYPE_APPLY) { return 1 + mpc_nodecount_unretained(p->data.apply.x, 0); } 3762 | if (p->type == MPC_TYPE_APPLY_TO) { return 1 + mpc_nodecount_unretained(p->data.apply_to.x, 0); } 3763 | if (p->type == MPC_TYPE_PREDICT) { return 1 + mpc_nodecount_unretained(p->data.predict.x, 0); } 3764 | 3765 | if (p->type == MPC_TYPE_NOT) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } 3766 | if (p->type == MPC_TYPE_MAYBE) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } 3767 | 3768 | if (p->type == MPC_TYPE_MANY) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3769 | if (p->type == MPC_TYPE_MANY1) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3770 | if (p->type == MPC_TYPE_COUNT) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3771 | 3772 | if (p->type == MPC_TYPE_OR) { 3773 | total = 0; 3774 | for(i = 0; i < p->data.or.n; i++) { 3775 | total += mpc_nodecount_unretained(p->data.or.xs[i], 0); 3776 | } 3777 | return total; 3778 | } 3779 | 3780 | if (p->type == MPC_TYPE_AND) { 3781 | 
total = 0; 3782 | for(i = 0; i < p->data.and.n; i++) { 3783 | total += mpc_nodecount_unretained(p->data.and.xs[i], 0); 3784 | } 3785 | return total; 3786 | } 3787 | 3788 | return 1; 3789 | 3790 | } 3791 | 3792 | void mpc_stats(mpc_parser_t* p) { 3793 | printf("Stats\n"); 3794 | printf("=====\n"); 3795 | printf("Node Count: %i\n", mpc_nodecount_unretained(p, 1)); 3796 | } 3797 | 3798 | static void mpc_optimise_unretained(mpc_parser_t *p, int force) { 3799 | 3800 | int i, n, m; 3801 | mpc_parser_t *t; 3802 | 3803 | if (p->retained && !force) { return; } 3804 | 3805 | /* Optimise Subexpressions */ 3806 | 3807 | if (p->type == MPC_TYPE_EXPECT) { mpc_optimise_unretained(p->data.expect.x, 0); } 3808 | if (p->type == MPC_TYPE_APPLY) { mpc_optimise_unretained(p->data.apply.x, 0); } 3809 | if (p->type == MPC_TYPE_APPLY_TO) { mpc_optimise_unretained(p->data.apply_to.x, 0); } 3810 | if (p->type == MPC_TYPE_PREDICT) { mpc_optimise_unretained(p->data.predict.x, 0); } 3811 | if (p->type == MPC_TYPE_NOT) { mpc_optimise_unretained(p->data.not.x, 0); } 3812 | if (p->type == MPC_TYPE_MAYBE) { mpc_optimise_unretained(p->data.not.x, 0); } 3813 | if (p->type == MPC_TYPE_MANY) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3814 | if (p->type == MPC_TYPE_MANY1) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3815 | if (p->type == MPC_TYPE_COUNT) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3816 | 3817 | if (p->type == MPC_TYPE_OR) { 3818 | for(i = 0; i < p->data.or.n; i++) { 3819 | mpc_optimise_unretained(p->data.or.xs[i], 0); 3820 | } 3821 | } 3822 | 3823 | if (p->type == MPC_TYPE_AND) { 3824 | for(i = 0; i < p->data.and.n; i++) { 3825 | mpc_optimise_unretained(p->data.and.xs[i], 0); 3826 | } 3827 | } 3828 | 3829 | /* Perform optimisations */ 3830 | 3831 | while (1) { 3832 | 3833 | /* Merge rhs `or` */ 3834 | if (p->type == MPC_TYPE_OR 3835 | && p->data.or.xs[p->data.or.n-1]->type == MPC_TYPE_OR 3836 | && !p->data.or.xs[p->data.or.n-1]->retained) { 3837 | t = 
p->data.or.xs[p->data.or.n-1]; 3838 | n = p->data.or.n; m = t->data.or.n; 3839 | p->data.or.n = n + m - 1; 3840 | p->data.or.xs = tgc_realloc(&gc, p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3841 | memmove(p->data.or.xs + n - 1, t->data.or.xs, m * sizeof(mpc_parser_t*)); 3842 | //free(t->data.or.xs); free(t->name); free(t); 3843 | continue; 3844 | } 3845 | 3846 | /* Merge lhs `or` */ 3847 | if (p->type == MPC_TYPE_OR 3848 | && p->data.or.xs[0]->type == MPC_TYPE_OR 3849 | && !p->data.or.xs[0]->retained) { 3850 | t = p->data.or.xs[0]; 3851 | n = p->data.or.n; m = t->data.or.n; 3852 | p->data.or.n = n + m - 1; 3853 | p->data.or.xs = tgc_realloc(&gc, p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3854 | memmove(p->data.or.xs + m, t->data.or.xs + 1, n * sizeof(mpc_parser_t*)); 3855 | memmove(p->data.or.xs, t->data.or.xs, m * sizeof(mpc_parser_t*)); 3856 | //free(t->data.or.xs); free(t->name); free(t); 3857 | continue; 3858 | } 3859 | 3860 | /* Remove ast `pass` */ 3861 | if (p->type == MPC_TYPE_AND 3862 | && p->data.and.n == 2 3863 | && p->data.and.xs[0]->type == MPC_TYPE_PASS 3864 | && !p->data.and.xs[0]->retained 3865 | && p->data.and.f == mpcf_fold_ast) { 3866 | t = p->data.and.xs[1]; 3867 | mpc_delete(p->data.and.xs[0]); 3868 | //free(p->data.and.xs); free(p->data.and.dxs); free(p->name); 3869 | memcpy(p, t, sizeof(mpc_parser_t)); 3870 | //free(t); 3871 | continue; 3872 | } 3873 | 3874 | /* Merge ast lhs `and` */ 3875 | if (p->type == MPC_TYPE_AND 3876 | && p->data.and.f == mpcf_fold_ast 3877 | && p->data.and.xs[0]->type == MPC_TYPE_AND 3878 | && !p->data.and.xs[0]->retained 3879 | && p->data.and.xs[0]->data.and.f == mpcf_fold_ast) { 3880 | t = p->data.and.xs[0]; 3881 | n = p->data.and.n; m = t->data.and.n; 3882 | p->data.and.n = n + m - 1; 3883 | p->data.and.xs = tgc_realloc(&gc, p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); 3884 | p->data.and.dxs = tgc_realloc(&gc, p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 3885 | 
memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); 3886 | memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); 3887 | for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } 3888 | //free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 3889 | continue; 3890 | } 3891 | 3892 | /* Merge ast rhs `and` */ 3893 | if (p->type == MPC_TYPE_AND 3894 | && p->data.and.f == mpcf_fold_ast 3895 | && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND 3896 | && !p->data.and.xs[p->data.and.n-1]->retained 3897 | && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_fold_ast) { 3898 | t = p->data.and.xs[p->data.and.n-1]; 3899 | n = p->data.and.n; m = t->data.and.n; 3900 | p->data.and.n = n + m - 1; 3901 | p->data.and.xs = tgc_realloc(&gc, p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3902 | p->data.and.dxs = tgc_realloc(&gc, p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 3903 | memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); 3904 | for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } 3905 | //free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 3906 | continue; 3907 | } 3908 | 3909 | /* Remove re `lift` */ 3910 | if (p->type == MPC_TYPE_AND 3911 | && p->data.and.n == 2 3912 | && p->data.and.xs[0]->type == MPC_TYPE_LIFT 3913 | && p->data.and.xs[0]->data.lift.lf == mpcf_ctor_str 3914 | && !p->data.and.xs[0]->retained 3915 | && p->data.and.f == mpcf_strfold) { 3916 | t = p->data.and.xs[1]; 3917 | mpc_delete(p->data.and.xs[0]); 3918 | //free(p->data.and.xs); free(p->data.and.dxs); free(p->name); 3919 | memcpy(p, t, sizeof(mpc_parser_t)); 3920 | //free(t); 3921 | continue; 3922 | } 3923 | 3924 | /* Merge re lhs `and` */ 3925 | if (p->type == MPC_TYPE_AND 3926 | && p->data.and.f == mpcf_strfold 3927 | && p->data.and.xs[0]->type == MPC_TYPE_AND 3928 | && !p->data.and.xs[0]->retained 3929 | && 
p->data.and.xs[0]->data.and.f == mpcf_strfold) { 3930 | t = p->data.and.xs[0]; 3931 | n = p->data.and.n; m = t->data.and.n; 3932 | p->data.and.n = n + m - 1; 3933 | p->data.and.xs = tgc_realloc(&gc, p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); 3934 | p->data.and.dxs = tgc_realloc(&gc, p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 3935 | memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); 3936 | memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); 3937 | for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = nothing; } 3938 | //free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 3939 | continue; 3940 | } 3941 | 3942 | /* Merge re rhs `and` */ 3943 | if (p->type == MPC_TYPE_AND 3944 | && p->data.and.f == mpcf_strfold 3945 | && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND 3946 | && !p->data.and.xs[p->data.and.n-1]->retained 3947 | && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_strfold) { 3948 | t = p->data.and.xs[p->data.and.n-1]; 3949 | n = p->data.and.n; m = t->data.and.n; 3950 | p->data.and.n = n + m - 1; 3951 | p->data.and.xs = tgc_realloc(&gc, p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3952 | p->data.and.dxs = tgc_realloc(&gc, p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 3953 | memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); 3954 | for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = nothing; } 3955 | //free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 3956 | continue; 3957 | } 3958 | 3959 | return; 3960 | 3961 | } 3962 | 3963 | } 3964 | 3965 | void mpc_optimise(mpc_parser_t *p) { 3966 | mpc_optimise_unretained(p, 1); 3967 | } 3968 | 3969 | static int main_mpc(int argc, char **argv) { 3970 | 3971 | int i; 3972 | mpc_result_t r; 3973 | 3974 | mpc_parser_t* Number = mpc_new("number"); 3975 | mpc_parser_t* Symbol = mpc_new("symbol"); 3976 | mpc_parser_t* String = mpc_new("string"); 3977 | 
mpc_parser_t* Comment = mpc_new("comment"); 3978 | mpc_parser_t* Sexpr = mpc_new("sexpr"); 3979 | mpc_parser_t* Qexpr = mpc_new("qexpr"); 3980 | mpc_parser_t* Expr = mpc_new("expr"); 3981 | mpc_parser_t* Lispy = mpc_new("lispy"); 3982 | 3983 | mpca_lang(MPCA_LANG_PREDICTIVE, 3984 | " number \"number\" : /[0-9]+/ ; " 3985 | " symbol \"symbol\" : /[a-zA-Z0-9_+\\-*\\/\\\\=<>!&]+/ ; " 3986 | " string \"string\" : /\"(\\\\.|[^\"])*\"/ ; " 3987 | " comment : /;[^\\r\\n]*/ ; " 3988 | " sexpr : '(' * ')' ; " 3989 | " qexpr : '{' * '}' ; " 3990 | " expr : | | " 3991 | " | | | ; " 3992 | " lispy : /^/ * /$/ ; ", 3993 | Number, Symbol, String, Comment, Sexpr, Qexpr, Expr, Lispy, NULL); 3994 | 3995 | if (argc > 1) { 3996 | 3997 | if (mpc_parse_contents(argv[1], Lispy, &r)) { 3998 | /*mpc_ast_print(r.output);*/ 3999 | mpc_ast_delete(r.output); 4000 | } else { 4001 | mpc_err_print(r.error); 4002 | mpc_err_delete(r.error); 4003 | } 4004 | 4005 | } else { 4006 | 4007 | if (mpc_parse_pipe("", stdin, Lispy, &r)) { 4008 | mpc_ast_print(r.output); 4009 | mpc_ast_delete(r.output); 4010 | } else { 4011 | mpc_err_print(r.error); 4012 | mpc_err_delete(r.error); 4013 | } 4014 | 4015 | } 4016 | 4017 | mpc_cleanup(8, Number, Symbol, String, Comment, Sexpr, Qexpr, Expr, Lispy); 4018 | 4019 | return 0; 4020 | 4021 | } 4022 | 4023 | int main(int argc, char **argv) { 4024 | int (*volatile func)(int,char**) = main_mpc; 4025 | tgc_start(&gc, &argc); 4026 | int result = func(argc, argv); 4027 | tgc_stop(&gc); 4028 | return result; 4029 | } 4030 | 4031 | 4032 | -------------------------------------------------------------------------------- /examples/prelude.lspy: -------------------------------------------------------------------------------- 1 | ;;; 2 | ;;; Lispy Standard Prelude 3 | ;;; 4 | 5 | ;;; Atoms 6 | (def {nil} {}) 7 | (def {true} 1) 8 | (def {false} 0) 9 | 10 | ;;; Functional Functions 11 | 12 | ; Function Definitions 13 | (def {fun} (\ {f b} { 14 | def (head f) (\ (tail f) b) 15 | })) 
16 | 17 | ; Open new scope 18 | (fun {let b} { 19 | ((\ {_} b) ()) 20 | }) 21 | 22 | ; Unpack List to Function 23 | (fun {unpack f l} { 24 | eval (join (list f) l) 25 | }) 26 | 27 | ; Unapply List to Function 28 | (fun {pack f & xs} {f xs}) 29 | 30 | ; Curried and Uncurried calling 31 | (def {curry} {unpack}) 32 | (def {uncurry} {pack}) 33 | 34 | ; Perform Several things in Sequence 35 | (fun {do & l} { 36 | if (== l {}) 37 | {{}} 38 | {last l} 39 | }) 40 | 41 | ;;; Logical Functions 42 | 43 | ; Logical Functions 44 | (fun {not x} {- 1 x}) 45 | (fun {or x y} {+ x y}) 46 | (fun {and x y} {* x y}) 47 | 48 | 49 | ;;; Numeric Functions 50 | 51 | ; Minimum of Arguments 52 | (fun {min & xs} { 53 | if (== (tail xs) {}) {fst xs} 54 | {do 55 | (= {rest} (unpack min (tail xs))) 56 | (= {item} (fst xs)) 57 | (if (< item rest) {item} {rest}) 58 | } 59 | }) 60 | 61 | ; Maximum of Arguments 62 | (fun {max & xs} { 63 | if (== (tail xs) {}) {fst xs} 64 | {do 65 | (= {rest} (unpack max (tail xs))) 66 | (= {item} (fst xs)) 67 | (if (> item rest) {item} {rest}) 68 | } 69 | }) 70 | 71 | ;;; Conditional Functions 72 | 73 | (fun {select & cs} { 74 | if (== cs {}) 75 | {error "No Selection Found"} 76 | {if (fst (fst cs)) {snd (fst cs)} {unpack select (tail cs)}} 77 | }) 78 | 79 | (fun {case x & cs} { 80 | if (== cs {}) 81 | {error "No Case Found"} 82 | {if (== x (fst (fst cs))) {snd (fst cs)} {unpack case (join (list x) (tail cs))}} 83 | }) 84 | 85 | (def {otherwise} true) 86 | 87 | 88 | ;;; Misc Functions 89 | 90 | (fun {flip f a b} {f b a}) 91 | (fun {ghost & xs} {eval xs}) 92 | (fun {comp f g x} {f (g x)}) 93 | 94 | ;;; List Functions 95 | 96 | ; First, Second, or Third Item in List 97 | (fun {fst l} { eval (head l) }) 98 | (fun {snd l} { eval (head (tail l)) }) 99 | (fun {trd l} { eval (head (tail (tail l))) }) 100 | 101 | ; List Length 102 | (fun {len l} { 103 | if (== l {}) 104 | {0} 105 | {+ 1 (len (tail l))} 106 | }) 107 | 108 | ; Nth item in List 109 | (fun {nth n l} { 110 | if 
(== n 0) 111 | {fst l} 112 | {nth (- n 1) (tail l)} 113 | }) 114 | 115 | ; Last item in List 116 | (fun {last l} {nth (- (len l) 1) l}) 117 | 118 | ; Apply Function to List 119 | (fun {map f l} { 120 | if (== l {}) 121 | {{}} 122 | {join (list (f (fst l))) (map f (tail l))} 123 | }) 124 | 125 | ; Apply Filter to List 126 | (fun {filter f l} { 127 | if (== l {}) 128 | {{}} 129 | {join (if (f (fst l)) {head l} {{}}) (filter f (tail l))} 130 | }) 131 | 132 | ; Return all of list but last element 133 | (fun {init l} { 134 | if (== (tail l) {}) 135 | {{}} 136 | {join (head l) (init (tail l))} 137 | }) 138 | 139 | ; Reverse List 140 | (fun {reverse l} { 141 | if (== l {}) 142 | {{}} 143 | {join (reverse (tail l)) (head l)} 144 | }) 145 | 146 | ; Fold Left 147 | (fun {foldl f z l} { 148 | if (== l {}) 149 | {z} 150 | {foldl f (f z (fst l)) (tail l)} 151 | }) 152 | 153 | ; Fold Right 154 | (fun {foldr f z l} { 155 | if (== l {}) 156 | {z} 157 | {f (fst l) (foldr f z (tail l))} 158 | }) 159 | 160 | (fun {sum l} {foldl + 0 l}) 161 | (fun {product l} {foldl * 1 l}) 162 | 163 | ; Take N items 164 | (fun {take n l} { 165 | if (== n 0) 166 | {{}} 167 | {join (head l) (take (- n 1) (tail l))} 168 | }) 169 | 170 | ; Drop N items 171 | (fun {drop n l} { 172 | if (== n 0) 173 | {l} 174 | {drop (- n 1) (tail l)} 175 | }) 176 | 177 | ; Split at N 178 | (fun {split n l} {list (take n l) (drop n l)}) 179 | 180 | ; Take While 181 | (fun {take-while f l} { 182 | if (not (unpack f (head l))) 183 | {{}} 184 | {join (head l) (take-while f (tail l))} 185 | }) 186 | 187 | ; Drop While 188 | (fun {drop-while f l} { 189 | if (not (unpack f (head l))) 190 | {l} 191 | {drop-while f (tail l)} 192 | }) 193 | 194 | ; Element of List 195 | (fun {elem x l} { 196 | if (== l {}) 197 | {false} 198 | {if (== x (fst l)) {true} {elem x (tail l)}} 199 | }) 200 | 201 | ; Find element in list of pairs 202 | (fun {lookup x l} { 203 | if (== l {}) 204 | {error "No Element Found"} 205 | {do 206 | (= {key} (fst 
(fst l))) 207 | (= {val} (snd (fst l))) 208 | (if (== key x) {val} {lookup x (tail l)}) 209 | } 210 | }) 211 | 212 | ; Zip two lists together into a list of pairs 213 | (fun {zip x y} { 214 | if (or (== x {}) (== y {})) 215 | {{}} 216 | {join (list (join (head x) (head y))) (zip (tail x) (tail y))} 217 | }) 218 | 219 | ; Unzip a list of pairs into two lists 220 | (fun {unzip l} { 221 | if (== l {}) 222 | {{{} {}}} 223 | {do 224 | (= {x} (fst l)) 225 | (= {xs} (unzip (tail l))) 226 | (list (join (head x) (fst xs)) (join (tail x) (snd xs))) 227 | } 228 | }) 229 | 230 | ;;; Other Fun 231 | 232 | ; Fibonacci 233 | (fun {fib n} { 234 | select 235 | { (== n 0) 0 } 236 | { (== n 1) 1 } 237 | { otherwise (+ (fib (- n 1)) (fib (- n 2))) } 238 | }) 239 | 240 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tgc", 3 | "version": "0.1.0", 4 | "repo": "orangeduck/tgc", 5 | "description": "A tiny Garbage Collector for C", 6 | "keywords": ["garbage", "collector", "library", "c", "tgc"], 7 | "license": "BSD", 8 | "src": ["tgc.c", "tgc.h"] 9 | } 10 | -------------------------------------------------------------------------------- /tgc.c: -------------------------------------------------------------------------------- 1 | #include "tgc.h" 2 | 3 | static size_t tgc_hash(void *ptr) { 4 | uintptr_t ad = (uintptr_t) ptr; 5 | return (size_t) ((13*ad) ^ (ad >> 15)); 6 | } 7 | 8 | static size_t tgc_probe(tgc_t* gc, size_t i, size_t h) { 9 | long v = i - (h-1); 10 | if (v < 0) { v = gc->nslots + v; } 11 | return v; 12 | } 13 | 14 | static tgc_ptr_t *tgc_get_ptr(tgc_t *gc, void *ptr) { 15 | size_t i, j, h; 16 | i = tgc_hash(ptr) % gc->nslots; j = 0; 17 | while (1) { 18 | h = gc->items[i].hash; 19 | if (h == 0 || j > tgc_probe(gc, i, h)) { return NULL; } 20 | if (gc->items[i].ptr == ptr) { return &gc->items[i]; } 21 | i = (i+1) % gc->nslots; j++; 
22 | } 23 | return NULL; 24 | } 25 | 26 | static void tgc_add_ptr( 27 | tgc_t *gc, void *ptr, size_t size, 28 | int flags, void(*dtor)(void*)) { 29 | 30 | tgc_ptr_t item, tmp; 31 | size_t h, p, i, j; 32 | 33 | i = tgc_hash(ptr) % gc->nslots; j = 0; 34 | 35 | item.ptr = ptr; 36 | item.flags = flags; 37 | item.size = size; 38 | item.hash = i+1; 39 | item.dtor = dtor; 40 | 41 | while (1) { 42 | h = gc->items[i].hash; 43 | if (h == 0) { gc->items[i] = item; return; } 44 | if (gc->items[i].ptr == item.ptr) { return; } 45 | p = tgc_probe(gc, i, h); 46 | if (j >= p) { 47 | tmp = gc->items[i]; 48 | gc->items[i] = item; 49 | item = tmp; 50 | j = p; 51 | } 52 | i = (i+1) % gc->nslots; j++; 53 | } 54 | 55 | } 56 | 57 | static void tgc_rem_ptr(tgc_t *gc, void *ptr) { 58 | 59 | size_t i, j, h, nj, nh; 60 | 61 | if (gc->nitems == 0) { return; } 62 | 63 | for (i = 0; i < gc->nfrees; i++) { 64 | if (gc->frees[i].ptr == ptr) { gc->frees[i].ptr = NULL; } 65 | } 66 | 67 | i = tgc_hash(ptr) % gc->nslots; j = 0; 68 | 69 | while (1) { 70 | h = gc->items[i].hash; 71 | if (h == 0 || j > tgc_probe(gc, i, h)) { return; } 72 | if (gc->items[i].ptr == ptr) { 73 | memset(&gc->items[i], 0, sizeof(tgc_ptr_t)); 74 | j = i; 75 | while (1) { 76 | nj = (j+1) % gc->nslots; 77 | nh = gc->items[nj].hash; 78 | if (nh != 0 && tgc_probe(gc, nj, nh) > 0) { 79 | memcpy(&gc->items[ j], &gc->items[nj], sizeof(tgc_ptr_t)); 80 | memset(&gc->items[nj], 0, sizeof(tgc_ptr_t)); 81 | j = nj; 82 | } else { 83 | break; 84 | } 85 | } 86 | gc->nitems--; 87 | return; 88 | } 89 | i = (i+1) % gc->nslots; j++; 90 | } 91 | 92 | } 93 | 94 | 95 | enum { 96 | TGC_PRIMES_COUNT = 24 97 | }; 98 | 99 | static const size_t tgc_primes[TGC_PRIMES_COUNT] = { 100 | 0, 1, 5, 11, 101 | 23, 53, 101, 197, 102 | 389, 683, 1259, 2417, 103 | 4733, 9371, 18617, 37097, 104 | 74093, 148073, 296099, 592019, 105 | 1100009, 2200013, 4400021, 8800019 106 | }; 107 | 108 | static size_t tgc_ideal_size(tgc_t* gc, size_t size) { 109 | size_t i, last; 110 
| size = (size_t)((double)(size+1) / gc->loadfactor); 111 | for (i = 0; i < TGC_PRIMES_COUNT; i++) { 112 | if (tgc_primes[i] >= size) { return tgc_primes[i]; } 113 | } 114 | last = tgc_primes[TGC_PRIMES_COUNT-1]; 115 | for (i = 0;; i++) { 116 | if (last * i >= size) { return last * i; } 117 | } 118 | return 0; 119 | } 120 | 121 | static int tgc_rehash(tgc_t* gc, size_t new_size) { 122 | 123 | size_t i; 124 | tgc_ptr_t *old_items = gc->items; 125 | size_t old_size = gc->nslots; 126 | 127 | gc->nslots = new_size; 128 | gc->items = calloc(gc->nslots, sizeof(tgc_ptr_t)); 129 | 130 | if (gc->items == NULL) { 131 | gc->nslots = old_size; 132 | gc->items = old_items; 133 | return 0; 134 | } 135 | 136 | for (i = 0; i < old_size; i++) { 137 | if (old_items[i].hash != 0) { 138 | tgc_add_ptr(gc, 139 | old_items[i].ptr, old_items[i].size, 140 | old_items[i].flags, old_items[i].dtor); 141 | } 142 | } 143 | 144 | free(old_items); 145 | 146 | return 1; 147 | } 148 | 149 | static int tgc_resize_more(tgc_t *gc) { 150 | size_t new_size = tgc_ideal_size(gc, gc->nitems); 151 | size_t old_size = gc->nslots; 152 | return (new_size > old_size) ? tgc_rehash(gc, new_size) : 1; 153 | } 154 | 155 | static int tgc_resize_less(tgc_t *gc) { 156 | size_t new_size = tgc_ideal_size(gc, gc->nitems); 157 | size_t old_size = gc->nslots; 158 | return (new_size < old_size) ? 
tgc_rehash(gc, new_size) : 1; 159 | } 160 | 161 | static void tgc_mark_ptr(tgc_t *gc, void *ptr) { 162 | 163 | size_t i, j, h, k; 164 | 165 | if ((uintptr_t)ptr < gc->minptr 166 | || (uintptr_t)ptr > gc->maxptr) { return; } 167 | 168 | i = tgc_hash(ptr) % gc->nslots; j = 0; 169 | 170 | while (1) { 171 | h = gc->items[i].hash; 172 | if (h == 0 || j > tgc_probe(gc, i, h)) { return; } 173 | if (ptr == gc->items[i].ptr) { 174 | if (gc->items[i].flags & TGC_MARK) { return; } 175 | gc->items[i].flags |= TGC_MARK; 176 | if (gc->items[i].flags & TGC_LEAF) { return; } 177 | for (k = 0; k < gc->items[i].size/sizeof(void*); k++) { 178 | tgc_mark_ptr(gc, ((void**)gc->items[i].ptr)[k]); 179 | } 180 | return; 181 | } 182 | i = (i+1) % gc->nslots; j++; 183 | } 184 | 185 | } 186 | 187 | static void tgc_mark_stack(tgc_t *gc) { 188 | 189 | void *stk, *bot, *top, *p; 190 | bot = gc->bottom; top = &stk; 191 | 192 | if (bot == top) { return; } 193 | 194 | if (bot < top) { 195 | for (p = top; p >= bot; p = ((char*)p) - sizeof(void*)) { 196 | tgc_mark_ptr(gc, *((void**)p)); 197 | } 198 | } 199 | 200 | if (bot > top) { 201 | for (p = top; p <= bot; p = ((char*)p) + sizeof(void*)) { 202 | tgc_mark_ptr(gc, *((void**)p)); 203 | } 204 | } 205 | 206 | } 207 | 208 | static void tgc_mark(tgc_t *gc) { 209 | 210 | size_t i, k; 211 | jmp_buf env; 212 | void (*volatile mark_stack)(tgc_t*) = tgc_mark_stack; 213 | 214 | if (gc->nitems == 0) { return; } 215 | 216 | for (i = 0; i < gc->nslots; i++) { 217 | if (gc->items[i].hash == 0) { continue; } 218 | if (gc->items[i].flags & TGC_MARK) { continue; } 219 | if (gc->items[i].flags & TGC_ROOT) { 220 | gc->items[i].flags |= TGC_MARK; 221 | if (gc->items[i].flags & TGC_LEAF) { continue; } 222 | for (k = 0; k < gc->items[i].size/sizeof(void*); k++) { 223 | tgc_mark_ptr(gc, ((void**)gc->items[i].ptr)[k]); 224 | } 225 | continue; 226 | } 227 | } 228 | 229 | memset(&env, 0, sizeof(jmp_buf)); 230 | setjmp(env); 231 | mark_stack(gc); 232 | 233 | } 234 | 235 | 
void tgc_sweep(tgc_t *gc) { 236 | 237 | size_t i, j, k, nj, nh; 238 | 239 | if (gc->nitems == 0) { return; } 240 | 241 | gc->nfrees = 0; 242 | for (i = 0; i < gc->nslots; i++) { 243 | if (gc->items[i].hash == 0) { continue; } 244 | if (gc->items[i].flags & TGC_MARK) { continue; } 245 | if (gc->items[i].flags & TGC_ROOT) { continue; } 246 | gc->nfrees++; 247 | } 248 | 249 | gc->frees = realloc(gc->frees, sizeof(tgc_ptr_t) * gc->nfrees); 250 | if (gc->frees == NULL) { return; } 251 | 252 | i = 0; k = 0; 253 | while (i < gc->nslots) { 254 | if (gc->items[i].hash == 0) { i++; continue; } 255 | if (gc->items[i].flags & TGC_MARK) { i++; continue; } 256 | if (gc->items[i].flags & TGC_ROOT) { i++; continue; } 257 | 258 | gc->frees[k] = gc->items[i]; k++; 259 | memset(&gc->items[i], 0, sizeof(tgc_ptr_t)); 260 | 261 | j = i; 262 | while (1) { 263 | nj = (j+1) % gc->nslots; 264 | nh = gc->items[nj].hash; 265 | if (nh != 0 && tgc_probe(gc, nj, nh) > 0) { 266 | memcpy(&gc->items[ j], &gc->items[nj], sizeof(tgc_ptr_t)); 267 | memset(&gc->items[nj], 0, sizeof(tgc_ptr_t)); 268 | j = nj; 269 | } else { 270 | break; 271 | } 272 | } 273 | gc->nitems--; 274 | } 275 | 276 | for (i = 0; i < gc->nslots; i++) { 277 | if (gc->items[i].hash == 0) { continue; } 278 | if (gc->items[i].flags & TGC_MARK) { 279 | gc->items[i].flags &= ~TGC_MARK; 280 | } 281 | } 282 | 283 | tgc_resize_less(gc); 284 | 285 | gc->mitems = gc->nitems + (size_t)(gc->nitems * gc->sweepfactor) + 1; 286 | 287 | for (i = 0; i < gc->nfrees; i++) { 288 | if (gc->frees[i].ptr) { 289 | if (gc->frees[i].dtor) { gc->frees[i].dtor(gc->frees[i].ptr); } 290 | free(gc->frees[i].ptr); 291 | } 292 | } 293 | 294 | free(gc->frees); 295 | gc->frees = NULL; 296 | gc->nfrees = 0; 297 | 298 | } 299 | 300 | void tgc_start(tgc_t *gc, void *stk) { 301 | gc->bottom = stk; 302 | gc->paused = 0; 303 | gc->nitems = 0; 304 | gc->nslots = 0; 305 | gc->mitems = 0; 306 | gc->nfrees = 0; 307 | gc->maxptr = 0; 308 | gc->items = NULL; 309 | gc->frees = 
NULL; 310 | gc->minptr = UINTPTR_MAX; 311 | gc->loadfactor = 0.9; 312 | gc->sweepfactor = 0.5; 313 | } 314 | 315 | void tgc_stop(tgc_t *gc) { 316 | tgc_sweep(gc); 317 | free(gc->items); 318 | free(gc->frees); 319 | } 320 | 321 | void tgc_pause(tgc_t *gc) { 322 | gc->paused = 1; 323 | } 324 | 325 | void tgc_resume(tgc_t *gc) { 326 | gc->paused = 0; 327 | } 328 | 329 | void tgc_run(tgc_t *gc) { 330 | tgc_mark(gc); 331 | tgc_sweep(gc); 332 | } 333 | 334 | static void *tgc_add( 335 | tgc_t *gc, void *ptr, size_t size, 336 | int flags, void(*dtor)(void*)) { 337 | 338 | gc->nitems++; 339 | gc->maxptr = ((uintptr_t)ptr) + size > gc->maxptr ? 340 | ((uintptr_t)ptr) + size : gc->maxptr; 341 | gc->minptr = ((uintptr_t)ptr) < gc->minptr ? 342 | ((uintptr_t)ptr) : gc->minptr; 343 | 344 | if (tgc_resize_more(gc)) { 345 | tgc_add_ptr(gc, ptr, size, flags, dtor); 346 | if (!gc->paused && gc->nitems > gc->mitems) { 347 | tgc_run(gc); 348 | } 349 | return ptr; 350 | } else { 351 | gc->nitems--; 352 | free(ptr); 353 | return NULL; 354 | } 355 | } 356 | 357 | static void tgc_rem(tgc_t *gc, void *ptr) { 358 | tgc_rem_ptr(gc, ptr); 359 | tgc_resize_less(gc); 360 | gc->mitems = gc->nitems + gc->nitems / 2 + 1; 361 | } 362 | 363 | void *tgc_alloc(tgc_t *gc, size_t size) { 364 | return tgc_alloc_opt(gc, size, 0, NULL); 365 | } 366 | 367 | void *tgc_calloc(tgc_t *gc, size_t num, size_t size) { 368 | return tgc_calloc_opt(gc, num, size, 0, NULL); 369 | } 370 | 371 | void *tgc_realloc(tgc_t *gc, void *ptr, size_t size) { 372 | 373 | tgc_ptr_t *p; 374 | void *qtr = realloc(ptr, size); 375 | 376 | if (qtr == NULL) { 377 | tgc_rem(gc, ptr); 378 | return qtr; 379 | } 380 | 381 | if (ptr == NULL) { 382 | tgc_add(gc, qtr, size, 0, NULL); 383 | return qtr; 384 | } 385 | 386 | p = tgc_get_ptr(gc, ptr); 387 | 388 | if (p && qtr == ptr) { 389 | p->size = size; 390 | return qtr; 391 | } 392 | 393 | if (p && qtr != ptr) { 394 | int flags = p->flags; 395 | void(*dtor)(void*) = p->dtor; 396 | tgc_rem(gc, 
ptr); 397 | tgc_add(gc, qtr, size, flags, dtor); 398 | return qtr; 399 | } 400 | 401 | return NULL; 402 | } 403 | 404 | void tgc_free(tgc_t *gc, void *ptr) { 405 | tgc_ptr_t *p = tgc_get_ptr(gc, ptr); 406 | if (p) { 407 | if (p->dtor) { 408 | p->dtor(ptr); 409 | } 410 | free(ptr); 411 | tgc_rem(gc, ptr); 412 | } 413 | } 414 | 415 | void *tgc_alloc_opt(tgc_t *gc, size_t size, int flags, void(*dtor)(void*)) { 416 | void *ptr = malloc(size); 417 | if (ptr != NULL) { 418 | ptr = tgc_add(gc, ptr, size, flags, dtor); 419 | } 420 | return ptr; 421 | } 422 | 423 | void *tgc_calloc_opt( 424 | tgc_t *gc, size_t num, size_t size, 425 | int flags, void(*dtor)(void*)) { 426 | void *ptr = calloc(num, size); 427 | if (ptr != NULL) { 428 | ptr = tgc_add(gc, ptr, num * size, flags, dtor); 429 | } 430 | return ptr; 431 | } 432 | 433 | void tgc_set_dtor(tgc_t *gc, void *ptr, void(*dtor)(void*)) { 434 | tgc_ptr_t *p = tgc_get_ptr(gc, ptr); 435 | if (p) { p->dtor = dtor; } 436 | } 437 | 438 | void tgc_set_flags(tgc_t *gc, void *ptr, int flags) { 439 | tgc_ptr_t *p = tgc_get_ptr(gc, ptr); 440 | if (p) { p->flags = flags; } 441 | } 442 | 443 | int tgc_get_flags(tgc_t *gc, void *ptr) { 444 | tgc_ptr_t *p = tgc_get_ptr(gc, ptr); 445 | if (p) { return p->flags; } 446 | return 0; 447 | } 448 | 449 | void(*tgc_get_dtor(tgc_t *gc, void *ptr))(void*) { 450 | tgc_ptr_t *p = tgc_get_ptr(gc, ptr); 451 | if (p) { return p->dtor; } 452 | return NULL; 453 | } 454 | 455 | size_t tgc_get_size(tgc_t *gc, void *ptr) { 456 | tgc_ptr_t *p = tgc_get_ptr(gc, ptr); 457 | if (p) { return p->size; } 458 | return 0; 459 | } 460 | -------------------------------------------------------------------------------- /tgc.h: -------------------------------------------------------------------------------- 1 | #ifndef TGC_H 2 | #define TGC_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | enum { 10 | TGC_MARK = 0x01, 11 | TGC_ROOT = 0x02, 12 | TGC_LEAF = 0x04 13 | }; 14 | 15 | typedef struct { 16 | void 
*ptr; 17 | int flags; 18 | size_t size, hash; 19 | void (*dtor)(void*); 20 | } tgc_ptr_t; 21 | 22 | typedef struct { 23 | void *bottom; 24 | int paused; 25 | uintptr_t minptr, maxptr; 26 | tgc_ptr_t *items, *frees; 27 | double loadfactor, sweepfactor; 28 | size_t nitems, nslots, mitems, nfrees; 29 | } tgc_t; 30 | 31 | void tgc_start(tgc_t *gc, void *stk); 32 | void tgc_stop(tgc_t *gc); 33 | void tgc_pause(tgc_t *gc); 34 | void tgc_resume(tgc_t *gc); 35 | void tgc_run(tgc_t *gc); 36 | 37 | void *tgc_alloc(tgc_t *gc, size_t size); 38 | void *tgc_calloc(tgc_t *gc, size_t num, size_t size); 39 | void *tgc_realloc(tgc_t *gc, void *ptr, size_t size); 40 | void tgc_free(tgc_t *gc, void *ptr); 41 | 42 | void *tgc_alloc_opt(tgc_t *gc, size_t size, int flags, void(*dtor)(void*)); 43 | void *tgc_calloc_opt(tgc_t *gc, size_t num, size_t size, int flags, void(*dtor)(void*)); 44 | 45 | void tgc_set_dtor(tgc_t *gc, void *ptr, void(*dtor)(void*)); 46 | void tgc_set_flags(tgc_t *gc, void *ptr, int flags); 47 | int tgc_get_flags(tgc_t *gc, void *ptr); 48 | void(*tgc_get_dtor(tgc_t *gc, void *ptr))(void*); 49 | size_t tgc_get_size(tgc_t *gc, void *ptr); 50 | 51 | #endif 52 | --------------------------------------------------------------------------------