├── .github ├── FUNDING.yml └── workflows │ └── test.yml ├── .gitattributes ├── .gitignore ├── libtoml.pc ├── example ├── Makefile ├── table.c └── array.c ├── .editorconfig ├── .clang-format ├── README.md ├── LICENSE ├── Makefile ├── test.bash ├── toml.h ├── toml2json.c ├── toml-c-test.c ├── toml.c └── header └── toml-c.h /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: arp242 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | header/toml-c.h linguist-generated=true 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.a 4 | *.gch 5 | *.exe 6 | *.gcda 7 | *.gcno 8 | *.gcov 9 | *.dSYM 10 | /toml2json 11 | /toml-c-test 12 | /libtoml.so.1.0 13 | /example/array 14 | /example/table 15 | -------------------------------------------------------------------------------- /libtoml.pc: -------------------------------------------------------------------------------- 1 | prefix=%%PREFIX%% 2 | exec_prefix=${prefix} 3 | libdir=${prefix}/lib 4 | includedir=${prefix}/include 5 | 6 | Name: libtoml 7 | URL: https://github.com/arp242/toml-c/ 8 | Description: TOML C library 9 | Version: v1.0 10 | Libs: -L${libdir} -ltoml 11 | Cflags: -I${includedir} 12 | -------------------------------------------------------------------------------- /example/Makefile: -------------------------------------------------------------------------------- 1 | CC = cc 2 | CFLAGS = -std=c99 -Wall -Wextra -Wimplicit-fallthrough -fPIC -O2 -g 3 | 4 | .PHONY: all clean 5 | 6 | all: array table 7 | 8 | array: array.c 9 | ${CC} ${CFLAGS} -o $@ $@.c 10 | 11 | table: table.c 12 | ${CC} ${CFLAGS} -o $@ $@.c 13 | 14 | clean: 15 | rm -f array table 16 | 
-------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | indent_style = tab 8 | indent_size = 4 9 | 10 | [Makefile] 11 | indent_size = 8 12 | 13 | [*.md] 14 | indent_style = space 15 | 16 | [*.{yml,yaml}] 17 | indent_style = space 18 | indent_size = 2 19 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: 'test' 2 | on: ['push', 'pull_request'] 3 | jobs: 4 | test: 5 | strategy: 6 | fail-fast: false 7 | matrix: {os: ['ubuntu-latest', 'macos-latest', 'windows-latest']} 8 | 9 | runs-on: ${{ matrix.os }} 10 | steps: 11 | - uses: 'actions/setup-go@v6' 12 | with: {go-version: '1.25'} 13 | - uses: 'actions/checkout@v6' 14 | 15 | - run: 'go install github.com/toml-lang/toml-test/v2/cmd/toml-test@v2.0.0' 16 | 17 | - if: runner.os != 'Windows' 18 | run: 'make check CC=clang' 19 | - if: runner.os == 'Windows' 20 | run: 'make check CC=gcc SANITIZER=' 21 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | # Real men use real tabs. 2 | IndentWidth: 4 3 | TabWidth: 4 4 | UseTab: 'ForIndentation' 5 | 6 | # Don't frob with line endings so much. 7 | ColumnLimit: 999 8 | #BinPackArguments: false 9 | #BinPackParameters: false 10 | AllowShortFunctionsOnASingleLine: 'None' 11 | BreakStringLiterals: false 12 | ReflowComments: false 13 | AllowShortCaseLabelsOnASingleLine: true 14 | AlignConsecutiveShortCaseStatements: {Enabled: true} 15 | AlignEscapedNewlines: 'Left' 16 | 17 | # Align assignments and declarations. 
18 | AlignConsecutiveAssignments: {Enabled: true, AlignCompound: true} 19 | AlignConsecutiveDeclarations: {Enabled: true, PadOperators: false} 20 | 21 | # Star next to the type name, as it's really part of the type. 22 | PointerAlignment: 'Left' 23 | 24 | # Indent nested preprocessor directives 25 | IndentPPDirectives: 'AfterHash' 26 | 27 | # Group local ".." includes, and put them after system <..> includes. 28 | IncludeBlocks: 'Regroup' 29 | IncludeCategories: [ 30 | {Regex: '^"', Priority: 99, SortPriority: 99}, 31 | ] 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | C library for parsing TOML 1.1. Passes all of [TOML test suite]. 2 | 3 | This is a fork of https://github.com/cktan/tomlc99, as that didn't seem hugely 4 | maintained, and has a number of errors. This library isn't compatible with tomlc99. 5 | 6 | [TOML test suite]: https://github.com/toml-lang/toml-test 7 | 8 | Installation 9 | ------------ 10 | This can be used in two ways: as a library or in "header only mode": 11 | 12 | - "Header only mode" is to make it a bit easier to include this in a project: 13 | just copy `header/toml-c.h` to your project and `#include "toml-c.h"` and 14 | you're done – nothing else needed. 15 | 16 | It's essentially just "cat toml.h toml.c > toml-c.h" with a bit of frobbing. 17 | 18 | - For "library mode" build `libtoml.so.1.0` and `libtoml.a` by just typing `make`. 19 | 20 | Usage 21 | ----- 22 | See `toml.h` and the `example` directory. 23 | 24 | Testing 25 | ------- 26 | Run `make check` to run the tests; this requires [toml-test] to be in $PATH. 
27 | 28 | [toml-test]: https://github.com/toml-lang/toml-test 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) CK Tan 4 | https://github.com/cktan/tomlc99 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /example/table.c: -------------------------------------------------------------------------------- 1 | #include "../header/toml-c.h" 2 | 3 | char *doc = "\n" 4 | "host = 'example.com'\n" 5 | "port = 80\n" 6 | "\n" 7 | "[tbl]\n" 8 | "key = 'value'\n" 9 | "[tbl.sub]\n" 10 | "subkey = 'subvalue'\n"; 11 | 12 | int main(void) { 13 | char errbuf[200]; 14 | toml_table_t *tbl = toml_parse(doc, errbuf, sizeof(errbuf)); 15 | if (!tbl) { 16 | fprintf(stderr, "ERROR: %s\n", errbuf); 17 | exit(1); 18 | } 19 | 20 | // Get specific keys. 21 | toml_value_t host = toml_table_string(tbl, "host"); 22 | toml_value_t port = toml_table_int(tbl, "port"); 23 | if (!host.ok) // Default values. 24 | host.u.s = "localhost"; 25 | if (!port.ok) 26 | host.u.i = 80; 27 | printf("%s:%ld\n", host.u.s, port.u.i); 28 | 29 | // Get a table. 30 | toml_table_t *sub_tbl = toml_table_table(tbl, "tbl"); 31 | if (sub_tbl) { 32 | // Loop over all keys in a table. 33 | int l = toml_table_len(sub_tbl); 34 | for (int i = 0; i < l; i++) { 35 | int keylen; 36 | const char *key = toml_table_key(sub_tbl, i, &keylen); 37 | printf("key #%d: %s\n", i, key); 38 | // TODO: this should return toml_key_t or something, which also 39 | // includes the type. This actually requires a bit of frobbing with 40 | // the lexer, as that just sets the type of everything to STRING. 41 | // 42 | // Then we can also get rid of toml_table_{string,int,...} and just 43 | // parse it automatically. 
44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /example/array.c: -------------------------------------------------------------------------------- 1 | #include "../header/toml-c.h" 2 | 3 | char *doc = "\n" 4 | "ints = [1, 2, 3]\n" 5 | "mixed = [1, 'one', 1.2]\n" 6 | "\n" 7 | "[[aot]]\n" 8 | "k = 'one'\n" 9 | "[[aot]]\n" 10 | "k = 'two'\n"; 11 | 12 | int main(void) { 13 | char errbuf[200]; 14 | toml_table_t *tbl = toml_parse(doc, errbuf, sizeof(errbuf)); 15 | if (!tbl) { 16 | fprintf(stderr, "ERROR: %s\n", errbuf); 17 | exit(1); 18 | } 19 | 20 | // Array of ints. 21 | toml_array_t *arr = toml_table_array(tbl, "ints"); 22 | int l = toml_array_len(arr); 23 | printf("ints:\n"); 24 | for (int i = 0; i < l; i++) 25 | printf(" index %d = %ld\n", i, toml_array_int(arr, i).u.i); 26 | printf("\n"); 27 | 28 | // Mixed array. 29 | arr = toml_table_array(tbl, "mixed"); 30 | l = toml_array_len(arr); 31 | printf("mixed:\n"); 32 | for (int i = 0; i < l; i++) { 33 | // TODO: like with table.c, this also would be tons easier if record and 34 | // return the type. 35 | toml_value_t v = toml_array_int(arr, i); 36 | if (v.ok) { 37 | printf(" index %d = (int)%ld\n", i, v.u.i); 38 | continue; 39 | } 40 | v = toml_array_double(arr, i); 41 | if (v.ok) { 42 | printf(" index %d = (float)%0.17g\n", i, v.u.d); 43 | continue; 44 | } 45 | v = toml_array_string(arr, i); 46 | if (v.ok) { 47 | printf(" index %d = (string)\"%s\"\n", i, v.u.s); 48 | continue; 49 | } 50 | } 51 | printf("\n"); 52 | 53 | // Array-of-tables works just like inline tables. 
54 | arr = toml_table_array(tbl, "aot"); 55 | l = toml_array_len(arr); 56 | for (int i = 0; i < l; i++) { 57 | toml_table_t *t = toml_array_table(arr, i); 58 | toml_value_t v = toml_table_string(t, "k"); 59 | printf("aot[%d].k = \"%s\"\n", i, v.u.s); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = cc 2 | PREFIX = /usr/local 3 | FPIC = -fPIC 4 | CFLAGS = -std=c99 -Wall -Wextra -Wimplicit-fallthrough ${FPIC} -O2 -g 5 | SANITIZER = -fsanitize=address -fsanitize=undefined 6 | #COVERAGE = -g3 -Og --coverage -lgcov -fprofile-arcs -ftest-coverage 7 | 8 | HDRS = toml.h 9 | SRCS = toml.c 10 | OBJS = ${SRCS:.c=.o} 11 | PCFILE = libtoml.pc 12 | LIB = libtoml.a 13 | SOLIB = libtoml.so.1.0 14 | 15 | .PHONY: all clean install check 16 | 17 | all: ${LIB} ${SOLIB} toml2json toml-c-test header/toml-c.h 18 | 19 | header/toml-c.h: ${HDRS} ${SRCS} 20 | @echo 'create $@' 21 | @: >header/toml-c.h 22 | @sed '/#endif \/\/ TOML_H/d; /#define TOML_H/a#ifndef _POSIX_C_SOURCE\n#define _POSIX_C_SOURCE 200809L\n#endif' toml.h >>header/toml-c.h 23 | @sed '/#include "toml.h"/d; /_POSIX_C_SOURCE/d' toml.c >>header/toml-c.h 24 | @echo '#endif // TOML_H' >>header/toml-c.h 25 | 26 | toml.o: toml.c ${HDRS} 27 | ${CC} ${CFLAGS} -c $< 28 | 29 | libtoml.a: ${OBJS} 30 | ar -rcs $@ $^ 31 | 32 | libtoml.so.1.0: ${OBJS} 33 | ${CC} ${CFLAGS} -shared -o $@ $^ 34 | 35 | toml2json: toml2json.c ${HDRS} ${SRCS} 36 | ${CC} ${CFLAGS} -o toml2json ${SANITIZER} ${COVERAGE} toml.c toml2json.c 37 | 38 | toml-c-test: toml-c-test.c ${HDRS} ${SRCS} 39 | ${CC} ${CFLAGS} -o toml-c-test ${SANITIZER} ${COVERAGE} toml.c toml-c-test.c 40 | 41 | install: all 42 | install -d ${DESTDIR}${PREFIX}/include ${DESTDIR}${PREFIX}/lib ${DESTDIR}${PREFIX}/lib/pkgconfig 43 | install toml.h ${DESTDIR}${PREFIX}/include 44 | install ${LIB} ${DESTDIR}${PREFIX}/lib 45 | install ${SOLIB} 
${DESTDIR}${PREFIX}/lib 46 | sed 's!%%PREFIX%%!${PREFIX}!' ${PCFILE} >${DESTDIR}${PREFIX}/lib/pkgconfig/${PCFILE} 47 | 48 | check: toml2json toml-c-test 49 | @./test.bash 50 | @echo 51 | @./toml-c-test 52 | 53 | report: check 54 | @[ -f toml2json-toml.gcda ] && mv toml2json-toml.gcda toml.gcda ||: 55 | @[ -f toml2json-toml.gcno ] && mv toml2json-toml.gcno toml.gcno ||: 56 | gcov -kt toml.c 57 | 58 | clean: 59 | rm -f *.o *.gcov *.gcda *.gcno toml2json toml-c-test ${LIB} ${SOLIB} 60 | -------------------------------------------------------------------------------- /test.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Also compatible with zsh, but not POSIX sh. 3 | # 4 | # Run the toml-test compliance tests: https://github.com/toml-lang/toml-test 5 | 6 | decoder="./toml2json" 7 | toml=1.1.0 8 | 9 | # Skip known failures. 10 | skip=( 11 | # Extending existing tables 12 | -skip 'invalid/array/extending-table' 13 | -skip 'invalid/table/append-with-dotted-keys-01' 14 | -skip 'invalid/table/append-with-dotted-keys-02' 15 | -skip 'invalid/inline-table/overwrite-02' 16 | 17 | # Encoding 18 | -skip 'invalid/encoding/bad-codepoint' 19 | -skip 'invalid/encoding/bad-utf8-in-comment' 20 | -skip 'invalid/encoding/utf16-comment' 21 | -skip 'invalid/encoding/utf16-key' 22 | 23 | # Allows "invalid" control characters 24 | -skip 'invalid/control/bare-cr' 25 | -skip 'invalid/control/bare-null' 26 | -skip 'invalid/control/comment-null' 27 | -skip 'invalid/control/multi-cr' 28 | -skip 'invalid/control/only-null' 29 | -skip 'invalid/control/rawmulti-cr' 30 | 31 | # TOML 1.1 32 | -skip 'invalid/inline-table/trailing-comma' 33 | ) 34 | 35 | # Find toml-test: try the current dir, $PATH, $GOBIN, $GOPATH/bin, then $HOME/go/bin. 36 | tt= 37 | if [[ -x "./toml-test" ]]; then 38 | tt="./toml-test" 39 | elif command -v "toml-test" >/dev/null; then 40 | tt="toml-test" 41 | elif [[ -n "$(go env GOBIN)" ]] && [[ -x "$(go env GOBIN)/toml-test" ]]; then 42 | tt="$(go env GOBIN)/toml-test" 43 | elif 
[[ -n "$(go env GOPATH)" ]] && [[ -x "$(go env GOPATH)/bin/toml-test" ]]; then 44 | tt="$(go env GOPATH)/bin/toml-test" 45 | elif [[ -x "$HOME/go/bin/toml-test" ]]; then 46 | tt="$HOME/go/bin/toml-test" 47 | fi 48 | if ! command -v "$tt" >/dev/null; then 49 | echo >&2 'toml-test not in current dir, $PATH, $GOBIN, $GOPATH/bin, or $HOME/go/bin; install with:' 50 | echo >&2 ' % go install github.com/toml-lang/toml-test/cmd/toml-test@latest' 51 | echo >&2 52 | echo >&2 'Or download a binary from:' 53 | echo >&2 ' https://github.com/toml-lang/toml-test/releases' 54 | exit 1 55 | fi 56 | 57 | "$tt" test -toml="$toml" -skip-must-err ${skip[@]} -decoder="$decoder" "$@" 58 | -------------------------------------------------------------------------------- /toml.h: -------------------------------------------------------------------------------- 1 | #ifndef TOML_H 2 | #define TOML_H 3 | #ifdef _MSC_VER 4 | # pragma warning(disable : 4996) 5 | #endif 6 | #ifdef __cplusplus 7 | # define TOML_EXTERN extern "C" 8 | #else 9 | # define TOML_EXTERN extern 10 | #endif 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | typedef struct toml_table_t toml_table_t; 17 | typedef struct toml_array_t toml_array_t; 18 | typedef struct toml_value_t toml_value_t; 19 | typedef struct toml_timestamp_t toml_timestamp_t; 20 | typedef struct toml_keyval_t toml_keyval_t; 21 | typedef struct toml_arritem_t toml_arritem_t; 22 | typedef struct toml_pos_t toml_pos_t; 23 | 24 | // TOML table. 25 | struct toml_table_t { 26 | const char* key; // Key for this table 27 | int keylen; // length of key. 28 | bool implicit; // Table was created implicitly 29 | bool readonly; // No more modification allowed 30 | 31 | int nkval; // key-values in the table 32 | toml_keyval_t** kval; 33 | int narr; // arrays in the table 34 | toml_array_t** arr; 35 | int ntbl; // tables in the table 36 | toml_table_t** tbl; 37 | }; 38 | 39 | // TOML array. 
40 | struct toml_array_t { 41 | const char* key; // key to this array 42 | int keylen; // length of key. 43 | int kind; // element kind: 'v'alue, 'a'rray, 't'able, or 'm'ixed 44 | int type; // for value kind: 'i'nt, 'd'ouble, 'b'ool, 's'tring, 't'ime, 'D'ate, 'T'imestamp, 'm'ixed 45 | int nitem; // number of elements 46 | toml_arritem_t* item; 47 | }; 48 | struct toml_arritem_t { // one element of a toml_array_t 49 | int valtype; // for value kind: 'i'nt, 'd'ouble, 'b'ool, 's'tring, 't'ime, 'D'ate, 'T'imestamp 50 | char* val; 51 | toml_array_t* arr; 52 | toml_table_t* tbl; 53 | }; 54 | 55 | // TOML key/value pair. 56 | struct toml_keyval_t { 57 | const char* key; // key to this value 58 | int keylen; // length of key. 59 | const char* val; // the raw value 60 | }; 61 | 62 | // Token position. 63 | struct toml_pos_t { 64 | int line; 65 | int col; 66 | }; 67 | 68 | // Timestamp type; some fields may be unused depending on the value of kind. 69 | struct toml_timestamp_t { 70 | // datetime type: 71 | // 72 | // 'd'atetime Full date + time + TZ 73 | // 'l'ocal-datetime Full date + time but without TZ 74 | // 'D'ate-local Date only, without TZ 75 | // 't'ime-local Time only, without TZ 76 | char kind; 77 | 78 | int year, month, day; 79 | int hour, minute, second, millisec; 80 | int tz; // Timezone offset in minutes 81 | }; 82 | 83 | // Parsed TOML value. 84 | // 85 | // The string value s is a regular NUL-terminated C string, but the string 86 | // length is also given in sl since TOML values may contain NUL bytes. The 87 | // value is guaranteed to be correct UTF-8. 88 | struct toml_value_t { 89 | bool ok; // Was this value present? 90 | union { 91 | struct { 92 | char* s; // string value; must be freed after use. 93 | int sl; // string length, excluding the NUL terminator. 94 | }; 95 | toml_timestamp_t ts; // datetime 96 | bool b; // bool 97 | int64_t i; // int 98 | double d; // double 99 | } u; 100 | }; 101 | 102 | // toml_parse() parses a TOML document from a string. 
Returns 0 on error, with 103 | // the error message stored in errbuf. 104 | // 105 | // toml_parse_file() is identical, but reads from a file descriptor. 106 | // 107 | // Use toml_free() to free the return value; this will invalidate all handles 108 | // for this table. 109 | TOML_EXTERN toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz); 110 | TOML_EXTERN toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz); 111 | TOML_EXTERN void toml_free(toml_table_t* table); 112 | 113 | // Table functions. 114 | // 115 | // toml_table_len() gets the number of direct keys for this table; 116 | // toml_table_key() gets the nth direct key in this table. 117 | TOML_EXTERN int toml_table_len(const toml_table_t* table); 118 | TOML_EXTERN const char* toml_table_key(const toml_table_t* table, int keyidx, int* keylen); 119 | TOML_EXTERN toml_value_t toml_table_string(const toml_table_t* table, const char* key); 120 | TOML_EXTERN toml_value_t toml_table_bool(const toml_table_t* table, const char* key); 121 | TOML_EXTERN toml_value_t toml_table_int(const toml_table_t* table, const char* key); 122 | TOML_EXTERN toml_value_t toml_table_double(const toml_table_t* table, const char* key); 123 | TOML_EXTERN toml_value_t toml_table_timestamp(const toml_table_t* table, const char* key); 124 | TOML_EXTERN toml_array_t* toml_table_array(const toml_table_t* table, const char* key); 125 | TOML_EXTERN toml_table_t* toml_table_table(const toml_table_t* table, const char* key); 126 | 127 | // Array functions. 
128 | TOML_EXTERN int toml_array_len(const toml_array_t* array); 129 | TOML_EXTERN toml_value_t toml_array_string(const toml_array_t* array, int idx); 130 | TOML_EXTERN toml_value_t toml_array_bool(const toml_array_t* array, int idx); 131 | TOML_EXTERN toml_value_t toml_array_int(const toml_array_t* array, int idx); 132 | TOML_EXTERN toml_value_t toml_array_double(const toml_array_t* array, int idx); 133 | TOML_EXTERN toml_value_t toml_array_timestamp(const toml_array_t* array, int idx); 134 | TOML_EXTERN toml_array_t* toml_array_array(const toml_array_t* array, int idx); 135 | TOML_EXTERN toml_table_t* toml_array_table(const toml_array_t* array, int idx); 136 | 137 | #endif // TOML_H 138 | -------------------------------------------------------------------------------- /toml2json.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "toml.h" 8 | 9 | typedef const char* toml_unparsed_t; 10 | toml_unparsed_t toml_table_unparsed(const toml_table_t* table, const char* key); 11 | toml_unparsed_t toml_array_unparsed(const toml_array_t* array, int idx); 12 | int toml_value_string(toml_unparsed_t s, char** ret, int* len); 13 | int toml_value_bool(toml_unparsed_t s, bool* ret); 14 | int toml_value_int(toml_unparsed_t s, int64_t* ret); 15 | int toml_value_double(toml_unparsed_t s, double* ret); 16 | int toml_value_timestamp(toml_unparsed_t s, toml_timestamp_t* ret); 17 | 18 | static void print_escape_string(const char* s, int sl) { 19 | for (int i = 0; i < sl; i++) { 20 | char ch = s[i]; 21 | switch (ch) { 22 | case '\b': printf("\\b"); break; 23 | case '\t': printf("\\t"); break; 24 | case '\n': printf("\\n"); break; 25 | case '\f': printf("\\f"); break; 26 | case '\r': printf("\\r"); break; 27 | case '"': printf("\\\""); break; 28 | case '\\': printf("\\\\"); break; 29 | default: 30 | if (ch >= 0x00 && ch <= 0x1f) 31 | printf("\\u00%02X", ch); 32 | else 33 | 
printf("%c", ch); 34 | break; 35 | } 36 | } 37 | } 38 | 39 | static void print_raw(const char* s) { 40 | char* sval; 41 | int slen; 42 | int64_t ival; 43 | bool bval; 44 | double dval; 45 | toml_timestamp_t ts; 46 | 47 | if (toml_value_string(s, &sval, &slen) == 0) { 48 | printf("{\"type\": \"string\",\"value\": \""); 49 | print_escape_string(sval, slen); 50 | printf("\"}"); 51 | free(sval); 52 | } else if (toml_value_int(s, &ival) == 0) { 53 | printf("{\"type\": \"integer\",\"value\": \"%" PRId64 "\"}", ival); 54 | } else if (toml_value_bool(s, &bval) == 0) { 55 | printf("{\"type\": \"bool\",\"value\": \"%s\"}", bval ? "true" : "false"); 56 | } else if (toml_value_double(s, &dval) == 0) { 57 | if (isnan(dval)) 58 | printf("{\"type\": \"float\",\"value\": \"nan\"}"); 59 | else 60 | printf("{\"type\": \"float\",\"value\": \"%0.17g\"}", dval); 61 | } else if (toml_value_timestamp(s, &ts) == 0) { 62 | char millisec[10]; 63 | if (ts.millisec) 64 | snprintf(millisec, 10, ".%03d", ts.millisec); 65 | else 66 | millisec[0] = 0; 67 | if (ts.kind == 'd' || ts.kind == 'l') { 68 | char off[15]; 69 | off[0] = 'Z'; 70 | off[1] = 0; 71 | if (ts.tz != 0) 72 | snprintf(off, 15, "%c%02d:%02d", (ts.tz > 0 ? '+' : '-'), abs(ts.tz) / 60, abs(ts.tz) % 60); 73 | // clang-format off 74 | printf("{\"type\": \"%s\",\"value\": \"%04d-%02d-%02dT%02d:%02d:%02d%s%s\"}", 75 | (ts.kind == 'd' ? "datetime" : "datetime-local"), 76 | ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, millisec, 77 | (ts.kind == 'd' ? 
off : "")); 78 | } else if (ts.kind == 'D') { 79 | printf("{\"type\": \"date-local\",\"value\": \"%04d-%02d-%02d\"}", 80 | ts.year, ts.month, ts.day); 81 | } else if (ts.kind == 't') { 82 | printf("{\"type\": \"time-local\",\"value\": \"%02d:%02d:%02d%s\"}", 83 | ts.hour, ts.minute, ts.second, millisec); 84 | // clang-format on 85 | } 86 | } else { 87 | fprintf(stderr, "unknown type\n"); 88 | exit(1); 89 | } 90 | } 91 | 92 | static void print_array(toml_array_t* arr); 93 | static void print_table(toml_table_t* curtbl) { 94 | const char* key; 95 | int keylen; 96 | const char* raw; 97 | toml_array_t* arr; 98 | toml_table_t* tbl; 99 | 100 | printf("{"); 101 | for (int i = 0; (key = toml_table_key(curtbl, i, &keylen)) != 0; i++) { 102 | printf("%s\"", i > 0 ? ",\n" : ""); 103 | print_escape_string(key, keylen); 104 | printf("\":"); 105 | 106 | if ((raw = toml_table_unparsed(curtbl, key)) != 0) 107 | print_raw(raw); 108 | else if ((arr = toml_table_array(curtbl, key)) != 0) 109 | print_array(arr); 110 | else if ((tbl = toml_table_table(curtbl, key)) != 0) 111 | print_table(tbl); 112 | else 113 | abort(); 114 | } 115 | printf("}"); 116 | } 117 | 118 | static void print_table_array(toml_array_t* curarr) { 119 | toml_table_t* tbl; 120 | 121 | printf("["); 122 | for (int i = 0; (tbl = toml_array_table(curarr, i)) != 0; i++) { 123 | printf("%s", i > 0 ? "," : ""); 124 | print_table(tbl); 125 | } 126 | printf("]"); 127 | } 128 | 129 | static void print_array(toml_array_t* curarr) { 130 | if (curarr->kind == 't') { 131 | print_table_array(curarr); 132 | return; 133 | } 134 | 135 | printf("["); 136 | 137 | const char* raw; 138 | toml_array_t* arr; 139 | toml_table_t* tbl; 140 | 141 | const int n = toml_array_len(curarr); 142 | for (int i = 0; i < n; i++) { 143 | printf("%s", i > 0 ? 
"," : ""); 144 | 145 | if ((arr = toml_array_array(curarr, i)) != 0) { 146 | print_array(arr); 147 | continue; 148 | } 149 | 150 | if ((tbl = toml_array_table(curarr, i)) != 0) { 151 | print_table(tbl); 152 | continue; 153 | } 154 | 155 | raw = toml_array_unparsed(curarr, i); 156 | if (raw) { 157 | print_raw(raw); 158 | continue; 159 | } 160 | 161 | fflush(stdout); 162 | fprintf(stderr, "ERROR: unable to decode value in array\n"); 163 | exit(1); 164 | } 165 | 166 | printf("]"); 167 | } 168 | 169 | static void cat(FILE* fp) { 170 | char errbuf[200]; 171 | 172 | toml_table_t* tbl = toml_parse_file(fp, errbuf, sizeof(errbuf)); 173 | if (!tbl) { 174 | fprintf(stderr, "ERROR: %s\n", errbuf); 175 | exit(1); 176 | } 177 | 178 | print_table(tbl); 179 | printf("\n"); 180 | 181 | toml_free(tbl); 182 | } 183 | 184 | int main(int argc, const char* argv[argc + 1]) { 185 | if (argc == 1) { 186 | cat(stdin); 187 | return 0; 188 | } 189 | 190 | for (int i = 1; i < argc; i++) { 191 | FILE* fp = fopen(argv[i], "r"); 192 | if (!fp) { 193 | fprintf(stderr, "ERROR: cannot open %s: %s\n", argv[i], strerror(errno)); 194 | exit(1); 195 | } 196 | cat(fp); 197 | fclose(fp); 198 | } 199 | return 0; 200 | } 201 | -------------------------------------------------------------------------------- /toml-c-test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "toml.h" 5 | 6 | int failed = 0; 7 | 8 | #define errorf(...) \ 9 | { \ 10 | failed = 1; \ 11 | fprintf(stderr, "FAIL: %s:%d: %s()\n\t", __FILE__, __LINE__, __func__); \ 12 | fprintf(stderr, __VA_ARGS__); \ 13 | fprintf(stderr, "\n"); \ 14 | } 15 | 16 | #define fatalf(...) 
\ 17 | { \ 18 | errorf(__VA_ARGS__); \ 19 | return; \ 20 | } 21 | 22 | #define streq(a, b) (strcmp(a, b) == 0) 23 | 24 | void test_toml_table_value(void) { // Checks each typed toml_table_*() getter against a known document. 25 | char errbuf[200]; 26 | toml_table_t* tbl = toml_parse("str = 'xxx'\n" 27 | "int = 42\n" 28 | "bool = true\n" 29 | "float = 6.666\n" 30 | "ts = 2012-01-02T15:16:17Z\n", 31 | errbuf, sizeof(errbuf)); 32 | if (!tbl) 33 | fatalf("%s", errbuf); 34 | int l = toml_table_len(tbl); 35 | if (l != 5) 36 | errorf("wrong table length: %d", l); 37 | 38 | toml_value_t str = toml_table_string(tbl, "str"); 39 | if (!str.ok) 40 | errorf("str.ok not set"); 41 | if (!streq(str.u.s, "xxx")) 42 | errorf("str.u.s wrong value: '%s'; want: 'xxx'", str.u.s); 43 | if (str.u.sl != 3) 44 | errorf("str.u.sl wrong value: %d", str.u.sl); 45 | free(str.u.s); 46 | 47 | toml_value_t i = toml_table_int(tbl, "int"); 48 | if (!i.ok) 49 | errorf("int.ok not set"); 50 | if (i.u.i != 42) 51 | errorf("int.u.i wrong value: %lld", (long long)i.u.i); // int64_t is not 'long' on every platform. 52 | 53 | toml_value_t b = toml_table_bool(tbl, "bool"); 54 | if (!b.ok) 55 | errorf("b.ok not set"); 56 | if (!b.u.b) 57 | errorf("b.u.b wrong value: %d", b.u.b); 58 | 59 | toml_value_t f = toml_table_double(tbl, "float"); 60 | if (!f.ok) 61 | errorf("f.ok not set"); 62 | if (f.u.d != 6.666) 63 | errorf("f.u.d wrong value: %f", f.u.d); 64 | 65 | toml_value_t ts = toml_table_timestamp(tbl, "ts"); 66 | if (!ts.ok) 67 | errorf("ts.ok not set"); 68 | char have[200]; 69 | 70 | // clang-format off 71 | snprintf(have, 200, "'%c' %d-%02d-%02d %02d:%02d:%02d.%d TZ=%d", 72 | ts.u.ts.kind, ts.u.ts.year, ts.u.ts.month, ts.u.ts.day, ts.u.ts.hour, 73 | ts.u.ts.minute, ts.u.ts.second, ts.u.ts.millisec, ts.u.ts.tz); 74 | // clang-format on 75 | char want[200] = "'d' 2012-01-02 15:16:17.0 TZ=0"; 76 | if (!streq(have, want)) 77 | errorf("have: %s\n\twant: %s", have, want); 78 | 79 | toml_free(tbl); 80 | } 81 | 82 | void test_toml_array_value(void) { 83 | char errbuf[200]; 84 | toml_table_t* tbl = toml_parse("str = ['xxx', 
\"yyy\"]\n" 85 | "int = [42, 43]\n" 86 | "bool = [true, false]\n" 87 | "float = [6.666, 6.667]\n" 88 | "ts = [2012-01-02T15:16:17Z, 2013-02-03T16:17:18Z]\n", 89 | errbuf, sizeof(errbuf)); 90 | if (!tbl) 91 | fatalf("%s", errbuf); 92 | 93 | { 94 | toml_array_t* arr = toml_table_array(tbl, "str"); 95 | toml_value_t str1 = toml_array_string(arr, 0); 96 | if (!str1.ok) 97 | errorf("str1.ok not set"); 98 | if (!streq(str1.u.s, "xxx")) 99 | errorf("str1.u.s wrong value: '%s'; want: 'xxx'", str1.u.s); 100 | if (str1.u.sl != 3) 101 | errorf("str1.u.sl wrong value: %d", str1.u.sl); 102 | 103 | toml_value_t str2 = toml_array_string(arr, 1); 104 | if (!str2.ok) 105 | errorf("str2.ok not set"); 106 | if (!streq(str2.u.s, "yyy")) 107 | errorf("str2.u.s wrong value: %s; want: 'yyy'", str2.u.s); 108 | if (str2.u.sl != 3) 109 | errorf("str2.u.sl wrong value: %d", str2.u.sl); 110 | free(str1.u.s); 111 | free(str2.u.s); 112 | } 113 | 114 | { 115 | toml_array_t* arr = toml_table_array(tbl, "ts"); 116 | toml_value_t ts1 = toml_array_timestamp(arr, 0); 117 | char have[200]; 118 | 119 | // clang-format off 120 | snprintf(have, 200, "'%c' %d-%02d-%02d %02d:%02d:%02d.%d TZ=%d", 121 | ts1.u.ts.kind, ts1.u.ts.year, ts1.u.ts.month, ts1.u.ts.day, ts1.u.ts.hour, 122 | ts1.u.ts.minute, ts1.u.ts.second, ts1.u.ts.millisec, ts1.u.ts.tz); 123 | // clang-format on 124 | char want[200] = "'d' 2012-01-02 15:16:17.0 TZ=0"; 125 | if (!streq(have, want)) 126 | errorf("have: %s\n\twant: %s", have, want); 127 | } 128 | 129 | toml_free(tbl); 130 | } 131 | 132 | void test_toml_table_string_unknown_value(void) { 133 | char errbuf[200]; 134 | toml_table_t* tbl = toml_parse("a = 'a'", errbuf, sizeof(errbuf)); 135 | if (!tbl) 136 | fatalf("%s", errbuf); 137 | 138 | toml_value_t unknown = toml_table_string(tbl, "aa"); 139 | if (unknown.ok) 140 | errorf("unknown.ok set"); 141 | 142 | toml_free(tbl); 143 | free(unknown.u.s); 144 | } 145 | 146 | // TODO: can probably use toml-test's -errors feature for this. 
147 | void test_error(void) { 148 | char errbuf[200]; 149 | 150 | // e_syntax errors 151 | toml_parse("key", errbuf, sizeof(errbuf)); 152 | if (!streq(errbuf, "at 1:4: missing '='")) 153 | errorf("wrong error: %s", errbuf); 154 | 155 | toml_parse("k = 'missing-q\nb = 1", errbuf, sizeof(errbuf)); 156 | if (!streq(errbuf, "at 1:14: unterminated quote (')")) 157 | errorf("wrong error: %s", errbuf); 158 | 159 | toml_parse("k = {{}}", errbuf, sizeof(errbuf)); 160 | if (!streq(errbuf, "at 1:6: expected a string")) 161 | errorf("wrong error: %s", errbuf); 162 | 163 | // e_keyexists errors 164 | toml_parse("[a]\n[a]", errbuf, sizeof(errbuf)); 165 | if (!streq(errbuf, "at 2:2: key already defined")) 166 | errorf("wrong error: %s", errbuf); 167 | } 168 | 169 | int main(void) { 170 | test_toml_table_value(); 171 | test_toml_array_value(); 172 | test_toml_table_string_unknown_value(); 173 | test_error(); 174 | 175 | printf("%s: %s\n", __FILE__, failed ? "FAIL" : "PASS"); 176 | return failed; 177 | } 178 | -------------------------------------------------------------------------------- /toml.c: -------------------------------------------------------------------------------- 1 | #define _POSIX_C_SOURCE 200809L 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "toml.h" 13 | 14 | #define ALIGN8(sz) (((sz) + 7) & ~7) 15 | #define calloc(x, y) error - forbidden - use CALLOC instead 16 | static void* CALLOC(size_t nmemb, size_t sz) { 17 | int nb = ALIGN8(sz) * nmemb; 18 | void* p = malloc(nb); 19 | if (p) { 20 | memset(p, 0, nb); 21 | } 22 | return p; 23 | } 24 | 25 | // some old platforms define strdup macro -- drop it. 
26 | #undef strdup 27 | #define strdup(x) error - forbidden - use STRDUP instead 28 | static char* STRDUP(const char* s) { 29 | int len = strlen(s); 30 | char* p = malloc(len + 1); 31 | if (p) { 32 | memcpy(p, s, len); 33 | p[len] = 0; 34 | } 35 | return p; 36 | } 37 | 38 | // some old platforms define strndup macro -- drop it. 39 | #undef strndup 40 | #define strndup(x) error - forbidden - use STRNDUP instead 41 | static char* STRNDUP(const char* s, size_t n) { 42 | size_t len = strnlen(s, n); 43 | char* p = malloc(len + 1); 44 | if (p) { 45 | memcpy(p, s, len); 46 | p[len] = 0; 47 | } 48 | return p; 49 | } 50 | 51 | // Unparsed values. 52 | typedef const char* toml_unparsed_t; 53 | toml_unparsed_t toml_table_unparsed(const toml_table_t* table, const char* key); 54 | toml_unparsed_t toml_array_unparsed(const toml_array_t* array, int idx); 55 | int toml_value_string(toml_unparsed_t s, char** ret, int* len); 56 | int toml_value_bool(toml_unparsed_t s, bool* ret); 57 | int toml_value_int(toml_unparsed_t s, int64_t* ret); 58 | int toml_value_double(toml_unparsed_t s, double* ret); 59 | int toml_value_timestamp(toml_unparsed_t s, toml_timestamp_t* ret); 60 | 61 | // Convert escape to UTF-8; return #bytes used in buf to encode the char, or -1 62 | // on error. 
63 | // http://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16 64 | int read_unicode_escape(uint64_t code, char buf[6]) { 65 | if (0xd800 <= code && code <= 0xdfff) /// UTF-16 surrogates 66 | return -1; 67 | if (0x10FFFF < code) 68 | return -1; 69 | if (code <= 0x7F) { /// 0x00000000 - 0x0000007F: 0xxxxxxx 70 | buf[0] = (unsigned char)code; 71 | return 1; 72 | } 73 | if (code <= 0x000007FF) { /// 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx 74 | buf[0] = (unsigned char)(0xc0 | (code >> 6)); 75 | buf[1] = (unsigned char)(0x80 | (code & 0x3f)); 76 | return 2; 77 | } 78 | if (code <= 0x0000FFFF) { /// 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx 79 | buf[0] = (unsigned char)(0xe0 | (code >> 12)); 80 | buf[1] = (unsigned char)(0x80 | ((code >> 6) & 0x3f)); 81 | buf[2] = (unsigned char)(0x80 | (code & 0x3f)); 82 | return 3; 83 | } 84 | if (code <= 0x001FFFFF) { /// 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 85 | buf[0] = (unsigned char)(0xf0 | (code >> 18)); 86 | buf[1] = (unsigned char)(0x80 | ((code >> 12) & 0x3f)); 87 | buf[2] = (unsigned char)(0x80 | ((code >> 6) & 0x3f)); 88 | buf[3] = (unsigned char)(0x80 | (code & 0x3f)); 89 | return 4; 90 | } 91 | return -1; 92 | } 93 | 94 | static inline void xfree(const void* x) { 95 | if (x) 96 | free((void*)(intptr_t)x); 97 | } 98 | 99 | enum tokentype_t { INVALID, DOT, COMMA, EQUAL, LBRACE, RBRACE, NEWLINE, LBRACKET, RBRACKET, STRING, MSTRING }; 100 | typedef enum tokentype_t tokentype_t; 101 | 102 | typedef struct token_t token_t; 103 | struct token_t { 104 | tokentype_t tok; 105 | toml_pos_t pos; 106 | char* ptr; // points into context->start 107 | int len; 108 | int eof; 109 | }; 110 | 111 | typedef struct context_t context_t; 112 | struct context_t { 113 | char* start; 114 | char* stop; 115 | char* errbuf; 116 | int errbufsz; 117 | 118 | token_t tok; 119 | toml_table_t* root; 120 | toml_table_t* curtbl; 121 | 122 | struct { 123 | int top; 124 | 
char* key[10]; 125 | int keylen[10]; 126 | token_t tok[10]; 127 | } tpath; 128 | }; 129 | 130 | #define STRINGIFY(x) #x 131 | #define TOSTRING(x) STRINGIFY(x) 132 | #define FLINE __FILE__ ":" TOSTRING(__LINE__) 133 | 134 | static int next_token(context_t* ctx, bool dotisspecial); 135 | 136 | // Error reporting. Call when an error is detected. Always return -1. 137 | static int e_outofmemory(context_t* ctx, const char* fline) { 138 | snprintf(ctx->errbuf, ctx->errbufsz, "ERROR: out of memory (%s)", fline); 139 | return -1; 140 | } 141 | 142 | static int e_internal(context_t* ctx, const char* fline) { 143 | snprintf(ctx->errbuf, ctx->errbufsz, "internal error (%s)", fline); 144 | return -1; 145 | } 146 | 147 | static int e_syntax(context_t* ctx, toml_pos_t pos, const char* msg) { 148 | snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: %s", pos.line, pos.col, msg); 149 | return -1; 150 | } 151 | 152 | static int e_keyexists(context_t* ctx, toml_pos_t pos) { 153 | snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: key already defined", pos.line, pos.col); 154 | return -1; 155 | } 156 | 157 | static void* expand(void* p, int sz, int newsz) { 158 | void* s = malloc(newsz); 159 | if (!s) 160 | return 0; 161 | 162 | if (p) { 163 | memcpy(s, p, sz); 164 | free(p); 165 | } 166 | return s; 167 | } 168 | 169 | static void** expand_ptrarr(void** p, int n) { 170 | void** s = malloc((n + 1) * sizeof(void*)); 171 | if (!s) 172 | return 0; 173 | 174 | s[n] = 0; 175 | if (p) { 176 | memcpy(s, p, n * sizeof(void*)); 177 | free(p); 178 | } 179 | return s; 180 | } 181 | 182 | static toml_arritem_t* expand_arritem(toml_arritem_t* p, int n) { 183 | toml_arritem_t* pp = expand(p, n * sizeof(*p), (n + 1) * sizeof(*p)); 184 | if (!pp) 185 | return 0; 186 | 187 | memset(&pp[n], 0, sizeof(pp[n])); 188 | return pp; 189 | } 190 | 191 | static uint8_t const u8_length[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4}; 192 | #define u8length(s) u8_length[(((uint8_t*)(s))[0] & 0xFF) >> 4]; 193 | 194 
// Validate the body of a literal string and copy it verbatim (no escape
// processing) to a newly allocated, NUL-terminated buffer.
//
//   src, srclen   the string body, without the surrounding quotes
//   len           receives the number of bytes copied, excluding the NUL
//   multiline     when true, CR and LF are allowed in the body
//   errbuf        receives a message when NULL is returned
//
// Returns the buffer (the caller frees it) or NULL on error.
static char* norm_lit_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) {
	const char* sp  = src;
	const char* sq  = src + srclen;
	char*       dst = 0; /// will write to dst[] and return it
	int         max = 0; /// max size of dst[]
	int         off = 0; /// cur offset in dst[]

	for (;;) {                 /// scan forward on src
		if (off >= max - 10) { /// have some slack for misc stuff
			int   newmax = max + 50;
			char* x      = expand(dst, max, newmax);
			if (!x) {
				xfree(dst);
				snprintf(errbuf, errbufsz, "out of memory");
				return 0;
			}
			dst = x;
			max = newmax;
		}

		if (sp >= sq) /// finished?
			break;

		uint8_t l = u8length(sp);
		if (l == 0) {
			xfree(dst);
			snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
			return 0;
		}
		if (l > 1) {
			/// The whole sequence has to lie inside [sp, sq).
			if (sq - sp < l) {
				xfree(dst);
				snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
				return 0;
			}
			dst[off++] = *sp++; /// lead byte, already classified by u8length()
			for (int i = 1; i < l; i++) {
				char ch = *sp++;
				/// Every following byte must be a continuation byte
				/// (10xxxxxx); testing only the high bit would let a
				/// second lead byte through.
				if ((ch & 0xC0) != 0x80) {
					xfree(dst);
					snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
					return 0;
				}
				dst[off++] = ch;
			}
			continue;
		}

		/// control characters other than Tab are not allowed
		char ch = *sp++;
		if ((0 <= ch && ch <= 0x08) || (0x0a <= ch && ch <= 0x1f) || ch == 0x7f) {
			if (!(multiline && (ch == '\r' || ch == '\n'))) {
				xfree(dst);
				snprintf(errbuf, errbufsz, "invalid char U+%04x", ch);
				return 0;
			}
		}

		dst[off++] = ch; /// a plain copy suffice
	}

	*len       = off;
	dst[off++] = 0;
	return dst;
}

// Convert src to raw unescaped utf-8 string. Returns NULL if error with errmsg
// in errbuf.
256 | static char* norm_basic_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) { 257 | const char* sp = src; 258 | const char* sq = src + srclen; 259 | char* dst = 0; /// will write to dst[] and return it 260 | int max = 0; /// max size of dst[] 261 | int off = 0; /// cur offset in dst[] 262 | 263 | /// scan forward on src 264 | for (;;) { 265 | if (off >= max - 10) { /// have some slack for misc stuff 266 | int newmax = max + 50; 267 | char* x = expand(dst, max, newmax); 268 | if (!x) { 269 | xfree(dst); 270 | snprintf(errbuf, errbufsz, "out of memory"); 271 | return 0; 272 | } 273 | dst = x; 274 | max = newmax; 275 | } 276 | 277 | if (sp >= sq) /// finished? 278 | break; 279 | 280 | uint8_t l = u8length(sp); 281 | if (l == 0) { 282 | xfree(dst); 283 | snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); 284 | return 0; 285 | } 286 | if (l > 1) { 287 | for (int i = 0; i < l; i++) { 288 | char ch = *sp++; 289 | if ((ch & 0x80) != 0x80) { 290 | xfree(dst); 291 | snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); 292 | return 0; 293 | } 294 | dst[off++] = ch; 295 | } 296 | continue; 297 | } 298 | 299 | char ch = *sp++; 300 | if (ch != '\\') { 301 | /// must be escaped: U+0000 to U+0008, U+000A to U+001F, U+007F 302 | if ((ch >= 0 && ch <= 0x08) || (ch >= 0x0a && ch <= 0x1f) || ch == 0x7f) { 303 | if (!(multiline && (ch == '\r' || ch == '\n'))) { 304 | xfree(dst); 305 | snprintf(errbuf, errbufsz, "invalid char U+%04x", ch); 306 | return 0; 307 | } 308 | } 309 | 310 | dst[off++] = ch; /// a plain copy suffice 311 | continue; 312 | } 313 | 314 | // TODO: unreachable, I think? 315 | if (sp >= sq) { /// ch was backslash. we expect the escape char. 316 | snprintf(errbuf, errbufsz, "last backslash is invalid"); 317 | xfree(dst); 318 | return 0; 319 | } 320 | 321 | if (multiline) { /// for multi-line, we want to kill line-ending-backslash. 
322 | if (sp[strspn(sp, " \t\r")] == '\n') { /// if there is only whitespace after the backslash ... 323 | sp += strspn(sp, " \t\r\n"); /// skip all the following whitespaces 324 | continue; 325 | } 326 | } 327 | 328 | ch = *sp++; /// get the escaped char 329 | switch (ch) { 330 | case 'x': 331 | case 'u': 332 | case 'U': { 333 | uint64_t ucs = 0; 334 | int nhex = 2; 335 | if (ch == 'u') nhex = 4; 336 | if (ch == 'U') nhex = 8; 337 | for (int i = 0; i < nhex; i++) { 338 | // TODO: unreachable I think, as scan_string() already 339 | // guarantees exactly 4 or 8 hex chars. 340 | if (sp >= sq) { 341 | snprintf(errbuf, errbufsz, "\\%c expected %d hex chars", ch, nhex); 342 | xfree(dst); 343 | return 0; 344 | } 345 | ch = *sp++; 346 | int v = -1; 347 | if ('0' <= ch && ch <= '9') 348 | v = ch - '0'; 349 | else if ('A' <= ch && ch <= 'F') 350 | v = ch - 'A' + 10; 351 | else if ('a' <= ch && ch <= 'f') 352 | v = (ch ^ 0x20) - 'A' + 10; 353 | // TODO: also unrechable, as per above. 354 | if (v == -1) { 355 | snprintf(errbuf, errbufsz, "invalid hex chars for \\u or \\U"); 356 | xfree(dst); 357 | return 0; 358 | } 359 | ucs = ucs * 16 + v; 360 | } 361 | int n = read_unicode_escape(ucs, &dst[off]); 362 | if (n == -1) { 363 | snprintf(errbuf, errbufsz, "illegal ucs code in \\u or \\U"); 364 | xfree(dst); 365 | return 0; 366 | } 367 | off += n; 368 | }; 369 | continue; 370 | case 'b': ch = '\b'; break; 371 | case 't': ch = '\t'; break; 372 | case 'n': ch = '\n'; break; 373 | case 'f': ch = '\f'; break; 374 | case 'r': ch = '\r'; break; 375 | case 'e': ch = 0x1b; break; 376 | case '"': ch = '"'; break; 377 | case '\\': ch = '\\'; break; 378 | default: 379 | // TODO: unrechable, I think, as scan_string() already 380 | // guarantees correct char. 381 | snprintf(errbuf, errbufsz, "illegal escape char \\%c", ch); 382 | xfree(dst); 383 | return 0; 384 | } 385 | 386 | dst[off++] = ch; 387 | } 388 | 389 | *len = off; 390 | dst[off++] = 0; /// Cap with NUL and return it. 
391 | return dst; 392 | } 393 | 394 | // Normalize a key. Convert all special chars to raw unescaped utf-8 chars. 395 | static char* normalize_key(context_t* ctx, token_t strtok, int* keylen) { 396 | const char* sp = strtok.ptr; 397 | const char* sq = strtok.ptr + strtok.len; 398 | int ch = *sp; 399 | char* ret; 400 | 401 | // Quoted string 402 | if (ch == '\'' || ch == '\"') { 403 | /// Take " or ' off from and back. 404 | sp++, sq--; 405 | 406 | char ebuf[80]; 407 | if (ch == '\'') 408 | ret = norm_lit_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf)); 409 | else 410 | ret = norm_basic_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf)); 411 | if (!ret) { 412 | e_syntax(ctx, strtok.pos, ebuf); 413 | return 0; 414 | } 415 | return ret; 416 | } 417 | 418 | *keylen = 0; 419 | for (const char* c = sp; c != sq; c++) { /// Bare key: allow: [A-Za-z0-9_-]+ 420 | *keylen = *keylen + 1; 421 | if (isalnum(*c) || *c == '_' || *c == '-') 422 | continue; 423 | // TODO: never triggered? When reading the file it already validates 424 | // this, so seems redundant? Need to double-check. 425 | e_syntax(ctx, ctx->tok.pos, "invalid key"); 426 | return 0; 427 | } 428 | 429 | if (!(ret = STRNDUP(sp, sq - sp))) { /// dup and return 430 | e_outofmemory(ctx, FLINE); 431 | return 0; 432 | } 433 | return ret; 434 | } 435 | 436 | // Look up key in tbl. Return 0 if not found, or 'v'alue, 'a'rray or 't'able 437 | // depending on the element. 
438 | static int check_key(toml_table_t* tbl, const char* key, toml_keyval_t** ret_val, toml_array_t** ret_arr, toml_table_t** ret_tbl) { 439 | int i; 440 | void* dummy; 441 | 442 | if (!ret_tbl) 443 | ret_tbl = (toml_table_t**)&dummy; 444 | if (!ret_arr) 445 | ret_arr = (toml_array_t**)&dummy; 446 | if (!ret_val) 447 | ret_val = (toml_keyval_t**)&dummy; 448 | 449 | *ret_tbl = 0; 450 | *ret_arr = 0; 451 | *ret_val = 0; 452 | 453 | for (i = 0; i < tbl->nkval; i++) { 454 | if (strcmp(key, tbl->kval[i]->key) == 0) { 455 | *ret_val = tbl->kval[i]; 456 | return 'v'; 457 | } 458 | } 459 | for (i = 0; i < tbl->narr; i++) { 460 | if (strcmp(key, tbl->arr[i]->key) == 0) { 461 | *ret_arr = tbl->arr[i]; 462 | return 'a'; 463 | } 464 | } 465 | for (i = 0; i < tbl->ntbl; i++) { 466 | if (strcmp(key, tbl->tbl[i]->key) == 0) { 467 | *ret_tbl = tbl->tbl[i]; 468 | return 't'; 469 | } 470 | } 471 | return 0; 472 | } 473 | 474 | static int key_kind(toml_table_t* tbl, const char* key) { 475 | return check_key(tbl, key, 0, 0, 0); 476 | } 477 | 478 | // Create a keyval in the table. 
479 | static toml_keyval_t* create_keyval_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) { 480 | int keylen; 481 | char* newkey = normalize_key(ctx, keytok, &keylen); 482 | if (!newkey) 483 | return 0; 484 | 485 | toml_keyval_t* dest = 0; 486 | if (key_kind(tbl, newkey)) { 487 | xfree(newkey); 488 | e_keyexists(ctx, keytok.pos); 489 | return 0; 490 | } 491 | 492 | int n = tbl->nkval; 493 | toml_keyval_t** base; 494 | if ((base = (toml_keyval_t**)expand_ptrarr((void**)tbl->kval, n)) == 0) { 495 | xfree(newkey); 496 | e_outofmemory(ctx, FLINE); 497 | return 0; 498 | } 499 | tbl->kval = base; 500 | 501 | if ((base[n] = (toml_keyval_t*)CALLOC(1, sizeof(*base[n]))) == 0) { 502 | xfree(newkey); 503 | e_outofmemory(ctx, FLINE); 504 | return 0; 505 | } 506 | 507 | dest = tbl->kval[tbl->nkval++]; 508 | dest->key = newkey; 509 | dest->keylen = keylen; 510 | return dest; 511 | } 512 | 513 | // Create a table in the table. 514 | static toml_table_t* create_keytable_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) { 515 | int keylen; 516 | char* newkey = normalize_key(ctx, keytok, &keylen); 517 | if (!newkey) 518 | return 0; 519 | 520 | toml_table_t* dest = 0; 521 | // TODO: need to check all parts for: 522 | // 523 | // [a] 524 | // [a.c] # checks of "a.c" is defined, which is false. 525 | if (check_key(tbl, newkey, 0, 0, &dest)) { 526 | xfree(newkey); 527 | 528 | /// Special case: make explicit if table exists and was created 529 | /// implicitly. 
530 | if (dest && dest->implicit) { 531 | dest->implicit = false; 532 | return dest; 533 | } 534 | e_keyexists(ctx, keytok.pos); 535 | return 0; 536 | } 537 | 538 | int n = tbl->ntbl; 539 | toml_table_t** base; 540 | if ((base = (toml_table_t**)expand_ptrarr((void**)tbl->tbl, n)) == 0) { 541 | xfree(newkey); 542 | e_outofmemory(ctx, FLINE); 543 | return 0; 544 | } 545 | tbl->tbl = base; 546 | 547 | if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0) { 548 | xfree(newkey); 549 | e_outofmemory(ctx, FLINE); 550 | return 0; 551 | } 552 | 553 | dest = tbl->tbl[tbl->ntbl++]; 554 | dest->key = newkey; 555 | dest->keylen = keylen; 556 | return dest; 557 | } 558 | 559 | // Create an array in the table. 560 | static toml_array_t* create_keyarray_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok, char kind) { 561 | int keylen; 562 | char* newkey = normalize_key(ctx, keytok, &keylen); 563 | if (!newkey) 564 | return 0; 565 | 566 | if (key_kind(tbl, newkey)) { 567 | xfree(newkey); 568 | e_keyexists(ctx, keytok.pos); 569 | return 0; 570 | } 571 | 572 | int n = tbl->narr; 573 | toml_array_t** base; 574 | if ((base = (toml_array_t**)expand_ptrarr((void**)tbl->arr, n)) == 0) { 575 | xfree(newkey); 576 | e_outofmemory(ctx, FLINE); 577 | return 0; 578 | } 579 | tbl->arr = base; 580 | 581 | if ((base[n] = (toml_array_t*)CALLOC(1, sizeof(*base[n]))) == 0) { 582 | xfree(newkey); 583 | e_outofmemory(ctx, FLINE); 584 | return 0; 585 | } 586 | toml_array_t* dest = tbl->arr[tbl->narr++]; 587 | 588 | dest->keylen = keylen; 589 | dest->key = newkey; 590 | dest->kind = kind; 591 | return dest; 592 | } 593 | 594 | static toml_arritem_t* create_value_in_array(context_t* ctx, toml_array_t* parent) { 595 | const int n = parent->nitem; 596 | toml_arritem_t* base = expand_arritem(parent->item, n); 597 | if (!base) { 598 | e_outofmemory(ctx, FLINE); 599 | return 0; 600 | } 601 | parent->item = base; 602 | parent->nitem++; 603 | return &parent->item[n]; 604 | } 605 | 606 | // 
Create an array in an array. 607 | static toml_array_t* create_array_in_array(context_t* ctx, toml_array_t* parent) { 608 | const int n = parent->nitem; 609 | toml_arritem_t* base = expand_arritem(parent->item, n); 610 | if (!base) { 611 | e_outofmemory(ctx, FLINE); 612 | return 0; 613 | } 614 | toml_array_t* ret = (toml_array_t*)CALLOC(1, sizeof(toml_array_t)); 615 | if (!ret) { 616 | e_outofmemory(ctx, FLINE); 617 | return 0; 618 | } 619 | base[n].arr = ret; 620 | parent->item = base; 621 | parent->nitem++; 622 | return ret; 623 | } 624 | 625 | // Create a table in an array 626 | static toml_table_t* create_table_in_array(context_t* ctx, toml_array_t* parent) { 627 | int n = parent->nitem; 628 | toml_arritem_t* base = expand_arritem(parent->item, n); 629 | if (!base) { 630 | e_outofmemory(ctx, FLINE); 631 | return 0; 632 | } 633 | toml_table_t* ret = (toml_table_t*)CALLOC(1, sizeof(toml_table_t)); 634 | if (!ret) { 635 | e_outofmemory(ctx, FLINE); 636 | return 0; 637 | } 638 | base[n].tbl = ret; 639 | parent->item = base; 640 | parent->nitem++; 641 | return ret; 642 | } 643 | 644 | static bool skip_newlines(context_t* ctx, bool isdotspecial) { 645 | while (ctx->tok.tok == NEWLINE) { 646 | if (next_token(ctx, isdotspecial)) 647 | return false; 648 | if (ctx->tok.eof) 649 | break; 650 | } 651 | return true; 652 | } 653 | 654 | static int parse_keyval(context_t* ctx, toml_table_t* tbl); 655 | 656 | static inline int eat_token(context_t* ctx, tokentype_t typ, bool isdotspecial, const char* fline) { 657 | if (ctx->tok.tok != typ) 658 | return e_internal(ctx, fline); 659 | if (next_token(ctx, isdotspecial)) 660 | return -1; 661 | return 0; 662 | } 663 | 664 | // We are at '{ ... }'; parse the table. 
665 | static int parse_inline_table(context_t* ctx, toml_table_t* tbl) { 666 | if (eat_token(ctx, LBRACE, 1, FLINE)) 667 | return -1; 668 | 669 | for (;;) { 670 | if (ctx->tok.tok == RBRACE) // until closing brace 671 | break; 672 | if (ctx->tok.eof) 673 | return e_syntax(ctx, ctx->tok.pos, "no closing '}'"); 674 | 675 | if (ctx->tok.tok == NEWLINE) { 676 | if (eat_token(ctx, NEWLINE, 1, FLINE)) 677 | return -1; 678 | continue; 679 | } 680 | 681 | if (ctx->tok.tok != STRING) 682 | return e_syntax(ctx, ctx->tok.pos, "expected a string"); 683 | 684 | if (parse_keyval(ctx, tbl)) 685 | return -1; 686 | 687 | // On comma, continue to scan for next keyval. 688 | if (ctx->tok.tok == COMMA) { 689 | if (eat_token(ctx, COMMA, 1, FLINE)) 690 | return -1; 691 | continue; 692 | } 693 | break; 694 | } 695 | 696 | for (;;) { 697 | if (ctx->tok.tok != NEWLINE || ctx->tok.eof) 698 | break; 699 | if (eat_token(ctx, NEWLINE, 1, FLINE)) 700 | return -1; 701 | } 702 | 703 | if (eat_token(ctx, RBRACE, 1, FLINE)) 704 | return -1; 705 | 706 | tbl->readonly = 1; 707 | return 0; 708 | } 709 | 710 | static int valtype(const char* val) { 711 | toml_timestamp_t ts; 712 | if (*val == '\'' || *val == '"') 713 | return 's'; 714 | if (toml_value_bool(val, 0) == 0) 715 | return 'b'; 716 | if (toml_value_int(val, 0) == 0) 717 | return 'i'; 718 | if (toml_value_double(val, 0) == 0) 719 | return 'd'; 720 | if (toml_value_timestamp(val, &ts) == 0) { 721 | if (ts.year && ts.hour) 722 | return 'T'; /// timestamp 723 | if (ts.year) // TODO: never reached? 
724 | return 'D'; /// date 725 | return 't'; /// time 726 | } 727 | return 'u'; /// unknown 728 | } 729 | 730 | // We are at '[...]' 731 | static int parse_array(context_t* ctx, toml_array_t* arr) { 732 | if (eat_token(ctx, LBRACKET, 0, FLINE)) 733 | return -1; 734 | 735 | for (;;) { 736 | if (!skip_newlines(ctx, 0)) 737 | return -1; 738 | 739 | if (ctx->tok.tok == RBRACKET) /// until ] 740 | break; 741 | 742 | switch (ctx->tok.tok) { 743 | case MSTRING: 744 | case STRING: { 745 | /// set array kind if this will be the first entry 746 | if (arr->kind == 0) 747 | arr->kind = 'v'; 748 | else if (arr->kind != 'v') 749 | arr->kind = 'm'; 750 | 751 | char* val = ctx->tok.ptr; 752 | int vlen = ctx->tok.len; 753 | 754 | /// make a new value in array 755 | toml_arritem_t* newval = create_value_in_array(ctx, arr); 756 | if (!newval) 757 | return e_outofmemory(ctx, FLINE); 758 | 759 | if (!(newval->val = STRNDUP(val, vlen))) 760 | return e_outofmemory(ctx, FLINE); 761 | 762 | newval->valtype = valtype(newval->val); 763 | 764 | /// set array type if this is the first entry 765 | if (arr->nitem == 1) 766 | arr->type = newval->valtype; 767 | else if (arr->type != newval->valtype) 768 | arr->type = 'm'; /// mixed 769 | 770 | if (eat_token(ctx, ctx->tok.tok, 0, FLINE)) 771 | return -1; 772 | break; 773 | } 774 | case LBRACKET: { // [ [array], [array] ... ] 775 | // set the array kind if this will be the first entry. 776 | if (arr->kind == 0) 777 | arr->kind = 'a'; 778 | else if (arr->kind != 'a') 779 | arr->kind = 'm'; 780 | 781 | toml_array_t* subarr = create_array_in_array(ctx, arr); 782 | if (!subarr) 783 | return -1; 784 | if (parse_array(ctx, subarr)) 785 | return -1; 786 | break; 787 | } 788 | case LBRACE: { // [ {table}, {table} ... ] 789 | // set the array kind if this will be the first entry. 
790 | if (arr->kind == 0) 791 | arr->kind = 't'; 792 | else if (arr->kind != 't') 793 | arr->kind = 'm'; 794 | 795 | toml_table_t* subtbl = create_table_in_array(ctx, arr); 796 | if (!subtbl) 797 | return -1; 798 | if (parse_inline_table(ctx, subtbl)) 799 | return -1; 800 | break; 801 | } 802 | default: return e_syntax(ctx, ctx->tok.pos, "syntax error"); 803 | } 804 | 805 | if (!skip_newlines(ctx, 0)) 806 | return -1; 807 | 808 | // on comma, continue to scan for next element 809 | if (ctx->tok.tok == COMMA) { 810 | if (eat_token(ctx, COMMA, 0, FLINE)) 811 | return -1; 812 | continue; 813 | } 814 | break; 815 | } 816 | 817 | if (eat_token(ctx, RBRACKET, 1, FLINE)) 818 | return -1; 819 | return 0; 820 | } 821 | 822 | // Handle lines like: 823 | // key = "value" 824 | // key = [ array ] 825 | // key = { table } 826 | static int parse_keyval(context_t* ctx, toml_table_t* tbl) { 827 | if (tbl->readonly) 828 | return e_keyexists(ctx, ctx->tok.pos); 829 | 830 | token_t key = ctx->tok; 831 | if (eat_token(ctx, STRING, 1, FLINE)) 832 | return -1; 833 | 834 | if (ctx->tok.tok == DOT) { 835 | // Handle inline dotted key: 836 | // physical.color = "orange" 837 | // physical.shape = "round" 838 | toml_table_t* subtbl = 0; 839 | { 840 | int keylen; 841 | char* subtblstr = normalize_key(ctx, key, &keylen); 842 | if (!subtblstr) 843 | return -1; 844 | 845 | subtbl = toml_table_table(tbl, subtblstr); 846 | if (subtbl) 847 | subtbl->keylen = keylen; 848 | xfree(subtblstr); 849 | } 850 | if (!subtbl) { 851 | subtbl = create_keytable_in_table(ctx, tbl, key); 852 | if (!subtbl) 853 | return -1; 854 | } 855 | if (next_token(ctx, true)) 856 | return -1; 857 | if (parse_keyval(ctx, subtbl)) 858 | return -1; 859 | return 0; 860 | } 861 | 862 | if (ctx->tok.tok != EQUAL) 863 | return e_syntax(ctx, ctx->tok.pos, "missing '='"); 864 | 865 | if (next_token(ctx, false)) 866 | return -1; 867 | 868 | switch (ctx->tok.tok) { 869 | case MSTRING: 870 | case STRING: { // key = "value" 871 | 
toml_keyval_t* keyval = create_keyval_in_table(ctx, tbl, key); 872 | if (!keyval) 873 | return -1; 874 | token_t val = ctx->tok; 875 | 876 | assert(keyval->val == 0); 877 | if (!(keyval->val = STRNDUP(val.ptr, val.len))) 878 | return e_outofmemory(ctx, FLINE); 879 | 880 | if (next_token(ctx, true)) 881 | return -1; 882 | 883 | return 0; 884 | } 885 | case LBRACKET: { // key = [ array ] 886 | toml_array_t* arr = create_keyarray_in_table(ctx, tbl, key, 0); 887 | if (!arr) 888 | return -1; 889 | if (parse_array(ctx, arr)) 890 | return -1; 891 | return 0; 892 | } 893 | case LBRACE: { // key = { table } 894 | toml_table_t* nexttbl = create_keytable_in_table(ctx, tbl, key); 895 | if (!nexttbl) 896 | return -1; 897 | if (parse_inline_table(ctx, nexttbl)) 898 | return -1; 899 | return 0; 900 | } 901 | default: return e_syntax(ctx, ctx->tok.pos, "syntax error"); 902 | } 903 | return 0; 904 | } 905 | 906 | typedef struct tabpath_t tabpath_t; 907 | struct tabpath_t { 908 | int cnt; 909 | token_t key[10]; 910 | }; 911 | 912 | // At [x.y.z] or [[x.y.z]] 913 | // Scan forward and fill tblpath until it enters ] or ]] 914 | // There will be at least one entry on return. 
915 | static int fill_tblpath(context_t* ctx) { 916 | // clear tpath 917 | for (int i = 0; i < ctx->tpath.top; i++) { 918 | char** p = &ctx->tpath.key[i]; 919 | xfree(*p); 920 | *p = 0; 921 | } 922 | ctx->tpath.top = 0; 923 | 924 | for (;;) { 925 | if (ctx->tpath.top >= 10) 926 | return e_syntax(ctx, ctx->tok.pos, "table path is too deep; max allowed is 10."); 927 | if (ctx->tok.tok != STRING) 928 | return e_syntax(ctx, ctx->tok.pos, "invalid or missing key"); 929 | 930 | int keylen; 931 | char* key = normalize_key(ctx, ctx->tok, &keylen); 932 | if (!key) 933 | return -1; 934 | ctx->tpath.tok[ctx->tpath.top] = ctx->tok; 935 | ctx->tpath.key[ctx->tpath.top] = key; 936 | ctx->tpath.keylen[ctx->tpath.top] = keylen; 937 | ctx->tpath.top++; 938 | 939 | if (next_token(ctx, true)) 940 | return -1; 941 | 942 | if (ctx->tok.tok == RBRACKET) 943 | break; 944 | if (ctx->tok.tok != DOT) 945 | return e_syntax(ctx, ctx->tok.pos, "invalid key"); 946 | if (next_token(ctx, true)) 947 | return -1; 948 | } 949 | 950 | if (ctx->tpath.top <= 0) // TODO: never reached? 951 | return e_syntax(ctx, ctx->tok.pos, "empty table selector"); 952 | return 0; 953 | } 954 | 955 | // Walk tblpath from the root, and create new tables on the way. Sets 956 | // ctx->curtbl to the final table. 957 | static int walk_tabpath(context_t* ctx) { 958 | toml_table_t* curtbl = ctx->root; /// start from root 959 | 960 | for (int i = 0; i < ctx->tpath.top; i++) { 961 | const char* key = ctx->tpath.key[i]; 962 | int keylen = ctx->tpath.keylen[i]; 963 | 964 | toml_keyval_t* nextval = 0; 965 | toml_array_t* nextarr = 0; 966 | toml_table_t* nexttbl = 0; 967 | switch (check_key(curtbl, key, &nextval, &nextarr, &nexttbl)) { 968 | case 't': /// found a table. nexttbl is where we will go next. 969 | break; 970 | case 'a': /// found an array. nexttbl is the last table in the array. 
971 | if (nextarr->kind != 't') 972 | return e_internal(ctx, FLINE); 973 | 974 | if (nextarr->nitem == 0) 975 | return e_internal(ctx, FLINE); 976 | 977 | nexttbl = nextarr->item[nextarr->nitem - 1].tbl; 978 | break; 979 | case 'v': return e_keyexists(ctx, ctx->tpath.tok[i].pos); 980 | default: { /// Not found. Let's create an implicit table. 981 | int n = curtbl->ntbl; 982 | toml_table_t** base = (toml_table_t**)expand_ptrarr((void**)curtbl->tbl, n); 983 | if (base == 0) 984 | return e_outofmemory(ctx, FLINE); 985 | 986 | curtbl->tbl = base; 987 | 988 | if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0) 989 | return e_outofmemory(ctx, FLINE); 990 | 991 | if ((base[n]->key = STRDUP(key)) == 0) 992 | return e_outofmemory(ctx, FLINE); 993 | base[n]->keylen = keylen; 994 | 995 | nexttbl = curtbl->tbl[curtbl->ntbl++]; 996 | 997 | /// tabs created by walk_tabpath are considered implicit 998 | nexttbl->implicit = true; 999 | }; break; 1000 | } 1001 | curtbl = nexttbl; /// switch to next tbl 1002 | } 1003 | 1004 | ctx->curtbl = curtbl; /// save it 1005 | return 0; 1006 | } 1007 | 1008 | // handle lines like [x.y.z] or [[x.y.z]] 1009 | static int parse_select(context_t* ctx) { 1010 | assert(ctx->tok.tok == LBRACKET); 1011 | 1012 | // true if [[ 1013 | bool aot = (ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == '['); 1014 | 1015 | // Need to detect '[[' on our own because next_token() will skip whitespace, 1016 | // and '[ [' would be taken as '[[', which is wrong. 1017 | 1018 | // eat [ or [[ 1019 | if (eat_token(ctx, LBRACKET, 1, FLINE)) 1020 | return -1; 1021 | if (aot) { 1022 | assert(ctx->tok.tok == LBRACKET); 1023 | if (eat_token(ctx, LBRACKET, 1, FLINE)) 1024 | return -1; 1025 | } 1026 | 1027 | if (fill_tblpath(ctx)) 1028 | return -1; 1029 | 1030 | // For [x.y.z] or [[x.y.z]], remove z from tpath. 
1031 | token_t z = ctx->tpath.tok[ctx->tpath.top - 1]; 1032 | xfree(ctx->tpath.key[ctx->tpath.top - 1]); 1033 | ctx->tpath.top--; 1034 | 1035 | // Set up ctx->curtbl. 1036 | if (walk_tabpath(ctx)) 1037 | return -1; 1038 | 1039 | if (!aot) { 1040 | // [x.y.z] -> create z = {} in x.y 1041 | toml_table_t* curtbl = create_keytable_in_table(ctx, ctx->curtbl, z); 1042 | if (!curtbl) 1043 | return -1; 1044 | ctx->curtbl = curtbl; 1045 | } else { 1046 | // [[x.y.z]] -> create z = [] in x.y 1047 | toml_array_t* arr = 0; 1048 | { 1049 | int keylen; 1050 | char* zstr = normalize_key(ctx, z, &keylen); 1051 | if (!zstr) 1052 | return -1; 1053 | arr = toml_table_array(ctx->curtbl, zstr); 1054 | if (arr) 1055 | arr->keylen = keylen; 1056 | xfree(zstr); 1057 | } 1058 | if (!arr) { 1059 | arr = create_keyarray_in_table(ctx, ctx->curtbl, z, 't'); 1060 | if (!arr) 1061 | return -1; 1062 | } 1063 | if (arr->kind != 't') 1064 | return e_syntax(ctx, z.pos, "array mismatch"); 1065 | 1066 | // add to z[] 1067 | toml_table_t* dest; 1068 | { 1069 | toml_table_t* t = create_table_in_array(ctx, arr); 1070 | if (!t) 1071 | return -1; 1072 | 1073 | if ((t->key = STRDUP("__anon__")) == 0) 1074 | return e_outofmemory(ctx, FLINE); 1075 | dest = t; 1076 | } 1077 | 1078 | ctx->curtbl = dest; 1079 | } 1080 | 1081 | if (ctx->tok.tok != RBRACKET) // TODO: never reached 1082 | return e_syntax(ctx, ctx->tok.pos, "expected ']'"); 1083 | if (aot) { 1084 | if (!(ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == ']')) 1085 | return e_syntax(ctx, ctx->tok.pos, "expected ']]'"); 1086 | if (eat_token(ctx, RBRACKET, 1, FLINE)) 1087 | return -1; 1088 | } 1089 | 1090 | if (eat_token(ctx, RBRACKET, 1, FLINE)) 1091 | return -1; 1092 | if (ctx->tok.tok != NEWLINE) 1093 | return e_syntax(ctx, ctx->tok.pos, "extra chars after ] or ]]"); 1094 | return 0; 1095 | } 1096 | 1097 | toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz) { 1098 | context_t ctx; 1099 | 1100 | /// clear errbuf 1101 | if (errbufsz <= 
0) 1102 | errbufsz = 0; 1103 | if (errbufsz > 0) 1104 | errbuf[0] = 0; 1105 | 1106 | // init context 1107 | memset(&ctx, 0, sizeof(ctx)); 1108 | ctx.start = toml; 1109 | ctx.stop = ctx.start + strlen(toml); 1110 | ctx.errbuf = errbuf; 1111 | ctx.errbufsz = errbufsz; 1112 | 1113 | // start with an artificial newline of length 0 1114 | ctx.tok.tok = NEWLINE; 1115 | ctx.tok.pos.line = 1; 1116 | ctx.tok.pos.col = 1; 1117 | ctx.tok.ptr = toml; 1118 | ctx.tok.len = 0; 1119 | 1120 | // make a root table 1121 | if ((ctx.root = CALLOC(1, sizeof(*ctx.root))) == 0) { 1122 | e_outofmemory(&ctx, FLINE); 1123 | return 0; // Do not goto fail, root table not set up yet 1124 | } 1125 | 1126 | // set root as default table 1127 | ctx.curtbl = ctx.root; 1128 | 1129 | // Scan forward until EOF 1130 | for (token_t tok = ctx.tok; !tok.eof; tok = ctx.tok) { 1131 | switch (tok.tok) { 1132 | case NEWLINE: 1133 | if (next_token(&ctx, true)) 1134 | goto fail; 1135 | break; 1136 | 1137 | case STRING: 1138 | if (parse_keyval(&ctx, ctx.curtbl)) 1139 | goto fail; 1140 | 1141 | if (ctx.tok.tok != NEWLINE) { 1142 | e_syntax(&ctx, ctx.tok.pos, "extra chars after value"); 1143 | goto fail; 1144 | } 1145 | 1146 | if (eat_token(&ctx, NEWLINE, 1, FLINE)) 1147 | goto fail; 1148 | break; 1149 | 1150 | case LBRACKET: // [ x.y.z ] or [[ x.y.z ]] 1151 | if (parse_select(&ctx)) 1152 | goto fail; 1153 | break; 1154 | 1155 | default: e_syntax(&ctx, tok.pos, "syntax error"); goto fail; 1156 | } 1157 | } 1158 | 1159 | /// success 1160 | for (int i = 0; i < ctx.tpath.top; i++) 1161 | xfree(ctx.tpath.key[i]); 1162 | return ctx.root; 1163 | 1164 | fail: 1165 | // Something bad has happened. Free resources and return error. 
1166 | for (int i = 0; i < ctx.tpath.top; i++) 1167 | xfree(ctx.tpath.key[i]); 1168 | toml_free(ctx.root); 1169 | return 0; 1170 | } 1171 | 1172 | toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz) { 1173 | int bufsz = 0; 1174 | char* buf = 0; 1175 | int off = 0; 1176 | int inc = 1024; 1177 | 1178 | while (!feof(fp)) { 1179 | if (bufsz == 1024 * 20) /// Increment buffer by 20k after 20k. 1180 | inc = 1024 * 20; 1181 | if (off == bufsz) { 1182 | int xsz = bufsz + inc; 1183 | char* x = expand(buf, bufsz, xsz); 1184 | if (!x) { 1185 | snprintf(errbuf, errbufsz, "out of memory"); 1186 | xfree(buf); 1187 | return 0; 1188 | } 1189 | buf = x; 1190 | bufsz = xsz; 1191 | } 1192 | 1193 | errno = 0; 1194 | int n = fread(buf + off, 1, bufsz - off, fp); 1195 | if (ferror(fp)) { 1196 | snprintf(errbuf, errbufsz, "%s", (errno ? strerror(errno) : "Error reading file")); 1197 | xfree(buf); 1198 | return 0; 1199 | } 1200 | off += n; 1201 | } 1202 | 1203 | /// tag on a NUL to cap the string 1204 | if (off == bufsz) { 1205 | int xsz = bufsz + 1; 1206 | char* x = expand(buf, bufsz, xsz); 1207 | if (!x) { 1208 | snprintf(errbuf, errbufsz, "out of memory"); 1209 | xfree(buf); 1210 | return 0; 1211 | } 1212 | buf = x; 1213 | bufsz = xsz; 1214 | } 1215 | buf[off] = 0; 1216 | 1217 | /// parse it, cleanup and finish. 
1218 | toml_table_t* ret = toml_parse(buf, errbuf, errbufsz); 1219 | xfree(buf); 1220 | return ret; 1221 | } 1222 | 1223 | static void xfree_kval(toml_keyval_t* p) { 1224 | if (!p) 1225 | return; 1226 | xfree(p->key); 1227 | xfree(p->val); 1228 | xfree(p); 1229 | } 1230 | 1231 | static void xfree_tbl(toml_table_t* p); 1232 | 1233 | static void xfree_arr(toml_array_t* p) { 1234 | if (!p) 1235 | return; 1236 | 1237 | xfree(p->key); 1238 | const int n = p->nitem; 1239 | for (int i = 0; i < n; i++) { 1240 | toml_arritem_t* a = &p->item[i]; 1241 | if (a->val) 1242 | xfree(a->val); 1243 | else if (a->arr) 1244 | xfree_arr(a->arr); 1245 | else if (a->tbl) 1246 | xfree_tbl(a->tbl); 1247 | } 1248 | xfree(p->item); 1249 | xfree(p); 1250 | } 1251 | 1252 | static void xfree_tbl(toml_table_t* p) { 1253 | if (!p) 1254 | return; 1255 | 1256 | xfree(p->key); 1257 | 1258 | for (int i = 0; i < p->nkval; i++) 1259 | xfree_kval(p->kval[i]); 1260 | xfree(p->kval); 1261 | 1262 | for (int i = 0; i < p->narr; i++) 1263 | xfree_arr(p->arr[i]); 1264 | xfree(p->arr); 1265 | 1266 | for (int i = 0; i < p->ntbl; i++) 1267 | xfree_tbl(p->tbl[i]); 1268 | xfree(p->tbl); 1269 | 1270 | xfree(p); 1271 | } 1272 | 1273 | void toml_free(toml_table_t* tbl) { 1274 | xfree_tbl(tbl); 1275 | } 1276 | 1277 | static void set_token(context_t* ctx, tokentype_t tok, toml_pos_t pos, char* ptr, int len) { 1278 | token_t t; 1279 | t.tok = tok; 1280 | t.pos = pos; 1281 | t.ptr = ptr; 1282 | t.len = len; 1283 | t.eof = 0; 1284 | ctx->tok = t; 1285 | } 1286 | 1287 | static void set_eof(context_t* ctx, toml_pos_t pos) { 1288 | set_token(ctx, NEWLINE, pos, ctx->stop, 0); 1289 | ctx->tok.eof = 1; 1290 | } 1291 | 1292 | // Scan p for n digits compositing entirely of [0-9] 1293 | static int scan_digits(const char* p, int n) { 1294 | int ret = 0; 1295 | for (; n > 0 && isdigit(*p); n--, p++) 1296 | ret = 10 * ret + (*p - '0'); 1297 | return n ? 
-1 : ret; 1298 | } 1299 | 1300 | static bool scan_date(const char* p, int* YY, int* MM, int* DD) { 1301 | int year = scan_digits(p, 4); 1302 | int month = (year >= 0 && p[4] == '-') ? scan_digits(p + 5, 2) : -1; 1303 | int day = (month >= 0 && p[7] == '-') ? scan_digits(p + 8, 2) : -1; 1304 | if (YY) 1305 | *YY = year; 1306 | if (MM) 1307 | *MM = month; 1308 | if (DD) 1309 | *DD = day; 1310 | return (year >= 0 && month >= 0 && day >= 0); 1311 | } 1312 | 1313 | static bool scan_time(const char* p, int* hh, int* mm, int* ss) { 1314 | int hour = scan_digits(p, 2); 1315 | int minute = (hour >= 0 && p[2] == ':') ? scan_digits(p + 3, 2) : -1; 1316 | int second = (minute >= 0 && p[5] == ':') ? scan_digits(p + 6, 2) : -1; 1317 | if (hh) 1318 | *hh = hour; 1319 | if (mm) 1320 | *mm = minute; 1321 | if (ss) 1322 | *ss = second; 1323 | return (hour >= 0 && minute >= 0); 1324 | } 1325 | 1326 | static int parse_millisec(const char* p, const char** endp) { 1327 | int ret = 0; 1328 | int unit = 100; /// unit in millisec 1329 | for (; '0' <= *p && *p <= '9'; p++, unit /= 10) 1330 | ret += (*p - '0') * unit; 1331 | *endp = p; 1332 | return ret; 1333 | } 1334 | 1335 | static bool scan_offset(const char* p, int* tz) { 1336 | int sign = p[0]; 1337 | int hour = scan_digits(p + 1, 2); 1338 | int minute = (hour >= 0 && p[3] == ':') ? scan_digits(p + 4, 2) : -1; 1339 | if (hour < -12 || hour > 14 || minute < 0 || minute > 59) 1340 | return false; 1341 | if (tz) { 1342 | *tz = hour * 60 + minute; 1343 | if (sign == '-') 1344 | *tz = -(*tz); 1345 | } 1346 | return true; 1347 | } 1348 | 1349 | static int scan_string(context_t* ctx, char* p, toml_pos_t* pos, bool dotisspecial) { 1350 | char* orig = p; 1351 | 1352 | // Literal multiline. 
1353 | if (strncmp(p, "'''", 3) == 0) { 1354 | char* q = p + 3; 1355 | pos->col += 3; 1356 | while (true) { 1357 | q = strstr(q, "'''"); 1358 | if (q == 0) 1359 | return e_syntax(ctx, *pos, "unterminated triple quote (''')"); 1360 | int i = 0; 1361 | while (q[3] == '\'') { 1362 | i++; 1363 | if (i >= 3) 1364 | return e_syntax(ctx, *pos, "too many ''' in triple-s-quote"); 1365 | q++; 1366 | } 1367 | break; 1368 | } 1369 | set_token(ctx, MSTRING, *pos, orig, q + 3 - orig); 1370 | return 0; 1371 | } 1372 | 1373 | // Multiline. 1374 | if (strncmp(p, "\"\"\"", 3) == 0) { 1375 | char* q = p + 3; 1376 | pos->col += 3; 1377 | while (true) { 1378 | q = strstr(q, "\"\"\""); 1379 | if (q == 0) 1380 | return e_syntax(ctx, *pos, "unterminated triple quote (\"\"\")"); 1381 | if (q[-1] == '\\') { 1382 | q++; 1383 | continue; 1384 | } 1385 | int i = 0; 1386 | while (q[3] == '\"') { 1387 | i++; 1388 | if (i >= 3) 1389 | return e_syntax(ctx, *pos, "too many \"\"\" in triple-d-quote"); 1390 | q++; 1391 | } 1392 | break; 1393 | } 1394 | 1395 | /// the string is [p+3, q-1] 1396 | int hexreq = 0; /// #hex required 1397 | bool escape = false; 1398 | for (p += 3; p < q; p++) { 1399 | if (escape) { 1400 | escape = false; 1401 | if (strchr("btnfre\"\\", *p)) 1402 | continue; 1403 | if (*p == 'x') { 1404 | hexreq = 2; 1405 | continue; 1406 | } 1407 | if (*p == 'u') { 1408 | hexreq = 4; 1409 | continue; 1410 | } 1411 | if (*p == 'U') { 1412 | hexreq = 8; 1413 | continue; 1414 | } 1415 | if (p[strspn(p, " \t\r")] == '\n') 1416 | continue; // allow for line ending backslash 1417 | return e_syntax(ctx, *pos, "bad escape char"); 1418 | } 1419 | if (hexreq) { 1420 | hexreq--; 1421 | if (strchr("0123456789ABCDEFabcdef", *p)) 1422 | continue; 1423 | return e_syntax(ctx, *pos, "expected hex char"); 1424 | } 1425 | if (*p == '\\') { 1426 | escape = true; 1427 | continue; 1428 | } 1429 | } 1430 | if (escape) // TODO: unreachable, I think? 
1431 | return e_syntax(ctx, *pos, "expected an escape char"); 1432 | if (hexreq) 1433 | return e_syntax(ctx, *pos, "expected more hex char"); 1434 | 1435 | set_token(ctx, MSTRING, *pos, orig, q + 3 - orig); 1436 | return 0; 1437 | } 1438 | 1439 | // Literal string. 1440 | if (*p == '\'') { 1441 | for (p++; *p && *p != '\n' && *p != '\''; p++) 1442 | pos->col++; 1443 | if (*p != '\'') 1444 | return e_syntax(ctx, *pos, "unterminated quote (')"); 1445 | set_token(ctx, STRING, *pos, orig, p + 1 - orig); 1446 | return 0; 1447 | } 1448 | 1449 | // Basic String. 1450 | if (*p == '\"') { 1451 | int hexreq = 0; /// #hex required 1452 | bool escape = false; 1453 | for (p++; *p; p++) { 1454 | pos->col++; 1455 | if (escape) { 1456 | escape = false; 1457 | if (strchr("btnfre\"\\", *p)) 1458 | continue; 1459 | if (*p == 'x') { 1460 | hexreq = 2; 1461 | continue; 1462 | } 1463 | if (*p == 'u') { 1464 | hexreq = 4; 1465 | continue; 1466 | } 1467 | if (*p == 'U') { 1468 | hexreq = 8; 1469 | continue; 1470 | } 1471 | return e_syntax(ctx, *pos, "bad escape char"); 1472 | } 1473 | if (hexreq) { 1474 | hexreq--; 1475 | if (strchr("0123456789ABCDEFabcdef", *p)) 1476 | continue; 1477 | return e_syntax(ctx, *pos, "expected hex char"); 1478 | } 1479 | if (*p == '\\') { 1480 | escape = true; 1481 | continue; 1482 | } 1483 | if (*p == '\n') 1484 | break; 1485 | if (*p == '"') 1486 | break; 1487 | } 1488 | if (*p != '"') 1489 | return e_syntax(ctx, *pos, "unterminated quote (\")"); 1490 | 1491 | set_token(ctx, STRING, *pos, orig, p + 1 - orig); 1492 | return 0; 1493 | } 1494 | 1495 | // Time 1496 | if (!dotisspecial && scan_time(p, 0, 0, 0)) { 1497 | p += strspn(p, "0123456789:"); /// forward thru the time. 
1498 | if (p[0] == '.') { /// Subseconds 1499 | int n = strspn(++p, "0123456789"); 1500 | if (n == 0) 1501 | return e_syntax(ctx, *pos, "extra chars after '.'"); 1502 | p += n; 1503 | } 1504 | for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string 1505 | ; 1506 | set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize 1507 | return 0; 1508 | } 1509 | 1510 | // Datetime 1511 | if (!dotisspecial && scan_date(p, 0, 0, 0)) { 1512 | p += strspn(p, "0123456789-"); /// forward thru the date 1513 | if (p[0] == ' ' || p[0] == 't' || p[0] == 'T') { /// forward thru the time 1514 | p++; 1515 | p += strspn(p, "0123456789:"); 1516 | if (p[0] == '.') { /// Subseconds 1517 | int n = strspn(++p, "0123456789"); 1518 | if (n == 0) 1519 | return e_syntax(ctx, *pos, "extra chars after '.'"); 1520 | p += n; 1521 | } 1522 | } 1523 | 1524 | // Offset 1525 | if (p[0] == 'Z' || p[0] == 'z') { 1526 | p++; 1527 | } else if (p[0] == '+' || p[0] == '-') { 1528 | if (!scan_offset(p, 0)) 1529 | return e_syntax(ctx, *pos, "invalid offset"); 1530 | p += 6; 1531 | } 1532 | 1533 | for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string 1534 | ; 1535 | set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize 1536 | return 0; 1537 | } 1538 | 1539 | // Literals 1540 | for (; *p && *p != '\n'; p++) { 1541 | int ch = *p; 1542 | if (ch == '.' && dotisspecial) 1543 | break; 1544 | if ('A' <= ch && ch <= 'Z') 1545 | continue; 1546 | if ('a' <= ch && ch <= 'z') 1547 | continue; 1548 | if (strchr("0123456789+-_.", ch)) 1549 | continue; 1550 | break; 1551 | } 1552 | 1553 | set_token(ctx, STRING, *pos, orig, p - orig); 1554 | return 0; 1555 | } 1556 | 1557 | static int next_token(context_t* ctx, bool dotisspecial) { 1558 | // Eat this tok. 
1559 | char* p = ctx->tok.ptr; 1560 | toml_pos_t pos = ctx->tok.pos; 1561 | for (int i = 0; i < ctx->tok.len; i++) { 1562 | pos.col++; 1563 | if (*p++ == '\n') { 1564 | pos.line++; 1565 | pos.col = 1; 1566 | } 1567 | } 1568 | 1569 | /// Make next tok 1570 | while (p < ctx->stop) { 1571 | if (*p == '#') { /// Skip comment. stop just before the \n. 1572 | for (p++; p < ctx->stop && *p != '\n'; p++) { 1573 | pos.col++; 1574 | if ((*p != '\t' && *p != '\r' && *p != '\n') && ((*p >= 0x00 && *p <= 0x1f) || *p == 0x7f)) 1575 | return e_syntax(ctx, pos, "invalid control character"); 1576 | if (*p == '\r' && p < ctx->stop + 1 && *(p + 1) != '\n') 1577 | return e_syntax(ctx, pos, "invalid control character"); 1578 | } 1579 | continue; 1580 | } 1581 | 1582 | if (dotisspecial && *p == '.') { 1583 | set_token(ctx, DOT, pos, p, 1); 1584 | return 0; 1585 | } 1586 | 1587 | switch (*p) { 1588 | case ',': set_token(ctx, COMMA, pos, p, 1); return 0; 1589 | case '=': set_token(ctx, EQUAL, pos, p, 1); return 0; 1590 | case '{': set_token(ctx, LBRACE, pos, p, 1); return 0; 1591 | case '}': set_token(ctx, RBRACE, pos, p, 1); return 0; 1592 | case '[': set_token(ctx, LBRACKET, pos, p, 1); return 0; 1593 | case ']': set_token(ctx, RBRACKET, pos, p, 1); return 0; 1594 | case '\n': set_token(ctx, NEWLINE, pos, p, 1); return 0; 1595 | case '\r': 1596 | case ' ': 1597 | case '\t': /// ignore white spaces 1598 | p++; 1599 | pos.col++; 1600 | continue; 1601 | } 1602 | 1603 | return scan_string(ctx, p, &pos, dotisspecial); 1604 | } 1605 | 1606 | set_eof(ctx, pos); 1607 | return 0; 1608 | } 1609 | 1610 | const char* toml_table_key(const toml_table_t* tbl, int keyidx, int* keylen) { 1611 | if (keyidx < tbl->nkval) { 1612 | *keylen = tbl->kval[keyidx]->keylen; 1613 | return tbl->kval[keyidx]->key; 1614 | } 1615 | if ((keyidx -= tbl->nkval) < tbl->narr) { 1616 | *keylen = tbl->arr[keyidx]->keylen; 1617 | return tbl->arr[keyidx]->key; 1618 | } 1619 | if ((keyidx -= tbl->narr) < tbl->ntbl) { 1620 | 
*keylen = tbl->tbl[keyidx]->keylen; 1621 | return tbl->tbl[keyidx]->key; 1622 | } 1623 | *keylen = 0; 1624 | return 0; 1625 | } 1626 | 1627 | toml_unparsed_t toml_table_unparsed(const toml_table_t* tbl, const char* key) { 1628 | for (int i = 0; i < tbl->nkval; i++) 1629 | if (strcmp(key, tbl->kval[i]->key) == 0) 1630 | return tbl->kval[i]->val; 1631 | return 0; 1632 | } 1633 | 1634 | toml_array_t* toml_table_array(const toml_table_t* tbl, const char* key) { 1635 | for (int i = 0; i < tbl->narr; i++) 1636 | if (strcmp(key, tbl->arr[i]->key) == 0) 1637 | return tbl->arr[i]; 1638 | return 0; 1639 | } 1640 | 1641 | toml_table_t* toml_table_table(const toml_table_t* tbl, const char* key) { 1642 | for (int i = 0; i < tbl->ntbl; i++) 1643 | if (strcmp(key, tbl->tbl[i]->key) == 0) 1644 | return tbl->tbl[i]; 1645 | return 0; 1646 | } 1647 | 1648 | toml_unparsed_t toml_array_unparsed(const toml_array_t* arr, int idx) { 1649 | return (0 <= idx && idx < arr->nitem) ? arr->item[idx].val : 0; 1650 | } 1651 | 1652 | int toml_table_len(const toml_table_t* tbl) { 1653 | return tbl->nkval + tbl->narr + tbl->ntbl; 1654 | } 1655 | 1656 | int toml_array_len(const toml_array_t* arr) { 1657 | return arr->nitem; 1658 | } 1659 | 1660 | toml_array_t* toml_array_array(const toml_array_t* arr, int idx) { 1661 | return (0 <= idx && idx < arr->nitem) ? arr->item[idx].arr : 0; 1662 | } 1663 | 1664 | toml_table_t* toml_array_table(const toml_array_t* arr, int idx) { 1665 | return (0 <= idx && idx < arr->nitem) ? 
arr->item[idx].tbl : 0; 1666 | } 1667 | 1668 | bool is_leap(int y) { 1669 | return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0); 1670 | } 1671 | 1672 | int toml_value_timestamp(toml_unparsed_t src_, toml_timestamp_t* ret) { 1673 | if (!src_) 1674 | return -1; 1675 | 1676 | const char* p = src_; 1677 | bool must_parse_time = false; 1678 | 1679 | memset(ret, 0, sizeof(*ret)); 1680 | 1681 | /// YYYY-MM-DD 1682 | if (scan_date(p, &ret->year, &ret->month, &ret->day)) { 1683 | if (ret->month < 1 || ret->day < 1 || ret->month > 12 || ret->day > 31) 1684 | return -1; 1685 | if (ret->month == 2 && ret->day > (is_leap(ret->year) ? 29 : 28)) 1686 | return -1; 1687 | ret->kind = 'D'; 1688 | 1689 | p += 10; 1690 | if (*p) { 1691 | if (*p != 'T' && *p != 't' && *p != ' ') /// T or space 1692 | return -1; 1693 | must_parse_time = true; 1694 | p++; 1695 | } 1696 | } 1697 | 1698 | /// HH:MM:SS 1699 | if (scan_time(p, &ret->hour, &ret->minute, &ret->second)) { 1700 | if (ret->minute < 0 || ret->hour < 0 || ret->hour > 23 || ret->minute > 59 || ret->second > 60) 1701 | return -1; 1702 | p += (ret->second == -1 ? 5 : 8); 1703 | ret->kind = (ret->kind == 'D' ? 'l' : 't'); 1704 | if (ret->second == -1) 1705 | ret->second = 0; 1706 | 1707 | if (*p == '.') { /// optionally, parse millisec 1708 | p++; /// skip '.' 1709 | const char* qq; 1710 | ret->millisec = parse_millisec(p, &qq); 1711 | p = qq; 1712 | } 1713 | 1714 | if (*p) { /// parse and copy Z 1715 | ret->kind = 'd'; 1716 | if (*p == 'Z' || *p == 'z') 1717 | p++; 1718 | else if (*p == '+' || *p == '-') { 1719 | if (!scan_offset(p, &ret->tz)) 1720 | return -1; 1721 | p += 6; 1722 | } 1723 | } 1724 | } 1725 | if (*p != 0) 1726 | return -1; 1727 | if (must_parse_time && ret->kind == 'D') 1728 | return -1; 1729 | return 0; 1730 | } 1731 | 1732 | // Raw to boolean 1733 | int toml_value_bool(toml_unparsed_t src, bool* ret_) { 1734 | if (!src) 1735 | return -1; 1736 | bool dummy = false; 1737 | bool* ret = ret_ ? 
ret_ : &dummy; 1738 | 1739 | if (strcmp(src, "true") == 0) { 1740 | *ret = true; 1741 | return 0; 1742 | } 1743 | if (strcmp(src, "false") == 0) { 1744 | *ret = false; 1745 | return 0; 1746 | } 1747 | return -1; 1748 | } 1749 | 1750 | // Raw to integer 1751 | int toml_value_int(toml_unparsed_t src, int64_t* ret_) { 1752 | if (!src) 1753 | return -1; 1754 | 1755 | char buf[100]; 1756 | char* p = buf; 1757 | char* q = p + sizeof(buf); 1758 | const char* s = src; 1759 | int64_t dummy = 0; 1760 | int64_t* ret = ret_ ? ret_ : &dummy; 1761 | bool have_sign = false; 1762 | 1763 | if (s[0] == '+' || s[0] == '-') { /// allow +/- 1764 | have_sign = true; 1765 | *p++ = *s++; 1766 | } 1767 | 1768 | if (s[0] == '_') /// disallow +_100 1769 | return -1; 1770 | 1771 | int base = 0; 1772 | if (s[0] == '0') { /// if 0* ... 1773 | switch (s[1]) { 1774 | case 'x': 1775 | base = 16; 1776 | s += 2; 1777 | break; 1778 | case 'o': 1779 | base = 8; 1780 | s += 2; 1781 | break; 1782 | case 'b': 1783 | base = 2; 1784 | s += 2; 1785 | break; 1786 | case '\0': return *ret = 0, 0; 1787 | default: 1788 | if (s[1]) /// ensure no other digits after it 1789 | return -1; 1790 | } 1791 | if (!*s) 1792 | return -1; 1793 | if (have_sign) /// disallow +0xff, -0xff 1794 | return -1; 1795 | if (s[0] == '_') /// disallow 0x_, 0o_, 0b_ 1796 | return -1; 1797 | if (s[0] == '+' || s[0] == '-') /// disallow 0x+10, 0x-10 1798 | return -1; 1799 | } 1800 | 1801 | while (*s && p < q) { /// just strip underscores and pass to strtoll 1802 | int ch = *s++; 1803 | if (ch == '_') { 1804 | if (s[0] == '_') /// disallow '__' 1805 | return -1; 1806 | if (s[0] == '\0') /// numbers cannot end with '_' 1807 | return -1; 1808 | continue; /// skip _ 1809 | } 1810 | *p++ = ch; 1811 | } 1812 | 1813 | if (*s || p == q) /// if not at end-of-string or we ran out of buffer ... 
1814 | return -1; 1815 | 1816 | *p = 0; /// cap with NUL 1817 | 1818 | /// Run strtoll on buf to get the integer 1819 | char* endp; 1820 | errno = 0; 1821 | *ret = strtoll(buf, &endp, base); 1822 | return (errno || *endp) ? -1 : 0; 1823 | } 1824 | 1825 | int toml_value_double(toml_unparsed_t src, double* ret_) { 1826 | if (!src) 1827 | return -1; 1828 | 1829 | char buf[100]; 1830 | char* p = buf; 1831 | char* q = p + sizeof(buf); 1832 | const char* s = src; 1833 | double dummy = 0.0; 1834 | double* ret = ret_ ? ret_ : &dummy; 1835 | 1836 | if (s[0] == '+' || s[0] == '-') /// allow +/- 1837 | *p++ = *s++; 1838 | 1839 | if (s[0] == '_') /// disallow +_1.00 1840 | return -1; 1841 | 1842 | { /// decimal point, if used, must be surrounded by at least one digit on each side 1843 | char* dot = strchr(s, '.'); 1844 | if (dot) { 1845 | if (dot == s || !isdigit(dot[-1]) || !isdigit(dot[1])) 1846 | return -1; 1847 | } 1848 | } 1849 | 1850 | /// zero must be followed by . or 'e', or NUL 1851 | if (s[0] == '0' && s[1] && !strchr("eE.", s[1])) 1852 | return -1; 1853 | 1854 | /// Just strip underscores and pass to strtod 1855 | bool have_us = false; 1856 | while (*s && p < q) { 1857 | int ch = *s++; 1858 | if (ch == '_') { 1859 | have_us = true; 1860 | if (s[0] == '_') /// disallow '__' 1861 | return -1; 1862 | if (s[0] == 'e') /// disallow _e 1863 | return -1; 1864 | if (s[0] == 0) /// disallow last char '_' 1865 | return -1; 1866 | continue; /// skip _ 1867 | } 1868 | if (ch == 'I' || ch == 'N' || ch == 'F' || ch == 'A') /// inf and nan are case-sensitive. 1869 | return -1; 1870 | if (ch == 'e' && s[0] == '_') /// disallow e_ 1871 | return -1; 1872 | *p++ = ch; 1873 | } 1874 | if (*s || p == q) 1875 | return -1; /// reached end of string or buffer is full? 
1876 | 1877 | *p = 0; /// cap with NUL 1878 | 1879 | /// Run strtod on buf to get the value 1880 | char* endp; 1881 | errno = 0; 1882 | *ret = strtod(buf, &endp); 1883 | if (errno || *endp) 1884 | return -1; 1885 | if (have_us && (isnan(*ret) || isinf(*ret))) 1886 | return -1; 1887 | return 0; 1888 | } 1889 | 1890 | int toml_value_string(toml_unparsed_t src, char** ret, int* len) { 1891 | bool multiline = false; 1892 | const char* sp; 1893 | const char* sq; 1894 | 1895 | *ret = 0; 1896 | if (!src) 1897 | return -1; 1898 | 1899 | /// First char must be a s-quote or d-quote 1900 | int qchar = src[0]; 1901 | int srclen = strlen(src); 1902 | if (!(qchar == '\'' || qchar == '"')) { 1903 | return -1; 1904 | } 1905 | 1906 | /// triple quotes? 1907 | if (qchar == src[1] && qchar == src[2]) { 1908 | multiline = true; /// triple-quote implies multiline 1909 | sp = src + 3; /// first char after quote 1910 | sq = src + srclen - 3; /// first char of ending quote 1911 | 1912 | if (!(sp <= sq && sq[0] == qchar && sq[1] == qchar && sq[2] == qchar)) 1913 | return -1; /// last 3 chars in src must be qchar 1914 | 1915 | if (sp[0] == '\n') /// skip new line immediate after qchar 1916 | sp++; 1917 | else if (sp[0] == '\r' && sp[1] == '\n') 1918 | sp += 2; 1919 | } else { 1920 | sp = src + 1; /// first char after quote 1921 | sq = src + srclen - 1; /// ending quote 1922 | if (!(sp <= sq && *sq == qchar)) /// last char in src must be qchar 1923 | return -1; 1924 | } 1925 | 1926 | /// at this point: 1927 | /// sp points to first valid char after quote. 1928 | /// sq points to one char beyond last valid char. 1929 | /// string len is (sq - sp). 1930 | if (qchar == '\'') 1931 | *ret = norm_lit_str(sp, sq - sp, len, multiline, 0, 0); 1932 | else 1933 | *ret = norm_basic_str(sp, sq - sp, len, multiline, 0, 0); 1934 | return *ret ? 
0 : -1; 1935 | } 1936 | 1937 | toml_value_t toml_array_string(const toml_array_t* arr, int idx) { 1938 | toml_value_t ret; 1939 | memset(&ret, 0, sizeof(ret)); 1940 | ret.ok = (toml_value_string(toml_array_unparsed(arr, idx), &ret.u.s, &ret.u.sl) == 0); 1941 | return ret; 1942 | } 1943 | 1944 | toml_value_t toml_array_bool(const toml_array_t* arr, int idx) { 1945 | toml_value_t ret; 1946 | memset(&ret, 0, sizeof(ret)); 1947 | ret.ok = (toml_value_bool(toml_array_unparsed(arr, idx), &ret.u.b) == 0); 1948 | return ret; 1949 | } 1950 | 1951 | toml_value_t toml_array_int(const toml_array_t* arr, int idx) { 1952 | toml_value_t ret; 1953 | memset(&ret, 0, sizeof(ret)); 1954 | ret.ok = (toml_value_int(toml_array_unparsed(arr, idx), &ret.u.i) == 0); 1955 | return ret; 1956 | } 1957 | 1958 | toml_value_t toml_array_double(const toml_array_t* arr, int idx) { 1959 | toml_value_t ret; 1960 | memset(&ret, 0, sizeof(ret)); 1961 | ret.ok = (toml_value_double(toml_array_unparsed(arr, idx), &ret.u.d) == 0); 1962 | return ret; 1963 | } 1964 | 1965 | toml_value_t toml_array_timestamp(const toml_array_t* arr, int idx) { 1966 | toml_value_t ret; 1967 | memset(&ret, 0, sizeof(ret)); 1968 | ret.ok = (toml_value_timestamp(toml_array_unparsed(arr, idx), &ret.u.ts) == 0); 1969 | return ret; 1970 | } 1971 | 1972 | toml_value_t toml_table_string(const toml_table_t* tbl, const char* key) { 1973 | toml_value_t ret; 1974 | memset(&ret, 0, sizeof(ret)); 1975 | toml_unparsed_t raw = toml_table_unparsed(tbl, key); 1976 | if (raw) 1977 | ret.ok = (toml_value_string(raw, &ret.u.s, &ret.u.sl) == 0); 1978 | return ret; 1979 | } 1980 | 1981 | toml_value_t toml_table_bool(const toml_table_t* tbl, const char* key) { 1982 | toml_value_t ret; 1983 | memset(&ret, 0, sizeof(ret)); 1984 | ret.ok = (toml_value_bool(toml_table_unparsed(tbl, key), &ret.u.b) == 0); 1985 | return ret; 1986 | } 1987 | 1988 | toml_value_t toml_table_int(const toml_table_t* tbl, const char* key) { 1989 | toml_value_t ret; 1990 | 
memset(&ret, 0, sizeof(ret)); 1991 | ret.ok = (toml_value_int(toml_table_unparsed(tbl, key), &ret.u.i) == 0); 1992 | return ret; 1993 | } 1994 | 1995 | toml_value_t toml_table_double(const toml_table_t* tbl, const char* key) { 1996 | toml_value_t ret; 1997 | memset(&ret, 0, sizeof(ret)); 1998 | ret.ok = (toml_value_double(toml_table_unparsed(tbl, key), &ret.u.d) == 0); 1999 | return ret; 2000 | } 2001 | 2002 | toml_value_t toml_table_timestamp(const toml_table_t* tbl, const char* key) { 2003 | toml_value_t ret; 2004 | memset(&ret, 0, sizeof(ret)); 2005 | ret.ok = (toml_value_timestamp(toml_table_unparsed(tbl, key), &ret.u.ts) == 0); 2006 | return ret; 2007 | } 2008 | -------------------------------------------------------------------------------- /header/toml-c.h: -------------------------------------------------------------------------------- 1 | #ifndef TOML_H 2 | #define TOML_H 3 | #ifndef _POSIX_C_SOURCE 4 | #define _POSIX_C_SOURCE 200809L 5 | #endif 6 | #ifdef _MSC_VER 7 | # pragma warning(disable : 4996) 8 | #endif 9 | #ifdef __cplusplus 10 | # define TOML_EXTERN extern "C" 11 | #else 12 | # define TOML_EXTERN extern 13 | #endif 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | typedef struct toml_table_t toml_table_t; 20 | typedef struct toml_array_t toml_array_t; 21 | typedef struct toml_value_t toml_value_t; 22 | typedef struct toml_timestamp_t toml_timestamp_t; 23 | typedef struct toml_keyval_t toml_keyval_t; 24 | typedef struct toml_arritem_t toml_arritem_t; 25 | typedef struct toml_pos_t toml_pos_t; 26 | 27 | // TOML table. 28 | struct toml_table_t { 29 | const char* key; // Key for this table 30 | int keylen; // length of key. 
31 | bool implicit; // Table was created implicitly 32 | bool readonly; // No more modification allowed 33 | 34 | int nkval; // key-values in the table 35 | toml_keyval_t** kval; 36 | int narr; // arrays in the table 37 | toml_array_t** arr; 38 | int ntbl; // tables in the table 39 | toml_table_t** tbl; 40 | }; 41 | 42 | // TOML array. 43 | struct toml_array_t { 44 | const char* key; // key to this array 45 | int keylen; // length of key. 46 | int kind; // element kind: 'v'alue, 'a'rray, or 't'able, 'm'ixed 47 | int type; // for value kind: 'i'nt, 'd'ouble, 'b'ool, 's'tring, 't'ime, 'D'ate, 'T'imestamp, 'm'ixed 48 | int nitem; // number of elements 49 | toml_arritem_t* item; 50 | }; 51 | struct toml_arritem_t { 52 | int valtype; // for value kind: 'i'nt, 'd'ouble, 'b'ool, 's'tring, 't'ime, 'D'ate, 'T'imestamp 53 | char* val; 54 | toml_array_t* arr; 55 | toml_table_t* tbl; 56 | }; 57 | 58 | // TOML key/value pair. 59 | struct toml_keyval_t { 60 | const char* key; // key to this value 61 | int keylen; // length of key. 62 | const char* val; // the raw value 63 | }; 64 | 65 | // Token position. 66 | struct toml_pos_t { 67 | int line; 68 | int col; 69 | }; 70 | 71 | // Timestamp type; some values may be empty depending on the value of kind. 72 | struct toml_timestamp_t { 73 | // datetime type: 74 | // 75 | // 'd'atetime Full date + time + TZ 76 | // 'l'local-datetime Full date + time but without TZ 77 | // 'D'ate-local Date only, without TZ 78 | // 't'ime-local Time only, without TZ 79 | char kind; 80 | 81 | int year, month, day; 82 | int hour, minute, second, millisec; 83 | int tz; // Timezone offset in minutes 84 | }; 85 | 86 | // Parsed TOML value. 87 | // 88 | // The string value s is a regular NULL-terminated C string, but the string 89 | // length is also given in sl since TOML values may contain NULL bytes. The 90 | // value is guaranteed to be correct UTF-8. 91 | struct toml_value_t { 92 | bool ok; // Was this value present? 
93 | union { 94 | struct { 95 | char* s; // string value; must be freed after use. 96 | int sl; // string length, excluding NULL. 97 | }; 98 | toml_timestamp_t ts; // datetime 99 | bool b; // bool 100 | int64_t i; // int 101 | double d; // double 102 | } u; 103 | }; 104 | 105 | // toml_parse() parses a TOML document from a string. Returns 0 on error, with 106 | // the error message stored in errbuf. 107 | // 108 | // toml_parse_file() is identical, but reads from a file descriptor. 109 | // 110 | // Use toml_free() to free the return value; this will invalidate all handles 111 | // for this table. 112 | TOML_EXTERN toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz); 113 | TOML_EXTERN toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz); 114 | TOML_EXTERN void toml_free(toml_table_t* table); 115 | 116 | // Table functions. 117 | // 118 | // toml_table_len() gets the number of direct keys for this table; 119 | // toml_table_key() gets the nth direct key in this table. 120 | TOML_EXTERN int toml_table_len(const toml_table_t* table); 121 | TOML_EXTERN const char* toml_table_key(const toml_table_t* table, int keyidx, int* keylen); 122 | TOML_EXTERN toml_value_t toml_table_string(const toml_table_t* table, const char* key); 123 | TOML_EXTERN toml_value_t toml_table_bool(const toml_table_t* table, const char* key); 124 | TOML_EXTERN toml_value_t toml_table_int(const toml_table_t* table, const char* key); 125 | TOML_EXTERN toml_value_t toml_table_double(const toml_table_t* table, const char* key); 126 | TOML_EXTERN toml_value_t toml_table_timestamp(const toml_table_t* table, const char* key); 127 | TOML_EXTERN toml_array_t* toml_table_array(const toml_table_t* table, const char* key); 128 | TOML_EXTERN toml_table_t* toml_table_table(const toml_table_t* table, const char* key); 129 | 130 | // Array functions. 
131 | TOML_EXTERN int toml_array_len(const toml_array_t* array); 132 | TOML_EXTERN toml_value_t toml_array_string(const toml_array_t* array, int idx); 133 | TOML_EXTERN toml_value_t toml_array_bool(const toml_array_t* array, int idx); 134 | TOML_EXTERN toml_value_t toml_array_int(const toml_array_t* array, int idx); 135 | TOML_EXTERN toml_value_t toml_array_double(const toml_array_t* array, int idx); 136 | TOML_EXTERN toml_value_t toml_array_timestamp(const toml_array_t* array, int idx); 137 | TOML_EXTERN toml_array_t* toml_array_array(const toml_array_t* array, int idx); 138 | TOML_EXTERN toml_table_t* toml_array_table(const toml_array_t* array, int idx); 139 | 140 | #include 141 | #include 142 | #include 143 | #include 144 | #include 145 | #include 146 | #include 147 | #include 148 | #include 149 | 150 | 151 | #define ALIGN8(sz) (((sz) + 7) & ~7) 152 | #define calloc(x, y) error - forbidden - use CALLOC instead 153 | static void* CALLOC(size_t nmemb, size_t sz) { 154 | int nb = ALIGN8(sz) * nmemb; 155 | void* p = malloc(nb); 156 | if (p) { 157 | memset(p, 0, nb); 158 | } 159 | return p; 160 | } 161 | 162 | // some old platforms define strdup macro -- drop it. 163 | #undef strdup 164 | #define strdup(x) error - forbidden - use STRDUP instead 165 | static char* STRDUP(const char* s) { 166 | int len = strlen(s); 167 | char* p = malloc(len + 1); 168 | if (p) { 169 | memcpy(p, s, len); 170 | p[len] = 0; 171 | } 172 | return p; 173 | } 174 | 175 | // some old platforms define strndup macro -- drop it. 176 | #undef strndup 177 | #define strndup(x) error - forbidden - use STRNDUP instead 178 | static char* STRNDUP(const char* s, size_t n) { 179 | size_t len = strnlen(s, n); 180 | char* p = malloc(len + 1); 181 | if (p) { 182 | memcpy(p, s, len); 183 | p[len] = 0; 184 | } 185 | return p; 186 | } 187 | 188 | // Unparsed values. 
189 | typedef const char* toml_unparsed_t; 190 | toml_unparsed_t toml_table_unparsed(const toml_table_t* table, const char* key); 191 | toml_unparsed_t toml_array_unparsed(const toml_array_t* array, int idx); 192 | int toml_value_string(toml_unparsed_t s, char** ret, int* len); 193 | int toml_value_bool(toml_unparsed_t s, bool* ret); 194 | int toml_value_int(toml_unparsed_t s, int64_t* ret); 195 | int toml_value_double(toml_unparsed_t s, double* ret); 196 | int toml_value_timestamp(toml_unparsed_t s, toml_timestamp_t* ret); 197 | 198 | // Convert escape to UTF-8; return #bytes used in buf to encode the char, or -1 199 | // on error. 200 | // http://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16 201 | int read_unicode_escape(uint64_t code, char buf[6]) { 202 | if (0xd800 <= code && code <= 0xdfff) /// UTF-16 surrogates 203 | return -1; 204 | if (0x10FFFF < code) 205 | return -1; 206 | if (code <= 0x7F) { /// 0x00000000 - 0x0000007F: 0xxxxxxx 207 | buf[0] = (unsigned char)code; 208 | return 1; 209 | } 210 | if (code <= 0x000007FF) { /// 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx 211 | buf[0] = (unsigned char)(0xc0 | (code >> 6)); 212 | buf[1] = (unsigned char)(0x80 | (code & 0x3f)); 213 | return 2; 214 | } 215 | if (code <= 0x0000FFFF) { /// 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx 216 | buf[0] = (unsigned char)(0xe0 | (code >> 12)); 217 | buf[1] = (unsigned char)(0x80 | ((code >> 6) & 0x3f)); 218 | buf[2] = (unsigned char)(0x80 | (code & 0x3f)); 219 | return 3; 220 | } 221 | if (code <= 0x001FFFFF) { /// 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 222 | buf[0] = (unsigned char)(0xf0 | (code >> 18)); 223 | buf[1] = (unsigned char)(0x80 | ((code >> 12) & 0x3f)); 224 | buf[2] = (unsigned char)(0x80 | ((code >> 6) & 0x3f)); 225 | buf[3] = (unsigned char)(0x80 | (code & 0x3f)); 226 | return 4; 227 | } 228 | return -1; 229 | } 230 | 231 | static inline void xfree(const void* x) { 232 | if (x) 
233 | free((void*)(intptr_t)x); 234 | } 235 | 236 | enum tokentype_t { INVALID, DOT, COMMA, EQUAL, LBRACE, RBRACE, NEWLINE, LBRACKET, RBRACKET, STRING, MSTRING }; 237 | typedef enum tokentype_t tokentype_t; 238 | 239 | typedef struct token_t token_t; 240 | struct token_t { 241 | tokentype_t tok; 242 | toml_pos_t pos; 243 | char* ptr; // points into context->start 244 | int len; 245 | int eof; 246 | }; 247 | 248 | typedef struct context_t context_t; 249 | struct context_t { 250 | char* start; 251 | char* stop; 252 | char* errbuf; 253 | int errbufsz; 254 | 255 | token_t tok; 256 | toml_table_t* root; 257 | toml_table_t* curtbl; 258 | 259 | struct { 260 | int top; 261 | char* key[10]; 262 | int keylen[10]; 263 | token_t tok[10]; 264 | } tpath; 265 | }; 266 | 267 | #define STRINGIFY(x) #x 268 | #define TOSTRING(x) STRINGIFY(x) 269 | #define FLINE __FILE__ ":" TOSTRING(__LINE__) 270 | 271 | static int next_token(context_t* ctx, bool dotisspecial); 272 | 273 | // Error reporting. Call when an error is detected. Always return -1. 
274 | static int e_outofmemory(context_t* ctx, const char* fline) { 275 | snprintf(ctx->errbuf, ctx->errbufsz, "ERROR: out of memory (%s)", fline); 276 | return -1; 277 | } 278 | 279 | static int e_internal(context_t* ctx, const char* fline) { 280 | snprintf(ctx->errbuf, ctx->errbufsz, "internal error (%s)", fline); 281 | return -1; 282 | } 283 | 284 | static int e_syntax(context_t* ctx, toml_pos_t pos, const char* msg) { 285 | snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: %s", pos.line, pos.col, msg); 286 | return -1; 287 | } 288 | 289 | static int e_keyexists(context_t* ctx, toml_pos_t pos) { 290 | snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: key already defined", pos.line, pos.col); 291 | return -1; 292 | } 293 | 294 | static void* expand(void* p, int sz, int newsz) { 295 | void* s = malloc(newsz); 296 | if (!s) 297 | return 0; 298 | 299 | if (p) { 300 | memcpy(s, p, sz); 301 | free(p); 302 | } 303 | return s; 304 | } 305 | 306 | static void** expand_ptrarr(void** p, int n) { 307 | void** s = malloc((n + 1) * sizeof(void*)); 308 | if (!s) 309 | return 0; 310 | 311 | s[n] = 0; 312 | if (p) { 313 | memcpy(s, p, n * sizeof(void*)); 314 | free(p); 315 | } 316 | return s; 317 | } 318 | 319 | static toml_arritem_t* expand_arritem(toml_arritem_t* p, int n) { 320 | toml_arritem_t* pp = expand(p, n * sizeof(*p), (n + 1) * sizeof(*p)); 321 | if (!pp) 322 | return 0; 323 | 324 | memset(&pp[n], 0, sizeof(pp[n])); 325 | return pp; 326 | } 327 | 328 | static uint8_t const u8_length[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4}; 329 | #define u8length(s) u8_length[(((uint8_t*)(s))[0] & 0xFF) >> 4]; 330 | 331 | static char* norm_lit_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) { 332 | const char* sp = src; 333 | const char* sq = src + srclen; 334 | char* dst = 0; /// will write to dst[] and return it 335 | int max = 0; /// max size of dst[] 336 | int off = 0; /// cur offset in dst[] 337 | 338 | for (;;) { /// scan forward on 
src 339 | if (off >= max - 10) { /// have some slack for misc stuff 340 | int newmax = max + 50; 341 | char* x = expand(dst, max, newmax); 342 | if (!x) { 343 | xfree(dst); 344 | snprintf(errbuf, errbufsz, "out of memory"); 345 | return 0; 346 | } 347 | dst = x; 348 | max = newmax; 349 | } 350 | 351 | if (sp >= sq) /// finished? 352 | break; 353 | 354 | uint8_t l = u8length(sp); 355 | if (l == 0) { 356 | xfree(dst); 357 | snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); 358 | return 0; 359 | } 360 | if (l > 1) { 361 | for (int i = 0; i < l; i++) { 362 | char ch = *sp++; 363 | if ((ch & 0x80) != 0x80) { 364 | xfree(dst); 365 | snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); 366 | return 0; 367 | } 368 | dst[off++] = ch; 369 | } 370 | continue; 371 | } 372 | 373 | /// control characters other than Tab are not allowed 374 | char ch = *sp++; 375 | if ((0 <= ch && ch <= 0x08) || (0x0a <= ch && ch <= 0x1f) || ch == 0x7f) { 376 | if (!(multiline && (ch == '\r' || ch == '\n'))) { 377 | xfree(dst); 378 | snprintf(errbuf, errbufsz, "invalid char U+%04x", ch); 379 | return 0; 380 | } 381 | } 382 | 383 | dst[off++] = ch; /// a plain copy suffice 384 | } 385 | 386 | *len = off; 387 | dst[off++] = 0; 388 | return dst; 389 | } 390 | 391 | // Convert src to raw unescaped utf-8 string. Returns NULL if error with errmsg 392 | // in errbuf. 
393 | static char* norm_basic_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) { 394 | const char* sp = src; 395 | const char* sq = src + srclen; 396 | char* dst = 0; /// will write to dst[] and return it 397 | int max = 0; /// max size of dst[] 398 | int off = 0; /// cur offset in dst[] 399 | 400 | /// scan forward on src 401 | for (;;) { 402 | if (off >= max - 10) { /// have some slack for misc stuff 403 | int newmax = max + 50; 404 | char* x = expand(dst, max, newmax); 405 | if (!x) { 406 | xfree(dst); 407 | snprintf(errbuf, errbufsz, "out of memory"); 408 | return 0; 409 | } 410 | dst = x; 411 | max = newmax; 412 | } 413 | 414 | if (sp >= sq) /// finished? 415 | break; 416 | 417 | uint8_t l = u8length(sp); 418 | if (l == 0) { 419 | xfree(dst); 420 | snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); 421 | return 0; 422 | } 423 | if (l > 1) { 424 | for (int i = 0; i < l; i++) { 425 | char ch = *sp++; 426 | if ((ch & 0x80) != 0x80) { 427 | xfree(dst); 428 | snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); 429 | return 0; 430 | } 431 | dst[off++] = ch; 432 | } 433 | continue; 434 | } 435 | 436 | char ch = *sp++; 437 | if (ch != '\\') { 438 | /// must be escaped: U+0000 to U+0008, U+000A to U+001F, U+007F 439 | if ((ch >= 0 && ch <= 0x08) || (ch >= 0x0a && ch <= 0x1f) || ch == 0x7f) { 440 | if (!(multiline && (ch == '\r' || ch == '\n'))) { 441 | xfree(dst); 442 | snprintf(errbuf, errbufsz, "invalid char U+%04x", ch); 443 | return 0; 444 | } 445 | } 446 | 447 | dst[off++] = ch; /// a plain copy suffice 448 | continue; 449 | } 450 | 451 | // TODO: unreachable, I think? 452 | if (sp >= sq) { /// ch was backslash. we expect the escape char. 453 | snprintf(errbuf, errbufsz, "last backslash is invalid"); 454 | xfree(dst); 455 | return 0; 456 | } 457 | 458 | if (multiline) { /// for multi-line, we want to kill line-ending-backslash. 
459 | if (sp[strspn(sp, " \t\r")] == '\n') { /// if there is only whitespace after the backslash ... 460 | sp += strspn(sp, " \t\r\n"); /// skip all the following whitespaces 461 | continue; 462 | } 463 | } 464 | 465 | ch = *sp++; /// get the escaped char 466 | switch (ch) { 467 | case 'x': 468 | case 'u': 469 | case 'U': { 470 | uint64_t ucs = 0; 471 | int nhex = 2; 472 | if (ch == 'u') nhex = 4; 473 | if (ch == 'U') nhex = 8; 474 | for (int i = 0; i < nhex; i++) { 475 | // TODO: unreachable I think, as scan_string() already 476 | // guarantees exactly 4 or 8 hex chars. 477 | if (sp >= sq) { 478 | snprintf(errbuf, errbufsz, "\\%c expected %d hex chars", ch, nhex); 479 | xfree(dst); 480 | return 0; 481 | } 482 | ch = *sp++; 483 | int v = -1; 484 | if ('0' <= ch && ch <= '9') 485 | v = ch - '0'; 486 | else if ('A' <= ch && ch <= 'F') 487 | v = ch - 'A' + 10; 488 | else if ('a' <= ch && ch <= 'f') 489 | v = (ch ^ 0x20) - 'A' + 10; 490 | // TODO: also unrechable, as per above. 491 | if (v == -1) { 492 | snprintf(errbuf, errbufsz, "invalid hex chars for \\u or \\U"); 493 | xfree(dst); 494 | return 0; 495 | } 496 | ucs = ucs * 16 + v; 497 | } 498 | int n = read_unicode_escape(ucs, &dst[off]); 499 | if (n == -1) { 500 | snprintf(errbuf, errbufsz, "illegal ucs code in \\u or \\U"); 501 | xfree(dst); 502 | return 0; 503 | } 504 | off += n; 505 | }; 506 | continue; 507 | case 'b': ch = '\b'; break; 508 | case 't': ch = '\t'; break; 509 | case 'n': ch = '\n'; break; 510 | case 'f': ch = '\f'; break; 511 | case 'r': ch = '\r'; break; 512 | case 'e': ch = 0x1b; break; 513 | case '"': ch = '"'; break; 514 | case '\\': ch = '\\'; break; 515 | default: 516 | // TODO: unrechable, I think, as scan_string() already 517 | // guarantees correct char. 518 | snprintf(errbuf, errbufsz, "illegal escape char \\%c", ch); 519 | xfree(dst); 520 | return 0; 521 | } 522 | 523 | dst[off++] = ch; 524 | } 525 | 526 | *len = off; 527 | dst[off++] = 0; /// Cap with NUL and return it. 
528 | return dst; 529 | } 530 | 531 | // Normalize a key. Convert all special chars to raw unescaped utf-8 chars. 532 | static char* normalize_key(context_t* ctx, token_t strtok, int* keylen) { 533 | const char* sp = strtok.ptr; 534 | const char* sq = strtok.ptr + strtok.len; 535 | int ch = *sp; 536 | char* ret; 537 | 538 | // Quoted string 539 | if (ch == '\'' || ch == '\"') { 540 | /// Take " or ' off from and back. 541 | sp++, sq--; 542 | 543 | char ebuf[80]; 544 | if (ch == '\'') 545 | ret = norm_lit_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf)); 546 | else 547 | ret = norm_basic_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf)); 548 | if (!ret) { 549 | e_syntax(ctx, strtok.pos, ebuf); 550 | return 0; 551 | } 552 | return ret; 553 | } 554 | 555 | *keylen = 0; 556 | for (const char* c = sp; c != sq; c++) { /// Bare key: allow: [A-Za-z0-9_-]+ 557 | *keylen = *keylen + 1; 558 | if (isalnum(*c) || *c == '_' || *c == '-') 559 | continue; 560 | // TODO: never triggered? When reading the file it already validates 561 | // this, so seems redundant? Need to double-check. 562 | e_syntax(ctx, ctx->tok.pos, "invalid key"); 563 | return 0; 564 | } 565 | 566 | if (!(ret = STRNDUP(sp, sq - sp))) { /// dup and return 567 | e_outofmemory(ctx, FLINE); 568 | return 0; 569 | } 570 | return ret; 571 | } 572 | 573 | // Look up key in tbl. Return 0 if not found, or 'v'alue, 'a'rray or 't'able 574 | // depending on the element. 
575 | static int check_key(toml_table_t* tbl, const char* key, toml_keyval_t** ret_val, toml_array_t** ret_arr, toml_table_t** ret_tbl) { 576 | int i; 577 | void* dummy; 578 | 579 | if (!ret_tbl) 580 | ret_tbl = (toml_table_t**)&dummy; 581 | if (!ret_arr) 582 | ret_arr = (toml_array_t**)&dummy; 583 | if (!ret_val) 584 | ret_val = (toml_keyval_t**)&dummy; 585 | 586 | *ret_tbl = 0; 587 | *ret_arr = 0; 588 | *ret_val = 0; 589 | 590 | for (i = 0; i < tbl->nkval; i++) { 591 | if (strcmp(key, tbl->kval[i]->key) == 0) { 592 | *ret_val = tbl->kval[i]; 593 | return 'v'; 594 | } 595 | } 596 | for (i = 0; i < tbl->narr; i++) { 597 | if (strcmp(key, tbl->arr[i]->key) == 0) { 598 | *ret_arr = tbl->arr[i]; 599 | return 'a'; 600 | } 601 | } 602 | for (i = 0; i < tbl->ntbl; i++) { 603 | if (strcmp(key, tbl->tbl[i]->key) == 0) { 604 | *ret_tbl = tbl->tbl[i]; 605 | return 't'; 606 | } 607 | } 608 | return 0; 609 | } 610 | 611 | static int key_kind(toml_table_t* tbl, const char* key) { 612 | return check_key(tbl, key, 0, 0, 0); 613 | } 614 | 615 | // Create a keyval in the table. 
616 | static toml_keyval_t* create_keyval_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) { 617 | int keylen; 618 | char* newkey = normalize_key(ctx, keytok, &keylen); 619 | if (!newkey) 620 | return 0; 621 | 622 | toml_keyval_t* dest = 0; 623 | if (key_kind(tbl, newkey)) { 624 | xfree(newkey); 625 | e_keyexists(ctx, keytok.pos); 626 | return 0; 627 | } 628 | 629 | int n = tbl->nkval; 630 | toml_keyval_t** base; 631 | if ((base = (toml_keyval_t**)expand_ptrarr((void**)tbl->kval, n)) == 0) { 632 | xfree(newkey); 633 | e_outofmemory(ctx, FLINE); 634 | return 0; 635 | } 636 | tbl->kval = base; 637 | 638 | if ((base[n] = (toml_keyval_t*)CALLOC(1, sizeof(*base[n]))) == 0) { 639 | xfree(newkey); 640 | e_outofmemory(ctx, FLINE); 641 | return 0; 642 | } 643 | 644 | dest = tbl->kval[tbl->nkval++]; 645 | dest->key = newkey; 646 | dest->keylen = keylen; 647 | return dest; 648 | } 649 | 650 | // Create a table in the table. 651 | static toml_table_t* create_keytable_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) { 652 | int keylen; 653 | char* newkey = normalize_key(ctx, keytok, &keylen); 654 | if (!newkey) 655 | return 0; 656 | 657 | toml_table_t* dest = 0; 658 | // TODO: need to check all parts for: 659 | // 660 | // [a] 661 | // [a.c] # checks of "a.c" is defined, which is false. 662 | if (check_key(tbl, newkey, 0, 0, &dest)) { 663 | xfree(newkey); 664 | 665 | /// Special case: make explicit if table exists and was created 666 | /// implicitly. 
667 | if (dest && dest->implicit) { 668 | dest->implicit = false; 669 | return dest; 670 | } 671 | e_keyexists(ctx, keytok.pos); 672 | return 0; 673 | } 674 | 675 | int n = tbl->ntbl; 676 | toml_table_t** base; 677 | if ((base = (toml_table_t**)expand_ptrarr((void**)tbl->tbl, n)) == 0) { 678 | xfree(newkey); 679 | e_outofmemory(ctx, FLINE); 680 | return 0; 681 | } 682 | tbl->tbl = base; 683 | 684 | if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0) { 685 | xfree(newkey); 686 | e_outofmemory(ctx, FLINE); 687 | return 0; 688 | } 689 | 690 | dest = tbl->tbl[tbl->ntbl++]; 691 | dest->key = newkey; 692 | dest->keylen = keylen; 693 | return dest; 694 | } 695 | 696 | // Create an array in the table. 697 | static toml_array_t* create_keyarray_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok, char kind) { 698 | int keylen; 699 | char* newkey = normalize_key(ctx, keytok, &keylen); 700 | if (!newkey) 701 | return 0; 702 | 703 | if (key_kind(tbl, newkey)) { 704 | xfree(newkey); 705 | e_keyexists(ctx, keytok.pos); 706 | return 0; 707 | } 708 | 709 | int n = tbl->narr; 710 | toml_array_t** base; 711 | if ((base = (toml_array_t**)expand_ptrarr((void**)tbl->arr, n)) == 0) { 712 | xfree(newkey); 713 | e_outofmemory(ctx, FLINE); 714 | return 0; 715 | } 716 | tbl->arr = base; 717 | 718 | if ((base[n] = (toml_array_t*)CALLOC(1, sizeof(*base[n]))) == 0) { 719 | xfree(newkey); 720 | e_outofmemory(ctx, FLINE); 721 | return 0; 722 | } 723 | toml_array_t* dest = tbl->arr[tbl->narr++]; 724 | 725 | dest->keylen = keylen; 726 | dest->key = newkey; 727 | dest->kind = kind; 728 | return dest; 729 | } 730 | 731 | static toml_arritem_t* create_value_in_array(context_t* ctx, toml_array_t* parent) { 732 | const int n = parent->nitem; 733 | toml_arritem_t* base = expand_arritem(parent->item, n); 734 | if (!base) { 735 | e_outofmemory(ctx, FLINE); 736 | return 0; 737 | } 738 | parent->item = base; 739 | parent->nitem++; 740 | return &parent->item[n]; 741 | } 742 | 743 | // 
Create an array in an array. 744 | static toml_array_t* create_array_in_array(context_t* ctx, toml_array_t* parent) { 745 | const int n = parent->nitem; 746 | toml_arritem_t* base = expand_arritem(parent->item, n); 747 | if (!base) { 748 | e_outofmemory(ctx, FLINE); 749 | return 0; 750 | } 751 | toml_array_t* ret = (toml_array_t*)CALLOC(1, sizeof(toml_array_t)); 752 | if (!ret) { 753 | e_outofmemory(ctx, FLINE); 754 | return 0; 755 | } 756 | base[n].arr = ret; 757 | parent->item = base; 758 | parent->nitem++; 759 | return ret; 760 | } 761 | 762 | // Create a table in an array 763 | static toml_table_t* create_table_in_array(context_t* ctx, toml_array_t* parent) { 764 | int n = parent->nitem; 765 | toml_arritem_t* base = expand_arritem(parent->item, n); 766 | if (!base) { 767 | e_outofmemory(ctx, FLINE); 768 | return 0; 769 | } 770 | toml_table_t* ret = (toml_table_t*)CALLOC(1, sizeof(toml_table_t)); 771 | if (!ret) { 772 | e_outofmemory(ctx, FLINE); 773 | return 0; 774 | } 775 | base[n].tbl = ret; 776 | parent->item = base; 777 | parent->nitem++; 778 | return ret; 779 | } 780 | 781 | static bool skip_newlines(context_t* ctx, bool isdotspecial) { 782 | while (ctx->tok.tok == NEWLINE) { 783 | if (next_token(ctx, isdotspecial)) 784 | return false; 785 | if (ctx->tok.eof) 786 | break; 787 | } 788 | return true; 789 | } 790 | 791 | static int parse_keyval(context_t* ctx, toml_table_t* tbl); 792 | 793 | static inline int eat_token(context_t* ctx, tokentype_t typ, bool isdotspecial, const char* fline) { 794 | if (ctx->tok.tok != typ) 795 | return e_internal(ctx, fline); 796 | if (next_token(ctx, isdotspecial)) 797 | return -1; 798 | return 0; 799 | } 800 | 801 | // We are at '{ ... }'; parse the table. 
802 | static int parse_inline_table(context_t* ctx, toml_table_t* tbl) { 803 | if (eat_token(ctx, LBRACE, 1, FLINE)) 804 | return -1; 805 | 806 | for (;;) { 807 | if (ctx->tok.tok == RBRACE) // until closing brace 808 | break; 809 | if (ctx->tok.eof) 810 | return e_syntax(ctx, ctx->tok.pos, "no closing '}'"); 811 | 812 | if (ctx->tok.tok == NEWLINE) { 813 | if (eat_token(ctx, NEWLINE, 1, FLINE)) 814 | return -1; 815 | continue; 816 | } 817 | 818 | if (ctx->tok.tok != STRING) 819 | return e_syntax(ctx, ctx->tok.pos, "expected a string"); 820 | 821 | if (parse_keyval(ctx, tbl)) 822 | return -1; 823 | 824 | // On comma, continue to scan for next keyval. 825 | if (ctx->tok.tok == COMMA) { 826 | if (eat_token(ctx, COMMA, 1, FLINE)) 827 | return -1; 828 | continue; 829 | } 830 | break; 831 | } 832 | 833 | for (;;) { 834 | if (ctx->tok.tok != NEWLINE || ctx->tok.eof) 835 | break; 836 | if (eat_token(ctx, NEWLINE, 1, FLINE)) 837 | return -1; 838 | } 839 | 840 | if (eat_token(ctx, RBRACE, 1, FLINE)) 841 | return -1; 842 | 843 | tbl->readonly = 1; 844 | return 0; 845 | } 846 | 847 | static int valtype(const char* val) { 848 | toml_timestamp_t ts; 849 | if (*val == '\'' || *val == '"') 850 | return 's'; 851 | if (toml_value_bool(val, false) == 0) 852 | return 'b'; 853 | if (toml_value_int(val, 0) == 0) 854 | return 'i'; 855 | if (toml_value_double(val, 0) == 0) 856 | return 'd'; 857 | if (toml_value_timestamp(val, &ts) == 0) { 858 | if (ts.year && ts.hour) 859 | return 'T'; /// timestamp 860 | if (ts.year) // TODO: never reached? 
861 | return 'D'; /// date 862 | return 't'; /// time 863 | } 864 | return 'u'; /// unknown 865 | } 866 | 867 | // We are at '[...]' 868 | static int parse_array(context_t* ctx, toml_array_t* arr) { 869 | if (eat_token(ctx, LBRACKET, 0, FLINE)) 870 | return -1; 871 | 872 | for (;;) { 873 | if (!skip_newlines(ctx, 0)) 874 | return -1; 875 | 876 | if (ctx->tok.tok == RBRACKET) /// until ] 877 | break; 878 | 879 | switch (ctx->tok.tok) { 880 | case MSTRING: 881 | case STRING: { 882 | /// set array kind if this will be the first entry 883 | if (arr->kind == 0) 884 | arr->kind = 'v'; 885 | else if (arr->kind != 'v') 886 | arr->kind = 'm'; 887 | 888 | char* val = ctx->tok.ptr; 889 | int vlen = ctx->tok.len; 890 | 891 | /// make a new value in array 892 | toml_arritem_t* newval = create_value_in_array(ctx, arr); 893 | if (!newval) 894 | return e_outofmemory(ctx, FLINE); 895 | 896 | if (!(newval->val = STRNDUP(val, vlen))) 897 | return e_outofmemory(ctx, FLINE); 898 | 899 | newval->valtype = valtype(newval->val); 900 | 901 | /// set array type if this is the first entry 902 | if (arr->nitem == 1) 903 | arr->type = newval->valtype; 904 | else if (arr->type != newval->valtype) 905 | arr->type = 'm'; /// mixed 906 | 907 | if (eat_token(ctx, ctx->tok.tok, 0, FLINE)) 908 | return -1; 909 | break; 910 | } 911 | case LBRACKET: { // [ [array], [array] ... ] 912 | // set the array kind if this will be the first entry. 913 | if (arr->kind == 0) 914 | arr->kind = 'a'; 915 | else if (arr->kind != 'a') 916 | arr->kind = 'm'; 917 | 918 | toml_array_t* subarr = create_array_in_array(ctx, arr); 919 | if (!subarr) 920 | return -1; 921 | if (parse_array(ctx, subarr)) 922 | return -1; 923 | break; 924 | } 925 | case LBRACE: { // [ {table}, {table} ... ] 926 | // set the array kind if this will be the first entry. 
927 | if (arr->kind == 0) 928 | arr->kind = 't'; 929 | else if (arr->kind != 't') 930 | arr->kind = 'm'; 931 | 932 | toml_table_t* subtbl = create_table_in_array(ctx, arr); 933 | if (!subtbl) 934 | return -1; 935 | if (parse_inline_table(ctx, subtbl)) 936 | return -1; 937 | break; 938 | } 939 | default: return e_syntax(ctx, ctx->tok.pos, "syntax error"); 940 | } 941 | 942 | if (!skip_newlines(ctx, 0)) 943 | return -1; 944 | 945 | // on comma, continue to scan for next element 946 | if (ctx->tok.tok == COMMA) { 947 | if (eat_token(ctx, COMMA, 0, FLINE)) 948 | return -1; 949 | continue; 950 | } 951 | break; 952 | } 953 | 954 | if (eat_token(ctx, RBRACKET, 1, FLINE)) 955 | return -1; 956 | return 0; 957 | } 958 | 959 | // Handle lines like: 960 | // key = "value" 961 | // key = [ array ] 962 | // key = { table } 963 | static int parse_keyval(context_t* ctx, toml_table_t* tbl) { 964 | if (tbl->readonly) 965 | return e_keyexists(ctx, ctx->tok.pos); 966 | 967 | token_t key = ctx->tok; 968 | if (eat_token(ctx, STRING, 1, FLINE)) 969 | return -1; 970 | 971 | if (ctx->tok.tok == DOT) { 972 | // Handle inline dotted key: 973 | // physical.color = "orange" 974 | // physical.shape = "round" 975 | toml_table_t* subtbl = 0; 976 | { 977 | int keylen; 978 | char* subtblstr = normalize_key(ctx, key, &keylen); 979 | if (!subtblstr) 980 | return -1; 981 | 982 | subtbl = toml_table_table(tbl, subtblstr); 983 | if (subtbl) 984 | subtbl->keylen = keylen; 985 | xfree(subtblstr); 986 | } 987 | if (!subtbl) { 988 | subtbl = create_keytable_in_table(ctx, tbl, key); 989 | if (!subtbl) 990 | return -1; 991 | } 992 | if (next_token(ctx, true)) 993 | return -1; 994 | if (parse_keyval(ctx, subtbl)) 995 | return -1; 996 | return 0; 997 | } 998 | 999 | if (ctx->tok.tok != EQUAL) 1000 | return e_syntax(ctx, ctx->tok.pos, "missing '='"); 1001 | 1002 | if (next_token(ctx, false)) 1003 | return -1; 1004 | 1005 | switch (ctx->tok.tok) { 1006 | case MSTRING: 1007 | case STRING: { // key = "value" 1008 | 
toml_keyval_t* keyval = create_keyval_in_table(ctx, tbl, key); 1009 | if (!keyval) 1010 | return -1; 1011 | token_t val = ctx->tok; 1012 | 1013 | assert(keyval->val == 0); 1014 | if (!(keyval->val = STRNDUP(val.ptr, val.len))) 1015 | return e_outofmemory(ctx, FLINE); 1016 | 1017 | if (next_token(ctx, true)) 1018 | return -1; 1019 | 1020 | return 0; 1021 | } 1022 | case LBRACKET: { // key = [ array ] 1023 | toml_array_t* arr = create_keyarray_in_table(ctx, tbl, key, 0); 1024 | if (!arr) 1025 | return -1; 1026 | if (parse_array(ctx, arr)) 1027 | return -1; 1028 | return 0; 1029 | } 1030 | case LBRACE: { // key = { table } 1031 | toml_table_t* nexttbl = create_keytable_in_table(ctx, tbl, key); 1032 | if (!nexttbl) 1033 | return -1; 1034 | if (parse_inline_table(ctx, nexttbl)) 1035 | return -1; 1036 | return 0; 1037 | } 1038 | default: return e_syntax(ctx, ctx->tok.pos, "syntax error"); 1039 | } 1040 | return 0; 1041 | } 1042 | 1043 | typedef struct tabpath_t tabpath_t; 1044 | struct tabpath_t { 1045 | int cnt; 1046 | token_t key[10]; 1047 | }; 1048 | 1049 | // At [x.y.z] or [[x.y.z]] 1050 | // Scan forward and fill tblpath until it enters ] or ]] 1051 | // There will be at least one entry on return. 
1052 | static int fill_tblpath(context_t* ctx) { 1053 | // clear tpath 1054 | for (int i = 0; i < ctx->tpath.top; i++) { 1055 | char** p = &ctx->tpath.key[i]; 1056 | xfree(*p); 1057 | *p = 0; 1058 | } 1059 | ctx->tpath.top = 0; 1060 | 1061 | for (;;) { 1062 | if (ctx->tpath.top >= 10) 1063 | return e_syntax(ctx, ctx->tok.pos, "table path is too deep; max allowed is 10."); 1064 | if (ctx->tok.tok != STRING) 1065 | return e_syntax(ctx, ctx->tok.pos, "invalid or missing key"); 1066 | 1067 | int keylen; 1068 | char* key = normalize_key(ctx, ctx->tok, &keylen); 1069 | if (!key) 1070 | return -1; 1071 | ctx->tpath.tok[ctx->tpath.top] = ctx->tok; 1072 | ctx->tpath.key[ctx->tpath.top] = key; 1073 | ctx->tpath.keylen[ctx->tpath.top] = keylen; 1074 | ctx->tpath.top++; 1075 | 1076 | if (next_token(ctx, true)) 1077 | return -1; 1078 | 1079 | if (ctx->tok.tok == RBRACKET) 1080 | break; 1081 | if (ctx->tok.tok != DOT) 1082 | return e_syntax(ctx, ctx->tok.pos, "invalid key"); 1083 | if (next_token(ctx, true)) 1084 | return -1; 1085 | } 1086 | 1087 | if (ctx->tpath.top <= 0) // TODO: never reached? 1088 | return e_syntax(ctx, ctx->tok.pos, "empty table selector"); 1089 | return 0; 1090 | } 1091 | 1092 | // Walk tblpath from the root, and create new tables on the way. Sets 1093 | // ctx->curtbl to the final table. 1094 | static int walk_tabpath(context_t* ctx) { 1095 | toml_table_t* curtbl = ctx->root; /// start from root 1096 | 1097 | for (int i = 0; i < ctx->tpath.top; i++) { 1098 | const char* key = ctx->tpath.key[i]; 1099 | int keylen = ctx->tpath.keylen[i]; 1100 | 1101 | toml_keyval_t* nextval = 0; 1102 | toml_array_t* nextarr = 0; 1103 | toml_table_t* nexttbl = 0; 1104 | switch (check_key(curtbl, key, &nextval, &nextarr, &nexttbl)) { 1105 | case 't': /// found a table. nexttbl is where we will go next. 1106 | break; 1107 | case 'a': /// found an array. nexttbl is the last table in the array. 
1108 | if (nextarr->kind != 't') 1109 | return e_internal(ctx, FLINE); 1110 | 1111 | if (nextarr->nitem == 0) 1112 | return e_internal(ctx, FLINE); 1113 | 1114 | nexttbl = nextarr->item[nextarr->nitem - 1].tbl; 1115 | break; 1116 | case 'v': return e_keyexists(ctx, ctx->tpath.tok[i].pos); 1117 | default: { /// Not found. Let's create an implicit table. 1118 | int n = curtbl->ntbl; 1119 | toml_table_t** base = (toml_table_t**)expand_ptrarr((void**)curtbl->tbl, n); 1120 | if (base == 0) 1121 | return e_outofmemory(ctx, FLINE); 1122 | 1123 | curtbl->tbl = base; 1124 | 1125 | if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0) 1126 | return e_outofmemory(ctx, FLINE); 1127 | 1128 | if ((base[n]->key = STRDUP(key)) == 0) 1129 | return e_outofmemory(ctx, FLINE); 1130 | base[n]->keylen = keylen; 1131 | 1132 | nexttbl = curtbl->tbl[curtbl->ntbl++]; 1133 | 1134 | /// tabs created by walk_tabpath are considered implicit 1135 | nexttbl->implicit = true; 1136 | }; break; 1137 | } 1138 | curtbl = nexttbl; /// switch to next tbl 1139 | } 1140 | 1141 | ctx->curtbl = curtbl; /// save it 1142 | return 0; 1143 | } 1144 | 1145 | // handle lines like [x.y.z] or [[x.y.z]] 1146 | static int parse_select(context_t* ctx) { 1147 | assert(ctx->tok.tok == LBRACKET); 1148 | 1149 | // true if [[ 1150 | bool aot = (ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == '['); 1151 | 1152 | // Need to detect '[[' on our own because next_token() will skip whitespace, 1153 | // and '[ [' would be taken as '[[', which is wrong. 1154 | 1155 | // eat [ or [[ 1156 | if (eat_token(ctx, LBRACKET, 1, FLINE)) 1157 | return -1; 1158 | if (aot) { 1159 | assert(ctx->tok.tok == LBRACKET); 1160 | if (eat_token(ctx, LBRACKET, 1, FLINE)) 1161 | return -1; 1162 | } 1163 | 1164 | if (fill_tblpath(ctx)) 1165 | return -1; 1166 | 1167 | // For [x.y.z] or [[x.y.z]], remove z from tpath. 
1168 | token_t z = ctx->tpath.tok[ctx->tpath.top - 1]; 1169 | xfree(ctx->tpath.key[ctx->tpath.top - 1]); 1170 | ctx->tpath.top--; 1171 | 1172 | // Set up ctx->curtbl. 1173 | if (walk_tabpath(ctx)) 1174 | return -1; 1175 | 1176 | if (!aot) { 1177 | // [x.y.z] -> create z = {} in x.y 1178 | toml_table_t* curtbl = create_keytable_in_table(ctx, ctx->curtbl, z); 1179 | if (!curtbl) 1180 | return -1; 1181 | ctx->curtbl = curtbl; 1182 | } else { 1183 | // [[x.y.z]] -> create z = [] in x.y 1184 | toml_array_t* arr = 0; 1185 | { 1186 | int keylen; 1187 | char* zstr = normalize_key(ctx, z, &keylen); 1188 | if (!zstr) 1189 | return -1; 1190 | arr = toml_table_array(ctx->curtbl, zstr); 1191 | if (arr) 1192 | arr->keylen = keylen; 1193 | xfree(zstr); 1194 | } 1195 | if (!arr) { 1196 | arr = create_keyarray_in_table(ctx, ctx->curtbl, z, 't'); 1197 | if (!arr) 1198 | return -1; 1199 | } 1200 | if (arr->kind != 't') 1201 | return e_syntax(ctx, z.pos, "array mismatch"); 1202 | 1203 | // add to z[] 1204 | toml_table_t* dest; 1205 | { 1206 | toml_table_t* t = create_table_in_array(ctx, arr); 1207 | if (!t) 1208 | return -1; 1209 | 1210 | if ((t->key = STRDUP("__anon__")) == 0) 1211 | return e_outofmemory(ctx, FLINE); 1212 | dest = t; 1213 | } 1214 | 1215 | ctx->curtbl = dest; 1216 | } 1217 | 1218 | if (ctx->tok.tok != RBRACKET) // TODO: never reached 1219 | return e_syntax(ctx, ctx->tok.pos, "expected ']'"); 1220 | if (aot) { 1221 | if (!(ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == ']')) 1222 | return e_syntax(ctx, ctx->tok.pos, "expected ']]'"); 1223 | if (eat_token(ctx, RBRACKET, 1, FLINE)) 1224 | return -1; 1225 | } 1226 | 1227 | if (eat_token(ctx, RBRACKET, 1, FLINE)) 1228 | return -1; 1229 | if (ctx->tok.tok != NEWLINE) 1230 | return e_syntax(ctx, ctx->tok.pos, "extra chars after ] or ]]"); 1231 | return 0; 1232 | } 1233 | 1234 | toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz) { 1235 | context_t ctx; 1236 | 1237 | /// clear errbuf 1238 | if (errbufsz <= 
0) 1239 | errbufsz = 0; 1240 | if (errbufsz > 0) 1241 | errbuf[0] = 0; 1242 | 1243 | // init context 1244 | memset(&ctx, 0, sizeof(ctx)); 1245 | ctx.start = toml; 1246 | ctx.stop = ctx.start + strlen(toml); 1247 | ctx.errbuf = errbuf; 1248 | ctx.errbufsz = errbufsz; 1249 | 1250 | // start with an artificial newline of length 0 1251 | ctx.tok.tok = NEWLINE; 1252 | ctx.tok.pos.line = 1; 1253 | ctx.tok.pos.col = 1; 1254 | ctx.tok.ptr = toml; 1255 | ctx.tok.len = 0; 1256 | 1257 | // make a root table 1258 | if ((ctx.root = CALLOC(1, sizeof(*ctx.root))) == 0) { 1259 | e_outofmemory(&ctx, FLINE); 1260 | return 0; // Do not goto fail, root table not set up yet 1261 | } 1262 | 1263 | // set root as default table 1264 | ctx.curtbl = ctx.root; 1265 | 1266 | // Scan forward until EOF 1267 | for (token_t tok = ctx.tok; !tok.eof; tok = ctx.tok) { 1268 | switch (tok.tok) { 1269 | case NEWLINE: 1270 | if (next_token(&ctx, true)) 1271 | goto fail; 1272 | break; 1273 | 1274 | case STRING: 1275 | if (parse_keyval(&ctx, ctx.curtbl)) 1276 | goto fail; 1277 | 1278 | if (ctx.tok.tok != NEWLINE) { 1279 | e_syntax(&ctx, ctx.tok.pos, "extra chars after value"); 1280 | goto fail; 1281 | } 1282 | 1283 | if (eat_token(&ctx, NEWLINE, 1, FLINE)) 1284 | goto fail; 1285 | break; 1286 | 1287 | case LBRACKET: // [ x.y.z ] or [[ x.y.z ]] 1288 | if (parse_select(&ctx)) 1289 | goto fail; 1290 | break; 1291 | 1292 | default: e_syntax(&ctx, tok.pos, "syntax error"); goto fail; 1293 | } 1294 | } 1295 | 1296 | /// success 1297 | for (int i = 0; i < ctx.tpath.top; i++) 1298 | xfree(ctx.tpath.key[i]); 1299 | return ctx.root; 1300 | 1301 | fail: 1302 | // Something bad has happened. Free resources and return error. 
1303 | for (int i = 0; i < ctx.tpath.top; i++) 1304 | xfree(ctx.tpath.key[i]); 1305 | toml_free(ctx.root); 1306 | return 0; 1307 | } 1308 | 1309 | toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz) { 1310 | int bufsz = 0; 1311 | char* buf = 0; 1312 | int off = 0; 1313 | int inc = 1024; 1314 | 1315 | while (!feof(fp)) { 1316 | if (bufsz == 1024 * 20) /// Increment buffer by 20k after 20k. 1317 | inc = 1024 * 20; 1318 | if (off == bufsz) { 1319 | int xsz = bufsz + inc; 1320 | char* x = expand(buf, bufsz, xsz); 1321 | if (!x) { 1322 | snprintf(errbuf, errbufsz, "out of memory"); 1323 | xfree(buf); 1324 | return 0; 1325 | } 1326 | buf = x; 1327 | bufsz = xsz; 1328 | } 1329 | 1330 | errno = 0; 1331 | int n = fread(buf + off, 1, bufsz - off, fp); 1332 | if (ferror(fp)) { 1333 | snprintf(errbuf, errbufsz, "%s", (errno ? strerror(errno) : "Error reading file")); 1334 | xfree(buf); 1335 | return 0; 1336 | } 1337 | off += n; 1338 | } 1339 | 1340 | /// tag on a NUL to cap the string 1341 | if (off == bufsz) { 1342 | int xsz = bufsz + 1; 1343 | char* x = expand(buf, bufsz, xsz); 1344 | if (!x) { 1345 | snprintf(errbuf, errbufsz, "out of memory"); 1346 | xfree(buf); 1347 | return 0; 1348 | } 1349 | buf = x; 1350 | bufsz = xsz; 1351 | } 1352 | buf[off] = 0; 1353 | 1354 | /// parse it, cleanup and finish. 
1355 | toml_table_t* ret = toml_parse(buf, errbuf, errbufsz); 1356 | xfree(buf); 1357 | return ret; 1358 | } 1359 | 1360 | static void xfree_kval(toml_keyval_t* p) { 1361 | if (!p) 1362 | return; 1363 | xfree(p->key); 1364 | xfree(p->val); 1365 | xfree(p); 1366 | } 1367 | 1368 | static void xfree_tbl(toml_table_t* p); 1369 | 1370 | static void xfree_arr(toml_array_t* p) { 1371 | if (!p) 1372 | return; 1373 | 1374 | xfree(p->key); 1375 | const int n = p->nitem; 1376 | for (int i = 0; i < n; i++) { 1377 | toml_arritem_t* a = &p->item[i]; 1378 | if (a->val) 1379 | xfree(a->val); 1380 | else if (a->arr) 1381 | xfree_arr(a->arr); 1382 | else if (a->tbl) 1383 | xfree_tbl(a->tbl); 1384 | } 1385 | xfree(p->item); 1386 | xfree(p); 1387 | } 1388 | 1389 | static void xfree_tbl(toml_table_t* p) { 1390 | if (!p) 1391 | return; 1392 | 1393 | xfree(p->key); 1394 | 1395 | for (int i = 0; i < p->nkval; i++) 1396 | xfree_kval(p->kval[i]); 1397 | xfree(p->kval); 1398 | 1399 | for (int i = 0; i < p->narr; i++) 1400 | xfree_arr(p->arr[i]); 1401 | xfree(p->arr); 1402 | 1403 | for (int i = 0; i < p->ntbl; i++) 1404 | xfree_tbl(p->tbl[i]); 1405 | xfree(p->tbl); 1406 | 1407 | xfree(p); 1408 | } 1409 | 1410 | void toml_free(toml_table_t* tbl) { 1411 | xfree_tbl(tbl); 1412 | } 1413 | 1414 | static void set_token(context_t* ctx, tokentype_t tok, toml_pos_t pos, char* ptr, int len) { 1415 | token_t t; 1416 | t.tok = tok; 1417 | t.pos = pos; 1418 | t.ptr = ptr; 1419 | t.len = len; 1420 | t.eof = 0; 1421 | ctx->tok = t; 1422 | } 1423 | 1424 | static void set_eof(context_t* ctx, toml_pos_t pos) { 1425 | set_token(ctx, NEWLINE, pos, ctx->stop, 0); 1426 | ctx->tok.eof = 1; 1427 | } 1428 | 1429 | // Scan p for n digits compositing entirely of [0-9] 1430 | static int scan_digits(const char* p, int n) { 1431 | int ret = 0; 1432 | for (; n > 0 && isdigit(*p); n--, p++) 1433 | ret = 10 * ret + (*p - '0'); 1434 | return n ? 
-1 : ret; 1435 | } 1436 | 1437 | static bool scan_date(const char* p, int* YY, int* MM, int* DD) { 1438 | int year = scan_digits(p, 4); 1439 | int month = (year >= 0 && p[4] == '-') ? scan_digits(p + 5, 2) : -1; 1440 | int day = (month >= 0 && p[7] == '-') ? scan_digits(p + 8, 2) : -1; 1441 | if (YY) 1442 | *YY = year; 1443 | if (MM) 1444 | *MM = month; 1445 | if (DD) 1446 | *DD = day; 1447 | return (year >= 0 && month >= 0 && day >= 0); 1448 | } 1449 | 1450 | static bool scan_time(const char* p, int* hh, int* mm, int* ss) { 1451 | int hour = scan_digits(p, 2); 1452 | int minute = (hour >= 0 && p[2] == ':') ? scan_digits(p + 3, 2) : -1; 1453 | int second = (minute >= 0 && p[5] == ':') ? scan_digits(p + 6, 2) : -1; 1454 | if (hh) 1455 | *hh = hour; 1456 | if (mm) 1457 | *mm = minute; 1458 | if (ss) 1459 | *ss = second; 1460 | return (hour >= 0 && minute >= 0); 1461 | } 1462 | 1463 | static int parse_millisec(const char* p, const char** endp) { 1464 | int ret = 0; 1465 | int unit = 100; /// unit in millisec 1466 | for (; '0' <= *p && *p <= '9'; p++, unit /= 10) 1467 | ret += (*p - '0') * unit; 1468 | *endp = p; 1469 | return ret; 1470 | } 1471 | 1472 | static bool scan_offset(const char* p, int* tz) { 1473 | int sign = p[0]; 1474 | int hour = scan_digits(p + 1, 2); 1475 | int minute = (hour >= 0 && p[3] == ':') ? scan_digits(p + 4, 2) : -1; 1476 | if (hour < -12 || hour > 14 || minute < 0 || minute > 59) 1477 | return false; 1478 | if (tz) { 1479 | *tz = hour * 60 + minute; 1480 | if (sign == '-') 1481 | *tz = -(*tz); 1482 | } 1483 | return true; 1484 | } 1485 | 1486 | static int scan_string(context_t* ctx, char* p, toml_pos_t* pos, bool dotisspecial) { 1487 | char* orig = p; 1488 | 1489 | // Literal multiline. 
1490 | if (strncmp(p, "'''", 3) == 0) { 1491 | char* q = p + 3; 1492 | pos->col += 3; 1493 | while (true) { 1494 | q = strstr(q, "'''"); 1495 | if (q == 0) 1496 | return e_syntax(ctx, *pos, "unterminated triple quote (''')"); 1497 | int i = 0; 1498 | while (q[3] == '\'') { 1499 | i++; 1500 | if (i >= 3) 1501 | return e_syntax(ctx, *pos, "too many ''' in triple-s-quote"); 1502 | q++; 1503 | } 1504 | break; 1505 | } 1506 | set_token(ctx, MSTRING, *pos, orig, q + 3 - orig); 1507 | return 0; 1508 | } 1509 | 1510 | // Multiline. 1511 | if (strncmp(p, "\"\"\"", 3) == 0) { 1512 | char* q = p + 3; 1513 | pos->col += 3; 1514 | while (true) { 1515 | q = strstr(q, "\"\"\""); 1516 | if (q == 0) 1517 | return e_syntax(ctx, *pos, "unterminated triple quote (\"\"\")"); 1518 | if (q[-1] == '\\') { 1519 | q++; 1520 | continue; 1521 | } 1522 | int i = 0; 1523 | while (q[3] == '\"') { 1524 | i++; 1525 | if (i >= 3) 1526 | return e_syntax(ctx, *pos, "too many \"\"\" in triple-d-quote"); 1527 | q++; 1528 | } 1529 | break; 1530 | } 1531 | 1532 | /// the string is [p+3, q-1] 1533 | int hexreq = 0; /// #hex required 1534 | bool escape = false; 1535 | for (p += 3; p < q; p++) { 1536 | if (escape) { 1537 | escape = false; 1538 | if (strchr("btnfre\"\\", *p)) 1539 | continue; 1540 | if (*p == 'x') { 1541 | hexreq = 2; 1542 | continue; 1543 | } 1544 | if (*p == 'u') { 1545 | hexreq = 4; 1546 | continue; 1547 | } 1548 | if (*p == 'U') { 1549 | hexreq = 8; 1550 | continue; 1551 | } 1552 | if (p[strspn(p, " \t\r")] == '\n') 1553 | continue; // allow for line ending backslash 1554 | return e_syntax(ctx, *pos, "bad escape char"); 1555 | } 1556 | if (hexreq) { 1557 | hexreq--; 1558 | if (strchr("0123456789ABCDEFabcdef", *p)) 1559 | continue; 1560 | return e_syntax(ctx, *pos, "expected hex char"); 1561 | } 1562 | if (*p == '\\') { 1563 | escape = true; 1564 | continue; 1565 | } 1566 | } 1567 | if (escape) // TODO: unreachable, I think? 
1568 | return e_syntax(ctx, *pos, "expected an escape char"); 1569 | if (hexreq) 1570 | return e_syntax(ctx, *pos, "expected more hex char"); 1571 | 1572 | set_token(ctx, MSTRING, *pos, orig, q + 3 - orig); 1573 | return 0; 1574 | } 1575 | 1576 | // Literal string. 1577 | if (*p == '\'') { 1578 | for (p++; *p && *p != '\n' && *p != '\''; p++) 1579 | pos->col++; 1580 | if (*p != '\'') 1581 | return e_syntax(ctx, *pos, "unterminated quote (')"); 1582 | set_token(ctx, STRING, *pos, orig, p + 1 - orig); 1583 | return 0; 1584 | } 1585 | 1586 | // Basic String. 1587 | if (*p == '\"') { 1588 | int hexreq = 0; /// #hex required 1589 | bool escape = false; 1590 | for (p++; *p; p++) { 1591 | pos->col++; 1592 | if (escape) { 1593 | escape = false; 1594 | if (strchr("btnfre\"\\", *p)) 1595 | continue; 1596 | if (*p == 'x') { 1597 | hexreq = 2; 1598 | continue; 1599 | } 1600 | if (*p == 'u') { 1601 | hexreq = 4; 1602 | continue; 1603 | } 1604 | if (*p == 'U') { 1605 | hexreq = 8; 1606 | continue; 1607 | } 1608 | return e_syntax(ctx, *pos, "bad escape char"); 1609 | } 1610 | if (hexreq) { 1611 | hexreq--; 1612 | if (strchr("0123456789ABCDEFabcdef", *p)) 1613 | continue; 1614 | return e_syntax(ctx, *pos, "expected hex char"); 1615 | } 1616 | if (*p == '\\') { 1617 | escape = true; 1618 | continue; 1619 | } 1620 | if (*p == '\n') 1621 | break; 1622 | if (*p == '"') 1623 | break; 1624 | } 1625 | if (*p != '"') 1626 | return e_syntax(ctx, *pos, "unterminated quote (\")"); 1627 | 1628 | set_token(ctx, STRING, *pos, orig, p + 1 - orig); 1629 | return 0; 1630 | } 1631 | 1632 | // Time 1633 | if (!dotisspecial && scan_time(p, 0, 0, 0)) { 1634 | p += strspn(p, "0123456789:"); /// forward thru the time. 
1635 | if (p[0] == '.') { /// Subseconds 1636 | int n = strspn(++p, "0123456789"); 1637 | if (n == 0) 1638 | return e_syntax(ctx, *pos, "extra chars after '.'"); 1639 | p += n; 1640 | } 1641 | for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string 1642 | ; 1643 | set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize 1644 | return 0; 1645 | } 1646 | 1647 | // Datetime 1648 | if (!dotisspecial && scan_date(p, 0, 0, 0)) { 1649 | p += strspn(p, "0123456789-"); /// forward thru the date 1650 | if (p[0] == ' ' || p[0] == 't' || p[0] == 'T') { /// forward thru the time 1651 | p++; 1652 | p += strspn(p, "0123456789:"); 1653 | if (p[0] == '.') { /// Subseconds 1654 | int n = strspn(++p, "0123456789"); 1655 | if (n == 0) 1656 | return e_syntax(ctx, *pos, "extra chars after '.'"); 1657 | p += n; 1658 | } 1659 | } 1660 | 1661 | // Offset 1662 | if (p[0] == 'Z' || p[0] == 'z') { 1663 | p++; 1664 | } else if (p[0] == '+' || p[0] == '-') { 1665 | if (!scan_offset(p, 0)) 1666 | return e_syntax(ctx, *pos, "invalid offset"); 1667 | p += 6; 1668 | } 1669 | 1670 | for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string 1671 | ; 1672 | set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize 1673 | return 0; 1674 | } 1675 | 1676 | // Literals 1677 | for (; *p && *p != '\n'; p++) { 1678 | int ch = *p; 1679 | if (ch == '.' && dotisspecial) 1680 | break; 1681 | if ('A' <= ch && ch <= 'Z') 1682 | continue; 1683 | if ('a' <= ch && ch <= 'z') 1684 | continue; 1685 | if (strchr("0123456789+-_.", ch)) 1686 | continue; 1687 | break; 1688 | } 1689 | 1690 | set_token(ctx, STRING, *pos, orig, p - orig); 1691 | return 0; 1692 | } 1693 | 1694 | static int next_token(context_t* ctx, bool dotisspecial) { 1695 | // Eat this tok. 
1696 | char* p = ctx->tok.ptr; 1697 | toml_pos_t pos = ctx->tok.pos; 1698 | for (int i = 0; i < ctx->tok.len; i++) { 1699 | pos.col++; 1700 | if (*p++ == '\n') { 1701 | pos.line++; 1702 | pos.col = 1; 1703 | } 1704 | } 1705 | 1706 | /// Make next tok 1707 | while (p < ctx->stop) { 1708 | if (*p == '#') { /// Skip comment. stop just before the \n. 1709 | for (p++; p < ctx->stop && *p != '\n'; p++) { 1710 | pos.col++; 1711 | if ((*p != '\t' && *p != '\r' && *p != '\n') && ((*p >= 0x00 && *p <= 0x1f) || *p == 0x7f)) 1712 | return e_syntax(ctx, pos, "invalid control character"); 1713 | if (*p == '\r' && p < ctx->stop + 1 && *(p + 1) != '\n') 1714 | return e_syntax(ctx, pos, "invalid control character"); 1715 | } 1716 | continue; 1717 | } 1718 | 1719 | if (dotisspecial && *p == '.') { 1720 | set_token(ctx, DOT, pos, p, 1); 1721 | return 0; 1722 | } 1723 | 1724 | switch (*p) { 1725 | case ',': set_token(ctx, COMMA, pos, p, 1); return 0; 1726 | case '=': set_token(ctx, EQUAL, pos, p, 1); return 0; 1727 | case '{': set_token(ctx, LBRACE, pos, p, 1); return 0; 1728 | case '}': set_token(ctx, RBRACE, pos, p, 1); return 0; 1729 | case '[': set_token(ctx, LBRACKET, pos, p, 1); return 0; 1730 | case ']': set_token(ctx, RBRACKET, pos, p, 1); return 0; 1731 | case '\n': set_token(ctx, NEWLINE, pos, p, 1); return 0; 1732 | case '\r': 1733 | case ' ': 1734 | case '\t': /// ignore white spaces 1735 | p++; 1736 | pos.col++; 1737 | continue; 1738 | } 1739 | 1740 | return scan_string(ctx, p, &pos, dotisspecial); 1741 | } 1742 | 1743 | set_eof(ctx, pos); 1744 | return 0; 1745 | } 1746 | 1747 | const char* toml_table_key(const toml_table_t* tbl, int keyidx, int* keylen) { 1748 | if (keyidx < tbl->nkval) { 1749 | *keylen = tbl->kval[keyidx]->keylen; 1750 | return tbl->kval[keyidx]->key; 1751 | } 1752 | if ((keyidx -= tbl->nkval) < tbl->narr) { 1753 | *keylen = tbl->arr[keyidx]->keylen; 1754 | return tbl->arr[keyidx]->key; 1755 | } 1756 | if ((keyidx -= tbl->narr) < tbl->ntbl) { 1757 | 
*keylen = tbl->tbl[keyidx]->keylen; 1758 | return tbl->tbl[keyidx]->key; 1759 | } 1760 | *keylen = 0; 1761 | return 0; 1762 | } 1763 | 1764 | toml_unparsed_t toml_table_unparsed(const toml_table_t* tbl, const char* key) { 1765 | for (int i = 0; i < tbl->nkval; i++) 1766 | if (strcmp(key, tbl->kval[i]->key) == 0) 1767 | return tbl->kval[i]->val; 1768 | return 0; 1769 | } 1770 | 1771 | toml_array_t* toml_table_array(const toml_table_t* tbl, const char* key) { 1772 | for (int i = 0; i < tbl->narr; i++) 1773 | if (strcmp(key, tbl->arr[i]->key) == 0) 1774 | return tbl->arr[i]; 1775 | return 0; 1776 | } 1777 | 1778 | toml_table_t* toml_table_table(const toml_table_t* tbl, const char* key) { 1779 | for (int i = 0; i < tbl->ntbl; i++) 1780 | if (strcmp(key, tbl->tbl[i]->key) == 0) 1781 | return tbl->tbl[i]; 1782 | return 0; 1783 | } 1784 | 1785 | toml_unparsed_t toml_array_unparsed(const toml_array_t* arr, int idx) { 1786 | return (0 <= idx && idx < arr->nitem) ? arr->item[idx].val : 0; 1787 | } 1788 | 1789 | int toml_table_len(const toml_table_t* tbl) { 1790 | return tbl->nkval + tbl->narr + tbl->ntbl; 1791 | } 1792 | 1793 | int toml_array_len(const toml_array_t* arr) { 1794 | return arr->nitem; 1795 | } 1796 | 1797 | toml_array_t* toml_array_array(const toml_array_t* arr, int idx) { 1798 | return (0 <= idx && idx < arr->nitem) ? arr->item[idx].arr : 0; 1799 | } 1800 | 1801 | toml_table_t* toml_array_table(const toml_array_t* arr, int idx) { 1802 | return (0 <= idx && idx < arr->nitem) ? 
arr->item[idx].tbl : 0; 1803 | } 1804 | 1805 | bool is_leap(int y) { 1806 | return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0); 1807 | } 1808 | 1809 | int toml_value_timestamp(toml_unparsed_t src_, toml_timestamp_t* ret) { 1810 | if (!src_) 1811 | return -1; 1812 | 1813 | const char* p = src_; 1814 | bool must_parse_time = false; 1815 | 1816 | memset(ret, 0, sizeof(*ret)); 1817 | 1818 | /// YYYY-MM-DD 1819 | if (scan_date(p, &ret->year, &ret->month, &ret->day)) { 1820 | if (ret->month < 1 || ret->day < 1 || ret->month > 12 || ret->day > 31) 1821 | return -1; 1822 | if (ret->month == 2 && ret->day > (is_leap(ret->year) ? 29 : 28)) 1823 | return -1; 1824 | ret->kind = 'D'; 1825 | 1826 | p += 10; 1827 | if (*p) { 1828 | if (*p != 'T' && *p != 't' && *p != ' ') /// T or space 1829 | return -1; 1830 | must_parse_time = true; 1831 | p++; 1832 | } 1833 | } 1834 | 1835 | /// HH:MM:SS 1836 | if (scan_time(p, &ret->hour, &ret->minute, &ret->second)) { 1837 | if (ret->minute < 0 || ret->hour < 0 || ret->hour > 23 || ret->minute > 59 || ret->second > 60) 1838 | return -1; 1839 | p += (ret->second == -1 ? 5 : 8); 1840 | ret->kind = (ret->kind == 'D' ? 'l' : 't'); 1841 | if (ret->second == -1) 1842 | ret->second = 0; 1843 | 1844 | if (*p == '.') { /// optionally, parse millisec 1845 | p++; /// skip '.' 1846 | const char* qq; 1847 | ret->millisec = parse_millisec(p, &qq); 1848 | p = qq; 1849 | } 1850 | 1851 | if (*p) { /// parse and copy Z 1852 | ret->kind = 'd'; 1853 | if (*p == 'Z' || *p == 'z') 1854 | p++; 1855 | else if (*p == '+' || *p == '-') { 1856 | if (!scan_offset(p, &ret->tz)) 1857 | return -1; 1858 | p += 6; 1859 | } 1860 | } 1861 | } 1862 | if (*p != 0) 1863 | return -1; 1864 | if (must_parse_time && ret->kind == 'D') 1865 | return -1; 1866 | return 0; 1867 | } 1868 | 1869 | // Raw to boolean 1870 | int toml_value_bool(toml_unparsed_t src, bool* ret_) { 1871 | if (!src) 1872 | return -1; 1873 | bool dummy = false; 1874 | bool* ret = ret_ ? 
ret_ : &dummy; 1875 | 1876 | if (strcmp(src, "true") == 0) { 1877 | *ret = true; 1878 | return 0; 1879 | } 1880 | if (strcmp(src, "false") == 0) { 1881 | *ret = false; 1882 | return 0; 1883 | } 1884 | return -1; 1885 | } 1886 | 1887 | // Raw to integer 1888 | int toml_value_int(toml_unparsed_t src, int64_t* ret_) { 1889 | if (!src) 1890 | return -1; 1891 | 1892 | char buf[100]; 1893 | char* p = buf; 1894 | char* q = p + sizeof(buf); 1895 | const char* s = src; 1896 | int64_t dummy = 0; 1897 | int64_t* ret = ret_ ? ret_ : &dummy; 1898 | bool have_sign = false; 1899 | 1900 | if (s[0] == '+' || s[0] == '-') { /// allow +/- 1901 | have_sign = true; 1902 | *p++ = *s++; 1903 | } 1904 | 1905 | if (s[0] == '_') /// disallow +_100 1906 | return -1; 1907 | 1908 | int base = 0; 1909 | if (s[0] == '0') { /// if 0* ... 1910 | switch (s[1]) { 1911 | case 'x': 1912 | base = 16; 1913 | s += 2; 1914 | break; 1915 | case 'o': 1916 | base = 8; 1917 | s += 2; 1918 | break; 1919 | case 'b': 1920 | base = 2; 1921 | s += 2; 1922 | break; 1923 | case '\0': return *ret = 0, 0; 1924 | default: 1925 | if (s[1]) /// ensure no other digits after it 1926 | return -1; 1927 | } 1928 | if (!*s) 1929 | return -1; 1930 | if (have_sign) /// disallow +0xff, -0xff 1931 | return -1; 1932 | if (s[0] == '_') /// disallow 0x_, 0o_, 0b_ 1933 | return -1; 1934 | if (s[0] == '+' || s[0] == '-') /// disallow 0x+10, 0x-10 1935 | return -1; 1936 | } 1937 | 1938 | while (*s && p < q) { /// just strip underscores and pass to strtoll 1939 | int ch = *s++; 1940 | if (ch == '_') { 1941 | if (s[0] == '_') /// disallow '__' 1942 | return -1; 1943 | if (s[0] == '\0') /// numbers cannot end with '_' 1944 | return -1; 1945 | continue; /// skip _ 1946 | } 1947 | *p++ = ch; 1948 | } 1949 | 1950 | if (*s || p == q) /// if not at end-of-string or we ran out of buffer ... 
1951 | return -1; 1952 | 1953 | *p = 0; /// cap with NUL 1954 | 1955 | /// Run strtoll on buf to get the integer 1956 | char* endp; 1957 | errno = 0; 1958 | *ret = strtoll(buf, &endp, base); 1959 | return (errno || *endp) ? -1 : 0; 1960 | } 1961 | 1962 | int toml_value_double(toml_unparsed_t src, double* ret_) { 1963 | if (!src) 1964 | return -1; 1965 | 1966 | char buf[100]; 1967 | char* p = buf; 1968 | char* q = p + sizeof(buf); 1969 | const char* s = src; 1970 | double dummy = 0.0; 1971 | double* ret = ret_ ? ret_ : &dummy; 1972 | 1973 | if (s[0] == '+' || s[0] == '-') /// allow +/- 1974 | *p++ = *s++; 1975 | 1976 | if (s[0] == '_') /// disallow +_1.00 1977 | return -1; 1978 | 1979 | { /// decimal point, if used, must be surrounded by at least one digit on each side 1980 | char* dot = strchr(s, '.'); 1981 | if (dot) { 1982 | if (dot == s || !isdigit(dot[-1]) || !isdigit(dot[1])) 1983 | return -1; 1984 | } 1985 | } 1986 | 1987 | /// zero must be followed by . or 'e', or NUL 1988 | if (s[0] == '0' && s[1] && !strchr("eE.", s[1])) 1989 | return -1; 1990 | 1991 | /// Just strip underscores and pass to strtod 1992 | bool have_us = false; 1993 | while (*s && p < q) { 1994 | int ch = *s++; 1995 | if (ch == '_') { 1996 | have_us = true; 1997 | if (s[0] == '_') /// disallow '__' 1998 | return -1; 1999 | if (s[0] == 'e') /// disallow _e 2000 | return -1; 2001 | if (s[0] == 0) /// disallow last char '_' 2002 | return -1; 2003 | continue; /// skip _ 2004 | } 2005 | if (ch == 'I' || ch == 'N' || ch == 'F' || ch == 'A') /// inf and nan are case-sensitive. 2006 | return -1; 2007 | if (ch == 'e' && s[0] == '_') /// disallow e_ 2008 | return -1; 2009 | *p++ = ch; 2010 | } 2011 | if (*s || p == q) 2012 | return -1; /// reached end of string or buffer is full? 
2013 | 2014 | *p = 0; /// cap with NUL 2015 | 2016 | /// Run strtod on buf to get the value 2017 | char* endp; 2018 | errno = 0; 2019 | *ret = strtod(buf, &endp); 2020 | if (errno || *endp) 2021 | return -1; 2022 | if (have_us && (isnan(*ret) || isinf(*ret))) 2023 | return -1; 2024 | return 0; 2025 | } 2026 | 2027 | int toml_value_string(toml_unparsed_t src, char** ret, int* len) { 2028 | bool multiline = false; 2029 | const char* sp; 2030 | const char* sq; 2031 | 2032 | *ret = 0; 2033 | if (!src) 2034 | return -1; 2035 | 2036 | /// First char must be a s-quote or d-quote 2037 | int qchar = src[0]; 2038 | int srclen = strlen(src); 2039 | if (!(qchar == '\'' || qchar == '"')) { 2040 | return -1; 2041 | } 2042 | 2043 | /// triple quotes? 2044 | if (qchar == src[1] && qchar == src[2]) { 2045 | multiline = true; /// triple-quote implies multiline 2046 | sp = src + 3; /// first char after quote 2047 | sq = src + srclen - 3; /// first char of ending quote 2048 | 2049 | if (!(sp <= sq && sq[0] == qchar && sq[1] == qchar && sq[2] == qchar)) 2050 | return -1; /// last 3 chars in src must be qchar 2051 | 2052 | if (sp[0] == '\n') /// skip new line immediate after qchar 2053 | sp++; 2054 | else if (sp[0] == '\r' && sp[1] == '\n') 2055 | sp += 2; 2056 | } else { 2057 | sp = src + 1; /// first char after quote 2058 | sq = src + srclen - 1; /// ending quote 2059 | if (!(sp <= sq && *sq == qchar)) /// last char in src must be qchar 2060 | return -1; 2061 | } 2062 | 2063 | /// at this point: 2064 | /// sp points to first valid char after quote. 2065 | /// sq points to one char beyond last valid char. 2066 | /// string len is (sq - sp). 2067 | if (qchar == '\'') 2068 | *ret = norm_lit_str(sp, sq - sp, len, multiline, 0, 0); 2069 | else 2070 | *ret = norm_basic_str(sp, sq - sp, len, multiline, 0, 0); 2071 | return *ret ? 
0 : -1; 2072 | } 2073 | 2074 | toml_value_t toml_array_string(const toml_array_t* arr, int idx) { 2075 | toml_value_t ret; 2076 | memset(&ret, 0, sizeof(ret)); 2077 | ret.ok = (toml_value_string(toml_array_unparsed(arr, idx), &ret.u.s, &ret.u.sl) == 0); 2078 | return ret; 2079 | } 2080 | 2081 | toml_value_t toml_array_bool(const toml_array_t* arr, int idx) { 2082 | toml_value_t ret; 2083 | memset(&ret, 0, sizeof(ret)); 2084 | ret.ok = (toml_value_bool(toml_array_unparsed(arr, idx), &ret.u.b) == 0); 2085 | return ret; 2086 | } 2087 | 2088 | toml_value_t toml_array_int(const toml_array_t* arr, int idx) { 2089 | toml_value_t ret; 2090 | memset(&ret, 0, sizeof(ret)); 2091 | ret.ok = (toml_value_int(toml_array_unparsed(arr, idx), &ret.u.i) == 0); 2092 | return ret; 2093 | } 2094 | 2095 | toml_value_t toml_array_double(const toml_array_t* arr, int idx) { 2096 | toml_value_t ret; 2097 | memset(&ret, 0, sizeof(ret)); 2098 | ret.ok = (toml_value_double(toml_array_unparsed(arr, idx), &ret.u.d) == 0); 2099 | return ret; 2100 | } 2101 | 2102 | toml_value_t toml_array_timestamp(const toml_array_t* arr, int idx) { 2103 | toml_value_t ret; 2104 | memset(&ret, 0, sizeof(ret)); 2105 | ret.ok = (toml_value_timestamp(toml_array_unparsed(arr, idx), &ret.u.ts) == 0); 2106 | return ret; 2107 | } 2108 | 2109 | toml_value_t toml_table_string(const toml_table_t* tbl, const char* key) { 2110 | toml_value_t ret; 2111 | memset(&ret, 0, sizeof(ret)); 2112 | toml_unparsed_t raw = toml_table_unparsed(tbl, key); 2113 | if (raw) 2114 | ret.ok = (toml_value_string(raw, &ret.u.s, &ret.u.sl) == 0); 2115 | return ret; 2116 | } 2117 | 2118 | toml_value_t toml_table_bool(const toml_table_t* tbl, const char* key) { 2119 | toml_value_t ret; 2120 | memset(&ret, 0, sizeof(ret)); 2121 | ret.ok = (toml_value_bool(toml_table_unparsed(tbl, key), &ret.u.b) == 0); 2122 | return ret; 2123 | } 2124 | 2125 | toml_value_t toml_table_int(const toml_table_t* tbl, const char* key) { 2126 | toml_value_t ret; 2127 | 
memset(&ret, 0, sizeof(ret)); 2128 | ret.ok = (toml_value_int(toml_table_unparsed(tbl, key), &ret.u.i) == 0); 2129 | return ret; 2130 | } 2131 | 2132 | toml_value_t toml_table_double(const toml_table_t* tbl, const char* key) { 2133 | toml_value_t ret; 2134 | memset(&ret, 0, sizeof(ret)); 2135 | ret.ok = (toml_value_double(toml_table_unparsed(tbl, key), &ret.u.d) == 0); 2136 | return ret; 2137 | } 2138 | 2139 | toml_value_t toml_table_timestamp(const toml_table_t* tbl, const char* key) { 2140 | toml_value_t ret; 2141 | memset(&ret, 0, sizeof(ret)); 2142 | ret.ok = (toml_value_timestamp(toml_table_unparsed(tbl, key), &ret.u.ts) == 0); 2143 | return ret; 2144 | } 2145 | #endif // TOML_H 2146 | --------------------------------------------------------------------------------