├── .github
    ├── FUNDING.yml
    └── workflows
    │   └── test.yml
├── .gitattributes
├── .gitignore
├── libtoml.pc
├── example
    ├── Makefile
    ├── table.c
    └── array.c
├── .editorconfig
├── .clang-format
├── README.md
├── LICENSE
├── Makefile
├── test.bash
├── toml.h
├── toml2json.c
├── toml-c-test.c
├── toml.c
└── header
    └── toml-c.h


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: arp242
2 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | header/toml-c.h linguist-generated=true
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.o
 2 | *.so
 3 | *.a
 4 | *.gch
 5 | *.exe
 6 | *.gcda
 7 | *.gcno
 8 | *.gcov
 9 | *.dSYM
10 | /toml2json
11 | /toml-c-test
12 | /libtoml.so.1.0
13 | /example/array
14 | /example/table
15 | 


--------------------------------------------------------------------------------
/libtoml.pc:
--------------------------------------------------------------------------------
 1 | prefix=%%PREFIX%%
 2 | exec_prefix=${prefix}
 3 | libdir=${prefix}/lib
 4 | includedir=${prefix}/include
 5 | 
 6 | Name: libtoml
 7 | URL: https://github.com/arp242/toml-c/
 8 | Description: TOML C library
 9 | Version: 1.0
10 | Libs: -L${libdir} -ltoml
11 | Cflags: -I${includedir}
12 | 


--------------------------------------------------------------------------------
/example/Makefile:
--------------------------------------------------------------------------------
 1 | CC     = cc
 2 | CFLAGS = -std=c99 -Wall -Wextra -Wimplicit-fallthrough -fPIC -O2 -g
 3 | 
 4 | .PHONY: all clean
 5 | 
 6 | all: array table
 7 | # The examples #include the header-only build, so rebuild them when it changes.
 8 | array: array.c ../header/toml-c.h
 9 | 	${CC} ${CFLAGS} -o $@ $@.c
10 | 
11 | table: table.c ../header/toml-c.h
12 | 	${CC} ${CFLAGS} -o $@ $@.c
13 | 
14 | clean:
15 | 	rm -f array table
16 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | end_of_line = lf
 5 | insert_final_newline = true
 6 | trim_trailing_whitespace = true
 7 | indent_style = tab
 8 | indent_size = 4
 9 | 
10 | [Makefile]
11 | indent_size = 8
12 | 
13 | [*.md]
14 | indent_style = space
15 | 
16 | [*.{yml,yaml}]
17 | indent_style = space
18 | indent_size = 2
19 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: 'test'
 2 | on:   ['push', 'pull_request']
 3 | jobs:
 4 |   test:
 5 |     strategy:
 6 |       fail-fast: false
 7 |       matrix: {os: ['ubuntu-latest', 'macos-latest', 'windows-latest']}
 8 | 
 9 |     runs-on: ${{ matrix.os }}
10 |     steps:
11 |     - uses: 'actions/setup-go@v6'
12 |       with: {go-version: '1.25'}
13 |     - uses: 'actions/checkout@v6'
14 | 
15 |     - run: 'go install github.com/toml-lang/toml-test/v2/cmd/toml-test@v2.0.0'
16 | 
17 |     - if:  runner.os != 'Windows'
18 |       run: 'make check CC=clang'
19 |     - if:  runner.os == 'Windows'
20 |       run: 'make check CC=gcc SANITIZER='
21 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
 1 | # Real men use real tabs.
 2 | IndentWidth: 4
 3 | TabWidth: 4
 4 | UseTab: 'ForIndentation'
 5 | 
 6 | # Don't frob with line endings so much.
 7 | ColumnLimit: 999
 8 | #BinPackArguments: false
 9 | #BinPackParameters: false
10 | AllowShortFunctionsOnASingleLine: 'None'
11 | BreakStringLiterals: false
12 | ReflowComments: false
13 | AllowShortCaseLabelsOnASingleLine: true
14 | AlignConsecutiveShortCaseStatements: {Enabled: true}
15 | AlignEscapedNewlines: 'Left'
16 | 
17 | # Align assignments and declarations.
18 | AlignConsecutiveAssignments:  {Enabled: true, AlignCompound: true}
19 | AlignConsecutiveDeclarations: {Enabled: true, PadOperators: false}
20 | 
21 | # Star next to the type name, as it's really part of the type.
22 | PointerAlignment: 'Left'
23 | 
24 | # Indent nested preprocessor directives.
25 | IndentPPDirectives: 'AfterHash'
26 | 
27 | # Group local ".." includes, and put them after system <..> includes.
28 | IncludeBlocks: 'Regroup'
29 | IncludeCategories: [
30 |   {Regex: '^"', Priority: 99, SortPriority: 99},
31 | ]
32 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | C library for parsing TOML 1.1. Passes all of the [TOML test suite].
 2 | 
 3 | This is a fork of https://github.com/cktan/tomlc99, as that didn't seem hugely
 4 | maintained, and has a number of errors. This library isn't compatible.
 5 | 
 6 | [TOML test suite]: https://github.com/toml-lang/toml-test
 7 | 
 8 | Installation
 9 | ------------
10 | This can be used in two ways: as a library or in "header only mode":
11 | 
12 | - "Header only mode" is to make it a bit easier to include this in a project:
13 |   just copy `header/toml-c.h` to your project and `#include "toml-c.h"` and
14 |   you're done – nothing else needed.
15 | 
16 |   It's essentially just "cat toml.h toml.c > toml-c.h" with a bit of frobbing.
17 | 
18 | - For "library mode" build `libtoml.so.1.0` and `libtoml.a` by just typing `make`.
19 | 
20 | Usage
21 | -----
22 | See `toml.h` and the `example` directory.
23 | 
24 | Testing
25 | -------
26 | Run `make check` to run the tests; this requires [toml-test] to be in $PATH.
27 | 
28 | [toml-test]: https://github.com/toml-lang/toml-test
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) CK Tan
 4 | https://github.com/cktan/tomlc99
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/example/table.c:
--------------------------------------------------------------------------------
 1 | #include "../header/toml-c.h"
 2 | 
 3 | char *doc = "\n"
 4 | 	"host = 'example.com'\n"
 5 | 	"port = 80\n"
 6 | 	"\n"
 7 | 	"[tbl]\n"
 8 | 	"key = 'value'\n"
 9 | 	"[tbl.sub]\n"
10 | 	"subkey = 'subvalue'\n";
11 | 
12 | int main(void) {
13 | 	char errbuf[200];
14 | 	toml_table_t *tbl = toml_parse(doc, errbuf, sizeof(errbuf));
15 | 	if (!tbl) {
16 | 		fprintf(stderr, "ERROR: %s\n", errbuf);
17 | 		exit(1);
18 | 	}
19 | 
20 | 	// Get specific keys.
21 | 	toml_value_t host = toml_table_string(tbl, "host");
22 | 	toml_value_t port = toml_table_int(tbl, "port");
23 | 	if (!host.ok) // Default values.
24 | 		host.u.s = "localhost";
25 | 	if (!port.ok) // The default belongs in port; writing host.u.i would clobber host.u.s.
26 | 		port.u.i = 80;
27 | 	printf("%s:%lld\n", host.u.s, (long long)port.u.i); // int64_t is not always long, so cast.
28 | 
29 | 	// Get a table.
30 | 	toml_table_t *sub_tbl = toml_table_table(tbl, "tbl");
31 | 	if (sub_tbl) {
32 | 		// Loop over all keys in a table.
33 | 		int l = toml_table_len(sub_tbl);
34 | 		for (int i = 0; i < l; i++) {
35 | 			int keylen;
36 | 			const char *key = toml_table_key(sub_tbl, i, &keylen);
37 | 			printf("key #%d: %s\n", i, key);
38 | 			// TODO: this should return toml_key_t or something, which also
39 | 			// includes the type. This actually requires a bit of frobbing with
40 | 			// the lexer, as that just sets the type of everything to STRING.
41 | 			//
42 | 			// Then we can also get rid of toml_table_{string,int,...} and just
43 | 			// parse it automatically.
44 | 		}
45 | 	}
46 | }
47 | 


--------------------------------------------------------------------------------
/example/array.c:
--------------------------------------------------------------------------------
 1 | #include "../header/toml-c.h"
 2 | 
 3 | char *doc = "\n"
 4 | 	"ints  = [1, 2, 3]\n"
 5 | 	"mixed = [1, 'one', 1.2]\n"
 6 | 	"\n"
 7 | 	"[[aot]]\n"
 8 | 	"k = 'one'\n"
 9 | 	"[[aot]]\n"
10 | 	"k = 'two'\n";
11 | 
12 | int main(void) {
13 | 	char errbuf[200];
14 | 	toml_table_t *tbl = toml_parse(doc, errbuf, sizeof(errbuf));
15 | 	if (!tbl) {
16 | 		fprintf(stderr, "ERROR: %s\n", errbuf);
17 | 		exit(1);
18 | 	}
19 | 
20 | 	// Array of ints.
21 | 	toml_array_t *arr = toml_table_array(tbl, "ints");
22 | 	int l = toml_array_len(arr);
23 | 	printf("ints:\n");
24 | 	for (int i = 0; i < l; i++) // int64_t is not always long, so cast for printf.
25 | 		printf("  index %d = %lld\n", i, (long long)toml_array_int(arr, i).u.i);
26 | 	printf("\n");
27 | 
28 | 	// Mixed array.
29 | 	arr = toml_table_array(tbl, "mixed");
30 | 	l = toml_array_len(arr);
31 | 	printf("mixed:\n");
32 | 	for (int i = 0; i < l; i++) {
33 | 		// TODO: like with table.c, this also would be tons easier if record and
34 | 		// return the type.
35 | 		toml_value_t v = toml_array_int(arr, i);
36 | 		if (v.ok) {
37 | 			printf("  index %d = (int)%lld\n", i, (long long)v.u.i);
38 | 			continue;
39 | 		}
40 | 		v = toml_array_double(arr, i);
41 | 		if (v.ok) {
42 | 			printf("  index %d = (float)%0.17g\n", i, v.u.d);
43 | 			continue;
44 | 		}
45 | 		v = toml_array_string(arr, i);
46 | 		if (v.ok) {
47 | 			printf("  index %d = (string)\"%s\"\n", i, v.u.s);
48 | 			free(v.u.s); // String values are owned by the caller.
49 | 		}
50 | 	}
51 | 	printf("\n");
52 | 
53 | 	// Array-of-tables works just like inline tables.
54 | 	arr = toml_table_array(tbl, "aot");
55 | 	for (int i = 0, n = toml_array_len(arr); i < n; i++) {
56 | 		toml_value_t v = toml_table_string(toml_array_table(arr, i), "k");
57 | 		printf("aot[%d].k = \"%s\"\n", i, v.u.s);
58 | 		free(v.u.s); // String values are owned by the caller.
59 | 	}
60 | 	toml_free(tbl); // Release the parsed document and every handle into it.
61 | }
62 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CC        = cc
 2 | PREFIX    = /usr/local
 3 | FPIC      = -fPIC
 4 | CFLAGS    = -std=c99 -Wall -Wextra -Wimplicit-fallthrough ${FPIC} -O2 -g
 5 | SANITIZER = -fsanitize=address -fsanitize=undefined
 6 | #COVERAGE  = -g3 -Og --coverage -lgcov -fprofile-arcs -ftest-coverage
 7 | 
 8 | HDRS   = toml.h
 9 | SRCS   = toml.c
10 | OBJS   = ${SRCS:.c=.o}
11 | PCFILE = libtoml.pc
12 | LIB    = libtoml.a
13 | SOLIB  = libtoml.so.1.0
14 | 
15 | .PHONY: all clean install check report
16 | 
17 | all: ${LIB} ${SOLIB} toml2json toml-c-test header/toml-c.h
18 | 
19 | header/toml-c.h: ${HDRS} ${SRCS}
20 | 	@echo 'create $@'
21 | 	@: >header/toml-c.h
22 | 	@sed  '/#endif \/\/ TOML_H/d; /#define TOML_H/a#ifndef _POSIX_C_SOURCE\n#define _POSIX_C_SOURCE 200809L\n#endif' toml.h >>header/toml-c.h
23 | 	@sed  '/#include "toml.h"/d; /_POSIX_C_SOURCE/d' toml.c >>header/toml-c.h
24 | 	@echo '#endif // TOML_H' >>header/toml-c.h
25 | 
26 | toml.o: toml.c ${HDRS}
27 | 	${CC} ${CFLAGS} -c $<
28 | 
29 | libtoml.a: ${OBJS}
30 | 	ar -rcs $@ $^
31 | 
32 | libtoml.so.1.0: ${OBJS}
33 | 	${CC} ${CFLAGS} -shared -o $@ $^
34 | 
35 | toml2json: toml2json.c ${HDRS} ${SRCS}
36 | 	${CC} ${CFLAGS} -o toml2json ${SANITIZER} ${COVERAGE} toml.c toml2json.c
37 | 
38 | toml-c-test: toml-c-test.c ${HDRS} ${SRCS}
39 | 	${CC} ${CFLAGS} -o toml-c-test ${SANITIZER} ${COVERAGE} toml.c toml-c-test.c
40 | 
41 | install: all
42 | 	install -d ${DESTDIR}${PREFIX}/include ${DESTDIR}${PREFIX}/lib ${DESTDIR}${PREFIX}/lib/pkgconfig
43 | 	install toml.h   ${DESTDIR}${PREFIX}/include
44 | 	install ${LIB}   ${DESTDIR}${PREFIX}/lib
45 | 	install ${SOLIB} ${DESTDIR}${PREFIX}/lib
46 | 	sed 's!%%PREFIX%%!${PREFIX}!' ${PCFILE} >${DESTDIR}${PREFIX}/lib/pkgconfig/${PCFILE}
47 | 
48 | check: toml2json toml-c-test
49 | 	@./test.bash
50 | 	@echo
51 | 	@./toml-c-test
52 | 
53 | report: check
54 | 	@[ -f toml2json-toml.gcda ] && mv toml2json-toml.gcda toml.gcda ||:
55 | 	@[ -f toml2json-toml.gcno ] && mv toml2json-toml.gcno toml.gcno ||:
56 | 	gcov -kt toml.c
57 | 
58 | clean:
59 | 	rm -f *.o *.gcov *.gcda *.gcno toml2json toml-c-test ${LIB} ${SOLIB}
60 | 


--------------------------------------------------------------------------------
/test.bash:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Also compatible with zsh, but not POSIX sh.
 3 | #
 4 | # Run the toml-test compliance tests: https://github.com/toml-lang/toml-test
 5 | 
 6 | decoder="./toml2json"
 7 | toml=1.1.0
 8 | 
 9 | # Skip known failures.
10 | skip=(
11 |     # Extending existing tables
12 |     -skip 'invalid/array/extending-table'
13 |     -skip 'invalid/table/append-with-dotted-keys-01'
14 |     -skip 'invalid/table/append-with-dotted-keys-02'
15 |     -skip 'invalid/inline-table/overwrite-02'
16 | 
17 |     # Encoding
18 |     -skip 'invalid/encoding/bad-codepoint'
19 |     -skip 'invalid/encoding/bad-utf8-in-comment'
20 |     -skip 'invalid/encoding/utf16-comment'
21 |     -skip 'invalid/encoding/utf16-key'
22 | 
23 |     # Allows "invalid" control characters
24 |     -skip 'invalid/control/bare-cr'
25 |     -skip 'invalid/control/bare-null'
26 |     -skip 'invalid/control/comment-null'
27 |     -skip 'invalid/control/multi-cr'
28 |     -skip 'invalid/control/only-null'
29 |     -skip 'invalid/control/rawmulti-cr'
30 | 
31 |     # TOML 1.1
32 |     -skip 'invalid/inline-table/trailing-comma'
33 | )
34 | 
35 | # Find toml-test: current dir, $PATH, $GOBIN, $GOPATH/bin, then $HOME/go/bin.
36 | tt=
37 | if [[ -x "./toml-test" ]]; then
38 | 	tt="./toml-test"
39 | elif command -v "toml-test" >/dev/null; then
40 | 	tt="toml-test"
41 | elif [[ -n "$(go env GOBIN 2>/dev/null)" ]] && [[ -x "$(go env GOBIN)/toml-test" ]]; then
42 | 	tt="$(go env GOBIN)/toml-test"
43 | elif [[ -n "$(go env GOPATH 2>/dev/null)" ]] && [[ -x "$(go env GOPATH)/bin/toml-test" ]]; then
44 | 	tt="$(go env GOPATH)/bin/toml-test"
45 | elif [[ -x "$HOME/go/bin/toml-test" ]]; then
46 | 	tt="$HOME/go/bin/toml-test"
47 | fi
48 | if ! command -v "$tt" >/dev/null; then
49 | 	echo >&2 'toml-test not in current dir, $PATH, $GOBIN, $GOPATH/bin, or $HOME/go/bin; install with:'
50 | 	echo >&2 '    % go install github.com/toml-lang/toml-test/v2/cmd/toml-test@latest'
51 | 	echo >&2
52 | 	echo >&2 'Or download a binary from:'
53 | 	echo >&2 '    https://github.com/toml-lang/toml-test/releases'
54 | 	exit 1
55 | fi
56 | 
57 | "$tt" test -toml="$toml" -skip-must-err "${skip[@]}" -decoder="$decoder" "$@"
58 | 


--------------------------------------------------------------------------------
/toml.h:
--------------------------------------------------------------------------------
  1 | #ifndef TOML_H
  2 | #define TOML_H
  3 | #ifdef _MSC_VER
  4 | #	pragma warning(disable : 4996)
  5 | #endif
  6 | #ifdef __cplusplus
  7 | #	define TOML_EXTERN extern "C"
  8 | #else
  9 | #	define TOML_EXTERN extern
 10 | #endif
 11 | 
 12 | #include <stdbool.h>
 13 | #include <stdint.h>
 14 | #include <stdio.h>
 15 | 
 16 | typedef struct toml_table_t     toml_table_t;
 17 | typedef struct toml_array_t     toml_array_t;
 18 | typedef struct toml_value_t     toml_value_t;
 19 | typedef struct toml_timestamp_t toml_timestamp_t;
 20 | typedef struct toml_keyval_t    toml_keyval_t;
 21 | typedef struct toml_arritem_t   toml_arritem_t;
 22 | typedef struct toml_pos_t       toml_pos_t;
 23 | 
 24 | // TOML table.
 25 | struct toml_table_t {
 26 | 	const char* key;      // Key for this table
 27 | 	int         keylen;   // length of key.
 28 | 	bool        implicit; // Table was created implicitly
 29 | 	bool        readonly; // No more modification allowed
 30 | 
 31 | 	int             nkval; // key-values in the table
 32 | 	toml_keyval_t** kval;
 33 | 	int             narr; // arrays in the table
 34 | 	toml_array_t**  arr;
 35 | 	int             ntbl; // tables in the table
 36 | 	toml_table_t**  tbl;
 37 | };
 38 | 
 39 | // TOML array.
 40 | struct toml_array_t {
 41 | 	const char*     key;    // key to this array
 42 | 	int             keylen; // length of key.
 43 | 	int             kind;   // element kind: 'v'alue, 'a'rray, or 't'able, 'm'ixed
 44 | 	int             type;   // for value kind: 'i'nt, 'd'ouble, 'b'ool, 's'tring, 't'ime, 'D'ate, 'T'imestamp, 'm'ixed
 45 | 	int             nitem;  // number of elements
 46 | 	toml_arritem_t* item;
 47 | };
 48 | struct toml_arritem_t {
 49 | 	int           valtype; // for value kind: 'i'nt, 'd'ouble, 'b'ool, 's'tring, 't'ime, 'D'ate, 'T'imestamp
 50 | 	char*         val;
 51 | 	toml_array_t* arr;
 52 | 	toml_table_t* tbl;
 53 | };
 54 | 
 55 | // TOML key/value pair.
 56 | struct toml_keyval_t {
 57 | 	const char* key;    // key to this value
 58 | 	int         keylen; // length of key.
 59 | 	const char* val;    // the raw value
 60 | };
 61 | 
 62 | // Token position.
 63 | struct toml_pos_t {
 64 | 	int line;
 65 | 	int col;
 66 | };
 67 | 
 68 | // Timestamp type; some values may be empty depending on the value of kind.
 69 | struct toml_timestamp_t {
 70 | 	// datetime type:
 71 | 	//
 72 | 	//   'd'atetime          Full date + time + TZ
 73 | 	//   'l'ocal-datetime    Full date + time but without TZ
 74 | 	//   'D'ate-local        Date only, without TZ
 75 | 	//   't'ime-local        Time only, without TZ
 76 | 	char kind;
 77 | 
 78 | 	int year, month, day;
 79 | 	int hour, minute, second, millisec;
 80 | 	int tz; // Timezone offset in minutes
 81 | };
 82 | 
 83 | // Parsed TOML value.
 84 | //
 85 | // The string value s is a regular NUL-terminated C string, but the string
 86 | // length is also given in sl since TOML values may contain NUL bytes. The
 87 | // value is guaranteed to be correct UTF-8.
 88 | struct toml_value_t {
 89 | 	bool ok; // Was this value present?
 90 | 	union {
 91 | 		struct {
 92 | 			char* s;  // string value; must be freed after use.
 93 | 			int   sl; // string length, excluding the NUL terminator.
 94 | 		};
 95 | 		toml_timestamp_t ts; // datetime
 96 | 		bool             b;  // bool
 97 | 		int64_t          i;  // int
 98 | 		double           d;  // double
 99 | 	} u;
100 | };
101 | 
102 | // toml_parse() parses a TOML document from a string. Returns 0 on error, with
103 | // the error message stored in errbuf.
104 | //
105 | // toml_parse_file() is identical, but reads from a FILE* stream.
106 | //
107 | // Use toml_free() to free the return value; this will invalidate all handles
108 | // for this table.
109 | TOML_EXTERN toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz);
110 | TOML_EXTERN toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz);
111 | TOML_EXTERN void          toml_free(toml_table_t* table);
112 | 
113 | // Table functions.
114 | //
115 | // toml_table_len() gets the number of direct keys for this table;
116 | // toml_table_key() gets the nth direct key in this table.
117 | TOML_EXTERN int           toml_table_len(const toml_table_t* table);
118 | TOML_EXTERN const char*   toml_table_key(const toml_table_t* table, int keyidx, int* keylen);
119 | TOML_EXTERN toml_value_t  toml_table_string(const toml_table_t* table, const char* key);
120 | TOML_EXTERN toml_value_t  toml_table_bool(const toml_table_t* table, const char* key);
121 | TOML_EXTERN toml_value_t  toml_table_int(const toml_table_t* table, const char* key);
122 | TOML_EXTERN toml_value_t  toml_table_double(const toml_table_t* table, const char* key);
123 | TOML_EXTERN toml_value_t  toml_table_timestamp(const toml_table_t* table, const char* key);
124 | TOML_EXTERN toml_array_t* toml_table_array(const toml_table_t* table, const char* key);
125 | TOML_EXTERN toml_table_t* toml_table_table(const toml_table_t* table, const char* key);
126 | 
127 | // Array functions.
128 | TOML_EXTERN int           toml_array_len(const toml_array_t* array);
129 | TOML_EXTERN toml_value_t  toml_array_string(const toml_array_t* array, int idx);
130 | TOML_EXTERN toml_value_t  toml_array_bool(const toml_array_t* array, int idx);
131 | TOML_EXTERN toml_value_t  toml_array_int(const toml_array_t* array, int idx);
132 | TOML_EXTERN toml_value_t  toml_array_double(const toml_array_t* array, int idx);
133 | TOML_EXTERN toml_value_t  toml_array_timestamp(const toml_array_t* array, int idx);
134 | TOML_EXTERN toml_array_t* toml_array_array(const toml_array_t* array, int idx);
135 | TOML_EXTERN toml_table_t* toml_array_table(const toml_array_t* array, int idx);
136 | 
137 | #endif // TOML_H
138 | 


--------------------------------------------------------------------------------
/toml2json.c:
--------------------------------------------------------------------------------
  1 | #include <errno.h>
  2 | #include <inttypes.h>
  3 | #include <math.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | 
  7 | #include "toml.h"
  8 | 
  9 | typedef const char* toml_unparsed_t;
 10 | toml_unparsed_t     toml_table_unparsed(const toml_table_t* table, const char* key);
 11 | toml_unparsed_t     toml_array_unparsed(const toml_array_t* array, int idx);
 12 | int                 toml_value_string(toml_unparsed_t s, char** ret, int* len);
 13 | int                 toml_value_bool(toml_unparsed_t s, bool* ret);
 14 | int                 toml_value_int(toml_unparsed_t s, int64_t* ret);
 15 | int                 toml_value_double(toml_unparsed_t s, double* ret);
 16 | int                 toml_value_timestamp(toml_unparsed_t s, toml_timestamp_t* ret);
 17 | 
 18 | static void print_escape_string(const char* s, int sl) { // Write sl bytes of s to stdout, JSON-escaped.
 19 | 	for (const char* p = s; p < s + sl; p++) {
 20 | 		const char* esc = NULL; // Two-character escape for this byte, if it has one.
 21 | 		switch (*p) {
 22 | 		case '\b': esc = "\\b"; break;
 23 | 		case '\t': esc = "\\t"; break;
 24 | 		case '\n': esc = "\\n"; break;
 25 | 		case '\f': esc = "\\f"; break;
 26 | 		case '\r': esc = "\\r"; break;
 27 | 		case '"':  esc = "\\\""; break;
 28 | 		case '\\': esc = "\\\\"; break;
 29 | 		}
 30 | 		if (esc)
 31 | 			fputs(esc, stdout);
 32 | 		else if (*p >= 0x00 && *p <= 0x1f) // Remaining control bytes use the \u00XX form.
 33 | 			printf("\\u00%02X", *p);
 34 | 		else
 35 | 			putchar(*p);
 36 | 	}
 37 | }
 38 | 
 39 | static void print_raw(const char* s) { // Print one raw value as a {"type": ..., "value": ...} JSON object.
 40 | 	char*            sval;
 41 | 	int              slen;
 42 | 	int64_t          ival;
 43 | 	bool             bval;
 44 | 	double           dval;
 45 | 	toml_timestamp_t ts;
 46 | 	// Try each type in turn; the first toml_value_*() call that returns 0 decides the type.
 47 | 	if (toml_value_string(s, &sval, &slen) == 0) {
 48 | 		printf("{\"type\": \"string\",\"value\": \"");
 49 | 		print_escape_string(sval, slen);
 50 | 		printf("\"}");
 51 | 		free(sval); // The decoded string is released here once printed.
 52 | 	} else if (toml_value_int(s, &ival) == 0) {
 53 | 		printf("{\"type\": \"integer\",\"value\": \"%" PRId64 "\"}", ival);
 54 | 	} else if (toml_value_bool(s, &bval) == 0) {
 55 | 		printf("{\"type\": \"bool\",\"value\": \"%s\"}", bval ? "true" : "false");
 56 | 	} else if (toml_value_double(s, &dval) == 0) {
 57 | 		if (isnan(dval)) // NaN is always written as plain "nan".
 58 | 			printf("{\"type\": \"float\",\"value\": \"nan\"}");
 59 | 		else
 60 | 			printf("{\"type\": \"float\",\"value\": \"%0.17g\"}", dval);
 61 | 	} else if (toml_value_timestamp(s, &ts) == 0) {
 62 | 		char millisec[10];
 63 | 		if (ts.millisec) // The fractional part is only printed when non-zero.
 64 | 			snprintf(millisec, 10, ".%03d", ts.millisec);
 65 | 		else
 66 | 			millisec[0] = 0;
 67 | 		if (ts.kind == 'd' || ts.kind == 'l') {
 68 | 			char off[15];
 69 | 			off[0] = 'Z'; // Offset defaults to "Z"; replaced by +hh:mm / -hh:mm when tz is non-zero.
 70 | 			off[1] = 0;
 71 | 			if (ts.tz != 0)
 72 | 				snprintf(off, 15, "%c%02d:%02d", (ts.tz > 0 ? '+' : '-'), abs(ts.tz) / 60, abs(ts.tz) % 60);
 73 | 			// clang-format off
 74 | 			printf("{\"type\": \"%s\",\"value\": \"%04d-%02d-%02dT%02d:%02d:%02d%s%s\"}",
 75 | 				(ts.kind == 'd' ? "datetime" : "datetime-local"),
 76 | 				ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, millisec,
 77 | 				(ts.kind == 'd' ? off : ""));
 78 | 		} else if (ts.kind == 'D') {
 79 | 			printf("{\"type\": \"date-local\",\"value\": \"%04d-%02d-%02d\"}",
 80 | 				ts.year, ts.month, ts.day);
 81 | 		} else if (ts.kind == 't') {
 82 | 			printf("{\"type\": \"time-local\",\"value\": \"%02d:%02d:%02d%s\"}",
 83 | 				ts.hour, ts.minute, ts.second, millisec);
 84 | 			// clang-format on
 85 | 		}
 86 | 	} else { // No parser accepted the value: report and exit with status 1.
 87 | 		fprintf(stderr, "unknown type\n");
 88 | 		exit(1);
 89 | 	}
 90 | }
 91 | 
 92 | static void print_array(toml_array_t* arr);
 93 | static void print_table(toml_table_t* curtbl) { // Print curtbl and everything below it as a JSON object.
 94 | 	const char*   key;
 95 | 	int           keylen;
 96 | 	const char*   raw;
 97 | 	toml_array_t* arr;
 98 | 	toml_table_t* tbl;
 99 | 
100 | 	printf("{");
101 | 	for (int i = 0; (key = toml_table_key(curtbl, i, &keylen)) != 0; i++) { // Stops when toml_table_key() returns 0.
102 | 		printf("%s\"", i > 0 ? ",\n" : "");
103 | 		print_escape_string(key, keylen);
104 | 		printf("\":");
105 | 		// A key resolves to a raw value, an array, or a table; anything else aborts.
106 | 		if ((raw = toml_table_unparsed(curtbl, key)) != 0)
107 | 			print_raw(raw);
108 | 		else if ((arr = toml_table_array(curtbl, key)) != 0)
109 | 			print_array(arr);
110 | 		else if ((tbl = toml_table_table(curtbl, key)) != 0)
111 | 			print_table(tbl);
112 | 		else
113 | 			abort();
114 | 	}
115 | 	printf("}");
116 | }
117 | 
118 | static void print_table_array(toml_array_t* curarr) { // Print an array of tables as a JSON array.
119 | 	int idx = 0;
120 | 	printf("[");
121 | 	for (toml_table_t* elem = toml_array_table(curarr, idx); elem != 0; elem = toml_array_table(curarr, ++idx)) {
122 | 		if (idx > 0) // Comma before every element except the first.
123 | 			printf(",");
124 | 		print_table(elem);
125 | 	}
126 | 	printf("]");
127 | }
128 | 
129 | static void print_array(toml_array_t* curarr) { // Print curarr as a JSON array, recursing into nested values.
130 | 	if (curarr->kind == 't') { // Arrays whose kind is 't' go through print_table_array().
131 | 		print_table_array(curarr);
132 | 		return;
133 | 	}
134 | 
135 | 	printf("[");
136 | 
137 | 	const char*   raw;
138 | 	toml_array_t* arr;
139 | 	toml_table_t* tbl;
140 | 
141 | 	const int n = toml_array_len(curarr);
142 | 	for (int i = 0; i < n; i++) {
143 | 		printf("%s", i > 0 ? "," : "");
144 | 		// Each element is tried as a nested array, then a table, then a raw value.
145 | 		if ((arr = toml_array_array(curarr, i)) != 0) {
146 | 			print_array(arr);
147 | 			continue;
148 | 		}
149 | 
150 | 		if ((tbl = toml_array_table(curarr, i)) != 0) {
151 | 			print_table(tbl);
152 | 			continue;
153 | 		}
154 | 
155 | 		raw = toml_array_unparsed(curarr, i);
156 | 		if (raw) {
157 | 			print_raw(raw);
158 | 			continue;
159 | 		}
160 | 
161 | 		fflush(stdout); // Flush the partial JSON before writing the error to stderr.
162 | 		fprintf(stderr, "ERROR: unable to decode value in array\n");
163 | 		exit(1);
164 | 	}
165 | 
166 | 	printf("]");
167 | }
168 | 
169 | static void cat(FILE* fp) { // Parse the TOML document read from fp and print it as JSON on stdout.
170 | 	char          msg[200];
171 | 	toml_table_t* root = toml_parse_file(fp, msg, sizeof msg);
172 | 
173 | 	if (root == NULL) { // A parse failure is fatal: report it and exit with status 1.
174 | 		fprintf(stderr, "ERROR: %s\n", msg);
175 | 		exit(1);
176 | 	}
177 | 
178 | 	// Emit the whole document as one JSON value followed by a newline.
179 | 	print_table(root);
180 | 	putchar('\n');
181 | 	toml_free(root);
182 | }
183 | 
184 | int main(int argc, const char* argv[argc + 1]) {
185 | 	// With no file arguments, convert standard input.
186 | 	if (argc == 1)
187 | 		cat(stdin);
188 | 	// Otherwise convert each named file in order, stopping at the first one that cannot be opened.
189 | 	for (int n = 1; n < argc; n++) {
190 | 		const char* path = argv[n];
191 | 		FILE*       in   = fopen(path, "r");
192 | 		if (in == NULL) {
193 | 			fprintf(stderr, "ERROR: cannot open %s: %s\n", path, strerror(errno));
194 | 			exit(1);
195 | 		}
196 | 		cat(in);
197 | 		fclose(in);
198 | 	}
199 | 	return 0;
200 | }
201 | 


--------------------------------------------------------------------------------
/toml-c-test.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <string.h>
  3 | 
  4 | #include "toml.h"
  5 | 
  6 | int failed = 0;
  7 | 
  8 | #define errorf(...)                                                             \
  9 | 	{                                                                           \
 10 | 		failed = 1;                                                             \
 11 | 		fprintf(stderr, "FAIL: %s:%d: %s()\n\t", __FILE__, __LINE__, __func__); \
 12 | 		fprintf(stderr, __VA_ARGS__);                                           \
 13 | 		fprintf(stderr, "\n");                                                  \
 14 | 	}
 15 | 
 16 | #define fatalf(...)          \
 17 | 	{                        \
 18 | 		errorf(__VA_ARGS__); \
 19 | 		return;              \
 20 | 	}
 21 | 
 22 | #define streq(a, b) (strcmp(a, b) == 0)
 23 | 
 24 | void test_toml_table_value(void) { // Check that each toml_table_*() getter reads its scalar type from a table.
 25 | 	char          errbuf[200];
 26 | 	toml_table_t* tbl = toml_parse("str   = 'xxx'\n"
 27 | 	                               "int   = 42\n"
 28 | 	                               "bool  = true\n"
 29 | 	                               "float = 6.666\n"
 30 | 	                               "ts    = 2012-01-02T15:16:17Z\n",
 31 | 	                               errbuf, sizeof(errbuf));
 32 | 	if (!tbl)
 33 | 		fatalf("%s", errbuf);
 34 | 	int l = toml_table_len(tbl);
 35 | 	if (l != 5)
 36 | 		errorf("wrong table length: %d", l);
 37 | 
 38 | 	toml_value_t str = toml_table_string(tbl, "str");
 39 | 	if (!str.ok)
 40 | 		errorf("str.ok not set");
 41 | 	if (str.ok && !streq(str.u.s, "xxx")) // Only compare when a string was actually returned.
 42 | 		errorf("str.u.s wrong value: '%s'; want: 'xxx'", str.u.s);
 43 | 	if (str.u.sl != 3)
 44 | 		errorf("str.u.sl wrong value: %d", str.u.sl);
 45 | 	free(str.u.s);
 46 | 
 47 | 	toml_value_t i = toml_table_int(tbl, "int");
 48 | 	if (!i.ok)
 49 | 		errorf("int.ok not set");
 50 | 	if (i.u.i != 42) // int64_t is not always long, so cast for a portable format.
 51 | 		errorf("int.u.i wrong value: %lld", (long long)i.u.i);
 52 | 
 53 | 	toml_value_t b = toml_table_bool(tbl, "bool");
 54 | 	if (!b.ok)
 55 | 		errorf("b.ok not set");
 56 | 	if (!b.u.b)
 57 | 		errorf("b.u.b wrong value: %d", b.u.b);
 58 | 
 59 | 	toml_value_t f = toml_table_double(tbl, "float");
 60 | 	if (!f.ok)
 61 | 		errorf("f.ok not set");
 62 | 	if (f.u.d != 6.666)
 63 | 		errorf("f.u.d wrong value: %f", f.u.d);
 64 | 
 65 | 	toml_value_t ts = toml_table_timestamp(tbl, "ts");
 66 | 	if (!ts.ok)
 67 | 		errorf("ts.ok not set");
 68 | 	char have[200];
 69 | 
 70 | 	// clang-format off
 71 | 	snprintf(have, 200, "'%c' %d-%02d-%02d %02d:%02d:%02d.%d TZ=%d",
 72 | 		ts.u.ts.kind, ts.u.ts.year, ts.u.ts.month, ts.u.ts.day, ts.u.ts.hour,
 73 | 		ts.u.ts.minute, ts.u.ts.second, ts.u.ts.millisec, ts.u.ts.tz);
 74 | 	// clang-format on
 75 | 	char want[200] = "'d' 2012-01-02 15:16:17.0 TZ=0";
 76 | 	if (!streq(have, want))
 77 | 		errorf("have: %s\n\twant: %s", have, want);
 78 | 
 79 | 	toml_free(tbl);
 80 | }
 81 | 
 82 | void test_toml_array_value(void) { // Check the toml_array_*() getters on string and timestamp arrays.
 83 | 	char          errbuf[200];
 84 | 	toml_table_t* tbl = toml_parse("str   = ['xxx', \"yyy\"]\n"
 85 | 	                               "int   = [42, 43]\n"
 86 | 	                               "bool  = [true, false]\n"
 87 | 	                               "float = [6.666, 6.667]\n"
 88 | 	                               "ts    = [2012-01-02T15:16:17Z, 2013-02-03T16:17:18Z]\n",
 89 | 	                               errbuf, sizeof(errbuf));
 90 | 	if (!tbl)
 91 | 		fatalf("%s", errbuf);
 92 | 
 93 | 	{
 94 | 		toml_array_t* arr  = toml_table_array(tbl, "str");
 95 | 		toml_value_t  str1 = toml_array_string(arr, 0);
 96 | 		if (!str1.ok)
 97 | 			errorf("str1.ok not set");
 98 | 		if (!streq(str1.u.s, "xxx"))
 99 | 			errorf("str1.u.s wrong value: '%s'; want: 'xxx'", str1.u.s);
100 | 		if (str1.u.sl != 3)
101 | 			errorf("str1.u.sl wrong value: %d", str1.u.sl);
102 | 
103 | 		toml_value_t str2 = toml_array_string(arr, 1);
104 | 		if (!str2.ok)
105 | 			errorf("str2.ok not set");
106 | 		if (!streq(str2.u.s, "yyy"))
107 | 			errorf("str2.u.s wrong value: %s; want: 'yyy'", str2.u.s);
108 | 		if (str2.u.sl != 3)
109 | 			errorf("str2.u.sl wrong value: %d", str2.u.sl);
110 | 		free(str1.u.s);
111 | 		free(str2.u.s);
112 | 	}
113 | 	// NOTE(review): the int, bool and float arrays parsed above are never asserted on.
114 | 	{
115 | 		toml_array_t* arr = toml_table_array(tbl, "ts");
116 | 		toml_value_t  ts1 = toml_array_timestamp(arr, 0); // NOTE(review): ts1.ok is not checked; only the first element is read.
117 | 		char          have[200];
118 | 
119 | 		// clang-format off
120 | 		snprintf(have, 200, "'%c' %d-%02d-%02d %02d:%02d:%02d.%d TZ=%d",
121 | 			ts1.u.ts.kind, ts1.u.ts.year, ts1.u.ts.month, ts1.u.ts.day, ts1.u.ts.hour,
122 | 			ts1.u.ts.minute, ts1.u.ts.second, ts1.u.ts.millisec, ts1.u.ts.tz);
123 | 		// clang-format on
124 | 		char want[200] = "'d' 2012-01-02 15:16:17.0 TZ=0";
125 | 		if (!streq(have, want))
126 | 			errorf("have: %s\n\twant: %s", have, want);
127 | 	}
128 | 
129 | 	toml_free(tbl);
130 | }
131 | 
132 | void test_toml_table_string_unknown_value(void) {
133 | 	char          msg[200];
134 | 	toml_table_t* doc = toml_parse("a = 'a'", msg, sizeof(msg));
135 | 	if (doc == NULL)
136 | 		fatalf("%s", msg);
137 | 	// A key that was never defined must come back with ok unset.
138 | 	toml_value_t missing = toml_table_string(doc, "aa");
139 | 	if (missing.ok)
140 | 		errorf("unknown.ok set");
141 | 
142 | 	free(missing.u.s);
143 | 	toml_free(doc);
144 | }
145 | 
146 | // TODO: can probably use toml-test's -errors feature for this.
147 | void test_error(void) { // Checks the exact text left in errbuf for documents that must fail to parse.
148 | 	char errbuf[200];
149 | 
150 | 	// e_syntax errors: "at <line>:<col>: <message>"
151 | 	toml_parse("key", errbuf, sizeof(errbuf));
152 | 	if (!streq(errbuf, "at 1:4: missing '='"))
153 | 		errorf("wrong error: %s", errbuf);
154 | 
155 | 	toml_parse("k = 'missing-q\nb = 1", errbuf, sizeof(errbuf));
156 | 	if (!streq(errbuf, "at 1:14: unterminated quote (')"))
157 | 		errorf("wrong error: %s", errbuf);
158 | 
159 | 	toml_parse("k = {{}}", errbuf, sizeof(errbuf));
160 | 	if (!streq(errbuf, "at 1:6: expected a string"))
161 | 		errorf("wrong error: %s", errbuf);
162 | 
163 | 	// e_keyexists errors: the same table header appears twice
164 | 	toml_parse("[a]\n[a]", errbuf, sizeof(errbuf));
165 | 	if (!streq(errbuf, "at 2:2: key already defined"))
166 | 		errorf("wrong error: %s", errbuf);
167 | }
168 | 
169 | int main(void) { // Runs every test in order and reports one overall verdict.
170 | 	test_toml_table_value();
171 | 	test_toml_array_value();
172 | 	test_toml_table_string_unknown_value();
173 | 	test_error();
174 | 
175 | 	printf("%s: %s\n", __FILE__, failed ? "FAIL" : "PASS");
176 | 	return failed; // doubles as the exit status; presumably set by errorf() -- defined outside this excerpt
177 | }
178 | 


--------------------------------------------------------------------------------
/toml.c:
--------------------------------------------------------------------------------
   1 | #define _POSIX_C_SOURCE 200809L
   2 | #include <assert.h>
   3 | #include <ctype.h>
   4 | #include <errno.h>
   5 | #include <math.h>
   6 | #include <stdbool.h>
   7 | #include <stdint.h>
   8 | #include <stdio.h>
   9 | #include <stdlib.h>
  10 | #include <string.h>
  11 | 
  12 | #include "toml.h"
  13 | 
  14 | #define ALIGN8(sz) (((sz) + 7) & ~7)
  15 | #define calloc(x, y) error - forbidden - use CALLOC instead
  16 | static void* CALLOC(size_t nmemb, size_t sz) { /// zeroed block of nmemb items, each padded to 8 bytes; 0 on failure
  17 | 	size_t each = ALIGN8(sz); /// size_t, not int: a large request must not truncate
  18 | 	if (each < sz || (nmemb && each > SIZE_MAX / nmemb))
  19 | 		return 0; /// padding or multiplication would wrap around
  20 | 	size_t nb = each * nmemb;
  21 | 	void*  p  = malloc(nb);
  22 | 	return p ? memset(p, 0, nb) : 0;
  23 | }
  24 | 
  25 | // some old platforms define strdup macro -- drop it.
  26 | #undef strdup
  27 | #define strdup(x) error - forbidden - use STRDUP instead
  28 | static char* STRDUP(const char* s) { /// heap copy of NUL-terminated s; 0 on allocation failure
  29 | 	size_t len = strlen(s); /// size_t: an int would truncate lengths above INT_MAX
  30 | 	char*  p   = malloc(len + 1);
  31 | 	if (p) {
  32 | 		memcpy(p, s, len);
  33 | 		p[len] = 0;
  34 | 	}
  35 | 	return p;
  36 | }
  37 | 
  38 | // some old platforms define strndup macro -- drop it.
  39 | #undef strndup
  40 | #define strndup(x) error - forbidden - use STRNDUP instead
  41 | static char* STRNDUP(const char* s, size_t n) {
  42 | 	/// Copy at most n bytes of s, stopping early at a NUL, and terminate the copy.
  43 | 	const size_t ncopy = strnlen(s, n);
  44 | 	char* const  dup   = malloc(ncopy + 1);
  45 | 	if (!dup)
  46 | 		return 0;
  47 | 	dup[ncopy] = 0;
  48 | 	return memcpy(dup, s, ncopy);
  49 | }
  50 | 
  51 | // Unparsed values: scalar text kept as written in the source; valtype() feeds it to the toml_value_*() converters.
  52 | typedef const char* toml_unparsed_t;
  53 | toml_unparsed_t     toml_table_unparsed(const toml_table_t* table, const char* key);
  54 | toml_unparsed_t     toml_array_unparsed(const toml_array_t* array, int idx);
  55 | int                 toml_value_string(toml_unparsed_t s, char** ret, int* len);
  56 | int                 toml_value_bool(toml_unparsed_t s, bool* ret);
  57 | int                 toml_value_int(toml_unparsed_t s, int64_t* ret);
  58 | int                 toml_value_double(toml_unparsed_t s, double* ret);
  59 | int                 toml_value_timestamp(toml_unparsed_t s, toml_timestamp_t* ret);
  60 | 
  61 | // Convert escape to UTF-8; return #bytes used in buf to encode the char, or -1
  62 | // on error.
  63 | // http://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16
  64 | int read_unicode_escape(uint64_t code, char buf[6]) {
  65 | 	/// Reject UTF-16 surrogates and anything above the Unicode maximum.
  66 | 	if ((code >= 0xd800 && code <= 0xdfff) || code > 0x10FFFF)
  67 | 		return -1;
  68 | 
  69 | 	/// Pick the sequence length and the marker bits of the lead byte.
  70 | 	int           nbytes;
  71 | 	unsigned char lead;
  72 | 	if (code <= 0x7F) { /// 0xxxxxxx
  73 | 		nbytes = 1;
  74 | 		lead   = 0x00;
  75 | 	} else if (code <= 0x7FF) { /// 110xxxxx 10xxxxxx
  76 | 		nbytes = 2;
  77 | 		lead   = 0xc0;
  78 | 	} else if (code <= 0xFFFF) { /// 1110xxxx 10xxxxxx 10xxxxxx
  79 | 		nbytes = 3;
  80 | 		lead   = 0xe0;
  81 | 	} else { /// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  82 | 		nbytes = 4;
  83 | 		lead   = 0xf0;
  84 | 	}
  85 | 
  86 | 	/// Fill the continuation bytes from the tail, six payload bits at a time.
  87 | 	for (int i = nbytes - 1; i > 0; i--, code >>= 6)
  88 | 		buf[i] = (unsigned char)(0x80 | (code & 0x3f));
  89 | 	/// What is left of code fits in the payload bits of the lead byte.
  90 | 	buf[0] = (unsigned char)(lead | code);
  91 | 	return nbytes;
  92 | }
  93 | 
  94 | static inline void xfree(const void* x) {
  95 | 	void* p = (void*)(intptr_t)x; /// cast away const so free() accepts it
  96 | 	if (p) free(p);
  97 | }
  98 | 
  99 | enum tokentype_t { INVALID, DOT, COMMA, EQUAL, LBRACE, RBRACE, NEWLINE, LBRACKET, RBRACKET, STRING, MSTRING }; // token kinds; the parser accepts both STRING and MSTRING where a value is expected
 100 | typedef enum tokentype_t tokentype_t;
 101 | 
 102 | typedef struct token_t token_t;
 103 | struct token_t { // the token the parser is currently looking at (ctx->tok), or a saved copy of one
 104 | 	tokentype_t tok; // kind of token
 105 | 	toml_pos_t  pos; // line/column reported in error messages
 106 | 	char*       ptr; // points into context->start
 107 | 	int         len; // number of bytes at ptr that belong to the token
 108 | 	int         eof; // non-zero at end of input -- presumably set by next_token(); TODO confirm
 109 | };
 110 | 
 111 | typedef struct context_t context_t;
 112 | struct context_t { // parser state handed to every parse_*/create_* helper
 113 | 	char* start; // token pointers refer into this buffer; presumably the start of the document -- TODO confirm
 114 | 	char* stop;  // presumably the end of that buffer -- TODO confirm
 115 | 	char* errbuf;   // where the e_*() helpers write their message
 116 | 	int   errbufsz; // capacity of errbuf, passed to snprintf()
 117 | 
 118 | 	token_t       tok;    // current token
 119 | 	toml_table_t* root;   // walk_tabpath() starts its walk here
 120 | 	toml_table_t* curtbl; // table reached by the last walk_tabpath(), per its header comment
 121 | 
 122 | 	struct { // key path of the current [x.y.z] header, filled by fill_tblpath()
 123 | 		int     top;        // entries in use; fill_tblpath() rejects more than 10
 124 | 		char*   key[10];    // normalized keys, heap-allocated and freed on the next fill
 125 | 		int     keylen[10]; // length of each key
 126 | 		token_t tok[10];    // source token of each key, kept for error positions
 127 | 	} tpath;
 128 | };
 129 | 
 130 | #define STRINGIFY(x) #x
 131 | #define TOSTRING(x) STRINGIFY(x) // extra level so x is macro-expanded before it is stringified
 132 | #define FLINE __FILE__ ":" TOSTRING(__LINE__) // "file:line" string literal naming the line it appears on
 133 | 
 134 | static int next_token(context_t* ctx, bool dotisspecial);
 135 | 
 136 | // Error reporting. Call when an error is detected. Always return -1.
 137 | static int e_outofmemory(context_t* ctx, const char* fline) { // fline: "file:line" of the failing call site (callers pass FLINE)
 138 | 	snprintf(ctx->errbuf, ctx->errbufsz, "ERROR: out of memory (%s)", fline);
 139 | 	return -1; // lets callers write `return e_outofmemory(...)`
 140 | }
 141 | 
 142 | static int e_internal(context_t* ctx, const char* fline) { // parser state that should not occur; fline names the call site
 143 | 	snprintf(ctx->errbuf, ctx->errbufsz, "internal error (%s)", fline);
 144 | 	return -1;
 145 | }
 146 | 
 147 | static int e_syntax(context_t* ctx, toml_pos_t pos, const char* msg) { // records "at <line>:<col>: <msg>" in ctx->errbuf
 148 | 	snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: %s", pos.line, pos.col, msg);
 149 | 	return -1;
 150 | }
 151 | 
 152 | static int e_keyexists(context_t* ctx, toml_pos_t pos) {
 153 | 	/// Same "at line:col: msg" layout as e_syntax(), with a fixed message.
 154 | 	return e_syntax(ctx, pos, "key already defined");
 155 | }
 156 | 
 157 | // Resize heap block p (sz bytes in use, or NULL) to newsz bytes. Returns the
 158 | // new block, or 0 on failure, in which case p is untouched and still owned by
 159 | // the caller.
 160 | static void* expand(void* p, int sz, int newsz) {
 161 | 	(void)sz; /// realloc() tracks the old size itself
 162 | 	if (newsz <= 0) /// realloc(p, 0) may release p; report failure instead
 163 | 		return 0;
 164 | 	/// realloc() treats p == NULL like malloc(), keeps p valid when it fails,
 165 | 	/// and never copies more bytes than the new block can hold.
 166 | 	return realloc(p, newsz);
 167 | }
 168 | 
 169 | static void** expand_ptrarr(void** p, int n) {
 170 | 	/// Room for the n existing pointers plus one new, zeroed slot.
 171 | 	void** grown = malloc((n + 1) * sizeof(void*));
 172 | 	if (grown) {
 173 | 		if (p) {
 174 | 			memcpy(grown, p, n * sizeof(void*));
 175 | 			free(p);
 176 | 		}
 177 | 		grown[n] = 0;
 178 | 	}
 179 | 	return grown;
 180 | }
 181 | 
 182 | static toml_arritem_t* expand_arritem(toml_arritem_t* p, int n) {
 183 | 	/// Grow by exactly one item; on failure 0 is returned and p stays valid.
 184 | 	const int       oldsz = n * sizeof(*p);
 185 | 	toml_arritem_t* grown = expand(p, oldsz, oldsz + sizeof(*p));
 186 | 	if (grown)
 187 | 		memset(grown + n, 0, sizeof(*grown)); /// the new tail item starts zeroed
 188 | 	return grown;
 189 | }
 190 | 
 191 | static uint8_t const u8_length[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4}; // UTF-8 sequence length indexed by the lead byte's high nibble; 0 marks a continuation byte
 192 | #define u8length(s) u8_length[(((uint8_t*)(s))[0] & 0xFF) >> 4];
 193 | 
 194 | // Copy a validated literal-string body to a new NUL-terminated heap buffer;
 195 | // *len gets its length. Returns NULL with a message in errbuf on error.
 196 | static char* norm_lit_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) {
 197 | 	const char* sp  = src;
 198 | 	const char* sq  = src + srclen;
 199 | 	char*       dst = 0; /// will write to dst[] and return it
 200 | 	int         max = 0; /// max size of dst[]
 201 | 	int         off = 0; /// cur offset in dst[]
 202 | 
 203 | 	for (;;) {                 /// scan forward on src
 204 | 		if (off >= max - 10) { /// have some slack for misc stuff
 205 | 			int   newmax = max + 50;
 206 | 			char* x      = expand(dst, max, newmax);
 207 | 			if (!x) {
 208 | 				snprintf(errbuf, errbufsz, "out of memory");
 209 | 				goto fail;
 210 | 			}
 211 | 			dst = x;
 212 | 			max = newmax;
 213 | 		}
 214 | 		if (sp >= sq) /// finished?
 215 | 			break;
 216 | 
 217 | 		/// l == 0: stray continuation byte; sq - sp < l: sequence cut short by the end of the string.
 218 | 		uint8_t l = u8length(sp);
 219 | 		if (l == 0 || sq - sp < l) {
 220 | 			snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
 221 | 			goto fail;
 222 | 		}
 223 | 		if (l > 1) {
 224 | 			dst[off++] = *sp++; /// lead byte, already classified by u8length
 225 | 			for (int i = 1; i < l; i++) {
 226 | 				char cb = *sp++;
 227 | 				if ((cb & 0xC0) != 0x80) { /// continuation bytes are 10xxxxxx
 228 | 					snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
 229 | 					goto fail;
 230 | 				}
 231 | 				dst[off++] = cb;
 232 | 			}
 233 | 			continue;
 234 | 		}
 235 | 		/// control characters other than Tab are not allowed
 236 | 		char ch = *sp++;
 237 | 		if ((0 <= ch && ch <= 0x08) || (0x0a <= ch && ch <= 0x1f) || ch == 0x7f) {
 238 | 			if (!(multiline && (ch == '\r' || ch == '\n'))) {
 239 | 				snprintf(errbuf, errbufsz, "invalid char U+%04x", ch);
 240 | 				goto fail;
 241 | 			}
 242 | 		}
 243 | 		dst[off++] = ch; /// a plain copy suffices
 244 | 	}
 245 | 
 246 | 	*len     = off;
 247 | 	dst[off] = 0; /// the slack kept above guarantees room for the NUL
 248 | 	return dst;
 249 | fail: /// errbuf is already filled in; release the partial copy
 250 | 	xfree(dst);
 251 | 	return 0;
 252 | }
 253 | 
 254 | // Convert src to raw unescaped utf-8 string. Returns NULL if error with errmsg
 255 | // in errbuf.
 256 | static char* norm_basic_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) {
 257 | 	const char* sp  = src;
 258 | 	const char* sq  = src + srclen;
 259 | 	char*       dst = 0; /// will write to dst[] and return it; the caller frees it
 260 | 	int         max = 0; /// max size of dst[]
 261 | 	int         off = 0; /// cur offset in dst[]; stored in *len on success
 262 | 
 263 | 	/// scan forward on src
 264 | 	for (;;) {
 265 | 		if (off >= max - 10) { /// have some slack for misc stuff
 266 | 			int   newmax = max + 50;
 267 | 			char* x      = expand(dst, max, newmax);
 268 | 			if (!x) {
 269 | 				snprintf(errbuf, errbufsz, "out of memory");
 270 | 				goto fail;
 271 | 			}
 272 | 			dst = x;
 273 | 			max = newmax;
 274 | 		}
 275 | 
 276 | 		if (sp >= sq) /// finished?
 277 | 			break;
 278 | 
 279 | 		/// l == 0: stray continuation byte. sq - sp < l: sequence cut short by
 280 | 		/// the end of the string; reading it would run past sq.
 281 | 		uint8_t l = u8length(sp);
 282 | 		if (l == 0 || sq - sp < l) {
 283 | 			snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
 284 | 			goto fail;
 285 | 		}
 286 | 		if (l > 1) {
 287 | 			dst[off++] = *sp++; /// lead byte, already classified by u8length
 288 | 			for (int i = 1; i < l; i++) {
 289 | 				char cb = *sp++;
 290 | 				if ((cb & 0xC0) != 0x80) { /// continuation bytes are 10xxxxxx
 291 | 					snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
 292 | 					goto fail;
 293 | 				}
 294 | 				dst[off++] = cb;
 295 | 			}
 296 | 			continue;
 297 | 		}
 298 | 
 299 | 		char ch = *sp++;
 300 | 		if (ch != '\\') {
 301 | 			/// must be escaped: U+0000 to U+0008, U+000A to U+001F, U+007F
 302 | 			if ((ch >= 0 && ch <= 0x08) || (ch >= 0x0a && ch <= 0x1f) || ch == 0x7f) {
 303 | 				if (!(multiline && (ch == '\r' || ch == '\n'))) {
 304 | 					snprintf(errbuf, errbufsz, "invalid char U+%04x", ch);
 305 | 					goto fail;
 306 | 				}
 307 | 			}
 308 | 
 309 | 			dst[off++] = ch; /// a plain copy suffices
 310 | 			continue;
 311 | 		}
 312 | 
 313 | 		// TODO: unreachable, I think?
 314 | 		if (sp >= sq) { /// ch was backslash. we expect the escape char.
 315 | 			snprintf(errbuf, errbufsz, "last backslash is invalid");
 316 | 			goto fail;
 317 | 		}
 318 | 
 319 | 		if (multiline) {                           /// for multi-line, we want to kill line-ending-backslash.
 320 | 			if (sp[strspn(sp, " \t\r")] == '\n') { /// if there is only whitespace after the backslash ...
 321 | 				sp += strspn(sp, " \t\r\n");       /// skip all the following whitespaces
 322 | 				continue;
 323 | 			}
 324 | 		}
 325 | 
 326 | 		ch = *sp++; /// get the escaped char
 327 | 		switch (ch) {
 328 | 		case 'x':
 329 | 		case 'u':
 330 | 		case 'U': {
 331 | 			uint64_t ucs  = 0;
 332 | 			int      nhex = 2;
 333 | 			if (ch == 'u') nhex = 4;
 334 | 			if (ch == 'U') nhex = 8;
 335 | 			for (int i = 0; i < nhex; i++) {
 336 | 				// TODO: unreachable I think, as scan_string() already
 337 | 				// guarantees exactly 4 or 8 hex chars.
 338 | 				if (sp >= sq) {
 339 | 					snprintf(errbuf, errbufsz, "\\%c expected %d hex chars", ch, nhex);
 340 | 					goto fail;
 341 | 				}
 342 | 				/// h is separate from ch so the message above still names the escape letter.
 343 | 				char h = *sp++;
 344 | 				int  v = -1;
 345 | 				if ('0' <= h && h <= '9')
 346 | 					v = h - '0';
 347 | 				else if ('A' <= h && h <= 'F')
 348 | 					v = h - 'A' + 10;
 349 | 				else if ('a' <= h && h <= 'f')
 350 | 					v = (h ^ 0x20) - 'A' + 10;
 351 | 				// TODO: also unreachable, as per above.
 352 | 				if (v == -1) {
 353 | 					snprintf(errbuf, errbufsz, "invalid hex chars for \\u or \\U");
 354 | 					goto fail;
 355 | 				}
 356 | 				ucs = ucs * 16 + v;
 357 | 			}
 358 | 			int n = read_unicode_escape(ucs, &dst[off]);
 359 | 			if (n == -1) {
 360 | 				snprintf(errbuf, errbufsz, "illegal ucs code in \\u or \\U");
 361 | 				goto fail;
 362 | 			}
 363 | 			off += n;
 364 | 			continue; /// the encoded bytes are already in dst
 365 | 		}
 366 | 		case 'b':  ch = '\b'; break;
 367 | 		case 't':  ch = '\t'; break;
 368 | 		case 'n':  ch = '\n'; break;
 369 | 		case 'f':  ch = '\f'; break;
 370 | 		case 'r':  ch = '\r'; break;
 371 | 		case 'e':  ch = 0x1b; break;
 372 | 		case '"':  ch = '"'; break;
 373 | 		case '\\': ch = '\\'; break;
 374 | 		default:
 375 | 			// TODO: unreachable, I think, as scan_string() already
 376 | 			// guarantees correct char.
 377 | 			snprintf(errbuf, errbufsz, "illegal escape char \\%c", ch);
 378 | 			goto fail;
 379 | 		}
 380 | 
 381 | 		dst[off++] = ch;
 382 | 	}
 383 | 
 384 | 	*len     = off;
 385 | 	dst[off] = 0; /// Cap with NUL and return it.
 386 | 	return dst;
 387 | 
 388 | 	/// Every error path lands here with errbuf already filled in.
 389 | fail:
 390 | 	xfree(dst);
 391 | 	return 0;
 392 | }
 393 | 
 394 | // Normalize a key. Convert all special chars to raw unescaped utf-8 chars.
 395 | static char* normalize_key(context_t* ctx, token_t strtok, int* keylen) {
 396 | 	const char* sp = strtok.ptr;
 397 | 	const char* sq = strtok.ptr + strtok.len;
 398 | 	int         ch = *sp;
 399 | 	char*       ret;
 400 | 	/// Returns a heap copy owned by the caller (length in *keylen), or 0 after recording the error in ctx.
 401 | 	// Quoted string
 402 | 	if (ch == '\'' || ch == '\"') {
 403 | 		/// Take " or ' off from front and back.
 404 | 		sp++, sq--;
 405 | 
 406 | 		char ebuf[80];
 407 | 		if (ch == '\'')
 408 | 			ret = norm_lit_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf));
 409 | 		else
 410 | 			ret = norm_basic_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf));
 411 | 		if (!ret) {
 412 | 			e_syntax(ctx, strtok.pos, ebuf);
 413 | 			return 0;
 414 | 		}
 415 | 		return ret;
 416 | 	}
 417 | 
 418 | 	*keylen = 0;
 419 | 	for (const char* c = sp; c != sq; c++) { /// Bare key: allow: [A-Za-z0-9_-]+
 420 | 		*keylen = *keylen + 1;
 421 | 		/// isalnum() is undefined for negative values, so go through unsigned char.
 422 | 		if (isalnum((unsigned char)*c) || *c == '_' || *c == '-')
 423 | 			continue;
 424 | 		/// Report at the key's own position; ctx->tok may already be past it.
 425 | 		e_syntax(ctx, strtok.pos, "invalid key");
 426 | 		return 0;
 427 | 	}
 428 | 
 429 | 	if (!(ret = STRNDUP(sp, sq - sp))) { /// dup and return
 430 | 		e_outofmemory(ctx, FLINE);
 431 | 		return 0;
 432 | 	}
 433 | 	return ret;
 434 | }
 435 | 
 436 | // Look up key in tbl. Return 0 if not found, or 'v'alue, 'a'rray or 't'able
 437 | // depending on the element.
 438 | static int check_key(toml_table_t* tbl, const char* key, toml_keyval_t** ret_val, toml_array_t** ret_arr, toml_table_t** ret_tbl) {
 439 | 	/// Scratch targets, so NULL out-parameters can still be written through.
 440 | 	toml_keyval_t* noval;
 441 | 	toml_array_t*  noarr;
 442 | 	toml_table_t*  notbl;
 443 | 	ret_val = ret_val ? ret_val : &noval;
 444 | 	ret_arr = ret_arr ? ret_arr : &noarr;
 445 | 	ret_tbl = ret_tbl ? ret_tbl : &notbl;
 446 | 
 447 | 	/// At most one of the three is set on a hit; all stay NULL on a miss.
 448 | 	*ret_val = 0;
 449 | 	*ret_arr = 0;
 450 | 	*ret_tbl = 0;
 451 | 
 452 | 	/// Search order: plain values, then arrays, then sub-tables.
 453 | 	for (int k = 0; k < tbl->nkval; k++) {
 454 | 		if (!strcmp(key, tbl->kval[k]->key)) {
 455 | 			*ret_val = tbl->kval[k];
 456 | 			return 'v';
 457 | 		}
 458 | 	}
 459 | 	for (int k = 0; k < tbl->narr; k++) {
 460 | 		if (!strcmp(key, tbl->arr[k]->key)) {
 461 | 			*ret_arr = tbl->arr[k];
 462 | 			return 'a';
 463 | 		}
 464 | 	}
 465 | 	for (int k = 0; k < tbl->ntbl; k++) {
 466 | 		if (!strcmp(key, tbl->tbl[k]->key)) {
 467 | 			*ret_tbl = tbl->tbl[k];
 468 | 			return 't';
 469 | 		}
 470 | 	}
 471 | 	return 0;
 472 | }
 473 | 
 474 | static int key_kind(toml_table_t* tbl, const char* key) { // kind of key in tbl: 'v', 'a' or 't', or 0 if absent
 475 | 	return check_key(tbl, key, 0, 0, 0);
 476 | }
 477 | 
 478 | // Create a keyval in the table.
 479 | static toml_keyval_t* create_keyval_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) {
 480 | 	int   klen;
 481 | 	char* name = normalize_key(ctx, keytok, &klen);
 482 | 	if (name == 0)
 483 | 		return 0;
 484 | 
 485 | 	/// A key may be defined only once per table, whatever its kind.
 486 | 	if (key_kind(tbl, name) != 0) {
 487 | 		xfree(name);
 488 | 		e_keyexists(ctx, keytok.pos);
 489 | 		return 0;
 490 | 	}
 491 | 
 492 | 	/// Grow the pointer list by one slot, then allocate the entry for it.
 493 | 	const int       slot = tbl->nkval;
 494 | 	toml_keyval_t** list = (toml_keyval_t**)expand_ptrarr((void**)tbl->kval, slot);
 495 | 	if (!list) {
 496 | 		e_outofmemory(ctx, FLINE);
 497 | 		xfree(name);
 498 | 		return 0;
 499 | 	}
 500 | 	tbl->kval  = list;
 501 | 	list[slot] = (toml_keyval_t*)CALLOC(1, sizeof(*list[slot]));
 502 | 	if (!list[slot]) {
 503 | 		e_outofmemory(ctx, FLINE);
 504 | 		xfree(name);
 505 | 		return 0;
 506 | 	}
 507 | 	toml_keyval_t* kv = list[tbl->nkval++]; /// the entry only counts from here on
 508 | 	kv->key           = name;
 509 | 	kv->keylen        = klen;
 510 | 	return kv;
 511 | }
 512 | 
 513 | // Create a table in the table.
 514 | static toml_table_t* create_keytable_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) {
 515 | 	int   klen;
 516 | 	char* name = normalize_key(ctx, keytok, &klen);
 517 | 	if (name == 0)
 518 | 		return 0;
 519 | 
 520 | 	toml_table_t* found = 0;
 521 | 	// TODO: need to check all parts for:
 522 | 	//
 523 | 	//   [a]
 524 | 	//   [a.c]   # checks if "a.c" is defined, which is false.
 525 | 	if (check_key(tbl, name, 0, 0, &found) != 0) {
 526 | 		xfree(name);
 527 | 		/// The key is taken. That is only acceptable for a table that was
 528 | 		/// created implicitly: it becomes explicit now and is reused.
 529 | 		if (!found || !found->implicit) {
 530 | 			e_keyexists(ctx, keytok.pos);
 531 | 			return 0;
 532 | 		}
 533 | 		found->implicit = false;
 534 | 		return found;
 535 | 	}
 536 | 
 537 | 	/// Grow the pointer list by one slot, then allocate the table for it.
 538 | 	const int      slot = tbl->ntbl;
 539 | 	toml_table_t** list = (toml_table_t**)expand_ptrarr((void**)tbl->tbl, slot);
 540 | 	if (!list) {
 541 | 		xfree(name);
 542 | 		e_outofmemory(ctx, FLINE);
 543 | 		return 0;
 544 | 	}
 545 | 	tbl->tbl   = list;
 546 | 	list[slot] = (toml_table_t*)CALLOC(1, sizeof(*list[slot]));
 547 | 	if (!list[slot]) {
 548 | 		xfree(name);
 549 | 		e_outofmemory(ctx, FLINE);
 550 | 		return 0;
 551 | 	}
 552 | 
 553 | 	toml_table_t* made = list[tbl->ntbl++];
 554 | 	made->key          = name;
 555 | 	made->keylen       = klen;
 556 | 	return made;
 557 | }
 558 | 
 559 | // Create an array in the table.
 560 | static toml_array_t* create_keyarray_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok, char kind) {
 561 | 	int   klen;
 562 | 	char* name = normalize_key(ctx, keytok, &klen);
 563 | 	if (name == 0)
 564 | 		return 0;
 565 | 	/// A key may be defined only once per table, whatever its kind.
 566 | 	if (key_kind(tbl, name) != 0) {
 567 | 		xfree(name);
 568 | 		e_keyexists(ctx, keytok.pos);
 569 | 		return 0;
 570 | 	}
 571 | 
 572 | 	const int      slot = tbl->narr;
 573 | 	toml_array_t** list = (toml_array_t**)expand_ptrarr((void**)tbl->arr, slot);
 574 | 	if (!list) {
 575 | 		xfree(name);
 576 | 		e_outofmemory(ctx, FLINE);
 577 | 		return 0;
 578 | 	}
 579 | 	tbl->arr   = list;
 580 | 	list[slot] = (toml_array_t*)CALLOC(1, sizeof(*list[slot]));
 581 | 	if (!list[slot]) {
 582 | 		xfree(name);
 583 | 		e_outofmemory(ctx, FLINE);
 584 | 		return 0;
 585 | 	}
 586 | 
 587 | 	toml_array_t* made = list[tbl->narr++];
 588 | 	made->key          = name;
 589 | 	made->keylen       = klen;
 590 | 	made->kind         = kind;
 591 | 	return made;
 592 | }
 593 | 
 594 | static toml_arritem_t* create_value_in_array(context_t* ctx, toml_array_t* parent) {
 595 | 	/// Append one zeroed item and hand it back for the caller to fill in.
 596 | 	toml_arritem_t* items = expand_arritem(parent->item, parent->nitem);
 597 | 	if (items == 0) {
 598 | 		e_outofmemory(ctx, FLINE);
 599 | 		return 0;
 600 | 	}
 601 | 
 602 | 	parent->item = items;
 603 | 	return &items[parent->nitem++];
 604 | }
 605 | 
 606 | // Create an array in an array.
 607 | static toml_array_t* create_array_in_array(context_t* ctx, toml_array_t* parent) {
 608 | 	const int       n    = parent->nitem;
 609 | 	toml_arritem_t* base = expand_arritem(parent->item, n);
 610 | 	if (!base) {
 611 | 		e_outofmemory(ctx, FLINE);
 612 | 		return 0;
 613 | 	}
 614 | 	parent->item = base; /// adopt at once: the old block is gone, so the old pointer must not survive an early return
 615 | 	toml_array_t* ret = (toml_array_t*)CALLOC(1, sizeof(toml_array_t));
 616 | 	if (!ret) {
 617 | 		e_outofmemory(ctx, FLINE);
 618 | 		return 0; /// nitem is unchanged, so the spare zeroed slot is simply unused
 619 | 	}
 620 | 	base[n].arr = ret;
 621 | 	parent->nitem++;
 622 | 	return ret; /// new, empty sub-array stored as the last item of parent
 623 | }
 624 | 
 625 | // Create a table in an array
 626 | static toml_table_t* create_table_in_array(context_t* ctx, toml_array_t* parent) {
 627 | 	int             n    = parent->nitem;
 628 | 	toml_arritem_t* base = expand_arritem(parent->item, n);
 629 | 	if (!base) {
 630 | 		e_outofmemory(ctx, FLINE);
 631 | 		return 0;
 632 | 	}
 633 | 	parent->item = base; /// adopt at once: the old block is gone, so the old pointer must not survive an early return
 634 | 	toml_table_t* ret = (toml_table_t*)CALLOC(1, sizeof(toml_table_t));
 635 | 	if (!ret) {
 636 | 		e_outofmemory(ctx, FLINE);
 637 | 		return 0; /// nitem is unchanged, so the spare zeroed slot is simply unused
 638 | 	}
 639 | 	base[n].tbl = ret;
 640 | 	parent->nitem++;
 641 | 	return ret; /// new, empty table stored as the last item of parent
 642 | }
 643 | 
 644 | static bool skip_newlines(context_t* ctx, bool isdotspecial) {
 645 | 	bool ok = true; /// turns false only when next_token() reports an error
 646 | 	while (ok && ctx->tok.tok == NEWLINE) {
 647 | 		ok = next_token(ctx, isdotspecial) == 0;
 648 | 		if (ok && ctx->tok.eof)
 649 | 			break;
 650 | 	}
 651 | 	return ok;
 652 | }
 653 | 
 654 | static int parse_keyval(context_t* ctx, toml_table_t* tbl);
 655 | 
 656 | static inline int eat_token(context_t* ctx, tokentype_t typ, bool isdotspecial, const char* fline) {
 657 | 	/// The caller expects the current token to be typ; anything else is a parser bug.
 658 | 	if (ctx->tok.tok != typ)
 659 | 		return e_internal(ctx, fline);
 660 | 	/// Advance, normalising any tokenizer failure to -1.
 661 | 	return next_token(ctx, isdotspecial) ? -1 : 0;
 662 | }
 663 | 
 664 | // We are at '{ ... }'; parse the table.
 665 | static int parse_inline_table(context_t* ctx, toml_table_t* tbl) {
 666 | 	if (eat_token(ctx, LBRACE, 1, FLINE))
 667 | 		return -1;
 668 | 
 669 | 	while (ctx->tok.tok != RBRACE) { // until closing brace
 670 | 		if (ctx->tok.eof)
 671 | 			return e_syntax(ctx, ctx->tok.pos, "no closing '}'");
 672 | 
 673 | 		/// Newlines are allowed between entries; drop them one token at a
 674 | 		/// time and re-check for the closing brace.
 675 | 		if (ctx->tok.tok == NEWLINE) {
 676 | 			if (eat_token(ctx, NEWLINE, 1, FLINE))
 677 | 				return -1;
 678 | 			continue;
 679 | 		}
 680 | 
 681 | 		if (ctx->tok.tok != STRING)
 682 | 			return e_syntax(ctx, ctx->tok.pos, "expected a string");
 683 | 		if (parse_keyval(ctx, tbl))
 684 | 			return -1;
 685 | 
 686 | 		/// Only a comma lets the list continue; any other token ends it.
 687 | 		if (ctx->tok.tok != COMMA)
 688 | 			break;
 689 | 		if (eat_token(ctx, COMMA, 1, FLINE))
 690 | 			return -1;
 691 | 	}
 692 | 
 693 | 	/// Newlines may still separate the last entry from the closing brace. The
 694 | 	/// eof test stops the loop once the input is exhausted.
 695 | 	for (;;) {
 696 | 		bool more = ctx->tok.tok == NEWLINE && !ctx->tok.eof;
 697 | 		if (!more)
 698 | 			break;
 699 | 		if (eat_token(ctx, NEWLINE, 1, FLINE))
 700 | 			return -1;
 701 | 	}
 702 | 
 703 | 	if (eat_token(ctx, RBRACE, 1, FLINE))
 704 | 		return -1;
 705 | 
 706 | 	tbl->readonly = 1; /// parse_keyval() refuses to add keys to a readonly table
 707 | 	return 0;
 708 | }
 709 | 
 710 | static int valtype(const char* val) {
 711 | 	/// Quoted text is a string; otherwise try each scalar converter in turn.
 712 | 	if (*val == '\'' || *val == '"')
 713 | 		return 's';
 714 | 	if (!toml_value_bool(val, 0))
 715 | 		return 'b';
 716 | 	if (!toml_value_int(val, 0))
 717 | 		return 'i';
 718 | 	if (!toml_value_double(val, 0))
 719 | 		return 'd';
 720 | 
 721 | 	toml_timestamp_t ts;
 722 | 	if (toml_value_timestamp(val, &ts) != 0)
 723 | 		return 'u'; /// unknown
 724 | 	if (!ts.year)
 725 | 		return 't'; /// time
 726 | 	// TODO: is the date result ever reached?
 727 | 	return ts.hour ? 'T' : 'D'; /// timestamp : date
 728 | }
 729 | 
 730 | // We are at '[...]'
 731 | static int parse_array(context_t* ctx, toml_array_t* arr) { // returns 0 on success, -1 with the error recorded in ctx
 732 | 	if (eat_token(ctx, LBRACKET, 0, FLINE))
 733 | 		return -1;
 734 | 
 735 | 	for (;;) { /// one element per iteration
 736 | 		if (!skip_newlines(ctx, 0))
 737 | 			return -1;
 738 | 
 739 | 		if (ctx->tok.tok == RBRACKET) /// until ]
 740 | 			break;
 741 | 
 742 | 		switch (ctx->tok.tok) {
 743 | 		case MSTRING:
 744 | 		case STRING:  {
 745 | 			/// kind: 'v' while every element is a plain value, 'm' once kinds are mixed
 746 | 			if (arr->kind == 0)
 747 | 				arr->kind = 'v';
 748 | 			else if (arr->kind != 'v')
 749 | 				arr->kind = 'm';
 750 | 
 751 | 			char* val  = ctx->tok.ptr;
 752 | 			int   vlen = ctx->tok.len;
 753 | 
 754 | 			/// make a new value in array
 755 | 			toml_arritem_t* newval = create_value_in_array(ctx, arr);
 756 | 			if (!newval)
 757 | 				return e_outofmemory(ctx, FLINE);
 758 | 
 759 | 			if (!(newval->val = STRNDUP(val, vlen))) /// keep the raw token text
 760 | 				return e_outofmemory(ctx, FLINE);
 761 | 
 762 | 			newval->valtype = valtype(newval->val);
 763 | 
 764 | 			/// set array type if this is the first entry
 765 | 			if (arr->nitem == 1)
 766 | 				arr->type = newval->valtype;
 767 | 			else if (arr->type != newval->valtype)
 768 | 				arr->type = 'm'; /// mixed
 769 | 
 770 | 			if (eat_token(ctx, ctx->tok.tok, 0, FLINE))
 771 | 				return -1;
 772 | 			break;
 773 | 		}
 774 | 		case LBRACKET: { // [ [array], [array] ... ]
 775 | 			// set the array kind if this will be the first entry.
 776 | 			if (arr->kind == 0)
 777 | 				arr->kind = 'a';
 778 | 			else if (arr->kind != 'a')
 779 | 				arr->kind = 'm';
 780 | 
 781 | 			toml_array_t* subarr = create_array_in_array(ctx, arr);
 782 | 			if (!subarr)
 783 | 				return -1;
 784 | 			if (parse_array(ctx, subarr)) /// recurse for the nested array
 785 | 				return -1;
 786 | 			break;
 787 | 		}
 788 | 		case LBRACE: { // [ {table}, {table} ... ]
 789 | 			// set the array kind if this will be the first entry.
 790 | 			if (arr->kind == 0)
 791 | 				arr->kind = 't';
 792 | 			else if (arr->kind != 't')
 793 | 				arr->kind = 'm';
 794 | 
 795 | 			toml_table_t* subtbl = create_table_in_array(ctx, arr);
 796 | 			if (!subtbl)
 797 | 				return -1;
 798 | 			if (parse_inline_table(ctx, subtbl))
 799 | 				return -1;
 800 | 			break;
 801 | 		}
 802 | 		default: return e_syntax(ctx, ctx->tok.pos, "syntax error");
 803 | 		}
 804 | 
 805 | 		if (!skip_newlines(ctx, 0))
 806 | 			return -1;
 807 | 
 808 | 		// on comma, continue to scan for next element
 809 | 		if (ctx->tok.tok == COMMA) {
 810 | 			if (eat_token(ctx, COMMA, 0, FLINE))
 811 | 				return -1;
 812 | 			continue;
 813 | 		}
 814 | 		break;
 815 | 	}
 816 | 
 817 | 	if (eat_token(ctx, RBRACKET, 1, FLINE)) /// anything but ] here is reported as an internal error
 818 | 		return -1;
 819 | 	return 0;
 820 | }
 821 | 
 822 | // Handle lines like:
 823 | //   key = "value"
 824 | //   key = [ array ]
 825 | //   key = { table }
 826 | static int parse_keyval(context_t* ctx, toml_table_t* tbl) { // returns 0 on success, -1 with the error recorded in ctx
 827 | 	if (tbl->readonly) /// set by parse_inline_table() once an inline table is closed
 828 | 		return e_keyexists(ctx, ctx->tok.pos);
 829 | 
 830 | 	token_t key = ctx->tok; /// saved: ctx->tok moves on before the key is stored
 831 | 	if (eat_token(ctx, STRING, 1, FLINE))
 832 | 		return -1;
 833 | 
 834 | 	if (ctx->tok.tok == DOT) {
 835 | 		// Handle inline dotted key:
 836 | 		//   physical.color = "orange"
 837 | 		//   physical.shape = "round"
 838 | 		toml_table_t* subtbl = 0;
 839 | 		{
 840 | 			int   keylen;
 841 | 			char* subtblstr = normalize_key(ctx, key, &keylen);
 842 | 			if (!subtblstr)
 843 | 				return -1;
 844 | 
 845 | 			subtbl = toml_table_table(tbl, subtblstr); /// reuse the sub-table if it already exists
 846 | 			if (subtbl)
 847 | 				subtbl->keylen = keylen;
 848 | 			xfree(subtblstr);
 849 | 		}
 850 | 		if (!subtbl) {
 851 | 			subtbl = create_keytable_in_table(ctx, tbl, key);
 852 | 			if (!subtbl)
 853 | 				return -1;
 854 | 		}
 855 | 		if (next_token(ctx, true)) /// step over the dot
 856 | 			return -1;
 857 | 		if (parse_keyval(ctx, subtbl)) /// the rest of the dotted key belongs to the sub-table
 858 | 			return -1;
 859 | 		return 0;
 860 | 	}
 861 | 
 862 | 	if (ctx->tok.tok != EQUAL)
 863 | 		return e_syntax(ctx, ctx->tok.pos, "missing '='");
 864 | 
 865 | 	if (next_token(ctx, false))
 866 | 		return -1;
 867 | 
 868 | 	switch (ctx->tok.tok) {
 869 | 	case MSTRING:
 870 | 	case STRING:  { // key = "value"
 871 | 		toml_keyval_t* keyval = create_keyval_in_table(ctx, tbl, key);
 872 | 		if (!keyval)
 873 | 			return -1;
 874 | 		token_t val = ctx->tok;
 875 | 
 876 | 		assert(keyval->val == 0);
 877 | 		if (!(keyval->val = STRNDUP(val.ptr, val.len))) /// keep the raw token text
 878 | 			return e_outofmemory(ctx, FLINE);
 879 | 
 880 | 		if (next_token(ctx, true))
 881 | 			return -1;
 882 | 
 883 | 		return 0;
 884 | 	}
 885 | 	case LBRACKET: { // key = [ array ]
 886 | 		toml_array_t* arr = create_keyarray_in_table(ctx, tbl, key, 0); /// kind 0: parse_array() sets it from the first element
 887 | 		if (!arr)
 888 | 			return -1;
 889 | 		if (parse_array(ctx, arr))
 890 | 			return -1;
 891 | 		return 0;
 892 | 	}
 893 | 	case LBRACE: { // key = { table }
 894 | 		toml_table_t* nexttbl = create_keytable_in_table(ctx, tbl, key);
 895 | 		if (!nexttbl)
 896 | 			return -1;
 897 | 		if (parse_inline_table(ctx, nexttbl))
 898 | 			return -1;
 899 | 		return 0;
 900 | 	}
 901 | 	default: return e_syntax(ctx, ctx->tok.pos, "syntax error");
 902 | 	}
 903 | 	return 0; /// not reached: every switch branch returns
 904 | }
 905 | 
 906 | typedef struct tabpath_t tabpath_t;
 907 | struct tabpath_t { // NOTE(review): no use is visible in this part of the file; context_t.tpath holds the path that fill_tblpath() builds
 908 | 	int     cnt;
 909 | 	token_t key[10];
 910 | };
 911 | 
 912 | // At [x.y.z] or [[x.y.z]]
 913 | // Scan forward and fill tblpath until it enters ] or ]]
 914 | // There will be at least one entry on return.
 915 | static int fill_tblpath(context_t* ctx) {
 916 | 	/// Release the keys left over from the previous header.
 917 | 	for (int i = ctx->tpath.top - 1; i >= 0; i--) {
 918 | 		xfree(ctx->tpath.key[i]);
 919 | 		ctx->tpath.key[i] = 0;
 920 | 	}
 921 | 	ctx->tpath.top = 0;
 922 | 
 923 | 	for (;;) {
 924 | 		const int depth = ctx->tpath.top;
 925 | 		if (depth >= 10)
 926 | 			return e_syntax(ctx, ctx->tok.pos, "table path is too deep; max allowed is 10.");
 927 | 		if (ctx->tok.tok != STRING)
 928 | 			return e_syntax(ctx, ctx->tok.pos, "invalid or missing key");
 929 | 
 930 | 		int   klen;
 931 | 		char* name = normalize_key(ctx, ctx->tok, &klen);
 932 | 		if (!name)
 933 | 			return -1;
 934 | 		ctx->tpath.tok[depth]    = ctx->tok;
 935 | 		ctx->tpath.key[depth]    = name;
 936 | 		ctx->tpath.keylen[depth] = klen;
 937 | 		ctx->tpath.top           = depth + 1;
 938 | 
 939 | 		/// After a key: "]" ends the path, "." announces another key.
 940 | 		if (next_token(ctx, true))
 941 | 			return -1;
 942 | 		if (ctx->tok.tok == RBRACKET)
 943 | 			break;
 944 | 		if (ctx->tok.tok != DOT)
 945 | 			return e_syntax(ctx, ctx->tok.pos, "invalid key");
 946 | 		if (next_token(ctx, true))
 947 | 			return -1;
 948 | 	}
 949 | 
 950 | 	if (ctx->tpath.top <= 0) // TODO: never reached?
 951 | 		return e_syntax(ctx, ctx->tok.pos, "empty table selector");
 952 | 	return 0;
 953 | }
 954 | 
 955 | // Walk tblpath from the root, and create new tables on the way. Sets
 956 | // ctx->curtbl to the final table.
 957 | static int walk_tabpath(context_t* ctx) {
 958 | 	toml_table_t* curtbl = ctx->root; /// start from root
 959 | 
 960 | 	for (int i = 0; i < ctx->tpath.top; i++) {
 961 | 		const char* key    = ctx->tpath.key[i];
 962 | 		int         keylen = ctx->tpath.keylen[i];
 963 | 
 964 | 		toml_keyval_t* nextval = 0;
 965 | 		toml_array_t*  nextarr = 0;
 966 | 		toml_table_t*  nexttbl = 0;
 967 | 		switch (check_key(curtbl, key, &nextval, &nextarr, &nexttbl)) {
 968 | 		case 't': /// found a table. nexttbl is where we will go next.
 969 | 			break;
 970 | 		case 'a': /// found an array. nexttbl is the last table in the array.
 971 | 			if (nextarr->kind != 't')
 972 | 				return e_internal(ctx, FLINE);
 973 | 
 974 | 			if (nextarr->nitem == 0)
 975 | 				return e_internal(ctx, FLINE);
 976 | 
 977 | 			nexttbl = nextarr->item[nextarr->nitem - 1].tbl;
 978 | 			break;
 979 | 		case 'v': return e_keyexists(ctx, ctx->tpath.tok[i].pos);
 980 | 		default:  { /// Not found. Let's create an implicit table.
 981 | 			int            n    = curtbl->ntbl;
 982 | 			toml_table_t** base = (toml_table_t**)expand_ptrarr((void**)curtbl->tbl, n);
 983 | 			if (base == 0)
 984 | 				return e_outofmemory(ctx, FLINE);
 985 | 
 986 | 			curtbl->tbl = base;
 987 | 
 988 | 			if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0)
 989 | 				return e_outofmemory(ctx, FLINE);
 990 | 
 991 | 			if ((base[n]->key = STRDUP(key)) == 0)
 992 | 				return e_outofmemory(ctx, FLINE);
 993 | 			base[n]->keylen = keylen;
 994 | 
 995 | 			nexttbl = curtbl->tbl[curtbl->ntbl++];
 996 | 
 997 | 			/// tabs created by walk_tabpath are considered implicit
 998 | 			nexttbl->implicit = true;
 999 | 		}; break;
1000 | 		}
1001 | 		curtbl = nexttbl; /// switch to next tbl
1002 | 	}
1003 | 
1004 | 	ctx->curtbl = curtbl; /// save it
1005 | 	return 0;
1006 | }
1007 | 
1008 | // handle lines like [x.y.z] or [[x.y.z]]
1009 | static int parse_select(context_t* ctx) {
1010 | 	assert(ctx->tok.tok == LBRACKET);
1011 | 
1012 | 	// true if [[
1013 | 	bool aot = (ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == '[');
1014 | 
1015 | 	// Need to detect '[[' on our own because next_token() will skip whitespace,
1016 | 	// and '[ [' would be taken as '[[', which is wrong.
1017 | 
1018 | 	// eat [ or [[
1019 | 	if (eat_token(ctx, LBRACKET, 1, FLINE))
1020 | 		return -1;
1021 | 	if (aot) {
1022 | 		assert(ctx->tok.tok == LBRACKET);
1023 | 		if (eat_token(ctx, LBRACKET, 1, FLINE))
1024 | 			return -1;
1025 | 	}
1026 | 
1027 | 	if (fill_tblpath(ctx))
1028 | 		return -1;
1029 | 
1030 | 	// For [x.y.z] or [[x.y.z]], remove z from tpath.
1031 | 	token_t z = ctx->tpath.tok[ctx->tpath.top - 1];
1032 | 	xfree(ctx->tpath.key[ctx->tpath.top - 1]);
1033 | 	ctx->tpath.top--;
1034 | 
1035 | 	// Set up ctx->curtbl.
1036 | 	if (walk_tabpath(ctx))
1037 | 		return -1;
1038 | 
1039 | 	if (!aot) {
1040 | 		// [x.y.z] -> create z = {} in x.y
1041 | 		toml_table_t* curtbl = create_keytable_in_table(ctx, ctx->curtbl, z);
1042 | 		if (!curtbl)
1043 | 			return -1;
1044 | 		ctx->curtbl = curtbl;
1045 | 	} else {
1046 | 		// [[x.y.z]] -> create z = [] in x.y
1047 | 		toml_array_t* arr = 0;
1048 | 		{
1049 | 			int   keylen;
1050 | 			char* zstr = normalize_key(ctx, z, &keylen);
1051 | 			if (!zstr)
1052 | 				return -1;
1053 | 			arr = toml_table_array(ctx->curtbl, zstr);
1054 | 			if (arr)
1055 | 				arr->keylen = keylen;
1056 | 			xfree(zstr);
1057 | 		}
1058 | 		if (!arr) {
1059 | 			arr = create_keyarray_in_table(ctx, ctx->curtbl, z, 't');
1060 | 			if (!arr)
1061 | 				return -1;
1062 | 		}
1063 | 		if (arr->kind != 't')
1064 | 			return e_syntax(ctx, z.pos, "array mismatch");
1065 | 
1066 | 		// add to z[]
1067 | 		toml_table_t* dest;
1068 | 		{
1069 | 			toml_table_t* t = create_table_in_array(ctx, arr);
1070 | 			if (!t)
1071 | 				return -1;
1072 | 
1073 | 			if ((t->key = STRDUP("__anon__")) == 0)
1074 | 				return e_outofmemory(ctx, FLINE);
1075 | 			dest = t;
1076 | 		}
1077 | 
1078 | 		ctx->curtbl = dest;
1079 | 	}
1080 | 
1081 | 	if (ctx->tok.tok != RBRACKET) // TODO: never reached
1082 | 		return e_syntax(ctx, ctx->tok.pos, "expected ']'");
1083 | 	if (aot) {
1084 | 		if (!(ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == ']'))
1085 | 			return e_syntax(ctx, ctx->tok.pos, "expected ']]'");
1086 | 		if (eat_token(ctx, RBRACKET, 1, FLINE))
1087 | 			return -1;
1088 | 	}
1089 | 
1090 | 	if (eat_token(ctx, RBRACKET, 1, FLINE))
1091 | 		return -1;
1092 | 	if (ctx->tok.tok != NEWLINE)
1093 | 		return e_syntax(ctx, ctx->tok.pos, "extra chars after ] or ]]");
1094 | 	return 0;
1095 | }
1096 | 
1097 | toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz) {
1098 | 	context_t ctx;
1099 | 
1100 | 	/// clear errbuf
1101 | 	if (errbufsz <= 0)
1102 | 		errbufsz = 0;
1103 | 	if (errbufsz > 0)
1104 | 		errbuf[0] = 0;
1105 | 
1106 | 	// init context
1107 | 	memset(&ctx, 0, sizeof(ctx));
1108 | 	ctx.start    = toml;
1109 | 	ctx.stop     = ctx.start + strlen(toml);
1110 | 	ctx.errbuf   = errbuf;
1111 | 	ctx.errbufsz = errbufsz;
1112 | 
1113 | 	// start with an artificial newline of length 0
1114 | 	ctx.tok.tok      = NEWLINE;
1115 | 	ctx.tok.pos.line = 1;
1116 | 	ctx.tok.pos.col  = 1;
1117 | 	ctx.tok.ptr      = toml;
1118 | 	ctx.tok.len      = 0;
1119 | 
1120 | 	// make a root table
1121 | 	if ((ctx.root = CALLOC(1, sizeof(*ctx.root))) == 0) {
1122 | 		e_outofmemory(&ctx, FLINE);
1123 | 		return 0; // Do not goto fail, root table not set up yet
1124 | 	}
1125 | 
1126 | 	// set root as default table
1127 | 	ctx.curtbl = ctx.root;
1128 | 
1129 | 	// Scan forward until EOF
1130 | 	for (token_t tok = ctx.tok; !tok.eof; tok = ctx.tok) {
1131 | 		switch (tok.tok) {
1132 | 		case NEWLINE:
1133 | 			if (next_token(&ctx, true))
1134 | 				goto fail;
1135 | 			break;
1136 | 
1137 | 		case STRING:
1138 | 			if (parse_keyval(&ctx, ctx.curtbl))
1139 | 				goto fail;
1140 | 
1141 | 			if (ctx.tok.tok != NEWLINE) {
1142 | 				e_syntax(&ctx, ctx.tok.pos, "extra chars after value");
1143 | 				goto fail;
1144 | 			}
1145 | 
1146 | 			if (eat_token(&ctx, NEWLINE, 1, FLINE))
1147 | 				goto fail;
1148 | 			break;
1149 | 
1150 | 		case LBRACKET: // [ x.y.z ] or [[ x.y.z ]]
1151 | 			if (parse_select(&ctx))
1152 | 				goto fail;
1153 | 			break;
1154 | 
1155 | 		default: e_syntax(&ctx, tok.pos, "syntax error"); goto fail;
1156 | 		}
1157 | 	}
1158 | 
1159 | 	/// success
1160 | 	for (int i = 0; i < ctx.tpath.top; i++)
1161 | 		xfree(ctx.tpath.key[i]);
1162 | 	return ctx.root;
1163 | 
1164 | fail:
1165 | 	// Something bad has happened. Free resources and return error.
1166 | 	for (int i = 0; i < ctx.tpath.top; i++)
1167 | 		xfree(ctx.tpath.key[i]);
1168 | 	toml_free(ctx.root);
1169 | 	return 0;
1170 | }
1171 | 
1172 | toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz) {
1173 | 	int   bufsz = 0;
1174 | 	char* buf   = 0;
1175 | 	int   off   = 0;
1176 | 	int   inc   = 1024;
1177 | 
1178 | 	while (!feof(fp)) {
1179 | 		if (bufsz == 1024 * 20) /// Increment buffer by 20k after 20k.
1180 | 			inc = 1024 * 20;
1181 | 		if (off == bufsz) {
1182 | 			int   xsz = bufsz + inc;
1183 | 			char* x   = expand(buf, bufsz, xsz);
1184 | 			if (!x) {
1185 | 				snprintf(errbuf, errbufsz, "out of memory");
1186 | 				xfree(buf);
1187 | 				return 0;
1188 | 			}
1189 | 			buf   = x;
1190 | 			bufsz = xsz;
1191 | 		}
1192 | 
1193 | 		errno = 0;
1194 | 		int n = fread(buf + off, 1, bufsz - off, fp);
1195 | 		if (ferror(fp)) {
1196 | 			snprintf(errbuf, errbufsz, "%s", (errno ? strerror(errno) : "Error reading file"));
1197 | 			xfree(buf);
1198 | 			return 0;
1199 | 		}
1200 | 		off += n;
1201 | 	}
1202 | 
1203 | 	/// tag on a NUL to cap the string
1204 | 	if (off == bufsz) {
1205 | 		int   xsz = bufsz + 1;
1206 | 		char* x   = expand(buf, bufsz, xsz);
1207 | 		if (!x) {
1208 | 			snprintf(errbuf, errbufsz, "out of memory");
1209 | 			xfree(buf);
1210 | 			return 0;
1211 | 		}
1212 | 		buf   = x;
1213 | 		bufsz = xsz;
1214 | 	}
1215 | 	buf[off] = 0;
1216 | 
1217 | 	/// parse it, cleanup and finish.
1218 | 	toml_table_t* ret = toml_parse(buf, errbuf, errbufsz);
1219 | 	xfree(buf);
1220 | 	return ret;
1221 | }
1222 | 
1223 | static void xfree_kval(toml_keyval_t* p) {
1224 | 	if (!p)
1225 | 		return;
1226 | 	xfree(p->key);
1227 | 	xfree(p->val);
1228 | 	xfree(p);
1229 | }
1230 | 
1231 | static void xfree_tbl(toml_table_t* p);
1232 | 
1233 | static void xfree_arr(toml_array_t* p) {
1234 | 	if (!p)
1235 | 		return;
1236 | 
1237 | 	xfree(p->key);
1238 | 	const int n = p->nitem;
1239 | 	for (int i = 0; i < n; i++) {
1240 | 		toml_arritem_t* a = &p->item[i];
1241 | 		if (a->val)
1242 | 			xfree(a->val);
1243 | 		else if (a->arr)
1244 | 			xfree_arr(a->arr);
1245 | 		else if (a->tbl)
1246 | 			xfree_tbl(a->tbl);
1247 | 	}
1248 | 	xfree(p->item);
1249 | 	xfree(p);
1250 | }
1251 | 
1252 | static void xfree_tbl(toml_table_t* p) {
1253 | 	if (!p)
1254 | 		return;
1255 | 
1256 | 	xfree(p->key);
1257 | 
1258 | 	for (int i = 0; i < p->nkval; i++)
1259 | 		xfree_kval(p->kval[i]);
1260 | 	xfree(p->kval);
1261 | 
1262 | 	for (int i = 0; i < p->narr; i++)
1263 | 		xfree_arr(p->arr[i]);
1264 | 	xfree(p->arr);
1265 | 
1266 | 	for (int i = 0; i < p->ntbl; i++)
1267 | 		xfree_tbl(p->tbl[i]);
1268 | 	xfree(p->tbl);
1269 | 
1270 | 	xfree(p);
1271 | }
1272 | 
1273 | void toml_free(toml_table_t* tbl) {
1274 | 	xfree_tbl(tbl);
1275 | }
1276 | 
1277 | static void set_token(context_t* ctx, tokentype_t tok, toml_pos_t pos, char* ptr, int len) {
1278 | 	token_t t;
1279 | 	t.tok    = tok;
1280 | 	t.pos    = pos;
1281 | 	t.ptr    = ptr;
1282 | 	t.len    = len;
1283 | 	t.eof    = 0;
1284 | 	ctx->tok = t;
1285 | }
1286 | 
1287 | static void set_eof(context_t* ctx, toml_pos_t pos) {
1288 | 	set_token(ctx, NEWLINE, pos, ctx->stop, 0);
1289 | 	ctx->tok.eof = 1;
1290 | }
1291 | 
// Read exactly n decimal digits [0-9] starting at p.
// Returns their value, or -1 if fewer than n digits are present (scanning
// stops at the first non-digit, including the terminating NUL).
static int scan_digits(const char* p, int n) {
	int ret = 0;
	// isdigit() is only defined for unsigned char values and EOF, so cast:
	// a plain (possibly signed) char holding a byte >= 0x80 would be negative.
	for (; n > 0 && isdigit((unsigned char)*p); n--, p++)
		ret = 10 * ret + (*p - '0');
	return n ? -1 : ret;
}
1299 | 
// Scan a date of the form YYYY-MM-DD at p. Each non-NULL output receives the
// scanned component, or -1 for a component that could not be read. Only the
// digit/dash layout is checked, not the value ranges.
// Returns true when all three components were read.
static bool scan_date(const char* p, int* YY, int* MM, int* DD) {
	int y = scan_digits(p, 4);
	int m = -1;
	int d = -1;

	// Later fields are only inspected once the earlier ones matched, so the
	// scan never reads past a premature end of string.
	if (y >= 0 && p[4] == '-')
		m = scan_digits(p + 5, 2);
	if (m >= 0 && p[7] == '-')
		d = scan_digits(p + 8, 2);

	if (YY)
		*YY = y;
	if (MM)
		*MM = m;
	if (DD)
		*DD = d;

	return y >= 0 && m >= 0 && d >= 0;
}
1312 | 
// Scan a time of the form HH:MM or HH:MM:SS at p. Each non-NULL output
// receives the scanned component, or -1 for a component that could not be
// read. Value ranges are not checked.
// Returns true when hour and minute were read; seconds are optional.
static bool scan_time(const char* p, int* hh, int* mm, int* ss) {
	int h = scan_digits(p, 2);
	int m = -1;
	int s = -1;

	// Later fields are only inspected once the earlier ones matched.
	if (h >= 0 && p[2] == ':')
		m = scan_digits(p + 3, 2);
	if (m >= 0 && p[5] == ':')
		s = scan_digits(p + 6, 2);

	if (hh)
		*hh = h;
	if (mm)
		*mm = m;
	if (ss)
		*ss = s;

	return h >= 0 && m >= 0;
}
1325 | 
// Convert the run of decimal digits at p (the fraction after a '.') into
// milliseconds. The first three digits weigh 100, 10 and 1; further digits
// are consumed but contribute nothing. *endp is set to the first non-digit.
static int parse_millisec(const char* p, const char** endp) {
	int total  = 0;
	int weight = 100; /// value of the current digit position, in millisec

	while (*p >= '0' && *p <= '9') {
		total  += weight * (*p - '0');
		weight /= 10;
		p++;
	}

	*endp = p;
	return total;
}
1334 | 
// Scan a UTC offset of the form +HH:MM or -HH:MM at p. p[0] is taken as the
// sign character and is not validated here. Hours above 14 and minutes above
// 59 are rejected.
// On success stores the offset in minutes (negated for '-') through tz, if
// non-NULL, and returns true.
static bool scan_offset(const char* p, int* tz) {
	const int sign = p[0];
	const int hh   = scan_digits(p + 1, 2);

	// A failed hour scan leaves the minutes at -1 as well.
	int mm = -1;
	if (hh >= 0 && p[3] == ':')
		mm = scan_digits(p + 4, 2);

	if (hh < 0 || hh > 14)
		return false;
	if (mm < 0 || mm > 59)
		return false;

	if (tz) {
		int total = hh * 60 + mm;
		*tz       = (sign == '-') ? -total : total;
	}
	return true;
}
1348 | 
1349 | static int scan_string(context_t* ctx, char* p, toml_pos_t* pos, bool dotisspecial) {
1350 | 	char* orig = p;
1351 | 
1352 | 	// Literal multiline.
1353 | 	if (strncmp(p, "'''", 3) == 0) {
1354 | 		char* q   = p + 3;
1355 | 		pos->col += 3;
1356 | 		while (true) {
1357 | 			q = strstr(q, "'''");
1358 | 			if (q == 0)
1359 | 				return e_syntax(ctx, *pos, "unterminated triple quote (''')");
1360 | 			int i = 0;
1361 | 			while (q[3] == '\'') {
1362 | 				i++;
1363 | 				if (i >= 3)
1364 | 					return e_syntax(ctx, *pos, "too many ''' in triple-s-quote");
1365 | 				q++;
1366 | 			}
1367 | 			break;
1368 | 		}
1369 | 		set_token(ctx, MSTRING, *pos, orig, q + 3 - orig);
1370 | 		return 0;
1371 | 	}
1372 | 
1373 | 	// Multiline.
1374 | 	if (strncmp(p, "\"\"\"", 3) == 0) {
1375 | 		char* q   = p + 3;
1376 | 		pos->col += 3;
1377 | 		while (true) {
1378 | 			q = strstr(q, "\"\"\"");
1379 | 			if (q == 0)
1380 | 				return e_syntax(ctx, *pos, "unterminated triple quote (\"\"\")");
1381 | 			if (q[-1] == '\\') {
1382 | 				q++;
1383 | 				continue;
1384 | 			}
1385 | 			int i = 0;
1386 | 			while (q[3] == '\"') {
1387 | 				i++;
1388 | 				if (i >= 3)
1389 | 					return e_syntax(ctx, *pos, "too many \"\"\" in triple-d-quote");
1390 | 				q++;
1391 | 			}
1392 | 			break;
1393 | 		}
1394 | 
1395 | 		/// the string is [p+3, q-1]
1396 | 		int  hexreq = 0; /// #hex required
1397 | 		bool escape = false;
1398 | 		for (p += 3; p < q; p++) {
1399 | 			if (escape) {
1400 | 				escape = false;
1401 | 				if (strchr("btnfre\"\\", *p))
1402 | 					continue;
1403 | 				if (*p == 'x') {
1404 | 					hexreq = 2;
1405 | 					continue;
1406 | 				}
1407 | 				if (*p == 'u') {
1408 | 					hexreq = 4;
1409 | 					continue;
1410 | 				}
1411 | 				if (*p == 'U') {
1412 | 					hexreq = 8;
1413 | 					continue;
1414 | 				}
1415 | 				if (p[strspn(p, " \t\r")] == '\n')
1416 | 					continue; // allow for line ending backslash
1417 | 				return e_syntax(ctx, *pos, "bad escape char");
1418 | 			}
1419 | 			if (hexreq) {
1420 | 				hexreq--;
1421 | 				if (strchr("0123456789ABCDEFabcdef", *p))
1422 | 					continue;
1423 | 				return e_syntax(ctx, *pos, "expected hex char");
1424 | 			}
1425 | 			if (*p == '\\') {
1426 | 				escape = true;
1427 | 				continue;
1428 | 			}
1429 | 		}
1430 | 		if (escape) // TODO: unreachable, I think?
1431 | 			return e_syntax(ctx, *pos, "expected an escape char");
1432 | 		if (hexreq)
1433 | 			return e_syntax(ctx, *pos, "expected more hex char");
1434 | 
1435 | 		set_token(ctx, MSTRING, *pos, orig, q + 3 - orig);
1436 | 		return 0;
1437 | 	}
1438 | 
1439 | 	// Literal string.
1440 | 	if (*p == '\'') {
1441 | 		for (p++; *p && *p != '\n' && *p != '\''; p++)
1442 | 			pos->col++;
1443 | 		if (*p != '\'')
1444 | 			return e_syntax(ctx, *pos, "unterminated quote (')");
1445 | 		set_token(ctx, STRING, *pos, orig, p + 1 - orig);
1446 | 		return 0;
1447 | 	}
1448 | 
1449 | 	// Basic String.
1450 | 	if (*p == '\"') {
1451 | 		int  hexreq = 0; /// #hex required
1452 | 		bool escape = false;
1453 | 		for (p++; *p; p++) {
1454 | 			pos->col++;
1455 | 			if (escape) {
1456 | 				escape = false;
1457 | 				if (strchr("btnfre\"\\", *p))
1458 | 					continue;
1459 | 				if (*p == 'x') {
1460 | 					hexreq = 2;
1461 | 					continue;
1462 | 				}
1463 | 				if (*p == 'u') {
1464 | 					hexreq = 4;
1465 | 					continue;
1466 | 				}
1467 | 				if (*p == 'U') {
1468 | 					hexreq = 8;
1469 | 					continue;
1470 | 				}
1471 | 				return e_syntax(ctx, *pos, "bad escape char");
1472 | 			}
1473 | 			if (hexreq) {
1474 | 				hexreq--;
1475 | 				if (strchr("0123456789ABCDEFabcdef", *p))
1476 | 					continue;
1477 | 				return e_syntax(ctx, *pos, "expected hex char");
1478 | 			}
1479 | 			if (*p == '\\') {
1480 | 				escape = true;
1481 | 				continue;
1482 | 			}
1483 | 			if (*p == '\n')
1484 | 				break;
1485 | 			if (*p == '"')
1486 | 				break;
1487 | 		}
1488 | 		if (*p != '"')
1489 | 			return e_syntax(ctx, *pos, "unterminated quote (\")");
1490 | 
1491 | 		set_token(ctx, STRING, *pos, orig, p + 1 - orig);
1492 | 		return 0;
1493 | 	}
1494 | 
1495 | 	// Time
1496 | 	if (!dotisspecial && scan_time(p, 0, 0, 0)) {
1497 | 		p += strspn(p, "0123456789:"); /// forward thru the time.
1498 | 		if (p[0] == '.') {             /// Subseconds
1499 | 			int n = strspn(++p, "0123456789");
1500 | 			if (n == 0)
1501 | 				return e_syntax(ctx, *pos, "extra chars after '.'");
1502 | 			p += n;
1503 | 		}
1504 | 		for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string
1505 | 			;
1506 | 		set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize
1507 | 		return 0;
1508 | 	}
1509 | 
1510 | 	// Datetime
1511 | 	if (!dotisspecial && scan_date(p, 0, 0, 0)) {
1512 | 		p += strspn(p, "0123456789-");                   /// forward thru the date
1513 | 		if (p[0] == ' ' || p[0] == 't' || p[0] == 'T') { /// forward thru the time
1514 | 			p++;
1515 | 			p += strspn(p, "0123456789:");
1516 | 			if (p[0] == '.') { /// Subseconds
1517 | 				int n = strspn(++p, "0123456789");
1518 | 				if (n == 0)
1519 | 					return e_syntax(ctx, *pos, "extra chars after '.'");
1520 | 				p += n;
1521 | 			}
1522 | 		}
1523 | 
1524 | 		// Offset
1525 | 		if (p[0] == 'Z' || p[0] == 'z') {
1526 | 			p++;
1527 | 		} else if (p[0] == '+' || p[0] == '-') {
1528 | 			if (!scan_offset(p, 0))
1529 | 				return e_syntax(ctx, *pos, "invalid offset");
1530 | 			p += 6;
1531 | 		}
1532 | 
1533 | 		for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string
1534 | 			;
1535 | 		set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize
1536 | 		return 0;
1537 | 	}
1538 | 
1539 | 	// Literals
1540 | 	for (; *p && *p != '\n'; p++) {
1541 | 		int ch = *p;
1542 | 		if (ch == '.' && dotisspecial)
1543 | 			break;
1544 | 		if ('A' <= ch && ch <= 'Z')
1545 | 			continue;
1546 | 		if ('a' <= ch && ch <= 'z')
1547 | 			continue;
1548 | 		if (strchr("0123456789+-_.", ch))
1549 | 			continue;
1550 | 		break;
1551 | 	}
1552 | 
1553 | 	set_token(ctx, STRING, *pos, orig, p - orig);
1554 | 	return 0;
1555 | }
1556 | 
1557 | static int next_token(context_t* ctx, bool dotisspecial) {
1558 | 	// Eat this tok.
1559 | 	char*      p   = ctx->tok.ptr;
1560 | 	toml_pos_t pos = ctx->tok.pos;
1561 | 	for (int i = 0; i < ctx->tok.len; i++) {
1562 | 		pos.col++;
1563 | 		if (*p++ == '\n') {
1564 | 			pos.line++;
1565 | 			pos.col = 1;
1566 | 		}
1567 | 	}
1568 | 
1569 | 	/// Make next tok
1570 | 	while (p < ctx->stop) {
1571 | 		if (*p == '#') { /// Skip comment. stop just before the \n.
1572 | 			for (p++; p < ctx->stop && *p != '\n'; p++) {
1573 | 				pos.col++;
1574 | 				if ((*p != '\t' && *p != '\r' && *p != '\n') && ((*p >= 0x00 && *p <= 0x1f) || *p == 0x7f))
1575 | 					return e_syntax(ctx, pos, "invalid control character");
1576 | 				if (*p == '\r' && p < ctx->stop + 1 && *(p + 1) != '\n')
1577 | 					return e_syntax(ctx, pos, "invalid control character");
1578 | 			}
1579 | 			continue;
1580 | 		}
1581 | 
1582 | 		if (dotisspecial && *p == '.') {
1583 | 			set_token(ctx, DOT, pos, p, 1);
1584 | 			return 0;
1585 | 		}
1586 | 
1587 | 		switch (*p) {
1588 | 		case ',':  set_token(ctx, COMMA, pos, p, 1); return 0;
1589 | 		case '=':  set_token(ctx, EQUAL, pos, p, 1); return 0;
1590 | 		case '{':  set_token(ctx, LBRACE, pos, p, 1); return 0;
1591 | 		case '}':  set_token(ctx, RBRACE, pos, p, 1); return 0;
1592 | 		case '[':  set_token(ctx, LBRACKET, pos, p, 1); return 0;
1593 | 		case ']':  set_token(ctx, RBRACKET, pos, p, 1); return 0;
1594 | 		case '\n': set_token(ctx, NEWLINE, pos, p, 1); return 0;
1595 | 		case '\r':
1596 | 		case ' ':
1597 | 		case '\t': /// ignore white spaces
1598 | 			p++;
1599 | 			pos.col++;
1600 | 			continue;
1601 | 		}
1602 | 
1603 | 		return scan_string(ctx, p, &pos, dotisspecial);
1604 | 	}
1605 | 
1606 | 	set_eof(ctx, pos);
1607 | 	return 0;
1608 | }
1609 | 
1610 | const char* toml_table_key(const toml_table_t* tbl, int keyidx, int* keylen) {
1611 | 	if (keyidx < tbl->nkval) {
1612 | 		*keylen = tbl->kval[keyidx]->keylen;
1613 | 		return tbl->kval[keyidx]->key;
1614 | 	}
1615 | 	if ((keyidx -= tbl->nkval) < tbl->narr) {
1616 | 		*keylen = tbl->arr[keyidx]->keylen;
1617 | 		return tbl->arr[keyidx]->key;
1618 | 	}
1619 | 	if ((keyidx -= tbl->narr) < tbl->ntbl) {
1620 | 		*keylen = tbl->tbl[keyidx]->keylen;
1621 | 		return tbl->tbl[keyidx]->key;
1622 | 	}
1623 | 	*keylen = 0;
1624 | 	return 0;
1625 | }
1626 | 
1627 | toml_unparsed_t toml_table_unparsed(const toml_table_t* tbl, const char* key) {
1628 | 	for (int i = 0; i < tbl->nkval; i++)
1629 | 		if (strcmp(key, tbl->kval[i]->key) == 0)
1630 | 			return tbl->kval[i]->val;
1631 | 	return 0;
1632 | }
1633 | 
1634 | toml_array_t* toml_table_array(const toml_table_t* tbl, const char* key) {
1635 | 	for (int i = 0; i < tbl->narr; i++)
1636 | 		if (strcmp(key, tbl->arr[i]->key) == 0)
1637 | 			return tbl->arr[i];
1638 | 	return 0;
1639 | }
1640 | 
1641 | toml_table_t* toml_table_table(const toml_table_t* tbl, const char* key) {
1642 | 	for (int i = 0; i < tbl->ntbl; i++)
1643 | 		if (strcmp(key, tbl->tbl[i]->key) == 0)
1644 | 			return tbl->tbl[i];
1645 | 	return 0;
1646 | }
1647 | 
1648 | toml_unparsed_t toml_array_unparsed(const toml_array_t* arr, int idx) {
1649 | 	return (0 <= idx && idx < arr->nitem) ? arr->item[idx].val : 0;
1650 | }
1651 | 
1652 | int toml_table_len(const toml_table_t* tbl) {
1653 | 	return tbl->nkval + tbl->narr + tbl->ntbl;
1654 | }
1655 | 
1656 | int toml_array_len(const toml_array_t* arr) {
1657 | 	return arr->nitem;
1658 | }
1659 | 
1660 | toml_array_t* toml_array_array(const toml_array_t* arr, int idx) {
1661 | 	return (0 <= idx && idx < arr->nitem) ? arr->item[idx].arr : 0;
1662 | }
1663 | 
1664 | toml_table_t* toml_array_table(const toml_array_t* arr, int idx) {
1665 | 	return (0 <= idx && idx < arr->nitem) ? arr->item[idx].tbl : 0;
1666 | }
1667 | 
// Gregorian leap-year rule: divisible by 4, except century years, which must
// also be divisible by 400.
bool is_leap(int y) {
	if (y % 4 != 0)
		return false;
	if (y % 100 != 0)
		return true;
	return y % 400 == 0;
}
1671 | 
1672 | int toml_value_timestamp(toml_unparsed_t src_, toml_timestamp_t* ret) {
1673 | 	if (!src_)
1674 | 		return -1;
1675 | 
1676 | 	const char* p               = src_;
1677 | 	bool        must_parse_time = false;
1678 | 
1679 | 	memset(ret, 0, sizeof(*ret));
1680 | 
1681 | 	/// YYYY-MM-DD
1682 | 	if (scan_date(p, &ret->year, &ret->month, &ret->day)) {
1683 | 		if (ret->month < 1 || ret->day < 1 || ret->month > 12 || ret->day > 31)
1684 | 			return -1;
1685 | 		if (ret->month == 2 && ret->day > (is_leap(ret->year) ? 29 : 28))
1686 | 			return -1;
1687 | 		ret->kind = 'D';
1688 | 
1689 | 		p += 10;
1690 | 		if (*p) {
1691 | 			if (*p != 'T' && *p != 't' && *p != ' ') /// T or space
1692 | 				return -1;
1693 | 			must_parse_time = true;
1694 | 			p++;
1695 | 		}
1696 | 	}
1697 | 
1698 | 	/// HH:MM:SS
1699 | 	if (scan_time(p, &ret->hour, &ret->minute, &ret->second)) {
1700 | 		if (ret->minute < 0 || ret->hour < 0 || ret->hour > 23 || ret->minute > 59 || ret->second > 60)
1701 | 			return -1;
1702 | 		p += (ret->second == -1 ? 5 : 8);
1703 | 		ret->kind = (ret->kind == 'D' ? 'l' : 't');
1704 | 		if (ret->second == -1)
1705 | 			ret->second = 0;
1706 | 
1707 | 		if (*p == '.') { /// optionally, parse millisec
1708 | 			p++;         /// skip '.'
1709 | 			const char* qq;
1710 | 			ret->millisec = parse_millisec(p, &qq);
1711 | 			p             = qq;
1712 | 		}
1713 | 
1714 | 		if (*p) { /// parse and copy Z
1715 | 			ret->kind = 'd';
1716 | 			if (*p == 'Z' || *p == 'z')
1717 | 				p++;
1718 | 			else if (*p == '+' || *p == '-') {
1719 | 				if (!scan_offset(p, &ret->tz))
1720 | 					return -1;
1721 | 				p += 6;
1722 | 			}
1723 | 		}
1724 | 	}
1725 | 	if (*p != 0)
1726 | 		return -1;
1727 | 	if (must_parse_time && ret->kind == 'D')
1728 | 		return -1;
1729 | 	return 0;
1730 | }
1731 | 
1732 | // Raw to boolean
1733 | int toml_value_bool(toml_unparsed_t src, bool* ret_) {
1734 | 	if (!src)
1735 | 		return -1;
1736 | 	bool  dummy = false;
1737 | 	bool* ret   = ret_ ? ret_ : &dummy;
1738 | 
1739 | 	if (strcmp(src, "true") == 0) {
1740 | 		*ret = true;
1741 | 		return 0;
1742 | 	}
1743 | 	if (strcmp(src, "false") == 0) {
1744 | 		*ret = false;
1745 | 		return 0;
1746 | 	}
1747 | 	return -1;
1748 | }
1749 | 
1750 | // Raw to integer
1751 | int toml_value_int(toml_unparsed_t src, int64_t* ret_) {
1752 | 	if (!src)
1753 | 		return -1;
1754 | 
1755 | 	char        buf[100];
1756 | 	char*       p         = buf;
1757 | 	char*       q         = p + sizeof(buf);
1758 | 	const char* s         = src;
1759 | 	int64_t     dummy     = 0;
1760 | 	int64_t*    ret       = ret_ ? ret_ : &dummy;
1761 | 	bool        have_sign = false;
1762 | 
1763 | 	if (s[0] == '+' || s[0] == '-') { /// allow +/-
1764 | 		have_sign = true;
1765 | 		*p++      = *s++;
1766 | 	}
1767 | 
1768 | 	if (s[0] == '_') /// disallow +_100
1769 | 		return -1;
1770 | 
1771 | 	int base = 0;
1772 | 	if (s[0] == '0') { /// if 0* ...
1773 | 		switch (s[1]) {
1774 | 		case 'x':
1775 | 			base  = 16;
1776 | 			s    += 2;
1777 | 			break;
1778 | 		case 'o':
1779 | 			base  = 8;
1780 | 			s    += 2;
1781 | 			break;
1782 | 		case 'b':
1783 | 			base  = 2;
1784 | 			s    += 2;
1785 | 			break;
1786 | 		case '\0': return *ret = 0, 0;
1787 | 		default:
1788 | 			if (s[1]) /// ensure no other digits after it
1789 | 				return -1;
1790 | 		}
1791 | 		if (!*s)
1792 | 			return -1;
1793 | 		if (have_sign) /// disallow +0xff, -0xff
1794 | 			return -1;
1795 | 		if (s[0] == '_') /// disallow 0x_, 0o_, 0b_
1796 | 			return -1;
1797 | 		if (s[0] == '+' || s[0] == '-') /// disallow 0x+10, 0x-10
1798 | 			return -1;
1799 | 	}
1800 | 
1801 | 	while (*s && p < q) { /// just strip underscores and pass to strtoll
1802 | 		int ch = *s++;
1803 | 		if (ch == '_') {
1804 | 			if (s[0] == '_') /// disallow '__'
1805 | 				return -1;
1806 | 			if (s[0] == '\0') /// numbers cannot end with '_'
1807 | 				return -1;
1808 | 			continue; /// skip _
1809 | 		}
1810 | 		*p++ = ch;
1811 | 	}
1812 | 
1813 | 	if (*s || p == q) /// if not at end-of-string or we ran out of buffer ...
1814 | 		return -1;
1815 | 
1816 | 	*p = 0; /// cap with NUL
1817 | 
1818 | 	/// Run strtoll on buf to get the integer
1819 | 	char* endp;
1820 | 	errno = 0;
1821 | 	*ret  = strtoll(buf, &endp, base);
1822 | 	return (errno || *endp) ? -1 : 0;
1823 | }
1824 | 
1825 | int toml_value_double(toml_unparsed_t src, double* ret_) {
1826 | 	if (!src)
1827 | 		return -1;
1828 | 
1829 | 	char        buf[100];
1830 | 	char*       p     = buf;
1831 | 	char*       q     = p + sizeof(buf);
1832 | 	const char* s     = src;
1833 | 	double      dummy = 0.0;
1834 | 	double*     ret   = ret_ ? ret_ : &dummy;
1835 | 
1836 | 	if (s[0] == '+' || s[0] == '-') /// allow +/-
1837 | 		*p++ = *s++;
1838 | 
1839 | 	if (s[0] == '_') /// disallow +_1.00
1840 | 		return -1;
1841 | 
1842 | 	{ /// decimal point, if used, must be surrounded by at least one digit on each side
1843 | 		char* dot = strchr(s, '.');
1844 | 		if (dot) {
1845 | 			if (dot == s || !isdigit(dot[-1]) || !isdigit(dot[1]))
1846 | 				return -1;
1847 | 		}
1848 | 	}
1849 | 
1850 | 	/// zero must be followed by . or 'e', or NUL
1851 | 	if (s[0] == '0' && s[1] && !strchr("eE.", s[1]))
1852 | 		return -1;
1853 | 
1854 | 	/// Just strip underscores and pass to strtod
1855 | 	bool have_us = false;
1856 | 	while (*s && p < q) {
1857 | 		int ch = *s++;
1858 | 		if (ch == '_') {
1859 | 			have_us = true;
1860 | 			if (s[0] == '_') /// disallow '__'
1861 | 				return -1;
1862 | 			if (s[0] == 'e') /// disallow _e
1863 | 				return -1;
1864 | 			if (s[0] == 0) /// disallow last char '_'
1865 | 				return -1;
1866 | 			continue; /// skip _
1867 | 		}
1868 | 		if (ch == 'I' || ch == 'N' || ch == 'F' || ch == 'A') /// inf and nan are case-sensitive.
1869 | 			return -1;
1870 | 		if (ch == 'e' && s[0] == '_') /// disallow e_
1871 | 			return -1;
1872 | 		*p++ = ch;
1873 | 	}
1874 | 	if (*s || p == q)
1875 | 		return -1; /// reached end of string or buffer is full?
1876 | 
1877 | 	*p = 0; /// cap with NUL
1878 | 
1879 | 	/// Run strtod on buf to get the value
1880 | 	char* endp;
1881 | 	errno = 0;
1882 | 	*ret  = strtod(buf, &endp);
1883 | 	if (errno || *endp)
1884 | 		return -1;
1885 | 	if (have_us && (isnan(*ret) || isinf(*ret)))
1886 | 		return -1;
1887 | 	return 0;
1888 | }
1889 | 
1890 | int toml_value_string(toml_unparsed_t src, char** ret, int* len) {
1891 | 	bool        multiline = false;
1892 | 	const char* sp;
1893 | 	const char* sq;
1894 | 
1895 | 	*ret = 0;
1896 | 	if (!src)
1897 | 		return -1;
1898 | 
1899 | 	/// First char must be a s-quote or d-quote
1900 | 	int qchar  = src[0];
1901 | 	int srclen = strlen(src);
1902 | 	if (!(qchar == '\'' || qchar == '"')) {
1903 | 		return -1;
1904 | 	}
1905 | 
1906 | 	/// triple quotes?
1907 | 	if (qchar == src[1] && qchar == src[2]) {
1908 | 		multiline = true;             /// triple-quote implies multiline
1909 | 		sp        = src + 3;          /// first char after quote
1910 | 		sq        = src + srclen - 3; /// first char of ending quote
1911 | 
1912 | 		if (!(sp <= sq && sq[0] == qchar && sq[1] == qchar && sq[2] == qchar))
1913 | 			return -1; /// last 3 chars in src must be qchar
1914 | 
1915 | 		if (sp[0] == '\n') /// skip new line immediate after qchar
1916 | 			sp++;
1917 | 		else if (sp[0] == '\r' && sp[1] == '\n')
1918 | 			sp += 2;
1919 | 	} else {
1920 | 		sp = src + 1;                    /// first char after quote
1921 | 		sq = src + srclen - 1;           /// ending quote
1922 | 		if (!(sp <= sq && *sq == qchar)) /// last char in src must be qchar
1923 | 			return -1;
1924 | 	}
1925 | 
1926 | 	/// at this point:
1927 | 	///     sp points to first valid char after quote.
1928 | 	///     sq points to one char beyond last valid char.
1929 | 	///     string len is (sq - sp).
1930 | 	if (qchar == '\'')
1931 | 		*ret = norm_lit_str(sp, sq - sp, len, multiline, 0, 0);
1932 | 	else
1933 | 		*ret = norm_basic_str(sp, sq - sp, len, multiline, 0, 0);
1934 | 	return *ret ? 0 : -1;
1935 | }
1936 | 
1937 | toml_value_t toml_array_string(const toml_array_t* arr, int idx) {
1938 | 	toml_value_t ret;
1939 | 	memset(&ret, 0, sizeof(ret));
1940 | 	ret.ok = (toml_value_string(toml_array_unparsed(arr, idx), &ret.u.s, &ret.u.sl) == 0);
1941 | 	return ret;
1942 | }
1943 | 
1944 | toml_value_t toml_array_bool(const toml_array_t* arr, int idx) {
1945 | 	toml_value_t ret;
1946 | 	memset(&ret, 0, sizeof(ret));
1947 | 	ret.ok = (toml_value_bool(toml_array_unparsed(arr, idx), &ret.u.b) == 0);
1948 | 	return ret;
1949 | }
1950 | 
1951 | toml_value_t toml_array_int(const toml_array_t* arr, int idx) {
1952 | 	toml_value_t ret;
1953 | 	memset(&ret, 0, sizeof(ret));
1954 | 	ret.ok = (toml_value_int(toml_array_unparsed(arr, idx), &ret.u.i) == 0);
1955 | 	return ret;
1956 | }
1957 | 
1958 | toml_value_t toml_array_double(const toml_array_t* arr, int idx) {
1959 | 	toml_value_t ret;
1960 | 	memset(&ret, 0, sizeof(ret));
1961 | 	ret.ok = (toml_value_double(toml_array_unparsed(arr, idx), &ret.u.d) == 0);
1962 | 	return ret;
1963 | }
1964 | 
1965 | toml_value_t toml_array_timestamp(const toml_array_t* arr, int idx) {
1966 | 	toml_value_t ret;
1967 | 	memset(&ret, 0, sizeof(ret));
1968 | 	ret.ok = (toml_value_timestamp(toml_array_unparsed(arr, idx), &ret.u.ts) == 0);
1969 | 	return ret;
1970 | }
1971 | 
1972 | toml_value_t toml_table_string(const toml_table_t* tbl, const char* key) {
1973 | 	toml_value_t ret;
1974 | 	memset(&ret, 0, sizeof(ret));
1975 | 	toml_unparsed_t raw = toml_table_unparsed(tbl, key);
1976 | 	if (raw)
1977 | 		ret.ok = (toml_value_string(raw, &ret.u.s, &ret.u.sl) == 0);
1978 | 	return ret;
1979 | }
1980 | 
1981 | toml_value_t toml_table_bool(const toml_table_t* tbl, const char* key) {
1982 | 	toml_value_t ret;
1983 | 	memset(&ret, 0, sizeof(ret));
1984 | 	ret.ok = (toml_value_bool(toml_table_unparsed(tbl, key), &ret.u.b) == 0);
1985 | 	return ret;
1986 | }
1987 | 
1988 | toml_value_t toml_table_int(const toml_table_t* tbl, const char* key) {
1989 | 	toml_value_t ret;
1990 | 	memset(&ret, 0, sizeof(ret));
1991 | 	ret.ok = (toml_value_int(toml_table_unparsed(tbl, key), &ret.u.i) == 0);
1992 | 	return ret;
1993 | }
1994 | 
1995 | toml_value_t toml_table_double(const toml_table_t* tbl, const char* key) {
1996 | 	toml_value_t ret;
1997 | 	memset(&ret, 0, sizeof(ret));
1998 | 	ret.ok = (toml_value_double(toml_table_unparsed(tbl, key), &ret.u.d) == 0);
1999 | 	return ret;
2000 | }
2001 | 
2002 | toml_value_t toml_table_timestamp(const toml_table_t* tbl, const char* key) {
2003 | 	toml_value_t ret;
2004 | 	memset(&ret, 0, sizeof(ret));
2005 | 	ret.ok = (toml_value_timestamp(toml_table_unparsed(tbl, key), &ret.u.ts) == 0);
2006 | 	return ret;
2007 | }
2008 | 


--------------------------------------------------------------------------------
/header/toml-c.h:
--------------------------------------------------------------------------------
   1 | #ifndef TOML_H
   2 | #define TOML_H
   3 | #ifndef _POSIX_C_SOURCE
   4 | #define _POSIX_C_SOURCE 200809L
   5 | #endif
   6 | #ifdef _MSC_VER
   7 | #	pragma warning(disable : 4996)
   8 | #endif
   9 | #ifdef __cplusplus
  10 | #	define TOML_EXTERN extern "C"
  11 | #else
  12 | #	define TOML_EXTERN extern
  13 | #endif
  14 | 
  15 | #include <stdbool.h>
  16 | #include <stdint.h>
  17 | #include <stdio.h>
  18 | 
// Forward declarations so the struct definitions below can refer to each
// other (tables hold arrays and tables, array items hold arrays and tables).
typedef struct toml_table_t     toml_table_t;
typedef struct toml_array_t     toml_array_t;
typedef struct toml_value_t     toml_value_t;
typedef struct toml_timestamp_t toml_timestamp_t;
typedef struct toml_keyval_t    toml_keyval_t;
typedef struct toml_arritem_t   toml_arritem_t;
typedef struct toml_pos_t       toml_pos_t;
  26 | 
// TOML table.
//
// Children are kept in three separate lists, one per kind: plain key/value
// pairs, arrays, and nested tables. Each list has its own element count.
struct toml_table_t {
	const char* key;      // Key for this table
	int         keylen;   // Length of key.
	bool        implicit; // Table was created implicitly; cleared once the table is declared explicitly
	bool        readonly; // No more modification allowed; set after an inline table has been parsed

	int             nkval; // Number of key-values in the table
	toml_keyval_t** kval;  // The key-values themselves
	int             narr; // Number of arrays in the table
	toml_array_t**  arr;  // The arrays themselves
	int             ntbl; // Number of tables in the table
	toml_table_t**  tbl;  // The tables themselves
};
  41 | 
// TOML array.
//
// kind stays 0 until the first element has been parsed. Arrays nested inside
// another array are allocated zeroed and get no key assigned on creation.
struct toml_array_t {
	const char*     key;    // key to this array
	int             keylen; // length of key.
	int             kind;   // element kind: 'v'alue, 'a'rray, 't'able, or 'm'ixed
	int             type;   // for value kind: 'i'nt, 'd'ouble, 'b'ool, 's'tring, 't'ime, 'D'ate, 'T'imestamp, 'm'ixed
	int             nitem;  // number of elements
	toml_arritem_t* item;   // the elements; nitem entries
};
// One element of a toml_array_t. New items are zeroed, and the parser then
// fills in the member matching the element: val for a plain value, arr for a
// nested array, tbl for an inline table.
struct toml_arritem_t {
	int           valtype; // for value kind: 'i'nt, 'd'ouble, 'b'ool, 's'tring, 't'ime, 'D'ate, 'T'imestamp
	char*         val;     // copy of the raw (unparsed) value text
	toml_array_t* arr;     // nested array
	toml_table_t* tbl;     // inline table
};
  57 | 
// TOML key/value pair. The value is kept as raw text and is only converted
// when one of the typed accessors (toml_table_int() etc.) is called.
struct toml_keyval_t {
	const char* key;    // key to this value
	int         keylen; // length of key.
	const char* val;    // the raw value
};
  64 | 
// Token position; reported in error messages as "at <line>:<col>".
struct toml_pos_t {
	int line;
	int col;
};
  70 | 
// Timestamp type; some values may be empty depending on the value of kind.
struct toml_timestamp_t {
	// datetime type:
	//
	//   'd'atetime          Full date + time + TZ
	//   'l'ocal-datetime    Full date + time but without TZ
	//   'D'ate-local        Date only, without TZ
	//   't'ime-local        Time only, without TZ
	char kind;

	int year, month, day;
	int hour, minute, second, millisec;
	int tz; // Timezone offset in minutes
};
  85 | 
// Parsed TOML value.
//
// The string value s is a regular NUL-terminated C string, but the string
// length is also given in sl since TOML values may contain NUL bytes. The
// value is guaranteed to be correct UTF-8.
//
// Only the union member matching the accessor that produced the value is
// meaningful, and only when ok is true.
struct toml_value_t {
	bool ok; // Was this value present and converted successfully?
	union {
		struct {
			char* s;  // string value; must be freed after use.
			int   sl; // string length, excluding the terminating NUL.
		};
		toml_timestamp_t ts; // datetime
		bool             b;  // bool
		int64_t          i;  // int
		double           d;  // double
	} u;
};
 104 | 
// toml_parse() parses a TOML document from a string. Returns NULL on error,
// with the error message stored in errbuf (at most errbufsz bytes).
//
// toml_parse_file() is identical, but reads from a FILE* stream.
//
// Use toml_free() to free the return value; this will invalidate all handles
// for this table.
TOML_EXTERN toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz);
TOML_EXTERN toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz);
TOML_EXTERN void          toml_free(toml_table_t* table);
 115 | 
// Table functions.
//
// toml_table_len() gets the number of direct keys for this table;
// toml_table_key() gets the nth direct key in this table.
//
// The typed accessors (string, bool, int, double, timestamp) return a
// toml_value_t whose ok flag tells whether the key was found and its value
// could be converted to the requested type. A string result must be freed by
// the caller.
TOML_EXTERN int           toml_table_len(const toml_table_t* table);
TOML_EXTERN const char*   toml_table_key(const toml_table_t* table, int keyidx, int* keylen);
TOML_EXTERN toml_value_t  toml_table_string(const toml_table_t* table, const char* key);
TOML_EXTERN toml_value_t  toml_table_bool(const toml_table_t* table, const char* key);
TOML_EXTERN toml_value_t  toml_table_int(const toml_table_t* table, const char* key);
TOML_EXTERN toml_value_t  toml_table_double(const toml_table_t* table, const char* key);
TOML_EXTERN toml_value_t  toml_table_timestamp(const toml_table_t* table, const char* key);
TOML_EXTERN toml_array_t* toml_table_array(const toml_table_t* table, const char* key);
TOML_EXTERN toml_table_t* toml_table_table(const toml_table_t* table, const char* key);
 129 | 
// Array functions.
//
// The typed accessors (string, bool, int, double, timestamp) return a
// toml_value_t whose ok flag tells whether element idx could be converted to
// the requested type. A string result must be freed by the caller.
TOML_EXTERN int           toml_array_len(const toml_array_t* array);
TOML_EXTERN toml_value_t  toml_array_string(const toml_array_t* array, int idx);
TOML_EXTERN toml_value_t  toml_array_bool(const toml_array_t* array, int idx);
TOML_EXTERN toml_value_t  toml_array_int(const toml_array_t* array, int idx);
TOML_EXTERN toml_value_t  toml_array_double(const toml_array_t* array, int idx);
TOML_EXTERN toml_value_t  toml_array_timestamp(const toml_array_t* array, int idx);
TOML_EXTERN toml_array_t* toml_array_array(const toml_array_t* array, int idx);
TOML_EXTERN toml_table_t* toml_array_table(const toml_array_t* array, int idx);
 139 | 
 140 | #include <assert.h>
 141 | #include <ctype.h>
 142 | #include <errno.h>
 143 | #include <math.h>
 144 | #include <stdbool.h>
 145 | #include <stdint.h>
 146 | #include <stdio.h>
 147 | #include <stdlib.h>
 148 | #include <string.h>
 149 | 
 150 | 
 151 | #define ALIGN8(sz) (((sz) + 7) & ~7)
 152 | #define calloc(x, y) error - forbidden - use CALLOC instead
 153 | static void* CALLOC(size_t nmemb, size_t sz) {
 154 | 	int   nb = ALIGN8(sz) * nmemb;
 155 | 	void* p  = malloc(nb);
 156 | 	if (p) {
 157 | 		memset(p, 0, nb);
 158 | 	}
 159 | 	return p;
 160 | }
 161 | 
 162 | // some old platforms define strdup macro -- drop it.
 163 | #undef strdup
 164 | #define strdup(x) error - forbidden - use STRDUP instead
 165 | static char* STRDUP(const char* s) {
 166 | 	int   len = strlen(s);
 167 | 	char* p   = malloc(len + 1);
 168 | 	if (p) {
 169 | 		memcpy(p, s, len);
 170 | 		p[len] = 0;
 171 | 	}
 172 | 	return p;
 173 | }
 174 | 
 175 | // some old platforms define strndup macro -- drop it.
 176 | #undef strndup
 177 | #define strndup(x) error - forbidden - use STRNDUP instead
 178 | static char* STRNDUP(const char* s, size_t n) {
 179 | 	size_t len = strnlen(s, n);
 180 | 	char*  p   = malloc(len + 1);
 181 | 	if (p) {
 182 | 		memcpy(p, s, len);
 183 | 		p[len] = 0;
 184 | 	}
 185 | 	return p;
 186 | }
 187 | 
// Unparsed values.
//
// toml_unparsed_t is the raw text of a value as it appears in the document.
// toml_table_unparsed() / toml_array_unparsed() fetch that raw text, and the
// toml_value_*() functions convert it. Callers in this file treat a return
// value of 0 as success. A string produced by toml_value_string() is
// heap-allocated and must be freed by the caller.
typedef const char* toml_unparsed_t;
toml_unparsed_t     toml_table_unparsed(const toml_table_t* table, const char* key);
toml_unparsed_t     toml_array_unparsed(const toml_array_t* array, int idx);
int                 toml_value_string(toml_unparsed_t s, char** ret, int* len);
int                 toml_value_bool(toml_unparsed_t s, bool* ret);
int                 toml_value_int(toml_unparsed_t s, int64_t* ret);
int                 toml_value_double(toml_unparsed_t s, double* ret);
int                 toml_value_timestamp(toml_unparsed_t s, toml_timestamp_t* ret);
 197 | 
 198 | // Convert escape to UTF-8; return #bytes used in buf to encode the char, or -1
 199 | // on error.
 200 | // http://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16
 201 | int read_unicode_escape(uint64_t code, char buf[6]) {
 202 | 	if (0xd800 <= code && code <= 0xdfff) /// UTF-16 surrogates
 203 | 		return -1;
 204 | 	if (0x10FFFF < code)
 205 | 		return -1;
 206 | 	if (code <= 0x7F) { /// 0x00000000 - 0x0000007F: 0xxxxxxx
 207 | 		buf[0] = (unsigned char)code;
 208 | 		return 1;
 209 | 	}
 210 | 	if (code <= 0x000007FF) { /// 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx
 211 | 		buf[0] = (unsigned char)(0xc0 | (code >> 6));
 212 | 		buf[1] = (unsigned char)(0x80 | (code & 0x3f));
 213 | 		return 2;
 214 | 	}
 215 | 	if (code <= 0x0000FFFF) { /// 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
 216 | 		buf[0] = (unsigned char)(0xe0 | (code >> 12));
 217 | 		buf[1] = (unsigned char)(0x80 | ((code >> 6) & 0x3f));
 218 | 		buf[2] = (unsigned char)(0x80 | (code & 0x3f));
 219 | 		return 3;
 220 | 	}
 221 | 	if (code <= 0x001FFFFF) { /// 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 222 | 		buf[0] = (unsigned char)(0xf0 | (code >> 18));
 223 | 		buf[1] = (unsigned char)(0x80 | ((code >> 12) & 0x3f));
 224 | 		buf[2] = (unsigned char)(0x80 | ((code >> 6) & 0x3f));
 225 | 		buf[3] = (unsigned char)(0x80 | (code & 0x3f));
 226 | 		return 4;
 227 | 	}
 228 | 	return -1;
 229 | }
 230 | 
 231 | static inline void xfree(const void* x) {
 232 | 	if (x)
 233 | 		free((void*)(intptr_t)x);
 234 | }
 235 | 
// Token kinds produced by the tokenizer. DOT, COMMA, EQUAL, the braces and
// the brackets are punctuation. STRING is used for keys as well as for plain
// values; the array parser classifies a STRING token's text as int, double,
// bool and so on. MSTRING is handled like STRING by the array parser;
// presumably it marks a multi-line string (confirm against the tokenizer).
enum tokentype_t { INVALID, DOT, COMMA, EQUAL, LBRACE, RBRACE, NEWLINE, LBRACKET, RBRACKET, STRING, MSTRING };
typedef enum tokentype_t tokentype_t;
 238 | 
// A single token: its kind, where it starts in the document, and its text.
typedef struct token_t token_t;
struct token_t {
	tokentype_t tok; // token kind
	toml_pos_t  pos; // line/column, used in error messages
	char*       ptr; // points into context->start
	int         len; // length of the token text at ptr
	int         eof; // non-zero once the end of input has been reached
};
 247 | 
// Parser state shared by all parsing functions.
typedef struct context_t context_t;
struct context_t {
	char* start;    // input buffer; tokens point into it
	char* stop;     // presumably the end of the input buffer (confirm against the tokenizer)
	char* errbuf;   // receives the error message
	int   errbufsz; // size of errbuf

	token_t       tok;    // current token
	toml_table_t* root;   // NOTE(review): looks like the top-level table of the document; confirm
	toml_table_t* curtbl; // NOTE(review): looks like the table currently receiving key/values; confirm

	// NOTE(review): looks like the key path of a [table] header; holds at
	// most 10 entries. Confirm against the code that fills it.
	struct {
		int     top;
		char*   key[10];
		int     keylen[10];
		token_t tok[10];
	} tpath;
};
 266 | 
// FLINE expands to the string literal "<file>:<line>" of the place where it
// is used; it is passed to the error helpers to identify the failing call
// site. TOSTRING() adds the extra expansion step needed so that __LINE__ is
// replaced by its number before STRINGIFY() turns it into a string.
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define FLINE __FILE__ ":" TOSTRING(__LINE__)
 270 | 
 271 | static int next_token(context_t* ctx, bool dotisspecial);
 272 | 
 273 | // Error reporting. Call when an error is detected. Always return -1.
 274 | static int e_outofmemory(context_t* ctx, const char* fline) {
 275 | 	snprintf(ctx->errbuf, ctx->errbufsz, "ERROR: out of memory (%s)", fline);
 276 | 	return -1;
 277 | }
 278 | 
 279 | static int e_internal(context_t* ctx, const char* fline) {
 280 | 	snprintf(ctx->errbuf, ctx->errbufsz, "internal error (%s)", fline);
 281 | 	return -1;
 282 | }
 283 | 
 284 | static int e_syntax(context_t* ctx, toml_pos_t pos, const char* msg) {
 285 | 	snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: %s", pos.line, pos.col, msg);
 286 | 	return -1;
 287 | }
 288 | 
 289 | static int e_keyexists(context_t* ctx, toml_pos_t pos) {
 290 | 	snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: key already defined", pos.line, pos.col);
 291 | 	return -1;
 292 | }
 293 | 
 294 | static void* expand(void* p, int sz, int newsz) {
 295 | 	void* s = malloc(newsz);
 296 | 	if (!s)
 297 | 		return 0;
 298 | 
 299 | 	if (p) {
 300 | 		memcpy(s, p, sz);
 301 | 		free(p);
 302 | 	}
 303 | 	return s;
 304 | }
 305 | 
 306 | static void** expand_ptrarr(void** p, int n) {
 307 | 	void** s = malloc((n + 1) * sizeof(void*));
 308 | 	if (!s)
 309 | 		return 0;
 310 | 
 311 | 	s[n] = 0;
 312 | 	if (p) {
 313 | 		memcpy(s, p, n * sizeof(void*));
 314 | 		free(p);
 315 | 	}
 316 | 	return s;
 317 | }
 318 | 
 319 | static toml_arritem_t* expand_arritem(toml_arritem_t* p, int n) {
 320 | 	toml_arritem_t* pp = expand(p, n * sizeof(*p), (n + 1) * sizeof(*p));
 321 | 	if (!pp)
 322 | 		return 0;
 323 | 
 324 | 	memset(&pp[n], 0, sizeof(pp[n]));
 325 | 	return pp;
 326 | }
 327 | 
 328 | static uint8_t const u8_length[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4};
 329 | #define u8length(s) u8_length[(((uint8_t*)(s))[0] & 0xFF) >> 4];
 330 | 
 331 | static char* norm_lit_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) {
 332 | 	const char* sp  = src;
 333 | 	const char* sq  = src + srclen;
 334 | 	char*       dst = 0; /// will write to dst[] and return it
 335 | 	int         max = 0; /// max size of dst[]
 336 | 	int         off = 0; /// cur offset in dst[]
 337 | 
 338 | 	for (;;) {                 /// scan forward on src
 339 | 		if (off >= max - 10) { /// have some slack for misc stuff
 340 | 			int   newmax = max + 50;
 341 | 			char* x      = expand(dst, max, newmax);
 342 | 			if (!x) {
 343 | 				xfree(dst);
 344 | 				snprintf(errbuf, errbufsz, "out of memory");
 345 | 				return 0;
 346 | 			}
 347 | 			dst = x;
 348 | 			max = newmax;
 349 | 		}
 350 | 
 351 | 		if (sp >= sq) /// finished?
 352 | 			break;
 353 | 
 354 | 		uint8_t l = u8length(sp);
 355 | 		if (l == 0) {
 356 | 			xfree(dst);
 357 | 			snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
 358 | 			return 0;
 359 | 		}
 360 | 		if (l > 1) {
 361 | 			for (int i = 0; i < l; i++) {
 362 | 				char ch = *sp++;
 363 | 				if ((ch & 0x80) != 0x80) {
 364 | 					xfree(dst);
 365 | 					snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
 366 | 					return 0;
 367 | 				}
 368 | 				dst[off++] = ch;
 369 | 			}
 370 | 			continue;
 371 | 		}
 372 | 
 373 | 		/// control characters other than Tab are not allowed
 374 | 		char ch = *sp++;
 375 | 		if ((0 <= ch && ch <= 0x08) || (0x0a <= ch && ch <= 0x1f) || ch == 0x7f) {
 376 | 			if (!(multiline && (ch == '\r' || ch == '\n'))) {
 377 | 				xfree(dst);
 378 | 				snprintf(errbuf, errbufsz, "invalid char U+%04x", ch);
 379 | 				return 0;
 380 | 			}
 381 | 		}
 382 | 
 383 | 		dst[off++] = ch; /// a plain copy suffice
 384 | 	}
 385 | 
 386 | 	*len       = off;
 387 | 	dst[off++] = 0;
 388 | 	return dst;
 389 | }
 390 | 
 391 | // Convert src to raw unescaped utf-8 string. Returns NULL if error with errmsg
 392 | // in errbuf.
 393 | static char* norm_basic_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) {
 394 | 	const char* sp  = src;
 395 | 	const char* sq  = src + srclen;
 396 | 	char*       dst = 0; /// will write to dst[] and return it
 397 | 	int         max = 0; /// max size of dst[]
 398 | 	int         off = 0; /// cur offset in dst[]
 399 | 
 400 | 	/// scan forward on src
 401 | 	for (;;) {
 402 | 		if (off >= max - 10) { /// have some slack for misc stuff
 403 | 			int   newmax = max + 50;
 404 | 			char* x      = expand(dst, max, newmax);
 405 | 			if (!x) {
 406 | 				xfree(dst);
 407 | 				snprintf(errbuf, errbufsz, "out of memory");
 408 | 				return 0;
 409 | 			}
 410 | 			dst = x;
 411 | 			max = newmax;
 412 | 		}
 413 | 
 414 | 		if (sp >= sq) /// finished?
 415 | 			break;
 416 | 
 417 | 		uint8_t l = u8length(sp);
 418 | 		if (l == 0) {
 419 | 			xfree(dst);
 420 | 			snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
 421 | 			return 0;
 422 | 		}
 423 | 		if (l > 1) {
 424 | 			for (int i = 0; i < l; i++) {
 425 | 				char ch = *sp++;
 426 | 				if ((ch & 0x80) != 0x80) {
 427 | 					xfree(dst);
 428 | 					snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off);
 429 | 					return 0;
 430 | 				}
 431 | 				dst[off++] = ch;
 432 | 			}
 433 | 			continue;
 434 | 		}
 435 | 
 436 | 		char ch = *sp++;
 437 | 		if (ch != '\\') {
 438 | 			/// must be escaped: U+0000 to U+0008, U+000A to U+001F, U+007F
 439 | 			if ((ch >= 0 && ch <= 0x08) || (ch >= 0x0a && ch <= 0x1f) || ch == 0x7f) {
 440 | 				if (!(multiline && (ch == '\r' || ch == '\n'))) {
 441 | 					xfree(dst);
 442 | 					snprintf(errbuf, errbufsz, "invalid char U+%04x", ch);
 443 | 					return 0;
 444 | 				}
 445 | 			}
 446 | 
 447 | 			dst[off++] = ch; /// a plain copy suffice
 448 | 			continue;
 449 | 		}
 450 | 
 451 | 		// TODO: unreachable, I think?
 452 | 		if (sp >= sq) { /// ch was backslash. we expect the escape char.
 453 | 			snprintf(errbuf, errbufsz, "last backslash is invalid");
 454 | 			xfree(dst);
 455 | 			return 0;
 456 | 		}
 457 | 
 458 | 		if (multiline) {                           /// for multi-line, we want to kill line-ending-backslash.
 459 | 			if (sp[strspn(sp, " \t\r")] == '\n') { /// if there is only whitespace after the backslash ...
 460 | 				sp += strspn(sp, " \t\r\n");       /// skip all the following whitespaces
 461 | 				continue;
 462 | 			}
 463 | 		}
 464 | 
 465 | 		ch = *sp++; /// get the escaped char
 466 | 		switch (ch) {
 467 | 		case 'x':
 468 | 		case 'u':
 469 | 		case 'U': {
 470 | 			uint64_t ucs  = 0;
 471 | 			int      nhex = 2;
 472 | 			if (ch == 'u') nhex = 4;
 473 | 			if (ch == 'U') nhex = 8;
 474 | 			for (int i = 0; i < nhex; i++) {
 475 | 				// TODO: unreachable I think, as scan_string() already
 476 | 				// guarantees exactly 4 or 8 hex chars.
 477 | 				if (sp >= sq) {
 478 | 					snprintf(errbuf, errbufsz, "\\%c expected %d hex chars", ch, nhex);
 479 | 					xfree(dst);
 480 | 					return 0;
 481 | 				}
 482 | 				ch    = *sp++;
 483 | 				int v = -1;
 484 | 				if ('0' <= ch && ch <= '9')
 485 | 					v = ch - '0';
 486 | 				else if ('A' <= ch && ch <= 'F')
 487 | 					v = ch - 'A' + 10;
 488 | 				else if ('a' <= ch && ch <= 'f')
 489 | 					v = (ch ^ 0x20) - 'A' + 10;
 490 | 				// TODO: also unrechable, as per above.
 491 | 				if (v == -1) {
 492 | 					snprintf(errbuf, errbufsz, "invalid hex chars for \\u or \\U");
 493 | 					xfree(dst);
 494 | 					return 0;
 495 | 				}
 496 | 				ucs = ucs * 16 + v;
 497 | 			}
 498 | 			int n = read_unicode_escape(ucs, &dst[off]);
 499 | 			if (n == -1) {
 500 | 				snprintf(errbuf, errbufsz, "illegal ucs code in \\u or \\U");
 501 | 				xfree(dst);
 502 | 				return 0;
 503 | 			}
 504 | 			off += n;
 505 | 		};
 506 | 			continue;
 507 | 		case 'b':  ch = '\b'; break;
 508 | 		case 't':  ch = '\t'; break;
 509 | 		case 'n':  ch = '\n'; break;
 510 | 		case 'f':  ch = '\f'; break;
 511 | 		case 'r':  ch = '\r'; break;
 512 | 		case 'e':  ch = 0x1b; break;
 513 | 		case '"':  ch = '"'; break;
 514 | 		case '\\': ch = '\\'; break;
 515 | 		default:
 516 | 			// TODO: unrechable, I think, as scan_string() already
 517 | 			// guarantees correct char.
 518 | 			snprintf(errbuf, errbufsz, "illegal escape char \\%c", ch);
 519 | 			xfree(dst);
 520 | 			return 0;
 521 | 		}
 522 | 
 523 | 		dst[off++] = ch;
 524 | 	}
 525 | 
 526 | 	*len       = off;
 527 | 	dst[off++] = 0; /// Cap with NUL and return it.
 528 | 	return dst;
 529 | }
 530 | 
 531 | // Normalize a key. Convert all special chars to raw unescaped utf-8 chars.
 532 | static char* normalize_key(context_t* ctx, token_t strtok, int* keylen) {
 533 | 	const char* sp = strtok.ptr;
 534 | 	const char* sq = strtok.ptr + strtok.len;
 535 | 	int         ch = *sp;
 536 | 	char*       ret;
 537 | 
 538 | 	// Quoted string
 539 | 	if (ch == '\'' || ch == '\"') {
 540 | 		/// Take " or ' off from and back.
 541 | 		sp++, sq--;
 542 | 
 543 | 		char ebuf[80];
 544 | 		if (ch == '\'')
 545 | 			ret = norm_lit_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf));
 546 | 		else
 547 | 			ret = norm_basic_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf));
 548 | 		if (!ret) {
 549 | 			e_syntax(ctx, strtok.pos, ebuf);
 550 | 			return 0;
 551 | 		}
 552 | 		return ret;
 553 | 	}
 554 | 
 555 | 	*keylen = 0;
 556 | 	for (const char* c = sp; c != sq; c++) { /// Bare key: allow: [A-Za-z0-9_-]+
 557 | 		*keylen = *keylen + 1;
 558 | 		if (isalnum(*c) || *c == '_' || *c == '-')
 559 | 			continue;
 560 | 		// TODO: never triggered? When reading the file it already validates
 561 | 		// this, so seems redundant? Need to double-check.
 562 | 		e_syntax(ctx, ctx->tok.pos, "invalid key");
 563 | 		return 0;
 564 | 	}
 565 | 
 566 | 	if (!(ret = STRNDUP(sp, sq - sp))) { /// dup and return
 567 | 		e_outofmemory(ctx, FLINE);
 568 | 		return 0;
 569 | 	}
 570 | 	return ret;
 571 | }
 572 | 
 573 | // Look up key in tbl. Return 0 if not found, or 'v'alue, 'a'rray or 't'able
 574 | // depending on the element.
 575 | static int check_key(toml_table_t* tbl, const char* key, toml_keyval_t** ret_val, toml_array_t** ret_arr, toml_table_t** ret_tbl) {
 576 | 	int   i;
 577 | 	void* dummy;
 578 | 
 579 | 	if (!ret_tbl)
 580 | 		ret_tbl = (toml_table_t**)&dummy;
 581 | 	if (!ret_arr)
 582 | 		ret_arr = (toml_array_t**)&dummy;
 583 | 	if (!ret_val)
 584 | 		ret_val = (toml_keyval_t**)&dummy;
 585 | 
 586 | 	*ret_tbl = 0;
 587 | 	*ret_arr = 0;
 588 | 	*ret_val = 0;
 589 | 
 590 | 	for (i = 0; i < tbl->nkval; i++) {
 591 | 		if (strcmp(key, tbl->kval[i]->key) == 0) {
 592 | 			*ret_val = tbl->kval[i];
 593 | 			return 'v';
 594 | 		}
 595 | 	}
 596 | 	for (i = 0; i < tbl->narr; i++) {
 597 | 		if (strcmp(key, tbl->arr[i]->key) == 0) {
 598 | 			*ret_arr = tbl->arr[i];
 599 | 			return 'a';
 600 | 		}
 601 | 	}
 602 | 	for (i = 0; i < tbl->ntbl; i++) {
 603 | 		if (strcmp(key, tbl->tbl[i]->key) == 0) {
 604 | 			*ret_tbl = tbl->tbl[i];
 605 | 			return 't';
 606 | 		}
 607 | 	}
 608 | 	return 0;
 609 | }
 610 | 
 611 | static int key_kind(toml_table_t* tbl, const char* key) {
 612 | 	return check_key(tbl, key, 0, 0, 0);
 613 | }
 614 | 
 615 | // Create a keyval in the table.
 616 | static toml_keyval_t* create_keyval_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) {
 617 | 	int   keylen;
 618 | 	char* newkey = normalize_key(ctx, keytok, &keylen);
 619 | 	if (!newkey)
 620 | 		return 0;
 621 | 
 622 | 	toml_keyval_t* dest = 0;
 623 | 	if (key_kind(tbl, newkey)) {
 624 | 		xfree(newkey);
 625 | 		e_keyexists(ctx, keytok.pos);
 626 | 		return 0;
 627 | 	}
 628 | 
 629 | 	int             n = tbl->nkval;
 630 | 	toml_keyval_t** base;
 631 | 	if ((base = (toml_keyval_t**)expand_ptrarr((void**)tbl->kval, n)) == 0) {
 632 | 		xfree(newkey);
 633 | 		e_outofmemory(ctx, FLINE);
 634 | 		return 0;
 635 | 	}
 636 | 	tbl->kval = base;
 637 | 
 638 | 	if ((base[n] = (toml_keyval_t*)CALLOC(1, sizeof(*base[n]))) == 0) {
 639 | 		xfree(newkey);
 640 | 		e_outofmemory(ctx, FLINE);
 641 | 		return 0;
 642 | 	}
 643 | 
 644 | 	dest         = tbl->kval[tbl->nkval++];
 645 | 	dest->key    = newkey;
 646 | 	dest->keylen = keylen;
 647 | 	return dest;
 648 | }
 649 | 
 650 | // Create a table in the table.
 651 | static toml_table_t* create_keytable_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) {
 652 | 	int   keylen;
 653 | 	char* newkey = normalize_key(ctx, keytok, &keylen);
 654 | 	if (!newkey)
 655 | 		return 0;
 656 | 
 657 | 	toml_table_t* dest = 0;
 658 | 	// TODO: need to check all parts for:
 659 | 	//
 660 | 	//   [a]
 661 | 	//   [a.c]   # checks of "a.c" is defined, which is false.
 662 | 	if (check_key(tbl, newkey, 0, 0, &dest)) {
 663 | 		xfree(newkey);
 664 | 
 665 | 		/// Special case: make explicit if table exists and was created
 666 | 		/// implicitly.
 667 | 		if (dest && dest->implicit) {
 668 | 			dest->implicit = false;
 669 | 			return dest;
 670 | 		}
 671 | 		e_keyexists(ctx, keytok.pos);
 672 | 		return 0;
 673 | 	}
 674 | 
 675 | 	int            n = tbl->ntbl;
 676 | 	toml_table_t** base;
 677 | 	if ((base = (toml_table_t**)expand_ptrarr((void**)tbl->tbl, n)) == 0) {
 678 | 		xfree(newkey);
 679 | 		e_outofmemory(ctx, FLINE);
 680 | 		return 0;
 681 | 	}
 682 | 	tbl->tbl = base;
 683 | 
 684 | 	if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0) {
 685 | 		xfree(newkey);
 686 | 		e_outofmemory(ctx, FLINE);
 687 | 		return 0;
 688 | 	}
 689 | 
 690 | 	dest         = tbl->tbl[tbl->ntbl++];
 691 | 	dest->key    = newkey;
 692 | 	dest->keylen = keylen;
 693 | 	return dest;
 694 | }
 695 | 
 696 | // Create an array in the table.
 697 | static toml_array_t* create_keyarray_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok, char kind) {
 698 | 	int   keylen;
 699 | 	char* newkey = normalize_key(ctx, keytok, &keylen);
 700 | 	if (!newkey)
 701 | 		return 0;
 702 | 
 703 | 	if (key_kind(tbl, newkey)) {
 704 | 		xfree(newkey);
 705 | 		e_keyexists(ctx, keytok.pos);
 706 | 		return 0;
 707 | 	}
 708 | 
 709 | 	int            n = tbl->narr;
 710 | 	toml_array_t** base;
 711 | 	if ((base = (toml_array_t**)expand_ptrarr((void**)tbl->arr, n)) == 0) {
 712 | 		xfree(newkey);
 713 | 		e_outofmemory(ctx, FLINE);
 714 | 		return 0;
 715 | 	}
 716 | 	tbl->arr = base;
 717 | 
 718 | 	if ((base[n] = (toml_array_t*)CALLOC(1, sizeof(*base[n]))) == 0) {
 719 | 		xfree(newkey);
 720 | 		e_outofmemory(ctx, FLINE);
 721 | 		return 0;
 722 | 	}
 723 | 	toml_array_t* dest = tbl->arr[tbl->narr++];
 724 | 
 725 | 	dest->keylen = keylen;
 726 | 	dest->key    = newkey;
 727 | 	dest->kind   = kind;
 728 | 	return dest;
 729 | }
 730 | 
 731 | static toml_arritem_t* create_value_in_array(context_t* ctx, toml_array_t* parent) {
 732 | 	const int       n    = parent->nitem;
 733 | 	toml_arritem_t* base = expand_arritem(parent->item, n);
 734 | 	if (!base) {
 735 | 		e_outofmemory(ctx, FLINE);
 736 | 		return 0;
 737 | 	}
 738 | 	parent->item = base;
 739 | 	parent->nitem++;
 740 | 	return &parent->item[n];
 741 | }
 742 | 
 743 | // Create an array in an array.
 744 | static toml_array_t* create_array_in_array(context_t* ctx, toml_array_t* parent) {
 745 | 	const int       n    = parent->nitem;
 746 | 	toml_arritem_t* base = expand_arritem(parent->item, n);
 747 | 	if (!base) {
 748 | 		e_outofmemory(ctx, FLINE);
 749 | 		return 0;
 750 | 	}
 751 | 	toml_array_t* ret = (toml_array_t*)CALLOC(1, sizeof(toml_array_t));
 752 | 	if (!ret) {
 753 | 		e_outofmemory(ctx, FLINE);
 754 | 		return 0;
 755 | 	}
 756 | 	base[n].arr  = ret;
 757 | 	parent->item = base;
 758 | 	parent->nitem++;
 759 | 	return ret;
 760 | }
 761 | 
 762 | // Create a table in an array
 763 | static toml_table_t* create_table_in_array(context_t* ctx, toml_array_t* parent) {
 764 | 	int             n    = parent->nitem;
 765 | 	toml_arritem_t* base = expand_arritem(parent->item, n);
 766 | 	if (!base) {
 767 | 		e_outofmemory(ctx, FLINE);
 768 | 		return 0;
 769 | 	}
 770 | 	toml_table_t* ret = (toml_table_t*)CALLOC(1, sizeof(toml_table_t));
 771 | 	if (!ret) {
 772 | 		e_outofmemory(ctx, FLINE);
 773 | 		return 0;
 774 | 	}
 775 | 	base[n].tbl  = ret;
 776 | 	parent->item = base;
 777 | 	parent->nitem++;
 778 | 	return ret;
 779 | }
 780 | 
 781 | static bool skip_newlines(context_t* ctx, bool isdotspecial) {
 782 | 	while (ctx->tok.tok == NEWLINE) {
 783 | 		if (next_token(ctx, isdotspecial))
 784 | 			return false;
 785 | 		if (ctx->tok.eof)
 786 | 			break;
 787 | 	}
 788 | 	return true;
 789 | }
 790 | 
 791 | static int parse_keyval(context_t* ctx, toml_table_t* tbl);
 792 | 
 793 | static inline int eat_token(context_t* ctx, tokentype_t typ, bool isdotspecial, const char* fline) {
 794 | 	if (ctx->tok.tok != typ)
 795 | 		return e_internal(ctx, fline);
 796 | 	if (next_token(ctx, isdotspecial))
 797 | 		return -1;
 798 | 	return 0;
 799 | }
 800 | 
 801 | // We are at '{ ... }'; parse the table.
 802 | static int parse_inline_table(context_t* ctx, toml_table_t* tbl) {
 803 | 	if (eat_token(ctx, LBRACE, 1, FLINE))
 804 | 		return -1;
 805 | 
 806 | 	for (;;) {
 807 | 		if (ctx->tok.tok == RBRACE) // until closing brace
 808 | 			break;
 809 | 		if (ctx->tok.eof)
 810 | 			return e_syntax(ctx, ctx->tok.pos, "no closing '}'");
 811 | 
 812 | 		if (ctx->tok.tok == NEWLINE) {
 813 | 			if (eat_token(ctx, NEWLINE, 1, FLINE))
 814 | 				return -1;
 815 | 			continue;
 816 | 		}
 817 | 
 818 | 		if (ctx->tok.tok != STRING)
 819 | 			return e_syntax(ctx, ctx->tok.pos, "expected a string");
 820 | 
 821 | 		if (parse_keyval(ctx, tbl))
 822 | 			return -1;
 823 | 
 824 | 		// On comma, continue to scan for next keyval.
 825 | 		if (ctx->tok.tok == COMMA) {
 826 | 			if (eat_token(ctx, COMMA, 1, FLINE))
 827 | 				return -1;
 828 | 			continue;
 829 | 		}
 830 | 		break;
 831 | 	}
 832 | 
 833 | 	for (;;) {
 834 | 		if (ctx->tok.tok != NEWLINE || ctx->tok.eof)
 835 | 			break;
 836 | 		if (eat_token(ctx, NEWLINE, 1, FLINE))
 837 | 			return -1;
 838 | 	}
 839 | 
 840 | 	if (eat_token(ctx, RBRACE, 1, FLINE))
 841 | 		return -1;
 842 | 
 843 | 	tbl->readonly = 1;
 844 | 	return 0;
 845 | }
 846 | 
 847 | static int valtype(const char* val) {
 848 | 	toml_timestamp_t ts;
 849 | 	if (*val == '\'' || *val == '"')
 850 | 		return 's';
 851 | 	if (toml_value_bool(val, false) == 0)
 852 | 		return 'b';
 853 | 	if (toml_value_int(val, 0) == 0)
 854 | 		return 'i';
 855 | 	if (toml_value_double(val, 0) == 0)
 856 | 		return 'd';
 857 | 	if (toml_value_timestamp(val, &ts) == 0) {
 858 | 		if (ts.year && ts.hour)
 859 | 			return 'T'; /// timestamp
 860 | 		if (ts.year)    // TODO: never reached?
 861 | 			return 'D'; /// date
 862 | 		return 't';     /// time
 863 | 	}
 864 | 	return 'u'; /// unknown
 865 | }
 866 | 
 867 | // We are at '[...]'
 868 | static int parse_array(context_t* ctx, toml_array_t* arr) {
 869 | 	if (eat_token(ctx, LBRACKET, 0, FLINE))
 870 | 		return -1;
 871 | 
 872 | 	for (;;) {
 873 | 		if (!skip_newlines(ctx, 0))
 874 | 			return -1;
 875 | 
 876 | 		if (ctx->tok.tok == RBRACKET) /// until ]
 877 | 			break;
 878 | 
 879 | 		switch (ctx->tok.tok) {
 880 | 		case MSTRING:
 881 | 		case STRING:  {
 882 | 			/// set array kind if this will be the first entry
 883 | 			if (arr->kind == 0)
 884 | 				arr->kind = 'v';
 885 | 			else if (arr->kind != 'v')
 886 | 				arr->kind = 'm';
 887 | 
 888 | 			char* val  = ctx->tok.ptr;
 889 | 			int   vlen = ctx->tok.len;
 890 | 
 891 | 			/// make a new value in array
 892 | 			toml_arritem_t* newval = create_value_in_array(ctx, arr);
 893 | 			if (!newval)
 894 | 				return e_outofmemory(ctx, FLINE);
 895 | 
 896 | 			if (!(newval->val = STRNDUP(val, vlen)))
 897 | 				return e_outofmemory(ctx, FLINE);
 898 | 
 899 | 			newval->valtype = valtype(newval->val);
 900 | 
 901 | 			/// set array type if this is the first entry
 902 | 			if (arr->nitem == 1)
 903 | 				arr->type = newval->valtype;
 904 | 			else if (arr->type != newval->valtype)
 905 | 				arr->type = 'm'; /// mixed
 906 | 
 907 | 			if (eat_token(ctx, ctx->tok.tok, 0, FLINE))
 908 | 				return -1;
 909 | 			break;
 910 | 		}
 911 | 		case LBRACKET: { // [ [array], [array] ... ]
 912 | 			// set the array kind if this will be the first entry.
 913 | 			if (arr->kind == 0)
 914 | 				arr->kind = 'a';
 915 | 			else if (arr->kind != 'a')
 916 | 				arr->kind = 'm';
 917 | 
 918 | 			toml_array_t* subarr = create_array_in_array(ctx, arr);
 919 | 			if (!subarr)
 920 | 				return -1;
 921 | 			if (parse_array(ctx, subarr))
 922 | 				return -1;
 923 | 			break;
 924 | 		}
 925 | 		case LBRACE: { // [ {table}, {table} ... ]
 926 | 			// set the array kind if this will be the first entry.
 927 | 			if (arr->kind == 0)
 928 | 				arr->kind = 't';
 929 | 			else if (arr->kind != 't')
 930 | 				arr->kind = 'm';
 931 | 
 932 | 			toml_table_t* subtbl = create_table_in_array(ctx, arr);
 933 | 			if (!subtbl)
 934 | 				return -1;
 935 | 			if (parse_inline_table(ctx, subtbl))
 936 | 				return -1;
 937 | 			break;
 938 | 		}
 939 | 		default: return e_syntax(ctx, ctx->tok.pos, "syntax error");
 940 | 		}
 941 | 
 942 | 		if (!skip_newlines(ctx, 0))
 943 | 			return -1;
 944 | 
 945 | 		// on comma, continue to scan for next element
 946 | 		if (ctx->tok.tok == COMMA) {
 947 | 			if (eat_token(ctx, COMMA, 0, FLINE))
 948 | 				return -1;
 949 | 			continue;
 950 | 		}
 951 | 		break;
 952 | 	}
 953 | 
 954 | 	if (eat_token(ctx, RBRACKET, 1, FLINE))
 955 | 		return -1;
 956 | 	return 0;
 957 | }
 958 | 
 959 | // Handle lines like:
 960 | //   key = "value"
 961 | //   key = [ array ]
 962 | //   key = { table }
 963 | static int parse_keyval(context_t* ctx, toml_table_t* tbl) {
 964 | 	if (tbl->readonly)
 965 | 		return e_keyexists(ctx, ctx->tok.pos);
 966 | 
 967 | 	token_t key = ctx->tok;
 968 | 	if (eat_token(ctx, STRING, 1, FLINE))
 969 | 		return -1;
 970 | 
 971 | 	if (ctx->tok.tok == DOT) {
 972 | 		// Handle inline dotted key:
 973 | 		//   physical.color = "orange"
 974 | 		//   physical.shape = "round"
 975 | 		toml_table_t* subtbl = 0;
 976 | 		{
 977 | 			int   keylen;
 978 | 			char* subtblstr = normalize_key(ctx, key, &keylen);
 979 | 			if (!subtblstr)
 980 | 				return -1;
 981 | 
 982 | 			subtbl = toml_table_table(tbl, subtblstr);
 983 | 			if (subtbl)
 984 | 				subtbl->keylen = keylen;
 985 | 			xfree(subtblstr);
 986 | 		}
 987 | 		if (!subtbl) {
 988 | 			subtbl = create_keytable_in_table(ctx, tbl, key);
 989 | 			if (!subtbl)
 990 | 				return -1;
 991 | 		}
 992 | 		if (next_token(ctx, true))
 993 | 			return -1;
 994 | 		if (parse_keyval(ctx, subtbl))
 995 | 			return -1;
 996 | 		return 0;
 997 | 	}
 998 | 
 999 | 	if (ctx->tok.tok != EQUAL)
1000 | 		return e_syntax(ctx, ctx->tok.pos, "missing '='");
1001 | 
1002 | 	if (next_token(ctx, false))
1003 | 		return -1;
1004 | 
1005 | 	switch (ctx->tok.tok) {
1006 | 	case MSTRING:
1007 | 	case STRING:  { // key = "value"
1008 | 		toml_keyval_t* keyval = create_keyval_in_table(ctx, tbl, key);
1009 | 		if (!keyval)
1010 | 			return -1;
1011 | 		token_t val = ctx->tok;
1012 | 
1013 | 		assert(keyval->val == 0);
1014 | 		if (!(keyval->val = STRNDUP(val.ptr, val.len)))
1015 | 			return e_outofmemory(ctx, FLINE);
1016 | 
1017 | 		if (next_token(ctx, true))
1018 | 			return -1;
1019 | 
1020 | 		return 0;
1021 | 	}
1022 | 	case LBRACKET: { // key = [ array ]
1023 | 		toml_array_t* arr = create_keyarray_in_table(ctx, tbl, key, 0);
1024 | 		if (!arr)
1025 | 			return -1;
1026 | 		if (parse_array(ctx, arr))
1027 | 			return -1;
1028 | 		return 0;
1029 | 	}
1030 | 	case LBRACE: { // key = { table }
1031 | 		toml_table_t* nexttbl = create_keytable_in_table(ctx, tbl, key);
1032 | 		if (!nexttbl)
1033 | 			return -1;
1034 | 		if (parse_inline_table(ctx, nexttbl))
1035 | 			return -1;
1036 | 		return 0;
1037 | 	}
1038 | 	default: return e_syntax(ctx, ctx->tok.pos, "syntax error");
1039 | 	}
1040 | 	return 0;
1041 | }
1042 | 
1043 | typedef struct tabpath_t tabpath_t;
1044 | struct tabpath_t {
1045 | 	int     cnt;
1046 | 	token_t key[10];
1047 | };
1048 | 
1049 | // At [x.y.z] or [[x.y.z]]
1050 | // Scan forward and fill tblpath until it enters ] or ]]
1051 | // There will be at least one entry on return.
1052 | static int fill_tblpath(context_t* ctx) {
1053 | 	// clear tpath
1054 | 	for (int i = 0; i < ctx->tpath.top; i++) {
1055 | 		char** p = &ctx->tpath.key[i];
1056 | 		xfree(*p);
1057 | 		*p = 0;
1058 | 	}
1059 | 	ctx->tpath.top = 0;
1060 | 
1061 | 	for (;;) {
1062 | 		if (ctx->tpath.top >= 10)
1063 | 			return e_syntax(ctx, ctx->tok.pos, "table path is too deep; max allowed is 10.");
1064 | 		if (ctx->tok.tok != STRING)
1065 | 			return e_syntax(ctx, ctx->tok.pos, "invalid or missing key");
1066 | 
1067 | 		int   keylen;
1068 | 		char* key = normalize_key(ctx, ctx->tok, &keylen);
1069 | 		if (!key)
1070 | 			return -1;
1071 | 		ctx->tpath.tok[ctx->tpath.top]    = ctx->tok;
1072 | 		ctx->tpath.key[ctx->tpath.top]    = key;
1073 | 		ctx->tpath.keylen[ctx->tpath.top] = keylen;
1074 | 		ctx->tpath.top++;
1075 | 
1076 | 		if (next_token(ctx, true))
1077 | 			return -1;
1078 | 
1079 | 		if (ctx->tok.tok == RBRACKET)
1080 | 			break;
1081 | 		if (ctx->tok.tok != DOT)
1082 | 			return e_syntax(ctx, ctx->tok.pos, "invalid key");
1083 | 		if (next_token(ctx, true))
1084 | 			return -1;
1085 | 	}
1086 | 
1087 | 	if (ctx->tpath.top <= 0) // TODO: never reached?
1088 | 		return e_syntax(ctx, ctx->tok.pos, "empty table selector");
1089 | 	return 0;
1090 | }
1091 | 
1092 | // Walk tblpath from the root, and create new tables on the way. Sets
1093 | // ctx->curtbl to the final table.
1094 | static int walk_tabpath(context_t* ctx) {
1095 | 	toml_table_t* curtbl = ctx->root; /// start from root
1096 | 
1097 | 	for (int i = 0; i < ctx->tpath.top; i++) {
1098 | 		const char* key    = ctx->tpath.key[i];
1099 | 		int         keylen = ctx->tpath.keylen[i];
1100 | 
1101 | 		toml_keyval_t* nextval = 0;
1102 | 		toml_array_t*  nextarr = 0;
1103 | 		toml_table_t*  nexttbl = 0;
1104 | 		switch (check_key(curtbl, key, &nextval, &nextarr, &nexttbl)) {
1105 | 		case 't': /// found a table. nexttbl is where we will go next.
1106 | 			break;
1107 | 		case 'a': /// found an array. nexttbl is the last table in the array.
1108 | 			if (nextarr->kind != 't')
1109 | 				return e_internal(ctx, FLINE);
1110 | 
1111 | 			if (nextarr->nitem == 0)
1112 | 				return e_internal(ctx, FLINE);
1113 | 
1114 | 			nexttbl = nextarr->item[nextarr->nitem - 1].tbl;
1115 | 			break;
1116 | 		case 'v': return e_keyexists(ctx, ctx->tpath.tok[i].pos);
1117 | 		default:  { /// Not found. Let's create an implicit table.
1118 | 			int            n    = curtbl->ntbl;
1119 | 			toml_table_t** base = (toml_table_t**)expand_ptrarr((void**)curtbl->tbl, n);
1120 | 			if (base == 0)
1121 | 				return e_outofmemory(ctx, FLINE);
1122 | 
1123 | 			curtbl->tbl = base;
1124 | 
1125 | 			if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0)
1126 | 				return e_outofmemory(ctx, FLINE);
1127 | 
1128 | 			if ((base[n]->key = STRDUP(key)) == 0)
1129 | 				return e_outofmemory(ctx, FLINE);
1130 | 			base[n]->keylen = keylen;
1131 | 
1132 | 			nexttbl = curtbl->tbl[curtbl->ntbl++];
1133 | 
1134 | 			/// tabs created by walk_tabpath are considered implicit
1135 | 			nexttbl->implicit = true;
1136 | 		}; break;
1137 | 		}
1138 | 		curtbl = nexttbl; /// switch to next tbl
1139 | 	}
1140 | 
1141 | 	ctx->curtbl = curtbl; /// save it
1142 | 	return 0;
1143 | }
1144 | 
1145 | // handle lines like [x.y.z] or [[x.y.z]]
1146 | static int parse_select(context_t* ctx) {
1147 | 	assert(ctx->tok.tok == LBRACKET);
1148 | 
1149 | 	// true if [[
1150 | 	bool aot = (ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == '[');
1151 | 
1152 | 	// Need to detect '[[' on our own because next_token() will skip whitespace,
1153 | 	// and '[ [' would be taken as '[[', which is wrong.
1154 | 
1155 | 	// eat [ or [[
1156 | 	if (eat_token(ctx, LBRACKET, 1, FLINE))
1157 | 		return -1;
1158 | 	if (aot) {
1159 | 		assert(ctx->tok.tok == LBRACKET);
1160 | 		if (eat_token(ctx, LBRACKET, 1, FLINE))
1161 | 			return -1;
1162 | 	}
1163 | 
1164 | 	if (fill_tblpath(ctx))
1165 | 		return -1;
1166 | 
1167 | 	// For [x.y.z] or [[x.y.z]], remove z from tpath.
1168 | 	token_t z = ctx->tpath.tok[ctx->tpath.top - 1];
1169 | 	xfree(ctx->tpath.key[ctx->tpath.top - 1]);
1170 | 	ctx->tpath.top--;
1171 | 
1172 | 	// Set up ctx->curtbl.
1173 | 	if (walk_tabpath(ctx))
1174 | 		return -1;
1175 | 
1176 | 	if (!aot) {
1177 | 		// [x.y.z] -> create z = {} in x.y
1178 | 		toml_table_t* curtbl = create_keytable_in_table(ctx, ctx->curtbl, z);
1179 | 		if (!curtbl)
1180 | 			return -1;
1181 | 		ctx->curtbl = curtbl;
1182 | 	} else {
1183 | 		// [[x.y.z]] -> create z = [] in x.y
1184 | 		toml_array_t* arr = 0;
1185 | 		{
1186 | 			int   keylen;
1187 | 			char* zstr = normalize_key(ctx, z, &keylen);
1188 | 			if (!zstr)
1189 | 				return -1;
1190 | 			arr = toml_table_array(ctx->curtbl, zstr);
1191 | 			if (arr)
1192 | 				arr->keylen = keylen;
1193 | 			xfree(zstr);
1194 | 		}
1195 | 		if (!arr) {
1196 | 			arr = create_keyarray_in_table(ctx, ctx->curtbl, z, 't');
1197 | 			if (!arr)
1198 | 				return -1;
1199 | 		}
1200 | 		if (arr->kind != 't')
1201 | 			return e_syntax(ctx, z.pos, "array mismatch");
1202 | 
1203 | 		// add to z[]
1204 | 		toml_table_t* dest;
1205 | 		{
1206 | 			toml_table_t* t = create_table_in_array(ctx, arr);
1207 | 			if (!t)
1208 | 				return -1;
1209 | 
1210 | 			if ((t->key = STRDUP("__anon__")) == 0)
1211 | 				return e_outofmemory(ctx, FLINE);
1212 | 			dest = t;
1213 | 		}
1214 | 
1215 | 		ctx->curtbl = dest;
1216 | 	}
1217 | 
1218 | 	if (ctx->tok.tok != RBRACKET) // TODO: never reached
1219 | 		return e_syntax(ctx, ctx->tok.pos, "expected ']'");
1220 | 	if (aot) {
1221 | 		if (!(ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == ']'))
1222 | 			return e_syntax(ctx, ctx->tok.pos, "expected ']]'");
1223 | 		if (eat_token(ctx, RBRACKET, 1, FLINE))
1224 | 			return -1;
1225 | 	}
1226 | 
1227 | 	if (eat_token(ctx, RBRACKET, 1, FLINE))
1228 | 		return -1;
1229 | 	if (ctx->tok.tok != NEWLINE)
1230 | 		return e_syntax(ctx, ctx->tok.pos, "extra chars after ] or ]]");
1231 | 	return 0;
1232 | }
1233 | 
1234 | toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz) {
1235 | 	context_t ctx;
1236 | 
1237 | 	/// clear errbuf
1238 | 	if (errbufsz <= 0)
1239 | 		errbufsz = 0;
1240 | 	if (errbufsz > 0)
1241 | 		errbuf[0] = 0;
1242 | 
1243 | 	// init context
1244 | 	memset(&ctx, 0, sizeof(ctx));
1245 | 	ctx.start    = toml;
1246 | 	ctx.stop     = ctx.start + strlen(toml);
1247 | 	ctx.errbuf   = errbuf;
1248 | 	ctx.errbufsz = errbufsz;
1249 | 
1250 | 	// start with an artificial newline of length 0
1251 | 	ctx.tok.tok      = NEWLINE;
1252 | 	ctx.tok.pos.line = 1;
1253 | 	ctx.tok.pos.col  = 1;
1254 | 	ctx.tok.ptr      = toml;
1255 | 	ctx.tok.len      = 0;
1256 | 
1257 | 	// make a root table
1258 | 	if ((ctx.root = CALLOC(1, sizeof(*ctx.root))) == 0) {
1259 | 		e_outofmemory(&ctx, FLINE);
1260 | 		return 0; // Do not goto fail, root table not set up yet
1261 | 	}
1262 | 
1263 | 	// set root as default table
1264 | 	ctx.curtbl = ctx.root;
1265 | 
1266 | 	// Scan forward until EOF
1267 | 	for (token_t tok = ctx.tok; !tok.eof; tok = ctx.tok) {
1268 | 		switch (tok.tok) {
1269 | 		case NEWLINE:
1270 | 			if (next_token(&ctx, true))
1271 | 				goto fail;
1272 | 			break;
1273 | 
1274 | 		case STRING:
1275 | 			if (parse_keyval(&ctx, ctx.curtbl))
1276 | 				goto fail;
1277 | 
1278 | 			if (ctx.tok.tok != NEWLINE) {
1279 | 				e_syntax(&ctx, ctx.tok.pos, "extra chars after value");
1280 | 				goto fail;
1281 | 			}
1282 | 
1283 | 			if (eat_token(&ctx, NEWLINE, 1, FLINE))
1284 | 				goto fail;
1285 | 			break;
1286 | 
1287 | 		case LBRACKET: // [ x.y.z ] or [[ x.y.z ]]
1288 | 			if (parse_select(&ctx))
1289 | 				goto fail;
1290 | 			break;
1291 | 
1292 | 		default: e_syntax(&ctx, tok.pos, "syntax error"); goto fail;
1293 | 		}
1294 | 	}
1295 | 
1296 | 	/// success
1297 | 	for (int i = 0; i < ctx.tpath.top; i++)
1298 | 		xfree(ctx.tpath.key[i]);
1299 | 	return ctx.root;
1300 | 
1301 | fail:
1302 | 	// Something bad has happened. Free resources and return error.
1303 | 	for (int i = 0; i < ctx.tpath.top; i++)
1304 | 		xfree(ctx.tpath.key[i]);
1305 | 	toml_free(ctx.root);
1306 | 	return 0;
1307 | }
1308 | 
1309 | toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz) {
1310 | 	int   bufsz = 0;
1311 | 	char* buf   = 0;
1312 | 	int   off   = 0;
1313 | 	int   inc   = 1024;
1314 | 
1315 | 	while (!feof(fp)) {
1316 | 		if (bufsz == 1024 * 20) /// Increment buffer by 20k after 20k.
1317 | 			inc = 1024 * 20;
1318 | 		if (off == bufsz) {
1319 | 			int   xsz = bufsz + inc;
1320 | 			char* x   = expand(buf, bufsz, xsz);
1321 | 			if (!x) {
1322 | 				snprintf(errbuf, errbufsz, "out of memory");
1323 | 				xfree(buf);
1324 | 				return 0;
1325 | 			}
1326 | 			buf   = x;
1327 | 			bufsz = xsz;
1328 | 		}
1329 | 
1330 | 		errno = 0;
1331 | 		int n = fread(buf + off, 1, bufsz - off, fp);
1332 | 		if (ferror(fp)) {
1333 | 			snprintf(errbuf, errbufsz, "%s", (errno ? strerror(errno) : "Error reading file"));
1334 | 			xfree(buf);
1335 | 			return 0;
1336 | 		}
1337 | 		off += n;
1338 | 	}
1339 | 
1340 | 	/// tag on a NUL to cap the string
1341 | 	if (off == bufsz) {
1342 | 		int   xsz = bufsz + 1;
1343 | 		char* x   = expand(buf, bufsz, xsz);
1344 | 		if (!x) {
1345 | 			snprintf(errbuf, errbufsz, "out of memory");
1346 | 			xfree(buf);
1347 | 			return 0;
1348 | 		}
1349 | 		buf   = x;
1350 | 		bufsz = xsz;
1351 | 	}
1352 | 	buf[off] = 0;
1353 | 
1354 | 	/// parse it, cleanup and finish.
1355 | 	toml_table_t* ret = toml_parse(buf, errbuf, errbufsz);
1356 | 	xfree(buf);
1357 | 	return ret;
1358 | }
1359 | 
1360 | static void xfree_kval(toml_keyval_t* p) {
1361 | 	if (!p)
1362 | 		return;
1363 | 	xfree(p->key);
1364 | 	xfree(p->val);
1365 | 	xfree(p);
1366 | }
1367 | 
1368 | static void xfree_tbl(toml_table_t* p);
1369 | 
1370 | static void xfree_arr(toml_array_t* p) {
1371 | 	if (!p)
1372 | 		return;
1373 | 
1374 | 	xfree(p->key);
1375 | 	const int n = p->nitem;
1376 | 	for (int i = 0; i < n; i++) {
1377 | 		toml_arritem_t* a = &p->item[i];
1378 | 		if (a->val)
1379 | 			xfree(a->val);
1380 | 		else if (a->arr)
1381 | 			xfree_arr(a->arr);
1382 | 		else if (a->tbl)
1383 | 			xfree_tbl(a->tbl);
1384 | 	}
1385 | 	xfree(p->item);
1386 | 	xfree(p);
1387 | }
1388 | 
1389 | static void xfree_tbl(toml_table_t* p) {
1390 | 	if (!p)
1391 | 		return;
1392 | 
1393 | 	xfree(p->key);
1394 | 
1395 | 	for (int i = 0; i < p->nkval; i++)
1396 | 		xfree_kval(p->kval[i]);
1397 | 	xfree(p->kval);
1398 | 
1399 | 	for (int i = 0; i < p->narr; i++)
1400 | 		xfree_arr(p->arr[i]);
1401 | 	xfree(p->arr);
1402 | 
1403 | 	for (int i = 0; i < p->ntbl; i++)
1404 | 		xfree_tbl(p->tbl[i]);
1405 | 	xfree(p->tbl);
1406 | 
1407 | 	xfree(p);
1408 | }
1409 | 
1410 | void toml_free(toml_table_t* tbl) {
1411 | 	xfree_tbl(tbl);
1412 | }
1413 | 
1414 | static void set_token(context_t* ctx, tokentype_t tok, toml_pos_t pos, char* ptr, int len) {
1415 | 	token_t t;
1416 | 	t.tok    = tok;
1417 | 	t.pos    = pos;
1418 | 	t.ptr    = ptr;
1419 | 	t.len    = len;
1420 | 	t.eof    = 0;
1421 | 	ctx->tok = t;
1422 | }
1423 | 
1424 | static void set_eof(context_t* ctx, toml_pos_t pos) {
1425 | 	set_token(ctx, NEWLINE, pos, ctx->stop, 0);
1426 | 	ctx->tok.eof = 1;
1427 | }
1428 | 
// Read exactly n decimal digits ([0-9]) starting at p and return their value.
// Returns -1 when fewer than n digits are present (or n is negative); returns
// 0 when n is 0. Scanning stops at the first non-digit, so the terminating
// NUL of a string is never passed.
static int scan_digits(const char* p, int n) {
	int ret = 0;
	/// isdigit() is only defined for unsigned char values and EOF; the input
	/// may contain bytes >= 0x80 (UTF-8), which are negative as plain char.
	for (; n > 0 && isdigit((unsigned char)*p); n--, p++)
		ret = 10 * ret + (*p - '0');
	return n ? -1 : ret;
}
1436 | 
// Scan a date of the form YYYY-MM-DD at p. Each non-null output receives the
// parsed field, or -1 for a field that could not be read (a later field is
// only attempted when the previous one and its '-' separator were found).
// Returns true when all three fields were read. No range checks are done.
static bool scan_date(const char* p, int* YY, int* MM, int* DD) {
	const int year = scan_digits(p, 4);

	int month = -1;
	if (year >= 0 && p[4] == '-')
		month = scan_digits(p + 5, 2);

	int day = -1;
	if (month >= 0 && p[7] == '-')
		day = scan_digits(p + 8, 2);

	if (YY)
		*YY = year;
	if (MM)
		*MM = month;
	if (DD)
		*DD = day;

	return !(year < 0 || month < 0 || day < 0);
}
1449 | 
// Scan a time of the form HH:MM or HH:MM:SS at p. Each non-null output
// receives the parsed field, or -1 for a field that could not be read.
// Seconds are optional: returns true as soon as hour and minute were read.
// No range checks are done.
static bool scan_time(const char* p, int* hh, int* mm, int* ss) {
	const int hour = scan_digits(p, 2);

	int minute = -1;
	if (hour >= 0 && p[2] == ':')
		minute = scan_digits(p + 3, 2);

	int second = -1;
	if (minute >= 0 && p[5] == ':')
		second = scan_digits(p + 6, 2);

	if (hh)
		*hh = hour;
	if (mm)
		*mm = minute;
	if (ss)
		*ss = second;

	return !(hour < 0 || minute < 0);
}
1462 | 
// Convert the run of decimal digits at p (the fraction after the '.') into
// milliseconds. Only the first three digits contribute; further digits are
// consumed but add nothing. *endp is set to the first non-digit character.
static int parse_millisec(const char* p, const char** endp) {
	int total  = 0;
	int weight = 100; /// value of the current digit position, in millisec

	while (*p >= '0' && *p <= '9') {
		total  += (*p - '0') * weight;
		weight /= 10;
		p++;
	}

	*endp = p;
	return total;
}
1471 | 
// Scan a UTC offset of the form +HH:MM or -HH:MM at p (p[0] is the sign).
// Returns false when the digits or the ':' are missing, the hour exceeds 14
// or the minute exceeds 59. On success, when tz is non-null, it receives the
// offset in minutes, negated for a '-' sign.
static bool scan_offset(const char* p, int* tz) {
	const int hour = scan_digits(p + 1, 2);

	int minute = -1;
	if (hour >= 0 && p[3] == ':')
		minute = scan_digits(p + 4, 2);

	if (hour < -12 || hour > 14)
		return false;
	if (minute < 0 || minute > 59)
		return false;

	if (tz) {
		const int total = hour * 60 + minute;
		*tz = (p[0] == '-') ? -total : total;
	}
	return true;
}
1485 | 
1486 | static int scan_string(context_t* ctx, char* p, toml_pos_t* pos, bool dotisspecial) {
1487 | 	char* orig = p;
1488 | 
1489 | 	// Literal multiline.
1490 | 	if (strncmp(p, "'''", 3) == 0) {
1491 | 		char* q   = p + 3;
1492 | 		pos->col += 3;
1493 | 		while (true) {
1494 | 			q = strstr(q, "'''");
1495 | 			if (q == 0)
1496 | 				return e_syntax(ctx, *pos, "unterminated triple quote (''')");
1497 | 			int i = 0;
1498 | 			while (q[3] == '\'') {
1499 | 				i++;
1500 | 				if (i >= 3)
1501 | 					return e_syntax(ctx, *pos, "too many ''' in triple-s-quote");
1502 | 				q++;
1503 | 			}
1504 | 			break;
1505 | 		}
1506 | 		set_token(ctx, MSTRING, *pos, orig, q + 3 - orig);
1507 | 		return 0;
1508 | 	}
1509 | 
1510 | 	// Multiline.
1511 | 	if (strncmp(p, "\"\"\"", 3) == 0) {
1512 | 		char* q   = p + 3;
1513 | 		pos->col += 3;
1514 | 		while (true) {
1515 | 			q = strstr(q, "\"\"\"");
1516 | 			if (q == 0)
1517 | 				return e_syntax(ctx, *pos, "unterminated triple quote (\"\"\")");
1518 | 			if (q[-1] == '\\') {
1519 | 				q++;
1520 | 				continue;
1521 | 			}
1522 | 			int i = 0;
1523 | 			while (q[3] == '\"') {
1524 | 				i++;
1525 | 				if (i >= 3)
1526 | 					return e_syntax(ctx, *pos, "too many \"\"\" in triple-d-quote");
1527 | 				q++;
1528 | 			}
1529 | 			break;
1530 | 		}
1531 | 
1532 | 		/// the string is [p+3, q-1]
1533 | 		int  hexreq = 0; /// #hex required
1534 | 		bool escape = false;
1535 | 		for (p += 3; p < q; p++) {
1536 | 			if (escape) {
1537 | 				escape = false;
1538 | 				if (strchr("btnfre\"\\", *p))
1539 | 					continue;
1540 | 				if (*p == 'x') {
1541 | 					hexreq = 2;
1542 | 					continue;
1543 | 				}
1544 | 				if (*p == 'u') {
1545 | 					hexreq = 4;
1546 | 					continue;
1547 | 				}
1548 | 				if (*p == 'U') {
1549 | 					hexreq = 8;
1550 | 					continue;
1551 | 				}
1552 | 				if (p[strspn(p, " \t\r")] == '\n')
1553 | 					continue; // allow for line ending backslash
1554 | 				return e_syntax(ctx, *pos, "bad escape char");
1555 | 			}
1556 | 			if (hexreq) {
1557 | 				hexreq--;
1558 | 				if (strchr("0123456789ABCDEFabcdef", *p))
1559 | 					continue;
1560 | 				return e_syntax(ctx, *pos, "expected hex char");
1561 | 			}
1562 | 			if (*p == '\\') {
1563 | 				escape = true;
1564 | 				continue;
1565 | 			}
1566 | 		}
1567 | 		if (escape) // TODO: unreachable, I think?
1568 | 			return e_syntax(ctx, *pos, "expected an escape char");
1569 | 		if (hexreq)
1570 | 			return e_syntax(ctx, *pos, "expected more hex char");
1571 | 
1572 | 		set_token(ctx, MSTRING, *pos, orig, q + 3 - orig);
1573 | 		return 0;
1574 | 	}
1575 | 
1576 | 	// Literal string.
1577 | 	if (*p == '\'') {
1578 | 		for (p++; *p && *p != '\n' && *p != '\''; p++)
1579 | 			pos->col++;
1580 | 		if (*p != '\'')
1581 | 			return e_syntax(ctx, *pos, "unterminated quote (')");
1582 | 		set_token(ctx, STRING, *pos, orig, p + 1 - orig);
1583 | 		return 0;
1584 | 	}
1585 | 
1586 | 	// Basic String.
1587 | 	if (*p == '\"') {
1588 | 		int  hexreq = 0; /// #hex required
1589 | 		bool escape = false;
1590 | 		for (p++; *p; p++) {
1591 | 			pos->col++;
1592 | 			if (escape) {
1593 | 				escape = false;
1594 | 				if (strchr("btnfre\"\\", *p))
1595 | 					continue;
1596 | 				if (*p == 'x') {
1597 | 					hexreq = 2;
1598 | 					continue;
1599 | 				}
1600 | 				if (*p == 'u') {
1601 | 					hexreq = 4;
1602 | 					continue;
1603 | 				}
1604 | 				if (*p == 'U') {
1605 | 					hexreq = 8;
1606 | 					continue;
1607 | 				}
1608 | 				return e_syntax(ctx, *pos, "bad escape char");
1609 | 			}
1610 | 			if (hexreq) {
1611 | 				hexreq--;
1612 | 				if (strchr("0123456789ABCDEFabcdef", *p))
1613 | 					continue;
1614 | 				return e_syntax(ctx, *pos, "expected hex char");
1615 | 			}
1616 | 			if (*p == '\\') {
1617 | 				escape = true;
1618 | 				continue;
1619 | 			}
1620 | 			if (*p == '\n')
1621 | 				break;
1622 | 			if (*p == '"')
1623 | 				break;
1624 | 		}
1625 | 		if (*p != '"')
1626 | 			return e_syntax(ctx, *pos, "unterminated quote (\")");
1627 | 
1628 | 		set_token(ctx, STRING, *pos, orig, p + 1 - orig);
1629 | 		return 0;
1630 | 	}
1631 | 
1632 | 	// Time
1633 | 	if (!dotisspecial && scan_time(p, 0, 0, 0)) {
1634 | 		p += strspn(p, "0123456789:"); /// forward thru the time.
1635 | 		if (p[0] == '.') {             /// Subseconds
1636 | 			int n = strspn(++p, "0123456789");
1637 | 			if (n == 0)
1638 | 				return e_syntax(ctx, *pos, "extra chars after '.'");
1639 | 			p += n;
1640 | 		}
1641 | 		for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string
1642 | 			;
1643 | 		set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize
1644 | 		return 0;
1645 | 	}
1646 | 
1647 | 	// Datetime
1648 | 	if (!dotisspecial && scan_date(p, 0, 0, 0)) {
1649 | 		p += strspn(p, "0123456789-");                   /// forward thru the date
1650 | 		if (p[0] == ' ' || p[0] == 't' || p[0] == 'T') { /// forward thru the time
1651 | 			p++;
1652 | 			p += strspn(p, "0123456789:");
1653 | 			if (p[0] == '.') { /// Subseconds
1654 | 				int n = strspn(++p, "0123456789");
1655 | 				if (n == 0)
1656 | 					return e_syntax(ctx, *pos, "extra chars after '.'");
1657 | 				p += n;
1658 | 			}
1659 | 		}
1660 | 
1661 | 		// Offset
1662 | 		if (p[0] == 'Z' || p[0] == 'z') {
1663 | 			p++;
1664 | 		} else if (p[0] == '+' || p[0] == '-') {
1665 | 			if (!scan_offset(p, 0))
1666 | 				return e_syntax(ctx, *pos, "invalid offset");
1667 | 			p += 6;
1668 | 		}
1669 | 
1670 | 		for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string
1671 | 			;
1672 | 		set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize
1673 | 		return 0;
1674 | 	}
1675 | 
1676 | 	// Literals
1677 | 	for (; *p && *p != '\n'; p++) {
1678 | 		int ch = *p;
1679 | 		if (ch == '.' && dotisspecial)
1680 | 			break;
1681 | 		if ('A' <= ch && ch <= 'Z')
1682 | 			continue;
1683 | 		if ('a' <= ch && ch <= 'z')
1684 | 			continue;
1685 | 		if (strchr("0123456789+-_.", ch))
1686 | 			continue;
1687 | 		break;
1688 | 	}
1689 | 
1690 | 	set_token(ctx, STRING, *pos, orig, p - orig);
1691 | 	return 0;
1692 | }
1693 | 
1694 | static int next_token(context_t* ctx, bool dotisspecial) {
1695 | 	// Eat this tok.
1696 | 	char*      p   = ctx->tok.ptr;
1697 | 	toml_pos_t pos = ctx->tok.pos;
1698 | 	for (int i = 0; i < ctx->tok.len; i++) {
1699 | 		pos.col++;
1700 | 		if (*p++ == '\n') {
1701 | 			pos.line++;
1702 | 			pos.col = 1;
1703 | 		}
1704 | 	}
1705 | 
1706 | 	/// Make next tok
1707 | 	while (p < ctx->stop) {
1708 | 		if (*p == '#') { /// Skip comment. stop just before the \n.
1709 | 			for (p++; p < ctx->stop && *p != '\n'; p++) {
1710 | 				pos.col++;
1711 | 				if ((*p != '\t' && *p != '\r' && *p != '\n') && ((*p >= 0x00 && *p <= 0x1f) || *p == 0x7f))
1712 | 					return e_syntax(ctx, pos, "invalid control character");
1713 | 				if (*p == '\r' && p < ctx->stop + 1 && *(p + 1) != '\n')
1714 | 					return e_syntax(ctx, pos, "invalid control character");
1715 | 			}
1716 | 			continue;
1717 | 		}
1718 | 
1719 | 		if (dotisspecial && *p == '.') {
1720 | 			set_token(ctx, DOT, pos, p, 1);
1721 | 			return 0;
1722 | 		}
1723 | 
1724 | 		switch (*p) {
1725 | 		case ',':  set_token(ctx, COMMA, pos, p, 1); return 0;
1726 | 		case '=':  set_token(ctx, EQUAL, pos, p, 1); return 0;
1727 | 		case '{':  set_token(ctx, LBRACE, pos, p, 1); return 0;
1728 | 		case '}':  set_token(ctx, RBRACE, pos, p, 1); return 0;
1729 | 		case '[':  set_token(ctx, LBRACKET, pos, p, 1); return 0;
1730 | 		case ']':  set_token(ctx, RBRACKET, pos, p, 1); return 0;
1731 | 		case '\n': set_token(ctx, NEWLINE, pos, p, 1); return 0;
1732 | 		case '\r':
1733 | 		case ' ':
1734 | 		case '\t': /// ignore white spaces
1735 | 			p++;
1736 | 			pos.col++;
1737 | 			continue;
1738 | 		}
1739 | 
1740 | 		return scan_string(ctx, p, &pos, dotisspecial);
1741 | 	}
1742 | 
1743 | 	set_eof(ctx, pos);
1744 | 	return 0;
1745 | }
1746 | 
1747 | const char* toml_table_key(const toml_table_t* tbl, int keyidx, int* keylen) {
1748 | 	if (keyidx < tbl->nkval) {
1749 | 		*keylen = tbl->kval[keyidx]->keylen;
1750 | 		return tbl->kval[keyidx]->key;
1751 | 	}
1752 | 	if ((keyidx -= tbl->nkval) < tbl->narr) {
1753 | 		*keylen = tbl->arr[keyidx]->keylen;
1754 | 		return tbl->arr[keyidx]->key;
1755 | 	}
1756 | 	if ((keyidx -= tbl->narr) < tbl->ntbl) {
1757 | 		*keylen = tbl->tbl[keyidx]->keylen;
1758 | 		return tbl->tbl[keyidx]->key;
1759 | 	}
1760 | 	*keylen = 0;
1761 | 	return 0;
1762 | }
1763 | 
1764 | toml_unparsed_t toml_table_unparsed(const toml_table_t* tbl, const char* key) {
1765 | 	for (int i = 0; i < tbl->nkval; i++)
1766 | 		if (strcmp(key, tbl->kval[i]->key) == 0)
1767 | 			return tbl->kval[i]->val;
1768 | 	return 0;
1769 | }
1770 | 
1771 | toml_array_t* toml_table_array(const toml_table_t* tbl, const char* key) {
1772 | 	for (int i = 0; i < tbl->narr; i++)
1773 | 		if (strcmp(key, tbl->arr[i]->key) == 0)
1774 | 			return tbl->arr[i];
1775 | 	return 0;
1776 | }
1777 | 
1778 | toml_table_t* toml_table_table(const toml_table_t* tbl, const char* key) {
1779 | 	for (int i = 0; i < tbl->ntbl; i++)
1780 | 		if (strcmp(key, tbl->tbl[i]->key) == 0)
1781 | 			return tbl->tbl[i];
1782 | 	return 0;
1783 | }
1784 | 
1785 | toml_unparsed_t toml_array_unparsed(const toml_array_t* arr, int idx) {
1786 | 	return (0 <= idx && idx < arr->nitem) ? arr->item[idx].val : 0;
1787 | }
1788 | 
1789 | int toml_table_len(const toml_table_t* tbl) {
1790 | 	return tbl->nkval + tbl->narr + tbl->ntbl;
1791 | }
1792 | 
1793 | int toml_array_len(const toml_array_t* arr) {
1794 | 	return arr->nitem;
1795 | }
1796 | 
1797 | toml_array_t* toml_array_array(const toml_array_t* arr, int idx) {
1798 | 	return (0 <= idx && idx < arr->nitem) ? arr->item[idx].arr : 0;
1799 | }
1800 | 
1801 | toml_table_t* toml_array_table(const toml_array_t* arr, int idx) {
1802 | 	return (0 <= idx && idx < arr->nitem) ? arr->item[idx].tbl : 0;
1803 | }
1804 | 
// Gregorian leap-year rule: divisible by 4, except century years, which must
// also be divisible by 400.
bool is_leap(int y) {
	if (y % 4 != 0)
		return false;
	if (y % 100 != 0)
		return true;
	return y % 400 == 0;
}
1808 | 
1809 | int toml_value_timestamp(toml_unparsed_t src_, toml_timestamp_t* ret) {
1810 | 	if (!src_)
1811 | 		return -1;
1812 | 
1813 | 	const char* p               = src_;
1814 | 	bool        must_parse_time = false;
1815 | 
1816 | 	memset(ret, 0, sizeof(*ret));
1817 | 
1818 | 	/// YYYY-MM-DD
1819 | 	if (scan_date(p, &ret->year, &ret->month, &ret->day)) {
1820 | 		if (ret->month < 1 || ret->day < 1 || ret->month > 12 || ret->day > 31)
1821 | 			return -1;
1822 | 		if (ret->month == 2 && ret->day > (is_leap(ret->year) ? 29 : 28))
1823 | 			return -1;
1824 | 		ret->kind = 'D';
1825 | 
1826 | 		p += 10;
1827 | 		if (*p) {
1828 | 			if (*p != 'T' && *p != 't' && *p != ' ') /// T or space
1829 | 				return -1;
1830 | 			must_parse_time = true;
1831 | 			p++;
1832 | 		}
1833 | 	}
1834 | 
1835 | 	/// HH:MM:SS
1836 | 	if (scan_time(p, &ret->hour, &ret->minute, &ret->second)) {
1837 | 		if (ret->minute < 0 || ret->hour < 0 || ret->hour > 23 || ret->minute > 59 || ret->second > 60)
1838 | 			return -1;
1839 | 		p += (ret->second == -1 ? 5 : 8);
1840 | 		ret->kind = (ret->kind == 'D' ? 'l' : 't');
1841 | 		if (ret->second == -1)
1842 | 			ret->second = 0;
1843 | 
1844 | 		if (*p == '.') { /// optionally, parse millisec
1845 | 			p++;         /// skip '.'
1846 | 			const char* qq;
1847 | 			ret->millisec = parse_millisec(p, &qq);
1848 | 			p             = qq;
1849 | 		}
1850 | 
1851 | 		if (*p) { /// parse and copy Z
1852 | 			ret->kind = 'd';
1853 | 			if (*p == 'Z' || *p == 'z')
1854 | 				p++;
1855 | 			else if (*p == '+' || *p == '-') {
1856 | 				if (!scan_offset(p, &ret->tz))
1857 | 					return -1;
1858 | 				p += 6;
1859 | 			}
1860 | 		}
1861 | 	}
1862 | 	if (*p != 0)
1863 | 		return -1;
1864 | 	if (must_parse_time && ret->kind == 'D')
1865 | 		return -1;
1866 | 	return 0;
1867 | }
1868 | 
1869 | // Raw to boolean
1870 | int toml_value_bool(toml_unparsed_t src, bool* ret_) {
1871 | 	if (!src)
1872 | 		return -1;
1873 | 	bool  dummy = false;
1874 | 	bool* ret   = ret_ ? ret_ : &dummy;
1875 | 
1876 | 	if (strcmp(src, "true") == 0) {
1877 | 		*ret = true;
1878 | 		return 0;
1879 | 	}
1880 | 	if (strcmp(src, "false") == 0) {
1881 | 		*ret = false;
1882 | 		return 0;
1883 | 	}
1884 | 	return -1;
1885 | }
1886 | 
1887 | // Raw to integer
1888 | int toml_value_int(toml_unparsed_t src, int64_t* ret_) {
1889 | 	if (!src)
1890 | 		return -1;
1891 | 
1892 | 	char        buf[100];
1893 | 	char*       p         = buf;
1894 | 	char*       q         = p + sizeof(buf);
1895 | 	const char* s         = src;
1896 | 	int64_t     dummy     = 0;
1897 | 	int64_t*    ret       = ret_ ? ret_ : &dummy;
1898 | 	bool        have_sign = false;
1899 | 
1900 | 	if (s[0] == '+' || s[0] == '-') { /// allow +/-
1901 | 		have_sign = true;
1902 | 		*p++      = *s++;
1903 | 	}
1904 | 
1905 | 	if (s[0] == '_') /// disallow +_100
1906 | 		return -1;
1907 | 
1908 | 	int base = 0;
1909 | 	if (s[0] == '0') { /// if 0* ...
1910 | 		switch (s[1]) {
1911 | 		case 'x':
1912 | 			base  = 16;
1913 | 			s    += 2;
1914 | 			break;
1915 | 		case 'o':
1916 | 			base  = 8;
1917 | 			s    += 2;
1918 | 			break;
1919 | 		case 'b':
1920 | 			base  = 2;
1921 | 			s    += 2;
1922 | 			break;
1923 | 		case '\0': return *ret = 0, 0;
1924 | 		default:
1925 | 			if (s[1]) /// ensure no other digits after it
1926 | 				return -1;
1927 | 		}
1928 | 		if (!*s)
1929 | 			return -1;
1930 | 		if (have_sign) /// disallow +0xff, -0xff
1931 | 			return -1;
1932 | 		if (s[0] == '_') /// disallow 0x_, 0o_, 0b_
1933 | 			return -1;
1934 | 		if (s[0] == '+' || s[0] == '-') /// disallow 0x+10, 0x-10
1935 | 			return -1;
1936 | 	}
1937 | 
1938 | 	while (*s && p < q) { /// just strip underscores and pass to strtoll
1939 | 		int ch = *s++;
1940 | 		if (ch == '_') {
1941 | 			if (s[0] == '_') /// disallow '__'
1942 | 				return -1;
1943 | 			if (s[0] == '\0') /// numbers cannot end with '_'
1944 | 				return -1;
1945 | 			continue; /// skip _
1946 | 		}
1947 | 		*p++ = ch;
1948 | 	}
1949 | 
1950 | 	if (*s || p == q) /// if not at end-of-string or we ran out of buffer ...
1951 | 		return -1;
1952 | 
1953 | 	*p = 0; /// cap with NUL
1954 | 
1955 | 	/// Run strtoll on buf to get the integer
1956 | 	char* endp;
1957 | 	errno = 0;
1958 | 	*ret  = strtoll(buf, &endp, base);
1959 | 	return (errno || *endp) ? -1 : 0;
1960 | }
1961 | 
1962 | int toml_value_double(toml_unparsed_t src, double* ret_) {
1963 | 	if (!src)
1964 | 		return -1;
1965 | 
1966 | 	char        buf[100];
1967 | 	char*       p     = buf;
1968 | 	char*       q     = p + sizeof(buf);
1969 | 	const char* s     = src;
1970 | 	double      dummy = 0.0;
1971 | 	double*     ret   = ret_ ? ret_ : &dummy;
1972 | 
1973 | 	if (s[0] == '+' || s[0] == '-') /// allow +/-
1974 | 		*p++ = *s++;
1975 | 
1976 | 	if (s[0] == '_') /// disallow +_1.00
1977 | 		return -1;
1978 | 
1979 | 	{ /// decimal point, if used, must be surrounded by at least one digit on each side
1980 | 		char* dot = strchr(s, '.');
1981 | 		if (dot) {
1982 | 			if (dot == s || !isdigit(dot[-1]) || !isdigit(dot[1]))
1983 | 				return -1;
1984 | 		}
1985 | 	}
1986 | 
1987 | 	/// zero must be followed by . or 'e', or NUL
1988 | 	if (s[0] == '0' && s[1] && !strchr("eE.", s[1]))
1989 | 		return -1;
1990 | 
1991 | 	/// Just strip underscores and pass to strtod
1992 | 	bool have_us = false;
1993 | 	while (*s && p < q) {
1994 | 		int ch = *s++;
1995 | 		if (ch == '_') {
1996 | 			have_us = true;
1997 | 			if (s[0] == '_') /// disallow '__'
1998 | 				return -1;
1999 | 			if (s[0] == 'e') /// disallow _e
2000 | 				return -1;
2001 | 			if (s[0] == 0) /// disallow last char '_'
2002 | 				return -1;
2003 | 			continue; /// skip _
2004 | 		}
2005 | 		if (ch == 'I' || ch == 'N' || ch == 'F' || ch == 'A') /// inf and nan are case-sensitive.
2006 | 			return -1;
2007 | 		if (ch == 'e' && s[0] == '_') /// disallow e_
2008 | 			return -1;
2009 | 		*p++ = ch;
2010 | 	}
2011 | 	if (*s || p == q)
2012 | 		return -1; /// reached end of string or buffer is full?
2013 | 
2014 | 	*p = 0; /// cap with NUL
2015 | 
2016 | 	/// Run strtod on buf to get the value
2017 | 	char* endp;
2018 | 	errno = 0;
2019 | 	*ret  = strtod(buf, &endp);
2020 | 	if (errno || *endp)
2021 | 		return -1;
2022 | 	if (have_us && (isnan(*ret) || isinf(*ret)))
2023 | 		return -1;
2024 | 	return 0;
2025 | }
2026 | 
2027 | int toml_value_string(toml_unparsed_t src, char** ret, int* len) {
2028 | 	bool        multiline = false;
2029 | 	const char* sp;
2030 | 	const char* sq;
2031 | 
2032 | 	*ret = 0;
2033 | 	if (!src)
2034 | 		return -1;
2035 | 
2036 | 	/// First char must be a s-quote or d-quote
2037 | 	int qchar  = src[0];
2038 | 	int srclen = strlen(src);
2039 | 	if (!(qchar == '\'' || qchar == '"')) {
2040 | 		return -1;
2041 | 	}
2042 | 
2043 | 	/// triple quotes?
2044 | 	if (qchar == src[1] && qchar == src[2]) {
2045 | 		multiline = true;             /// triple-quote implies multiline
2046 | 		sp        = src + 3;          /// first char after quote
2047 | 		sq        = src + srclen - 3; /// first char of ending quote
2048 | 
2049 | 		if (!(sp <= sq && sq[0] == qchar && sq[1] == qchar && sq[2] == qchar))
2050 | 			return -1; /// last 3 chars in src must be qchar
2051 | 
2052 | 		if (sp[0] == '\n') /// skip new line immediate after qchar
2053 | 			sp++;
2054 | 		else if (sp[0] == '\r' && sp[1] == '\n')
2055 | 			sp += 2;
2056 | 	} else {
2057 | 		sp = src + 1;                    /// first char after quote
2058 | 		sq = src + srclen - 1;           /// ending quote
2059 | 		if (!(sp <= sq && *sq == qchar)) /// last char in src must be qchar
2060 | 			return -1;
2061 | 	}
2062 | 
2063 | 	/// at this point:
2064 | 	///     sp points to first valid char after quote.
2065 | 	///     sq points to one char beyond last valid char.
2066 | 	///     string len is (sq - sp).
2067 | 	if (qchar == '\'')
2068 | 		*ret = norm_lit_str(sp, sq - sp, len, multiline, 0, 0);
2069 | 	else
2070 | 		*ret = norm_basic_str(sp, sq - sp, len, multiline, 0, 0);
2071 | 	return *ret ? 0 : -1;
2072 | }
2073 | 
2074 | toml_value_t toml_array_string(const toml_array_t* arr, int idx) {
2075 | 	toml_value_t ret;
2076 | 	memset(&ret, 0, sizeof(ret));
2077 | 	ret.ok = (toml_value_string(toml_array_unparsed(arr, idx), &ret.u.s, &ret.u.sl) == 0);
2078 | 	return ret;
2079 | }
2080 | 
2081 | toml_value_t toml_array_bool(const toml_array_t* arr, int idx) {
2082 | 	toml_value_t ret;
2083 | 	memset(&ret, 0, sizeof(ret));
2084 | 	ret.ok = (toml_value_bool(toml_array_unparsed(arr, idx), &ret.u.b) == 0);
2085 | 	return ret;
2086 | }
2087 | 
2088 | toml_value_t toml_array_int(const toml_array_t* arr, int idx) {
2089 | 	toml_value_t ret;
2090 | 	memset(&ret, 0, sizeof(ret));
2091 | 	ret.ok = (toml_value_int(toml_array_unparsed(arr, idx), &ret.u.i) == 0);
2092 | 	return ret;
2093 | }
2094 | 
2095 | toml_value_t toml_array_double(const toml_array_t* arr, int idx) {
2096 | 	toml_value_t ret;
2097 | 	memset(&ret, 0, sizeof(ret));
2098 | 	ret.ok = (toml_value_double(toml_array_unparsed(arr, idx), &ret.u.d) == 0);
2099 | 	return ret;
2100 | }
2101 | 
2102 | toml_value_t toml_array_timestamp(const toml_array_t* arr, int idx) {
2103 | 	toml_value_t ret;
2104 | 	memset(&ret, 0, sizeof(ret));
2105 | 	ret.ok = (toml_value_timestamp(toml_array_unparsed(arr, idx), &ret.u.ts) == 0);
2106 | 	return ret;
2107 | }
2108 | 
2109 | toml_value_t toml_table_string(const toml_table_t* tbl, const char* key) {
2110 | 	toml_value_t ret;
2111 | 	memset(&ret, 0, sizeof(ret));
2112 | 	toml_unparsed_t raw = toml_table_unparsed(tbl, key);
2113 | 	if (raw)
2114 | 		ret.ok = (toml_value_string(raw, &ret.u.s, &ret.u.sl) == 0);
2115 | 	return ret;
2116 | }
2117 | 
2118 | toml_value_t toml_table_bool(const toml_table_t* tbl, const char* key) {
2119 | 	toml_value_t ret;
2120 | 	memset(&ret, 0, sizeof(ret));
2121 | 	ret.ok = (toml_value_bool(toml_table_unparsed(tbl, key), &ret.u.b) == 0);
2122 | 	return ret;
2123 | }
2124 | 
2125 | toml_value_t toml_table_int(const toml_table_t* tbl, const char* key) {
2126 | 	toml_value_t ret;
2127 | 	memset(&ret, 0, sizeof(ret));
2128 | 	ret.ok = (toml_value_int(toml_table_unparsed(tbl, key), &ret.u.i) == 0);
2129 | 	return ret;
2130 | }
2131 | 
2132 | toml_value_t toml_table_double(const toml_table_t* tbl, const char* key) {
2133 | 	toml_value_t ret;
2134 | 	memset(&ret, 0, sizeof(ret));
2135 | 	ret.ok = (toml_value_double(toml_table_unparsed(tbl, key), &ret.u.d) == 0);
2136 | 	return ret;
2137 | }
2138 | 
2139 | toml_value_t toml_table_timestamp(const toml_table_t* tbl, const char* key) {
2140 | 	toml_value_t ret;
2141 | 	memset(&ret, 0, sizeof(ret));
2142 | 	ret.ok = (toml_value_timestamp(toml_table_unparsed(tbl, key), &ret.u.ts) == 0);
2143 | 	return ret;
2144 | }
2145 | #endif // TOML_H
2146 | 


--------------------------------------------------------------------------------