├── .gitignore ├── LICENSE ├── README.md ├── build.sh ├── clean.sh ├── coverage.sh ├── examples ├── parse-file.c └── simple.c ├── fuzzer.sh ├── src ├── xj_snprintf.c ├── xj_snprintf.h ├── xjson.c └── xjson.h └── tests ├── fuzzer.c └── test.c /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | test 3 | vgcore.* 4 | *.gcno 5 | *.gcda 6 | *.gcov 7 | parse-file 8 | samples 9 | fuzzer 10 | simple -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Francesco Cozzuto 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xJSON 2 | xJSON is a lightweight library that implements a JSON encoder, decoder and other utility functions. 
3 | 4 | ## Usage 5 | To use xJSON, just add `xjson.c` and `xjson.h` to your files, then include `xjson.h` where you want to use it and compile `xjson.c` with your other files. 6 | 7 | ## Overview 8 | The main functions implemented by xJSON are 9 | ```c 10 | xj_value *xj_decode(const char *str, int len, 11 | xj_alloc *alloc, xj_error *error); 12 | 13 | char *xj_encode(xj_value *value, int *len); 14 | ``` 15 | which let you transform a JSON-encoded UTF-8 string to an `xj_value` and transform an `xj_value` to a string. 16 | 17 | ### Object model 18 | The `xj_value` structure represents a generic JSON value. Its definition is made public, so that you can access it directly to read and modify it 19 | ```c 20 | enum { 21 | XJ_NULL, XJ_BOOL, XJ_INT, XJ_FLOAT, 22 | XJ_ARRAY, XJ_OBJECT, XJ_STRING, 23 | }; 24 | 25 | typedef struct xj_value xj_value; 26 | struct xj_value { 27 | int type; 28 | int size; 29 | xj_value *next; 30 | char *key; 31 | union { 32 | xj_i64 as_int; 33 | xj_bool as_bool; 34 | xj_f64 as_float; 35 | xj_value *as_array; 36 | xj_value *as_object; 37 | char *as_string; 38 | }; 39 | }; 40 | ``` 41 | objects and arrays are represented as linked lists of `xj_value`s. 
42 | 43 | Although the user can make as many `xj_value` nodes as they want, some constructor functions are also provided 44 | 45 | ```c 46 | xj_value *xj_value_null(xj_alloc *alloc, xj_error *error); 47 | 48 | xj_value *xj_value_bool (xj_bool val, xj_alloc *alloc, xj_error *error); 49 | xj_value *xj_value_int (xj_i64 val, xj_alloc *alloc, xj_error *error); 50 | xj_value *xj_value_float(xj_f64 val, xj_alloc *alloc, xj_error *error); 51 | 52 | xj_value *xj_value_array (xj_value *head, xj_alloc *alloc, xj_error *error); 53 | xj_value *xj_value_object(xj_value *head, xj_alloc *alloc, xj_error *error); 54 | 55 | xj_value *xj_value_string(const char *str, int len, xj_alloc *alloc, xj_error *error); 56 | ``` 57 | 58 | ### Error handling 59 | You may have noticed many functions require you to specify an `xj_error` object. Whenever an error occurs, the error structure is used to inform the caller of the context of the failure. This is optional, so whenever a function expects an error pointer, you can provide a `NULL`. 60 | 61 | The structure is public and defined as follows 62 | ```c 63 | typedef struct { 64 | xj_bool occurred; 65 | xj_bool truncated; 66 | int off, row, col; 67 | char message[128]; 68 | } xj_error; 69 | ``` 70 | when an error occurs you can read its fields directly. 71 | 72 | ### Memory management 73 | Many JSON libraries handle memory using reference counting. xJSON uses a different approach where all nodes are stored in a single memory pool and then freed up at the same time. Assuming most objects have the same lifetime, this makes it both faster and easier to manage many objects. 74 | 75 | An allocator is instantiated using one of 76 | ```c 77 | xj_alloc *xj_alloc_using(void *mem, int size, int ext, void (*free)(void*)); 78 | xj_alloc *xj_alloc_new(int size, int ext); 79 | ``` 80 | the first lets you specify the memory that the allocator will use to operate, while the second tells the allocator to call `malloc` to get the memory it needs. 
81 | 82 | When an `xj_alloc` is instanciated, you can call all of the functions that require you to provide an `xj_alloc*`. The objects that those functions return will be stored in the allocator. You deallocate all of the nodes by freeing up the whole allocator using 83 | ```c 84 | void xj_alloc_del(xj_alloc *alloc); 85 | ``` -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | FLAGS="-Wall -Wextra -Isrc/" 3 | 4 | for arg in "$@" 5 | do 6 | case $arg in 7 | --debug) FLAGS="$FLAGS -DDEBUG -g" ;; 8 | --release) FLAGS="$FLAGS -DNDEBUG -O3" ;; 9 | --coverage) FLAGS="$FLAGS -fprofile-arcs -ftest-coverage" ;; 10 | esac 11 | done 12 | 13 | $CC tests/test.c src/xjson.c src/xj_snprintf.c -o test $FLAGS 14 | $CC examples/parse-file.c src/xjson.c -o parse-file $FLAGS 15 | $CC examples/simple.c src/xjson.c -o simple $FLAGS -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | rm *.gcda 2 | rm *.gcno 3 | rm *.gcov 4 | rm vgcore.* -------------------------------------------------------------------------------- /coverage.sh: -------------------------------------------------------------------------------- 1 | ./build.sh --coverage 2 | ./test 3 | gcov -b test-xjson.gcno -------------------------------------------------------------------------------- /examples/parse-file.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "xjson.h" 6 | 7 | static char *load_file(const char *path, int *len); 8 | 9 | int main(int argc, char **argv) 10 | { 11 | if(argc < 2) 12 | { 13 | fprintf(stderr, "Error: Missing file\n"); 14 | fprintf(stderr, "Usage: %s \n", argv[0]); 15 | return 1; 16 | } 17 | 18 | int size; 19 | char *data; 20 | 21 | data = 
load_file(argv[1], &size); 22 | if(data == NULL) 23 | { 24 | fprintf(stderr, "Error: Failed to load file\n"); 25 | return 1; 26 | } 27 | 28 | char pool[65536]; 29 | xj_alloc *alloc; 30 | 31 | alloc = xj_alloc_using(pool, sizeof(pool), 4096, NULL); 32 | assert(alloc != NULL); 33 | 34 | xj_error error; 35 | xj_value *val = xj_decode(data, size, alloc, &error); 36 | 37 | if(val == NULL) 38 | { 39 | if(error.off < 0) 40 | fprintf(stderr, "Error: %s\n", error.message); 41 | else 42 | fprintf(stderr, "Error %s:%d:%d: %s\n", argv[1], error.row+1, error.col+1, error.message); 43 | } 44 | else 45 | fprintf(stderr, "OK\n"); 46 | 47 | xj_alloc_del(alloc); 48 | free(data); 49 | return 0; 50 | } 51 | 52 | static char *load_file(const char *path, int *len) 53 | { 54 | FILE *fp = fopen(path, "rb"); 55 | 56 | if(fp == NULL) 57 | return NULL; 58 | 59 | fseek(fp, 0, SEEK_END); 60 | int len_ = ftell(fp); 61 | fseek(fp, 0, SEEK_SET); 62 | 63 | char *data = malloc(len_ + 1); 64 | 65 | if(data == NULL) 66 | { 67 | fclose(fp); 68 | return NULL; 69 | } 70 | 71 | assert(len_ >= 0); 72 | if(fread(data, 1, len_, fp) != (unsigned int) len_) 73 | { 74 | free(data); 75 | fclose(fp); 76 | return NULL; 77 | } 78 | 79 | data[len_] = '\0'; 80 | fclose(fp); 81 | 82 | if(len) 83 | *len = len_; 84 | return data; 85 | } -------------------------------------------------------------------------------- /examples/simple.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main() 6 | { 7 | // Instanciate the allocator. 8 | xj_alloc *alloc = xj_alloc_new(65536, 4096); 9 | 10 | if(alloc == NULL) 11 | // Handle failure maybe? 12 | return 1; 13 | 14 | char *str = "{\"name\": \"Francesco\", \"age\": 23}"; 15 | 16 | // Do the actual parsing.. 17 | xj_value *val = xj_decode(str, -1, alloc, NULL); 18 | 19 | // ..error? 
20 | if(val == NULL) 21 | fprintf(stderr, "Failed to parse!\n"); 22 | else 23 | { 24 | char *name; 25 | int age; 26 | 27 | // Now iterate over the fields to get the name 28 | // and age. 29 | xj_value *child = val->as_object; 30 | while(child != NULL) 31 | { 32 | if(!strcmp("name", child->key)) 33 | name = child->as_string; 34 | else 35 | age = child->as_int; 36 | 37 | child = child->next; 38 | } 39 | 40 | printf("name: %s, age: %d\n", name, age); 41 | } 42 | 43 | // Now free everything! 44 | xj_alloc_del(alloc); 45 | return 0; 46 | } -------------------------------------------------------------------------------- /fuzzer.sh: -------------------------------------------------------------------------------- 1 | afl-clang-fast tests/fuzzer.c src/xjson.c -o fuzzer -Isrc/ 2 | export AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 3 | export AFL_SKIP_CPUFREQ=1 4 | afl-fuzz -i samples/ -o out -m none -d -- ./fuzzer -------------------------------------------------------------------------------- /src/xj_snprintf.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "xj_snprintf.h" 5 | 6 | //#define STB_SPRINTF_NOFLOAT 7 | 8 | #define STB_SPRINTF_MIN 512 // how many characters per callback 9 | typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len); 10 | 11 | #ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses 12 | #define STBSP__UNALIGNED(code) 13 | #else 14 | #define STBSP__UNALIGNED(code) code 15 | #endif 16 | 17 | #if defined(__clang__) 18 | #if defined(__has_feature) && defined(__has_attribute) 19 | #if __has_feature(address_sanitizer) 20 | #if __has_attribute(__no_sanitize__) 21 | #define STBSP__ASAN __attribute__((__no_sanitize__("address"))) 22 | #elif __has_attribute(__no_sanitize_address__) 23 | #define STBSP__ASAN __attribute__((__no_sanitize_address__)) 24 | #elif __has_attribute(__no_address_safety_analysis__) 25 
| #define STBSP__ASAN __attribute__((__no_address_safety_analysis__)) 26 | #endif 27 | #endif 28 | #endif 29 | #elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) 30 | #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ 31 | #define STBSP__ASAN __attribute__((__no_sanitize_address__)) 32 | #endif 33 | #endif 34 | 35 | #ifndef STBSP__ASAN 36 | #define STBSP__ASAN 37 | #endif 38 | 39 | #ifdef STB_SPRINTF_STATIC 40 | #define STBSP__PUBLICDEC static 41 | #define STBSP__PUBLICDEF static STBSP__ASAN 42 | #else 43 | #ifdef __cplusplus 44 | #define STBSP__PUBLICDEC extern "C" 45 | #define STBSP__PUBLICDEF extern "C" STBSP__ASAN 46 | #else 47 | #define STBSP__PUBLICDEC extern 48 | #define STBSP__PUBLICDEF STBSP__ASAN 49 | #endif 50 | #endif 51 | 52 | #define stbsp__uint32 unsigned int 53 | #define stbsp__int32 signed int 54 | 55 | #ifdef _MSC_VER 56 | #define stbsp__uint64 unsigned __int64 57 | #define stbsp__int64 signed __int64 58 | #else 59 | #define stbsp__uint64 unsigned long long 60 | #define stbsp__int64 signed long long 61 | #endif 62 | #define stbsp__uint16 unsigned short 63 | 64 | #ifndef stbsp__uintptr 65 | #if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__) 66 | #define stbsp__uintptr stbsp__uint64 67 | #else 68 | #define stbsp__uintptr stbsp__uint32 69 | #endif 70 | #endif 71 | 72 | #ifdef _MSC_VER 73 | #define STBSP__NOTUSED(v) (void)(v) 74 | #else 75 | #define STBSP__NOTUSED(v) (void)sizeof(v) 76 | #endif 77 | 78 | #ifndef STB_SPRINTF_NOFLOAT 79 | // internal float utility functions 80 | static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits); 81 | static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value); 82 | #define STBSP__SPECIAL 0x7000 83 | #endif 84 | 85 | #define 
STBSP__LEFTJUST 1 86 | #define STBSP__LEADINGPLUS 2 87 | #define STBSP__LEADINGSPACE 4 88 | #define STBSP__LEADING_0X 8 89 | #define STBSP__LEADINGZERO 16 90 | #define STBSP__INTMAX 32 91 | #define STBSP__TRIPLET_COMMA 64 92 | #define STBSP__NEGATIVE 128 93 | #define STBSP__METRIC_SUFFIX 256 94 | #define STBSP__HALFWIDTH 512 95 | #define STBSP__METRIC_NOSPACE 1024 96 | #define STBSP__METRIC_1024 2048 97 | #define STBSP__METRIC_JEDEC 4096 98 | 99 | #define STBSP__COPYFP(dest, src) \ 100 | { \ 101 | int cn; \ 102 | for (cn = 0; cn < 8; cn++) \ 103 | ((char *)&dest)[cn] = ((char *)&src)[cn]; \ 104 | } 105 | 106 | static int stb_vsprintfcb(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va); 107 | 108 | typedef struct stbsp__context { 109 | char *buf; 110 | int count; 111 | int length; 112 | char tmp[STB_SPRINTF_MIN]; 113 | } stbsp__context; 114 | 115 | static char *stbsp__clamp_callback(const char *buf, void *user, int len) 116 | { 117 | stbsp__context *c = (stbsp__context *)user; 118 | c->length += len; 119 | 120 | if (len > c->count) 121 | len = c->count; 122 | 123 | if (len) { 124 | if (buf != c->buf) { 125 | const char *s, *se; 126 | char *d; 127 | d = c->buf; 128 | s = buf; 129 | se = buf + len; 130 | do { 131 | *d++ = *s++; 132 | } while (s < se); 133 | } 134 | c->buf += len; 135 | c->count -= len; 136 | } 137 | 138 | if (c->count <= 0) 139 | return c->tmp; 140 | return (c->count >= STB_SPRINTF_MIN) ? 
c->buf : c->tmp; // go direct into buffer if you can 141 | } 142 | 143 | static char * stbsp__count_clamp_callback( const char * buf, void * user, int len ) 144 | { 145 | stbsp__context * c = (stbsp__context*)user; 146 | (void) sizeof(buf); 147 | 148 | c->length += len; 149 | return c->tmp; // go direct into buffer if you can 150 | } 151 | 152 | static void stbsp__lead_sign(stbsp__uint32 fl, char *sign) 153 | { 154 | sign[0] = 0; 155 | if (fl & STBSP__NEGATIVE) { 156 | sign[0] = 1; 157 | sign[1] = '-'; 158 | } else if (fl & STBSP__LEADINGSPACE) { 159 | sign[0] = 1; 160 | sign[1] = ' '; 161 | } else if (fl & STBSP__LEADINGPLUS) { 162 | sign[0] = 1; 163 | sign[1] = '+'; 164 | } 165 | } 166 | 167 | static char stbsp__comma = ','; 168 | static struct 169 | { 170 | short temp; // force next field to be 2-byte aligned 171 | char pair[201]; 172 | } stbsp__digitpair = 173 | { 174 | 0, 175 | "00010203040506070809101112131415161718192021222324" 176 | "25262728293031323334353637383940414243444546474849" 177 | "50515253545556575859606162636465666768697071727374" 178 | "75767778798081828384858687888990919293949596979899" 179 | }; 180 | 181 | #ifndef STB_SPRINTF_NOFLOAT 182 | 183 | static char stbsp__period = '.'; 184 | 185 | // copies d to bits w/ strict aliasing (this compiles to nothing on /Ox) 186 | #define STBSP__COPYFP(dest, src) \ 187 | { \ 188 | int cn; \ 189 | for (cn = 0; cn < 8; cn++) \ 190 | ((char *)&dest)[cn] = ((char *)&src)[cn]; \ 191 | } 192 | 193 | // get float info 194 | static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value) 195 | { 196 | double d; 197 | stbsp__int64 b = 0; 198 | 199 | // load value and round at the frac_digits 200 | d = value; 201 | 202 | STBSP__COPYFP(b, d); 203 | 204 | *bits = b & ((((stbsp__uint64)1) << 52) - 1); 205 | *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023); 206 | 207 | return (stbsp__int32)((stbsp__uint64) b >> 63); 208 | } 209 | 210 | static double const stbsp__bot[23] = { 211 | 1e+000, 
1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, 212 | 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022 213 | }; 214 | static double const stbsp__negbot[22] = { 215 | 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011, 216 | 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022 217 | }; 218 | static double const stbsp__negboterr[22] = { 219 | -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023, 220 | 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029, 221 | -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, -7.1542424054621921e-034, -7.1542424054621926e-035, 222 | 2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039 223 | }; 224 | static double const stbsp__top[13] = { 225 | 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299 226 | }; 227 | static double const stbsp__negtop[13] = { 228 | 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299 229 | }; 230 | static double const stbsp__toperr[13] = { 231 | 8388608, 232 | 6.8601809640529717e+028, 233 | -7.253143638152921e+052, 234 | -4.3377296974619174e+075, 235 | -1.5559416129466825e+098, 236 | -3.2841562489204913e+121, 237 | -3.7745893248228135e+144, 238 | -1.7356668416969134e+167, 239 | -3.8893577551088374e+190, 240 | -9.9566444326005119e+213, 241 | 6.3641293062232429e+236, 242 | -5.2069140800249813e+259, 243 | -5.2504760255204387e+282 244 | }; 245 | static double const stbsp__negtoperr[13] = { 246 | 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 
1.1875228833981544e-109, 247 | -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201, 248 | 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293, 249 | 8.0970921678014997e-317 250 | }; 251 | 252 | #if defined(_MSC_VER) && (_MSC_VER <= 1200) 253 | static stbsp__uint64 const stbsp__powten[20] = { 254 | 1, 255 | 10, 256 | 100, 257 | 1000, 258 | 10000, 259 | 100000, 260 | 1000000, 261 | 10000000, 262 | 100000000, 263 | 1000000000, 264 | 10000000000, 265 | 100000000000, 266 | 1000000000000, 267 | 10000000000000, 268 | 100000000000000, 269 | 1000000000000000, 270 | 10000000000000000, 271 | 100000000000000000, 272 | 1000000000000000000, 273 | 10000000000000000000U 274 | }; 275 | #define stbsp__tento19th ((stbsp__uint64)1000000000000000000) 276 | #else 277 | static stbsp__uint64 const stbsp__powten[20] = { 278 | 1, 279 | 10, 280 | 100, 281 | 1000, 282 | 10000, 283 | 100000, 284 | 1000000, 285 | 10000000, 286 | 100000000, 287 | 1000000000, 288 | 10000000000ULL, 289 | 100000000000ULL, 290 | 1000000000000ULL, 291 | 10000000000000ULL, 292 | 100000000000000ULL, 293 | 1000000000000000ULL, 294 | 10000000000000000ULL, 295 | 100000000000000000ULL, 296 | 1000000000000000000ULL, 297 | 10000000000000000000ULL 298 | }; 299 | #define stbsp__tento19th (1000000000000000000ULL) 300 | #endif 301 | 302 | #define stbsp__ddmulthi(oh, ol, xh, yh) \ 303 | { \ 304 | double ahi = 0, alo, bhi = 0, blo; \ 305 | stbsp__int64 bt; \ 306 | oh = xh * yh; \ 307 | STBSP__COPYFP(bt, xh); \ 308 | bt &= ((~(stbsp__uint64)0) << 27); \ 309 | STBSP__COPYFP(ahi, bt); \ 310 | alo = xh - ahi; \ 311 | STBSP__COPYFP(bt, yh); \ 312 | bt &= ((~(stbsp__uint64)0) << 27); \ 313 | STBSP__COPYFP(bhi, bt); \ 314 | blo = yh - bhi; \ 315 | ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \ 316 | } 317 | 318 | #define stbsp__ddtoS64(ob, xh, xl) \ 319 | { \ 320 | double ahi = 0, alo, vh, t; \ 321 | ob = (stbsp__int64)xh; \ 
322 | vh = (double)ob; \ 323 | ahi = (xh - vh); \ 324 | t = (ahi - xh); \ 325 | alo = (xh - (ahi - t)) - (vh + t); \ 326 | ob += (stbsp__int64)(ahi + alo + xl); \ 327 | } 328 | 329 | #define stbsp__ddrenorm(oh, ol) \ 330 | { \ 331 | double s; \ 332 | s = oh + ol; \ 333 | ol = ol - (s - oh); \ 334 | oh = s; \ 335 | } 336 | 337 | #define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh); 338 | 339 | #define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl); 340 | 341 | static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350 342 | { 343 | double ph, pl; 344 | if ((power >= 0) && (power <= 22)) { 345 | stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]); 346 | } else { 347 | stbsp__int32 e, et, eb; 348 | double p2h, p2l; 349 | 350 | e = power; 351 | if (power < 0) 352 | e = -e; 353 | et = (e * 0x2c9) >> 14; /* %23 */ 354 | if (et > 13) 355 | et = 13; 356 | eb = e - (et * 23); 357 | 358 | ph = d; 359 | pl = 0.0; 360 | if (power < 0) { 361 | if (eb) { 362 | --eb; 363 | stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]); 364 | stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]); 365 | } 366 | if (et) { 367 | stbsp__ddrenorm(ph, pl); 368 | --et; 369 | stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]); 370 | stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]); 371 | ph = p2h; 372 | pl = p2l; 373 | } 374 | } else { 375 | if (eb) { 376 | e = eb; 377 | if (eb > 22) 378 | eb = 22; 379 | e -= eb; 380 | stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]); 381 | if (e) { 382 | stbsp__ddrenorm(ph, pl); 383 | stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]); 384 | stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl); 385 | ph = p2h; 386 | pl = p2l; 387 | } 388 | } 389 | if (et) { 390 | stbsp__ddrenorm(ph, pl); 391 | --et; 392 | stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]); 393 | stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]); 394 | ph = p2h; 395 | pl = p2l; 396 | } 397 | } 398 | } 399 
| stbsp__ddrenorm(ph, pl); 400 | *ohi = ph; 401 | *olo = pl; 402 | } 403 | 404 | 405 | // given a float value, returns the significant bits in bits, and the position of the 406 | // decimal point in decimal_pos. +/-INF and NAN are specified by special values 407 | // returned in the decimal_pos parameter. 408 | // frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000 409 | static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits) 410 | { 411 | double d; 412 | stbsp__int64 bits = 0; 413 | stbsp__int32 expo, e, ng, tens; 414 | 415 | d = value; 416 | STBSP__COPYFP(bits, d); 417 | expo = (stbsp__int32)((bits >> 52) & 2047); 418 | ng = (stbsp__int32)((stbsp__uint64) bits >> 63); 419 | if (ng) 420 | d = -d; 421 | 422 | if (expo == 2047) // is nan or inf? 423 | { 424 | *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf"; 425 | *decimal_pos = STBSP__SPECIAL; 426 | *len = 3; 427 | return ng; 428 | } 429 | 430 | if (expo == 0) // is zero or denormal 431 | { 432 | if (((stbsp__uint64) bits << 1) == 0) // do zero 433 | { 434 | *decimal_pos = 1; 435 | *start = out; 436 | out[0] = '0'; 437 | *len = 1; 438 | return ng; 439 | } 440 | // find the right expo for denormals 441 | { 442 | stbsp__int64 v = ((stbsp__uint64)1) << 51; 443 | while ((bits & v) == 0) { 444 | --expo; 445 | v >>= 1; 446 | } 447 | } 448 | } 449 | 450 | // find the decimal exponent as well as the decimal bits of the value 451 | { 452 | double ph, pl; 453 | 454 | // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046 455 | tens = expo - 1023; 456 | tens = (tens < 0) ? 
((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1); 457 | 458 | // move the significant bits into position and stick them into an int 459 | stbsp__raise_to_power10(&ph, &pl, d, 18 - tens); 460 | 461 | // get full as much precision from double-double as possible 462 | stbsp__ddtoS64(bits, ph, pl); 463 | 464 | // check if we undershot 465 | if (((stbsp__uint64)bits) >= stbsp__tento19th) 466 | ++tens; 467 | } 468 | 469 | // now do the rounding in integer land 470 | frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits); 471 | if ((frac_digits < 24)) { 472 | stbsp__uint32 dg = 1; 473 | if ((stbsp__uint64)bits >= stbsp__powten[9]) 474 | dg = 10; 475 | while ((stbsp__uint64)bits >= stbsp__powten[dg]) { 476 | ++dg; 477 | if (dg == 20) 478 | goto noround; 479 | } 480 | if (frac_digits < dg) { 481 | stbsp__uint64 r; 482 | // add 0.5 at the right position and round 483 | e = dg - frac_digits; 484 | if ((stbsp__uint32)e >= 24) 485 | goto noround; 486 | r = stbsp__powten[e]; 487 | bits = bits + (r / 2); 488 | if ((stbsp__uint64)bits >= stbsp__powten[dg]) 489 | ++tens; 490 | bits /= r; 491 | } 492 | noround:; 493 | } 494 | 495 | // kill long trailing runs of zeros 496 | if (bits) { 497 | stbsp__uint32 n; 498 | for (;;) { 499 | if (bits <= 0xffffffff) 500 | break; 501 | if (bits % 1000) 502 | goto donez; 503 | bits /= 1000; 504 | } 505 | n = (stbsp__uint32)bits; 506 | while ((n % 1000) == 0) 507 | n /= 1000; 508 | bits = n; 509 | donez:; 510 | } 511 | 512 | // convert to string 513 | out += 64; 514 | e = 0; 515 | for (;;) { 516 | stbsp__uint32 n; 517 | char *o = out - 8; 518 | // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned) 519 | if (bits >= 100000000) { 520 | n = (stbsp__uint32)(bits % 100000000); 521 | bits /= 100000000; 522 | } else { 523 | n = (stbsp__uint32)bits; 524 | bits = 0; 525 | } 526 | while (n) { 527 | out -= 2; 528 | *(stbsp__uint16 *)out = *(stbsp__uint16 
*)&stbsp__digitpair.pair[(n % 100) * 2]; 529 | n /= 100; 530 | e += 2; 531 | } 532 | if (bits == 0) { 533 | if ((e) && (out[0] == '0')) { 534 | ++out; 535 | --e; 536 | } 537 | break; 538 | } 539 | while (out != o) { 540 | *--out = '0'; 541 | ++e; 542 | } 543 | } 544 | 545 | *decimal_pos = tens; 546 | *start = out; 547 | *len = e; 548 | return ng; 549 | } 550 | 551 | #endif // STB_SPRINTF_NOFLOAT 552 | 553 | static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit) 554 | { 555 | char const * sn = s; 556 | 557 | // get up to 4-byte alignment 558 | for (;;) { 559 | if (((stbsp__uintptr)sn & 3) == 0) 560 | break; 561 | 562 | if (!limit || *sn == 0) 563 | return (stbsp__uint32)(sn - s); 564 | 565 | ++sn; 566 | --limit; 567 | } 568 | 569 | // scan over 4 bytes at a time to find terminating 0 570 | // this will intentionally scan up to 3 bytes past the end of buffers, 571 | // but becase it works 4B aligned, it will never cross page boundaries 572 | // (hence the STBSP__ASAN markup; the over-read here is intentional 573 | // and harmless) 574 | while (limit >= 4) { 575 | stbsp__uint32 v = *(stbsp__uint32 *)sn; 576 | // bit hack to find if there's a 0 byte in there 577 | if ((v - 0x01010101) & (~v) & 0x80808080UL) 578 | break; 579 | 580 | sn += 4; 581 | limit -= 4; 582 | } 583 | 584 | // handle the last few characters to find actual size 585 | while (limit && *sn) { 586 | ++sn; 587 | --limit; 588 | } 589 | 590 | return (stbsp__uint32)(sn - s); 591 | } 592 | // vsnprintf-style entry point: formats fmt/va into buf, keeping at most count-1 characters plus a NUL terminator, and returns c.length, the number of characters handed to the output callback (presumably the untruncated length, as with C99 vsnprintf; the final flush in stb_vsprintfcb should be checked to confirm). 593 | int xj_vsnprintf(char *buf, int count, char const *fmt, va_list va) 594 | { 595 | stbsp__context c; 596 | // no writable buffer (buf == NULL or count <= 0): only measure. The write path ends with a store to buf[count - 1], which would land before the start of buf when count is 0. 597 | if ( (count <= 0) || !buf ) 598 | { 599 | c.length = 0; 600 | 601 | stb_vsprintfcb( stbsp__count_clamp_callback, &c, c.tmp, fmt, va ); 602 | } 603 | else 604 | { 605 | int l; 606 | 607 | c.buf = buf; 608 | c.count = count; 609 | c.length = 0; 610 | 611 | stb_vsprintfcb( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va ); 612 | 613 | // zero-terminate 614 
| l = (int)( c.buf - buf ); 615 | if ( l >= count ) // should never be greater, only equal (or less) than count 616 | l = count - 1; 617 | buf[l] = 0; 618 | } 619 | 620 | return c.length; 621 | } 622 | 623 | static int stb_vsprintfcb(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va) 624 | { 625 | static char hex[] = "0123456789abcdefxp"; 626 | static char hexu[] = "0123456789ABCDEFXP"; 627 | char *bf; 628 | char const *f; 629 | int tlen = 0; 630 | 631 | bf = buf; 632 | f = fmt; 633 | for (;;) { 634 | stbsp__int32 fw, pr, tz; 635 | stbsp__uint32 fl; 636 | 637 | // macros for the callback buffer stuff 638 | #define stbsp__chk_cb_bufL(bytes) \ 639 | { \ 640 | int len = (int)(bf - buf); \ 641 | if ((len + (bytes)) >= STB_SPRINTF_MIN) { \ 642 | tlen += len; \ 643 | if (0 == (bf = buf = callback(buf, user, len))) \ 644 | goto done; \ 645 | } \ 646 | } 647 | #define stbsp__chk_cb_buf(bytes) \ 648 | { \ 649 | if (callback) { \ 650 | stbsp__chk_cb_bufL(bytes); \ 651 | } \ 652 | } 653 | #define stbsp__flush_cb() \ 654 | { \ 655 | stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \ 656 | } // flush if there is even one byte in the buffer 657 | #define stbsp__cb_buf_clamp(cl, v) \ 658 | cl = v; \ 659 | if (callback) { \ 660 | int lg = STB_SPRINTF_MIN - (int)(bf - buf); \ 661 | if (cl > lg) \ 662 | cl = lg; \ 663 | } 664 | 665 | // fast copy everything up to the next % (or end of string) 666 | for (;;) { 667 | while (((stbsp__uintptr)f) & 3) { 668 | schk1: 669 | if (f[0] == '%') 670 | goto scandd; 671 | schk2: 672 | if (f[0] == 0) 673 | goto endfmt; 674 | stbsp__chk_cb_buf(1); 675 | *bf++ = f[0]; 676 | ++f; 677 | } 678 | for (;;) { 679 | // Check if the next 4 bytes contain %(0x25) or end of string. 
680 | // Using the 'hasless' trick: 681 | // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord 682 | stbsp__uint32 v, c; 683 | v = *(stbsp__uint32 *)f; 684 | c = (~v) & 0x80808080; 685 | if (((v ^ 0x25252525) - 0x01010101) & c) 686 | goto schk1; 687 | if ((v - 0x01010101) & c) 688 | goto schk2; 689 | if (callback) 690 | if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4) 691 | goto schk1; 692 | #ifdef STB_SPRINTF_NOUNALIGNED 693 | if(((stbsp__uintptr)bf) & 3) { 694 | bf[0] = f[0]; 695 | bf[1] = f[1]; 696 | bf[2] = f[2]; 697 | bf[3] = f[3]; 698 | } else 699 | #endif 700 | { 701 | *(stbsp__uint32 *)bf = v; 702 | } 703 | bf += 4; 704 | f += 4; 705 | } 706 | } 707 | scandd: 708 | 709 | ++f; 710 | 711 | // ok, we have a percent, read the modifiers first 712 | fw = 0; 713 | pr = -1; 714 | fl = 0; 715 | tz = 0; 716 | 717 | // flags 718 | for (;;) { 719 | switch (f[0]) { 720 | // if we have left justify 721 | case '-': 722 | fl |= STBSP__LEFTJUST; 723 | ++f; 724 | continue; 725 | // if we have leading plus 726 | case '+': 727 | fl |= STBSP__LEADINGPLUS; 728 | ++f; 729 | continue; 730 | // if we have leading space 731 | case ' ': 732 | fl |= STBSP__LEADINGSPACE; 733 | ++f; 734 | continue; 735 | // if we have leading 0x 736 | case '#': 737 | fl |= STBSP__LEADING_0X; 738 | ++f; 739 | continue; 740 | // if we have thousand commas 741 | case '\'': 742 | fl |= STBSP__TRIPLET_COMMA; 743 | ++f; 744 | continue; 745 | // if we have kilo marker (none->kilo->kibi->jedec) 746 | case '$': 747 | if (fl & STBSP__METRIC_SUFFIX) { 748 | if (fl & STBSP__METRIC_1024) { 749 | fl |= STBSP__METRIC_JEDEC; 750 | } else { 751 | fl |= STBSP__METRIC_1024; 752 | } 753 | } else { 754 | fl |= STBSP__METRIC_SUFFIX; 755 | } 756 | ++f; 757 | continue; 758 | // if we don't want space between metric suffix and number 759 | case '_': 760 | fl |= STBSP__METRIC_NOSPACE; 761 | ++f; 762 | continue; 763 | // if we have leading zero 764 | case '0': 765 | fl |= STBSP__LEADINGZERO; 766 | ++f; 767 | goto 
flags_done; 768 | default: goto flags_done; 769 | } 770 | } 771 | flags_done: 772 | 773 | // get the field width 774 | if (f[0] == '*') { 775 | fw = va_arg(va, stbsp__uint32); 776 | ++f; 777 | } else { 778 | while ((f[0] >= '0') && (f[0] <= '9')) { 779 | fw = fw * 10 + f[0] - '0'; 780 | f++; 781 | } 782 | } 783 | // get the precision 784 | if (f[0] == '.') { 785 | ++f; 786 | if (f[0] == '*') { 787 | pr = va_arg(va, stbsp__uint32); 788 | ++f; 789 | } else { 790 | pr = 0; 791 | while ((f[0] >= '0') && (f[0] <= '9')) { 792 | pr = pr * 10 + f[0] - '0'; 793 | f++; 794 | } 795 | } 796 | } 797 | 798 | // handle integer size overrides 799 | switch (f[0]) { 800 | // are we halfwidth? 801 | case 'h': 802 | fl |= STBSP__HALFWIDTH; 803 | ++f; 804 | if (f[0] == 'h') 805 | ++f; // QUARTERWIDTH 806 | break; 807 | // are we 64-bit (unix style) 808 | case 'l': 809 | fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0); 810 | ++f; 811 | if (f[0] == 'l') { 812 | fl |= STBSP__INTMAX; 813 | ++f; 814 | } 815 | break; 816 | // are we 64-bit on intmax? (c99) 817 | case 'j': 818 | fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0; 819 | ++f; 820 | break; 821 | // are we 64-bit on size_t or ptrdiff_t? (c99) 822 | case 'z': 823 | fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; 824 | ++f; 825 | break; 826 | case 't': 827 | fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; 828 | ++f; 829 | break; 830 | // are we 64-bit (msft style) 831 | case 'I': 832 | if ((f[1] == '6') && (f[2] == '4')) { 833 | fl |= STBSP__INTMAX; 834 | f += 3; 835 | } else if ((f[1] == '3') && (f[2] == '2')) { 836 | f += 3; 837 | } else { 838 | fl |= ((sizeof(void *) == 8) ? 
STBSP__INTMAX : 0); 839 | ++f; 840 | } 841 | break; 842 | default: break; 843 | } 844 | 845 | _Bool must_free = 0; 846 | 847 | // handle each replacement 848 | switch (f[0]) { 849 | #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307 850 | char num[STBSP__NUMSZ]; 851 | char lead[8]; 852 | char tail[8]; 853 | char *s; 854 | char const *h; 855 | stbsp__uint32 l, n, cs; 856 | stbsp__uint64 n64; 857 | #ifndef STB_SPRINTF_NOFLOAT 858 | double fv; 859 | #endif 860 | stbsp__int32 dp; 861 | char const *sn; 862 | 863 | case 's': 864 | // get the string 865 | s = va_arg(va, char *); 866 | if (s == 0) 867 | s = (char *)"null"; 868 | // get the length, limited to desired precision 869 | // always limit to ~0u chars since our counts are 32b 870 | l = stbsp__strlen_limited(s, (pr >= 0) ? pr : ~0u); 871 | lead[0] = 0; 872 | tail[0] = 0; 873 | pr = 0; 874 | dp = 0; 875 | cs = 0; 876 | // copy the string in 877 | goto scopy; 878 | 879 | #ifndef XJ_NOEXTENSION 880 | case 'v': 881 | { 882 | // get the string 883 | xj_value *v = va_arg(va, xj_value*); 884 | if (v == NULL) 885 | { 886 | s = (char*) "null"; 887 | 888 | // get the length, limited to desired precision 889 | // always limit to ~0u chars since our counts are 32b 890 | l = stbsp__strlen_limited(s, (pr >= 0) ? 
pr : ~0u); 891 | } 892 | else 893 | { 894 | int l2; 895 | s = xj_encode(v, &l2); 896 | if(s == NULL) 897 | s = "(Bad xJSON value or out of memory)"; 898 | else 899 | { 900 | must_free = 1; 901 | assert(l2 >= 0); 902 | l = (unsigned int) l2; 903 | } 904 | } 905 | 906 | lead[0] = 0; 907 | tail[0] = 0; 908 | pr = 0; 909 | dp = 0; 910 | cs = 0; 911 | // copy the string in 912 | goto scopy; 913 | } 914 | #endif 915 | 916 | case 'c': // char 917 | // get the character 918 | s = num + STBSP__NUMSZ - 1; 919 | *s = (char)va_arg(va, int); 920 | l = 1; 921 | lead[0] = 0; 922 | tail[0] = 0; 923 | pr = 0; 924 | dp = 0; 925 | cs = 0; 926 | goto scopy; 927 | 928 | case 'n': // weird write-bytes specifier 929 | { 930 | int *d = va_arg(va, int *); 931 | *d = tlen + (int)(bf - buf); 932 | } break; 933 | 934 | #ifdef STB_SPRINTF_NOFLOAT 935 | case 'A': // float 936 | case 'a': // hex float 937 | case 'G': // float 938 | case 'g': // float 939 | case 'E': // float 940 | case 'e': // float 941 | case 'f': // float 942 | va_arg(va, double); // eat it 943 | s = (char *)"No float"; 944 | l = 8; 945 | lead[0] = 0; 946 | tail[0] = 0; 947 | pr = 0; 948 | cs = 0; 949 | STBSP__NOTUSED(dp); 950 | goto scopy; 951 | #else 952 | case 'A': // hex float 953 | case 'a': // hex float 954 | h = (f[0] == 'A') ? hexu : hex; 955 | fv = va_arg(va, double); 956 | if (pr == -1) 957 | pr = 6; // default is 6 958 | // read the double into a string 959 | if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv)) 960 | fl |= STBSP__NEGATIVE; 961 | 962 | s = num + 64; 963 | 964 | stbsp__lead_sign(fl, lead); 965 | 966 | if (dp == -1023) 967 | dp = (n64) ? 
-1022 : 0; 968 | else 969 | n64 |= (((stbsp__uint64)1) << 52); 970 | n64 <<= (64 - 56); 971 | if (pr < 15) 972 | n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4)); 973 | // add leading chars 974 | 975 | #ifdef STB_SPRINTF_MSVC_MODE 976 | *s++ = '0'; 977 | *s++ = 'x'; 978 | #else 979 | lead[1 + lead[0]] = '0'; 980 | lead[2 + lead[0]] = 'x'; 981 | lead[0] += 2; 982 | #endif 983 | *s++ = h[(n64 >> 60) & 15]; 984 | n64 <<= 4; 985 | if (pr) 986 | *s++ = stbsp__period; 987 | sn = s; 988 | 989 | // print the bits 990 | n = pr; 991 | if (n > 13) 992 | n = 13; 993 | if (pr > (stbsp__int32)n) 994 | tz = pr - n; 995 | pr = 0; 996 | while (n--) { 997 | *s++ = h[(n64 >> 60) & 15]; 998 | n64 <<= 4; 999 | } 1000 | 1001 | // print the expo 1002 | tail[1] = h[17]; 1003 | if (dp < 0) { 1004 | tail[2] = '-'; 1005 | dp = -dp; 1006 | } else 1007 | tail[2] = '+'; 1008 | n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3)); 1009 | tail[0] = (char)n; 1010 | for (;;) { 1011 | tail[n] = '0' + dp % 10; 1012 | if (n <= 3) 1013 | break; 1014 | --n; 1015 | dp /= 10; 1016 | } 1017 | 1018 | dp = (int)(s - sn); 1019 | l = (int)(s - (num + 64)); 1020 | s = num + 64; 1021 | cs = 1 + (3 << 24); 1022 | goto scopy; 1023 | 1024 | case 'G': // float 1025 | case 'g': // float 1026 | h = (f[0] == 'G') ? 
hexu : hex; 1027 | fv = va_arg(va, double); 1028 | if (pr == -1) 1029 | pr = 6; 1030 | else if (pr == 0) 1031 | pr = 1; // default is 6 1032 | // read the double into a string 1033 | if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000)) 1034 | fl |= STBSP__NEGATIVE; 1035 | 1036 | // clamp the precision and delete extra zeros after clamp 1037 | n = pr; 1038 | if (l > (stbsp__uint32)pr) 1039 | l = pr; 1040 | while ((l > 1) && (pr) && (sn[l - 1] == '0')) { 1041 | --pr; 1042 | --l; 1043 | } 1044 | 1045 | // should we use %e 1046 | if ((dp <= -4) || (dp > (stbsp__int32)n)) { 1047 | if (pr > (stbsp__int32)l) 1048 | pr = l - 1; 1049 | else if (pr) 1050 | --pr; // when using %e, there is one digit before the decimal 1051 | goto doexpfromg; 1052 | } 1053 | // this is the insane action to get the pr to match %g semantics for %f 1054 | if (dp > 0) { 1055 | pr = (dp < (stbsp__int32)l) ? l - dp : 0; 1056 | } else { 1057 | pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr); 1058 | } 1059 | goto dofloatfromg; 1060 | 1061 | case 'E': // float 1062 | case 'e': // float 1063 | h = (f[0] == 'E') ? 
hexu : hex; 1064 | fv = va_arg(va, double); 1065 | if (pr == -1) 1066 | pr = 6; // default is 6 1067 | // read the double into a string 1068 | if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000)) 1069 | fl |= STBSP__NEGATIVE; 1070 | doexpfromg: 1071 | tail[0] = 0; 1072 | stbsp__lead_sign(fl, lead); 1073 | if (dp == STBSP__SPECIAL) { 1074 | s = (char *)sn; 1075 | cs = 0; 1076 | pr = 0; 1077 | goto scopy; 1078 | } 1079 | s = num + 64; 1080 | // handle leading chars 1081 | *s++ = sn[0]; 1082 | 1083 | if (pr) 1084 | *s++ = stbsp__period; 1085 | 1086 | // handle after decimal 1087 | if ((l - 1) > (stbsp__uint32)pr) 1088 | l = pr + 1; 1089 | for (n = 1; n < l; n++) 1090 | *s++ = sn[n]; 1091 | // trailing zeros 1092 | tz = pr - (l - 1); 1093 | pr = 0; 1094 | // dump expo 1095 | tail[1] = h[0xe]; 1096 | dp -= 1; 1097 | if (dp < 0) { 1098 | tail[2] = '-'; 1099 | dp = -dp; 1100 | } else 1101 | tail[2] = '+'; 1102 | #ifdef STB_SPRINTF_MSVC_MODE 1103 | n = 5; 1104 | #else 1105 | n = (dp >= 100) ? 
5 : 4; 1106 | #endif 1107 | tail[0] = (char)n; 1108 | for (;;) { 1109 | tail[n] = '0' + dp % 10; 1110 | if (n <= 3) 1111 | break; 1112 | --n; 1113 | dp /= 10; 1114 | } 1115 | cs = 1 + (3 << 24); // how many tens 1116 | goto flt_lead; 1117 | 1118 | case 'f': // float 1119 | fv = va_arg(va, double); 1120 | doafloat: 1121 | // do kilos 1122 | if (fl & STBSP__METRIC_SUFFIX) { 1123 | double divisor; 1124 | divisor = 1000.0f; 1125 | if (fl & STBSP__METRIC_1024) 1126 | divisor = 1024.0; 1127 | while (fl < 0x4000000) { 1128 | if ((fv < divisor) && (fv > -divisor)) 1129 | break; 1130 | fv /= divisor; 1131 | fl += 0x1000000; 1132 | } 1133 | } 1134 | if (pr == -1) 1135 | pr = 6; // default is 6 1136 | // read the double into a string 1137 | if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr)) 1138 | fl |= STBSP__NEGATIVE; 1139 | dofloatfromg: 1140 | tail[0] = 0; 1141 | stbsp__lead_sign(fl, lead); 1142 | if (dp == STBSP__SPECIAL) { 1143 | s = (char *)sn; 1144 | cs = 0; 1145 | pr = 0; 1146 | goto scopy; 1147 | } 1148 | s = num + 64; 1149 | 1150 | // handle the three decimal varieties 1151 | if (dp <= 0) { 1152 | stbsp__int32 i; 1153 | // handle 0.000*000xxxx 1154 | *s++ = '0'; 1155 | if (pr) 1156 | *s++ = stbsp__period; 1157 | n = -dp; 1158 | if ((stbsp__int32)n > pr) 1159 | n = pr; 1160 | i = n; 1161 | while (i) { 1162 | if ((((stbsp__uintptr)s) & 3) == 0) 1163 | break; 1164 | *s++ = '0'; 1165 | --i; 1166 | } 1167 | while (i >= 4) { 1168 | *(stbsp__uint32 *)s = 0x30303030; 1169 | s += 4; 1170 | i -= 4; 1171 | } 1172 | while (i) { 1173 | *s++ = '0'; 1174 | --i; 1175 | } 1176 | if ((stbsp__int32)(l + n) > pr) 1177 | l = pr - n; 1178 | i = l; 1179 | while (i) { 1180 | *s++ = *sn++; 1181 | --i; 1182 | } 1183 | tz = pr - (n + l); 1184 | cs = 1 + (3 << 24); // how many tens did we write (for commas below) 1185 | } else { 1186 | cs = (fl & STBSP__TRIPLET_COMMA) ? 
((600 - (stbsp__uint32)dp) % 3) : 0; 1187 | if ((stbsp__uint32)dp >= l) { 1188 | // handle xxxx000*000.0 1189 | n = 0; 1190 | for (;;) { 1191 | if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { 1192 | cs = 0; 1193 | *s++ = stbsp__comma; 1194 | } else { 1195 | *s++ = sn[n]; 1196 | ++n; 1197 | if (n >= l) 1198 | break; 1199 | } 1200 | } 1201 | if (n < (stbsp__uint32)dp) { 1202 | n = dp - n; 1203 | if ((fl & STBSP__TRIPLET_COMMA) == 0) { 1204 | while (n) { 1205 | if ((((stbsp__uintptr)s) & 3) == 0) 1206 | break; 1207 | *s++ = '0'; 1208 | --n; 1209 | } 1210 | while (n >= 4) { 1211 | *(stbsp__uint32 *)s = 0x30303030; 1212 | s += 4; 1213 | n -= 4; 1214 | } 1215 | } 1216 | while (n) { 1217 | if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { 1218 | cs = 0; 1219 | *s++ = stbsp__comma; 1220 | } else { 1221 | *s++ = '0'; 1222 | --n; 1223 | } 1224 | } 1225 | } 1226 | cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens 1227 | if (pr) { 1228 | *s++ = stbsp__period; 1229 | tz = pr; 1230 | } 1231 | } else { 1232 | // handle xxxxx.xxxx000*000 1233 | n = 0; 1234 | for (;;) { 1235 | if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { 1236 | cs = 0; 1237 | *s++ = stbsp__comma; 1238 | } else { 1239 | *s++ = sn[n]; 1240 | ++n; 1241 | if (n >= (stbsp__uint32)dp) 1242 | break; 1243 | } 1244 | } 1245 | cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens 1246 | if (pr) 1247 | *s++ = stbsp__period; 1248 | if ((l - dp) > (stbsp__uint32)pr) 1249 | l = pr + dp; 1250 | while (n < l) { 1251 | *s++ = sn[n]; 1252 | ++n; 1253 | } 1254 | tz = pr - (l - dp); 1255 | } 1256 | } 1257 | pr = 0; 1258 | 1259 | // handle k,m,g,t 1260 | if (fl & STBSP__METRIC_SUFFIX) { 1261 | char idx; 1262 | idx = 1; 1263 | if (fl & STBSP__METRIC_NOSPACE) 1264 | idx = 0; 1265 | tail[0] = idx; 1266 | tail[1] = ' '; 1267 | { 1268 | if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'. 
1269 | if (fl & STBSP__METRIC_1024) 1270 | tail[idx + 1] = "_KMGT"[fl >> 24]; 1271 | else 1272 | tail[idx + 1] = "_kMGT"[fl >> 24]; 1273 | idx++; 1274 | // If printing kibits and not in jedec, add the 'i'. 1275 | if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) { 1276 | tail[idx + 1] = 'i'; 1277 | idx++; 1278 | } 1279 | tail[0] = idx; 1280 | } 1281 | } 1282 | }; 1283 | 1284 | flt_lead: 1285 | // get the length that we copied 1286 | l = (stbsp__uint32)(s - (num + 64)); 1287 | s = num + 64; 1288 | goto scopy; 1289 | #endif 1290 | 1291 | case 'B': // upper binary 1292 | case 'b': // lower binary 1293 | h = (f[0] == 'B') ? hexu : hex; 1294 | lead[0] = 0; 1295 | if (fl & STBSP__LEADING_0X) { 1296 | lead[0] = 2; 1297 | lead[1] = '0'; 1298 | lead[2] = h[0xb]; 1299 | } 1300 | l = (8 << 4) | (1 << 8); 1301 | goto radixnum; 1302 | 1303 | case 'o': // octal 1304 | h = hexu; 1305 | lead[0] = 0; 1306 | if (fl & STBSP__LEADING_0X) { 1307 | lead[0] = 1; 1308 | lead[1] = '0'; 1309 | } 1310 | l = (3 << 4) | (3 << 8); 1311 | goto radixnum; 1312 | 1313 | case 'p': // pointer 1314 | fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0; 1315 | pr = sizeof(void *) * 2; 1316 | fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros 1317 | // fall through - to X 1318 | 1319 | case 'X': // upper hex 1320 | case 'x': // lower hex 1321 | h = (f[0] == 'X') ? 
hexu : hex; 1322 | l = (4 << 4) | (4 << 8); 1323 | lead[0] = 0; 1324 | if (fl & STBSP__LEADING_0X) { 1325 | lead[0] = 2; 1326 | lead[1] = '0'; 1327 | lead[2] = h[16]; 1328 | } 1329 | radixnum: 1330 | // get the number 1331 | if (fl & STBSP__INTMAX) 1332 | n64 = va_arg(va, stbsp__uint64); 1333 | else 1334 | n64 = va_arg(va, stbsp__uint32); 1335 | 1336 | s = num + STBSP__NUMSZ; 1337 | dp = 0; 1338 | // clear tail, and clear leading if value is zero 1339 | tail[0] = 0; 1340 | if (n64 == 0) { 1341 | lead[0] = 0; 1342 | if (pr == 0) { 1343 | l = 0; 1344 | cs = 0; 1345 | goto scopy; 1346 | } 1347 | } 1348 | // convert to string 1349 | for (;;) { 1350 | *--s = h[n64 & ((1 << (l >> 8)) - 1)]; 1351 | n64 >>= (l >> 8); 1352 | if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr))) 1353 | break; 1354 | if (fl & STBSP__TRIPLET_COMMA) { 1355 | ++l; 1356 | if ((l & 15) == ((l >> 4) & 15)) { 1357 | l &= ~15; 1358 | *--s = stbsp__comma; 1359 | } 1360 | } 1361 | }; 1362 | // get the tens and the comma pos 1363 | cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24); 1364 | // get the length that we copied 1365 | l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); 1366 | // copy it 1367 | goto scopy; 1368 | 1369 | case 'u': // unsigned 1370 | case 'i': 1371 | case 'd': // integer 1372 | // get the integer and abs it 1373 | if (fl & STBSP__INTMAX) { 1374 | stbsp__int64 i64 = va_arg(va, stbsp__int64); 1375 | n64 = (stbsp__uint64)i64; 1376 | if ((f[0] != 'u') && (i64 < 0)) { 1377 | n64 = (stbsp__uint64)-i64; 1378 | fl |= STBSP__NEGATIVE; 1379 | } 1380 | } else { 1381 | stbsp__int32 i = va_arg(va, stbsp__int32); 1382 | n64 = (stbsp__uint32)i; 1383 | if ((f[0] != 'u') && (i < 0)) { 1384 | n64 = (stbsp__uint32)-i; 1385 | fl |= STBSP__NEGATIVE; 1386 | } 1387 | } 1388 | 1389 | #ifndef STB_SPRINTF_NOFLOAT 1390 | if (fl & STBSP__METRIC_SUFFIX) { 1391 | if (n64 < 1024) 1392 | pr = 0; 1393 | else if (pr == -1) 1394 | pr = 1; 1395 | fv = (double)(stbsp__int64)n64; 1396 | 
goto doafloat; 1397 | } 1398 | #endif 1399 | 1400 | // convert to string 1401 | s = num + STBSP__NUMSZ; 1402 | l = 0; 1403 | 1404 | for (;;) { 1405 | // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators) 1406 | char *o = s - 8; 1407 | if (n64 >= 100000000) { 1408 | n = (stbsp__uint32)(n64 % 100000000); 1409 | n64 /= 100000000; 1410 | } else { 1411 | n = (stbsp__uint32)n64; 1412 | n64 = 0; 1413 | } 1414 | if ((fl & STBSP__TRIPLET_COMMA) == 0) { 1415 | do { 1416 | s -= 2; 1417 | *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; 1418 | n /= 100; 1419 | } while (n); 1420 | } 1421 | while (n) { 1422 | if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { 1423 | l = 0; 1424 | *--s = stbsp__comma; 1425 | --o; 1426 | } else { 1427 | *--s = (char)(n % 10) + '0'; 1428 | n /= 10; 1429 | } 1430 | } 1431 | if (n64 == 0) { 1432 | if ((s[0] == '0') && (s != (num + STBSP__NUMSZ))) 1433 | ++s; 1434 | break; 1435 | } 1436 | while (s != o) 1437 | if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { 1438 | l = 0; 1439 | *--s = stbsp__comma; 1440 | --o; 1441 | } else { 1442 | *--s = '0'; 1443 | } 1444 | } 1445 | 1446 | tail[0] = 0; 1447 | stbsp__lead_sign(fl, lead); 1448 | 1449 | // get the length that we copied 1450 | l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); 1451 | if (l == 0) { 1452 | *--s = '0'; 1453 | l = 1; 1454 | } 1455 | cs = l + (3 << 24); 1456 | if (pr < 0) 1457 | pr = 0; 1458 | 1459 | scopy: 1460 | // get fw=leading/trailing space, pr=leading zeros 1461 | if (pr < (stbsp__int32)l) 1462 | pr = l; 1463 | n = pr + lead[0] + tail[0] + tz; 1464 | if (fw < (stbsp__int32)n) 1465 | fw = n; 1466 | fw -= n; 1467 | pr -= l; 1468 | 1469 | // handle right justify and leading zeros 1470 | if ((fl & STBSP__LEFTJUST) == 0) { 1471 | if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr 1472 | { 1473 | pr = (fw > pr) ? 
fw : pr; 1474 | fw = 0; 1475 | } else { 1476 | fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas 1477 | } 1478 | } 1479 | 1480 | // copy the spaces and/or zeros 1481 | if (fw + pr) { 1482 | stbsp__int32 i; 1483 | stbsp__uint32 c; 1484 | 1485 | // copy leading spaces (or when doing %8.4d stuff) 1486 | if ((fl & STBSP__LEFTJUST) == 0) 1487 | while (fw > 0) { 1488 | stbsp__cb_buf_clamp(i, fw); 1489 | fw -= i; 1490 | while (i) { 1491 | if ((((stbsp__uintptr)bf) & 3) == 0) 1492 | break; 1493 | *bf++ = ' '; 1494 | --i; 1495 | } 1496 | while (i >= 4) { 1497 | *(stbsp__uint32 *)bf = 0x20202020; 1498 | bf += 4; 1499 | i -= 4; 1500 | } 1501 | while (i) { 1502 | *bf++ = ' '; 1503 | --i; 1504 | } 1505 | stbsp__chk_cb_buf(1); 1506 | } 1507 | 1508 | // copy leader 1509 | sn = lead + 1; 1510 | while (lead[0]) { 1511 | stbsp__cb_buf_clamp(i, lead[0]); 1512 | lead[0] -= (char)i; 1513 | while (i) { 1514 | *bf++ = *sn++; 1515 | --i; 1516 | } 1517 | stbsp__chk_cb_buf(1); 1518 | } 1519 | 1520 | // copy leading zeros 1521 | c = cs >> 24; 1522 | cs &= 0xffffff; 1523 | cs = (fl & STBSP__TRIPLET_COMMA) ? 
((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0; 1524 | while (pr > 0) { 1525 | stbsp__cb_buf_clamp(i, pr); 1526 | pr -= i; 1527 | if ((fl & STBSP__TRIPLET_COMMA) == 0) { 1528 | while (i) { 1529 | if ((((stbsp__uintptr)bf) & 3) == 0) 1530 | break; 1531 | *bf++ = '0'; 1532 | --i; 1533 | } 1534 | while (i >= 4) { 1535 | *(stbsp__uint32 *)bf = 0x30303030; 1536 | bf += 4; 1537 | i -= 4; 1538 | } 1539 | } 1540 | while (i) { 1541 | if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) { 1542 | cs = 0; 1543 | *bf++ = stbsp__comma; 1544 | } else 1545 | *bf++ = '0'; 1546 | --i; 1547 | } 1548 | stbsp__chk_cb_buf(1); 1549 | } 1550 | } 1551 | 1552 | // copy leader if there is still one 1553 | sn = lead + 1; 1554 | while (lead[0]) { 1555 | stbsp__int32 i; 1556 | stbsp__cb_buf_clamp(i, lead[0]); 1557 | lead[0] -= (char)i; 1558 | while (i) { 1559 | *bf++ = *sn++; 1560 | --i; 1561 | } 1562 | stbsp__chk_cb_buf(1); 1563 | } 1564 | 1565 | // copy the string 1566 | n = l; 1567 | while (n) { 1568 | stbsp__int32 i; 1569 | stbsp__cb_buf_clamp(i, n); 1570 | n -= i; 1571 | STBSP__UNALIGNED(while (i >= 4) { 1572 | *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s; 1573 | bf += 4; 1574 | s += 4; 1575 | i -= 4; 1576 | }) 1577 | while (i) { 1578 | *bf++ = *s++; 1579 | --i; 1580 | } 1581 | stbsp__chk_cb_buf(1); 1582 | } 1583 | 1584 | // copy trailing zeros 1585 | while (tz) { 1586 | stbsp__int32 i; 1587 | stbsp__cb_buf_clamp(i, tz); 1588 | tz -= i; 1589 | while (i) { 1590 | if ((((stbsp__uintptr)bf) & 3) == 0) 1591 | break; 1592 | *bf++ = '0'; 1593 | --i; 1594 | } 1595 | while (i >= 4) { 1596 | *(stbsp__uint32 *)bf = 0x30303030; 1597 | bf += 4; 1598 | i -= 4; 1599 | } 1600 | while (i) { 1601 | *bf++ = '0'; 1602 | --i; 1603 | } 1604 | stbsp__chk_cb_buf(1); 1605 | } 1606 | 1607 | // copy tail if there is one 1608 | sn = tail + 1; 1609 | while (tail[0]) { 1610 | stbsp__int32 i; 1611 | stbsp__cb_buf_clamp(i, tail[0]); 1612 | tail[0] -= (char)i; 1613 | while (i) { 1614 | *bf++ = *sn++; 1615 | 
--i; 1616 | } 1617 | stbsp__chk_cb_buf(1); 1618 | } 1619 | 1620 | // handle the left justify 1621 | if (fl & STBSP__LEFTJUST) 1622 | if (fw > 0) { 1623 | while (fw) { 1624 | stbsp__int32 i; 1625 | stbsp__cb_buf_clamp(i, fw); 1626 | fw -= i; 1627 | while (i) { 1628 | if ((((stbsp__uintptr)bf) & 3) == 0) 1629 | break; 1630 | *bf++ = ' '; 1631 | --i; 1632 | } 1633 | while (i >= 4) { 1634 | *(stbsp__uint32 *)bf = 0x20202020; 1635 | bf += 4; 1636 | i -= 4; 1637 | } 1638 | while (i--) 1639 | *bf++ = ' '; 1640 | stbsp__chk_cb_buf(1); 1641 | } 1642 | } 1643 | 1644 | if(must_free) 1645 | free(s); 1646 | break; 1647 | 1648 | default: // unknown, just copy code 1649 | s = num + STBSP__NUMSZ - 1; 1650 | *s = f[0]; 1651 | l = 1; 1652 | fw = fl = 0; 1653 | lead[0] = 0; 1654 | tail[0] = 0; 1655 | pr = 0; 1656 | dp = 0; 1657 | cs = 0; 1658 | goto scopy; 1659 | } 1660 | ++f; 1661 | } 1662 | endfmt: 1663 | 1664 | if (!callback) 1665 | *bf = 0; 1666 | else 1667 | stbsp__flush_cb(); 1668 | 1669 | done: 1670 | return tlen + (int)(bf - buf); 1671 | } 1672 | 1673 | xj_value *xj_vdecodef(xj_alloc *alloc, xj_error *error, const char *fmt, va_list va) 1674 | { 1675 | va_list va2; 1676 | va_copy(va2, va); 1677 | 1678 | char maybe[512]; 1679 | int n = xj_vsnprintf(maybe, sizeof(maybe), fmt, va); 1680 | 1681 | char *buff; 1682 | if(n < (int) sizeof(maybe)) 1683 | if(n < 0) 1684 | buff = "Bad format"; 1685 | else 1686 | buff = maybe; 1687 | else 1688 | { 1689 | buff = malloc(n+1); 1690 | if(buff == NULL) 1691 | { 1692 | xj_report(error, "No memory"); 1693 | return NULL; 1694 | } 1695 | 1696 | int k = xj_vsnprintf(buff, n+1, fmt, va2); 1697 | assert(n == k); 1698 | } 1699 | 1700 | xj_value *res = xj_decode(buff, n, alloc, error); 1701 | 1702 | va_end(va2); 1703 | if(buff != maybe) 1704 | free(buff); 1705 | 1706 | return res; 1707 | } 1708 | 1709 | xj_value *xj_decodef(xj_alloc *alloc, xj_error *error, const char *fmt, ...) 
1710 | { 1711 | va_list va; 1712 | va_start(va, fmt); 1713 | xj_value *res = xj_vdecodef(alloc, error, fmt, va); 1714 | va_end(va); 1715 | return res; 1716 | } 1717 | -------------------------------------------------------------------------------- /src/xj_snprintf.h: -------------------------------------------------------------------------------- 1 | #ifndef XJ_SNPRINTF_H 2 | #define XJ_SNPRINTF_H 3 | #include 4 | #include "xjson.h" 5 | int xj_vsnprintf(char *buf, int count, const char *fmt, va_list va); 6 | xj_value *xj_vdecodef(xj_alloc *alloc, xj_error *error, const char *fmt, va_list va); 7 | xj_value *xj_decodef(xj_alloc *alloc, xj_error *error, const char *fmt, ...); 8 | #endif -------------------------------------------------------------------------------- /src/xjson.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "xjson.h" 9 | 10 | #define XJ_MAX_DEPTH 128 11 | #define XJ_MAX_EXPNT 10 12 | 13 | typedef struct chunk_t chunk_t; 14 | 15 | /* Symbol: 16 | * chunk_t 17 | * 18 | * Description: 19 | * This is the structure that implements a pool of 20 | * an [xj_alloc] allocator. It's used for both the 21 | * main pool and any extension pool. It's basically 22 | * just a chunk of memory with a pointer before it 23 | * to make a linked list of chunks. 24 | * 25 | * Fields: 26 | * prev: Pointer to the previously allocated chunk. 27 | * 28 | * body: The actual chunk of memory. This hold the 29 | * memory allocations. It's important to make 30 | * sure that this field is properly aligned 31 | * so that the first allocation is also aligned. 32 | */ 33 | struct chunk_t { 34 | chunk_t *prev; 35 | _Alignas(void*) char body[]; 36 | }; 37 | 38 | /* Symbol: 39 | * xj_alloc 40 | * 41 | * Description: 42 | * This is the structure that holds the state of a 43 | * bump-pointer allocator. 
44 | * 45 | * A bump-pointer allocator is the simplest form of 46 | * allocation scheme. It's basically a big pool of 47 | * memory that's linearly filled up with allocations. 48 | * Since the allocations may be of different sizes, 49 | * there's no way of freeing previous allocations, 50 | * so all allocations must be freed at the same time 51 | * with the whole pool. 52 | * 53 | * A bump-pointer allocator is good for JSON objects 54 | * because they're made up of lots of nodes with the 55 | * same lifetime. 56 | * 57 | * This implementation allows a dynamic growth of the 58 | * memory it holds by appending extension pools. It's 59 | * possible to specify both the size of the main pool 60 | * and the extension pools on instantiation of the 61 | * allocator (all extension pools will have the same 62 | * size which may be different from the main pool's size). 63 | * 64 | * The first pool is allocated along with the allocator 65 | * object. By using [xj_alloc_using], the user provides 66 | * a memory region that the allocator will use to instantiate 67 | * itself. This memory region must hold both the allocator 68 | * and the first chunk. Since this memory was provided 69 | * by the user, they must also be able to specify a way 70 | * to free the provided chunk that holds allocator and 71 | * pool. 72 | * 73 | * Fields: 74 | * free: A user-provided freeing callback that, if not 75 | * NULL, is called on the allocator pointer (xj_alloc*). 76 | * This is useful when it is the user who provides 77 | * the allocator with memory, by instantiating it 78 | * using [xj_alloc_using]. 79 | * 80 | * tail: The currently used pool. At first this will refer 81 | * to the main pool. When extensions are added, this 82 | * refers to the last extension. 83 | * All chunks are linked together using their [prev] 84 | * pointer in allocation order, therefore the [tail] 85 | * pointer is the tail of the linked list of all chunks. 
86 | * 87 | * tail_used: The amount of bytes used of the currently 88 | * used pool (the [tail]). Allocation occur 89 | * by incrementing this offset in the pool. 90 | * 91 | * tail_size: The total size of the tail pool. This is 92 | * equal to the main pool's size when there 93 | * are no extension pools and it's equal to 94 | * the extensions size when there are. 95 | * 96 | * ext_size: The size of an extension pool. 97 | */ 98 | struct xj_alloc { 99 | void (*free)(void*); 100 | chunk_t *tail; 101 | int tail_used; 102 | int tail_size; 103 | int ext_size; 104 | }; 105 | 106 | /* Symbol: 107 | * xj_alloc_new 108 | * 109 | * Description: 110 | * Instanciate an allocator. 111 | * 112 | * Arguments: 113 | * size: The size of the main memory pool. 114 | * 115 | * ext: The size of the pools allocated if the 116 | * main pool isn't enough. By specifying 0, 117 | * you're telling the allocator to only use 118 | * the main pool and fail if it's not enough. 119 | * 120 | * Returns: 121 | * The pointer to an allocator instance if all went 122 | * well or NULL. 123 | * 124 | * Notes: 125 | * The returned pointer, if not NULL, must be 126 | * deallocated using [xj_alloc_del]. 127 | */ 128 | xj_alloc *xj_alloc_new(int size, int ext) 129 | { 130 | assert(size >= 0 && ext >= 0); 131 | 132 | int allocated = sizeof(xj_alloc) + sizeof(chunk_t) + size; 133 | void *temp = malloc(allocated); 134 | 135 | if(temp == NULL) 136 | return NULL; 137 | 138 | return xj_alloc_using(temp, allocated, ext, free); 139 | } 140 | 141 | /* Symbol: 142 | * xj_alloc_using 143 | * 144 | * Description: 145 | * Instanciate an allocator by telling by 146 | * providing it with the main pool's memory. 147 | * 148 | * Arguments: 149 | * mem: The the pointer to the main memory pool. 150 | * It can't be NULL. 151 | * 152 | * size: The size of the region referred by [mem] 153 | * in bytes. It can't be negative. 154 | * 155 | * ext: The size of any extension pool allocated 156 | * if the main pool isn't enough. 
157 | * 158 | * free: The freeing routine that needs to be 159 | * called on [mem] when the allocator is 160 | * destroyed using [xj_alloc_del]. This 161 | * is only called on the [mem] pointer and 162 | * not on any additional extension pool. 163 | * 164 | * Returns: 165 | * The pointer to an allocator instance if all went 166 | * well or NULL. 167 | * 168 | * Notes: 169 | * The returned pointer, if not NULL, must be 170 | * deallocated using [xj_alloc_del]. 171 | * 172 | * The [mem] pool is also used to store the allocator's 173 | * header, so if it's not big enough, this function will 174 | * fail. 175 | */ 176 | xj_alloc *xj_alloc_using(void *mem, int size, int ext, void (*free)(void*)) 177 | { 178 | assert(mem != NULL && size >= 0 && ext >= 0); 179 | 180 | if((unsigned int) size < sizeof(xj_alloc) + sizeof(chunk_t)) 181 | return NULL; 182 | 183 | xj_alloc *alloc = mem; 184 | alloc->free = free; 185 | alloc->tail = (chunk_t*) (alloc + 1); 186 | alloc->tail->prev = NULL; 187 | alloc->tail_used = 0; 188 | alloc->tail_size = size - (sizeof(xj_alloc) + sizeof(chunk_t)); 189 | alloc->ext_size = ext; 190 | return alloc; 191 | } 192 | 193 | /* Symbol: 194 | * xj_alloc_del 195 | * 196 | * Description: 197 | * Free an allocator instance. 198 | */ 199 | void xj_alloc_del(xj_alloc *alloc) 200 | { 201 | // Free all of the allocator's chunks, 202 | // with exception of the first one, 203 | // which is allocated with the allocator's 204 | // header and must be deallocated with 205 | // the user-provided callback. 206 | chunk_t *curr = alloc->tail; 207 | while(curr->prev != NULL) 208 | { 209 | chunk_t *prev = curr->prev; 210 | free(curr); 211 | curr = prev; 212 | } 213 | 214 | // Free the allocator header and first 215 | // chunk. 
/* Symbol:
 *   next_aligned
 *
 * Description:
 *   Rounds [n] up to a multiple of 8: a value that's
 *   already a multiple of 8 is returned unchanged,
 *   any other value is moved to the next multiple.
 */
unsigned long long next_aligned(unsigned long long n)
{
    // How far [n] is past the previous multiple of 8.
    const unsigned long long rem = n % 8;

    // Already on a boundary.
    if(rem == 0)
        return n;

    // Go back to the previous boundary, then step to
    // the following one.
    return n - rem + 8;
}
272 | if(alloc->ext_size == 0) 273 | return NULL; 274 | 275 | // Either allocate a chunk of the size specified 276 | // by the user during the instanciation of the 277 | // allocator, or a bigger one if the current 278 | // allocation wouldn't fit in it. 279 | int new_chunk_size = alloc->ext_size; 280 | 281 | if(new_chunk_size < size) 282 | new_chunk_size = size; 283 | 284 | chunk_t *chunk = malloc(sizeof(chunk_t) + new_chunk_size); 285 | 286 | if(chunk == NULL) 287 | return NULL; 288 | 289 | chunk->prev = alloc->tail; 290 | alloc->tail = chunk; 291 | alloc->tail_used = 0; 292 | alloc->tail_size = new_chunk_size; 293 | } 294 | 295 | // Do the bump-pointer's bumping of the pointer. 296 | void *addr = alloc->tail->body + alloc->tail_used; 297 | 298 | alloc->tail_used += size; 299 | 300 | return addr; 301 | } 302 | 303 | void xj_preport(xj_error *error, const char *src, int off, const char *fmt, ...) 304 | { 305 | if(error != NULL) 306 | { 307 | int row = -1, 308 | col = -1; 309 | if(src != NULL) 310 | { 311 | // Calculate column and row given 312 | // the source string and an index 313 | // in it. 314 | assert(off >= 0); 315 | col = 0; 316 | row = 0; 317 | int i = 0; 318 | while(i < off) 319 | { 320 | if(src[i] == '\n') 321 | { 322 | row += 1; 323 | col = 0; 324 | } 325 | else 326 | col += 1; 327 | i += 1; 328 | } 329 | } 330 | 331 | int k; 332 | va_list va; 333 | va_start(va, fmt); 334 | k = vsnprintf(error->message, sizeof(error->message), fmt, va); 335 | va_end(va); 336 | 337 | assert(k >= 0); 338 | 339 | error->truncated = (k >= (int) sizeof(error->message)-1); 340 | error->occurred = 1; 341 | error->off = off; 342 | error->row = row; 343 | error->col = col; 344 | } 345 | } 346 | 347 | // Create an [xj_value] that represents the [null] JSON value. 
348 | xj_value *xj_value_null(xj_alloc *alloc, xj_error *error) 349 | { 350 | xj_value *x = xj_bpalloc(alloc, sizeof(xj_value)); 351 | if(x == NULL) 352 | xj_report(error, "Out of memory"); 353 | else 354 | { 355 | x->type = XJ_NULL; 356 | x->size = -1; 357 | x->next = NULL; 358 | x->key = NULL; 359 | } 360 | return x; 361 | } 362 | 363 | // Create an [xj_value] that represents a boolean value. 364 | xj_value *xj_value_bool(xj_bool val, xj_alloc *alloc, xj_error *error) 365 | { 366 | xj_value *x = xj_value_null(alloc, error); 367 | if(x != NULL) 368 | { 369 | x->type = XJ_BOOL; 370 | x->as_bool = val; 371 | } 372 | return x; 373 | } 374 | 375 | xj_value *xj_value_int(xj_i64 val, xj_alloc *alloc, xj_error *error) 376 | { 377 | xj_value *x = xj_value_null(alloc, error); 378 | if(x != NULL) 379 | { 380 | x->type = XJ_INT; 381 | x->as_int = val; 382 | } 383 | return x; 384 | } 385 | 386 | xj_value *xj_value_float(xj_f64 val, xj_alloc *alloc, xj_error *error) 387 | { 388 | xj_value *x = xj_value_null(alloc, error); 389 | if(x != NULL) 390 | { 391 | x->type = XJ_FLOAT; 392 | x->as_float = val; 393 | } 394 | return x; 395 | } 396 | 397 | xj_value *xj_value_string(const char *str, int len, xj_alloc *alloc, xj_error *error) 398 | { 399 | if(str == NULL) str = ""; 400 | if(len < 0) len = strlen(str); 401 | 402 | char *copy = xj_strdup(str, len, alloc, error); 403 | 404 | if(copy == NULL) 405 | return NULL; 406 | 407 | xj_value *x = xj_value_null(alloc, error); 408 | if(x != NULL) 409 | { 410 | x->type = XJ_STRING; 411 | x->size = len; 412 | x->as_string = copy; 413 | } 414 | return x; 415 | } 416 | 417 | xj_value *xj_value_array__nocheck(xj_value *head, int count, xj_alloc *alloc, xj_error *error) 418 | { 419 | if(count < 0) 420 | { 421 | count = 0; 422 | xj_value *curs = head; 423 | while(curs != NULL) 424 | { 425 | count += 1; 426 | curs = curs->next; 427 | } 428 | } 429 | 430 | xj_value *x = xj_value_null(alloc, error); 431 | if(x != NULL) 432 | { 433 | x->type = 
XJ_ARRAY; 434 | x->size = count; 435 | x->as_array = head; 436 | } 437 | return x; 438 | } 439 | 440 | xj_value *xj_value_array(xj_value *head, xj_alloc *alloc, xj_error *error) 441 | { 442 | int count = 0; 443 | xj_value *curs = head; 444 | while(curs != NULL) 445 | { 446 | if(curs->key != NULL) 447 | { 448 | /* Array child has a 449 | key associated to it? */ 450 | return NULL; 451 | } 452 | count += 1; 453 | curs = curs->next; 454 | } 455 | 456 | return xj_value_array__nocheck(head, count, alloc, error); 457 | } 458 | 459 | xj_value *xj_value_object__nocheck(xj_value *head, int count, xj_alloc *alloc, xj_error *error) 460 | { 461 | if(count < 0) 462 | { 463 | count = 0; 464 | xj_value *curs = head; 465 | while(curs != NULL) 466 | { 467 | count += 1; 468 | curs = curs->next; 469 | } 470 | } 471 | 472 | xj_value *x = xj_value_null(alloc, error); 473 | if(x != NULL) 474 | { 475 | x->type = XJ_OBJECT; 476 | x->size = count; 477 | x->as_object = head; 478 | } 479 | return x; 480 | } 481 | 482 | xj_value *xj_value_object(xj_value *head, xj_alloc *alloc, xj_error *error) 483 | { 484 | int count = 0; 485 | xj_value *curs = head; 486 | while(curs != NULL) 487 | { 488 | if(curs->key == NULL) 489 | { 490 | /* Object child has no 491 | key associated to it! */ 492 | return NULL; 493 | } 494 | 495 | xj_value *curs2 = head; 496 | while(curs2 != curs) 497 | { 498 | if(!strcmp(curs->key, curs2->key)) 499 | { 500 | /* Duplicate key. 
*/ 501 | return NULL; 502 | } 503 | curs2 = curs2->next; 504 | } 505 | 506 | count += 1; 507 | curs = curs->next; 508 | } 509 | 510 | return xj_value_object__nocheck(head, count, alloc, error); 511 | } 512 | 513 | _Bool xj_array_append(xj_value *array, xj_value *child, 514 | xj_error *error) 515 | { 516 | assert(array != NULL); 517 | 518 | if(child != NULL) 519 | { 520 | if(child->key != NULL) 521 | { 522 | xj_report(error, "Array child can't have a key"); 523 | return 0; 524 | } 525 | 526 | if(child->next != NULL) 527 | { 528 | xj_report(error, "Array child can't be in a list"); 529 | return 0; 530 | } 531 | 532 | // Find the end of the array 533 | xj_value **tail; 534 | 535 | if(array->as_array == NULL) 536 | // The tail is the base node pointer 537 | tail = &array->as_array; 538 | else 539 | { 540 | // Scan the list 'til the end 541 | xj_value *curs = array->as_array; 542 | while(curs->next != NULL) 543 | curs = curs->next; 544 | tail = &curs; 545 | } 546 | 547 | *tail = child; 548 | } 549 | 550 | return 1; 551 | } 552 | 553 | char *xj_strdup(const char *str, int len, xj_alloc *alloc, xj_error *error) 554 | { 555 | assert(str != NULL); 556 | 557 | if(len < 0) 558 | len = strlen(str); 559 | 560 | char *copy = xj_bpalloc(alloc, len+1); 561 | 562 | if(copy == NULL) 563 | xj_report(error, "Out of memory"); 564 | else 565 | { 566 | memcpy(copy, str, len); 567 | copy[len] = '\0'; 568 | } 569 | return copy; 570 | } 571 | 572 | typedef struct { 573 | const char *str; 574 | int i, len, depth; 575 | xj_alloc *alloc; 576 | xj_error *error; 577 | } context_t; 578 | 579 | /* Symbol: 580 | * xutf8_sequence_from_utf32_codepoint 581 | * 582 | * Description: 583 | * Transform a UTF-32 encoded codepoint to a UTF-8 encoded byte sequence. 584 | * 585 | * Arguments: 586 | * utf8_data: Refers to the location of the UTF-8 sequence of bytes. 587 | * 588 | * nbytes: The maximum number of bytes that can be written to [utf8_data]. 589 | * It can't be negative. 
590 | * 591 | * utf32_code: UTF-32 codepoint that needs to be converted. 592 | * 593 | * Returns: 594 | * If [utf32_code] is valid UTF-32 and the provided buffer is big enough, 595 | * the UTF-8 equivalent sequence is stored in [utf8_data]. No more than 596 | * [nbytes] are ever written. If one of those conitions isn't true, -1 is 597 | * returned. 598 | * 599 | * Notes: 600 | * This was taken by the cozis/xUTF8 library on github.com 601 | */ 602 | static int xutf8_sequence_from_utf32_codepoint(char *utf8_data, int nbytes, uint32_t utf32_code) 603 | { 604 | if(utf32_code < 128) 605 | { 606 | if(nbytes < 1) 607 | return -1; 608 | 609 | utf8_data[0] = utf32_code; 610 | return 1; 611 | } 612 | 613 | if(utf32_code < 2048) 614 | { 615 | if(nbytes < 2) 616 | return -1; 617 | 618 | utf8_data[0] = 0xc0 | (utf32_code >> 6); 619 | utf8_data[1] = 0x80 | (utf32_code & 0x3f); 620 | return 2; 621 | } 622 | 623 | if(utf32_code < 65536) 624 | { 625 | if(nbytes < 3) 626 | return -1; 627 | 628 | utf8_data[0] = 0xe0 | (utf32_code >> 12); 629 | utf8_data[1] = 0x80 | ((utf32_code >> 6) & 0x3f); 630 | utf8_data[2] = 0x80 | (utf32_code & 0x3f); 631 | return 3; 632 | } 633 | 634 | if(utf32_code <= 0x10ffff) 635 | { 636 | if(nbytes < 4) 637 | return -1; 638 | 639 | utf8_data[0] = 0xf0 | (utf32_code >> 18); 640 | utf8_data[1] = 0x80 | ((utf32_code >> 12) & 0x3f); 641 | utf8_data[2] = 0x80 | ((utf32_code >> 6) & 0x3f); 642 | utf8_data[3] = 0x80 | (utf32_code & 0x3f); 643 | return 4; 644 | } 645 | 646 | // Code is out of range for UTF-8. 647 | return -1; 648 | } 649 | 650 | /* Symbol 651 | * xutf8_sequence_to_utf32_codepoint 652 | * 653 | * Description 654 | * Transform a UTF-8 encoded byte sequence pointed by `utf8_data` 655 | * into a UTF-32 encoded codepoint. 656 | * 657 | * Arguments: 658 | * utf8_data: Refers to the location of the UTF-8 byte sequence. 659 | * 660 | * nbytes: The maximum number of bytes that can be read after 661 | * [utf8_data]. It can't be negative. 
662 | * 663 | * utf32_code: Location where the encoded UTF-32 code will be stored. 664 | * It may be NULL, in which case the value is evaluated 665 | * and then thrown away. 666 | * 667 | * Returns: 668 | * The codepoint is returned through the output parameter `utf32_code`. 669 | * The returned value is the number of bytes of the UTF-8 sequence that 670 | * were scanned to encode the UTF-32 code, or -1 if the UTF-8 sequence 671 | * is invalid. 672 | * 673 | * Notes: 674 | * By calling this function with a NULL [utf32_code], you can check the 675 | * validity of a UTF-8 sequence. 676 | * 677 | * The [nbytes] argument has no relation to the UTF-8 byte count sequence. 678 | * You may think about this argument as the "raw" string length (the one 679 | * [strlen] whould return if [utf8_data] were zero-terminated). 680 | * 681 | * This was taken by the cozis/xUTF8 library on github.com 682 | */ 683 | static int xutf8_sequence_to_utf32_codepoint(const char *utf8_data, int nbytes, uint32_t *utf32_code) 684 | { 685 | assert(utf8_data != NULL); 686 | assert(nbytes >= 0); 687 | 688 | uint32_t dummy; 689 | if(utf32_code == NULL) 690 | utf32_code = &dummy; 691 | 692 | if(nbytes == 0) 693 | return -1; 694 | 695 | if(utf8_data[0] & 0x80) 696 | { 697 | // May be UTF-8. 698 | 699 | if((unsigned char) utf8_data[0] >= 0xF0) 700 | { 701 | // 4 bytes. 702 | // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 703 | 704 | if(nbytes < 4) 705 | return -1; 706 | 707 | uint32_t temp 708 | = (((uint32_t) utf8_data[0] & 0x07) << 18) 709 | | (((uint32_t) utf8_data[1] & 0x3f) << 12) 710 | | (((uint32_t) utf8_data[2] & 0x3f) << 6) 711 | | (((uint32_t) utf8_data[3] & 0x3f)); 712 | 713 | if(temp > 0x10ffff) 714 | return -1; 715 | 716 | *utf32_code = temp; 717 | return 4; 718 | } 719 | 720 | if((unsigned char) utf8_data[0] >= 0xE0) 721 | { 722 | // 3 bytes. 
723 | // 1110xxxx 10xxxxxx 10xxxxxx 724 | 725 | if(nbytes < 3) 726 | return -1; 727 | 728 | uint32_t temp 729 | = (((uint32_t) utf8_data[0] & 0x0f) << 12) 730 | | (((uint32_t) utf8_data[1] & 0x3f) << 6) 731 | | (((uint32_t) utf8_data[2] & 0x3f)); 732 | 733 | if(temp > 0x10ffff) 734 | return -1; 735 | 736 | *utf32_code = temp; 737 | return 3; 738 | } 739 | 740 | if((unsigned char) utf8_data[0] >= 0xC0) 741 | { 742 | // 2 bytes. 743 | // 110xxxxx 10xxxxxx 744 | 745 | if(nbytes < 2) 746 | return -1; 747 | 748 | *utf32_code 749 | = (((uint32_t) utf8_data[0] & 0x1f) << 6) 750 | | (((uint32_t) utf8_data[1] & 0x3f)); 751 | 752 | assert(*utf32_code <= 0x10ffff); 753 | return 2; 754 | } 755 | 756 | // 1 byte 757 | // 10xxxxxx 758 | *utf32_code = (uint32_t) utf8_data[0] & 0x3f; 759 | return 1; 760 | } 761 | 762 | // It's ASCII 763 | // 0xxxxxxx 764 | 765 | *utf32_code = (uint32_t) utf8_data[0]; 766 | return 1; 767 | } 768 | 769 | static _Bool parse_XXXX_after_u(context_t *ctx, uint16_t *res) 770 | { 771 | const char *bytes = ctx->str + ctx->i; 772 | 773 | if(ctx->i+3 >= ctx->len 774 | || !isxdigit(bytes[0]) || !isxdigit(bytes[1]) 775 | || !isxdigit(bytes[2]) || !isxdigit(bytes[3])) 776 | { 777 | xj_preport(ctx->error, ctx->str, ctx->i, 778 | "The \\u specifier expects 4 hex digits after it"); 779 | return 0; 780 | } 781 | 782 | ctx->i += 4; 783 | 784 | uint16_t rune = 0; 785 | 786 | for(int i = 0; i < 4; i += 1) 787 | { 788 | char c = tolower(bytes[i]); 789 | 790 | if(isdigit(c)) 791 | c = c - '0'; 792 | else 793 | c = c - 'a' + 10; 794 | 795 | rune |= c << ((3 - i) * 4); 796 | } 797 | 798 | if(res) 799 | *res = rune; 800 | 801 | return 1; 802 | } 803 | 804 | typedef struct { 805 | char *buffer; 806 | int size, capacity; 807 | char maybe[256]; 808 | } string_parsing_context_t; 809 | 810 | static _Bool spc_append(string_parsing_context_t *spc, const char *str, int len) 811 | { 812 | if(spc->size + len > spc->capacity) 813 | { 814 | // Grow the buffer. 
815 | 816 | int new_capacity = spc->capacity * 2; 817 | 818 | if(new_capacity < (spc->size + len)) 819 | new_capacity = (spc->size + len); 820 | 821 | char *temp; 822 | 823 | if(spc->maybe == spc->buffer) 824 | { 825 | temp = malloc(new_capacity); 826 | 827 | if(temp == NULL) 828 | return 0; 829 | 830 | memcpy(temp, spc->buffer, spc->size); 831 | } 832 | else 833 | { 834 | temp = realloc(spc->buffer, new_capacity); 835 | 836 | if(temp == NULL) 837 | return 0; 838 | } 839 | 840 | spc->buffer = temp; 841 | spc->capacity = new_capacity; 842 | } 843 | 844 | memcpy(spc->buffer + spc->size, str, len); 845 | spc->size += len; 846 | return 1; 847 | } 848 | 849 | static void spc_free(string_parsing_context_t *spc) 850 | { 851 | if(spc->maybe != spc->buffer) 852 | free(spc->buffer); 853 | } 854 | 855 | static void *parse_string(context_t *ctx, _Bool raw) 856 | { 857 | // This is probably the hottest function of the 858 | // parser. JSON documents contain a lot of strings. 859 | // The string is scanned and copied into a temporary 860 | // buffer, then the buffer is transformed into 861 | // the final form that will be returned. 862 | 863 | assert(ctx->i < ctx->len && ctx->str[ctx->i] == '"'); 864 | 865 | string_parsing_context_t spc; 866 | { 867 | spc.buffer = spc.maybe; 868 | spc.size = 0; 869 | spc.capacity = sizeof(spc.maybe); 870 | } 871 | 872 | ctx->i += 1; // Skip '"'. 
873 | 874 | while(1) 875 | { 876 | int start = ctx->i; 877 | 878 | while(ctx->i < ctx->len 879 | && ctx->str[ctx->i] != '\\' 880 | && ctx->str[ctx->i] != '"' 881 | && (unsigned char) ctx->str[ctx->i] >= 32 882 | && (unsigned char) ctx->str[ctx->i] <= 127) 883 | ctx->i += 1; 884 | 885 | if(ctx->i == ctx->len) 886 | { 887 | xj_report(ctx->error, "String ended inside a string value"); 888 | spc_free(&spc); 889 | return NULL; 890 | } 891 | 892 | if((unsigned char) ctx->str[ctx->i] < 32) 893 | { 894 | xj_preport(ctx->error, ctx->str, ctx->i, "String contains control characters"); 895 | spc_free(&spc); 896 | return NULL; 897 | } 898 | 899 | int end = ctx->i; 900 | 901 | if(!spc_append(&spc, ctx->str + start, end - start)) 902 | { 903 | xj_report(ctx->error, "Out of memory"); 904 | spc_free(&spc); 905 | return NULL; 906 | } 907 | 908 | if(ctx->str[ctx->i] == '"') 909 | break; 910 | 911 | if(ctx->str[ctx->i] == '\\') 912 | { 913 | ctx->i += 1; // Skip '\'. 914 | 915 | if(ctx->i == ctx->len) 916 | { 917 | xj_report(ctx->error, "String ended inside a string"); 918 | spc_free(&spc); 919 | return NULL; 920 | } 921 | 922 | uint32_t rune; 923 | int rune_byte_count = xutf8_sequence_to_utf32_codepoint(ctx->str + ctx->i, ctx->len - ctx->i, &rune); 924 | 925 | if(rune == 'u') 926 | { 927 | int start = ctx->i-1; // Points to the '\'. 928 | assert(start >= 0); 929 | 930 | assert(rune_byte_count == 1); 931 | ctx->i += 1; // Skip the 'u'. 932 | 933 | uint16_t first_half; 934 | if(!parse_XXXX_after_u(ctx, &first_half)) 935 | { 936 | spc_free(&spc); 937 | return NULL; 938 | } 939 | 940 | int end = ctx->i; 941 | 942 | _Bool have_2_parts = 0; 943 | uint16_t second_half; 944 | if(ctx->i+1 < ctx->len && ctx->str[ctx->i] == '\\' 945 | && ctx->str[ctx->i+1] == 'u') 946 | { 947 | have_2_parts = 1; 948 | 949 | ctx->i += 2; // Skip the "\u". 
950 | 951 | if(!parse_XXXX_after_u(ctx, &second_half)) 952 | { 953 | spc_free(&spc); 954 | return NULL; 955 | } 956 | 957 | end = ctx->i; 958 | } 959 | 960 | uint32_t rune = first_half; 961 | if(have_2_parts) 962 | rune = (rune << 16) | second_half; 963 | 964 | char as_utf8[16]; 965 | int byte_count_as_utf8 = xutf8_sequence_from_utf32_codepoint(as_utf8, sizeof(as_utf8), rune); 966 | if(byte_count_as_utf8 < 0) 967 | { 968 | // Failed to convert to UTF-8. 969 | // Either the rune isn't valid unicode or 970 | // the buffer is too small to hold the 971 | // UTF-8 text. We'll assume the buffer is 972 | // big enough to hold any UTF-8 symbol and 973 | // the error is due to malformed unicode. 974 | 975 | // If the invalid UTF-32 token was invalid 976 | // but composed of two \uXXXX tokens, maybe 977 | // they're valid individually. 978 | 979 | if(have_2_parts == 0) 980 | { 981 | xj_preport(ctx->error, ctx->str, start, "Invalid unicode symbol %.*s", end - start, ctx->str + start); 982 | spc_free(&spc); 983 | return NULL; 984 | } 985 | 986 | rune = first_half; 987 | byte_count_as_utf8 = xutf8_sequence_from_utf32_codepoint(as_utf8, sizeof(as_utf8), rune); 988 | 989 | if(byte_count_as_utf8 < 0) 990 | { 991 | xj_preport(ctx->error, ctx->str, start, "Invalid unicode symbol %.*s", end - start, ctx->str + start); 992 | spc_free(&spc); 993 | return NULL; 994 | } 995 | 996 | if(!spc_append(&spc, as_utf8, byte_count_as_utf8)) 997 | { 998 | xj_report(ctx->error, "Out of memory"); 999 | spc_free(&spc); 1000 | return NULL; 1001 | } 1002 | 1003 | rune = second_half; 1004 | byte_count_as_utf8 = xutf8_sequence_from_utf32_codepoint(as_utf8, sizeof(as_utf8), rune); 1005 | 1006 | if(byte_count_as_utf8 < 0) 1007 | { 1008 | xj_preport(ctx->error, ctx->str, start, "Invalid unicode symbol %.*s", end - start, ctx->str + start); 1009 | spc_free(&spc); 1010 | return NULL; 1011 | } 1012 | 1013 | if(!spc_append(&spc, as_utf8, byte_count_as_utf8)) 1014 | { 1015 | xj_report(ctx->error, "Out of 
memory"); 1016 | spc_free(&spc); 1017 | return NULL; 1018 | } 1019 | } 1020 | else 1021 | { 1022 | if(!spc_append(&spc, as_utf8, byte_count_as_utf8)) 1023 | { 1024 | xj_report(ctx->error, "Out of memory"); 1025 | spc_free(&spc); 1026 | return NULL; 1027 | } 1028 | } 1029 | } 1030 | else 1031 | { 1032 | const char *s; int l; 1033 | switch(rune) 1034 | { 1035 | case 'n': s = "\n"; l = 1; break; 1036 | case 't': s = "\t"; l = 1; break; 1037 | case 'b': s = "\b"; l = 1; break; 1038 | case 'f': s = "\f"; l = 1; break; 1039 | case 'r': s = "\r"; l = 1; break; 1040 | default: 1041 | s = ctx->str + ctx->i; 1042 | l = rune_byte_count; 1043 | break; 1044 | } 1045 | 1046 | ctx->i += rune_byte_count; 1047 | 1048 | if(!spc_append(&spc, s, l)) 1049 | { 1050 | xj_report(ctx->error, "Out of memory"); 1051 | spc_free(&spc); 1052 | return NULL; 1053 | } 1054 | } 1055 | } 1056 | else 1057 | { 1058 | assert(!isascii(ctx->str[ctx->i])); 1059 | 1060 | int n = xutf8_sequence_to_utf32_codepoint(ctx->str + ctx->i, ctx->len - ctx->i, NULL); 1061 | if(n < 0) 1062 | { 1063 | xj_preport(ctx->error, ctx->str, ctx->i, "Invalid UTF-8"); 1064 | spc_free(&spc); 1065 | return NULL; 1066 | } 1067 | 1068 | assert(n > 0); 1069 | 1070 | if(!spc_append(&spc, ctx->str + ctx->i, n)) 1071 | { 1072 | xj_report(ctx->error, "Out of memory"); 1073 | spc_free(&spc); 1074 | return NULL; 1075 | } 1076 | 1077 | ctx->i += n; 1078 | } 1079 | } 1080 | 1081 | ctx->i += 1; // Skip '"'. 1082 | 1083 | void *p = raw ? 
(void*) xj_strdup(spc.buffer, spc.size, ctx->alloc, ctx->error) 1084 | : (void*) xj_value_string(spc.buffer, spc.size, ctx->alloc, ctx->error); 1085 | if(p == NULL) 1086 | xj_report(ctx->error, "No memory"); 1087 | spc_free(&spc); 1088 | return p; 1089 | } 1090 | 1091 | static xj_value *parse_number(context_t *ctx) 1092 | { 1093 | assert(ctx->i < ctx->len && (isdigit(ctx->str[ctx->i]) || ctx->str[ctx->i] == '-')); 1094 | 1095 | _Bool negative = 0; 1096 | if(ctx->str[ctx->i] == '-') 1097 | { 1098 | negative = 1; 1099 | 1100 | ctx->i += 1; // Skip '-'. 1101 | 1102 | if(ctx->i == ctx->len) 1103 | { 1104 | xj_report(ctx->error, "String ended inside after minus sign"); 1105 | return NULL; 1106 | } 1107 | 1108 | if(!isdigit(ctx->str[ctx->i])) 1109 | { 1110 | xj_preport(ctx->error, ctx->str, ctx->i, "Expected a digit after minus sign"); 1111 | return NULL; 1112 | } 1113 | } 1114 | 1115 | // NOTE: We allow non-0 numbers starting with 0. 1116 | 1117 | xj_i64 parsed = 0; 1118 | 1119 | while(ctx->i < ctx->len && isdigit(ctx->str[ctx->i])) 1120 | { 1121 | if(parsed > (INT64_MAX - ctx->str[ctx->i] + '0') / 10) 1122 | { 1123 | /* Overflow */ 1124 | xj_preport(ctx->error, ctx->str, ctx->i, "Integer would overflow"); 1125 | return NULL; 1126 | } 1127 | 1128 | parsed = parsed * 10 + ctx->str[ctx->i] - '0'; 1129 | 1130 | ctx->i += 1; 1131 | } 1132 | 1133 | xj_bool followed_by_dot = ctx->i+1 < ctx->len && ctx->str[ctx->i] == '.' && isdigit(ctx->str[ctx->i+1]); 1134 | 1135 | xj_f64 decimal; 1136 | 1137 | if(followed_by_dot) 1138 | { 1139 | ctx->i += 1; // Skip '.'. 
1140 | 1141 | xj_f64 f = 1.0; 1142 | 1143 | decimal = 0; 1144 | 1145 | while(ctx->i < ctx->len && isdigit(ctx->str[ctx->i])) 1146 | { 1147 | f /= 10; 1148 | decimal += f * (ctx->str[ctx->i] - '0'); 1149 | ctx->i += 1; 1150 | } 1151 | } 1152 | 1153 | _Bool have_exponent = 0; 1154 | xj_f64 coeff; 1155 | 1156 | if(ctx->i < ctx->len && (ctx->str[ctx->i] == 'e' || ctx->str[ctx->i] == 'E')) 1157 | { 1158 | ctx->i += 1; // Skip 'e'. 1159 | 1160 | if(ctx->i == ctx->len) 1161 | { 1162 | xj_report(ctx->error, "String ended where an exponent was expected"); 1163 | return NULL; 1164 | } 1165 | 1166 | int exponent_start = ctx->i; 1167 | 1168 | _Bool negative_exponent = 0; 1169 | if(ctx->str[ctx->i] == '+' || ctx->str[ctx->i] == '-') 1170 | { 1171 | if(ctx->str[ctx->i] == '-') 1172 | negative_exponent = 1; 1173 | 1174 | ctx->i += 1; 1175 | 1176 | if(ctx->i == ctx->len) 1177 | { 1178 | xj_report(ctx->error, "String ended where an exponent was expected"); 1179 | return NULL; 1180 | } 1181 | } 1182 | 1183 | if(!isdigit(ctx->str[ctx->i])) 1184 | { 1185 | xj_preport(ctx->error, ctx->str, ctx->i, "Expected digit as exponent"); 1186 | return NULL; 1187 | } 1188 | 1189 | have_exponent = 1; 1190 | int exponent = 0; 1191 | while(ctx->i < ctx->len && isdigit(ctx->str[ctx->i])) 1192 | { 1193 | exponent = exponent * 10 + ctx->str[ctx->i] - '0'; 1194 | ctx->i += 1; 1195 | } 1196 | 1197 | if(exponent > XJ_MAX_EXPNT) 1198 | { 1199 | xj_preport(ctx->error, ctx->str, exponent_start, "Exponent is too big"); 1200 | return NULL; 1201 | } 1202 | 1203 | coeff = 1; 1204 | for(int j = 0; j < exponent; j += 1) 1205 | coeff *= 10; 1206 | 1207 | if(negative_exponent) 1208 | coeff = -coeff; 1209 | } 1210 | 1211 | xj_value *v; 1212 | if(followed_by_dot) 1213 | { 1214 | xj_f64 r = (xj_f64) parsed + decimal; 1215 | 1216 | if(negative) 1217 | r = -r; 1218 | 1219 | if(have_exponent) 1220 | r = r * coeff; 1221 | 1222 | v = xj_value_float(r, ctx->alloc, ctx->error); 1223 | } 1224 | else 1225 | { 1226 | xj_i64 r = 
parsed; 1227 | 1228 | if(negative) 1229 | r = -r; 1230 | 1231 | if(have_exponent) 1232 | r = r * coeff; 1233 | 1234 | v = xj_value_int(r, ctx->alloc, ctx->error); 1235 | } 1236 | return v; 1237 | } 1238 | 1239 | static xj_value *parse_value(context_t *ctx); 1240 | 1241 | static xj_value *parse_array(context_t *ctx) 1242 | { 1243 | assert(ctx->i < ctx->len && ctx->str[ctx->i] == '['); 1244 | 1245 | ctx->i += 1; // Skip '['. 1246 | 1247 | // Skip whitespace. 1248 | while(ctx->i < ctx->len && isspace(ctx->str[ctx->i])) 1249 | ctx->i += 1; 1250 | 1251 | if(ctx->i == ctx->len) 1252 | { 1253 | xj_report(ctx->error, "String ended inside an array, right after the first '['"); 1254 | return NULL; 1255 | } 1256 | 1257 | if(ctx->str[ctx->i] == ']') /* Empty array */ 1258 | { 1259 | ctx->i += 1; // Skip ']'. 1260 | return xj_value_array__nocheck(NULL, 0, ctx->alloc, ctx->error); 1261 | } 1262 | 1263 | xj_value *head = NULL; 1264 | xj_value **tail = &head; 1265 | int count = 0; 1266 | 1267 | while(1) 1268 | { 1269 | xj_value *child = parse_value(ctx); 1270 | 1271 | if(child == NULL) 1272 | return NULL; 1273 | 1274 | // Skip whitespace. 1275 | while(ctx->i < ctx->len && isspace(ctx->str[ctx->i])) 1276 | ctx->i += 1; 1277 | 1278 | if(ctx->i == ctx->len) 1279 | { 1280 | xj_report(ctx->error, "String ended inside an array, right after the %dth child", count+1); 1281 | return NULL; 1282 | } 1283 | 1284 | *tail = child; 1285 | tail = &child->next; 1286 | count += 1; 1287 | 1288 | if(ctx->str[ctx->i] == ']') 1289 | break; 1290 | 1291 | if(ctx->str[ctx->i] != ',') 1292 | { 1293 | xj_preport(ctx->error, ctx->str, ctx->i, "Bad character '%c' inside of an array", ctx->str[ctx->i]); 1294 | return NULL; 1295 | } 1296 | 1297 | ctx->i += 1; // Skip ','. 1298 | 1299 | // Skip whitespace. 
1300 | while(ctx->i < ctx->len && isspace(ctx->str[ctx->i])) 1301 | ctx->i += 1; 1302 | 1303 | if(ctx->i == ctx->len) 1304 | { 1305 | xj_report(ctx->error, "String ended inside an array, right after the ',' after the %dth child", count+1); 1306 | return NULL; 1307 | } 1308 | } 1309 | 1310 | ctx->i += 1; // Skip ']'. 1311 | 1312 | return xj_value_array__nocheck(head, count, ctx->alloc, ctx->error); 1313 | } 1314 | 1315 | static xj_value *parse_object(context_t *ctx) 1316 | { 1317 | assert(ctx->i < ctx->len && ctx->str[ctx->i] == '{'); 1318 | 1319 | ctx->i += 1; // Skip '{'. 1320 | 1321 | // Skip whitespace. 1322 | while(ctx->i < ctx->len && isspace(ctx->str[ctx->i])) 1323 | ctx->i += 1; 1324 | 1325 | if(ctx->i == ctx->len) 1326 | { 1327 | xj_report(ctx->error, "String ended inside an object, right after the first '{'"); 1328 | return NULL; 1329 | } 1330 | 1331 | if(ctx->str[ctx->i] == '}') /* Empty object */ 1332 | { 1333 | ctx->i += 1; // Skip '}'. 1334 | return xj_value_object__nocheck(NULL, 0, ctx->alloc, ctx->error); 1335 | } 1336 | 1337 | xj_value *head = NULL; 1338 | xj_value **tail = &head; 1339 | int count = 0; 1340 | 1341 | while(1) 1342 | { 1343 | if(ctx->str[ctx->i] != '"') 1344 | { 1345 | xj_preport(ctx->error, ctx->str, ctx->i, "Bad character '%c' where a string was expected"); 1346 | return NULL; 1347 | } 1348 | 1349 | char *key = parse_string(ctx, 1); 1350 | 1351 | if(key == NULL) 1352 | return NULL; 1353 | 1354 | // Skip whitespace before ':'. 1355 | while(ctx->i < ctx->len && isspace(ctx->str[ctx->i])) 1356 | ctx->i += 1; 1357 | 1358 | if(ctx->i == ctx->len) 1359 | { 1360 | xj_report(ctx->error, "String ended inside an object, right after the %dth child's key", count+1); 1361 | return NULL; 1362 | } 1363 | 1364 | if(ctx->str[ctx->i] != ':') 1365 | { 1366 | xj_preport(ctx->error, ctx->str, ctx->i, "Bad character '%c' where ':' was expected"); 1367 | return NULL; 1368 | } 1369 | 1370 | ctx->i += 1; // Skip the ':'. 
1371 | 1372 | // Skip whitespace after ':'. 1373 | while(ctx->i < ctx->len && isspace(ctx->str[ctx->i])) 1374 | ctx->i += 1; 1375 | 1376 | xj_value *child = parse_value(ctx); 1377 | 1378 | if(child == NULL) 1379 | return NULL; 1380 | 1381 | // Skip whitespace. 1382 | while(ctx->i < ctx->len && isspace(ctx->str[ctx->i])) 1383 | ctx->i += 1; 1384 | 1385 | if(ctx->i == ctx->len) 1386 | { 1387 | xj_report(ctx->error, "String ended inside an object, right after the %dth child", count+1); 1388 | return NULL; 1389 | } 1390 | 1391 | child->key = key; 1392 | 1393 | *tail = child; 1394 | tail = &child->next; 1395 | count += 1; 1396 | 1397 | if(ctx->str[ctx->i] == '}') 1398 | break; 1399 | 1400 | if(ctx->str[ctx->i] != ',') 1401 | { 1402 | xj_preport(ctx->error, ctx->str, ctx->i, "Bad character '%c' inside of an object", ctx->str[ctx->i]); 1403 | return NULL; 1404 | } 1405 | 1406 | ctx->i += 1; // Skip ','. 1407 | 1408 | // Skip whitespace. 1409 | while(ctx->i < ctx->len && isspace(ctx->str[ctx->i])) 1410 | ctx->i += 1; 1411 | 1412 | if(ctx->i == ctx->len) 1413 | { 1414 | xj_report(ctx->error, "String ended inside an object, right after the ',' after the %dth child", count+1); 1415 | return NULL; 1416 | } 1417 | } 1418 | 1419 | ctx->i += 1; // Skip '}'. 
1420 | 1421 | return xj_value_object__nocheck(head, count, ctx->alloc, ctx->error); 1422 | } 1423 | 1424 | static xj_value *parse_bool_or_null(context_t *ctx) 1425 | { 1426 | static const char kword_null [] = "null"; 1427 | static const char kword_true [] = "true"; 1428 | static const char kword_false[] = "false"; 1429 | const char *kword; 1430 | int kwlen; 1431 | 1432 | char c = ctx->str[ctx->i]; 1433 | 1434 | if(c == 'n') 1435 | { 1436 | kword = kword_null; 1437 | kwlen = sizeof(kword_null)-1; 1438 | } 1439 | else if(c == 't') 1440 | { 1441 | kword = kword_true; 1442 | kwlen = sizeof(kword_true)-1; 1443 | } 1444 | else if(c == 'f') 1445 | { 1446 | kword = kword_false; 1447 | kwlen = sizeof(kword_false)-1; 1448 | } 1449 | else 1450 | { 1451 | xj_preport(ctx->error, ctx->str, ctx->i, "Bad character '%c'", c); 1452 | return NULL; 1453 | } 1454 | 1455 | if(ctx->i + kwlen <= ctx->len && !strncmp(ctx->str + ctx->i, kword, kwlen)) 1456 | { 1457 | ctx->i += kwlen; 1458 | switch(c) 1459 | { 1460 | case 'n': return xj_value_null(ctx->alloc, ctx->error); 1461 | case 't': return xj_value_bool(1, ctx->alloc, ctx->error); 1462 | case 'f': return xj_value_bool(0, ctx->alloc, ctx->error); 1463 | } 1464 | /* UNREACHABLE */ 1465 | } 1466 | 1467 | if(ctx->i + kwlen > ctx->len) 1468 | { 1469 | xj_report(ctx->error, "String ended unexpectedly"); 1470 | return NULL; 1471 | } 1472 | 1473 | // Get to the character that made the comparison fail 1474 | int p = 0; 1475 | while(kword[p] == ctx->str[ctx->i+p]) 1476 | p += 1; 1477 | ctx->i += p; 1478 | 1479 | xj_preport(ctx->error, ctx->str, ctx->i, 1480 | "Bad character '%c'", ctx->str[ctx->i]); 1481 | return NULL; 1482 | } 1483 | 1484 | static xj_value *parse_value(context_t *ctx) 1485 | { 1486 | if(ctx->i == ctx->len) 1487 | { 1488 | xj_report(ctx->error, "String ended where a value was expected"); 1489 | return NULL; 1490 | } 1491 | 1492 | if(ctx->depth+1 == XJ_MAX_DEPTH) 1493 | { 1494 | xj_preport(ctx->error, ctx->str, ctx->i, "Maximum 
depth reached"); 1495 | return NULL; 1496 | } 1497 | ctx->depth += 1; 1498 | 1499 | assert(!isspace(ctx->str[ctx->i])); 1500 | 1501 | xj_value *res; 1502 | 1503 | char c = ctx->str[ctx->i]; 1504 | 1505 | if(c == '"') 1506 | res = parse_string(ctx, 0); 1507 | else if(isdigit(c) || c == '-') 1508 | res = parse_number(ctx); 1509 | else if(c == '[') 1510 | res = parse_array(ctx); 1511 | else if(c == '{') 1512 | res = parse_object(ctx); 1513 | else 1514 | res = parse_bool_or_null(ctx); 1515 | 1516 | ctx->depth -= 1; 1517 | return res; 1518 | } 1519 | 1520 | /* Symbol: 1521 | * xj_decode 1522 | * 1523 | * Description: 1524 | * Transform a JSON UTF-8 string to a tree of [xj_value] nodes. 1525 | * 1526 | * Arguments: 1527 | * str: The string to be parsed. It's doesn't need to be 1528 | * zero-terminated. If NULL, an empty string is assumed. 1529 | * 1530 | * len: The length of [str] (in bytes). If negative, [str] is 1531 | * assumed to be zero-terminated and [len] is computed 1532 | * using [strlen]. 1533 | * 1534 | * alloc: The allocator that will be used to store the parsing 1535 | * result. It's not optional (can't be NULL). 1536 | * 1537 | * error: The reference to a caller-allocated [xj_error]. If 1538 | * an error occurres (NULL is returned) then this is 1539 | * used to provide the caller with useful information 1540 | * regarting the failure. It's not required and can be 1541 | * NULL. 1542 | * 1543 | * Returns: 1544 | * The pointer to a tree of [xj_value] nodes, or NULL on failure. 1545 | * If NULL is returned and an [xj_error] is provided, than it's 1546 | * fields are set to provide the caller with extra information 1547 | * related to the failure. 1548 | * 1549 | * Notes: 1550 | * The returned objects are deallocated with the whole allocator 1551 | * when calling [xj_alloc_del]. 
1552 | */ 1553 | xj_value *xj_decode(const char *str, int len, 1554 | xj_alloc *alloc, xj_error *error) 1555 | { 1556 | if(str == NULL) 1557 | str = ""; 1558 | 1559 | if(len < 0) 1560 | len = strlen(str); 1561 | 1562 | if(error != NULL) 1563 | memset(error, 0, sizeof(xj_error)); 1564 | 1565 | int i = 0; 1566 | 1567 | // Skip whitespace 1568 | while(i < len && isspace(str[i])) 1569 | i += 1; 1570 | 1571 | if(i == len) 1572 | { 1573 | xj_report(error, "The string only contains whitespace"); 1574 | return NULL; 1575 | } 1576 | 1577 | context_t ctx = { 1578 | .str = str, .i = i, .len = len, .depth = 0, 1579 | .alloc = alloc, .error = error }; 1580 | return parse_value(&ctx); 1581 | } 1582 | 1583 | typedef struct bucket_t bucket_t; 1584 | 1585 | /* Symbol: 1586 | * bucket_t 1587 | * 1588 | * Description: 1589 | * A memory region that linked with other [bucket_t] 1590 | * can represent long strings of text. It's a sub-type 1591 | * of [bucket_t]. 1592 | * 1593 | * Notes: 1594 | * This is a big structure. 1595 | * 1596 | * The [body]'s was chosen to be such that the whole 1597 | * [bucket_t] is 4kb big, but it's not really necessary. 1598 | */ 1599 | struct bucket_t { 1600 | bucket_t *next; 1601 | char body[4096-sizeof(void*)]; 1602 | }; 1603 | 1604 | /* Symbol: 1605 | * buffer_t 1606 | * 1607 | * Description: 1608 | * A buffer that can be used to build large strings 1609 | * without the degradation of performance that one 1610 | * would get by using a plain dinamically growing 1611 | * array. 1612 | * It's implemented as a linked list of chunks, so 1613 | * it grows by adding new chunks, without the need 1614 | * to move the old chunks. 1615 | * 1616 | * Fields: 1617 | * size: The absolute string size (in bytes) that is 1618 | * contained in the buffer. When the buffer is 1619 | * serialized, the resulting string will have 1620 | * this size. 1621 | * 1622 | * used: The amount of bytes held by the last chunk. 1623 | * 1624 | * tail: The pointer to the last chunk. 
1625 | * 1626 | * head: The first chunk of the buffer. It's not a 1627 | * pointer because it's pre-allocated with 1628 | * the [buffer_t]. 1629 | * 1630 | * Notes: 1631 | * The fact that the first chunk comes preallocated with 1632 | * the buffer makes it a large structure. A [bucket_t] is 1633 | * around 4kb, so a buffer will be bigger than that. 1634 | * 1635 | * The [head] is the last field so that the other fields 1636 | * are contiguous in memory. If [head] were between other 1637 | * fields, then there would be a 4kb distance between them. 1638 | */ 1639 | typedef struct { 1640 | int size, used; 1641 | bucket_t *tail, head; 1642 | } buffer_t; 1643 | 1644 | /* Symbol: 1645 | * buffer_append 1646 | * 1647 | * Description: 1648 | * Appends a string to a [buffer_t]. 1649 | * 1650 | * Returns: 1651 | * 1 if all went well or 0 if an error occurred. 1652 | */ 1653 | static xj_bool buffer_append(buffer_t *buff, const char *str, int len) 1654 | { 1655 | assert(str != NULL && len >= 0); 1656 | 1657 | // If there's not enough memory in the tail chunk 1658 | // then create a new tail chunk! 1659 | 1660 | if(buff->used + len > (int) sizeof(buff->tail->body)) 1661 | { 1662 | // It's not possible to add a string that 1663 | // is bigger than a chunk. 1664 | if(len > (int) sizeof(buff->tail->body)) 1665 | return 0; 1666 | 1667 | bucket_t *buck = malloc(sizeof(bucket_t)); 1668 | 1669 | if(buck == NULL) 1670 | return 0; 1671 | 1672 | buck->next = NULL; 1673 | buff->tail->next = buck; 1674 | buff->tail = buck; 1675 | buff->used = 0; 1676 | } 1677 | 1678 | memcpy(buff->tail->body + buff->used, str, len); 1679 | buff->used += len; 1680 | buff->size += len; 1681 | return 1; 1682 | } 1683 | 1684 | /* Symbol: 1685 | * encode_string 1686 | * 1687 | * Description: 1688 | * Serializes a string to a [buffer_t] in JSON form. 1689 | * 1690 | * Returns: 1691 | * 1 if all went well or 0 if an error occurred. 
1692 | */ 1693 | static _Bool encode_string(const char *str, int len, buffer_t *buff) 1694 | { 1695 | assert(str != NULL && len >= 0); 1696 | 1697 | if(!buffer_append(buff, "\"", 1)) 1698 | return 0; 1699 | 1700 | int i = 0; 1701 | while(1) 1702 | { 1703 | int start = i; 1704 | 1705 | while(i < len && str[i] != '"' && str[i] != '\\' 1706 | && (unsigned char) str[i] >= 32 1707 | && (unsigned char) str[i] <= 127) 1708 | i += 1; 1709 | 1710 | int end = i; 1711 | 1712 | if(!buffer_append(buff, str + start, end - start)) 1713 | return 0; 1714 | 1715 | if(i == len) 1716 | break; 1717 | 1718 | if(str[i] == '"') 1719 | { 1720 | if(!buffer_append(buff, "\\\"", 2)) 1721 | return 0; 1722 | i += 1; 1723 | } 1724 | else if(str[i] == '\\') 1725 | { 1726 | if(!buffer_append(buff, "\\\\", 2)) 1727 | return 0; 1728 | i += 1; 1729 | } 1730 | else if((unsigned char) str[i] < 32) 1731 | { 1732 | char *m = NULL; 1733 | switch(str[i]) 1734 | { 1735 | case '\t': m = "\\t"; break; 1736 | case '\n': m = "\\n"; break; 1737 | case '\b': m = "\\b"; break; 1738 | case '\f': m = "\\f"; break; 1739 | case '\r': m = "\\r"; break; 1740 | default: 1741 | assert(0); 1742 | // Unexpected control character. 
1743 | break; 1744 | } 1745 | 1746 | assert(m != NULL); 1747 | 1748 | if(!buffer_append(buff, m, 2)) 1749 | return 0; 1750 | 1751 | i += 1; 1752 | } 1753 | else 1754 | { 1755 | uint32_t rune; 1756 | int scanned = xutf8_sequence_to_utf32_codepoint(str + i, len - i, &rune); 1757 | 1758 | if(scanned < 0) 1759 | { 1760 | assert(0); 1761 | // Invalid UTF-8 1762 | } 1763 | 1764 | static const char map[] = "0123456789ABCDEF"; 1765 | 1766 | char buffer[13]; 1767 | int used; 1768 | 1769 | if((rune >> 16) == 0) 1770 | { 1771 | used = 6; 1772 | buffer[0] = '\\'; 1773 | buffer[1] = 'u'; 1774 | buffer[2] = map[(rune >> 12) & 0xF]; 1775 | buffer[3] = map[(rune >> 8) & 0xF]; 1776 | buffer[4] = map[(rune >> 4) & 0xF]; 1777 | buffer[5] = map[(rune >> 0) & 0xF]; 1778 | buffer[6] = '\0'; 1779 | } 1780 | else 1781 | { 1782 | used = 12; 1783 | buffer[0] = '\\'; 1784 | buffer[1] = 'u'; 1785 | buffer[2] = map[(rune >> 28) & 0xF]; 1786 | buffer[3] = map[(rune >> 24) & 0xF]; 1787 | buffer[4] = map[(rune >> 20) & 0xF]; 1788 | buffer[5] = map[(rune >> 16) & 0xF]; 1789 | buffer[6] = '\\'; 1790 | buffer[7] = 'u'; 1791 | buffer[8] = map[(rune >> 12) & 0xF]; 1792 | buffer[9] = map[(rune >> 8) & 0xF]; 1793 | buffer[10] = map[(rune >> 4) & 0xF]; 1794 | buffer[11] = map[(rune >> 0) & 0xF]; 1795 | buffer[12] = '\0'; 1796 | } 1797 | 1798 | if(!buffer_append(buff, buffer, used)) 1799 | return 0; 1800 | 1801 | i += scanned; 1802 | } 1803 | } 1804 | 1805 | if(!buffer_append(buff, "\"", 1)) 1806 | return 0; 1807 | 1808 | return 1; 1809 | } 1810 | 1811 | /* Symbol: 1812 | * encode_value 1813 | * 1814 | * Description: 1815 | * Serializes an [xj_value] to a [buffer_t] 1816 | * 1817 | * Returns: 1818 | * 1 if all went well or 0 if an error occurred. 1819 | */ 1820 | static _Bool encode_value(xj_value *val, buffer_t *buff) 1821 | { 1822 | switch(val == NULL ? 
XJ_NULL : val->type) 1823 | { 1824 | case XJ_NULL: 1825 | return buffer_append(buff, "null", 4); 1826 | 1827 | case XJ_BOOL: 1828 | return val->as_bool 1829 | ? buffer_append(buff, "true", 4) 1830 | : buffer_append(buff, "false", 5); 1831 | 1832 | case XJ_INT: 1833 | { 1834 | char temp[32]; 1835 | int k = snprintf(temp, sizeof(temp), 1836 | "%lld", val->as_int); 1837 | assert(k >= 0 && k < (int) sizeof(temp)); 1838 | if(!buffer_append(buff, temp, k)) 1839 | return 0; 1840 | return 1; 1841 | } 1842 | 1843 | case XJ_FLOAT: 1844 | { 1845 | char temp[32]; 1846 | int k = snprintf(temp, sizeof(temp), 1847 | "%g", val->as_float); 1848 | assert(k >= 0 && k < (int) sizeof(temp)); 1849 | if(!buffer_append(buff, temp, k)) 1850 | return 0; 1851 | return 1; 1852 | } 1853 | 1854 | case XJ_ARRAY: 1855 | { 1856 | if(!buffer_append(buff, "[", 1)) 1857 | return 0; 1858 | 1859 | xj_value *child = val->as_object; 1860 | while(child != NULL) 1861 | { 1862 | if(!encode_value(child, buff)) 1863 | return 0; 1864 | 1865 | child = child->next; 1866 | 1867 | if(child != NULL) 1868 | if(!buffer_append(buff, ", ", 2)) 1869 | return 0; 1870 | } 1871 | 1872 | if(!buffer_append(buff, "]", 1)) 1873 | return 0; 1874 | return 1; 1875 | } 1876 | 1877 | case XJ_OBJECT: 1878 | { 1879 | if(!buffer_append(buff, "{", 1)) 1880 | return 0; 1881 | 1882 | xj_value *child = val->as_object; 1883 | while(child != NULL) 1884 | { 1885 | if(!encode_string(child->key, strlen(child->key), buff)) 1886 | return 0; 1887 | 1888 | if(!buffer_append(buff, ": ", 2)) 1889 | return 0; 1890 | 1891 | if(!encode_value(child, buff)) 1892 | return 0; 1893 | 1894 | child = child->next; 1895 | 1896 | if(child != NULL) 1897 | if(!buffer_append(buff, ", ", 2)) 1898 | return 0; 1899 | } 1900 | 1901 | if(!buffer_append(buff, "}", 1)) 1902 | return 0; 1903 | return 1; 1904 | } 1905 | 1906 | case XJ_STRING: 1907 | return encode_string(val->as_string, val->size, buff); 1908 | } 1909 | return 0; 1910 | } 1911 | 1912 | /* Symbol: 1913 | * 
xj_encode 1914 | * 1915 | * Description: 1916 | * Transforms an [xj_value] to a string. 1917 | * 1918 | * Arguments: 1919 | * value: The object to be converted to a string. 1920 | * 1921 | * len: An output argument that returns the length 1922 | * of the generated string. It's optional, so 1923 | * it can be NULL. 1924 | * 1925 | * Returns: 1926 | * The pointer to a zero-terminated string if all went 1927 | * well or NULL. 1928 | * 1929 | * Notes: 1930 | * The returned pointer, if not NULL, must be 1931 | * deallocated using [free]. 1932 | */ 1933 | char *xj_encode(xj_value *value, int *len) 1934 | { 1935 | buffer_t buff; 1936 | buff.size = 0; 1937 | buff.used = 0; 1938 | buff.tail = &buff.head; 1939 | buff.head.next = NULL; 1940 | 1941 | _Bool ok = encode_value(value, &buff); 1942 | 1943 | char *serialized = NULL; 1944 | 1945 | if(ok) 1946 | { 1947 | /* Serialize */ 1948 | 1949 | serialized = malloc(buff.size+1); 1950 | 1951 | if(serialized != NULL) 1952 | { 1953 | int copied = 0; 1954 | 1955 | bucket_t *curs = &buff.head; 1956 | while(curs->next != NULL) 1957 | { 1958 | memcpy(serialized + copied, 1959 | curs->body, sizeof(curs->body)); 1960 | 1961 | copied += sizeof(curs->body); 1962 | curs = curs->next; 1963 | } 1964 | 1965 | memcpy(serialized + copied, 1966 | curs->body, buff.used); 1967 | 1968 | serialized[buff.size] = '\0'; 1969 | 1970 | if(len) 1971 | *len = buff.size; 1972 | } 1973 | } 1974 | 1975 | /* Free the buffer */ 1976 | bucket_t *curs = buff.head.next; 1977 | while(curs != NULL) 1978 | { 1979 | bucket_t *next = curs->next; 1980 | free(curs); 1981 | curs = next; 1982 | } 1983 | 1984 | return serialized; 1985 | } -------------------------------------------------------------------------------- /src/xjson.h: -------------------------------------------------------------------------------- 1 | #ifndef XJSON_H 2 | #define XJSON_H 3 | 4 | typedef double xj_f64; 5 | typedef long long xj_i64; 6 | typedef _Bool xj_bool; 7 | 8 | _Static_assert(sizeof(xj_f64) 
/* ---- /src/xjson.h ---- */
#ifndef XJSON_H
#define XJSON_H

/* Fixed-size aliases used throughout the API. The static
 * assertions below reject platforms where these types
 * aren't 8 bytes long. */
typedef double    xj_f64;
typedef long long xj_i64;
typedef _Bool     xj_bool;

_Static_assert(sizeof(xj_f64) == 8, "double isn't 8 bytes long");
_Static_assert(sizeof(xj_i64) == 8, "long long isn't 8 bytes long");

/* Type tags stored in the [type] field of an [xj_value]. */
enum {
    XJ_NULL,
    XJ_BOOL,
    XJ_INT,
    XJ_FLOAT,
    XJ_ARRAY,
    XJ_OBJECT,
    XJ_STRING,
};

/* A generic JSON value. Arrays and objects are linked
 * lists of [xj_value]s chained through [next]. */
typedef struct xj_value xj_value;
struct xj_value {
    int type;        /* One of the XJ_* tags. */
    int size;        /* For XJ_STRING, the length in bytes of [as_string].
                      * Presumably the child count for arrays and
                      * objects -- TODO confirm. */
    xj_value *next;  /* Next sibling in the parent's child list. */
    char *key;       /* Zero-terminated member name when the value
                      * is a child of an object. */
    union {
        xj_i64    as_int;
        xj_bool   as_bool;
        xj_f64    as_float;
        xj_value *as_array;   /* First child of the array. */
        xj_value *as_object;  /* First child of the object. */
        char     *as_string;
    };
};

/* Failure report filled in by the library. [xj_decode]
 * zeroes it before parsing.
 * NOTE(review): [off], [row] and [col] look like the
 * position of the error in the source and [truncated]
 * like a flag for a message that didn't fit -- confirm
 * against [xj_preport]. */
typedef struct {
    xj_bool occurred;
    xj_bool truncated;
    int off, row, col;
    char message[128];
} xj_error;

/* Opaque allocator that owns the nodes returned by the
 * xj_value_* constructors and by [xj_decode].
 * NOTE(review): the callers in this repository pass a
 * caller-owned pool and its size as [mem]/[size]; [ext]
 * presumably is the size of the extension chunks and
 * [free] the routine releasing [mem] -- confirm. */
typedef struct xj_alloc xj_alloc;
xj_alloc *xj_alloc_using(void *mem, int size, int ext, void (*free)(void*));
xj_alloc *xj_alloc_new(int size, int ext);
void      xj_alloc_del(xj_alloc *alloc);

void *xj_bpalloc(xj_alloc *alloc, int size);
void xj_preport(xj_error *error, const char *src, int off, const char *fmt, ...);
/* Reports an error that isn't tied to a source position:
 * forwards to [xj_preport] with a NULL source and an
 * offset of -1. The ", ## __VA_ARGS__" form is the GNU
 * extension that drops the comma when no variadic
 * argument is given. */
#define xj_report(error, fmt, ...) xj_preport(error, NULL, -1, fmt, ## __VA_ARGS__)

/* Constructors. Each takes the allocator that will own
 * the new node and an optional error report. */
xj_value *xj_value_null(xj_alloc *alloc, xj_error *error);
xj_value *xj_value_bool(xj_bool val, xj_alloc *alloc, xj_error *error);
xj_value *xj_value_int(xj_i64 val, xj_alloc *alloc, xj_error *error);
xj_value *xj_value_float(xj_f64 val, xj_alloc *alloc, xj_error *error);
xj_value *xj_value_array(xj_value *head, xj_alloc *alloc, xj_error *error);
xj_value *xj_value_object(xj_value *head, xj_alloc *alloc, xj_error *error);
xj_value *xj_value_string(const char *str, int len, xj_alloc *alloc, xj_error *error);
xj_value *xj_value_array__nocheck(xj_value *head, int count, xj_alloc *alloc, xj_error *error);
xj_value *xj_value_object__nocheck(xj_value *head, int count, xj_alloc *alloc, xj_error *error);

_Bool xj_array_append(xj_value *array, xj_value *child, xj_error *error);

char *xj_strdup(const char *str, int len, xj_alloc *alloc, xj_error *error);

/* JSON text -> value tree, and value tree -> JSON text.
 * The string returned by [xj_encode] must be released
 * with [free]. */
xj_value *xj_decode(const char *str, int len, xj_alloc *alloc, xj_error *error);
char     *xj_encode(xj_value *value, int *len);

#endif /* XJSON_H */

/* ---- /tests/fuzzer.c ---- */
/* NOTE(review): the header names of the four #include
 * lines below are missing in SOURCE; they're kept as found. */
#include
#include
#include
#include
#include "xjson.h"

__AFL_FUZZ_INIT();

/* Fuzzing harness: decodes every test case provided by
 * the fuzzer, using a fresh allocator over the same
 * stack pool for each one. The decoded value is discarded. */
int main()
{

#ifdef __AFL_HAVE_MANUAL_CONTROL
    __AFL_INIT();
#endif

    unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; // must be after __AFL_INIT
                                                  // and before __AFL_LOOP!
    char pool[65536];
    xj_error error;

    while(__AFL_LOOP(10000))
    {
        int len = __AFL_FUZZ_TESTCASE_LEN;

        xj_alloc *alloc = xj_alloc_using(pool, sizeof(pool), 4096, NULL);
        assert(alloc != NULL);

        // NOTE(review): [buf] is an [unsigned char*] while
        // [xj_decode] takes a [const char*]; the pointer
        // types differ in signedness and no cast is made.
        xj_decode(buf, len, alloc, &error);

        xj_alloc_del(alloc);
    }
    return 0;
}
18 | char pool[65536]; 19 | xj_error error; 20 | 21 | while(__AFL_LOOP(10000)) 22 | { 23 | int len = __AFL_FUZZ_TESTCASE_LEN; 24 | 25 | xj_alloc *alloc = xj_alloc_using(pool, sizeof(pool), 4096, NULL); 26 | assert(alloc != NULL); 27 | 28 | xj_decode(buf, len, alloc, &error); 29 | 30 | xj_alloc_del(alloc); 31 | } 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tests/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define TEST(src_) { .line = __LINE__, .src = src_ } 9 | 10 | static const struct { 11 | int line; 12 | const char *src; 13 | } tests[] = { 14 | TEST("null"), 15 | TEST("true"), 16 | TEST("false"), 17 | 18 | TEST("1"), 19 | TEST("100"), 20 | TEST("-1"), 21 | TEST("-100"), 22 | TEST("1.0"), 23 | TEST("100.111"), 24 | TEST("-1.0"), 25 | TEST("-100.111"), 26 | 27 | TEST("[]"), 28 | TEST("[1, 2, 3]"), 29 | TEST("{}"), 30 | TEST("{\"key\":5}"), 31 | TEST("{\"key1\":5,\"key2\":3}"), 32 | 33 | TEST("\"\""), 34 | TEST("\"\\f\""), 35 | TEST("\"\\t\""), 36 | TEST("\"\\n\""), 37 | TEST("\"\\b\""), 38 | TEST("\"\\r\""), 39 | TEST("\"\\\\\""), 40 | TEST("\"\\uFFFF\""), 41 | TEST("\"\\u0010\\uFFFF\""), 42 | 43 | }; 44 | 45 | #undef TEST 46 | 47 | _Bool json_strings_match(const char *A, const char *B) 48 | { 49 | int Ai = 0, Bi = 0; 50 | 51 | do 52 | { 53 | // TODO: Only ignore spaces if they're 54 | // not inside strings. 
55 | while(isspace(A[Ai])) 56 | Ai += 1; 57 | 58 | while(isspace(B[Bi])) 59 | Bi += 1; 60 | 61 | if(A[Ai] != B[Bi]) return 0; 62 | Ai += 1; 63 | Bi += 1; 64 | } 65 | while(A[Ai] != '\0'); 66 | return 1; 67 | } 68 | 69 | int main() 70 | { 71 | char pool[65536]; 72 | xj_error error; 73 | 74 | int total = sizeof(tests) / sizeof(tests[0]); 75 | int passed = 0; 76 | 77 | for(int i = 0; i < total; i += 1) 78 | { 79 | xj_alloc *alloc = xj_alloc_using(pool, sizeof(pool), 0, NULL); 80 | assert(alloc != NULL); 81 | 82 | xj_value *val = xj_decode(tests[i].src, -1, alloc, &error); 83 | 84 | if(val == NULL) 85 | { 86 | fprintf(stderr, "Failed to parse (%s)\n", error.message); 87 | return 1; 88 | } 89 | 90 | int size; 91 | char *serialized = xj_encode(val, &size); 92 | assert(serialized != NULL); 93 | 94 | if(json_strings_match(serialized, tests[i].src)) 95 | { 96 | passed += 1; 97 | } 98 | else 99 | { 100 | fprintf(stderr, "Failed! Expected:\n"); 101 | fprintf(stderr, " %s\n", tests[i].src); 102 | fprintf(stderr, "but got:\n"); 103 | fprintf(stderr, " %s\n", serialized); 104 | } 105 | 106 | free(serialized); 107 | } 108 | 109 | fprintf(stdout, "passed: %d, failed: %d, total: %d\n", passed, total - passed, total); 110 | return 0; 111 | } 112 | --------------------------------------------------------------------------------