├── .editorconfig ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── Doxyfile ├── LICENSE ├── MANIFEST ├── Makefile ├── README.pod ├── compile-travis.sh ├── examples ├── Makefile ├── glib-datatypes.c └── glib-datatypes.h ├── json_samples.tgz ├── jsonsl.c ├── jsonsl.h ├── perf ├── Makefile ├── bench.c ├── documents.c ├── documents.h └── perftest.c ├── srcutil └── genchartables.pl └── tests ├── CMakeLists.txt ├── Makefile ├── all-tests.h ├── api_test.c ├── cliopts.c ├── cliopts.h ├── cxxtest.cpp ├── fail-tests.c ├── jpr_test.c ├── json_test.c ├── match_test.c └── unescape.c /.editorconfig: -------------------------------------------------------------------------------- 1 | # Copying and distribution of this file, with or without modification, 2 | # are permitted in any medium without royalty provided this notice is 3 | # preserved. This file is offered as-is, without any warranty. 4 | # Names of contributors must not be used to endorse or promote products 5 | # derived from this file without specific prior written permission. 6 | 7 | # EditorConfig 8 | # http://EditorConfig.org 9 | 10 | # top-most EditorConfig file 11 | root = true 12 | 13 | # LF end-of-line, insert an empty new line and UTF-8 14 | [*] 15 | end_of_line = lf 16 | insert_final_newline = true 17 | charset = utf-8 18 | 19 | # Tab indentation 20 | [makefile,Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Copying and distribution of this file, with or without modification, 2 | # are permitted in any medium without royalty provided this notice is 3 | # preserved. This file is offered as-is, without any warranty. 4 | # Names of contributors must not be used to endorse or promote products 5 | # derived from this file without specific prior written permission. 
6 | 7 | 8 | # Temp files 9 | *~ 10 | *.bak 11 | *.backup 12 | \#* 13 | .\#* 14 | *\# 15 | *.swp 16 | *.sav 17 | *.save 18 | *.autosav 19 | *.autosave 20 | 21 | # Generated binaries 22 | *.o 23 | *.so 24 | *.dll 25 | *.exe 26 | *.dylib 27 | *.dSYM 28 | perf/bench 29 | perf/yajl-perftest 30 | share 31 | 32 | # Generated folders 33 | ## Generated binary folders 34 | bin/ 35 | binaries/ 36 | obj/ 37 | objects/ 38 | build*/ 39 | Build*/ 40 | release*/ 41 | Release*/ 42 | dbg*/ 43 | debug*/ 44 | Debug*/ 45 | ## Generated documentation 46 | /doc/ 47 | docs/ 48 | documentation/ 49 | Documentation/ 50 | doc-html/ 51 | 52 | # Tests 53 | tests/jpr_test 54 | tests/api_test 55 | tests/json_test 56 | tests/unescape 57 | tests/cxxtest 58 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | compiler: 4 | - gcc 5 | - clang 6 | 7 | env: 8 | - PARSE_NAN=1 9 | - PARSE_NAN=0 10 | 11 | script: ./compile-travis.sh 12 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # This is here mainly to support Win32 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.9) 3 | ADD_DEFINITIONS(-DJSONSL_STATE_GENERIC) 4 | IF(MSVC) 5 | ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS) 6 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W3") 7 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3") 8 | ELSE() 9 | SET(jsonsl_cpp_warnings "-pedantic -Wall -Wextra -Wno-missing-field-initializers -Wno-unused-parameter") 10 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c89 ${jsonsl_cpp_warnings}") 11 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++03 ${jsonsl_cpp_warnings}") 12 | ENDIF() 13 | INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) 14 | ADD_LIBRARY(jsonsl jsonsl.c) 15 | EXECUTE_PROCESS( 16 | COMMAND 17 | ${CMAKE_COMMAND} -E tar xzf 
${CMAKE_CURRENT_SOURCE_DIR}/json_samples.tgz 18 | WORKING_DIRECTORY 19 | ${CMAKE_CURRENT_BINARY_DIR}) 20 | 21 | 22 | ENABLE_TESTING() 23 | ADD_SUBDIRECTORY(tests) 24 | 25 | # Add the benchmarks: 26 | ADD_EXECUTABLE(bench-simple EXCLUDE_FROM_ALL perf/bench.c jsonsl.c) 27 | ADD_EXECUTABLE(yajl-perftest EXCLUDE_FROM_ALL perf/documents.c perf/perftest.c jsonsl.c) 28 | IF(CMAKE_MAJOR_VERSION GREATER 2 OR CMAKE_MINOR_VERSION GREATER 8) 29 | ADD_CUSTOM_TARGET(bench 30 | COMMAND $ ${CMAKE_CURRENT_BINARY_DIR}/share/auction 100 31 | COMMAND $ 32 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) 33 | ENDIF() 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2015 M. Nunberg, mnunberg@haskalah.org 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | Doxyfile 2 | LICENSE 3 | MANIFEST 4 | Makefile 5 | README.pod 6 | examples/Makefile 7 | examples/glib-datatypes.c 8 | examples/glib-datatypes.h 9 | json_samples.tgz 10 | jsonsl.c 11 | jsonsl.h 12 | perf/Makefile 13 | perf/bench.c 14 | perf/documents.c 15 | perf/documents.h 16 | perf/perftest.c 17 | srcutil/genchartables.pl 18 | tests/Makefile 19 | tests/jpr_test.c 20 | tests/api_test.c 21 | tests/json_test.c 22 | tests/unescape.c 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | prefix = /opt/local 2 | exec_prefix= ${prefix} 3 | libdir = $(exec_prefix)/lib 4 | 5 | INSTALL = /usr/bin/install -c 6 | 7 | INSTALL_PROGRAM = $(INSTALL) 8 | 9 | ifeq ($(patsubst gcc%,gcc,$(notdir $(basename $(CC)))),gcc) 10 | GCCFLAGS=-ggdb3 11 | endif 12 | 13 | LIBJSONSL_DIR+=$(shell pwd) 14 | LDFLAGS+=-L$(LIBJSONSL_DIR) -Wl,-rpath $(LIBJSONSL_DIR) 15 | CFLAGS+=\ 16 | -Wall -std=gnu89 -pedantic \ 17 | -O3 $(GCCFLAGS) \ 18 | -I$(LIBJSONSL_DIR) -DJSONSL_STATE_GENERIC \ 19 | 20 | CXXFLAGS+=\ 21 | -Wall -std=c++03 -pedantic -O3 -I$(LIBJSONSL_DIR) 22 | 23 | export CFLAGS 24 | export LDFLAGS 25 | export CXXFLAGS 26 | 27 | DYLIBPREFIX=lib 28 | ifeq ($(shell uname -s),Darwin) 29 | DYLIBSUFFIX=.dylib 30 | DYLIBFLAGS=-fPIC -fno-common -dynamiclib -Wl,-install_name,$(libdir)/$(LIB_FQNAME) 31 | else 32 | DYLIBSUFFIX=.so 33 | DYLIBFLAGS=-shared -fPIC 34 | endif 35 | 36 | LIB_BASENAME=jsonsl 37 | LIB_PREFIX?=$(DYLIBPREFIX) 38 | LIB_SUFFIX?=$(DYLIBSUFFIX) 39 | LIB_FQNAME = $(LIB_PREFIX)$(LIB_BASENAME)$(LIB_SUFFIX) 40 | 41 | ifdef STATIC_LIB 42 | LDFLAGS+=$(shell pwd)/$(LIB_FQNAME) 43 | LIBFLAGS=-c 44 | else 45 | LDFLAGS+=-l$(LIB_BASENAME) 46 | LIBFLAGS=$(DYLIBFLAGS) 47 | endif 48 | 49 | ifdef 
JSONSL_PARSE_NAN 50 | CFLAGS+="-DJSONSL_PARSE_NAN" 51 | endif 52 | 53 | all: $(LIB_FQNAME) 54 | 55 | install: all 56 | $(INSTALL) $(LIB_FQNAME) $(DESTDIR)$(libdir) 57 | 58 | .PHONY: examples 59 | examples: 60 | $(MAKE) -C $@ 61 | 62 | share: json_samples.tgz 63 | tar xzf $^ 64 | 65 | json_examples_tarball: 66 | rm -f json_samples.tgz 67 | tar -czf json_samples.tgz share 68 | 69 | check: $(LIB_FQNAME) share jsonsl.c 70 | JSONSL_QUIET_TESTS=1 $(MAKE) -C tests 71 | 72 | bench: share 73 | $(MAKE) -C perf run-benchmarks 74 | 75 | $(LIB_FQNAME): jsonsl.c 76 | $(CC) $(CFLAGS) $(LIBFLAGS) -o $@ $^ 77 | 78 | .PHONY: doc 79 | 80 | 81 | doc: Doxyfile 82 | doxygen -s $^ 83 | 84 | 85 | .PHONY: clean 86 | clean: 87 | rm -f *.o *.so *.a 88 | rm -f $(LIB_FQNAME) 89 | rm -f -r share 90 | rm -f -r *.dSYM 91 | $(MAKE) -C examples clean 92 | $(MAKE) -C tests clean 93 | $(MAKE) -C perf clean 94 | 95 | distclean: clean 96 | rm -rf share doc *.out 97 | 98 | dist: 99 | -rm -f jsonsl.tar.gz 100 | xargs < MANIFEST tar czf jsonsl.tar.gz 101 | -------------------------------------------------------------------------------- /README.pod: -------------------------------------------------------------------------------- 1 | =begin html 2 | 3 | 4 | 5 | =end html 6 | 7 | =head1 JSONSL 8 | 9 | JSON Stateful (or Simple, or Stacked, or Searchable, or Streaming) Lexer 10 | 11 | =head1 Why another (and yet another) JSON lexer? 12 | 13 | I took inspiration from some of the uses of I, which looked 14 | quite nice, but whose build system seemed unusable, source horribly 15 | mangled, and grown beyond its original design. In other words, I saw 16 | it as a bunch of cruft. 17 | 18 | Instead of bothering to spend a few days figuring out how to use it, 19 | I came to a conclusion that the tasks I needed (simple token 20 | notifications coupled with some kind of state shift detection), I could 21 | do with a simple, small, ANSI C embeddable source file. 
22 | 23 | I am still not sure if I provides the featureset of I, but 24 | I'm guessing I've got at least I innovation. 25 | 26 | I 27 | 28 | Inspiration was also taken from Joyent's B project, which 29 | seems to use a similar, embeddable, and simple model. 30 | 31 | Here's a quick featureset 32 | 33 | =over 34 | 35 | =item Stateful 36 | 37 | Maintains state about current descent/recursion/nesting level. 38 | Furthermore, you can access information about 'lower' stacks 39 | as long as they are active. 40 | 41 | =item Decoupling Object Graph from Data 42 | 43 | JSONSL abstracts the object graph from the actual (and usually 44 | more CPU-intensive) work of actually populating higher level 45 | structures such as "hashes" and "arrays" with "decoded" and 46 | "meaningful" values. Using this, one can implement an on-demand 47 | type of conversion. 48 | 49 | =item Callback oriented, selectively 50 | 51 | Invokes callbacks for all sorts of events, but you can control 52 | which kind of events you are interested in receiving without 53 | writing a ton of wrapper stubs 54 | 55 | =item Non-Buffering 56 | 57 | This doesn't buffer, copy, or allocate any data. The only allocation 58 | overhead is during the initialization of the parser, in which the 59 | initial stack structures are initialized 60 | 61 | =item Simple 62 | 63 | Just a C source file, and a corresponding header file. ANSI C. 64 | 65 | While attempts will be made to add functionality and reduce boilerplate 66 | in your code, the core functions are simple and clearly defined. 67 | 68 | Add-ons (see below) are available (and exist in the same jsonsl.c file) 69 | 70 | =item JSONPointer search add-on 71 | 72 | Use L 73 | to query JSON streams as they arrive. Quite efficient, and very simple (see 74 | jpr_test.c for examples) 75 | 76 | =item Unescaping utility add-on 77 | 78 | Includes a nice little function which can flexibly unescape JSON 79 | strings to match your specifications. 
80 | 81 | =back 82 | 83 | The rest of this documentation needs work 84 | 85 | =head1 Details 86 | 87 | =head2 Terminology 88 | 89 | Because the JSON spec is quite confusing in its terminology, especially 90 | when we want to map it to a different model, here is a listing of the 91 | terminology used here. 92 | 93 | I will use I, I, I interchangeably. They all 94 | refer to some form of atomic unit as far as JSON is concerned. 95 | 96 | I will use the term I for those things which look like C<{"foo":"bar"}>, 97 | and refer to its contents as I and I 98 | 99 | I will use the term I for those things which look like C<["hello", "byebye"]>, 100 | and their contents as I or I explicitly 101 | 102 | 103 | =head2 Model 104 | 105 | =head3 States 106 | 107 | A state represents a JSON element, this can be 108 | a hash (C), array (C), hash key 109 | (C), string (C), or a 'special' value (C) 110 | which should be either a numeric value, or one of C. 111 | 112 | A state comprises and maintains the following information 113 | 114 | =over 115 | 116 | =item Type 117 | 118 | This merely states what type it is - as one of the C constants 119 | mentioned above 120 | 121 | =item Positioning 122 | 123 | This contains positioning information mapping the location of the element 124 | as an offset relative to the input stream. When a state begins, its I 125 | position is set. Whenever control returns back to the state, its I 126 | position is updated and set to the point in the stream when the return 127 | occurred 128 | 129 | =item Extended Information 130 | 131 | For non-scalar state types, information regarding the number of children 132 | contained is stored. 133 | 134 | =item User Data 135 | 136 | This is a simple void* pointer, and allows you to associate your own data 137 | with a given state 138 | 139 | =back 140 | 141 | =head3 Stack 142 | 143 | A stack consists of multiple states. 
When a state begins, it is I<pushed> 144 | to the stack, and when the state terminates, it is I<popped> from the stack 145 | and returns control to the previous stack state. 146 | 147 | When a state is popped, the contained information regarding positioning and 148 | children is complete, and it is therefore possible to retrieve the entire 149 | element in its byte-stream. 150 | 151 | Once a state has been popped, it is considered invalid (though it is still 152 | valid during the callback). 153 | 154 | Below is a diagram of a sample JSON stream annotated with stack/state 155 | information. 156 | 157 | Level 0 158 | { 159 | 160 | Level 1 161 | 162 | Level 2 163 | "ABC" 164 | : 165 | Level 2 166 | "XYZ" 167 | , 168 | 169 | Level 1 170 | 171 | [ 172 | Level 2 173 | 174 | { 175 | Level 3 176 | 177 | Level 4 178 | "Foo":"Bar" 179 | 180 | Level 3 181 | } 182 | Level 2 183 | ] 184 | Level 1 185 | } 186 | 187 | =head1 USING 188 | 189 | The header file C<jsonsl.h> contains the API. Read it. 190 | 191 | As an additional note, you can 'extend' the state structure 192 | (thereby eliminating the need to allocate extra pointers for 193 | the C field) by defining the C 194 | macro to expand to additional struct fields. 195 | 196 | This is assumed as the default behavior - and should work when 197 | you compile your project with C directly. 198 | 199 | If you wish to use the 'generic' mode, make sure to 200 | C<#define> or C<-D> the C<JSONSL_STATE_GENERIC> macro. 201 | 202 | Some notes regarding usage will follow: 203 | 204 | =head2 Position and Offset Tracking 205 | 206 | The state object contains some C variables. These variables contain the 207 | position relative to the amount of total bytes that the C object has 208 | been fed since creation (or since C) has been called. 
Thus, in order to 209 | make sense of these variables, you must do one of two things 210 | 211 | =over 212 | 213 | =item Buffer the entire stream (simpler, but not recommended) 214 | 215 | This way, the offsets declared in the C<< jsn->pos >> and C variables can 216 | be directly applied as offsets to the actual buffer. 217 | 218 | Of course this is not the recommended option; since C is a streaming 219 | parser, you are likely using it because you do not want to buffer the entire 220 | stream 221 | 222 | =item Note the first valid position in the existing buffer 223 | 224 | This technique requires the user to keep track of the first valid position within 225 | the current buffer. This is useful for tracking the beginnings and ends of strings. 226 | 227 | Typically you will need a simple function or macro and some variables which do 228 | the following: 229 | 230 | 231 | =over 232 | 233 | =item * 234 | 235 | Contain the minimum valid position in the buffer, e.g. C<min_available> 236 | 237 | This is initially set to 0, and increases as we discard data (see later) 238 | 239 | =item * 240 | 241 | Allow callbacks to request an advancement of the position. This means 242 | that your context object contains a "min_needed" variable. For example, 243 | one might have a C callback for the beginning of a string. The push 244 | callback will set the C<min_needed> variable to the position 245 | of the beginning of the string (i.e. C<< state->pos_begin >>). In a corresponding 246 | C callback, the string is read from an internal buffer (whose first valid 247 | position is no greater than C<< state->pos_begin >>) with a length of 248 | C<< jsn->pos - state->pos_begin >> bytes. Once the string is read, it is 249 | no longer needed, and the callback then updates the C<min_needed> variable 250 | to the parser's C<pos>. 251 | 252 | =item * 253 | 254 | After C is called, determine if the input buffer needs to be 255 | adjusted. 
This means to determine whether the C<min_needed> variable has 256 | been set to something larger than the C<min_available> variable. If this 257 | condition is true, it means part of the buffer can be discarded. The amount 258 | of bytes to discard from the beginning will be the difference between these 259 | two variables. The length of the buffer also becomes shorter by the difference. 260 | 261 | Once the bytes are discarded (one can use a simple C), 262 | the C<min_available> variable is set to the C<min_needed> value. 263 | 264 | =item * 265 | 266 | To demonstrate this, let's make a sample structure: 267 | 268 | struct parse_context { 269 | size_t min_needed; 270 | size_t min_available; 271 | 272 | char *buffer; 273 | size_t buffer_len; 274 | }; 275 | 276 | The C is the buffer which is available to the callbacks (e.g. by making 277 | this struct be the value of the C field in the C). 278 | 279 | It is possible to write a simple function which will get a slice of the buffer, 280 | given the absolute offsets from the C variable: 281 | 282 | char *get_state_buffer(struct parse_context *ctx, struct jsonsl_state_st *state) 283 | { 284 | size_t offset = state->pos_begin - ctx->min_available; 285 | return ctx->buffer + offset; 286 | } 287 | 288 | Of course this function would probably like to do some error checking to ensure 289 | that for example, the C's C is not less than the C 290 | of the C. 291 | 292 | =back 293 | 294 | =back 295 | 296 | =head2 Notes on String States 297 | 298 | It is possible to get the I of a string by getting the difference 299 | between its state's C and the parser's C variable. 300 | It should be noted that the C points to the position of the 301 | opening C<"> (quote) and the C points to the position of the 302 | closing C<"> (quote). Thus to get the actual raw string, one must 303 | increase the buffer pointer and decrease the length. 
304 | 305 | The logic may be encapsulated in a macro 306 | 307 | #define NORMALIZE_OFFSETS(buf, len) (buf)++; (len)--; 308 | /* use it */ 309 | char *buf = get_state_buffer(ctx, state); 310 | size_t len = jsn->pos - state->pos_begin; 311 | NORMALIZE_OFFSETS(buf, len); 312 | 313 | Note that care should be taken not to perform this on I like numbers, 314 | booleans, and Cs. 315 | 316 | =head2 Notes on I 317 | 318 | The C implementation is designed to work with a stream and works 319 | very nicely with the callbacks. It relies on the caller incrementally providing 320 | C with information (e.g. via C) about each element 321 | in the JSON tree. 322 | 323 | Internally it builds a graph based on inputs from each item in the JSON tree; 324 | relying on the fact that an item will only be a C or 325 | C if its parent was also a C. 326 | 327 | Thus the C functions must be fed with I data and information. 328 | 329 | In general, items need their I information. The I exists as the following 330 | 331 | =over 332 | 333 | =item Object (dictionary) values are passed with their keys 334 | 335 | This means you must buffer the keys 336 | 337 | =item Array elements are passed with their indices 338 | 339 | C does this for you automatically 340 | 341 | =item Primitives without any children are not passed 342 | 343 | C only makes sense when searching for data I a key. 344 | Passing a primitive (i.e. boolean, number, or non-key string) does not make sense 345 | 346 | =back 347 | 348 | =head2 UNICODE 349 | 350 | While JSONSL does not support unicode directly (it does not 351 | decode \uxxx escapes, nor does it care about any non-ascii 352 | characters), you can compile JSONSL using the C 353 | macro. This will make jsonsl iterate over C characters 354 | instead of the good 'ole C. Of course you would need to 355 | handle processing the stream correctly to make sure the multibyte 356 | stream was complete. 
357 | 358 | =head2 NaN, Infinity, -Infinity 359 | 360 | By default, JSONSL does not consider objects like C<{"n": NaN}>, 361 | C<{"n": Infinity}>, or C<{"n": -Infinity}> to be valid. 362 | Compile with C defined to parse these non-numbers. 363 | JSONSL will then execute your POP callback with Cspecial_flags> set 364 | to C when it parses NaN, C for 365 | Infinity, and C for -Infinity. 366 | 367 | =head2 WINDOWS 368 | 369 | JSONSL Now has a visual studio C<.sln> and C<.vcxproj> files in the 370 | C directory. 371 | 372 | If you wish to use JSONSL as a DLL, be sure to define the macro 373 | C which will properly decorate the prototypes with 374 | C<__declspec(dllexport)>. 375 | 376 | You can also run the tests on windows using the C 377 | project. You will need to manually pass in the sample input files to 378 | be tested, however. In the future, I hope to automate this process. 379 | 380 | =head2 API AND ABI STABILITY, AND PACKAGING NOTES 381 | 382 | I will attempt to maintain a stable API, but not a stable ABI. 383 | 384 | The general distribution model of JSONSL (A single source file and a single 385 | header) is designed in such a manner to allow the application to I the 386 | relevant parts in the application. 387 | 388 | For speed benefits it may also be desirable to actually have the C 389 | file embedded in the same translation unit as the user-side code calling into 390 | I itself. In such use cases, the C macro may be defined as 391 | C. 392 | 393 | 394 | =head1 AUTHOR AND COPYRIGHT 395 | 396 | Copyright (C) 2012-2017 Mark Nunberg. 397 | 398 | See C for license information. 
399 | -------------------------------------------------------------------------------- /compile-travis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | if [ "${PARSE_NAN}" = "1" ]; then 5 | make JSONSL_PARSE_NAN=1 && make JSONSL_PARSE_NAN=1 check 6 | else 7 | make && make check 8 | fi 9 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | all: glib-datatypes 2 | 3 | CFLAGS+=$(shell pkg-config glib-2.0 --cflags) -I../ 4 | LDFLAGS+=$(shell pkg-config glib-2.0 --libs) 5 | 6 | glib-datatypes: glib-datatypes.c 7 | $(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) 8 | 9 | clean: 10 | rm -f glib-datatypes 11 | -------------------------------------------------------------------------------- /examples/glib-datatypes.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "glib-datatypes.h" 7 | 8 | static int MaxOutputLevel = 20; 9 | static int MaxDescentLevel = 20; 10 | static gboolean DumpIncremental = FALSE; 11 | static gboolean DumpProgress = TRUE; 12 | static gboolean DumpCompletion = TRUE; 13 | static gboolean DumpStringContents = TRUE; 14 | static gboolean DumpHashKeys = TRUE; 15 | static gboolean DumpAll = FALSE; 16 | static gboolean Silent = FALSE; 17 | 18 | static void pad_level(int level) { 19 | for (; level > 1; level--) { 20 | putchar(' '); 21 | } 22 | } 23 | 24 | static void dump_hash(struct hash_st *hash, int level); 25 | static void dump_list(struct list_st *list, int level); 26 | static void dump_string(struct string_st *str, int level); 27 | 28 | 29 | static void dump_element(struct element_st *root, int level) 30 | { 31 | char *typestr = "UNKNOWN"; 32 | if (!root) { 33 | return; 34 | } 35 | if (MaxOutputLevel < level) { 36 | return; 37 | } 38 | #define X(t) \ 39 
| if (root->type == TYPE_##t) \ 40 | typestr = #t; 41 | _XTYPE_ALL 42 | #undef X 43 | 44 | pad_level(level); 45 | printf("<%s", typestr); 46 | 47 | switch(root->type) { 48 | case TYPE_STRING: 49 | dump_string((struct string_st*)root, level+1); 50 | break; 51 | case TYPE_LIST: 52 | dump_list((struct list_st*)root, level+1); 53 | break; 54 | case TYPE_HASH: 55 | dump_hash((struct hash_st*)root, level+1); 56 | break; 57 | default: 58 | fprintf(stderr, "Unknown type!\n"); 59 | abort(); 60 | break; 61 | } 62 | pad_level(level); 63 | printf("\n", typestr); 64 | } 65 | 66 | static void dump_string(struct string_st *str, int level) 67 | { 68 | printf(" len=\"%lu\">", strlen(str->data)); 69 | if (DumpStringContents) { 70 | printf("%s", str->data); 71 | } 72 | printf("\n"); 73 | } 74 | 75 | static void dump_list(struct list_st *list, int level) 76 | { 77 | GList *node; 78 | printf(">\n"); 79 | for (node = list->data; node; node = node->next) { 80 | dump_element((struct element_st*)node->data, level); 81 | } 82 | } 83 | 84 | static void dump_hash_iterfunc(gpointer key, gpointer value, void *data) 85 | { 86 | int level = *(int*)data; 87 | level++; 88 | printf("\n"); 89 | pad_level(level); 90 | printf("\n"); 95 | dump_element((struct element_st*)value, level+1); 96 | } 97 | 98 | static void dump_hash(struct hash_st *hash, int level) 99 | { 100 | printf(">\n"); 101 | g_hash_table_foreach(hash->data, dump_hash_iterfunc, &level); 102 | } 103 | 104 | static inline 105 | void add_to_hash(struct hash_st *parent, struct element_st *value) 106 | { 107 | assert(parent->pending_key); 108 | g_hash_table_insert(parent->data, (gpointer)parent->pending_key, value); 109 | parent->pending_key = NULL; 110 | } 111 | 112 | static inline 113 | void add_to_list(struct list_st *parent, struct element_st *value) 114 | { 115 | parent->data = g_list_append(parent->data, value); 116 | } 117 | 118 | static inline void 119 | dump_action_state(jsonsl_t jsn, 120 | jsonsl_action_t action, 121 | struct 
jsonsl_state_st *state) 122 | { 123 | int ii; 124 | size_t pos = (action == JSONSL_ACTION_POP) ? jsn->pos : state->pos_begin; 125 | if (!DumpProgress) { 126 | return; 127 | } 128 | for (ii = 1; ii < state->level; ii++) { 129 | printf(" "); 130 | } 131 | printf("L%d %c%-10s @%lu\n", 132 | state->level, 133 | action, 134 | jsonsl_strtype(state->type), 135 | pos); 136 | } 137 | 138 | static void 139 | create_new_element(jsonsl_t jsn, 140 | jsonsl_action_t action, 141 | struct jsonsl_state_st *state, 142 | const char *buf) 143 | { 144 | struct element_st *child = NULL, *parent = NULL; 145 | struct jsonsl_state_st *last_state = jsonsl_last_state(jsn, state); 146 | parent = (struct element_st*)last_state->data; 147 | 148 | dump_action_state(jsn, action, state); 149 | 150 | switch(state->type) { 151 | case JSONSL_T_SPECIAL: 152 | case JSONSL_T_STRING: { 153 | struct string_st *str = malloc(sizeof(*str)); 154 | str->data = buf; 155 | str->type = TYPE_STRING; 156 | child = (struct element_st*)str; 157 | break; 158 | } 159 | case JSONSL_T_HKEY: { 160 | struct hash_st *hash = (struct hash_st*)parent; 161 | struct string_st *str = malloc(sizeof(*str)); 162 | assert(hash->type == TYPE_HASH); 163 | hash->pending_key = buf; 164 | str->parent = NULL; 165 | str->data = buf; 166 | str->type = TYPE_STRING; 167 | state->data = (struct element_st*)str; 168 | return; /* nothing to do here */ 169 | } 170 | case JSONSL_T_LIST: { 171 | struct list_st *list = malloc(sizeof(*list)); 172 | list->type = TYPE_LIST; 173 | list->data = g_list_alloc(); 174 | child = (struct element_st*)list; 175 | break; 176 | } 177 | case JSONSL_T_OBJECT: { 178 | struct hash_st *hash = malloc(sizeof(*hash)); 179 | hash->type = TYPE_HASH; 180 | hash->data = g_hash_table_new(g_str_hash, g_str_equal); 181 | child = (struct element_st*)hash; 182 | break; 183 | } 184 | default: 185 | fprintf(stderr, "Unhandled type %c\n", state->type); 186 | abort(); 187 | break; 188 | } 189 | 190 | if (parent->type == TYPE_LIST) { 
191 | add_to_list((struct list_st*)parent, child); 192 | } else if (parent->type == TYPE_HASH) { 193 | add_to_hash((struct hash_st*)parent, child); 194 | } else { 195 | fprintf(stderr, "Requested to add to non-container parent type!\n"); 196 | abort(); 197 | } 198 | 199 | assert(child); 200 | state->data = child; 201 | } 202 | 203 | static void 204 | cleanup_closing_element(jsonsl_t jsn, 205 | jsonsl_action_t action, 206 | struct jsonsl_state_st *state, 207 | const char *at) 208 | { 209 | /* termination of an element */ 210 | 211 | struct element_st *elem = (struct element_st*)state->data; 212 | struct string_st *str = (struct string_st*)elem; 213 | assert(state); 214 | if (!DumpIncremental) { 215 | dump_action_state(jsn, action, state); 216 | } 217 | 218 | if (elem->type == TYPE_STRING) { 219 | if (*at != '"') { 220 | return; 221 | } 222 | 223 | *(char*)at = '\0'; 224 | str->data++; 225 | } 226 | 227 | if (DumpIncremental) { 228 | struct objgraph_st *objgraph = jsn->data; 229 | printf("Incremental dump at input position %lu\n", jsn->pos); 230 | dump_element(objgraph->root, 0); 231 | } 232 | } 233 | 234 | void nest_callback_initial(jsonsl_t jsn, 235 | jsonsl_action_t action, 236 | struct jsonsl_state_st *state, 237 | const char *at) 238 | { 239 | struct objgraph_st *objgraph = (struct objgraph_st*)(jsn->data); 240 | struct element_st *elem; 241 | dump_action_state(jsn, action, state); 242 | 243 | assert(action == JSONSL_ACTION_PUSH); 244 | if (state->type == JSONSL_T_LIST) { 245 | struct list_st *list = malloc(sizeof(*list)); 246 | list->data = g_list_alloc(); 247 | list->type = TYPE_LIST; 248 | elem = (struct element_st*)list; 249 | } else if (state->type == JSONSL_T_OBJECT) { 250 | struct hash_st *hash = malloc(sizeof(*hash)); 251 | hash->data = g_hash_table_new(g_str_hash, g_str_equal); 252 | hash->type = TYPE_HASH; 253 | hash->pending_key = NULL; 254 | elem = (struct element_st*)hash; 255 | } else { 256 | fprintf(stderr, "Type is neither hash nor list\n"); 257 
| abort(); 258 | } 259 | 260 | elem->parent = NULL; 261 | objgraph->root = elem; 262 | state->data = elem; 263 | jsn->action_callback = NULL; 264 | jsn->action_callback_PUSH = create_new_element; 265 | jsn->action_callback_POP = cleanup_closing_element; 266 | } 267 | 268 | int error_callback(jsonsl_t jsn, 269 | jsonsl_error_t err, 270 | struct jsonsl_state_st *state, 271 | char *at) 272 | { 273 | fprintf(stderr, "Got error at pos %lu: %s\n", 274 | jsn->pos, jsonsl_strerror(err)); 275 | printf("Remaining text: %s\n", at); 276 | abort(); 277 | return 0; 278 | } 279 | 280 | 281 | static void parse_one_file(const char *path) 282 | { 283 | int fd, status; 284 | struct stat sb; 285 | jsonsl_t jsn; 286 | struct objgraph_st graph; 287 | char *buf, *bufp; 288 | size_t nread; 289 | 290 | printf("==== %s ====\n", path); 291 | 292 | fd = open(path, O_RDONLY); 293 | if (fd == -1) { 294 | perror(path); 295 | return; 296 | } 297 | 298 | status = fstat(fd, &sb); 299 | assert(status == 0); 300 | assert(sb.st_size < 0x1000000); 301 | buf = malloc(sb.st_size); 302 | 303 | jsn = jsonsl_new(0x1000); 304 | jsonsl_enable_all_callbacks(jsn); 305 | 306 | jsn->action_callback = nest_callback_initial; 307 | jsn->action_callback_PUSH = NULL; 308 | jsn->action_callback_POP = NULL; 309 | jsn->error_callback = error_callback; 310 | jsn->data = &graph; 311 | jsn->max_callback_level = MaxDescentLevel; 312 | 313 | memset(&graph, 0, sizeof(graph)); 314 | 315 | bufp = buf; 316 | while ( (nread = read(fd, bufp, 4096)) > 0) { 317 | jsonsl_feed(jsn, bufp, nread); 318 | bufp += nread; 319 | if (nread < 4096) { 320 | break; 321 | } 322 | } 323 | 324 | if (DumpCompletion) { 325 | dump_element(graph.root, 0); 326 | } 327 | } 328 | 329 | static GOptionEntry CLIOptions[] = { 330 | {"dump-completion", 'C', 0, G_OPTION_ARG_NONE, &DumpCompletion, "Dump graph upon completion", NULL }, 331 | {"dump-incremental", 'i', 0, G_OPTION_ARG_NONE, &DumpIncremental, "Dump graph incrementally", NULL }, 332 | {"output-level", 
'L', 0, G_OPTION_ARG_INT, &MaxOutputLevel, "Maximum output level", "LEVEL"}, 333 | {"descent-level", 'R', 0, G_OPTION_ARG_INT, &MaxDescentLevel, "Maximum recursion level for graph", "LEVEL" }, 334 | {"dump-strings", 's', 0, G_OPTION_ARG_NONE, &DumpStringContents, "Display string values", NULL }, 335 | {"dump-keys", 'k', 0, G_OPTION_ARG_NONE, &DumpHashKeys, "Display dictionary keys", NULL }, 336 | {"dump-progresss", 'p', 0, G_OPTION_ARG_NONE, &DumpProgress, "Dump abbreviated progress (incrementally)", NULL }, 337 | {"verbose", 'v', 0, G_OPTION_ARG_NONE, &DumpAll, "Be verbose. dump everything", NULL }, 338 | {"quiet", 'q', 0, G_OPTION_ARG_NONE, &Silent, "Quiet, don't output anything", NULL }, 339 | { NULL } 340 | }; 341 | 342 | int main(int argc, char **argv) { 343 | int ii; 344 | GError *error = NULL; 345 | GOptionContext *context; 346 | 347 | context = g_option_context_new("FILES.."); 348 | g_option_context_add_main_entries(context, CLIOptions, NULL); 349 | if (!g_option_context_parse(context, &argc, &argv, &error)) { 350 | g_print("Option parsing failed: %s\n", error->message); 351 | exit(EXIT_FAILURE); 352 | } 353 | 354 | { 355 | int set_all = -1; 356 | if (DumpAll) { 357 | set_all = 1; 358 | } else if (Silent) { 359 | set_all = 0; 360 | } 361 | if (set_all >= 0) { 362 | DumpIncremental = set_all; 363 | DumpCompletion = set_all; 364 | DumpStringContents = set_all; 365 | DumpHashKeys = set_all; 366 | DumpProgress = set_all; 367 | } 368 | if (DumpIncremental) { 369 | DumpProgress = FALSE; 370 | } 371 | } 372 | 373 | if (argc < 2) { 374 | fprintf(stderr, "USAGE: %s FILES...\n", argv[0]); 375 | exit(EXIT_FAILURE); 376 | } 377 | for (ii = 1; ii < argc && argv[ii]; ii++) { 378 | parse_one_file(argv[ii]); 379 | } 380 | return 0; 381 | } 382 | -------------------------------------------------------------------------------- /examples/glib-datatypes.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define 
_XTYPE_ALL \ 5 | X(LIST) \ 6 | X(HASH) \ 7 | X(BOOLEAN) \ 8 | X(INTEGER) \ 9 | X(STRING) 10 | 11 | typedef enum { 12 | #define X(t) \ 13 | TYPE_ ##t , 14 | _XTYPE_ALL 15 | TYPE_UNKNOWN 16 | #undef X 17 | } jtype_t; 18 | 19 | 20 | #define ST_ELEMENT_BASE(ptype) \ 21 | jtype_t type; \ 22 | struct element_st *parent; \ 23 | ptype *data; 24 | 25 | struct element_st { ST_ELEMENT_BASE(void) }; 26 | struct string_st { 27 | ST_ELEMENT_BASE(const char) 28 | }; 29 | 30 | struct list_st { 31 | ST_ELEMENT_BASE(GList) 32 | }; 33 | 34 | struct hash_st { 35 | ST_ELEMENT_BASE(GHashTable) 36 | const char *pending_key; 37 | }; 38 | 39 | struct objgraph_st { 40 | struct element_st *root; 41 | }; 42 | -------------------------------------------------------------------------------- /json_samples.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnunberg/jsonsl/684b60f9af68b8c397422e74d0c2dd206de16a2c/json_samples.tgz -------------------------------------------------------------------------------- /jsonsl.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2012-2015 Mark Nunberg. 2 | * 3 | * See included LICENSE file for license details. 
4 | */ 5 | 6 | #include "jsonsl.h" 7 | #include 8 | #include 9 | 10 | #ifdef JSONSL_USE_METRICS 11 | #define XMETRICS \ 12 | X(STRINGY_INSIGNIFICANT) \ 13 | X(STRINGY_SLOWPATH) \ 14 | X(ALLOWED_WHITESPACE) \ 15 | X(QUOTE_FASTPATH) \ 16 | X(SPECIAL_FASTPATH) \ 17 | X(SPECIAL_WSPOP) \ 18 | X(SPECIAL_SLOWPATH) \ 19 | X(GENERIC) \ 20 | X(STRUCTURAL_TOKEN) \ 21 | X(SPECIAL_SWITCHFIRST) \ 22 | X(STRINGY_CATCH) \ 23 | X(NUMBER_FASTPATH) \ 24 | X(ESCAPES) \ 25 | X(TOTAL) \ 26 | 27 | struct jsonsl_metrics_st { 28 | #define X(m) \ 29 | unsigned long metric_##m; 30 | XMETRICS 31 | #undef X 32 | }; 33 | 34 | static struct jsonsl_metrics_st GlobalMetrics = { 0 }; 35 | static unsigned long GenericCounter[0x100] = { 0 }; 36 | static unsigned long StringyCatchCounter[0x100] = { 0 }; 37 | 38 | #define INCR_METRIC(m) \ 39 | GlobalMetrics.metric_##m++; 40 | 41 | #define INCR_GENERIC(c) \ 42 | INCR_METRIC(GENERIC); \ 43 | GenericCounter[c]++; \ 44 | 45 | #define INCR_STRINGY_CATCH(c) \ 46 | INCR_METRIC(STRINGY_CATCH); \ 47 | StringyCatchCounter[c]++; 48 | 49 | JSONSL_API 50 | void jsonsl_dump_global_metrics(void) 51 | { 52 | int ii; 53 | printf("JSONSL Metrics:\n"); 54 | #define X(m) \ 55 | printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \ 56 | (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100); 57 | XMETRICS 58 | #undef X 59 | printf("Generic Characters:\n"); 60 | for (ii = 0; ii < 0xff; ii++) { 61 | if (GenericCounter[ii]) { 62 | printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]); 63 | } 64 | } 65 | printf("Weird string loop\n"); 66 | for (ii = 0; ii < 0xff; ii++) { 67 | if (StringyCatchCounter[ii]) { 68 | printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]); 69 | } 70 | } 71 | } 72 | 73 | #else 74 | #define INCR_METRIC(m) 75 | #define INCR_GENERIC(c) 76 | #define INCR_STRINGY_CATCH(c) 77 | JSONSL_API 78 | void jsonsl_dump_global_metrics(void) { } 79 | #endif /* JSONSL_USE_METRICS */ 80 | 81 | #define CASE_DIGITS \ 82 | case '1': \ 
83 | case '2': \ 84 | case '3': \ 85 | case '4': \ 86 | case '5': \ 87 | case '6': \ 88 | case '7': \ 89 | case '8': \ 90 | case '9': \ 91 | case '0': 92 | 93 | static unsigned extract_special(unsigned); 94 | static int is_special_end(unsigned); 95 | static int is_allowed_whitespace(unsigned); 96 | static int is_allowed_escape(unsigned); 97 | static int is_simple_char(unsigned); 98 | static char get_escape_equiv(unsigned); 99 | 100 | JSONSL_API 101 | jsonsl_t jsonsl_new(int nlevels) 102 | { 103 | unsigned int ii; 104 | struct jsonsl_st * jsn; 105 | 106 | if (nlevels < 2) { 107 | return NULL; 108 | } 109 | 110 | jsn = (struct jsonsl_st *) 111 | calloc(1, sizeof (*jsn) + 112 | ( (nlevels-1) * sizeof (struct jsonsl_state_st) ) 113 | ); 114 | 115 | jsn->levels_max = (unsigned int) nlevels; 116 | jsn->max_callback_level = UINT_MAX; 117 | jsonsl_reset(jsn); 118 | for (ii = 0; ii < jsn->levels_max; ii++) { 119 | jsn->stack[ii].level = ii; 120 | } 121 | return jsn; 122 | } 123 | 124 | JSONSL_API 125 | void jsonsl_reset(jsonsl_t jsn) 126 | { 127 | jsn->tok_last = 0; 128 | jsn->can_insert = 1; 129 | jsn->pos = 0; 130 | jsn->level = 0; 131 | jsn->stopfl = 0; 132 | jsn->in_escape = 0; 133 | jsn->expecting = 0; 134 | } 135 | 136 | JSONSL_API 137 | void jsonsl_destroy(jsonsl_t jsn) 138 | { 139 | if (jsn) { 140 | free(jsn); 141 | } 142 | } 143 | 144 | 145 | #define FASTPARSE_EXHAUSTED 1 146 | #define FASTPARSE_BREAK 0 147 | 148 | /* 149 | * This function is meant to accelerate string parsing, reducing the main loop's 150 | * check if we are indeed a string. 151 | * 152 | * @param jsn the parser 153 | * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position) 154 | * @param[in,out] nbytes_p A pointer to the current size of the buffer 155 | * @return true if all bytes have been exhausted (and thus the main loop can 156 | * return), false if a special character was examined which requires greater 157 | * examination. 
158 | */ 159 | static int 160 | jsonsl__str_fastparse(jsonsl_t jsn, 161 | const jsonsl_uchar_t **bytes_p, size_t *nbytes_p) 162 | { 163 | const jsonsl_uchar_t *bytes = *bytes_p; 164 | const jsonsl_uchar_t *end; 165 | for (end = bytes + *nbytes_p; bytes != end; bytes++) { 166 | if ( 167 | #ifdef JSONSL_USE_WCHAR 168 | *bytes >= 0x100 || 169 | #endif /* JSONSL_USE_WCHAR */ 170 | (is_simple_char(*bytes))) { 171 | INCR_METRIC(TOTAL); 172 | INCR_METRIC(STRINGY_INSIGNIFICANT); 173 | } else { 174 | /* Once we're done here, re-calculate the position variables */ 175 | jsn->pos += (bytes - *bytes_p); 176 | *nbytes_p -= (bytes - *bytes_p); 177 | *bytes_p = bytes; 178 | return FASTPARSE_BREAK; 179 | } 180 | } 181 | 182 | /* Once we're done here, re-calculate the position variables */ 183 | jsn->pos += (bytes - *bytes_p); 184 | return FASTPARSE_EXHAUSTED; 185 | } 186 | 187 | /* Functions exactly like str_fastparse, except it also accepts a 'state' 188 | * argument, since the number's value is updated in the state. 
*/ 189 | static int 190 | jsonsl__num_fastparse(jsonsl_t jsn, 191 | const jsonsl_uchar_t **bytes_p, size_t *nbytes_p, 192 | struct jsonsl_state_st *state) 193 | { 194 | int exhausted = 1; 195 | size_t nbytes = *nbytes_p; 196 | const jsonsl_uchar_t *bytes = *bytes_p; 197 | 198 | for (; nbytes; nbytes--, bytes++) { 199 | jsonsl_uchar_t c = *bytes; 200 | if (isdigit(c)) { 201 | INCR_METRIC(TOTAL); 202 | INCR_METRIC(NUMBER_FASTPATH); 203 | state->nelem = (state->nelem * 10) + (c - 0x30); 204 | } else { 205 | exhausted = 0; 206 | break; 207 | } 208 | } 209 | jsn->pos += (*nbytes_p - nbytes); 210 | if (exhausted) { 211 | return FASTPARSE_EXHAUSTED; 212 | } 213 | *nbytes_p = nbytes; 214 | *bytes_p = bytes; 215 | return FASTPARSE_BREAK; 216 | } 217 | 218 | JSONSL_API 219 | void 220 | jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) 221 | { 222 | 223 | #define INVOKE_ERROR(eb) \ 224 | if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \ 225 | goto GT_AGAIN; \ 226 | } \ 227 | return; 228 | 229 | #define STACK_PUSH \ 230 | if (jsn->level >= (levels_max-1)) { \ 231 | jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \ 232 | return; \ 233 | } \ 234 | state = jsn->stack + (++jsn->level); \ 235 | state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \ 236 | state->pos_begin = jsn->pos; 237 | 238 | #define STACK_POP_NOPOS \ 239 | state->pos_cur = jsn->pos; \ 240 | state = jsn->stack + (--jsn->level); 241 | 242 | 243 | #define STACK_POP \ 244 | STACK_POP_NOPOS; \ 245 | state->pos_cur = jsn->pos; 246 | 247 | #define CALLBACK_AND_POP_NOPOS(T) \ 248 | state->pos_cur = jsn->pos; \ 249 | DO_CALLBACK(T, POP); \ 250 | state->nescapes = 0; \ 251 | state = jsn->stack + (--jsn->level); 252 | 253 | #define CALLBACK_AND_POP(T) \ 254 | CALLBACK_AND_POP_NOPOS(T); \ 255 | state->pos_cur = jsn->pos; 256 | 257 | #define SPECIAL_POP \ 258 | CALLBACK_AND_POP(SPECIAL); \ 259 | jsn->expecting = 0; \ 260 | jsn->tok_last = 0; \ 261 
| 262 | #define CUR_CHAR (*(jsonsl_uchar_t*)c) 263 | 264 | #define DO_CALLBACK(T, action) \ 265 | if (jsn->call_##T && \ 266 | jsn->max_callback_level > state->level && \ 267 | state->ignore_callback == 0) { \ 268 | \ 269 | if (jsn->action_callback_##action) { \ 270 | jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \ 271 | } else if (jsn->action_callback) { \ 272 | jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \ 273 | } \ 274 | if (jsn->stopfl) { return; } \ 275 | } 276 | 277 | /** 278 | * Verifies that we are able to insert the (non-string) item into a hash. 279 | */ 280 | #define ENSURE_HVAL \ 281 | if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \ 282 | INVOKE_ERROR(HKEY_EXPECTED); \ 283 | } 284 | 285 | #define VERIFY_SPECIAL(lit) \ 286 | if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \ 287 | INVOKE_ERROR(SPECIAL_EXPECTED); \ 288 | } 289 | 290 | #define VERIFY_SPECIAL_CI(lit) \ 291 | if (tolower(CUR_CHAR) != (lit)[jsn->pos - state->pos_begin]) { \ 292 | INVOKE_ERROR(SPECIAL_EXPECTED); \ 293 | } 294 | 295 | #define STATE_SPECIAL_LENGTH \ 296 | (state)->nescapes 297 | 298 | #define IS_NORMAL_NUMBER \ 299 | ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \ 300 | (state)->special_flags == JSONSL_SPECIALf_SIGNED) 301 | 302 | #define STATE_NUM_LAST jsn->tok_last 303 | 304 | #define CONTINUE_NEXT_CHAR() continue 305 | 306 | const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes; 307 | size_t levels_max = jsn->levels_max; 308 | struct jsonsl_state_st *state = jsn->stack + jsn->level; 309 | jsn->base = bytes; 310 | 311 | for (; nbytes; nbytes--, jsn->pos++, c++) { 312 | unsigned state_type; 313 | INCR_METRIC(TOTAL); 314 | 315 | GT_AGAIN: 316 | state_type = state->type; 317 | /* Most common type is typically a string: */ 318 | if (state_type & JSONSL_Tf_STRINGY) { 319 | /* Special escape handling for some stuff */ 320 | if (jsn->in_escape) { 321 | jsn->in_escape = 0; 322 | if 
(!is_allowed_escape(CUR_CHAR)) { 323 | INVOKE_ERROR(ESCAPE_INVALID); 324 | } else if (CUR_CHAR == 'u') { 325 | DO_CALLBACK(UESCAPE, UESCAPE); 326 | if (jsn->return_UESCAPE) { 327 | return; 328 | } 329 | } 330 | CONTINUE_NEXT_CHAR(); 331 | } 332 | 333 | if (jsonsl__str_fastparse(jsn, &c, &nbytes) == 334 | FASTPARSE_EXHAUSTED) { 335 | /* No need to readjust variables as we've exhausted the iterator */ 336 | return; 337 | } else { 338 | if (CUR_CHAR == '"') { 339 | goto GT_QUOTE; 340 | } else if (CUR_CHAR == '\\') { 341 | goto GT_ESCAPE; 342 | } else { 343 | INVOKE_ERROR(WEIRD_WHITESPACE); 344 | } 345 | } 346 | INCR_METRIC(STRINGY_SLOWPATH); 347 | 348 | } else if (state_type == JSONSL_T_SPECIAL) { 349 | /* Fast track for signed/unsigned */ 350 | if (IS_NORMAL_NUMBER) { 351 | if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) == 352 | FASTPARSE_EXHAUSTED) { 353 | return; 354 | } else { 355 | goto GT_SPECIAL_NUMERIC; 356 | } 357 | } else if (state->special_flags == JSONSL_SPECIALf_DASH) { 358 | #ifdef JSONSL_PARSE_NAN 359 | if (CUR_CHAR == 'I' || CUR_CHAR == 'i') { 360 | /* parsing -Infinity? */ 361 | state->special_flags = JSONSL_SPECIALf_NEG_INF; 362 | CONTINUE_NEXT_CHAR(); 363 | } 364 | #endif 365 | 366 | if (!isdigit(CUR_CHAR)) { 367 | INVOKE_ERROR(INVALID_NUMBER); 368 | } 369 | 370 | if (CUR_CHAR == '0') { 371 | state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED; 372 | } else if (isdigit(CUR_CHAR)) { 373 | state->special_flags = JSONSL_SPECIALf_SIGNED; 374 | state->nelem = CUR_CHAR - 0x30; 375 | } else { 376 | INVOKE_ERROR(INVALID_NUMBER); 377 | } 378 | CONTINUE_NEXT_CHAR(); 379 | 380 | } else if (state->special_flags == JSONSL_SPECIALf_ZERO) { 381 | if (isdigit(CUR_CHAR)) { 382 | /* Following a zero! 
*/ 383 | INVOKE_ERROR(INVALID_NUMBER); 384 | } 385 | /* Unset the 'zero' flag: */ 386 | if (state->special_flags & JSONSL_SPECIALf_SIGNED) { 387 | state->special_flags = JSONSL_SPECIALf_SIGNED; 388 | } else { 389 | state->special_flags = JSONSL_SPECIALf_UNSIGNED; 390 | } 391 | goto GT_SPECIAL_NUMERIC; 392 | } 393 | 394 | if ((state->special_flags & JSONSL_SPECIALf_NUMERIC) && 395 | !(state->special_flags & JSONSL_SPECIALf_INF)) { 396 | GT_SPECIAL_NUMERIC: 397 | switch (CUR_CHAR) { 398 | CASE_DIGITS 399 | STATE_NUM_LAST = '1'; 400 | CONTINUE_NEXT_CHAR(); 401 | 402 | case '.': 403 | if (state->special_flags & JSONSL_SPECIALf_FLOAT) { 404 | INVOKE_ERROR(INVALID_NUMBER); 405 | } 406 | state->special_flags |= JSONSL_SPECIALf_FLOAT; 407 | STATE_NUM_LAST = '.'; 408 | CONTINUE_NEXT_CHAR(); 409 | 410 | case 'e': 411 | case 'E': 412 | if (state->special_flags & JSONSL_SPECIALf_EXPONENT) { 413 | INVOKE_ERROR(INVALID_NUMBER); 414 | } 415 | state->special_flags |= JSONSL_SPECIALf_EXPONENT; 416 | STATE_NUM_LAST = 'e'; 417 | CONTINUE_NEXT_CHAR(); 418 | 419 | case '-': 420 | case '+': 421 | if (STATE_NUM_LAST != 'e') { 422 | INVOKE_ERROR(INVALID_NUMBER); 423 | } 424 | STATE_NUM_LAST = '-'; 425 | CONTINUE_NEXT_CHAR(); 426 | 427 | default: 428 | if (is_special_end(CUR_CHAR)) { 429 | goto GT_SPECIAL_POP; 430 | } 431 | INVOKE_ERROR(INVALID_NUMBER); 432 | break; 433 | } 434 | } 435 | /* else if (!NUMERIC) */ 436 | if (!is_special_end(CUR_CHAR)) { 437 | STATE_SPECIAL_LENGTH++; 438 | 439 | /* Verify TRUE, FALSE, NULL */ 440 | if (state->special_flags == JSONSL_SPECIALf_TRUE) { 441 | VERIFY_SPECIAL("true"); 442 | } else if (state->special_flags == JSONSL_SPECIALf_FALSE) { 443 | VERIFY_SPECIAL("false"); 444 | } else if (state->special_flags == JSONSL_SPECIALf_NULL) { 445 | VERIFY_SPECIAL("null"); 446 | #ifdef JSONSL_PARSE_NAN 447 | } else if (state->special_flags == JSONSL_SPECIALf_POS_INF) { 448 | VERIFY_SPECIAL_CI("infinity"); 449 | } else if (state->special_flags == 
JSONSL_SPECIALf_NEG_INF) { 450 | VERIFY_SPECIAL_CI("-infinity"); 451 | } else if (state->special_flags == JSONSL_SPECIALf_NAN) { 452 | VERIFY_SPECIAL_CI("nan"); 453 | } else if (state->special_flags & JSONSL_SPECIALf_NULL || 454 | state->special_flags & JSONSL_SPECIALf_NAN) { 455 | /* previous char was "n", are we parsing null or nan? */ 456 | if (CUR_CHAR != 'u') { 457 | state->special_flags &= ~JSONSL_SPECIALf_NULL; 458 | } 459 | 460 | if (tolower(CUR_CHAR) != 'a') { 461 | state->special_flags &= ~JSONSL_SPECIALf_NAN; 462 | } 463 | #endif 464 | } 465 | INCR_METRIC(SPECIAL_FASTPATH); 466 | CONTINUE_NEXT_CHAR(); 467 | } 468 | 469 | GT_SPECIAL_POP: 470 | jsn->can_insert = 0; 471 | if (IS_NORMAL_NUMBER) { 472 | /* Nothing */ 473 | } else if (state->special_flags == JSONSL_SPECIALf_ZERO || 474 | state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) { 475 | /* 0 is unsigned! */ 476 | state->special_flags = JSONSL_SPECIALf_UNSIGNED; 477 | } else if (state->special_flags == JSONSL_SPECIALf_DASH) { 478 | /* Still in dash! 
*/ 479 | INVOKE_ERROR(INVALID_NUMBER); 480 | } else if (state->special_flags & JSONSL_SPECIALf_INF) { 481 | if (STATE_SPECIAL_LENGTH != 8) { 482 | INVOKE_ERROR(SPECIAL_INCOMPLETE); 483 | } 484 | state->nelem = 1; 485 | } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) { 486 | /* Check that we're not at the end of a token */ 487 | if (STATE_NUM_LAST != '1') { 488 | INVOKE_ERROR(INVALID_NUMBER); 489 | } 490 | } else if (state->special_flags == JSONSL_SPECIALf_TRUE) { 491 | if (STATE_SPECIAL_LENGTH != 4) { 492 | INVOKE_ERROR(SPECIAL_INCOMPLETE); 493 | } 494 | state->nelem = 1; 495 | } else if (state->special_flags == JSONSL_SPECIALf_FALSE) { 496 | if (STATE_SPECIAL_LENGTH != 5) { 497 | INVOKE_ERROR(SPECIAL_INCOMPLETE); 498 | } 499 | } else if (state->special_flags == JSONSL_SPECIALf_NULL) { 500 | if (STATE_SPECIAL_LENGTH != 4) { 501 | INVOKE_ERROR(SPECIAL_INCOMPLETE); 502 | } 503 | } 504 | SPECIAL_POP; 505 | jsn->expecting = ','; 506 | if (is_allowed_whitespace(CUR_CHAR)) { 507 | CONTINUE_NEXT_CHAR(); 508 | } 509 | /** 510 | * This works because we have a non-whitespace token 511 | * which is not a special token. If this is a structural 512 | * character then it will be gracefully handled by the 513 | * switch statement. Otherwise it will default to the 'special' 514 | * state again, 515 | */ 516 | goto GT_STRUCTURAL_TOKEN; 517 | } else if (is_allowed_whitespace(CUR_CHAR)) { 518 | INCR_METRIC(ALLOWED_WHITESPACE); 519 | /* So we're not special. Harmless insignificant whitespace 520 | * passthrough 521 | */ 522 | CONTINUE_NEXT_CHAR(); 523 | } else if (extract_special(CUR_CHAR)) { 524 | /* not a string, whitespace, or structural token. 
must be special */ 525 | goto GT_SPECIAL_BEGIN; 526 | } 527 | 528 | INCR_GENERIC(CUR_CHAR); 529 | 530 | if (CUR_CHAR == '"') { 531 | GT_QUOTE: 532 | jsn->can_insert = 0; 533 | switch (state_type) { 534 | 535 | /* the end of a string or hash key */ 536 | case JSONSL_T_STRING: 537 | CALLBACK_AND_POP(STRING); 538 | CONTINUE_NEXT_CHAR(); 539 | case JSONSL_T_HKEY: 540 | CALLBACK_AND_POP(HKEY); 541 | CONTINUE_NEXT_CHAR(); 542 | 543 | case JSONSL_T_OBJECT: 544 | state->nelem++; 545 | if ( (state->nelem-1) % 2 ) { 546 | /* Odd, this must be a hash value */ 547 | if (jsn->tok_last != ':') { 548 | INVOKE_ERROR(MISSING_TOKEN); 549 | } 550 | jsn->expecting = ','; /* Can't figure out what to expect next */ 551 | jsn->tok_last = 0; 552 | 553 | STACK_PUSH; 554 | state->type = JSONSL_T_STRING; 555 | DO_CALLBACK(STRING, PUSH); 556 | 557 | } else { 558 | /* hash key */ 559 | if (jsn->expecting != '"') { 560 | INVOKE_ERROR(STRAY_TOKEN); 561 | } 562 | jsn->tok_last = 0; 563 | jsn->expecting = ':'; 564 | 565 | STACK_PUSH; 566 | state->type = JSONSL_T_HKEY; 567 | DO_CALLBACK(HKEY, PUSH); 568 | } 569 | CONTINUE_NEXT_CHAR(); 570 | 571 | case JSONSL_T_LIST: 572 | state->nelem++; 573 | STACK_PUSH; 574 | state->type = JSONSL_T_STRING; 575 | jsn->expecting = ','; 576 | jsn->tok_last = 0; 577 | DO_CALLBACK(STRING, PUSH); 578 | CONTINUE_NEXT_CHAR(); 579 | 580 | case JSONSL_T_SPECIAL: 581 | INVOKE_ERROR(STRAY_TOKEN); 582 | break; 583 | 584 | default: 585 | INVOKE_ERROR(STRING_OUTSIDE_CONTAINER); 586 | break; 587 | } /* switch(state->type) */ 588 | } else if (CUR_CHAR == '\\') { 589 | GT_ESCAPE: 590 | INCR_METRIC(ESCAPES); 591 | /* Escape */ 592 | if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) { 593 | INVOKE_ERROR(ESCAPE_OUTSIDE_STRING); 594 | } 595 | state->nescapes++; 596 | jsn->in_escape = 1; 597 | CONTINUE_NEXT_CHAR(); 598 | } /* " or \ */ 599 | 600 | GT_STRUCTURAL_TOKEN: 601 | switch (CUR_CHAR) { 602 | case ':': 603 | INCR_METRIC(STRUCTURAL_TOKEN); 604 | if (jsn->expecting != CUR_CHAR) { 605 
| INVOKE_ERROR(STRAY_TOKEN); 606 | } 607 | jsn->tok_last = ':'; 608 | jsn->can_insert = 1; 609 | jsn->expecting = '"'; 610 | CONTINUE_NEXT_CHAR(); 611 | 612 | case ',': 613 | INCR_METRIC(STRUCTURAL_TOKEN); 614 | /** 615 | * The comma is one of the more generic tokens. 616 | * In the context of an OBJECT, the can_insert flag 617 | * should never be set, and no other action is 618 | * necessary. 619 | */ 620 | if (jsn->expecting != CUR_CHAR) { 621 | /* make this branch execute only when we haven't manually 622 | * just placed the ',' in the expecting register. 623 | */ 624 | INVOKE_ERROR(STRAY_TOKEN); 625 | } 626 | 627 | if (state->type == JSONSL_T_OBJECT) { 628 | /* end of hash value, expect a string as a hash key */ 629 | jsn->expecting = '"'; 630 | } else { 631 | jsn->can_insert = 1; 632 | } 633 | 634 | jsn->tok_last = ','; 635 | jsn->expecting = '"'; 636 | CONTINUE_NEXT_CHAR(); 637 | 638 | /* new list or object */ 639 | /* hashes are more common */ 640 | case '{': 641 | case '[': 642 | INCR_METRIC(STRUCTURAL_TOKEN); 643 | if (!jsn->can_insert) { 644 | INVOKE_ERROR(CANT_INSERT); 645 | } 646 | 647 | ENSURE_HVAL; 648 | state->nelem++; 649 | 650 | STACK_PUSH; 651 | /* because the constants match the opening delimiters, we can do this: */ 652 | state->type = CUR_CHAR; 653 | state->nelem = 0; 654 | jsn->can_insert = 1; 655 | if (CUR_CHAR == '{') { 656 | /* If we're a hash, we expect a key first, which is quouted */ 657 | jsn->expecting = '"'; 658 | } 659 | if (CUR_CHAR == JSONSL_T_OBJECT) { 660 | DO_CALLBACK(OBJECT, PUSH); 661 | } else { 662 | DO_CALLBACK(LIST, PUSH); 663 | } 664 | jsn->tok_last = 0; 665 | CONTINUE_NEXT_CHAR(); 666 | 667 | /* closing of list or object */ 668 | case '}': 669 | case ']': 670 | INCR_METRIC(STRUCTURAL_TOKEN); 671 | if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) { 672 | INVOKE_ERROR(TRAILING_COMMA); 673 | } 674 | 675 | jsn->can_insert = 0; 676 | jsn->level--; 677 | jsn->expecting = ','; 678 | jsn->tok_last = 0; 679 | if 
(CUR_CHAR == ']') { 680 | if (state->type != '[') { 681 | INVOKE_ERROR(BRACKET_MISMATCH); 682 | } 683 | DO_CALLBACK(LIST, POP); 684 | } else { 685 | if (state->type != '{') { 686 | INVOKE_ERROR(BRACKET_MISMATCH); 687 | } else if (state->nelem && state->nelem % 2 != 0) { 688 | INVOKE_ERROR(VALUE_EXPECTED); 689 | } 690 | DO_CALLBACK(OBJECT, POP); 691 | } 692 | state = jsn->stack + jsn->level; 693 | state->pos_cur = jsn->pos; 694 | CONTINUE_NEXT_CHAR(); 695 | 696 | default: 697 | GT_SPECIAL_BEGIN: 698 | /** 699 | * Not a string, not a structural token, and not benign whitespace. 700 | * Technically we should iterate over the character always, but since 701 | * we are not doing full numerical/value decoding anyway (but only hinting), 702 | * we only check upon entry. 703 | */ 704 | if (state->type != JSONSL_T_SPECIAL) { 705 | int special_flags = extract_special(CUR_CHAR); 706 | if (!special_flags) { 707 | /** 708 | * Try to do some heuristics here anyway to figure out what kind of 709 | * error this is. The 'special' case is a fallback scenario anyway. 
710 | */ 711 | if (CUR_CHAR == '\0') { 712 | INVOKE_ERROR(FOUND_NULL_BYTE); 713 | } else if (CUR_CHAR < 0x20) { 714 | INVOKE_ERROR(WEIRD_WHITESPACE); 715 | } else { 716 | INVOKE_ERROR(SPECIAL_EXPECTED); 717 | } 718 | } 719 | ENSURE_HVAL; 720 | state->nelem++; 721 | if (!jsn->can_insert) { 722 | INVOKE_ERROR(CANT_INSERT); 723 | } 724 | STACK_PUSH; 725 | state->type = JSONSL_T_SPECIAL; 726 | state->special_flags = special_flags; 727 | STATE_SPECIAL_LENGTH = 1; 728 | 729 | if (special_flags == JSONSL_SPECIALf_UNSIGNED) { 730 | state->nelem = CUR_CHAR - 0x30; 731 | STATE_NUM_LAST = '1'; 732 | } else { 733 | STATE_NUM_LAST = '-'; 734 | state->nelem = 0; 735 | } 736 | DO_CALLBACK(SPECIAL, PUSH); 737 | } 738 | CONTINUE_NEXT_CHAR(); 739 | } 740 | } 741 | } 742 | 743 | JSONSL_API 744 | const char* jsonsl_strerror(jsonsl_error_t err) 745 | { 746 | if (err == JSONSL_ERROR_SUCCESS) { 747 | return "SUCCESS"; 748 | } 749 | #define X(t) \ 750 | if (err == JSONSL_ERROR_##t) \ 751 | return #t; 752 | JSONSL_XERR; 753 | #undef X 754 | return ""; 755 | } 756 | 757 | JSONSL_API 758 | const char *jsonsl_strtype(jsonsl_type_t type) 759 | { 760 | #define X(o,c) \ 761 | if (type == JSONSL_T_##o) \ 762 | return #o; 763 | JSONSL_XTYPE 764 | #undef X 765 | return "UNKNOWN TYPE"; 766 | 767 | } 768 | 769 | /* 770 | * 771 | * JPR/JSONPointer functions 772 | * 773 | * 774 | */ 775 | #ifndef JSONSL_NO_JPR 776 | static 777 | jsonsl_jpr_type_t 778 | populate_component(char *in, 779 | struct jsonsl_jpr_component_st *component, 780 | char **next, 781 | jsonsl_error_t *errp) 782 | { 783 | unsigned long pctval; 784 | char *c = NULL, *outp = NULL, *end = NULL; 785 | size_t input_len; 786 | jsonsl_jpr_type_t ret = JSONSL_PATH_NONE; 787 | 788 | if (*next == NULL || *(*next) == '\0') { 789 | return JSONSL_PATH_NONE; 790 | } 791 | 792 | /* Replace the next / with a NULL */ 793 | *next = strstr(in, "/"); 794 | if (*next != NULL) { 795 | *(*next) = '\0'; /* drop the forward slash */ 796 | input_len = *next - 
in; 797 | end = *next; 798 | *next += 1; /* next character after the '/' */ 799 | } else { 800 | input_len = strlen(in); 801 | end = in + input_len + 1; 802 | } 803 | 804 | component->pstr = in; 805 | 806 | /* Check for special components of interest */ 807 | if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) { 808 | /* Lone wildcard */ 809 | ret = JSONSL_PATH_WILDCARD; 810 | goto GT_RET; 811 | } else if (isdigit(*in)) { 812 | /* ASCII Numeric */ 813 | char *endptr; 814 | component->idx = strtoul(in, &endptr, 10); 815 | if (endptr && *endptr == '\0') { 816 | ret = JSONSL_PATH_NUMERIC; 817 | goto GT_RET; 818 | } 819 | } 820 | 821 | /* Default, it's a string */ 822 | ret = JSONSL_PATH_STRING; 823 | for (c = outp = in; c < end; c++, outp++) { 824 | char origc; 825 | if (*c != '%') { 826 | goto GT_ASSIGN; 827 | } 828 | /* 829 | * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' } 830 | */ 831 | 832 | /* Need %XX */ 833 | if (c+2 >= end) { 834 | *errp = JSONSL_ERROR_PERCENT_BADHEX; 835 | return JSONSL_PATH_INVALID; 836 | } 837 | if (! 
(isxdigit(*(c+1)) && isxdigit(*(c+2))) ) { 838 | *errp = JSONSL_ERROR_PERCENT_BADHEX; 839 | return JSONSL_PATH_INVALID; 840 | } 841 | 842 | /* Temporarily null-terminate the characters */ 843 | origc = *(c+3); 844 | *(c+3) = '\0'; 845 | pctval = strtoul(c+1, NULL, 16); 846 | *(c+3) = origc; 847 | 848 | *outp = (char) pctval; 849 | c += 2; 850 | continue; 851 | 852 | GT_ASSIGN: 853 | *outp = *c; 854 | } 855 | /* Null-terminate the string */ 856 | for (; outp < c; outp++) { 857 | *outp = '\0'; 858 | } 859 | 860 | GT_RET: 861 | component->ptype = ret; 862 | if (ret != JSONSL_PATH_WILDCARD) { 863 | component->len = strlen(component->pstr); 864 | } 865 | return ret; 866 | } 867 | 868 | JSONSL_API 869 | jsonsl_jpr_t 870 | jsonsl_jpr_new(const char *path, jsonsl_error_t *errp) 871 | { 872 | char *my_copy = NULL; 873 | int count, curidx; 874 | struct jsonsl_jpr_st *ret = NULL; 875 | struct jsonsl_jpr_component_st *components = NULL; 876 | size_t origlen; 877 | jsonsl_error_t errstacked; 878 | 879 | #define JPR_BAIL(err) *errp = err; goto GT_ERROR; 880 | 881 | if (errp == NULL) { 882 | errp = &errstacked; 883 | } 884 | 885 | if (path == NULL || *path != '/') { 886 | JPR_BAIL(JSONSL_ERROR_JPR_NOROOT); 887 | } 888 | 889 | count = 1; 890 | path++; 891 | { 892 | const char *c = path; 893 | for (; *c; c++) { 894 | if (*c == '/') { 895 | count++; 896 | if (*(c+1) == '/') { 897 | JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH); 898 | } 899 | } 900 | } 901 | } 902 | if(*path) { 903 | count++; 904 | } 905 | 906 | components = (struct jsonsl_jpr_component_st *) 907 | malloc(sizeof(*components) * count); 908 | if (!components) { 909 | JPR_BAIL(JSONSL_ERROR_ENOMEM); 910 | } 911 | 912 | my_copy = (char *)malloc(strlen(path) + 1); 913 | if (!my_copy) { 914 | JPR_BAIL(JSONSL_ERROR_ENOMEM); 915 | } 916 | 917 | strcpy(my_copy, path); 918 | 919 | components[0].ptype = JSONSL_PATH_ROOT; 920 | 921 | if (*my_copy) { 922 | char *cur = my_copy; 923 | int pathret = JSONSL_PATH_STRING; 924 | curidx = 1; 925 | 
while (curidx < count) { 926 | pathret = populate_component(cur, components + curidx, &cur, errp); 927 | if (pathret > 0) { 928 | curidx++; 929 | } else { 930 | break; 931 | } 932 | } 933 | 934 | if (pathret == JSONSL_PATH_INVALID) { 935 | JPR_BAIL(JSONSL_ERROR_JPR_BADPATH); 936 | } 937 | } else { 938 | curidx = 1; 939 | } 940 | 941 | path--; /*revert path to leading '/' */ 942 | origlen = strlen(path) + 1; 943 | ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret)); 944 | if (!ret) { 945 | JPR_BAIL(JSONSL_ERROR_ENOMEM); 946 | } 947 | ret->orig = (char *)malloc(origlen); 948 | if (!ret->orig) { 949 | JPR_BAIL(JSONSL_ERROR_ENOMEM); 950 | } 951 | ret->components = components; 952 | ret->ncomponents = curidx; 953 | ret->basestr = my_copy; 954 | ret->norig = origlen-1; 955 | strcpy(ret->orig, path); 956 | 957 | return ret; 958 | 959 | GT_ERROR: 960 | free(my_copy); 961 | free(components); 962 | if (ret) { 963 | free(ret->orig); 964 | } 965 | free(ret); 966 | return NULL; 967 | #undef JPR_BAIL 968 | } 969 | 970 | void jsonsl_jpr_destroy(jsonsl_jpr_t jpr) 971 | { 972 | free(jpr->components); 973 | free(jpr->basestr); 974 | free(jpr->orig); 975 | free(jpr); 976 | } 977 | 978 | /** 979 | * Call when there is a possibility of a match, either as a final match or 980 | * as a path within a match 981 | * @param jpr The JPR path 982 | * @param component Component corresponding to the current element 983 | * @param prlevel The level of the *parent* 984 | * @param chtype The type of the child 985 | * @return Match status 986 | */ 987 | static jsonsl_jpr_match_t 988 | jsonsl__match_continue(jsonsl_jpr_t jpr, 989 | const struct jsonsl_jpr_component_st *component, 990 | unsigned prlevel, unsigned chtype) 991 | { 992 | const struct jsonsl_jpr_component_st *next_comp = component + 1; 993 | if (prlevel == jpr->ncomponents - 1) { 994 | /* This is the match. 
Check the expected type of the match against 995 | * the child */ 996 | if (jpr->match_type == 0 || jpr->match_type == chtype) { 997 | return JSONSL_MATCH_COMPLETE; 998 | } else { 999 | return JSONSL_MATCH_TYPE_MISMATCH; 1000 | } 1001 | } 1002 | if (chtype == JSONSL_T_LIST) { 1003 | if (next_comp->ptype == JSONSL_PATH_NUMERIC) { 1004 | return JSONSL_MATCH_POSSIBLE; 1005 | } else { 1006 | return JSONSL_MATCH_TYPE_MISMATCH; 1007 | } 1008 | } else if (chtype == JSONSL_T_OBJECT) { 1009 | if (next_comp->ptype == JSONSL_PATH_NUMERIC) { 1010 | return JSONSL_MATCH_TYPE_MISMATCH; 1011 | } else { 1012 | return JSONSL_MATCH_POSSIBLE; 1013 | } 1014 | } else { 1015 | return JSONSL_MATCH_TYPE_MISMATCH; 1016 | } 1017 | } 1018 | 1019 | JSONSL_API 1020 | jsonsl_jpr_match_t 1021 | jsonsl_path_match(jsonsl_jpr_t jpr, 1022 | const struct jsonsl_state_st *parent, 1023 | const struct jsonsl_state_st *child, 1024 | const char *key, size_t nkey) 1025 | { 1026 | const struct jsonsl_jpr_component_st *comp; 1027 | if (!parent) { 1028 | /* No parent. Return immediately since it's always a match */ 1029 | return jsonsl__match_continue(jpr, jpr->components, 0, child->type); 1030 | } 1031 | 1032 | comp = jpr->components + parent->level; 1033 | 1034 | /* note that we don't need to verify the type of the match, this is 1035 | * always done through the previous call to jsonsl__match_continue. 
1036 | * If we are in a POSSIBLE tree then we can be certain the types (at 1037 | * least at this level) are correct */ 1038 | if (parent->type == JSONSL_T_OBJECT) { 1039 | if (comp->len != nkey || strncmp(key, comp->pstr, nkey) != 0) { 1040 | return JSONSL_MATCH_NOMATCH; 1041 | } 1042 | } else { 1043 | if (comp->idx != parent->nelem - 1) { 1044 | return JSONSL_MATCH_NOMATCH; 1045 | } 1046 | } 1047 | return jsonsl__match_continue(jpr, comp, parent->level, child->type); 1048 | } 1049 | 1050 | JSONSL_API 1051 | jsonsl_jpr_match_t 1052 | jsonsl_jpr_match(jsonsl_jpr_t jpr, 1053 | unsigned int parent_type, 1054 | unsigned int parent_level, 1055 | const char *key, 1056 | size_t nkey) 1057 | { 1058 | /* find our current component. This is the child level */ 1059 | int cmpret; 1060 | struct jsonsl_jpr_component_st *p_component; 1061 | p_component = jpr->components + parent_level; 1062 | 1063 | if (parent_level >= jpr->ncomponents) { 1064 | return JSONSL_MATCH_NOMATCH; 1065 | } 1066 | 1067 | /* Lone query for 'root' element. Always matches */ 1068 | if (parent_level == 0) { 1069 | if (jpr->ncomponents == 1) { 1070 | return JSONSL_MATCH_COMPLETE; 1071 | } else { 1072 | return JSONSL_MATCH_POSSIBLE; 1073 | } 1074 | } 1075 | 1076 | /* Wildcard, always matches */ 1077 | if (p_component->ptype == JSONSL_PATH_WILDCARD) { 1078 | if (parent_level == jpr->ncomponents-1) { 1079 | return JSONSL_MATCH_COMPLETE; 1080 | } else { 1081 | return JSONSL_MATCH_POSSIBLE; 1082 | } 1083 | } 1084 | 1085 | /* Check numeric array index. 
This gets its special block so we can avoid 1086 | * string comparisons */ 1087 | if (p_component->ptype == JSONSL_PATH_NUMERIC) { 1088 | if (parent_type == JSONSL_T_LIST) { 1089 | if (p_component->idx != nkey) { 1090 | /* Wrong index */ 1091 | return JSONSL_MATCH_NOMATCH; 1092 | } else { 1093 | if (parent_level == jpr->ncomponents-1) { 1094 | /* This is the last element of the path */ 1095 | return JSONSL_MATCH_COMPLETE; 1096 | } else { 1097 | /* Intermediate element */ 1098 | return JSONSL_MATCH_POSSIBLE; 1099 | } 1100 | } 1101 | } else if (p_component->is_arridx) { 1102 | /* Numeric and an array index (set explicitly by user). But not 1103 | * a list for a parent */ 1104 | return JSONSL_MATCH_TYPE_MISMATCH; 1105 | } 1106 | } else if (parent_type == JSONSL_T_LIST) { 1107 | return JSONSL_MATCH_TYPE_MISMATCH; 1108 | } 1109 | 1110 | /* Check lengths */ 1111 | if (p_component->len != nkey) { 1112 | return JSONSL_MATCH_NOMATCH; 1113 | } 1114 | 1115 | /* Check string comparison */ 1116 | cmpret = strncmp(p_component->pstr, key, nkey); 1117 | if (cmpret == 0) { 1118 | if (parent_level == jpr->ncomponents-1) { 1119 | return JSONSL_MATCH_COMPLETE; 1120 | } else { 1121 | return JSONSL_MATCH_POSSIBLE; 1122 | } 1123 | } 1124 | 1125 | return JSONSL_MATCH_NOMATCH; 1126 | } 1127 | 1128 | JSONSL_API 1129 | void jsonsl_jpr_match_state_init(jsonsl_t jsn, 1130 | jsonsl_jpr_t *jprs, 1131 | size_t njprs) 1132 | { 1133 | size_t ii, *firstjmp; 1134 | if (njprs == 0) { 1135 | return; 1136 | } 1137 | jsn->jprs = (jsonsl_jpr_t *)malloc(sizeof(jsonsl_jpr_t) * njprs); 1138 | jsn->jpr_count = njprs; 1139 | jsn->jpr_root = (size_t*)calloc(1, sizeof(size_t) * njprs * jsn->levels_max); 1140 | memcpy(jsn->jprs, jprs, sizeof(jsonsl_jpr_t) * njprs); 1141 | /* Set the initial jump table values */ 1142 | 1143 | firstjmp = jsn->jpr_root; 1144 | for (ii = 0; ii < njprs; ii++) { 1145 | firstjmp[ii] = ii+1; 1146 | } 1147 | } 1148 | 1149 | JSONSL_API 1150 | void jsonsl_jpr_match_state_cleanup(jsonsl_t 
jsn) 1151 | { 1152 | if (jsn->jpr_count == 0) { 1153 | return; 1154 | } 1155 | 1156 | free(jsn->jpr_root); 1157 | free(jsn->jprs); 1158 | jsn->jprs = NULL; 1159 | jsn->jpr_root = NULL; 1160 | jsn->jpr_count = 0; 1161 | } 1162 | 1163 | /** 1164 | * This function should be called exactly once on each element... 1165 | * This should also be called in recursive order, since we rely 1166 | * on the parent having been initalized for a match. 1167 | * 1168 | * Since the parent is checked for a match as well, we maintain a 'serial' counter. 1169 | * Whenever we traverse an element, we expect the serial to be the same as a global 1170 | * integer. If they do not match, we re-initialize the context, and set the serial. 1171 | * 1172 | * This ensures a type of consistency without having a proactive reset by the 1173 | * main lexer itself. 1174 | * 1175 | */ 1176 | JSONSL_API 1177 | jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn, 1178 | struct jsonsl_state_st *state, 1179 | const char *key, 1180 | size_t nkey, 1181 | jsonsl_jpr_match_t *out) 1182 | { 1183 | struct jsonsl_state_st *parent_state; 1184 | jsonsl_jpr_t ret = NULL; 1185 | 1186 | /* Jump and JPR tables for our own state and the parent state */ 1187 | size_t *jmptable, *pjmptable; 1188 | size_t jmp_cur, ii, ourjmpidx; 1189 | 1190 | if (!jsn->jpr_root) { 1191 | *out = JSONSL_MATCH_NOMATCH; 1192 | return NULL; 1193 | } 1194 | 1195 | pjmptable = jsn->jpr_root + (jsn->jpr_count * (state->level-1)); 1196 | jmptable = pjmptable + jsn->jpr_count; 1197 | 1198 | /* If the parent cannot match, then invalidate it */ 1199 | if (*pjmptable == 0) { 1200 | *jmptable = 0; 1201 | *out = JSONSL_MATCH_NOMATCH; 1202 | return NULL; 1203 | } 1204 | 1205 | parent_state = jsn->stack + state->level - 1; 1206 | 1207 | if (parent_state->type == JSONSL_T_LIST) { 1208 | nkey = (size_t) parent_state->nelem; 1209 | } 1210 | 1211 | *jmptable = 0; 1212 | ourjmpidx = 0; 1213 | memset(jmptable, 0, sizeof(int) * jsn->jpr_count); 1214 | 1215 | for (ii 
= 0; ii < jsn->jpr_count; ii++) { 1216 | jmp_cur = pjmptable[ii]; 1217 | if (jmp_cur) { 1218 | jsonsl_jpr_t jpr = jsn->jprs[jmp_cur-1]; 1219 | *out = jsonsl_jpr_match(jpr, 1220 | parent_state->type, 1221 | parent_state->level, 1222 | key, nkey); 1223 | if (*out == JSONSL_MATCH_COMPLETE) { 1224 | ret = jpr; 1225 | *jmptable = 0; 1226 | return ret; 1227 | } else if (*out == JSONSL_MATCH_POSSIBLE) { 1228 | jmptable[ourjmpidx] = ii+1; 1229 | ourjmpidx++; 1230 | } 1231 | } else { 1232 | break; 1233 | } 1234 | } 1235 | if (!*jmptable) { 1236 | *out = JSONSL_MATCH_NOMATCH; 1237 | } 1238 | return NULL; 1239 | } 1240 | 1241 | JSONSL_API 1242 | const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match) 1243 | { 1244 | #define X(T,v) \ 1245 | if ( match == JSONSL_MATCH_##T ) \ 1246 | return #T; 1247 | JSONSL_XMATCH 1248 | #undef X 1249 | return ""; 1250 | } 1251 | 1252 | #endif /* JSONSL_WITH_JPR */ 1253 | 1254 | static char * 1255 | jsonsl__writeutf8(uint32_t pt, char *out) 1256 | { 1257 | #define ADD_OUTPUT(c) *out = (char)(c); out++; 1258 | 1259 | if (pt < 0x80) { 1260 | ADD_OUTPUT(pt); 1261 | } else if (pt < 0x800) { 1262 | ADD_OUTPUT((pt >> 6) | 0xC0); 1263 | ADD_OUTPUT((pt & 0x3F) | 0x80); 1264 | } else if (pt < 0x10000) { 1265 | ADD_OUTPUT((pt >> 12) | 0xE0); 1266 | ADD_OUTPUT(((pt >> 6) & 0x3F) | 0x80); 1267 | ADD_OUTPUT((pt & 0x3F) | 0x80); 1268 | } else { 1269 | ADD_OUTPUT((pt >> 18) | 0xF0); 1270 | ADD_OUTPUT(((pt >> 12) & 0x3F) | 0x80); 1271 | ADD_OUTPUT(((pt >> 6) & 0x3F) | 0x80); 1272 | ADD_OUTPUT((pt & 0x3F) | 0x80); 1273 | } 1274 | return out; 1275 | #undef ADD_OUTPUT 1276 | } 1277 | 1278 | /* Thanks snej (https://github.com/mnunberg/jsonsl/issues/9) */ 1279 | static int 1280 | jsonsl__digit2int(char ch) { 1281 | int d = ch - '0'; 1282 | if ((unsigned) d < 10) { 1283 | return d; 1284 | } 1285 | d = ch - 'a'; 1286 | if ((unsigned) d < 6) { 1287 | return d + 10; 1288 | } 1289 | d = ch - 'A'; 1290 | if ((unsigned) d < 6) { 1291 | return d + 10; 1292 | } 1293 | 
return -1; 1294 | } 1295 | 1296 | /* Assume 's' is at least 4 bytes long */ 1297 | static int 1298 | jsonsl__get_uescape_16(const char *s) 1299 | { 1300 | int ret = 0; 1301 | int cur; 1302 | 1303 | #define GET_DIGIT(off) \ 1304 | cur = jsonsl__digit2int(s[off]); \ 1305 | if (cur == -1) { return -1; } \ 1306 | ret |= (cur << (12 - (off * 4))); 1307 | 1308 | GET_DIGIT(0); 1309 | GET_DIGIT(1); 1310 | GET_DIGIT(2); 1311 | GET_DIGIT(3); 1312 | #undef GET_DIGIT 1313 | return ret; 1314 | } 1315 | 1316 | /** 1317 | * Utility function to convert escape sequences 1318 | */ 1319 | JSONSL_API 1320 | size_t jsonsl_util_unescape_ex(const char *in, 1321 | char *out, 1322 | size_t len, 1323 | const int toEscape[128], 1324 | unsigned *oflags, 1325 | jsonsl_error_t *err, 1326 | const char **errat) 1327 | { 1328 | const unsigned char *c = (const unsigned char*)in; 1329 | char *begin_p = out; 1330 | unsigned oflags_s; 1331 | uint16_t last_codepoint = 0; 1332 | 1333 | if (!oflags) { 1334 | oflags = &oflags_s; 1335 | } 1336 | *oflags = 0; 1337 | 1338 | #define UNESCAPE_BAIL(e,offset) \ 1339 | *err = JSONSL_ERROR_##e; \ 1340 | if (errat) { \ 1341 | *errat = (const char*)(c+ (ptrdiff_t)(offset)); \ 1342 | } \ 1343 | return 0; 1344 | 1345 | for (; len; len--, c++, out++) { 1346 | int uescval; 1347 | if (*c != '\\') { 1348 | /* Not an escape, so we don't care about this */ 1349 | goto GT_ASSIGN; 1350 | } 1351 | 1352 | if (len < 2) { 1353 | UNESCAPE_BAIL(ESCAPE_INVALID, 0); 1354 | } 1355 | if (!is_allowed_escape(c[1])) { 1356 | UNESCAPE_BAIL(ESCAPE_INVALID, 1) 1357 | } 1358 | if ((toEscape && toEscape[(unsigned char)c[1] & 0x7f] == 0 && 1359 | c[1] != '\\' && c[1] != '"')) { 1360 | /* if we don't want to unescape this string, write the escape sequence to the output */ 1361 | *out++ = *c++; 1362 | --len; 1363 | goto GT_ASSIGN; 1364 | } 1365 | 1366 | if (c[1] != 'u') { 1367 | /* simple skip-and-replace using pre-defined maps. 
1368 | * TODO: should the maps actually reflect the desired 1369 | * replacement character in toEscape? 1370 | */ 1371 | char esctmp = get_escape_equiv(c[1]); 1372 | if (esctmp) { 1373 | /* Check if there is a corresponding replacement */ 1374 | *out = esctmp; 1375 | } else { 1376 | /* Just gobble up the 'reverse-solidus' */ 1377 | *out = c[1]; 1378 | } 1379 | len--; 1380 | c++; 1381 | /* do not assign, just continue */ 1382 | continue; 1383 | } 1384 | 1385 | /* next == 'u' */ 1386 | if (len < 6) { 1387 | /* Need at least six characters.. */ 1388 | UNESCAPE_BAIL(UESCAPE_TOOSHORT, 2); 1389 | } 1390 | 1391 | uescval = jsonsl__get_uescape_16((const char *)c + 2); 1392 | if (uescval == -1) { 1393 | UNESCAPE_BAIL(PERCENT_BADHEX, -1); 1394 | } 1395 | 1396 | if (last_codepoint) { 1397 | uint16_t w1 = last_codepoint, w2 = (uint16_t)uescval; 1398 | uint32_t cp; 1399 | 1400 | if (uescval < 0xDC00 || uescval > 0xDFFF) { 1401 | UNESCAPE_BAIL(INVALID_CODEPOINT, -1); 1402 | } 1403 | 1404 | cp = (w1 & 0x3FF) << 10; 1405 | cp |= (w2 & 0x3FF); 1406 | cp += 0x10000; 1407 | 1408 | out = jsonsl__writeutf8(cp, out) - 1; 1409 | last_codepoint = 0; 1410 | 1411 | } else if (uescval < 0xD800 || uescval > 0xDFFF) { 1412 | *oflags |= JSONSL_SPECIALf_NONASCII; 1413 | out = jsonsl__writeutf8(uescval, out) - 1; 1414 | 1415 | } else if (uescval < 0xDC00) { 1416 | *oflags |= JSONSL_SPECIALf_NONASCII; 1417 | last_codepoint = (uint16_t)uescval; 1418 | out--; 1419 | } else { 1420 | UNESCAPE_BAIL(INVALID_CODEPOINT, 2); 1421 | } 1422 | 1423 | /* Post uescape cleanup */ 1424 | len -= 5; /* Gobble up 5 chars after 'u' */ 1425 | c += 5; 1426 | continue; 1427 | 1428 | /* Only reached by previous branches */ 1429 | GT_ASSIGN: 1430 | *out = *c; 1431 | } 1432 | 1433 | if (last_codepoint) { 1434 | *err = JSONSL_ERROR_INVALID_CODEPOINT; 1435 | return 0; 1436 | } 1437 | 1438 | *err = JSONSL_ERROR_SUCCESS; 1439 | return out - begin_p; 1440 | } 1441 | 1442 | /** 1443 | * Character Table definitions. 
1444 | * These were all generated via srcutil/genchartables.pl 1445 | */ 1446 | 1447 | /** 1448 | * This table contains the beginnings of non-string 1449 | * allowable (bareword) values. 1450 | */ 1451 | static unsigned short Special_Table[0x100] = { 1452 | /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ 1453 | /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */ 1454 | /* 0x2d */ JSONSL_SPECIALf_DASH /* <-> */, /* 0x2d */ 1455 | /* 0x2e */ 0,0, /* 0x2f */ 1456 | /* 0x30 */ JSONSL_SPECIALf_ZERO /* <0> */, /* 0x30 */ 1457 | /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* <1> */, /* 0x31 */ 1458 | /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* <2> */, /* 0x32 */ 1459 | /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* <3> */, /* 0x33 */ 1460 | /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* <4> */, /* 0x34 */ 1461 | /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* <5> */, /* 0x35 */ 1462 | /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* <6> */, /* 0x36 */ 1463 | /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* <7> */, /* 0x37 */ 1464 | /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* <8> */, /* 0x38 */ 1465 | /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* <9> */, /* 0x39 */ 1466 | /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x48 */ 1467 | /* 0x49 */ JSONSL__INF_PROXY /* */, /* 0x49 */ 1468 | /* 0x4a */ 0,0,0,0, /* 0x4d */ 1469 | /* 0x4e */ JSONSL__NAN_PROXY /* */, /* 0x4e */ 1470 | /* 0x4f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */ 1471 | /* 0x66 */ JSONSL_SPECIALf_FALSE /* */, /* 0x66 */ 1472 | /* 0x67 */ 0,0, /* 0x68 */ 1473 | /* 0x69 */ JSONSL__INF_PROXY /* */, /* 0x69 */ 1474 | /* 0x6a */ 0,0,0,0, /* 0x6d */ 1475 | /* 0x6e */ JSONSL_SPECIALf_NULL|JSONSL__NAN_PROXY /* */, /* 0x6e */ 1476 | /* 0x6f */ 0,0,0,0,0, /* 0x73 */ 1477 | /* 0x74 */ JSONSL_SPECIALf_TRUE /* */, /* 0x74 */ 1478 | /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */ 1479 | /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */ 1480 | /* 0xb5 */ 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */ 1481 | /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */ 1482 | /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0, /* 0xfe */ 1483 | }; 1484 | 1485 | /** 1486 | * Contains characters which signal the termination of any of the 'special' bareword 1487 | * values. 1488 | */ 1489 | static int Special_Endings[0x100] = { 1490 | /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */ 1491 | /* 0x09 */ 1 /* */, /* 0x09 */ 1492 | /* 0x0a */ 1 /* */, /* 0x0a */ 1493 | /* 0x0b */ 0,0, /* 0x0c */ 1494 | /* 0x0d */ 1 /* */, /* 0x0d */ 1495 | /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ 1496 | /* 0x20 */ 1 /* */, /* 0x20 */ 1497 | /* 0x21 */ 0, /* 0x21 */ 1498 | /* 0x22 */ 1 /* " */, /* 0x22 */ 1499 | /* 0x23 */ 0,0,0,0,0,0,0,0,0, /* 0x2b */ 1500 | /* 0x2c */ 1 /* , */, /* 0x2c */ 1501 | /* 0x2d */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x39 */ 1502 | /* 0x3a */ 1 /* : */, /* 0x3a */ 1503 | /* 0x3b */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5a */ 1504 | /* 0x5b */ 1 /* [ */, /* 0x5b */ 1505 | /* 0x5c */ 1 /* \ */, /* 0x5c */ 1506 | /* 0x5d */ 1 /* ] */, /* 0x5d */ 1507 | /* 0x5e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7a */ 1508 | /* 0x7b */ 1 /* { */, /* 0x7b */ 1509 | /* 0x7c */ 0, /* 0x7c */ 1510 | /* 0x7d */ 1 /* } */, /* 0x7d */ 1511 | /* 0x7e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9d */ 1512 | /* 0x9e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbd */ 1513 | /* 0xbe */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdd */ 1514 | /* 0xde */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfd */ 1515 | /* 0xfe */ 0 /* 0xfe */ 1516 | }; 1517 | 1518 | /** 1519 | * This table contains entries for the allowed whitespace as per RFC 4627 1520 | */ 1521 | static int Allowed_Whitespace[0x100] = { 1522 | /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 
*/ 1523 | /* 0x09 */ 1 /* */, /* 0x09 */ 1524 | /* 0x0a */ 1 /* */, /* 0x0a */ 1525 | /* 0x0b */ 0,0, /* 0x0c */ 1526 | /* 0x0d */ 1 /* */, /* 0x0d */ 1527 | /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ 1528 | /* 0x20 */ 1 /* */, /* 0x20 */ 1529 | /* 0x21 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40 */ 1530 | /* 0x41 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60 */ 1531 | /* 0x61 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80 */ 1532 | /* 0x81 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0 */ 1533 | /* 0xa1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xc0 */ 1534 | /* 0xc1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xe0 */ 1535 | /* 0xe1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* 0xfe */ 1536 | }; 1537 | 1538 | static const int String_No_Passthrough[0x100] = { 1539 | /* 0x00 */ 1 /* */, /* 0x00 */ 1540 | /* 0x01 */ 1 /* */, /* 0x01 */ 1541 | /* 0x02 */ 1 /* */, /* 0x02 */ 1542 | /* 0x03 */ 1 /* */, /* 0x03 */ 1543 | /* 0x04 */ 1 /* */, /* 0x04 */ 1544 | /* 0x05 */ 1 /* */, /* 0x05 */ 1545 | /* 0x06 */ 1 /* */, /* 0x06 */ 1546 | /* 0x07 */ 1 /* */, /* 0x07 */ 1547 | /* 0x08 */ 1 /* */, /* 0x08 */ 1548 | /* 0x09 */ 1 /* */, /* 0x09 */ 1549 | /* 0x0a */ 1 /* */, /* 0x0a */ 1550 | /* 0x0b */ 1 /* */, /* 0x0b */ 1551 | /* 0x0c */ 1 /* */, /* 0x0c */ 1552 | /* 0x0d */ 1 /* */, /* 0x0d */ 1553 | /* 0x0e */ 1 /* */, /* 0x0e */ 1554 | /* 0x0f */ 1 /* */, /* 0x0f */ 1555 | /* 0x10 */ 1 /* */, /* 0x10 */ 1556 | /* 0x11 */ 1 /* */, /* 0x11 */ 1557 | /* 0x12 */ 1 /* */, /* 0x12 */ 1558 | /* 0x13 */ 1 /* */, /* 0x13 */ 1559 | /* 0x14 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x21 */ 1560 | /* 0x22 */ 1 /* <"> */, /* 0x22 */ 1561 | /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x42 */ 1562 | /* 0x43 */ 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */ 1563 | /* 0x5c */ 1 /* <\> */, /* 0x5c */ 1564 | /* 0x5d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7c */ 1565 | /* 0x7d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9c */ 1566 | /* 0x9d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbc */ 1567 | /* 0xbd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdc */ 1568 | /* 0xdd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfc */ 1569 | /* 0xfd */ 0,0, /* 0xfe */ 1570 | }; 1571 | 1572 | /** 1573 | * Allowable two-character 'common' escapes: 1574 | */ 1575 | static int Allowed_Escapes[0x100] = { 1576 | /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ 1577 | /* 0x20 */ 0,0, /* 0x21 */ 1578 | /* 0x22 */ 1 /* <"> */, /* 0x22 */ 1579 | /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2e */ 1580 | /* 0x2f */ 1 /* */, /* 0x2f */ 1581 | /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x4f */ 1582 | /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */ 1583 | /* 0x5c */ 1 /* <\> */, /* 0x5c */ 1584 | /* 0x5d */ 0,0,0,0,0, /* 0x61 */ 1585 | /* 0x62 */ 1 /* */, /* 0x62 */ 1586 | /* 0x63 */ 0,0,0, /* 0x65 */ 1587 | /* 0x66 */ 1 /* */, /* 0x66 */ 1588 | /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */ 1589 | /* 0x6e */ 1 /* */, /* 0x6e */ 1590 | /* 0x6f */ 0,0,0, /* 0x71 */ 1591 | /* 0x72 */ 1 /* */, /* 0x72 */ 1592 | /* 0x73 */ 0, /* 0x73 */ 1593 | /* 0x74 */ 1 /* */, /* 0x74 */ 1594 | /* 0x75 */ 1 /* */, /* 0x75 */ 1595 | /* 0x76 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x95 */ 1596 | /* 0x96 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb5 */ 1597 | /* 0xb6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd5 */ 1598 | /* 0xd6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf5 */ 1599 | /* 
0xf6 */ 0,0,0,0,0,0,0,0,0, /* 0xfe */ 1600 | }; 1601 | 1602 | /** 1603 | * This table contains the _values_ for a given (single) escaped character. 1604 | */ 1605 | static unsigned char Escape_Equivs[0x100] = { 1606 | /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ 1607 | /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x3f */ 1608 | /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5f */ 1609 | /* 0x60 */ 0,0, /* 0x61 */ 1610 | /* 0x62 */ 8 /* */, /* 0x62 */ 1611 | /* 0x63 */ 0,0,0, /* 0x65 */ 1612 | /* 0x66 */ 12 /* */, /* 0x66 */ 1613 | /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */ 1614 | /* 0x6e */ 10 /* */, /* 0x6e */ 1615 | /* 0x6f */ 0,0,0, /* 0x71 */ 1616 | /* 0x72 */ 13 /* */, /* 0x72 */ 1617 | /* 0x73 */ 0, /* 0x73 */ 1618 | /* 0x74 */ 9 /* */, /* 0x74 */ 1619 | /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */ 1620 | /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */ 1621 | /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */ 1622 | /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */ 1623 | /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */ 1624 | }; 1625 | 1626 | /* Definitions of above-declared static functions */ 1627 | static char get_escape_equiv(unsigned c) { 1628 | return Escape_Equivs[c & 0xff]; 1629 | } 1630 | static unsigned extract_special(unsigned c) { 1631 | return Special_Table[c & 0xff]; 1632 | } 1633 | static int is_special_end(unsigned c) { 1634 | return Special_Endings[c & 0xff]; 1635 | } 1636 | static int is_allowed_whitespace(unsigned c) { 1637 | return c == ' ' || Allowed_Whitespace[c & 0xff]; 1638 | } 1639 | static int is_allowed_escape(unsigned c) { 1640 | return Allowed_Escapes[c & 0xff]; 1641 | } 1642 | static int is_simple_char(unsigned c) { 1643 | return !String_No_Passthrough[c & 0xff]; 1644 | } 1645 | 
1646 | /* Clean up all our macros! */ 1647 | #undef INCR_METRIC 1648 | #undef INCR_GENERIC 1649 | #undef INCR_STRINGY_CATCH 1650 | #undef CASE_DIGITS 1651 | #undef INVOKE_ERROR 1652 | #undef STACK_PUSH 1653 | #undef STACK_POP_NOPOS 1654 | #undef STACK_POP 1655 | #undef CALLBACK_AND_POP_NOPOS 1656 | #undef CALLBACK_AND_POP 1657 | #undef SPECIAL_POP 1658 | #undef CUR_CHAR 1659 | #undef DO_CALLBACK 1660 | #undef ENSURE_HVAL 1661 | #undef VERIFY_SPECIAL 1662 | #undef STATE_SPECIAL_LENGTH 1663 | #undef IS_NORMAL_NUMBER 1664 | #undef STATE_NUM_LAST 1665 | #undef FASTPARSE_EXHAUSTED 1666 | #undef FASTPARSE_BREAK 1667 | -------------------------------------------------------------------------------- /jsonsl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * JSON Simple/Stacked/Stateful Lexer. 3 | * - Does not buffer data 4 | * - Maintains state 5 | * - Callback oriented 6 | * - Lightweight and fast. One source file and one header file 7 | * 8 | * Copyright (C) 2012-2015 Mark Nunberg 9 | * See included LICENSE file for license details. 
/*
 * Character types used for the input buffer.
 *
 * With JSONSL_USE_WCHAR the lexer works on wchar_t, otherwise on plain
 * bytes. Note the typedef operand order: existing type first, new name
 * second.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
/* 'unsigned wchar_t' is not a valid type (wchar_t cannot take a sign
 * specifier), so the "unsigned" flavour is wchar_t as well; its signedness
 * is whatever the platform gives wchar_t.
 * NOTE(review): confirm no wide-character code path relies on
 * jsonsl_uchar_t being unsigned. */
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
#define JSONSL_API 83 | #endif /* _WIN32 */ 84 | 85 | #endif /* !JSONSL_API */ 86 | 87 | #ifndef JSONSL_INLINE 88 | #if defined(_MSC_VER) 89 | #define JSONSL_INLINE __inline 90 | #elif defined(__GNUC__) 91 | #define JSONSL_INLINE __inline__ 92 | #else 93 | #define JSONSL_INLINE inline 94 | #endif /* _MSC_VER or __GNUC__ */ 95 | #endif /* JSONSL_INLINE */ 96 | 97 | #define JSONSL_MAX_LEVELS 512 98 | 99 | struct jsonsl_st; 100 | typedef struct jsonsl_st *jsonsl_t; 101 | 102 | typedef struct jsonsl_jpr_st* jsonsl_jpr_t; 103 | 104 | /** 105 | * This flag is true when AND'd against a type whose value 106 | * must be in "quoutes" i.e. T_HKEY and T_STRING 107 | */ 108 | #define JSONSL_Tf_STRINGY 0xffff00 109 | 110 | /** 111 | * Constant representing the special JSON types. 112 | * The values are special and aid in speed (the OBJECT and LIST 113 | * values are the char literals of their openings). 114 | * 115 | * Their actual value is a character which attempts to resemble 116 | * some mnemonic reference to the actual type. 117 | * 118 | * If new types are added, they must fit into the ASCII printable 119 | * range (so they should be AND'd with 0x7f and yield something 120 | * meaningful) 121 | */ 122 | #define JSONSL_XTYPE \ 123 | X(STRING, '"'|JSONSL_Tf_STRINGY) \ 124 | X(HKEY, '#'|JSONSL_Tf_STRINGY) \ 125 | X(OBJECT, '{') \ 126 | X(LIST, '[') \ 127 | X(SPECIAL, '^') \ 128 | X(UESCAPE, 'u') 129 | typedef enum { 130 | #define X(o, c) \ 131 | JSONSL_T_##o = c, 132 | JSONSL_XTYPE 133 | JSONSL_T_UNKNOWN = '?', 134 | /* Abstract 'root' object */ 135 | JSONSL_T_ROOT = 0 136 | #undef X 137 | } jsonsl_type_t; 138 | 139 | /** 140 | * Subtypes for T_SPECIAL. We define them as flags 141 | * because more than one type can be applied to a 142 | * given object. 
143 | */ 144 | 145 | #define JSONSL_XSPECIAL \ 146 | X(NONE, 0) \ 147 | X(SIGNED, 1<<0) \ 148 | X(UNSIGNED, 1<<1) \ 149 | X(TRUE, 1<<2) \ 150 | X(FALSE, 1<<3) \ 151 | X(NULL, 1<<4) \ 152 | X(FLOAT, 1<<5) \ 153 | X(EXPONENT, 1<<6) \ 154 | X(NONASCII, 1<<7) \ 155 | X(NAN, 1<<8) \ 156 | X(INF, 1<<9) 157 | typedef enum { 158 | #define X(o,b) \ 159 | JSONSL_SPECIALf_##o = b, 160 | JSONSL_XSPECIAL 161 | #undef X 162 | /* Handy flags for checking */ 163 | 164 | JSONSL_SPECIALf_UNKNOWN = 1 << 10, 165 | 166 | /** @private Private */ 167 | JSONSL_SPECIALf_ZERO = 1 << 11 | JSONSL_SPECIALf_UNSIGNED, 168 | /** @private */ 169 | JSONSL_SPECIALf_DASH = 1 << 12, 170 | /** @private */ 171 | JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF), 172 | JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF|JSONSL_SPECIALf_SIGNED), 173 | 174 | /** Type is numeric */ 175 | JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED), 176 | 177 | /** Type is a boolean */ 178 | JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE), 179 | 180 | /** Type is an "extended", not integral type (but numeric) */ 181 | JSONSL_SPECIALf_NUMNOINT = 182 | (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT|JSONSL_SPECIALf_NAN 183 | |JSONSL_SPECIALf_INF) 184 | } jsonsl_special_t; 185 | 186 | 187 | /** 188 | * These are the various types of stack (or other) events 189 | * which will trigger a callback. 190 | * Like the type constants, this are also mnemonic 191 | */ 192 | #define JSONSL_XACTION \ 193 | X(PUSH, '+') \ 194 | X(POP, '-') \ 195 | X(UESCAPE, 'U') \ 196 | X(ERROR, '!') 197 | typedef enum { 198 | #define X(a,c) \ 199 | JSONSL_ACTION_##a = c, 200 | JSONSL_XACTION 201 | JSONSL_ACTION_UNKNOWN = '?' 
202 | #undef X 203 | } jsonsl_action_t; 204 | 205 | 206 | /** 207 | * Various errors which may be thrown while parsing JSON 208 | */ 209 | #define JSONSL_XERR \ 210 | /* Trailing garbage characters */ \ 211 | X(GARBAGE_TRAILING) \ 212 | /* We were expecting a 'special' (numeric, true, false, null) */ \ 213 | X(SPECIAL_EXPECTED) \ 214 | /* The 'special' value was incomplete */ \ 215 | X(SPECIAL_INCOMPLETE) \ 216 | /* Found a stray token */ \ 217 | X(STRAY_TOKEN) \ 218 | /* We were expecting a token before this one */ \ 219 | X(MISSING_TOKEN) \ 220 | /* Cannot insert because the container is not ready */ \ 221 | X(CANT_INSERT) \ 222 | /* Found a '\' outside a string */ \ 223 | X(ESCAPE_OUTSIDE_STRING) \ 224 | /* Found a ':' outside of a hash */ \ 225 | X(KEY_OUTSIDE_OBJECT) \ 226 | /* found a string outside of a container */ \ 227 | X(STRING_OUTSIDE_CONTAINER) \ 228 | /* Found a null byte in middle of string */ \ 229 | X(FOUND_NULL_BYTE) \ 230 | /* Current level exceeds limit specified in constructor */ \ 231 | X(LEVELS_EXCEEDED) \ 232 | /* Got a } as a result of an opening [ or vice versa */ \ 233 | X(BRACKET_MISMATCH) \ 234 | /* We expected a key, but got something else instead */ \ 235 | X(HKEY_EXPECTED) \ 236 | /* We got an illegal control character (bad whitespace or something) */ \ 237 | X(WEIRD_WHITESPACE) \ 238 | /* Found a \u-escape, but there were less than 4 following hex digits */ \ 239 | X(UESCAPE_TOOSHORT) \ 240 | /* Invalid two-character escape */ \ 241 | X(ESCAPE_INVALID) \ 242 | /* Trailing comma */ \ 243 | X(TRAILING_COMMA) \ 244 | /* An invalid number was passed in a numeric field */ \ 245 | X(INVALID_NUMBER) \ 246 | /* Value is missing for object */ \ 247 | X(VALUE_EXPECTED) \ 248 | /* The following are for JPR Stuff */ \ 249 | \ 250 | /* Found a literal '%' but it was only followed by a single valid hex digit */ \ 251 | X(PERCENT_BADHEX) \ 252 | /* jsonpointer URI is malformed '/' */ \ 253 | X(JPR_BADPATH) \ 254 | /* Duplicate slash */ \ 255 | 
/**
 * A state is a single level of the stack.
 * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
 * will remain intact until the item is popped.
 *
 * As a result, a parent state object may be accessed from a child
 * object (the parent's fields will all be valid). This allows a user to
 * create an ad-hoc hierarchy on top of the JSON one.
 */
struct jsonsl_state_st {
    /**
     * The JSON object type (one of the JSONSL_T_* constants)
     */
    unsigned type;

    /** If this element is special, then its extended type
     * (JSONSL_SPECIALf_* flags) is here */
    unsigned special_flags;

    /**
     * The position (in terms of number of bytes since the first call to
     * jsonsl_feed()) at which the state was first pushed. This includes
     * opening tokens, if applicable.
     *
     * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
     * be the position of the first quote.
     *
     * @see jsonsl_st::pos which contains the _current_ position and can be
     * used during a POP callback to get the length of the element.
     */
    size_t pos_begin;

    /** FIXME: This is redundant as the same information can be derived from
     * jsonsl_st::pos at pop-time */
    size_t pos_cur;

    /**
     * Level of recursion into nesting. This is mainly a convenience
     * variable, as this can technically be deduced from the lexer's
     * level parameter (though the logic is not that simple)
     */
    unsigned int level;


    /**
     * How many elements are in the object/list.
     * For objects (hashes), an element is either
     * a key or a value. Thus for one complete pair,
     * nelem will be 2 (see JSONSL_OBJECT_SIZE).
     *
     * For special types this field is reused for the number itself
     * (JSONSL_NUMERIC_VALUE reads it back as "the numeric value").
     * This only holds true for values which are simple signed/unsigned
     * numbers. Otherwise a special flag is set, and extra handling is not
     * performed.
     */
    uint64_t nelem;



    /*TODO: merge this and special_flags into a union */


    /**
     * Useful for an opening nest, this will prevent a callback from being
     * invoked on this item or any of its children
     */
    int ignore_callback;

    /**
     * Counter which is incremented each time an escape ('\') is encountered.
     * This is used internally for non-string types and should only be
     * inspected by the user if the state actually represents a string
     * type.
     */
    unsigned int nescapes;

    /**
     * Put anything you want here. If JSONSL_STATE_USER_FIELDS is defined
     * (and JSONSL_STATE_GENERIC is not), the macro expansion happens here.
     *
     * You can use these fields to store hierarchical or 'tagging' information
     * for specific objects.
     *
     * See the documentation above for the lifetime of the state object (i.e.
     * if the private data points to allocated memory, it should be freed
     * when the object is popped, as the state object will be re-used)
     */
#ifndef JSONSL_STATE_GENERIC
    JSONSL_STATE_USER_FIELDS
#else

    /**
     * Otherwise, this is a simple void * pointer for anything you want
     */
    void *data;
#endif /* JSONSL_STATE_GENERIC */
};
In the future this might
407 |  * be made into a proper 'jump' table,
408 |  *
409 |  * Since we always mark JPRs from the higher levels descending
410 |  * into the lower ones, a prospective child match would first
411 |  * look at the parent table to check the possibilities, and then
412 |  * see which ones were possible.
413 |  *
414 |  * Thus, the size of this blob would be (and these are all ints here)
415 |  * nLevels * nJPR * 2.
416 |  *
417 |  * the 'Width' of the table would be nJPR*2, and the 'height' would be
418 |  * nlevels
419 |  */
420 |
421 | /**
422 |  * This is called when a stack change occurs.
423 |  *
424 |  * @param jsn The lexer
425 |  * @param action The type of action, this can be PUSH or POP
426 |  * @param state A pointer to the stack state currently affected by the action
427 |  * @param at A pointer to the position of the input buffer which triggered
428 |  * this action.
429 |  */
430 | typedef void (*jsonsl_stack_callback)(
431 |         jsonsl_t jsn,
432 |         jsonsl_action_t action,
433 |         struct jsonsl_state_st* state,
434 |         const jsonsl_char_t *at);
435 |
436 |
437 | /**
438 |  * This is called when an error is encountered.
439 |  * Sometimes it's possible to 'erase' characters (by replacing them
440 |  * with whitespace). If you think you have corrected the error, you
441 |  * can return a true value, in which case the parser will backtrack
442 |  * and try again.
443 |  *
444 |  * @param jsn The lexer
445 |  * @param error The error which was thrown
446 |  * @param state the current state
447 |  * @param at A pointer to the position of the input buffer which triggered
448 |  * the error.
Note that this is not const, this is because you have the
449 |  * possibility of modifying the character in an attempt to correct the
450 |  * error
451 |  *
452 |  * @return zero to bail, nonzero to try again (this only makes sense if
453 |  * the input buffer has been modified by this callback)
454 |  */
455 | typedef int (*jsonsl_error_callback)(
456 |         jsonsl_t jsn,
457 |         jsonsl_error_t error,
458 |         struct jsonsl_state_st* state,
459 |         jsonsl_char_t *at);
460 |
461 | struct jsonsl_st {
462 |     /** Public, read-only */
463 |
464 |     /** This is the current level of the stack */
465 |     unsigned int level;
466 |
467 |     /** Flag set to indicate we should stop processing (see jsonsl_stop()) */
468 |     unsigned int stopfl;
469 |
470 |     /**
471 |      * This is the current position, relative to the beginning
472 |      * of the stream.
473 |      */
474 |     size_t pos;
475 |
476 |     /** This is the 'bytes' variable passed to feed() */
477 |     const jsonsl_char_t *base;
478 |
479 |     /** Callback invoked for PUSH actions */
480 |     jsonsl_stack_callback action_callback_PUSH;
481 |
482 |     /** Callback invoked for POP actions */
483 |     jsonsl_stack_callback action_callback_POP;
484 |
485 |     /** Default callback for any action, if neither PUSH nor POP callbacks are defined */
486 |     jsonsl_stack_callback action_callback;
487 |
488 |     /**
489 |      * Do not invoke callbacks for objects deeper than this level.
490 |      * NOTE: This field establishes the lower bound for ignored callbacks,
491 |      * and is thus misnamed. `min_ignore_level` would actually make more
492 |      * sense, but we don't want to break API.
493 |      */
494 |     unsigned int max_callback_level;
495 |
496 |     /** The error callback. Invoked when an error happens. Should not be NULL */
497 |     jsonsl_error_callback error_callback;
498 |
499 |     /* these are boolean flags you can modify. You will be called
500 |      * about notification for each of these types if the corresponding
501 |      * variable is true.
502 |      */
503 |
504 |     /**
505 |      * @name Callback Booleans.
506 |      * These determine whether a callback is to be invoked for certain types of objects
507 |      * @{*/
508 |
509 |     /** Boolean flag to enable or disable the invocation for events on this type */
510 |     int call_SPECIAL;
511 |     int call_OBJECT;
512 |     int call_LIST;
513 |     int call_STRING;
514 |     int call_HKEY;
515 |     /*@}*/
516 |
517 |     /**
518 |      * @name u-Escape handling
519 |      * Special handling for the \\u-f00d type sequences. These are meant
520 |      * to be translated back into the corresponding octet(s).
521 |      * A special callback (if set) is invoked with *at=='u'. An application
522 |      * may wish to temporarily suspend parsing and handle the 'u-' sequence
523 |      * internally (or not).
524 |      */
525 |
526 |     /*@{*/
527 |
528 |     /** Callback to be invoked for a u-escape */
529 |     jsonsl_stack_callback action_callback_UESCAPE;
530 |
531 |     /** Boolean flag, whether to invoke the callback */
532 |     int call_UESCAPE;
533 |
534 |     /** Boolean flag, whether we should return after encountering a u-escape:
535 |      * the callback is invoked and then we return if this is true
536 |      */
537 |     int return_UESCAPE;
538 |     /*@}*/
539 |
540 |     struct {
541 |         int allow_trailing_comma;
542 |     } options;
543 |
544 |     /** Put anything here */
545 |     void *data;
546 |
547 |     /*@{*/
548 |     /** Private */
549 |     int in_escape;
550 |     char expecting;
551 |     char tok_last;
552 |     int can_insert;
553 |     unsigned int levels_max;
554 |
555 | #ifndef JSONSL_NO_JPR
556 |     size_t jpr_count;
557 |     jsonsl_jpr_t *jprs;
558 |
559 |     /* Root pointer for JPR matching information */
560 |     size_t *jpr_root;
561 | #endif /* JSONSL_NO_JPR */
562 |     /*@}*/
563 |
564 |     /**
565 |      * This is the stack. Its upper bound is levels_max, or the
566 |      * nlevels argument passed to jsonsl_new. If you modify this structure,
567 |      * make sure that this member is last.
568 |      */
569 |     struct jsonsl_state_st stack[1];
570 | };
571 |
572 |
573 | /**
574 |  * Creates a new lexer object, with capacity for recursion up to nlevels
575 |  *
576 |  * @param nlevels maximum recursion depth
577 |  */
578 | JSONSL_API
579 | jsonsl_t jsonsl_new(int nlevels);
580 |
581 | /**
582 |  * Feeds data into the lexer.
583 |  *
584 |  * @param jsn the lexer object
585 |  * @param bytes new data to be fed
586 |  * @param nbytes size of new data
587 |  */
588 | JSONSL_API
589 | void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
590 |
591 | /**
592 |  * Resets the internal parser state. This does not free the parser
593 |  * but does clean it internally, so that the next time feed() is called,
594 |  * it will be treated as a new stream
595 |  *
596 |  * @param jsn the lexer
597 |  */
598 | JSONSL_API
599 | void jsonsl_reset(jsonsl_t jsn);
600 |
601 | /**
602 |  * Frees the lexer, cleaning any allocated memory taken
603 |  *
604 |  * @param jsn the lexer
605 |  */
606 | JSONSL_API
607 | void jsonsl_destroy(jsonsl_t jsn);
608 |
609 | /**
610 |  * Gets the 'parent' element, given the current one
611 |  * @param jsn the lexer
612 |  * @param state the current state
613 |  * @return jsn->stack + state->level - 1, or NULL unless state->level > 1
614 |  */
615 | static JSONSL_INLINE
616 | struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
617 |                                           const struct jsonsl_state_st *state)
618 | {
619 |     /* Don't complain about overriding array bounds */
620 |     if (state->level > 1) {
621 |         return jsn->stack + state->level - 1;
622 |     } else {
623 |         return NULL;
624 |     }
625 | }
626 |
627 | /**
628 |  * Gets the state of the last fully consumed child of this parent. This is
629 |  * only valid in the parent's POP callback.
630 |  * @param jsn the lexer
631 |  * @param parent the parent state
632 |  * @return A pointer to the child.
633 | */ 634 | static JSONSL_INLINE 635 | struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn, 636 | const struct jsonsl_state_st *parent) 637 | { 638 | return jsn->stack + (parent->level + 1); 639 | } 640 | 641 | /**Call to instruct the parser to stop parsing and return. This is valid 642 | * only from within a callback */ 643 | static JSONSL_INLINE 644 | void jsonsl_stop(jsonsl_t jsn) 645 | { 646 | jsn->stopfl = 1; 647 | } 648 | 649 | /** 650 | * This enables receiving callbacks on all events. Doesn't do 651 | * anything special but helps avoid some boilerplate. 652 | * This does not touch the UESCAPE callbacks or flags. 653 | */ 654 | static JSONSL_INLINE 655 | void jsonsl_enable_all_callbacks(jsonsl_t jsn) 656 | { 657 | jsn->call_HKEY = 1; 658 | jsn->call_STRING = 1; 659 | jsn->call_OBJECT = 1; 660 | jsn->call_SPECIAL = 1; 661 | jsn->call_LIST = 1; 662 | } 663 | 664 | /** 665 | * A macro which returns true if the current state object can 666 | * have children. This means a list type or an object type. 667 | */ 668 | #define JSONSL_STATE_IS_CONTAINER(state) \ 669 | (state->type == JSONSL_T_OBJECT || state->type == JSONSL_T_LIST) 670 | 671 | /** 672 | * These two functions, dump a string representation 673 | * of the error or type, respectively. They will never 674 | * return NULL 675 | */ 676 | JSONSL_API 677 | const char* jsonsl_strerror(jsonsl_error_t err); 678 | JSONSL_API 679 | const char* jsonsl_strtype(jsonsl_type_t jt); 680 | 681 | /** 682 | * Dumps global metrics to the screen. This is a noop unless 683 | * jsonsl was compiled with JSONSL_USE_METRICS 684 | */ 685 | JSONSL_API 686 | void jsonsl_dump_global_metrics(void); 687 | 688 | /* This macro just here for editors to do code folding */ 689 | #ifndef JSONSL_NO_JPR 690 | 691 | /** 692 | * @name JSON Pointer API 693 | * 694 | * JSONPointer API. 
This isn't really related to the lexer (at least not yet)
695 |  * JSONPointer provides an extremely simple specification for providing
696 |  * locations within JSON objects. We will extend it a bit and allow for
697 |  * providing 'wildcard' characters by which to be able to 'query' the stream.
698 |  *
699 |  * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
700 |  *
701 |  * Currently I'm implementing the 'single query' API which can only use a single
702 |  * query component. In the future I will integrate my yet-to-be-published
703 |  * Boyer-Moore-esque prefix searching implementation, in order to allow
704 |  * multiple paths to be merged into one for quick and efficient searching.
705 |  *
706 |  *
707 |  * JPR (as we'll refer to it within the source) can be used by splitting
708 |  * the components into multiple sections, and incrementally 'tracking' each
709 |  * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
710 |  * callback for an object, we will check to see whether the index matching
711 |  * the component corresponding to the current level contains a match
712 |  * for our path.
713 |  *
714 |  * In order to do this properly, a structure must be maintained within the
715 |  * parent indicating whether its children are possible matches. This flag
716 |  * will be 'inherited' by all children which may conform to the match
717 |  * specification, and discarded by all which do not (thereby eliminating
718 |  * their children from inheriting it).
719 |  *
720 |  * A successful match is a complete one. One can provide multiple paths with
721 |  * multiple levels of matches e.g.
722 |  * /foo/bar/baz/^/blah
723 |  *
724 |  * @{
725 |  */
726 |
727 | /** The wildcard character */
728 | #ifndef JSONSL_PATH_WILDCARD_CHAR
729 | #define JSONSL_PATH_WILDCARD_CHAR '^'
730 | #endif /* JSONSL_PATH_WILDCARD_CHAR */
731 | /* X-macro table of (name, value) pairs expanded into jsonsl_jpr_match_t */
732 | #define JSONSL_XMATCH \
733 |     X(COMPLETE,1) \
734 |     X(POSSIBLE,0) \
735 |     X(NOMATCH,-1) \
736 |     X(TYPE_MISMATCH, -2)
737 |
738 | typedef enum {
739 |     /* one JSONSL_MATCH_<name> = <value> enumerator per JSONSL_XMATCH entry */
740 | #define X(T,v) \
741 |     JSONSL_MATCH_##T = v,
742 |     JSONSL_XMATCH
743 |
744 | #undef X
745 |     JSONSL_MATCH_UNKNOWN /* NOTE(review): no initializer, so this is TYPE_MISMATCH + 1 == -1, the same value as JSONSL_MATCH_NOMATCH */
746 | } jsonsl_jpr_match_t;
747 |
748 | typedef enum {
749 |     JSONSL_PATH_STRING = 1,
750 |     JSONSL_PATH_WILDCARD,
751 |     JSONSL_PATH_NUMERIC,
752 |     JSONSL_PATH_ROOT,
753 |
754 |     /* Special */
755 |     JSONSL_PATH_INVALID = -1,
756 |     JSONSL_PATH_NONE = 0
757 | } jsonsl_jpr_type_t;
758 |
759 | struct jsonsl_jpr_component_st {
760 |     /** The string the component points to */
761 |     char *pstr;
762 |     /** if this is a numeric type, the number is 'cached' here */
763 |     unsigned long idx;
764 |     /** The length of the string */
765 |     size_t len;
766 |     /** The type of component (one of the JSONSL_PATH_* values, e.g. NUMERIC or STRING) */
767 |     jsonsl_jpr_type_t ptype;
768 |
769 |     /** Set this to true to enforce type checking between dict keys and array
770 |      * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
771 |      * that an array index is actually a child of a dictionary. */
772 |     short is_arridx;
773 |
774 |     /* Extra fields (for more advanced searches. Default is empty) */
775 |     JSONSL_JPR_COMPONENT_USER_FIELDS
776 | };
777 |
778 | struct jsonsl_jpr_st {
779 |     /** Path components */
780 |     struct jsonsl_jpr_component_st *components;
781 |     size_t ncomponents;
782 |
783 |     /**Type of the match to be expected. If nonzero, will be compared against
784 |      * the actual type */
785 |     unsigned match_type;
786 |
787 |     /** Base of allocated string for components */
788 |     char *basestr;
789 |
790 |     /** The original match string. Useful for returning to the user */
791 |     char *orig;
792 |     size_t norig;
793 | };
794 |
795 | /**
796 |  * Create a new JPR object.
797 |  *
798 |  * @param path the JSONPointer path specification.
799 |  * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
800 |  * then more details will be in this variable.
801 |  *
802 |  * @return a new jsonsl_jpr_t object, or NULL on error.
803 |  */
804 | JSONSL_API
805 | jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
806 |
807 | /**
808 |  * Destroy a JPR object
809 |  */
810 | JSONSL_API
811 | void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
812 |
813 | /**
814 |  * Match a JSON object against a type and specific level
815 |  *
816 |  * @param jpr the JPR object
817 |  * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
818 |  * @param parent_level the level of the parent
819 |  * @param key the 'key' of the child. If the parent is an array, this should be
820 |  * empty.
821 |  * @param nkey - the length of the key. If the parent is an array (T_LIST), then
822 |  * this should be the current index.
823 |  *
824 |  * NOTE: The key of the child means any kind of associative data related to the
825 |  * element. Thus: <<< { "foo" : [ >>>,
826 |  * the opening array's key is "foo".
827 |  *
828 |  * @return a status constant. This indicates whether a match was excluded, possible,
829 |  * or successful.
830 |  */
831 | JSONSL_API
832 | jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
833 |                                     unsigned int parent_type,
834 |                                     unsigned int parent_level,
835 |                                     const char *key, size_t nkey);
836 |
837 | /**
838 |  * Alternate matching algorithm. This matching algorithm does not use
839 |  * JSONPointer but relies on a more structured searching mechanism. It
840 |  * assumes that there is a clear distinction between array indices and
841 |  * object keys.
In this case, the jsonsl_jpr_component_st::ptype should
842 |  * be set to @ref JSONSL_PATH_NUMERIC for an array index (the
843 |  * jsonsl_jpr_component_st::is_arridx field will be removed in a future
844 |  * version).
845 |  *
846 |  * @param jpr The path
847 |  * @param parent The parent structure. Can be NULL if this is the root object
848 |  * @param child The child structure. Should not be NULL
849 |  * @param key Object key, if an object
850 |  * @param nkey Length of object key
851 |  * @return Status constant if successful
852 |  *
853 |  * @note
854 |  * For successful matching, both the key and the path itself should be normalized
855 |  * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
856 |  * should currently be done in the application. Another version of this function
857 |  * may use a temporary buffer in such circumstances (allocated by the application).
858 |  *
859 |  * Since this function also checks the state of the child, it should only
860 |  * be called on PUSH callbacks, and not POP callbacks
861 |  */
862 | JSONSL_API
863 | jsonsl_jpr_match_t
864 | jsonsl_path_match(jsonsl_jpr_t jpr,
865 |                   const struct jsonsl_state_st *parent,
866 |                   const struct jsonsl_state_st *child,
867 |                   const char *key, size_t nkey);
868 |
869 |
870 | /**
871 |  * Associate a set of JPR objects with a lexer instance.
872 |  * This should be called before the lexer has been fed any data (and
873 |  * behavior is undefined if you don't adhere to this).
874 |  *
875 |  * After using this function, you may subsequently call match_state() on
876 |  * given states (presumably from within the callbacks).
877 |  *
878 |  * Note that currently the first JPR is the quickest and comes
879 |  * pre-allocated with the state structure. Further JPR objects
880 |  * are chained.
881 |  *
882 |  * @param jsn The lexer
883 |  * @param jprs An array of jsonsl_jpr_t objects
884 |  * @param njprs How many elements in the jprs array.
885 |  */
886 | JSONSL_API
887 | void jsonsl_jpr_match_state_init(jsonsl_t jsn,
888 |                                  jsonsl_jpr_t *jprs,
889 |                                  size_t njprs);
890 |
891 | /**
892 |  * This follows the same semantics as the normal match,
893 |  * except we infer parent and type information from the relevant state objects.
894 |  * The match status (for all possible JPR objects) is set in the *out parameter.
895 |  *
896 |  * If a match has succeeded, then its JPR object will be returned. In all other
897 |  * instances, NULL is returned.
898 |  *
899 |  * @param jsn The lexer (set up beforehand with jsonsl_jpr_match_state_init())
900 |  * @param state The jsonsl_state_st which is a candidate
901 |  * @param key The hash key (if applicable, can be NULL if parent is list)
902 |  * @param nkey Length of hash key (if applicable, can be zero if parent is list)
903 |  * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
904 |  * the match result
905 |  *
906 |  * @return If a match was completed in full, then the JPR object containing
907 |  * the matching path will be returned. Otherwise, the return is NULL (note, this
908 |  * does not mean matching has failed, it can still be part of the match: check
909 |  * the out parameter).
910 |  */
911 | JSONSL_API
912 | jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
913 |                                     struct jsonsl_state_st *state,
914 |                                     const char *key,
915 |                                     size_t nkey,
916 |                                     jsonsl_jpr_match_t *out);
917 |
918 |
919 | /**
920 |  * Cleanup any memory allocated and any states set by
921 |  * match_state_init() and match_state()
922 |  * @param jsn The lexer
923 |  */
924 | JSONSL_API
925 | void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
926 |
927 | /**
928 |  * Return a string representation of the match result returned by match()
929 |  */
930 | JSONSL_API
931 | const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
932 |
933 | /* @}*/
934 |
935 | /**
936 |  * Utility function to convert escape sequences into their original form.
937 |  *
938 |  * The decoders I've sampled do not seem to specify a standard behavior of what
939 |  * to escape/unescape.
940 |  *
941 |  * RFC 4627 mandates only that the quote, backslash, and ASCII control
942 |  * characters (0x00-0x1f) be escaped. It is often common for applications
943 |  * to escape a '/' - however this may also be desired behavior. The JSON
944 |  * spec is not clear on this, and therefore jsonsl leaves it up to you.
945 |  *
946 |  * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
947 |  * true when dealing with 'u-escapes' which can be expressed perfectly fine
948 |  * as utf8. One use case for normalization is JPR string comparison, in which
949 |  * case two effectively equivalent strings may not match because one is using
950 |  * u-escapes and the other proper utf8. To normalize u-escapes only, pass in
951 |  * an empty `toEscape` table, enabling only the `u` index.
952 |  *
953 |  * @param in The input string.
954 |  * @param out An allocated output (should be the same size as in)
955 |  * @param len the size of the buffer
956 |  * @param toEscape - A sparse array of characters to unescape. Characters
957 |  * which are not present in this array, e.g. toEscape['c'] == 0 will be
958 |  * ignored and passed to the output in their original form.
959 |  * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
960 |  * then this variable will have the SPECIALf_NONASCII flag on.
961 |  *
962 |  * @param err A pointer to an error variable. If an error occurs, it will be
963 |  * set in this variable
964 |  * @param errat If not null and an error occurs, this will be set to point
965 |  * to the position within the string at which the offending character was
966 |  * encountered.
967 |  *
968 |  * @return The effective size of the output buffer.
969 |  *
970 |  * @note
971 |  * This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
972 |  * 'u-escapes'). Previously this would encode the escapes as utf16 literals,
973 |  * which while still correct in some sense was confusing for many (especially
974 |  * considering that the inputs were variations of char).
975 |  *
976 |  * @note
977 |  * The output buffer will never be larger than the input buffer, since
978 |  * standard escape sequences (e.g. '\t') occupy two bytes in the source
979 |  * but only one byte (when unescaped) in the output. Likewise u-escapes
980 |  * (i.e. \uXXXX) will occupy six bytes in the source, but fewer when unescaped
981 |  * (NOTE(review): this said "at most two"; UTF-8 needs up to three bytes per \uXXXX - confirm in jsonsl.c).
982 |  */
983 | JSONSL_API
984 | size_t jsonsl_util_unescape_ex(const char *in,
985 |                                char *out,
986 |                                size_t len,
987 |                                const int toEscape[128],
988 |                                unsigned *oflags,
989 |                                jsonsl_error_t *err,
990 |                                const char **errat);
991 |
992 | /**
993 |  * Convenience macro to avoid passing too many parameters
994 |  */
995 | #define jsonsl_util_unescape(in, out, len, toEscape, err) \
996 |     jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
997 |
998 | #endif /* JSONSL_NO_JPR */
999 |
1000 | #ifdef __cplusplus
1001 | }
1002 | #endif /* __cplusplus */
1003 |
1004 | #endif /* JSONSL_H_ */
1005 |
--------------------------------------------------------------------------------
/perf/Makefile:
--------------------------------------------------------------------------------
1 | all: bench yajl-perftest
2 |
3 | CFLAGS+= -Wno-overlength-strings -fvisibility=hidden -DJSONSL_NO_JPR -DNDEBUG
4 | #CFLAGS+=-DJSONSL_USE_METRICS
5 | BENCH_LFLAGS=
6 |
7 | bench: bench.c ../jsonsl.c
8 | 	$(CC) -o $@ $(CFLAGS) $^ $(BENCH_LFLAGS)
9 |
10 | yajl-perftest: documents.c perftest.c ../jsonsl.c
11 | 	$(CC) $(CFLAGS) $^ -o $@ $(BENCH_LFLAGS)
12 |
13 | .PHONY: run-benchmarks
14 |
15 | run-benchmarks: bench yajl-perftest
16 | 	@echo "Running against single file"
17 | 	./bench ../share/auction 100
18 | 	@echo "Running yajl tests on JSONSL"
19 | 	./yajl-perftest
20 |
21 | clean:
22 | 	-rm -f bench yajl-perftest
23 |
-------------------------------------------------------------------------------- /perf/bench.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char **argv) 8 | { 9 | struct stat sb; 10 | char *buf; 11 | FILE *fh; 12 | jsonsl_t jsn; 13 | int rv, itermax, ii; 14 | int is_rawscan = 0; 15 | time_t begin_time; 16 | size_t total_size; 17 | unsigned long duration; 18 | unsigned stuff = 0; 19 | 20 | if (argc < 3) { 21 | fprintf(stderr, "%s: FILE ITERATIONS [MODE]\n", argv[0]); 22 | exit(EXIT_FAILURE); 23 | } 24 | 25 | if (argc > 3) { 26 | if (strcmp("raw", argv[3]) == 0) { 27 | is_rawscan = 1; 28 | } 29 | } 30 | 31 | sscanf(argv[2], "%d", &itermax); 32 | rv = stat(argv[1], &sb); 33 | if (rv != 0) { 34 | perror(argv[1]); 35 | exit(EXIT_FAILURE); 36 | } 37 | 38 | fh = fopen(argv[1], "rb"); 39 | if (fh == NULL) { 40 | perror(argv[1]); 41 | exit(EXIT_FAILURE); 42 | } 43 | buf = malloc(sb.st_size + 1); 44 | fread(buf, 1, sb.st_size, fh); 45 | buf[sb.st_size] = '\0'; 46 | begin_time = time(NULL); 47 | 48 | jsn = jsonsl_new(512); 49 | 50 | if (is_rawscan) { 51 | for (ii = 0; ii < itermax; ii++) { 52 | unsigned jj; 53 | for (jj = 0; jj < sb.st_size; jj++) { 54 | if (buf[jj] == '"') { 55 | stuff++; 56 | } 57 | } 58 | } 59 | } else { 60 | for (ii = 0; ii < itermax; ii++) { 61 | jsonsl_reset(jsn); 62 | jsonsl_feed(jsn, buf, sb.st_size); 63 | } 64 | } 65 | 66 | total_size = sb.st_size * itermax; 67 | total_size /= (1024*1024); 68 | duration = time(NULL) - begin_time; 69 | if (!duration) { 70 | duration = 1; 71 | } 72 | if (stuff) { 73 | fprintf(stderr, "Random value (don't optimize out!): %u\n", stuff); 74 | } 75 | fprintf(stderr, "SPEED: %lu MB/sec\n", total_size/duration); 76 | 77 | jsonsl_dump_global_metrics(); 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /perf/documents.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2011, Lloyd Hilaiel 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef __DOCUMENTS_H__ 18 | #define __DOCUMENTS_H__ 19 | 20 | /* a header that provides access to several json documents broken into chunks of 21 | * less than 4k, cause C99 says that's what we should do and YAJL likes streams */ 22 | 23 | extern const char ** g_documents[]; 24 | int num_docs(void); 25 | const char ** get_doc(int i); 26 | unsigned int doc_size(int i); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /perf/perftest.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2011, Lloyd Hilaiel 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "documents.h" 22 | 23 | /* a platform specific defn' of a function to get a high res time in a 24 | * portable format */ 25 | #ifndef WIN32 26 | #include 27 | static double mygettime(void) { 28 | struct timeval now; 29 | gettimeofday(&now, NULL); 30 | return now.tv_sec + (now.tv_usec / 1000000.0); 31 | } 32 | #else 33 | #define _WIN32 1 34 | #include 35 | static double mygettime(void) { 36 | long long tval; 37 | FILETIME ft; 38 | GetSystemTimeAsFileTime(&ft); 39 | tval = ft.dwHighDateTime; 40 | tval <<=32; 41 | tval |= ft.dwLowDateTime; 42 | return tval / 10000000.00; 43 | } 44 | #endif 45 | 46 | #define PARSE_TIME_SECS 3 47 | 48 | static int 49 | error_callback(jsonsl_t jsn, jsonsl_error_t err, struct jsonsl_state_st *state, 50 | char *at) 51 | { 52 | fprintf(stderr, "Got error %s at pos %lu (remaining: %s)\n", 53 | jsonsl_strerror(err), jsn->pos, at); 54 | abort(); 55 | } 56 | 57 | static int 58 | run(int validate_utf8) 59 | { 60 | long long times = 0; 61 | double starttime; 62 | 63 | starttime = mygettime(); 64 | jsonsl_t jsn = jsonsl_new(128); 65 | jsn->error_callback = error_callback; 66 | 67 | /* allocate a parser */ 68 | for (;;) { 69 | int i; 70 | { 71 | double now = mygettime(); 72 | if (now - starttime >= PARSE_TIME_SECS) break; 73 | } 74 | for (i = 0; i < 100; i++) { 75 | const char ** d; 76 | jsonsl_reset(jsn); 77 | for (d = get_doc(times % num_docs()); *d; d++) { 78 | jsonsl_feed(jsn, (char *) *d, strlen(*d)); 79 | } 80 | times++; 81 | } 82 | } 83 | jsonsl_destroy(jsn); 84 | 85 | /* parsed doc 'times' times */ 86 | { 87 | double 
throughput; 88 | double now; 89 | const char * all_units[] = { "B/s", "KB/s", "MB/s", (char *) 0 }; 90 | const char ** units = all_units; 91 | int i, avg_doc_size = 0; 92 | 93 | now = mygettime(); 94 | 95 | for (i = 0; i < num_docs(); i++) avg_doc_size += doc_size(i); 96 | avg_doc_size /= num_docs(); 97 | 98 | throughput = (times * avg_doc_size) / (now - starttime); 99 | 100 | while (*(units + 1) && throughput > 1024) { 101 | throughput /= 1024; 102 | units++; 103 | } 104 | 105 | printf("Parsing speed: %g %s\n", throughput, *units); 106 | } 107 | 108 | return 0; 109 | } 110 | 111 | int 112 | main(void) 113 | { 114 | int rv = 0; 115 | 116 | printf("-- speed tests determine parsing throughput given %d different sample documents --\n", 117 | num_docs()); 118 | 119 | printf("Without UTF8 validation:\n"); 120 | rv = run(0); 121 | jsonsl_dump_global_metrics(); 122 | return rv; 123 | } 124 | 125 | -------------------------------------------------------------------------------- /srcutil/genchartables.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # This script generates the character table for 'special' lookups 3 | # 4 | use strict; 5 | use warnings; 6 | use Getopt::Long; 7 | 8 | ################################################################################ 9 | ################################################################################ 10 | ### Character Table Definitions ### 11 | ################################################################################ 12 | ################################################################################ 13 | my @special_begin; 14 | $special_begin[ord('-')] = 'JSONSL_SPECIALf_DASH'; 15 | $special_begin[ord('i')] = 'JSONSL__INF_PROXY'; 16 | $special_begin[ord('I')] = 'JSONSL__INF_PROXY'; 17 | $special_begin[ord('t')] = 'JSONSL_SPECIALf_TRUE'; 18 | $special_begin[ord('f')] = 'JSONSL_SPECIALf_FALSE'; 19 | $special_begin[ord('n')] = 
'JSONSL_SPECIALf_NULL|JSONSL__NAN_PROXY'; 20 | $special_begin[ord('N')] = 'JSONSL__NAN_PROXY'; 21 | $special_begin[ord($_)] = 'JSONSL_SPECIALf_UNSIGNED' for (0..9); 22 | $special_begin[ord('0')] = 'JSONSL_SPECIALf_ZERO'; 23 | 24 | my @strdefs; 25 | $strdefs[ord('\\')] = 1; 26 | $strdefs[ord('"')] = 1; 27 | 28 | #Tokens which terminate a 'special' sequence. Basically all JSON tokens 29 | #themselves 30 | my @special_end; 31 | { 32 | my @toks = qw([ { } ] " : \\ ); 33 | push @toks, ','; 34 | $special_end[ord($_)] = 1 for (@toks); 35 | } 36 | 37 | #RFC 4627 allowed whitespace 38 | my @wstable; 39 | foreach my $x (0x20, 0x09, 0xa, 0xd) { 40 | $wstable[$x] = 1; 41 | $special_end[$x] = 1; 42 | } 43 | 44 | my @special_body; 45 | { 46 | foreach my $x (0..9) { 47 | $special_body[ord($x)] = 1; 48 | } 49 | foreach my $x ('E', 'e', 'a','l','s','u','-','+', '.') { 50 | $special_body[ord($x)] = 1; 51 | } 52 | } 53 | 54 | my @unescapes; 55 | $unescapes[ord('t')] = 0x09; 56 | $unescapes[ord('b')] = 0x08; 57 | $unescapes[ord('n')] = 0x0a; 58 | $unescapes[ord('f')] = 0x0c; 59 | $unescapes[ord('r')] = 0x0d; 60 | 61 | my @allowed_escapes; 62 | { 63 | @allowed_escapes[ord($_)] = 1 foreach 64 | ('"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u'); 65 | } 66 | 67 | my @string_passthrough; 68 | $string_passthrough[ord($_)] = 1 for ('\\','"'); 69 | $string_passthrough[$_] = 1 for (0..19); 70 | 71 | ################################################################################ 72 | ################################################################################ 73 | ### CLI Options ### 74 | ################################################################################ 75 | ################################################################################ 76 | 77 | my %HMap = ( 78 | special => [ undef, \@special_begin ], 79 | strings => [ undef, \@strdefs ], 80 | special_end => [ undef, \@special_end ], 81 | special_body => [undef, \@special_body ], 82 | whitespace => [ undef, \@wstable ], 83 | 
unescapes => [undef, \@unescapes], 84 | allowed_escapes => [ undef, \@allowed_escapes], 85 | string_passthrough => [ undef, \@string_passthrough ] 86 | ); 87 | 88 | my $Table; 89 | my %opthash; 90 | while (my ($optname,$optarry) = each %HMap) { 91 | $opthash{$optname} = \$optarry->[0]; 92 | } 93 | GetOptions(%opthash, escape_newlines => \my $EscapeNewlines); 94 | 95 | while (my ($k,$v) = each %HMap) { 96 | if ($v->[0]) { 97 | $Table = $v->[1]; 98 | last; 99 | } 100 | } 101 | 102 | if (!$Table) { 103 | die("Please specify one of: " . join(",", keys %HMap)); 104 | } 105 | 106 | ################################################################################ 107 | ################################################################################ 108 | ### Logic ### 109 | ################################################################################ 110 | ################################################################################ 111 | my %PrettyMap = ( 112 | "\x00" => '', 113 | "\x01" => '', 114 | "\x02" => '', 115 | "\x03" => '', 116 | "\x04" => '', 117 | "\x05" => '', 118 | "\x06" => '', 119 | "\x07" => '', 120 | "\x08" => '', 121 | "\x09" => '', 122 | "\x0a" => '', 123 | "\x0b" => '', 124 | "\x0c" => '', 125 | "\x0d" => '', 126 | "\x0e" => '', 127 | "\x0f" => '', 128 | "\x10" => '', 129 | "\x11" => '', 130 | "\x12" => '', 131 | "\x13" => '', 132 | "\x14" => '', 133 | "\x15" => '', 134 | "\x16" => '', 135 | "\x17" => '', 136 | "\x18" => '', 137 | "\x19" => '', 138 | "\x1a" => '', 139 | "\x1b" => '', 140 | "\x1c" => '', 141 | "\x1d" => '', 142 | "\x1e" => '', 143 | "\x1f" => '', 144 | "\x20" => '', 145 | "\x21" => '', 146 | "\x22" => '<">', 147 | "\x23" => '<#>', 148 | "\x24" => '<$>', 149 | "\x25" => '<%>', 150 | "\x26" => '<&>', 151 | "\x27" => '<\'>', 152 | "\x28" => '<(>', 153 | "\x29" => '<)>', 154 | "\x2a" => '<*>', 155 | "\x2b" => '<+>', 156 | "\x2c" => '<,>', 157 | "\x2d" => '<->', 158 | "\x2e" => '<.>', 159 | "\x2f" => '', 160 | "\x30" => '<0>', 161 | 
"\x31" => '<1>', 162 | "\x32" => '<2>', 163 | "\x33" => '<3>', 164 | "\x34" => '<4>', 165 | "\x35" => '<5>', 166 | "\x36" => '<6>', 167 | "\x37" => '<7>', 168 | "\x38" => '<8>', 169 | "\x39" => '<9>', 170 | "\x3a" => '<:>', 171 | "\x3b" => '<;>', 172 | "\x3c" => '<<>', 173 | "\x3d" => '<=>', 174 | "\x3e" => '<>>', 175 | "\x3f" => '', 176 | "\x40" => '<@>', 177 | "\x41" => '', 178 | "\x42" => '', 179 | "\x43" => '', 180 | "\x44" => '', 181 | "\x45" => '', 182 | "\x46" => '', 183 | "\x47" => '', 184 | "\x48" => '', 185 | "\x49" => '', 186 | "\x4a" => '', 187 | "\x4b" => '', 188 | "\x4c" => '', 189 | "\x4d" => '', 190 | "\x4e" => '', 191 | "\x4f" => '', 192 | "\x50" => '

', 225 | "\x71" => '', 226 | "\x72" => '', 227 | "\x73" => '', 228 | "\x74" => '', 229 | "\x75" => '', 230 | "\x76" => '', 231 | "\x77" => '', 232 | "\x78" => '', 233 | "\x79" => '', 234 | "\x7a" => '', 235 | "\x7b" => '<{>', 236 | "\x7c" => '<|>', 237 | "\x7d" => '<}>', 238 | "\x7e" => '<~>', 239 | "\x7f" => '', 240 | ); 241 | 242 | my @lines; 243 | my $cur = { begin => 0, items => [], end => 0 }; 244 | push @lines, $cur; 245 | 246 | my $i = 0; 247 | my $cur_col = 0; 248 | 249 | my $special_last = 0; 250 | 251 | sub add_to_grid { 252 | my $v = shift; 253 | 254 | if ($special_last) { 255 | $cur = { begin => $i, end => $i, items => [ $v ]}; 256 | push @lines, $cur; 257 | $special_last = 0; 258 | $cur_col = 1; 259 | return; 260 | } else { 261 | push @{$cur->{items}}, $v; 262 | $cur->{end} = $i; 263 | $cur_col++; 264 | } 265 | 266 | if ($cur_col >= 32) { 267 | $cur = { 268 | begin => $i+1, end => $i+1, items => [] }; 269 | $cur_col = 0; 270 | push @lines, $cur; 271 | } 272 | } 273 | 274 | sub add_special { 275 | my $v = shift; 276 | push @lines, { items => [ $v ], begin => $i, end => $i }; 277 | $special_last = 1; 278 | } 279 | 280 | $special_last = 0; 281 | for (; $i < 255; $i++) { 282 | my $v = $Table->[$i]; 283 | if (defined $v) { 284 | my $char_pretty = $PrettyMap{chr($i)}; 285 | if (defined $char_pretty) { 286 | $v = sprintf("$v /* %s */", $char_pretty); 287 | add_special($v); 288 | } else { 289 | add_to_grid(1); 290 | } 291 | } else { 292 | add_to_grid(0); 293 | } 294 | } 295 | 296 | foreach my $line (@lines) { 297 | my $items = $line->{items}; 298 | if (@$items) { 299 | printf("/* 0x%02x */ %s, /* 0x%02x */", 300 | $line->{begin}, join(",", @$items), $line->{end}); 301 | if ($EscapeNewlines) { 302 | print " \\"; 303 | } 304 | print "\n"; 305 | } 306 | } 307 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 
# Test programs for jsonsl.
#
# Every C test links against the `jsonsl` library target created by the
# top-level CMakeLists.txt. Commands are kept in the upper-case style and the
# keyword-less TARGET_LINK_LIBRARIES signature used by the rest of the project
# (minimum CMake version there is 2.8.9).

ADD_EXECUTABLE(json_test json_test.c)
TARGET_LINK_LIBRARIES(json_test jsonsl)

# fail-tests.c defines JSONSL_FAILURE_TESTS and then #includes json_test.c.
ADD_EXECUTABLE(failure_test fail-tests.c)
TARGET_LINK_LIBRARIES(failure_test jsonsl)

ADD_EXECUTABLE(api_test api_test.c)
TARGET_LINK_LIBRARIES(api_test jsonsl)

ADD_EXECUTABLE(jpr_test jpr_test.c)
TARGET_LINK_LIBRARIES(jpr_test jsonsl)

ADD_EXECUTABLE(unescape unescape.c)
TARGET_LINK_LIBRARIES(unescape jsonsl)

# cxxtest.cpp #includes jsonsl.c directly, so it is not linked to the library.
ADD_EXECUTABLE(cxxtest cxxtest.cpp)

ADD_EXECUTABLE(match_test match_test.c)
TARGET_LINK_LIBRARIES(match_test jsonsl)

# Sample documents live under ${CMAKE_BINARY_DIR}/share (presumably unpacked
# there by the top-level EXECUTE_PROCESS step -- confirm).
# The pass*.json samples sit in share/jsc/, next to the fail*.json ones; the
# previous pattern omitted the "share/" component and therefore matched
# nothing, so those samples were never passed to json_test.
FILE(GLOB samples_ok
    ${CMAKE_BINARY_DIR}/share/*
    ${CMAKE_BINARY_DIR}/share/jsc/pass*.json)
FILE(GLOB samples_bad
    ${CMAKE_BINARY_DIR}/share/jsc/fail*.json)

ADD_TEST(okparse json_test ${samples_ok})
ADD_TEST(badparse failure_test ${samples_bad})
ADD_TEST(apitest api_test)
ADD_TEST(jsonpointer jpr_test)
ADD_TEST(unescape unescape)
ADD_TEST(cxxtest cxxtest)
ADD_TEST(match_test match_test)
-------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | TESTMODS= json_test api_test jpr_test unescape cxxtest 2 | 3 | all: $(TESTMODS) 4 | ./json_test ../share/* 5 | ./api_test 6 | ./jpr_test 7 | ./unescape 8 | ./json_test ../share/jsc/pass*.json 9 | JSONSL_FAIL_TESTS=1 ./json_test ../share/jsc/fail*.json 10 | ifneq (,$(findstring JSONSL_PARSE_NAN,$(CFLAGS))) 11 | ./json_test ../share/nan/*.json 12 | else 13 | JSONSL_FAIL_TESTS=1 ./json_test ../share/nan/*.json 14 | endif 15 | @echo " Some tests were skipped." 
16 | @echo " See share/jsc/nyi_fail/README.skipped for details" 17 | @echo "All Tests OK" 18 | 19 | %: %.c 20 | echo "LIBFLAGS ${LIBFLAGS}" 21 | echo "LDFLAGS ${LDFLAGS}" 22 | $(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) 23 | 24 | clean: 25 | rm -f $(TESTMODS) 26 | -------------------------------------------------------------------------------- /tests/all-tests.h: -------------------------------------------------------------------------------- 1 | #ifndef JSONSL_ALLTESTS_H 2 | #define JSONSL_ALLTESTS_H 3 | 4 | /** 5 | * I made this file primarily for Windows, where I didn't want to make 6 | * 3 separate projects for each executable 7 | */ 8 | 9 | #ifdef JSONSL_SINGLE_TEST_EXE 10 | #define JSONSL_TEST_UNESCAPE_FUNC int jsonsl_test_unescape(void) 11 | #define JSONSL_TEST_JPR_FUNC int jsonsl_test_jpr(void) 12 | #define JSONSL_TEST_JSON_FUNC int jsonsl_test_json(int argc, char **argv) 13 | 14 | JSONSL_TEST_UNESCAPE_FUNC; 15 | JSONSL_TEST_JPR_FUNC; 16 | JSONSL_TEST_JSON_FUNC; 17 | 18 | #else 19 | #define JSONSL_TEST_UNESCAPE_FUNC int main(void) 20 | #define JSONSL_TEST_JPR_FUNC int main(void) 21 | #define JSONSL_TEST_JSON_FUNC int main(int argc, char **argv) 22 | 23 | #endif /* SINGLE_TEST_EXE */ 24 | 25 | #ifdef _WIN32 26 | #define S_ISDIR(x) ((x & _S_IFMT) == _S_IFDIR) 27 | #define DEVNULL "nul" 28 | #define setenv(k, v, o) _putenv_s(k, v) 29 | #else 30 | #define DEVNULL "/dev/null" 31 | #endif 32 | 33 | #endif /* JSONSL_ALLTESTS_H */ 34 | -------------------------------------------------------------------------------- /tests/api_test.c: -------------------------------------------------------------------------------- 1 | #include "jsonsl.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "all-tests.h" 8 | 9 | 10 | /* "actual" must have at least all the same bits set as "expected" */ 11 | static void 12 | check_flags (int actual, int expected) 13 | { 14 | int i; 15 | 16 | for (i = 0; i < (int)(sizeof actual); i++) { 17 | if (expected & (1 << i)) { 
18 | if (!(actual & (1 << i))) { 19 | fprintf(stderr, "bit %d not set in special_flags\n", i); 20 | abort(); 21 | } 22 | } 23 | } 24 | } 25 | 26 | 27 | static void 28 | special_flags_test_pop_callback (jsonsl_t jsn, 29 | jsonsl_action_t action, 30 | struct jsonsl_state_st *state, 31 | const char *buf) 32 | { 33 | jsonsl_special_t flags = (jsonsl_special_t) jsn->data; 34 | if (state->type == JSONSL_T_SPECIAL) { 35 | check_flags (state->special_flags, flags); 36 | } 37 | } 38 | 39 | 40 | int 41 | main (int argc, char **argv) 42 | { 43 | typedef struct { 44 | const char *value; 45 | jsonsl_special_t expected_special_flags; 46 | } special_flags_test_t; 47 | 48 | special_flags_test_t tests[] = { 49 | { "1", JSONSL_SPECIALf_UNSIGNED }, 50 | { "1.0", JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_UNSIGNED }, 51 | { "0", JSONSL_SPECIALf_UNSIGNED }, 52 | { "0.0", JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_UNSIGNED }, 53 | { "-0.0", JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_SIGNED }, 54 | { NULL } 55 | }; 56 | 57 | special_flags_test_t *test; 58 | jsonsl_t jsn; 59 | char name[512]; 60 | char formatted_json[512]; 61 | int formatted_len; 62 | 63 | for (test = tests; test->value; test++) { 64 | snprintf (name, sizeof name, "parse \"%s\"", test->value); 65 | fprintf (stderr, "==== %-40s ====\n", name); 66 | formatted_len = snprintf (formatted_json, 67 | sizeof formatted_json, 68 | "{\"x\": %s}", 69 | test->value); 70 | 71 | jsn = jsonsl_new(0x2000); 72 | jsn->data = (void *) test->expected_special_flags; 73 | jsn->action_callback_POP = special_flags_test_pop_callback; 74 | jsonsl_enable_all_callbacks (jsn); 75 | 76 | jsonsl_feed (jsn, formatted_json, (size_t) formatted_len); 77 | jsonsl_destroy (jsn); 78 | } 79 | 80 | return 0; 81 | } 82 | -------------------------------------------------------------------------------- /tests/cliopts.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 
"cliopts.h" 8 | 9 | 10 | enum { 11 | CLIOPTS_ERR_SUCCESS, 12 | CLIOPTS_ERR_NEED_ARG, 13 | CLIOPTS_ERR_ISSWITCH, 14 | CLIOPTS_ERR_BADOPT, 15 | CLIOPTS_ERR_BAD_VALUE, 16 | CLIOPTS_ERR_UNRECOGNIZED 17 | }; 18 | 19 | struct cliopts_priv { 20 | cliopts_entry *entries; 21 | 22 | cliopts_entry *prev; 23 | cliopts_entry *current; 24 | 25 | char *errstr; 26 | int errnum; 27 | 28 | int argsplit; 29 | int wanted; 30 | 31 | char current_key[4096]; 32 | char current_value[4096]; 33 | }; 34 | 35 | enum { 36 | WANT_OPTION, 37 | WANT_VALUE, 38 | 39 | MODE_ERROR, 40 | MODE_RESTARGS, 41 | MODE_HELP 42 | }; 43 | 44 | #ifdef CLIOPTS_DEBUG 45 | 46 | #define cliopt_debug(...) \ 47 | fprintf(stderr, "(%s:%d) ", __func__, __LINE__); \ 48 | fprintf(stderr, __VA_ARGS__); \ 49 | fprintf(stderr, "\n") 50 | 51 | #else 52 | /** variadic macros not c89 */ 53 | static void cliopt_debug(void *bleh, ...) { } 54 | #endif /* CLIOPT_DEBUG */ 55 | 56 | static int 57 | parse_option(struct cliopts_priv *ctx, const char *key); 58 | 59 | 60 | static int 61 | parse_value(struct cliopts_priv *ctx, const char *value); 62 | 63 | /** 64 | * Various extraction/conversion functions for numerics 65 | */ 66 | 67 | #define _VERIFY_INT_COMMON(m1, m2) \ 68 | if (value == m1 || value > m2) { *errp = "Value too large"; return -1; } \ 69 | if (*endptr != '\0') { *errp = "Trailing garbage"; return -1; } 70 | 71 | static int 72 | extract_int(const char *s, void *dest, char **errp) 73 | { 74 | long int value; 75 | char *endptr = NULL; 76 | value = strtol(s, &endptr, 10); 77 | _VERIFY_INT_COMMON(LONG_MAX, INT_MAX) 78 | *(int*)dest = value; 79 | return 0; 80 | } 81 | 82 | static int 83 | extract_uint(const char *s, void *dest, char **errp) 84 | { 85 | unsigned long int value; 86 | char *endptr = NULL; 87 | value = strtoul(s, &endptr, 10); 88 | _VERIFY_INT_COMMON(ULONG_MAX, UINT_MAX) 89 | *(unsigned int*)dest = value; 90 | return 0; 91 | } 92 | 93 | static int 94 | extract_hex(const char *s, void *dest, char **errp) 95 | { 96 
| unsigned long value; 97 | char *endptr = NULL; 98 | value = strtoul(s, &endptr, 16); 99 | _VERIFY_INT_COMMON(ULONG_MAX, UINT_MAX); 100 | *(unsigned int*)dest = value; 101 | return 0; 102 | } 103 | 104 | #undef _VERIFY_INT_COMMON 105 | 106 | static int 107 | extract_float(const char *s, void *dest, char **errp) 108 | { 109 | char dummy_buf[4096]; 110 | float value; 111 | if (sscanf(s, "%f%s", &value, dummy_buf) != 1) { 112 | *errp = "Found trailing garbage"; 113 | return -1; 114 | } 115 | *(float*)dest = value; 116 | return 0; 117 | } 118 | 119 | typedef int(*cliopts_extractor_func)(const char*, void*, char**); 120 | 121 | 122 | /** 123 | * This function tries to extract a single value for an option key. 124 | * If it successfully has extracted a value, it returns MODE_VALUE. 125 | * If the entry takes no arguments, then the current string is a key, 126 | * and it will return MODE_OPTION. On error, MODE_ERROR is set, and errp 127 | * will point to a string. 128 | * 129 | * @param entry The current entry 130 | * @param value the string which might be a value 131 | * @errp a pointer which will be populated with the address of the error, if any 132 | * 133 | * @return a MODE_* type 134 | */ 135 | static int 136 | parse_value(struct cliopts_priv *ctx, 137 | const char *value) 138 | { 139 | cliopts_entry *entry = ctx->current; 140 | 141 | size_t vlen = strlen(value); 142 | cliopts_extractor_func exfn = NULL; 143 | int exret; 144 | int is_option = 0; 145 | 146 | cliopt_debug("Called with %s, want=%d", value, ctx->wanted); 147 | 148 | if (ctx->argsplit) { 149 | if (vlen > 2 && strncmp(value, "--", 2) == 0) { 150 | is_option = 1; 151 | } else if (*value == '-') { 152 | is_option = 1; 153 | } 154 | } 155 | 156 | if (is_option) { 157 | ctx->errstr = "Expected option. 
Got '-' or '--' prefixed value " 158 | "(use = if this is really a value)"; 159 | ctx->errnum = CLIOPTS_ERR_NEED_ARG; 160 | return MODE_ERROR; 161 | } 162 | 163 | if (entry->ktype == CLIOPTS_ARGT_STRING) { 164 | char *vp = malloc(vlen+1); 165 | vp[vlen] = 0; 166 | strcpy(vp, value); 167 | *(char**)entry->dest = vp; 168 | return WANT_OPTION; 169 | } 170 | 171 | if (entry->ktype == CLIOPTS_ARGT_FLOAT) { 172 | exfn = extract_float; 173 | } else if (entry->ktype == CLIOPTS_ARGT_HEX) { 174 | exfn = extract_hex; 175 | } else if (entry->ktype == CLIOPTS_ARGT_INT) { 176 | exfn = extract_int; 177 | } else if (entry->ktype == CLIOPTS_ARGT_UINT) { 178 | exfn = extract_uint; 179 | } else { 180 | fprintf(stderr, "Unrecognized type %d. Abort.\n", entry->ktype); 181 | } 182 | 183 | exret = exfn(value, entry->dest, &ctx->errstr); 184 | if (exret == 0) { 185 | return WANT_OPTION; 186 | } else { 187 | ctx->errnum = CLIOPTS_ERR_BAD_VALUE; 188 | } 189 | 190 | return MODE_ERROR; 191 | } 192 | 193 | /** 194 | * Like parse_value, except for keys. 195 | * 196 | * @param entries all option entries 197 | * @param key the current string from argv 198 | * @param errp a pointer which will be populated with the address of an error 199 | * string 200 | * 201 | * @param found_entry a pointer to be populated with the relevant entry 202 | * structure 203 | * @param kp a pointer which will be poplated with the address of the 'sanitized' 204 | * key string 205 | * 206 | * @param valp if the string is actually a key-value pair (i.e. 
--foo=bar) then 207 | * this will be populated with the address of that string 208 | * 209 | * @return MODE_OPTION if an option was found, MODE_VALUE if the current option 210 | * is a value, or MODE_ERROR on error 211 | */ 212 | static int 213 | parse_option(struct cliopts_priv *ctx, 214 | const char *key) 215 | { 216 | cliopts_entry *cur = NULL; 217 | int ii, prefix_len = 0; 218 | const char *valp = NULL; 219 | size_t klen; 220 | 221 | klen = strlen(key); 222 | ctx->errstr = NULL; 223 | ctx->prev = ctx->current; 224 | ctx->current = NULL; 225 | 226 | cliopt_debug("Called with %s, want=%d", key, ctx->wanted); 227 | if (klen == 0) { 228 | ctx->errstr = "Got an empty string"; 229 | ctx->errnum = CLIOPTS_ERR_BADOPT; 230 | return MODE_ERROR; 231 | } 232 | 233 | /** 234 | * figure out what type of option it is.. 235 | * it can either be a -c, --long, or --long=value 236 | */ 237 | while (*key == '-') { 238 | key++; 239 | prefix_len++; 240 | klen--; 241 | } 242 | 243 | for (ii = 0; ii < klen; ii++) { 244 | if (key[ii] == '"' || key[ii] == '\'') { 245 | ii = klen; 246 | break; 247 | 248 | } else if (key[ii] == '=' && prefix_len == 2) { 249 | /* only split on '=' if we're called as '--' */ 250 | valp = key + (ii + 1); 251 | break; 252 | } 253 | } 254 | 255 | GT_PARSEOPT: 256 | memset(ctx->current_value, 0, sizeof(ctx->current_value)); 257 | memcpy(ctx->current_key, key, ii); 258 | ctx->current_key[ii] = '\0'; 259 | 260 | if (valp) { 261 | strcpy(ctx->current_value, valp); 262 | } 263 | 264 | if (prefix_len == 0 || prefix_len > 2) { 265 | if (ctx->prev && ctx->prev->ktype == CLIOPTS_ARGT_NONE) { 266 | ctx->errstr = ""; 267 | ctx->errnum = CLIOPTS_ERR_ISSWITCH; 268 | } else { 269 | ctx->errstr = "Options must begin with either '-' or '--'"; 270 | ctx->errnum = CLIOPTS_ERR_BADOPT; 271 | } 272 | return MODE_ERROR; 273 | } 274 | 275 | /** 276 | * --help or -? 
277 | */ 278 | 279 | if ( (prefix_len == 1 && *key == '?') || 280 | (prefix_len == 2 && strcmp(key, "help") == 0)) { 281 | return MODE_HELP; 282 | } 283 | 284 | /** 285 | * Bare -- 286 | */ 287 | if (prefix_len == 2 && *key == '\0') { 288 | 289 | if (ctx->wanted == WANT_VALUE) { 290 | ctx->errnum = CLIOPTS_ERR_NEED_ARG; 291 | ctx->errstr = "Found bare '--', but value wanted"; 292 | return MODE_ERROR; 293 | } 294 | 295 | return MODE_RESTARGS; 296 | } 297 | 298 | for (cur = ctx->entries; cur->dest; cur++) { 299 | int optlen; 300 | if (prefix_len == 1) { 301 | if (cur->kshort == ctx->current_key[0]) { 302 | ctx->current = cur; 303 | break; 304 | } 305 | continue; 306 | } 307 | 308 | /** else, prefix_len is 2 */ 309 | if (cur->klong == NULL || 310 | (optlen = strlen(cur->klong) != klen) || 311 | strcmp(cur->klong, ctx->current_key) != 0) { 312 | 313 | continue; 314 | } 315 | 316 | ctx->current = cur; 317 | break; 318 | } 319 | 320 | if (!ctx->current) { 321 | ctx->errstr = "Unknown option"; 322 | ctx->errnum = CLIOPTS_ERR_UNRECOGNIZED; 323 | return MODE_ERROR; 324 | } 325 | 326 | ctx->current->found++; 327 | if (ctx->current->klong != CLIOPTS_ARGT_NONE) { 328 | ctx->wanted = WANT_VALUE; 329 | } 330 | 331 | if (ctx->current_value[0]) { 332 | /* --foo=bar */ 333 | if (ctx->current->ktype == CLIOPTS_ARGT_NONE) { 334 | ctx->errnum = CLIOPTS_ERR_ISSWITCH; 335 | ctx->errstr = "Option takes no arguments"; 336 | return MODE_ERROR; 337 | } else { 338 | return parse_value(ctx, ctx->current_value); 339 | } 340 | } 341 | 342 | if (ctx->current->ktype == CLIOPTS_ARGT_NONE) { 343 | *(char*)ctx->current->dest = 1; 344 | 345 | if (prefix_len == 1 && klen > 1) { 346 | /** 347 | * e.g. ls -lsh 348 | */ 349 | klen--; 350 | key++; 351 | 352 | /** 353 | * While we can also possibly recurse, this may be a security risk 354 | * as it wouldn't take much to cause a deep recursion on the stack 355 | * which will cause all sorts of nasties. 
356 | */ 357 | goto GT_PARSEOPT; 358 | } 359 | return WANT_OPTION; 360 | 361 | } else if (prefix_len == 1 && klen > 1) { 362 | 363 | /* e.g. patch -p0 */ 364 | ctx->wanted = WANT_VALUE; 365 | return parse_value(ctx, key + 1); 366 | } 367 | return WANT_VALUE; 368 | } 369 | 370 | static char * 371 | get_option_name(cliopts_entry *entry, char *buf) 372 | { 373 | /* [-s,--option] */ 374 | char *bufp = buf; 375 | bufp += sprintf(buf, "["); 376 | if (entry->kshort) { 377 | bufp += sprintf(bufp, "-%c", entry->kshort); 378 | } 379 | if (entry->klong) { 380 | if (entry->kshort) { 381 | bufp += sprintf(bufp, ","); 382 | } 383 | bufp += sprintf(bufp, "--%s", entry->klong); 384 | } 385 | sprintf(bufp, "]"); 386 | return buf; 387 | } 388 | 389 | static char* 390 | format_option_help(cliopts_entry *entry, char *buf) 391 | { 392 | char *bufp = buf; 393 | 394 | if (entry->kshort) { 395 | bufp += sprintf(bufp, " -%c ", entry->kshort); 396 | } 397 | 398 | #define _advance_margin(offset) \ 399 | while(bufp-buf < offset || *bufp) { \ 400 | if (!*bufp) { \ 401 | *bufp = ' '; \ 402 | } \ 403 | bufp++; \ 404 | } 405 | 406 | _advance_margin(4) 407 | 408 | if (entry->klong) { 409 | bufp += sprintf(bufp, " --%s ", entry->klong); 410 | } 411 | 412 | if (entry->vdesc) { 413 | bufp += sprintf(bufp, " <%s> ", entry->vdesc); 414 | } 415 | 416 | if (entry->help) { 417 | _advance_margin(35) 418 | bufp += sprintf(bufp, " %s ", entry->help); 419 | } 420 | 421 | *bufp = '\0'; 422 | return buf; 423 | #undef _advance_margin 424 | } 425 | 426 | static void 427 | print_help(struct cliopts_priv *ctx, const char *progname) 428 | { 429 | cliopts_entry *cur; 430 | cliopts_entry helpent = { 0 }; 431 | char helpbuf[512] = { 0 }; 432 | 433 | helpent.klong = "help"; 434 | helpent.kshort = '?'; 435 | helpent.help = "this message"; 436 | 437 | fprintf(stderr, "Usage:\n"); 438 | fprintf(stderr, " %s [OPTIONS...]\n\n", progname); 439 | 440 | 441 | for (cur = ctx->entries; cur->dest; cur++) { 442 | memset(helpbuf, 0, 
sizeof(helpbuf)); 443 | format_option_help(cur, helpbuf); 444 | fprintf(stderr, " %s\n", helpbuf); 445 | } 446 | memset(helpbuf, 0, sizeof(helpbuf)); 447 | fprintf(stderr, " %s\n", format_option_help(&helpent, helpbuf)); 448 | 449 | } 450 | 451 | static void 452 | dump_error(struct cliopts_priv *ctx) 453 | { 454 | fprintf(stderr, "Couldn't parse options: %s\n", ctx->errstr); 455 | if (ctx->errnum == CLIOPTS_ERR_BADOPT) { 456 | fprintf(stderr, "Bad option: %s", ctx->current_key); 457 | } else if (ctx->errnum == CLIOPTS_ERR_BAD_VALUE) { 458 | fprintf(stderr, "Bad value '%s' for %s", 459 | ctx->current_value, 460 | ctx->current_key); 461 | } else if (ctx->errnum == CLIOPTS_ERR_UNRECOGNIZED) { 462 | fprintf(stderr, "No such option: %s", ctx->current_key); 463 | } else if (ctx->errnum == CLIOPTS_ERR_ISSWITCH) { 464 | char optbuf[64] = { 0 }; 465 | fprintf(stderr, "Option %s takes no arguments", 466 | get_option_name(ctx->prev, optbuf)); 467 | } 468 | fprintf(stderr, "\n"); 469 | 470 | } 471 | 472 | int 473 | cliopts_parse_options(cliopts_entry *entries, 474 | int argc, 475 | char **argv, 476 | int *lastidx, 477 | struct cliopts_extra_settings *settings) 478 | { 479 | /** 480 | * Now let's build ourselves a 481 | */ 482 | int curmode; 483 | int ii, ret = 0; 484 | struct cliopts_priv ctx = { 0 }; 485 | struct cliopts_extra_settings default_settings = { 0 }; 486 | 487 | ctx.entries = entries; 488 | 489 | if (!settings) { 490 | settings = &default_settings; 491 | settings->progname = argv[0]; 492 | } 493 | 494 | ii = (settings->argv_noskip) ? 
0 : 1; 495 | 496 | if (ii >= argc) { 497 | *lastidx = 0; 498 | ret = 0; 499 | goto GT_CHECK_REQ; 500 | return 0; 501 | } 502 | 503 | curmode = WANT_OPTION; 504 | ctx.wanted = curmode; 505 | 506 | for (; ii < argc; ii++) { 507 | 508 | if (curmode == WANT_OPTION) { 509 | curmode = parse_option(&ctx, argv[ii]); 510 | } else if (curmode == WANT_VALUE) { 511 | curmode = parse_value(&ctx, argv[ii]); 512 | } 513 | 514 | if (curmode == MODE_ERROR) { 515 | if (settings->error_nohelp == 0) { 516 | dump_error(&ctx); 517 | } 518 | ret = -1; 519 | break; 520 | } else if (curmode == MODE_HELP) { 521 | if (settings->help_noflag) { 522 | /* ignore it ? */ 523 | continue; 524 | } 525 | 526 | print_help(&ctx, settings->progname); 527 | exit(0); 528 | 529 | } else if (curmode == MODE_RESTARGS) { 530 | ii++; 531 | break; 532 | } else { 533 | ctx.wanted = curmode; 534 | } 535 | } 536 | 537 | *lastidx = ii; 538 | 539 | if (curmode == WANT_VALUE) { 540 | ret = -1; 541 | 542 | if (settings->error_nohelp == 0) { 543 | fprintf(stderr, 544 | "Option %s requires argument\n", 545 | ctx.current_key); 546 | } 547 | goto GT_RET; 548 | } 549 | 550 | GT_CHECK_REQ: 551 | { 552 | cliopts_entry *cur_ent; 553 | for (cur_ent = entries; cur_ent->dest; cur_ent++) { 554 | char entbuf[128] = { 0 }; 555 | if (cur_ent->found || cur_ent->required == 0) { 556 | continue; 557 | } 558 | 559 | ret = -1; 560 | if (settings->error_nohelp) { 561 | goto GT_RET; 562 | } 563 | 564 | fprintf(stderr, "Required option %s missing\n", 565 | get_option_name(cur_ent, entbuf)); 566 | } 567 | } 568 | 569 | GT_RET: 570 | if (ret == -1) { 571 | if (settings->error_nohelp == 0) { 572 | print_help(&ctx, settings->progname); 573 | } 574 | if (settings->error_noexit == 0) { 575 | exit(EXIT_FAILURE); 576 | } 577 | } 578 | return ret; 579 | } 580 | -------------------------------------------------------------------------------- /tests/cliopts.h: -------------------------------------------------------------------------------- 1 | #ifndef 
CLIOPTS_H_ 2 | #define CLIOPTS_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif /* __cplusplus */ 7 | 8 | /** 9 | * Various option types 10 | */ 11 | typedef enum { 12 | /** takes no argument, dest should be anything big enough to hold a boolean*/ 13 | CLIOPTS_ARGT_NONE, 14 | 15 | /** simple int type, dest should be an 'int' */ 16 | CLIOPTS_ARGT_INT, 17 | 18 | /** dest should be an unsigned int */ 19 | CLIOPTS_ARGT_UINT, 20 | 21 | /** dest should be an unsigned int, but command line format is hex */ 22 | CLIOPTS_ARGT_HEX, 23 | 24 | /** dest should be a char**. Note that the string is allocated, so you should 25 | * free() it when done */ 26 | CLIOPTS_ARGT_STRING, 27 | 28 | /** dest should be a float* */ 29 | CLIOPTS_ARGT_FLOAT 30 | } cliopts_argtype_t; 31 | 32 | typedef struct { 33 | /** 34 | * Input parameters 35 | */ 36 | 37 | /** Short option, i.e. -v (0 for none) */ 38 | char kshort; 39 | 40 | /** long option, i.e. --verbose, NULL for none */ 41 | const char *klong; 42 | 43 | /** type of value */ 44 | cliopts_argtype_t ktype; 45 | 46 | /** destination pointer for value */ 47 | void *dest; 48 | 49 | /** help string for this option */ 50 | const char *help; 51 | 52 | /** description of the value, e.g. --file=FILE */ 53 | const char *vdesc; 54 | 55 | 56 | /** set this to true if the user must provide this option */ 57 | int required; 58 | 59 | 60 | /** 61 | * Output parameters 62 | */ 63 | 64 | /** whether this option was encountered on the command line */ 65 | int found; 66 | 67 | } cliopts_entry; 68 | 69 | struct cliopts_extra_settings { 70 | /** Assume actual arguments start from argv[0], not argv[1] */ 71 | int argv_noskip; 72 | /** Don't exit on error */ 73 | int error_noexit; 74 | /** Don't print help on error */ 75 | int error_nohelp; 76 | /** Don't interpret --help or -? as help flags */ 77 | int help_noflag; 78 | /** Program name (defaults to argv[0]) */ 79 | const char *progname; 80 | }; 81 | 82 | /** 83 | * Parse options. 
84 | * 85 | * @param entries an array of cliopts_entry structures. The list should be 86 | * terminated with a structure which has its dest field set to NULL 87 | * 88 | * @param argc the count of arguments 89 | * @param argv the actual list of arguments 90 | * @param lastidx populated with the amount of elements from argv actually read 91 | * @params setting a structure defining extra settings for the argument parser. 92 | * May be NULL 93 | * 94 | * @return 0 for success, -1 on error. 95 | */ 96 | int 97 | cliopts_parse_options(cliopts_entry *entries, 98 | int argc, 99 | char **argv, 100 | int *lastidx, 101 | struct cliopts_extra_settings *settings); 102 | 103 | 104 | #ifdef __cplusplus 105 | } 106 | #endif /* __cplusplus */ 107 | 108 | #endif /* CLIOPTS_H_ */ 109 | -------------------------------------------------------------------------------- /tests/cxxtest.cpp: -------------------------------------------------------------------------------- 1 | #include "jsonsl.c" 2 | 3 | int main(int, char **) 4 | { 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /tests/fail-tests.c: -------------------------------------------------------------------------------- 1 | #define JSONSL_FAILURE_TESTS 2 | #include "json_test.c" 3 | -------------------------------------------------------------------------------- /tests/jpr_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "all-tests.h" 6 | 7 | #define _JSTR(e) \ 8 | "\"" #e "\"" 9 | 10 | const char SampleJSON[] = 11 | "{" 12 | _JSTR(foo) ": {" 13 | _JSTR(bar) ": [" 14 | _JSTR(element0) "," 15 | _JSTR(element1) 16 | "]," 17 | _JSTR(inner object) ": {" \ 18 | _JSTR(baz) ":" _JSTR(qux) 19 | "}" 20 | "}" 21 | "}"; 22 | 23 | static void check_path(const char *path) 24 | { 25 | jsonsl_error_t err; 26 | size_t ii; 27 | jsonsl_jpr_t jpr; 28 | 29 | fprintf(stderr, "=== Testing %s 
===\n", path); 30 | 31 | jpr = jsonsl_jpr_new(path, &err); 32 | if (jpr == NULL) { 33 | fprintf(stderr, "Couldn't create new JPR with path '%s': %s\n", 34 | path, jsonsl_strerror(err)); 35 | abort(); 36 | } 37 | printf("%lu components\n", jpr->ncomponents); 38 | 39 | for (ii = 0; ii < jpr->ncomponents; ii++) { 40 | struct jsonsl_jpr_component_st *comp = jpr->components + ii; 41 | printf("[%lu]: ", ii); 42 | if (comp->ptype == JSONSL_PATH_ROOT) { 43 | printf("Root: /\n"); 44 | } else if (comp->ptype == JSONSL_PATH_NUMERIC) { 45 | printf("\tNumeric: %lu\n", comp->idx); 46 | } else if (comp->ptype == JSONSL_PATH_WILDCARD) { 47 | printf("\tWildcard: %c\n", JSONSL_PATH_WILDCARD_CHAR); 48 | } else { 49 | printf("\tString: %s\n", comp->pstr); 50 | } 51 | } 52 | printf("Destroying..\n\n"); 53 | jsonsl_jpr_destroy(jpr); 54 | } 55 | 56 | static void check_bad_path(const char *bad_path) 57 | { 58 | jsonsl_error_t err; 59 | jsonsl_jpr_t jpr; 60 | fprintf(stderr, "=== Checking bad path %s ===\n", bad_path); 61 | jpr = jsonsl_jpr_new(bad_path, &err); 62 | if (jpr != NULL) { 63 | fprintf(stderr, "Expected %s to fail validation\n", bad_path); 64 | abort(); 65 | } 66 | } 67 | 68 | static void check_match(const char *path, 69 | jsonsl_type_t type, 70 | unsigned int level, 71 | void *spec, 72 | jsonsl_jpr_match_t expected) 73 | { 74 | char *key; 75 | size_t nkey; 76 | jsonsl_jpr_t jpr; 77 | jsonsl_jpr_match_t matchres; 78 | fprintf(stderr, "=== Match jpr=%-15s parent(type=%s,level=%d)", 79 | path, jsonsl_strtype(type), level); 80 | 81 | if (type == JSONSL_T_LIST) { 82 | key = NULL; 83 | nkey = (size_t)spec; 84 | fprintf(stderr, " idx=%lu", nkey); 85 | } else { 86 | key = (char*)spec; 87 | nkey = strlen(spec); 88 | fprintf(stderr, " key=%-10s", key); 89 | } 90 | fprintf(stderr, " Exp: %s ===\n", jsonsl_strmatchtype(expected)); 91 | 92 | jpr = jsonsl_jpr_new(path, NULL); 93 | assert(jpr); 94 | 95 | matchres = jsonsl_jpr_match(jpr, type, level, key, nkey); 96 | if (matchres != expected) 
{ 97 | fprintf(stderr, "Expected %s, got %s\n", jsonsl_strmatchtype(expected), 98 | jsonsl_strmatchtype(matchres)); 99 | abort(); 100 | } 101 | } 102 | 103 | struct lexer_global_st { 104 | const char *hkey; 105 | size_t nhkey; 106 | }; 107 | 108 | static void push_callback(jsonsl_t jsn, 109 | jsonsl_action_t action, 110 | struct jsonsl_state_st *state, 111 | const jsonsl_char_t *at) 112 | { 113 | struct lexer_global_st *global = (struct lexer_global_st*)jsn->data; 114 | jsonsl_jpr_match_t matchres; 115 | jsonsl_jpr_t matchjpr; 116 | if (state->type == JSONSL_T_HKEY) { 117 | return; 118 | } 119 | matchjpr = jsonsl_jpr_match_state(jsn, state, 120 | global->hkey, 121 | global->nhkey, 122 | &matchres); 123 | printf("Got match result: %d\n", matchres); 124 | } 125 | 126 | static void pop_callback(jsonsl_t jsn, 127 | jsonsl_action_t action, 128 | struct jsonsl_state_st *state, 129 | const jsonsl_char_t *at) 130 | { 131 | struct lexer_global_st *global = (struct lexer_global_st*)jsn->data; 132 | if (state->type == JSONSL_T_HKEY) { 133 | global->hkey = at - (jsn->pos - state->pos_begin); 134 | global->hkey++; 135 | global->nhkey = (jsn->pos - state->pos_begin)-1; 136 | printf("Got key.."); 137 | fwrite(global->hkey, 1, global->nhkey, stdout); 138 | printf("\n"); 139 | } 140 | } 141 | 142 | static int error_callback(jsonsl_t jsn, 143 | jsonsl_error_t error, 144 | struct jsonsl_state_st *state, 145 | jsonsl_char_t *at) 146 | { 147 | fprintf(stderr, "Got error %s at pos %lu. 
Remaining: %s\n", 148 | jsonsl_strerror(error), jsn->pos, at); 149 | abort(); 150 | return 0; 151 | } 152 | 153 | 154 | static void lexjpr(void) 155 | { 156 | struct lexer_global_st global; 157 | jsonsl_t jsn; 158 | jsonsl_jpr_t jpr; 159 | jpr = jsonsl_jpr_new("/foo/^/1", NULL); 160 | assert(jpr); 161 | jsn = jsonsl_new(24); 162 | assert(jsn); 163 | jsonsl_jpr_match_state_init(jsn, &jpr, 1); 164 | jsn->error_callback = error_callback; 165 | jsn->action_callback_POP = pop_callback; 166 | jsn->action_callback_PUSH = push_callback; 167 | jsonsl_enable_all_callbacks(jsn); 168 | jsn->data = &global; 169 | jsonsl_feed(jsn, SampleJSON, sizeof(SampleJSON)-1); 170 | } 171 | 172 | JSONSL_TEST_JPR_FUNC 173 | { 174 | printf("%s\n", SampleJSON); 175 | if (getenv("JSONSL_QUIET_TESTS")) { 176 | freopen("/dev/null", "w", stdout); 177 | } 178 | /* This should match only the root object */ 179 | check_path("/"); 180 | 181 | /* This should match { "foo" : } */ 182 | check_path("/foo"); 183 | 184 | /* This should match { "foo" : { "another prop": } } */ 185 | check_path("/foo/another%20prop"); 186 | 187 | /* this should match { "foo" : { "another prop": { "baz" : } } } */ 188 | check_path("/foo/another%20prop/baz"); 189 | 190 | /* This should match { "foo" : { "anArray" : [ , ] } } */ 191 | check_path("/foo/anArray/0"); 192 | 193 | /* This should match any of the following: 194 | * { 195 | * "hello" : { 196 | * "cruel" : { 197 | * "world" : { 198 | * .... 199 | * } 200 | * }, 201 | * "kind" : { 202 | * "world" : { 203 | * .... 
204 | * } 205 | * } 206 | * } 207 | * } 208 | */ 209 | check_path("/hello/^/world"); 210 | 211 | check_bad_path("rootless/uri"); 212 | check_bad_path("/doubly-escaped//uri"); 213 | check_bad_path("/%GG"); 214 | check_bad_path("/incomplete%f/hex"); 215 | 216 | check_match("/", JSONSL_T_OBJECT, 0, "some_key", JSONSL_MATCH_COMPLETE); 217 | check_match("/", JSONSL_T_OBJECT, 1, "some_key", JSONSL_MATCH_NOMATCH); 218 | check_match("/^", JSONSL_T_OBJECT, 1, "some_key", JSONSL_MATCH_COMPLETE); 219 | check_match("/foo/bar/baz", JSONSL_T_OBJECT, 2, "bar", JSONSL_MATCH_POSSIBLE); 220 | check_match("/foo/bar/^/grrrrrr", JSONSL_T_OBJECT, 3, "anything", JSONSL_MATCH_POSSIBLE); 221 | check_match("/foo/bar/something/grrr", JSONSL_T_OBJECT, 3, "anything", JSONSL_MATCH_NOMATCH); 222 | 223 | lexjpr(); 224 | return 0; 225 | } 226 | -------------------------------------------------------------------------------- /tests/json_test.c: -------------------------------------------------------------------------------- 1 | #include "jsonsl.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "all-tests.h" 8 | 9 | static int WantFail = 0; 10 | static jsonsl_error_t WantError = 0; 11 | 12 | void fmt_level(const char *buf, size_t nbuf, int levels) 13 | { 14 | const char *c = buf; 15 | int ii; 16 | for (ii = 0; ii < levels; ii++) { 17 | putchar('\t'); 18 | } 19 | 20 | while (nbuf && *c) { 21 | putchar(*c); 22 | if (*c == '\n') { 23 | for (ii = 0; ii < levels; ii++) { 24 | putchar(' '); 25 | } 26 | } 27 | c++; 28 | nbuf--; 29 | } 30 | putchar('\n'); 31 | } 32 | 33 | void state_callback(jsonsl_t jsn, 34 | jsonsl_action_t action, 35 | struct jsonsl_state_st *state, 36 | const char *buf) 37 | { 38 | /* We are called here with the jsn object, the state (PUSH or POP), 39 | * the 'state' object, which contains information about the level of 40 | * nesting we are descending into/ascending from, and a pointer to the 41 | * start position of the detectin of this nesting 42 | */ 
43 | /* 44 | printf("@%-5lu L%d %c%s\n", 45 | jsn->pos, 46 | state->level, 47 | action, 48 | jsonsl_strtype(state->type)); 49 | */ 50 | /* 51 | if (action == JSONSL_ACTION_POP) { 52 | size_t state_len = jsn->pos - state->pos_begin; 53 | } 54 | */ 55 | } 56 | 57 | int error_callback(jsonsl_t jsn, 58 | jsonsl_error_t err, 59 | struct jsonsl_state_st *state, 60 | char *errat) 61 | { 62 | /* Error callback. In theory, this can return a true value 63 | * and maybe 'correct' and seek ahead of the buffer, and try to 64 | * do some correction. 65 | */ 66 | if (WantFail) { 67 | printf("Got error %s (PASS)\n", jsonsl_strerror(err)); 68 | WantError = err; 69 | return 0; 70 | } 71 | 72 | fprintf(stderr, "Got parse error at '%c', pos %lu\n", *errat, jsn->pos); 73 | fprintf(stderr, "Error is %s\n", jsonsl_strerror(err)); 74 | fprintf(stderr, "Remaining text: %s\n", errat); 75 | abort(); 76 | return 0; 77 | } 78 | 79 | 80 | void parse_single_file(const char *path) 81 | { 82 | char *buf, *bufp; 83 | long fsize; 84 | size_t nread = 0; 85 | FILE *fh; 86 | jsonsl_t jsn; 87 | struct stat sb = { 0 }; 88 | WantError = 0; 89 | /* open our file */ 90 | if (stat(path, &sb) == -1) { 91 | perror(path); 92 | return; 93 | } 94 | if (S_ISDIR(sb.st_mode)) { 95 | fprintf(stderr, "Skipping directory '%s'\n", path); 96 | return; 97 | } 98 | fh = fopen(path, "r"); 99 | if (fh == NULL) { 100 | perror(path); 101 | return; 102 | } 103 | 104 | /* Declare that we will support up to 512 nesting levels. 105 | * Each level of nesting requires about ~40 bytes (allocated at initialization) 106 | * to maintain state information. 
107 | */ 108 | jsn = jsonsl_new(0x2000); 109 | 110 | /* Set up our error callbacks (to be called when an error occurs) 111 | * and a nest callback (when a level changes in 'nesting') 112 | */ 113 | jsn->error_callback = error_callback; 114 | jsn->action_callback = state_callback; 115 | 116 | /* Declare that we're intertested in receiving callbacks about 117 | * json 'Object' and 'List' types. 118 | */ 119 | jsonsl_enable_all_callbacks(jsn); 120 | /* read into the buffer */ 121 | 122 | /** 123 | * To avoid recomputing offsets and relative positioning, 124 | * we will maintain the buffer, but this is not strictly required. 125 | */ 126 | fseek(fh, 0, SEEK_END); 127 | fsize = ftell(fh); 128 | if (fsize == -1) { 129 | perror(path); 130 | fclose(fh); 131 | return; 132 | } 133 | assert(fsize < 0x1000000); 134 | buf = malloc(fsize); 135 | bufp = buf; 136 | fseek(fh, 0, SEEK_SET); 137 | while ( (nread = fread(bufp, 1, 4096, fh)) ) { 138 | jsonsl_feed(jsn, bufp, nread); 139 | bufp += nread; 140 | } 141 | 142 | if (WantFail && WantError == 0) { 143 | fprintf(stderr, "Expected error but didn't find any!\n"); 144 | abort(); 145 | } 146 | jsonsl_destroy(jsn); 147 | fclose(fh); 148 | 149 | free(buf); 150 | } 151 | 152 | JSONSL_TEST_JSON_FUNC 153 | { 154 | int ii; 155 | if (getenv("JSONSL_QUIET_TESTS")) { 156 | freopen(DEVNULL, "w", stdout); 157 | } 158 | #ifdef JSONSL_FAILURE_TESTS 159 | WantFail = 1; 160 | #else 161 | if (getenv("JSONSL_FAIL_TESTS")) { 162 | printf("Want Fail..\n"); 163 | WantFail = 1; 164 | } 165 | #endif 166 | if (argc < 2) { 167 | fprintf(stderr, "Usage: %s FILES..\n", argv[0]); 168 | exit(EXIT_FAILURE); 169 | } 170 | 171 | for (ii = 1; ii < argc && argv[ii]; ii++) { 172 | int rv; 173 | struct stat sb = { 0 }; 174 | rv = stat(argv[ii], &sb); 175 | if (rv == -1) { 176 | fprintf(stderr, "Couldn't stat '%s': %s\n", 177 | argv[ii], strerror(errno)); 178 | return EXIT_FAILURE; 179 | } 180 | 181 | if (S_ISDIR(sb.st_mode)) { 182 | fprintf(stderr, "Skipping 
directory '%s'\n", argv[ii]); 183 | continue; 184 | } 185 | 186 | fprintf(stderr, "==== %-40s ====\n", argv[ii]); 187 | parse_single_file(argv[ii]); 188 | } 189 | 190 | return 0; 191 | } 192 | -------------------------------------------------------------------------------- /tests/match_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define _JSTR(e) \ 8 | "\"" #e "\"" 9 | 10 | const char SampleJSON[] = 11 | "{" 12 | _JSTR(foo) ": {" 13 | _JSTR(bar) ": [" 14 | _JSTR(element0) "," 15 | _JSTR(element1) 16 | "]," 17 | _JSTR(inner object) ": {" \ 18 | _JSTR(baz) ":" _JSTR(qux) 19 | "}" 20 | "}" 21 | "}"; 22 | 23 | typedef struct { 24 | jsonsl_jpr_t jpr; 25 | jsonsl_jpr_match_t match_status; 26 | const char *buf; 27 | const char *key; 28 | size_t nkey; 29 | 30 | unsigned match_type; 31 | size_t match_begin; 32 | size_t match_end; 33 | unsigned match_level; 34 | } match_context; 35 | 36 | static void 37 | push_callback(jsonsl_t jsn, jsonsl_action_t action, 38 | struct jsonsl_state_st *state, const jsonsl_char_t *at) 39 | { 40 | match_context *ctx = jsn->data; 41 | jsonsl_jpr_match_t matchres; 42 | assert(ctx->match_status != JSONSL_MATCH_COMPLETE); 43 | 44 | if (state->type == JSONSL_T_HKEY) { 45 | ctx->key = ctx->buf + state->pos_begin + 1; 46 | return; 47 | } 48 | 49 | matchres = jsonsl_path_match( 50 | ctx->jpr, jsonsl_last_state(jsn, state), state, ctx->key, ctx->nkey); 51 | 52 | if (matchres == JSONSL_MATCH_NOMATCH) { 53 | state->ignore_callback = 1; 54 | return; 55 | } else if (matchres == JSONSL_MATCH_TYPE_MISMATCH) { 56 | ctx->match_status = matchres; 57 | jsonsl_stop(jsn); 58 | return; 59 | } else if (matchres == JSONSL_MATCH_COMPLETE) { 60 | jsn->max_callback_level = state->level + 1; 61 | } else { 62 | /* POSSIBLE */ 63 | } 64 | 65 | ctx->match_status = matchres; 66 | ctx->match_level = state->level; 67 | } 68 | 69 | static void 70 | 
pop_callback(jsonsl_t jsn, 71 | jsonsl_action_t action, struct jsonsl_state_st *state, 72 | const jsonsl_char_t *at) 73 | { 74 | match_context *ctx = jsn->data; 75 | 76 | if (state->type == JSONSL_T_HKEY) { 77 | ctx->key = ctx->buf + state->pos_begin + 1; 78 | ctx->nkey = jsn->pos - state->pos_begin - 1; 79 | return; 80 | } 81 | 82 | if (ctx->match_status == JSONSL_MATCH_COMPLETE) { 83 | ctx->match_end = jsn->pos; 84 | jsonsl_stop(jsn); 85 | } 86 | } 87 | 88 | static int 89 | error_callback(jsonsl_t jsn, jsonsl_error_t error, 90 | struct jsonsl_state_st *state, jsonsl_char_t *at) 91 | { 92 | fprintf(stderr, "Got error %s at pos %lu. Remaining: %s\n", 93 | jsonsl_strerror(error), jsn->pos, at); 94 | abort(); 95 | return 0; 96 | } 97 | 98 | static void 99 | do_match(jsonsl_jpr_match_t exp_status, unsigned exp_type, int comptype, ...) 100 | { 101 | struct jsonsl_jpr_component_st comps[64]; 102 | struct jsonsl_jpr_st jprst; 103 | va_list ap; 104 | size_t ncomps = 1; 105 | jsonsl_t jsn = jsonsl_new(512); 106 | match_context mctx = { 0 }; 107 | 108 | memset(comps, 0, sizeof comps); 109 | memset(&jprst, 0, sizeof jprst); 110 | 111 | 112 | comps[0].ptype = JSONSL_PATH_ROOT; 113 | 114 | va_start(ap, comptype); 115 | 116 | while (comptype != JSONSL_PATH_INVALID) { 117 | if (comptype == JSONSL_PATH_STRING) { 118 | const char *s = va_arg(ap, const char *); 119 | comps[ncomps].pstr = (char *)s; 120 | comps[ncomps].len = strlen(s); 121 | comps[ncomps].ptype = JSONSL_PATH_STRING; 122 | } else { 123 | comps[ncomps].idx = va_arg(ap, int); 124 | comps[ncomps].ptype = JSONSL_PATH_NUMERIC; 125 | comps[ncomps].is_arridx = 1; 126 | } 127 | ncomps++; 128 | comptype = va_arg(ap, int); 129 | } 130 | 131 | va_end(ap); 132 | 133 | jprst.components = comps; 134 | jprst.ncomponents = ncomps; 135 | jprst.match_type = exp_type; 136 | 137 | jsonsl_enable_all_callbacks(jsn); 138 | jsn->action_callback_POP = pop_callback; 139 | jsn->action_callback_PUSH = push_callback; 140 | jsn->error_callback = 
error_callback; 141 | jsn->data = &mctx; 142 | 143 | mctx.buf = SampleJSON; 144 | mctx.jpr = &jprst; 145 | mctx.match_status = JSONSL_MATCH_NOMATCH; 146 | 147 | jsonsl_feed(jsn, SampleJSON, strlen(SampleJSON)); 148 | assert(mctx.match_status == exp_status); 149 | jsonsl_destroy(jsn); 150 | } 151 | 152 | int main(int argc, char **argv) 153 | { 154 | /* Match is OK */ 155 | do_match(JSONSL_MATCH_COMPLETE, JSONSL_T_LIST, 156 | JSONSL_PATH_STRING, "foo", 157 | JSONSL_PATH_STRING, "bar", 158 | JSONSL_PATH_INVALID); 159 | 160 | /* Match is actually a list! */ 161 | do_match(JSONSL_MATCH_TYPE_MISMATCH, JSONSL_T_STRING, 162 | JSONSL_PATH_STRING, "foo", 163 | JSONSL_PATH_STRING, "bar", 164 | JSONSL_PATH_INVALID); 165 | 166 | /* Bad intermediate path (array index for dict parent) */ 167 | do_match(JSONSL_MATCH_TYPE_MISMATCH, JSONSL_T_STRING, 168 | JSONSL_PATH_STRING, "foo", 169 | JSONSL_PATH_NUMERIC, 29, 170 | JSONSL_PATH_INVALID); 171 | 172 | /* Bad intermediate path (string key for array parent) */ 173 | do_match(JSONSL_MATCH_TYPE_MISMATCH, JSONSL_T_STRING, 174 | JSONSL_PATH_STRING, "foo", 175 | JSONSL_PATH_STRING, "bar", 176 | JSONSL_PATH_STRING, "baz", 177 | JSONSL_PATH_INVALID); 178 | 179 | /* Ok intermediate path matching (but index not found) */ 180 | do_match(JSONSL_MATCH_POSSIBLE, JSONSL_T_STRING, 181 | JSONSL_PATH_STRING, "foo", 182 | JSONSL_PATH_STRING, "bar", 183 | JSONSL_PATH_NUMERIC, 99, 184 | JSONSL_PATH_INVALID); 185 | return 0; 186 | } 187 | -------------------------------------------------------------------------------- /tests/unescape.c: -------------------------------------------------------------------------------- 1 | #undef NDEBUG 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "all-tests.h" 8 | 9 | static size_t res; 10 | static jsonsl_error_t err; 11 | static char *out; 12 | static int strtable[0xff] = { 0 }; 13 | const char *escaped; 14 | 15 | /** 16 | * Check a single octet escape of four hex digits 17 | */ 18 | void 
test_single_uescape(void) 19 | { 20 | escaped = "\\u002B"; 21 | strtable['u'] = 1; 22 | out = malloc(strlen(escaped)+1); 23 | res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err); 24 | assert(res == 1); 25 | assert(out[0] == 0x2b); 26 | free(out); 27 | } 28 | 29 | /** 30 | * Test that we handle the null escape correctly (or that we do it right) 31 | */ 32 | void test_null_escape(void) 33 | { 34 | escaped = "\\u0000"; 35 | strtable['u'] = 1; 36 | out = malloc(strlen(escaped)+1); 37 | res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err); 38 | assert(res == 1); 39 | assert(out[0] == '\0'); 40 | free(out); 41 | } 42 | 43 | /** 44 | * Test multiple sequences of escapes. 45 | */ 46 | void test_multibyte_escape(void) 47 | { 48 | unsigned flags; 49 | const char *exp = "\xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d"; 50 | escaped = "\\u05e9\\u05dc\\u05d5\\u05dd"; 51 | strtable['u'] = 1; 52 | out = malloc(strlen(escaped) + 1); 53 | res = jsonsl_util_unescape_ex(escaped, out, strlen(escaped), strtable, 54 | &flags, &err, NULL); 55 | assert(res != 0); 56 | assert(res == strlen(exp)); 57 | assert(memcmp(exp, out, strlen(exp)) == 0); 58 | assert(flags & JSONSL_SPECIALf_NONASCII); 59 | free(out); 60 | } 61 | 62 | /** 63 | * Check that things we don't want being unescaped are not unescaped 64 | */ 65 | void test_ignore_escape(void) 66 | { 67 | escaped = "Some \\nWeird String"; 68 | out = malloc(strlen(escaped)+1); 69 | strtable['W'] = 0; 70 | res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err); 71 | out[res] = '\0'; 72 | assert(res == strlen(escaped)); 73 | assert(strncmp(escaped, out, res) == 0); 74 | 75 | escaped = "\\tA String"; 76 | res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err); 77 | out[res] = '\0'; 78 | assert(res == strlen(escaped)); 79 | assert(strncmp(escaped, out, res) == 0); 80 | free(out); 81 | } 82 | 83 | /** 84 | * Check that the built-in mappings for the 'sane' defaults work 85 | */ 86 | 
void test_replacement_escape(void) 87 | { 88 | escaped = "This\\tIs\\tA\\tTab"; 89 | out = malloc(strlen(escaped)+1); 90 | strtable['t'] = 1; 91 | res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err); 92 | assert(res > 0); 93 | out[res] = '\0'; 94 | assert(out[4] == '\t'); 95 | assert(strcmp(out, "This\tIs\tA\tTab") == 0); 96 | free(out); 97 | } 98 | 99 | void test_invalid_escape(void) 100 | { 101 | escaped = "\\invalid \\escape"; 102 | out = malloc(strlen(escaped)+1); 103 | res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err); 104 | assert(res == 0); 105 | assert(err == JSONSL_ERROR_ESCAPE_INVALID); 106 | free(out); 107 | } 108 | 109 | void test_unicode_escape(void) 110 | { 111 | const char *exp = "\xe2\x82\xac"; 112 | char out_s[64] = { 0 }; 113 | 114 | escaped = "\\u20AC"; 115 | strtable['u'] = 1; 116 | res = jsonsl_util_unescape(escaped, out_s, strlen(escaped), strtable, &err); 117 | assert(err == JSONSL_ERROR_SUCCESS); 118 | assert(res == 3); 119 | assert(0 == memcmp(exp, out_s, 3)); 120 | 121 | escaped = "\\u20ACHello"; 122 | exp = "\xe2\x82\xacHello"; 123 | memset(out_s, 0, sizeof out_s); 124 | res = jsonsl_util_unescape(escaped, out_s, strlen(escaped), strtable, &err); 125 | assert(res == strlen(exp)); 126 | assert(0 == memcmp(exp, out_s, strlen(exp))); 127 | 128 | escaped = "\\u0000"; 129 | memset(out_s, 0, sizeof out_s); 130 | res = jsonsl_util_unescape(escaped, out_s, strlen(escaped), strtable, &err); 131 | assert(res == 1); 132 | assert(out_s[0] == '\0'); 133 | 134 | /* Try with a surrogate pair */ 135 | escaped = "\\uD834\\uDD1E"; 136 | exp = "\xf0\x9d\x84\x9e"; 137 | res = jsonsl_util_unescape(escaped, out_s, strlen(escaped), strtable, &err); 138 | assert(res == 4); 139 | assert(0 == memcmp(exp, out_s, 4)); 140 | 141 | /* Try with an incomplete surrogate */ 142 | res = jsonsl_util_unescape_ex(escaped, out_s, 6, strtable, NULL, &err, NULL); 143 | assert(res == 0); 144 | assert(err == 
JSONSL_ERROR_INVALID_CODEPOINT); 145 | 146 | /* Try with an invalid pair */ 147 | escaped = "\\uD834\\u0020"; 148 | res = jsonsl_util_unescape(escaped, out_s, strlen(escaped), strtable, &err); 149 | assert(res == 0); 150 | assert(err == JSONSL_ERROR_INVALID_CODEPOINT); 151 | 152 | /* Try with invalid hex */ 153 | escaped = "\\uTTTT"; 154 | res = jsonsl_util_unescape(escaped, out_s, strlen(escaped), strtable, &err); 155 | assert(res == 0); 156 | assert(err == JSONSL_ERROR_PERCENT_BADHEX); 157 | 158 | escaped = "\\uaaa"; 159 | res = jsonsl_util_unescape(escaped, out_s, strlen(escaped), strtable, &err); 160 | assert(res == 0); 161 | assert(err == JSONSL_ERROR_UESCAPE_TOOSHORT); 162 | 163 | /* ASCII Escapes */ 164 | exp = "simple space"; 165 | escaped = "simple\\u0020space"; 166 | res = jsonsl_util_unescape_ex( 167 | escaped, out_s, strlen(escaped), strtable, NULL, &err, NULL); 168 | assert(res == strlen(exp)); 169 | assert(err == JSONSL_ERROR_SUCCESS); 170 | assert(memcmp(exp, out_s, res) == 0); 171 | } 172 | 173 | JSONSL_TEST_UNESCAPE_FUNC 174 | { 175 | test_single_uescape(); 176 | test_null_escape(); 177 | test_ignore_escape(); 178 | test_replacement_escape(); 179 | test_invalid_escape(); 180 | test_multibyte_escape(); 181 | test_unicode_escape(); 182 | return 0; 183 | } 184 | --------------------------------------------------------------------------------

', 193 | "\x51" => '', 194 | "\x52" => '', 195 | "\x53" => '', 196 | "\x54" => '', 197 | "\x55" => '', 198 | "\x56" => '', 199 | "\x57" => '', 200 | "\x58" => '', 201 | "\x59" => '', 202 | "\x5a" => '', 203 | "\x5b" => '<[>', 204 | "\x5c" => '<\>', 205 | "\x5d" => '<]>', 206 | "\x5e" => '<^>', 207 | "\x5f" => '<_>', 208 | "\x60" => '<`>', 209 | "\x61" => '', 210 | "\x62" => '', 211 | "\x63" => '', 212 | "\x64" => '', 213 | "\x65" => '', 214 | "\x66" => '', 215 | "\x67" => '', 216 | "\x68" => '', 217 | "\x69" => '', 218 | "\x6a" => '', 219 | "\x6b" => '', 220 | "\x6c" => '', 221 | "\x6d" => '', 222 | "\x6e" => '', 223 | "\x6f" => '', 224 | "\x70" => '