├── .gitignore ├── test_utf8.js ├── test.js ├── package.json ├── js0n.h ├── Makefile ├── README ├── test ├── example.c └── js0n_test.c ├── j0g.h ├── j0g.c ├── js0n.3 ├── j0g.3 └── js0n.c /.gitignore: -------------------------------------------------------------------------------- 1 | js0n_test 2 | example 3 | -------------------------------------------------------------------------------- /test_utf8.js: -------------------------------------------------------------------------------- 1 | { 2 | "foo":"$¢€𤪤", 3 | } -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | { 2 | "foo":"b\"a and \\ r", 3 | "bar":[1,2,3], 4 | "baz":{"a":"b"}, 5 | "num":123.45, 6 | "key":"value\n\"newline\"", 7 | "obj":{"true":true} 8 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "js0n", 3 | "version": "1.0.2", 4 | "repo": "mbucc/js0n", 5 | "description": "json parser", 6 | "license": "Public domain", 7 | "src": ["js0n.c", "js0n.h", "j0g.c", "j0g.h", "js0n.3", "j0g.3"], 8 | "makefile": "Makefile" 9 | } 10 | -------------------------------------------------------------------------------- /js0n.h: -------------------------------------------------------------------------------- 1 | // pass it a raw json string and length, and it will tag all the key/value offsets in the out argument (size it same as js to be safe) 2 | // returns 0 if successful, >0 if not 3 | int js0n(const unsigned char *js, unsigned int len, unsigned short *out, unsigned int olen); 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MANDIR=/usr/share/man/man3 2 | 3 | all: js0n_test example 4 | 5 | js0n_test: test/js0n_test.c 
js0n.c j0g.c 6 | gcc -Wall -o js0n_test test/js0n_test.c js0n.c j0g.c 7 | 8 | example: test/example.c js0n.c j0g.c 9 | gcc -Wall -o example test/example.c js0n.c j0g.c 10 | 11 | clean: 12 | rm -f example js0n_test 13 | 14 | 15 | man: ${MANDIR}/js0n.3 ${MANDIR}/j0g.3 16 | 17 | ${MANDIR}/%.3: %.3 18 | sudo cp $? $@ 19 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | js0n - the "cheapest" c json parser possible? 2 | 3 | A one-pass, super low overhead parsing walk of the raw bytes with no memory copying of any sort; it fills in a simple array of offsets and lengths of the first-depth array values or object key/values. It should parse any valid json, but trades full validation for efficiency (some invalid json will still parse). Excellent for low level high speed scanning/routing of small chunks of json. 4 | 5 | Parsing this: 6 | 7 | {"foo":"bar","barbar":[1,2,3],"obj":{"a":"b"}} 8 | 9 | Would result in: 10 | 11 | 2,3,8,3,14,6,22,7,31,3,36,9,0 12 | 13 | Also includes a collection of utility functions for handy ways of using js0n quickly/easily. -------------------------------------------------------------------------------- /test/example.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../js0n.h" 5 | #include "../j0g.h" 6 | 7 | void 8 | ex1() 9 | { 10 | char *s = "{\"foo\":\"bar\",\"barbar\":[1,2,3],\"obj\":{\"a\":\"b\"}}"; 11 | 12 | // 3 keys, 3 values, each with a start and offset --> 12 13 | // Plus one for a terminating zero = 13. 
14 | unsigned short kvpairs[13]; 15 | 16 | printf("Parsing '%s'\n", s); 17 | 18 | int rc = js0n((unsigned char*) s, strlen(s), kvpairs, 13); 19 | 20 | printf("returned %d\n",rc); 21 | 22 | for (int i = 0; kvpairs[i]; i += 2) 23 | 24 | printf("%d: at %d len %d is %.*s\n", i, 25 | kvpairs[i], kvpairs[i + 1], kvpairs[i + 1], s + kvpairs[i]); 26 | 27 | } 28 | 29 | int main(int argc, char **argv) 30 | { 31 | ex1(); 32 | } 33 | 34 | -------------------------------------------------------------------------------- /j0g.h: -------------------------------------------------------------------------------- 1 | // these are handy utility functions to make working with js0n easier 2 | 3 | // dumbed down parser, expects null terminated json, if fails the *index is 0 (and safe to pass into j0g_*) 4 | // returns json, for chaining 5 | char *j0g(const char *json, unsigned short *index, int ilen); 6 | 7 | // for the rest - 8 | // first arg is the key to find in the json 9 | // second is the original json passed to js0n 10 | // third is the array returned by js0n 11 | // these functions will modify the json arg in place (to null terminate strings) 12 | 13 | // return the null-terminated string value matching the given key 14 | char *j0g_str(const char *key, char *json, const unsigned short *index); 15 | 16 | // return 1 if the value is the bool value true, number 1, or even the string "true", false otherwise 17 | int j0g_test(const char *key, char *json, const unsigned short *index); 18 | 19 | // return the index offset of the value matching the given key 20 | int j0g_val(const char *key, char *json, const unsigned short *index); 21 | 22 | // unescapes and null terminates any value (useful for arrays) 23 | char *j0g_safe(int val, char *json, const unsigned short *index); 24 | -------------------------------------------------------------------------------- /test/js0n_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 
#include 4 | #include "../js0n.h" 5 | #include "../j0g.h" 6 | 7 | int main(int argc, char **argv) 8 | { 9 | unsigned char buff[1024], *json = NULL; 10 | int len, lastlen=0, ret, i; 11 | unsigned short *res; 12 | FILE *f; 13 | 14 | if((f = fopen(argv[1],"r")) == NULL) 15 | { 16 | printf("uhoh opening %s\n",argv[1]); 17 | exit(1); 18 | } 19 | while((len = fread(buff,1,1024,f)) > 0) 20 | { 21 | json = realloc(json,lastlen+len); 22 | memcpy(json+lastlen,buff,len); 23 | lastlen+=len; 24 | } 25 | fclose(f); 26 | printf("got[%.*s]\n",lastlen,json); 27 | res = malloc(lastlen); // way more than enough 28 | ret = js0n(json,lastlen,res,lastlen); 29 | printf("returned %d\n",ret); 30 | for(i=0;res[i];i+=2) 31 | { 32 | printf("%d: at %d len %d is %.*s\n",i,res[i],res[i+1],res[i+1],json+res[i]); 33 | } 34 | 35 | // j0g tests 36 | printf("j0g_val 'key' val offset %d\n", j0g_val("key",(char*)json,res)); 37 | printf("j0g_str 'key' val '%s'\n", j0g_str("key",(char*)json,res)); 38 | printf("j0g_str 'num' val '%0.2f'\n", (j0g_str("num",(char*)json,res)!=NULL)?strtof(j0g_str("num",(char*)json,res), NULL):0); 39 | printf("j0g_test obj->true %d\n", j0g_test("true",j0g(j0g_str("obj",(char*)json,res),res,16),res)); 40 | return 0; 41 | } 42 | 43 | -------------------------------------------------------------------------------- /j0g.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "js0n.h" 5 | #include "j0g.h" 6 | 7 | // dumbed down parser, expects null terminated json, if fails the *index is 0 (and safe to pass into j0g_*) 8 | char *j0g(const char *json, unsigned short *index, int ilen) 9 | { 10 | int ret, len; 11 | if(!json) return NULL; 12 | len = strlen(json); 13 | ret = js0n((unsigned char*)json, len, index, ilen); 14 | if(ret) *index = 0; 15 | return (char*)json; 16 | } 17 | 18 | // return the null-terminated string value matching the given key 19 | char *j0g_str(const char *key, char *json, const 
unsigned short *index) 20 | { 21 | int val = j0g_val(key, json, index); 22 | if(!val) return NULL; 23 | return j0g_safe(val, json, index); 24 | } 25 | 26 | // null terminate and unescape any string at this value 27 | char *j0g_safe(int val, char *json, const unsigned short *index) 28 | { 29 | char *str, *cursor; 30 | *(json+(index[val]+index[val+1])) = 0; // null terminate 31 | // unescape stuff 32 | for(cursor=str=json+index[val]; *cursor; cursor++,str++) 33 | { 34 | if(*cursor == '\\' && *(cursor+1) == 'n') 35 | { 36 | *str = '\n'; 37 | cursor++; 38 | }else if(*cursor == '\\' && *(cursor+1) == '"'){ 39 | *str = '"'; 40 | cursor++; 41 | }else{ 42 | *str = *cursor; 43 | } 44 | } 45 | *str = *cursor; // copy null terminator too 46 | return (char*)json+index[val]; 47 | } 48 | 49 | // return 1 if the value is the bool value true, number 1, or even the string "true", false otherwise 50 | int j0g_test(const char *key, char *json, const unsigned short *index) 51 | { 52 | char *val = j0g_str(key, json, index); 53 | if(!val) return 0; 54 | if(strcmp(val, "true") == 0) return 1; 55 | if(strcmp(val, "1") == 0) return 1; 56 | return 0; 57 | } 58 | 59 | // return the index offset of the value matching the given key 60 | int j0g_val(const char *key, char *json, const unsigned short *index) 61 | { 62 | if(!key || !json) return 0; 63 | int i, klen = strlen(key); 64 | for(i=0;index[i];i+=4) 65 | { 66 | if(klen == index[i+1] && strncmp(key,(char*)json+index[i],klen) == 0) return i+2; 67 | } 68 | return 0; 69 | } 70 | 71 | -------------------------------------------------------------------------------- /js0n.3: -------------------------------------------------------------------------------- 1 | .\" 2 | .\" Copyright (c) 2014 Jeremie Miller 3 | .\" 4 | .\" 5 | .\" This software is in the public domain. 
6 | .\" 7 | .\" 8 | .Dd $Mdocdate: August 1, 2014 $ 9 | .Dt JS0N 3 10 | .Os 11 | .Sh NAME 12 | .Nm js0n 13 | .Nd json parsing library 14 | .Sh SYNOPSIS 15 | .Fd "#include " 16 | .Pp 17 | .Ft int 18 | .Fn js0n "const unsigned char *js" "unsigned int len" "unsigned short *out" "unsigned int olen" 19 | 20 | .Sh DESCRIPTION 21 | The 22 | .Nm js0n 23 | function parses a JSON string and 24 | is designed to be simple, lightweight and fast. 25 | Unlike most JSON parsers, 26 | .Nm js0n 27 | hardly allocates any memory. 28 | Rather, it walks the string and 29 | records the sequence of (offset, length) integer pairs 30 | that describe the location of the first-level keys and values. 31 | .Pp 32 | It should parse any valid json, but trades full 33 | validation for efficiency (some invalid json will still parse). 34 | .Pp 35 | Excellent for low level high speed scanning/routing of small chunks 36 | of json. 37 | .Sh RETURN VALUE 38 | .Nm js0n 39 | returns 0 on success. 40 | If the data was incomplete (for example, missing a close brace) 41 | or invalid (for example, a string value containing a character 42 | with an ASCII code less than 32), 43 | then a number greater than zero is returned. 44 | 45 | .Sh EXAMPLE 46 | The following code fragment illustrates the simple case: 47 | .Bd -literal -offset indent 48 | char *s = "{\\"foo\\":\\"bar\\",\\"barbar\\":[1,2,3],\\"obj\\":{\\"a\\":\\"b\\"}}"; 49 | // 3 keys, 3 values, each with a start and offset = 12 50 | // Plus one for a terminating zero = 13. 51 | unsigned short kvpairs[13]; 52 | 53 | \&... 
54 | 55 | int rc = js0n((unsigned char*) s, strlen(s), kvpairs, 13); 56 | if (!rc) 57 | for (int i = 0; kvpairs[i]; i += 2) 58 | printf("%d: at %d len %d is %.*s\n", i, 59 | kvpairs[i], kvpairs[i + 1], kvpairs[i + 1], s + kvpairs[i]); 60 | else 61 | errx(1, "Parse failed."); 62 | 63 | .Ed 64 | 65 | produces this output: 66 | 67 | .Bd -literal -offset indent0: at 2 len 3 is foo 68 | 2: at 8 len 3 is bar 69 | 4: at 14 len 6 is barbar 70 | 6: at 22 len 7 is [1,2,3] 71 | 8: at 31 len 3 is obj 72 | 10: at 36 len 9 is {"a":"b"} 73 | .Ed 74 | 75 | .Sh SEE ALSO 76 | .Xr j0g 3 77 | 78 | -------------------------------------------------------------------------------- /j0g.3: -------------------------------------------------------------------------------- 1 | .\" 2 | .\" Copyright (c) 2014 Jeremie Miller 3 | .\" 4 | .\" 5 | .\" This software is in the public domain. 6 | .\" 7 | .\" 8 | .Dd $Mdocdate: August 1, 2014 $ 9 | .Dt J0G 3 10 | .Os 11 | .Sh NAME 12 | .Nm j0g , 13 | .Nm j0g_str , 14 | .Nm j0g_safe , 15 | .Nm j0g_test , 16 | .Nm j0g_val , 17 | .Nm j0g_val 18 | .Nd utility functions for js0n parsing library. 19 | .Sh SYNOPSIS 20 | .Fd "#include <js0n.h>" 21 | .Fd "#include <j0g.h>" 22 | .Pp 23 | .Ft char * 24 | .Fn j0g "const char *json" "unsigned short *index" "int ilen" 25 | .Pp 26 | .Ft int 27 | .Fn j0g_val "const char *key" "char *json" "const unsigned short *index" 28 | .Pp 29 | .Ft char * 30 | .Fn j0g_safe "int val" "char *json" "const unsigned short *index" 31 | .Pp 32 | .Ft char * 33 | .Fn j0g_str "const char *key" "char *json" "const unsigned short *index" 34 | .Pp 35 | .Ft int 36 | .Fn j0g_test "const char *key" "char *json" "const unsigned short *index" 37 | 38 | .Sh DESCRIPTION 39 | Convenience functions that make it easier to work with 40 | .Nm js0n . 41 | All but 42 | .Fn j0g 43 | operate on a particular key in the json string. 
44 | 45 | .Bl -tag -width Ds 46 | 47 | .It Xo 48 | .Fa char * 49 | .Fn j0g "const char *json" "unsigned short *index" "int ilen" 50 | .Xc 51 | .Pp 52 | A simpler version of 53 | .Fn js0n 54 | that assumes the 55 | .Fa json 56 | argument is a null-terminated string. 57 | 58 | 59 | .It Xo 60 | .Fa int 61 | .Fn j0g_val "const char *key" "char *json" "const unsigned short *index" ; 62 | .Xc 63 | .Pp 64 | For the given key, return the index of the array element 65 | that holds the starting position of the key's value. For example, if 66 | index[2] holds the offset from the beginning of the json string 67 | to the beginning of the 68 | .Fa key , 69 | then 70 | .Fn j0g_val 71 | would return a 4 (index[3] holds the length of the key). 72 | .Pp 73 | If the key is not found (or if the key or the json is a null pointer), return 0. 74 | 75 | .It Xo 76 | .Fa char * 77 | .Fn j0g_safe "int val" "char *json" "const unsigned short *index" ; 78 | .Xc 79 | .Pp 80 | Modifies 81 | .Fa json 82 | passed in, null-terminating the value (or 83 | key) whose offset is stored in the 84 | .Fa index 85 | array at location 86 | .Fa val . 87 | Also, unescapes newlines and double quotes in-place in 88 | .Fa json . 89 | Returns the null-terminated and unescaped string value. 90 | .Pp 91 | Note that this function does not unescape all escaped characters 92 | in the JSON specification; only 93 | newlines and double-quotes. 94 | 95 | .It Xo 96 | .Fa char * 97 | .Fn j0g_str "const char *key" "char *json" "const unsigned short *index" ; 98 | .Xc 99 | .Pp 100 | Return the null-terminated and (partially) unescaped string value 101 | matching the given key. 102 | If no such key, return NULL. 
// by jeremie miller - 2010
// public domain, contributions/improvements welcome via github at https://github.com/quartzjer/js0n

// The jump tables below first fill a whole range and then override single
// entries. Compilers warn about that pattern, which is not helpful here, so
// silence the warning under the name each compiler actually knows.
#pragma GCC diagnostic push
#if defined(__clang__)
#pragma GCC diagnostic ignored "-Winitializer-overrides"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Woverride-init"
#endif

// opportunity to further optimize would be having different jump tables for higher depths
// PUSH records the offset of (cur+i) from the start of js.
// CAP records the length from the last PUSHed offset up to and including (cur+i).
// Both only act on first-depth items and both remember what they wrote in prev.
#define PUSH(i) do { if(depth == 1) prev = *out++ = (unsigned short)((cur+(i)) - js); } while(0)
#define CAP(i) do { if(depth == 1) prev = *out++ = (unsigned short)((cur+(i)) - (js + prev) + 1); } while(0)

// Scan len bytes of raw json at js and store (offset, length) pairs for every
// first-depth array value or object key/value into out, which has room for
// olen entries. A terminating 0 is appended only if a free slot remains.
//
// Offsets and lengths are stored as unsigned short, so values that do not fit
// that type are truncated.
//
// returns 0 on success and a value >0 otherwise: a byte that is not allowed
// in the current state, a closer without a matching opener, input that ends
// inside a string/escape/UTF-8 sequence, or containers still open at the end
// (the remaining depth is returned in that case).
//
// Implementation: one jump table per scanner state, indexed by the current
// byte and dispatched with computed goto (a GNU C extension).
int js0n(const unsigned char *js, unsigned int len, unsigned short *out, unsigned int olen)
{
	unsigned short prev = 0, *oend;
	const unsigned char *cur, *end;
	int depth=0;
	int utf8_remain=0;
	// between tokens
	static void *gostruct[] =
	{
		[0 ... 255] = &&l_bad,
		['\t'] = &&l_loop, [' '] = &&l_loop, ['\r'] = &&l_loop, ['\n'] = &&l_loop,
		['"'] = &&l_qup,
		[':'] = &&l_loop,[','] = &&l_loop,
		['['] = &&l_up, [']'] = &&l_down, // tracking [] and {} individually would allow fuller validation but is really messy
		['{'] = &&l_up, ['}'] = &&l_down,
		['-'] = &&l_bare, [48 ... 57] = &&l_bare, // 0-9
		['t'] = &&l_bare, ['f'] = &&l_bare, ['n'] = &&l_bare // true, false, null
	};
	// inside a bare (unquoted) value
	static void *gobare[] =
	{
		[0 ... 31] = &&l_bad,
		[32 ... 126] = &&l_loop, // could be more pedantic/validation-checking
		['\t'] = &&l_unbare, [' '] = &&l_unbare, ['\r'] = &&l_unbare, ['\n'] = &&l_unbare,
		[','] = &&l_unbare, [']'] = &&l_unbare, ['}'] = &&l_unbare,
		[127 ... 255] = &&l_bad
	};
	// inside a quoted string
	static void *gostring[] =
	{
		[0 ... 31] = &&l_bad, [127] = &&l_bad,
		[32 ... 126] = &&l_loop,
		['\\'] = &&l_esc, ['"'] = &&l_qdown,
		[128 ... 191] = &&l_bad,
		[192 ... 223] = &&l_utf8_2,
		[224 ... 239] = &&l_utf8_3,
		[240 ... 247] = &&l_utf8_4,
		[248 ... 255] = &&l_bad
	};
	// expecting a UTF-8 continuation byte
	static void *goutf8_continue[] =
	{
		[0 ... 127] = &&l_bad,
		[128 ... 191] = &&l_utf_continue,
		[192 ... 255] = &&l_bad
	};
	// the byte right after a backslash inside a string
	static void *goesc[] =
	{
		[0 ... 255] = &&l_bad,
		['"'] = &&l_unesc, ['\\'] = &&l_unesc, ['/'] = &&l_unesc, ['b'] = &&l_unesc,
		['f'] = &&l_unesc, ['n'] = &&l_unesc, ['r'] = &&l_unesc, ['t'] = &&l_unesc, ['u'] = &&l_unesc
	};
	void **go = gostruct;

	// every handler below comes back to l_loop, which advances to the next byte;
	// a single byte stores at most one entry, so checking out once per byte is enough
	for(cur=js,end=js+len,oend=out+olen; cur<end && out<oend; cur++)
	{
		goto *go[*cur];
		l_loop:;
	}

	if(out < oend) *out = 0; // terminate the list when there is room
	// ending in the middle of a string, escape or multi-byte character is
	// incomplete data even when no container is open
	if(go != gostruct && go != gobare) return 1;
	return depth; // 0 if successful full parse, >0 for incomplete data

	l_bad:
		return 1;

	l_up:
		PUSH(0);
		++depth;
		goto l_loop;

	l_down:
		// a closer with nothing open is invalid; without this check depth
		// would go negative and could cancel out against a later opener
		if(--depth < 0) goto l_bad;
		CAP(0);
		goto l_loop;

	l_qup:
		PUSH(1);
		go=gostring;
		goto l_loop;

	l_qdown:
		CAP(-1);
		go=gostruct;
		goto l_loop;

	l_esc:
		go = goesc;
		goto l_loop;

	l_unesc:
		go = gostring;
		goto l_loop;

	l_bare:
		PUSH(0);
		go = gobare;
		goto l_loop;

	l_unbare:
		CAP(-1);
		go = gostruct;
		goto *go[*cur]; // the terminating byte is itself structural, dispatch it again

	l_utf8_2:
		go = goutf8_continue;
		utf8_remain = 1;
		goto l_loop;

	l_utf8_3:
		go = goutf8_continue;
		utf8_remain = 2;
		goto l_loop;

	l_utf8_4:
		go = goutf8_continue;
		utf8_remain = 3;
		goto l_loop;

	l_utf_continue:
		if (!--utf8_remain)
			go=gostring;
		goto l_loop;

}

#pragma GCC diagnostic pop