├── LICENSE
├── README.md
├── index.html
└── xml_to_json.c
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 jakethaw
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # xml_to_json
2 |
3 | Implementation of `xml_to_json(xml, indent)` function.
4 |
5 | `xml_to_json(xml, indent)` takes two arguments:
6 |
7 | * xml - XML string UTF-8 encoded
8 | * indent - Indent for pretty printed JSON or -1 for minified JSON
9 |
10 | The input XML is not validated prior to conversion.
11 |
12 | # TOC
13 |
14 | - [WebAssembly](#webassembly)
15 | - [Compile](#compile)
16 | - [Usage example:](#usage-example)
17 | - [SQLite3](#sqlite3)
18 | - [Compile](#compile-1)
19 | - [Usage examples](#usage-examples)
20 | - [Implementation Method](#implementation-method)
21 | - [TODO](#todo)
22 |
23 |
24 | # WebAssembly
25 |
26 | ## Demo
27 |
28 | [jakethaw.com/xml_to_json](https://jakethaw.com/xml_to_json)
29 |
30 | ## Compile
31 |
32 | To compile with [Emscripten](https://emscripten.org) as a WebAssembly function:
33 |
34 | ```bash
35 | emcc -Oz xml_to_json.c -o xml_to_json.js -s EXPORTED_FUNCTIONS='["_xml_to_json", "_free"]' -s 'EXTRA_EXPORTED_RUNTIME_METHODS=["allocate", "intArrayFromString", "ALLOC_NORMAL", "UTF8ToString"]'
36 | ```
37 |
38 | ## Usage example:
39 |
40 | ```javascript
41 | var xml = allocate(intArrayFromString("<x>hello world</x>"), 'i8', ALLOC_NORMAL);
42 | var indent = 2;
43 |
44 | var json = _xml_to_json(xml, indent);
45 | console.log(UTF8ToString(json, 5000));
46 |
47 | _free(xml);
48 | _free(json);
49 | ```
50 | ```json
51 | {
52 | "x": "hello world"
53 | }
54 | ```
55 |
56 | # SQLite3
57 |
58 | Implementation of an [SQLite3](https://sqlite.org) `xml_to_json(X, N)` function.
59 |
60 | `xml_to_json(X, N)` takes one or two arguments:
61 |
62 | * X - XML string UTF-8 encoded
63 | * N - Optional indent for pretty printed JSON or -1 for minified JSON
64 |
65 | The input XML is not validated prior to conversion.
66 |
67 |
68 | ## Compile
69 |
70 | To compile with gcc as a run-time loadable extension:
71 |
72 | ```bash
73 | UNIX-like : gcc -g -O3 -fPIC -shared xml_to_json.c -o xml_to_json.so -DSQLITE
74 | Mac : gcc -g -O3 -fPIC -dynamiclib xml_to_json.c -o xml_to_json.dylib -DSQLITE
75 | Windows : gcc -g -O3 -shared xml_to_json.c -o xml_to_json.dll -DSQLITE
76 | ```
77 |
78 | Add the `-DDEBUG` option to print debug information to stdout.
79 |
80 | E.g.
81 |
82 | ```bash
83 | gcc -g -O3 -fPIC -shared xml_to_json.c -o xml_to_json.so -DDEBUG
84 | ```
85 |
86 | ## Usage examples
87 |
88 | ```sql
89 | SELECT xml_to_json('<x>hello world</x>', 2);
90 | ```
91 | ```json
92 | {
93 | "x": "hello world"
94 | }
95 | ```
96 | ---
97 | ```sql
98 | SELECT xml_to_json('<x>a<y/>b</x>', 2);
99 | ```
100 | ```json
101 | {
102 | "x": {
103 | "#text": [
104 | "a",
105 | "b"
106 | ],
107 | "y": null
108 | }
109 | }
110 | ```
111 | ---
112 | ```sql
113 | SELECT xml_to_json('<x><y>abc</y><y>def</y></x>', 2);
114 | ```
115 | ```json
116 | {
117 | "x": {
118 | "y": [
119 | "abc",
120 | "def"
121 | ]
122 | }
123 | }
124 | ```
125 | ---
126 | ```sql
127 | SELECT xml_to_json('<x>hello<y>abc</y>world<y>def</y>xyz</x>', 2);
128 | ```
129 | ```json
130 | {
131 | "x": {
132 | "#text": [
133 | "hello",
134 | "world",
135 | "xyz"
136 | ],
137 | "y": [
138 | "abc",
139 | "def"
140 | ]
141 | }
142 | }
143 | ```
144 | ---
145 | ```sql
146 | SELECT xml_to_json('<x attr1="attr val 1" attr2="attr val 2">&amp; &gt; &lt; &apos;</x>', 2);
147 | ```
148 | ```json
149 | {
150 | "x": {
151 | "@attr1": "attr val 1",
152 | "@attr2": "attr val 2",
153 | "#text": "& > < '"
154 | }
155 | }
156 | ```
157 |
158 | # Implementation Method
159 |
160 | This implementation does not support the full [XML 1.0 Specification](https://www.w3.org/TR/REC-xml/). The following explanation describes what is currently supported.
161 |
162 | TODO
163 |
164 | # TODO
165 |
166 | * Improve readme
167 | * Add test cases
168 | * Benchmark
169 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | XML to JSON
7 |
13 |
14 |
15 | XML to JSON
16 |
17 | github.com/jakethaw/xml_to_json
18 |
19 |
20 |
21 |
22 |
23 |
24 | Indent |
25 |
26 |
34 | |
35 |
36 |
37 | XML |
38 |
39 |
40 | |
41 |
42 |
43 | JSON |
44 |
45 |
49 | |
50 |
51 |
52 |
53 |
54 |
67 |
68 |
69 |
--------------------------------------------------------------------------------
/xml_to_json.c:
--------------------------------------------------------------------------------
1 | /*
2 | ** xml_to_json.c - 2018-01-22 - jakethaw
3 | **
4 | *************************************************************************
5 | **
6 | ** MIT License
7 | **
8 | ** Copyright (c) 2019 jakethaw
9 | **
10 | ** Permission is hereby granted, free of charge, to any person obtaining a copy
11 | ** of this software and associated documentation files (the "Software"), to deal
12 | ** in the Software without restriction, including without limitation the rights
13 | ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 | ** copies of the Software, and to permit persons to whom the Software is
15 | ** furnished to do so, subject to the following conditions:
16 | **
17 | ** The above copyright notice and this permission notice shall be included in all
18 | ** copies or substantial portions of the Software.
19 | **
20 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 | ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 | ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 | ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 | ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 | ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 | ** SOFTWARE.
27 | **
28 | *************************************************************************
29 | ** WebAssembly *********************************************************
30 | *************************************************************************
31 | **
32 | ** To compile with Emscripten as a WebAssembly function:
33 | **
34 | ** emcc -Oz xml_to_json.c -o xml_to_json.js -s EXPORTED_FUNCTIONS='["_xml_to_json", "_free"]' -s 'EXTRA_EXPORTED_RUNTIME_METHODS=["allocate", "intArrayFromString", "ALLOC_NORMAL", "UTF8ToString"]'
35 | **
36 | *************************************************************************
37 | **
38 | ** Usage example:
39 | **
40 | ** var xml = allocate(intArrayFromString("<x>hello world</x>"), 'i8', ALLOC_NORMAL);
41 | ** var indent = 2;
42 | **
43 | ** var json = _xml_to_json(xml, indent);
44 | ** console.log(UTF8ToString(json, 5000));
45 | **
46 | ** _free(xml);
47 | ** _free(json);
48 | **
49 | *************************************************************************
50 | ** SQLite3 *************************************************************
51 | *************************************************************************
52 | **
53 | ** Implementation of an SQLite3 xml_to_json(X, N) function.
54 | **
55 | ** xml_to_json(X, N) takes one or two arguments:
56 | **
57 | ** * X - XML string UTF-8 encoded
58 | ** * N - Optional indent for pretty printed JSON or -1 for minified JSON
59 | **
60 | ** The input XML is not validated prior to conversion.
61 | **
62 | *************************************************************************
63 | **
64 | ** To compile with gcc as a run-time loadable extension:
65 | **
66 | ** UNIX-like : gcc -g -O2 -fPIC -shared xml_to_json.c -o xml_to_json.so -DSQLITE
67 | ** Mac : gcc -g -O2 -fPIC -dynamiclib xml_to_json.c -o xml_to_json.dylib -DSQLITE
68 | ** Windows : gcc -g -O2 -shared xml_to_json.c -o xml_to_json.dll -DSQLITE
69 | **
70 | ** Add the -DDEBUG option to print debug information to stdout.
71 | **
72 | *************************************************************************
73 | **
74 | ** Usage examples:
75 | **
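76 | ** SELECT xml_to_json('<x>a<y/>b</x>');
77 | ** SELECT xml_to_json('<x><y>abc</y><y>def</y></x>', 2);
78 | ** SELECT xml_to_json('<x>hello<y>abc</y>world<y>def</y>xyz</x>', 2);
79 | ** SELECT xml_to_json('<x attr1="attr val 1" attr2="attr val 2">&amp; &gt; &lt; &apos;</x>', 2);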
80 | **
81 | *************************************************************************
82 | */
83 |
// Allocator selection.
//
// When built with -DSQLITE the file is compiled as an SQLite loadable
// extension and all allocations go through sqlite3_malloc/sqlite3_free.
// Otherwise the C library malloc/free are used.
// SQLITE_EXTENSION_INIT1 is not defined in this file; presumably it is
// supplied by sqlite3ext.h.
#ifdef SQLITE
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#define MALLOC sqlite3_malloc
#define FREE sqlite3_free
#else
#define MALLOC malloc
#define FREE free
#endif
93 |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
97 |
// One XML element.
//
// Elements are chained through `next` in the order they are created by the
// parser; `parent` links each element to the element that encloses it.
// Note that `element` is a typedef for a POINTER to struct element.
typedef struct element *element;
struct element{
  struct element *parent;               // Link to parent element or null
  char *name;                           // Pointer to element name in original XML string (not zero terminated)
  int nName;                            // Length of name
  struct value *first_value;            // Link to first value. Value might be an array of values e.g. <x>a<y/>b</x>
  int depth;                            // Depth of element
  int is_parent;                        // True if element has children
  int child_index;                      // Index of element among siblings (0 until assigned, then 1-based)
  int is_last_child;                    // True if element does not link to sibling
  int array_index;                      // Index of element in array (0 if not part of an array, otherwise 1-based)
  int is_array_end;                     // True if last element in array
  struct element *next;                 // Link to next element. Sibling or ancestor's sibling
  struct element_attribute *first_attr; // Link to first attribute
};
113 |
// One text value of an element.
//
// An element whose text is interrupted by child elements holds several
// values, linked through `next_value`. Each value is itself a list of
// value parts. `value` is a typedef for a POINTER to struct value.
typedef struct value *value;
struct value{
  struct value_part *first_value_part;  // Link to first value part
  struct value *next_value;             // Link to sibling value
};
119 |
// Divide value into parts to support special characters
// i.e.
//   &amp;  -> &
//   &gt;   -> >
//   &lt;   -> <
//   &quot; -> "
//   &apos; -> '
//   &#39;  -> '
//   etc.
//
// Constant memory (string literals) is used for named special characters.
// Dynamically allocated memory is used for numeric character codes; such
// parts have `free` set so that `val` can be released during cleanup.
//
// `value_part` is a typedef for a POINTER to struct value_part.
//
typedef struct value_part *value_part;
struct value_part{
  char *val;                            // Pointer to value part in original XML string (or special characters)
  int nVal;                             // Length of val
  int free;                             // True if val should be freed
  struct value_part *next_value_part;   // Link to next value part
};
140 |
// One attribute of an element. Attributes of the same element form a singly
// linked list through `next_attr`; the attribute value is stored as a list
// of value parts. `element_attribute` is a typedef for a POINTER to
// struct element_attribute.
typedef struct element_attribute *element_attribute;
struct element_attribute{
  char *name;                           // Pointer to attribute name in original XML string (not zero terminated)
  int nName;                            // Length of name
  struct value_part *first_value_part;  // Link to first value part
  struct element_attribute *next_attr;  // Link to next attribute
};
148 |
// Forward declarations for helpers that are defined after their first use.
// get_value_parts() fills value parts from xml starting at *i and advances *i.
// json_output() returns the number of characters of JSON; json may be NULL
// to only measure the required space.
static value_part get_value_parts(int *i, int j, char *xml, value_part new_value_part, int is_attr);
static int json_output(element root, char *json, int indent);
151 |
// Return 1 if the first character of z is a space, tab, newline, form feed
// or carriage return, otherwise 0. Only z[0] is examined.
static int is_space(char *z){
  switch( z[0] ){
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
155 |
156 | static int print_spaces(char *json, int nJson, int spaces){
157 | if( spaces<0 )
158 | return 0;
159 |
160 | int i;
161 | for(i=0; iparent = 0;
232 | root->depth = 0;
233 | root->first_value = 0;
234 | root->is_parent = 0;
235 | root->child_index = 0;
236 | root->is_last_child = 1;
237 | root->array_index = 0;
238 | root->is_array_end = 0;
239 | root->next = 0;
240 | root->first_attr = 0;
241 |
242 | previous_node = root;
243 |
244 | i = 0;
245 | while( is_space(&xml[i]) ) i++;
246 | while(xml[i]){
247 | // Element open tag
248 | //printf("%.*s\n", 1, &xml[i]);
249 | if( xml[i]=='<' && xml[i+1]!='/' ){
250 | // Create node
251 | depth++;
252 | new_node = (element)MALLOC(sizeof(struct element));
253 |
254 | // Node name
255 | j = 1;
256 | while( xml[i+j] && !is_space(&xml[i+j]) && !(xml[i+j]=='/' || xml[i+j]=='>') ) j++;
257 | j--;
258 | new_node->name = &xml[i+1];
259 | new_node->nName = j;
260 | i += j+1;
261 |
262 | // Default values
263 | new_node->first_value = 0;
264 | new_node->depth = depth;
265 | new_node->is_parent = 0;
266 | new_node->array_index = 0;
267 | new_node->is_array_end = 0;
268 | new_node->next = 0;
269 | new_node->child_index = 0;
270 | new_node->is_last_child = 0;
271 | new_node->first_attr = 0;
272 |
273 | // Set parent node
274 | parent_node = previous_node;
275 | while( parent_node->depth >= new_node->depth && parent_node->parent )
276 | parent_node = parent_node->parent;
277 | new_node->parent = parent_node;
278 |
279 | if( !parent_node->is_parent )
280 | parent_node->is_parent = 1;
281 |
282 | // Make new node the current node
283 | previous_node->next = new_node;
284 | previous_node = new_node;
285 | current_node = new_node;
286 |
287 | // printf("%.*s\n", j, current_node->name);
288 | // if( parent_node && parent_node->parent )
289 | // printf(" Parent = %.*s\n", parent_node->nName, parent_node->name);
290 |
291 | // Get attributes
292 | while( is_space(&xml[i]) ) i++;
293 | while( xml[i] && xml[i]!='/' && xml[i]!='?' && xml[i]!='>' ){
294 | // Create attribute
295 | new_attr = (element_attribute)MALLOC(sizeof(struct element_attribute));
296 | if( !current_node->first_attr ){
297 | current_node->first_attr = new_attr;
298 | }else{
299 | current_attr->next_attr = new_attr;
300 | }
301 | current_attr = new_attr;
302 | current_attr->first_value_part = 0;
303 | current_attr->next_attr = 0;
304 |
305 | // Attribute name
306 | j = 1;
307 | while( xml[i+j] && xml[i+j]!='=' && !is_space(&xml[i+j]) ) j++;
308 | current_attr->name = &xml[i];
309 | current_attr->nName = j;
310 | i += j;
311 |
312 | // Ensure attribute value starts
313 | while( xml[i] && (xml[i]!='"' || is_space(&xml[i])) ) i++;
314 |
315 | if( xml[i] ){
316 | i++;
317 |
318 | // Ensure attribute value ends
319 | j=0;
320 | while( xml[i+j] && xml[i+j]!='"' ) j++;
321 |
322 | if( xml[i+j] ){
323 | // Attribute value
324 | do{
325 | if( !current_attr->first_value_part ){
326 | new_value_part = (value_part)MALLOC(sizeof(struct value_part));
327 | new_value_part->next_value_part = 0;
328 | current_attr->first_value_part = new_value_part;
329 | }else{
330 | new_value_part->next_value_part = (value_part)MALLOC(sizeof(struct value_part));
331 | new_value_part = new_value_part->next_value_part;
332 | new_value_part->next_value_part = 0;
333 | }
334 |
335 | new_value_part = get_value_parts(&i, 0, xml, new_value_part, 1);
336 | }while( xml[i] && xml[i]!='"' );
337 |
338 | if( xml[i] == '"' ){
339 | i++;
340 | while( is_space(&xml[i]) ) i++;
341 | }
342 | }
343 | }
344 | }
345 |
346 | // Self closing element
347 | if( xml[i]=='/' || xml[i]=='?' ){
348 | current_node = current_node->parent;
349 | depth--;
350 | while( xml[i] && xml[i]!='>' ) i++;
351 | }
352 |
353 | // Element close tag
354 | }else if( xml[i]=='<' && xml[i+1]=='/' ){
355 | current_node = current_node->parent;
356 | depth--;
357 | while( xml[i] && xml[i]!='>' ) i++;
358 |
359 | }else{
360 | i++;
361 |
362 | // Get value if it exists, or find the start of the next element
363 | j = 0;
364 | while( is_space(&xml[i+j]) ) j++;
365 |
366 | if( xml[i+j]!='<' || (!current_node->is_parent && xml[i+j]=='<' && xml[i+j+1]=='/') ){
367 |
368 | // Determine the deepest value of this element
369 | current_value = current_node->first_value;
370 | while( current_value && current_value->next_value )
371 | current_value = current_value->next_value;
372 |
373 | new_value = (value)MALLOC(sizeof(struct value));
374 |
375 | // Either make the new value the first value of the element,
376 | // or link the new value to the previous one
377 | if( !current_node->first_value ){
378 | current_node->first_value = new_value;
379 | }else{
380 | current_value->next_value = new_value;
381 | }
382 |
383 | new_value->first_value_part = 0;
384 | new_value->next_value = 0;
385 |
386 | // Value
387 | new_value_part = 0;
388 | while( xml[i] && xml[i]!='<' ){
389 | if( !new_value->first_value_part ){
390 | new_value_part = (value_part)MALLOC(sizeof(struct value_part));
391 | new_value_part->next_value_part = 0;
392 | new_value->first_value_part = new_value_part;
393 | }else{
394 | new_value_part->next_value_part = (value_part)MALLOC(sizeof(struct value_part));
395 | new_value_part = new_value_part->next_value_part;
396 | new_value_part->next_value_part = 0;
397 | }
398 | new_value_part = get_value_parts(&i, 0, xml, new_value_part, 0);
399 | j = 0;
400 | }
401 |
402 | // if( new_value_part )
403 | // printf("%.*s=%.*s\n", current_node->nName,
404 | // current_node->name,
405 | // new_value_part->nVal,
406 | // new_value_part->val);
407 | }
408 | i += j;
409 | }
410 | }
411 |
412 | //
413 | // Determine first/last nodes in a family
414 | //
415 | current_node = root;
416 | while(current_node->next){
417 | current_node = current_node->next;
418 | if( !current_node->child_index ){
419 | i = 1;
420 | test_node = current_node;
421 | previous_node = 0;
422 | do{
423 | if( current_node->parent == test_node->parent ){
424 | if( !current_node->child_index )
425 | current_node->child_index = 1;
426 |
427 | if( current_node != test_node )
428 | test_node->child_index = ++i;
429 |
430 | previous_node = test_node;
431 | }
432 |
433 | test_node = test_node->next;
434 | }while(test_node && test_node->depth >= current_node->depth );
435 |
436 | if( previous_node )
437 | previous_node->is_last_child = 1;
438 | }
439 | }
440 |
441 | //
442 | // Determine and group arrays
443 | //
444 | current_node = root;
445 | while(current_node->next){
446 | current_node = current_node->next;
447 | if( !current_node->array_index ){
448 | i = 1;
449 | test_node = current_node;
450 | previous_array_node = 0;
451 | while(test_node->next && test_node->depth >= current_node->depth){
452 | test_node = test_node->next;
453 | if( current_node->parent == test_node->parent
454 | && current_node->nName == test_node->nName
455 | && memcmp(current_node->name, test_node->name, test_node->nName) == 0){
456 | if( !current_node->array_index ){
457 | current_node->array_index = 1;
458 | previous_array_node = current_node;
459 | }
460 | test_node->array_index = ++i;
461 |
462 | //
463 | // Re-order if array elements are separated
464 | //
465 | // e.g.
466 | // 1
467 | //
468 | // 2
469 | //
470 | //
471 | // becomes:
472 | //
473 | //
474 | // 1
475 | // 2
476 | //
477 | //
478 | //
479 | if( test_node->child_index != previous_array_node->child_index+1 ){
480 |
481 | next_node = previous_array_node->next;
482 | previous_sibling = next_node;
483 |
484 | //
485 | // Get the node that the furthest child of the test node points to
486 | //
487 | test_node_deepest_node = test_node;
488 | if( test_node_deepest_node->next ){
489 | while( test_node_deepest_node->next->depth > test_node->depth)
490 | test_node_deepest_node = test_node_deepest_node->next;
491 | }
492 |
493 | // Shift up each sibling node that sits between the previous array element and the test node
494 | while( next_node->next != test_node ){
495 | if( previous_array_node->parent == next_node->parent ){
496 | previous_sibling = next_node;
497 | previous_sibling->child_index++;
498 | }
499 | next_node = next_node->next;
500 | }
501 | if( previous_array_node->parent == next_node->parent ){
502 | previous_sibling = next_node;
503 | previous_sibling->child_index++;
504 | }
505 |
506 | // Set test node's previous node to point to the test node's next node
507 | next_node->next = test_node_deepest_node->next;
508 |
509 | // If the test node was the last child,
510 | // then flag the last array element as the last child,
511 | // and un-flag the test node
512 | if( test_node->is_last_child ){
513 | previous_sibling->is_last_child = 1;
514 | test_node->is_last_child = 0;
515 | }
516 |
517 | // Set the test node to be the next adjacent sibling to the
518 | // previous array element
519 | test_node->child_index = previous_array_node->child_index+1;
520 |
521 | //
522 | // Get the node that the furthest child of the previous array node points to
523 | //
524 | next_node = previous_array_node;
525 | do{
526 | previous_node = next_node;
527 | next_node = next_node->next;
528 | }while( next_node->parent != previous_array_node->parent );
529 |
530 | previous_node->next = test_node;
531 | test_node_deepest_node->next = next_node;
532 | }
533 |
534 | previous_array_node = test_node;
535 | }
536 | }
537 | if( previous_array_node )
538 | previous_array_node->is_array_end = 1;
539 | }
540 | }
541 |
542 | #ifdef DEBUG
543 | current_node = root;
544 | while(current_node->next){
545 | current_node = current_node->next;
546 |
547 | printf("%.*s\n", current_node->nName, current_node->name);
548 | if( current_node->parent && current_node->parent->parent )
549 | printf(" Parent = %.*s\n", current_node->parent->nName, current_node->parent->name);
550 |
551 | printf(" depth = %d\n", current_node->depth);
552 | printf(" is_parent = %d\n", current_node->is_parent);
553 | printf(" child_index = %d\n", current_node->child_index);
554 | printf(" is_last_child = %d\n", current_node->is_last_child);
555 | printf(" array_index = %d\n", current_node->array_index);
556 | printf(" is_array_end = %d\n", current_node->is_array_end);
557 |
558 | current_attr = current_node->first_attr;
559 | while( current_attr ){
560 | printf(" @%.*s=%.*s\n", current_attr->nName, current_attr->name, current_attr->nName, current_attr->name);
561 | current_attr = current_attr->next_attr;
562 | }
563 |
564 | printf(" \"#text\":\"%.*s\"\n", current_node->nName, current_node->name);
565 | }
566 | #endif
567 |
568 | int nJson;
569 | char *json;
570 |
571 | // Calculate space required
572 | nJson = json_output(root, NULL, indent);
573 |
574 | // Construct JSON
575 | json = MALLOC(nJson+1);
576 | json_output(root, json, indent);
577 | json[nJson] = 0;
578 |
579 | // Cleanup elements
580 | current_node = root;
581 | next_node = current_node->next;
582 | while( current_node ){
583 |
584 | // Cleanup attributes
585 | current_attr = current_node->first_attr;
586 | while( current_attr ){
587 |
588 | // Cleanup value parts
589 | current_value_part = current_attr->first_value_part;
590 | while( current_value_part ){
591 | next_value_part = current_value_part->next_value_part;
592 | FREE(current_value_part);
593 | current_value_part = next_value_part;
594 | }
595 |
596 | next_attr = current_attr->next_attr;
597 | FREE(current_attr);
598 | current_attr = next_attr;
599 | }
600 |
601 | // Cleanup values
602 | current_value = current_node->first_value;
603 | while( current_value ){
604 |
605 | // Cleanup value parts
606 | current_value_part = current_value->first_value_part;
607 | while( current_value_part ){
608 | next_value_part = current_value_part->next_value_part;
609 | if( current_value_part->free ) FREE(current_value_part->val);
610 | FREE(current_value_part);
611 | current_value_part = next_value_part;
612 | }
613 |
614 | next_value = current_value->next_value;
615 | FREE(current_value);
616 | current_value = next_value;
617 | }
618 |
619 | next_node = current_node->next;
620 | FREE(current_node);
621 | current_node = next_node;
622 | }
623 |
624 | return json;
625 | }
626 |
627 | //
628 | // html_code_to_str()
629 | //
630 | // Convert a html code to a char array.
631 | //
632 | // e.g. ' to '
633 | //
634 | // must be freed
635 | //
636 | static void html_code_to_str(int *i, value_part value_part, const char *xml){
637 | // find end of html code
638 | int start = *i+1;
639 | int len = 0;
640 | while( xml[start+len]!=';' )
641 | len++;
642 |
643 | // advance through xml
644 | *i += 2+len;
645 |
646 | // str to int
647 | int m = 1; // multiplier 1, 10, 100 etc.
648 | unsigned long x = 0;
649 | while( len>0 ){
650 | x += (xml[start+len-1]-48)*m;
651 | m *= 10;
652 | len--;
653 | }
654 |
655 | // int to char array
656 | char *str;
657 | if( x < 1 << 8 ){
658 | value_part->nVal = 1;
659 | str = MALLOC(2);
660 | str[0] = x & 0xFF;
661 | str[1] = 0;
662 | }else if( x < 1 << 16 ){
663 | value_part->nVal = 2;
664 | str = MALLOC(3);
665 | str[0] = (x >> 8) & 0xFF;
666 | str[1] = x & 0xFF;
667 | str[2] = 0;
668 | }else if( x < 1 << 16 ){
669 | value_part->nVal = 3;
670 | str = MALLOC(4);
671 | str[0] = (x >> 16) & 0xFF;
672 | str[1] = (x >> 8) & 0xFF;
673 | str[2] = x & 0xFF;
674 | str[3] = 0;
675 | }else{
676 | value_part->nVal = 4;
677 | str = MALLOC(5);
678 | str[0] = (x >> 24) & 0xFF;
679 | str[1] = (x >> 16) & 0xFF;
680 | str[2] = (x >> 8) & 0xFF;
681 | str[3] = x & 0xFF;
682 | str[4] = 0;
683 | }
684 | value_part->free = 1;
685 | value_part->val = str;
686 | }
687 |
688 | static value_part get_value_parts(int *i, int j, char *xml, value_part new_value_part, int is_attr){
689 |
690 | while( xml[*i+j] && !(xml[*i+j]=='<'
691 | || xml[*i+j]=='&'
692 | || xml[*i+j]=='\b'
693 | || xml[*i+j]=='\t'
694 | || xml[*i+j]=='\n'
695 | || xml[*i+j]=='\f'
696 | || xml[*i+j]=='\r'
697 | || xml[*i+j]=='"'
698 | || xml[*i+j]=='\\') )
699 | j++;
700 |
701 | //printf("%.*s\n", j, &xml[*i]);
702 |
703 | new_value_part->nVal = j;
704 | new_value_part->val = &xml[*i];
705 | new_value_part->free = 0;
706 | *i += j;
707 |
708 | // Special characters
709 | if( xml[*i]=='&'
710 | || xml[*i]=='\b'
711 | || xml[*i]=='\t'
712 | || xml[*i]=='\n'
713 | || xml[*i]=='\f'
714 | || xml[*i]=='\r'
715 | || (xml[*i]=='"' && !is_attr)
716 | || xml[*i]=='\\' ){
717 | new_value_part->next_value_part = (value_part)MALLOC(sizeof(struct value_part));
718 | new_value_part = new_value_part->next_value_part;
719 | new_value_part->next_value_part = 0;
720 | new_value_part->free = 0;
721 | }
722 |
723 | if( xml[*i]=='&' ){
724 | *i += 1;
725 | if( memcmp("amp;", &xml[*i], 4) == 0 ){
726 | new_value_part->nVal = 1;
727 | new_value_part->val = "&";
728 | *i += 4;
729 | }else if( memcmp("gt;", &xml[*i], 3) == 0 ){
730 | new_value_part->nVal = 1;
731 | new_value_part->val = ">";
732 | *i += 3;
733 | }else if( memcmp("lt;", &xml[*i], 3) == 0 ){
734 | new_value_part->nVal = 1;
735 | new_value_part->val = "<";
736 | *i += 3;
737 | }else if( memcmp("quot;", &xml[*i], 5) == 0 ){
738 | new_value_part->nVal = 2;
739 | new_value_part->val = "\\\"";
740 | *i += 5;
741 | }else if( memcmp("apos;", &xml[*i], 5) == 0 ){
742 | new_value_part->nVal = 1;
743 | new_value_part->val = "'";
744 | *i += 5;
745 | }else if( memcmp("#8;", &xml[*i], 3) == 0 ){
746 | new_value_part->nVal = 2;
747 | new_value_part->val = "\\b";
748 | *i += 3;
749 | }else if( memcmp("#9;", &xml[*i], 3) == 0 ){
750 | new_value_part->nVal = 2;
751 | new_value_part->val = "\\t";
752 | *i += 3;
753 | }else if( memcmp("#10;", &xml[*i], 4) == 0 ){
754 | new_value_part->nVal = 2;
755 | new_value_part->val = "\\n";
756 | *i += 4;
757 | }else if( memcmp("#12;", &xml[*i], 4) == 0 ){
758 | new_value_part->nVal = 2;
759 | new_value_part->val = "\\f";
760 | *i += 4;
761 | }else if( memcmp("#13;", &xml[*i], 4) == 0 ){
762 | new_value_part->nVal = 2;
763 | new_value_part->val = "\\r";
764 | *i += 4;
765 | }else if( memcmp("#34;", &xml[*i], 4) == 0 ){
766 | new_value_part->nVal = 2;
767 | new_value_part->val = "\\\"";
768 | *i += 4;
769 | }else if( memcmp("#92;", &xml[*i], 4) == 0 ){
770 | new_value_part->nVal = 2;
771 | new_value_part->val = "\\\\";
772 | *i += 4;
773 | }else if( memcmp("#", &xml[*i], 1) == 0 ){
774 | html_code_to_str(i, new_value_part, (const char *)xml);
775 | }
776 | }else if( xml[*i]=='\b' ){
777 | new_value_part->nVal = 2;
778 | new_value_part->val = "\\b";
779 | *i += 1;
780 | }else if( xml[*i]=='\t' ){
781 | new_value_part->nVal = 2;
782 | new_value_part->val = "\\t";
783 | *i += 1;
784 | }else if( xml[*i]=='\n' ){
785 | new_value_part->nVal = 2;
786 | new_value_part->val = "\\n";
787 | *i += 1;
788 | }else if( xml[*i]=='\f' ){
789 | new_value_part->nVal = 2;
790 | new_value_part->val = "\\f";
791 | *i += 1;
792 | }else if( xml[*i]=='\r' ){
793 | new_value_part->nVal = 2;
794 | new_value_part->val = "\\r";
795 | *i += 1;
796 | }else if( !is_attr && xml[*i]=='"' ){
797 | new_value_part->nVal = 2;
798 | new_value_part->val = "\\\"";
799 | *i += 1;
800 | }else if( xml[*i]=='\\' ){
801 | new_value_part->nVal = 2;
802 | new_value_part->val = "\\\\";
803 | *i += 1;
804 | }
805 |
806 | return new_value_part;
807 | }
808 |
// Output helpers for json_output().
//
// Each macro adds the result of a print_* helper to the local variable
// nJson and passes the local variable json; PRINT_NEWLINE also reads the
// local variable indent. They can therefore only be used where those names
// are in scope.
// The print_* helpers presumably return the number of characters they
// produce - confirm against their definitions.
//
// NOTE(review): PRINT_STRING already ends with ';', unlike the other three,
// so "PRINT_STRING(z,n);" expands to an extra empty statement and would
// break an unbraced if/else.
#define PRINT_SPACES(x) nJson += print_spaces(json, nJson, x)
#define PRINT_NEWLINE nJson += print_newline(json, nJson, indent)
#define PRINT_CHAR(x) nJson += print_char(json, nJson, x)
#define PRINT_STRING(z,n) nJson += print_string(json, nJson, z, n);
813 |
814 | //
815 | // json_output
816 | //
817 | // If *json is null, then return total space required.
818 | // If *json is not null, then populate with JSON string.
819 | //
820 | // Does not zero terminate JSON string.
821 | //
822 | int json_output(element root, char *json, int indent){
823 | int nJson = 0;
824 | int depth = 0;
825 |
826 | element current_node;
827 | element parent_node;
828 | element_attribute current_attr;
829 | value current_value;
830 | value_part current_value_part;
831 |
832 | current_node = root;
833 |
834 | while(current_node->next){
835 | current_node = current_node->next;
836 |
837 | // Opening bracket
838 | if( (current_node->child_index == 1 && !current_node->parent->first_attr && !current_node->parent->first_value) || current_node == root->next ){
839 | if( current_node->parent->array_index > 1){
840 | PRINT_SPACES(depth*indent);
841 | }
842 | PRINT_CHAR('{');
843 | PRINT_NEWLINE;
844 | depth++;
845 | }
846 |
847 | // Node name
848 | if( current_node->array_index <= 1 ){
849 | PRINT_SPACES(depth*indent);
850 | PRINT_CHAR('"');
851 | PRINT_STRING(current_node->name, current_node->nName);
852 | PRINT_CHAR('"');
853 | PRINT_CHAR(':');
854 | PRINT_SPACES(indent < 0 ? 0 : 1);
855 | }
856 |
857 | // Attributes
858 | current_attr = current_node->first_attr;
859 | if( current_attr ){
860 |
861 | if( current_node->array_index == 1 ){
862 | depth++;
863 | PRINT_CHAR('[');
864 | PRINT_NEWLINE;
865 | }
866 |
867 | if( current_node->array_index ){
868 | PRINT_SPACES(depth*indent);
869 | }
870 |
871 | PRINT_CHAR('{');
872 | PRINT_NEWLINE;
873 | depth++;
874 |
875 | while(current_attr){
876 | // "@name":"value",
877 | PRINT_SPACES(depth*indent);
878 | PRINT_CHAR('"');
879 | PRINT_CHAR('@');
880 | PRINT_STRING(current_attr->name, current_attr->nName);
881 | PRINT_CHAR('"');
882 | PRINT_CHAR(':');
883 | PRINT_SPACES(indent < 0 ? 0 : 1);
884 |
885 | // Join value parts
886 | PRINT_CHAR('"');
887 | current_value_part = current_attr->first_value_part;
888 | while( current_value_part ){
889 | PRINT_STRING(current_value_part->val, current_value_part->nVal);
890 | current_value_part = current_value_part->next_value_part;
891 | }
892 | PRINT_CHAR('"');
893 |
894 | current_attr = current_attr->next_attr;
895 |
896 | if( current_attr || current_node->first_value || current_node->is_parent ){
897 | PRINT_CHAR(',');
898 | PRINT_NEWLINE;
899 | }
900 | }
901 |
902 | if( !current_node->first_value && !current_node->is_parent ){
903 | depth--;
904 | PRINT_NEWLINE;
905 | PRINT_SPACES(depth*indent);
906 | PRINT_CHAR('}');
907 | }
908 | }
909 |
910 | // #text
911 | if( current_node->first_value && (current_node->first_attr || current_node->is_parent) ){
912 | if( current_node->array_index ){
913 | PRINT_SPACES(depth*indent);
914 | }
915 | if( current_node->is_parent && !current_node->first_attr ){
916 | PRINT_CHAR('{');
917 | PRINT_NEWLINE;
918 | depth++;
919 | }
920 | if( !(current_node->first_attr && current_node->array_index ) ){
921 | PRINT_SPACES(depth*indent);
922 | }
923 | PRINT_STRING("\"#text\":", 8);
924 | PRINT_SPACES(indent < 0 ? 0 : 1);
925 |
926 | // Array of values
927 | if( current_node->first_value->next_value ){
928 | PRINT_CHAR('[');
929 | PRINT_NEWLINE;
930 | current_value = current_node->first_value;
931 |
932 | while( current_value ){
933 | PRINT_SPACES((depth+1)*indent);
934 |
935 | // Join value parts
936 | PRINT_CHAR('"');
937 | current_value_part = current_value->first_value_part;
938 | while( current_value_part ){
939 | PRINT_STRING(current_value_part->val, current_value_part->nVal);
940 | current_value_part = current_value_part->next_value_part;
941 | }
942 | PRINT_CHAR('"');
943 |
944 | current_value = current_value->next_value;
945 | if( current_value ){
946 | PRINT_CHAR(',');
947 | PRINT_NEWLINE;
948 | }else{
949 | PRINT_NEWLINE;
950 | PRINT_SPACES(depth*indent);
951 | PRINT_CHAR(']');
952 | }
953 | }
954 | }
955 | }
956 |
957 | // Array start
958 | if( current_node->array_index == 1 && !current_node->first_attr ){
959 | depth++;
960 | PRINT_CHAR('[');
961 | PRINT_NEWLINE;
962 | if( current_node->is_parent ){
963 | PRINT_SPACES(depth*indent);
964 | }
965 | }
966 |
967 | // null
968 | if( !current_node->first_value && !current_node->is_parent && !current_node->first_attr ){
969 | if( current_node->array_index ){
970 | PRINT_SPACES(depth*indent);
971 | }
972 | PRINT_STRING("null", 4);
973 | }
974 |
975 | // Value
976 | if( current_node->first_value && !current_node->first_value->next_value ){
977 | if( current_node->array_index && !current_node->is_parent && !current_node->first_attr ){
978 | PRINT_SPACES(depth*indent);
979 | }
980 |
981 | // Join value parts
982 | PRINT_CHAR('"');
983 | current_value_part = current_node->first_value->first_value_part;
984 | while( current_value_part ){
985 | PRINT_STRING(current_value_part->val, current_value_part->nVal);
986 | current_value_part = current_value_part->next_value_part;
987 | }
988 | PRINT_CHAR('"');
989 |
990 | if( current_node->first_attr && !current_node->is_parent ){
991 | depth--;
992 | PRINT_NEWLINE;
993 | PRINT_SPACES(depth*indent);
994 | PRINT_CHAR('}');
995 | }
996 | }
997 |
998 | // Comma
999 | if( (!current_node->is_last_child && !current_node->is_array_end && !current_node->is_parent) || (current_node->is_parent && current_node->first_value) ){
1000 | PRINT_CHAR(',');
1001 | PRINT_NEWLINE;
1002 | }
1003 |
1004 | // Trailing brackets
1005 | if( (current_node->is_last_child || current_node->is_array_end ) && !current_node->is_parent ){
1006 | parent_node = current_node;
1007 |
1008 | while( parent_node != root && (!current_node->next || parent_node != current_node->next->parent) ){
1009 | if( parent_node->is_array_end ){
1010 | depth--;
1011 | PRINT_NEWLINE;
1012 | PRINT_SPACES(depth*indent);
1013 | PRINT_CHAR(']');
1014 | if( !parent_node->is_last_child ){
1015 | PRINT_CHAR(',');
1016 | }
1017 | }
1018 |
1019 | if( parent_node->is_last_child ){
1020 | depth--;
1021 | PRINT_NEWLINE;
1022 | PRINT_SPACES(depth*indent);
1023 | PRINT_CHAR('}');
1024 | if( !parent_node->parent->is_last_child && !parent_node->parent->is_array_end ){
1025 | PRINT_CHAR(',');
1026 | }
1027 | }
1028 |
1029 | parent_node = parent_node->parent;
1030 | }
1031 | PRINT_NEWLINE;
1032 | }
1033 |
1034 | }
1035 |
1036 | return nJson;
1037 | }
1038 |
1039 | #ifdef SQLITE
1040 | /*
1041 | ** Implementation of xml_to_json() function.
1042 | */
1043 | static void xml_to_jsonFunc(
1044 | sqlite3_context *context,
1045 | int argc,
1046 | sqlite3_value **argv
1047 | ){
1048 | if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
1049 | int indent = -1;
1050 | char *xml = (char *)sqlite3_value_text(argv[0]);
1051 | char *json;
1052 |
1053 | if( argc==2 ){
1054 | if( sqlite3_value_type(argv[1])!=SQLITE_NULL )
1055 | indent = sqlite3_value_int(argv[1]);
1056 | }
1057 |
1058 | json = xml_to_json(xml, indent);
1059 |
1060 | sqlite3_result_text(context, json, -1, sqlite3_free);
1061 | }
1062 |
1063 | #ifdef _WIN32
1064 | __declspec(dllexport)
1065 | #endif
1066 | int sqlite3_xmltojson_init(
1067 | sqlite3 *db,
1068 | char **pzErrMsg,
1069 | const sqlite3_api_routines *pApi
1070 | ){
1071 | int rc = SQLITE_OK;
1072 | SQLITE_EXTENSION_INIT2(pApi);
1073 | (void)pzErrMsg; /* Unused parameter */
1074 | rc = sqlite3_create_function(db, "xml_to_json", 1, SQLITE_UTF8, 0,
1075 | xml_to_jsonFunc, 0, 0);
1076 | if( rc==SQLITE_OK ){
1077 | rc = sqlite3_create_function(db, "xml_to_json", 2, SQLITE_UTF8, 0,
1078 | xml_to_jsonFunc, 0, 0);
1079 | }
1080 | return rc;
1081 | }
1082 | #endif
1083 |
--------------------------------------------------------------------------------