├── LICENSE ├── README.md ├── index.html └── xml_to_json.c /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 jakethaw 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xml_to_json 2 | 3 | Implementation of `xml_to_json(xml, indent)` function. 4 | 5 | `xml_to_json(xml, indent)` takes two arguments: 6 | 7 | * xml - XML string UTF-8 encoded 8 | * indent - Indent for pretty printed JSON or -1 for minified JSON 9 | 10 | The input XML is not validated prior to conversion. 
11 | 12 | # TOC 13 | 14 | - [WebAssembly](#webassembly) 15 | - [Compile](#compile) 16 | - [Usage example:](#usage-example) 17 | - [SQLite3](#sqlite3) 18 | - [Compile](#compile-1) 19 | - [Usage examples](#usage-examples) 20 | - [Implementation Method](#implementation-method) 21 | - [TODO](#todo) 22 | 23 | 24 | # WebAssembly 25 | 26 | ## Demo 27 | 28 | [jakethaw.com/xml_to_json](https://jakethaw.com/xml_to_json) 29 | 30 | ## Compile 31 | 32 | To compile with [Emscripten](https://emscripten.org) as a WebAssembly function: 33 | 34 | ```bash 35 | emcc -Oz xml_to_json.c -o xml_to_json.js -s EXPORTED_FUNCTIONS='["_xml_to_json", "_free"]' -s 'EXTRA_EXPORTED_RUNTIME_METHODS=["allocate", "intArrayFromString", "ALLOC_NORMAL", "UTF8ToString"]' 36 | ``` 37 | 38 | ## Usage example: 39 | 40 | ```javascript 41 | var xml = allocate(intArrayFromString("<x>hello world</x>"), 'i8', ALLOC_NORMAL); 42 | var indent = 2; 43 | 44 | var json = _xml_to_json(xml, indent); 45 | console.log(UTF8ToString(json, 5000)); 46 | 47 | _free(xml); 48 | _free(json); 49 | ``` 50 | ```json 51 | { 52 | "x": "hello world" 53 | } 54 | ``` 55 | 56 | # SQLite3 57 | 58 | Implementation of an [SQLite3](https://sqlite.org) `xml_to_json(X, N)` function. 59 | 60 | `xml_to_json(X, N)` takes one or two arguments: 61 | 62 | * X - XML string UTF-8 encoded 63 | * N - Optional indent for pretty printed JSON or -1 for minified JSON 64 | 65 | The input XML is not validated prior to conversion. 66 | 67 | 68 | ## Compile 69 | 70 | To compile with gcc as a run-time loadable extension: 71 | 72 | ```bash 73 | UNIX-like : gcc -g -O3 -fPIC -shared xml_to_json.c -o xml_to_json.so -DSQLITE 74 | Mac : gcc -g -O3 -fPIC -dynamiclib xml_to_json.c -o xml_to_json.dylib -DSQLITE 75 | Windows : gcc -g -O3 -shared xml_to_json.c -o xml_to_json.dll -DSQLITE 76 | ``` 77 | 78 | Add the `-DDEBUG` option to print debug information to stdout. 79 | 80 | E.g. 
81 | 82 | ```bash 83 | gcc -g -O3 -fPIC -shared xml_to_json.c -o xml_to_json.so -DSQLITE -DDEBUG 84 | ``` 85 | 86 | ## Usage examples 87 | 88 | ```sql 89 | SELECT xml_to_json('<x>hello world</x>', 2); 90 | ``` 91 | ```json 92 | { 93 | "x": "hello world" 94 | } 95 | ``` 96 | --- 97 | ```sql 98 | SELECT xml_to_json('<x>a<y/>b</x>', 2); 99 | ``` 100 | ```json 101 | { 102 | "x": { 103 | "#text": [ 104 | "a", 105 | "b" 106 | ], 107 | "y": null 108 | } 109 | } 110 | ``` 111 | --- 112 | ```sql 113 | SELECT xml_to_json('<x><y>abc</y><y>def</y></x>', 2); 114 | ``` 115 | ```json 116 | { 117 | "x": { 118 | "y": [ 119 | "abc", 120 | "def" 121 | ] 122 | } 123 | } 124 | ``` 125 | --- 126 | ```sql 127 | SELECT xml_to_json('<x>hello<y>abc</y>world<y>def</y>xyz</x>', 2); 128 | ``` 129 | ```json 130 | { 131 | "x": { 132 | "#text": [ 133 | "hello", 134 | "world", 135 | "xyz" 136 | ], 137 | "y": [ 138 | "abc", 139 | "def" 140 | ] 141 | } 142 | } 143 | ``` 144 | --- 145 | ```sql 146 | SELECT xml_to_json('<x attr1="attr val 1" attr2="attr val 2">&amp; &gt; &lt; &#39;</x>', 2); 147 | ``` 148 | ```json 149 | { 150 | "x": { 151 | "@attr1": "attr val 1", 152 | "@attr2": "attr val 2", 153 | "#text": "& > < '" 154 | } 155 | } 156 | ``` 157 | 158 | # Implementation Method 159 | 160 | This implementation does not support the full [XML 1.0 Specification](https://www.w3.org/TR/REC-xml/). The following explanation is designed to describe what is currently supported. 161 | 162 | TODO 163 | 164 | # TODO 165 | 166 | * Improve readme 167 | * Add test cases 168 | * Benchmark 169 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | XML to JSON 7 | 13 | 14 | 15 |

XML to JSON

16 | 17 | github.com/jakethaw/xml_to_json 18 | 19 |
20 |
21 | 22 | 23 | 24 | 25 | 35 | 36 | 37 | 38 | 41 | 42 | 43 | 44 | 50 | 51 |
Indent 26 | 34 |
XML 39 | 40 |
JSON 45 | 49 |
52 | 53 | 54 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /xml_to_json.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** xml_to_json.c - 2018-01-22 - jakethaw 3 | ** 4 | ************************************************************************* 5 | ** 6 | ** MIT License 7 | ** 8 | ** Copyright (c) 2019 jakethaw 9 | ** 10 | ** Permission is hereby granted, free of charge, to any person obtaining a copy 11 | ** of this software and associated documentation files (the "Software"), to deal 12 | ** in the Software without restriction, including without limitation the rights 13 | ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | ** copies of the Software, and to permit persons to whom the Software is 15 | ** furnished to do so, subject to the following conditions: 16 | ** 17 | ** The above copyright notice and this permission notice shall be included in all 18 | ** copies or substantial portions of the Software. 19 | ** 20 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | ** SOFTWARE. 
27 | ** 28 | ************************************************************************* 29 | ** WebAssembly ********************************************************* 30 | ************************************************************************* 31 | ** 32 | ** To compile with Emscripten as a WebAssembly function: 33 | ** 34 | ** emcc -Oz xml_to_json.c -o xml_to_json.js -s EXPORTED_FUNCTIONS='["_xml_to_json", "_free"]' -s 'EXTRA_EXPORTED_RUNTIME_METHODS=["allocate", "intArrayFromString", "ALLOC_NORMAL", "UTF8ToString"]' 35 | ** 36 | ************************************************************************* 37 | ** 38 | ** Usage example: 39 | ** 40 | ** var xml = allocate(intArrayFromString("hello world"), 'i8', ALLOC_NORMAL); 41 | ** var indent = 2; 42 | ** 43 | ** var json = _xml_to_json(xml, indent); 44 | ** console.log(UTF8ToString(json, 5000)); 45 | ** 46 | ** _free(xml); 47 | ** _free(json); 48 | ** 49 | ************************************************************************* 50 | ** SQLite3 ************************************************************* 51 | ************************************************************************* 52 | ** 53 | ** Implementation of an SQLite3 xml_to_json(X, N) function. 54 | ** 55 | ** xml_to_json(X, N) takes one or two arguments: 56 | ** 57 | ** * X - XML string UTF-8 encoded 58 | ** * N - Optional indent for pretty printed JSON or -1 for minified JSON 59 | ** 60 | ** The input XML is not validated prior to conversion. 61 | ** 62 | ************************************************************************* 63 | ** 64 | ** To compile with gcc as a run-time loadable extension: 65 | ** 66 | ** UNIX-like : gcc -g -O2 -fPIC -shared xml_to_json.c -o xml_to_json.so -DSQLITE 67 | ** Mac : gcc -g -O2 -fPIC -dynamiclib xml_to_json.c -o xml_to_json.dylib -DSQLITE 68 | ** Windows : gcc -g -O2 -shared xml_to_json.c -o xml_to_json.dll -DSQLITE 69 | ** 70 | ** Add the -DDEBUG option to print debug information to stdout. 
71 | ** 72 | ************************************************************************* 73 | ** 74 | ** Usage examples: 75 | ** 76 | ** SELECT xml_to_json('ab'); 77 | ** SELECT xml_to_json('abcdef', 2); 78 | ** SELECT xml_to_json('helloabcworlddefxyz', 2); 79 | ** SELECT xml_to_json('& > < '', 2); 80 | ** 81 | ************************************************************************* 82 | */ 83 | 84 | #ifdef SQLITE 85 | #include "sqlite3ext.h" 86 | SQLITE_EXTENSION_INIT1 87 | #define MALLOC sqlite3_malloc 88 | #define FREE sqlite3_free 89 | #else 90 | #define MALLOC malloc 91 | #define FREE free 92 | #endif 93 | 94 | #include 95 | #include 96 | #include 97 | 98 | typedef struct element *element; 99 | struct element{ 100 | struct element *parent; // Link to parent element or null 101 | char *name; // Pointer to element name in original XML string 102 | int nName; // Length of name 103 | struct value *first_value; // Link to first value. Value might be an array of values e.g ab 104 | int depth; // Depth of element 105 | int is_parent; // True if element has children 106 | int child_index; // Index of element among siblings 107 | int is_last_child; // True if element does not link to sibling 108 | int array_index; // Index of element in array 109 | int is_array_end; // True if last element in array 110 | struct element *next; // Link to next element. Sibling or ancestor's sibling 111 | struct element_attribute *first_attr; // Link to first attribute 112 | }; 113 | 114 | typedef struct value *value; 115 | struct value{ 116 | struct value_part *first_value_part; // Link to first value part 117 | struct value *next_value; // Link to sibling value 118 | }; 119 | 120 | // Divide value into parts to support special characters 121 | // i.e. 122 | // & -> & 123 | // > -> > 124 | // < -> < 125 | // " -> " 126 | // ' -> ' 127 | // ' -> ' 128 | // etc. 
129 | // 130 | // Constant memory for named special charactes 131 | // Dynamically allocated memory for html codes values 132 | // 133 | typedef struct value_part *value_part; 134 | struct value_part{ 135 | char *val; // Pointer to value part in original XML string (or special characters) 136 | int nVal; // Length of val 137 | int free; // True if val should be freed 138 | struct value_part *next_value_part; // Link to next value part 139 | }; 140 | 141 | typedef struct element_attribute *element_attribute; 142 | struct element_attribute{ 143 | char *name; // Pointer to element name in original XML string 144 | int nName; // Lenth of name 145 | struct value_part *first_value_part; // Link to first value part 146 | struct element_attribute *next_attr; // Link to nect attribute 147 | }; 148 | 149 | static value_part get_value_parts(int *i, int j, char *xml, value_part new_value_part, int is_attr); 150 | static int json_output(element root, char *json, int indent); 151 | 152 | static int is_space(char *z){ 153 | return z[0]==' ' || z[0]=='\t' || z[0]=='\n' || z[0]=='\f' || z[0]=='\r'; 154 | } 155 | 156 | static int print_spaces(char *json, int nJson, int spaces){ 157 | if( spaces<0 ) 158 | return 0; 159 | 160 | int i; 161 | for(i=0; iparent = 0; 232 | root->depth = 0; 233 | root->first_value = 0; 234 | root->is_parent = 0; 235 | root->child_index = 0; 236 | root->is_last_child = 1; 237 | root->array_index = 0; 238 | root->is_array_end = 0; 239 | root->next = 0; 240 | root->first_attr = 0; 241 | 242 | previous_node = root; 243 | 244 | i = 0; 245 | while( is_space(&xml[i]) ) i++; 246 | while(xml[i]){ 247 | // Element open tag 248 | //printf("%.*s\n", 1, &xml[i]); 249 | if( xml[i]=='<' && xml[i+1]!='/' ){ 250 | // Create node 251 | depth++; 252 | new_node = (element)MALLOC(sizeof(struct element)); 253 | 254 | // Node name 255 | j = 1; 256 | while( xml[i+j] && !is_space(&xml[i+j]) && !(xml[i+j]=='/' || xml[i+j]=='>') ) j++; 257 | j--; 258 | new_node->name = &xml[i+1]; 259 
| new_node->nName = j; 260 | i += j+1; 261 | 262 | // Default values 263 | new_node->first_value = 0; 264 | new_node->depth = depth; 265 | new_node->is_parent = 0; 266 | new_node->array_index = 0; 267 | new_node->is_array_end = 0; 268 | new_node->next = 0; 269 | new_node->child_index = 0; 270 | new_node->is_last_child = 0; 271 | new_node->first_attr = 0; 272 | 273 | // Set parent node 274 | parent_node = previous_node; 275 | while( parent_node->depth >= new_node->depth && parent_node->parent ) 276 | parent_node = parent_node->parent; 277 | new_node->parent = parent_node; 278 | 279 | if( !parent_node->is_parent ) 280 | parent_node->is_parent = 1; 281 | 282 | // Make new node the current node 283 | previous_node->next = new_node; 284 | previous_node = new_node; 285 | current_node = new_node; 286 | 287 | // printf("%.*s\n", j, current_node->name); 288 | // if( parent_node && parent_node->parent ) 289 | // printf(" Parent = %.*s\n", parent_node->nName, parent_node->name); 290 | 291 | // Get attributes 292 | while( is_space(&xml[i]) ) i++; 293 | while( xml[i] && xml[i]!='/' && xml[i]!='?' 
&& xml[i]!='>' ){ 294 | // Create attribute 295 | new_attr = (element_attribute)MALLOC(sizeof(struct element_attribute)); 296 | if( !current_node->first_attr ){ 297 | current_node->first_attr = new_attr; 298 | }else{ 299 | current_attr->next_attr = new_attr; 300 | } 301 | current_attr = new_attr; 302 | current_attr->first_value_part = 0; 303 | current_attr->next_attr = 0; 304 | 305 | // Attribute name 306 | j = 1; 307 | while( xml[i+j] && xml[i+j]!='=' && !is_space(&xml[i+j]) ) j++; 308 | current_attr->name = &xml[i]; 309 | current_attr->nName = j; 310 | i += j; 311 | 312 | // Ensure attribute value starts 313 | while( xml[i] && (xml[i]!='"' || is_space(&xml[i])) ) i++; 314 | 315 | if( xml[i] ){ 316 | i++; 317 | 318 | // Ensure attribute value ends 319 | j=0; 320 | while( xml[i+j] && xml[i+j]!='"' ) j++; 321 | 322 | if( xml[i+j] ){ 323 | // Attribute value 324 | do{ 325 | if( !current_attr->first_value_part ){ 326 | new_value_part = (value_part)MALLOC(sizeof(struct value_part)); 327 | new_value_part->next_value_part = 0; 328 | current_attr->first_value_part = new_value_part; 329 | }else{ 330 | new_value_part->next_value_part = (value_part)MALLOC(sizeof(struct value_part)); 331 | new_value_part = new_value_part->next_value_part; 332 | new_value_part->next_value_part = 0; 333 | } 334 | 335 | new_value_part = get_value_parts(&i, 0, xml, new_value_part, 1); 336 | }while( xml[i] && xml[i]!='"' ); 337 | 338 | if( xml[i] == '"' ){ 339 | i++; 340 | while( is_space(&xml[i]) ) i++; 341 | } 342 | } 343 | } 344 | } 345 | 346 | // Self closing element 347 | if( xml[i]=='/' || xml[i]=='?' 
){ 348 | current_node = current_node->parent; 349 | depth--; 350 | while( xml[i] && xml[i]!='>' ) i++; 351 | } 352 | 353 | // Element close tag 354 | }else if( xml[i]=='<' && xml[i+1]=='/' ){ 355 | current_node = current_node->parent; 356 | depth--; 357 | while( xml[i] && xml[i]!='>' ) i++; 358 | 359 | }else{ 360 | i++; 361 | 362 | // Get value if it exists, or find the start of the next element 363 | j = 0; 364 | while( is_space(&xml[i+j]) ) j++; 365 | 366 | if( xml[i+j]!='<' || (!current_node->is_parent && xml[i+j]=='<' && xml[i+j+1]=='/') ){ 367 | 368 | // Determine the deepest value of this element 369 | current_value = current_node->first_value; 370 | while( current_value && current_value->next_value ) 371 | current_value = current_value->next_value; 372 | 373 | new_value = (value)MALLOC(sizeof(struct value)); 374 | 375 | // Either make the new value the first value of the element, 376 | // or link the new value to the previous one 377 | if( !current_node->first_value ){ 378 | current_node->first_value = new_value; 379 | }else{ 380 | current_value->next_value = new_value; 381 | } 382 | 383 | new_value->first_value_part = 0; 384 | new_value->next_value = 0; 385 | 386 | // Value 387 | new_value_part = 0; 388 | while( xml[i] && xml[i]!='<' ){ 389 | if( !new_value->first_value_part ){ 390 | new_value_part = (value_part)MALLOC(sizeof(struct value_part)); 391 | new_value_part->next_value_part = 0; 392 | new_value->first_value_part = new_value_part; 393 | }else{ 394 | new_value_part->next_value_part = (value_part)MALLOC(sizeof(struct value_part)); 395 | new_value_part = new_value_part->next_value_part; 396 | new_value_part->next_value_part = 0; 397 | } 398 | new_value_part = get_value_parts(&i, 0, xml, new_value_part, 0); 399 | j = 0; 400 | } 401 | 402 | // if( new_value_part ) 403 | // printf("%.*s=%.*s\n", current_node->nName, 404 | // current_node->name, 405 | // new_value_part->nVal, 406 | // new_value_part->val); 407 | } 408 | i += j; 409 | } 410 | } 411 | 412 | 
// 413 | // Determine first/last nodes in a family 414 | // 415 | current_node = root; 416 | while(current_node->next){ 417 | current_node = current_node->next; 418 | if( !current_node->child_index ){ 419 | i = 1; 420 | test_node = current_node; 421 | previous_node = 0; 422 | do{ 423 | if( current_node->parent == test_node->parent ){ 424 | if( !current_node->child_index ) 425 | current_node->child_index = 1; 426 | 427 | if( current_node != test_node ) 428 | test_node->child_index = ++i; 429 | 430 | previous_node = test_node; 431 | } 432 | 433 | test_node = test_node->next; 434 | }while(test_node && test_node->depth >= current_node->depth ); 435 | 436 | if( previous_node ) 437 | previous_node->is_last_child = 1; 438 | } 439 | } 440 | 441 | // 442 | // Determine and group arrays 443 | // 444 | current_node = root; 445 | while(current_node->next){ 446 | current_node = current_node->next; 447 | if( !current_node->array_index ){ 448 | i = 1; 449 | test_node = current_node; 450 | previous_array_node = 0; 451 | while(test_node->next && test_node->depth >= current_node->depth){ 452 | test_node = test_node->next; 453 | if( current_node->parent == test_node->parent 454 | && current_node->nName == test_node->nName 455 | && memcmp(current_node->name, test_node->name, test_node->nName) == 0){ 456 | if( !current_node->array_index ){ 457 | current_node->array_index = 1; 458 | previous_array_node = current_node; 459 | } 460 | test_node->array_index = ++i; 461 | 462 | // 463 | // Re-order if array elements are separated 464 | // 465 | // e.g. 
466 | // 1 467 | // 468 | // 2 469 | // 470 | // 471 | // becomes: 472 | // 473 | // 474 | // 1 475 | // 2 476 | // 477 | // 478 | // 479 | if( test_node->child_index != previous_array_node->child_index+1 ){ 480 | 481 | next_node = previous_array_node->next; 482 | previous_sibling = next_node; 483 | 484 | // 485 | // Get the node that the furthest child of the test node points to 486 | // 487 | test_node_deepest_node = test_node; 488 | if( test_node_deepest_node->next ){ 489 | while( test_node_deepest_node->next->depth > test_node->depth) 490 | test_node_deepest_node = test_node_deepest_node->next; 491 | } 492 | 493 | // Shift up each sibling node that sits between the previous array element and the test node 494 | while( next_node->next != test_node ){ 495 | if( previous_array_node->parent == next_node->parent ){ 496 | previous_sibling = next_node; 497 | previous_sibling->child_index++; 498 | } 499 | next_node = next_node->next; 500 | } 501 | if( previous_array_node->parent == next_node->parent ){ 502 | previous_sibling = next_node; 503 | previous_sibling->child_index++; 504 | } 505 | 506 | // Set test node's previous node to point to the test node's next node 507 | next_node->next = test_node_deepest_node->next; 508 | 509 | // If the test node was the last child, 510 | // then flag the last array element as the last child, 511 | // and un-flag the test node 512 | if( test_node->is_last_child ){ 513 | previous_sibling->is_last_child = 1; 514 | test_node->is_last_child = 0; 515 | } 516 | 517 | // Set the test node to be the next adjacent sibling to the 518 | // previous array element 519 | test_node->child_index = previous_array_node->child_index+1; 520 | 521 | // 522 | // Get the node that the furthest child of the previous array node points to 523 | // 524 | next_node = previous_array_node; 525 | do{ 526 | previous_node = next_node; 527 | next_node = next_node->next; 528 | }while( next_node->parent != previous_array_node->parent ); 529 | 530 | previous_node->next 
= test_node; 531 | test_node_deepest_node->next = next_node; 532 | } 533 | 534 | previous_array_node = test_node; 535 | } 536 | } 537 | if( previous_array_node ) 538 | previous_array_node->is_array_end = 1; 539 | } 540 | } 541 | 542 | #ifdef DEBUG 543 | current_node = root; 544 | while(current_node->next){ 545 | current_node = current_node->next; 546 | 547 | printf("%.*s\n", current_node->nName, current_node->name); 548 | if( current_node->parent && current_node->parent->parent ) 549 | printf(" Parent = %.*s\n", current_node->parent->nName, current_node->parent->name); 550 | 551 | printf(" depth = %d\n", current_node->depth); 552 | printf(" is_parent = %d\n", current_node->is_parent); 553 | printf(" child_index = %d\n", current_node->child_index); 554 | printf(" is_last_child = %d\n", current_node->is_last_child); 555 | printf(" array_index = %d\n", current_node->array_index); 556 | printf(" is_array_end = %d\n", current_node->is_array_end); 557 | 558 | current_attr = current_node->first_attr; 559 | while( current_attr ){ 560 | printf(" @%.*s=%.*s\n", current_attr->nName, current_attr->name, current_attr->nName, current_attr->name); 561 | current_attr = current_attr->next_attr; 562 | } 563 | 564 | printf(" \"#text\":\"%.*s\"\n", current_node->nName, current_node->name); 565 | } 566 | #endif 567 | 568 | int nJson; 569 | char *json; 570 | 571 | // Calculate space required 572 | nJson = json_output(root, NULL, indent); 573 | 574 | // Construct JSON 575 | json = MALLOC(nJson+1); 576 | json_output(root, json, indent); 577 | json[nJson] = 0; 578 | 579 | // Cleanup elements 580 | current_node = root; 581 | next_node = current_node->next; 582 | while( current_node ){ 583 | 584 | // Cleanup attributes 585 | current_attr = current_node->first_attr; 586 | while( current_attr ){ 587 | 588 | // Cleanup value parts 589 | current_value_part = current_attr->first_value_part; 590 | while( current_value_part ){ 591 | next_value_part = current_value_part->next_value_part; 592 | 
FREE(current_value_part); 593 | current_value_part = next_value_part; 594 | } 595 | 596 | next_attr = current_attr->next_attr; 597 | FREE(current_attr); 598 | current_attr = next_attr; 599 | } 600 | 601 | // Cleanup values 602 | current_value = current_node->first_value; 603 | while( current_value ){ 604 | 605 | // Cleanup value parts 606 | current_value_part = current_value->first_value_part; 607 | while( current_value_part ){ 608 | next_value_part = current_value_part->next_value_part; 609 | if( current_value_part->free ) FREE(current_value_part->val); 610 | FREE(current_value_part); 611 | current_value_part = next_value_part; 612 | } 613 | 614 | next_value = current_value->next_value; 615 | FREE(current_value); 616 | current_value = next_value; 617 | } 618 | 619 | next_node = current_node->next; 620 | FREE(current_node); 621 | current_node = next_node; 622 | } 623 | 624 | return json; 625 | }

//
// html_code_to_str()
//
// Decode a numeric character reference into a JSON-safe UTF-8 string.
//
// e.g. "#39;" or "#x27;" to "'"
//
// On entry *i indexes the '#' that follows the '&' (the caller has already
// stepped over the '&'). Both the decimal ("#233;") and the hexadecimal
// ("#xE9;") forms are accepted.
//
// Well-formed reference:
//   * *i is advanced past the terminating ';'
//   * value_part->val points to a newly allocated, zero terminated buffer
//     of value_part->nVal bytes and value_part->free is set, so the owner
//     must release it with FREE()
//   * '"' and '\' are emitted as the JSON escapes \" and \\, code points
//     below 0x20 as \u00XX, everything else as UTF-8
//   * code points above 0x10FFFF and surrogates (0xD800-0xDFFF) are not
//     valid characters and are replaced by U+FFFD
//
// Malformed reference (no digits, unexpected character, missing ';') or
// allocation failure:
//   * *i is left on the '#'
//   * value_part holds a literal "&" from constant memory (free = 0), so the
//     remaining text of the reference is copied through unchanged by the
//     next call to get_value_parts()
//
// NOTE(review): the attribute clean-up loop in xml_to_json() releases value
// parts without looking at `free`, so buffers allocated here for attribute
// values are currently never released.
//
static void html_code_to_str(int *i, value_part value_part, const char *xml){
  static const char hex_digits[] = "0123456789abcdef";
  int pos = *i+1;            // first character after '#'
  unsigned base = 10;
  unsigned long cp = 0;      // decoded code point
  int nDigits = 0;
  unsigned char buf[8];      // longest output is \u00XX (6 bytes)
  int n = 0;
  int k;
  char *str;

  // Fallback used by every early return below
  value_part->val = "&";
  value_part->nVal = 1;
  value_part->free = 0;

  if( xml[pos]=='x' || xml[pos]=='X' ){
    base = 16;
    pos++;
  }

  // str to int. Stops at the first non-digit, which includes the
  // terminating zero, so the scan can never run off the end of xml.
  for(;; pos++){
    char c = xml[pos];
    unsigned digit;
    if( c>='0' && c<='9' ){
      digit = (unsigned)(c-'0');
    }else if( base==16 && c>='a' && c<='f' ){
      digit = (unsigned)(c-'a')+10;
    }else if( base==16 && c>='A' && c<='F' ){
      digit = (unsigned)(c-'A')+10;
    }else{
      break;
    }
    // Saturate just above the Unicode range instead of overflowing
    if( cp<=0x10FFFF )
      cp = cp*base + digit;
    nDigits++;
  }

  if( nDigits==0 || xml[pos]!=';' )
    return;

  // int to char array
  if( cp=='"' || cp=='\\' ){
    buf[n++] = '\\';
    buf[n++] = (unsigned char)cp;
  }else if( cp<0x20 ){
    // Raw control characters are not allowed inside a JSON string
    buf[n++] = '\\';
    buf[n++] = 'u';
    buf[n++] = '0';
    buf[n++] = '0';
    buf[n++] = (unsigned char)hex_digits[cp >> 4];
    buf[n++] = (unsigned char)hex_digits[cp & 0xF];
  }else{
    if( cp>0x10FFFF || (cp>=0xD800 && cp<=0xDFFF) )
      cp = 0xFFFD;

    if( cp<0x80 ){
      buf[n++] = (unsigned char)cp;
    }else if( cp<0x800 ){
      buf[n++] = (unsigned char)(0xC0 | (cp >> 6));
      buf[n++] = (unsigned char)(0x80 | (cp & 0x3F));
    }else if( cp<0x10000 ){
      buf[n++] = (unsigned char)(0xE0 | (cp >> 12));
      buf[n++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
      buf[n++] = (unsigned char)(0x80 | (cp & 0x3F));
    }else{
      buf[n++] = (unsigned char)(0xF0 | (cp >> 18));
      buf[n++] = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
      buf[n++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
      buf[n++] = (unsigned char)(0x80 | (cp & 0x3F));
    }
  }

  str = MALLOC(n+1);
  if( !str )
    return;
  for(k=0; k<n; k++)
    str[k] = (char)buf[k];
  str[n] = 0;

  value_part->val = str;
  value_part->nVal = n;
  value_part->free = 1;

  // advance through xml, past the ';'
  *i = pos+1;
}

// True for every character that ends a run of plain text inside a value:
// the start of a tag, the start of a reference, the attribute delimiter and
// the characters that have to be escaped in a JSON string.
static int is_value_delimiter(char c){
  return c=='<' || c=='&' || c=='\b' || c=='\t' || c=='\n'
      || c=='\f' || c=='\r' || c=='"' || c=='\\';
}

//
// get_value_parts()
//
// Consume one run of plain text starting at xml[*i+j] and, if the run is
// ended by a character or reference that needs translating, that character
// or reference as well.
//
//   i              - in/out index into xml; advanced past everything consumed
//   j              - offset from *i at which scanning starts (callers pass 0)
//   xml            - zero terminated XML text
//   new_value_part - part that receives the plain text run; it points into
//                    xml and is not copied
//   is_attr        - non-zero inside an attribute value, where an unescaped
//                    '"' ends the value and is left for the caller
//
// Returns the last part written: new_value_part itself, or a newly allocated
// part linked after it that holds the translated character. Translated text
// lives in constant memory unless html_code_to_str() allocated it, in which
// case the part's `free` flag is set.
//
// A '&' that does not start a recognised reference is emitted literally.
// If the extra part cannot be allocated the character that needed
// translating is dropped, which still guarantees forward progress.
//
static value_part get_value_parts(int *i, int j, char *xml, value_part new_value_part, int is_attr){
  // References with a fixed replacement. The leading '&' is already consumed
  // when these are compared.
  static const struct {
    const char *ref;  // Reference text after the '&', including ';'
    int nRef;         // Length of ref
    char *out;        // Replacement, already escaped for JSON
    int nOut;         // Length of out
  } references[] = {
    { "amp;",  4, "&",    1 },
    { "gt;",   3, ">",    1 },
    { "lt;",   3, "<",    1 },
    { "quot;", 5, "\\\"", 2 },
    { "apos;", 5, "'",    1 },
    { "#8;",   3, "\\b",  2 },
    { "#9;",   3, "\\t",  2 },
    { "#10;",  4, "\\n",  2 },
    { "#12;",  4, "\\f",  2 },
    { "#13;",  4, "\\r",  2 },
    { "#34;",  4, "\\\"", 2 },
    { "#92;",  4, "\\\\", 2 },
  };
  const int nReferences = (int)(sizeof(references)/sizeof(references[0]));
  value_part escaped;
  char c;
  int k;

  // Plain text run
  while( xml[*i+j] && !is_value_delimiter(xml[*i+j]) )
    j++;

  new_value_part->nVal = j;
  new_value_part->val = &xml[*i];
  new_value_part->free = 0;
  *i += j;

  // Nothing to translate: end of input, start of a tag, or the closing
  // quote of an attribute value
  c = xml[*i];
  if( c==0 || c=='<' || (c=='"' && is_attr) )
    return new_value_part;

  // Step over the special character ('&' for references)
  *i += 1;

  escaped = (value_part)MALLOC(sizeof(struct value_part));
  if( !escaped )
    return new_value_part;

  new_value_part->next_value_part = escaped;
  escaped->next_value_part = 0;
  escaped->free = 0;
  escaped->nVal = 2;

  // Characters that only need a JSON escape
  switch( c ){
    case '\b': escaped->val = "\\b";  return escaped;
    case '\t': escaped->val = "\\t";  return escaped;
    case '\n': escaped->val = "\\n";  return escaped;
    case '\f': escaped->val = "\\f";  return escaped;
    case '\r': escaped->val = "\\r";  return escaped;
    case '"':  escaped->val = "\\\""; return escaped;
    case '\\': escaped->val = "\\\\"; return escaped;
    default:   break;  // '&'
  }

  // Known references. strncmp() stops at the terminating zero of xml, so a
  // reference cut short by the end of the input is never read past.
  for(k=0; k<nReferences; k++){
    if( strncmp(&xml[*i], references[k].ref, references[k].nRef) == 0 ){
      escaped->val = references[k].out;
      escaped->nVal = references[k].nOut;
      *i += references[k].nRef;
      return escaped;
    }
  }

  // Any other numeric reference
  if( xml[*i]=='#' ){
    html_code_to_str(i, escaped, (const char *)xml);
    return escaped;
  }

  // Not a reference this implementation knows: keep the '&' as text
  escaped->val = "&";
  escaped->nVal = 1;
  return escaped;
}

#define PRINT_SPACES(x)   nJson += print_spaces(json, nJson, x)
#define PRINT_NEWLINE     nJson += print_newline(json, nJson, indent)
#define PRINT_CHAR(x)     nJson += print_char(json, nJson, x)
#define PRINT_STRING(z,n) nJson += print_string(json, nJson, z, n);

//
// json_output
//
// If *json is null, then return total space required.
// If *json is not null, then populate with JSON string.
//
// Does not zero terminate JSON string.
//
// Parameters:
//   root   - list head created by xml_to_json(); it is never printed itself,
//            output starts with root->next and follows the `next` links
//   json   - output buffer, or null to only measure
//   indent - spaces per nesting level; when negative, no space is written
//            after ':' and print_spaces() writes nothing
//
// Returns the number of bytes counted in nJson by the PRINT_* macros, each
// of which adds the return value of its print_*() helper to nJson.
//
// The forward declaration near the top of the file declares this function
// static.
//
int json_output(element root, char *json, int indent){
  int nJson = 0;   // Running output length, updated by the PRINT_* macros
  int depth = 0;   // Current nesting level, multiplied by indent for padding

  element current_node;
  element parent_node;
  element_attribute current_attr;
  value current_value;
  value_part current_value_part;

  current_node = root;

  while(current_node->next){
    current_node = current_node->next;

    // Opening bracket
    // Written for the first element of the document, and for a first child
    // whose parent has neither attributes nor text (those parents have
    // already had their '{' written by the Attributes / #text sections).
    if( (current_node->child_index == 1 && !current_node->parent->first_attr && !current_node->parent->first_value) || current_node == root->next ){
      if( current_node->parent->array_index > 1){
        PRINT_SPACES(depth*indent);
      }
      PRINT_CHAR('{');
      PRINT_NEWLINE;
      depth++;
    }

    // Node name
    // Only the first element of an array (array_index 1) or a non-array
    // element (array_index 0) prints its key.
    if( current_node->array_index <= 1 ){
      PRINT_SPACES(depth*indent);
      PRINT_CHAR('"');
      PRINT_STRING(current_node->name, current_node->nName);
      PRINT_CHAR('"');
      PRINT_CHAR(':');
      PRINT_SPACES(indent < 0 ? 0 : 1);
    }

    // Attributes
    current_attr = current_node->first_attr;
    if( current_attr ){

      // First array element with attributes opens the array here; elements
      // without attributes open it in the "Array start" section below
      if( current_node->array_index == 1 ){
        depth++;
        PRINT_CHAR('[');
        PRINT_NEWLINE;
      }

      if( current_node->array_index ){
        PRINT_SPACES(depth*indent);
      }

      PRINT_CHAR('{');
      PRINT_NEWLINE;
      depth++;

      while(current_attr){
        // "@name":"value",
        PRINT_SPACES(depth*indent);
        PRINT_CHAR('"');
        PRINT_CHAR('@');
        PRINT_STRING(current_attr->name, current_attr->nName);
        PRINT_CHAR('"');
        PRINT_CHAR(':');
        PRINT_SPACES(indent < 0 ? 0 : 1);

        // Join value parts
        PRINT_CHAR('"');
        current_value_part = current_attr->first_value_part;
        while( current_value_part ){
          PRINT_STRING(current_value_part->val, current_value_part->nVal);
          current_value_part = current_value_part->next_value_part;
        }
        PRINT_CHAR('"');

        current_attr = current_attr->next_attr;

        // A comma follows unless this was the last member of the object
        if( current_attr || current_node->first_value || current_node->is_parent ){
          PRINT_CHAR(',');
          PRINT_NEWLINE;
        }
      }

      // Attributes only: the object is complete, close it
      if( !current_node->first_value && !current_node->is_parent ){
        depth--;
        PRINT_NEWLINE;
        PRINT_SPACES(depth*indent);
        PRINT_CHAR('}');
      }
    }

    // #text
    // Text needs its own "#text" key when it shares the object with
    // attributes or child elements.
    if( current_node->first_value && (current_node->first_attr || current_node->is_parent) ){
      if( current_node->array_index ){
        PRINT_SPACES(depth*indent);
      }
      // With attributes the '{' was already written above
      if( current_node->is_parent && !current_node->first_attr ){
        PRINT_CHAR('{');
        PRINT_NEWLINE;
        depth++;
      }
      if( !(current_node->first_attr && current_node->array_index ) ){
        PRINT_SPACES(depth*indent);
      }
      PRINT_STRING("\"#text\":", 8);
      PRINT_SPACES(indent < 0 ? 0 : 1);

      // Array of values
      // More than one text value: print them all here. A single value is
      // printed by the "Value" section below.
      if( current_node->first_value->next_value ){
        PRINT_CHAR('[');
        PRINT_NEWLINE;
        current_value = current_node->first_value;

        while( current_value ){
          PRINT_SPACES((depth+1)*indent);

          // Join value parts
          PRINT_CHAR('"');
          current_value_part = current_value->first_value_part;
          while( current_value_part ){
            PRINT_STRING(current_value_part->val, current_value_part->nVal);
            current_value_part = current_value_part->next_value_part;
          }
          PRINT_CHAR('"');

          current_value = current_value->next_value;
          if( current_value ){
            PRINT_CHAR(',');
            PRINT_NEWLINE;
          }else{
            PRINT_NEWLINE;
            PRINT_SPACES(depth*indent);
            PRINT_CHAR(']');
          }
        }
      }
    }

    // Array start
    if( current_node->array_index == 1 && !current_node->first_attr ){
      depth++;
      PRINT_CHAR('[');
      PRINT_NEWLINE;
      if( current_node->is_parent ){
        PRINT_SPACES(depth*indent);
      }
    }

    // null
    // Element with no text, no children and no attributes
    if( !current_node->first_value && !current_node->is_parent && !current_node->first_attr ){
      if( current_node->array_index ){
        PRINT_SPACES(depth*indent);
      }
      PRINT_STRING("null", 4);
    }

    // Value
    // Exactly one text value
    if( current_node->first_value && !current_node->first_value->next_value ){
      if( current_node->array_index && !current_node->is_parent && !current_node->first_attr ){
        PRINT_SPACES(depth*indent);
      }

      // Join value parts
      PRINT_CHAR('"');
      current_value_part = current_node->first_value->first_value_part;
      while( current_value_part ){
        PRINT_STRING(current_value_part->val, current_value_part->nVal);
        current_value_part = current_value_part->next_value_part;
      }
      PRINT_CHAR('"');

      // Attributes plus text and no children: close the object opened in
      // the Attributes section
      if( current_node->first_attr && !current_node->is_parent ){
        depth--;
        PRINT_NEWLINE;
        PRINT_SPACES(depth*indent);
        PRINT_CHAR('}');
      }
    }

    // Comma
    // After a leaf that is followed by a sibling, or after the text of an
    // element whose children are still to come
    if( (!current_node->is_last_child && !current_node->is_array_end && !current_node->is_parent) || (current_node->is_parent && current_node->first_value) ){
      PRINT_CHAR(',');
      PRINT_NEWLINE;
    }

    // Trailing brackets
    // A leaf that ends its family or its array: walk up through the
    // ancestors, stopping at root or at the parent of the next node in the
    // list, and close every array and object that ends here.
    if( (current_node->is_last_child || current_node->is_array_end ) && !current_node->is_parent ){
      parent_node = current_node;

      while( parent_node != root && (!current_node->next || parent_node != current_node->next->parent) ){
        if( parent_node->is_array_end ){
          depth--;
          PRINT_NEWLINE;
          PRINT_SPACES(depth*indent);
          PRINT_CHAR(']');
          if( !parent_node->is_last_child ){
            PRINT_CHAR(',');
          }
        }

        if( parent_node->is_last_child ){
          depth--;
          PRINT_NEWLINE;
          PRINT_SPACES(depth*indent);
          PRINT_CHAR('}');
          if( !parent_node->parent->is_last_child && !parent_node->parent->is_array_end ){
            PRINT_CHAR(',');
          }
        }

        parent_node = parent_node->parent;
      }
      PRINT_NEWLINE;
    }

  }

  return nJson;
}

#ifdef SQLITE
/*
** Implementation of xml_to_json() function.
1042 | */ 1043 | static void xml_to_jsonFunc( 1044 | sqlite3_context *context, 1045 | int argc, 1046 | sqlite3_value **argv 1047 | ){ 1048 | if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; 1049 | int indent = -1; 1050 | char *xml = (char *)sqlite3_value_text(argv[0]); 1051 | char *json; 1052 | 1053 | if( argc==2 ){ 1054 | if( sqlite3_value_type(argv[1])!=SQLITE_NULL ) 1055 | indent = sqlite3_value_int(argv[1]); 1056 | } 1057 | 1058 | json = xml_to_json(xml, indent); 1059 | 1060 | sqlite3_result_text(context, json, -1, sqlite3_free); 1061 | } 1062 | 1063 | #ifdef _WIN32 1064 | __declspec(dllexport) 1065 | #endif 1066 | int sqlite3_xmltojson_init( 1067 | sqlite3 *db, 1068 | char **pzErrMsg, 1069 | const sqlite3_api_routines *pApi 1070 | ){ 1071 | int rc = SQLITE_OK; 1072 | SQLITE_EXTENSION_INIT2(pApi); 1073 | (void)pzErrMsg; /* Unused parameter */ 1074 | rc = sqlite3_create_function(db, "xml_to_json", 1, SQLITE_UTF8, 0, 1075 | xml_to_jsonFunc, 0, 0); 1076 | if( rc==SQLITE_OK ){ 1077 | rc = sqlite3_create_function(db, "xml_to_json", 2, SQLITE_UTF8, 0, 1078 | xml_to_jsonFunc, 0, 0); 1079 | } 1080 | return rc; 1081 | } 1082 | #endif 1083 | --------------------------------------------------------------------------------