├── LICENSE ├── OOXML-Library-XQuery-BaseXdb.xqm └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2016 - 2017, Eliud Santiago Meza y Rivera 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /OOXML-Library-XQuery-BaseXdb.xqm: -------------------------------------------------------------------------------- 1 | (: 2 | : -------------------------------- 3 | : Standard ECMA-376 4 | : The Office Open XML File Formats [Office Open XML Workbook] Library 5 | : for BaseX 8.4+ 6 | : By Eliúd Santiago Meza y Rivera eliud.meza@gmail.com 7 | : -------------------------------- 8 | :BSD 3-Clause License 9 | : 10 | :Copyright (c) 2016 - 2017, Eliud Santiago Meza y Rivera 11 | :All rights reserved. 12 | : 13 | :Redistribution and use in source and binary forms, with or without 14 | :modification, are permitted provided that the following conditions are met: 15 | : 16 | :* Redistributions of source code must retain the above copyright notice, this 17 | : list of conditions and the following disclaimer. 18 | : 19 | :* Redistributions in binary form must reproduce the above copyright notice, 20 | : this list of conditions and the following disclaimer in the documentation 21 | : and/or other materials provided with the distribution. 22 | : 23 | :* Neither the name of the copyright holder nor the names of its 24 | : contributors may be used to endorse or promote products derived from 25 | : this software without specific prior written permission. 26 | : 27 | :THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | :AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | :IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 30 | :DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 31 | :FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 | :DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 33 | :SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 34 | :CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 35 | :OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36 | :OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 | :) 38 | 39 | xquery version "3.1"; 40 | module namespace xlsx = 'http://basex.org/modules/ECMA-376/spreadsheetml'; 41 | (:OfficeOpenXML-Workbook:) 42 | import module namespace file = "http://expath.org/ns/file"; 43 | (:import module namespace functx = "http://www.functx.com";:) 44 | 45 | declare namespace xlsx-Content-Types = "http://schemas.openxmlformats.org/package/2006/content-types"; 46 | declare namespace xlsx-Core-Properties = "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"; 47 | declare namespace xlsx-Digital-Signatures = "http://schemas.openxmlformats.org/package/2006/digital-signature"; 48 | declare namespace xlsx-Relationships = "http://schemas.openxmlformats.org/package/2006/relationships"; 49 | declare namespace xlsx-Markup-Compatibility = "http://schemas.openxmlformats.org/markup-compatibility/2006"; 50 | declare namespace xlsx-spreadsheetml = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; 51 | declare namespace xlsx-sharedStrings = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"; 52 | declare namespace xlsx-x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"; 53 | declare namespace xlsx-mc="http://schemas.openxmlformats.org/markup-compatibility/2006"; 54 | 55 | (:declare default element namespace "http://schemas.openxmlformats.org/spreadsheetml/2006/main";:) 56 | 57 | (: --------- 58 | Return a binary 
representation of the workbook file...
 --------- :)
(: xlsx:get-file
   Reads the file at path $file with file:read-binary and returns its bytes.
   On failure the original error is re-raised with the same code and value
   and with 'xlsx:get-file: ' prepended to the description. (The previous
   implementation cast an error element to xs:base64Binary, which itself
   failed with a cast error and hid the real cause.) :)
declare function xlsx:get-file(
  $file as xs:string
) as xs:base64Binary {
  try {
    file:read-binary($file)
  } catch * {
    fn:error($err:code, 'xlsx:get-file: ' || $err:description, $err:value)
  }
};

(: ---------
    Returns a "sheets" element with one "sheet" child per worksheet of the
    workbook; each child carries the worksheet's @name attribute.
    $file is the path of the workbook on disk.
    On failure an "error" element describing the caught error is returned.
    2017-09-20: change param-type to string
 --------- :)
declare function xlsx:get-sheets(
  $file as xs:string
) as element()? {
  try {
    let $workbook := fn:parse-xml(
      archive:extract-text(xlsx:get-file($file), "xl/workbook.xml")
    )
    return element sheets {
      $workbook/descendant::xlsx-spreadsheetml:sheet
        ! element sheet { ./@name }
    }
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-sheets' }
    }
  }
};

(: ---------
    Returns the parsed "xl/_rels/workbook.xml.rels" document of the
    workbook archive $file. Errors are not caught here and propagate
    to the caller.
 --------- :)
declare %private function xlsx:get-Workbook-Relationships(
  $file as xs:base64Binary
) as item()* {
  fn:parse-xml(
    archive:extract-text($file, "xl/_rels/workbook.xml.rels")
  )
};

(: ---------
    Return a
string of the id of the worksheet
 --------- :)
(: xlsx:get-rId-worksheet
   Looks up, in "xl/workbook.xml" of the archive $file, the sheet whose
   @name equals $sheet and returns the value of its relationship-id
   attribute. The attribute is matched either by its literal lexical name
   'r:id' (as before) or by local name 'id' in the officeDocument
   relationships namespace, so workbooks that bind another prefix to that
   namespace are handled too.
   Any error is swallowed and the empty string is returned. :)
declare %private function xlsx:get-rId-worksheet(
  $file as xs:base64Binary,
  $sheet as xs:string
) as xs:string* {
  try {
    let $workbook := fn:parse-xml(
      archive:extract-text($file, "xl/workbook.xml")
    )
    let $ids := $workbook
      /descendant::xlsx-spreadsheetml:sheets
      /descendant::xlsx-spreadsheetml:sheet[@name = $sheet]
      /attribute::*[
        fn:name(.) = 'r:id'
        or (fn:local-name(.) = 'id'
            and fn:namespace-uri(.) =
              'http://schemas.openxmlformats.org/officeDocument/2006/relationships')
      ]
    return $ids ! fn:string(.)
  } catch * {
    ''
  }
};

(: ---------
    Returns the shared-string items (the "si" elements of
    "xl/sharedStrings.xml") contained in the workbook archive $file,
    in document order. On failure an "error" element is returned.
 --------- :)
declare function xlsx:get-sharedStrings(
  $file as xs:base64Binary
) as item()* {
  try {
    fn:parse-xml(
      archive:extract-text($file, "xl/sharedStrings.xml")
    )/descendant::xlsx-spreadsheetml:si
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-sharedStrings' }
    }
  }
};

(: ---------
    Returns the parsed "xl/styles.xml" document of the workbook stored at
    path $file (the whole document, not only the styleSheet element).
    On failure an "error" element is returned.
 --------- :)
declare function xlsx:get-style(
  $file as xs:string
) as item()* {
  try {
    let $workbook := xlsx:get-file($file)
    return fn:parse-xml(
      archive:extract-text($workbook, "xl/styles.xml")
    )
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-style' }
    }
  }
};

(: ---------
    Needs more work: this is a placeholder. Both parameters are currently
    ignored and a fixed "something" element is returned.
 --------- :)
declare function xlsx:set-style(
  $file as xs:base64Binary,
  $new-style as item()*
) as item()* {
  try {
    element something { "aa" }
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:set-style' }
    }
  }
};

(: ---------
    Converts a date (lexical xs:date in $value) to its integer day number
    for Excel: the number of days between 1900-01-01 and the date, plus 2.
    The declared result type used to be xs:date although a number is
    computed, so every call failed with a type error; it is xs:integer now.
 --------- :)
declare %private function xlsx:date-to-int(
  $value as xs:string
) as xs:integer {
  xs:integer(
    ( ( xs:date($value) + xs:dayTimeDuration('P2D') )
      - xs:date('1900-01-01')
    ) div xs:dayTimeDuration('P1D')
  )
};

(: ---------
    Convert an int value to a date value for Excel...
--------- :)
(: xlsx:int-to-date
   Inverse of the date-to-int conversion: returns 1900-01-01 plus
   ($value - 2) days, written as 1899-12-30 plus $value days so that
   values below 2 no longer build the invalid duration literal 'P-1D'. :)
declare %private function xlsx:int-to-date(
  $value as xs:integer
) as xs:date {
  xs:date('1899-12-30') + xs:dayTimeDuration('P1D') * $value
};

(: ---------
    Renders the fractional part of $numero as "digits/1000..." with as many
    zeros as there are fractional digits (0.25 gives "25/100"). The fraction
    is not reduced.
 --------- :)
declare %private function xlsx:decimal-to-fraction(
  $numero as xs:decimal
) as xs:string {
  let $digits := substring-after(string($numero - floor($numero)), '0.')
  let $zeros := string-join((1 to string-length($digits)) ! '0')
  return $digits || '/' || '1' || $zeros
};

(: ---------
    Display format of a value.
    $data              raw cell content as a string
    $excel-format-code built-in number-format id (numFmtId) to apply
    Codes that are not implemented yet, and unknown codes, return $data
    unchanged. Date codes 14-17 accept either a serial day number or a
    lexical xs:date. On failure an "error" element is produced; because the
    declared result type is xs:string the caller should expect its string
    value rather than the element itself.
 --------- :)
declare function xlsx:format-value(
  $data as xs:string,
  $excel-format-code as xs:integer)
as xs:string {
  try {
    (: Conversions are wrapped in functions so that each one only runs
       (and can only fail) for the format codes that need it. :)
    let $as-double := function() as xs:double { $data cast as xs:double }
    let $as-date := function() as xs:date {
      if (string(number($data)) != 'NaN')
      (: serial number: ignore any time-of-day fraction :)
      then xlsx:int-to-date(xs:integer(floor(number($data))))
      else $data cast as xs:date
    }
    let $as-time := function($picture as xs:string) as xs:string? {
      format-time($data cast as xs:time, $picture, "en", (), ())
    }
    return
      switch ($excel-format-code)
        case 0
        case 1
          return $data
        case 2 (: 0.00 -- '0.00' keeps the leading zero, '#.00' dropped it :)
          return format-number($as-double(), '0.00')
        case 3 (: #,##0 :)
          return format-number($as-double(), '#,##0')
        case 4 (: #,##0.00 :)
          return format-number($as-double(), '#,##0.00')
        case 9 (: 0% :)
          return format-number($as-double(), '0%')
        case 10 (: 0.00% :)
          return format-number($as-double(), '0.00%')
        case 11 (: 0.00E+00 -- approximated; the XPath picture has no '+' sign :)
          return format-number($as-double(), '0.00e00')
        case 12 (: # ?/?   not finished yet :)
        case 13 (: # ??/?? not finished yet :)
          return xlsx:decimal-to-fraction($data cast as xs:decimal)
        case 14 (: mm-dd-yy :)
          return format-date($as-date(), "[M01]-[D01]-[Y01]")
        case 15 (: d-mmm-yy :)
          return format-date($as-date(), "[D]-[Mn,*-3]-[Y01]")
        case 16 (: d-mmm :)
          return format-date($as-date(), "[D01]-[Mn,*-3]")
        case 17 (: mmm-yy :)
          return format-date($as-date(), "[Mn,*-3]-[Y01]")
        case 18 (: h:mm AM/PM :)
          return $as-time("[h]:[m01] [PN]")
        case 19 (: h:mm:ss AM/PM :)
          return $as-time("[h]:[m01]:[s01] [PN]")
        case 20 (: h:mm :)
          return $as-time("[h]:[m01]")
        case 21 (: h:mm:ss :)
          return $as-time("[h]:[m01]:[s01]")
        case 22 (: m/d/yy h:mm -- needs format-dateTime; [M]/[D] are month/day :)
          return format-dateTime($data cast as xs:dateTime,
                                 "[M]/[D]/[Y01] [H]:[m01]", "en", (), ())
        case 37 (: #,##0 ;(#,##0)            not implemented yet :)
        case 38 (: #,##0 ;[Red](#,##0)       not implemented yet :)
        case 39 (: #,##0.00;(#,##0.00)       not implemented yet :)
        case 40 (: #,##0.00;[Red](#,##0.00)  not implemented yet :)
          return $data
        case 45 (: mm:ss :)
          return $as-time("[m01]:[s01]")
        case 46 (: [h]:mm:ss :)
          return $as-time("[h]:[m01]:[s01]")
        case 47 (: mmss.0 :)
          return $as-time("[m01][s01].0")
        case 48 (: ##0.0E+0 not implemented yet :)
          return $data
        case 49 (: @ :)
          return string($data)
        default
          return $data
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_variable_data {$data},
      element error_variable_excel_format_code {$excel-format-code},
      element error_function_name { 'xlsx:format-value' }
    }
  }
};

(: ---------
    Display value of a cell.
    $c     the cell ("c") element
    $style the cell format ("xf") element that applies to the cell, or empty
    $fss   the shared-string items of the workbook, in document order
    Cells without @t and cells of type "d" or "n" are formatted with
    xlsx:format-value using the style's @numFmtId, or code 0 when no
    @numFmtId is available. Type "s" resolves the zero-based index stored
    in "v" against $fss. On failure an "error" element is returned.
 --------- :)
declare function xlsx:display-cell-value(
  $c as item()*,
  $style as item()*,
  $fss as item()*
) as item ()* {
  try {
    let $v := $c/descendant::xlsx-spreadsheetml:v
    (: Wrapped in a function so the cast only happens for branches that
       format a number or date. A missing style falls back to code 0
       instead of failing on a cast of the empty sequence. :)
    let $format-code := function() as xs:integer {
      if (empty($style/@numFmtId))
      then 0
      else $style/@numFmtId cast as xs:integer
    }
    return
      if (empty($c/@t))
      then xlsx:format-value(string(data($v)), $format-code())
      else
        switch ( string(data($c/@t)) )
          case "b" (: boolean type :)
            return $v
          case "d" (: date-time type :)
          case "n" (: number type :)
            return xlsx:format-value(string(data($v)), $format-code())
          case "e" (: error type :)
            return "Error"
          case "inlineStr" (: inline string type :)
            return data(
              $c/descendant::xlsx-spreadsheetml:is
                /descendant::xlsx-spreadsheetml:t
            )
          case "s" (: shared string: "v" holds a zero-based index :)
            return data($fss[position() = data($v) + 1])
          case "str"
            return $v
          default
            return $v
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_variable_cell {$c},
      element error_variable_style {$style},
      element error_variable_shared_string {$fss},
      element error_function_name { 'xlsx:display-cell-value' }
    }
  }
};

(: ---------
    Returns the calculation-chain entries (the "c" elements of
    "xl/calcChain.xml") contained in the workbook archive $file.
    NOTE(review): this used to select "t" elements, which calcChain.xml
    does not contain according to ECMA-376, so the result was always
    empty; confirm no caller relied on that.
    On failure an "error" element is returned.
 --------- :)
declare %private function xlsx:get-calcChain(
  $file as xs:base64Binary
) as item()* {
  try {
    fn:parse-xml(
      archive:extract-text($file, "xl/calcChain.xml")
    )/descendant::xlsx-spreadsheetml:c
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'get-calcChain' }
    }
  }
};


(: ---------
    Returns the path of the worksheet part as recorded in the workbook
    relationships: the @Target of the Relationship whose @Id equals the
    relationship id of the sheet named $sheet. Callers in this module
    prepend "xl/" to the result.
 --------- :)
declare function xlsx:get-xml-path-worksheet(
  $file as xs:base64Binary,
  $sheet as xs:string
) as xs:string* {
  let $rsId := xlsx:get-rId-worksheet($file, $sheet)
  return xlsx:get-Workbook-Relationships($file)
    /descendant::xlsx-Relationships:Relationships
    /descendant::xlsx-Relationships:Relationship[@Id = $rsId]
    /@Target ! fn:string(.)
};

(: ---------
    Returns the content of the worksheet: the "sheetData" element of the
    sheet named $sheet in the workbook stored at path $file.
    On failure an "error" element is returned.
 --------- :)
declare function xlsx:get-worksheet-data (
  $file as xs:string,
  $sheet as xs:string
) as item()*{
  try {
    let $f := xlsx:get-file($file)
    let $entry := "xl/" || xlsx:get-xml-path-worksheet($f, $sheet)
    return fn:parse-xml(
      archive:extract-text($f, $entry)
    )/descendant::xlsx-spreadsheetml:sheetData
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-worksheet-data' }
    }
  }
};

(: ---------
    Returns the "row" element whose @r equals $row_number in the given
    worksheet, or the empty sequence when there is none.
    On failure an "error" element is returned.
 --------- :)
declare function xlsx:get-row(
  $file as xs:string,
  $sheet as xs:string,
  $row_number as xs:string
) as item()* {
  try {
    let $wanted := fn:upper-case($row_number)
    return xlsx:get-worksheet-data($file, $sheet)
      /descendant::xlsx-spreadsheetml:row[@r = $wanted]
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-row' }
    }
  }
};

(: ---------
    Returns the cells of the column $column (letters, case-insensitive) in
    the given worksheet. The reference must be exactly the column letters
    followed by digits; the previous pattern '^(A)+\d' also returned the
    cells of columns AA, AAA, ... when column A was requested.
    On failure an "error" element is returned.
 --------- :)
declare function xlsx:get-col(
  $file as xs:string,
  $sheet as xs:string,
  $column as xs:string
) as item()* {
  try {
    let $pattern := '^' || fn:upper-case($column) || '\d+$'
    return xlsx:get-worksheet-data($file, $sheet)
      /descendant::xlsx-spreadsheetml:c[fn:matches(@r, $pattern)]
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-col' }
    }
  }
};

(: ---------
    Returns the cell ("c") element whose reference equals $cell
    (case-insensitive, e.g. "b7") in the given worksheet, or the empty
    sequence when the cell is not stored in the sheet.
    On failure an "error" element is returned.
 --------- :)
declare function xlsx:get-cell(
  $file as xs:string,
  $sheet as xs:string,
  $cell as xs:string
) as item()* {
  try {
    let $ref := fn:upper-case($cell)
    return xlsx:get-worksheet-data($file, $sheet)
      /descendant::xlsx-spreadsheetml:c[@r = $ref]
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-cell' }
    }
  }
};

(: ---------
    Returns all the cell elements stored in the
worksheet
 --------- :)
(: xlsx:get-cells
   Returns every "c" element below "sheetData" of the sheet named $sheet in
   the workbook stored at path $file.
   On failure an "error" element is returned. :)
declare function xlsx:get-cells(
  $file as xs:string,
  $sheet as xs:string
) as item()* {
  try {
    let $f := xlsx:get-file($file)
    let $entry := "xl/" || xlsx:get-xml-path-worksheet($f, $sheet)
    return fn:parse-xml(
      archive:extract-text($f, $entry)
    )/descendant::xlsx-spreadsheetml:sheetData
     /descendant::xlsx-spreadsheetml:c
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-cells' }
    }
  }
};


(: ---------
    Returns all the cell elements inside a range.
    $range is an A1-style range such as 'A1:G10' or '$A$1:$G$10'; '$' signs
    are ignored, letters are upper-cased, and a single reference ('B2') is
    treated as the one-cell range 'B2:B2'.
    The result is a "range" element holding a "param" element (the parsed
    bounds) followed by the matching "c" elements.
    Columns are compared by their numeric index, so multi-letter columns
    (AA, AB, ...) work; the previous regular-expression character class
    only worked for single-letter columns.
    The cells are returned raw; no display formatting is applied here.
    On failure an "error" element is returned.
 --------- :)
declare function xlsx:get-range(
  $file as xs:string,
  $sheet as xs:string,
  $range as xs:string
) as item()* {
  try {
    (: Column letters to 1-based index: A=1 ... Z=26, AA=27, ... :)
    let $col-index := function($letters as xs:string) as xs:integer {
      fn:fold-left(
        fn:string-to-codepoints(fn:upper-case($letters)),
        0,
        function($acc, $cp) { $acc * 26 + $cp - 64 }
      )
    }
    let $tokens := fn:tokenize(
      fn:upper-case(fn:replace($range, '\$', '')), ':'
    )
    let $first := $tokens[1]
    let $last := ($tokens[2], $first)[1]
    let $min-range-col := fn:replace($first, '\d', '')
    let $min-range-row := fn:substring-after($first, $min-range-col)
    let $max-range-col := fn:replace($last, '\d', '')
    let $max-range-row := fn:substring-after($last, $max-range-col)
    let $lo := $col-index($min-range-col)
    let $hi := $col-index($max-range-col)
    let $f := xlsx:get-file($file)
    (: TODO: validate the case where the row or column bounds are empty. :)
    let $rs := fn:parse-xml(
        archive:extract-text(
          $f,
          "xl/" || xlsx:get-xml-path-worksheet($f, $sheet)
        )
      )/descendant::xlsx-spreadsheetml:sheetData
       /descendant::xlsx-spreadsheetml:row
          [xs:double(./@r) >= xs:double($min-range-row) and
           xs:double(./@r) <= xs:double($max-range-row)]
       /descendant::xlsx-spreadsheetml:c
          [ let $i := $col-index(fn:replace(./@r, '\d', ''))
            return $i ge $lo and $i le $hi ]
    return
      element range {
        element param {
          element min-range-col {$min-range-col},
          element min-range-row {$min-range-row},
          element max-range-col {$max-range-col},
          element max-range-row {$max-range-row}
        },
        $rs
      }
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-range' }
    }
  }
};

(: ---------
    Returns the display value of the cell $cell in the given worksheet.
    The cell's @s attribute is used as a zero-based index into the "xf"
    elements below "cellXfs" of the style sheet; the selected format and
    the workbook's shared strings are passed to xlsx:display-cell-value.
    On failure an "error" element is returned.
 --------- :)
declare function xlsx:get-cell-value(
  $file as xs:string,
  $sheet as xs:string,
  $cell as xs:string
) as item()* {
  try {
    let $c := xlsx:get-cell($file, $sheet, $cell)
    let $fstyle := xlsx:get-style($file)
    let $fss := xlsx:get-sharedStrings(xlsx:get-file($file))
    let $style-Cell := $fstyle
      /descendant::xlsx-spreadsheetml:cellXfs
      /descendant::xlsx-spreadsheetml:xf
        [position() = (fn:number($c/@s) + 1)]
    return xlsx:display-cell-value($c, $style-Cell, $fss)
  } catch * {
    element error {
      element error_code {$err:code},
      element error_description {$err:description},
      element error_value{$err:value},
      element error_module{$err:module},
      element error_line_number{$err:line-number},
      element error_column_number{$err:column-number},
      element error_additional{$err:additional},
      element error_function_name { 'xlsx:get-cell-value' }
    }
  }
};

(: ---------
    Sets the attribute named $an on element $e to $av: the value of an
    existing attribute is replaced, otherwise the attribute is inserted.
 --------- :)
declare %updating function
xlsx:upsert($e as element(),
            $an as xs:QName,
            $av as xs:anyAtomicType)
{
  let $existing := $e/attribute()[fn:node-name(.) = $an]
  return
    if (fn:exists($existing))
    then replace value of node $existing with $av
    else insert node attribute {$an} {$av} into $e
};

(: ---------
    Update the value of the cell --- original function.
    Replaces the text of the "v" element of the cell whose @r equals $cell
    (compared as given, not upper-cased) and writes the updated archive
    back to $file. The cell and its "v" element must already exist.
 --------- :)
declare %private %updating function xlsx:set-cell-value-original(
  $file as xs:string,
  $sheet as xs:string,
  $cell as xs:string,
  $value as xs:anyAtomicType
) {
  let $f := file:read-binary($file)
  let $xml-sheet := 'xl/' || xlsx:get-xml-path-worksheet($f, $sheet)
  let $entry :=
    copy $rs := fn:parse-xml(archive:extract-text($f, $xml-sheet))
    modify replace value of node
      $rs/descendant::xlsx-spreadsheetml:sheetData
         /descendant::xlsx-spreadsheetml:c[@r = $cell]
         /descendant::xlsx-spreadsheetml:v
      with $value
    return fn:serialize($rs)
  let $updated := archive:update($f, $xml-sheet, $entry)
  return file:write-binary($file, $updated)
};

(: ---------
    Update the number value of the cell
 --------- :)
(: xlsx:update-number-value
   Stores $value as a plain "v" value in the cell $cell (case-insensitive
   reference such as "b7") of the sheet named $sheet and writes the
   workbook back to the path $file. The cell, and its row when necessary,
   is created if it does not exist; an existing cell is replaced. :)
declare %updating
function xlsx:update-number-value(
  $file as xs:string,
  $sheet as xs:string,
  $cell as xs:string,
  $value as xs:anyAtomicType
) {
  let $ns := 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
  let $f := xlsx:get-file($file)
  let $xml-sheet := 'xl/' || xlsx:get-xml-path-worksheet($f, $sheet)
  (: Elements are built in the SpreadsheetML namespace; un-namespaced
     elements would serialize with xmlns="" and not be seen as cells. :)
  let $new-cell-node := element { fn:QName($ns, 'c') } {
    attribute r { fn:upper-case($cell) },
    element { fn:QName($ns, 'v') } { $value }
  }
  let $entry := xlsx:sheet-xml-with-cell(
    archive:extract-text($f, $xml-sheet), $new-cell-node
  )
  let $updated := archive:update($f, $xml-sheet, $entry)
  return file:write-binary($file, $updated)
};

(: ---------
    Returns the 1-based column index encoded by the letters of the cell
    reference $ref (A=1 ... Z=26, AA=27, ...). Digits and any other
    non-letter characters are ignored; letters are upper-cased first.
 --------- :)
declare %private function xlsx:cell-column-index(
  $ref as xs:string
) as xs:integer {
  fn:fold-left(
    fn:string-to-codepoints(
      fn:replace(fn:upper-case($ref), '[^A-Z]', '')
    ),
    0,
    function($acc, $cp) { $acc * 26 + $cp - 64 }
  )
};

(: ---------
    Returns the serialized worksheet XML $sheet-xml with the cell $new-cell
    put in place. $new-cell must carry its upper-case reference in @r.
      - a cell with the same @r already exists: it is replaced;
      - otherwise, if the row exists: the cell is inserted after the last
        cell with a smaller column index, or first in the row;
      - otherwise: a new row holding the cell is inserted after the last
        row with a smaller number, or first in "sheetData".
    Columns are ordered by numeric index, not by string comparison.
 --------- :)
declare %private function xlsx:sheet-xml-with-cell(
  $sheet-xml as xs:string,
  $new-cell as element()
) as xs:string {
  let $ns := 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
  let $ref := fn:string($new-cell/@r)
  let $row-number := xs:integer(fn:replace($ref, '\D', ''))
  let $col := xlsx:cell-column-index($ref)
  return
    copy $rs := fn:parse-xml($sheet-xml)
    modify (
      let $sheet-data := ($rs/descendant::xlsx-spreadsheetml:sheetData)[1]
      let $existing :=
        ($sheet-data/descendant::xlsx-spreadsheetml:c[@r = $ref])[1]
      let $row :=
        ($sheet-data/descendant::xlsx-spreadsheetml:row
           [xs:integer(@r) eq $row-number])[1]
      return
        if (fn:exists($existing))
        then replace node $existing with $new-cell
        else if (fn:exists($row))
        then (
          let $previous := $row/descendant::xlsx-spreadsheetml:c
            [xlsx:cell-column-index(@r) lt $col][last()]
          return
            if (fn:exists($previous))
            then insert node $new-cell after $previous
            else insert node $new-cell as first into $row
        )
        else (
          let $new-row := element { fn:QName($ns, 'row') } {
            attribute r { $row-number },
            $new-cell
          }
          let $previous := $sheet-data/descendant::xlsx-spreadsheetml:row
            [xs:integer(@r) lt $row-number][last()]
          return
            if (fn:exists($previous))
            then insert node $new-row after $previous
            else insert node $new-row as first into $sheet-data
        )
    )
    return fn:serialize($rs)
};

(: ---------
    Update the string value of the cell.
    Stores $value as an inline string (t="inlineStr") in the cell $cell of
    the sheet named $sheet and writes the workbook back to the path $file.
    The cell, and its row when necessary, is created if it does not exist.
 --------- :)
declare %updating
function xlsx:update-string-value(
  $file as xs:string,
  $sheet as xs:string,
  $cell as xs:string,
  $value as xs:anyAtomicType
) {
  let $ns := 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
  let $f := xlsx:get-file($file)
  let $xml-sheet := 'xl/' || xlsx:get-xml-path-worksheet($f, $sheet)
  let $new-cell-node := element { fn:QName($ns, 'c') } {
    attribute r { fn:upper-case($cell) },
    attribute t { "inlineStr" },
    element { fn:QName($ns, 'is') } {
      element { fn:QName($ns, 't') } { $value }
    }
  }
  let $entry := xlsx:sheet-xml-with-cell(
    archive:extract-text($f, $xml-sheet), $new-cell-node
  )
  let $updated := archive:update($f, $xml-sheet, $entry)
  return file:write-binary($file, $updated)
};

(: ---------
    Update the date value of the cell.
    Converts $value (castable to xs:date) to its day number -- days since
    1900-01-01 plus 2 -- and stores it in the cell $cell of the sheet named
    $sheet, then writes the workbook back to the path $file.
    The cell, and its row when necessary, is created if it does not exist.
 --------- :)
declare %updating
function xlsx:update-date-value(
  $file as xs:string,
  $sheet as xs:string,
  $cell as xs:string,
  $value as xs:anyAtomicType
) {
  let $ns := 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
  let $f := xlsx:get-file($file)
  let $xml-sheet := 'xl/' || xlsx:get-xml-path-worksheet($f, $sheet)
  let $date_to_int := ( ( xs:date($value) + xs:dayTimeDuration('P2D') )
                        - xs:date('1900-01-01')
                      ) div xs:dayTimeDuration('P1D')
  (: Only the cell is updated; the style should be updated as well.
     NOTE(review): s="3" is a hard-coded cell-format index that is
     presumably expected to be a date format in the target workbook;
     verify against the workbook's styles. :)
  let $new-cell-node := element { fn:QName($ns, 'c') } {
    attribute r { fn:upper-case($cell) },
    attribute s { "3" },
    element { fn:QName($ns, 'v') } { $date_to_int }
  }
  let $entry := xlsx:sheet-xml-with-cell(
    archive:extract-text($f, $xml-sheet), $new-cell-node
  )
  let $updated := archive:update($f, $xml-sheet, $entry)
  return file:write-binary($file, $updated)
};

(: ---------
    Update the value of the cell
 --------- :)
declare updating function xlsx:set-cell-value(
  $file as xs:string,
  $sheet as xs:string,
  $cell as xs:string,
  $value as xs:anyAtomicType
) {
  typeswitch ($value)
    case $value as xs:byte |
                   xs:short |
                   xs:int |
                   xs:long |
                   xs:unsignedByte |
                   xs:unsignedShort |
                   xs:unsignedInt |
                   xs:unsignedLong |
                   xs:positiveInteger |
                   xs:nonNegativeInteger |
                   xs:negativeInteger |
                   xs:nonPositiveInteger |
                   xs:integer |
                   xs:decimal |
                   xs:float
      return xlsx:update-number-value($file,$sheet,$cell,$value)
    case $value as xs:string |
xs:normalizedString | 1043 | xs:token | 1044 | xs:language | 1045 | xs:NMTOKEN | 1046 | xs:Name | 1047 | xs:NCName | 1048 | xs:ID | 1049 | xs:IDREF | 1050 | xs:ENTITY 1051 | return xlsx:update-string-value($file,$sheet,$cell,$value) 1052 | case $value as xs:date 1053 | return xlsx:update-date-value($file,$sheet,$cell,$value) 1054 | default return () 1055 | }; 1056 | 1057 | (: --------- 1058 | Export the worksheet data to an html table ... 1059 | --------- :) 1060 | declare function xlsx:worksheet-to-table( 1061 | $file as xs:string, 1062 | $sheet as xs:string 1063 | ) as item()*{ 1064 | try { 1065 | (:new code ... I hope a better code ... :) 1066 | let $wsd := xlsx:get-worksheet-data($file, $sheet) 1067 | let $fss := xlsx:get-sharedStrings(xlsx:get-file($file)) 1068 | let $fstyle := xlsx:get-style($file) 1069 | let $rows := $wsd/descendant::xlsx-spreadsheetml:row 1070 | return element table{ 1071 | attribute id {data($sheet)}, 1072 | for $r in $rows 1073 | return ( 1074 | element tr { 1075 | attribute id {'row-' || $r/@r}, 1076 | for $c in $r/descendant::xlsx-spreadsheetml:c 1077 | let $style-Cell := 1078 | $fstyle/descendant::xlsx-spreadsheetml:cellXfs/ 1079 | descendant::xlsx-spreadsheetml:xf 1080 | [position() = (fn:number($c/@s) + 1)] 1081 | return ( 1082 | element td { 1083 | attribute id {'cell-' || $c/@r}, 1084 | attribute s {$c/@s || ' - ' || (fn:number($c/@s) + 1)}, 1085 | xlsx:display-cell-value($c,$style-Cell, $fss) 1086 | } 1087 | ) 1088 | } 1089 | ) 1090 | } 1091 | 1092 | } catch * { 1093 | element error { 1094 | element error_code {$err:code}, 1095 | element error_description {$err:description}, 1096 | element error_value { $err:value}, 1097 | element error_module {$err:module}, 1098 | element error_line_number {$err:line-number}, 1099 | element error_column_number {$err:column-number}, 1100 | element error_additional {$err:additional} 1101 | } 1102 | } 1103 | }; 1104 | -------------------------------------------------------------------------------- 
/README.md: -------------------------------------------------------------------------------- 1 | # Module for BaseX 8.4+ to handle OOXML Workbooks [ECMA-376] 2 | =========================================================== 3 | 4 | This module helps to read data from, and make simple updates to, Office Open XML Workbook files [ECMA-376] with BaseX 8.4+. 5 | 6 | ## Installing this module 7 | 8 | 1. via command: 9 | ```REPO INSTALL OOXML-Library-XQuery-BaseXdb.xqm``` 10 | 11 | 2. via GUI: 12 | > * Options 13 | > * Packages ... 14 | > * Install ... 15 | > * Select the file "OOXML-Library-XQuery-BaseXdb.xqm" 16 | 17 | ## Use 18 | 19 | See the examples below: 20 | 21 | ```xquery 22 | import module namespace xlsx = 'http://basex.org/modules/ECMA-376/spreadsheetml'; 23 | 24 | (: Return the cell value of a worksheet :) 25 | xlsx:get-cell-value('Libro1.xlsx','Hoja1','B1') 26 | 27 | ... 28 | 29 | (: Return the cells of a column :) 30 | xlsx:get-col('Libro1.xlsx','Hoja1','B') 31 | 32 | ... 33 | 34 | (: Return the cells of a row :) 35 | xlsx:get-row('Libro1.xlsx','Hoja1','13') 36 | 37 | ... 38 | 39 | (: Update the cell value of a worksheet :) 40 | xlsx:set-cell-value('Libro1.xlsx','Hoja1','B1',23.45) 41 | 42 | ... 
43 | 44 | (: Export the content of a worksheet into a simple table :) 45 | xlsx:worksheet-to-table('Libro1.xlsx','Hoja1') 46 | ``` 47 | 48 | List of the current functions 49 | ----------------------------- 50 | 51 | * get-file: Returns a binary representation of the workbook file 52 | * get-sheets: Returns a "sheets" element containing the names of the worksheets in the workbook 53 | * get-sharedStrings: Returns the Shared-String element contained in the workbook 54 | * get-calcChain: Returns the Calc-Chain contained in the workbook 55 | * get-worksheet-data: Returns the content of the worksheet 56 | * get-row: Returns the content of a specified row in the worksheet 57 | * get-col: Returns the content of a specified column in the worksheet 58 | * get-cell: Returns the cell element specified in the worksheet 59 | * get-cell-value: Returns the cell value specified in the worksheet (2017-oct-03: fixed the date value representation) 60 | * set-cell-value: Updates the value of the cell (2017-oct-03: a big chain of IFs replaced with a typeswitch) 61 | * worksheet-to-table: Exports the worksheet data to an html table (2017-oct-03: fixed the date value representation) 62 | * display-cell-value: Returns the value of the cell in its correct display format 63 | * format-value: Returns the correctly formatted value based on the Excel format code (0 to 49) 64 | * get-range: Returns a range element containing the cells in the specified range 65 | 66 | 67 | 68 | 69 | --------------------------------------------------------------------------------