├── .gitignore ├── LICENSE ├── README.md ├── index.php ├── process.php ├── template ├── [Content_Types].xml ├── _rels │ └── .rels ├── docProps │ ├── app.xml │ └── core.xml └── word │ ├── _rels │ └── document.xml.rels │ ├── fontTable.xml │ ├── numbering.xml │ ├── settings.xml │ ├── styles.xml │ ├── theme │ └── theme1.xml │ └── webSettings.xml └── xml2docx.py /.gitignore: -------------------------------------------------------------------------------- 1 | header.inc 2 | sample 3 | draft-ietf-intarea-provisioning-domains-00.xml 4 | draft-*.xml 5 | sample.docx 6 | sample_document.xml 7 | test.docx 8 | ~$*.docx 9 | document.xml 10 | xml2doc.xml 11 | *.docx 12 | xml2docx.xml 13 | rfc*.xml 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xml2docx 2 | 3 | Convert a XML IETF document (draft) into a Office Open XML (e.g., Microsoft Word) .DOCX. This .docx can then be reviewed using the spelling and **grammar** plug-ins of the word processor. 4 | 5 | References: 6 | 7 | * http://officeopenxml.com/ 8 | * https://tools.ietf.org/html/rfc7991 9 | 10 | ## On-line tool 11 | 12 | [https://www.vyncke.org/xml2docx/] runs the latest version of this code. 13 | -------------------------------------------------------------------------------- /index.php: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | XML to Office OpenXML .DOCX 20 | 24 | 25 | 26 |

IETF XML2RFC file conversion into Office OpenXML .DOCX

27 | 28 |
29 | 30 | File to upload and convert to .DOCX : 31 |
32 | 33 |
34 | 35 |
36 | Copyright Eric Vyncke, 2020. Clone me at https://github.com/evyncke/xml2docx.git 37 | -------------------------------------------------------------------------------- /process.php: -------------------------------------------------------------------------------- 1 | 39 | 40 | 41 | XML to Office OpenXML DOCX 42 | 46 | 47 | 48 |

IETF XML2RFC file conversion into Office OpenXML .DOCX

49 | 50 |
51 | 52 | File to upload and convert to .DOCX : 53 |
54 | 55 |
56 | 57 |
58 | Copyright Eric Vyncke, 2020. Clone me at https://github.com/evyncke/xml2docx.git 59 | -------------------------------------------------------------------------------- /template/[Content_Types].xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /template/_rels/.rels: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /template/docProps/app.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 3 7 | 1 8 | 27 9 | 160 10 | Xml2Docx convertor 11 | 0 12 | 1 13 | 1 14 | false 15 | IETF open source 16 | false 17 | 186 18 | false 19 | false 20 | 1.0000 21 | 22 | -------------------------------------------------------------------------------- /template/docProps/core.xml: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 10 | Eric Vyncke (evyncke) 11 | 12 | 13 | XML2DOCX 14 | 5 15 | 2020-08-21T07:55:00Z 16 | 2020-08-21T08:04:00Z 17 | -------------------------------------------------------------------------------- /template/word/_rels/document.xml.rels: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /template/word/fontTable.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /template/word/numbering.xml: -------------------------------------------------------------------------------- 1 | 2 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | -------------------------------------------------------------------------------- /template/word/settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /template/word/styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | 707 | 708 | 709 | 710 | 711 | 712 | 713 | 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | 731 | 732 | 733 | 734 | 735 | 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | 750 | 751 | 752 | 753 | 754 | 755 | 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | 764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | 788 | 789 | 790 | 791 | 792 | 793 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | 802 | 803 | 804 | 805 | 806 | 807 | 808 | 809 | 810 | 811 | 812 | 813 | 814 | 815 | 816 | 817 | 818 | 819 | 820 | 821 | 822 | 823 | 824 | 825 | 826 | 827 | 828 | 829 | 830 | 831 | 832 | 833 | 834 | 835 | 836 | 837 | 838 | 839 | 840 | 841 | 842 | 843 | 844 | 845 | 846 | 847 | 848 | 849 | 850 | 851 | 852 | 853 | 854 | 855 | 856 | 857 | 858 | 859 | 860 | 861 | 862 | 863 | 864 | 865 | 866 | 867 | 868 | 869 | 870 | 871 | 872 | 873 | 874 | 875 | 876 | 877 | 878 | 879 | 880 | 881 | 882 | 883 | 884 | 885 | 886 | 887 | 888 | 889 | 890 | 891 | 892 | 893 | 894 | 895 | 896 | 897 | 898 | 899 | 900 | 901 | 902 | 903 | 904 | 905 | 906 | 907 | 908 | 909 | -------------------------------------------------------------------------------- /template/word/theme/theme1.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 293 | 294 | 295 | -------------------------------------------------------------------------------- /template/word/webSettings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /xml2docx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2020, Eric Vyncke, evyncke@cisco.com 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # A lot of information in http://officeopenxml.com/anatomyofOOXML.php 19 | 20 | # TODO 21 | # Handle external entities used notably for references... 22 | # https://www.w3schools.com/xml/xml_dtd_entities.asp 23 | # example 24 | # 25 | # 27 | # ]> 28 | 29 | from xml.dom import minidom, Node 30 | import xml.dom 31 | from pprint import pprint 32 | import sys, getopt 33 | import io, os 34 | import zipfile 35 | import tempfile, datetime 36 | import urllib.request 37 | 38 | # Same states to be kept 39 | rfcDate = None 40 | rfcAuthors = [] 41 | rfcTitle = None 42 | rfcKeywords = [] 43 | 44 | def printTree(front): 45 | print('All children:') 46 | for elem in front.childNodes: 47 | if elem.nodeType == Node.TEXT_NODE: 48 | print("\t TEXT: '", elem.nodeValue, "'") 49 | if elem.nodeType != Node.ELEMENT_NODE: 50 | continue 51 | print("\t", elem.nodeName) 52 | print("\tAttributes:") 53 | for i in range(elem.attributes.length): 54 | attrib = elem.attributes.item(i) 55 | print("\t\t", attrib.name, ' = ' , attrib.value) 56 | print("\tChildren:") 57 | for child in elem.childNodes: 58 | if child.nodeType == Node.ELEMENT_NODE: 59 | print("\t\tELEMENT: ",child.nodeName) 60 | elif child.nodeType == Node.TEXT_NODE: 61 | print("\t\tTEXT: ", child.nodeValue) 62 | print("\n----------\n") 63 | 64 | def docxNewParagraph(textValue, style = 'Normal', justification = None, unnumbered = None, numberingID = None, indentationLevel = None, removeEmpty = True, language = 'en-US', cdataSection = None): 65 | if textValue is None: 66 | return None 67 | if cdataSection == None: # remove extra spaces only if CDATA is not requested 68 | textValue = ' '.join(textValue.split()) 69 | if textValue == '' and removeEmpty: 70 | return None 71 | docxP = docxRoot.createElement('w:p') 72 | 73 | # First handle the style or justification 74 | # 75 | # 76 | # 77 | # 78 | # 79 | # 80 | # 81 | pPr = docxRoot.createElement('w:pPr') 82 | if style != None: 83 | pStyle = docxRoot.createElement('w:pStyle') 84 | pStyle.setAttribute('w:val', style) 85 | pPr.appendChild(pStyle) 86 | if justification != None: 87 | jc = docxRoot.createElement('w:jc') 88 | jc.setAttribute('w:val', justification) 89 | pPr.appendChild(jc) 90 | if unnumbered: # Try to override the default numbering in the style 91 | numPr = docxRoot.createElement('w:numPr') 92 | ilvl = docxRoot.createElement('w:ilvl ') 93 | ilvl.setAttribute('w:val', 0) 94 | numPr.appendChild(ilvl) 95 | numId = docxRoot.createElement('w:numId') 96 | numId.setAttribute('w:val', 0) 97 | numPr.appendChild(numId) 98 | pPr.appendChild(numPr) 99 | elif numberingID != None and indentationLevel != None: 100 | # 101 | # 102 | # 103 | # 104 | numPr = docxRoot.createElement('w:numPr') 105 | ilvl = docxRoot.createElement('w:ilvl ') 106 | ilvl.setAttribute('w:val', indentationLevel) 107 | numPr.appendChild(ilvl) 108 | numId = docxRoot.createElement('w:numId') 109 | numId.setAttribute('w:val', numberingID) 110 | numPr.appendChild(numId) 111 | pPr.appendChild(numPr) 112 | docxP.appendChild(pPr) 113 | 114 | # Then handle the actual text 115 | # 116 | # 117 | # 118 | # 119 | # Title 120 | # 121 | r = docxRoot.createElement('w:r') 122 | rPr = docxRoot.createElement('w:rPr') 123 | if language != None: 124 | lang = docxRoot.createElement('w:lang') 125 | lang.setAttribute('w:val', language) 126 | rPr.appendChild(lang) 127 | elif style != None: # Seems mandatory for figure ASCII art to repeat the style per run 128 | rStyle = docxRoot.createElement('w:rStyle') 129 | rStyle.setAttribute('w:val', style) 130 | rPr.appendChild(rStyle) 131 | r.appendChild(rPr) 132 | t = docxRoot.createElement('w:t') 133 | if cdataSection == None: 134 | text = docxRoot.createTextNode(textValue) 135 | else: 136 | t.setAttribute('xml:space', 'preserve') 137 | text = docxRoot.createTextNode(textValue) 138 | # text = docxRoot.createCDATASection(textValue) # xml:space is enough to keep leading spaces, CDATA adds 4 tabs after in the pretty printing :-( 139 | t.appendChild(text) 140 | r.appendChild(t) 141 | docxP.appendChild(r) 142 | return docxP 143 | 144 | libsTable = { 145 | 'RFC': 'http://www.rfc-editor.org/refs/bibxml/', 146 | 'I-D': 'http://xml2rfc.ietf.org/public/rfc/bibxml3/', 147 | 'W3C': 'http://xml2rfc.ietf.org/public/rfc/bibxml4/', 148 | 'SDO-3GPP': 'http://xml2rfc.ietf.org/public/rfc/bibxml5/', 149 | 'IEEE': 'http://xml2rfc.ietf.org/public/rfc/bibxml6/', 150 | 'DOI': 'http://xml2rfc.ietf.org/public/rfc/bibxml7/', 151 | 'BCP': 'http://xml2rfc.ietf.org/public/rfc/bibxml9/', 152 | 'FYI': 'http://xml2rfc.ietf.org/public/rfc/bibxml9/', 153 | 'STD': 'http://xml2rfc.ietf.org/public/rfc/bibxml9/', 154 | } 155 | 156 | def includeExternal(referenceName): 157 | global libsTable 158 | 159 | referenceTokens = referenceName.split('.') 160 | if libsTable.get(referenceTokens[1]): 161 | libURL = libsTable.get(referenceTokens[1]) 162 | print("Importing " + referenceName + " from " + libURL + referenceName + '.xml') 163 | try: 164 | response = urllib.request.urlopen(libURL + referenceName + '.xml') 165 | importedString = response.read() 166 | importedXML = minidom.parseString(importedString) 167 | except urllib.error.HTTPError as err: 168 | print("Cannot import XML from " + libURL + referenceName + ".xml, error: ", err) 169 | return None 170 | except: 171 | print('Not found or invalid XML in ' + libURL) 172 | return None 173 | return importedXML.getElementsByTagName('reference')[0] 174 | print("Reference type " + referenceTokens[1] + " not supported...") 175 | return None 176 | 177 | def parseAbstract(elem): 178 | for child in elem.childNodes: 179 | if child.nodeType != Node.ELEMENT_NODE: 180 | continue 181 | elif child.nodeName == 't': 182 | parseText(child, style = 'Abstract') 183 | else: 184 | print('Unexpected tagName in Abstract: ', child.nodeName) 185 | 186 | def parseArea(elem): 187 | textValue = 'Area: ' 188 | for text in elem.childNodes: 189 | if text.nodeType == Node.TEXT_NODE: 190 | textValue += text.nodeValue 191 | if elem.nodeType == Node.ELEMENT_NODE: 192 | if text.nodeName != '#text': 193 | print('!!!!! parseKeyword: Text is ELEMENT_NODE: ', text.nodeName) 194 | docxBody.appendChild(docxNewParagraph(textValue)) 195 | 196 | def parseArtWork(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.5 197 | # If there is no type attribute, let's process the element 198 | # If there is a type attribute, let's process the element only if type == ascii-art 199 | if (not elem.hasAttribute('type')) or (elem.hasAttribute('type') and (elem.getAttribute('type') == 'ascii-art' or elem.getAttribute('type') == '')): 200 | figureLines = '' 201 | for chunk in elem.childNodes: 202 | text = chunk.nodeValue 203 | figureLines += text 204 | # Let's split this string into lines and print each line 205 | for line in figureLines.splitlines(): 206 | docxBody.appendChild(docxNewParagraph(line.rstrip(" \t"), style = 'Code', removeEmpty = False, language = None, cdataSection = True)) 207 | 208 | def parseAuthor(elem): # Per https://tools.ietf.org/html/rfc7991#section-2.7 209 | global rfcAuthors 210 | 211 | # looking for the organization element as in https://tools.ietf.org/html/rfc7991#section-2.35 that can only contain text 212 | organization = '' 213 | for child in elem.childNodes: 214 | if child.nodeType != Node.ELEMENT_NODE: 215 | continue 216 | elif child.nodeName == 'organization': 217 | for grandchild in child.childNodes: 218 | if grandchild.nodeType == Node.TEXT_NODE: 219 | organization = ', ' + grandchild.nodeValue 220 | 221 | if elem.hasAttribute('asciiFullname'): 222 | docxBody.appendChild(docxNewParagraph(elem.getAttribute('asciiFullname') + organization, justification = 'right')) 223 | rfcAuthors.append(elem.getAttribute('asciiFullname') + organization) 224 | elif elem.hasAttribute('fullname'): 225 | docxBody.appendChild(docxNewParagraph(elem.getAttribute('fullname') + organization, justification = 'right')) 226 | rfcAuthors.append(elem.getAttribute('fullname') + organization) 227 | else: 228 | author = '' 229 | if elem.hasAttribute('initials'): 230 | author = author + elem.getAttribute('initials') + ' ' 231 | if elem.hasAttribute('surname'): 232 | author = author + elem.getAttribute('surname') 233 | if author != '': 234 | docxBody.appendChild(docxNewParagraph(author + organization, justification = 'right')) 235 | rfcAuthors.append(author + organization) 236 | 237 | def parseBack(elem): # https://tools.ietf.org/html/rfc7991#section-2.8 238 | if elem.nodeType != Node.ELEMENT_NODE: 239 | return 240 | # Let's hope that the children are in the right order... i.e., starting with the references 241 | docxBody.appendChild(docxNewParagraph('References', style = 'Heading1')) 242 | for child in elem.childNodes: 243 | if child.nodeType != Node.ELEMENT_NODE: 244 | continue 245 | if child.nodeName == 'displayreference': 246 | parseDisplayReference(child) 247 | elif child.nodeName == 'references': 248 | parseReferences(child) 249 | elif child.nodeName == 'section': 250 | parseSection(child, 2) 251 | else: 252 | print('!!!! parseBack: unexpected nodeName: ' + child.nodeName) 253 | 254 | def parseBcp14(elem): # https://tools.ietf.org/html/rfc7991#section-2.9 only text 255 | if elem.nodeValue != None: 256 | print('Bcp14 nodeValue: ' , elem.nodeValue) 257 | if elem.nodeType == Node.TEXT_NODE: 258 | print('Bcp14 node is TEXT_NODE') 259 | for child in elem.childNodes: 260 | if child.nodeType == Node.TEXT_NODE: 261 | return child.nodeValue 262 | else: 263 | print('!!!! parseBcp14 unexpected nodeType: ' + child.nodeType) 264 | 265 | def parseBlockQuote(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.10 that is similar to old items 266 | parseText(elem, style = 'Quote', numberingID = None, indentationLevel = None) 267 | 268 | def parseBoilerPlate(elem): 269 | for child in elem.childNodes: 270 | if child.nodeType != Node.ELEMENT_NODE: 271 | continue 272 | elif child.nodeName == 'section': 273 | parseSection(child, 1) 274 | else: 275 | print('Unexpected tagName in BoilerPlate: ', child.nodeName) 276 | 277 | def parseDate(elem): 278 | global rfcDate 279 | 280 | dateString = '' 281 | if elem.hasAttribute('day'): 282 | dateString = elem.getAttribute('day') + ' ' 283 | if elem.hasAttribute('month'): 284 | dateString = dateString + elem.getAttribute('month') + ' ' 285 | if elem.hasAttribute('year'): 286 | dateString = dateString + elem.getAttribute('year') 287 | if dateString != '': 288 | docxBody.appendChild(docxNewParagraph(dateString, justification = 'right')) 289 | rfcDate = dateString 290 | 291 | def parseDisplayReference(elem): # https://tools.ietf.org/html/rfc7991#section-2.19 292 | # Presentation only... skipping it for now 293 | return 294 | # print("parseDisplayReference not yet implemented") 295 | 296 | def parseDList(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.20 297 | for child in elem.childNodes: 298 | # If should be a serie of DT DD elements in the right order, the code is not resilient to out of order 299 | if child.nodeType != Node.ELEMENT_NODE: 300 | # print("parseDList unexpected node type...", child) # TODO sometimes it is CRLF + white spaces possibly for indentation ? 301 | continue 302 | if child.nodeName == 'dt': # Definition Term https://tools.ietf.org/html/rfc7991#section-2.21 303 | # Can contain text + some other elements 304 | parseText(child) 305 | elif child.nodeName == 'dd': # Definition part https://tools.ietf.org/html/rfc7991#section-2.18 306 | # Can contain text + some other elements including complex ones 307 | parseText(child) 308 | else: 309 | print('!!!! parseDList, unexpected child: ', child.nodeName) 310 | 311 | # TODO switch off language to avoid wrong typos ? 312 | def parseEref(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.24 313 | if elem.nodeValue != None: 314 | print('Eref nodeValue: ' , elem.nodeValue) 315 | if elem.hasAttribute('target'): # one and only mandatory attribute 316 | return '[' + elem.getAttribute('target') + ']' 317 | # Only target attribute, so, quite useless to parse other attributes 318 | if elem.nodeType == Node.TEXT_NODE: 319 | print('Eref node is TEXT_NODE') 320 | for child in elem.childNodes: 321 | if child.nodeType == Node.TEXT_NODE: 322 | return child.nodeValue 323 | if child.nodeName == 't': 324 | print("parseEref recurse into t !!!") 325 | parseText(child) 326 | 327 | def parseFigure(elem): # See https://tools.ietf.org/html/rfc7991#section-2.25 328 | # Figure had preamble (deprecated but let's process it) 329 | preambleChildren = elem.getElementsByTagName('preamble') 330 | if preambleChildren.length > 0 and preambleChildren[0].childNodes.length > 0: 331 | if preambleChildren[0].nodeType == Node.ELEMENT_NODE: 332 | preamble = preambleChildren[0].childNodes[0].nodeValue 333 | docxBody.appendChild(docxNewParagraph(preamble)) 334 | # Let's process a single artwork 335 | artworkChildren = elem.getElementsByTagName('artwork') 336 | for child in artworkChildren: 337 | parseArtWork(child) 338 | # Let's process the source code 339 | 340 | # Could have a title attribute rather than the name element (same as in section) 341 | if elem.nodeType != Node.ELEMENT_NODE: 342 | return 343 | figureTitle = None 344 | if elem.hasAttribute('title'): 345 | figureTitle = elem.getAttribute('title') 346 | else: 347 | nameChild = elem.getElementsByTagName('name') 348 | if nameChild.length > 0: 349 | if nameChild[0].nodeType == Node.ELEMENT_NODE: 350 | figureTitle = nameChild[0].childNodes[0].nodeValue 351 | if figureTitle != None: 352 | docxBody.appendChild(docxNewParagraph('Figure: ' + figureTitle, justification = 'center')) 353 | # Figure had postamble (deprecated but let's process it) 354 | postambleChildren = elem.getElementsByTagName('postamble') 355 | if postambleChildren.length > 0 and postambleChildren[0].childNodes.length > 0: 356 | if postambleChildren[0].nodeType == Node.ELEMENT_NODE: 357 | postamble = postambleChildren[0].childNodes[0].nodeValue 358 | docxBody.appendChild(docxNewParagraph(postamble)) 359 | 360 | def parseKeyword(elem): 361 | global rfcKeywords 362 | 363 | textValue = 'Keyword: ' 364 | for text in elem.childNodes: 365 | if text.nodeType == Node.TEXT_NODE: 366 | textValue += text.nodeValue 367 | rfcKeywords.append(text.nodeValue) 368 | if elem.nodeType == Node.ELEMENT_NODE: 369 | if text.nodeName != '#text': 370 | print('!!!!! parseKeyword: Text is ELEMENT_NODE: ', text.nodeName) 371 | docxBody.appendChild(docxNewParagraph(textValue)) 372 | 373 | def parseList(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.29 374 | for child in elem.childNodes: 375 | if child.nodeType == Node.COMMENT_NODE: 376 | continue 377 | elif child.nodeType == Node.TEXT_NODE: # Unexpected, let's hope it is empty space 378 | if child.nodeValue.strip(" \t\r\n") == '': 379 | continue 380 | print("!!!! parseList non empty text = '" + child.nodeValue.strip(" \t\r\n") + "'") 381 | continue 382 | elif child.nodeType != Node.ELEMENT_NODE: 383 | print('!!!! parseList, unexpected child node type: ', child) 384 | continue 385 | if child.nodeName == 't': 386 | parseText(child, style = 'ListParagraph', numberingID = '2', indentationLevel = '0') # numID = 2 is defined in numbering.xml as bullet list 387 | else: 388 | print('!!!! parseList, unexpected child: ', child.nodeName) 389 | 390 | def parseListItem(elem, style = 'ListParagraph', numberingID = None, indentationLevel = None): 391 | for i in range(elem.attributes.length): 392 | attrib = elem.attributes.item(i) 393 | if attrib.name == 'pn' or attrib.name == 'anchor' or attrib.name == 'derivedCounter': # Let's ignore this marking as no obvious requirement or support in Office OpenXML 394 | continue 395 | print("\tLI unexpected attribute: ", attrib.name, ' = ' , attrib.value) 396 | 397 | textValue = '' 398 | for text in elem.childNodes: 399 | if text.nodeType == Node.TEXT_NODE: 400 | textValue += text.nodeValue 401 | if elem.nodeType == Node.ELEMENT_NODE: 402 | if text.nodeName == 'bcp14': 403 | textValue = textValue + parseBcp14(text) 404 | elif text.nodeName == 'eref': 405 | textValue = textValue + parseXref(text) 406 | elif text.nodeName == 'ol': 407 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 408 | if p: 409 | docxBody.appendChild(p) # Need to emit the first part of the text 410 | textValue = '' 411 | parseOList(text) 412 | elif text.nodeName == 't': 413 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 414 | if p: 415 | docxBody.appendChild(p) # Need to emit the first part of the text textValue = '' 416 | parseText(text) 417 | elif text.nodeName == 'ul': 418 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 419 | if p: 420 | docxBody.appendChild(p) # Need to emit the first part of the text textValue = '' 421 | parseUList(text) 422 | elif text.nodeName == 'xref': 423 | textValue = textValue + parseXref(text) 424 | elif text.nodeName != '#text': 425 | print('!!!!! parseListItem: Text is ELEMENT_NODE: ', text.nodeName) 426 | # else: 427 | # print('parseListItem ignoring Text is ELEMENT_NODE: ', text.nodeName) 428 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 429 | if p: 430 | docxBody.appendChild(p) # Need to emit the last part of the text 431 | 432 | def parseNote(elem): # See https://tools.ietf.org/html/rfc7991#section-2.33 433 | print(" is an unsupported tag") 434 | 435 | # TODO should reset the numbering to 1... cfr draft-ietf-anima-autonomic-control-plane-29.xml 436 | def parseOList(elem): 437 | for child in elem.childNodes: 438 | if child.nodeType != Node.ELEMENT_NODE: 439 | continue 440 | if child.nodeName == 'li': 441 | parseListItem(child, numberingID = '1', indentationLevel = '0') # numID = 1 is defined in numbering.xml as enumeration list 442 | else: 443 | print('!!!! Unexpected List child: ', child.nodeName) 444 | 445 | def parseReference(elem): # See https://tools.ietf.org/html/rfc7991#section-2.40 446 | if elem.nodeType != Node.ELEMENT_NODE: 447 | return 448 | if elem.hasAttribute('anchor'): 449 | text = '[' + elem.getAttribute('anchor') + '] ' 450 | else: 451 | print('!!!! parseReference, missing anchor attribute') 452 | text = '' 453 | seriesInfoText = '' 454 | for serieInfo in elem.getElementsByTagName('seriesInfo'): 455 | if serieInfo.hasAttribute('name') and serieInfo.hasAttribute('value'): 456 | if serieInfo.getAttribute('value') == '': # Sometimes the value field is empty... no need to add a useless space 457 | seriesInfoText += serieInfo.getAttribute('name') + ' ' + serieInfo.getAttribute('value') + ', ' 458 | else: 459 | seriesInfoText += serieInfo.getAttribute('name') + ', ' 460 | else: 461 | print("!!!! parseReference, no name/value attribute in seriesInfo for " + text) 462 | frontElem = elem.getElementsByTagName('front')[0] 463 | if frontElem: 464 | for author in frontElem.getElementsByTagName('author'): 465 | authorName = '?' # Could also simply be in the child elemn 466 | if author.hasAttribute('surname'): 467 | if author.hasAttribute('initials'): 468 | authorName = author.getAttribute('surname') + ', ' + author.getAttribute('initials') 469 | else: 470 | authorName = author.getAttribute('surname') 471 | elif author.hasAttribute('fullname'): 472 | authorName = author.getAttribute('fullname') 473 | else: # Let's find the element 474 | orgElem = frontElem.getElementsByTagName('organization')[0] 475 | if orgElem: 476 | authorName = '' 477 | for child in orgElem.childNodes: 478 | if child.nodeType == Node.TEXT_NODE: 479 | authorName += child.nodeValue 480 | text += authorName + ', ' 481 | if frontElem.getElementsByTagName('title'): 482 | titleElem = frontElem.getElementsByTagName('title')[0] 483 | for child in titleElem.childNodes: 484 | if child.nodeType == Node.TEXT_NODE: 485 | text += '"' + child.nodeValue + '", ' 486 | # Insert seriesInfo if any 487 | text += seriesInfoText 488 | if frontElem.getElementsByTagName('date'): 489 | dateElem = frontElem.getElementsByTagName('date')[0] 490 | if dateElem.hasAttribute('year'): 491 | if dateElem.hasAttribute('month'): 492 | text += dateElem.getAttribute('month') + ' ' + dateElem.getAttribute('year') + ', ' 493 | else: 494 | text += dateElem.getAttribute('year') + ', ' 495 | else: # In the absence of element 496 | text += seriesInfoText 497 | 498 | if elem.hasAttribute('target'): 499 | text += elem.getAttribute('target') 500 | # Let's remove any trailing comma 501 | if text[-2:] == ', ': 502 | text = text[:-2] 503 | text += '.' 504 | p = docxNewParagraph(text) 505 | if p: 506 | docxBody.appendChild(p) 507 | 508 | def parseReferences(elem): # https://tools.ietf.org/html/rfc7991#section-2.42 509 | if elem.nodeType != Node.ELEMENT_NODE: 510 | return 511 | sectionTitle = None 512 | if elem.hasAttribute('title'): 513 | sectionTitle = elem.getAttribute('title') 514 | else: 515 | nameChild = elem.getElementsByTagName('name') 516 | if nameChild.length > 0: 517 | if nameChild[0].nodeType == Node.ELEMENT_NODE: 518 | sectionTitle = nameChild[0].childNodes[0].nodeValue 519 | else: 520 | print(elem) 521 | print('??? parseReferences: this references section has not title...') 522 | if sectionTitle != None: 523 | docxBody.appendChild(docxNewParagraph(sectionTitle, 'Heading2', unnumbered = None)) 524 | for child in elem.childNodes: 525 | if child.nodeType == Node.PROCESSING_INSTRUCTION_NODE: # in this location it is probably or 526 | if child.target == 'rfc' and (child.data[0:9] == "include='" or child.data[0:9] == 'include="'): 527 | includeName = child.data[9:-1] 528 | child = includeExternal(includeName) 529 | if child is None: 530 | continue 531 | else: 532 | print("parseReferences: skipping unknown processing instruction: target = " + child.target + ", data = " + child.data[0:9]) 533 | if child.nodeType == Node.TEXT_NODE: # Let's skip whitespace (assuming it is white space...) 534 | continue 535 | if child.nodeType != Node.ELEMENT_NODE: 536 | print('!!!! parseReferences: unexpected nodeType: ', child) 537 | continue 538 | if child.nodeName == 'reference': 539 | parseReference(child) 540 | else: 541 | print('!!!! parseReferences: unexpected nodeName: ' + child.nodeName) 542 | 543 | def parseRfc(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.45 544 | if elem.nodeType != Node.ELEMENT_NODE: 545 | return 546 | rfcInfo = '' 547 | if elem.hasAttribute('category'): 548 | docxBody.appendChild(docxNewParagraph('Category: ' + elem.getAttribute('category'))) 549 | if elem.hasAttribute('submissionType'): 550 | docxBody.appendChild(docxNewParagraph('Submission type: ' + elem.getAttribute('submissionType'))) 551 | if elem.hasAttribute('obsoletes'): 552 | docxBody.appendChild(docxNewParagraph('Obsoletes: ' + elem.getAttribute('obsoletes'))) 553 | if elem.hasAttribute('updates'): 554 | docxBody.appendChild(docxNewParagraph('Updates: ' + elem.getAttribute('updates'))) 555 | 556 | def parseSection(elem, headingDepth): 557 | if elem.nodeType != Node.ELEMENT_NODE: 558 | return 559 | if elem.hasAttribute('numbered'): 560 | unnumbered = (elem.getAttribute('numbered') == 'false') 561 | else: 562 | unnumbered = None 563 | sectionTitle = None 564 | if elem.hasAttribute('title'): 565 | sectionTitle = elem.getAttribute('title') 566 | elif elem.nodeName == 'section': # Can be the case for .... that are also processed by this part 567 | # Look after a child node of tag "name" 568 | nameChild = elem.getElementsByTagName('name') 569 | if nameChild.length > 0: 570 | if nameChild[0].nodeType == Node.ELEMENT_NODE: 571 | sectionTitle = nameChild[0].childNodes[0].nodeValue 572 | else: 573 | print('??? This section has not title...') 574 | if sectionTitle != None: 575 | docxBody.appendChild(docxNewParagraph(sectionTitle, 'Heading' + str(headingDepth), unnumbered = unnumbered)) 576 | sectionId = 0 577 | for child in elem.childNodes: 578 | if child.nodeType != Node.ELEMENT_NODE: 579 | continue 580 | if child.nodeName == 'section': 581 | sectionId = sectionId + 1 582 | # Should create a docx Child ??? 583 | parseSection(child, headingDepth + 1) 584 | elif child.nodeName == 'abstract': 585 | parseAbstract(child) 586 | elif child.nodeName == 'area': 587 | parseArea(child) 588 | elif child.nodeName == 'artwork': 589 | parseArtWork(child) 590 | elif child.nodeName == 'author': 591 | parseAuthor(child) 592 | elif child.nodeName == 'blockquote': 593 | parseBlockQuote(child) 594 | elif child.nodeName == 'boilerplate': 595 | parseBoilerPlate(child) 596 | elif child.nodeName == 'date': 597 | parseDate(child) 598 | elif child.nodeName == 'dl': 599 | parseDList(child) 600 | elif child.nodeName == 'figure': 601 | parseFigure(child) 602 | elif child.nodeName == 'keyword': 603 | parseKeyword(child) 604 | elif child.nodeName == 'name': # Already processed 605 | continue 606 | elif child.nodeName == 'note': 607 | parseNote(child) 608 | elif child.nodeName == 'ol': 609 | parseOList(child) 610 | elif child.nodeName == 't': 611 | parseText(child, style = None) 612 | elif child.nodeName == 'seriesInfo': 613 | parseSeriesInfo(child) 614 | elif child.nodeName == 'texttable': 615 | parseTextTable(child) 616 | elif child.nodeName == 'title': 617 | parseTitle(child) 618 | elif child.nodeName == 'toc': 619 | print('Skipping the ToC') 620 | elif child.nodeName == 'ul': 621 | parseUList(child) 622 | elif child.nodeName == 'workgroup': 623 | parseWorkgroup(child) 624 | else: 625 | print('!!!!! Unexpected tag in parseSection: ' + child.tagName) 626 | 627 | # TODO handle wrongly formatted 628 | def parseSeriesInfo(elem): 629 | seriesInfoString = '' 630 | if elem.hasAttribute('name'): 631 | seriesInfoString = elem.getAttribute('name') + ' ' 632 | if elem.hasAttribute('value'): 633 | seriesInfoString = seriesInfoString + elem.getAttribute('value') + ' ' 634 | else: 635 | seriesInfoString = seriesInfoString 636 | if elem.hasAttribute('stream'): 637 | seriesInfoString = seriesInfoString + ' (stream: ' + elem.getAttribute('stream') + ')' 638 | if seriesInfoString != '': 639 | docxBody.appendChild(docxNewParagraph(seriesInfoString, justification = 'right')) 640 | 641 | 642 | def parseText(elem, style = None, numberingID = None, indentationLevel = None, Verbose = None): # See https://tools.ietf.org/html/rfc7991#section-2.53 643 | if Verbose: 644 | print("parseText start: ", elem) 645 | textValue = '' 646 | # Mainly for debugging 647 | for i in range(elem.attributes.length): 648 | attrib = elem.attributes.item(i) 649 | if attrib.name == 'hangText': 650 | textValue = attrib.value 651 | continue 652 | if attrib.name == 'pn': # Let's ignore this marking as no obvious requirement or support in Office OpenXML 653 | continue 654 | if attrib.name == 'indent': # TODO later if really required 655 | continue 656 | if attrib.name == 'keepWithNext': # TODO later if really required 657 | continue 658 | print("\tparseText unexpected attribute: ", attrib.name, '=' , attrib.value) 659 | 660 | for text in elem.childNodes: 661 | if text.nodeType == Node.TEXT_NODE: 662 | textValue += text.nodeValue 663 | if Verbose: 664 | print("parseText adding TEXT_NODE: '", text.nodeValue, "'") 665 | if elem.nodeType == Node.ELEMENT_NODE: 666 | if text.nodeName == 'bcp14': 667 | textValue = textValue + parseBcp14(text) 668 | elif text.nodeName == 'eref': 669 | textValue = textValue + parseEref(text) 670 | elif text.nodeName == 'figure': 671 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 672 | if p: 673 | docxBody.appendChild(p) # Need to emit the first part of the text 674 | textValue = '' 675 | parseFigure(text) 676 | elif text.nodeName == 'list': 677 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 678 | if p: 679 | docxBody.appendChild(p) # Need to emit the first part of the text 680 | textValue = '' 681 | parseList(text) 682 | elif text.nodeName == 'ol': 683 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 684 | if p: 685 | docxBody.appendChild(p) # Need to emit the first part of the text 686 | textValue = '' 687 | parseOList(text) 688 | elif text.nodeName == 't': 689 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 690 | if p: 691 | docxBody.appendChild(p) # Need to emit the first part of the text 692 | if Verbose: 693 | print("parseText found : emitting '", textValue, "'") 694 | textValue = '' 695 | parseText(text, style = style, numberingID = numberingID, indentationLevel = indentationLevel, Verbose = Verbose) 696 | elif text.nodeName == 'vspace': 697 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 698 | if p: 699 | docxBody.appendChild(p) # Need to emit the first part of the text 700 | # Now force an empty paragraph 701 | p = docxNewParagraph('', style = style, removeEmpty = False) 702 | if p: 703 | docxBody.appendChild(p) 704 | textValue = '' 705 | elif text.nodeName == 'ul': 706 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 707 | if p: 708 | docxBody.appendChild(p) # Need to emit the first part of the text 709 | textValue = '' 710 | parseUList(text) 711 | elif text.nodeName == 'xref': 712 | textValue = textValue + parseXref(text) 713 | elif text.nodeName != '#text' and text.nodeName != '#comment': 714 | print('!!!!! parseText: Text is ELEMENT_NODE: ', text.nodeName) 715 | p = docxNewParagraph(textValue, style = style, numberingID = numberingID, indentationLevel = indentationLevel) 716 | if p: 717 | docxBody.appendChild(p) # Need to emit the first part of the text 718 | 719 | def parseTextTable(elem): 720 | print('Skipping TextTable') 721 | docxBody.appendChild(docxNewParagraph('... a TextTable was not imported...', justification = 'center')) 722 | 723 | def parseTitle(elem): 724 | global rfcTitle 725 | 726 | textValue = '' 727 | for text in elem.childNodes: 728 | if text.nodeType == Node.TEXT_NODE: 729 | textValue += text.nodeValue 730 | docxBody.appendChild(docxNewParagraph(textValue, 'Title')) 731 | rfcTitle = textValue 732 | 733 | def parseUList(elem): 734 | for child in elem.childNodes: 735 | if child.nodeType != Node.ELEMENT_NODE: 736 | continue 737 | if child.nodeName == 'li': 738 | parseListItem(child, numberingID = '2', indentationLevel = '0') # numID = 2 is defined in numbering.xml as bullet list 739 | else: 740 | print('!!!! Unexpected List child: ', child.nodeName) 741 | 742 | def parseWorkgroup(elem): 743 | textValue = 'Workgroup: ' 744 | for text in elem.childNodes: 745 | if text.nodeType == Node.TEXT_NODE: 746 | textValue += text.nodeValue 747 | if elem.nodeType == Node.ELEMENT_NODE: 748 | if text.nodeName != '#text': 749 | print('!!!!! parseKeyword: Text is ELEMENT_NODE: ', text.nodeName) 750 | docxBody.appendChild(docxNewParagraph(textValue)) 751 | 752 | def parseXref(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.66 753 | if elem.nodeValue != None: 754 | print('Xref nodeValue: ' , elem.nodeValue) 755 | if elem.hasAttribute('target'): # One and only mandatory attribute 756 | return '[' + elem.getAttribute('target') + ']' 757 | if elem.nodeType == Node.TEXT_NODE: 758 | print('Xref node is TEXT_NODE') 759 | # Only target attribute, so, quite useless to parse further for more attributes 760 | for child in elem.childNodes: 761 | if child.nodeType == Node.TEXT_NODE: 762 | return child.nodeValue 763 | print('!!!! parseXref, unexpected child.nodeName: ' + child.nodeName) # Only text is allowed 764 | 765 | 766 | def processXML(inFilename, outFilename = 'xml2docx.xml'): 767 | global xmldoc 768 | global docxRoot, docxBody, docxDocument 769 | 770 | if os.path.isfile(inFilename): 771 | xmldoc = minidom.parse(inFilename) 772 | else: 773 | try: 774 | response = urllib.request.urlopen('https://tools.ietf.org/id/' + inFilename + '.xml') 775 | except: 776 | print("Cannot fetch the XML document from the IETF site...") 777 | sys.exit(1) 778 | draftString = response.read() 779 | xmldoc = minidom.parseString(draftString) 780 | print("Fetching the draft from the IETF site...") 781 | 782 | rfc = xmldoc.getElementsByTagName('rfc')[0] 783 | 784 | front = rfc.getElementsByTagName('front')[0] 785 | middle = rfc.getElementsByTagName('middle')[0] 786 | back = rfc.getElementsByTagName('back')[0] 787 | 788 | domImplementation = xml.dom.getDOMImplementation() 789 | docxRoot = domImplementation.createDocument(None, None, None) 790 | 791 | docxDocument = docxRoot.createElement('w:document') 792 | docxDocument.setAttribute('xmlns:wpc', 'http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas') # To be repeated for all namespaces 793 | docxDocument.setAttribute('xmlns:cx', 'http://schemas.microsoft.com/office/drawing/2014/chartex') 794 | docxDocument.setAttribute('xmlns:cx1', 'http://schemas.microsoft.com/office/drawing/2015/9/8/chartex') 795 | docxDocument.setAttribute('xmlns:cx2', 'http://schemas.microsoft.com/office/drawing/2015/10/21/chartex') 796 | docxDocument.setAttribute('xmlns:cx3', 'http://schemas.microsoft.com/office/drawing/2016/5/9/chartex') 797 | docxDocument.setAttribute('xmlns:cx4', 'http://schemas.microsoft.com/office/drawing/2016/5/10/chartex') 798 | docxDocument.setAttribute('xmlns:cx5', 'http://schemas.microsoft.com/office/drawing/2016/5/11/chartex') 799 | docxDocument.setAttribute('xmlns:cx6', 'http://schemas.microsoft.com/office/drawing/2016/5/12/chartex') 800 | docxDocument.setAttribute('xmlns:cx7', 'http://schemas.microsoft.com/office/drawing/2016/5/13/chartex') 801 | docxDocument.setAttribute('xmlns:cx8', 'http://schemas.microsoft.com/office/drawing/2016/5/14/chartex') 802 | docxDocument.setAttribute('xmlns:mc', 'http://schemas.openxmlformats.org/markup-compatibility/2006') 803 | docxDocument.setAttribute('xmlns:aink', 'http://schemas.microsoft.com/office/drawing/2016/ink') 804 | docxDocument.setAttribute('xmlns:am3d', 'http://schemas.microsoft.com/office/drawing/2017/model3d') 805 | docxDocument.setAttribute('xmlns:o', 'urn:schemas-microsoft-com:office:office') 806 | docxDocument.setAttribute('xmlns:r', 'http://schemas.openxmlformats.org/officeDocument/2006/relationships') 807 | docxDocument.setAttribute('xmlns:m', 'http://schemas.openxmlformats.org/officeDocument/2006/math') 808 | docxDocument.setAttribute('xmlns:v', 'urn:schemas-microsoft-com:vml') 809 | docxDocument.setAttribute('xmlns:wp14', 'http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing') 810 | docxDocument.setAttribute('xmlns:wp', 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing') 811 | docxDocument.setAttribute('xmlns:w10', 'urn:schemas-microsoft-com:office:word') 812 | docxDocument.setAttribute('xmlns:w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main') 813 | docxDocument.setAttribute('xmlns:w14', 'http://schemas.microsoft.com/office/word/2010/wordml') 814 | docxDocument.setAttribute('xmlns:w15', 'http://schemas.microsoft.com/office/word/2012/wordml') 815 | docxDocument.setAttribute('xmlns:w16cex', 'http://schemas.microsoft.com/office/word/2018/wordml/cex') 816 | docxDocument.setAttribute('xmlns:w16cid', 'http://schemas.microsoft.com/office/word/2016/wordml/cid') 817 | docxDocument.setAttribute('xmlns:w16', 'http://schemas.microsoft.com/office/word/2018/wordml') 818 | docxDocument.setAttribute('xmlns:w16se', 'http://schemas.microsoft.com/office/word/2015/wordml/symex') 819 | docxDocument.setAttribute('xmlns:wpg', 'http://schemas.microsoft.com/office/word/2010/wordprocessingGroup') 820 | docxDocument.setAttribute('xmlns:wpi', 'http://schemas.microsoft.com/office/word/2010/wordprocessingInk') 821 | docxDocument.setAttribute('xmlns:wne', 'http://schemas.microsoft.com/office/word/2006/wordml') 822 | docxDocument.setAttribute('xmlns:wps', 'http://schemas.microsoft.com/office/word/2010/wordprocessingShape') 823 | docxDocument.setAttribute('mc:Ignorable', 'w14 w15 w16se w16cid w16 w16cex wp14') 824 | docxDocument.setAttribute('xmlns:w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main') 825 | docxRoot.appendChild(docxDocument) 826 | 827 | docxBody = docxRoot.createElement('w:body') 828 | docxDocument.appendChild(docxBody) 829 | 830 | parseRfc(rfc) 831 | parseSection(front, 0) 832 | parseSection(middle, 0) 833 | parseBack(back) 834 | 835 | sectPrElem = docxRoot.createElement('w:sectPr') 836 | 837 | pgSzElem = docxRoot.createElement('w:pgSz') 838 | pgSzElem.setAttribute('w:h', '15840') 839 | pgSzElem.setAttribute('w:w', '12240') 840 | sectPrElem.appendChild(pgSzElem) 841 | 842 | pgMarElem = docxRoot.createElement('w:pgMar') 843 | pgMarElem.setAttribute('w:gutter', '0') 844 | pgMarElem.setAttribute('w:footer', '708') 845 | pgMarElem.setAttribute('w:header', '708') 846 | pgMarElem.setAttribute('w:left', '1440') 847 | pgMarElem.setAttribute('w:bottom', '1400') 848 | pgMarElem.setAttribute('w:right', '1440') 849 | pgMarElem.setAttribute('w:top', '1440') 850 | sectPrElem.appendChild(pgMarElem) 851 | 852 | 853 | colsElem = docxRoot.createElement('w:cols') 854 | colsElem.setAttribute('w:space', '708') 855 | sectPrElem.appendChild(colsElem) 856 | 857 | docGrid = docxRoot.createElement('w:docGrid') 858 | docGrid.setAttribute('w:linePitch', '360') 859 | sectPrElem.appendChild(docGrid) 860 | 861 | docxBody.appendChild(sectPrElem) 862 | 863 | docxFile = io.open(outFilename, 'w', encoding="'utf8'") 864 | # Ugly but no other way to put attributes in the top XML 865 | docxFile.write(docxRoot.toprettyxml().replace('', '')) 866 | docxFile.close() 867 | print('OpenXML document.xml file is at', outFilename) 868 | 869 | def myParseDate(s): 870 | try: 871 | # Let's first try with short month names 872 | date = datetime.datetime.strptime(s,'%d %b %Y') 873 | except ValueError: 874 | # Then try with full length month names 875 | try: 876 | date = datetime.datetime.strptime(s,'%d %B %Y') 877 | except ValueError: 878 | date = datetime.datetime.utcnow() # Giving up... 879 | return date 880 | 881 | def generateDocPropsCore(): 882 | xmlcore = minidom.parse(templateDirectory + '/docProps/core.xml') 883 | 884 | if len(rfcAuthors) > 0: 885 | creatorElem = xmlcore.getElementsByTagName('dc:creator')[0] 886 | for child in creatorElem.childNodes: 887 | creatorElem.removeChild(child) 888 | text = xmlcore.createTextNode(', '.join(rfcAuthors)) 889 | creatorElem.appendChild(text) 890 | if rfcDate != None: 891 | createdElem = xmlcore.getElementsByTagName('dcterms:created')[0] 892 | for child in createdElem.childNodes: 893 | createdElem.removeChild(child) 894 | createdDate = myParseDate(rfcDate) 895 | text = xmlcore.createTextNode(createdDate.strftime('%Y-%m-%dT%H:%M:%SZ')) 896 | createdElem.appendChild(text) 897 | if len(rfcKeywords) > 0: 898 | keywordsElem = xmlcore.getElementsByTagName('cp:keywords')[0] 899 | for child in keywordsElem.childNodes: 900 | keywordsElem.removeChild(child) 901 | text = xmlcore.createTextNode(', '.join(rfcKeywords)) 902 | keywordsElem.appendChild(text) 903 | if rfcTitle != None: 904 | titleElem = xmlcore.getElementsByTagName('dc:title')[0] 905 | for child in titleElem.childNodes: 906 | titleElem.removeChild(child) 907 | text = xmlcore.createTextNode(rfcTitle) 908 | titleElem.appendChild(text) 909 | # Now, let's say that this script did it ;-) 910 | modifiedByElem = xmlcore.getElementsByTagName('cp:lastModifiedBy')[0] 911 | for child in modifiedByElem.childNodes: 912 | modifiedByElem.removeChild(child) 913 | text = xmlcore.createTextNode('Xml2rfc') 914 | modifiedByElem.appendChild(text) 915 | modifiedElem = xmlcore.getElementsByTagName('dcterms:modified')[0] 916 | for child in modifiedElem.childNodes: 917 | modifiedElem.removeChild(child) 918 | now = datetime.datetime.utcnow() 919 | text = xmlcore.createTextNode(now.strftime('%Y-%m-%dT%H:%M:%SZ')) 920 | modifiedElem.appendChild(text) 921 | 922 | return xmlcore.toprettyxml().replace('', '') 923 | 924 | def docxPackage(docxFilename, openXML, templateDirectory): 925 | print('Generating OpenXML packaging file', docxFilename) 926 | print("\tUsing template in" + templateDirectory) 927 | coreXML = generateDocPropsCore() 928 | with zipfile.ZipFile(docxFilename, 'w', compression=zipfile.ZIP_DEFLATED) as docx: 929 | files = [ '[Content_Types].xml', '_rels/.rels', 'docProps/app.xml', 930 | # Should not move the output in template directory... 'word/document.xml', 931 | 'word/fontTable.xml', 'word/settings.xml', 'word/numbering.xml', 'word/webSettings.xml', 932 | 'word/styles.xml', 'word/theme/theme1.xml', 'word/_rels/document.xml.rels'] 933 | for file in files: 934 | docx.write(templateDirectory + '/' + file, arcname = file) 935 | docx.write(openXML, arcname = 'word/document.xml') 936 | docx.writestr('docProps/core.xml', coreXML) 937 | 938 | if __name__ == '__main__': 939 | inFilename = None 940 | outFilename = None 941 | templateDirectory = None 942 | docxFilename = None 943 | try: 944 | opts, args = getopt.getopt(sys.argv[1:],"d:hi:o:t:",["ifile=","ofile=","template=", "docx="]) 945 | except getopt.GetoptError: 946 | print('xml2docx.py -i -o ') 947 | sys.exit(2) 948 | for opt, arg in opts: 949 | if opt == '-h': 950 | print('xml2docx.py -i [-o ] [--docx ]') 951 | sys.exit() 952 | elif opt in ("-i", "--ifile"): 953 | inFilename = arg 954 | elif opt in ("-o", "--ofile"): 955 | outFilename = arg 956 | elif opt in ("-t", "--template"): 957 | templateDirectory = arg 958 | elif opt in ("-d", "--docx"): 959 | docxFilename = arg 960 | if templateDirectory == None: 961 | templateDirectory = os.path.dirname(os.path.abspath(sys.argv[0])) + '/template' # default template is in the executable directory 962 | if inFilename == None: 963 | print('Missing input filename') 964 | sys.exit(2) 965 | if outFilename == None: 966 | if docxFilename != None: 967 | outFilename = templateDirectory + '/word/document.xml' 968 | else: 969 | outFilename = 'xml2docx.xml' 970 | if docxFilename == None: 971 | if inFilename[-4:] == '.xml': 972 | docxFilename = inFilename.replace('.xml', '.docx') 973 | else: 974 | docxFilename = inFilename + '.docx' 975 | 976 | # Let's generate the openXML word processing 'document.xml' file 977 | processXML(inFilename, outFilename) 978 | 979 | # Now, let's generate the .DOCX file 980 | docxPackage(docxFilename, outFilename, templateDirectory) 981 | --------------------------------------------------------------------------------