├── .gitignore
├── LICENSE
├── README.md
├── index.php
├── process.php
├── template
├── [Content_Types].xml
├── _rels
│ └── .rels
├── docProps
│ ├── app.xml
│ └── core.xml
└── word
│ ├── _rels
│ └── document.xml.rels
│ ├── fontTable.xml
│ ├── numbering.xml
│ ├── settings.xml
│ ├── styles.xml
│ ├── theme
│ └── theme1.xml
│ └── webSettings.xml
└── xml2docx.py
/.gitignore:
--------------------------------------------------------------------------------
1 | header.inc
2 | sample
3 | draft-ietf-intarea-provisioning-domains-00.xml
4 | draft-*.xml
5 | sample.docx
6 | sample_document.xml
7 | test.docx
8 | ~$*.docx
9 | document.xml
10 | xml2doc.xml
11 | *.docx
12 | xml2docx.xml
13 | rfc*.xml
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # xml2docx
2 |
3 | Convert an IETF XML document (draft) into an Office Open XML (e.g., Microsoft Word) .docx file. The resulting .docx can then be reviewed using the spelling and **grammar** plug-ins of the word processor.
4 |
5 | References:
6 |
7 | * http://officeopenxml.com/
8 | * https://tools.ietf.org/html/rfc7991
9 |
10 | ## On-line tool
11 |
12 | <https://www.vyncke.org/xml2docx/> runs the latest version of this code.
13 |
--------------------------------------------------------------------------------
/index.php:
--------------------------------------------------------------------------------
1 |
2 |
18 |
19 | XML to Office OpenXML .DOCX
20 |
24 |
25 |
26 | IETF XML2RFC file conversion into Office OpenXML .DOCX
27 |
28 |
34 |
35 |
36 | Copyright Eric Vyncke, 2020. Clone me at https://github.com/evyncke/xml2docx.git
37 |
--------------------------------------------------------------------------------
/process.php:
--------------------------------------------------------------------------------
1 |
39 |
40 |
41 | XML to Office OpenXML DOCX
42 |
46 |
47 |
48 | IETF XML2RFC file conversion into Office OpenXML .DOCX
49 |
50 |
56 |
57 |
58 | Copyright Eric Vyncke, 2020. Clone me at https://github.com/evyncke/xml2docx.git
59 |
--------------------------------------------------------------------------------
/template/[Content_Types].xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/template/_rels/.rels:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/template/docProps/app.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | Normal.dotm
6 | 3
7 | 1
8 | 27
9 | 160
10 | Xml2Docx convertor
11 | 0
12 | 1
13 | 1
14 | false
15 | IETF open source
16 | false
17 | 186
18 | false
19 | false
20 | 1.0000
21 |
22 |
--------------------------------------------------------------------------------
/template/docProps/core.xml:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
10 | Eric Vyncke (evyncke)
11 |
12 |
13 | XML2DOCX
14 | 5
15 | 2020-08-21T07:55:00Z
16 | 2020-08-21T08:04:00Z
17 |
--------------------------------------------------------------------------------
/template/word/_rels/document.xml.rels:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/template/word/fontTable.xml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/template/word/numbering.xml:
--------------------------------------------------------------------------------
1 |
2 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
--------------------------------------------------------------------------------
/template/word/settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/template/word/styles.xml:
--------------------------------------------------------------------------------
1 |
2 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
549 |
550 |
551 |
552 |
553 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
561 |
562 |
563 |
564 |
565 |
566 |
567 |
568 |
569 |
570 |
571 |
572 |
573 |
574 |
575 |
576 |
577 |
578 |
579 |
580 |
581 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 |
590 |
591 |
592 |
593 |
594 |
595 |
596 |
597 |
598 |
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 |
611 |
612 |
613 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 |
804 |
805 |
806 |
807 |
808 |
809 |
810 |
811 |
812 |
813 |
814 |
815 |
816 |
817 |
818 |
819 |
820 |
821 |
822 |
823 |
824 |
825 |
826 |
827 |
828 |
829 |
830 |
831 |
832 |
833 |
834 |
835 |
836 |
837 |
838 |
839 |
840 |
841 |
842 |
843 |
844 |
845 |
846 |
847 |
848 |
849 |
850 |
851 |
852 |
853 |
854 |
855 |
856 |
857 |
858 |
859 |
860 |
861 |
862 |
863 |
864 |
865 |
866 |
867 |
868 |
869 |
870 |
871 |
872 |
873 |
874 |
875 |
876 |
877 |
878 |
879 |
880 |
881 |
882 |
883 |
884 |
885 |
886 |
887 |
888 |
889 |
890 |
891 |
892 |
893 |
894 |
895 |
896 |
897 |
898 |
899 |
900 |
901 |
902 |
903 |
904 |
905 |
906 |
907 |
908 |
909 |
--------------------------------------------------------------------------------
/template/word/theme/theme1.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
293 |
294 |
295 |
--------------------------------------------------------------------------------
/template/word/webSettings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/xml2docx.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright 2020, Eric Vyncke, evyncke@cisco.com
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # A lot of information in http://officeopenxml.com/anatomyofOOXML.php
19 |
20 | # TODO
21 | # Handle external entities used notably for references...
22 | # https://www.w3schools.com/xml/xml_dtd_entities.asp
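23 | # example (representative; the original sample was lost when markup was stripped):
24 | # <?xml version="1.0" encoding="UTF-8"?>
25 | # <!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
26 | # <!ENTITY RFC2119 SYSTEM "http://www.rfc-editor.org/refs/bibxml/reference.RFC.2119.xml">
27 | # ]>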
28 |
29 | from xml.dom import minidom, Node
30 | import xml.dom
31 | from pprint import pprint
32 | import sys, getopt
33 | import io, os
34 | import zipfile
35 | import tempfile, datetime
36 | import urllib.request
37 |
# Some state to be kept at module level and shared by the parsing functions
# (for instance, parseAuthor appends every author it renders to rfcAuthors).
rfcDate = None
rfcAuthors = []
rfcTitle = None
rfcKeywords = []
43 |
def printTree(front):
    """Dump the direct children of *front* on stdout (debugging aid).

    Text children are printed with their value. For each element child the
    name, the attributes and the element's own element/text children are
    printed. Any other node type is ignored. A separator line closes the dump.
    """
    print('All children:')
    for node in front.childNodes:
        if node.nodeType == Node.TEXT_NODE:
            print("\t TEXT: '", node.nodeValue, "'")
            continue
        if node.nodeType != Node.ELEMENT_NODE:
            continue
        print("\t", node.nodeName)
        print("\tAttributes:")
        attributes = node.attributes
        for index in range(attributes.length):
            attribute = attributes.item(index)
            print("\t\t", attribute.name, ' = ', attribute.value)
        print("\tChildren:")
        for grandchild in node.childNodes:
            kind = grandchild.nodeType
            if kind == Node.ELEMENT_NODE:
                print("\t\tELEMENT: ", grandchild.nodeName)
            elif kind == Node.TEXT_NODE:
                print("\t\tTEXT: ", grandchild.nodeValue)
    print("\n----------\n")
63 |
def docxNewParagraph(textValue, style = 'Normal', justification = None, unnumbered = None, numberingID = None, indentationLevel = None, removeEmpty = True, language = 'en-US', cdataSection = None):
    """Build a WordprocessingML paragraph element (w:p) holding one run of text.

    The element is created with the module-level ``docxRoot`` document but is
    NOT attached to the tree; the caller appends it where needed.

    Parameters:
        textValue: text of the paragraph; ``None`` yields ``None``.
        style: value of w:pStyle (omitted when ``None``). It is also repeated
            as the run style (w:rStyle) when ``language`` is ``None``.
        justification: value of w:jc (omitted when ``None``).
        unnumbered: when truthy, emit a w:numPr with level 0 and numbering
            id 0 to try to override the numbering inherited from the style.
        numberingID, indentationLevel: when both are not ``None`` (and
            ``unnumbered`` is falsy), emit a w:numPr with these values.
        removeEmpty: when true, an empty text yields ``None``.
        language: value of w:lang on the run (omitted when ``None``).
        cdataSection: any non-``None`` value keeps the text verbatim and marks
            w:t with xml:space="preserve"; otherwise runs of whitespace are
            collapsed to single spaces and the text is stripped.

    Returns:
        The new w:p element, or ``None`` when there is nothing to emit.
    """
    if textValue is None:
        return None
    if cdataSection is None:  # remove extra spaces only if verbatim text is not requested
        textValue = ' '.join(textValue.split())
    if textValue == '' and removeEmpty:
        return None

    paragraph = docxRoot.createElement('w:p')

    # First handle the paragraph properties: style, justification, numbering
    pPr = docxRoot.createElement('w:pPr')
    if style is not None:
        pStyle = docxRoot.createElement('w:pStyle')
        pStyle.setAttribute('w:val', style)
        pPr.appendChild(pStyle)
    if justification is not None:
        jc = docxRoot.createElement('w:jc')
        jc.setAttribute('w:val', justification)
        pPr.appendChild(jc)

    numbering = None  # (indentation level, numbering id), both as strings
    if unnumbered:  # Try to override the default numbering in the style
        numbering = ('0', '0')
    elif numberingID is not None and indentationLevel is not None:
        # minidom stores attribute values as given and can only serialize
        # strings, so integers must be converted here.
        numbering = (str(indentationLevel), str(numberingID))
    if numbering is not None:
        numPr = docxRoot.createElement('w:numPr')
        for tagName, value in zip(('w:ilvl', 'w:numId'), numbering):
            child = docxRoot.createElement(tagName)
            child.setAttribute('w:val', value)
            numPr.appendChild(child)
        pPr.appendChild(numPr)
    paragraph.appendChild(pPr)

    # Then handle the actual text, wrapped in a single run
    run = docxRoot.createElement('w:r')
    rPr = docxRoot.createElement('w:rPr')
    if language is not None:
        lang = docxRoot.createElement('w:lang')
        lang.setAttribute('w:val', language)
        rPr.appendChild(lang)
    elif style is not None:  # Seems mandatory for figure ASCII art to repeat the style per run
        rStyle = docxRoot.createElement('w:rStyle')
        rStyle.setAttribute('w:val', style)
        rPr.appendChild(rStyle)
    run.appendChild(rPr)

    t = docxRoot.createElement('w:t')
    if cdataSection is not None:
        # xml:space is enough to keep leading spaces; a real CDATA section
        # gets extra tabs appended by the pretty printer.
        t.setAttribute('xml:space', 'preserve')
    t.appendChild(docxRoot.createTextNode(textValue))
    run.appendChild(t)
    paragraph.appendChild(run)
    return paragraph
143 |
# Base URL of the bibxml library serving each supported reference type.
# includeExternal() looks up the second dot-separated token of a reference
# name in this table and appends '<referenceName>.xml' to the base URL.
libsTable = {
    'RFC': 'http://www.rfc-editor.org/refs/bibxml/',
    'I-D': 'http://xml2rfc.ietf.org/public/rfc/bibxml3/',
    'W3C': 'http://xml2rfc.ietf.org/public/rfc/bibxml4/',
    'SDO-3GPP': 'http://xml2rfc.ietf.org/public/rfc/bibxml5/',
    'IEEE': 'http://xml2rfc.ietf.org/public/rfc/bibxml6/',
    'DOI': 'http://xml2rfc.ietf.org/public/rfc/bibxml7/',
    'BCP': 'http://xml2rfc.ietf.org/public/rfc/bibxml9/',
    'FYI': 'http://xml2rfc.ietf.org/public/rfc/bibxml9/',
    'STD': 'http://xml2rfc.ietf.org/public/rfc/bibxml9/',
}
155 |
def includeExternal(referenceName):
    """Fetch the external bibliographic XML of *referenceName*.

    The second dot-separated token of ``referenceName`` selects a base URL in
    ``libsTable``; the document is then read from
    ``<base URL><referenceName>.xml`` and parsed with minidom.

    Parameters:
        referenceName: name of the reference, made of dot-separated tokens
            whose second token is a key of ``libsTable``.

    Returns:
        The first ``reference`` element of the fetched document, or ``None``
        (after printing a diagnostic) when the name has no second token, the
        reference type is not in ``libsTable``, the fetch or the XML parsing
        fails, or the document contains no ``reference`` element.
    """
    referenceTokens = referenceName.split('.')
    if len(referenceTokens) < 2:
        # Without a second token there is no library to look up
        print("Reference name " + referenceName + " does not contain a reference type...")
        return None

    libURL = libsTable.get(referenceTokens[1])
    if not libURL:
        print("Reference type " + referenceTokens[1] + " not supported...")
        return None

    referenceURL = libURL + referenceName + '.xml'
    print("Importing " + referenceName + " from " + referenceURL)
    try:
        # The context manager closes the response even when parsing fails
        with urllib.request.urlopen(referenceURL) as response:
            importedXML = minidom.parseString(response.read())
    except urllib.error.HTTPError as err:
        print("Cannot import XML from " + referenceURL + ", error: ", err)
        return None
    except Exception:
        # Best effort: any other network or parsing failure skips the
        # reference, while KeyboardInterrupt/SystemExit still propagate.
        print('Not found or invalid XML in ' + libURL)
        return None

    references = importedXML.getElementsByTagName('reference')
    if not references:
        print('No reference element found in ' + referenceURL)
        return None
    return references[0]
176 |
def parseAbstract(elem):
    """Process the children of an abstract element.

    Every ``t`` element child is handed to parseText with the 'Abstract'
    style. Non-element children are skipped and any other element is
    reported on stdout.
    """
    elementChildren = (node for node in elem.childNodes if node.nodeType == Node.ELEMENT_NODE)
    for node in elementChildren:
        if node.nodeName != 't':
            print('Unexpected tagName in Abstract: ', node.nodeName)
            continue
        parseText(node, style = 'Abstract')
185 |
def parseArea(elem):
    """Append an 'Area: ...' paragraph to the document body.

    The paragraph text is 'Area: ' followed by the concatenation of all the
    text children of ``elem``. Element children are not expected there: each
    one is reported on stdout and otherwise ignored.
    """
    textChunks = []
    for child in elem.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            textChunks.append(child.nodeValue)
        elif child.nodeType == Node.ELEMENT_NODE:
            # The test is on the child: elem itself is always an element
            print('!!!!! parseArea: Text is ELEMENT_NODE: ', child.nodeName)
    docxBody.appendChild(docxNewParagraph('Area: ' + ''.join(textChunks)))
195 |
def parseArtWork(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.5
    """Emit the content of an artwork element, one 'Code' paragraph per line.

    The element is processed only when its type attribute is absent, empty or
    'ascii-art'; any other type is silently skipped. Trailing spaces and tabs
    are removed from each line, and empty lines are kept (removeEmpty = False).
    """
    # getAttribute() returns '' for a missing attribute, so this single test
    # covers the absent, empty and 'ascii-art' cases.
    if elem.getAttribute('type') not in ('', 'ascii-art'):
        return
    # Child elements have a nodeValue of None; skip them rather than failing
    # on the string concatenation.
    figureLines = ''.join(chunk.nodeValue for chunk in elem.childNodes if chunk.nodeValue is not None)
    # Let's split this string into lines and print each line
    for line in figureLines.splitlines():
        docxBody.appendChild(docxNewParagraph(line.rstrip(" \t"), style = 'Code', removeEmpty = False, language = None, cdataSection = True))
207 |
def parseAuthor(elem): # Per https://tools.ietf.org/html/rfc7991#section-2.7
    """Append a right-justified '<author>, <organization>' paragraph and record it in rfcAuthors.

    The author name is the asciiFullname attribute, else the fullname
    attribute, else the initials and surname attributes combined. When none of
    them yields a name, nothing is emitted.
    """
    global rfcAuthors

    # The organization element (https://tools.ietf.org/html/rfc7991#section-2.35)
    # can only contain text; the last text node found wins.
    organization = ''
    for child in elem.childNodes:
        if child.nodeType == Node.ELEMENT_NODE and child.nodeName == 'organization':
            for grandchild in child.childNodes:
                if grandchild.nodeType == Node.TEXT_NODE:
                    organization = ', ' + grandchild.nodeValue

    if elem.hasAttribute('asciiFullname'):
        authorName = elem.getAttribute('asciiFullname')
    elif elem.hasAttribute('fullname'):
        authorName = elem.getAttribute('fullname')
    else:
        authorName = ''
        if elem.hasAttribute('initials'):
            authorName += elem.getAttribute('initials') + ' '
        if elem.hasAttribute('surname'):
            authorName += elem.getAttribute('surname')
        if authorName == '':
            return

    authorLine = authorName + organization
    docxBody.appendChild(docxNewParagraph(authorLine, justification = 'right'))
    rfcAuthors.append(authorLine)
236 |
def parseBack(elem): # https://tools.ietf.org/html/rfc7991#section-2.8
    """Emit a 'References' Heading1 paragraph, then process the element children of elem.

    displayreference, references and section children go to their own parser
    (sections at heading depth 2); any other element is reported on stdout.
    Nothing happens when elem is not an element node.
    """
    if elem.nodeType != Node.ELEMENT_NODE:
        return
    # Let's hope that the children are in the right order... i.e., starting with the references
    heading = docxNewParagraph('References', style = 'Heading1')
    docxBody.appendChild(heading)
    for node in elem.childNodes:
        if node.nodeType != Node.ELEMENT_NODE:
            continue
        tag = node.nodeName
        if tag == 'references':
            parseReferences(node)
        elif tag == 'section':
            parseSection(node, 2)
        elif tag == 'displayreference':
            parseDisplayReference(node)
        else:
            print('!!!! parseBack: unexpected nodeName: ' + tag)
253 |
def parseBcp14(elem): # https://tools.ietf.org/html/rfc7991#section-2.9 only text
    """Return the text of a bcp14 element.

    The value of the first text child is returned. Children of any other type
    are reported on stdout and skipped. An element without a text child yields
    '' so that callers can always concatenate the result.
    """
    if elem.nodeValue is not None:
        print('Bcp14 nodeValue: ' , elem.nodeValue)
    if elem.nodeType == Node.TEXT_NODE:
        print('Bcp14 node is TEXT_NODE')
    for child in elem.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            return child.nodeValue
        # nodeType is an int: convert it before concatenating
        print('!!!! parseBcp14 unexpected nodeType: ' + str(child.nodeType))
    return ''
264 |
def parseBlockQuote(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.10 that is similar to old items
    """Hand elem to parseText() with the 'Quote' style and no numbering or indentation."""
    quoteOptions = {'style': 'Quote', 'numberingID': None, 'indentationLevel': None}
    parseText(elem, **quoteOptions)
267 |
def parseBoilerPlate(elem):
    """Process every section child of elem at heading depth 1.

    Children that are not elements are ignored; any other element is only
    reported on stdout.
    """
    for node in elem.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if node.nodeName == 'section':
                parseSection(node, 1)
            else:
                print('Unexpected tagName in BoilerPlate: ', node.nodeName)
276 |
def parseDate(elem):
    """Append a right-justified '<day> <month> <year>' paragraph and store the string in rfcDate.

    Each part comes from the attribute of the same name and is left out when
    the attribute is absent. Nothing is emitted, and rfcDate is left untouched,
    when the resulting string is empty.
    """
    global rfcDate

    dateString = ''
    # day and month are followed by a space, year closes the string
    for attribute, separator in (('day', ' '), ('month', ' '), ('year', '')):
        if elem.hasAttribute(attribute):
            dateString += elem.getAttribute(attribute) + separator
    if dateString == '':
        return
    docxBody.appendChild(docxNewParagraph(dateString, justification = 'right'))
    rfcDate = dateString
290 |
def parseDisplayReference(elem): # https://tools.ietf.org/html/rfc7991#section-2.19
    """Ignore a displayreference element: it is presentation only and is skipped for now."""
    return None
295 |
def parseDList(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.20
    """Emit the dt and dd children of elem, in document order, through parseText().

    The children should be a series of dt / dd elements in the right order;
    the code is not resilient to out-of-order children. Non-element children
    (sometimes line breaks and indentation) are ignored, and any other element
    is reported on stdout.
    """
    for node in elem.childNodes:
        if node.nodeType != Node.ELEMENT_NODE:
            continue
        # dt: definition term, https://tools.ietf.org/html/rfc7991#section-2.21
        # dd: definition part, https://tools.ietf.org/html/rfc7991#section-2.18
        # Both can contain text and other elements and are handled alike.
        if node.nodeName in ('dt', 'dd'):
            parseText(node)
        else:
            print('!!!! parseDList, unexpected child: ', node.nodeName)
310 |
311 | # TODO switch off language to avoid wrong typos ?
def parseEref(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.24
    """Return the inline text standing for an eref element.

    With a target attribute (the one and only mandatory attribute) the result
    is '[<target>]'. Otherwise the value of the first text child is returned;
    t children met before it are emitted through parseText(). '' is returned
    when nothing was found so that callers can always concatenate the result.
    """
    if elem.nodeValue is not None:
        print('Eref nodeValue: ' , elem.nodeValue)
    if elem.hasAttribute('target'): # one and only mandatory attribute
        return '[' + elem.getAttribute('target') + ']'
    # Only target attribute, so, quite useless to parse other attributes
    if elem.nodeType == Node.TEXT_NODE:
        print('Eref node is TEXT_NODE')
    for child in elem.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            return child.nodeValue
        if child.nodeName == 't':
            print("parseEref recurse into t !!!")
            parseText(child)
    return ''
326 |
def _firstChildValue(parent, tagName):
    """Return the nodeValue of the first child of the first tagName descendant of parent, or None."""
    matches = parent.getElementsByTagName(tagName)
    if matches.length == 0 or matches[0].firstChild is None:
        return None
    return matches[0].firstChild.nodeValue


def parseFigure(elem): # See https://tools.ietf.org/html/rfc7991#section-2.25
    """Emit a figure: preamble, artwork, centered 'Figure: <title>' caption and postamble.

    The title is the title attribute or, failing that, the first child value
    of the name element (same as in section). Preamble, caption and postamble
    are each left out when no text is found for them. Nothing happens when
    elem is not an element node.
    """
    if elem.nodeType != Node.ELEMENT_NODE:
        return
    # Figure had preamble (deprecated but let's process it)
    preamble = _firstChildValue(elem, 'preamble')
    if preamble is not None:
        docxBody.appendChild(docxNewParagraph(preamble))
    # Let's process the artwork
    for child in elem.getElementsByTagName('artwork'):
        parseArtWork(child)

    # Could have a title attribute rather than the name element (same as in section)
    if elem.hasAttribute('title'):
        figureTitle = elem.getAttribute('title')
    else:
        # None for an absent or empty name element, instead of an IndexError
        figureTitle = _firstChildValue(elem, 'name')
    if figureTitle is not None:
        docxBody.appendChild(docxNewParagraph('Figure: ' + figureTitle, justification = 'center'))
    # Figure had postamble (deprecated but let's process it)
    postamble = _firstChildValue(elem, 'postamble')
    if postamble is not None:
        docxBody.appendChild(docxNewParagraph(postamble))
359 |
def parseKeyword(elem):
    """Append a 'Keyword: <text>' paragraph and record each text child of elem in rfcKeywords.

    Child elements are not expected: they are reported on stdout and left out
    of the paragraph.
    """
    global rfcKeywords

    textValue = 'Keyword: '
    for child in elem.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            textValue += child.nodeValue
            rfcKeywords.append(child.nodeValue)
        elif child.nodeType == Node.ELEMENT_NODE:
            # Test the child (not elem, which is always an element) so that
            # comments are no longer reported as elements.
            print('!!!!! parseKeyword: Text is ELEMENT_NODE: ', child.nodeName)
    docxBody.appendChild(docxNewParagraph(textValue))
372 |
def parseList(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.29
    """Emit every t child of elem as a bullet-list paragraph.

    Comments and blank text are ignored; non-blank text, other node types and
    other elements are only reported on stdout.
    """
    for node in elem.childNodes:
        kind = node.nodeType
        if kind == Node.ELEMENT_NODE:
            if node.nodeName == 't':
                # numID = 2 is defined in numbering.xml as bullet list
                parseText(node, style = 'ListParagraph', numberingID = '2', indentationLevel = '0')
            else:
                print('!!!! parseList, unexpected child: ', node.nodeName)
        elif kind == Node.TEXT_NODE: # Unexpected, let's hope it is empty space
            stripped = node.nodeValue.strip(" \t\r\n")
            if stripped != '':
                print("!!!! parseList non empty text = '" + stripped + "'")
        elif kind != Node.COMMENT_NODE:
            print('!!!! parseList, unexpected child node type: ', node)
389 |
def parseListItem(elem, style = 'ListParagraph', numberingID = None, indentationLevel = None):
    """Emit the content of an li element as paragraphs formatted with style / numberingID / indentationLevel.

    Text children and the inline bcp14, eref and xref children are accumulated
    into one paragraph. A nested ol, t or ul child first flushes the text
    accumulated so far, then is emitted by its own parser. Other elements are
    reported on stdout. The text left at the end is flushed last.
    """
    def emitPending(pendingText):
        """Append pendingText as a paragraph, when docxNewParagraph() produced one."""
        p = docxNewParagraph(pendingText, style = style, numberingID = numberingID, indentationLevel = indentationLevel)
        if p:
            docxBody.appendChild(p)

    for i in range(elem.attributes.length):
        attrib = elem.attributes.item(i)
        # Let's ignore this marking as no obvious requirement or support in Office OpenXML
        if attrib.name in ('pn', 'anchor', 'derivedCounter'):
            continue
        print("\tLI unexpected attribute: ", attrib.name, ' = ' , attrib.value)

    textValue = ''
    for child in elem.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            textValue += child.nodeValue
            continue
        tag = child.nodeName
        # 'or ""' guards against an inline parser returning None
        if tag == 'bcp14':
            textValue += parseBcp14(child) or ''
        elif tag == 'eref':
            textValue += parseEref(child) or ''
        elif tag == 'xref':
            textValue += parseXref(child) or ''
        elif tag in ('ol', 't', 'ul'):
            emitPending(textValue) # Need to emit the first part of the text
            # Reset in every branch, otherwise the flushed text is emitted a
            # second time by the final flush.
            textValue = ''
            if tag == 'ol':
                parseOList(child)
            elif tag == 't':
                parseText(child)
            else:
                parseUList(child)
        elif child.nodeType != Node.COMMENT_NODE:
            print('!!!!! parseListItem: Text is ELEMENT_NODE: ', tag)
    emitPending(textValue) # Need to emit the last part of the text
431 |
def parseNote(elem): # See https://tools.ietf.org/html/rfc7991#section-2.33
    """Report on stdout that the element is not supported; elem itself is not read."""
    # NOTE(review): the message starts with a space, a tag name may be missing in front of it - confirm
    message = " is an unsupported tag"
    print(message)
434 |
435 | # TODO should reset the numbering to 1... cfr draft-ietf-anima-autonomic-control-plane-29.xml
def parseOList(elem):
    """Emit every li child of elem as an item of the enumeration list.

    Children that are not elements are ignored; any other element is only
    reported on stdout.
    """
    for node in elem.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if node.nodeName != 'li':
                print('!!!! Unexpected List child: ', node.nodeName)
                continue
            # numID = 1 is defined in numbering.xml as enumeration list
            parseListItem(node, numberingID = '1', indentationLevel = '0')
444 |
def parseReference(elem): # See https://tools.ietf.org/html/rfc7991#section-2.40
    """Emit one bibliography paragraph for a reference element.

    The paragraph is assembled in this order: '[<anchor>] ', the authors, the
    quoted title, the seriesInfo entries, the date and the target attribute.
    A trailing ', ' is removed and a final '.' is appended. Nothing happens
    when elem is not an element node.
    """
    if elem.nodeType != Node.ELEMENT_NODE:
        return
    if elem.hasAttribute('anchor'):
        text = '[' + elem.getAttribute('anchor') + '] '
    else:
        print('!!!! parseReference, missing anchor attribute')
        text = ''

    seriesInfoText = ''
    for serieInfo in elem.getElementsByTagName('seriesInfo'):
        if serieInfo.hasAttribute('name') and serieInfo.hasAttribute('value'):
            # Sometimes the value field is empty... no need to add a useless space
            if serieInfo.getAttribute('value') != '':
                seriesInfoText += serieInfo.getAttribute('name') + ' ' + serieInfo.getAttribute('value') + ', '
            else:
                seriesInfoText += serieInfo.getAttribute('name') + ', '
        else:
            print("!!!! parseReference, no name/value attribute in seriesInfo for " + text)

    # Test the node list itself: indexing [0] on an empty list raises
    # IndexError, and a minidom node is always truthy.
    frontElems = elem.getElementsByTagName('front')
    if frontElems:
        frontElem = frontElems[0]
        for author in frontElem.getElementsByTagName('author'):
            authorName = '?' # Could also simply be in the child element
            if author.hasAttribute('surname'):
                if author.hasAttribute('initials'):
                    authorName = author.getAttribute('surname') + ', ' + author.getAttribute('initials')
                else:
                    authorName = author.getAttribute('surname')
            elif author.hasAttribute('fullname'):
                authorName = author.getAttribute('fullname')
            else: # Let's find the organization element of this very author
                orgElems = author.getElementsByTagName('organization')
                if orgElems:
                    authorName = ''.join(child.nodeValue for child in orgElems[0].childNodes if child.nodeType == Node.TEXT_NODE)
            text += authorName + ', '
        titleElems = frontElem.getElementsByTagName('title')
        if titleElems:
            for child in titleElems[0].childNodes:
                if child.nodeType == Node.TEXT_NODE:
                    text += '"' + child.nodeValue + '", '
        # Insert seriesInfo if any
        text += seriesInfoText
        dateElems = frontElem.getElementsByTagName('date')
        if dateElems:
            dateElem = dateElems[0]
            if dateElem.hasAttribute('year'):
                if dateElem.hasAttribute('month'):
                    text += dateElem.getAttribute('month') + ' ' + dateElem.getAttribute('year') + ', '
                else:
                    text += dateElem.getAttribute('year') + ', '
    else: # In the absence of a front element
        text += seriesInfoText

    if elem.hasAttribute('target'):
        text += elem.getAttribute('target')
    # Let's remove any trailing comma
    if text[-2:] == ', ':
        text = text[:-2]
    text += '.'
    p = docxNewParagraph(text)
    if p:
        docxBody.appendChild(p)
507 |
def parseReferences(elem): # https://tools.ietf.org/html/rfc7991#section-2.42
    """Emit a references section: an optional Heading2 title followed by its references.

    The title is the title attribute or, failing that, the first child value
    of the direct name child. reference children go to parseReference(). An
    'rfc' processing instruction starting with include= is replaced by the
    element returned by includeExternal(), and skipped when that is None.
    Nothing happens when elem is not an element node.
    """
    if elem.nodeType != Node.ELEMENT_NODE:
        return
    sectionTitle = None
    if elem.hasAttribute('title'):
        sectionTitle = elem.getAttribute('title')
    else:
        # Only a direct name child titles this section
        nameChildren = [child for child in elem.childNodes if child.nodeType == Node.ELEMENT_NODE and child.nodeName == 'name']
        if nameChildren:
            # An empty name element has no first child: leave the title unset
            if nameChildren[0].firstChild is not None:
                sectionTitle = nameChildren[0].firstChild.nodeValue
        else:
            print(elem)
            print('??? parseReferences: this references section has not title...')
    if sectionTitle is not None:
        docxBody.appendChild(docxNewParagraph(sectionTitle, 'Heading2', unnumbered = None))

    for child in elem.childNodes:
        if child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
            if child.target == 'rfc' and child.data[0:9] in ("include='", 'include="'):
                # The name runs up to the matching closing quote; fall back to
                # dropping the last character when there is none.
                closing = child.data.find(child.data[8], 9)
                includeName = child.data[9:closing] if closing != -1 else child.data[9:-1]
                # includeExternal() appends '.xml' itself
                if includeName.endswith('.xml'):
                    includeName = includeName[:-4]
                child = includeExternal(includeName)
                if child is None:
                    continue
            else:
                print("parseReferences: skipping unknown processing instruction: target = " + child.target + ", data = " + child.data[0:9])
                continue # Already reported, do not report it again below
        if child.nodeType == Node.TEXT_NODE: # Let's skip whitespace (assuming it is white space...)
            continue
        if child.nodeType != Node.ELEMENT_NODE:
            print('!!!! parseReferences: unexpected nodeType: ', child)
            continue
        if child.nodeName == 'reference':
            parseReference(child)
        elif child.nodeName == 'name': # Already processed as the section title
            continue
        else:
            print('!!!! parseReferences: unexpected nodeName: ' + child.nodeName)
542 |
def parseRfc(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.45
    """Emit one '<label><value>' paragraph for each of the category, submissionType, obsoletes and updates attributes present on elem.

    Nothing happens when elem is not an element node.
    """
    if elem.nodeType != Node.ELEMENT_NODE:
        return
    labelledAttributes = (
        ('category', 'Category: '),
        ('submissionType', 'Submission type: '),
        ('obsoletes', 'Obsoletes: '),
        ('updates', 'Updates: '),
    )
    for attribute, label in labelledAttributes:
        if elem.hasAttribute(attribute):
            docxBody.appendChild(docxNewParagraph(label + elem.getAttribute(attribute)))
555 |
def parseSection(elem, headingDepth):
    """Emit a section-like element: an optional heading, then every element child.

    The heading uses the style 'Heading' + str(headingDepth). Its text is the
    title attribute or, for section elements only, the first child value of
    the direct name child. The numbered attribute, when present, is passed on
    as unnumbered = (value == 'false'). Nested sections are processed at
    headingDepth + 1, the other known elements go to their own parser, and
    unknown elements are reported on stdout. Nothing happens when elem is not
    an element node.
    """
    if elem.nodeType != Node.ELEMENT_NODE:
        return
    if elem.hasAttribute('numbered'):
        unnumbered = (elem.getAttribute('numbered') == 'false')
    else:
        unnumbered = None
    sectionTitle = None
    if elem.hasAttribute('title'):
        sectionTitle = elem.getAttribute('title')
    elif elem.nodeName == 'section': # Other element kinds are also processed by this function
        # Only a direct name child titles this section: searching all
        # descendants would pick up the name of a nested section.
        nameChildren = [child for child in elem.childNodes if child.nodeType == Node.ELEMENT_NODE and child.nodeName == 'name']
        if nameChildren:
            # An empty name element has no first child: leave the title unset
            if nameChildren[0].firstChild is not None:
                sectionTitle = nameChildren[0].firstChild.nodeValue
        else:
            print('??? This section has not title...')
    if sectionTitle is not None:
        docxBody.appendChild(docxNewParagraph(sectionTitle, 'Heading' + str(headingDepth), unnumbered = unnumbered))

    for child in elem.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            continue
        tag = child.nodeName
        if tag == 'section':
            parseSection(child, headingDepth + 1)
        elif tag == 'abstract':
            parseAbstract(child)
        elif tag == 'area':
            parseArea(child)
        elif tag == 'artwork':
            parseArtWork(child)
        elif tag == 'author':
            parseAuthor(child)
        elif tag == 'blockquote':
            parseBlockQuote(child)
        elif tag == 'boilerplate':
            parseBoilerPlate(child)
        elif tag == 'date':
            parseDate(child)
        elif tag == 'dl':
            parseDList(child)
        elif tag == 'figure':
            parseFigure(child)
        elif tag == 'keyword':
            parseKeyword(child)
        elif tag == 'name': # Already processed
            continue
        elif tag == 'note':
            parseNote(child)
        elif tag == 'ol':
            parseOList(child)
        elif tag == 't':
            parseText(child, style = None)
        elif tag == 'seriesInfo':
            parseSeriesInfo(child)
        elif tag == 'texttable':
            parseTextTable(child)
        elif tag == 'title':
            parseTitle(child)
        elif tag == 'toc':
            print('Skipping the ToC')
        elif tag == 'ul':
            parseUList(child)
        elif tag == 'workgroup':
            parseWorkgroup(child)
        else:
            print('!!!!! Unexpected tag in parseSection: ' + child.tagName)
626 |
627 | # TODO handle wrongly formatted
def parseSeriesInfo(elem):
    """Append a right-justified paragraph '<name> <value>  (stream: <stream>)' built from the attributes of elem.

    Each part is left out when its attribute is absent; nothing is emitted when
    no part is present.
    """
    pieces = []
    if elem.hasAttribute('name'):
        pieces.append(elem.getAttribute('name') + ' ')
    if elem.hasAttribute('value'):
        pieces.append(elem.getAttribute('value') + ' ')
    if elem.hasAttribute('stream'):
        pieces.append(' (stream: ' + elem.getAttribute('stream') + ')')
    seriesInfoString = ''.join(pieces)
    if seriesInfoString == '':
        return
    docxBody.appendChild(docxNewParagraph(seriesInfoString, justification = 'right'))
640 |
641 |
def parseText(elem, style = None, numberingID = None, indentationLevel = None, Verbose = None): # See https://tools.ietf.org/html/rfc7991#section-2.53
    """Emit the content of a text element as one or several paragraphs.

    Text children and the inline bcp14, eref and xref children are accumulated
    into the current paragraph, which starts with the hangText attribute value
    when there is one. A figure, list, ol, t, ul or vspace child first flushes
    the text accumulated so far; the child is then emitted by its own parser
    (vspace emits an empty paragraph instead). Other elements are reported on
    stdout. The text left at the end is flushed last.

    style, numberingID and indentationLevel are passed to docxNewParagraph()
    for every flushed paragraph. Verbose enables trace output on stdout.
    """
    def emitPending(pendingText):
        """Append pendingText as a paragraph, when docxNewParagraph() produced one."""
        p = docxNewParagraph(pendingText, style = style, numberingID = numberingID, indentationLevel = indentationLevel)
        if p:
            docxBody.appendChild(p)

    if Verbose:
        print("parseText start: ", elem)
    textValue = ''
    # Mainly for debugging
    for i in range(elem.attributes.length):
        attrib = elem.attributes.item(i)
        if attrib.name == 'hangText':
            textValue = attrib.value
            continue
        # pn: no obvious requirement or support in Office OpenXML
        # indent, keepWithNext: TODO later if really required
        if attrib.name in ('pn', 'indent', 'keepWithNext'):
            continue
        print("\tparseText unexpected attribute: ", attrib.name, '=' , attrib.value)

    for child in elem.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            textValue += child.nodeValue
            if Verbose:
                print("parseText adding TEXT_NODE: '", child.nodeValue, "'")
            continue
        tag = child.nodeName
        # 'or ""' guards against an inline parser returning None
        if tag == 'bcp14':
            textValue += parseBcp14(child) or ''
        elif tag == 'eref':
            textValue += parseEref(child) or ''
        elif tag == 'xref':
            textValue += parseXref(child) or ''
        elif tag in ('figure', 'list', 'ol', 'ul'):
            emitPending(textValue) # Need to emit the first part of the text
            textValue = ''
            if tag == 'figure':
                parseFigure(child)
            elif tag == 'list':
                parseList(child)
            elif tag == 'ol':
                parseOList(child)
            else:
                parseUList(child)
        elif tag == 't':
            emitPending(textValue) # Need to emit the first part of the text
            if Verbose:
                print("parseText found : emitting '", textValue, "'")
            textValue = ''
            parseText(child, style = style, numberingID = numberingID, indentationLevel = indentationLevel, Verbose = Verbose)
        elif tag == 'vspace':
            emitPending(textValue) # Need to emit the first part of the text
            # Now force an empty paragraph
            p = docxNewParagraph('', style = style, removeEmpty = False)
            if p:
                docxBody.appendChild(p)
            textValue = ''
        elif tag != '#comment':
            print('!!!!! parseText: Text is ELEMENT_NODE: ', tag)
    emitPending(textValue) # Need to emit the last part of the text
718 |
def parseTextTable(elem):
    """Report on stdout that the table is skipped and append a centered placeholder paragraph; elem is not read."""
    print('Skipping TextTable')
    placeholder = docxNewParagraph('... a TextTable was not imported...', justification = 'center')
    docxBody.appendChild(placeholder)
722 |
def parseTitle(elem):
    """Append the concatenated text children of elem as a 'Title' paragraph and store that text in rfcTitle."""
    global rfcTitle

    titleText = ''.join(node.nodeValue for node in elem.childNodes if node.nodeType == Node.TEXT_NODE)
    docxBody.appendChild(docxNewParagraph(titleText, 'Title'))
    rfcTitle = titleText
732 |
def parseUList(elem):
    """Emit every li child of elem as an item of the bullet list.

    Children that are not elements are ignored; any other element is only
    reported on stdout.
    """
    for node in elem.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if node.nodeName != 'li':
                print('!!!! Unexpected List child: ', node.nodeName)
                continue
            # numID = 2 is defined in numbering.xml as bullet list
            parseListItem(node, numberingID = '2', indentationLevel = '0')
741 |
def parseWorkgroup(elem):
    """Append a 'Workgroup: <text>' paragraph built from the text children of elem.

    Child elements are not expected: they are reported on stdout and left out
    of the paragraph.
    """
    textValue = 'Workgroup: '
    for child in elem.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            textValue += child.nodeValue
        elif child.nodeType == Node.ELEMENT_NODE:
            # Test the child (not elem, which is always an element) and name
            # the right function in the diagnostic.
            print('!!!!! parseWorkgroup: Text is ELEMENT_NODE: ', child.nodeName)
    docxBody.appendChild(docxNewParagraph(textValue))
751 |
def parseXref(elem): # See also https://tools.ietf.org/html/rfc7991#section-2.66
    """Return the inline text standing for an xref element.

    With a target attribute (the one and only mandatory attribute) the result
    is '[<target>]'. Otherwise the value of the first text child is returned;
    other children met before it are reported on stdout. '' is returned when
    nothing was found so that callers can always concatenate the result.
    """
    if elem.nodeValue is not None:
        print('Xref nodeValue: ' , elem.nodeValue)
    if elem.hasAttribute('target'): # One and only mandatory attribute
        return '[' + elem.getAttribute('target') + ']'
    if elem.nodeType == Node.TEXT_NODE:
        print('Xref node is TEXT_NODE')
    # Only target attribute, so, quite useless to parse further for more attributes
    for child in elem.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            return child.nodeValue
        print('!!!! parseXref, unexpected child.nodeName: ' + child.nodeName) # Only text is allowed
    return ''
764 |
765 |
def processXML(inFilename, outFilename = 'xml2docx.xml'):
    """Convert an RFC XML document into an OpenXML document.xml file.

    inFilename is parsed as a local file when it exists; otherwise it is taken
    as a draft name and 'https://tools.ietf.org/id/<inFilename>.xml' is
    downloaded (the process exits with status 1 when the download fails).
    The rfc element and its front, middle and back parts are then converted
    into a w:document tree, a w:sectPr page description is appended, and the
    result is written to outFilename.

    Sets the globals xmldoc, docxRoot, docxBody and docxDocument.
    """
    global xmldoc
    global docxRoot, docxBody, docxDocument

    if os.path.isfile(inFilename):
        xmldoc = minidom.parse(inFilename)
    else:
        try:
            response = urllib.request.urlopen('https://tools.ietf.org/id/' + inFilename + '.xml')
        except Exception:
            print("Cannot fetch the XML document from the IETF site...")
            sys.exit(1)
        # The context manager closes the HTTP response once it is read
        with response:
            draftString = response.read()
        xmldoc = minidom.parseString(draftString)
        print("Fetching the draft from the IETF site...")

    rfc = xmldoc.getElementsByTagName('rfc')[0]

    front = rfc.getElementsByTagName('front')[0]
    middle = rfc.getElementsByTagName('middle')[0]
    # back may be absent: keep the node list and test it before use
    backElems = rfc.getElementsByTagName('back')

    domImplementation = xml.dom.getDOMImplementation()
    docxRoot = domImplementation.createDocument(None, None, None)

    docxDocument = docxRoot.createElement('w:document')
    # Namespace declarations (and mc:Ignorable) of the w:document element, in output order
    documentAttributes = (
        ('xmlns:wpc', 'http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas'),
        ('xmlns:cx', 'http://schemas.microsoft.com/office/drawing/2014/chartex'),
        ('xmlns:cx1', 'http://schemas.microsoft.com/office/drawing/2015/9/8/chartex'),
        ('xmlns:cx2', 'http://schemas.microsoft.com/office/drawing/2015/10/21/chartex'),
        ('xmlns:cx3', 'http://schemas.microsoft.com/office/drawing/2016/5/9/chartex'),
        ('xmlns:cx4', 'http://schemas.microsoft.com/office/drawing/2016/5/10/chartex'),
        ('xmlns:cx5', 'http://schemas.microsoft.com/office/drawing/2016/5/11/chartex'),
        ('xmlns:cx6', 'http://schemas.microsoft.com/office/drawing/2016/5/12/chartex'),
        ('xmlns:cx7', 'http://schemas.microsoft.com/office/drawing/2016/5/13/chartex'),
        ('xmlns:cx8', 'http://schemas.microsoft.com/office/drawing/2016/5/14/chartex'),
        ('xmlns:mc', 'http://schemas.openxmlformats.org/markup-compatibility/2006'),
        ('xmlns:aink', 'http://schemas.microsoft.com/office/drawing/2016/ink'),
        ('xmlns:am3d', 'http://schemas.microsoft.com/office/drawing/2017/model3d'),
        ('xmlns:o', 'urn:schemas-microsoft-com:office:office'),
        ('xmlns:r', 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'),
        ('xmlns:m', 'http://schemas.openxmlformats.org/officeDocument/2006/math'),
        ('xmlns:v', 'urn:schemas-microsoft-com:vml'),
        ('xmlns:wp14', 'http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing'),
        ('xmlns:wp', 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'),
        ('xmlns:w10', 'urn:schemas-microsoft-com:office:word'),
        ('xmlns:w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'),
        ('xmlns:w14', 'http://schemas.microsoft.com/office/word/2010/wordml'),
        ('xmlns:w15', 'http://schemas.microsoft.com/office/word/2012/wordml'),
        ('xmlns:w16cex', 'http://schemas.microsoft.com/office/word/2018/wordml/cex'),
        ('xmlns:w16cid', 'http://schemas.microsoft.com/office/word/2016/wordml/cid'),
        ('xmlns:w16', 'http://schemas.microsoft.com/office/word/2018/wordml'),
        ('xmlns:w16se', 'http://schemas.microsoft.com/office/word/2015/wordml/symex'),
        ('xmlns:wpg', 'http://schemas.microsoft.com/office/word/2010/wordprocessingGroup'),
        ('xmlns:wpi', 'http://schemas.microsoft.com/office/word/2010/wordprocessingInk'),
        ('xmlns:wne', 'http://schemas.microsoft.com/office/word/2006/wordml'),
        ('xmlns:wps', 'http://schemas.microsoft.com/office/word/2010/wordprocessingShape'),
        ('mc:Ignorable', 'w14 w15 w16se w16cid w16 w16cex wp14'),
    )
    for attributeName, attributeValue in documentAttributes:
        docxDocument.setAttribute(attributeName, attributeValue)
    docxRoot.appendChild(docxDocument)

    docxBody = docxRoot.createElement('w:body')
    docxDocument.appendChild(docxBody)

    parseRfc(rfc)
    parseSection(front, 0)
    parseSection(middle, 0)
    if backElems:
        parseBack(backElems[0])

    sectPrElem = docxRoot.createElement('w:sectPr')

    pgSzElem = docxRoot.createElement('w:pgSz')
    pgSzElem.setAttribute('w:h', '15840')
    pgSzElem.setAttribute('w:w', '12240')
    sectPrElem.appendChild(pgSzElem)

    pgMarElem = docxRoot.createElement('w:pgMar')
    pgMarElem.setAttribute('w:gutter', '0')
    pgMarElem.setAttribute('w:footer', '708')
    pgMarElem.setAttribute('w:header', '708')
    pgMarElem.setAttribute('w:left', '1440')
    pgMarElem.setAttribute('w:bottom', '1400')
    pgMarElem.setAttribute('w:right', '1440')
    pgMarElem.setAttribute('w:top', '1440')
    sectPrElem.appendChild(pgMarElem)

    colsElem = docxRoot.createElement('w:cols')
    colsElem.setAttribute('w:space', '708')
    sectPrElem.appendChild(colsElem)

    docGrid = docxRoot.createElement('w:docGrid')
    docGrid.setAttribute('w:linePitch', '360')
    sectPrElem.appendChild(docGrid)

    docxBody.appendChild(sectPrElem)

    # The with statement closes the file even when serializing fails
    with io.open(outFilename, 'w', encoding = 'utf-8') as docxFile:
        # Ugly but no other way to put attributes in the top XML
        # NOTE(review): both replace() arguments are empty, which makes the call
        # a no-op; presumably it was meant to rewrite the XML declaration - confirm
        docxFile.write(docxRoot.toprettyxml().replace('', ''))
    print('OpenXML document.xml file is at', outFilename)
868 |
def myParseDate(s):
    """Parse a 'day month year' string, with optional day and month, into a naive datetime.

    The month may be abbreviated, spelled out or numeric; a missing day or
    month defaults to 1 (strptime() defaults). Leading and trailing white
    space is ignored. When no format matches, the current UTC time is returned
    as a naive datetime.
    """
    knownFormats = (
        '%d %b %Y', # Short month names first
        '%d %B %Y', # Then full length month names
        '%d %m %Y',
        '%b %Y',    # Dates without a day
        '%B %Y',
        '%m %Y',
        '%Y',       # Year only
    )
    cleaned = s.strip()
    for knownFormat in knownFormats:
        try:
            return datetime.datetime.strptime(cleaned, knownFormat)
        except ValueError:
            continue
    # Giving up... naive UTC "now", as datetime.utcnow() used to return
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo = None)
880 |
def generateDocPropsCore():
    """Build the content of the package part 'docProps/core.xml'.

    The template's core.xml (found under the module-level templateDirectory)
    is parsed and its metadata is refreshed from the module-level RFC globals:
      - dc:creator         <- rfcAuthors, comma separated (when not empty)
      - dcterms:created    <- rfcDate, parsed by myParseDate (when not None)
      - cp:keywords        <- rfcKeywords, comma separated (when not empty)
      - dc:title           <- rfcTitle (when not None)
      - cp:lastModifiedBy  <- always 'Xml2rfc'
      - dcterms:modified   <- always the current UTC time

    Returns:
        The updated core properties as a pretty-printed XML string.

    Raises:
        IndexError: when the template lacks one of the elements to update.
    """
    stampFormat = '%Y-%m-%dT%H:%M:%SZ'
    xmlcore = minidom.parse(templateDirectory + '/docProps/core.xml')

    if rfcAuthors:
        _replaceElementText(xmlcore, 'dc:creator', ', '.join(rfcAuthors))
    if rfcDate is not None:
        createdDate = myParseDate(rfcDate)
        _replaceElementText(xmlcore, 'dcterms:created', createdDate.strftime(stampFormat))
    if rfcKeywords:
        _replaceElementText(xmlcore, 'cp:keywords', ', '.join(rfcKeywords))
    if rfcTitle is not None:
        _replaceElementText(xmlcore, 'dc:title', rfcTitle)

    # Now, let's say that this script did it ;-)
    _replaceElementText(xmlcore, 'cp:lastModifiedBy', 'Xml2rfc')
    now = datetime.datetime.now(datetime.timezone.utc)
    _replaceElementText(xmlcore, 'dcterms:modified', now.strftime(stampFormat))

    # Ugly, but a textual patch is the portable way to put attributes in the
    # top XML declaration: toprettyxml() emits a bare '<?xml version="1.0" ?>'.
    # Only the first occurrence (the declaration itself) is rewritten.
    return xmlcore.toprettyxml().replace(
        '<?xml version="1.0" ?>',
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        1)


def _replaceElementText(xmlDoc, tagName, newText):
    """Replace all children of the first <tagName> element by one text node."""
    elem = xmlDoc.getElementsByTagName(tagName)[0]
    # Drain from the end until nothing is left: looping over elem.childNodes
    # while removing from it would skip every other child.
    while elem.hasChildNodes():
        elem.removeChild(elem.lastChild)
    elem.appendChild(xmlDoc.createTextNode(newText))
923 |
def docxPackage(docxFilename, openXML, templateDirectory):
    """Assemble the .docx file, i.e. the zipped OpenXML package.

    The package is made of the static parts copied from the template
    directory, the generated main document and a freshly generated
    'docProps/core.xml'.

    Args:
        docxFilename: path of the .docx file to create (overwritten if present).
        openXML: path of the generated file to store as 'word/document.xml'.
        templateDirectory: directory holding the static parts of the package.

    NOTE(review): generateDocPropsCore() reads the module-level
    templateDirectory rather than this parameter, so both are expected to
    designate the same directory.
    """
    print('Generating OpenXML packaging file', docxFilename)
    print("\tUsing template in " + templateDirectory)
    coreXML = generateDocPropsCore()
    # 'word/document.xml' is deliberately absent from this list: the output
    # should not have to be moved into the template directory, it is added
    # from openXML below.
    templateParts = [
        '[Content_Types].xml', '_rels/.rels', 'docProps/app.xml',
        'word/fontTable.xml', 'word/settings.xml', 'word/numbering.xml', 'word/webSettings.xml',
        'word/styles.xml', 'word/theme/theme1.xml', 'word/_rels/document.xml.rels']
    with zipfile.ZipFile(docxFilename, 'w', compression=zipfile.ZIP_DEFLATED) as docx:
        for part in templateParts:
            docx.write(templateDirectory + '/' + part, arcname=part)
        docx.write(openXML, arcname='word/document.xml')
        docx.writestr('docProps/core.xml', coreXML)
937 |
if __name__ == '__main__':
    # Command line entry point.
    #   -i / --ifile     input XML file (mandatory)
    #   -o / --ofile     where to write the generated OpenXML document.xml
    #   -t / --template  directory holding the static parts of the .docx
    #   -d / --docx      name of the resulting .docx file
    #   -h               print the usage and exit
    # These variables are deliberately kept at module level: other functions
    # of this file (e.g. generateDocPropsCore) read templateDirectory as a global.
    usageMessage = 'xml2docx.py -i <inputfile> [-o <outputfile>] [-t <templatedir>] [--docx <docxfile>]'
    inFilename = None
    outFilename = None
    templateDirectory = None
    docxFilename = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "d:hi:o:t:", ["ifile=", "ofile=", "template=", "docx="])
    except getopt.GetoptError:
        print(usageMessage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usageMessage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inFilename = arg
        elif opt in ("-o", "--ofile"):
            outFilename = arg
        elif opt in ("-t", "--template"):
            templateDirectory = arg
        elif opt in ("-d", "--docx"):
            docxFilename = arg
    if templateDirectory is None:
        # default template is in the executable directory
        templateDirectory = os.path.dirname(os.path.abspath(sys.argv[0])) + '/template'
    if inFilename is None:
        print('Missing input filename')
        print(usageMessage)
        sys.exit(2)
    if outFilename is None:
        # Must be decided before docxFilename gets its default value below:
        # an explicit --docx sends the intermediate file into the template.
        if docxFilename is not None:
            outFilename = templateDirectory + '/word/document.xml'
        else:
            outFilename = 'xml2docx.xml'
    if docxFilename is None:
        # Only swap the trailing extension; a '.xml' appearing elsewhere in
        # the path (e.g. in a directory name) must be left alone.
        if inFilename.endswith('.xml'):
            docxFilename = inFilename[:-len('.xml')] + '.docx'
        else:
            docxFilename = inFilename + '.docx'

    # Let's generate the openXML word processing 'document.xml' file
    processXML(inFilename, outFilename)

    # Now, let's generate the .DOCX file
    docxPackage(docxFilename, outFilename, templateDirectory)
981 |
--------------------------------------------------------------------------------