├── LICENSE
├── README.md
├── example.lua
├── example.xml
├── tag.sh
├── test.lua
├── xmllpegparser-2.2-0.rockspec
└── xmllpegparser.lua
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Jonathan Poelen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # xmllpegparser
2 |
3 | `xmllpegparser` is a fast XML parser that uses the [`LPeg`](http://www.inf.puc-rio.br/~roberto/lpeg) library.
4 |
5 |
6 | 1. [Installation](#installation)
7 | 2. [Test](#test)
8 | 3. [xmllpegparser API](#xmllpegparser-api)
9 | 1. [Parsing](#parsing)
10 | 2. [Entity](#entity)
11 | 3. [Parsers](#parsers)
12 | 4. [Utility](#utility)
13 | 5. [Document structure (default parser)](#document-structure-default-parser)
14 | 6. [Parser structure](#parser-structure)
15 | 7. [Visitor structure](#visitor-structure)
16 | 8. [Default parser limitations](#default-parser-limitations)
17 | 5. [Licence](#licence)
18 |
19 |
20 |
21 | ## Installation
22 |
23 | ```bash
24 | luarocks install --local https://raw.githubusercontent.com/jonathanpoelen/lua-xmllpegparser/master/xmllpegparser-2.2-0.rockspec
25 |
26 | # or in your local directory lua-xmllpegparser
27 |
28 | luarocks make --local xmllpegparser-2.2-0.rockspec
29 | ```
30 |
31 | ## Test
32 |
33 | Run `./example.lua`.
34 |
35 | ```
36 | ./example.lua xmlfile [replaceentities]
37 | ```
38 |
39 | `replaceentities` = anything, only to enable replacement of entities.
40 |
41 |
42 | ## xmllpegparser API
43 |
44 | ### Parsing
45 |
46 | - `parse(xmlstring[, visitorOrsubEntities[, visitorInitArgs...]])`:\
47 | Returns a tuple `document table, (string error or nil)` (see `visitor.finish`).\
48 | If `subEntities` is `true`, the entities are replaced and a `tentities` member is added to the document `table`.
49 | - `parseFile(filename[, visitorOrsubEntities[, visitorInitArgs...]])`:\
50 | Returns a tuple `document table, error file or error document`.
51 |
52 | ### Entity
53 |
54 | - `defaultEntityTable()`:\
55 | Returns the default entity table (`{ quot='"', ... }`).
56 | - `createEntityTable(docEntities[, resultEntities])`:\
57 | Creates an entity table from the document entity table. Returns `resultEntities`.
58 | - `mkReplaceEntities(entityTable_or_func)`:\
59 | Returns an LPeg expression that can replace entities
60 | - `replaceEntities(s, entityTable_or_func)`:\
61 | Returns a `string`.
62 |
63 | ### Parsers
64 |
65 | - `parser(visitor[, safeVisitor: bool])`:\
66 | Returns a parser.
67 | If all visitor functions return `nil` (except `accuattr`, `init` and `finish`), then `safeVisitor` may be `true` and the parser will optimize the visitor's calls.
68 | - `lazyParser(visitorCreator)`:\
69 | Returns a parser.\
70 | `parser(visitorCreator())` is used on the first call of `myparser.parse(...)`.
71 | - `mkVisitor(evalEntities: bool, defaultEntities: table | function | nil, withoutPosition)`:\
72 | If `not defaultEntities` and `evalEntities` then `defaultEntities = defaultEntityTable()`.\
73 | If `withoutPosition`, then `pos` parameter does not exist for the visitor functions except for `finish`.
74 | - `treeParser`:\
75 | The default parser used by `parse(str, false)`
76 | - `treeParserWithReplacedEntities`:\
77 | The default parser used by `parse(str, true)`
78 | - `treeParserWithoutPos`:\
79 | Parser without `pos` parameter
80 | - `treeParserWithoutPosWithReplacedEntities`:\
81 | Parser without `pos` parameter
82 |
83 | ### Global parser options
84 |
85 | - `enableWithoutPosParser([bool])`:\
86 | Makes the `treeParserWithoutPos*` versions the default parsers.\
87 | `enableWithoutPosParser(false)` is the same as `setDefaultParsers()`.\
88 | Returns the previous parsers.
89 | - `setDefaultParsers(parser, parserWithReplacedEntities | bool | nil)`:\
90 | If `parserWithReplacedEntities == true`, then `parserWithReplacedEntities = parser`.\
91 | A `nil` or `false` value restores the default parser.\
92 | Returns the previous parsers.
93 |
94 | ### Utility
95 |
96 | - `toString(doc: table, indentationText: nil | string, params: nil | table)`:\
97 | - `indentationText` corresponds to the text used at each indentation level. If `nil`, there is no formatting.
98 | - `params` is table with
99 | - `shortEmptyElements: bool = true`: whether empty tags are written in self-closed form.
100 | - `stableAttributes: bool | function = true`: If `true`, attributes are sorted by name. If a function, it takes the attribute table and should return an iterator function that yields each attribute name and its value.
101 | - `inlineTextLengthMax: number = 9999999`: a node that contains only one text is formatted on one line. When the text exceeds this value, it is indented.
102 | - `escape: table`: table of `function(string):string`
103 | - `attr`: text in double quote
104 | - `text`: text node
105 | - `cdata`: text between `<![CDATA[` and `]]>`
106 | - `comment`: text between `<!--` and `-->`
107 | - `escapeFunctions(escapeAmp: bool = false)`:\
108 | Utility function for `params.escape` parameter of `toString`
109 | - `escapeAmp`: escape `&` char in text and attribute
110 | - `escapeComment(string):string`: replace `--` with `—`
111 | - `escapeAttribute(string):string`: replace `<` with `&lt;` and `"` with `&quot;`
112 | - `escapeAttributeAndAmp(string):string`: like `escapeAttribute` + replace `&` with `&amp;`
113 | - `escapeCDATA(string):string`: replace `]]>` with `]]>]]>`
158 | entity = function(pos, name, value),
159 | doctype = function(pos, name, ident, pubident, dtd), -- called after all entity()
160 | accuattr = function(table, name, value), -- `table` is an accumulator that will be transmitted to tag.attrs. Set to `false` to disable this function.
161 | -- If `nil` and `tag` is `not nil`, a default accumulator is used.
162 | -- If `false`, the accumulator is disabled.
163 | -- (`tag(pos, name, accuattr(accuattr({}, attr1, value1), attr2, value2))`)
164 | tag = function(pos, name, attrs), -- for a new tag (`<a>` or `<a/>`)
165 | open = function(), -- only for an open node (`<a>` not `<a/>`), called after `tag`.
166 | close = function(name),
167 | text = function(pos, text),
168 | cdata = function(pos, text), -- or `text` if nil
169 | comment = function(str)
170 | }
171 | ```
172 |
173 | ### Default parser limitations
174 |
175 | - Non-validating
176 | - No DTD support
177 | - Ignore processing instructions
178 |
179 |
180 | ## Licence
181 |
182 | [MIT license](LICENSE)
183 |
184 |
185 |
186 |
--------------------------------------------------------------------------------
/example.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 |
3 | function printelem(e, prefix) -- prints node `e` and its subtree; `prefix` (default '') is the current indentation
4 | prefix = prefix or ''
5 | if e.tag then -- element node
6 | print(prefix .. '<' .. e.tag .. '>')
7 | prefix = ' ' .. prefix
8 | for name, value in pairs(e.attrs) do -- attrs is a name -> value map, so order is irrelevant here
9 | print(prefix .. '@' .. name .. ': ' .. value)
10 | end
11 | for _, child in ipairs(e.children) do -- ipairs: children are a sequence and must be printed in document order
12 | printelem(child, prefix)
13 | end
14 | else -- node without a `tag` field: print its text
15 | print(prefix .. '<> ' .. e.text)
16 | end
17 | end
18 |
19 | function printdoc(doc) -- prints the declared entities, then the node tree, of a parsed document
20 | print('Entities:')
21 | for _, e in ipairs(doc.entities) do -- ipairs keeps declaration order; pairs() gives no ordering guarantee
22 | print(' ' .. e.name .. ': ' .. e.value)
23 | end
24 | print('Data:')
25 | for _, child in ipairs(doc.children) do -- top-level nodes, in document order
26 | printelem(child, ' ')
27 | end
28 | end
29 |
30 | local parseFile = require('xmllpegparser').parseFile
31 | local filename = arg[1] and #arg[1] > 0 and arg[1] or 'example.xml' -- fall back to the bundled sample
32 | local replaceEntities = arg[2] and #arg[2] > 0 -- any non-empty second argument enables entity replacement
33 |
34 | local doc, err = parseFile(filename, replaceEntities)
35 |
36 | if doc then printdoc(doc) end -- doc is nil when the file cannot be opened; report err instead of crashing
37 | if err then
38 | io.stderr:write(err .. '\n')
39 | end
40 |
--------------------------------------------------------------------------------
/example.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | ]>
7 |
8 | something
9 | blah blah
10 |
11 |
12 |
13 | something
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/tag.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e # abort on the first failing command
4 |
5 | if (( $# < 3 )) ; then
6 | echo "$0 major minor revision" >&2
7 | exit 1
8 | fi
9 |
10 | oldfile=(*.rockspec)
11 | oldfile=${oldfile[0]} # first rockspec found in the current directory
12 | name=${oldfile%.rockspec} # strip the extension by name instead of a hard-coded length
13 | lib=${name/-*}
14 | old_rock_vers=${name#*-}
15 | new_rock_vers="$1.$2-$3"
16 | new_std_vers="$1.$2.$3"
17 | newfile="$lib-$new_rock_vers.rockspec"
18 |
19 | sed -i "s/${old_rock_vers//./\\.}/$new_rock_vers/;s/${old_rock_vers//[-.]/\\.}/$new_std_vers/" "$oldfile" # dots escaped so they only match literal dots
20 | sed -i "s/${oldfile//./\\.}/$newfile/" README.md
21 | mv "$oldfile" "$newfile"
22 |
23 | git add "$oldfile" "$newfile" README.md
24 | git commit -vm "update version to $new_std_vers"
25 | git tag "v$new_std_vers"
26 | git push --tags
27 | git push
28 |
--------------------------------------------------------------------------------
/test.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 |
3 | print_or_error = (#arg ~= 0 and error or print)
4 |
5 | function str(t, prefix)
6 | if not t then
7 | return ''
8 | end
9 | local orderedIndex = {}
10 | for i in pairs(t) do
11 | table.insert(orderedIndex, i)
12 | end
13 | table.sort(orderedIndex)
14 |
15 | local s, e = '{\n'
16 | for _, i in pairs(orderedIndex) do
17 | e = t[i]
18 | if type(e) == 'table' then
19 | if i ~= 'parent' then
20 | e = next(e) and str(e, prefix .. ' ') or '{}'
21 | else
22 | e = e.tag
23 | end
24 | end
25 | s = s .. prefix .. ' ' .. i .. ': ' .. tostring(e) .. ',\n'
26 | end
27 | return s .. prefix .. '}'
28 | end
29 |
30 | r = 0
31 |
32 | function printError(a, b, input, err, ierr)
33 | -- compute the position where the difference begins
34 | local idiffer = 0
35 | for i=1,math.min(#a, #b) + 1 do
36 | if a:byte(i) ~= b:byte(i) then
37 | idiffer = i - 1
38 | break
39 | end
40 | end
41 |
42 | print_or_error('[FAILURE]\n '
43 | .. a:sub(1, idiffer) .. '\x1b[31m' .. a:sub(idiffer+1) .. '\x1b[m'
44 | .. '\n ==\n '
45 | .. b:sub(1, idiffer) .. '\x1b[31m' .. b:sub(idiffer+1) .. '\x1b[m'
46 | .. '\n\n with ' .. input
47 | )
48 |
49 | if err then print(' ' .. err .. '/' .. ierr) end
50 | print()
51 |
52 | r = r + 1
53 | end
54 |
55 | function check(tdoc, err, s, input, ierr, resultError)
56 | local doc = str(tdoc, ' ')
57 | if resultError ~= err or s ~= doc then
58 | printError(s, doc, input, err, ierr)
59 | end
60 | end
61 |
62 | function _eq(parser, s, sxml, replaceEntities, resultError)
63 | local tdoc, err = parser.parse(sxml, replaceEntities)
64 | check(tdoc, err, s, sxml, #sxml, resultError)
65 | end
66 |
67 | function _feq(parser, s, filename, replaceEntities, resultError) -- file-based counterpart of _eq; mkEq passes the last two arguments
68 | local tdoc, err = parser.parseFile(filename, replaceEntities)
69 | check(tdoc, err, s, 'file ' .. filename, '???', resultError) -- resultError was previously an undefined global (always nil)
70 | end
71 |
72 | function _nopos(s)
73 | return s:gsub('\n%s+pos: %d+,', ''):gsub(' at position %d+', '')
74 | end
75 |
76 | function mkEq(eq)
77 | return function(s, filename_or_sxml, replaceEntities, resultError)
78 | eq(xmllpegparser, s, filename_or_sxml, replaceEntities, resultError)
79 | xmllpegparser.enableWithoutPosParser()
80 | eq(xmllpegparser, _nopos(s), filename_or_sxml, replaceEntities, resultError and _nopos(resultError))
81 | xmllpegparser.enableWithoutPosParser(false)
82 | end
83 | end
84 |
85 | local eq, feq = mkEq(_eq), mkEq(_feq)
86 |
87 |
88 | xmllpegparser = require'xmllpegparser'
89 |
90 | -- empty file
91 | eq([[{
92 | children: {},
93 | entities: {},
94 | lastpos: 1,
95 | preprocessor: {},
96 | }]],
97 | '')
98 |
99 | -- comment
100 | eq([[{
101 | children: {},
102 | entities: {},
103 | lastpos: 17,
104 | preprocessor: {},
105 | }]],
106 | '')
107 |
108 | -- single text
109 | eq([[{
110 | children: {
111 | 1: {
112 | parent: nil,
113 | pos: 1,
114 | text: abc,
115 | },
116 | },
117 | entities: {},
118 | lastpos: 4,
119 | preprocessor: {},
120 | }]],
121 | 'abc')
122 |
123 | -- single inline tag
124 | eq([[{
125 | children: {
126 | 1: {
127 | attrs: {},
128 | children: {},
129 | parent: nil,
130 | pos: 1,
131 | tag: a,
132 | },
133 | },
134 | entities: {},
135 | lastpos: 5,
136 | preprocessor: {},
137 | }]],
138 | '')
139 |
140 | -- single tag
141 | eq([[{
142 | children: {
143 | 1: {
144 | attrs: {},
145 | children: {
146 | 1: {
147 | parent: a,
148 | pos: 5,
149 | text: b,
150 | },
151 | },
152 | parent: nil,
153 | pos: 1,
154 | tag: a,
155 | },
156 | },
157 | entities: {},
158 | lastpos: 11,
159 | preprocessor: {},
160 | }]],
161 | ' b ')
162 |
163 | -- CDATA
164 | eq([[{
165 | children: {
166 | 1: {
167 | cdata: true,
168 | parent: nil,
169 | pos: 1,
170 | text: xy & ,
171 | },
172 | },
173 | entities: {},
174 | lastpos: 23,
175 | preprocessor: {},
176 | }]],
177 | '')
178 |
179 | -- CDATA with entity replacement
180 | eq([[{
181 | children: {
182 | 1: {
183 | cdata: true,
184 | parent: nil,
185 | pos: 1,
186 | text: xy & ,
187 | },
188 | },
189 | entities: {},
190 | lastpos: 23,
191 | preprocessor: {},
192 | }]],
193 | '', true)
194 |
195 | eq([[{
196 | children: {},
197 | doctype: {
198 | name: language,
199 | pos: 1,
200 | },
201 | entities: {},
202 | lastpos: 20,
203 | preprocessor: {},
204 | }]],
205 | '')
206 |
207 | eq([[{
208 | children: {},
209 | doctype: {
210 | name: language,
211 | pos: 2,
212 | },
213 | entities: {},
214 | lastpos: 23,
215 | preprocessor: {},
216 | }]],
217 | ' ')
218 |
219 | eq([[{
220 | children: {},
221 | doctype: {
222 | name: language,
223 | pos: 1,
224 | },
225 | entities: {},
226 | lastpos: 23,
227 | preprocessor: {},
228 | }]],
229 | '')
230 |
231 | eq([[{
232 | children: {},
233 | doctype: {
234 | dtd: language.dtd,
235 | ident: SYSTEM,
236 | name: language,
237 | pos: 1,
238 | },
239 | entities: {},
240 | lastpos: 42,
241 | preprocessor: {},
242 | }]],
243 | '')
244 |
245 | eq([[{
246 | children: {},
247 | doctype: {
248 | dtd: language.dtd,
249 | ident: PUBLIC,
250 | name: language,
251 | pos: 1,
252 | pubident: /quotedFPI/,
253 | },
254 | entities: {},
255 | lastpos: 56,
256 | preprocessor: {},
257 | }]],
258 | '')
259 |
260 | eq([[{
261 | children: {},
262 | doctype: {
263 | dtd: language.dtd,
264 | ident: SYSTEM,
265 | name: language,
266 | pos: 1,
267 | },
268 | entities: {},
269 | lastpos: 44,
270 | preprocessor: {},
271 | }]],
272 | '')
273 |
274 | eq([[{
275 | children: {},
276 | doctype: {
277 | dtd: language.dtd,
278 | ident: PUBLIC,
279 | name: language,
280 | pos: 1,
281 | pubident: blabla,
282 | },
283 | entities: {},
284 | lastpos: 53,
285 | preprocessor: {},
286 | }]],
287 | '')
288 |
289 | eq([[{
290 | children: {
291 | 1: {
292 | attrs: {},
293 | children: {},
294 | parent: nil,
295 | pos: 1,
296 | tag: a,
297 | },
298 | 2: {
299 | attrs: {},
300 | children: {
301 | 1: {
302 | parent: b,
303 | pos: 11,
304 | text: ad,
305 | },
306 | },
307 | parent: nil,
308 | pos: 8,
309 | tag: b,
310 | },
311 | 3: {
312 | attrs: {},
313 | children: {},
314 | parent: nil,
315 | pos: 17,
316 | tag: c,
317 | },
318 | 4: {
319 | attrs: {},
320 | children: {
321 | 1: {
322 | attrs: {},
323 | children: {
324 | 1: {
325 | parent: e,
326 | pos: 27,
327 | text: ds,
328 | },
329 | },
330 | parent: d,
331 | pos: 24,
332 | tag: e,
333 | },
334 | },
335 | parent: nil,
336 | pos: 21,
337 | tag: d,
338 | },
339 | 5: {
340 | attrs: {},
341 | children: {
342 | 1: {
343 | parent: f,
344 | pos: 40,
345 | text: a,
346 | },
347 | 2: {
348 | attrs: {},
349 | children: {},
350 | parent: f,
351 | pos: 41,
352 | tag: g,
353 | },
354 | 3: {
355 | parent: f,
356 | pos: 45,
357 | text: b,
358 | },
359 | },
360 | parent: nil,
361 | pos: 37,
362 | tag: f,
363 | },
364 | },
365 | entities: {},
366 | lastpos: 50,
367 | preprocessor: {},
368 | }]],
369 | 'addsab')
370 |
371 | eq([[{
372 | children: {
373 | 1: {
374 | attrs: {
375 | name: value,
376 | },
377 | children: {},
378 | parent: nil,
379 | pos: 1,
380 | tag: a,
381 | },
382 | 2: {
383 | attrs: {
384 | name: value,
385 | },
386 | children: {},
387 | parent: nil,
388 | pos: 18,
389 | tag: b,
390 | },
391 | 3: {
392 | attrs: {
393 | name: value,
394 | },
395 | children: {},
396 | parent: nil,
397 | pos: 41,
398 | tag: c,
399 | },
400 | 4: {
401 | attrs: {
402 | name: value,
403 | name2: value2,
404 | },
405 | children: {},
406 | parent: nil,
407 | pos: 60,
408 | tag: d,
409 | },
410 | },
411 | entities: {},
412 | lastpos: 93,
413 | preprocessor: {},
414 | }]],
415 | '')
416 |
417 | eq([[{
418 | children: {
419 | 1: {
420 | attrs: {
421 | name: v>a,
422 | },
423 | children: {},
424 | parent: nil,
425 | pos: 1,
426 | tag: a,
427 | },
428 | 2: {
429 | parent: nil,
430 | pos: 16,
431 | text: > b,
432 | },
433 | 3: {
434 | attrs: {
435 | name: >,
436 | },
437 | children: {
438 | 1: {
439 | parent: c,
440 | pos: 31,
441 | text: d,
442 | },
443 | },
444 | parent: nil,
445 | pos: 19,
446 | tag: c,
447 | },
448 | 4: {
449 | attrs: {
450 | name: a,
451 | },
452 | children: {
453 | 1: {
454 | parent: e,
455 | pos: 48,
456 | text: >f,
457 | },
458 | },
459 | parent: nil,
460 | pos: 36,
461 | tag: e,
462 | },
463 | },
464 | entities: {},
465 | lastpos: 54,
466 | preprocessor: {},
467 | }]],
468 | '> bd>f')
469 |
470 | -- entity without replacement
471 | eq([[{
472 | children: {
473 | 1: {
474 | attrs: {},
475 | children: {
476 | 1: {
477 | parent: a,
478 | pos: 75,
479 | text: b,
480 | },
481 | },
482 | parent: nil,
483 | pos: 72,
484 | tag: a,
485 | },
486 | },
487 | doctype: {
488 | dtd: l.dtd,
489 | ident: SYSTEM,
490 | name: l,
491 | pos: 1,
492 | },
493 | entities: {
494 | 1: {
495 | name: e1,
496 | pos: 29,
497 | value: fdd>d,
498 | },
499 | 2: {
500 | name: e2,
501 | pos: 53,
502 | value: a,
503 | },
504 | },
505 | lastpos: 80,
506 | preprocessor: {},
507 | }]],
508 | 'd"> ]>b')
509 |
510 | -- entity with replacement
511 | eq([[{
512 | children: {
513 | 1: {
514 | attrs: {},
515 | children: {
516 | 1: {
517 | parent: a,
518 | pos: 75,
519 | text: fdd>ddsa;,
520 | },
521 | },
522 | parent: nil,
523 | pos: 72,
524 | tag: a,
525 | },
526 | },
527 | doctype: {
528 | dtd: l.dtd,
529 | ident: SYSTEM,
530 | name: l,
531 | pos: 1,
532 | },
533 | entities: {
534 | 1: {
535 | name: e1,
536 | pos: 29,
537 | value: fdd>d,
538 | },
539 | 2: {
540 | name: e2,
541 | pos: 53,
542 | value: a,
543 | },
544 | },
545 | lastpos: 90,
546 | preprocessor: {},
547 | tentities: {
548 | amp: &,
549 | apos: ',
550 | e1: fdd>d,
551 | e2: a,
552 | gt: >,
553 | lt: <,
554 | nbsp: ,
555 | quot: ",
556 | tab: ]] .. '\t' .. [[,
557 | },
558 | }]],
559 | 'd"> ]>&e1;ds&e2;;', true)
560 |
561 | -- missing closing tag
562 | eq([[{
563 | children: {
564 | 1: {
565 | attrs: {},
566 | children: {
567 | 1: {
568 | parent: AA,
569 | pos: 6,
570 | text: b,
571 | },
572 | },
573 | parent: nil,
574 | pos: 1,
575 | tag: AA,
576 | },
577 | },
578 | entities: {},
579 | error: No matching closing tag for AA at position 1,
580 | lastpos: 7,
581 | preprocessor: {},
582 | }]],
583 | ' b', false, 'No matching closing tag for AA at position 1')
584 |
585 | -- closing tag does not match
586 | eq([[{
587 | bad: {
588 | children: {
589 | 1: {
590 | children: {},
591 | pos: 8,
592 | tag: BB,
593 | },
594 | },
595 | },
596 | children: {
597 | 1: {
598 | attrs: {},
599 | children: {
600 | 1: {
601 | parent: AA,
602 | pos: 6,
603 | text: b,
604 | },
605 | },
606 | parent: nil,
607 | pos: 1,
608 | tag: AA,
609 | },
610 | },
611 | entities: {},
612 | error: No matching opening tag for BB at position 8,
613 | lastpos: 13,
614 | preprocessor: {},
615 | }]],
616 | ' b ', false, 'No matching opening tag for BB at position 8')
617 |
618 | -- closing tag only
619 | eq([[{
620 | bad: {
621 | children: {
622 | 1: {
623 | children: {},
624 | pos: 1,
625 | tag: BB,
626 | },
627 | },
628 | },
629 | children: {},
630 | entities: {},
631 | error: No matching opening tag for BB at position 1,
632 | lastpos: 6,
633 | preprocessor: {},
634 | }]],
635 | '', false, 'No matching opening tag for BB at position 1')
636 |
637 | -- closing tag then tag
638 | eq([[{
639 | bad: {
640 | children: {
641 | 1: {
642 | children: {},
643 | pos: 1,
644 | tag: BB,
645 | },
646 | 2: {
647 | attrs: {},
648 | children: {},
649 | parent: nil,
650 | pos: 6,
651 | tag: a,
652 | },
653 | },
654 | },
655 | children: {},
656 | entities: {},
657 | error: No matching opening tag for BB at position 1,
658 | lastpos: 13,
659 | preprocessor: {},
660 | }]],
661 | '', false, 'No matching opening tag for BB at position 1')
662 |
663 | -- too many closing tag
664 | eq([[{
665 | bad: {
666 | children: {
667 | 1: {
668 | children: {},
669 | pos: 12,
670 | tag: BB,
671 | },
672 | },
673 | },
674 | children: {
675 | 1: {
676 | attrs: {},
677 | children: {
678 | 1: {
679 | parent: a,
680 | pos: 5,
681 | text: b,
682 | },
683 | },
684 | parent: nil,
685 | pos: 1,
686 | tag: a,
687 | },
688 | },
689 | entities: {},
690 | error: No matching opening tag for BB at position 12,
691 | lastpos: 17,
692 | preprocessor: {},
693 | }]],
694 | ' b ', false, 'No matching opening tag for BB at position 12')
695 |
696 | feq([[{
697 | children: {
698 | 1: {
699 | attrs: {},
700 | children: {
701 | 1: {
702 | attrs: {
703 | attribute: &entity1;,
704 | },
705 | children: {
706 | 1: {
707 | parent: lvl1,
708 | pos: 185,
709 | text: something,
710 | },
711 | },
712 | parent: xml,
713 | pos: 157,
714 | tag: lvl1,
715 | },
716 | 2: {
717 | parent: xml,
718 | pos: 204,
719 | text: blah blah,
720 | },
721 | 3: {
722 | attrs: {
723 | attribute: value,
724 | },
725 | children: {},
726 | parent: xml,
727 | pos: 216,
728 | tag: lvl1,
729 | },
730 | 4: {
731 | attrs: {},
732 | children: {
733 | 1: {
734 | attrs: {},
735 | children: {
736 | 1: {
737 | parent: lvl2,
738 | pos: 275,
739 | text: something,
740 | },
741 | },
742 | parent: other,
743 | pos: 262,
744 | tag: lvl2,
745 | },
746 | },
747 | parent: xml,
748 | pos: 250,
749 | tag: other,
750 | },
751 | },
752 | parent: nil,
753 | pos: 149,
754 | tag: xml,
755 | },
756 | },
757 | doctype: {
758 | dtd: something.dtd,
759 | ident: SYSTEM,
760 | name: something,
761 | pos: 40,
762 | },
763 | entities: {
764 | 1: {
765 | name: entity1,
766 | pos: 88,
767 | value: something,
768 | },
769 | 2: {
770 | name: entity2,
771 | pos: 121,
772 | value: test,
773 | },
774 | },
775 | lastpos: 315,
776 | preprocessor: {
777 | 1: {
778 | attrs: {
779 | encoding: UTF-8,
780 | version: 1.0,
781 | },
782 | pos: 1,
783 | tag: xml,
784 | },
785 | },
786 | }]],
787 | 'example.xml')
788 |
789 |
790 | tags={}
791 | parser = xmllpegparser.parser{
792 | tag=function(name)
793 | tags[#tags+1] = name
794 | return 'dummy' -- must not influence the result
795 | end,
796 | finish=function(err, pos)
797 | return {tagnames=tags, err=err, pos=pos}
798 | end
799 | }
800 | _eq(parser, [[{
801 | pos: 12,
802 | tagnames: {
803 | 1: a,
804 | 2: b,
805 | },
806 | }]],
807 | ''
808 | )
809 |
810 |
811 | mkParser = function(...)
812 | local v = xmllpegparser.mkVisitor(...)
813 | local f = v.finish
814 | v.finish = function(...)
815 | local doc, err = f(...)
816 | return (doc and doc.children[1] or {}), err
817 | end
818 | return xmllpegparser.parser(v)
819 | end
820 |
821 | parser1 = mkParser(true, {x='xxx'})
822 | parser2 = mkParser(true, {x='xxx'}, true)
823 |
824 | function peq(s, sxml)
825 | _eq(parser1, s, sxml)
826 | _eq(parser2, _nopos(s), sxml)
827 | end
828 |
829 | peq([[{
830 | attrs: {},
831 | children: {
832 | 1: {
833 | parent: x,
834 | pos: 4,
835 | text: xxx/&y;,
836 | },
837 | },
838 | parent: nil,
839 | pos: 1,
840 | tag: x,
841 | }]],
842 | '&x;/&y;')
843 |
844 | peq([[{
845 | attrs: {},
846 | children: {
847 | 1: {
848 | parent: x,
849 | pos: 51,
850 | text: xxx/yyy,
851 | },
852 | },
853 | parent: nil,
854 | pos: 48,
855 | tag: x,
856 | }]],
857 | ']>&x;/&y;')
858 |
859 | peq([[{
860 | attrs: {},
861 | children: {},
862 | parent: nil,
863 | pos: 20,
864 | tag: a,
865 | }]],
866 | '')
867 |
868 | peq([[{
869 | attrs: {},
870 | children: {},
871 | parent: nil,
872 | pos: 22,
873 | tag: a,
874 | }]],
875 | '')
876 |
877 | peq([[{
878 | attrs: {},
879 | children: {},
880 | parent: nil,
881 | pos: 39,
882 | tag: a,
883 | }]],
884 | ']>')
885 |
886 |
887 | tdoc = xmllpegparser.parse([=[
888 |
889 |
892 |
893 | ]>
894 |
895 | something
896 | something bla bla bla &entity2;
897 | blah blah
898 |
899 |
900 |
901 |
902 | something
903 |
904 |
905 |
906 | ]=])
907 |
908 | function checkToString(tdoc, s, ...)
909 | local s2 = xmllpegparser.tostring(tdoc, ...)
910 | if s ~= s2 then
911 | printError(s:gsub('\n', '\n '), s2:gsub('\n', '\n '), 'TODO')
912 | end
913 | end
914 |
915 | sxml1 = '' ..
916 | 'something' ..
917 | 'something bla bla bla &entity2;' ..
918 | 'blah blah' ..
919 | '' ..
920 | '' ..
921 | '' ..
922 | 'something' ..
923 | '' ..
924 | ''
925 | checkToString({children=tdoc.children}, sxml1)
926 |
927 | checkToString(tdoc,
928 | '' ..
929 | '' ..
931 | '' ..
932 | ']>' ..
933 | sxml1)
934 |
935 | checkToString(tdoc, [=[
936 |
937 |
939 |
940 | ]>
941 |
942 | something
943 | something bla bla bla &entity2;
944 | blah blah
945 |
946 |
947 |
948 | something
949 |
950 | ]=], ' ')
951 |
952 | checkToString(tdoc, [=[
953 |
954 |
956 | ..
957 | ]>
958 |
959 | ..something
960 | ..
961 | ....something bla bla bla &entity2;
962 | ..
963 | ..blah blah
964 | ..
965 | ..
966 | ..
967 | ....something
968 | ..
969 | ]=],
970 | '..', {
971 | inlineTextLengthMax = 10,
972 | shortEmptyElements = false,
973 | escapes = xmllpegparser.escapeFunctions(true),
974 | })
975 |
976 |
977 | if 0 == r then
978 | print('No error')
979 | end
980 | os.exit(r)
981 |
--------------------------------------------------------------------------------
/xmllpegparser-2.2-0.rockspec:
--------------------------------------------------------------------------------
1 | package = "xmllpegparser"
2 | version = "2.2-0"
3 | source = {
4 | url = "git+https://github.com/jonathanpoelen/lua-xmllpegparser", -- GitHub no longer serves the unauthenticated git:// protocol
5 | tag = "v2.2.0"
6 | }
7 | description = {
8 | summary = "Fast XML Parser written with LPeg.",
9 | detailed = [[
10 | Enables parsing a XML file and converting it to a Lua table,
11 | which can be handled directly by your application.
12 | ]],
13 | homepage = "https://github.com/jonathanpoelen/lua-xmllpegparser",
14 | license = "MIT"
15 | }
16 | dependencies = {
17 | "lua >= 5.1",
18 | "lpeg >= 1.0"
19 | }
20 | build = {
21 | type = "builtin",
22 | modules = {
23 | xmllpegparser = "xmllpegparser.lua"
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/xmllpegparser.lua:
--------------------------------------------------------------------------------
1 | -- from https://github.com/jonathanpoelen/lua-xmllpegparser
2 |
3 | local lpeg = require'lpeg'
4 | local lpeg1_0 = type(lpeg.version) == 'function' -- version is a string since 1.1
5 | local S = lpeg.S
6 | local C = lpeg.C
7 | local R = lpeg.R
8 | local Ct = lpeg.Ct
9 | local Cg = lpeg.Cg
10 | local Cs = lpeg.Cs
11 | local P = lpeg.P
12 | local I = lpeg.Cp()
13 | local Cc = lpeg.Cc
14 | local Ce = Cc()
15 |
16 | local Space = S' \n\t'
17 | local Space0 = Space^0
18 | local Space1 = Space^1
19 | local String = (S"'" * (1-S"'")^0 * S"'") + (S'"' * (1-S'"')^0 * S'"')
20 | local CString = (S"'" * C((1-S"'")^0) * S"'") + (S'"' * C((1-S'"')^0) * S'"')
21 | local Name = ((R('az','AZ') + S'_') * (R('az','AZ') + S'_-:' + R'09')^0)
22 | local CName = C(Name)
23 | local Attr = ( Name * Space0 * '=' * Space0 * String )
24 | local CAttr = Cg(CName * Space0 * '=' * Space0 * CString)
25 | local XMLComment = '')^0 * '-->'
26 | local CXMLComment = '')^0 * '-->'
27 | local Entity = ('')
28 | local CEntity = Cg('')
29 |
30 | local noop = function()end -- default used when the visitor has no init/finish callback
31 |
32 | local mt = {__call = function(_, ...) return _.parse(...) end} -- calling a parser table forwards to its `parse` field
33 |
34 | local addI = function(x) return I * x end -- prepend the position capture I to pattern x
35 | local identityFn = function(x) return x end -- leave the pattern unchanged (used when the visitor has no `withpos`)
36 |
37 | -- wrap func so that whatever it returns is discarded and the match yields no capture value
38 | local unsafeCall = function(patt, func)
39 | return patt / function(...) func(...) end
40 | end
41 |
42 | local safeCall = function(patt, func) -- use func directly; selected only when safeVisitor == true
43 | return patt / func
44 | end
45 |
46 | local _parser = function(v, safeVisitor)
47 | local call = safeVisitor == true and safeCall or unsafeCall
48 | local mark = (v.withpos and addI or identityFn)
49 |
50 | local Comment = v.comment and call(CXMLComment, v.comment) or XMLComment
51 | local Comments = Space0 * (Comment * Space0)^0
52 |
53 | local hasAttr = v.accuattr or (v.accuattr ~= false and (v.tag or v.proc))
54 | local CAttrs = hasAttr and (
55 | lpeg1_0 -- Cf is deprecated in 1.1
56 | and lpeg.Cf(Ct'' * (Space1 * CAttr)^0, v.accuattr or rawset) * Space0
57 | or Ct'' * (Space1 * CAttr % (v.accuattr or rawset))^0 * Space0
58 | )
59 | local Attrs = (Space1 * Attr)^0 * Space0
60 | local ProcAttrs = (v.accuattr or (hasAttr and v.proc)) and CAttrs or Attrs
61 | local TagAttrs = (v.accuattr or (hasAttr and v.tag )) and CAttrs or Attrs
62 |
63 | local Preproc = v.proc and
64 | (Comments * call(mark('') * CName * ProcAttrs * '?>', v.proc))^0 or
65 | (Comments * '' * Name * ProcAttrs * '?>' )^0
66 |
67 | local Entities = v.entity and
68 | (Comments * call(Cg(mark(CEntity)), v.entity))^0 or
69 | (Comments * Entity )^0
70 |
71 | local DoctypeEnt = Space0 * (P'>' + '[' * Entities * Comments * ']' * Space0 * '>')
72 | local Doctype = v.doctype and
73 | Comments * (call(mark('' * call(Ce, v.open) + '/>' or
88 | P'>' + '/>'
89 |
90 | local Close = v.close and
91 | '' * call(mark(CName), v.close) * Space0 * '>' or
92 | '' * Name * Space0 * '>'
93 |
94 | local Text = v.text and
95 | call(mark(C((Space0 * (1-S" \n\t<")^1)^1)), v.text) or
96 | ((Space0 * (1-S" \n\t<")^1)^1)
97 |
98 | local Cdata = (v.cdata or v.text) and
99 | '')^0) * ']]>'), v.cdata or v.text) or
100 | '')^0) * ']]>'
101 |
102 | local G = Preproc * Doctype * (Space0 * (Tag * Open + Close + Comment + Cdata + Text))^0 * Space0 * I
103 |
104 | local init, finish = (v.init or noop), (v.finish or noop)
105 |
106 | return function(s, ...)
107 | local err
108 | local pos = init(...)
109 | pos = G:match(s, pos)
110 | if #s >= pos then
111 | err = 'parse error at position ' .. tostring(pos)
112 | end
113 |
114 | local doc, verr = finish(err, pos, s)
115 | return doc, (verr == nil and err or verr)
116 | end
117 | end
118 |
119 |
120 | local mkparser = function(pf) -- builds a callable parser table around the parse function `pf`
121 | local p
122 | p = setmetatable({
123 | parse = pf,
124 | parseFile = function(filename, ...) -- reads the whole file, then delegates to p.parse; returns doc, err
125 | local f, err = io.open(filename)
126 | if f then
127 | local content = f:read'*a'
128 | f:close()
129 | return p.parse(content, ...) -- forward every result; the former `, nil` discarded the parse error
130 | end
131 | return f, err -- open failure: nil plus the io.open error message
132 | end,
133 | }, mt)
134 | return p
135 | end
136 |
137 | --! Create a parser.
138 | --! @param[in] visitor table : see mkVisitor()
139 | --! @param[in] safeVisitor boolean : when true, optimizes the parser.
140 | --! Should only be used if all visitor functions (except accuattr, init and finish) return nil
141 | --! @return Parser
142 | --!
143 | --! @code
144 | --! -- all values are optional
145 | --! visitor = {
146 | --! withpos = boolean, -- indicates whether the visitor functions receive a leading pos parameter (except `finish`)
147 | --! init = function(...), -- called before parsing, returns the position of the beginning of match or nil
148 | --! finish = function(err, pos, xmlstring), -- called after parsing, returns (doc, err) or nil
149 | --! proc = function(pos, name, attrs) or function(name, attrs), -- for `<?...?>`
150 | --! entity = function(entityName, entityValue),
151 | --! doctype = function(pos, name, ident, pubident, dtd) or function(name, ident, pubident, dtd), -- called after all entity()
152 | --! accuattr = function(table, attrName, attrValue),
153 | --! -- `table` is an accumulator that will be transmitted to tag.attrs.
154 | --! -- Set to `false` to disable this function.
155 | --! -- If `nil` and `tag` is `not nil`, a default accumulator is used.
156 | --! -- If `false`, the accumulator is disabled.
157 | --! -- (`tag(pos, name, accuattr(accuattr({}, attr1, value1), attr2, value2))`)
158 | --! tag = function(name, attrs), -- for a new tag (`<a>` or `<a/>`)
159 | --! open = function(), -- only for an open node (`<a>` not `<a/>`), called after `tag`.
160 | --! close = function(name),
161 | --! text = function(text),
162 | --! cdata = function(text), -- or `text` if nil
163 | --! comment = function(str),
164 | --! }
165 | --!
166 | --! parser = {
167 | --! __call = --[[call parse]]
168 | --! parse = function(str, --[[visitorInitArgs]]...),
169 | --! parseFile = function(filename, --[[visitorInitArgs]]...),
170 | --! }
171 | --! @endcode
172 | local function parser(visitor, safeVisitor)
173 | return mkparser(_parser(visitor, safeVisitor))
174 | end
175 |
--! Build a fresh table holding the default entities.
--! A new table is created on every call, so callers may modify the result.
--! @return table : entity name as key, replacement text as value
local function defaultEntityTable()
  local entities = {}
  entities.quot = '"'
  entities.apos = '\''
  entities.lt = '<'
  entities.gt = '>'
  entities.amp = '&'
  entities.tab = '\t'
  entities.nbsp = ' '
  return entities
end
181 |
-- Matches an entity reference `&name;` and captures `name`.
local DeclEntity = P('&') * C((P(1) - P(';'))^1) * P(';')

--! Returns an LPeg expression that can replace entities.
--! @param[in] repl : right-hand operand of the LPeg `/` operator applied to
--!   each captured entity name (typically an entity table)
--! @return LPeg pattern performing the substitution over the whole subject
--! @code
--!   p = mkReplaceEntities(defaultEntityTable())
--!   str = 'a &amp; b'
--!   str = p:match(str)
--!   assert(str == 'a & b')
--! @endcode
local function mkReplaceEntities(repl)
  local replacedEntity = DeclEntity / repl
  local anyChar = P(1)
  return Cs((replacedEntity + anyChar)^0)
end
194 |
--! Replace every `&name;` of `s` by `entities[name]`.
--! When the lookup yields nil or false, string.gsub keeps the original match.
--! @param[in] s string
--! @param[in] entities table : with entity name as key and value as replacement
--! @return string
local function replaceEntities(s, entities)
  -- string.gsub also returns the number of substitutions. Keep only the
  -- string, as documented, so the count cannot leak into a caller's
  -- argument list (e.g. `table.insert(t, replaceEntities(s, e))`).
  local replaced = s:gsub('&([^;]+);', entities)
  return replaced
end
201 |
--! Add entities to resultEntities from the document entity table.
--! Create new table (from defaultEntityTable()) when resultEntities is nil.
--! Each entry of docEntities is a table `{name=string, value=string}`; its
--! `value` member is overwritten in place with the expanded text.
--! @param[in,out] docEntities table
--! @param[in,out] resultEntities table|nil
--! @return resultEntities or a new table when nil
local function createEntityTable(docEntities, resultEntities)
  local entities = resultEntities or defaultEntityTable()

  -- Expands references to already known entities, then registers the entity.
  local function register(e)
    e.value = replaceEntities(e.value, entities)
    entities[e.name] = e.value
  end

  -- An entity may refer to entities declared before it, so the sequence part
  -- must be walked in declaration order; `pairs` gives no ordering guarantee.
  local count = 0
  for i, e in ipairs(docEntities) do
    count = i
    register(e)
  end

  -- Entries stored under any other key are still processed, as before.
  for k, e in pairs(docEntities) do
    local alreadyDone = type(k) == 'number' and k >= 1 and k <= count and k % 1 == 0
    if not alreadyDone then
      register(e)
    end
  end

  return entities
end
215 |
--! Create a visitor that builds a document table (see `init` and `finish`).
--! If `evalEntities`, entity references in attribute values and text nodes
--! (not CDATA) are replaced, using the default entities plus the entities
--! declared in the doctype.
--! `defaultEntities` selects the default entities:
--!  - table: shallow-copied each time a fresh entity table is needed;
--!  - function: called each time a fresh entity table is needed;
--!  - nil, false or true: defaultEntityTable() is used.
--! If `withoutPosition`, then `pos` parameter does not exist for the visitor
--! functions except for `finish`.
--! @param[in] evalEntities boolean
--! @param[in] defaultEntities boolean|table|function
--! @param[in] withoutPosition boolean
--! @return visitor table and true for safeVisitor (see parser())
local function mkVisitor(evalEntities, defaultEntities, withoutPosition)
  local root, elem, doc, bad, SubEntity, accuattr, doctype, text, badclose

  -- Factory returning a fresh default entity table.
  -- `true` is a documented value for defaultEntities: it must select the
  -- built-in table instead of being used as the factory itself (calling a
  -- boolean raises an error as soon as entities are evaluated).
  local mkDefaultEntities
  if type(defaultEntities) == 'table' then
    mkDefaultEntities = function()
      local t = {}
      for k,e in pairs(defaultEntities) do
        t[k] = e
      end
      return t
    end
  elseif defaultEntities and defaultEntities ~= true then
    mkDefaultEntities = defaultEntities
  else
    mkDefaultEntities = defaultEntityTable
  end

  if evalEntities then
    -- Attribute accumulator: stores the value with entities replaced.
    accuattr = function(a,k,v)
      a[k] = SubEntity:match(v)
      return a
    end

    -- The doctype callback runs after all entity() calls, so the full entity
    -- table can be computed here and the replacement pattern rebuilt.
    doctype = withoutPosition and function(name, ident, pubident, dtd)
      doc.doctype = {name=name, ident=ident, pubident=pubident, dtd=dtd}
      doc.tentities = createEntityTable(doc.entities, mkDefaultEntities())
      SubEntity = mkReplaceEntities(doc.tentities)
    end or function(pos, name, ident, pubident, dtd)
      doc.doctype = {name=name, ident=ident, pubident=pubident, dtd=dtd, pos=pos}
      doc.tentities = createEntityTable(doc.entities, mkDefaultEntities())
      SubEntity = mkReplaceEntities(doc.tentities)
    end

    text = withoutPosition and function(str)
      elem.children[#elem.children+1] = {parent=elem, text=SubEntity:match(str)}
    end or function(pos, str)
      elem.children[#elem.children+1] = {parent=elem, text=SubEntity:match(str), pos=pos}
    end
  else
    -- accuattr stays nil here

    doctype = withoutPosition and function(name, ident, pubident, dtd)
      doc.doctype = {name=name, ident=ident, pubident=pubident, dtd=dtd}
    end or function(pos, name, ident, pubident, dtd)
      doc.doctype = {name=name, ident=ident, pubident=pubident, dtd=dtd, pos=pos}
    end

    text = withoutPosition and function(str)
      elem.children[#elem.children+1] = {parent=elem, text=str}
    end or function(pos, str)
      elem.children[#elem.children+1] = {parent=elem, text=str, pos=pos}
    end
  end

  -- Records a closing tag that has no matching opening tag.
  -- The first such element is remembered in `badclose` for the error message;
  -- when the current element became nil, further nodes are attached to `bad`.
  local pushCloseError = function(tagname, pos)
    local errElem = withoutPosition
      and {tag=tagname, children={}}
      or {tag=tagname, children={}, pos=pos-2}
    bad.children[#bad.children+1] = errElem
    badclose = badclose or errElem
    elem = elem or bad
  end

  return {
    withpos=not withoutPosition,
    accuattr=accuattr,
    doctype=doctype,
    text=text,

    -- CDATA content is stored as is (no entity replacement).
    cdata = withoutPosition and function(str)
      elem.children[#elem.children+1] = {parent=elem, text=str, cdata=true}
    end or function(pos, str)
      elem.children[#elem.children+1] = {parent=elem, text=str, cdata=true, pos=pos-9}
    end,

    -- Resets the whole state; the document shares its children with the root.
    init=function()
      bad = {children={}}
      root = {children={}}
      doc = {preprocessor={}, entities={}, children=root.children}
      elem = root
      badclose = nil
      if evalEntities then
        SubEntity = mkReplaceEntities(mkDefaultEntities())
      end
    end,

    -- Completes the error message with unbalanced tag information and
    -- returns the document together with the error (nil when none).
    finish=function(err, pos)
      if badclose then
        doc.bad = bad
        err = (err and err .. ' ' or '')
           .. 'No matching opening tag for ' .. tostring(badclose.tag)
           .. (badclose.pos and ' at position ' .. tostring(badclose.pos) or '')
      elseif root ~= elem then
        err = (err and err .. ' ' or '')
           .. 'No matching closing tag for ' .. tostring(elem.tag)
           .. (elem.pos and ' at position ' .. tostring(elem.pos) or '')
      end

      doc.lastpos = pos
      if err then
        doc.error = err
      end
      return doc, err
    end,

    proc=withoutPosition and function(name, attrs)
      doc.preprocessor[#doc.preprocessor+1] = {tag=name, attrs=attrs}
    end or function(pos, name, attrs)
      doc.preprocessor[#doc.preprocessor+1] = {tag=name, attrs=attrs, pos=pos}
    end,

    entity=withoutPosition and function(k, v)
      doc.entities[#doc.entities+1] = {name=k, value=v}
    end or function(pos, k, v)
      doc.entities[#doc.entities+1] = {name=k, value=v, pos=pos}
    end,

    tag=withoutPosition and function(name, attrs)
      elem.children[#elem.children+1] = {tag=name, attrs=attrs, parent=elem, children={}}
    end or function(pos, name, attrs)
      elem.children[#elem.children+1] = {tag=name, attrs=attrs, parent=elem, children={}, pos=pos-1}
    end,

    -- Called after `tag` for a non-empty element: descend into the new node.
    open=function()
      elem = elem.children[#elem.children]
    end,

    -- Goes back to the parent; a mismatching name or a missing parent is
    -- recorded through pushCloseError.
    close=withoutPosition and function(tagname)
      local currentTag = elem.tag
      elem = elem.parent
      if elem and currentTag == tagname then
        return
      end
      pushCloseError(tagname)
    end or function(pos, tagname)
      local currentTag = elem.tag
      elem = elem.parent
      if elem and currentTag == tagname then
        return
      end
      pushCloseError(tagname, pos)
    end,
  }, true -- safeVisitor
end
363 |
--! Create a parser whose visitor is built on the first call of `parse`.
--! On that first call, the results of visitorCreator() are forwarded to
--! _parser() and the parser's `parse` member is replaced by the function it
--! returns, so the visitor is created only once.
--! @param[in] visitorCreator function
--! @return Parser, followed by `true`
local function lazyParser(visitorCreator)
  local lazy

  local function firstParse(...)
    local realParse = _parser(visitorCreator())
    lazy.parse = realParse
    return realParse(...)
  end

  lazy = mkparser(firstParse)
  return lazy, true
end
375 |
--! @{
--! Document structure for default parser:
--! @code
--!   -- pos member = index of string. Only when visitor.withpos == true
--!   document = {
--!     children = {
--!       { pos=number, parent=table or nil, text=string[, cdata=true] } or
--!       { pos=number, parent=table or nil, tag=string, attrs={ { name=string, value=string }, ... }, children={ ... } },
--!       ...
--!     },
--!     bad = { children={ ... } } -- when a closed node has no match
--!     preprocessor = { { pos=number, tag=string, attrs={ { name=string, value=string }, ... } }, ... },
--!     doctype = { pos=number, name=string, ident=string, pubident=string or nil, dtd=string or nil }, -- if there is a doctype
--!     error = string, -- if error
--!     lastpos = number, -- last known position of parse()
--!     entities = { { pos=number, name=string, value=string }, ... },
--!     tentities = { name=value, ... } -- only if evalEntities = true
--!   }
--! @endcode

-- Builds a lazy parser around mkVisitor(evalEntities, nil, withoutPosition).
local function lazyTreeParser(evalEntities, withoutPosition)
  local lazy = lazyParser(function()
    return mkVisitor(evalEntities, nil, withoutPosition)
  end)
  return lazy
end

-- The default parser used by parse(str, false)
local treeParser = lazyTreeParser(nil, nil)
-- The default parser used by parse(str, true)
local treeParserWithReplacedEntities = lazyTreeParser(true, nil)
-- Parser without `pos` parameter
local treeParserWithoutPos = lazyTreeParser(nil, true)
-- Parser without `pos` parameter and with replaced entities
local treeParserWithoutPosWithReplacedEntities = lazyTreeParser(true, true)
--! @}

local _defaultParser = treeParser
local _defaultParserWithReplacedEntities = treeParserWithReplacedEntities
407 |
--! Selects the family of built-in tree parsers used as default parsers.
--! @param[in] b boolean|nil : when nil or true, sets parsers that do not take
--!   a position as default parsers; any other value restores the parsers with
--!   position.
--! @return old defaultParser and defaultParserWithReplacedEntities
local function enableWithoutPosParser(b)
  local previous, previousWithEntities = _defaultParser, _defaultParserWithReplacedEntities

  if b ~= nil and b ~= true then
    _defaultParser = treeParser
    _defaultParserWithReplacedEntities = treeParserWithReplacedEntities
  else
    _defaultParser = treeParserWithoutPos
    _defaultParserWithReplacedEntities = treeParserWithoutPosWithReplacedEntities
  end

  return previous, previousWithEntities
end
419 |
--! Sets default parsers for without and with entity replacement.
--! @param[in] p table|nil : Use treeParser when p is nil
--! @param[in] pWithReplacedEntities table|boolean|nil :
--!   - true: the parser selected for `p` is also used with entity replacement;
--!   - nil or false: treeParserWithReplacedEntities is used;
--!   - otherwise: the given parser is used.
--! @return old defaultParser and defaultParserWithReplacedEntities
local function setDefaultParsers(p, pWithReplacedEntities)
  local previous, previousWithEntities = _defaultParser, _defaultParserWithReplacedEntities

  _defaultParser = p or treeParser

  if pWithReplacedEntities == true then
    _defaultParserWithReplacedEntities = _defaultParser
  else
    -- nil and false both select the built-in parser
    _defaultParserWithReplacedEntities = pWithReplacedEntities or treeParserWithReplacedEntities
  end

  return previous, previousWithEntities
end
437 |
--! Returns a parser.
--! @param[in] visitorOrEvalEntities table|bool|nil :
--!   When visitorOrEvalEntities is nil or false, the default parser is
--!   returned; when it is true, the default parser with replaced entities is
--!   returned (see \c setDefaultParsers()).
--!   Otherwise a new parser is created from it with \c parser().
--! @return Parser
local getParser = function(visitorOrEvalEntities)
  if not visitorOrEvalEntities then
    return _defaultParser
  end

  if visitorOrEvalEntities == true then
    return _defaultParserWithReplacedEntities
  end

  local customParser = parser(visitorOrEvalEntities)
  return customParser
end
449 |
--! Parses a string with the parser selected by \c getParser() and returns
--! what its `parse` function returns: a tuple
--! `document table, (string error or nil)`. See `visitor.finish`.
--! @param[in] xmlstring string : xml data
--! @param[in,out] visitorOrEvalEntities table|bool|nil : see \c getParser()
--! @param[in,out] ... argument for visitor.init()
--! @return table
local function parse(xmlstring, visitorOrEvalEntities, ...)
  local selected = getParser(visitorOrEvalEntities)
  return selected.parse(xmlstring, ...)
end
458 |
--! Parses a file with the parser selected by \c getParser() and returns what
--! its `parseFile` function returns: a tuple `document table, file error`.
--! @param[in] filename string
--! @param[in,out] visitorOrEvalEntities table|bool|nil : see \c getParser()
--! @param[in,out] ... argument for visitor.init()
--! @return table
local function parseFile(filename, visitorOrEvalEntities, ...)
  local selected = getParser(visitorOrEvalEntities)
  return selected.parseFile(filename, ...)
end
467 |
468 |
-- Ordering used to sort `{name, value}` pairs: compares the names only.
local function flatAttrCmp(a, b)
  local leftName, rightName = a[1], b[1]
  return leftName < rightName
end
472 |
local tinsert = table.insert
local tremove = table.remove

-- Appends ` name="value"` to the string list `t` for each pair produced by
-- the iterator function `it`; values go through `escapeAttr` first.
-- `it` is used alone in the generic `for` (no state, no initial value).
local function insertAttrs(t, it, escapeAttr)
  for name,value in it do
    tinsert(t, ' ')
    tinsert(t, name)
    tinsert(t, '="')
    -- Keep only the first result: an escape function written as
    -- `return s:gsub(...)` also returns a count, which would otherwise turn
    -- this call into the positional form table.insert(t, pos, value).
    tinsert(t, (escapeAttr(value)))
    tinsert(t, '"')
  end
end
485 |
-- Returns the indentation prefix of level `lvl`.
-- `tindent` caches the prefixes by level; a missing one is built from the
-- previous level followed by `indentationText`, then stored.
local function toStringComputeIndent(tindent, lvl, indentationText)
  local cached = tindent[lvl]
  if cached then
    return cached
  end

  local computed = tindent[lvl - 1] .. indentationText
  tindent[lvl] = computed
  return computed
end
494 |
-- Replaces each `--` of `s` with `—`, since `--` is not allowed inside an
-- XML comment. Returns the resulting string only.
local function escapeComment(s)
  -- `-` is a pattern quantifier: the unescaped pattern '--' means "lazy
  -- repetition of `-`", which matches the empty string at every position.
  -- Both dashes have to be escaped to match two literal dashes.
  s = s:gsub('%-%-', '—')
  return s
end
499 |
-- Escapes `<` and `"` so that `s` can be written between double quotes as an
-- attribute value. `&` is left untouched (see escapeAttributeAndAmp).
-- Returns the resulting string only.
local function escapeAttribute(s)
  -- The replacements must be the entity references; replacing a character
  -- with itself leaves the value unescaped.
  s = s:gsub('<', '&lt;'):gsub('"', '&quot;')
  return s
end
504 |
-- Escapes `&`, `<` and `"` so that `s` can be written between double quotes
-- as an attribute value. Returns the resulting string only.
local function escapeAttributeAndAmp(s)
  -- `&` goes first so that the `&` of the entities added afterwards is not
  -- escaped a second time. The replacements must be the entity references;
  -- replacing a character with itself leaves the value unescaped.
  s = s:gsub('&', '&amp;'):gsub('<', '&lt;'):gsub('"', '&quot;')
  return s
end
509 |
510 | local function escapeCDATA(s)
511 | s = s:gsub(']]>', ']]>]]>
565 | --! - \b comment: text between
566 | --! @return string
567 | local function documentToString(tdoc, indentationText, params)
568 | local escapeFns = params and params.escapes
569 | -- luacheck: push ignore 431
570 | local escapeAttr = escapeFns and escapeFns.attr or identityFn
571 | local escapeText = escapeFns and escapeFns.text or identityFn
572 | local escapeCDATA = escapeFns and escapeFns.cdata or identityFn
573 | local escapeComment = escapeFns and escapeFns.comment or identityFn
574 | -- luacheck: pop
575 | local inlineTextLengthMax = params and params.inlineTextLengthMax or 9999999
576 | local shortEmptyElements = not params or params.shortEmptyElements == nil or params.shortEmptyElements
577 |
578 | local attrIter
579 | if not params or params.stableAttributes == nil or params.stableAttributes == true then
580 | attrIter = function(attrs)
581 | local flatAttrs = {}
582 | for attr,value in pairs(attrs) do
583 | tinsert(flatAttrs, {attr,value})
584 | end
585 |
586 | table.sort(flatAttrs, flatAttrCmp)
587 |
588 | local idx = 0
589 | return function() -- simplified iterator since used only once
590 | idx = idx + 1
591 | local t = flatAttrs[idx]
592 | if t then
593 | return t[1], t[2]
594 | end
595 | end, flatAttrs, nil
596 | end
597 | elseif params.stableAttributes == false then
598 | attrIter = identityFn
599 | else
600 | attrIter = params.stableAttributes
601 | end
602 |
603 | local strs = {}
604 |
605 | local proc = tdoc.preprocessor
606 | if proc then
607 | for _, e in pairs(proc) do
608 | tinsert(strs, '')
609 | tinsert(strs, e.tag)
610 | insertAttrs(strs, attrIter(e.attrs), escapeAttr)
611 | tinsert(strs, '?>')
612 | end
613 | end
614 |
615 | local prefix = indentationText and '\n' or ''
616 | local tindent = {prefix}
617 |
618 | indentationText = indentationText or ''
619 |
620 | local doctype = tdoc.doctype
621 | if doctype then
622 | if proc then
623 | tinsert(strs, prefix)
624 | end
625 |
626 | tinsert(strs, '')
654 | end
655 |
656 | tinsert(strs, '[')
657 | if tdoc.entities then
658 | for _,t in pairs(tdoc.entities) do
659 | addEntity(t.name, t.value)
660 | end
661 | else
662 | for name,value in pairs(tdoc.tentities) do
663 | addEntity(name, value:gsub('%', '%'))
664 | end
665 | end
666 | tinsert(strs, prefix)
667 | tinsert(strs, ']')
668 | end
669 |
670 | tinsert(strs, '>')
671 | end
672 |
673 | local elems = tdoc.children
674 | if elems and elems[1] then
675 | local emptyTable = {}
676 |
677 | local lvl = 1
678 | local depths = {}
679 |
680 | local i = 1
681 | local e, e2, tag, children, node
682 |
683 | ::loop::
684 |
685 | e = elems[i]
686 | tag = e.tag
687 |
688 | -- tag
689 | if tag then
690 | tinsert(strs, prefix)
691 | tinsert(strs, '<')
692 | tinsert(strs, tag)
693 | insertAttrs(strs, attrIter(e.attrs), escapeAttr)
694 |
695 | children = e.children or emptyTable
696 |
697 | -- has at least 2 children or a tag as child
698 | if children[2] or (children[1] and children[1].tag) then
699 | tinsert(strs, '>')
700 |
701 | tinsert(depths, {elems, i})
702 | i = 0
703 | elems = children
704 | lvl = lvl + 1
705 | prefix = toStringComputeIndent(tindent, lvl, indentationText)
706 |
707 | -- only has one text as child
708 | elseif children[1] and children[1].text then
709 | tinsert(strs, '>')
710 | e2 = children[1]
711 | -- CDATA
712 | if e2.cdata then
713 | tinsert(strs, toStringComputeIndent(tindent, lvl+1, indentationText))
714 | tinsert(strs, '')
717 | tinsert(strs, prefix)
718 | -- inline text
719 | elseif #e2.text <= inlineTextLengthMax then
720 | tinsert(strs, escapeText(e2.text))
721 | -- text
722 | else
723 | tinsert(strs, toStringComputeIndent(tindent, lvl+1, indentationText))
724 | tinsert(strs, escapeText(e2.text))
725 | tinsert(strs, prefix)
726 | end
727 | tinsert(strs, '')
728 | tinsert(strs, tag)
729 | tinsert(strs, '>')
730 |
731 | -- empty short tag
732 | elseif shortEmptyElements then
733 | tinsert(strs, '/>')
734 |
735 | -- empty tag
736 | else
737 | tinsert(strs, '>')
738 | tinsert(strs, tag)
739 | tinsert(strs, '>')
740 | end
741 |
742 | -- text
743 | elseif e.text then
744 | -- CDATA
745 | if e.cdata then
746 | tinsert(strs, prefix)
747 | tinsert(strs, '')
750 | else
751 | tinsert(strs, prefix)
752 | tinsert(strs, escapeText(e.text))
753 | end
754 |
755 | -- comment
756 | elseif e.comment then
757 | tinsert(strs, prefix)
758 | tinsert(strs, '')
761 | end
762 |
763 | i = i + 1
764 | e = elems[i]
765 |
766 | -- close parent
767 | while not e do
768 | node = tremove(depths)
769 | if not node then
770 | return table.concat(strs, '')
771 | end
772 | elems = node[1]
773 | i = node[2]
774 | lvl = lvl - 1
775 | prefix = tindent[lvl]
776 | tinsert(strs, prefix)
777 | tinsert(strs, '')
778 | tinsert(strs, elems[i].tag)
779 | tinsert(strs, '>')
780 | i = i + 1
781 | e = elems[i]
782 | end
783 |
784 | goto loop
785 | end
786 |
787 | return table.concat(strs, '')
788 | end
789 |
-- Public interface of the module.
local exports = {}

-- entities
exports.defaultEntityTable = defaultEntityTable
exports.mkReplaceEntities = mkReplaceEntities
exports.replaceEntities = replaceEntities
exports.createEntityTable = createEntityTable

-- visitors and parsers
exports.mkVisitor = mkVisitor
exports.lazyParser = lazyParser
exports.treeParser = treeParser
exports.treeParserWithReplacedEntities = treeParserWithReplacedEntities
exports.treeParserWithoutPos = treeParserWithoutPos
exports.treeParserWithoutPosWithReplacedEntities = treeParserWithoutPosWithReplacedEntities
exports.enableWithoutPosParser = enableWithoutPosParser
exports.setDefaultParsers = setDefaultParsers
exports.parser = parser
exports.parse = parse
exports.parseFile = parseFile

-- serialization
exports.tostring = documentToString
exports.escapeFunctions = escapeFunctions
exports.escapeComment = escapeComment
exports.escapeAttribute = escapeAttribute
exports.escapeAttributeAndAmp = escapeAttributeAndAmp
exports.escapeCDATA = escapeCDATA
exports.escapeText = escapeText
exports.escapeTextAndAmp = escapeTextAndAmp

return exports
815 |
--------------------------------------------------------------------------------