├── LICENSE
├── README.md
├── example.lua
├── example.xml
├── tag.sh
├── test.lua
├── xmlparser-2.2-3.rockspec
└── xmlparser.lua
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Jonathan Poelen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # lua-xmlparser
2 |
3 | `xmlparser` is an XML parser written entirely in Lua 5.
4 |
5 | This implementation is limited and extracts only valid entities, attributes and tags (without CDATA).
6 |
7 | For a faster parser with more features, look at [lua-xmllpegparser](https://github.com/jonathanpoelen/lua-xmllpegparser).
8 |
9 |
10 |
11 | 1. [Installation](#installation)
12 | 2. [Test](#test)
13 | 3. [xmlparser API](#xmlparser-api)
14 | 1. [Document structure](#document-structure)
15 | 2. [Limitations](#limitations)
16 | 4. [Licence](#licence)
17 |
18 |
19 |
20 | ## Installation
21 |
22 | ```bash
23 | luarocks install --local https://raw.githubusercontent.com/jonathanpoelen/lua-xmlparser/master/xmlparser-2.2-3.rockspec
24 |
25 | # Or in your local lua-xmlparser directory
26 |
27 | luarocks make --local xmlparser-2.2-3.rockspec
28 | ```
29 |
30 | ## Test
31 |
32 | Run `./example.lua`.
33 |
34 | ```sh
35 | ./example.lua [xmlfile [replaceentities]]
36 | ```
37 |
38 | `replaceentities`: any non-empty value enables the replacement of entities; omit it to keep entities as written.
39 |
40 |
41 | ## xmlparser API
42 |
43 | - `xmlparser.parse(xmlstring[, evalEntities])`: Return a document `table` (see below).
44 | If `evalEntities` is `true`, the entities are replaced and a `tentities` member is added to the document `table`.
45 | - `xmlparser.parseFile(filename[, evalEntities])`: Return a tuple `document table, error message`. When the file cannot be opened, the document is `nil` and the error message says why.
46 | - `xmlparser.defaultEntityTable()`: Return the default entity table (` { quot='"', ... }`).
47 | - `xmlparser.createEntityTable(docEntities[, resultEntities])`: Create an entity table from the document entity table. Return `resultEntities`.
48 | - `xmlparser.replaceEntities(s, entityTable)`: Return a `string`.
49 |
50 |
51 | ### Document structure
52 |
53 | ```lua
54 | document = {
55 | children = {
56 | { text=string } or
57 | { tag=string,
58 | attrs={ [name]=value ... },
59 | orderedattrs={ { name=string, value=string }, ... },
60 | children={ ... }
61 | },
62 | ...
63 | },
64 | entities = { { name=string, value=string }, ... },
65 | tentities = { name=value, ... } -- only if evalEntities = true
66 | }
67 | ```
68 |
69 |
70 | ### Limitations
71 |
72 | - Non-validating
73 | - No DTD support
74 | - No CDATA support
75 | - Fails to detect any errors
76 | - Ignore processing instructions
77 | - Ignore DOCTYPE, parse only ENTITY
78 |
79 |
80 | ## Licence
81 |
82 | [MIT license](LICENSE)
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/example.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 |
-- Recursively print one node of a parsed document.
-- A tag node is printed as `<tag>`, followed by its attributes and then its
-- children, each nesting level being prefixed with one more ' '.
-- A text node is printed as `<> text`.
-- @param e table : `{ tag=..., orderedattrs=..., children=... }` or `{ text=... }`
-- @param prefix string|nil : text printed in front of every line (default '')
function printelem(e, prefix)
  prefix = prefix or ''
  if not e.tag then
    print(prefix .. '<> ' .. e.text)
    return
  end

  print(prefix .. '<' .. e.tag .. '>')
  prefix = ' ' .. prefix
  -- e.attrs is a hash: iterating it gives no stable order, so the
  -- attributes are read from orderedattrs instead.
  for _, attr in ipairs(e.orderedattrs) do
    print(prefix .. '@' .. attr.name .. ': ' .. attr.value)
  end
  -- children is a sequence whose order is meaningful: ipairs guarantees
  -- 1..n traversal, pairs does not promise any order.
  for _, child in ipairs(e.children) do
    printelem(child, prefix)
  end
end
22 |
-- Print a whole parsed document: first the declared entities, then the tree.
-- @param doc table : document returned by xmlparser (`entities` and
--                    `children` are both read as sequences)
function printdoc(doc)
  print('Entities:')
  -- both lists are sequences in document order, hence ipairs rather than pairs
  for _, entity in ipairs(doc.entities) do
    print(' ' .. entity.name .. ': ' .. entity.value)
  end
  print('Data:')
  for _, child in ipairs(doc.children) do
    printelem(child, ' ')
  end
end
33 |
-- Command line: example.lua [xmlfile [replaceentities]]
local parseFile = require('xmlparser').parseFile
-- an absent or empty first argument falls back to the bundled sample
local filename = arg[1] and #arg[1] > 0 and arg[1] or 'example.xml'
-- any non-empty second argument enables entity replacement
local replaceEntities = arg[2] and #arg[2] > 0

local doc, err = parseFile(filename, replaceEntities)

-- parseFile returns nil plus a message when the file cannot be opened:
-- report it instead of letting printdoc fail on a nil document.
if not doc then
  io.stderr:write((err or ('cannot parse ' .. filename)) .. '\n')
  os.exit(1)
end

printdoc(doc)
if err then
  io.stderr:write(err .. '\n')
end
44 |
--------------------------------------------------------------------------------
/example.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | ]>
7 |
8 | something
9 | blah blah
10 |
11 |
12 |
13 | something
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/tag.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Release helper: bump the version inside the rockspec, rename the rockspec
# file, update its name in README.md, then commit, tag and push.
#
# Usage: tag.sh major minor revision

set -e

if (( $# < 3 )) ; then
  echo "$0 major minor revision" >&2
  exit 1
fi

# The first *.rockspec of the current directory is the one being replaced.
oldfile=(*.rockspec)
oldfile=${oldfile[0]}
if [[ ! -f $oldfile ]] ; then
  # without a match the glob stays literal; stop with a clear message
  echo "$0: no .rockspec file in the current directory" >&2
  exit 1
fi

name=${oldfile%.rockspec}      # e.g. xmlparser-2.2-3
lib=${name%%-*}                # e.g. xmlparser
old_rock_vers=${name#*-}       # e.g. 2.2-3
new_rock_vers="$1.$2-$3"
new_std_vers="$1.$2.$3"
newfile="$lib-$new_rock_vers.rockspec"

# Escape the dots so that sed matches the old version literally.
old_rock_re="${old_rock_vers//./\\.}"   # 2\.2-3  (rockspec "version")
old_std_re="${old_rock_re/-/\\.}"       # 2\.2\.3 (git tag)

sed -i "s/$old_rock_re/$new_rock_vers/;s/$old_std_re/$new_std_vers/" "$oldfile"
sed -i "s/${oldfile//./\\.}/$newfile/" README.md
mv "$oldfile" "$newfile"

# adding the old path stages its removal
git add "$oldfile" "$newfile" README.md
git commit -vm "$lib version $new_std_vers"
git tag "v$new_std_vers"
git push --tags
git push
28 |
--------------------------------------------------------------------------------
/test.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 |
3 | xmlparser = require('xmlparser')
4 |
-- Serialize a table into a deterministic string of the form
-- `{key:value,key:value,}` with the keys in sorted order; nested tables are
-- serialized recursively.
-- @param t table : its keys must be mutually comparable (table.sort is used)
-- @return string
function str(t)
  local keys = {}
  for k in pairs(t) do
    keys[#keys + 1] = k
  end
  table.sort(keys)

  local parts = { '{' }
  -- ipairs walks the sorted keys in order; pairs gives no such guarantee
  for _, k in ipairs(keys) do
    local v = t[k]
    if type(v) == 'table' then
      v = str(v)
    end
    -- tostring() lets non-string leaves (booleans) be serialized too
    parts[#parts + 1] = k .. ':' .. tostring(v) .. ','
  end
  parts[#parts + 1] = '}'
  return table.concat(parts)
end
22 |
-- Number of failed checks so far.
r = 0

-- Parse `sxml` and compare the serialized document with the expected string.
-- On mismatch, print a report and count one failure in `r`.
-- @param s string : expected serialization (see str)
-- @param sxml string : xml data given to xmlparser.parse
-- @param replaceEntities boolean|nil : forwarded to xmlparser.parse
function eq(s, sxml, replaceEntities)
  local actual = str(xmlparser.parse(sxml, replaceEntities))
  if actual == s then
    return
  end
  print('[FAILURE]\n ' .. s .. '\n ==\n ' .. actual .. '\n with', sxml)
  r = r + 1
end
32 |
-- Parse the file `filename` and compare the serialized document with the
-- expected string. A parseFile error or a mismatch prints a report and
-- counts one failure in `r`.
-- @param s string : expected serialization (see str)
-- @param filename string : path given to xmlparser.parseFile
function feq(s, filename)
  local tdoc, err = xmlparser.parseFile(filename)
  -- parseFile returns nil when the file cannot be opened; serializing nil
  -- would raise and abort the run instead of recording a failure.
  local doc = tdoc and str(tdoc) or '<no document>'
  if err or s ~= doc then
    print('[FAILURE]\n ' .. s .. '\n ==\n ' .. doc .. '\n with file', filename)
    if err then print(' ' .. err .. '/' .. filename) end
    r = r + 1
  end
end
42 |
43 |
44 | eq('{children:{1:{attrs:{},children:{},orderedattrs:{},tag:a,},2:{attrs:{},children:{1:{text:ad,},},orderedattrs:{},tag:b,},3:{attrs:{},children:{},orderedattrs:{},tag:c,},4:{attrs:{},children:{1:{attrs:{},children:{1:{text:ds,},},orderedattrs:{},tag:e,},},orderedattrs:{},tag:d,},5:{attrs:{},children:{1:{text:a,},2:{attrs:{},children:{},orderedattrs:{},tag:g,},3:{text:b,},},orderedattrs:{},tag:f,},},entities:{},}',
45 | 'addsab')
46 | eq('{children:{1:{attrs:{name:value,},children:{},orderedattrs:{1:{name:name,value:value,},},tag:a,},2:{attrs:{name:value,},children:{},orderedattrs:{1:{name:name,value:value,},},tag:b,},3:{attrs:{name:value,},children:{},orderedattrs:{1:{name:name,value:value,},},tag:c,},4:{attrs:{name:value,name2:value2,},children:{},orderedattrs:{1:{name:name,value:value,},2:{name:name2,value:value2,},},tag:d,},},entities:{},}',
47 | '')
48 | eq('{children:{1:{attrs:{name:v>a,},children:{},orderedattrs:{1:{name:name,value:v>a,},},tag:a,},2:{text:> b,},3:{attrs:{name:>,},children:{1:{text:d,},},orderedattrs:{1:{name:name,value:>,},},tag:c,},4:{attrs:{name:a,},children:{1:{text:>f,},},orderedattrs:{1:{name:name,value:a,},},tag:e,},},entities:{},}',
49 | '> bd>f')
50 | eq('{children:{1:{attrs:{},children:{1:{text:b,},},orderedattrs:{},tag:a,},},entities:{},}',
51 | ' b ')
52 | eq('{children:{1:{attrs:{},children:{1:{text:b,},},orderedattrs:{},tag:a,},},entities:{1:{name:e1,value:fdd>d,},2:{name:e2,value:a,},},}',
53 | 'd"> ]>b')
54 | eq('{children:{1:{attrs:{},children:{1:{text:fdd>ddsa;,},},orderedattrs:{},tag:a,},},entities:{1:{name:e1,value:fdd>d,},2:{name:e2,value:a,},},tentities:{amp:&,apos:\',e1:fdd>d,e2:a,gt:>,lt:<,nbsp: ,quot:",tab: ,},}',
55 | 'd"> ]>&e1;ds&e2;;', true)
56 |
57 | feq('{children:{1:{attrs:{},children:{1:{attrs:{attribute:&entity1;,},children:{1:{text:something,},},orderedattrs:{1:{name:attribute,value:&entity1;,},},tag:lvl1,},2:{text:blah blah,},3:{attrs:{attr3:value3,attribute:value,otherattribute:value2,},children:{},orderedattrs:{1:{name:attribute,value:value,},2:{name:otherattribute,value:value2,},3:{name:attr3,value:value3,},},tag:lvl1,},4:{attrs:{},children:{1:{attrs:{},children:{1:{text:something,},},orderedattrs:{},tag:lvl2,},},orderedattrs:{},tag:other,},},orderedattrs:{},tag:xml,},},entities:{1:{name:entity1,value:something,},2:{name:entity2,value:test,},},}',
58 | 'example.xml')
59 |
-- Final report: a failing run exits with the failure count as its status,
-- a clean run prints 'Ok'.
if r ~= 0 then
  os.exit(r)
end
print('Ok')
65 |
--------------------------------------------------------------------------------
/xmlparser-2.2-3.rockspec:
--------------------------------------------------------------------------------
package = "xmlparser"
version = "2.2-3"
source = {
  -- GitHub no longer serves the unauthenticated git:// protocol, so the
  -- repository is fetched over HTTPS (git+https is a LuaRocks source scheme).
  url = "git+https://github.com/jonathanpoelen/lua-xmlparser",
  tag = "v2.2.3"
}
description = {
  summary = "XML parser written entirely in Lua 5.",
  detailed = [[
Enables parsing a XML file and converting it to a Lua table,
which can be handled directly by your application.

This implementation is limited and extracts only valid entities,
attributes and tags (without CDATA).

For a faster parser with more features, look at lua-xmllpegparser.
]],
  homepage = "https://github.com/jonathanpoelen/lua-xmlparser",
  license = "MIT"
}
dependencies = {
  "lua >= 5.1"
}
build = {
  type = "builtin",
  -- module name -> source file installed for it
  modules = {
    xmlparser = "xmlparser.lua"
  }
}
30 |
--------------------------------------------------------------------------------
/xmlparser.lua:
--------------------------------------------------------------------------------
1 | -- from https://github.com/jonathanpoelen/lua-xmlparser
2 |
3 | local io, string, pairs = io, string, pairs
4 |
-- Byte values compared with `:byte(1)` results while parsing:
-- '/' for the closing marker of a tag, 'E' for the name of a `!` declaration.
local slashchar = ('/'):byte()
local E = ('E'):byte()
7 |
--! Build the table of predefined entities.
--! A new table is created by each call.
--! @return table : entity name as key, replacement text as value
local function defaultEntityTable()
  local entities = {}
  entities.quot = '"'
  entities.apos = '\''
  entities.lt = '<'
  entities.gt = '>'
  entities.amp = '&'
  entities.tab = '\t'
  entities.nbsp = ' '
  return entities
end
13 |
--! Replace each `&name;` reference of s by entities[name].
--! A reference whose name is not a key of entities is left unchanged.
--! @param[in] s string
--! @param[in] entities table : with entity name as key and value as replacement
--! @return string
local function replaceEntities(s, entities)
  -- '&' is excluded from the name, otherwise a stray ampersand would capture
  -- everything up to the next ';' and hide the reference that follows it
  -- ("a & b &lt; c").
  -- The parentheses keep only the string: gsub also returns the match count.
  return (s:gsub('&([^&;]+);', entities))
end
20 |
--! Create an entity table from the document entity table.
--! Entities are added to resultEntities, which is then returned.
--! When resultEntities is nil, a new table holding the default entities is
--! used instead.
--! Each value is first expanded with the entities known so far, and the
--! expanded value is also written back into docEntities.
--! @param[in,out] docEntities table : sequence of { name=string, value=string }
--! @param[in,out] resultEntities table|nil
--! @return table
local function createEntityTable(docEntities, resultEntities)
  local entities = resultEntities or defaultEntityTable()
  -- An entity may refer to one declared before it, so declaration order
  -- matters: ipairs guarantees it, pairs does not.
  for _, e in ipairs(docEntities) do
    e.value = replaceEntities(e.value, entities)
    entities[e.name] = e.value
  end
  return entities
end
35 |
36 | --! Return a document `table`.
37 | --! @code
38 | --! document = {
39 | --! children = {
40 | --! { text=string } or
41 | --! { tag=string,
42 | --! attrs={ [name]=value ... },
43 | --! orderedattrs={ { name=string, value=string }, ... },
44 | --! children={ ... }
45 | --! },
46 | --! ...
47 | --! },
48 | --! entities = { { name=string, value=string }, ... },
49 | --! tentities = { name=value, ... } -- only if evalEntities = true
50 | --! }
51 | --! @endcode
52 | --! If `evalEntities` is `true`, the entities are replaced and
53 | --! a `tentities` member is added to the document `table`.
54 | --! @param[in] s string : xml data
55 | --! @param[in] evalEntities boolean
56 | --! @return table
57 | local function parse(s, evalEntities)
58 | -- remove comments
59 | s = s:gsub('', '')
60 |
61 | local entities, tentities = {}
62 |
63 | if evalEntities then
64 | local pos = s:find('<[_%w]')
65 | if pos then
66 | s:sub(1, pos):gsub('?)([^<]*)', function(type, name, closed, txt)
84 | -- open
85 | if #type == 0 then
86 | local attrs, orderedattrs = {}, {}
87 | if #closed == 0 then
88 | local len = 0
89 | for all,aname,_,value,starttxt in string.gmatch(txt, "(.-([-_%w]+)%s*=%s*(.)(.-)%3%s*(/?>?))") do
90 | len = len + #all
91 | attrs[aname] = value
92 | orderedattrs[#orderedattrs+1] = {name=aname, value=value}
93 | if #starttxt ~= 0 then
94 | txt = txt:sub(len+1)
95 | closed = starttxt
96 | break
97 | end
98 | end
99 | end
100 | t[#t+1] = {tag=name, attrs=attrs, children={}, orderedattrs=orderedattrs}
101 |
102 | if closed:byte(1) ~= slashchar then
103 | l[#l+1] = t
104 | t = t[#t].children
105 | end
106 |
107 | addtext(txt)
108 | -- close
109 | elseif '/' == type then
110 | t = l[#l]
111 | l[#l] = nil
112 |
113 | addtext(txt)
114 | -- ENTITY
115 | elseif '!' == type then
116 | if E == name:byte(1) then
117 | txt:gsub('([_%w]+)%s+(.)(.-)%2', function(name, _, entity)
118 | entities[#entities+1] = {name=name, value=entity}
119 | end, 1)
120 | end
121 | -- elseif '?' == type then
122 | -- print('? ' .. name .. ' // ' .. attrs .. '$$')
123 | -- elseif '-' == type then
124 | -- print('comment ' .. name .. ' // ' .. attrs .. '$$')
125 | -- else
126 | -- print('o ' .. #p .. ' // ' .. name .. ' // ' .. attrs .. '$$')
127 | end
128 | end)
129 |
130 | return {children=t, entities=entities, tentities=tentities}
131 | end
132 |
--! Read a whole file and parse its content.
--! Return a tuple `document table, error message`.
--! @param[in] filename string
--! @param[in] evalEntities boolean : see \c parse()
--! @return table|nil : see parse ; nil when the file cannot be opened or read
--! @return string|nil : error message when the first result is nil
local function parseFile(filename, evalEntities)
  local f, err = io.open(filename)
  if not f then
    return nil, err
  end

  -- read can fail after a successful open (a directory, for instance):
  -- report the failure instead of handing nil to parse
  local content, readerr = f:read('*a')
  f:close()
  if not content then
    return nil, readerr
  end

  return parse(content, evalEntities), nil
end
146 |
-- Public interface of the module
local M = {}

M.parse = parse
M.parseFile = parseFile
M.defaultEntityTable = defaultEntityTable
M.replaceEntities = replaceEntities
M.createEntityTable = createEntityTable

return M
154 |
--------------------------------------------------------------------------------