├── LICENSE ├── README.md ├── example.lua ├── example.xml ├── tag.sh ├── test.lua ├── xmlparser-2.2-3.rockspec └── xmlparser.lua /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Jonathan Poelen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lua-xmlparser 2 | 3 | `xmlparser` is a XML parser written entirely in Lua 5. 4 | 5 | This implementation is limited and extracts only valid entities, attributes and tags (without CDATA). 6 | 7 | For a faster parser with more features, look at [lua-xmllpegparser](https://github.com/jonathanpoelen/lua-xmllpegparser). 8 | 9 | 10 | 11 | 1. [Installation](#installation) 12 | 2. [Test](#test) 13 | 3. 
[xmlparser API](#xmlparser-api) 14 | 1. [Document structure](#document-structure) 15 | 2. [Limitations](#limitations) 16 | 4. [Licence](#licence) 17 | 18 | 19 | 20 | ## Installation 21 | 22 | ```bash 23 | luarocks install --local https://raw.githubusercontent.com/jonathanpoelen/lua-xmlparser/master/xmlparser-2.2-3.rockspec 24 | 25 | # Or in your local lua-xmlparser directory 26 | 27 | luarocks make --local xmlparser-2.2-3.rockspec 28 | ``` 29 | 30 | ## Test 31 | 32 | Run `./example.lua`. 33 | 34 | ```sh 35 | ./example.lua [xmlfile [replaceentities]] 36 | ``` 37 | 38 | `replaceentities` = anything, only to enable replacement of entities. 39 | 40 | 41 | ## xmlparser API 42 | 43 | - `xmlparser.parse(xmlstring[, evalEntities])`: Return a document `table` (see below). 44 | If `evalEntities` is `true`, the entities are replaced and a `tentities` member is added to the document `table`. 45 | - `xmlparser.parseFile(filename[, evalEntities])`: Return a tuple `document table, error message`; the document is `nil` when the file cannot be opened. 46 | - `xmlparser.defaultEntityTable()`: Return the default entity table (` { quot='"', ... }`). 47 | - `xmlparser.createEntityTable(docEntities[, resultEntities])`: Create an entity table from the document entity table. Return `resultEntities`, or a new table based on the default entities when it is omitted. 48 | - `xmlparser.replaceEntities(s, entityTable)`: Return a `string`. 49 | 50 | 51 | ### Document structure 52 | 53 | ```lua 54 | document = { 55 | children = { 56 | { text=string } or 57 | { tag=string, 58 | attrs={ [name]=value ... }, 59 | orderedattrs={ { name=string, value=string }, ... }, 60 | children={ ... } 61 | }, 62 | ... 63 | }, 64 | entities = { { name=string, value=string }, ... }, 65 | tentities = { name=value, ... 
} -- only if evalEntities = true 66 | } 67 | ``` 68 | 69 | 70 | ### Limitations 71 | 72 | - Non-validating 73 | - No DTD support 74 | - No CDATA support 75 | - Fails to detect any errors 76 | - Ignore processing instructions 77 | - Ignore DOCTYPE, parse only ENTITY 78 | 79 | 80 | ## Licence 81 | 82 | [MIT license](LICENSE) 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /example.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lua 2 | 3 | function printelem(e, prefix) 4 | prefix = prefix or '' 5 | if e.tag then 6 | print(prefix .. '<' .. e.tag .. '>') 7 | prefix = ' ' .. prefix 8 | -- iteration order of e.attrs is unspecified; orderedattrs keeps document order 9 | -- for name, value in pairs(e.attrs) do 10 | -- print(prefix .. '@' .. name .. ': ' .. value) 11 | --end 12 | for _,attr in ipairs(e.orderedattrs) do 13 | print(prefix .. '@' .. attr.name .. ': ' .. attr.value) 14 | end 15 | for _, child in ipairs(e.children) do 16 | printelem(child, prefix) 17 | end 18 | else 19 | print(prefix .. '<> ' .. e.text) 20 | end 21 | end 22 | 23 | function printdoc(doc) 24 | print('Entities:') 25 | for _, e in ipairs(doc.entities) do 26 | print(' ' .. e.name .. ': ' .. e.value) 27 | end 28 | print('Data:') 29 | for _, child in ipairs(doc.children) do 30 | printelem(child, ' ') 31 | end 32 | end 33 | 34 | local parseFile = require('xmlparser').parseFile 35 | local filename = arg[1] and #arg[1] > 0 and arg[1] or 'example.xml' 36 | local replaceEntities = arg[2] and #arg[2] > 0 37 | 38 | local doc, err = parseFile(filename, replaceEntities) 39 | 40 | if doc then printdoc(doc) end -- doc is nil when the file could not be opened 41 | if err then 42 | io.stderr:write(err .. 
'\n') 43 | end 44 | -------------------------------------------------------------------------------- /example.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | ]> 7 | 8 | something 9 | blah blah 10 | 11 | 12 | 13 | something 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if (( $# < 3 )) ; then 6 | echo "$0 major minor revision" >&2 7 | exit 1 8 | fi 9 | 10 | oldfile=(*.rockspec) 11 | oldfile=${oldfile[0]} 12 | name=${oldfile::-9} 13 | lib=${name/-*} 14 | old_rock_vers=${name#*-} 15 | new_rock_vers="$1.$2-$3" 16 | new_std_vers="$1.$2.$3" 17 | newfile="$lib-$new_rock_vers.rockspec" 18 | 19 | sed -i "s/$old_rock_vers/$new_rock_vers/;s/${old_rock_vers/-/\\.}/$new_std_vers/" "$oldfile" 20 | sed -i "s/${oldfile//./\\.}/$newfile/" README.md 21 | mv "$oldfile" "$newfile" 22 | 23 | git add "$oldfile" "$newfile" README.md 24 | git commit -vm "$lib version $new_std_vers" 25 | git tag "v$new_std_vers" 26 | git push --tags 27 | git push 28 | -------------------------------------------------------------------------------- /test.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lua 2 | 3 | xmlparser = require('xmlparser') 4 | 5 | function str(t) 6 | local orderedIndex = {} 7 | for i in pairs(t) do 8 | table.insert(orderedIndex, i) 9 | end 10 | table.sort(orderedIndex) 11 | 12 | local s, e = '{' 13 | for _, i in ipairs(orderedIndex) do 14 | e = t[i] 15 | if type(e) == 'table' then 16 | e = str(e) 17 | end 18 | s = s .. i .. ':' .. e .. ',' 19 | end 20 | return s .. '}' 21 | end 22 | 23 | r = 0 24 | 25 | function eq(s, sxml, replaceEntities) 26 | local doc = str(xmlparser.parse(sxml, replaceEntities)) 27 | if s ~= doc then 28 | print('[FAILURE]\n ' .. s .. '\n ==\n ' .. doc .. 
'\n with', sxml) 29 | r = r + 1 30 | end 31 | end 32 | 33 | function feq(s, filename) 34 | local tdoc, err = xmlparser.parseFile(filename) 35 | local doc = tdoc and str(tdoc) or 'nil' 36 | if err or s ~= doc then 37 | print('[FAILURE]\n ' .. s .. '\n ==\n ' .. doc .. '\n with file', filename) 38 | if err then print(' ' .. err .. '/' .. filename) end 39 | r = r + 1 40 | end 41 | end 42 | 43 | 44 | eq('{children:{1:{attrs:{},children:{},orderedattrs:{},tag:a,},2:{attrs:{},children:{1:{text:ad,},},orderedattrs:{},tag:b,},3:{attrs:{},children:{},orderedattrs:{},tag:c,},4:{attrs:{},children:{1:{attrs:{},children:{1:{text:ds,},},orderedattrs:{},tag:e,},},orderedattrs:{},tag:d,},5:{attrs:{},children:{1:{text:a,},2:{attrs:{},children:{},orderedattrs:{},tag:g,},3:{text:b,},},orderedattrs:{},tag:f,},},entities:{},}', 45 | 'addsab') 46 | eq('{children:{1:{attrs:{name:value,},children:{},orderedattrs:{1:{name:name,value:value,},},tag:a,},2:{attrs:{name:value,},children:{},orderedattrs:{1:{name:name,value:value,},},tag:b,},3:{attrs:{name:value,},children:{},orderedattrs:{1:{name:name,value:value,},},tag:c,},4:{attrs:{name:value,name2:value2,},children:{},orderedattrs:{1:{name:name,value:value,},2:{name:name2,value:value2,},},tag:d,},},entities:{},}', 47 | '') 48 | eq('{children:{1:{attrs:{name:v>a,},children:{},orderedattrs:{1:{name:name,value:v>a,},},tag:a,},2:{text:> b,},3:{attrs:{name:>,},children:{1:{text:d,},},orderedattrs:{1:{name:name,value:>,},},tag:c,},4:{attrs:{name:a,},children:{1:{text:>f,},},orderedattrs:{1:{name:name,value:a,},},tag:e,},},entities:{},}', 49 | '> bd>f') 50 | eq('{children:{1:{attrs:{},children:{1:{text:b,},},orderedattrs:{},tag:a,},},entities:{},}', 51 | ' b ') 52 | eq('{children:{1:{attrs:{},children:{1:{text:b,},},orderedattrs:{},tag:a,},},entities:{1:{name:e1,value:fdd>d,},2:{name:e2,value:a,},},}', 53 | 'd"> ]>b') 54 | 
eq('{children:{1:{attrs:{},children:{1:{text:fdd>ddsa;,},},orderedattrs:{},tag:a,},},entities:{1:{name:e1,value:fdd>d,},2:{name:e2,value:a,},},tentities:{amp:&,apos:\',e1:fdd>d,e2:a,gt:>,lt:<,nbsp: ,quot:",tab: ,},}', 55 | 'd"> ]>&e1;ds&e2;;', true) 56 | 57 | feq('{children:{1:{attrs:{},children:{1:{attrs:{attribute:&entity1;,},children:{1:{text:something,},},orderedattrs:{1:{name:attribute,value:&entity1;,},},tag:lvl1,},2:{text:blah blah,},3:{attrs:{attr3:value3,attribute:value,otherattribute:value2,},children:{},orderedattrs:{1:{name:attribute,value:value,},2:{name:otherattribute,value:value2,},3:{name:attr3,value:value3,},},tag:lvl1,},4:{attrs:{},children:{1:{attrs:{},children:{1:{text:something,},},orderedattrs:{},tag:lvl2,},},orderedattrs:{},tag:other,},},orderedattrs:{},tag:xml,},},entities:{1:{name:entity1,value:something,},2:{name:entity2,value:test,},},}', 58 | 'example.xml') 59 | 60 | if r == 0 then 61 | print('Ok') 62 | else 63 | os.exit(r) 64 | end 65 | -------------------------------------------------------------------------------- /xmlparser-2.2-3.rockspec: -------------------------------------------------------------------------------- 1 | package = "xmlparser" 2 | version = "2.2-3" 3 | source = { 4 | url = "git://github.com/jonathanpoelen/lua-xmlparser", 5 | tag = "v2.2.3" 6 | } 7 | description = { 8 | summary = "XML parser written entirely in Lua 5.", 9 | detailed = [[ 10 | Enables parsing a XML file and converting it to a Lua table, 11 | which can be handled directly by your application. 12 | 13 | This implementation is limited and extracts only valid entities, 14 | attributes and tags (without CDATA). 15 | 16 | For a faster parser with more features, look at lua-xmllpegparser. 
17 | ]], 18 | homepage = "https://github.com/jonathanpoelen/lua-xmlparser", 19 | license = "MIT" 20 | } 21 | dependencies = { 22 | "lua >= 5.1" 23 | } 24 | build = { 25 | type = "builtin", 26 | modules = { 27 | xmlparser = "xmlparser.lua" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /xmlparser.lua: -------------------------------------------------------------------------------- 1 | -- from https://github.com/jonathanpoelen/lua-xmlparser 2 | 3 | local io, string, pairs = io, string, pairs 4 | 5 | local slashchar = string.byte('/', 1) 6 | local E = string.byte('E', 1) 7 | 8 | --! Return a new table holding the default entities (quot, apos, lt, gt, amp, tab, nbsp). 9 | --! @return table 10 | local function defaultEntityTable() 11 | return { quot='"', apos='\'', lt='<', gt='>', amp='&', tab='\t', nbsp=' ', } 12 | end 13 | 14 | --! @param[in] s string : text in which `&name;` references are looked up in entities 15 | --! @param[in] entities table : with entity name as key and value as replacement 16 | --! @return string : the substituted text (gsub's substitution count follows as a second value) 17 | local function replaceEntities(s, entities) 18 | return s:gsub('&([^;]+);', entities) 19 | end 20 | 21 | --! Add entities to resultEntities then return it. 22 | --! Create new table when resultEntities is nil (it starts from defaultEntityTable()). 23 | --! Create an entity table from the document entity table; each e.value of docEntities is rewritten in place with its references resolved. NOTE(review): docEntities is walked with pairs, whose order the language does not specify -- confirm entities that reference earlier ones resolve as intended. 24 | --! @param[in,out] docEntities table : elements with `name` and `value` fields 25 | --! @param[in,out] resultEntities table|nil 26 | --! 
@return table 27 | local function createEntityTable(docEntities, resultEntities) 28 | local entities = resultEntities or defaultEntityTable() 29 | for _,e in pairs(docEntities) do 30 | e.value = replaceEntities(e.value, entities) 31 | entities[e.name] = e.value 32 | end 33 | return entities 34 | end 35 | 36 | --! Return a document `table`. 37 | --! @code 38 | --! document = { 39 | --! children = { 40 | --! { text=string } or 41 | --! { tag=string, 42 | --! attrs={ [name]=value ... }, 43 | --! orderedattrs={ { name=string, value=string }, ... }, 44 | --! children={ ... } 45 | --! }, 46 | --! ... 47 | --! }, 48 | --! 
entities = { { name=string, value=string }, ... }, 49 | --! tentities = { name=value, ... } -- only if evalEntities = true 50 | --! } 51 | --! @endcode 52 | --! If `evalEntities` is `true`, the entities are replaced and 53 | --! a `tentities` member is added to the document `table`. 54 | --! @param[in] s string : xml data 55 | --! @param[in] evalEntities boolean 56 | --! @return table 57 | local function parse(s, evalEntities) 58 | -- remove comments 59 | s = s:gsub('', '') 60 | 61 | local entities, tentities = {} 62 | 63 | if evalEntities then 64 | local pos = s:find('<[_%w]') 65 | if pos then 66 | s:sub(1, pos):gsub('?)([^<]*)', function(type, name, closed, txt) 84 | -- opening tag: collect its attributes, then descend unless it is self-closed 85 | if #type == 0 then 86 | local attrs, orderedattrs = {}, {} 87 | if #closed == 0 then 88 | local len = 0 89 | for all,aname,_,value,starttxt in string.gmatch(txt, "(.-([-_%w]+)%s*=%s*(.)(.-)%3%s*(/?>?))") do 90 | len = len + #all 91 | attrs[aname] = value 92 | orderedattrs[#orderedattrs+1] = {name=aname, value=value} 93 | if #starttxt ~= 0 then 94 | txt = txt:sub(len+1) 95 | closed = starttxt 96 | break 97 | end 98 | end 99 | end 100 | t[#t+1] = {tag=name, attrs=attrs, children={}, orderedattrs=orderedattrs} 101 | 102 | if closed:byte(1) ~= slashchar then 103 | l[#l+1] = t 104 | t = t[#t].children 105 | end 106 | 107 | addtext(txt) 108 | -- closing tag: pop back to the parent's children list 109 | elseif '/' == type then 110 | t = l[#l] 111 | l[#l] = nil 112 | 113 | addtext(txt) 114 | -- declaration: only names starting with 'E' (ENTITY) are recorded 115 | elseif '!' == type then 116 | if E == name:byte(1) then 117 | txt:gsub('([_%w]+)%s+(.)(.-)%2', function(name, _, entity) 118 | entities[#entities+1] = {name=name, value=entity} 119 | end, 1) 120 | end 121 | -- elseif '?' == type then 122 | -- print('? ' .. name .. ' // ' .. attrs .. '$$') 123 | -- elseif '-' == type then 124 | -- print('comment ' .. name .. ' // ' .. attrs .. 
'$$') 125 | -- else 126 | -- print('o ' .. #p .. ' // ' .. name .. ' // ' .. attrs .. 
'$$') 127 | end 128 | end) 129 | 130 | return {children=t, entities=entities, tentities=tentities} 131 | end 132 | 133 | --! Read the whole file and parse it. Return `document, nil`, or `nil, error message` when the file cannot be opened. 134 | --! @param[in] filename string 135 | --! @param[in] evalEntities boolean : see \c parse() 136 | --! @return table|nil, nil|string : document (see parse), then the error message from io.open 137 | local function parseFile(filename, evalEntities) 138 | local f, err = io.open(filename) 139 | if f then 140 | local content = f:read'*a' 141 | f:close() 142 | return parse(content, evalEntities), nil 143 | end 144 | return f, err 145 | end 146 | 147 | return { 148 | parse = parse, 149 | parseFile = parseFile, 150 | defaultEntityTable = defaultEntityTable, 151 | replaceEntities = replaceEntities, 152 | createEntityTable = createEntityTable, 153 | } 154 | --------------------------------------------------------------------------------