├── .gitignore ├── .npmignore ├── Makefile ├── Readme.md ├── benchmark └── index.js ├── examples ├── catalog.xml ├── developerforce.xml ├── index.js ├── note.xml └── page.xml ├── index.js ├── package.json └── test └── index.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | benchmark 3 | examples 4 | test 5 | Makefile 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | test: 3 | @./node_modules/.bin/mocha \ 4 | --require should \ 5 | --reporter dot \ 6 | --bail 7 | 8 | bench: 9 | @./node_modules/.bin/matcha 10 | 11 | .PHONY: test bench -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | 2 | # xml-parser 3 | 4 | Simple non-compliant XML parser because we just need to parse some basic responses and libxml takes forever to compile :D You probably don't want to use this unless you also have similar needs. 
5 | 6 | ## Installation 7 | 8 | ``` 9 | $ npm install xml-parser 10 | ``` 11 | 12 | ## Example 13 | 14 | JavaScript: 15 | 16 | ```js 17 | var fs = require('fs'); 18 | var parse = require('xml-parser'); 19 | var xml = fs.readFileSync('examples/developerforce.xml', 'utf8'); 20 | var inspect = require('util').inspect; 21 | 22 | var obj = parse(xml); 23 | console.log(inspect(obj, { colors: true, depth: Infinity })); 24 | ``` 25 | 26 | XML: 27 | 28 | ```xml 29 | 30 | 32 | 33 | 34 | 35 | 003D000000OY9omIAD 36 | true 37 | 38 | 39 | 001D000000HTK3aIAH 40 | true 41 | 42 | 43 | 44 | 45 | ``` 46 | 47 | Yields: 48 | 49 | ```js 50 | { declaration: { attributes: { version: '1.0', encoding: 'utf-8' } }, 51 | root: 52 | { name: 'soapenv:Envelope', 53 | attributes: 54 | { 'xmlns:soapenv': 'http://schemas.xmlsoap.org/soap/envelope/', 55 | xmlns: 'urn:enterprise.soap.sforce.com' }, 56 | children: 57 | [ { name: 'soapenv:Body', 58 | attributes: {}, 59 | children: 60 | [ { name: 'createResponse', 61 | attributes: {}, 62 | children: 63 | [ { name: 'result', 64 | attributes: {}, 65 | children: 66 | [ { name: 'id', 67 | attributes: {}, 68 | children: [], 69 | content: '003D000000OY9omIAD' }, 70 | { name: 'success', attributes: {}, children: [], content: 'true' } ], 71 | content: '' }, 72 | { name: 'result', 73 | attributes: {}, 74 | children: 75 | [ { name: 'id', 76 | attributes: {}, 77 | children: [], 78 | content: '001D000000HTK3aIAH' }, 79 | { name: 'success', attributes: {}, children: [], content: 'true' } ], 80 | content: '' } ], 81 | content: '' } ], 82 | content: '' } ], 83 | content: '' } } 84 | ``` 85 | 86 | # License 87 | 88 | MIT -------------------------------------------------------------------------------- /benchmark/index.js: -------------------------------------------------------------------------------- 1 | 2 | var fs = require('fs'); 3 | var parse = require('..'); 4 | 5 | var large = fs.readFileSync('examples/page.xml', 'utf8'); 6 | var small = 
fs.readFileSync('examples/developerforce.xml', 'utf8'); 7 | 8 | suite('parse', function(){ 9 | bench('small', function(){ 10 | parse(small) 11 | }) 12 | 13 | bench('large', function(){ 14 | parse(large) 15 | }) 16 | }) 17 | -------------------------------------------------------------------------------- /examples/catalog.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Empire Burlesque 9 | Bob Dylan 10 | USA 11 | Columbia 12 | 10.90 13 | 1985 14 | 15 | 16 | Hide your heart 17 | Bonnie Tyler 18 | UK 19 | CBS Records 20 | 9.90 21 | 1988 22 | 23 | 24 | Greatest Hits 25 | Dolly Parton 26 | USA 27 | RCA 28 | 9.90 29 | 1982 30 | 31 | 32 | Still got the blues 33 | Gary Moore 34 | UK 35 | Virgin records 36 | 10.20 37 | 1990 38 | 39 | 40 | Eros 41 | Eros Ramazzotti 42 | EU 43 | BMG 44 | 9.90 45 | 1997 46 | 47 | 48 | One night only 49 | Bee Gees 50 | UK 51 | Polydor 52 | 10.90 53 | 1998 54 | 55 | 56 | Sylvias Mother 57 | Dr.Hook 58 | UK 59 | CBS 60 | 8.10 61 | 1973 62 | 63 | 64 | Maggie May 65 | Rod Stewart 66 | UK 67 | Pickwick 68 | 8.50 69 | 1990 70 | 71 | 72 | Romanza 73 | Andrea Bocelli 74 | EU 75 | Polydor 76 | 10.80 77 | 1996 78 | 79 | 80 | When a man loves a woman 81 | Percy Sledge 82 | USA 83 | Atlantic 84 | 8.70 85 | 1987 86 | 87 | 88 | Black angel 89 | Savage Rose 90 | EU 91 | Mega 92 | 10.90 93 | 1995 94 | 95 | 96 | 1999 Grammy Nominees 97 | Many 98 | USA 99 | Grammy 100 | 10.20 101 | 1999 102 | 103 | 104 | For the good times 105 | Kenny Rogers 106 | UK 107 | Mucik Master 108 | 8.70 109 | 1995 110 | 111 | 112 | Big Willie style 113 | Will Smith 114 | USA 115 | Columbia 116 | 9.90 117 | 1997 118 | 119 | 120 | Tupelo Honey 121 | Van Morrison 122 | UK 123 | Polydor 124 | 8.20 125 | 1971 126 | 127 | 128 | Soulsville 129 | Jorn Hoel 130 | Norway 131 | WEA 132 | 7.90 133 | 1996 134 | 135 | 136 | The very best of 137 | Cat Stevens 138 | UK 139 | Island 140 | 8.90 141 | 1990 142 | 143 | 144 | Stop 145 | 
Sam Brown 146 | UK 147 | A and M 148 | 8.90 149 | 1988 150 | 151 | 152 | Bridge of Spies 153 | T'Pau 154 | UK 155 | Siren 156 | 7.90 157 | 1987 158 | 159 | 160 | Private Dancer 161 | Tina Turner 162 | UK 163 | Capitol 164 | 8.90 165 | 1983 166 | 167 | 168 | Midt om natten 169 | Kim Larsen 170 | EU 171 | Medley 172 | 7.80 173 | 1983 174 | 175 | 176 | Pavarotti Gala Concert 177 | Luciano Pavarotti 178 | UK 179 | DECCA 180 | 9.90 181 | 1991 182 | 183 | 184 | The dock of the bay 185 | Otis Redding 186 | USA 187 | Atlantic 188 | 7.90 189 | 1987 190 | 191 | 192 | Picture book 193 | Simply Red 194 | EU 195 | Elektra 196 | 7.20 197 | 1985 198 | 199 | 200 | Red 201 | The Communards 202 | UK 203 | London 204 | 7.80 205 | 1987 206 | 207 | 208 | Unchain my heart 209 | Joe Cocker 210 | USA 211 | EMI 212 | 8.20 213 | 1987 214 | 215 | 216 | -------------------------------------------------------------------------------- /examples/developerforce.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 003D000000OY9omIAD 8 | true 9 | 10 | 11 | 001D000000HTK3aIAH 12 | true 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /examples/index.js: -------------------------------------------------------------------------------- 1 | 2 | var path = process.argv[2]; 3 | if (!path) throw new Error('path required'); 4 | 5 | var fs = require('fs'); 6 | var parse = require('..'); 7 | var xml = fs.readFileSync(path, 'utf8'); 8 | var inspect = require('util').inspect; 9 | 10 | var obj = parse(xml); 11 | console.log(inspect(obj, { colors: true, depth: Infinity })); -------------------------------------------------------------------------------- /examples/note.xml: -------------------------------------------------------------------------------- 1 | 2 | Tobi 3 | Loki 4 | Reminder 5 | You're a ferret 6 | -------------------------------------------------------------------------------- 
/examples/page.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Segment.io - The ultimate analytics platform 4 | 5 | 6 | 7 | 8 | 9 | 10 | 14 | 15 | 16 | 71 |
72 |
73 | 74 | 75 |
76 |

Integrate instantly.

77 |

Integrate analytics tools in seconds, not days. No more setup headaches, no need to push code.

78 | See all integrations 79 |
80 |
81 | 82 | 83 |
84 |

No data lock-in.

85 |

We back up your data so that you can search it or export it. Never be locked-in to your tools again.

86 | View features 87 |
88 |
89 | 90 | 91 |
92 |

For web & mobile.

93 |

We give you simple, clean libraries for your mobile apps, your websites and even your servers.

94 | View libraries 95 |
96 |
97 |
98 |
99 |

100 | Our Customers 101 |

102 |
103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 |
128 | 132 |
133 |
134 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Module dependencies. 4 | */ 5 | 6 | var debug = require('debug')('xml-parser'); 7 | 8 | /** 9 | * Expose `parse`. 10 | */ 11 | 12 | module.exports = parse; 13 | 14 | /** 15 | * Parse the given string of `xml`. 16 | * 17 | * @param {String} xml 18 | * @return {Object} 19 | * @api public 20 | */ 21 | 22 | function parse(xml) { 23 | xml = xml.trim(); 24 | 25 | // strip comments 26 | xml = xml.replace(//g, ''); 27 | 28 | return document(); 29 | 30 | /** 31 | * XML document. 32 | */ 33 | 34 | function document() { 35 | return { 36 | declaration: declaration(), 37 | root: tag() 38 | } 39 | } 40 | 41 | /** 42 | * Declaration. 43 | */ 44 | 45 | function declaration() { 46 | var m = match(/^<\?xml\s*/); 47 | if (!m) return; 48 | 49 | // tag 50 | var node = { 51 | attributes: {} 52 | }; 53 | 54 | // attributes 55 | while (!(eos() || is('?>'))) { 56 | var attr = attribute(); 57 | if (!attr) return node; 58 | node.attributes[attr.name] = attr.value; 59 | } 60 | 61 | match(/\?>\s*/); 62 | 63 | return node; 64 | } 65 | 66 | /** 67 | * Tag. 
68 | */ 69 | 70 | function tag() { 71 | debug('tag %j', xml); 72 | var m = match(/^<([\w-:.]+)\s*/); 73 | if (!m) return; 74 | 75 | // name 76 | var node = { 77 | name: m[1], 78 | attributes: {}, 79 | children: [] 80 | }; 81 | 82 | // attributes 83 | while (!(eos() || is('>') || is('?>') || is('/>'))) { 84 | var attr = attribute(); 85 | if (!attr) return node; 86 | node.attributes[attr.name] = attr.value; 87 | } 88 | 89 | // self closing tag 90 | if (match(/^\s*\/>\s*/)) { 91 | return node; 92 | } 93 | 94 | match(/\??>\s*/); 95 | 96 | // content 97 | node.content = content(); 98 | 99 | // children 100 | var child; 101 | while (child = tag()) { 102 | node.children.push(child); 103 | } 104 | 105 | // closing 106 | match(/^<\/[\w-:.]+>\s*/); 107 | 108 | return node; 109 | } 110 | 111 | /** 112 | * Text content. 113 | */ 114 | 115 | function content() { 116 | debug('content %j', xml); 117 | var m = match(/^([^<]*)/); 118 | if (m) return m[1]; 119 | return ''; 120 | } 121 | 122 | /** 123 | * Attribute. 124 | */ 125 | 126 | function attribute() { 127 | debug('attribute %j', xml); 128 | var m = match(/([\w:-]+)\s*=\s*("[^"]*"|'[^']*'|\w+)\s*/); 129 | if (!m) return; 130 | return { name: m[1], value: strip(m[2]) } 131 | } 132 | 133 | /** 134 | * Strip quotes from `val`. 135 | */ 136 | 137 | function strip(val) { 138 | return val.replace(/^['"]|['"]$/g, ''); 139 | } 140 | 141 | /** 142 | * Match `re` and advance the string. 143 | */ 144 | 145 | function match(re) { 146 | var m = xml.match(re); 147 | if (!m) return; 148 | xml = xml.slice(m[0].length); 149 | return m; 150 | } 151 | 152 | /** 153 | * End-of-source. 154 | */ 155 | 156 | function eos() { 157 | return 0 == xml.length; 158 | } 159 | 160 | /** 161 | * Check for `prefix`. 
162 | */ 163 | 164 | function is(prefix) { 165 | return 0 == xml.indexOf(prefix); 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "xml-parser", 3 | "version": "1.2.1", 4 | "repository": "segmentio/xml-parser", 5 | "description": "the little xml parser that could", 6 | "scripts": { 7 | "test": "make test" 8 | }, 9 | "keywords": [ 10 | "xml", 11 | "sucks" 12 | ], 13 | "dependencies": { 14 | "debug": "^2.2.0" 15 | }, 16 | "devDependencies": { 17 | "matcha": "^0.6.0", 18 | "mocha": "^2.2.5", 19 | "should": "^6.0.3" 20 | }, 21 | "license": "MIT" 22 | } -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | 2 | var parse = require('..'); 3 | var should = require('should'); 4 | 5 | it('should support blank strings', function(){ 6 | var node = parse(''); 7 | node.should.eql({ declaration: undefined, root: undefined }); 8 | }) 9 | 10 | it('should support declarations', function(){ 11 | var node = parse(''); 12 | node.should.eql({ 13 | declaration: { 14 | attributes: { 15 | version: '1.0' 16 | } 17 | }, 18 | root: undefined 19 | }) 20 | }) 21 | 22 | it('should support comments', function(){ 23 | var node = parse(''); 24 | node.root.should.eql({ 25 | name: 'foo', 26 | attributes: {}, 27 | children: [], 28 | content: '' 29 | }); 30 | }) 31 | 32 | it('should support tags', function(){ 33 | var node = parse(''); 34 | node.root.should.eql({ 35 | name: 'foo', 36 | attributes: {}, 37 | children: [], 38 | content: '' 39 | }); 40 | }) 41 | 42 | it('should support tags with text', function(){ 43 | var node = parse('hello world'); 44 | node.root.should.eql({ 45 | name: 'foo', 46 | attributes: {}, 47 | children: [], 48 | content: 'hello world' 49 | }); 50 | }) 51 | 52 | it('should support weird 
whitespace', function(){ 53 | var node = parse('\n\nhello world'); 54 | node.root.should.eql({ 55 | name: 'foo', 56 | attributes: { bar: 'baz' }, 57 | children: [], 58 | content: 'hello world' 59 | }); 60 | }) 61 | 62 | it('should support tags with attributes', function(){ 63 | var node = parse(''); 64 | node.root.should.eql({ 65 | name: 'foo', 66 | attributes: { 67 | bar: 'baz', 68 | some: 'stuff here', 69 | whatever: 'whoop' 70 | }, 71 | children: [], 72 | content: '' 73 | }); 74 | }) 75 | 76 | it('should support nested tags', function(){ 77 | var node = parse('hello'); 78 | node.root.should.eql({ 79 | "name": "a", 80 | "attributes": {}, 81 | "children": [ 82 | { 83 | "name": "b", 84 | "attributes": {}, 85 | "children": [ 86 | { 87 | "name": "c", 88 | "attributes": {}, 89 | "children": [], 90 | "content": "hello" 91 | } 92 | ], 93 | "content": "" 94 | } 95 | ], 96 | "content": "" 97 | }) 98 | }) 99 | 100 | it('should support nested tags with text', function(){ 101 | var node = parse('foo bar baz'); 102 | node.root.should.eql({ 103 | "name": "a", 104 | "attributes": {}, 105 | "children": [ 106 | { 107 | "name": "b", 108 | "attributes": {}, 109 | "children": [ 110 | { 111 | "name": "c", 112 | "attributes": {}, 113 | "children": [], 114 | "content": "baz" 115 | } 116 | ], 117 | "content": "bar " 118 | } 119 | ], 120 | "content": "foo " 121 | }) 122 | }) 123 | 124 | it('should support self-closing tags', function () { 125 | var node = parse('foobar'); 126 | node.root.should.eql({ 127 | "name": "a", 128 | "attributes": {}, 129 | "children": [ 130 | { 131 | "name": "b", 132 | "attributes": {}, 133 | "children": [], 134 | "content": "foo" 135 | }, 136 | { 137 | "name": "b", 138 | "attributes": { 139 | "a": "bar" 140 | }, 141 | "children": [] 142 | }, 143 | { 144 | "name": "b", 145 | "attributes": {}, 146 | "children": [], 147 | "content": "bar" 148 | } 149 | ], 150 | "content": "" 151 | }) 152 | }) 153 | 154 | it('should support self-closing tags without attributes', 
function () { 155 | var node = parse('foo bar'); 156 | node.root.should.eql({ 157 | "name": "a", 158 | "attributes": {}, 159 | "children": [ 160 | { 161 | "name": "b", 162 | "attributes": {}, 163 | "children": [], 164 | "content": "foo" 165 | }, 166 | { 167 | "name": "b", 168 | "attributes": {}, 169 | "children": [] 170 | }, 171 | { 172 | "name": "b", 173 | "attributes": {}, 174 | "children": [], 175 | "content": "bar" 176 | } 177 | ], 178 | "content": "" 179 | }) 180 | }) 181 | 182 | it('should support multi-line comments', function () { 183 | var node = parse('foo') 184 | node.root.should.eql({ 185 | "name": "a", 186 | "attributes": {}, 187 | "children": [], 188 | "content": "foo" 189 | }) 190 | }) 191 | 192 | it('should support attributes with a hyphen', function () { 193 | var node = parse('foo') 194 | node.root.should.eql({ 195 | name: "a", 196 | attributes: { 197 | "data-bar": "baz" 198 | }, 199 | children: [], 200 | content: "foo" 201 | }) 202 | }) 203 | 204 | it('should support tags with a dot', function () { 205 | var node = parse(''); 206 | node.root.should.eql({ 207 | name: "root", 208 | attributes: {}, 209 | children: [{ 210 | name: "c:Key.Columns", 211 | attributes: {}, 212 | children: [{ 213 | name: "o:Column", 214 | attributes: { 215 | Ref: "ol1" 216 | }, 217 | children: [] 218 | }], 219 | content: "" 220 | }, { 221 | name: "c:Key.Columns", 222 | attributes: {}, 223 | children: [{ 224 | name: "o:Column", 225 | attributes: { 226 | "Ref": "ol2" 227 | }, 228 | children: [] 229 | }], 230 | content: "" 231 | }], 232 | content: "" 233 | }) 234 | }) 235 | 236 | it('should support tags with hyphen', function () { 237 | var node = parse( 238 | '' + 239 | 'val1' + 240 | 'val2' + 241 | '' 242 | ); 243 | node.root.should.eql({ 244 | name: 'root', 245 | attributes: {}, 246 | content: '', 247 | children: [ 248 | { 249 | name: 'data-field1', 250 | attributes: {}, 251 | children: [], 252 | content: 'val1' 253 | }, 254 | { 255 | name: 'data-field2', 256 | attributes: 
{}, 257 | children: [], 258 | content: 'val2' 259 | } 260 | ] 261 | }); 262 | }); 263 | --------------------------------------------------------------------------------