├── .gitignore
├── .npmignore
├── Makefile
├── Readme.md
├── benchmark
└── index.js
├── examples
├── catalog.xml
├── developerforce.xml
├── index.js
├── note.xml
└── page.xml
├── index.js
├── package.json
└── test
└── index.js
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | benchmark
3 | examples
4 | test
5 | Makefile
6 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
# Neither target produces a file named after itself.
.PHONY: test bench

# Run the mocha suite with `should` preloaded, the dot reporter,
# and stop at the first failing test.
test:
	@./node_modules/.bin/mocha --require should --reporter dot --bail

# Run the benchmarks with the locally installed matcha binary.
bench:
	@./node_modules/.bin/matcha
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 |
2 | # xml-parser
3 |
4 | Simple non-compliant XML parser, because we just need to parse some basic responses and libxml takes forever to compile :D You probably don't want to use this unless you have similar needs.
5 |
6 | ## Installation
7 |
8 | ```
9 | $ npm install xml-parser
10 | ```
11 |
12 | ## Example
13 |
14 | JavaScript:
15 |
16 | ```js
17 | var fs = require('fs');
18 | var parse = require('xml-parser');
19 | var xml = fs.readFileSync('examples/developerforce.xml', 'utf8');
20 | var inspect = require('util').inspect;
21 |
22 | var obj = parse(xml);
23 | console.log(inspect(obj, { colors: true, depth: Infinity }));
24 | ```
25 |
26 | XML:
27 |
28 | ```xml
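29 | <?xml version="1.0" encoding="utf-8"?>
30 | <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"
31 |   xmlns="urn:enterprise.soap.sforce.com">
32 |   <soapenv:Body>
33 |     <createResponse>
34 |       <result>
35 |         <id>003D000000OY9omIAD</id>
36 |         <success>true</success>
37 |       </result>
38 |       <result>
39 |         <id>001D000000HTK3aIAH</id>
40 |         <success>true</success>
41 |       </result>
42 |     </createResponse>
43 |   </soapenv:Body>
44 | </soapenv:Envelope>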
45 | ```
46 |
47 | Yields:
48 |
49 | ```js
50 | { declaration: { attributes: { version: '1.0', encoding: 'utf-8' } },
51 | root:
52 | { name: 'soapenv:Envelope',
53 | attributes:
54 | { 'xmlns:soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
55 | xmlns: 'urn:enterprise.soap.sforce.com' },
56 | children:
57 | [ { name: 'soapenv:Body',
58 | attributes: {},
59 | children:
60 | [ { name: 'createResponse',
61 | attributes: {},
62 | children:
63 | [ { name: 'result',
64 | attributes: {},
65 | children:
66 | [ { name: 'id',
67 | attributes: {},
68 | children: [],
69 | content: '003D000000OY9omIAD' },
70 | { name: 'success', attributes: {}, children: [], content: 'true' } ],
71 | content: '' },
72 | { name: 'result',
73 | attributes: {},
74 | children:
75 | [ { name: 'id',
76 | attributes: {},
77 | children: [],
78 | content: '001D000000HTK3aIAH' },
79 | { name: 'success', attributes: {}, children: [], content: 'true' } ],
80 | content: '' } ],
81 | content: '' } ],
82 | content: '' } ],
83 | content: '' } }
84 | ```
85 |
86 | # License
87 |
88 | MIT
--------------------------------------------------------------------------------
/benchmark/index.js:
--------------------------------------------------------------------------------
1 |
/**
 * Benchmarks for `parse` on a small and a large fixture.
 *
 * `suite` and `bench` are not defined here; presumably they are globals
 * provided by the matcha runner invoked from `make bench`.
 */

var fs = require('fs');
var path = require('path');
var parse = require('..');

// Resolve fixtures against this file rather than the process cwd, so the
// benchmark also works when it is not started from the repository root.
var examples = path.join(__dirname, '..', 'examples');

var large = fs.readFileSync(path.join(examples, 'page.xml'), 'utf8');
var small = fs.readFileSync(path.join(examples, 'developerforce.xml'), 'utf8');

suite('parse', function(){
  bench('small', function(){
    parse(small);
  });

  bench('large', function(){
    parse(large);
  });
});
17 |
--------------------------------------------------------------------------------
/examples/catalog.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | Empire Burlesque
9 | Bob Dylan
10 | USA
11 | Columbia
12 | 10.90
13 | 1985
14 |
15 |
16 | Hide your heart
17 | Bonnie Tyler
18 | UK
19 | CBS Records
20 | 9.90
21 | 1988
22 |
23 |
24 | Greatest Hits
25 | Dolly Parton
26 | USA
27 | RCA
28 | 9.90
29 | 1982
30 |
31 |
32 | Still got the blues
33 | Gary Moore
34 | UK
35 | Virgin records
36 | 10.20
37 | 1990
38 |
39 |
40 | Eros
41 | Eros Ramazzotti
42 | EU
43 | BMG
44 | 9.90
45 | 1997
46 |
47 |
48 | One night only
49 | Bee Gees
50 | UK
51 | Polydor
52 | 10.90
53 | 1998
54 |
55 |
56 | Sylvias Mother
57 | Dr.Hook
58 | UK
59 | CBS
60 | 8.10
61 | 1973
62 |
63 |
64 | Maggie May
65 | Rod Stewart
66 | UK
67 | Pickwick
68 | 8.50
69 | 1990
70 |
71 |
72 | Romanza
73 | Andrea Bocelli
74 | EU
75 | Polydor
76 | 10.80
77 | 1996
78 |
79 |
80 | When a man loves a woman
81 | Percy Sledge
82 | USA
83 | Atlantic
84 | 8.70
85 | 1987
86 |
87 |
88 | Black angel
89 | Savage Rose
90 | EU
91 | Mega
92 | 10.90
93 | 1995
94 |
95 |
96 | 1999 Grammy Nominees
97 | Many
98 | USA
99 | Grammy
100 | 10.20
101 | 1999
102 |
103 |
104 | For the good times
105 | Kenny Rogers
106 | UK
107 | Mucik Master
108 | 8.70
109 | 1995
110 |
111 |
112 | Big Willie style
113 | Will Smith
114 | USA
115 | Columbia
116 | 9.90
117 | 1997
118 |
119 |
120 | Tupelo Honey
121 | Van Morrison
122 | UK
123 | Polydor
124 | 8.20
125 | 1971
126 |
127 |
128 | Soulsville
129 | Jorn Hoel
130 | Norway
131 | WEA
132 | 7.90
133 | 1996
134 |
135 |
136 | The very best of
137 | Cat Stevens
138 | UK
139 | Island
140 | 8.90
141 | 1990
142 |
143 |
144 | Stop
145 | Sam Brown
146 | UK
147 | A and M
148 | 8.90
149 | 1988
150 |
151 |
152 | Bridge of Spies
153 | T'Pau
154 | UK
155 | Siren
156 | 7.90
157 | 1987
158 |
159 |
160 | Private Dancer
161 | Tina Turner
162 | UK
163 | Capitol
164 | 8.90
165 | 1983
166 |
167 |
168 | Midt om natten
169 | Kim Larsen
170 | EU
171 | Medley
172 | 7.80
173 | 1983
174 |
175 |
176 | Pavarotti Gala Concert
177 | Luciano Pavarotti
178 | UK
179 | DECCA
180 | 9.90
181 | 1991
182 |
183 |
184 | The dock of the bay
185 | Otis Redding
186 | USA
187 | Atlantic
188 | 7.90
189 | 1987
190 |
191 |
192 | Picture book
193 | Simply Red
194 | EU
195 | Elektra
196 | 7.20
197 | 1985
198 |
199 |
200 | Red
201 | The Communards
202 | UK
203 | London
204 | 7.80
205 | 1987
206 |
207 |
208 | Unchain my heart
209 | Joe Cocker
210 | USA
211 | EMI
212 | 8.20
213 | 1987
214 |
215 |
216 |
--------------------------------------------------------------------------------
/examples/developerforce.xml:
--------------------------------------------------------------------------------
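1 | <?xml version="1.0" encoding="utf-8"?>
2 | <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"
3 |   xmlns="urn:enterprise.soap.sforce.com">
4 |   <soapenv:Body>
5 |     <createResponse>
6 |       <result>
7 |         <id>003D000000OY9omIAD</id>
8 |         <success>true</success>
9 |       </result>
10 |       <result>
11 |         <id>001D000000HTK3aIAH</id>
12 |         <success>true</success>
13 |       </result>
14 |     </createResponse>
15 |   </soapenv:Body>
16 | </soapenv:Envelope>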
--------------------------------------------------------------------------------
/examples/index.js:
--------------------------------------------------------------------------------
1 |
/**
 * Parse the XML file named by the first command-line argument and print
 * the resulting object, fully expanded and colorized.
 */

var file = process.argv[2];
if (!file) {
  throw new Error('path required');
}

var util = require('util');
var fs = require('fs');
var parse = require('..');

var source = fs.readFileSync(file, 'utf8');
var tree = parse(source);

console.log(util.inspect(tree, { colors: true, depth: Infinity }));
--------------------------------------------------------------------------------
/examples/note.xml:
--------------------------------------------------------------------------------
1 |
2 | Tobi
3 | Loki
4 | Reminder
5 | You're a ferret
6 |
--------------------------------------------------------------------------------
/examples/page.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | Segment.io - The ultimate analytics platform
4 |
5 |
6 |
7 |
8 |
9 |
10 |
14 |
15 |
16 |
71 |
72 |
98 |
99 |
100 | Our Customers
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
132 |
133 |
134 |
141 |
142 |
143 |
144 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 |
2 | /**
3 | * Module dependencies.
4 | */
5 |
6 | var debug = require('debug')('xml-parser');
7 |
8 | /**
9 | * Expose `parse`.
10 | */
11 |
12 | module.exports = parse;
13 |
/**
 * Parse the given string of `xml` into a plain object tree.
 *
 * Comments are removed up front; the remaining input is then consumed
 * left to right by the nested helpers below, which all share (and shrink)
 * the `xml` string held in this closure.
 *
 * @param {String} xml
 * @return {Object} `{ declaration, root }`; each is `undefined` when the
 *   corresponding construct is not found at the current position.
 * @api public
 */

function parse(xml) {
  // strip comments first, then trim, so whitespace that follows a leading
  // comment (or precedes a trailing one) cannot hide the declaration/root.
  // `[\s\S]*?` is non-greedy and spans newlines, so every comment is
  // removed individually, including multi-line ones.
  xml = xml.replace(/<!--[\s\S]*?-->/g, '').trim();

  return document();

  /**
   * XML document: optional declaration followed by the root tag.
   */

  function document() {
    return {
      declaration: declaration(),
      root: tag()
    };
  }

  /**
   * Declaration (`<?xml ... ?>`).
   *
   * Returns `undefined` when the input does not start with one, otherwise
   * a node holding the declaration's attributes.
   */

  function declaration() {
    var m = match(/^<\?xml\s*/);
    if (!m) return;

    // tag
    var node = {
      attributes: {}
    };

    // attributes
    while (!(eos() || is('?>'))) {
      var attr = attribute();
      // not an attribute: give up on the rest of the declaration
      if (!attr) return node;
      node.attributes[attr.name] = attr.value;
    }

    match(/^\?>\s*/);

    return node;
  }

  /**
   * Tag.
   *
   * Returns `undefined` when the input does not start with an opening
   * tag. Self-closing tags yield a node without a `content` property.
   */

  function tag() {
    debug('tag %j', xml);
    var m = match(/^<([\w:.-]+)\s*/);
    if (!m) return;

    // name
    var node = {
      name: m[1],
      attributes: {},
      children: []
    };

    // attributes
    while (!(eos() || is('>') || is('?>') || is('/>'))) {
      var attr = attribute();
      // not an attribute: return what was collected so far
      if (!attr) return node;
      node.attributes[attr.name] = attr.value;
    }

    // self closing tag
    if (match(/^\s*\/>\s*/)) {
      return node;
    }

    match(/^\??>\s*/);

    // content (text up to the next `<`)
    node.content = content();

    // children
    var child;
    while ((child = tag())) {
      node.children.push(child);
    }

    // closing
    match(/^<\/[\w:.-]+>\s*/);

    return node;
  }

  /**
   * Text content: everything up to (not including) the next `<`.
   */

  function content() {
    debug('content %j', xml);
    var m = match(/^([^<]*)/);
    return m ? m[1] : '';
  }

  /**
   * Attribute (`name=value`, value double-quoted, single-quoted or bare).
   *
   * The pattern is anchored: an attribute must start at the current
   * position. Unanchored, it could match an attribute of a later tag and
   * `match` would then drop unrelated text from the front of the input.
   */

  function attribute() {
    debug('attribute %j', xml);
    var m = match(/^([\w:-]+)\s*=\s*("[^"]*"|'[^']*'|\w+)\s*/);
    if (!m) return;
    return { name: m[1], value: strip(m[2]) };
  }

  /**
   * Strip one leading and one trailing quote from `val`.
   */

  function strip(val) {
    return val.replace(/^['"]|['"]$/g, '');
  }

  /**
   * Match `re` at the start of the remaining input and advance past it.
   *
   * Returns the match array, or `undefined` when `re` does not match at
   * index 0 (the input is then left untouched).
   */

  function match(re) {
    var m = xml.match(re);
    // advancing by `m[0].length` is only correct for a match at index 0
    if (!m || m.index !== 0) return;
    xml = xml.slice(m[0].length);
    return m;
  }

  /**
   * End-of-source.
   */

  function eos() {
    return xml.length === 0;
  }

  /**
   * Check whether the remaining input starts with `prefix`.
   */

  function is(prefix) {
    // compare only the leading characters instead of scanning all of `xml`
    return xml.slice(0, prefix.length) === prefix;
  }
}
168 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "xml-parser",
3 | "version": "1.2.1",
4 | "repository": "segmentio/xml-parser",
5 | "description": "the little xml parser that could",
6 | "scripts": {
7 | "test": "make test"
8 | },
9 | "keywords": [
10 | "xml",
11 | "sucks"
12 | ],
13 | "dependencies": {
14 | "debug": "^2.2.0"
15 | },
16 | "devDependencies": {
17 | "matcha": "^0.6.0",
18 | "mocha": "^2.2.5",
19 | "should": "^6.0.3"
20 | },
21 | "license": "MIT"
22 | }
--------------------------------------------------------------------------------
/test/index.js:
--------------------------------------------------------------------------------
1 |
/**
 * Mocha tests for the parser. Each case parses an inline XML string and
 * compares the resulting tree using `should`'s deep `eql`.
 */

var parse = require('..');
var should = require('should');

it('should support blank strings', function(){
  var node = parse('');
  node.should.eql({ declaration: undefined, root: undefined });
});

it('should support declarations', function(){
  var node = parse('<?xml version="1.0" ?>');
  node.should.eql({
    declaration: {
      attributes: {
        version: '1.0'
      }
    },
    root: undefined
  });
});

it('should support comments', function(){
  var node = parse('<!-- hello --><foo></foo><!-- world -->');
  node.root.should.eql({
    name: 'foo',
    attributes: {},
    children: [],
    content: ''
  });
});

it('should support tags', function(){
  var node = parse('<foo></foo>');
  node.root.should.eql({
    name: 'foo',
    attributes: {},
    children: [],
    content: ''
  });
});

it('should support tags with text', function(){
  var node = parse('<foo>hello world</foo>');
  node.root.should.eql({
    name: 'foo',
    attributes: {},
    children: [],
    content: 'hello world'
  });
});

it('should support weird whitespace', function(){
  var node = parse('<foo \n\n\nbar\n\n=   \nbaz>\n\nhello world</\n\nfoo>');
  node.root.should.eql({
    name: 'foo',
    attributes: { bar: 'baz' },
    children: [],
    content: 'hello world'
  });
});

it('should support tags with attributes', function(){
  var node = parse('<foo bar=baz some="stuff here" whatever=\'whoop\'></foo>');
  node.root.should.eql({
    name: 'foo',
    attributes: {
      bar: 'baz',
      some: 'stuff here',
      whatever: 'whoop'
    },
    children: [],
    content: ''
  });
});

it('should support nested tags', function(){
  var node = parse('<a><b><c>hello</c></b></a>');
  node.root.should.eql({
    "name": "a",
    "attributes": {},
    "children": [
      {
        "name": "b",
        "attributes": {},
        "children": [
          {
            "name": "c",
            "attributes": {},
            "children": [],
            "content": "hello"
          }
        ],
        "content": ""
      }
    ],
    "content": ""
  });
});

it('should support nested tags with text', function(){
  var node = parse('<a>foo <b>bar <c>baz</c></b></a>');
  node.root.should.eql({
    "name": "a",
    "attributes": {},
    "children": [
      {
        "name": "b",
        "attributes": {},
        "children": [
          {
            "name": "c",
            "attributes": {},
            "children": [],
            "content": "baz"
          }
        ],
        "content": "bar "
      }
    ],
    "content": "foo "
  });
});

it('should support self-closing tags', function () {
  var node = parse('<a><b>foo</b><b a="bar" /><b>bar</b></a>');
  node.root.should.eql({
    "name": "a",
    "attributes": {},
    "children": [
      {
        "name": "b",
        "attributes": {},
        "children": [],
        "content": "foo"
      },
      {
        "name": "b",
        "attributes": {
          "a": "bar"
        },
        "children": []
      },
      {
        "name": "b",
        "attributes": {},
        "children": [],
        "content": "bar"
      }
    ],
    "content": ""
  });
});

it('should support self-closing tags without attributes', function () {
  var node = parse('<a><b>foo</b><b /> <b>bar</b></a>');
  node.root.should.eql({
    "name": "a",
    "attributes": {},
    "children": [
      {
        "name": "b",
        "attributes": {},
        "children": [],
        "content": "foo"
      },
      {
        "name": "b",
        "attributes": {},
        "children": []
      },
      {
        "name": "b",
        "attributes": {},
        "children": [],
        "content": "bar"
      }
    ],
    "content": ""
  });
});

it('should support multi-line comments', function () {
  var node = parse('<a><!-- multi-line\n comment\n test -->foo</a>');
  node.root.should.eql({
    "name": "a",
    "attributes": {},
    "children": [],
    "content": "foo"
  });
});

it('should support attributes with a hyphen', function () {
  var node = parse('<a data-bar="baz">foo</a>');
  node.root.should.eql({
    name: "a",
    attributes: {
      "data-bar": "baz"
    },
    children: [],
    content: "foo"
  });
});

it('should support tags with a dot', function () {
  var node = parse(
    '<root>' +
      '<c:Key.Columns><o:Column Ref="ol1"/></c:Key.Columns>' +
      '<c:Key.Columns><o:Column Ref="ol2"/></c:Key.Columns>' +
    '</root>'
  );
  node.root.should.eql({
    name: "root",
    attributes: {},
    children: [{
      name: "c:Key.Columns",
      attributes: {},
      children: [{
        name: "o:Column",
        attributes: {
          Ref: "ol1"
        },
        children: []
      }],
      content: ""
    }, {
      name: "c:Key.Columns",
      attributes: {},
      children: [{
        name: "o:Column",
        attributes: {
          "Ref": "ol2"
        },
        children: []
      }],
      content: ""
    }],
    content: ""
  });
});

it('should support tags with hyphen', function () {
  var node = parse(
    '<root>' +
      '<data-field1>val1</data-field1>' +
      '<data-field2>val2</data-field2>' +
    '</root>'
  );
  node.root.should.eql({
    name: 'root',
    attributes: {},
    content: '',
    children: [
      {
        name: 'data-field1',
        attributes: {},
        children: [],
        content: 'val1'
      },
      {
        name: 'data-field2',
        attributes: {},
        children: [],
        content: 'val2'
      }
    ]
  });
});
263 |
--------------------------------------------------------------------------------