├── LICENSE
├── README
├── example.js
├── node-rss.js
├── node-xml.js
└── nodeblogs.com.feed.xml


/LICENSE:
--------------------------------------------------------------------------------
 1 | ----------------------------------------------------------------------
 2 | node-rss is released under the MIT License
 3 | 
 4 | Copyright (c) 2010 Rob Searles - http://www.robsearles.com
 5 | 
 6 | Permission is hereby granted, free of charge, to any person
 7 | obtaining a copy of this software and associated documentation
 8 | files (the "Software"), to deal in the Software without
 9 | restriction, including without limitation the rights to use,
10 | copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the
12 | Software is furnished to do so, subject to the following
13 | conditions:
14 | 
15 | The above copyright notice and this permission notice shall be
16 | included in all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 | OTHER DEALINGS IN THE SOFTWARE.
26 | ----------------------------------------------------------------------
27 | node-xml, which node-rss makes heavy use of, is also released under the
28 | MIT License - see http://github.com/robrighter/node-xml for more info
29 | ----------------------------------------------------------------------
30 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | ----------------------------------------------------------------------
 2 | node-rss - an RSS parser for node.
 3 | http://github.com/ibrow/node-rss
 4 | Rob Searles - http://www.robsearles.com
 5 | ----------------------------------------------------------------------
 6 | node-rss makes heavy use of the node-xml module written by 
 7 | Rob Righter - @robrighter
 8 | http://github.com/robrighter/node-xml
 9 | ----------------------------------------------------------------------
10 | node-rss is released under the MIT licence
11 | ----------------------------------------------------------------------
12 | See example.js for working examples of node-rss
13 | 
14 | ----------------------------------------------------------------------
15 | TODO
16 | ----------------------------------------------------------------------
17 | Lots, mainly:
18 |  - error checking
19 |  - writing tests
20 |  - make parsing more robust
21 |  - conform to all specifications
22 | 
23 | ----------------------------------------------------------------------
24 | HISTORY
25 | ----------------------------------------------------------------------
26 | 21 May 2010
27 | Initial release, working on v0.1.95-17-g1036aa9
28 | ----------------------------------------------------------------------
29 | 
30 | ----------------------------------------------------------------------
31 | REFERENCE
32 | ----------------------------------------------------------------------
33 | RSS 2.0 specification
34 | http://cyber.law.harvard.edu/rss/rss.html
35 | 
36 | RSS 1.0 specification
37 | http://web.resource.org/rss/1.0/spec
38 | 
39 | Atom 1.0 specification
40 | http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html


--------------------------------------------------------------------------------
/example.js:
--------------------------------------------------------------------------------
 1 | /**********************************************************************
 2 | example.js
 3 | Example of the node-rss feed parser
 4 | 
 5 | **********************************************************************/
 6 | var sys = require('sys');
 7 | var rss = require('./node-rss');
 8 | 
 9 | 
10 | /**********************************************************************
11 | Example One:
12 | Getting a remote RSS feed and parsing
13 | rss.parseURL(feed_url, use_excerpt, callback);
14 | **********************************************************************/
15 | // URL of the feed you want to parse
16 | var feed_url = 'http://feeds.feedburner.com/github';
17 | 
18 | var response = rss.parseURL(feed_url, function(articles) {
19 |     sys.puts(articles.length);
20 |     for(i=0; i<articles.length; i++) {
21 | 	sys.puts("Article: "+i+", "+
22 | 		 articles[i].title+"\n"+
23 | 		 articles[i].link+"\n"+
24 | 		 articles[i].description+"\n"+
25 | 		 articles[i].content
26 | 		);
27 |     }
28 | });
29 | 
30 | /**********************************************************************
31 | Example Two:
32 | Getting a local RSS feed and parsing
33 | rss.parseFile(feed_file, use_excerpt, callback);
34 | **********************************************************************/
35 | var response = rss.parseFile('nodeblogs.com.feed.xml', function(articles) {
36 |     sys.puts(articles.length);
37 |     for(i=0; i<articles.length; i++) {
38 | 	sys.puts("Article: "+i+", "+
39 | 		 articles[i].title+"\n"+
40 | 		 articles[i].link+"\n"+
41 | 		 articles[i].description+"\n"+
42 | 		 articles[i].content
43 | 		);
44 |     }
45 | });


--------------------------------------------------------------------------------
/node-rss.js:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 |  node-rss - an RSS parser for node.
  3 |  http://github.com/ibrow/node-rss
  4 | 
  5 |  Copyright (c) 2010 Rob Searles
  6 |  http://www.robsearles.com
  7 |  
  8 |  node-rss is released under the MIT license
  9 |   - see LICENSE for more info
 10 | 
 11 |  *********************************************************************
 12 |  node-rss makes heavy use of the node-xml module written by 
 13 |  Rob Righter - @robrighter
 14 |  http://github.com/robrighter/node-xml
 15 | **********************************************************************/
 16 | var sys = require('sys'), http = require('http');
 17 | var xml = require("./node-xml");
 18 | 
 19 | // variable for holding the callback function which is passed to the
 20 | // exported function. This callback is passed the articles array.
 21 | var callback = function() {}; // reassigned by every parseFile()/parseURL() call
 22 | 
 23 | // The main "meat" of this module - parses an rss feed and triggers
 24 | // the callback when done.
 25 | // using node-xml: http://github.com/robrighter/node-xml
 26 | var parser = new xml.SaxParser(function(cb) {
 27 |     var articles = Array();
 28 |     var current_element = false;
 29 |     var article_count = 0;
 30 |     var in_item = false;
 31 |     var current_chars = '';
 32 | 
 33 | 
 34 |     cb.onStartDocument(function() { });
 35 | 
 36 |     // when finished parsing the RSS feed, trigger the callback
 37 |     cb.onEndDocument(function() {	    
 38 | 	callback(articles);
 39 |     });
 40 | 
 41 | 
 42 |     //track what element we are currently in. If it is an <item> this is
 43 |     // an article, add container array to the list of articles
 44 |     cb.onStartElementNS(function(elem, attrs, prefix, uri, namespaces) {
 45 | 	current_element = elem.toLowerCase();
 46 | 	if(current_element == 'item' || current_element == 'entry') {
 47 | 	    in_item = true;
 48 | 	    articles[article_count] = Array();
 49 | 	}
 50 |     });
 51 |     // when we are at the end of an element, save its related content
 52 |     cb.onEndElementNS(function(elem, prefix, uri) {
 53 | 	if(in_item) {
 54 | 	    switch(current_element) 
 55 | 	    {
 56 | 	    case 'description':
 57 | 	    case 'summary':
 58 | 		articles[article_count][current_element] = current_chars.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
 59 | 		break;
 60 | 	    case 'content':
 61 | 	    case 'encoded': // feedburner is <content:encoded>, node-xml reads as <encoded>
 62 | 		current_element = 'content';
 63 | 		articles[article_count][current_element] = current_chars.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
 64 | 		break;
 65 | 	    case 'link':
 66 | 	    case 'title':
 67 | 		articles[article_count][current_element] = current_chars.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
 68 | 		break;
 69 | 	    }
 70 | 	    
 71 | 	    current_element = false;
 72 | 	    current_chars = '';
 73 | 	    if(elem.toLowerCase() == 'item' || elem.toString() == 'entry') {
 74 | 		in_item = false;
 75 | 		article_count ++;   
 76 | 	    }
 77 | 	}
 78 |     });
 79 |     
 80 |     cb.onCharacters(addContent);
 81 |     cb.onCdata(addContent);
 82 |     function addContent(chars) {
 83 | 	if(in_item) {
 84 | 	    current_chars += chars;
 85 | 	}
 86 |     };
 87 | 
 88 |     // @TODO handle warnings and errors properly
 89 |     cb.onWarning(function(msg) {
 90 | 	sys.puts('<WARNING>'+msg+"</WARNING>");
 91 |     });
 92 |     cb.onError(function(msg) {
 93 | 	sys.puts('<ERROR>'+JSON.stringify(msg)+"</ERROR>");
 94 |     });
 95 | });
 96 | 
 97 | 
 98 | /**
 99 |  * parseFile()
100 |  * Parses an RSS feed from a file. 
101 |  * @param file - path to the RSS feed file
102 |  * @param cb - callback function, called with the array of parsed articles
103 |  */
104 | exports.parseFile = function(file, cb) {
105 |     callback = cb; // replaces the module-level callback used by the shared parser
106 |     parser.parseFile(file);
107 | }
108 | /**
109 |  * parseURL()
110 |  * Parses an RSS feed from a URL. 
111 |  * @param url - URL of the RSS feed file
112 |  * @param cb - callback function to be triggered at end of parsing
113 |  *
114 |  * @TODO - decent error checking
115 |  */
116 | exports.parseURL = function(url, cb) {
117 |     callback = cb;
118 | 
119 |     get_rss(url);
120 |     function get_rss(url) {
121 | 	var u = require('url'), http = require('http');
122 | 	var parts = u.parse(url);
123 | 	//sys.puts(JSON.stringify(parts));
124 | 
125 | 	// set the default port to 80
126 | 	if(!parts.port) { parts.port = 80; }
127 | 	
128 | 
129 | 	var redirection_level = 0;
130 |        	var client = http.createClient(parts.port, parts.hostname);
131 | 	var request = client.request('GET', parts.pathname, {'host': parts.hostname});
132 | 	request.addListener('response', function (response) {
133 | 	    //sys.puts('STATUS: ' + response.statusCode);
134 | 	    //sys.puts('HEADERS: ' + JSON.stringify(response.headers));
135 | 
136 | 	    // check to see the type of status
137 | 	    switch(response.statusCode) {
138 | 		// check for ALL OK
139 | 	    case 200:
140 | 		var body = ''; 
141 | 		response.addListener('data', function (chunk) {
142 | 		    body += chunk;
143 | 		});
144 | 		response.addListener('end', function() {
145 | 		    parser.parseString(body);
146 | 		});
147 | 		break;
148 | 		// redirect status returned
149 | 	    case 301:
150 | 	    case 302:
151 | 		if(redirection_level > 10) {
152 | 		    sys.puts("too many redirects");
153 | 		}
154 | 		else {
155 | 		    sys.puts("redirect to "+response.headers.location);
156 | 		    get_rss(response.headers.location);
157 | 		}
158 | 		break;
159 | 	    default:
160 | 		/*
161 | 		response.setEncoding('utf8');
162 | 		response.addListener('data', function (chunk) {
163 | 		    //sys.puts('BODY: ' + chunk);
164 | 		});
165 | */
166 | 		break;
167 | 	    }	  
168 | 	});
169 | 	request.end();	
170 |     }
171 | };


--------------------------------------------------------------------------------
/node-xml.js:
--------------------------------------------------------------------------------
   1 | // node-xml
   2 | // An xml parser for node.js
   3 | // (C) Rob Righter (@robrighter) 2009 - 2010, Licensed under the MIT-LICENSE
   4 | // Contributions from David Joham
   5 | 
   6 | 
   7 | (function () {
   8 | 
   9 | // CONSTANTS
  10 | var whitespace = "\n\r\t ";
  11 | 
  12 | 
  13 | //XMLP is a pull-based parser. The calling application passes in a XML string
  14 | //to the constructor, then repeatedly calls .next() to parse the next segment.
  15 | //.next() returns a flag indicating what type of segment was found, and stores
  16 | //data temporarily in a couple of member variables (name, content, array of
  17 | //attributes), which can be accessed by several .get____() methods.
  18 | //
  19 | //Basically, XMLP is the lowest common denominator parser - a very simple
  20 | //API which other wrappers can be built against.
  21 | 
  22 | 
  23 | var XMLP = function(strXML) { // pull parser over the XML text strXML
  24 |     // Normalize line breaks
  25 |     strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n");
  26 |     strXML = SAXStrings.replace(strXML, null, null, "\r", "\n");
  27 | 
  28 |     this.m_xml = strXML; // text being parsed
  29 |     this.m_iP = 0; // current parse position (index into m_xml)
  30 |     this.m_iState = XMLP._STATE_PROLOG; // document-structure state, one of XMLP._STATE_*
  31 |     this.m_stack = new Stack(); // names of the elements that are currently open
  32 |     this._clearAttributes();
  33 |     this.m_pause = false; // while true, next() returns XMLP._INTERRUPT instead of parsing
  34 |     this.m_preInterruptIState = XMLP._STATE_PROLOG; // state saved by next() when paused, restored by resume()
  35 |     this.m_namespaceList = new Array(); // namespace declarations in scope: {prefix, uri, scopetag}
  36 |     this.m_chunkTransitionContinuation = null; // unfinished tail of a chunk, re-attached by continueParsing()
  37 | 
  38 | }
  39 | 
  40 | 
  41 | // CONSTANTS    (these must be below the constructor)
  42 | XMLP._NONE    = 0; // event codes returned by _parse()/next(); _NONE is returned at the end of the input
  43 | XMLP._ELM_B   = 1; // opening tag
  44 | XMLP._ELM_E   = 2; // closing tag
  45 | XMLP._ELM_EMP = 3; // empty-element tag
  46 | XMLP._ATT     = 4;
  47 | XMLP._TEXT    = 5;
  48 | XMLP._ENTITY  = 6;
  49 | XMLP._PI      = 7;
  50 | XMLP._CDATA   = 8;
  51 | XMLP._COMMENT = 9;
  52 | XMLP._DTD     = 10;
  53 | XMLP._ERROR   = 11;
  54 | XMLP._INTERRUPT = 12; // parser is paused, or the current chunk ended inside a token
  55 | 
  56 | XMLP._CONT_XML = 0; // content source read by getContent(): the document text m_xml ...
  57 | XMLP._CONT_ALT = 1; // ... or the alternate buffer m_cAlt
  58 | 
  59 | XMLP._ATT_NAME = 0; // indexes into a [name, value] attribute pair
  60 | XMLP._ATT_VAL  = 1;
  61 | 
  62 | XMLP._STATE_PROLOG = 1; // document-structure states kept in m_iState by _checkStructure()
  63 | XMLP._STATE_DOCUMENT = 2;
  64 | XMLP._STATE_MISC = 3;
  65 | 
  66 | XMLP._errs = new Array(); // error messages, indexed by the XMLP.ERR_* codes assigned below
  67 | XMLP._errs[XMLP.ERR_CLOSE_PI       = 0 ] = "PI: missing closing sequence";
  68 | XMLP._errs[XMLP.ERR_CLOSE_DTD      = 1 ] = "DTD: missing closing sequence";
  69 | XMLP._errs[XMLP.ERR_CLOSE_COMMENT  = 2 ] = "Comment: missing closing sequence";
  70 | XMLP._errs[XMLP.ERR_CLOSE_CDATA    = 3 ] = "CDATA: missing closing sequence";
  71 | XMLP._errs[XMLP.ERR_CLOSE_ELM      = 4 ] = "Element: missing closing sequence";
  72 | XMLP._errs[XMLP.ERR_CLOSE_ENTITY   = 5 ] = "Entity: missing closing sequence";
  73 | XMLP._errs[XMLP.ERR_PI_TARGET      = 6 ] = "PI: target is required";
  74 | XMLP._errs[XMLP.ERR_ELM_EMPTY      = 7 ] = "Element: cannot be both empty and closing";
  75 | XMLP._errs[XMLP.ERR_ELM_NAME       = 8 ] = "Element: name must immediatly follow \"<\"";
  76 | XMLP._errs[XMLP.ERR_ELM_LT_NAME    = 9 ] = "Element: \"<\" not allowed in element names";
  77 | XMLP._errs[XMLP.ERR_ATT_VALUES     = 10] = "Attribute: values are required and must be in quotes";
  78 | XMLP._errs[XMLP.ERR_ATT_LT_NAME    = 11] = "Element: \"<\" not allowed in attribute names";
  79 | XMLP._errs[XMLP.ERR_ATT_LT_VALUE   = 12] = "Attribute: \"<\" not allowed in attribute values";
  80 | XMLP._errs[XMLP.ERR_ATT_DUP        = 13] = "Attribute: duplicate attributes not allowed";
  81 | XMLP._errs[XMLP.ERR_ENTITY_UNKNOWN = 14] = "Entity: unknown entity";
  82 | XMLP._errs[XMLP.ERR_INFINITELOOP   = 15] = "Infininte loop";
  83 | XMLP._errs[XMLP.ERR_DOC_STRUCTURE  = 16] = "Document: only comments, processing instructions, or whitespace allowed outside of document element";
  84 | XMLP._errs[XMLP.ERR_ELM_NESTING    = 17] = "Element: must be nested correctly";
  85 | 
  86 | 
  87 | 
  88 | XMLP.prototype.continueParsing = function(strXML) { // hands the next chunk of XML text to this parser
  89 | 
  90 |     if(this.m_chunkTransitionContinuation){
  91 |         strXML = this.m_chunkTransitionContinuation + strXML; // re-attach the unfinished token saved from the previous chunk
  92 |     }
  93 |     // Normalize line breaks
  94 |     strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n");
  95 |     strXML = SAXStrings.replace(strXML, null, null, "\r", "\n");
  96 | 
  97 |     this.m_xml = strXML;
  98 |     this.m_iP = 0; // positions are relative to the new chunk
  99 |     this.m_iState = XMLP._STATE_DOCUMENT; // NOTE(review): always resumes in DOCUMENT state, whatever state the previous chunk ended in - confirm intended
 100 |     //this.m_stack = new Stack();
 101 |     //this._clearAttributes();
 102 |     this.m_pause = false;
 103 |     this.m_preInterruptIState = XMLP._STATE_PROLOG;
 104 |     this.m_chunkTransitionContinuation = null;
 105 | 
 106 | }
 107 | 
 108 | XMLP.prototype._addAttribute = function(name, value) {
 109 |     this.m_atts[this.m_atts.length] = new Array(name, value);
 110 | }
 111 | 
 112 | XMLP.prototype._checkStructure = function(iEvent) {
 113 | 	if(XMLP._STATE_PROLOG == this.m_iState) {
 114 | 		if((XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent)) {
 115 |             if(SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) {
 116 | 				return this._setErr(XMLP.ERR_DOC_STRUCTURE);
 117 |             }
 118 |         }
 119 | 
 120 |         if((XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent)) {
 121 |             this.m_iState = XMLP._STATE_DOCUMENT;
 122 |             // Don't return - fall through to next state
 123 |         }
 124 |     }
 125 |     if(XMLP._STATE_DOCUMENT == this.m_iState) {
 126 |         if((XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent)) {
 127 |             this.m_stack.push(this.getName());
 128 |         }
 129 | 
 130 |         if((XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent)) {
 131 |             var strTop = this.m_stack.pop();
 132 |             if((strTop == null) || (strTop != this.getName())) {
 133 |                 return this._setErr(XMLP.ERR_ELM_NESTING);
 134 |             }
 135 |         }
 136 | 
 137 |         if(this.m_stack.count() == 0) {
 138 |             this.m_iState = XMLP._STATE_MISC;
 139 |             return iEvent;
 140 |         }
 141 |     }
 142 |     if(XMLP._STATE_MISC == this.m_iState) {
 143 | 		if((XMLP._ELM_B == iEvent) || (XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent) || (XMLP.EVT_DTD == iEvent)) {
 144 | 			return this._setErr(XMLP.ERR_DOC_STRUCTURE);
 145 |         }
 146 | 
 147 |         if((XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent)) {
 148 | 			if(SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) {
 149 | 				return this._setErr(XMLP.ERR_DOC_STRUCTURE);
 150 |             }
 151 |         }
 152 |     }
 153 | 
 154 |     return iEvent;
 155 | 
 156 | }
 157 | 
 158 | XMLP.prototype._clearAttributes = function() {
 159 |     this.m_atts = new Array();
 160 | }
 161 | 
 162 | XMLP.prototype._findAttributeIndex = function(name) {
 163 |     for(var i = 0; i < this.m_atts.length; i++) {
 164 |         if(this.m_atts[i][XMLP._ATT_NAME] == name) {
 165 |             return i;
 166 |         }
 167 |     }
 168 |     return -1;
 169 | 
 170 | }
 171 | 
 172 | XMLP.prototype.getAttributeCount = function() { // number of collected attributes; 0 when there is no attribute list
 173 |     return this.m_atts ? this.m_atts.length : 0;
 174 | }
 175 | 
 176 | XMLP.prototype.getAttributeName = function(index) { // name of the attribute at index, or null when index is out of range
 177 |     return ((index < 0) || (index >= this.m_atts.length)) ? null : this.m_atts[index][XMLP._ATT_NAME];
 178 | }
 179 | 
 180 | XMLP.prototype.getAttributeValue = function(index) { // value of the attribute at index passed through __unescapeString, or null when index is out of range
 181 |     return ((index < 0) || (index >= this.m_atts.length)) ? null : __unescapeString(this.m_atts[index][XMLP._ATT_VAL]);
 182 | }
 183 | 
 184 | XMLP.prototype.getAttributeValueByName = function(name) { // value of the attribute called name; null when absent (the lookup yields -1, which is out of range)
 185 |     return this.getAttributeValue(this._findAttributeIndex(name));
 186 | }
 187 | 
 188 | XMLP.prototype.getColumnNumber = function() { // column of the current parse position, as computed by SAXStrings.getColumnNumber
 189 |     return SAXStrings.getColumnNumber(this.m_xml, this.m_iP);
 190 | }
 191 | 
 192 | XMLP.prototype.getContent = function() { // string holding the current content: m_xml when m_cSrc is _CONT_XML, otherwise the alternate buffer m_cAlt
 193 |     return (this.m_cSrc == XMLP._CONT_XML) ? this.m_xml : this.m_cAlt;
 194 | }
 195 | 
 196 | XMLP.prototype.getContentBegin = function() { // begin index of the current content (m_cB) within the string returned by getContent()
 197 |     return this.m_cB;
 198 | }
 199 | 
 200 | XMLP.prototype.getContentEnd = function() { // end index of the current content (m_cE) within the string returned by getContent()
 201 |     return this.m_cE;
 202 | }
 203 | 
 204 | XMLP.prototype.getLineNumber = function() { // line of the current parse position, as computed by SAXStrings.getLineNumber
 205 |     return SAXStrings.getLineNumber(this.m_xml, this.m_iP);
 206 | }
 207 | 
 208 | XMLP.prototype.getName = function() { // name stored by the most recent parse step (m_name)
 209 |     return this.m_name;
 210 | }
 211 | 
 212 | XMLP.prototype.pause = function(){ // makes subsequent next() calls return XMLP._INTERRUPT until resume() is called
 213 |     this.m_pause = true;
 214 | }
 215 | 
 216 | XMLP.prototype.resume = function(){ // clears the pause flag and restores the state that next() saved while paused
 217 |     this.m_pause = false;
 218 |     this.m_iState = this.m_preInterruptIState;
 219 | }
 220 | 
 221 | XMLP.prototype.next = function() { // parses the next segment and returns its structure-checked event code
 222 |     if(!this.m_pause){
 223 |         return this._checkStructure(this._parse());
 224 |     }
 225 |     else{
 226 |         //paused: save the current structure state (restored by resume()) and report an interrupt
 227 |         this.m_preInterruptIState = this.m_iState;
 228 |         return XMLP._INTERRUPT;
 229 |     }
 230 | }
 231 | 
 232 | XMLP.prototype._parse = function() {
 233 |     if(this.m_iP == this.m_xml.length) {
 234 |         return XMLP._NONE;
 235 |     }
 236 | 
 237 |     if(this.m_iP == this.m_xml.indexOf("<?",        this.m_iP)) {
 238 |         return this._parsePI     (this.m_iP + 2);
 239 |     }
 240 |     else if(this.m_iP == this.m_xml.indexOf("<!DOCTYPE", this.m_iP)) {
 241 |         return this._parseDTD    (this.m_iP + 9);
 242 |     }
 243 |     else if(this.m_iP == this.m_xml.indexOf("<!--",      this.m_iP)) {
 244 |         return this._parseComment(this.m_iP + 4);
 245 |     }
 246 |     else if(this.m_iP == this.m_xml.indexOf("<![CDATA[", this.m_iP)) {
 247 |         return this._parseCDATA  (this.m_iP + 9);
 248 |     }
 249 |     else if(this.m_iP == this.m_xml.indexOf("<",         this.m_iP)) {
 250 |         return this._parseElement(this.m_iP + 1);
 251 |     }
 252 |     else if(this.m_iP == this.m_xml.indexOf("&",         this.m_iP)) {
 253 |         return this._parseEntity (this.m_iP + 1);
 254 |     }
 255 |     else{
 256 |         return this._parseText   (this.m_iP);
 257 |     }
 258 | 
 259 | 
 260 | }
 261 | 
 262 | ////////// NAMESPACE SUPPORT //////////////////////////////////////////
 263 | XMLP.prototype._parsePrefixAndElementName = function (elementlabel){
 264 |     splits = elementlabel.split(':',2);
 265 |     return { prefix : ((splits.length === 1) ? '' : splits[0]), name : ((splits.length === 1) ? elementlabel : splits[1]), };
 266 | }
 267 | 
 268 | XMLP.prototype._parseNamespacesAndAtts = function (atts){
 269 |    //translate namespaces into objects with "prefix","uri", "scopetag" Add them to: this.m_namespaceList
 270 |    //The function should return a new list of tag attributes with the namespaces filtered
 271 |     that = this;
 272 |     var newnamespaces = [];
 273 |     var filteredatts = [];
 274 |     atts.map(function (item){
 275 |         if(item[0].slice(0,5) === "xmlns"){
 276 |             newnamespaces.push({
 277 |                                    prefix : item[0].slice(6),
 278 |                                    uri : item[1],
 279 |                                    scopetag : that.m_name,
 280 |                                 });
 281 |         }
 282 |         else{
 283 |             filteredatts.push(item);
 284 |         }
 285 |         return "not used";
 286 |     });
 287 |     this.m_namespaceList = this.m_namespaceList.concat(newnamespaces);
 288 |     return [ filteredatts, newnamespaces.map(function(item){return [item.prefix,item.uri];}) ];
 289 | }
 290 | 
 291 | XMLP.prototype._getContextualNamespace = function (prefix){
 292 |     if(prefix !== ''){
 293 |         for(item in this.m_namespaceList){
 294 |             item = this.m_namespaceList[item];
 295 |             if(item.prefix === prefix){
 296 |                 return item.uri;
 297 |             }
 298 |         }
 299 |     }
 300 | 
 301 |     //no match was found for the prefix so pop off the first non-prefix namespace
 302 |     for(var i = (this.m_namespaceList.length-1); i>= 0; i--){
 303 |         var item = this.m_namespaceList[i];
 304 |         if(item.prefix === ''){
 305 |             return item.uri;
 306 |         }
 307 |     }
 308 | 
 309 |     //still nothing, lets just return an empty string
 310 |     return '';
 311 | }
 312 | 
 313 | XMLP.prototype._removeExpiredNamesapces = function (closingtagname) {
 314 |     //remove the expiring namespaces from the list (you can id them by scopetag)
 315 |     var keeps = [];
 316 |     this.m_namespaceList.map(function (item){
 317 |         if(item.scopetag !== closingtagname){
 318 |             keeps.push(item);
 319 |         }
 320 |     });
 321 | 
 322 |     this.m_namespaceList = keeps;
 323 | 
 324 | }
 325 | 
 326 | ////////////////////////////////////////////////////////////////////////
 327 | 
 328 | 
 329 | XMLP.prototype._parseAttribute = function(iB, iE) {
 330 |     var iNB, iNE, iEq, iVB, iVE;
 331 |     var cQuote, strN, strV;
 332 | 
 333 | 	this.m_cAlt = ""; //resets the value so we don't use an old one by accident (see testAttribute7 in the test suite)
 334 | 
 335 | 	iNB = SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iE);
 336 |     if((iNB == -1) ||(iNB >= iE)) {
 337 |         return iNB;
 338 |     }
 339 | 
 340 |     iEq = this.m_xml.indexOf("=", iNB);
 341 |     if((iEq == -1) || (iEq > iE)) {
 342 |         return this._setErr(XMLP.ERR_ATT_VALUES);
 343 |     }
 344 | 
 345 |     iNE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iNB, iEq);
 346 | 
 347 |     iVB = SAXStrings.indexOfNonWhitespace(this.m_xml, iEq + 1, iE);
 348 |     if((iVB == -1) ||(iVB > iE)) {
 349 |         return this._setErr(XMLP.ERR_ATT_VALUES);
 350 |     }
 351 | 
 352 |     cQuote = this.m_xml.charAt(iVB);
 353 |     if(SAXStrings.QUOTES.indexOf(cQuote) == -1) {
 354 |         return this._setErr(XMLP.ERR_ATT_VALUES);
 355 |     }
 356 | 
 357 |     iVE = this.m_xml.indexOf(cQuote, iVB + 1);
 358 |     if((iVE == -1) ||(iVE > iE)) {
 359 |         return this._setErr(XMLP.ERR_ATT_VALUES);
 360 |     }
 361 | 
 362 |     strN = this.m_xml.substring(iNB, iNE + 1);
 363 |     strV = this.m_xml.substring(iVB + 1, iVE);
 364 | 
 365 |     if(strN.indexOf("<") != -1) {
 366 |         return this._setErr(XMLP.ERR_ATT_LT_NAME);
 367 |     }
 368 | 
 369 |     if(strV.indexOf("<") != -1) {
 370 |         return this._setErr(XMLP.ERR_ATT_LT_VALUE);
 371 |     }
 372 | 
 373 |     strV = SAXStrings.replace(strV, null, null, "\n", " ");
 374 |     strV = SAXStrings.replace(strV, null, null, "\t", " ");
 375 | 	iRet = this._replaceEntities(strV);
 376 |     if(iRet == XMLP._ERROR) {
 377 |         return iRet;
 378 |     }
 379 | 
 380 |     strV = this.m_cAlt;
 381 | 
 382 |     if(this._findAttributeIndex(strN) == -1) {
 383 |         this._addAttribute(strN, strV);
 384 |     }
 385 |     else {
 386 |         return this._setErr(XMLP.ERR_ATT_DUP);
 387 |     }
 388 | 
 389 |     this.m_iP = iVE + 2;
 390 | 
 391 |     return XMLP._ATT;
 392 | 
 393 | }
 394 | 
 395 | XMLP.prototype._parseCDATA = function(iB) { // iB is the index just after "<![CDATA["; returns XMLP._CDATA with the content set to the section body
 396 |     var iE = this.m_xml.indexOf("]]>", iB);
 397 |     if (iE == -1) {
 398 |         //This item never closes. Although it could be a malformed document, we assume that we are mid-chunk: save the string and return as interrupted
 399 |         this.m_chunkTransitionContinuation = this.m_xml.slice(iB-9);//the '-9' adds the '<![CDATA[' back into the string
 400 |         return XMLP._INTERRUPT;
 401 |         //return this._setErr(XMLP.ERR_CLOSE_CDATA);
 402 |     }
 403 | 
 404 |     this._setContent(XMLP._CONT_XML, iB, iE);
 405 | 
 406 |     this.m_iP = iE + 3; // continue after the "]]>"
 407 | 
 408 |     return XMLP._CDATA;
 409 | 
 410 | }
 411 | 
 412 | XMLP.prototype._parseComment = function(iB) { // iB is the index just after "<!--"; returns XMLP._COMMENT with the content set to the comment text
 413 |     var iE = this.m_xml.indexOf("-" + "->", iB);
 414 |     if (iE == -1) {
 415 |         //This item never closes. Although it could be a malformed document, we assume that we are mid-chunk: save the string and return as interrupted
 416 |         this.m_chunkTransitionContinuation = this.m_xml.slice(iB-4);//the '-4' adds the '<!--' back into the string
 417 |         return XMLP._INTERRUPT;
 418 |         //return this._setErr(XMLP.ERR_CLOSE_COMMENT);
 419 |     }
 420 | 
 421 |     this._setContent(XMLP._CONT_XML, iB, iE);
 422 | 
 423 |     this.m_iP = iE + 3; // continue after the closing sequence
 424 | 
 425 |     return XMLP._COMMENT;
 426 | 
 427 | }
 428 | 
 429 | XMLP.prototype._parseDTD = function(iB) { // iB is the index just after "<!DOCTYPE"; returns XMLP._DTD once the declaration has been skipped
 430 |     // Eat DTD
 431 |     var iE, strClose, iInt, iLast;
 432 | 
 433 |     iE = this.m_xml.indexOf(">", iB);
 434 |     if(iE == -1) {
 435 |         //This item never closes. Although it could be a malformed document, we assume that we are mid-chunk: save the string and return as interrupted
 436 |         this.m_chunkTransitionContinuation = this.m_xml.slice(iB-9);//the '-9' adds the '<!DOCTYPE' back into the string
 437 |         return XMLP._INTERRUPT;
 438 |         //return this._setErr(XMLP.ERR_CLOSE_DTD);
 439 |     }
 440 | 
 441 |     iInt = this.m_xml.indexOf("[", iB);
 442 |     strClose = ((iInt != -1) && (iInt < iE)) ? "]>" : ">"; // a "[" before the first ">" selects "]>" as the closing sequence
 443 | 
 444 |     while(true) {
 445 |         // DEBUG: Remove
 446 |         if(iE == iLast) {
 447 |             return this._setErr(XMLP.ERR_INFINITELOOP);
 448 |         }
 449 | 
 450 |         iLast = iE;
 451 |         // DEBUG: Remove End
 452 | 
 453 |         iE = this.m_xml.indexOf(strClose, iB); // NOTE(review): always searches from iB, so a second pass finds the same index and trips the loop guard above
 454 |         if(iE == -1) {
 455 |             return this._setErr(XMLP.ERR_CLOSE_DTD);
 456 |         }
 457 | 
 458 |         // Make sure it is not the end of a CDATA section
 459 |         if (this.m_xml.substring(iE - 1, iE + 2) != "]]>") {
 460 |             break;
 461 |         }
 462 |     }
 463 | 
 464 |     this.m_iP = iE + strClose.length;
 465 | 
 466 |     return XMLP._DTD;
 467 | 
 468 | }
 469 | 
 470 | XMLP.prototype._parseElement = function(iB) { // iB is the index just after "<"; returns _ELM_B, _ELM_E or _ELM_EMP and stores the tag name in m_name
 471 |     sys = require('sys'); // NOTE(review): assigns an undeclared (global) sys that this function never reads - confirm nothing else relies on it
 472 |     var iE, iDE, iNE, iRet;
 473 |     var iType, strN, iLast;
 474 | 
 475 |     iDE = iE = this.m_xml.indexOf(">", iB);
 476 |     if(iE == -1) {
 477 |         //This element never closes. Although it could be a malformed document, we assume that we are mid-chunk: save the string and return as interrupted
 478 |         this.m_chunkTransitionContinuation = this.m_xml.slice(iB-1);//the '-1' adds the '<' back into the string
 479 |         return XMLP._INTERRUPT;
 480 |         //return this._setErr(XMLP.ERR_CLOSE_ELM);
 481 |     }
 482 | 
 483 |     if(this.m_xml.charAt(iB) == "/") { // "</name>": closing tag, the name starts after the slash
 484 |         iType = XMLP._ELM_E;
 485 |         iB++;
 486 |     } else {
 487 |         iType = XMLP._ELM_B;
 488 |     }
 489 | 
 490 |     if(this.m_xml.charAt(iE - 1) == "/") { // "<name/>": empty-element tag
 491 |         if(iType == XMLP._ELM_E) {
 492 |             return this._setErr(XMLP.ERR_ELM_EMPTY);
 493 |         }
 494 |         iType = XMLP._ELM_EMP;
 495 |         iDE--;
 496 |     }
 497 | 
 498 |     iDE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iB, iDE);
 499 | 
 500 |     //djohack
 501 |     //hack to allow for elements with single character names to be recognized
 502 | 
 503 |     if (iE - iB != 1 ) {
 504 |         if(SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iDE) != iB) {
 505 |             return this._setErr(XMLP.ERR_ELM_NAME);
 506 |         }
 507 |     }
 508 |     // end hack -- original code below
 509 | 
 510 |     /*
 511 |     if(SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iDE) != iB)
 512 |         return this._setErr(XMLP.ERR_ELM_NAME);
 513 |     */
 514 |     this._clearAttributes();
 515 | 
 516 |     iNE = SAXStrings.indexOfWhitespace(this.m_xml, iB, iDE);
 517 |     if(iNE == -1) { // no whitespace found: the name runs to iDE and there are no attributes to parse
 518 |         iNE = iDE + 1;
 519 |     }
 520 |     else {
 521 |         this.m_iP = iNE;
 522 |         while(this.m_iP < iDE) { // parse attributes one at a time; _parseAttribute moves m_iP forward
 523 |             // DEBUG: Remove
 524 |             if(this.m_iP == iLast) return this._setErr(XMLP.ERR_INFINITELOOP);
 525 |             iLast = this.m_iP;
 526 |             // DEBUG: Remove End
 527 | 
 528 | 
 529 |             iRet = this._parseAttribute(this.m_iP, iDE);
 530 |             if(iRet == XMLP._ERROR) return iRet;
 531 |         }
 532 |     }
 533 | 
 534 |     strN = this.m_xml.substring(iB, iNE);
 535 | 
 536 |     if(strN.indexOf("<") != -1) {
 537 |         return this._setErr(XMLP.ERR_ELM_LT_NAME);
 538 |     }
 539 | 
 540 |     this.m_name = strN;
 541 |     this.m_iP = iE + 1; // continue after the closing ">"
 542 | 
 543 |     return iType;
 544 | 
 545 | }
 546 | 
 547 | XMLP.prototype._parseEntity = function(iB) {
 548 |     var iE = this.m_xml.indexOf(";", iB);
 549 |     if(iE == -1) {
 550 |         //This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted
 551 |         this.m_chunkTransitionContinuation = this.m_xml.slice(iB-1);//the '-1' adds the '&' back into the string
 552 |         return XMLP._INTERRUPT;
 553 |         //return this._setErr(XMLP.ERR_CLOSE_ENTITY);
 554 |     }
 555 | 
 556 |     this.m_iP = iE + 1;
 557 | 
 558 |     return this._replaceEntity(this.m_xml, iB, iE);
 559 | 
 560 | }
 561 | 
 562 | XMLP.prototype._parsePI = function(iB) {
 563 |     var iE, iTB, iTE, iCB, iCE;
 564 | 
 565 |     iE = this.m_xml.indexOf("?>", iB);
 566 |     if(iE   == -1) {
 567 |         //This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted
 568 |         this.m_chunkTransitionContinuation = this.m_xml.slice(iB-2);//the '-2' adds the '?>' back into the string
 569 |         return XMLP._INTERRUPT;
 570 |         return this._setErr(XMLP.ERR_CLOSE_PI);
 571 |     }
 572 | 
 573 |     iTB = SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iE);
 574 |     if(iTB == -1) {
 575 |         return this._setErr(XMLP.ERR_PI_TARGET);
 576 |     }
 577 | 
 578 |     iTE = SAXStrings.indexOfWhitespace(this.m_xml, iTB, iE);
 579 |     if(iTE  == -1) {
 580 |         iTE = iE;
 581 |     }
 582 | 
 583 |     iCB = SAXStrings.indexOfNonWhitespace(this.m_xml, iTE, iE);
 584 |     if(iCB == -1) {
 585 |         iCB = iE;
 586 |     }
 587 | 
 588 |     iCE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iCB, iE);
 589 |     if(iCE  == -1) {
 590 |         iCE = iE - 1;
 591 |     }
 592 | 
 593 |     this.m_name = this.m_xml.substring(iTB, iTE);
 594 |     this._setContent(XMLP._CONT_XML, iCB, iCE + 1);
 595 |     this.m_iP = iE + 2;
 596 | 
 597 |     return XMLP._PI;
 598 | 
 599 | }
 600 | 
 601 | XMLP.prototype._parseText = function(iB) {
 602 |     var iE, iEE;
 603 | 
 604 |     iE = this.m_xml.indexOf("<", iB);
 605 |     if(iE == -1) {
 606 |         iE = this.m_xml.length;
 607 |     }
 608 | 
 609 |     iEE = this.m_xml.indexOf("&", iB);
 610 |     if((iEE != -1) && (iEE <= iE)) {
 611 |         iE = iEE;
 612 |     }
 613 | 
 614 |     this._setContent(XMLP._CONT_XML, iB, iE);
 615 | 
 616 |     this.m_iP = iE;
 617 | 
 618 |     return XMLP._TEXT;
 619 | 
 620 | }
 621 | 
 622 | XMLP.prototype._replaceEntities = function(strD, iB, iE) {
 623 |     if(SAXStrings.isEmpty(strD)) return "";
 624 |     iB = iB || 0;
 625 |     iE = iE || strD.length;
 626 | 
 627 | 
 628 |     var iEB, iEE, strRet = "";
 629 | 
 630 |     iEB = strD.indexOf("&", iB);
 631 |     iEE = iB;
 632 | 
 633 |     while((iEB > 0) && (iEB < iE)) {
 634 |         strRet += strD.substring(iEE, iEB);
 635 | 
 636 |         iEE = strD.indexOf(";", iEB) + 1;
 637 | 
 638 |         if((iEE == 0) || (iEE > iE)) {
 639 |             return this._setErr(XMLP.ERR_CLOSE_ENTITY);
 640 |         }
 641 | 
 642 |         iRet = this._replaceEntity(strD, iEB + 1, iEE - 1);
 643 |         if(iRet == XMLP._ERROR) {
 644 |             return iRet;
 645 |         }
 646 | 
 647 |         strRet += this.m_cAlt;
 648 | 
 649 |         iEB = strD.indexOf("&", iEE);
 650 |     }
 651 | 
 652 |     if(iEE != iE) {
 653 |         strRet += strD.substring(iEE, iE);
 654 |     }
 655 | 
 656 |     this._setContent(XMLP._CONT_ALT, strRet);
 657 | 
 658 |     return XMLP._ENTITY;
 659 | 
 660 | }
 661 | 
 662 | XMLP.prototype._replaceEntity = function(strD, iB, iE) {
 663 |     if(SAXStrings.isEmpty(strD)) return -1;
 664 |     iB = iB || 0;
 665 |     iE = iE || strD.length;
 666 | 
 667 |     switch(strD.substring(iB, iE)) {
 668 |         case "amp":  strEnt = "&";  break;
 669 |         case "lt":   strEnt = "<";  break;
 670 |         case "gt":   strEnt = ">";  break;
 671 |         case "apos": strEnt = "'";  break;
 672 |         case "quot": strEnt = "\""; break;
 673 |         case "nbsp":strEnt = ''; break;
 674 |         case "lt":strEnt = '<'; break;
 675 |         case "gt":strEnt = '>'; break;
 676 |         case "amp":strEnt = '&'; break;
 677 |         case "cent":strEnt = "¢"; break;
 678 |         case "pound":strEnt = '£'; break;
 679 |         case "yen":strEnt = '¥'; break;
 680 |         case "euro":strEnt = '€'; break;
 681 |         case "sect":strEnt = '§'; break;
 682 |         case "copy":strEnt = '©'; break;
 683 |         case "reg":strEnt = '®'; break;
 684 |         default:
 685 |             if(strD.charAt(iB) == "#") {
 686 |                 strEnt = String.fromCharCode(parseInt(strD.substring(iB + 1, iE)));
 687 |             } else {
 688 |                 strEnt = ' ';
 689 |                 //return this._setErr(XMLP.ERR_ENTITY_UNKNOWN);
 690 |             }
 691 |         break;
 692 |     }
 693 |     this._setContent(XMLP._CONT_ALT, strEnt);
 694 | 
 695 |     return XMLP._ENTITY;
 696 | }
 697 | 
 698 | XMLP.prototype._setContent = function(iSrc) {
 699 |     var args = arguments;
 700 | 
 701 |     if(XMLP._CONT_XML == iSrc) {
 702 |         this.m_cAlt = null;
 703 |         this.m_cB = args[1];
 704 |         this.m_cE = args[2];
 705 |     } else {
 706 |         this.m_cAlt = args[1];
 707 |         this.m_cB = 0;
 708 |         this.m_cE = args[1].length;
 709 |     }
 710 |     this.m_cSrc = iSrc;
 711 | 
 712 | }
 713 | 
 714 | XMLP.prototype._setErr = function(iErr) {
 715 |     var strErr = XMLP._errs[iErr];
 716 | 
 717 |     this.m_cAlt = strErr;
 718 |     this.m_cB = 0;
 719 |     this.m_cE = strErr.length;
 720 |     this.m_cSrc = XMLP._CONT_ALT;
 721 | 
 722 |     return XMLP._ERROR;
 723 | 
 724 | }  // end function _setErr
 725 | 
 726 | 
 727 | //SaxParser is an object that basically wraps an XMLP instance, and provides an
 728 | //event-based interface for parsing. This is the object users interact with when coding
 729 | //with XML for <SCRIPT>
 730 | var SaxParser = function(eventhandlerfactory) {
 731 | 
 732 |     var eventhandler = new function(){
 733 | 
 734 |     }
 735 | 
 736 |     var thehandler = function() {};
 737 |     thehandler.prototype.onStartDocument = function (funct){
 738 |       eventhandler.onStartDocument = funct;
 739 |     }
 740 |     thehandler.prototype.onEndDocument = function (funct){
 741 |       eventhandler.onEndDocument = funct;
 742 |     }
 743 |     thehandler.prototype.onStartElementNS = function (funct){
 744 |       eventhandler.onStartElementNS = funct;
 745 |     }
 746 |     thehandler.prototype.onEndElementNS = function (funct){
 747 |       eventhandler.onEndElementNS = funct;
 748 |     }
 749 |     thehandler.prototype.onCharacters = function(funct) {
 750 |       eventhandler.onCharacters = funct;
 751 |     }
 752 |     thehandler.prototype.onCdata = function(funct) {
 753 |       eventhandler.onCdata = funct;
 754 |     }
 755 |     thehandler.prototype.onComment = function(funct) {
 756 |       eventhandler.onComment = funct;
 757 |     }
 758 |     thehandler.prototype.onWarning = function(funct) {
 759 |       eventhandler.onWarning = funct;
 760 |     }
 761 | 
 762 |     thehandler.prototype.onError = function(funct) {
 763 |       eventhandler.onError = funct;
 764 |     }
 765 | 
 766 | 
 767 |     eventhandlerfactory(new thehandler());
 768 |     //eventhandler = eventhandler(eventhandler);
 769 |     this.m_hndDoc = eventhandler;
 770 |     this.m_hndErr = eventhandler;
 771 |     this.m_hndLex = eventhandler;
 772 |     this.m_interrupted = false;
 773 | }
 774 | 
 775 | 
// CONSTANTS    (these must be below the constructor)
// Event identifiers passed as the first argument to
// SaxParser.prototype._fireEvent, which maps each one to the name of the
// handler method to invoke.
SaxParser.DOC_B = 1;   // -> "onStartDocument"
SaxParser.DOC_E = 2;   // -> "onEndDocument"
SaxParser.ELM_B = 3;   // -> "onStartElementNS"
SaxParser.ELM_E = 4;   // -> "onEndElementNS"
SaxParser.CHARS = 5;   // -> "onCharacters"
SaxParser.PI    = 6;   // -> "processingInstruction"
SaxParser.CD_B  = 7;   // -> "onCdata"
SaxParser.CD_E  = 8;   // -> "onEndCDATA"
SaxParser.CMNT  = 9;   // -> "onComment"
SaxParser.DTD_B = 10;  // no handler mapping in _fireEvent
SaxParser.DTD_E = 11;  // no handler mapping in _fireEvent
 788 | 
 789 | SaxParser.prototype.parseFile = function(filename) { //This function will only work in the node.js environment.
 790 |     var fs = require('fs');
 791 |     var that = this;
 792 |     fs.readFile(filename, function (err, data) {
 793 |       that.parseString(data);
 794 |     });
 795 | }
 796 | 
 797 | 
 798 | SaxParser.prototype.parseString = function(strD) {
 799 |     sys = require('sys');
 800 |     var that = this;
 801 | 
 802 |     setTimeout(function(){
 803 |             var startnew = true;
 804 |             if(!that.m_parser){
 805 |                 that.m_parser = new XMLP(strD);
 806 |                 startnew = false;
 807 |             }
 808 |             else{
 809 |                 that.m_parser.continueParsing(strD);
 810 |                 startnew = true;
 811 |             }
 812 | 
 813 |             //if(that.m_hndDoc && that.m_hndDoc.setDocumentLocator) {
 814 |             //    that.m_hndDoc.setDocumentLocator(that);
 815 |             //}
 816 | 
 817 |             that.m_bErr = false;
 818 | 
 819 |             if(!that.m_bErr && !startnew) {
 820 |                 that._fireEvent(SaxParser.DOC_B);
 821 |             }
 822 |             that._parseLoop();
 823 |             if(!that.m_bErr && !that.m_interrupted) {
 824 |                 that._fireEvent(SaxParser.DOC_E);
 825 |             }
 826 | 
 827 |             that.m_xml = null;
 828 |             that.m_iP = 0;
 829 |             that.m_interrupted = false;
 830 |     }, 0);
 831 | 
 832 | }
 833 | 
// Pauses parsing by delegating to pause() on the underlying parser.
// this.m_parser must already exist; it is created by parseString.
SaxParser.prototype.pause = function() {
    this.m_parser.pause();
}
 837 | 
 838 | SaxParser.prototype.resume = function() {
 839 |     //reset the state
 840 |     this.m_parser.resume();
 841 |     //now start up the parse loop
 842 |     var that = this;
 843 |     setTimeout(function(){
 844 |             that._parseLoop();
 845 |     }, 0);
 846 | }
 847 | 
// Replaces the document handler, the object whose methods _fireEvent calls for
// document, element, character and processing-instruction events.
SaxParser.prototype.setDocumentHandler = function(hnd) {
    this.m_hndDoc = hnd;
}
 851 | 
// Replaces the error handler, the object whose onError method _fireError calls.
SaxParser.prototype.setErrorHandler = function(hnd) {
    this.m_hndErr = hnd;
}
 855 | 
// Replaces the lexical handler, the object whose methods _fireEvent calls for
// CDATA and comment events.
SaxParser.prototype.setLexicalHandler = function(hnd) {
    this.m_hndLex = hnd;
}
 859 | 
// Returns the column number reported by the underlying parser.
SaxParser.prototype.getColumnNumber = function() {
    return this.m_parser.getColumnNumber();
}
 863 | 
// Returns the line number reported by the underlying parser.
SaxParser.prototype.getLineNumber = function() {
    return this.m_parser.getLineNumber();
}
 867 | 
// Returns the message of the most recent error, as recorded by _fireError
// (undefined if no error has been fired).
SaxParser.prototype.getMessage = function() {
    return this.m_strErrMsg;
}
 871 | 
// Always returns null; no public identifier is tracked.
SaxParser.prototype.getPublicId = function() {
    return null;
}
 875 | 
// Always returns null; no system identifier is tracked.
SaxParser.prototype.getSystemId = function() {
    return null;
}
 879 | 
// Returns the attribute count reported by the underlying parser.
SaxParser.prototype.getLength = function() {
    return this.m_parser.getAttributeCount();
}
 883 | 
// Returns the name of the attribute at the given index, as reported by the
// underlying parser.
SaxParser.prototype.getName = function(index) {
    return this.m_parser.getAttributeName(index);
}
 887 | 
// Returns the value of the attribute at the given index, as reported by the
// underlying parser.
SaxParser.prototype.getValue = function(index) {
    return this.m_parser.getAttributeValue(index);
}
 891 | 
// Returns the value of the attribute with the given name, as reported by the
// underlying parser.
SaxParser.prototype.getValueByName = function(name) {
    return this.m_parser.getAttributeValueByName(name);
}
 895 | 
 896 | SaxParser.prototype._fireError = function(strMsg) {
 897 |     this.m_strErrMsg = strMsg;
 898 |     this.m_bErr = true;
 899 | 
 900 |     if(this.m_hndErr && this.m_hndErr.onError) {
 901 |         this.m_hndErr.onError(this.m_strErrMsg);
 902 |     }
 903 | }
 904 | 
 905 | 
 906 | 
 907 | SaxParser.prototype._fireEvent = function(iEvt) {
 908 |     var hnd, func, args = arguments, iLen = args.length - 1;
 909 | 
 910 | 
 911 |     if(this.m_bErr) return;
 912 | 
 913 |     if(SaxParser.DOC_B == iEvt) {
 914 |         func = "onStartDocument";         hnd = this.m_hndDoc;
 915 |     }
 916 |     else if (SaxParser.DOC_E == iEvt) {
 917 |         func = "onEndDocument";           hnd = this.m_hndDoc;
 918 |     }
 919 |     else if (SaxParser.ELM_B == iEvt) {
 920 |         func = "onStartElementNS";          hnd = this.m_hndDoc;
 921 |     }
 922 |     else if (SaxParser.ELM_E == iEvt) {
 923 |         func = "onEndElementNS";            hnd = this.m_hndDoc;
 924 |     }
 925 |     else if (SaxParser.CHARS == iEvt) {
 926 |         func = "onCharacters";            hnd = this.m_hndDoc;
 927 |     }
 928 |     else if (SaxParser.PI    == iEvt) {
 929 |         func = "processingInstruction"; hnd = this.m_hndDoc;
 930 |     }
 931 |     else if (SaxParser.CD_B  == iEvt) {
 932 |         func = "onCdata";            hnd = this.m_hndLex;
 933 |     }
 934 |     else if (SaxParser.CD_E  == iEvt) {
 935 |         func = "onEndCDATA";              hnd = this.m_hndLex;
 936 |     }
 937 |     else if (SaxParser.CMNT  == iEvt) {
 938 |         func = "onComment";               hnd = this.m_hndLex;
 939 |     }
 940 | 
 941 |     if(hnd && hnd[func]) {
 942 |         if(0 == iLen) {
 943 |             hnd[func]();
 944 |         }
 945 |         else if (1 == iLen) {
 946 |             hnd[func](args[1]);
 947 |         }
 948 |         else if (2 == iLen) {
 949 |             hnd[func](args[1], args[2]);
 950 |         }
 951 |         else if (3 == iLen) {
 952 |             hnd[func](args[1], args[2], args[3]);
 953 |         }
 954 |         else if (4 == iLen) {
 955 |             hnd[func](args[1], args[2], args[3], args[4]);
 956 |         }
 957 |         else if (5 == iLen) {
 958 |             hnd[func](args[1], args[2], args[3], args[4], args[5]);
 959 |         }
 960 |     }
 961 | 
 962 | }
 963 | 
 964 | 
 965 | 
 966 | 
 967 | SaxParser.prototype._parseLoop = function(parser) {
 968 |     var iEvent, parser;
 969 | 
 970 |     parser = this.m_parser;
 971 |     while(!this.m_bErr) {
 972 |         iEvent = parser.next();
 973 | 
 974 |         if(iEvent == XMLP._ELM_B) {
 975 |             theatts = this.m_parser.m_atts;
 976 |             nameobject = parser._parsePrefixAndElementName(parser.getName());
 977 |             theattsandnamespace = parser._parseNamespacesAndAtts(theatts);
 978 |             var theuri = parser._getContextualNamespace(nameobject.prefix);
 979 |             this._fireEvent(SaxParser.ELM_B, nameobject.name, theattsandnamespace[0], (nameobject.prefix === '')? null : nameobject.prefix, (theuri === '')? null : theuri ,theattsandnamespace[1] );
 980 |         }
 981 |         else if(iEvent == XMLP._ELM_E) {
 982 |             nameobject = parser._parsePrefixAndElementName(parser.getName());
 983 |             var theuri = parser._getContextualNamespace(nameobject.prefix);
 984 |             parser._removeExpiredNamesapces(parser.getName());
 985 |             this._fireEvent(SaxParser.ELM_E, nameobject.name, (nameobject.prefix === '')? null : nameobject.prefix, (theuri === '')? null : theuri);
 986 |         }
 987 |         else if(iEvent == XMLP._ELM_EMP) {
 988 |             //this is both a begin and end element
 989 |             theatts = this.m_parser.m_atts;
 990 |             nameobject = parser._parsePrefixAndElementName(parser.getName());
 991 |             theattsandnamespace = parser._parseNamespacesAndAtts(theatts);
 992 |             var theuri = parser._getContextualNamespace(nameobject.prefix);
 993 |             this._fireEvent(SaxParser.ELM_B, nameobject.name, theattsandnamespace[0], (nameobject.prefix === '')? null : nameobject.prefix, (theuri === '')? null : theuri ,theattsandnamespace[1] );
 994 | 
 995 |             parser._removeExpiredNamesapces(parser.getName());
 996 |             this._fireEvent(SaxParser.ELM_E, nameobject.name, (nameobject.prefix === '')? null : nameobject.prefix, (theuri === '')? null : theuri);
 997 |             //this._fireEvent(SaxParser.ELM_B, parser.getName(), this.m_parser.m_atts.map(function(item){return { name : item[0], value : item[1], };}) );
 998 |             //this._fireEvent(SaxParser.ELM_E, parser.getName());
 999 |         }
1000 |         else if(iEvent == XMLP._TEXT) {
1001 |             this._fireEvent(SaxParser.CHARS, parser.getContent().slice(parser.getContentBegin(),parser.getContentEnd()));
1002 |         }
1003 |         else if(iEvent == XMLP._ENTITY) {
1004 |             this._fireEvent(SaxParser.CHARS, parser.getContent(), parser.getContentBegin(), parser.getContentEnd() - parser.getContentBegin());
1005 |         }
1006 |         else if(iEvent == XMLP._PI) {
1007 |             this._fireEvent(SaxParser.PI, parser.getName(), parser.getContent().substring(parser.getContentBegin(), parser.getContentEnd()));
1008 |         }
1009 |         else if(iEvent == XMLP._CDATA) {
1010 |             this._fireEvent(SaxParser.CD_B, parser.getContent().slice(parser.getContentBegin(),parser.getContentEnd()));
1011 |             //this._fireEvent(SaxParser.CHARS, parser.getContent(), parser.getContentBegin(), parser.getContentEnd() - parser.getContentBegin());
1012 |             //this._fireEvent(SaxParser.CD_E);
1013 |         }
1014 |         else if(iEvent == XMLP._COMMENT) {
1015 |             this._fireEvent(SaxParser.CMNT, parser.getContent().slice(parser.getContentBegin(),parser.getContentEnd()));
1016 |         }
1017 |         else if(iEvent == XMLP._DTD) {
1018 |         }
1019 |         else if(iEvent == XMLP._ERROR) {
1020 |             this._fireError(parser.getContent());
1021 |         }
1022 |         else if(iEvent == XMLP._INTERRUPT){
1023 |             this.m_interrupted = true;
1024 |             return;//just return and wait to be restarted
1025 |         }
1026 |         else if(iEvent == XMLP._NONE) {
1027 |             return;
1028 |         }
1029 |     }
1030 | 
1031 | }
1032 | 
//SAXStrings: a useful object containing string manipulation functions.
//All functions are attached directly to SAXStrings and used statically.
//
//Wherever a function takes optional position arguments (iP, iB, iE), a missing
//argument (undefined or null) selects the default. An explicit 0 is honoured
//as position 0 and is not mistaken for "missing".
var SAXStrings = function() {
//This is the constructor of the SAXStrings object
}


// CONSTANTS    (these must be below the constructor)
SAXStrings.WHITESPACE = " \t\n\r";
SAXStrings.QUOTES = "\"'";


// Returns the column of position iP (default: end of string) in strD, or -1
// when strD is empty. The value is iP minus the combined length of all
// preceding lines, joined by "\n".
// NOTE(review): positions on the first line therefore count from 0, while
// positions on later lines come out one higher - confirm which is intended.
SAXStrings.getColumnNumber = function(strD, iP) {
    if(SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if(iP == null) {
        iP = strD.length;
    }

    var arrD = strD.substring(0, iP).split("\n");
    arrD.length--;      // drop the line that contains iP
    var iLinePos = arrD.join("\n").length;

    return iP - iLinePos;

}

// Returns the 1-based line number of position iP (default: end of string) in
// strD, or -1 when strD is empty.
SAXStrings.getLineNumber = function(strD, iP) {
    if(SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if(iP == null) {
        iP = strD.length;
    }

    return strD.substring(0, iP).split("\n").length;
}

// Returns the index of the first non-whitespace character of strD in the
// range [iB, iE), or -1 when there is none or strD is empty.
SAXStrings.indexOfNonWhitespace = function(strD, iB, iE) {
    if(SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if(iB == null) {
        iB = 0;
    }
    if(iE == null) {
        iE = strD.length;
    }

    for(var i = iB; i < iE; i++){
        if(SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) == -1) {
            return i;
        }
    }
    return -1;
}

// Returns the index of the first whitespace character of strD in the range
// [iB, iE), or -1 when there is none or strD is empty.
SAXStrings.indexOfWhitespace = function(strD, iB, iE) {
    if(SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if(iB == null) {
        iB = 0;
    }
    if(iE == null) {
        iE = strD.length;
    }

    for(var i = iB; i < iE; i++) {
        if(SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) != -1) {
            return i;
        }
    }
    return -1;
}

// Returns true when strD is null, undefined, or has length 0.
SAXStrings.isEmpty = function(strD) {
    return (strD == null) || (strD.length == 0);
}

// Returns the index of the last non-whitespace character of strD in the range
// [iB, iE), or -1 when there is none or strD is empty.
SAXStrings.lastIndexOfNonWhitespace = function(strD, iB, iE) {
    if(SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if(iB == null) {
        iB = 0;
    }
    if(iE == null) {
        iE = strD.length;
    }

    for(var i = iE - 1; i >= iB; i--){
        if(SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) == -1){
            return i;
        }
    }
    return -1;
}

// Returns the range [iB, iE) of strD with every occurrence of strF replaced
// by strR, or "" when strD is empty.
SAXStrings.replace = function(strD, iB, iE, strF, strR) {
    if(SAXStrings.isEmpty(strD)) {
        return "";
    }
    if(iB == null) {
        iB = 0;
    }
    if(iE == null) {
        iE = strD.length;
    }

    return strD.toString().substring(iB, iE).split(strF).join(strR);

}
1127 | 
// Stack: a minimal last-in/first-out container backed by an array (m_arr).
var Stack = function() {
    this.m_arr = [];
};

// Removes every element by starting over with a fresh backing array.
Stack.prototype.clear = function() {
    this.m_arr = [];
};

// Returns the number of elements currently on the stack.
Stack.prototype.count = function() {
    return this.m_arr.length;
};

// Releases the backing array. The stack must not be used afterwards.
Stack.prototype.destroy = function() {
    this.m_arr = null;
};

// Returns the top element without removing it, or null when the stack is empty.
Stack.prototype.peek = function() {
    var items = this.m_arr;

    return (items.length == 0) ? null : items[items.length - 1];
};

// Removes and returns the top element, or returns null when the stack is empty.
Stack.prototype.pop = function() {
    var items = this.m_arr;

    if(items.length == 0) {
        return null;
    }

    return items.pop();
};

// Puts o on top of the stack.
Stack.prototype.push = function(o) {
    this.m_arr.push(o);
};
1167 | 
1168 | // CONVENIENCE FUNCTIONS
// Returns true when str is null, undefined, or has a length of 0.
function isEmpty(str) {
    if (str == null) {
        return true;
    }
    return str.length == 0;
}
1172 | 
1173 | 
// Strips whitespace from the ends of trimString.
//
// trimString  the string to trim; an empty or null value yields ""
// leftTrim    trim the start of the string (defaults to true when null/undefined)
// rightTrim   trim the end of the string (defaults to true when null/undefined)
//
// Which characters count as whitespace is decided by the surrounding file's
// "whitespace" string. Only one substring call is made to produce the result.
function trim(trimString, leftTrim, rightTrim) {
    if (isEmpty(trimString)) {
        return "";
    }

    if (leftTrim == null) {
        leftTrim = true;
    }
    if (rightTrim == null) {
        rightTrim = true;
    }

    var length = trimString.length;
    var start = 0;
    var end = length;

    if (leftTrim == true) {
        while ((start < length) && (whitespace.indexOf(trimString.charAt(start)) != -1)) {
            start++;
        }
    }

    // Never move the end past the start, so a string that is entirely
    // whitespace collapses to "" rather than producing a reversed range.
    if (rightTrim == true) {
        while ((end > start) && (whitespace.indexOf(trimString.charAt(end - 1)) != -1)) {
            end--;
        }
    }

    return trimString.substring(start, end);
}
1210 | 
// Escapes the five XML special characters in str (& < > " ') as their
// predefined entity references and returns the result.
function __escapeString(str) {
    // "&" has to go first, otherwise the ampersands introduced by the other
    // replacements would themselves be escaped.
    return str
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&apos;");
}
1227 | 
// Reverses __escapeString: turns the five predefined XML entity references
// (&lt; &gt; &quot; &apos; &amp;) in str back into the characters they stand
// for and returns the result.
function __unescapeString(str) {

    var escLtRegEx = /&lt;/g;
    var escGtRegEx = /&gt;/g;
    var quotRegEx = /&quot;/g;
    var aposRegEx = /&apos;/g;
    var escAmpRegEx = /&amp;/g;

    str = str.replace(escLtRegEx, "<");
    str = str.replace(escGtRegEx, ">");
    str = str.replace(quotRegEx, "\"");
    str = str.replace(aposRegEx, "'");
    // "&amp;" must be decoded last. Decoding it first turns "&amp;lt;" into
    // "&lt;", which the later passes would then wrongly decode again to "<".
    str = str.replace(escAmpRegEx, "&");

  return str;
}
1244 | 
// Public API of this module: only the SaxParser constructor is exported.
exports.SaxParser = SaxParser;
1246 | 
1247 | 
1248 | })()
1249 | 


--------------------------------------------------------------------------------