├── .gitignore ├── README.md ├── feeder-parser.js ├── lib ├── fixes.js ├── rss-parser.js └── uri.js └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | feeder-parser 3 | ============= 4 | 5 | An extremely pragmatic approach to parsing RSS and Atom feeds. Used to run the [feeder.co](http://feeder.co) extension and pro service. 6 | 7 | Tests need to be ported to this repo... 8 | 9 | ```javascript 10 | var http = require("http"); 11 | 12 | var RSSParser = require("feeder-parser").RSSParser; 13 | 14 | // The first parameter to the RSSParser constructor should be an object with a `path` attribute 15 | var feed = { 16 | path: "http://www.reddit.com/r/all.rss" 17 | } 18 | 19 | function parse(body) { 20 | var parser = new RSSParser(feed); 21 | parser.setResult(body); 22 | parser.parse(function(parser) { 23 | parser.posts.forEach(function(post) { 24 | console.log(post.title); 25 | }); 26 | }); 27 | } 28 | 29 | function fetch(url, callback) { 30 | http.get(url, function(res) { 31 | var data = []; 32 | 33 | res.setEncoding("utf8"); 34 | res.on("data", function(chunk) { 35 | data.push(chunk); 36 | }); 37 | 38 | res.on("end", function() { 39 | callback(data.join("")); 40 | }); 41 | }); 42 | } 43 | 44 | fetch(feed.path, parse); 45 | ``` -------------------------------------------------------------------------------- /feeder-parser.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | RSSParser: require("./lib/rss-parser") 3 | } -------------------------------------------------------------------------------- /lib/fixes.js: -------------------------------------------------------------------------------- 1 | // COOL FILE OF FEEDER FIXES 2 | 3 | var FIXES = { 4 | 
'http://www.zhihu.com/rss': {noPublished: true}, 5 | 'http://social.msdn.microsoft.com/search/en-US/feed?query=blogs&refinement=109': {noPublished: true, noGUID: true}, 6 | 'http://www.lebikini.com/programmation/rss': {noPublished: true} 7 | } 8 | 9 | if (typeof module !== "undefined") { 10 | module.exports = FIXES; 11 | } 12 | -------------------------------------------------------------------------------- /lib/rss-parser.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | var parseTo$, parseRoot; 3 | var entities, URI, FIXES; 4 | 5 | if (typeof require !== "undefined") { 6 | entities = require('entities'); 7 | 8 | URI = require('./uri'); 9 | 10 | FIXES = require('./fixes'); 11 | 12 | parseTo$ = function(text) { 13 | return require('cheerio').load(text, { 14 | xmlMode: true, 15 | lowerCaseTags: true 16 | }); 17 | } 18 | 19 | parseRoot = function(text, $) { 20 | return $.root().children().first(); 21 | } 22 | } else { 23 | URI = this.URI; 24 | FIXES = this.FIXES; 25 | 26 | parseTo$ = function(text) { 27 | return jQuery; 28 | } 29 | 30 | parseRoot = function(text, $) { 31 | var xml = new DOMParser().parseFromString(text, 'text/xml'); 32 | var root = xml.documentElement; 33 | 34 | // // If parsing as XML failed, try and parse as HTML, because HTML is so lovely 35 | // if (root && root.querySelector('parsererror') ) { 36 | // // Try to parse as HTML instead 37 | // // TODO: FIXME: Strip