├── .gitignore ├── Procfile ├── README.md ├── article-stream.js ├── original-approach.js ├── package.json └── style.css /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | 10 | pids 11 | logs 12 | results 13 | build 14 | 15 | node_modules -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: node article-stream.js 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | node-parse-rss 2 | ============== 3 | 4 | A quick node.js RSS feed parser. 5 | 6 | Original **quest**ion: 7 | http://stackoverflow.com/questions/20177259/requesting-rss-feeds-from-two-web-sites-in-node-js 8 | Saw this question on Stack Overflow and decided it merited a *quick* answer. 9 | 10 | ## Try it yourself 11 | 12 | > Visit: https://news-stream.herokuapp.com 13 | 14 | 15 | ### DIY 16 | 17 | Clone/copy this repo to your local machine: 18 | ```sh 19 | git clone https://github.com/nelsonic/node-parse-rss.git 20 | ``` 21 | Install the modules: 22 | ```sh 23 | npm install 24 | ``` 25 | Run the node script 26 | ```sh 27 | node article-stream.js 28 | ``` 29 | 30 | Point your browser at: [**http://localhost:5000**](http://localhost:5000) 31 | 32 | You will expect to see something like: 33 | ![RSS News Stream](http://i.imgur.com/3rmmsb2.png "RSS News Stream") 34 | 35 | *Yes*, this is not beautiful. But the question was specific to the technical 36 | side of how to parse the RSS feeds not a *full* (attractive) solution. 37 | That is an "*exercise left to the reader*"... 
:-) 38 | 39 | - - - 40 | 41 | # Solution 42 | 43 | ## The Original Question on StackOverflow (async) 44 | 45 | 46 | ```javascript 47 | var feed = require('feed-read'); 48 | var http = require('http'); 49 | var async = require('async'); 50 | var request = require('request'); 51 | 52 | var LIMIT = 10; 53 | var UNABLE_TO_CONNECT = "Unable to connect."; 54 | var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml'; 55 | var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml'; 56 | 57 | var server = http.createServer(onRequest); 58 | server.listen(9000); 59 | 60 | function onRequest(req, res) { 61 | res.writeHead(200, { 62 | 'Content-Type' : 'text/html; charset=utf-8' 63 | }); 64 | 65 | async.parallel([ function(callback) { 66 | feed(BBC_URL, onRssFetched); 67 | // TODO: where to call callback()? 68 | }, function(callback) { 69 | feed(SKY_URL, onRssFetched); 70 | // TODO: where to call callback()? 71 | } ], function done(err, results) { 72 | console.log("Done"); 73 | if (err) { 74 | throw err; 75 | } 76 | }); 77 | } 78 | 79 | function onRssFetched(err, articles) { 80 | console.log("RSS fetched"); 81 | var html = []; 82 | if (err) { 83 | html.push("

", UNABLE_TO_CONNECT = "

"); 84 | } else { 85 | html.push("
    "); 86 | var i = 0; 87 | articles.forEach(function(entry) { 88 | if (i == LIMIT) { 89 | return; 90 | } 91 | html.push("
  1. " + entry.title 92 | + "
  2. "); 93 | i++; 94 | }); 95 | } 96 | console.log(html.join("")); 97 | } 98 | ``` 99 | 100 | The author also posted it on LinkedIn: 101 | http://www.linkedin.com/groups/Parse-RSS-feeds-using-Nodejs-2906459.S.5811745652475990020 102 | 103 | 104 | ## Parsing Multiple RSS Feeds *Without* Async 105 | 106 | The async module is the *hammer* of the node.js world. 107 | (read: [Law of the Instrument](http://en.wikipedia.org/wiki/Law_of_the_instrument))
    108 | I see **async.parallel** used *everywhere* to *force* asynchronous requests 109 | to ***wait*** until all responses have returned before performing a final action. 110 | 111 | There's another (**better**?) way of doing it... but it requires *more work*. 112 | 113 | ### Create an Array of RSS Feed URLs 114 | 115 | Rather than having a separate **var**iable for *each* RSS feed, 116 | we put them in an array. This allows us to iterate over the urls array 117 | and fetch each RSS feed. It makes it easy to add/remove feeds without 118 | having to touch the application logic. 119 | 120 | ```javascript 121 | urls = [ 122 | "http://feeds.bbci.co.uk/news/rss.xml", 123 | "http://news.sky.com/feeds/rss/home.xml" 124 | ]; // Example RSS Feeds 125 | ``` 126 | ### Create an Http Write Stream to Client 127 | 128 | Because the node.js **http** (core) module supports **streams** *natively*, 129 | we can stream the news articles to the client individually: 130 | 131 | ```javascript 132 | http.createServer(function (req, res) { 133 | // send basic http headers to client 134 | res.writeHead(200, { 135 | "Content-Type": "text/html", 136 | "Transfer-Encoding": "chunked" 137 | }); 138 | 139 | // setup simple html page: 140 | res.write("\n\nRSS Feeds\n\n"); 141 | 142 | // loop through our list of RSS feed urls 143 | for (var j = 0; j < urls.length; j++) { 144 | 145 | // fetch rss feed for the url: 146 | feed(urls[j], function(err, articles) { 147 | 148 | // loop through the list of articles returned 149 | for (var i = 0; i < articles.length; i++) { 150 | 151 | // stream article title (and what ever else you want) to client 152 | res.write("

    "+articles[i].title +"

    "); 153 | 154 | // check we have reached the end of our list of articles & urls 155 | if( i === articles.length-1 && j === urls.length-1) { 156 | res.end("\n"); // end http response 157 | } // else still have rss urls to check 158 | } // end inner for loop 159 | }); // end call to feed (feed-read) method 160 | } // end urls for loop 161 | }).listen(5000); 162 | ``` 163 | 164 | putting it all together we get: 165 | 166 | ```javascript 167 | var feed = require('feed-read'), // require the feed-read module 168 | http = require("http"), 169 | urls = [ 170 | "http://feeds.bbci.co.uk/news/rss.xml", 171 | "http://news.sky.com/feeds/rss/home.xml", 172 | "http://www.techmeme.com/feed.xml" 173 | ]; // Example RSS Feeds 174 | 175 | http.createServer(function (req, res) { 176 | // send basic http headers to client 177 | res.writeHead(200, { 178 | "Content-Type": "text/html", 179 | "Transfer-Encoding": "chunked" 180 | }); 181 | 182 | // setup simple html page: 183 | res.write("\n\nRSS Feeds\n\n"); 184 | 185 | // loop through our list of RSS feed urls 186 | for (var j = 0; j < urls.length; j++) { 187 | 188 | // fetch rss feed for the url: 189 | feed(urls[j], function(err, articles) { 190 | 191 | // loop through the list of articles returned 192 | for (var i = 0; i < articles.length; i++) { 193 | 194 | // stream article title (and what ever else you want) to client 195 | res.write("

    "+articles[i].title +"

    "); 196 | 197 | // check we have reached the end of our list of articles & urls 198 | if( i === articles.length-1 && j === urls.length-1) { 199 | res.end("\n"); // end http response 200 | } // else still have rss urls to check 201 | } // end inner for loop 202 | }); // end call to feed (feed-read) method 203 | } // end urls for loop 204 | }).listen(5000); 205 | ``` 206 | 207 | > Let me know your thoughts on this! I'd love to hear if you have a
    208 | > **better** way of doing it! :-) 209 | 210 | 211 | ## Background 212 | 213 | From the original code on StackOverflow Maksim is using the following 214 | *non-core* node modules: 215 | 216 | 1. **async**: https://github.com/caolan/async 217 | 2. **request**: https://github.com/mikeal/request 218 | 3. **feed-read**: https://github.com/sentientwaffle/feed-read 219 | 220 | The first two (async and request) are *uber* popular node modules that 221 | have been tested by thousands of people and used in many high-profile projects. 222 | 223 | feed-read on the other hand ... 224 | 225 | ![feed-read module page](http://i.imgur.com/Y3oqs0x.png "feed-read module") 226 | 227 | only **5 watchers** at the time of writing (*not v. popular*) 228 | and it was *last updated* **2 years ago** ... (might not be compatible with 229 | the latest version of node.js or its dependencies!) 230 | but it *does* have **unit tests** which is a *good sign* so let's *try* it! 231 | 232 | ## Read Documentation (Readme & Unit Tests) 233 | 234 | Often developers neglect to document their work adequately in the **README.md** 235 | If this is the case, the best way of learning how to use a new module is to 236 | read through the unit tests in the ./**test** folder in the case of feed-read 237 | 238 | https://github.com/sentientwaffle/feed-read/blob/master/test/index.test.js 239 | 240 | The tests are very clear. And the module is well written. 241 | 242 | > I sent a clarifying question on LinkedIn: http://lnkd.in/dY2Xtf6
    243 | > Meanwhile @GoloRoden gave an answer on Stack: http://stackoverflow.com/a/20273797/1148249 244 | 245 | ```javascript 246 | async.parallel({ 247 | bbc: function (callback) { 248 | feed(BBC_URL, callback); 249 | }, 250 | sky: function (callback) { 251 | feed(SKY_URL, callback); 252 | } 253 | }, function (err, result) { 254 | if (err) { 255 | // Somewhere, something went wrong… 256 | } 257 | 258 | var rssBbc = result.bbc, 259 | rssSky = result.sky; 260 | 261 | // Merge the two feeds or deliver them to the client or do 262 | // whatever you want to do with them. 263 | }); 264 | ``` 265 | This answer requires the **Async** Module... 266 | 267 | **What if** we instead try and write one ***without*** relying on async (for once)? 268 | 269 | ## Notes 270 | 271 | - Node.js Streams Handbook: https://github.com/substack/stream-handbook 272 | - http://nodejs.org/api/stream.html#stream_readable_stream 273 | - http://nodejs.org/api/http.html#http_http_clientresponse 274 | -------------------------------------------------------------------------------- /article-stream.js: -------------------------------------------------------------------------------- 1 | var feed = require('feed-read'), // require the feed-read module 2 | http = require("http"), 3 | port = process.env.PORT || 5000, // allow heroku/nodejitsu to set port 4 | urls = [ 5 | "http://www.theguardian.com/technology/rss", 6 | "http://feeds.bbci.co.uk/news/technology/rss.xml", 7 | "http://feeds.skynews.com/feeds/rss/technology.xml", 8 | "http://www.techmeme.com/feed.xml" 9 | ]; // Example RSS Feeds 10 | 11 | // load css styles 12 | var css = ' '; 13 | css = css + '' 14 | 15 | http.createServer(function (req, res) { 16 | // send basic http headers to client 17 | res.writeHead(200, { 18 | "Content-Type": "text/html", 19 | "Transfer-Encoding": "chunked" 20 | }); 21 | // setup simple html page: 22 | res.write("\n\nRSS Feeds - Stream\n" +css +"\n"); 23 | 24 | // loop through our list of RSS feed urls 25 | for (var j 
= 0; j < urls.length; j++) { 26 | 27 | // fetch rss feed for the url: 28 | feed(urls[j], function(err, articles) { 29 | 30 | // loop through the list of articles returned 31 | for (var i = 0; i < articles.length; i++) { 32 | 33 | // stream article title (and what ever else you want) to client 34 | displayArticle(res, articles[i]); 35 | 36 | // check we have reached the end of our list of articles & urls 37 | if( i === articles.length-1 && j === urls.length-1) { 38 | res.end("\n"); // end http response 39 | } // else still have rss urls to check 40 | } // end inner for loop 41 | }); // end call to feed (feed-read) method 42 | } // end urls for loop 43 | 44 | setTimeout(function() { 45 | res.end("\n"); // end http response 46 | }, 4000); 47 | 48 | }).listen(port); 49 | console.log("HTTP Listening on: http://localhost:"+port); 50 | 51 | // a mini-rendering function - you can expand this or add html markup 52 | function displayArticle(res, a) { 53 | 54 | var author = a.author || a.feed.name; // some feeds don't have author (BBC!) 55 | // send the article content to client 56 | res.write('
    ') 57 | res.write("

    "+a.title +"

    "); 58 | res.write("

    " +author +" - " +a.published +"
    \n"); 59 | res.write(a.content+"

    \n"); 60 | } 61 | -------------------------------------------------------------------------------- /original-approach.js: -------------------------------------------------------------------------------- 1 | var feed = require('feed-read'); 2 | var http = require('http'); 3 | var async = require('async'); 4 | var request = require('request'); 5 | 6 | var LIMIT = 10; 7 | var UNABLE_TO_CONNECT = "Unable to connect."; 8 | var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml'; 9 | var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml'; 10 | 11 | var server = http.createServer(onRequest); 12 | server.listen(9000); 13 | 14 | function onRequest(req, res) { 15 | res.writeHead(200, { 16 | 'Content-Type' : 'text/html; charset=utf-8' 17 | }); 18 | 19 | async.parallel([ function(callback) { 20 | feed(BBC_URL, onRssFetched); 21 | // TODO: where to call callback()? 22 | }, function(callback) { 23 | feed(SKY_URL, onRssFetched); 24 | // TODO: where to call callback()? 25 | } ], function done(err, results) { 26 | console.log("Done"); 27 | if (err) { 28 | throw err; 29 | } 30 | }); 31 | } 32 | 33 | function onRssFetched(err, articles) { 34 | console.log("RSS fetched"); 35 | var html = []; 36 | if (err) { 37 | html.push("

    ", UNABLE_TO_CONNECT = "

    "); 38 | } else { 39 | html.push("
      "); 40 | var i = 0; 41 | articles.forEach(function(entry) { 42 | if (i == LIMIT) { 43 | return; 44 | } 45 | html.push("
    1. " + entry.title 46 | + "
    2. "); 47 | i++; 48 | }); 49 | } 50 | console.log(html.join("")); 51 | } 52 | 53 | console.log("RSS Server listening on http://localhost:9000"); -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node-parse-rss", 3 | "description": "RSS Feed Reader in Node.js", 4 | "author": "https://github.com/nelsonic", 5 | "dependencies": { 6 | "feed-read": "*" 7 | }, 8 | "engines": { 9 | "node": "0.10.x" 10 | }, 11 | "repository": { 12 | "type": "git", 13 | "url": "https://github.com/nelsonic/node-parse-rss.git" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /style.css: -------------------------------------------------------------------------------- 1 | body { 2 | padding: 2em 20%; 3 | background-color: #424242; 4 | } 5 | 6 | .article { 7 | /*border: solid black 1px;*/ 8 | padding: 1em 1.5em; 9 | background: #fff; 10 | padding-bottom: 0.5em; 11 | margin-bottom: 1em; 12 | -webkit-border-bottom-right-radius: 3px; 13 | -webkit-border-bottom-left-radius: 3px; 14 | -moz-border-radius-bottomright: 3px; 15 | -moz-border-radius-bottomleft: 3px; 16 | border-bottom-right-radius: 3px; 17 | border-bottom-left-radius: 3px; 18 | -webkit-box-shadow: 0 1px 1px 0 rgba(0, 0, 0, 0.18); 19 | -moz-box-shadow: 0 1px 1px 0 rgba(0, 0, 0, 0.18); 20 | box-shadow: 0 1px 1px 0 rgba(0, 0, 0, 0.18); 21 | border-radius: 0.5em; 22 | } 23 | --------------------------------------------------------------------------------