// Repository layout:
//   ├── config
//       └── constants.prod.js
//   ├── README.md
//   ├── bin
//       └── www
//   ├── models
//       └── topic.js
//   ├── package.json
//   ├── .gitignore
//   ├── LICENSE
//   ├── app.js
//   └── routes
//       └── index.js

// ---------- /config/constants.prod.js ----------

/**
 * Production configuration.
 * The MongoDB connection string is read from the MONGOLAB_URI environment
 * variable.
 */
var Constants = {
  MongoURL: process.env.MONGOLAB_URI
};

module.exports = Constants;

// ---------- /README.md ----------
// inside-api
// ==========
//
// **Note: Their API is pretty open. Use that if you must.**
//
// Inside.com API to get top authors and sources for any topic (https://www.inside.com/search/topics)

// ---------- /bin/www ----------
// #!/usr/bin/env node   (shebang; must be the first line of the real bin/www file)

/**
 * Process entry point: loads the Express app, binds it to PORT (default 3000)
 * and logs the bound port through the `debug` logger once listening.
 */
var debug = require('debug')('inside-api');
var app = require('../app');

app.set('port', process.env.PORT || 3000);

var server = app.listen(app.get('port'), function() {
  debug('Express server listening on port ' + server.address().port);
});

// ---------- /models/topic.js ----------
var mongoose = require('mongoose'),
    Schema = mongoose.Schema;

// Lifetime of a stored topic before it is considered expired.
var ONE_DAY_MS = 24 * 60 * 60 * 1000; // 24 hours

/**
 * Returns the current date as 'YYYY-MM-DD' (the date part of the ISO string
 * produced by Date#toJSON, i.e. in UTC).
 */
function today() {
  return new Date().toJSON().slice(0, 10);
}

/**
 * Returns the instant 24 hours after the moment of the call.
 */
function oneDayFromNow() {
  return new Date(Date.now() + ONE_DAY_MS);
}

/**
 * A scraped topic with its top sources and authors.
 *
 * The `date` and `expires` defaults are passed as FUNCTIONS so they are
 * evaluated each time a document is created. Passing a precomputed value
 * (as the schema previously did) freezes it at module-load time: every
 * document saved by a long-running process would share the start-up date
 * and would already be expired once the process had been up for a day.
 */
var topicSchema = new Schema({
  date: {
    type: Date,
    default: today
  },
  topic: String,
  sources: [],
  authors: [],
  expires: {
    type: Date,
    default: oneDayFromNow
  }
});

module.exports = mongoose.model('Topic', topicSchema);

// /package.json:
-------------------------------------------------------------------------------- 1 | { 2 | "name": "inside-api", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "node ./bin/www" 7 | }, 8 | "dependencies": { 9 | "body-parser": "~1.8.1", 10 | "cheerio": "^0.18.0", 11 | "cookie-parser": "~1.3.3", 12 | "debug": "~2.0.0", 13 | "express": "^4.9.8", 14 | "jade": "~1.6.0", 15 | "mongoose": "^3.8.21", 16 | "morgan": "~1.3.0", 17 | "request": "^2.51.0", 18 | "serve-favicon": "~2.1.3" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # Compiled binary addons (http://nodejs.org/api/addons.html) 20 | build/Release 21 | 22 | # Dependency directory 23 | # Commenting this out is preferred by some people, see 24 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 25 | node_modules 26 | 27 | # Users Environment Variables 28 | .lock-wscript 29 | config/constants.dev.js 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Karan Goel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, 
// and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// ---------- /app.js ----------

/**
 * Express application setup: loads the environment-specific configuration,
 * connects Mongoose, registers the Topic model, mounts the routes and
 * installs the 404 / error handlers. Exports the configured app.
 */
var express = require('express');
var path = require('path');
var favicon = require('serve-favicon');
var logger = require('morgan');
var cookieParser = require('cookie-parser');
var bodyParser = require('body-parser');

// Pick the configuration for the current environment. constants.dev.js is
// git-ignored, so each developer supplies their own copy.
var constants;
if (process.env.NODE_ENV === 'production') {
  constants = require('./config/constants.prod.js');
} else {
  constants = require('./config/constants.dev.js');
}

var mongoose = require('mongoose');
mongoose.connect(constants.MongoURL, { server: { auto_reconnect: true } });
// Must be loaded before ./routes/index, which looks the model up by name.
require('./models/topic.js');

var routes = require('./routes/index');

var app = express();

// view engine setup
app.set('views', path.join(__dirname, 'views'));
app.set('view engine', 'jade');

// uncomment after placing your favicon in /public
//app.use(favicon(__dirname + '/public/favicon.ico'));
app.use(logger('dev'));
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({ extended: false }));
app.use(cookieParser());
app.use(express.static(path.join(__dirname, 'public')));

app.use('/', routes);

// catch 404 and forward to error handler
app.use(function(req, res, next) {
  var err = new Error('Not Found');
  err.status = 404;
  next(err);
});

// error handler
//
// Responds with JSON rather than rendering an 'error' view: the repository
// ships no views/ directory, so res.render('error', ...) would itself fail
// inside the error handler. The stack trace is only exposed in development;
// in any other environment `error` is an empty object so nothing leaks.
// The unused `next` parameter is required: Express recognises error
// middleware by its four-argument signature.
app.use(function(err, req, res, next) {
  var status = err.status || 500;
  var details = {};

  if (app.get('env') === 'development') {
    details = { status: status, stack: err.stack };
  }

  res.status(status).json({
    message: err.message,
    error: details
  });
});


module.exports = app;

// ---------- /routes/index.js ----------
var express = require('express');
var router = express.Router();

var cheerio = require('cheerio');
var request = require('request');
var mongoose = require('mongoose');
var Topic = mongoose.model('Topic');

if (process.env.NODE_ENV === 'production') {
  var constants = require('./../config/constants.prod.js');
} else {
  var constants = require('./../config/constants.dev.js');
}
/**
 * GET home page: redirects to the project's GitHub repository.
 */
router.get('/', function(req, res) {
  res.redirect('https://github.com/karan/inside-api');
});

/**
 * GET /:topic
 *
 * Scrapes inside.com for the topic's top sources and authors, stores the
 * result and responds with `{ topic, sources, authors }`.
 *
 * Responses:
 *   200 - scraped data
 *   500 - database lookup or save failed
 *   upstream status (or 502 on a network failure) - scraping failed
 */
router.get('/:topic', function(req, res) {
  var topic = req.params.topic.toLowerCase();

  console.log('QUERY: for topic: ' + topic);

  Topic.findOne({ topic: topic }, function(err, doc) {
    if (err) {
      console.log('ERROR: ' + err.toString());
      // Do not send the raw driver error (with a 200 status) to the client.
      return res.status(500).send('Something went wrong.');
    }

    // TODO: caching is currently disabled, so `doc` is not used and every
    // request scrapes again and stores a new document. The intended logic
    // was: if `doc` exists and `doc.expires` is still in the future, respond
    // with the stored topic/sources/authors; otherwise scrape, save and
    // respond as below.
    getTopicSourcesAndAuthors(topic, function(err, sources, authors) {
      if (err) {
        console.log('ERROR: ' + err.toString());
        // Mirror the upstream HTTP status when there is one; a failure with
        // no HTTP response (DNS, connection reset, ...) becomes 502.
        return res.status(err.status || 502).send(err.message);
      }

      save(topic, sources, authors, function(err, finalData) {
        if (err) {
          return res.status(500).send('Something went wrong.');
        }

        return res.status(200).send({
          topic: finalData.topic,
          sources: finalData.sources,
          authors: finalData.authors
        });
      });
    });
  });
});


// Origin that scraped, site-relative links are resolved against.
var INSIDE_BASE_URL = 'https://www.inside.com';

/**
 * Turns a site-relative path into an absolute inside.com URL.
 * Returns '' when the attribute was missing, instead of producing
 * 'https://www.inside.comundefined'.
 */
function toInsideUrl(relativePath) {
  return relativePath ? INSIDE_BASE_URL + relativePath : '';
}

/**
 * Extracts the "top sources" entries from a loaded topic page.
 * `$` is the cheerio instance for that page.
 */
function extractSources($) {
  var links = $('.top-sources-container').find('li').find('a.link');
  var sources = [];

  for (var i = 0; i < links.length; i++) {
    var link = $(links[i]);
    sources.push({
      name: link.text().trim(),
      inside_link: toInsideUrl(link.attr('href'))
    });
  }

  return sources;
}

/**
 * Extracts the "top authors" entries from a loaded topic page.
 * `$` is the cheerio instance for that page.
 */
function extractAuthors($) {
  var items = $('.top-authors-container').find('li');
  var authors = [];

  for (var i = 0; i < items.length; i++) {
    var item = $(items[i]);
    var profileLink = item.find('a.link');
    authors.push({
      image: toInsideUrl(item.find('img').attr('src')),
      name: profileLink.text().trim(),
      inside_link: toInsideUrl(profileLink.attr('href')),
      twitter: item.find('.twitter').attr('href') || ''
    });
  }

  return authors;
}

/**
 * Fetches the inside.com page for `topic` and scrapes its top sources and
 * top authors.
 *
 * @param {string} topic - topic slug taken from the request path
 * @param {function(?Error, Array=, Array=)} callback - called exactly once,
 *   with `(null, sources, authors)` on success or with an Error on failure.
 *   For a non-200 upstream response the Error carries the upstream status
 *   code in `err.status`; for a network-level failure it is the error
 *   reported by `request` (no `status`).
 */
function getTopicSourcesAndAuthors(topic, callback) {
  // The topic comes straight from the request path, so encode it before
  // placing it in the upstream URL.
  var url = INSIDE_BASE_URL + '/' + encodeURIComponent(topic);

  console.log(url);

  request(url, function (error, response, body) {
    // On a network failure `response` is undefined, so the error must be
    // checked before touching response.statusCode.
    if (error) {
      return callback(error);
    }

    if (response.statusCode !== 200) {
      var httpError = new Error('inside.com responded with status ' + response.statusCode);
      httpError.status = response.statusCode;
      return callback(httpError);
    }

    // Local binding: assigning to a bare `$` would create a global shared by
    // all concurrent requests.
    var $ = cheerio.load(body);

    callback(null, extractSources($), extractAuthors($));
  });
}


/**
 * Stores a scraped topic as a new Topic document.
 *
 * @param {string} topic
 * @param {Array} sources
 * @param {Array} authors
 * @param {function(?Error, ?Object)} callback - `(err, null)` on failure,
 *   `(null, savedDocument)` on success.
 */
function save(topic, sources, authors, callback) {
  new Topic({
    topic: topic,
    sources: sources,
    authors: authors
  }).save(function(err, doc) {
    if (err) {
      return callback(err, null);
    }

    callback(null, doc);
  });
}

module.exports = router;