├── Procfile ├── .gitignore ├── logo.png ├── models ├── posts.js └── comments.js ├── package.json ├── newrelic.js ├── config └── db.js ├── LICENSE ├── README.md └── app.js /Procfile: -------------------------------------------------------------------------------- 1 | web: node app.js 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | .env 3 | newrelic_agent.log 4 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karan/Hook/HEAD/logo.png -------------------------------------------------------------------------------- /models/posts.js: -------------------------------------------------------------------------------- 1 | var mongoose = require('mongoose'), 2 | Schema = mongoose.Schema; 3 | 4 | // For any user 5 | var postsSchema = new Schema({ 6 | date: { 7 | type: Date, 8 | default: new Date().toJSON().slice(0,10) 9 | }, 10 | expires: { 11 | type: Date, 12 | default: new Date(Date.now() + 60*60*1000) // 1 hour 13 | }, 14 | posts: [] 15 | }); 16 | 17 | module.exports = mongoose.model('Posts', postsSchema); 18 | -------------------------------------------------------------------------------- /models/comments.js: -------------------------------------------------------------------------------- 1 | var mongoose = require('mongoose'), 2 | Schema = mongoose.Schema; 3 | 4 | // For any user 5 | var commentsSchema = new Schema({ 6 | created: { 7 | type: Date, 8 | default: Date.now 9 | }, 10 | expires: { 11 | type: Date, 12 | default: new Date(Date.now() + 2*60*60*1000) // 2 hours 13 | }, 14 | permalink: String, 15 | post: {}, 16 | comments: [] 17 | }); 18 | 19 | module.exports = mongoose.model('Comments', commentsSchema); 20 | 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Hook", 3 | "private": true, 4 | "version": "0.0.0", 5 | "description": "ProductHunt API", 6 | "main": "app.js", 7 | "repository": { 8 | "type": "git", 9 | "url": "https://github.com/karan/Hook.git" 10 | }, 11 | "author": "Karan Goel", 12 | "license": "MIT", 13 | "bugs": { 14 | "url": "https://github.com/karan/Hook/issues" 15 | }, 16 | "homepage": "https://github.com/karan/Hook", 17 | "dependencies": { 18 | "cheerio": "^0.17.0", 19 | "express": "3.4.7", 20 | "mongodb": "1.3.23", 21 | "mongoose": "3.8.3", 22 | "newrelic": "^1.7.1", 23 | "request": "^2.36.0" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /newrelic.js: -------------------------------------------------------------------------------- 1 | /** 2 | * New Relic agent configuration. 3 | * 4 | * See lib/config.defaults.js in the agent distribution for a more complete 5 | * description of configuration variables and their potential values. 6 | */ 7 | exports.config = { 8 | /** 9 | * Array of application names. 10 | */ 11 | app_name : ['Hook'], 12 | /** 13 | * Your New Relic license key. 14 | */ 15 | license_key : process.env.NEW_RELIC_LICENSE_KEY, 16 | logging : { 17 | /** 18 | * Level at which to log. 'trace' is most useful to New Relic when diagnosing 19 | * issues with the agent, 'info' and higher will impose the least overhead on 20 | * production applications. 
21 | */ 22 | level : 'trace' 23 | } 24 | }; 25 | -------------------------------------------------------------------------------- /config/db.js: -------------------------------------------------------------------------------- 1 | // Established database connection 2 | 3 | var mongoose = require('mongoose'); 4 | 5 | var dbURI = process.env.MONGOLAB_URI; 6 | mongoose.connect(dbURI); 7 | 8 | // when connected with db 9 | mongoose.connection.on('connected', function() { 10 | console.log('Connected to db ' + dbURI); 11 | }); 12 | 13 | // some error when connecting 14 | mongoose.connection.on('error', function(err) { 15 | console.log('Connection error: ' + err); 16 | }); 17 | 18 | // disconnected from db 19 | mongoose.connection.on('disconnected', function() { 20 | console.log('Disconnected from DB.'); 21 | }); 22 | 23 | // If the Node process ends, close the Mongoose connection 24 | process.on('SIGINT', function() { 25 | mongoose.connection.close(function() { 26 | console.log('Disconnected from DB by app.'); 27 | process.exit(0); 28 | }); 29 | }); 30 | 31 | // bring in all models 32 | require('./../models/posts'); 33 | require('./../models/comments'); 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Karan Goel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | Hook 4 | ==== 5 | 6 | ProductHunt API for retrieving today's hunts and comments for any post. 7 | 8 | Discussion on [Product Hunt](http://www.producthunt.com/posts/hook-producthunt-api). 9 | 10 | **Since official @ProductHunt API is in private beta, I'm discontinuing support for Hook. I highly recommend switching to the official API.** 11 | 12 | *Note: This is an unofficial API and __not__ supported or controlled by ProductHunt itself. Any questions, comments, feedback or feature requests should be directed to [karan](http://github.com/karan) or via an [issue](https://github.com/karan/Hook/issues) in this repo.* 13 | 14 | Usage 15 | ===== 16 | 17 | **Base URL:** [http://hook-api.herokuapp.com/](http://hook-api.herokuapp.com/) 18 | 19 | **Output:** JSON 20 | 21 | ### Get today's products 22 | 23 | Posts are cached for 60 minutes. 24 | 25 | #### `GET /today` 26 | 27 | Example Query: 28 | 29 | ``` 30 | http://hook-api.herokuapp.com/today 31 | ``` 32 | 33 | Response: 34 | 35 | ```json 36 | { 37 | "status": "success", 38 | "hunts": [ 39 | { 40 | "url": "http://bit.ly/1oShrzl", 41 | "permalink": "/posts/the-news-ios", 42 | "comments": 20, 43 | "tagline": "Designer News + Hacker News, now on iOS", 44 | "rank": 2, 45 | "user": { 46 | "name": "Tosin Afolabi", 47 | "username": "TosinAF" 48 | }, 49 | "votes": 48, 50 | "title": "The News (iOS)" 51 | }, 52 | ... 53 | ... 54 | ``` 55 | 56 | ### Get comments for any post 57 | 58 | Comments are cached for 120 minutes. 
59 | 60 | #### `GET /posts/:slug` 61 | 62 | Example Query: 63 | 64 | ``` 65 | http://hook-api.herokuapp.com/posts/hook-producthunt-api 66 | ``` 67 | 68 | Response: 69 | 70 | ```json 71 | 72 | { 73 | "status": "success", 74 | "post": { 75 | "url": "https://github.com/karan/Hook", 76 | "permalink": "/posts/hook-producthunt-api", 77 | "comment_count": "10", 78 | "tagline": "ProductHunt API for retrieving today's hunts", 79 | "rank": 1, 80 | "user": { 81 | "name": "Karan Goel", 82 | "username": "karangoel" 83 | }, 84 | "votes": 46, 85 | "title": "Hook - ProductHunt API" 86 | }, 87 | "comments": [ 88 | { 89 | "comment_html": "\n \n Yo guys. I have been toying with a scraper for PH for some time now, and was really motivated after seeing @TosinAF 's thread and packaged all I had in a neat API.

Try it here

Currently it gets today's posts, and has a cache of 1 hour. I hope to see people make a ton of good stuff with it. :)

PS: I make a lot of cool stuff, and people love it. Leave your e-mail here and stay tuned about my projects.\n ", 90 | "comment": "\n \n Yo guys. I have been toying with a scraper for PH for some time now, and was really motivated after seeing @TosinAF 's thread and packaged all I had in a neat API.Try it hereCurrently it gets today's posts, and has a cache of 1 hour. I hope to see people make a ton of good stuff with it. :)PS: I make a lot of cool stuff, and people love it. Leave your e-mail here and stay tuned about my projects.\n ", 91 | "timestamp": "7h ago", 92 | "user": { 93 | "name": "Karan Goel", 94 | "username": "karangoel" 95 | }, 96 | "index": 1 97 | }, 98 | ... 99 | ... 100 | ``` 101 | 102 | Expo 103 | ======= 104 | 105 | Some apps built using this API: 106 | 107 | | Name | Description | URL | 108 | | ---- | ---- | ---- | 109 | | alfred-producthunt-workflow | Product Hunt Workflow for Alfred 2.0 | https://github.com/loris/alfred-producthunt-workflow | 110 | | PH | Product Hunt Android App | https://github.com/yelinaung/PH | 111 | | TheNews (iOS) | PH, DN, Hn, now on iOS | https://appsto.re/us/PpnV0.i | 112 | | ProductHuntExtn | Today View Extension for Product Hunt (Yosemite) | https://github.com/zameericle/ProductHuntExtn | 113 | | ProductHunt (iOS) | iOS App for producthunt.co | https://github.com/sapanbhuta/ProductHunt | 114 | | Spear | Product Hunt for Hackers - a CLI to Product Hunt. 
| https://github.com/karan/Spear | 115 | 116 | *If you are using Hook, please let me know and I'll showcase your app here.* 117 | 118 | Start 119 | ===== 120 | 121 | ```bash 122 | $ npm install # install dependencies 123 | $ node app # start the server 124 | ``` 125 | 126 | Deploy to Heroku 127 | ===== 128 | 129 | ```bash 130 | $ npm install # install dependencies 131 | $ heroku create 132 | $ heroku addons:add mongolab 133 | $ (git add, git commit) 134 | $ git push heroku master 135 | ``` 136 | 137 | Donation 138 | ======= 139 | 140 | Donations to my open source work are greatly appreciated and help me dedicate more time and energy into making cool things. If you want to help me produce this work as well as more like it, please take a moment to contribute. 141 | 142 | - Bitcoin: 1GZqi6qUGSKGQvjd4CvVBJ9FYpsQvU2P7h 143 | - Gratipay: https://www.gratipay.com/karan/ 144 | -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | require('newrelic'); 2 | var express = require('express'); 3 | var request = require('request'); 4 | var db = require('./config/db'); 5 | var Posts = require('./models/posts'); 6 | var Comments = require('./models/comments'); 7 | var cheerio = require('cheerio'); 8 | 9 | var app = express(); 10 | 11 | var BASE_URL = 'http://www.producthunt.com'; 12 | 13 | 14 | app.configure(function (){ 15 | app.set('port', process.env.PORT || 8888); 16 | app.use(express.cookieParser('keyboard cat')); 17 | app.use(express.session({ secret: 'keyboard cat' })); 18 | app.use(app.router); 19 | }); 20 | 21 | 22 | app.get('/', function (req, res) { 23 | res.redirect('https://github.com/karan/Hook'); 24 | }); 25 | 26 | 27 | app.get('/today', function (req, res) { 28 | 29 | var today = new Date().toJSON().slice(0,10); 30 | 31 | Posts.findOne({date: today}, function (err, obj) { 32 | 33 | if (obj && obj.expires < Date.now()) { 34 | // post expired, 
scrape again, and save 35 | console.log("posts expired - " + today); 36 | getHomePosts(null, function (posts) { 37 | Posts.findOneAndUpdate({date: today}, {posts: posts, expires: new Date(Date.now() + 60*60*1000)}, {new: true}, function (err, newObj) { 38 | res.send(200, { 39 | status: 'success', 40 | hunts: posts 41 | }); 42 | }); 43 | }); 44 | } else if (obj) { 45 | console.log("posts not expired - " + today); 46 | // not expired, just return this 47 | res.send(200, { 48 | status: 'success', 49 | hunts: obj.posts 50 | }); 51 | } else { 52 | // not in the db, scrape and send 53 | console.log("posts not found in db - " + today); 54 | 55 | getHomePosts(null, function (posts) { 56 | console.log("got details"); 57 | new Posts({ 58 | date: today, 59 | posts: posts 60 | }).save(function (err) { 61 | res.send(200, { 62 | status: 'success', 63 | hunts: posts 64 | }); 65 | }); 66 | }); 67 | 68 | } 69 | 70 | }); 71 | 72 | }); 73 | 74 | 75 | app.get("/posts/:slug", function (req, res) { 76 | 77 | var post_url = '/posts/'+req.params.slug; 78 | 79 | Comments.findOne({'permalink': post_url}, function(err, commentobj) { 80 | 81 | if (commentobj && commentobj.expires < Date.now()) { 82 | // expired. 
Scrape again, save and send 83 | console.log("expired"); 84 | getPostDetails(post_url, function (post) { 85 | console.log(post); 86 | getComments(post_url, function (err, comments, related) { 87 | Comments.findOneAndUpdate({permalink: post_url}, {post: post, comments: comments, expires: new Date(Date.now() + 2*60*60*1000)}, function (err, newObj) { 88 | res.send(200, { 89 | status: 'success', 90 | post: post, 91 | comments: comments 92 | }); 93 | }); 94 | }); 95 | }); 96 | } else if (commentobj) { 97 | console.log("in db - fine"); 98 | // not expired, just send response 99 | res.send(200, { 100 | status: 'success', 101 | post: commentobj.post, 102 | comments: commentobj.comments 103 | }); 104 | } else { 105 | console.log("not in db"); 106 | // not in db, scrape, save and send 107 | getPostDetails(post_url, function (post) { 108 | console.log(post); 109 | getComments(post_url, function (err, comments, related) { 110 | new Comments({ 111 | post: post, 112 | permalink: post.permalink, 113 | comments: comments 114 | }).save(function(err) { 115 | res.send(200, { 116 | status: 'success', 117 | post: post, 118 | comments: comments 119 | }); 120 | }); 121 | }); 122 | }); 123 | 124 | } 125 | 126 | }); 127 | 128 | }); 129 | 130 | 131 | // Gets the details of a single post 132 | function getPostDetails(post_url, callback) { 133 | var url = post_url ? 
BASE_URL + post_url : BASE_URL; 134 | console.log(url); 135 | 136 | request(url, function (error, response, body) { 137 | 138 | if (!error && response.statusCode == 200) { 139 | $ = cheerio.load(body); 140 | 141 | var header_dom = $(".comments-header"); 142 | 143 | var votes = +header_dom.find(".vote-count").text(); 144 | var name = /Posted by (.*) \d+ .*/g.exec(header_dom.find(".posted-by").text().trim().replace(/"/g, ""))[1]; 145 | var username = header_dom.find(".user-with-tooltip").attr("href").slice(1).trim().replace(/"/g, ""); 146 | var title = header_dom.find(".post-url").text(); 147 | var tagline = header_dom.find(".post-tagline").text(); 148 | 149 | var comment_count = $($(".modal-container").find(".subhead")[2]).text().trim().match(/(\d+)/g); 150 | comment_count = comment_count ? comment_count[0] : 0; 151 | 152 | var permalink = post_url; 153 | 154 | request({url: BASE_URL+header_dom.find(".post-url").attr("href"), followRedirect: false}, function (error, response, body) { 155 | url = response.headers.location; 156 | 157 | callback({ 158 | 'title': title, 159 | 'votes': votes, 160 | 'user': { 161 | 'username': username, 162 | 'name': name 163 | }, 164 | 'rank': 1, 165 | 'tagline': tagline, 166 | 'comment_count': comment_count, 167 | 'permalink': permalink, 168 | 'url': url 169 | }); 170 | 171 | }); 172 | } 173 | 174 | }); 175 | } 176 | 177 | // Returns comments for a single post 178 | function getComments(url, callback) { 179 | 180 | var comments = []; 181 | 182 | request(BASE_URL+url, function (error, response, body) { 183 | if (!error && response.statusCode == 200) { 184 | 185 | $ = cheerio.load(body); 186 | var comments_dom = $(".modal-container").find(".comment"); 187 | 188 | if (comments_dom.length === 0) { 189 | return callback(null, comments); 190 | } 191 | 192 | comments_dom.each(function (index) { 193 | 194 | var name = $(this).find(".comment-user-name a").text(); 195 | var username = $(this).find(".comment-user-handle").text().replace(/[{()} 
]/g, ''); 196 | var timestamp = $(this).find(".comment-time-ago").text().replace(/\s+/g, ''); 197 | var comment = $(this).find(".actual-comment").find(".comment-user-name").remove().end().text().replace(/^\s+|\s+$/g,''); 198 | var comment_html = $(this).find(".actual-comment").html().replace(/^\s+|\s+$/g,''); 199 | 200 | comments.push({ 201 | index: index+1, 202 | user: { 203 | username: username, 204 | name: name 205 | }, 206 | timestamp: timestamp, 207 | comment: comment, 208 | comment_html: comment_html 209 | }); 210 | 211 | 212 | if (comments.length === comments_dom.length) { 213 | callback(null, comments); 214 | } 215 | }); 216 | } 217 | }); 218 | 219 | } 220 | 221 | 222 | function compare(a,b) { 223 | return a.rank - b.rank; 224 | } 225 | 226 | 227 | // Returns all homepage posts posted today 228 | function getHomePosts(post_url, callback) { 229 | var url = post_url ? BASE_URL + post_url : BASE_URL; 230 | var posts = []; 231 | 232 | request(BASE_URL, function (error, response, body) { 233 | if (!error && response.statusCode == 200) { 234 | 235 | $ = cheerio.load(body); 236 | var x = $('.today .posts-group tr'); 237 | 238 | var container = null; 239 | 240 | x.each(function (rank) { 241 | 242 | var votes = $(this).find(".upvote").text().replace(/\s+/g, ''); 243 | 244 | var re = /(.*)\(\@(.*)\)/g; 245 | var user_str = $(this).find(".user-image-td").find("h3").text().trim().replace(/\s*/g, ""); 246 | var user_match = re.exec(user_str) 247 | var name = user_match[1] 248 | var username = user_match[2]; 249 | var title = $(this).find(".post-url").text(); 250 | var tagline = $(this).find(".post-tagline").text(); 251 | 252 | if (container) { 253 | var comment_count = $(container.find(".subhead")[2]).text().trim().match(/(\d+)/g);; 254 | } else { 255 | var comment_count = $(this).find(".view-discussion").text().trim().match(/(\d+)/g); 256 | } 257 | comment_count = comment_count ? comment_count[0] : 0; 258 | 259 | var permalink = post_url ? 
post_url : $(this).find(".view-discussion").attr("data-url"); 260 | 261 | var url = BASE_URL+$(this).find(".post-url").attr("href"); 262 | 263 | 264 | request({url: BASE_URL+$(this).find(".post-url").attr("href"), followRedirect: false}, function (error, response, body) { 265 | if (error) console.log("ERROR " + error); 266 | url = response.headers.location; 267 | 268 | posts.push({ 269 | 'title': title, 270 | 'votes': votes, 271 | 'user': { 272 | 'username': username, 273 | 'name': name 274 | }, 275 | 'rank': rank + 1, 276 | 'tagline': tagline, 277 | 'comment_count': comment_count, 278 | 'permalink': permalink, 279 | 'url': url 280 | }); 281 | 282 | posts.sort(compare); 283 | 284 | if (posts.length === x.length) { 285 | callback(posts); 286 | } 287 | }); 288 | }); 289 | } 290 | }); 291 | } 292 | 293 | 294 | app.listen(app.get('port')); 295 | --------------------------------------------------------------------------------