├── Procfile
├── .gitignore
├── logo.png
├── models
├── posts.js
└── comments.js
├── package.json
├── newrelic.js
├── config
└── db.js
├── LICENSE
├── README.md
└── app.js
/Procfile:
--------------------------------------------------------------------------------
1 | web: node app.js
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | .env
3 | newrelic_agent.log
4 |
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karan/Hook/HEAD/logo.png
--------------------------------------------------------------------------------
/models/posts.js:
--------------------------------------------------------------------------------
var mongoose = require('mongoose'),
    Schema = mongoose.Schema;

// How long one day's scraped posts are considered fresh, in milliseconds (1 hour).
var POSTS_TTL_MS = 60 * 60 * 1000;

// Cache entry holding the homepage posts scraped for a single day.
var postsSchema = new Schema({
  // Day the posts belong to, built from the first 10 characters
  // (YYYY-MM-DD) of the UTC ISO timestamp.
  date: {
    type: Date,
    // Must be a function: a plain value would be computed once when this
    // module is loaded, so every document created afterwards would get the
    // day the process started instead of the day it was created.
    default: function () {
      return new Date().toJSON().slice(0, 10);
    }
  },
  // Moment after which the cached posts should be scraped again.
  expires: {
    type: Date,
    // Function default for the same reason as `date`: a load-time value
    // would make every document created more than an hour after start-up
    // already expired.
    default: function () {
      return new Date(Date.now() + POSTS_TTL_MS); // 1 hour
    }
  },
  // Scraped post objects, stored as-is.
  posts: []
});

module.exports = mongoose.model('Posts', postsSchema);
18 |
--------------------------------------------------------------------------------
/models/comments.js:
--------------------------------------------------------------------------------
var mongoose = require('mongoose'),
    Schema = mongoose.Schema;

// How long a post's scraped comments are considered fresh, in milliseconds (2 hours).
var COMMENTS_TTL_MS = 2 * 60 * 60 * 1000;

// Cache entry holding one post together with its scraped comments.
var commentsSchema = new Schema({
  // Creation time of the cache entry.
  created: {
    type: Date,
    default: Date.now
  },
  // Moment after which the cached comments should be scraped again.
  expires: {
    type: Date,
    // Must be a function (like `created` above): a plain value would be
    // computed once when this module is loaded, so every document created
    // more than two hours after start-up would already be expired.
    default: function () {
      return new Date(Date.now() + COMMENTS_TTL_MS); // 2 hours
    }
  },
  // Post path used as the lookup key, e.g. "/posts/<slug>".
  permalink: String,
  // Scraped post details, stored as-is.
  post: {},
  // Scraped comment objects, stored as-is.
  comments: []
});

module.exports = mongoose.model('Comments', commentsSchema);
20 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Hook",
3 | "private": true,
4 | "version": "0.0.0",
5 | "description": "ProductHunt API",
6 | "main": "app.js",
7 | "repository": {
8 | "type": "git",
9 | "url": "https://github.com/karan/Hook.git"
10 | },
11 | "author": "Karan Goel",
12 | "license": "MIT",
13 | "bugs": {
14 | "url": "https://github.com/karan/Hook/issues"
15 | },
16 | "homepage": "https://github.com/karan/Hook",
17 | "dependencies": {
18 | "cheerio": "^0.17.0",
19 | "express": "3.4.7",
20 | "mongodb": "1.3.23",
21 | "mongoose": "3.8.3",
22 | "newrelic": "^1.7.1",
23 | "request": "^2.36.0"
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/newrelic.js:
--------------------------------------------------------------------------------
1 | /**
2 | * New Relic agent configuration.
3 | *
4 | * See lib/config.defaults.js in the agent distribution for a more complete
5 | * description of configuration variables and their potential values.
6 | */
7 | exports.config = {
8 | /**
9 | * Array of application names.
10 | */
11 | app_name : ['Hook'],
12 | /**
13 | * Your New Relic license key.
14 | */
15 | license_key : process.env.NEW_RELIC_LICENSE_KEY,
16 | logging : {
17 | /**
18 | * Level at which to log. 'trace' is most useful to New Relic when diagnosing
19 | * issues with the agent, 'info' and higher will impose the least overhead on
20 | * production applications.
21 | */
22 | level : 'trace'
23 | }
24 | };
25 |
--------------------------------------------------------------------------------
/config/db.js:
--------------------------------------------------------------------------------
// Establishes the database connection and registers all models.

var mongoose = require('mongoose');

var dbURI = process.env.MONGOLAB_URI;

// The connection string may carry credentials ("//user:password@host"),
// so drop that part before the URI is written to the logs.
var loggableURI = dbURI ? dbURI.replace(/\/\/[^\/@]*@/, '//') : dbURI;

mongoose.connect(dbURI);

// when connected with db
mongoose.connection.on('connected', function () {
  console.log('Connected to db ' + loggableURI);
});

// some error when connecting
mongoose.connection.on('error', function (err) {
  console.log('Connection error: ' + err);
});

// disconnected from db
mongoose.connection.on('disconnected', function () {
  console.log('Disconnected from DB.');
});

// If the Node process is interrupted (SIGINT), close the Mongoose
// connection first and exit only once it has been closed.
process.on('SIGINT', function () {
  mongoose.connection.close(function () {
    console.log('Disconnected from DB by app.');
    process.exit(0);
  });
});

// bring in all models
require('./../models/posts');
require('./../models/comments');
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014 Karan Goel
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |

2 |
3 | Hook
4 | ====
5 |
6 | ProductHunt API for retrieving today's hunts and comments for any post.
7 |
8 | Discussion on [Product Hunt](http://www.producthunt.com/posts/hook-producthunt-api).
9 |
10 | **Since official @ProductHunt API is in private beta, I'm discontinuing support for Hook. I highly recommend switching to the official API.**
11 |
12 | *Note: This is an unofficial API and __not__ supported or controlled by ProductHunt itself. Any questions, comments, feedback or feature requests should be directed to [karan](http://github.com/karan) or via an [issue](https://github.com/karan/Hook/issues) in this repo.*
13 |
14 | Usage
15 | =====
16 |
17 | **Base URL:** [http://hook-api.herokuapp.com/](http://hook-api.herokuapp.com/)
18 |
19 | **Output:** JSON
20 |
21 | ### Get today's products
22 |
23 | Posts are cached for 60 minutes.
24 |
25 | #### `GET /today`
26 |
27 | Example Query:
28 |
29 | ```
30 | http://hook-api.herokuapp.com/today
31 | ```
32 |
33 | Response:
34 |
35 | ```json
36 | {
37 | "status": "success",
38 | "hunts": [
39 | {
40 | "url": "http://bit.ly/1oShrzl",
41 | "permalink": "/posts/the-news-ios",
42 | "comment_count": "20",
43 | "tagline": "Designer News + Hacker News, now on iOS",
44 | "rank": 2,
45 | "user": {
46 | "name": "Tosin Afolabi",
47 | "username": "TosinAF"
48 | },
49 | "votes": 48,
50 | "title": "The News (iOS)"
51 | },
52 | ...
53 | ...
54 | ```
55 |
56 | ### Get comments for any post
57 |
58 | Comments are cached for 120 minutes.
59 |
60 | #### `GET /:permalink`
61 |
62 | Example Query:
63 |
64 | ```
65 | http://hook-api.herokuapp.com/posts/hook-producthunt-api
66 | ```
67 |
68 | Response:
69 |
70 | ```json
71 |
72 | {
73 | "status": "success",
74 | "post": {
75 | "url": "https://github.com/karan/Hook",
76 | "permalink": "/posts/hook-producthunt-api",
77 | "comment_count": "10",
78 | "tagline": "ProductHunt API for retrieving today's hunts",
79 | "rank": 1,
80 | "user": {
81 | "name": "Karan Goel",
82 | "username": "karangoel"
83 | },
84 | "votes": 46,
85 | "title": "Hook - ProductHunt API"
86 | },
87 | "comments": [
88 | {
89 | "comment_html": "\n \n Yo guys. I have been toying with a scraper for PH for some time now, and was really motivated after seeing @TosinAF 's thread and packaged all I had in a neat API.
Try it here
Currently it gets today's posts, and has a cache of 1 hour. I hope to see people make a ton of good stuff with it. :)
PS: I make a lot of cool stuff, and people love it. Leave your e-mail here and stay tuned about my projects.\n ",
90 | "comment": "\n \n Yo guys. I have been toying with a scraper for PH for some time now, and was really motivated after seeing @TosinAF 's thread and packaged all I had in a neat API.Try it hereCurrently it gets today's posts, and has a cache of 1 hour. I hope to see people make a ton of good stuff with it. :)PS: I make a lot of cool stuff, and people love it. Leave your e-mail here and stay tuned about my projects.\n ",
91 | "timestamp": "7h ago",
92 | "user": {
93 | "name": "Karan Goel",
94 | "username": "karangoel"
95 | },
96 | "index": 1
97 | },
98 | ...
99 | ...
100 | ```
101 |
102 | Expo
103 | =======
104 |
105 | Some apps built using this API:
106 |
107 | | Name | Description | URL |
108 | | ---- | ---- | ---- |
109 | | alfred-producthunt-workflow | Product Hunt Workflow for Alfred 2.0 | https://github.com/loris/alfred-producthunt-workflow |
110 | | PH | Product Hunt Android App | https://github.com/yelinaung/PH |
111 | | TheNews (iOS) | PH, DN, Hn, now on iOS | https://appsto.re/us/PpnV0.i |
112 | | ProductHuntExtn | Today View Extension for Product Hunt (Yosemite) | https://github.com/zameericle/ProductHuntExtn |
113 | | ProductHunt (iOS) | iOS App for producthunt.co | https://github.com/sapanbhuta/ProductHunt |
114 | | Spear | Product Hunt for Hackers - a CLI to Product Hunt. | https://github.com/karan/Spear |
115 |
116 | *If you are using Hook, please let me know and I'll showcase your app here.*
117 |
118 | Start
119 | =====
120 |
121 | ```bash
122 | $ npm install # install dependencies
123 | $ node app # start the server
124 | ```
125 |
126 | Deploy to Heroku
127 | =====
128 |
129 | ```bash
130 | $ npm install # install dependencies
131 | $ heroku create
132 | $ heroku addons:add mongolab
133 | $ (git add, git commit)
134 | $ git push heroku master
135 | ```
136 |
137 | Donation
138 | =======
139 |
140 | Donations to my open source work are greatly appreciated and help me dedicate more time and energy into making cool things. If you want to help me produce this work as well as more like it, please take a moment to contribute.
141 |
142 | - Bitcoin: 1GZqi6qUGSKGQvjd4CvVBJ9FYpsQvU2P7h
143 | - Gratipay: https://www.gratipay.com/karan/
144 |
--------------------------------------------------------------------------------
/app.js:
--------------------------------------------------------------------------------
1 | require('newrelic');
2 | var express = require('express');
3 | var request = require('request');
4 | var db = require('./config/db');
5 | var Posts = require('./models/posts');
6 | var Comments = require('./models/comments');
7 | var cheerio = require('cheerio');
8 |
var app = express();

// Root of the site that is scraped.
var BASE_URL = 'http://www.producthunt.com';

// Secret used to sign cookies and the session id. It is read from the
// SESSION_SECRET environment variable so a deployment does not have to rely
// on a value that is published in the repository; the previous hard-coded
// value remains the fallback so existing set-ups keep working.
var COOKIE_SECRET = process.env.SESSION_SECRET || 'keyboard cat';

// Express 3 application set-up: port, cookie/session middleware, then routes.
app.configure(function () {
  app.set('port', process.env.PORT || 8888);
  app.use(express.cookieParser(COOKIE_SECRET));
  app.use(express.session({ secret: COOKIE_SECRET }));
  app.use(app.router);
});
20 |
21 |
// GET / - redirects to the project's GitHub repository.
function redirectToRepository(req, res) {
  res.redirect('https://github.com/karan/Hook');
}

app.get('/', redirectToRepository);
25 |
26 |
// How long scraped homepage posts stay cached, in milliseconds (1 hour).
var POSTS_CACHE_TTL_MS = 60 * 60 * 1000;

// Sends the success payload for a list of hunts.
function sendHunts(res, hunts) {
  res.send(200, {
    status: 'success',
    hunts: hunts
  });
}

/**
 * GET /today - responds with today's posts.
 *
 * Serves the cached entry for today's date while it has not expired.
 * Otherwise the homepage is scraped again, the cache entry is updated (or
 * created) and the fresh posts are returned. The cache is best-effort:
 * database errors are logged and the scraped posts are still sent.
 */
app.get('/today', function (req, res) {

  var today = new Date().toJSON().slice(0, 10);

  Posts.findOne({date: today}, function (err, obj) {

    if (err) {
      // A failed lookup is handled like a cache miss.
      console.log("posts lookup failed - " + err);
    }

    var expired = obj && obj.expires < Date.now();

    if (obj && !expired) {
      // not expired, just return this
      console.log("posts not expired - " + today);
      return sendHunts(res, obj.posts);
    }

    console.log((expired ? "posts expired - " : "posts not found in db - ") + today);

    getHomePosts(null, function (posts) {
      // Always set the expiry explicitly so a new entry never depends on
      // the schema default.
      var expires = new Date(Date.now() + POSTS_CACHE_TTL_MS);

      var respond = function (err) {
        if (err) {
          console.log("could not cache posts - " + err);
        }
        sendHunts(res, posts);
      };

      if (expired) {
        // entry exists but is stale: refresh it in place
        Posts.findOneAndUpdate({date: today}, {posts: posts, expires: expires}, {new: true}, respond);
      } else {
        // not in the db yet: create the entry
        console.log("got details");
        new Posts({
          date: today,
          posts: posts,
          expires: expires
        }).save(respond);
      }
    });

  });

});
73 |
74 |
// How long a post's scraped comments stay cached, in milliseconds (2 hours).
var COMMENTS_CACHE_TTL_MS = 2 * 60 * 60 * 1000;

// Sends the success payload for a post and its comments.
function sendPostWithComments(res, post, comments) {
  res.send(200, {
    status: 'success',
    post: post,
    comments: comments
  });
}

/**
 * GET /posts/:slug - responds with a post and its comments.
 *
 * Serves the cached entry for the permalink while it has not expired.
 * Otherwise the post page is scraped again, the cache entry is updated (or
 * created) and the fresh data is returned. Database errors are logged and
 * the scraped data is still sent; an error reported by getComments results
 * in a 502 response and nothing is cached.
 */
app.get("/posts/:slug", function (req, res) {

  var post_url = '/posts/' + req.params.slug;

  Comments.findOne({'permalink': post_url}, function (err, commentobj) {

    if (err) {
      // A failed lookup is handled like a cache miss.
      console.log("comments lookup failed - " + err);
    }

    var expired = commentobj && commentobj.expires < Date.now();

    if (commentobj && !expired) {
      // not expired, just send response
      console.log("in db - fine");
      return sendPostWithComments(res, commentobj.post, commentobj.comments);
    }

    console.log(expired ? "expired" : "not in db");

    getPostDetails(post_url, function (post) {
      console.log(post);
      getComments(post_url, function (err, comments) {

        if (err) {
          console.log("could not fetch comments - " + err);
          return res.send(502, {
            status: 'error',
            message: 'Could not fetch comments for ' + post_url
          });
        }

        // Always set the expiry explicitly so a new entry never depends on
        // the schema default.
        var expires = new Date(Date.now() + COMMENTS_CACHE_TTL_MS);

        var respond = function (err) {
          if (err) {
            console.log("could not cache comments - " + err);
          }
          sendPostWithComments(res, post, comments);
        };

        if (expired) {
          // entry exists but is stale: refresh it in place
          Comments.findOneAndUpdate({permalink: post_url}, {post: post, comments: comments, expires: expires}, respond);
        } else {
          // not in db yet: create the entry, keyed by the requested permalink
          new Comments({
            post: post,
            permalink: post_url,
            comments: comments,
            expires: expires
          }).save(respond);
        }
      });
    });

  });

});
129 |
130 |
/**
 * Gets the details of a single post by scraping its page.
 *
 * @param {string} post_url - Post path such as "/posts/<slug>"; when falsy,
 *   BASE_URL itself is requested.
 * @param {function(Object)} callback - Called with the post object:
 *   {title, votes, user: {username, name}, rank, tagline, comment_count,
 *   permalink, url}. `name` / `username` are null when they cannot be read
 *   from the page. `url` is the redirect target of the post link, or the
 *   post link itself when the target cannot be resolved.
 *   NOTE: the callback is NOT invoked when the page request fails or does
 *   not answer with status 200; the failure is only logged.
 */
function getPostDetails(post_url, callback) {
  var url = post_url ? BASE_URL + post_url : BASE_URL;
  console.log(url);

  request(url, function (error, response, body) {

    if (error || response.statusCode !== 200) {
      console.log("ERROR fetching " + url + ": " + (error || response.statusCode));
      return;
    }

    // Keep the parsed document in a local variable instead of an implicit
    // global shared by every request in flight.
    var $ = cheerio.load(body);

    var header_dom = $(".comments-header");

    var votes = +header_dom.find(".vote-count").text();

    // The poster's name is taken from the "Posted by <name> <n> ..." text;
    // a text that does not match yields null instead of throwing.
    var posted_by = header_dom.find(".posted-by").text().trim().replace(/"/g, "");
    var name_match = /Posted by (.*) \d+ .*/.exec(posted_by);
    var name = name_match ? name_match[1] : null;

    // The username is the profile link without its leading character.
    var profile_href = header_dom.find(".user-with-tooltip").attr("href");
    var username = profile_href ? profile_href.slice(1).trim().replace(/"/g, "") : null;

    var title = header_dom.find(".post-url").text();
    var tagline = header_dom.find(".post-tagline").text();

    var comment_count = $($(".modal-container").find(".subhead")[2]).text().trim().match(/(\d+)/g);
    comment_count = comment_count ? comment_count[0] : 0;

    var permalink = post_url;

    // The post link is requested without following redirects so that the
    // target can be read from the Location header.
    var redirect_url = BASE_URL + header_dom.find(".post-url").attr("href");

    request({url: redirect_url, followRedirect: false}, function (error, response) {
      if (error) {
        console.log("ERROR " + error);
      }

      // `response` is undefined when the request itself failed.
      var target_url = (response && response.headers.location) || redirect_url;

      callback({
        'title': title,
        'votes': votes,
        'user': {
          'username': username,
          'name': name
        },
        'rank': 1,
        'tagline': tagline,
        'comment_count': comment_count,
        'permalink': permalink,
        'url': target_url
      });

    });

  });
}
176 |
/**
 * Returns the comments of a single post by scraping its page.
 *
 * @param {string} url - Post path such as "/posts/<slug>", appended to BASE_URL.
 * @param {function(Error, Array)} callback - Called with (null, comments) on
 *   success, where each comment is {index, user: {username, name},
 *   timestamp, comment, comment_html} and `index` starts at 1. A page
 *   without comments yields an empty array. When the page request fails or
 *   does not answer with status 200, called with (error, []).
 */
function getComments(url, callback) {

  request(BASE_URL + url, function (error, response, body) {

    if (error || response.statusCode !== 200) {
      var failure = error || new Error("Unexpected status " + response.statusCode + " for " + url);
      return callback(failure, []);
    }

    // Keep the parsed document in a local variable instead of an implicit
    // global shared by every request in flight.
    var $ = cheerio.load(body);
    var comments = [];

    $(".modal-container").find(".comment").each(function (index) {

      var comment_dom = $(this);
      var body_dom = comment_dom.find(".actual-comment");

      var name = comment_dom.find(".comment-user-name a").text();
      var username = comment_dom.find(".comment-user-handle").text().replace(/[{()} ]/g, '');
      var timestamp = comment_dom.find(".comment-time-ago").text().replace(/\s+/g, '');

      // Order matters: the user-name element is removed from the comment
      // body before the text is read, so the HTML read afterwards no longer
      // contains it either.
      var comment = body_dom.find(".comment-user-name").remove().end().text().replace(/^\s+|\s+$/g, '');
      // .html() yields null when the comment body element is missing.
      var comment_html = (body_dom.html() || '').replace(/^\s+|\s+$/g, '');

      comments.push({
        index: index + 1,
        user: {
          username: username,
          name: name
        },
        timestamp: timestamp,
        comment: comment,
        comment_html: comment_html
      });
    });

    // The loop above is synchronous, so every comment has been collected.
    callback(null, comments);
  });

}
220 |
221 |
// Sort comparator: orders posts by ascending `rank`.
function compare(a, b) {
  var rankDifference = a.rank - b.rank;
  return rankDifference;
}
225 |
226 |
/**
 * Returns all homepage posts posted today by scraping BASE_URL.
 *
 * @param {?string} post_url - When truthy, used as the permalink of every
 *   returned post instead of the one read from the page. The page that is
 *   requested is always BASE_URL.
 * @param {function(Array)} callback - Called once with the posts sorted by
 *   ascending rank. Each post is {title, votes, user: {username, name},
 *   rank, tagline, comment_count, permalink, url}. `name` / `username` are
 *   null when the user text cannot be parsed. `url` is the redirect target
 *   of the post link, or the post link itself when the target cannot be
 *   resolved. A page without post rows yields an empty array.
 *   NOTE: the callback is NOT invoked when the homepage request fails or
 *   does not answer with status 200; the failure is only logged.
 */
function getHomePosts(post_url, callback) {

  request(BASE_URL, function (error, response, body) {

    if (error || response.statusCode !== 200) {
      console.log("ERROR fetching " + BASE_URL + ": " + (error || response.statusCode));
      return;
    }

    // Keep the parsed document in a local variable instead of an implicit
    // global shared by every request in flight.
    var $ = cheerio.load(body);
    var rows = $('.today .posts-group tr');
    var posts = [];

    // Without rows no redirect request would ever complete, so answer now.
    if (rows.length === 0) {
      return callback(posts);
    }

    rows.each(function (rank) {

      var row = $(this);

      var votes = row.find(".upvote").text().replace(/\s+/g, '');

      // User text has the form "<name>(@<username>)" once whitespace is
      // stripped; a row that does not match yields nulls instead of throwing.
      var user_str = row.find(".user-image-td").find("h3").text().trim().replace(/\s*/g, "");
      var user_match = /(.*)\(\@(.*)\)/.exec(user_str);
      var name = user_match ? user_match[1] : null;
      var username = user_match ? user_match[2] : null;

      var title = row.find(".post-url").text();
      var tagline = row.find(".post-tagline").text();

      var comment_count = row.find(".view-discussion").text().trim().match(/(\d+)/g);
      comment_count = comment_count ? comment_count[0] : 0;

      var permalink = post_url ? post_url : row.find(".view-discussion").attr("data-url");

      // The post link is requested without following redirects so that the
      // target can be read from the Location header.
      var redirect_url = BASE_URL + row.find(".post-url").attr("href");

      request({url: redirect_url, followRedirect: false}, function (error, response) {
        if (error) {
          console.log("ERROR " + error);
        }

        // `response` is undefined when the request itself failed.
        var target_url = (response && response.headers.location) || redirect_url;

        posts.push({
          'title': title,
          'votes': votes,
          'user': {
            'username': username,
            'name': name
          },
          'rank': rank + 1,
          'tagline': tagline,
          'comment_count': comment_count,
          'permalink': permalink,
          'url': target_url
        });

        // Redirect requests finish in arbitrary order; sort once, after the
        // last one has reported back.
        if (posts.length === rows.length) {
          posts.sort(compare);
          callback(posts);
        }
      });
    });
  });
}
292 |
293 |
// Start the HTTP server on the port stored in the app settings.
var listenPort = app.get('port');
app.listen(listenPort);
295 |
--------------------------------------------------------------------------------