├── .gitignore ├── LICENSE.txt ├── Procfile ├── README.md ├── app.js ├── backend.js ├── clock.js ├── knexfile.js ├── migrations ├── 20141229012503_00.js └── 20160123121933_🌟stars🌟.js ├── package.json ├── routes.js ├── static └── style.css ├── update-db.js └── views ├── base.hjs └── index.hjs /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | node_modules 3 | dev.sqlite3 4 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Ian Kronquist 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: node app.js 2 | worker: npm run worker 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Y-Cloninator 2 | ============ 3 | 4 | Get the best parts of Y-Combinator's Hacker News - the links to awesome github 5 | projects 6 | 7 | Getting started 8 | --------------- 9 | * Install node.js on your system 10 | * Install dependencies 11 | * Run migrations 12 | * Run the app 13 | ```shell 14 | $ npm install 15 | $ npm run migrate 16 | $ npm start 17 | ``` 18 | 19 | Testing 20 | ------- 21 | Tests are run using mocha. 22 | ```shell 23 | $ npm test 24 | ``` 25 | You can also run the jshint linter quite easily: 26 | ```shell 27 | $ npm run lint 28 | ``` 29 | 30 | Configuration 31 | ------------- 32 | Configuration is done using environment variables. Sane defaults are provided 33 | for a dev environment, but require dev dependencies to be installed. 
34 | -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | /*jslint node: true */ 2 | 'use strict'; 3 | 4 | var express = require('express'); 5 | var nunjucks = require('nunjucks'); 6 | var bodyParser = require('body-parser'); 7 | var knex = require('knex')({ 8 | client: process.env.CLIENT || 'sqlite3', 9 | connection: process.env.DATABASE_URL || { filename: 'dev.sqlite3' } 10 | }); 11 | 12 | var app = express(); 13 | app.use(express.static(__dirname + '/static')); 14 | app.use(bodyParser.json()); 15 | app.use(bodyParser.urlencoded({ extended: true })); 16 | app.set('knex', knex); 17 | 18 | nunjucks.configure('views', { 19 | autoescape: true, 20 | express: app 21 | }); 22 | 23 | var routes = require('./routes')(app); 24 | 25 | app.listen(process.env.PORT || 8000, function () { 26 | console.log('App now listening on %s', process.env.Port || 8000); 27 | }); 28 | -------------------------------------------------------------------------------- /backend.js: -------------------------------------------------------------------------------- 1 | /*jslint node: true */ 2 | "use strict"; 3 | 4 | var http = require('follow-redirects').https; 5 | 6 | var knex = require('knex')({ 7 | client: process.env.CLIENT || 'sqlite3', 8 | connection: process.env.DATABASE_URL || { filename: 'dev.sqlite3' } 9 | }); 10 | 11 | 12 | var hn_api_host = 'hacker-news.firebaseio.com'; 13 | var post_details_url_suffix = '.json'; 14 | var gh_responses = []; 15 | 16 | var httpGet = function(host, path, cb) { 17 | var options = { 18 | host: host, 19 | path: path, 20 | headers: {'User-Agent': 'Mozilla/5.0'} 21 | }; 22 | 23 | var req = http.get(options, function(res) { 24 | var data = ''; 25 | res.on('data', function(chunk) { 26 | data += chunk.toString(); 27 | }); 28 | res.on('end', function(){ 29 | cb(data); 30 | }); 31 | if (res.statusCode != 200) { 32 | console.log('HTTP Status code 
not OK!', host, path, data); 33 | } 34 | }).on('error', function(e) { 35 | console.log("Got error: " + e.message); 36 | }); 37 | }; 38 | 39 | function saveGithubPost(data) { 40 | var project = JSON.parse(data); 41 | if (!project.name) { // probably an API ratelimiting issue 42 | console.error("Could not get Github project! Received data: "); 43 | console.error(project); 44 | return; 45 | } 46 | knex('ghprojects').insert({ 47 | gh_url: project.html_url, 48 | gh_name: project.name, 49 | gh_description: project.description, 50 | gh_stars: project.stargazers_count, 51 | gh_language: project.language 52 | }).then(function() { 53 | console.log("New project: " + project.name) 54 | }).catch(function(error) { 55 | console.log(error); 56 | }); 57 | } 58 | 59 | function hnItemUrl(itemId) { 60 | return 'https://news.ycombinator.com/item?id=' + itemId; 61 | } 62 | 63 | function checkPost(data) { 64 | var post_details = JSON.parse(data); 65 | var repository_url = /https?:\/\/github.com(\/.*?\/[^\/]*).*?/.exec( 66 | post_details.url); 67 | if (!repository_url) { 68 | return; 69 | } 70 | knex('hnposts').insert({ 71 | id: post_details.id, 72 | gh_url: repository_url[0], 73 | retrievedAt: Date.now(), 74 | hn_url: hnItemUrl(post_details.id), 75 | hn_time: post_details.time 76 | }).then(function () { 77 | httpGet('api.github.com', '/repos' + repository_url[1], saveGithubPost); 78 | }).catch(function(error) { 79 | console.error(error); 80 | }); 81 | } 82 | 83 | function processHNPosts(data) { 84 | var current_time = new Date(); 85 | var top_list = JSON.parse(data); 86 | top_list.forEach(function(entry) { 87 | httpGet(hn_api_host, 88 | '/v0/item/' + entry + post_details_url_suffix, checkPost); 89 | }); 90 | } 91 | 92 | function clearOldPosts() { 93 | var date = new Date(); 94 | date.setHours(date.getHours() - 2); 95 | 96 | // Split into two queries - join deletes aren't supported 97 | // https://github.com/tgriesser/knex/issues/873 98 | knex('hnposts') 99 | .distinct('gh_url') 100 | 
.select() 101 | .where('retrievedAt', '<', date) 102 | .then(function(posts) { 103 | var ghUrls = posts.map(function(post) { return post.gh_url }); 104 | 105 | knex('hnposts') 106 | .whereIn('gh_url', ghUrls) 107 | .del(); 108 | 109 | knex('ghprojects') 110 | .whereIn('gh_url', ghUrls) 111 | .del(); 112 | }).catch(function(error) { 113 | console.error(error); 114 | }); 115 | } 116 | 117 | module.exports.httpGet = httpGet; 118 | module.exports.hn_api_host = hn_api_host; 119 | module.exports.processHNPosts = processHNPosts; 120 | module.exports.clearOldPosts = clearOldPosts; 121 | module.exports.hnItemUrl = hnItemUrl; 122 | -------------------------------------------------------------------------------- /clock.js: -------------------------------------------------------------------------------- 1 | var schedule = require('node-schedule'); 2 | var backend = require('./backend'); 3 | 4 | // every ten minutes from 8:00-20:00 5 | var job = schedule.scheduleJob('*/10 8-20 * * *', function () { 6 | runJob(); 7 | }); 8 | 9 | function runJob() { 10 | console.log("Starting job."); 11 | backend.httpGet( 12 | backend.hn_api_host, 13 | '/v0/topstories.json', 14 | backend.processHNPosts); 15 | backend.clearOldPosts(); 16 | } 17 | 18 | module.exports.forceUpdate = runJob; 19 | -------------------------------------------------------------------------------- /knexfile.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | 3 | development: { 4 | client: 'sqlite3', 5 | connection: { 6 | filename: './dev.sqlite3' 7 | } 8 | }, 9 | 10 | staging: { 11 | client: 'postgresql', 12 | connection: process.env.DATABASE_URL, 13 | pool: { 14 | min: 2, 15 | max: 10 16 | }, 17 | migrations: { 18 | tableName: 'knex_migrations' 19 | } 20 | }, 21 | 22 | production: { 23 | client: 'postgresql', 24 | connection: process.env.DATABASE_URL, 25 | pool: { 26 | min: 2, 27 | max: 10 28 | }, 29 | migrations: { 30 | tableName: 'knex_migrations' 31 | } 32 | } 
33 | 34 | }; 35 | -------------------------------------------------------------------------------- /migrations/20141229012503_00.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | exports.up = function(knex, Promise) { 4 | return knex.schema.createTable('ghprojects', function (table) { 5 | table.string('gh_url').primary(); 6 | table.string('gh_name'); 7 | table.string('gh_description'); 8 | table.string('gh_language'); 9 | }).createTable('hnposts', function (table) { 10 | table.integer('id').primary(); 11 | table.string('gh_url').references('gh_url').inTable('ghprojects'); 12 | table.timestamp('retrievedAt'); 13 | table.string('hn_url'); 14 | table.string('hn_time'); 15 | }); 16 | }; 17 | 18 | exports.down = function(knex, Promise) { 19 | return knex.schema.dropTable('hnposts') 20 | .dropTable('ghprojects'); 21 | }; 22 | -------------------------------------------------------------------------------- /migrations/20160123121933_🌟stars🌟.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | exports.up = function(knex, Promise) { 4 | return knex.schema.table('ghprojects', function (table) { 5 | table.integer('gh_stars'); 6 | }); 7 | }; 8 | 9 | exports.down = function(knex, Promise) { 10 | }; 11 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "y-cloninator", 3 | "version": "1.0.0", 4 | "description": "Scrapes HN for github projects", 5 | "main": "app.js", 6 | "scripts": { 7 | "start": "node app.js", 8 | "migrate": "knex migrate:latest", 9 | "worker": "node clock.js", 10 | "lint": "jshint **.js", 11 | "update": "node update-db.js" 12 | }, 13 | "repository": { 14 | "type": "git", 15 | "url": "https://github.com/iankronquist/y-cloninator" 16 | }, 17 | "keywords": [ 18 | "hacker", 19 | "news", 20 | "scraper", 21 
| "github" 22 | ], 23 | "author": "Ian Kronquist and Evan Tschuy", 24 | "license": "MIT", 25 | "bugs": { 26 | "url": "https://github.com/iankronquist/y-cloninator/issues" 27 | }, 28 | "homepage": "https://github.com/iankronquist/y-cloninator", 29 | "dependencies": { 30 | "body-parser": "^1.10.0", 31 | "express": "^4.10.6", 32 | "follow-redirects": "0.0.3", 33 | "knex": "^0.7.3", 34 | "node-schedule": "^0.1.15", 35 | "nunjucks": "^1.1.0", 36 | "pg": "^4.1.1" 37 | }, 38 | "devDependencies": { 39 | "mocha": "^1.21.3", 40 | "sqlite3": "^3.0.4", 41 | "jshint": "^2.5.11" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /routes.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var backend = require('./backend'); 4 | 5 | module.exports = function(app) { 6 | var knex = app.get('knex'); 7 | // 30 minutes 8 | var lastUpdated = 0; 9 | const halfHour = 30*60*60; 10 | 11 | function displayUTCDate(timestamp) { 12 | var date = new Date(Number(timestamp) * 1000); 13 | return date.toLocaleDateString(); 14 | } 15 | 16 | function makeBaseQuery() { 17 | return knex.select( 18 | 'ghprojects.gh_url', 19 | 'ghprojects.gh_name', 20 | 'ghprojects.gh_description', 21 | 'ghprojects.gh_stars', 22 | 'ghprojects.gh_language', 23 | knex.raw('count(*) as hn_mentions'), 24 | knex.raw('min(hnposts.hn_time) as hn_first_mention_timestamp'), 25 | knex.raw('max(hnposts.hn_time) as hn_last_mention_timestamp'), 26 | knex.raw('min(hnposts.id) as hn_first_mention_id'), 27 | knex.raw('max(hnposts.id) as hn_last_mention_id') 28 | ) 29 | .from('ghprojects') 30 | .join('hnposts', 'ghprojects.gh_url', 'hnposts.gh_url') 31 | .groupBy('ghprojects.gh_url') 32 | .orderBy('hn_last_mention_id', 'desc'); 33 | } 34 | 35 | var searchLanguage = function(res, language) { 36 | makeBaseQuery() 37 | .whereRaw('LOWER(ghprojects.gh_language) = LOWER(?)', [language]) 38 | .then(function (projects) { 39 | return 
res.render('index.hjs', { 40 | filter_lang: language, 41 | projects: projects, 42 | hnItemUrl: backend.hnItemUrl, 43 | displayUTCDate: displayUTCDate 44 | }); 45 | }).catch(function (error) { 46 | console.error(error); 47 | }); 48 | }; 49 | 50 | app.get('/', function (req, res) { 51 | makeBaseQuery() 52 | .then(function(projects) { 53 | return res.render('index.hjs', { 54 | projects: projects, 55 | hnItemUrl: backend.hnItemUrl, 56 | displayUTCDate: displayUTCDate 57 | }); 58 | }).catch(function (error) { 59 | console.error(error); 60 | }); 61 | }); 62 | 63 | app.get('/refresh-content', function (req, res) { 64 | // Noop if we have refreshed the content in the past 1/2 hour 65 | // Of course this counter is reset if the app is restarted 66 | let rightNow = Date.now(); 67 | if (rightNow >= (lastUpdated + halfHour)) { 68 | console.log('Refreshing DB', rightNow); 69 | lastUpdated = rightNow; 70 | backend.httpGet( 71 | backend.hn_api_host, 72 | '/v0/topstories.json', 73 | backend.processHNPosts); 74 | backend.clearOldPosts(); 75 | } else { 76 | console.log('Not refreshing. 
Last new post retreived at ', lastUpdated); 77 | } 78 | res.redirect('/'); 79 | }); 80 | 81 | app.get('/:language', function (req, res) { 82 | return searchLanguage(res, req.params.language); 83 | }); 84 | 85 | app.post('/', function (req, res) { 86 | return searchLanguage(res, req.body.language); 87 | }); 88 | 89 | }; 90 | -------------------------------------------------------------------------------- /static/style.css: -------------------------------------------------------------------------------- 1 | .page { 2 | max-width: 1000px; 3 | width: 90%; 4 | display: block; 5 | margin: auto; 6 | height: calc(100% - 110px); 7 | } 8 | 9 | .proj-title { 10 | padding-right: 10px; 11 | } 12 | 13 | tr.project > td 14 | { 15 | padding-bottom: 0.5em; 16 | } 17 | 18 | .proj-description-content { 19 | position: relative; 20 | overflow: hidden; 21 | max-height: 4.0em; 22 | } 23 | 24 | .proj-description-shadow { 25 | position:absolute; 26 | top: 3.0em; 27 | width:100%; 28 | height:1.0em; 29 | background: linear-gradient(transparent, white); 30 | } 31 | 32 | 33 | .proj-language { 34 | padding-right: 10px; 35 | padding-left: 10px; 36 | } 37 | 38 | .nowrap { 39 | white-space: nowrap; 40 | } 41 | 42 | .header { 43 | background-color: #57105E; 44 | } 45 | 46 | #filters { 47 | padding: 10px; 48 | padding-top: 0px; 49 | padding-bottom: 0px; 50 | } 51 | 52 | .search { 53 | border: 2px solid #333; 54 | margin: 0px auto 10px; 55 | height: 30px; 56 | font-size: 16pt; 57 | width: calc(100% - 120px); 58 | display: inline-block; 59 | } 60 | 61 | .filter-message p { 62 | position: relative; 63 | top: -100px; 64 | } 65 | 66 | .submit { 67 | background: #5B1662; 68 | color: #FFFFFF; 69 | width: 100px; 70 | border: 2px solid #333; 71 | font-size: 16pt; 72 | height: 36px; 73 | padding: 2px; 74 | display: inline-block; 75 | margin: 20px auto; 76 | } 77 | 78 | .header-inner { 79 | max-width: 1000px; 80 | width: 90%; 81 | display: block; 82 | margin-left: auto; 83 | margin-right: auto; 84 | } 85 | 86 
| .right .github-fork-ribbon { 87 | background-color: #39013F; 88 | } 89 | 90 | table { 91 | padding-top: 15px; 92 | width: 100%; 93 | border-spacing: 0px; 94 | padding-bottom: 10px; 95 | } 96 | 97 | .table-header { 98 | font-weight: 600; 99 | } 100 | 101 | body { 102 | font-family: 'Open Sans', sans-serif; 103 | } 104 | 105 | .title { 106 | width: 49%; 107 | display: inline-block; 108 | text-align: left; 109 | } 110 | 111 | .catchphrase { 112 | width: 49%; 113 | display: inline-block; 114 | text-align: right; 115 | color: #FFFFFF; 116 | font-size: 1.4em; 117 | } 118 | 119 | .message { 120 | display: block; 121 | position: relative; 122 | top: -10px; 123 | } 124 | 125 | td { 126 | padding-bottom: 5px; 127 | } 128 | 129 | h1 { 130 | font-family: 'Pacifico', cursive; 131 | font-size: 3em; 132 | margin: 0px; 133 | color: #FFFFFF; 134 | } 135 | 136 | html, body { 137 | height: 100%; 138 | margin: 0px; 139 | padding: 0px; 140 | } 141 | 142 | .h1link:link, .h1link:visited, .h1link:active { 143 | color: #FFFFFF; 144 | text-decoration: none; 145 | } 146 | 147 | .h1link:hover { 148 | color: #E4C9E6; 149 | text-decoration: none; 150 | } 151 | 152 | a:link, a:visited, a:active { 153 | color: #1E0429; 154 | } 155 | 156 | a:hover { 157 | color: #57105E; 158 | } 159 | -------------------------------------------------------------------------------- /update-db.js: -------------------------------------------------------------------------------- 1 | var clock = require('./clock'); 2 | clock.forceUpdate(); 3 | -------------------------------------------------------------------------------- /views/base.hjs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Y-Cloninator 5 | 6 | 7 | 8 |
9 |
10 | Fork me on GitHub 11 |
12 |
13 |
14 |
15 |
16 |

17 | Y-Cloninator 18 |

19 |
20 |
21 |

22 | Just the code, ma'am. 23 |

24 |
25 |
26 |
27 |
28 |
29 |

30 | Some of the best content on Hacker News are the 31 | links to up-and-coming projects on GitHub. 32 | Get a list of these awesome projects without the distraction of the rest 33 | of Hacker News. 34 |

35 |
36 |
37 | {% block content %} 38 | {% endblock %} 39 |
40 |
41 | 42 | {% block script %}{% endblock %} 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /views/index.hjs: -------------------------------------------------------------------------------- 1 | {% extends "base.hjs" %} 2 | 3 | {% block script %} 4 | 10 | {% endblock %} 11 | 12 | {% block content %} 13 |
14 |
15 | 16 | 17 |
18 |
19 | {% if filter_lang %} 20 |
21 |

22 | Only showing results for the language {{ filter_lang }}. 23 |

24 |
25 | {% endif %} 26 | 27 | 28 | 31 | 34 | 37 | 40 | 43 | 45 | 46 | {% for project in projects %} 47 | 48 | 51 | 55 | 58 | 61 | {% if project.hn_first_mention_timestamp == project.hn_last_mention_timestamp %} 62 | 67 | 69 | {% else %} 70 | 75 | 80 | {% endif %} 81 | 82 | {% endfor %} 83 |
29 | Project Name 30 | 32 | Description 33 | 35 | Language 36 | 38 | Stars 39 | 41 | Hacker News 42 | 44 |
49 | {{ project.gh_name }} 50 | 52 |
53 |
{{ project.gh_description }}
54 |
56 | {{ project.gh_language }} 57 | 59 | 🌟 {{project.gh_stars}} 60 | 63 | 64 | Read on HN 65 | 66 | 68 | 71 | 72 | Oldest 73 | 74 | 76 | 77 | Newest 78 | 79 |
84 | {% endblock %} 85 | --------------------------------------------------------------------------------