├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── entries
│   ├── __init__.py
│   └── get_entries.py
├── main.py
├── requirements.txt
└── templates
    └── index.html

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.env
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.6-alpine

LABEL name="entries-by-votes"

EXPOSE 8888

RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app

COPY requirements.txt /usr/src/app/
RUN pip install -r requirements.txt

COPY . /usr/src/app

CMD python main.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 JuanPablo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Engineering-blogs entries ordered by Hacker News and Reddit votes

1. Get the feeds of [Engineering-blogs](https://github.com/kilimchoi/engineering-blogs).
2. Get the entries of each feed.
3. Get the votes for each entry from Hacker News and Reddit.
4. Sort the entries with this ranking method, where `t` is the entry's age in hours:

        (votes - 1) / (t + 2)^1.8

[Reference - How Hacker News ranking algorithm works](https://medium.com/hacking-and-gonzo/how-hacker-news-ranking-algorithm-works-1d9b0cf2c08d)
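In plain Python the ranking boils down to the sketch below (the function name
and arguments are illustrative; `total_votes` already has 1 subtracted per
submission, matching `Entry.get_total_votes` in `entries/__init__.py`):

    from math import pow

    def rank(total_votes, hours_age):
        # Older entries decay: the denominator grows as the entry ages.
        return total_votes / pow(hours_age + 2., 1.8)

    # Two submissions with 10 and 5 votes, 10 hours after publication:
    # total_votes = (10 - 1) + (5 - 1) = 13
    print(rank(13, 10))  # ~0.148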
## Deploy

### Environment variables

    PRAW_CLIENT_ID
    PRAW_CLIENT_SECRET
    PRAW_USER_AGENT
    MONGO_ENTRIES
    TORNADO_PORT

### Deploy to zeit.co/now

Add the environment variables to an `entries.env` file, then deploy with `now`:

    now --dotenv entries.env --public

--------------------------------------------------------------------------------
/entries/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import listparser
import feedparser
from urllib.parse import urlencode
import requests
import praw
import os
from math import pow
from time import mktime
from datetime import datetime
import binascii


def datetime_from_struct_time(struct_time):
    try:
        return datetime.fromtimestamp(mktime(struct_time))
    except (OverflowError, ValueError) as error:
        print('Error with struct_time:', error, struct_time)
        return datetime(1970, 1, 1)


def string_from_struct_time(struct_time):
    return datetime_from_struct_time(struct_time).strftime("%Y-%m-%d %H:%M:%S")


class Opml(object):

    def __init__(self):
        self.opml_url = (
            'https://raw.githubusercontent.com/kilimchoi/'
            'engineering-blogs/master/engineering_blogs.opml'
        )
        self.opml = None
        self.feeds = []

    def request_opml(self):
        self.opml = listparser.parse(self.opml_url)

    def generate_feeds(self):
        feeds = []

        for feed in self.opml.feeds:
            feeds.append({'url': feed.url, 'title': feed.title})

        self.feeds = feeds

    def get_feeds(self):

        if self.feeds == []:
            self.request_opml()
            self.generate_feeds()

        return self.feeds


class Entry(object):

    def __init__(self, raw_entry):
        self.raw_entry = raw_entry
        self.published = self.search_published_date(raw_entry)
        self.links = self.get_html_links(raw_entry.get('links', []))
        self.title = raw_entry.get('title', '')
        self.votes = []

    def age(self):
        return (datetime.now() - self.published)

    def hours_age(self):
        return self.age().total_seconds() / 3600.

    def days_age(self):
        return self.hours_age() / 24.
    def get_html_links(self, links):
        return [link.get('href', '') for link in links
                if link.get('type', None) == 'text/html']

    def search_published_date(self, entry):

        if entry.get('published_parsed', False):
            return datetime_from_struct_time(entry["published_parsed"])
        if entry.get('updated_parsed', False):
            return datetime_from_struct_time(entry["updated_parsed"])

        return datetime(1970, 1, 1)

    def set_votes(self, votes):
        self.votes = sorted(votes, key=lambda k: k['votes'], reverse=True)

    def get_total_votes(self):
        total = 0.0
        for vote in self.votes:
            total += vote['votes'] - 1

        return total

    def get_rank(self):
        try:
            return self.get_total_votes() / pow(self.hours_age() + 2., 1.8)
        except ValueError:
            return 0.0

    def __str__(self):
        return '<Entry: {}>'.format(self.title)


class Feed(object):

    def __init__(self, url, title):
        self.url = url
        self.title = title
        self.entries = []
        self.content = None

    def request_entries(self):
        try:
            self.content = feedparser.parse(self.url)
        except (UnicodeEncodeError, binascii.Error) as error:
            print('Error:', error, self.url)

    def parse_entries(self):

        if self.content is None:
            return

        raw_entries = self.content.entries
        entries = []

        if len(raw_entries) > 0:

            for entry in raw_entries:
                entries.append(Entry(entry))

        self.entries = entries

    def get_entries(self):

        if self.entries == []:
            self.request_entries()
            self.parse_entries()

        return self.entries

    def __str__(self):
        return '<Feed: {} {}>'.format(self.title, self.url)


class HackerNews(object):
    """Votes and comments from the Algolia Hacker News Search API."""

    def __init__(self):
        self.api_url = 'https://hn.algolia.com/api/v1/search?'

    def search_url(self, url):
        query_url = urlencode({'query': url})
        request = self.api_url + query_url

        response = requests.get(request).json()

        # Keep only hits whose URL matches exactly, not mere text matches.
        return [hit for hit in response.get('hits', [])
                if hit.get('url') == url]

    def votes_and_comments(self, url):
        hits = self.search_url(url)

        return [{
            'source': 'hacker_news',
            'votes': hit.get('points', 0),
            'comments': hit.get('num_comments', 0),
            'id': hit.get('objectID', 0)
        } for hit in hits]


class Reddit(object):

    def __init__(self):
        self.client = praw.Reddit(
            client_id=os.environ['PRAW_CLIENT_ID'],
            client_secret=os.environ['PRAW_CLIENT_SECRET'],
            user_agent=os.environ['PRAW_USER_AGENT']
        )

    def votes_and_comments(self, url):
        votes = []

        try:
            for sub in self.client.info(url=url):
                votes.append({
                    'source': 'reddit', 'id': sub.id,
                    'subreddit': sub.subreddit.display_name,
                    'votes': sub.ups, 'comments': sub.num_comments
                })
        except TypeError as err:
            print(url, err)

        return votes
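
# Illustrative wiring of the classes above (a sketch, not part of the app;
# Reddit() additionally needs the PRAW_* environment variables):
#
#     feeds = Opml().get_feeds()
#     feed = Feed(feeds[0]['url'], feeds[0]['title'])
#     hn = HackerNews()
#     for entry in feed.get_entries():
#         votes = [v for link in entry.links
#                  for v in hn.votes_and_comments(link)]
#         entry.set_votes(votes)
#         print(entry.published, entry.title, entry.get_rank())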
--------------------------------------------------------------------------------
/entries/get_entries.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
from . import Opml, Feed, HackerNews, Reddit
from tornado import gen, queues
from tornado.ioloop import IOLoop
from concurrent.futures import ThreadPoolExecutor
from motor.motor_tornado import MotorClient

# Blocking feed/API calls run in this pool so the IOLoop stays responsive.
thread_pool = ThreadPoolExecutor(2)

feeds = queues.Queue()
entries = queues.Queue()
client = MotorClient(os.environ['MONGO_ENTRIES'])
db = client['entries-by-votes']

# Only entries younger than this many days are fetched and ranked.
ENTRIES_AGE = float(os.environ.get('ENTRIES_AGE', 14))


@gen.coroutine
def do_insert_entry(entry):
    yield db.entries.update_one(
        {'link': entry['link']}, {'$set': entry}, upsert=True
    )


@gen.coroutine
def get_feeds():
    return (yield thread_pool.submit(Opml().get_feeds))


@gen.coroutine
def get_entries(url, title):
    return (yield thread_pool.submit(Feed(url, title).get_entries))


@gen.coroutine
def votes_from_hacker_news(url):
    return (yield thread_pool.submit(HackerNews().votes_and_comments, url))


@gen.coroutine
def votes_from_reddit(url):
    return (yield thread_pool.submit(Reddit().votes_and_comments, url))


@gen.coroutine
def votes_from_entry(link):
    votes = []

    for vote in (yield votes_from_hacker_news(link)):
        votes.append(vote)
    for vote in (yield votes_from_reddit(link)):
        votes.append(vote)

    return votes


@gen.coroutine
def entries_consumer():
    while True:
        current_entry = yield entries.get()
        print(
            'Fetching entry', entries.qsize(),
            current_entry.published, current_entry
        )
        try:
            for link in current_entry.links:
                votes = yield votes_from_entry(link)

                if votes != []:
                    current_entry.set_votes(votes)
                    print(
                        current_entry.published, current_entry.title,
                        link, current_entry.get_rank()
                    )
                    yield do_insert_entry({
                        'title': current_entry.title,
                        'link': link,
                        'published': current_entry.published,
                        'votes': current_entry.votes,
                        'total_votes': current_entry.get_total_votes(),
                        'rank': current_entry.get_rank()
                    })
            yield gen.sleep(1)
        finally:
            entries.task_done()


@gen.coroutine
def get_new_entries_from_feed():
    current_feed = yield feeds.get()
    print('Fetching feed', feeds.qsize(), current_feed['url'])
    try:
        url, title = current_feed['url'], current_feed['title']
        for entry in (yield get_entries(url, title)):
            if entry.days_age() < ENTRIES_AGE:
                yield entries.put(entry)
    finally:
        feeds.task_done()


@gen.coroutine
def feeds_consumer():
    while True:
        yield get_new_entries_from_feed()


@gen.coroutine
def feeds_producer():
    for feed in (yield get_feeds()):
        yield feeds.put(feed)


@gen.coroutine
def entries_update():

    while True:
        print('Starting entries update')
        yield feeds_producer()
        yield feeds.join()
        yield entries.join()
        print('Entries update done')
        yield gen.sleep(3600)


if __name__ == "__main__":
    # Run as a module so the relative imports resolve:
    #     python -m entries.get_entries
    IOLoop.current().spawn_callback(feeds_consumer)
    IOLoop.current().spawn_callback(entries_consumer)
    IOLoop.current().run_sync(entries_update)
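
# Shape of the documents written by do_insert_entry (illustrative values):
#
#     {'title': 'Some post',
#      'link': 'https://example.com/post',
#      'published': datetime(2017, 11, 1, 12, 0),
#      'votes': [{'source': 'hacker_news', 'votes': 12,
#                 'comments': 3, 'id': '15000000'}],
#      'total_votes': 11.0,  # sum of (votes - 1) over all submissions
#      'rank': 0.12}         # total_votes / (hours_age + 2)^1.8 at insert time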
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
from tornado import ioloop, gen
from tornado import web
from motor.motor_tornado import MotorClient
from entries.get_entries import entries_update
from entries.get_entries import entries_consumer, feeds_consumer
from datetime import datetime

client = MotorClient(os.environ['MONGO_ENTRIES'])
db = client['entries-by-votes']


class MainHandler(web.RequestHandler):

    @gen.coroutine
    def get(self):
        db = self.settings['db']

        # Recompute the rank at query time: $subtract on two dates yields
        # milliseconds, so dividing by 1000*60*60 gives the age in hours,
        # matching total_votes / (hours + 2)^1.8.
        cursor = db.entries.aggregate([
            {'$project': {
                '_id': 0, 'link': 1, 'title': 1, 'published': 1,
                'total_votes': 1, 'votes': 1,
                'rank': {
                    '$divide': ["$total_votes", {
                        '$pow': [{
                            '$add': [
                                {'$divide': [
                                    {'$subtract': [datetime.now(), "$published"]},
                                    1000 * 60 * 60
                                ]},
                                2
                            ]
                        }, 1.8]
                    }]
                }
            }},
            {'$sort': {'rank': -1}},
            {'$limit': 100},
        ])

        entries = yield cursor.to_list(length=100)

        self.render('templates/index.html', entries=entries)


def make_app():
    return web.Application([
        (r'/', MainHandler),
    ], db=db)


if __name__ == "__main__":
    port = int(os.environ.get('TORNADO_PORT', 8888))
    print('Starting app on port {}'.format(port))
    app = make_app()
    app.listen(port)
    io_loop = ioloop.IOLoop.current()
    io_loop.spawn_callback(feeds_consumer)
    io_loop.spawn_callback(entries_consumer)
    io_loop.spawn_callback(entries_update)
    io_loop.start()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
feedparser==5.2.1
flake8==3.4.1
listparser==0.18
motor==1.1
praw==5.1.0
requests==2.18.4
tornado==4.5.2

--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
<!-- Original markup not recovered; only the page title "Entries by votes"
     and the heading "Engineering-blogs entries ordered by Hacker News and
     Reddit votes" survive. -->
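<!-- A minimal sketch of a plausible template, reconstructed from
     MainHandler's render call (which passes `entries`); the layout and the
     fields shown are assumptions, not the original markup. -->
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <title>Entries by votes</title>
  </head>
  <body>
    <h1>Engineering-blogs entries ordered by Hacker News and Reddit votes</h1>
    <ul>
      {% for entry in entries %}
        <li>
          <a href="{{ entry['link'] }}">{{ entry['title'] }}</a>
          ({{ entry['total_votes'] }} votes, rank {{ entry['rank'] }})
        </li>
      {% end %}
    </ul>
  </body>
</html>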
--------------------------------------------------------------------------------