├── .hgignore
├── requirements.txt
├── README.markdown
└── hackernews.py
/.hgignore:
--------------------------------------------------------------------------------
1 | .venv
2 | hackernews.cookie
3 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | pyquery
3 | pystache
4 |
--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
1 | Hacker News
2 | ===========
3 |
4 | A Python-based CLI for working with [Hacker News](https://news.ycombinator.com).
5 |
6 | Requirements
7 | ------------
8 |
9 | * [Requests](http://docs.python-requests.org/en/latest/index.html)
10 | * [pystache](https://github.com/defunkt/pystache)
11 | * [pyquery](http://packages.python.org/pyquery/)
12 |
13 | Requirements can be installed directly by using:
14 |
15 | pip install -r requirements.txt
16 |
17 | Using hackernews
18 | ----------------
19 |
20 | ### Help
21 |
22 | hackernews.py -h
23 |
24 | ### Cookies
25 |
26 | By default, `hackernews` uses a built-in cookie system. It saves the file `hackernews.cookie` to the base directory. To disable the use of cookies:
27 |
28 | hackernews.py --no-cookies
29 |
30 | When cookies are turned off, two additional requests are needed per-command to retrieve proper login info.
31 |
32 | ### Saved items
33 |
34 | Retrieve a user's latest saved items, and print the output as JSON (default):
35 |
36 | hackernews.py saved -u 'username' -p 'password'
37 |
38 | Retrieve all saved items, and print the output as XML (this might take a while):
39 |
40 | hackernews.py saved --all -e xml -u 'username' -p 'password'
41 |
42 | Help for the `saved` subcommand:
43 |
44 | hackernews.py saved -h
45 |
46 | ### Comments
47 |
48 | Retrieve a user's comment threads, and print the output as JSON (default):
49 |
50 | hackernews.py comments -u 'username' -p 'password'
51 |
52 | Retrieve all comments, and print the output as XML (this might take a while):
53 |
54 | hackernews.py comments --all -e xml -u 'username' -p 'password'
55 |
56 | Don't show owner's comments (the logged-in user); this is useful for a feed of replies to your comments:
57 |
58 | hackernews.py comments --no-owner -u 'username' -p 'password'
59 |
60 | Help for the `comments` subcommand:
61 |
62 | hackernews.py comments -h
63 |
--------------------------------------------------------------------------------
/hackernews.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import argparse, os, pickle, re, requests
4 | from pyquery import PyQuery as pq
5 | import pystache
6 |
# Directory this script lives in; the pickled cookie cache is stored beside it.
BASE_PATH = os.path.dirname(__file__)
COOKIE = os.path.join(BASE_PATH, 'hackernews.cookie')
# Output formats accepted by the -e/--export flag.
EXPORT_TYPES = ( 'json', 'xml', )

# Module-level retry counter read when handling bad responses
# (see _reset_cookie / _good_response).
tries = 0
12 |
13 |
def _login(**kwargs):
    """Log in to Hacker News and return the session cookies.

    Keyword args:
        args: parsed argparse namespace; reads .username, .password and
            .no_cookies.
        r (optional): a previously made response whose cookies should be
            reused instead of performing a fresh login.

    Returns the cookies (a requests cookie jar, or whatever was pickled
    into the cookie file by a previous run).
    """

    if 'r' not in kwargs:
        # We haven't established a login request.

        # If we're using cookies, try to return the cached ones instead
        # of hitting the network.
        if not kwargs['args'].no_cookies:

            # If the cookie file doesn't exist, create it (empty).
            try:
                cookie = open(COOKIE, 'r')
            except IOError:
                cookie = open(COOKIE, 'w')

            # If there's something in the cookie file, unpickle and
            # return it without logging in.
            if os.stat(COOKIE).st_size:
                cookies = pickle.load(cookie)
                cookie.close()
                return cookies
            else:
                cookie.close()

        # Request a blank login page to harvest the fnid (a CSRF-type key).
        r = requests.get('https://news.ycombinator.com/newslogin')
        J = pq(r.content)
        fnid = J('input[name="fnid"]').val()

        # Build the login POST data and make the login request.
        payload = {
            'fnid': fnid,
            'u': kwargs['args'].username,
            'p': kwargs['args'].password,
        }
        r = requests.post('https://news.ycombinator.com/y', data=payload)

        cookies = r.cookies

    else:
        # Set the cookies to the cached login request's cookies.
        cookies = kwargs['r'].cookies

    # Persist the cookies for the next run (unless cookies are disabled).
    if not kwargs['args'].no_cookies:
        cookie = open(COOKIE, 'w+')
        pickle.dump(cookies, cookie)
        cookie.close()

    return cookies
63 |
64 | def _reset_cookie(tries):
65 |
66 | # Reset the cookie and mark this as a try.
67 | # If we try 5 times, kill the script.
68 | if tries < 5:
69 | cookie = open(COOKIE, 'r+')
70 | cookie.truncate(0)
71 | cookie.close()
72 | tries = tries + 1
73 | else:
74 | raise BaseException('Too many tries with bad responses (Hacker News may be down).')
75 |
76 | def _good_response(**kwargs):
77 |
78 | # Handle an invalid cookie / login.
79 | if kwargs['r'].content == "Can't display that.":
80 | _reset_cookie(tries)
81 | return False
82 |
83 | return True
84 |
def _sanitize_comment(J, c):
    """Extract one comment table cell into a plain dict.

    Args:
        J: the PyQuery object for the page (used as a scoped selector).
        c: the comment's <td class="default"> element.

    Returns a dict with 'user', 'comment' (plain text), 'reply', 'points',
    'link', 'parent', 'story' and 'date' keys; fields that don't exist for
    a given comment are set to 'N/A'.
    """
    user = J('span.comhead a:eq(0)', c).text()
    link = 'https://news.ycombinator.com/%s' % J('span.comhead a:eq(1)', c).attr('href')
    points = J('span.comhead span', c).text()

    # 'Parent' and 'Story' don't exist for non-owned comments.
    parent = J('span.comhead a:eq(2)', c).attr('href')
    if parent is None:
        parent = 'N/A'
        story = 'N/A'
    else:
        parent = 'https://news.ycombinator.com/%s' % parent
        story = 'https://news.ycombinator.com/%s' % J('span.comhead a:eq(3)', c).attr('href')

    # Reply link doesn't always exist, for some reason.
    reply = J('u a', c).attr('href')
    if reply is None:
        reply = 'N/A'
    else:
        reply = 'https://news.ycombinator.com/%s' % reply

    # Sanitize the comment: turn paragraph tags into blank lines, then
    # strip every remaining tag.  (The pattern here had been mangled —
    # its '<p>' was lost and the string literal split across two lines.)
    comment = J('span.comment', c).html()
    comment = re.sub('<p>', '\n\n', comment)
    comment = re.sub('<[^<]+?>', '', comment).rstrip('\n')

    # Grab the points, if possible (HN only shows them on some comments).
    if points is not None:
        points = re.sub('points?', '', points).strip()
    else:
        points = 'N/A'

    # Strip the comhead links/spans so only the date text remains.
    J('span.comhead a, span.comhead span', c).remove()
    date = J('span.comhead', c).text()
    # Dropped the trailing empty alternation ('...|') from the original
    # pattern; it matched the empty string everywhere and changed nothing.
    date = re.sub(r'on:|by|\|', '', date).strip()

    return {
        'user': user,
        'comment': comment,
        'reply': reply,
        'points': points,
        'link': link,
        'parent': parent,
        'story': story,
        'date': date,
    }
132 |
133 |
def _get_saved_stories(**kwargs):
    """Return the logged-in user's saved stories as a list of dicts.

    Keyword args:
        args: parsed argparse namespace (.username, .all, .no_cookies, ...).
        r (recursion only): the response for the current results page.
        saved (recursion only): the accumulator list built so far.

    Each story is a dict with 'title' and 'url' keys.  When args.all is
    set, 'More' links are followed recursively, one HTTP request per page.
    """

    # Log in to get cookies.
    cookies = _login(**kwargs)

    if 'r' not in kwargs:
        # This is the first saved items request.
        # Make the saved items request and set an empty list.
        kwargs['r'] = requests.get('https://news.ycombinator.com/saved?id=%s' % kwargs['args'].username,
                                   cookies=cookies)

        # On a bad response the cookie has already been reset by
        # _good_response(); drop the request and start over.
        if not _good_response(**kwargs):
            kwargs.pop('r')
            return _get_saved_stories(**kwargs)

        # Accumulator shared by all recursive calls.
        kwargs['saved'] = []

    # Grab the stories.
    J = pq(kwargs['r'].content)
    stories = J('table table td.title')

    for story in stories:
        title = J(story).text()
        url = J('a', story).attr('href')

        # Skip digit-only rank cells (e.g. "12.") and the 'More' link.
        if not re.match('\d+\.$|More', title):

            # Skip HN dead links (cells with no anchor).
            if url is not None:

                # For relative HN links, make an absolute URL.
                if not url.startswith('http'):
                    url = 'https://news.ycombinator.com/' + url

                # Add the story to the saved list.
                kwargs['saved'].append({
                    'title': title,
                    'url': url,
                })

    # If we're getting all saved stories.
    if kwargs['args'].all:

        # Find the 'More' link and load it.
        last = J('a', J('table table tr td.title:last'))
        if last.text() == 'More':
            kwargs['r'] = requests.get('https://news.ycombinator.com%s' % last.attr('href'),
                                       cookies=cookies)

            # Bad response: cookie was reset, retry from the first page.
            if not _good_response(**kwargs):
                kwargs.pop('r')
                return _get_saved_stories(**kwargs)

            # Recurse to parse the next page into the same accumulator.
            return _get_saved_stories(**kwargs)

    return kwargs['saved']
195 |
def _get_comments(**kwargs):
    """Return the logged-in user's comment threads as a list of dicts.

    Keyword args:
        args: parsed argparse namespace (.username, .all, .no_owner, ...).
        r (recursion only): the response for the current threads page.
        comments (recursion only): the accumulator list built so far.

    Each entry is the dict produced by _sanitize_comment().  When
    args.all is set, 'More' links are followed recursively; when
    args.no_owner is set, the user's own comments are skipped.
    """

    # Log in to get cookies.
    cookies = _login(**kwargs)

    if 'r' not in kwargs:
        # This is the first comments request.
        # Make the comments request and set an empty list.
        kwargs['r'] = requests.get('https://news.ycombinator.com/threads?id=%s' % kwargs['args'].username,
                                   cookies=cookies)

        # On a bad response the cookie has already been reset by
        # _good_response(); drop the request and start over.
        if not _good_response(**kwargs):
            kwargs.pop('r')
            return _get_comments(**kwargs)

        # Accumulator shared by all recursive calls.
        kwargs['comments'] = []

    # Grab the comments.
    J = pq(kwargs['r'].content)
    comments = J('table table td.default')

    for c in comments:

        comment = _sanitize_comment(J, c)

        # Optionally hide the logged-in user's own comments.
        if kwargs['args'].no_owner and comment['user'] == kwargs['args'].username:
            continue

        # Add the comment to the saved list.
        kwargs['comments'].append({
            'user': comment['user'],
            'comment': comment['comment'],
            'reply': comment['reply'],
            'points': comment['points'],
            'link': comment['link'],
            'parent': comment['parent'],
            'story': comment['story'],
            'date': comment['date'],
        })

    # If we're getting all comments.
    if kwargs['args'].all:

        # Find the 'More' link and load it.
        last = J('a', J('table table tr td.title:last'))
        if last.text() == 'More':
            kwargs['r'] = requests.get('https://news.ycombinator.com%s' % last.attr('href'),
                                       cookies=cookies)

            # Bad response: cookie was reset, retry from the first page.
            if not _good_response(**kwargs):
                kwargs.pop('r')
                return _get_comments(**kwargs)

            # Recurse to parse the next page into the same accumulator.
            return _get_comments(**kwargs)

    return kwargs['comments']
256 |
257 |
def saved(args):
    """Return the logged-in user's saved stories, formatted per args.export.

    'json' returns the raw list of dicts; 'xml' renders a simple XML feed.
    """

    stories = _get_saved_stories(args=args)

    if args.export == 'json':
        return stories
    elif args.export == 'xml':
        # NOTE(review): the template's markup had been stripped somewhere
        # along the way (it rendered no tags at all); reconstructed a
        # minimal well-formed document from the story dict's keys.
        return pystache.render("""<?xml version="1.0" encoding="UTF-8"?>
<stories>
  <info>Saved stories on Hacker News</info>
  {{#stories}}
  <story>
    <title>{{title}}</title>
    <url>{{url}}</url>
  </story>
  {{/stories}}
</stories>
""", {'stories': stories})
276 |
def comments(args):
    """Return the logged-in user's comments, formatted per args.export.

    'json' returns the raw list of dicts; 'xml' renders a simple XML feed.
    """

    comments = _get_comments(args=args)

    if args.export == 'json':
        return comments
    elif args.export == 'xml':
        # NOTE(review): the template's markup had been stripped somewhere
        # along the way (it rendered no tags, and {{link}} was lost);
        # reconstructed a well-formed document covering every key that
        # _sanitize_comment() produces.
        return pystache.render("""<?xml version="1.0" encoding="UTF-8"?>
<comments>
  <info>Comments on Hacker News</info>
  {{#comments}}
  <item>
    <comment>{{comment}}</comment>
    <user>{{user}}</user>
    <link>{{link}}</link>
    <reply>{{reply}}</reply>
    <points>{{points}}</points>
    <parent>{{parent}}</parent>
    <story>{{story}}</story>
    <date>{{date}}</date>
  </item>
  {{/comments}}
</comments>
""", {'comments': comments})
303 |
304 |
if __name__ == '__main__':

    # Options shared by every subcommand, declared once on a parent parser
    # (resolves the old TODO about duplicating them on each subparser).
    # The CLI is unchanged: the options still attach to each subcommand.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument('-u', '--username', dest='username', help='HN Username',
                        required=True)
    common.add_argument('-p', '--password', dest='password', help='HN Password',
                        required=True)
    common.add_argument('-e', '--export', dest='export', help='Export type',
                        required=False, default='json', choices=EXPORT_TYPES)
    common.add_argument('--no-cookies', dest='no_cookies', help="Don't use cookies",
                        action='store_true', default=False)

    # Parser
    parser = argparse.ArgumentParser(prog='Hacker News')
    parser.add_argument('--version', action='version', version='%(prog)s 0.1')
    subparsers = parser.add_subparsers()

    # Saved stories
    saved_parser = subparsers.add_parser('saved', parents=[common])
    saved_parser.add_argument('--all', dest='all', help='Get all saved stories',
                              action='store_true')
    saved_parser.set_defaults(func=saved)

    # Comments
    comments_parser = subparsers.add_parser('comments', parents=[common])
    comments_parser.add_argument('--all', dest='all', help='Get all comments',
                                 action='store_true')
    comments_parser.add_argument('--no-owner', dest='no_owner', help="Don't show owner's comments",
                                 action='store_true', default=False)
    comments_parser.set_defaults(func=comments)

    # Parse and dispatch.  print(...) with a single argument is valid
    # under both Python 2 and Python 3 (the old print statement was not).
    args = parser.parse_args()
    print(args.func(args))
348 |
--------------------------------------------------------------------------------
|