├── .hgignore ├── requirements.txt ├── README.markdown └── hackernews.py /.hgignore: -------------------------------------------------------------------------------- 1 | .venv 2 | hackernews.cookie 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pyquery 3 | pystache 4 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | Hacker News 2 | =========== 3 | 4 | A Python-based CLI for working with [Hacker News](https://news.ycombinator.com). 5 | 6 | Requirements 7 | ------------ 8 | 9 | * [Requests](http://docs.python-requests.org/en/latest/index.html) 10 | * [pystache](https://github.com/defunkt/pystache) 11 | * [pyquery](http://packages.python.org/pyquery/) 12 | 13 | Requirements can be installed directly by using: 14 | 15 | pip install -r requirements.txt 16 | 17 | Using hackernews 18 | ---------------- 19 | 20 | ### Help 21 | 22 | hackernews.py -h 23 | 24 | ### Cookies 25 | 26 | By default, `hackernews` uses a built-in cookie system. It saves the file `hackernews.cookie` to the base directory. To disable the use of cookies: 27 | 28 | hackernews.py --no-cookies 29 | 30 | When cookies are turned off, two additional requests are needed per-command to retrieve proper login info. 31 | 32 | ### Saved items 33 | 34 | Retrieve a user's latest saved items, and print the output as JSON (default): 35 | 36 | hackernews.py saved -u 'username' -p 'password' 37 | 38 | Retrieve all saved items, and print the output as XML (this might take a while): 39 | 40 | hackernews.py saved --all -e xml -u 'username' -p 'password' 41 | 42 | Help for the `saved` subcommand: 43 | 44 | hackernews.py saved -h 45 | 46 | ### Comments 47 | 48 | Retrieve a user's comment threads, and print the output as JSON (default): 49 | 50 | hackernews.py comments -u 'username' -p 'password' 51 | 52 | Retrieve all comments, and print the output as XML (this might take a while): 53 | 54 | hackernews.py comments --all -e xml -u 'username' -p 'password' 55 | 56 | Don't show owner's comments (the logged-in user); this is useful for a feed of replies to your comments: 57 | 58 | hackernews.py comments --no-owner -u 'username' -p 'password' 59 | 60 | Help for the `comments` subcommand: 61 | 62 | hackernews.py comments -h 63 | -------------------------------------------------------------------------------- /hackernews.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse, os, pickle, re, requests 4 | from pyquery import PyQuery as pq 5 | import pystache 6 | 7 | BASE_PATH = os.path.dirname(__file__) 8 | COOKIE = os.path.join(BASE_PATH, 'hackernews.cookie') 9 | EXPORT_TYPES = ( 'json', 'xml', ) 10 | 11 | tries = 0 12 | 13 | 14 | def _login(**kwargs): 15 | """Log in to Hacker News and return the cookies.""" 16 | 17 | if 'r' not in kwargs: 18 | # We haven't established a login request. 19 | 20 | # If we're using cookies, try to return those instead. 21 | if not kwargs['args'].no_cookies: 22 | 23 | # If the cookie doesn't exist, create it. 24 | try: 25 | cookie = open(COOKIE, 'r') 26 | except IOError: 27 | cookie = open(COOKIE, 'w') 28 | 29 | # If there's something in the cookie, return that. 30 | if os.stat(COOKIE).st_size: 31 | cookies = pickle.load(cookie) 32 | cookie.close() 33 | return cookies 34 | else: 35 | cookie.close() 36 | 37 | # Request a blank login page to harvest the fnid (a CSRF-type key). 38 | r = requests.get('https://news.ycombinator.com/newslogin') 39 | J = pq(r.content) 40 | fnid = J('input[name="fnid"]').val() 41 | 42 | # Build the login POST data and make the login request. 43 | payload = { 44 | 'fnid': fnid, 45 | 'u': kwargs['args'].username, 46 | 'p': kwargs['args'].password, 47 | } 48 | r = requests.post('https://news.ycombinator.com/y', data=payload) 49 | 50 | cookies = r.cookies 51 | 52 | else: 53 | # Set the cookies to the cached login request's cookies. 54 | cookies = kwargs['r'].cookies 55 | 56 | # Set the cookie 57 | if not kwargs['args'].no_cookies: 58 | cookie = open(COOKIE, 'w+') 59 | pickle.dump(cookies, cookie) 60 | cookie.close() 61 | 62 | return cookies 63 | 64 | def _reset_cookie(tries): 65 | 66 | # Reset the cookie and mark this as a try. 67 | # If we try 5 times, kill the script. 68 | if tries < 5: 69 | cookie = open(COOKIE, 'r+') 70 | cookie.truncate(0) 71 | cookie.close() 72 | tries = tries + 1 73 | else: 74 | raise BaseException('Too many tries with bad responses (Hacker News may be down).') 75 | 76 | def _good_response(**kwargs): 77 | 78 | # Handle an invalid cookie / login. 79 | if kwargs['r'].content == "Can't display that.": 80 | _reset_cookie(tries) 81 | return False 82 | 83 | return True 84 | 85 | def _sanitize_comment(J, c): 86 | user = J('span.comhead a:eq(0)', c).text() 87 | link = 'https://news.ycombinator.com/%s' % J('span.comhead a:eq(1)', c).attr('href') 88 | points = J('span.comhead span', c).text() 89 | 90 | # 'Parent' and 'Story' don't exist for non-owned comments. 91 | parent = J('span.comhead a:eq(2)', c).attr('href') 92 | if parent is None: 93 | parent = 'N/A' 94 | story = 'N/A' 95 | else: 96 | parent = 'https://news.ycombinator.com/%s' % parent 97 | story = 'https://news.ycombinator.com/%s' % J('span.comhead a:eq(3)', c).attr('href') 98 | 99 | # Reply link doesn't always exist, for some reason. 100 | reply = J('u a', c).attr('href') 101 | if reply is None: 102 | reply = 'N/A' 103 | else: 104 | reply = 'https://news.ycombinator.com/%s' % J('u a', c).attr('href') 105 | 106 | # Sanitize the comment. 107 | comment = J('span.comment', c).html() 108 | comment = re.sub('

', '\n\n', comment) 109 | comment = re.sub('<[^<]+?>', '', comment).rstrip('\n\n') 110 | 111 | # Grab the points, if possible. 112 | if points != None: 113 | points = re.sub('points?', '', points).strip() 114 | else: 115 | points = 'N/A' 116 | 117 | # Strip the comhead and harvest the date. 118 | J('span.comhead a, span.comhead span', c).remove() 119 | date = J('span.comhead', c).text() 120 | date = re.sub('on:|by|\||', '', date).strip() 121 | 122 | return { 123 | 'user': user, 124 | 'comment': comment, 125 | 'reply': reply, 126 | 'points': points, 127 | 'link': link, 128 | 'parent': parent, 129 | 'story': story, 130 | 'date': date, 131 | } 132 | 133 | 134 | def _get_saved_stories(**kwargs): 135 | """Returns a sorted list of the user's saved stories.""" 136 | 137 | # Log in to get cookies. 138 | cookies = _login(**kwargs) 139 | 140 | if 'r' not in kwargs: 141 | # This is the first saved items request. 142 | # Make the saved items request and set an empty list. 143 | kwargs['r'] = requests.get('https://news.ycombinator.com/saved?id=%s' % kwargs['args'].username, 144 | cookies=cookies) 145 | 146 | # Check to make sure we have a good response. 147 | if not _good_response(**kwargs): 148 | kwargs.pop('r') 149 | return _get_saved_stories(**kwargs) 150 | 151 | kwargs['saved'] = [] 152 | 153 | # Grab the stories. 154 | J = pq(kwargs['r'].content) 155 | stories = J('table table td.title') 156 | 157 | for story in stories: 158 | title = J(story).text() 159 | url = J('a', story).attr('href') 160 | 161 | # Skip digit-only s and the 'More' link. 162 | if not re.match('\d+\.$|More', title): 163 | 164 | # Skip HN dead links 165 | if url is not None: 166 | 167 | # For HN links, make absolute URL. 168 | if not url.startswith('http'): 169 | url = 'https://news.ycombinator.com/' + url 170 | 171 | # Add the story to the saved list. 172 | kwargs['saved'].append({ 173 | 'title': title, 174 | 'url': url, 175 | }) 176 | 177 | # If we're getting all saved stories. 178 | if kwargs['args'].all: 179 | 180 | # Find the 'More' link and load it. 181 | last = J('a', J('table table tr td.title:last')) 182 | if last.text() == 'More': 183 | kwargs['r'] = requests.get('https://news.ycombinator.com%s' % last.attr('href'), 184 | cookies=cookies) 185 | 186 | # Check to make sure we have a good response. 187 | if not _good_response(**kwargs): 188 | kwargs.pop('r') 189 | return _get_saved_stories(**kwargs) 190 | 191 | # Call this function again, this time with the new list. 192 | return _get_saved_stories(**kwargs) 193 | 194 | return kwargs['saved'] 195 | 196 | def _get_comments(**kwargs): 197 | """Returns a sorted list of the user's comments.""" 198 | 199 | # Log in to get cookies. 200 | cookies = _login(**kwargs) 201 | 202 | if 'r' not in kwargs: 203 | # This is the first comments request. 204 | # Make the comments request and set an empty list. 205 | kwargs['r'] = requests.get('https://news.ycombinator.com/threads?id=%s' % kwargs['args'].username, 206 | cookies=cookies) 207 | 208 | # Check to make sure we have a good response. 209 | if not _good_response(**kwargs): 210 | kwargs.pop('r') 211 | return _get_comments(**kwargs) 212 | 213 | kwargs['comments'] = [] 214 | 215 | # Grab the comments. 216 | J = pq(kwargs['r'].content) 217 | comments = J('table table td.default') 218 | 219 | for c in comments: 220 | 221 | comment = _sanitize_comment(J, c) 222 | 223 | if kwargs['args'].no_owner and comment['user'] == kwargs['args'].username: 224 | continue 225 | 226 | # Add the comment to the saved list. 227 | kwargs['comments'].append({ 228 | 'user': comment['user'], 229 | 'comment': comment['comment'], 230 | 'reply': comment['reply'], 231 | 'points': comment['points'], 232 | 'link': comment['link'], 233 | 'parent': comment['parent'], 234 | 'story': comment['story'], 235 | 'date': comment['date'], 236 | }) 237 | 238 | # If we're getting all comments. 239 | if kwargs['args'].all: 240 | 241 | # Find the 'More' link and load it. 242 | last = J('a', J('table table tr td.title:last')) 243 | if last.text() == 'More': 244 | kwargs['r'] = requests.get('https://news.ycombinator.com%s' % last.attr('href'), 245 | cookies=cookies) 246 | 247 | # Check to make sure we have a good response. 248 | if not _good_response(**kwargs): 249 | kwargs.pop('r') 250 | return _get_comments(**kwargs) 251 | 252 | # Call this function again, this time with the new list. 253 | return _get_comments(**kwargs) 254 | 255 | return kwargs['comments'] 256 | 257 | 258 | def saved(args): 259 | """Returns a formatted list of the logged-in user's saved stories.""" 260 | 261 | stories = _get_saved_stories(args=args) 262 | 263 | if args.export == 'json': 264 | return stories 265 | elif args.export == 'xml': 266 | return pystache.render(""" 267 | 268 | Saved stories on Hacker News 269 | {{#stories}} 270 | 271 | {{title}} 272 | 273 | 274 | {{/stories}} 275 | """, {'stories': stories}) 276 | 277 | def comments(args): 278 | """Returns a formatted list of the logged-in user's comments.""" 279 | 280 | comments = _get_comments(args=args) 281 | 282 | if args.export == 'json': 283 | return comments 284 | elif args.export == 'xml': 285 | return pystache.render(""" 286 | 287 | Comments on Hacker News 288 | {{#comments}} 289 | 290 | {{comment}} 291 | 292 | {{user}} 293 | 294 | {{reply}} 295 | {{points}} 296 | 297 | {{parent}} 298 | {{story}} 299 | {{date}} 300 | 301 | {{/comments}} 302 | """, {'comments': comments}) 303 | 304 | 305 | if __name__ == '__main__': 306 | 307 | # Parser 308 | parser = argparse.ArgumentParser(prog='Hacker News') 309 | parser.add_argument('--version', action='version', version='%(prog)s 0.1') 310 | subparsers = parser.add_subparsers() 311 | 312 | # TODO: --username, --password, --all, --no-cookies should probably be stored 313 | # at the parser level, not subparser. 314 | 315 | # Saved stories 316 | saved_parser = subparsers.add_parser('saved') 317 | saved_parser.add_argument('-u', '--username', dest='username', help='HN Username', 318 | required=True) 319 | saved_parser.add_argument('-p', '--password', dest='password', help='HN Password', 320 | required=True) 321 | saved_parser.add_argument('-e', '--export', dest='export', help='Export type', 322 | required=False, default='json', choices=EXPORT_TYPES) 323 | saved_parser.add_argument('--all', dest='all', help='Get all saved stories', 324 | action='store_true') 325 | saved_parser.add_argument('--no-cookies', dest='no_cookies', help="Don't use cookies", 326 | action='store_true', default=False) 327 | saved_parser.set_defaults(func=saved) 328 | 329 | # Comments 330 | comments_parser = subparsers.add_parser('comments') 331 | comments_parser.add_argument('-u', '--username', dest='username', help='HN Username', 332 | required=True) 333 | comments_parser.add_argument('-p', '--password', dest='password', help='HN Password', 334 | required=True) 335 | comments_parser.add_argument('-e', '--export', dest='export', help='Export type', 336 | required=False, default='json', choices=EXPORT_TYPES) 337 | comments_parser.add_argument('--all', dest='all', help='Get all comments', 338 | action='store_true') 339 | comments_parser.add_argument('--no-cookies', dest='no_cookies', help="Don't use cookies", 340 | action='store_true', default=False) 341 | comments_parser.add_argument('--no-owner', dest='no_owner', help="Don't show owner's comments", 342 | action='store_true', default=False) 343 | comments_parser.set_defaults(func=comments) 344 | 345 | # Args 346 | args = parser.parse_args() 347 | print args.func(args) 348 | --------------------------------------------------------------------------------