├── .gitignore ├── DB.py ├── Httpy.py ├── LICENSE ├── README.md ├── app.py ├── common.py ├── demo.gif ├── img_util.py ├── index.py ├── rabbitmq_listen.py ├── reddit.py ├── requirements.txt ├── search.py ├── static ├── index.js ├── main.css ├── materialize.css ├── materialize.min.js ├── nsfw.png └── sfw.png ├── status.py ├── subreddits.py ├── subs.txt ├── templates ├── index_nsfw.html ├── index_sfw.html └── layout.html ├── update_clean_url.py ├── update_hash.py ├── upload.py ├── util.py ├── video_thumbs.py └── video_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.db 2 | thumbs/ 3 | *.pyc 4 | .idea/ 5 | *.log 6 | login_credentials.txt 7 | -------------------------------------------------------------------------------- /DB.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | from time import sleep 4 | 5 | import psycopg2 6 | from psycopg2.errorcodes import UNIQUE_VIOLATION 7 | from psycopg2.extensions import ISOLATION_LEVEL_READ_COMMITTED 8 | 9 | from common import logger, SQL_DEBUG 10 | from img_util import thumb_path 11 | from util import clean_url 12 | 13 | 14 | class SearchResult: 15 | __slots__ = "permalink", "subreddit", "created", \ 16 | "author", "item", "ups", "downs", "hexid", "author" 17 | 18 | def __init__(self, permalink, subreddit, created, item, ups, downs, hexid, author): 19 | self.permalink = permalink 20 | self.subreddit = subreddit 21 | self.created = created 22 | self.item = item 23 | self.ups = ups 24 | self.downs = downs 25 | self.hexid = hexid 26 | self.author = author 27 | 28 | def json(self): 29 | raise NotImplementedError 30 | 31 | 32 | class CommentSearchResult(SearchResult): 33 | __slots__ = "body", "post_id" 34 | 35 | def __init__(self, body, post_id, permalink, subreddit, created, item, 36 | ups, downs, hexid, author): 37 | super().__init__(permalink, subreddit, created, item, 38 | ups, downs, hexid, author) 
39 | 40 | self.body = body 41 | self.post_id = post_id 42 | 43 | def json(self): 44 | return { 45 | "body": self.body, 46 | "post_id": self.post_id, 47 | "permalink": self.permalink, 48 | "subreddit": self.subreddit, 49 | "created": self.created, 50 | "ups": self.ups, 51 | "downs": self.downs, 52 | "hexid": self.hexid, 53 | "author": self.author, 54 | "item": self.item.json(), 55 | "type": "comment", 56 | } 57 | 58 | 59 | class PostSearchResult(SearchResult): 60 | __slots__ = "title", "text", "comments" 61 | 62 | def __init__(self, title, text, comments, permalink, subreddit, 63 | created, item, ups, downs, hexid, author): 64 | super().__init__(permalink, subreddit, created, 65 | item, ups, downs, hexid, author) 66 | 67 | self.title = title 68 | self.text = text 69 | self.comments = comments 70 | 71 | def json(self): 72 | return { 73 | "title": self.title, 74 | "text": self.text, 75 | "comments": self.comments, 76 | "permalink": self.permalink, 77 | "subreddit": self.subreddit, 78 | "created": self.created, 79 | "ups": self.ups, 80 | "downs": self.downs, 81 | "hexid": self.hexid, 82 | "author": self.author, 83 | "item": self.item.json(), 84 | "type": "post", 85 | } 86 | 87 | 88 | class ImageItem: 89 | __slots__ = "url", "width", "height", "size", "thumb", "sha1", "album_url" 90 | 91 | def __init__(self, url, width, height, size, thumb, sha1, album_url): 92 | self.url = url 93 | self.width = width 94 | self.height = height 95 | self.size = size 96 | self.sha1 = sha1 97 | self.thumb = thumb 98 | self.album_url = album_url 99 | 100 | def json(self): 101 | return { 102 | "type": "image", 103 | "url": self.url, 104 | "width": self.width, 105 | "height": self.height, 106 | "size": self.size, 107 | "sha1": self.sha1, 108 | "thumb": self.thumb, 109 | "album_url": self.album_url, 110 | } 111 | 112 | 113 | class VideoItem: 114 | __slots__ = "url", "width", "height", "size", "bitrate", "codec", "format", "sha1", "duration", \ 115 | "frames", "video_id" 116 | 117 | def 
__init__(self, url, width, height, size, bitrate, codec, format, duration, 118 | frames, sha1, video_id): 119 | self.url = url 120 | self.width = width 121 | self.height = height 122 | self.size = size 123 | self.bitrate = bitrate 124 | self.codec = codec 125 | self.format = format 126 | self.duration = duration 127 | self.frames = frames 128 | self.sha1 = sha1 129 | self.video_id = video_id 130 | 131 | def json(self): 132 | return { 133 | "type": "video", 134 | "url": self.url, 135 | "width": self.width, 136 | "height": self.height, 137 | "size": self.size, 138 | "sha1": self.sha1, 139 | "video_id": self.video_id, 140 | "bitrate": self.bitrate, 141 | "codec": self.codec, 142 | "format": self.format, 143 | "duration": self.duration, 144 | "frames": self.frames, 145 | } 146 | 147 | 148 | class PgConn: 149 | def __init__(self, conn, conn_str): 150 | self.conn = conn 151 | self.conn_str = conn_str 152 | self.cur = conn.cursor() 153 | 154 | def __enter__(self): 155 | return self 156 | 157 | def exec(self, query_string, args=None): 158 | if args is None: 159 | args = [] 160 | while True: 161 | try: 162 | self.cur.execute(query_string, args) 163 | break 164 | except psycopg2.Error as e: 165 | if e.pgcode == UNIQUE_VIOLATION: 166 | break 167 | traceback.print_stack() 168 | self._handle_err(e, query_string, args) 169 | 170 | def query(self, query_string, args=None, read_committed=False): 171 | if read_committed: 172 | self.conn.set_isolation_level(ISOLATION_LEVEL_READ_COMMITTED) 173 | while True: 174 | try: 175 | if SQL_DEBUG: 176 | logger.debug(query_string) 177 | logger.debug("With args " + str(args)) 178 | 179 | self.cur.execute(query_string, args) 180 | res = self.cur.fetchall() 181 | 182 | if SQL_DEBUG: 183 | logger.debug("result: " + str(res)) 184 | 185 | return res 186 | except psycopg2.Error as e: 187 | if e.pgcode == UNIQUE_VIOLATION: 188 | break 189 | self._handle_err(e, query_string, args) 190 | 191 | def _handle_err(self, err, query, args): 192 | 
logger.warn("Error during query '%s' with args %s: %s %s (%s)" % (query, args, type(err), err, err.pgcode)) 193 | self.conn = psycopg2.connect(self.conn_str) 194 | self.cur = self.conn.cursor() 195 | sleep(0.1) 196 | 197 | def __exit__(self, type, value, traceback): 198 | try: 199 | self.conn.commit() 200 | self.cur.close() 201 | except: 202 | pass 203 | 204 | 205 | class DB: 206 | 207 | def __init__(self, db_file, **schemas): 208 | """ 209 | Initializes database. 210 | Attempts to creates tables with schemas if needed. 211 | * db_file - Name of the database file. 212 | * schemas - A python dictionary where: 213 | KEY is the table name, 214 | VALUE is that table's schema. 215 | 216 | For example: 217 | db = DB('file.db', { 218 | 'Customer': 'name text, phone int, address text', 219 | 'Order': 'id int primary key, customer_name text, cost real'}) 220 | # This would open the 'file.db' file and create two tables with the respective schemas. 221 | If the tables already exist, the existing tables remain unaltered. 222 | """ 223 | self.db_file = db_file 224 | self.conn = psycopg2.connect(self.db_file) 225 | 226 | # Don't create tables if not supplied. 227 | if schemas is not None and schemas != {} and schemas: 228 | 229 | # Create table for every schema given. 
230 | for key in schemas: 231 | self._create_table(key, schemas[key]) 232 | 233 | def get_conn(self): 234 | return PgConn(self.conn, self.db_file) 235 | 236 | def _create_table(self, table_name, schema): 237 | """Creates new table with schema""" 238 | with self.get_conn() as conn: 239 | conn.exec('''CREATE TABLE IF NOT EXISTS %s (%s)''' % (table_name, schema)) 240 | 241 | def get_image_from_url(self, url): 242 | with self.get_conn() as conn: 243 | res = conn.query("SELECT i.id from imageurls " 244 | "INNER JOIN images i on i.id = imageurls.imageid " 245 | "WHERE clean_url = %s", (clean_url(url),)) 246 | 247 | return None if not res else res[0][0] 248 | 249 | def get_image_hash_from_url(self, url): 250 | with self.get_conn() as conn: 251 | res = conn.query("SELECT i.hash from imageurls " 252 | "INNER JOIN images i on i.id = imageurls.imageid " 253 | "WHERE clean_url = %s", (clean_url(url),), read_committed=True) 254 | 255 | return None if not res else res[0][0] 256 | 257 | def get_similar_images(self, hash, distance=0): 258 | with self.get_conn() as conn: 259 | # TODO: LIMIT X 260 | if distance <= 0: 261 | res = conn.query("SELECT id from images WHERE hash = %s", (hash,), 262 | read_committed=True) 263 | else: 264 | res = conn.query( 265 | "SELECT id FROM images WHERE hash_is_within_distance(hash, %s, %s)", 266 | (hash, distance,), read_committed=True, 267 | ) 268 | 269 | return [] if not res else [row[0] for row in res] 270 | 271 | def get_similar_videos_by_hash(self, hashes, distance, frame_count): 272 | hashes = list(set(hashes)) 273 | with self.get_conn() as conn: 274 | # TODO: LIMIT X 275 | if distance == 0: 276 | res = conn.query( 277 | "SELECT videos.id from videoframes " 278 | "INNER JOIN videos on videos.id = videoid " 279 | "WHERE hash_equ_any(hash, %s) " 280 | "GROUP BY videos.id " 281 | "HAVING COUNT(videoframes.id) >= %s", 282 | (b''.join(hashes), frame_count), read_committed=True 283 | ) 284 | else: 285 | res = conn.query( 286 | "SELECT videos.id from 
videoframes " 287 | "INNER JOIN videos on videos.id = videoid " 288 | "WHERE hash_is_within_distance_any(hash, %s, %s) " 289 | "GROUP BY videos.id " 290 | "HAVING COUNT(videoframes.id) >= %s", 291 | (b''.join(hashes), distance, frame_count), read_committed=True, 292 | ) 293 | 294 | return [] if not res else [row[0] for row in res] 295 | 296 | def get_image_from_sha1(self, sha1): 297 | with self.get_conn() as conn: 298 | res = conn.query("SELECT id from images " 299 | "WHERE sha1=%s", (sha1,)) 300 | 301 | return None if not res else res[0][0] 302 | 303 | def insert_imageurl(self, url, imageid, albumid, postid, commentid): 304 | with self.get_conn() as conn: 305 | conn.exec("INSERT INTO imageurls (url, clean_url, imageid, albumid, postid, commentid) " 306 | "VALUES (%s,%s,%s,%s,%s,%s)", (url, clean_url(url), imageid, albumid, postid, commentid)) 307 | 308 | def insert_image(self, imhash, width, height, size, sha1): 309 | with self.get_conn() as conn: 310 | res = conn.query("INSERT INTO images (width, height, bytes, hash, sha1) " 311 | "VALUES (%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING returning id ", 312 | (width, height, size, imhash, sha1)) 313 | # race condition: image was inserted after the existing_by_sha1 check 314 | if not res: 315 | res = conn.query("SELECT id FROM images WHERE sha1=%s", (sha1,)) 316 | 317 | return None if not res else res[0][0] 318 | 319 | def insert_video(self, sha1, size=0, info={}): 320 | with self.get_conn() as conn: 321 | res = conn.query("INSERT INTO videos " 322 | "(sha1, width, height, bitrate, codec, format, duration, frames, bytes) " 323 | "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING returning id ", 324 | (sha1, info["width"], info["height"], info["bitrate"], info["codec"], info["format"], 325 | info["duration"], info["frames"], size)) 326 | # race condition: video was inserted after the existing_by_sha1 check 327 | if not res: 328 | res = conn.query("SELECT id FROM videos WHERE sha1=%s", (sha1,)) 329 | 330 | return 
None if not res else res[0][0] 331 | 332 | def insert_videourl(self, url, video_id, postid, commentid): 333 | with self.get_conn() as conn: 334 | conn.exec("INSERT INTO videourls (url, clean_url, videoid, postid, commentid) " 335 | "VALUES (%s,%s,%s,%s,%s)", (url, clean_url(url), video_id, postid, commentid)) 336 | 337 | def insert_video_frames(self, video_id, frames): 338 | with self.get_conn() as conn: 339 | res = conn.query( 340 | "INSERT INTO videoframes (hash, videoid) VALUES " + 341 | ", ".join("(%%s,%d)" % (video_id,) for _ in frames) + 342 | " RETURNING id", 343 | list(i for i in frames) 344 | ) 345 | 346 | return None if not res else [r[0] for r in res] 347 | 348 | def get_video_from_url(self, url): 349 | with self.get_conn() as conn: 350 | res = conn.query("SELECT v.id from videourls " 351 | "INNER JOIN videos v on v.id = videourls.videoid " 352 | "WHERE clean_url = %s", (clean_url(url),)) 353 | 354 | return None if not res else res[0][0] 355 | 356 | def get_video_from_sha1(self, sha1): 357 | with self.get_conn() as conn: 358 | res = conn.query("SELECT id from videos " 359 | "WHERE sha1=%s", (sha1,)) 360 | return None if not res else res[0][0] 361 | 362 | def get_videoframes(self, video_id): 363 | with self.get_conn() as conn: 364 | res = conn.query("SELECT id from videoframes " 365 | "WHERE videoid=%s", (video_id,), read_committed=True) 366 | return None if not res else [r[0] for r in res] 367 | 368 | def get_video_hashes(self, video_id): 369 | with self.get_conn() as conn: 370 | res = conn.query("SELECT hash from videoframes " 371 | "WHERE videoid=%s", (video_id,), read_committed=True) 372 | return None if not res else [r[0] for r in res] 373 | 374 | def get_or_create_album(self, url): 375 | with self.get_conn() as conn: 376 | res = conn.query("INSERT INTO albums (url) VALUES (%s) ON CONFLICT DO NOTHING RETURNING ID", (url,)) 377 | 378 | # album already exists.. 
379 | if not res: 380 | res = conn.query("SELECT id FROM albums WHERE url=%s", (url,)) 381 | 382 | return None if not res else res[0][0] 383 | 384 | def insert_comment(self, postid, comment_id, comment_author, 385 | comment_body, comment_upvotes, comment_downvotes, comment_created_utc): 386 | with self.get_conn() as conn: 387 | res = conn.query("INSERT INTO comments (postid, hexid, author, body, ups, downs, created)" 388 | " VALUES (%s,%s,%s,%s,%s,%s,%s) RETURNING ID", 389 | (postid, comment_id, comment_author, comment_body, comment_upvotes, comment_downvotes, 390 | comment_created_utc)) 391 | return None if not res else res[0][0] 392 | 393 | def insert_post(self, post_id, title, url, selftext, 394 | author, permalink, subreddit, num_comments, 395 | upvotes, downvotes, score, 396 | created_utc, is_self, over_18): 397 | 398 | with self.get_conn() as conn: 399 | res = conn.query( 400 | "INSERT INTO posts (hexid, title, url, text, author, permalink," 401 | " subreddit, comments, ups, downs, score, created, is_self, over_18)" 402 | " VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING RETURNING ID", 403 | (post_id, title, url, selftext, author, permalink, subreddit, num_comments, 404 | upvotes, downvotes, score, created_utc, is_self, over_18)) 405 | return None if not res else res[0][0] 406 | 407 | def get_postid_from_hexid(self, hexid): 408 | with self.get_conn() as conn: 409 | res = conn.query( 410 | "SELECT id FROM posts WHERE hexid=%s", 411 | (hexid,)) 412 | return None if not res else res[0] 413 | 414 | # Search 415 | 416 | def build_result_for_images(self, images): 417 | if not images: 418 | return [] 419 | 420 | results = [] 421 | 422 | with self.get_conn() as conn: 423 | # TODO: order by? 
424 | res = conn.query( 425 | "WITH DATA AS(" 426 | " SELECT url, imageid, bytes, sha1, width, height, albumid, postid, commentid " 427 | " FROM imageurls imu, images im WHERE imu.imageid = im.id AND im.id = ANY (%s) " 428 | ") " 429 | "SELECT " 430 | "d.url, a.url, c.postid, c.hexid, c.author, c.body, c.ups, " 431 | "c.downs, c.created, cp.subreddit, cp.permalink, cp.hexid, " 432 | "p.hexid, p.title, p.url, p.text, p.author, p.permalink, p.subreddit, " 433 | "p.comments, p.ups, p.downs, p.score, p.created, d.width, d.height, " 434 | "d.bytes, d.sha1, imageid " 435 | "FROM data d " 436 | "LEFT JOIN albums a on d.albumid = a.id " 437 | "LEFT JOIN comments c on d.commentid = c.id " 438 | "LEFT JOIN posts cp on c.postid = cp.id " 439 | "LEFT JOIN posts p on d.postid = p.id", 440 | ([(x[0] if isinstance(x, tuple) else x) for x in images],), read_committed=True, 441 | ) 442 | 443 | for row in res: 444 | if row[3] is not None: 445 | results.append(CommentSearchResult( 446 | body=row[5], 447 | post_id=row[11], 448 | hexid=row[3], 449 | author=row[4], 450 | ups=row[6], 451 | downs=row[7], 452 | created=row[8], 453 | subreddit=row[9], 454 | permalink=row[10], 455 | item=ImageItem( 456 | url=row[0], 457 | width=row[24], 458 | height=row[25], 459 | size=row[26], 460 | thumb=os.path.join(thumb_path(row[28]), str(row[28]) + ".jpg"), 461 | sha1=row[27], 462 | album_url=row[1] 463 | ) 464 | )) 465 | else: 466 | results.append(PostSearchResult( 467 | text=row[15], 468 | title=row[13], 469 | hexid=row[12], 470 | author=row[16], 471 | ups=row[20], 472 | downs=row[21], 473 | created=row[23], 474 | subreddit=row[18], 475 | permalink=row[17], 476 | comments=row[19], 477 | item=ImageItem( 478 | url=row[0], 479 | width=row[24], 480 | height=row[25], 481 | size=row[26], 482 | thumb=os.path.join(thumb_path(row[28]), str(row[28]) + ".jpg"), 483 | sha1=row[27], 484 | album_url=row[1] 485 | ) 486 | )) 487 | return results 488 | 489 | def build_results_for_videos(self, videos): 490 | 491 | 
results = [] 492 | 493 | with self.get_conn() as conn: 494 | # TODO: order by? 495 | res = conn.query( 496 | "WITH DATA AS (" 497 | " SELECT url, videoid, postid, commentid, width, " 498 | " height, bytes, sha1, frames, duration, format, codec, bitrate " 499 | " FROM videourls vu, videos vid " 500 | " WHERE vu.videoid = vid.id AND vid.id = ANY (%s)" 501 | ")" 502 | "SELECT d.url, c.postid, c.hexid, c.author, c.body, c.ups, c.downs," 503 | " c.created, cp.subreddit, cp.permalink, cp.hexid, p.hexid, p.title," 504 | " p.url, p.text, p.author, p.permalink, p.subreddit, p.comments, p.ups," 505 | " p.downs, p.score, p.created, d.width, d.height, d.bytes, d.sha1, d.frames," 506 | " d.duration, d.format, d.codec, d.bitrate, d.videoid " 507 | "FROM data d " 508 | " LEFT JOIN comments c on d.commentid = c.id " 509 | " LEFT JOIN posts cp on c.postid = cp.id " 510 | " LEFT JOIN posts p on d.postid = p.id", 511 | ([(x[0] if isinstance(x, tuple) else x) for x in videos],), read_committed=True, 512 | ) 513 | 514 | for row in res: 515 | if row[2] is not None: 516 | results.append(CommentSearchResult( 517 | body=row[4], 518 | post_id=row[10], 519 | hexid=row[2], 520 | author=row[3], 521 | ups=row[5], 522 | downs=row[6], 523 | created=row[7], 524 | subreddit=row[8], 525 | permalink=row[9], 526 | item=VideoItem( 527 | url=row[0], 528 | width=row[23], 529 | height=row[24], 530 | size=row[25], 531 | sha1=row[26], 532 | frames=row[27], 533 | duration=row[28], 534 | format=row[29], 535 | codec=row[30], 536 | bitrate=row[31], 537 | video_id=row[32] 538 | ) 539 | )) 540 | else: 541 | results.append(PostSearchResult( 542 | text=row[14], 543 | title=row[12], 544 | hexid=row[11], 545 | author=row[15], 546 | ups=row[19], 547 | downs=row[20], 548 | created=row[22], 549 | subreddit=row[17], 550 | permalink=row[16], 551 | comments=row[18], 552 | item=VideoItem( 553 | url=row[0], 554 | width=row[23], 555 | height=row[24], 556 | size=row[25], 557 | sha1=row[26], 558 | frames=row[27], 559 | 
duration=row[28], 560 | format=row[29], 561 | codec=row[30], 562 | bitrate=row[31], 563 | video_id=row[32] 564 | ) 565 | )) 566 | return results 567 | 568 | def get_images_from_reddit_id(self, reddit_id): 569 | with self.get_conn() as conn: 570 | if len(reddit_id) == 6: 571 | res = conn.query( 572 | "SELECT DISTINCT imageid from imageurls " 573 | "LEFT JOIN posts p on imageurls.postid = p.id " 574 | "WHERE p.hexid=%s", 575 | (reddit_id,), read_committed=True 576 | ) 577 | elif len(reddit_id) == 7: 578 | res = conn.query( 579 | "SELECT DISTINCT imageid from imageurls " 580 | "LEFT JOIN comments c on imageurls.commentid = c.id " 581 | "WHERE c.hexid=%s", 582 | (reddit_id,), read_committed=True 583 | ) 584 | else: 585 | raise Exception("Invalid reddit id") 586 | return res 587 | 588 | def get_images_from_author(self, author): 589 | 590 | with self.get_conn() as conn: 591 | imageids = [] 592 | imageids.extend(conn.query( 593 | "SELECT imageid " 594 | "from imageurls " 595 | "INNER JOIN posts p on imageurls.postid = p.id WHERE author = %s", 596 | (author,), read_committed=True, 597 | )) 598 | imageids.extend(conn.query( 599 | "SELECT imageid " 600 | "from imageurls " 601 | "INNER JOIN comments c on imageurls.postid = c.id WHERE author = %s", 602 | (author,), read_committed=True, 603 | )) 604 | return imageids 605 | 606 | def get_images_from_album_url(self, album_url): 607 | with self.get_conn() as conn: 608 | res = conn.query( 609 | "SELECT i.id, u.url, i.width, i.height from albums " 610 | "INNER JOIN imageurls u on albums.id = u.albumid " 611 | "INNER JOIN images i on u.imageid = i.id WHERE albums.url = %s", 612 | (album_url,) 613 | , read_committed=True) 614 | return res 615 | 616 | def get_images_from_text(self, text): 617 | with self.get_conn() as conn: 618 | text = "%" + text + "%" 619 | res = conn.query( 620 | "SELECT DISTINCT(imageid) FROM imageurls " 621 | "WHERE commentid is NULL AND postid IN " 622 | "(SELECT id FROM Posts WHERE title LIKE %s or text LIKE %s 
ORDER BY ups DESC) " 623 | "OR commentid IN " 624 | "(SELECT id FROM Comments WHERE body LIKE %s ORDER BY ups DESC) " 625 | "LIMIT 50", 626 | (text, text, text) 627 | ) 628 | return res 629 | 630 | # Stats 631 | def get_post_count(self): 632 | with self.get_conn() as conn: 633 | return conn.query( 634 | "SELECT reltuples AS approximate_row_count FROM pg_class WHERE relname = 'posts'", 635 | read_committed=True 636 | )[0][0] 637 | 638 | def get_image_count(self): 639 | with self.get_conn() as conn: 640 | return conn.query( 641 | "SELECT reltuples AS approximate_row_count FROM pg_class WHERE relname = 'images'", 642 | read_committed=True 643 | )[0][0] 644 | 645 | def get_videoframe_count(self): 646 | with self.get_conn() as conn: 647 | return conn.query( 648 | "SELECT reltuples AS approximate_row_count FROM pg_class WHERE relname = 'videoframes'", 649 | read_committed=True 650 | )[0][0] 651 | 652 | def get_comment_count(self): 653 | with self.get_conn() as conn: 654 | return conn.query( 655 | "SELECT reltuples AS approximate_row_count FROM pg_class WHERE relname = 'comments'", 656 | read_committed=True 657 | )[0][0] 658 | 659 | def get_album_count(self): 660 | with self.get_conn() as conn: 661 | return conn.query( 662 | "SELECT reltuples AS approximate_row_count FROM pg_class WHERE relname = 'albums'", 663 | read_committed=True 664 | )[0][0] 665 | -------------------------------------------------------------------------------- /Httpy.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | from pycurl import Curl 4 | from urllib3 import disable_warnings 5 | 6 | from common import HTTP_PROXY, logger 7 | 8 | disable_warnings() 9 | 10 | DEFAULT_TIMEOUT = 600 11 | 12 | 13 | class Httpy: 14 | """ 15 | Easily perform GET and POST requests with web servers. 16 | Keeps cookies to retain web sessions. 
17 | Includes helpful methods that go beyond GET and POST: 18 | * get_meta - retrieves meta info about a URL 19 | * unshorten - returns (some) redirected URLs 20 | """ 21 | 22 | def __init__(self): 23 | self.curl = Curl() 24 | self.curl.setopt(self.curl.SSL_VERIFYPEER, 0) 25 | self.curl.setopt(self.curl.SSL_VERIFYHOST, 0) 26 | self.curl.setopt(self.curl.TIMEOUT, DEFAULT_TIMEOUT) 27 | self.curl.setopt(self.curl.PROXY, HTTP_PROXY) 28 | self.curl.setopt(self.curl.FOLLOWLOCATION, True) 29 | 30 | def get(self, url): 31 | """ GET request """ 32 | try: 33 | body = BytesIO() 34 | self.curl.setopt(self.curl.WRITEFUNCTION, body.write) 35 | self.curl.setopt(self.curl.URL, url) 36 | self.curl.perform() 37 | r = body.getvalue() 38 | body.close() 39 | return r.decode() 40 | except Exception as e: 41 | raise e 42 | 43 | def download(self, url): 44 | """ Downloads file from URL to save_as path. """ 45 | retries = 3 46 | while retries: 47 | try: 48 | body = BytesIO() 49 | self.curl.setopt(self.curl.WRITEFUNCTION, body.write) 50 | self.curl.setopt(self.curl.URL, url) 51 | self.curl.perform() 52 | if self.curl.getinfo(self.curl.HTTP_CODE) != 200: 53 | 54 | text = body.getvalue() 55 | if "404" not in text: 56 | raise Exception("HTTP" + str(self.curl.getinfo(self.curl.HTTP_CODE))) 57 | r = body.getvalue() 58 | body.close() 59 | return r 60 | except Exception as e: 61 | if str(e).find("transfer closed") > 0 and retries: 62 | retries -= 1 63 | continue 64 | raise Exception(str(e) + " HTTP" + str(self.curl.getinfo(self.curl.HTTP_CODE))) 65 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 
7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 
43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". 
"Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | irarchives 2 | ========== 3 | 4 | [![CodeFactor](https://www.codefactor.io/repository/github/simon987/irarchives/badge/master)](https://www.codefactor.io/repository/github/simon987/irarchives/overview/master) 5 | ![GitHub](https://img.shields.io/github/license/simon987/irarchives.svg) 6 | 7 | Summary 8 | ------- 9 | Reverse image/video search for reddit 10 | 11 | ![demo](demo.gif) 12 | Realtime dhash demo. The hash (on the right) is stored as a 144-bit 13 | integer. 
We can compute the similarity of two images by counting the number 14 | of matching bits of their hashes. 15 | 16 | Overview 17 | -------- 18 | 19 | The repo contains: 20 | * A script to scrape images from reddit posts and store the data in a database. 21 | * A web interface for searching the database 22 | 23 | ### Database schema 24 | ![schema](schema.png) 25 | 26 | Requirements 27 | ------------ 28 | Tested with Python 3.7.2. 29 | 30 | Dependencies on Debian: `apt install libgmp-dev libmpfr-dev libmpc-dev` 31 | 32 | This project relies on [Architeuthis MITM proxy](https://github.com/simon987/Architeuthis) to respect rate-limits 33 | and handle http errors. 34 | 35 | [Additional C-Language functions](https://github.com/simon987/pg_hamming) 36 | for PostgreSQL need to be installed for almost all queries. 37 | 38 | The search interface can be configured to use *redis* for caching 39 | (see [common.py](common.py)). 40 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | 3 | from common import cache 4 | from index import index_page 5 | from search import search_page 6 | from status import status_page 7 | from subreddits import subreddits_page 8 | from upload import upload_page 9 | from video_thumbs import video_thumbs 10 | 11 | app = Flask(__name__) 12 | cache.init_app(app) 13 | app.register_blueprint(subreddits_page) 14 | app.register_blueprint(status_page) 15 | app.register_blueprint(index_page) 16 | app.register_blueprint(search_page) 17 | app.register_blueprint(upload_page) 18 | app.register_blueprint(video_thumbs) 19 | 20 | if __name__ == '__main__': 21 | app.run(port=3080) 22 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from logging import 
# Architeuthis MITM proxy endpoint (rate limiting / HTTP error handling, per README)
HTTP_PROXY = "http://localhost:5050"
# psycopg2 connection string -- the name "DBFILE" is historical (was sqlite)
DBFILE = "dbname=ir user=ir password=ir"
USE_REDIS = False  # toggle redis-backed flask-caching below
SFW = True
# SFW = False
TN_SIZE = 500  # thumbnail bounding-box size in pixels

SQL_DEBUG = False

# Cache backend: redis when enabled, otherwise a no-op ("null") cache.
if USE_REDIS:
    cache = Cache(config={
        "CACHE_TYPE": "redis",
        "CACHE_KEY_PREFIX": "ir",
        "CACHE_REDIS_HOST": "localhost",
        "CACHE_REDIS_PORT": "6379"
    })
else:
    cache = Cache(config={
        "CACHE_TYPE": "null"
    })

logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
file_handler = FileHandler("rarchives.log")
file_handler.setFormatter(formatter)
# Remove any handlers already attached (e.g. on module re-import) so
# records are not emitted twice.
for h in logger.handlers:
    logger.removeHandler(h)
logger.addHandler(file_handler)
logger.addHandler(StreamHandler(sys.stdout))
def create_thumb(im, num):
    """
    Create a JPEG thumbnail (at most TN_SIZE x TN_SIZE) for image *im*.

    Saves to the 'thumbs' directory computed by thumb_path(num),
    named <num>.jpg.
    """

    dirpath = thumb_path(num)

    try:
        os.makedirs(dirpath, exist_ok=True)
    except OSError as e:
        # Best-effort: log and continue (save() below will fail loudly if the
        # directory really is unusable). logger.warn() is deprecated.
        logger.warning("Could not create dir: %s" % (e,))

    # JPEG cannot store palette/alpha modes; convert to RGB first.
    if im.mode != "RGB":
        im = im.convert("RGB")
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    im.thumbnail((TN_SIZE, TN_SIZE), Image.LANCZOS)

    im.save(os.path.join(dirpath, str(num) + ".jpg"), 'JPEG')
# DDL column definitions for each table, keyed by table name.  Each value is
# the body of a CREATE TABLE statement (everything between the parentheses).
SCHEMA = {
    'Posts':
        '\n\t' +
        'id SERIAL PRIMARY KEY, \n\t' +
        'hexid TEXT UNIQUE, \n\t' +  # base36 reddit id to comment
        'title TEXT, \n\t' +
        'url TEXT, \n\t' +
        'text TEXT, \n\t' +  # self-text
        'author TEXT, \n\t' +
        'permalink TEXT, \n\t' +  # /r/Subreddit/comments/id/title
        'subreddit TEXT, \n\t' +
        'comments INTEGER, \n\t' +  # Number of comment
        'ups INTEGER, \n\t' +
        'downs INTEGER, \n\t' +
        'score INTEGER, \n\t' +
        'created INTEGER, \n\t' +  # Time in UTC
        'is_self BOOLEAN, \n\t' +
        'over_18 BOOLEAN',

    'Comments':
        '\n\t' +
        'id SERIAL PRIMARY KEY, \n\t' +
        'postid INTEGER, \n\t' +  # Reference to Posts table
        'hexid TEXT UNIQUE, \n\t' +  # base36 reddit id to comment
        'author TEXT, \n\t' +
        'body TEXT, \n\t' +
        'ups INTEGER, \n\t' +
        'downs INTEGER, \n\t' +
        'created INTEGER, \n\t' +  # Time in UTC
        'FOREIGN KEY(postid) REFERENCES posts(id)',

    'Images':
        '\n\t' +
        'id SERIAL PRIMARY KEY, \n\t' +
        'sha1 TEXT UNIQUE, \n\t' +
        'hash bytea, \n\t' +
        'width INTEGER, \n\t' +
        'height INTEGER, \n\t' +
        'bytes INTEGER',

    'videos':
        '\n\t' +
        'id SERIAL PRIMARY KEY, \n\t' +
        'sha1 TEXT UNIQUE, \n\t' +
        'width INTEGER, \n\t' +
        'height INTEGER, \n\t' +
        'bitrate INTEGER, \n\t' +
        'codec TEXT, \n\t' +
        'format TEXT, \n\t' +
        'duration INTEGER, \n\t' +
        'frames INTEGER, \n\t' +
        'bytes INTEGER',

    'videoframes':
        'id SERIAL PRIMARY KEY, \n\t' +
        'hash bytea NOT NULL, \n\t' +
        'videoid INTEGER NOT NULL, \n\t' +
        'FOREIGN KEY(videoid) REFERENCES videos(id)',

    'videourls':
        '\n\t' +
        'id SERIAL PRIMARY KEY, \n\t' +
        'url TEXT, \n\t' +
        'clean_url TEXT, \n\t' +
        'videoid INTEGER NOT NULL, \n\t' +
        'postid INTEGER, \n\t' +
        'commentid INTEGER, \n\t' +
        'FOREIGN KEY(videoid) REFERENCES videos(id), \n\t' +
        'FOREIGN KEY(postid) REFERENCES posts(id), \n\t' +
        'FOREIGN KEY(commentid) REFERENCES comments(id)',

    'Albums':
        '\n\t' +
        'id SERIAL PRIMARY KEY, \n\t' +
        'url TEXT UNIQUE',

    'ImageURLs':
        '\n\t' +
        'id SERIAL PRIMARY KEY, \n\t' +
        'url TEXT, \n\t' +
        # Fix: this line previously relied on implicit string concatenation
        # (missing '+'), which happens to work but silently breaks if the
        # lines are ever reordered.
        'clean_url TEXT, \n\t' +
        'imageid INTEGER NOT NULL, \n\t' +
        'albumid INTEGER, \n\t' +
        'postid INTEGER, \n\t' +
        'commentid INTEGER, \n\t' +
        'FOREIGN KEY(imageid) REFERENCES images(id), \n\t' +
        'FOREIGN KEY(postid) REFERENCES posts(id), \n\t' +
        'FOREIGN KEY(commentid) REFERENCES comments(id), \n\t' +
        'FOREIGN KEY(albumid) REFERENCES albums(id)',

}
    def run(self):
        """Bind the queue to every monitored subreddit, spawn workers, consume forever."""
        # One binding per subreddit; "*" presumably matches the message-type
        # prefix of the routing key -- confirm against the publisher.
        for sub in load_list("subs.txt"):
            self._rabbitmq_channel.queue_bind(exchange='reddit',
                                              queue=self._rabbitmq_queue.method.queue,
                                              routing_key="*.%s" % sub)

        def msg_callback(ch, method, properties, body):
            # Runs on pika's consuming thread: only enqueue the raw body;
            # parsing happens in the worker threads below.
            self._q.put(body)

        self._rabbitmq_channel.basic_consume(queue=self._rabbitmq_queue.method.queue,
                                             on_message_callback=msg_callback,
                                             auto_ack=True)
        # 30 worker threads drain self._q concurrently.
        for _ in range(0, 30):
            t = Thread(target=self._message_callback_worker)
            t.start()
        self._rabbitmq_channel.start_consuming()
    def parse_url(self, url, web, postid=None, commentid=None):
        """ Gets image hash(es) from URL, populates database """
        # NOTE(review): return values are inconsistent (True / None / bare
        # return); no visible caller uses the result, so left as-is.

        # Direct image link: hash and store just this one image.
        if is_image_direct_link(url):
            self.parse_image(url, web, postid=postid, commentid=commentid, albumid=None)
            return True

        # Direct video link.
        if is_video(url):
            self.parse_video(url, web, postid=postid, commentid=commentid)
            return True

        # Reddit-hosted video: resolve the actual media URL first.
        if "v.redd.it" in url:
            logger.debug("Using youtube-dl to get reddit video url")
            ytdl = YoutubeDL()
            info = ytdl.extract_info(url, download=False, process=False)

            # Pick the widest format; formats without a width sort last.
            best = max(info["formats"], key=lambda x: x["width"] if "width" in x and x["width"] else 0)
            self.parse_video(best["url"], web, postid=postid, commentid=commentid)
            return

        if not should_parse_link(url):
            return

        # Fall back to gallery-dl extraction (see img_util.get_image_urls).
        image_urls = get_image_urls(url)

        # We assume that any url that yields more than 1 image is an album
        albumid = None
        if len(image_urls) > 1:
            albumid = self.db.get_or_create_album(url)  # TODO: fix url len thing

        for image_url in image_urls:
            if is_image_direct_link(image_url):
                self.parse_image(image_url, web, postid=postid, commentid=commentid, albumid=albumid)
            elif is_video(image_url):
                self.parse_video(image_url, web, postid=postid, commentid=commentid)
        return True
= get_sha1(image_buffer) 236 | existing_by_sha1 = self.db.get_image_from_sha1(sha1) 237 | if existing_by_sha1: 238 | self.db.insert_imageurl(url=url, imageid=existing_by_sha1, postid=postid, commentid=commentid, 239 | albumid=albumid) 240 | return 241 | 242 | im = image_from_buffer(image_buffer) 243 | imhash = get_hash(im) 244 | width, height = im.size 245 | size = len(image_buffer) 246 | 247 | imageid = self.db.insert_image(imhash, width, height, size, sha1) 248 | self.db.insert_imageurl(url, imageid=imageid, albumid=albumid, postid=postid, commentid=commentid) 249 | create_thumb(im, imageid) 250 | del im 251 | del image_buffer 252 | 253 | logger.info("(+) Image ID(%s) [%dx%s %dB] #%s" % 254 | ( 255 | imageid, width, height, size, 256 | binascii.hexlify(imhash).decode("ascii") 257 | )) 258 | except Exception as e: 259 | logger.error(e) 260 | 261 | def parse_video(self, url, web, postid=None, commentid=None): 262 | existing_by_url = self.db.get_video_from_url(url) 263 | if existing_by_url: 264 | self.db.insert_videourl(url=url, video_id=existing_by_url, postid=postid, commentid=commentid) 265 | return 266 | 267 | try: 268 | video_buffer = web.download(url) 269 | 270 | if not video_buffer: 271 | raise Exception("Download failed %s" % url) 272 | except Exception as e: 273 | logger.error(e) 274 | return 275 | 276 | sha1 = get_sha1(video_buffer) 277 | existing_by_sha1 = self.db.get_video_from_sha1(sha1) 278 | if existing_by_sha1: 279 | self.db.insert_videourl(url=url, video_id=existing_by_sha1, postid=postid, commentid=commentid) 280 | return 281 | 282 | frames, info = info_from_video_buffer(video_buffer, url[url.rfind(".") + 1:].replace("gifv", "mp4")) 283 | if not frames: 284 | logger.error("No frames " + url) 285 | return 286 | 287 | info = flatten_video_info(info) 288 | 289 | video_id = self.db.insert_video(sha1, size=len(video_buffer), info=info) 290 | self.db.insert_videourl(url, video_id, postid, commentid) 291 | 292 | frame_ids = 
from collections import namedtuple

# Field order matters: rabbitmq_listen.py builds these tuples positionally
# via (item[k] for k in *_FIELDS).
POST_FIELDS = [
    "author", "created", "created_utc", "downs",
    "id", "is_self", "num_comments", "over_18",
    "score", "selftext", "subreddit", "ups", "title",
    "url", "permalink", "urls",
]
# Lightweight record for a reddit submission.
Post = namedtuple("Post", POST_FIELDS)

COMMENT_FIELDS = [
    "author", "body", "created", "created_utc",
    "downs", "id", "permalink", "score", "subreddit",
    "ups", "link_id", "urls",
]
# Lightweight record for a reddit comment.
Comment = namedtuple("Comment", COMMENT_FIELDS)
class SearchResults:
    """Container for a set of search hits plus optional error information."""

    __slots__ = "url", "hits", "error", "result_count"

    def __init__(self, hits, error=None, url=""):
        self.hits = hits
        self.error = error
        self.url = url
        self.result_count = len(hits)

    def json(self):
        """Serialize the result set (and each hit, via hit.json()) to a JSON string."""
        payload = {
            "hits": [hit.json() for hit in self.hits],
            "error": self.error,
            "url": self.url,
            "result_count": self.result_count,
        }
        return json.dumps(payload)
e: 85 | return Response(json.dumps({'error': str(e)}), mimetype="application/json") 86 | 87 | return Response(json.dumps({'error': "Invalid query"}), mimetype="application/json") 88 | 89 | # if "reddit" in request.args: 90 | # return search_reddit(request.args["reddit"]) 91 | 92 | # if "text" in request.args: 93 | # return search_text(request.args["text"]) 94 | 95 | 96 | 97 | def search_vid_url(query, distance, frame_count): 98 | if ' ' in query: 99 | query = query.replace(' ', '%20') 100 | 101 | try: 102 | video_id = db.get_video_from_url(url=query) 103 | 104 | if not video_id: 105 | # Download video 106 | web = Httpy() 107 | video_buffer = web.download(url=query) 108 | if not video_buffer: 109 | raise Exception('unable to download video at %s' % query) 110 | 111 | try: 112 | frames, info = info_from_video_buffer(video_buffer, os.path.splitext(query)[1][1:]) 113 | except: 114 | raise Exception("Could not identify video") 115 | 116 | videos = db.get_similar_videos_by_hash(frames, distance, frame_count) 117 | 118 | else: 119 | 120 | hashes = db.get_video_hashes(video_id) 121 | videos = db.get_similar_videos_by_hash(hashes, distance, frame_count) 122 | 123 | results = SearchResults(db.build_results_for_videos(videos)) 124 | 125 | except Exception as e: 126 | return Response(json.dumps({'error': str(e)}), mimetype="application/json") 127 | 128 | return Response(results.json(), mimetype="application/json") 129 | 130 | 131 | def is_valid_url(url): 132 | if not url.startswith(("http://", "https://")): 133 | return False 134 | return True 135 | 136 | 137 | def search_img_url(query, distance): 138 | if ' ' in query: 139 | query = query.replace(' ', '%20') 140 | 141 | if not is_valid_url(query): 142 | raise Exception("Invalid query: '%s'" % query) 143 | 144 | try: 145 | hash = db.get_image_hash_from_url(url=query) 146 | 147 | if not hash: 148 | # Download image 149 | web = Httpy() 150 | try: 151 | image_buffer = web.download(url=query) 152 | except: 153 | raise 
def search_user(user):
    """ Returns posts/comments by a reddit user """
    # Reject empty/malformed usernames before touching the database; the
    # /search route converts this exception into an error JSON response.
    if user.strip() == '' or not is_user_valid(user):
        raise Exception('invalid username')

    images = db.get_images_from_author(author=user)
    results = build_results_for_images(images)

    return Response(results.json(), mimetype="application/json")
# TODO update (FULLTEXT please!)
def search_text(text):
    """ Prints posts/comments containing text in title/body. """
    # Strip every non-alphanumeric character from the query.
    text = AlphaNum.sub('', text)
    images = db.get_images_from_text(text)

    # NOTE(review): build_results_for_images() elsewhere treats
    # db.build_result_for_images() as returning a single list; this 2-tuple
    # unpack looks stale -- confirm before relying on this endpoint.
    comments, posts = db.build_result_for_images(images)

    return Response(json.dumps({
        'url': 'text:%s' % text,
        'posts': posts,
        'comments': comments
    }), mimetype="application/json")
  • ', '
'], 18 | 1: [/\n[1-9]\d*\.? /, '
  1. ', '
'], 19 | ' ': [/\n /, '
', '
', '\n'], 20 | '>': [/\n> /, '
', '
', '\n'] 21 | }[f = b[0]]; 22 | h += R ? R[1] + ('\n' + b).split(R[0]).slice(1).map(R[3] ? E : I).join(R[3] || '\n
  • ') + R[2] : f === '#' ? '' + I(b.slice(f + 1)) + '' : f === '<' ? b : '

    ' + I(b) + '

    ' 23 | }); 24 | let p = document.createElement('p'); 25 | p.innerHTML = h; 26 | return p 27 | } 28 | 29 | function getImageBlob(event) { 30 | const items = (event.clipboardData || event.originalEvent.clipboardData).items; 31 | for (let i in items) { 32 | const item = items[i]; 33 | if (item.kind === 'file') { 34 | return item.getAsFile(); 35 | } 36 | } 37 | } 38 | 39 | function uploadBlob(blob) { 40 | const reader = new FileReader(); 41 | reader.onload = function (event) { 42 | 43 | clearResults(); 44 | const results_el = gebi('output'); 45 | const pl = mkPreloader(); 46 | results_el.appendChild(pl); 47 | 48 | const form = new FormData(); 49 | form.append('fname', 'image'); 50 | form.append('data', event.target.result); 51 | 52 | const request = new XMLHttpRequest(); 53 | request.open("POST", 'upload', true); 54 | request.send(form); 55 | request.onreadystatechange = function () { 56 | if (request.readyState === 4) { 57 | if (request.status === 200) { 58 | const json = JSON.parse(request.responseText); 59 | gebi("search").value = json.url; 60 | handleSearchResponse(request.responseText); 61 | pl.remove(); 62 | } else { 63 | console.log(request.responseText) 64 | } 65 | } 66 | }; 67 | }; 68 | reader.readAsDataURL(blob); 69 | } 70 | 71 | 72 | window.onload = function () { 73 | M.Modal.init(document.querySelectorAll(".modal"), {}); 74 | M.Tabs.init(document.getElementById("rri_menu"), {}); 75 | M.Tabs.init(document.getElementById("search-menu"), {}); 76 | helpModal = M.Modal.getInstance(document.getElementById("help")); 77 | get_subreddits(); 78 | get_status(); 79 | gebi("search").addEventListener("paste", function (e) { 80 | const blob = getImageBlob(e); 81 | if (blob) { 82 | uploadBlob(blob) 83 | } 84 | }, false); 85 | }; 86 | 87 | 88 | function gebi(id) { 89 | return document.getElementById(id); 90 | } 91 | 92 | function get_subreddits() { 93 | const request = new XMLHttpRequest(); 94 | request.open("GET", 'subreddits', true); 95 | request.send(null); 96 | 
request.onreadystatechange = function () { 97 | if (request.readyState === 4) { 98 | if (request.status === 200) { 99 | gebi('subreddit_err').classList.remove("active"); 100 | 101 | const json = JSON.parse(request.responseText); 102 | 103 | if (json['error'] != null) { 104 | gebi('subreddits').innerText = 'error: ' + json["error"]; 105 | return; 106 | } 107 | 108 | const subreddits = json['subreddits']; 109 | let output = '
    Monitoring ' + subreddits.length + ' subreddits
    '; 110 | for (let i in subreddits) { 111 | output += '' + 112 | '' + subreddits[i] + ' '; 114 | } 115 | gebi('subreddits').innerHTML = output; 116 | 117 | } else { 118 | gebi('subreddit_err').innerText = "Error: " + request.status; 119 | gebi('subreddit_err').classList.add("active") 120 | } 121 | } 122 | } 123 | } 124 | 125 | // Add commas to the thousands places in a number 126 | function number_commas(x) { 127 | return x.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ","); 128 | } 129 | 130 | function get_status() { 131 | const request = new XMLHttpRequest(); 132 | request.open("GET", 'status', true); 133 | request.send(null); 134 | request.onreadystatechange = function () { 135 | if (request.readyState === 4) { 136 | if (request.status === 200) { 137 | const resp = JSON.parse(request.responseText)["status"]; 138 | gebi("db_images").innerText = number_commas(resp['images']); 139 | gebi("db_posts").innerText = number_commas(resp['posts']); 140 | gebi("db_videos").innerText = number_commas(resp['videos']); 141 | gebi("db_comments").innerText = number_commas(resp['comments']); 142 | gebi("db_albums").innerText = number_commas(resp['albums']); 143 | gebi("db_subreddits").innerText = number_commas(resp['subreddits']); 144 | } 145 | } 146 | } 147 | } 148 | 149 | function research(q) { 150 | gebi("search").value = q; 151 | query(); 152 | } 153 | 154 | function clearResults() { 155 | const results_el = gebi('output'); 156 | while (results_el.hasChildNodes()) { 157 | results_el.removeChild(results_el.lastChild); 158 | } 159 | } 160 | 161 | function query() { 162 | clearResults(); 163 | 164 | const q = gebi("search").value; 165 | let params = {}; 166 | 167 | q.split(/\s+/).forEach(tok => { 168 | params[tok.match(/^(?!https?)(\w+):/) ? tok.match(/^(?!https?)^(\w+):/)[1] : "image"] 169 | = tok.substring(tok.match(/^(?!https?)(\w+):/) ? 
tok.match(/^\w+:/)[0].length : 0) 170 | }) 171 | 172 | params["d"] = params["d"] || 0 173 | let queryString = "search?d=" + params["d"] 174 | 175 | if (params["image"]) { 176 | queryString += '&img=' + params["image"] 177 | } 178 | if (params["frames"]) { 179 | queryString += '&f=' + params["frames"] 180 | } 181 | if (params["video"]) { 182 | queryString += '&vid=' + params["video"] 183 | } 184 | if (params["album"]) { 185 | queryString += '&album=' + params["album"] 186 | } 187 | if (params["user"]) { 188 | queryString += '&user=' + params["user"] 189 | } 190 | 191 | //TODO: sha1 etc 192 | 193 | const results_el = gebi('output'); 194 | const pl = mkPreloader(); 195 | results_el.appendChild(pl); 196 | 197 | const request = new XMLHttpRequest(); 198 | request.open("GET", queryString, true); 199 | request.send(null); 200 | request.onreadystatechange = function () { 201 | if (request.readyState === 4) { 202 | if (request.status === 200) { 203 | pl.remove(); 204 | handleSearchResponse(request.responseText) 205 | } else if (request.status === 504) { 206 | pl.remove(); 207 | results_el.appendChild(mkErrorMsg(`Query timed out, try again in a few minutes.`)); 208 | } 209 | } 210 | }; 211 | 212 | // Don't refresh page on submit 213 | return false; 214 | } 215 | 216 | function handleSearchResponse(responseText) { 217 | 218 | const results_el = gebi('output'); 219 | const resp = JSON.parse(responseText); 220 | 221 | if (resp['error'] != null) { 222 | results_el.appendChild(mkErrorMsg(`Error: ${resp['error']}`)); 223 | return; 224 | } 225 | 226 | //TODO: no! 227 | if (resp['images']) { 228 | results_el.appendChild(mkGallery(resp['images'])); 229 | return 230 | } 231 | 232 | if (resp.result_count === 0) { 233 | results_el.appendChild(mkErrorMsg('No results')); 234 | return; 235 | } 236 | 237 | results_el.appendChild(mkHeader(`${resp.result_count} item${resp.result_count === 1 ? 
// Render a duration in seconds as a rounded "<n> <unit>[s]" string,
// e.g. 45 -> "45 seconds", 3600 -> "60 minutes".
function get_time(seconds) {
    const units = [
        ['second', 60],
        ['minute', 60],
        ['hour', 24],
        ['day', 30],
        ['month', 12],
        ['year', 1000]
    ];
    for (const [name, span] of units) {
        if (seconds <= span) {
            const rounded = seconds.toFixed(0);
            // Pluralize unless the rounded value is exactly "1".
            return rounded + ' ' + name + (rounded === "1" ? '' : 's');
        }
        seconds /= span;
    }
    return '?';
}
// Human-readable bit quantity (binary steps of 1024), e.g. 2048 -> "2.0Kb".
// Returns '?bits' for values below 1.
function bits_to_readable(bits) {
    const units = ['b', 'Kb', 'Mb'];
    for (let i = units.length - 1; i >= 0; i--) {
        const factor = Math.pow(1024, i);
        if (factor < bits) {
            return (bits / factor).toFixed(1) + units[i];
        }
    }
    return '?bits';
}
el.appendChild(mkButton(`https://www.karmadecay.com/${url.replace('http:', '').replace('https:', '')}`, 'KarmaDecay')); 354 | return el; 355 | } 356 | 357 | function mkPost(post) { 358 | 359 | const card = document.createElement('div'); 360 | card.setAttribute('class', 'card horizontal post'); 361 | const cardStacked = document.createElement('div'); 362 | cardStacked.setAttribute('class', 'card-stacked'); 363 | const cardContent = document.createElement('div'); 364 | cardContent.setAttribute('class', 'card-content'); 365 | const cardTitle = document.createElement('span'); 366 | cardTitle.setAttribute('class', 'card-title'); 367 | cardTitle.appendChild(document.createTextNode(post.title)); 368 | cardContent.appendChild(cardTitle); 369 | 370 | const cardItemWrapper = document.createElement('div'); 371 | cardItemWrapper.setAttribute('class', 'card-image img_wrapper'); 372 | 373 | let cardItem; 374 | if (post.item.type === 'image') { 375 | cardItem = document.createElement('img'); 376 | cardItem.setAttribute('src', post.item.thumb); 377 | } else { 378 | cardItem = makeSlideShow(post.item.video_id, post.item.duration); 379 | } 380 | 381 | const contentWrapper = document.createElement('div'); 382 | contentWrapper.setAttribute('class', 'row'); 383 | 384 | const right = document.createElement('div'); 385 | right.setAttribute('class', 'col s10 l11'); 386 | 387 | const left = document.createElement('div'); 388 | left.setAttribute('class', 'col s2 l1'); 389 | left.setAttribute('style', 'padding: 0'); 390 | left.appendChild(mkUpboat(post.ups - post.downs)); 391 | 392 | const info = document.createElement('p'); 393 | info.appendChild(document.createTextNode('Submitted ')); 394 | info.appendChild(mkBold(get_time_diff(post.created), new Date(post.created * 1000).toUTCString())); 395 | info.appendChild(document.createTextNode(' ago by ')); 396 | if (post.author.toLowerCase() !== "[deleted]") { 397 | info.appendChild(mkCallback(function () { 398 | research("user:" + post.author) 
399 | }, post.author, 'Search this user')); 400 | } else { 401 | info.appendChild(document.createTextNode(post.author)); 402 | } 403 | info.appendChild(document.createTextNode(' to ')); 404 | info.appendChild(mkLink('http://www.reddit.com/r/' + post.subreddit, post.subreddit)); 405 | right.appendChild(info); 406 | 407 | right.appendChild(mkLink('http://www.reddit.com/' + post.permalink, 408 | (post.comments === 1 ? "1 comment" : `${post.comments} comments`) 409 | )); 410 | 411 | right.appendChild(document.createTextNode(' ')); 412 | right.appendChild(mkLink(post.item.url, 413 | post.item.type === 'image' 414 | ? ` (⛶ ${post.item.width}x${post.item.height} ${bytes_to_readable(post.item.size)})` 415 | : ` (▷ ${post.item.height}p ${bits_to_readable(post.item.bitrate)}/s ${get_time(post.item.duration)} ${bytes_to_readable(post.item.size)})` 416 | )); 417 | 418 | 419 | card.appendChild(cardItemWrapper); 420 | card.appendChild(cardStacked); 421 | 422 | cardItemWrapper.appendChild(cardItem); 423 | cardStacked.appendChild(cardContent); 424 | cardContent.appendChild(contentWrapper); 425 | contentWrapper.appendChild(left); 426 | contentWrapper.appendChild(right); 427 | 428 | let links = mkExtSearchLinks(post.item.url); 429 | let mobileLinks = mkExtSearchLinksMobile(post.item.url); 430 | 431 | if (post.item.album_url !== null) { 432 | links.appendChild(mkCallback( 433 | () => research("album:" + post.item.album_url), 434 | "album", post.item.album_url, 435 | )); 436 | mobileLinks.appendChild(mkCallback( 437 | () => research("album:" + post.item.album_url), 438 | "album", post.item.album_url, 439 | true 440 | )); 441 | } 442 | 443 | card.appendChild(links); 444 | contentWrapper.appendChild(mobileLinks); 445 | 446 | return card; 447 | } 448 | 449 | function mkComment(comment) { 450 | 451 | const card = document.createElement('div'); 452 | card.setAttribute('class', 'card horizontal comment'); 453 | const cardContent = document.createElement('div'); 454 | 
cardContent.setAttribute('class', 'card-content'); 455 | 456 | const cardItemWrapper = document.createElement('div'); 457 | cardItemWrapper.setAttribute('class', 'card-image img_wrapper'); 458 | 459 | let cardItem; 460 | if (comment.item.type === 'image') { 461 | cardItem = document.createElement('img'); 462 | cardItem.setAttribute('src', comment.item.thumb); 463 | } else { 464 | cardItem = makeSlideShow(comment.item.video_id, comment.item.duration); 465 | } 466 | 467 | const contentWrapper = document.createElement('div'); 468 | contentWrapper.setAttribute('class', 'row'); 469 | 470 | const right = document.createElement('div'); 471 | right.setAttribute('class', 'col s10 m10 l11'); 472 | 473 | const left = document.createElement('div'); 474 | left.setAttribute('class', 'col s2 m2 l1'); 475 | left.appendChild(mkUpboat(comment.ups - comment.downs)); 476 | 477 | const info = document.createElement('p'); 478 | info.appendChild(document.createTextNode('Commented ')); 479 | info.appendChild(mkBold(get_time_diff(comment.created), new Date(comment.created * 1000).toUTCString())); 480 | info.appendChild(document.createTextNode(' ago by ')); 481 | if (comment.author.toLowerCase() !== "[deleted]") { 482 | info.appendChild(mkCallback(function () { 483 | research("user:" + comment.author) 484 | }, comment.author, 'Search this user')); 485 | } else { 486 | info.appendChild(document.createTextNode(comment.author)); 487 | } 488 | info.appendChild(mmd(comment.body)); 489 | right.appendChild(info); 490 | 491 | if (comment.item) { 492 | right.appendChild(mkLink(comment.item.url, 493 | comment.item.type === 'image' 494 | ? 
` (⛶ ${comment.item.width}x${comment.item.height} ${bytes_to_readable(comment.item.size)})` 495 | : ` (▷ ${comment.item.height}p ${bits_to_readable(comment.item.bitrate)}/s ${get_time(comment.item.duration)} ${bytes_to_readable(comment.item.size)})` 496 | )); 497 | } 498 | 499 | card.appendChild(cardItemWrapper); 500 | card.appendChild(cardContent); 501 | cardContent.appendChild(contentWrapper); 502 | cardItemWrapper.appendChild(cardItem); 503 | contentWrapper.appendChild(left); 504 | contentWrapper.appendChild(right); 505 | 506 | let links = mkExtSearchLinks(comment.item.url); 507 | let mobileLinks = mkExtSearchLinksMobile(comment.item.url); 508 | 509 | if (comment.item.album_url !== null) { 510 | links.appendChild(mkCallback( 511 | () => research("album:" + comment.item.album_url), 512 | "album", comment.item.album_url, 513 | )); 514 | mobileLinks.appendChild(mkCallback( 515 | () => research("album:" + comment.item.album_url), 516 | "album", comment.item.album_url, 517 | true 518 | )); 519 | } 520 | card.appendChild(links); 521 | contentWrapper.appendChild(mobileLinks); 522 | 523 | return card; 524 | } 525 | 526 | function mkBold(text, title = "") { 527 | const el = document.createElement('span'); 528 | el.setAttribute('class', 'bold'); 529 | el.setAttribute('title', title); 530 | el.appendChild(document.createTextNode(text)); 531 | return el 532 | } 533 | 534 | function mkLink(href, text, target = "_blank") { 535 | const el = document.createElement('a'); 536 | el.setAttribute('href', href); 537 | el.setAttribute('target', target); 538 | el.appendChild(document.createTextNode(text)); 539 | return el 540 | } 541 | 542 | function mkButton(href, text, target = "_blank") { 543 | const el = document.createElement('a'); 544 | el.setAttribute('href', href); 545 | el.setAttribute('class', 'waves-effect waves-light btn'); 546 | el.setAttribute('target', target); 547 | el.appendChild(document.createTextNode(text)); 548 | return el 549 | } 550 | 551 | function 
mkCallback(callback, text, title = '', button) { 552 | const el = document.createElement("a"); 553 | el.addEventListener('click', callback); 554 | if (button) { 555 | el.setAttribute('class', 'callback_btn waves-effect waves-light btn'); 556 | } else { 557 | el.setAttribute('class', 'callback_btn'); 558 | } 559 | el.setAttribute('title', title); 560 | el.appendChild(document.createTextNode(text)); 561 | return el 562 | } 563 | 564 | function mkPreloader() { 565 | const el = document.createElement('div'); 566 | el.setAttribute('class', 'progress'); 567 | const indeterminate = document.createElement('div'); 568 | indeterminate.setAttribute('class', 'indeterminate'); 569 | el.appendChild(indeterminate); 570 | return el; 571 | } 572 | 573 | function mkUpboat(count) { 574 | const el = document.createElement('div'); 575 | el.setAttribute('class', 'upboat'); 576 | const up = document.createElement('span'); 577 | up.setAttribute('class', 'up'); 578 | up.appendChild(document.createTextNode('▲')); 579 | const votes = document.createElement('span'); 580 | votes.setAttribute('class', 'votes'); 581 | votes.appendChild(document.createTextNode(count)); 582 | const down = document.createElement('span'); 583 | down.setAttribute('class', 'down'); 584 | down.appendChild(document.createTextNode('▼')); 585 | el.appendChild(up); 586 | el.appendChild(votes); 587 | el.appendChild(down); 588 | return el; 589 | } 590 | 591 | //TODO: set rows based on screen width 592 | function mkGallery(images, rows = 3) { 593 | const gallery = document.createElement('div'); 594 | gallery.setAttribute('class', 'row'); 595 | 596 | let cols = []; 597 | let colHeights = []; 598 | for (let i = 0; i < rows; i++) { 599 | let col = document.createElement('div'); 600 | col.setAttribute('class', `col s${12 / rows}`); 601 | gallery.appendChild(col); 602 | cols.push(col); 603 | colHeights.push(0); 604 | } 605 | 606 | for (let im in images) { 607 | 608 | let minHeight = Number.MAX_VALUE; 609 | let min = 0; 610 | 611 | 
for (let i = 0; i < cols.length; i++) { 612 | if (colHeights[i] < minHeight) { 613 | minHeight = colHeights[i]; 614 | min = i; 615 | } 616 | } 617 | 618 | const img = document.createElement('img'); 619 | img.setAttribute('src', images[im].thumb); 620 | cols[min].appendChild(img); 621 | colHeights[min] += height(images[im]); 622 | } 623 | 624 | return gallery; 625 | } 626 | 627 | // Quick hack to estimate img height in a col 628 | function height(im) { 629 | 630 | const TNSIZE = 404.167; 631 | 632 | if (im.width > im.height) { 633 | return TNSIZE * (im.width / im.height) 634 | } else { 635 | return TNSIZE * (im.height / im.width) 636 | } 637 | } 638 | 639 | function makeSlideShow(videoId, duration) { 640 | 641 | const el = document.createElement('div'); 642 | el.setAttribute("class", "slideshow"); 643 | 644 | get_video_thumbs(videoId, function (images) { 645 | 646 | for (let i = 0; i < images.length; i++) { 647 | const img = document.createElement("img"); 648 | if (i === 0) { 649 | img.setAttribute("class", "gallery-item showcase-img"); 650 | } else { 651 | img.setAttribute("class", "gallery-item"); 652 | } 653 | img.setAttribute("src", images[i]); 654 | el.appendChild(img); 655 | } 656 | 657 | let imageCounter = images.length; 658 | let timer = undefined 659 | 660 | el.onmouseenter = function () { 661 | timer = window.setInterval(function () { 662 | const images = el.querySelectorAll(".gallery-item"); 663 | let newIndex = imageCounter % images.length; 664 | let lastIndex = 0; 665 | newIndex === 0 ? 
lastIndex = images.length - 1 : lastIndex = newIndex - 1; 666 | images[newIndex].classList.add("showcase-img"); 667 | images[lastIndex].classList.remove("showcase-img"); 668 | 669 | imageCounter += 1; 670 | }, duration / images.length * 800) 671 | } 672 | 673 | el.onmouseleave = function () { 674 | if (timer) { 675 | window.clearInterval(timer) 676 | timer = undefined 677 | } 678 | } 679 | }) 680 | 681 | return el; 682 | } 683 | -------------------------------------------------------------------------------- /static/main.css: -------------------------------------------------------------------------------- 1 | .header { 2 | text-align: center; 3 | outline: none; 4 | } 5 | 6 | .bold { 7 | font-weight: bold; 8 | } 9 | 10 | body { 11 | display: flex; 12 | min-height: 100vh; 13 | flex-direction: column; 14 | background: #212121; 15 | } 16 | 17 | main { 18 | flex: 1 0 auto; 19 | } 20 | 21 | #subreddit_header { 22 | padding-top: 15px; 23 | padding-bottom: 10px; 24 | font-size: 1.5em; 25 | font-weight: bold; 26 | } 27 | 28 | #subreddits { 29 | text-align: center; 30 | font-size: 0.8em; 31 | padding: 10px; 32 | } 33 | 34 | .subreddit { 35 | display: inline; 36 | padding-right: 15px; 37 | line-height: 200%; 38 | } 39 | 40 | .callback_btn { 41 | cursor: pointer; 42 | } 43 | 44 | .progress { 45 | margin: 3em 0; 46 | } 47 | 48 | .up { 49 | display: block; 50 | color: #039be5; 51 | } 52 | 53 | .votes { 54 | display: block; 55 | } 56 | 57 | .down { 58 | display: block; 59 | color: orangered; 60 | } 61 | 62 | .upboat { 63 | text-align: center; 64 | } 65 | 66 | /*HEADER*/ 67 | a.nsfw { 68 | text-align: center; 69 | font-size: 2.5rem; 70 | letter-spacing: -0.02em; 71 | font-weight: bold !important; 72 | } 73 | 74 | .sfw_blue { 75 | color: #3D5AFE; 76 | letter-spacing: -0.05em; 77 | padding: 0; 78 | text-shadow: 4px 0 10px rgba(63, 81, 181, 0.4), 79 | 0 4px 10px rgba(63, 81, 181, 0.4), 80 | -4px 0 10px rgba(63, 81, 181, 0.4), 81 | 0 -4px 10px rgba(63, 81, 181, 0.4); 82 | } 83 | 84 | 
.nsfw_red { 85 | color: #b00; 86 | letter-spacing: -0.05em; 87 | padding: 0; 88 | text-shadow: 4px 0 10px rgba(200, 0, 0, 0.3), 89 | 0 4px 10px rgba(200, 0, 0, 0.3), 90 | -4px 0 10px rgba(200, 0, 0, 0.3), 91 | 0 -4px 10px rgba(200, 0, 0, 0.3); 92 | } 93 | 94 | a.nsfw:hover .nsfw_red { 95 | color: #f00; 96 | text-shadow: 4px 0 10px rgba(255, 0, 0, 0.3), 97 | 0 4px 10px rgba(255, 0, 0, 0.3), 98 | -4px 0 10px rgba(255, 0, 0, 0.3), 99 | 0 -4px 10px rgba(255, 0, 0, 0.3); 100 | } 101 | 102 | .nsfw_bracket { 103 | color: #b00; 104 | font-size: 1.2em; 105 | letter-spacing: -0.15em; 106 | position: relative; 107 | text-shadow: 4px 0 10px rgba(200, 0, 0, 0.3), 108 | 0 4px 10px rgba(200, 0, 0, 0.3), 109 | -4px 0 10px rgba(200, 0, 0, 0.3), 110 | 0 -4px 10px rgba(200, 0, 0, 0.3); 111 | } 112 | 113 | .sfw_bracket { 114 | color: #3D5AFE; 115 | font-size: 1.2em; 116 | letter-spacing: -0.15em; 117 | position: relative; 118 | text-shadow: 4px 0 10px rgba(63, 81, 181, 0.4), 119 | 0 4px 10px rgba(63, 81, 181, 0.4), 120 | -4px 0 10px rgba(63, 81, 181, 0.4), 121 | 0 -4px 10px rgba(63, 81, 181, 0.4); 122 | } 123 | 124 | a.nsfw:hover .sfw_bracket { 125 | color: #2979FF; 126 | letter-spacing: -0.15em; 127 | text-shadow: 4px 0 10px rgba(68, 138, 255, 0.3), 128 | 0 4px 10px rgba(68, 138, 255, 0.3), 129 | -4px 0 10px rgba(68, 138, 255, 0.3), 130 | 0 -4px 10px rgba(68, 138, 255, 0.3); 131 | } 132 | 133 | a.nsfw:hover .sfw_blue { 134 | color: #2979FF; 135 | text-shadow: 4px 0 10px rgba(68, 138, 255, 0.3), 136 | 0 4px 10px rgba(68, 138, 255, 0.3), 137 | -4px 0 10px rgba(68, 138, 255, 0.3), 138 | 0 -4px 10px rgba(68, 138, 255, 0.3); 139 | } 140 | 141 | a.nsfw:hover .nsfw_bracket { 142 | color: #f00; 143 | letter-spacing: -0.15em; 144 | text-shadow: 4px 0 10px rgba(255, 0, 0, 0.3), 145 | 0 4px 10px rgba(255, 0, 0, 0.3), 146 | -4px 0 10px rgba(255, 0, 0, 0.3), 147 | 0 -4px 10px rgba(255, 0, 0, 0.3); 148 | } 149 | 150 | #nsfw_bracket_left { 151 | left: 5px; 152 | } 153 | 154 | #nsfw_bracket_right { 
155 | left: -7px; 156 | } 157 | 158 | .nsfw_white { 159 | color: #ddd; 160 | position: relative; 161 | top: -2px; 162 | padding: 0; 163 | letter-spacing: -0.05em; 164 | } 165 | 166 | a.nsfw:hover .nsfw_white { 167 | color: #fff; 168 | } 169 | 170 | .card-panel .input-field { 171 | margin-bottom: 0; 172 | } 173 | 174 | footer a { 175 | color: #CFD8DC; 176 | } 177 | 178 | 179 | @media (min-width: 700px) and (max-width: 1500px) { 180 | .card.horizontal .card-action { 181 | display: none; 182 | } 183 | .card.horizontal .reverse_links { 184 | display: inherit; 185 | } 186 | } 187 | 188 | @media (max-width: 700px) { 189 | .card.horizontal { 190 | display: block; 191 | } 192 | .card.horizontal .card-image { 193 | max-width: inherit; 194 | width: 100%; 195 | } 196 | .card.horizontal .card-image img { 197 | width: inherit; 198 | } 199 | .reverse_links { 200 | display: none; 201 | } 202 | } 203 | 204 | @media (min-width: 1501px) { 205 | .reverse_links { 206 | display: none; 207 | } 208 | .card.horizontal .card-action { 209 | display: block; 210 | } 211 | } 212 | 213 | .reverse_links .btn { 214 | margin-right: 0.5em; 215 | margin-top: 0.5em; 216 | } 217 | 218 | .col img { 219 | width: 100%; 220 | } 221 | 222 | .page-footer.sfw_footer { 223 | background-color: #1E88E5; 224 | } 225 | 226 | .sfw_tabs.tabs .tab a { 227 | color: rgba(30, 136, 229, 0.7); 228 | } 229 | 230 | .sfw_tabs.tabs .tab a:focus, .sfw_tabs.tabs .tab a:focus:active { 231 | background-color: rgba(30, 136, 229, 0.2); 232 | } 233 | 234 | .sfw_tabs.tabs .indicator { 235 | background-color: #1E88E5; 236 | } 237 | 238 | /* img slideshow */ 239 | .slideshow { 240 | position: relative; 241 | overflow: hidden; 242 | width: 100%; 243 | } 244 | 245 | .gallery-item { 246 | position: absolute; 247 | width: 100%; 248 | display: none !important; 249 | } 250 | 251 | .gallery-item.showcase-img { 252 | display: inherit !important; 253 | } 254 | 255 | .sha1 { 256 | position: absolute; 257 | bottom: 1em; 258 | } 259 | code { 260 
| color: #607D8B; 261 | } 262 | 263 | .push { 264 | margin-bottom: auto; 265 | } 266 | 267 | .help { 268 | font-size: 25px; 269 | font-weight: bold; 270 | } 271 | 272 | @media screen and (max-width: 400px) { 273 | .btn-col { 274 | display: none; 275 | } 276 | .col { 277 | width: 100%; 278 | } 279 | } 280 | 281 | code { 282 | color: deeppink; 283 | } 284 | 285 | .help-link { 286 | margin-left: 1rem; 287 | } 288 | 289 | .card code { 290 | white-space: pre-wrap; 291 | } 292 | -------------------------------------------------------------------------------- /static/nsfw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simon987/irarchives/040dedffa4108f16ba1bd34aaa869ed2e47c5005/static/nsfw.png -------------------------------------------------------------------------------- /static/sfw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simon987/irarchives/040dedffa4108f16ba1bd34aaa869ed2e47c5005/static/sfw.png -------------------------------------------------------------------------------- /status.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from flask import Blueprint, Response 4 | 5 | from DB import DB 6 | from common import DBFILE, cache 7 | from util import load_list 8 | 9 | db = DB(DBFILE) 10 | status_page = Blueprint('status', __name__, template_folder='templates') 11 | 12 | 13 | def count_subs_txt(): 14 | return len(load_list('subs.txt')) 15 | 16 | 17 | @status_page.route("/status") 18 | @cache.cached(timeout=3600 * 4) 19 | def status(): 20 | return Response(json.dumps({ 21 | 'status': { 22 | 'posts': db.get_post_count(), 23 | 'comments': db.get_comment_count(), 24 | 'videos': db.get_videoframe_count(), 25 | 'albums': db.get_album_count(), 26 | 'images': db.get_image_count(), 27 | 'subreddits': count_subs_txt() 28 | }, 29 | }), mimetype='application/json') 30 | 
-------------------------------------------------------------------------------- /subreddits.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from flask import Blueprint, Response 4 | 5 | from common import cache 6 | from util import load_list 7 | 8 | subreddits_page = Blueprint('subreddits', __name__, template_folder='templates') 9 | 10 | 11 | @subreddits_page.route("/subreddits") 12 | @cache.cached(timeout=3600) 13 | def get_subs(): 14 | return Response(json.dumps({'subreddits': load_list('subs.txt')}), 15 | mimetype="application/json") 16 | -------------------------------------------------------------------------------- /subs.txt: -------------------------------------------------------------------------------- 1 | 18_19 2 | 2busty2hide 3 | 3dporncraft 4 | 60fpsporn 5 | 8muses 6 | aa_cups 7 | aa_cupsxxx 8 | abbywinters 9 | abdl 10 | abellaanderson 11 | abuseporn2 12 | accidentalnudity 13 | adultcomix 14 | adultgifs 15 | adultmovies 16 | adultpics 17 | advicehell 18 | aella_girl 19 | Afrodisiac 20 | after_the_shot 21 | agedbeauty 22 | agedlikefinewine 23 | ageplaying 24 | ahegao 25 | airtight 26 | alexapearl 27 | alexistexas 28 | alinali 29 | alisonangel 30 | altgonewild 31 | amateur 32 | amateurallure 33 | amateurarchives 34 | amateurasiangirls 35 | amateur_bitches 36 | amateurbondage 37 | amateurcumsluts 38 | amateurgfs 39 | amateurgirlsbigcocks 40 | amateurhotties 41 | amateurlesbians 42 | amateurporn 43 | amateurs 44 | amateurselfpic 45 | amateurxxx 46 | amberblank 47 | ameristraliagw 48 | amyanderssen 49 | anachronia 50 | anal 51 | anal_addiction 52 | analfisting 53 | anal_gifs 54 | analgw 55 | analinsertions 56 | analladies 57 | analorgasms 58 | analporn 59 | analwreakage 60 | angelawhite 61 | anilingus 62 | animatedgif 63 | animatedporn 64 | animatedporngifs 65 | anjelica_ebbi 66 | Anjelica_Ebbi 67 | anklepanties 68 | anneli 69 | annoyedtobenude 70 | annoyedtobenude 71 | anoralfixation 72 | 
anyonefortennis 73 | aparthigh 74 | aprons 75 | archerporn 76 | areolas 77 | ariagiovanni 78 | ariel_model 79 | arielrebel 80 | artgw 81 | asaakira 82 | ashlynn_brooke 83 | asianamericanhotties 84 | asianass 85 | asianasshole 86 | asianbabes 87 | asianblowjobs 88 | asianchicks 89 | asiancumsluts 90 | asian_fever 91 | asian_gifs 92 | asianhotties 93 | AsianHotties 94 | asianhottiesgifs 95 | asiannipples 96 | asiannsfw 97 | AsianNSFW 98 | asianol 99 | asianpee 100 | asianporn 101 | asianpussy 102 | asiansgonewild 103 | AsiansGoneWild 104 | askredditafterdark 105 | ass 106 | ass_and_titties 107 | asseffect 108 | assgifs 109 | asshole 110 | assholebehindthong 111 | AssholeBehindThong 112 | assholegonewild 113 | assinthong 114 | asslick 115 | assontheglass 116 | asstastic 117 | ass_to_ass 118 | ass_to_ssa 119 | athleticgirls 120 | athleticwearporn 121 | atporn 122 | augustames 123 | autofellatio 124 | autogynephilia 125 | avatar34 126 | avatarporn 127 | avocadosgonewild 128 | awwyea 129 | babes 130 | backdimples 131 | bad_ass_girlfriends 132 | badassgirls 133 | baddragon 134 | badmothers 135 | badparents 136 | badtats 137 | baileyjay 138 | ballsucking 139 | bananatits 140 | baramanga 141 | barebacksex 142 | baregirls 143 | barelylegal 144 | barelylegalteens 145 | bathing 146 | bbw 147 | bbw 148 | bbwbikinis 149 | bbwgw 150 | bbwvideos 151 | b_cups 152 | bdsm 153 | bdsmama 154 | bdsmcommunity 155 | bdsmdiy 156 | bdsmerotica 157 | bdsmfaq 158 | bdsmgw 159 | bdsm_nospam 160 | bdsmpersonals 161 | bdsmtasks 162 | bdsmvideo 163 | beach 164 | beachgirls 165 | beastiality 166 | beautifultitsandass 167 | beautyqueenporn 168 | beefflaps 169 | beef_flaps 170 | beertits 171 | before_after_cumsluts 172 | beforeandafterboltons 173 | ben10porn 174 | bestiality 175 | bestofcamgirls 176 | bhmgonewild 177 | biancabeauchamp 178 | bibijones 179 | bigareolas 180 | bigasses 181 | bigboobies 182 | bigboobs 183 | bigboobsgonewild 184 | bigboobsgw 185 | BigBoobsGW 186 | bigboobsontopgw 187 | 
bigclit 188 | bigdickgirl 189 | biggerthanyouthought 190 | bigonewild 191 | bigtiddygothgf 192 | bigtitssmalltits 193 | bikini 194 | bikinibridge 195 | bikinis 196 | bimbofetish 197 | bimbofication 198 | bimboxxx 199 | bioshockporn 200 | bisexy 201 | bitches 202 | bitches_like_it_big 203 | bizarresex 204 | blackchickswhitedicks 205 | blackfathers 206 | blackgirlpics 207 | blackgirlslovefacials 208 | blackmailporn 209 | black_porn 210 | blacktapeproject 211 | BlancNoir 212 | bleach_hentai 213 | blonde 214 | blonde 215 | blondeass 216 | blondehairblueeyes 217 | blondes 218 | blowbang 219 | blowjob 220 | blowjobeyecontact 221 | blowjobgifs 222 | blowjobs 223 | blowjobsandwich 224 | bodyperfection 225 | bodyshots 226 | bodystockings 227 | boltedonbooty 228 | boltedondicks 229 | boltedonlips 230 | boltedonmaxed 231 | boltedontits 232 | bondage 233 | bonermaterial 234 | bonersinpublic 235 | boobbounce 236 | boobgifs 237 | boobies 238 | boobkarma 239 | boobland 240 | boobs 241 | boobstrap 242 | booknymphs 243 | boots 244 | booty 245 | booty_gifs 246 | boredandignored 247 | boredignored 248 | borednignored 249 | bottomless 250 | bottomless_vixens 251 | Bottomless_Vixens 252 | boundgirls 253 | bowsette 254 | boyshort 255 | boyshorts 256 | braandpanties 257 | braceface 258 | braceface 259 | brasgonewild 260 | breastenvy 261 | breastexpansion 262 | breastplay 263 | breasts 264 | breeding 265 | brentramboapproves 266 | briebby 267 | broslikeus 268 | brownbubbas 269 | brunette 270 | brunetteass 271 | bryci 272 | bubblebutts 273 | bubbling 274 | bukkake 275 | bulges 276 | burlesque 277 | burstingout 278 | bustier 279 | busty 280 | bustyasians 281 | bustybabes 282 | bustybabesgalore 283 | bustyfit 284 | busty_gifs 285 | bustypetite 286 | BustyPetite 287 | butterface 288 | butterflywings 289 | buttjobs 290 | buttloads 291 | buttplug 292 | buttsandbarefeet 293 | buttsex 294 | buttsharpies 295 | buttsthatgrip 296 | caitlyn87 297 | calicoo 298 | cameltoe 299 | cameltoegirls 300 | 
cameltoepics 301 | camshow 302 | camsluts 303 | camwhores 304 | cardsagainsthumanity 305 | caribbeangirls 306 | carlottachampagne 307 | carnalclass 308 | casualjiggles 309 | cat_girls 310 | catsgonewild 311 | caughtfucking 312 | celebcumsluts 313 | celebfakes 314 | celebnipslips 315 | celebnsfw 316 | celebnudes 317 | celeboops 318 | celebritybutts 319 | celebrityfakes 320 | celebrityhardcore 321 | celebritynipples 322 | celebritypenis 323 | celebritypokies 324 | celebritypussy 325 | celebsexscenes 326 | celebsnaked 327 | celebsreality 328 | celebsunleashed 329 | celebupskirts 330 | celestestar 331 | cellshots 332 | cfnf 333 | cfnm 334 | cfnmfetish 335 | chakuero 336 | changingrooms 337 | ChangingRooms 338 | chastity 339 | chastitytraining 340 | cheatingwives 341 | cheeking 342 | cheekyasian 343 | cheekybottoms 344 | chesthairporn 345 | chickflixxx 346 | chickswearingchucks 347 | chilly 348 | chinesehotties 349 | chixxx_gifs 350 | chocolatemilf 351 | choking 352 | christiangirls 353 | christy_mack 354 | chronianuit 355 | chubby 356 | cinched 357 | classicpornmags 358 | classicxxx 359 | cleavage 360 | clopclop 361 | clopcomics 362 | closeup 363 | clothedwomen 364 | cock 365 | cocklady 366 | collared 367 | collegeamateurs 368 | collegeinitiation 369 | collegensfw 370 | collegesluts 371 | comics18_story 372 | ComplexionExcellence 373 | confusedboners 374 | coontown 375 | cosplayboobs 376 | cosplaybutts 377 | cosplayheels 378 | cosplayonoff 379 | cosporn 380 | cottonpanties 381 | cottontails 382 | cougars 383 | countrygirls 384 | couplesgonewild 385 | coveredincum 386 | crazyamy 387 | crazyjenny 388 | creampie 389 | creampiegifs 390 | creampies 391 | criminalprotips 392 | cripplingorgasm 393 | crosseyedfap 394 | crotchlesspanties 395 | cryptoanarchy 396 | cuckold 397 | cuckoldcaptions 398 | cuckoldcommunity 399 | cuckoldpregnancy 400 | cuckquean 401 | cuckqueancommunity 402 | cuffed 403 | cumagain 404 | cumbtwntits 405 | cumcoveredfucking 406 | cumfarting 407 | 
cumfetish 408 | cumfromanal 409 | cumhaters 410 | cumintheair 411 | cummingonfigurines 412 | cumonboobs 413 | cumonclothes 414 | cumonglasses 415 | cumov 416 | cumplay_gifs 417 | cumshot 418 | cumshotgifs 419 | cumshots 420 | cumshotselfies 421 | cumsluts 422 | cumswallowing 423 | cumswap 424 | cumtrays 425 | cumvids 426 | cunnilingus 427 | cupless 428 | curls 429 | curvesarebeautiful 430 | curvy 431 | curvygonewild 432 | curvyplus 433 | cuteguybutts 434 | CuteModeSlutMode 435 | dailymotiontitties 436 | damngoodinterracial 437 | damselsindistress 438 | danidaniels 439 | daresgonewild 440 | darkangels 441 | darknetmarkets 442 | datass 443 | datbuttfromthefront 444 | datgap 445 | datgrip 446 | datv 447 | dbz34 448 | debs_and_doxies 449 | decrypto 450 | deepthroat 451 | deepthroattears 452 | deformed 453 | delicioustraps 454 | desiboners 455 | desigonewild 456 | dickgirls 457 | dickslips 458 | dionnedaniels 459 | diorabaird 460 | dirtyfamilyphotos 461 | dirtygaming 462 | dirtykikpals 463 | dirtypantiesgw 464 | dirtypenpals 465 | dirtyr4r 466 | dirtysmall 467 | dirtysnapchat 468 | disneyporn 469 | distension 470 | doggy 471 | dolan 472 | dollywinks 473 | domesticgirls 474 | dominated 475 | doppelbangher 476 | dopplebangher 477 | dota2smut 478 | doublepenetration 479 | doubletrouble 480 | doujinshi 481 | downblouse 482 | downbra 483 | dpgirls 484 | dp_porn 485 | dpsex 486 | dragonsfuckingcars 487 | dreamjobs 488 | dressedandundressed 489 | dropem 490 | drunkencookery 491 | drunkgirls 492 | dsls 493 | dyedpubes 494 | dyke 495 | dykesgonewild 496 | easterneuropeangirls 497 | ebony 498 | ebonyasshole 499 | ecchi 500 | edmontongonewild 501 | elaysmith 502 | emilybloom 503 | emmaglover 504 | emogirls 505 | emoporn 506 | enf 507 | engorgedveinybreasts 508 | enoughinternet 509 | entexchange 510 | epiccleavage 511 | ericacampbell 512 | eroge 513 | erotica 514 | eroticart 515 | eroticauthors 516 | erotic_galleries 517 | erotichypnosis 518 | eroticliterature 519 | eroticpenpals 
520 | ethnicgirlfacials 521 | eurogirls 522 | eva_angelina 523 | evalovia 524 | exotic_oasis 525 | exploitation 526 | exposedinpublic 527 | extramile 528 | exxxtras 529 | faceandasshole 530 | facedownassup 531 | facefuck 532 | facefuck 533 | facesitting 534 | facial 535 | facialcumshots 536 | facialfun 537 | FacialFun 538 | facials 539 | fairytail_hentai 540 | fakecum 541 | fapb4momgetshome 542 | fapdeciders 543 | fapfactory 544 | fapfap 545 | fapfapfap 546 | fappitt 547 | fappygood 548 | faptasticimages 549 | fapucational 550 | fauxbait 551 | fayereagan 552 | fearme 553 | feelthemup 554 | feet 555 | feetish 556 | feet_nsfw 557 | feetup 558 | felching 559 | fellatio 560 | femaleasiananal 561 | femalecompletion 562 | femalepov 563 | femalesgonewild 564 | femboys 565 | femdom 566 | femdomcommunity 567 | femdomhumiliation 568 | femsub 569 | feralpokeporn 570 | feralyiff 571 | festivalsluts 572 | fetish 573 | fetlife 574 | fifthworldgonewild 575 | fights 576 | fillyfiddlers 577 | filthygirls 578 | firecrotch 579 | fishnets 580 | fisting 581 | fitgirls 582 | flannelgetsmehot 583 | flashingandflaunting 584 | flashinggirls 585 | FlashingGirls 586 | flashygirls 587 | flexi 588 | fmgonewild 589 | fmn 590 | foodfuckers 591 | footfetish 592 | forcedorgasms 593 | foreignobjectsgw 594 | foreskin 595 | formylover 596 | foxydi 597 | foxyladies 598 | freckledcumsluts 599 | freckledredheads 600 | freeuse 601 | freexxxvideos 602 | frenchesmilfs 603 | frenchmaid 604 | freshfromtheshower 605 | frogbutt 606 | frontgonewild 607 | frontmagazine 608 | fuckableamateurs 609 | fuckbuddies 610 | fuckingmachines 611 | fuckmarryorkill 612 | fullnsfwmovies 613 | funpiece 614 | funsized 615 | funsizedasian 616 | furry 617 | furryfemdom 618 | fursuitsex 619 | futanari 620 | futanaria 621 | futanarihentai 622 | futanaripegging 623 | fuxtaposition 624 | fuzzypeeks 625 | gabengonewild 626 | gag 627 | gagged 628 | gag_spit 629 | gangbang 630 | gape 631 | garterbelts 632 | gaybears 633 | 
gaybrosgonewild 634 | gaychubs 635 | gaycruising 636 | gaycumsluts 637 | gaydaddiespics 638 | gaygifs 639 | gayincest 640 | gaykink 641 | gaymersgonewild 642 | gaynsfw 643 | gayotters 644 | gayporn 645 | gaypornhunters 646 | gaypornwithplot 647 | gaysex 648 | gaysnapchatimages 649 | gaystoriesgonewild 650 | gayvideos 651 | gaywatersports 652 | genderotica 653 | geriatricporn 654 | gettingherselfoff 655 | gfur 656 | ghostnipples 657 | giannamichaels 658 | GiannaMichaels 659 | gifsgonewild 660 | gilf 661 | gillianbarnes 662 | gilrsinyogapants 663 | ginger 664 | gingerdudes 665 | gingerpuss 666 | girlplay 667 | girlsboxing 668 | girlscontrolled 669 | girlscuddling 670 | girlsdoingnerdythings 671 | girlsdoingstuffnaked 672 | GirlsDoPorn 673 | girlsfinishingthejob 674 | GirlsFinishingTheJob 675 | girlsflashing 676 | girlsgonebitcoin 677 | girlsgonedogecoin 678 | girlsholdingdicks 679 | GirlsHumpingThings 680 | girlsinanklesocks 681 | girlsinhoodies 682 | girlsinlacefishnets 683 | girlsinleggings 684 | girlsinmessyrooms 685 | girlsinpantyhose 686 | girlsinpinkundies 687 | girlsinplaidskirts 688 | girlsinschooluniforms 689 | girlsinschooluniforms 690 | girlsinsocks 691 | girlsintanningbeds 692 | girlsintrenchcoats 693 | girlsintubesocks 694 | girlsinyogapants 695 | girlsinyogashorts 696 | girlskissing 697 | girlslickingcum 698 | girlsonstripperpoles 699 | girlsontheirbacks 700 | girlsontop 701 | girlspooping 702 | girlswatchingporn 703 | girlswearingstrapons 704 | girlswearingvs 705 | girlswithbangs 706 | girlswithbigguns 707 | girlswithbikes 708 | girlswithbodypaint 709 | girlswithfreckles 710 | girlswithglasses 711 | girlswithguns 712 | girlswithheadtowels 713 | girlswithiphones 714 | girlswithneonhair 715 | GirlswithNeonHair 716 | girlswithpigtails 717 | girlswithtoys 718 | girlwiththepiercings 719 | glitch_porn 720 | gloriav 721 | glorp 722 | gloryholes 723 | godpussy 724 | goneclothed 725 | goneerotic 726 | goneinsane 727 | gonemild 728 | gonewanton 729 | gonewidl 
730 | gonewild 731 | gonewildalbums 732 | gonewildaudio 733 | gonewildcd 734 | gonewildcolor 735 | gonewildcouples 736 | gonewildcurvy 737 | gonewilder 738 | gonewildfaces 739 | gonewildflicks 740 | gonewild_gifs 741 | gonewildhairy 742 | gonewildmetal 743 | gonewildnerdy 744 | gonewildplus 745 | gonewildpopping 746 | gonewildrequest 747 | gonewildscrubs 748 | gonewildsmiles 749 | gonewildstories 750 | gonewildtrans 751 | gonewildtube 752 | gonewildtwerk 753 | gonewildvideos 754 | gonwild 755 | gore 756 | gothsluts 757 | grannypanties 758 | gravure 759 | greatapes 760 | grool 761 | groupofnudegirls 762 | groupofnudemilfs 763 | groupsex 764 | grower 765 | guro 766 | gushinggirls 767 | guysgonewild 768 | guyskissing 769 | gwbanned 770 | gwbooks 771 | gwchallenge 772 | gwcouples 773 | gwcouples4ladies 774 | gwcumsluts 775 | gwnerdy 776 | gwpublic 777 | gymnastgirls 778 | hairbra 779 | hairy 780 | hairyarmpits 781 | hairyassgirls 782 | hairychicks 783 | hairycurvy 784 | hairymilfs 785 | hairypussy 786 | handbra 787 | handinpanties 788 | handjob 789 | hanging 790 | hannahilton 791 | happyembarrassedgirls 792 | HappyEmbarrassedGirls 793 | happygaps 794 | hardboltons 795 | hardcoresex 796 | harrypotterporn 797 | hashtagy0l0swaggang 798 | hawtness 799 | hayden_winters 800 | heavyeyeliner 801 | heavyhangers 802 | heels 803 | hegre 804 | helgalovekaty 805 | hentai 806 | hentaibondage 807 | hentaifemdom 808 | hentai_gif 809 | hentai_irl 810 | hentaimanga 811 | hentaipics 812 | hentaivids 813 | hereinmycar 814 | hersheyskisstits 815 | heteroflexible 816 | hiddenandwatching 817 | highheelsnsfw 818 | highresass 819 | highresnsfw 820 | hipbones 821 | hipcleavage 822 | hips 823 | hitomi_tanaka 824 | holdthemoan 825 | holycherriesbatcave 826 | homegrowntits 827 | homemadexxx 828 | homosexual 829 | horsemaskgw 830 | hotamputees 831 | hotasianmilfs 832 | hotchickswithtattoos 833 | Hotchickswithtattoos 834 | hotdogging 835 | hotgirls 836 | hotguyswithtattoos 837 | hotinthekitchen 838 
| hotlatinas 839 | hotleggings 840 | hotmilfs 841 | hotness 842 | hottestvoyeurs 843 | hotwife 844 | hotwiferequests 845 | hqhentai 846 | hucow 847 | hugeass 848 | hugeboobs 849 | hugeboobshardcore 850 | hugeboobvideos 851 | hugedicktinychick 852 | hugefutanari 853 | hugenaturals 854 | hugeracks 855 | hugetitssoftcore 856 | humiliation 857 | hungrybutts 858 | hypnogonewild 859 | hypnohentai 860 | iateacrayon 861 | ifeelmyself 862 | Ifyouhadtopickone 863 | igawyrwal 864 | ignoscemihi 865 | iheartbigtits 866 | ilikelittlebutts 867 | iloveemos 868 | imaginaryboners 869 | imgoingtohellforthis 870 | imgurnsfw 871 | impressedbycum 872 | inbreeding 873 | incest 874 | incestcomics 875 | incestdoujinshi 876 | incestgifs 877 | incestpersonals 878 | incestporn 879 | incestvideos 880 | indian 881 | indiana_a 882 | indianbabes 883 | indianfetish 884 | indiansgonewild 885 | indianteens 886 | inflagranti 887 | innie 888 | insertions 889 | instahotties 890 | interracial_hardcore 891 | interracial_porn 892 | inthebushes 893 | isthatcum 894 | ivysnow 895 | iwanttofuckher 896 | jab 897 | jackandjill 898 | jacking 899 | jamesdeen 900 | janadefi 901 | japanesehotties 902 | japaneseporn 903 | japanesepornidols 904 | javdownloadcenter 905 | javure 906 | jennahaze 907 | jenniejune 908 | jenniferwhite 909 | jenni_gregg 910 | jennypoussin 911 | jenselter 912 | jenya_d 913 | jerkingencouragement 914 | jerkinginstruction 915 | jerkofftocelebs 916 | jewishbabes 917 | jigglefuck 918 | jilling 919 | jilling_under_panties 920 | jobuds 921 | jockstraps 922 | joi 923 | jordancarver 924 | joymii 925 | juicyasians 926 | juicybooty 927 | juliekennedy 928 | junglefever 929 | just18 930 | justoneboob 931 | justpeachyy 932 | jynxmaze 933 | kateeowen 934 | katyaclover 935 | kawaiikitten 936 | keepitclassy 937 | kelloggsgonewild 938 | keriberry_420 939 | ketogonewild 940 | keyholdercaptions 941 | killlahentai 942 | kimpossibleporn 943 | kinbaku 944 | kinderyiff 945 | kink 946 | kinksters_gone_wild 947 | 
kinky 948 | kinkyporn 949 | kneesocks 950 | knockers 951 | knottymemes 952 | koreanhotties 953 | kpopfap 954 | labeautefeminine 955 | labiagw 956 | LabiaGW 957 | lactation 958 | lactation 959 | ladiesgonewild 960 | ladiesinleather 961 | ladybonerofaces 962 | ladybonersgw 963 | ladygagasass 964 | lanarhoades 965 | laracroftnsfw 966 | lass 967 | latexunderclothes 968 | latinacuties 969 | latinaporn 970 | latinas 971 | latinascaliente 972 | latinasgw 973 | laundryday 974 | leannadecker 975 | legalteens 976 | legendarylootz 977 | leggingsgonewild 978 | leggingsgonewild 979 | legs 980 | legsup 981 | leila_gw 982 | leotards 983 | leotards 984 | lesbdsm 985 | lesbian_gifs 986 | lesbians 987 | lesbos 988 | lesdom 989 | lexibelle 990 | lezdom 991 | lgbtgonewild 992 | librarygirls 993 | lickingdick 994 | lifeisabeach 995 | lilyc 996 | lilyivy 997 | lineups 998 | lingerie 999 | lingeriegw 1000 | lipbite 1001 | lipsthatgrip 1002 | lisa_ann 1003 | littlecaprice 1004 | littlepersonals 1005 | littlespace 1006 | lockedup 1007 | lockerroom 1008 | lolgrindr 1009 | lolicons 1010 | lolishota 1011 | londonandrews 1012 | lordosis 1013 | lovegaymale 1014 | lovetowatchyouleave 1015 | luckycameraman 1016 | lucypinder 1017 | lucy_vixen 1018 | lusciousladies 1019 | lyingonbellynsfw 1020 | madison_ivy 1021 | makeupfetish 1022 | maledom 1023 | malenamorgan 1024 | malena_morgan 1025 | malepornstars 1026 | malesgonewild 1027 | malespandex 1028 | maleunderwear 1029 | manass 1030 | mandingo 1031 | mangonewild 1032 | manlove 1033 | manmilk 1034 | mansex 1035 | maria_ozawa 1036 | mariaryabushkina 1037 | mariorule34 1038 | maryjanejohnson 1039 | mass_effect_porn 1040 | masserect 1041 | massivecock 1042 | massivecockvids 1043 | massivetitsnass 1044 | masterofanal 1045 | masturbation 1046 | masturbationgonewild 1047 | masturbatorsanonymous 1048 | maturegw 1049 | maturemilf 1050 | maturewoman 1051 | megaporn 1052 | meido 1053 | meinmyplace 1054 | melissadebling 1055 | mellisaclarke 1056 | mengonewild 
1057 | men_in_panties 1058 | meninuniform 1059 | menwithtoys 1060 | metalgirls 1061 | metart 1062 | mexicana 1063 | miakhalifa 1064 | miamalkova 1065 | miasollis 1066 | middleeasternhotties 1067 | miela 1068 | mildlyoffensive 1069 | milf 1070 | milfgw 1071 | milfs 1072 | milfsandhousewives 1073 | militarygonewild 1074 | militarymen 1075 | milkingtable 1076 | misogyny 1077 | missalice_18 1078 | missivyjean 1079 | mlpmature 1080 | mmgirls 1081 | moddedgirls 1082 | momson 1083 | monokini 1084 | monster_cocks 1085 | monstergirl 1086 | mooning 1087 | morbidreality 1088 | morninggirls 1089 | morphs 1090 | motherdaughter 1091 | motiontrackedboobs 1092 | motiontrackedporn 1093 | motorcyclesgonewild 1094 | moundofvenus 1095 | mouthstretching 1096 | mouthwideopen 1097 | moxxigonewild 1098 | muricansfw 1099 | mycherrycrush 1100 | mycleavage 1101 | myfavoritepornstars 1102 | myfreecams 1103 | nakedbabes 1104 | nakedcelebs 1105 | nakedfamouspeople 1106 | nakedprogress 1107 | naruto_hentai 1108 | naturalgirls 1109 | naturaltitties 1110 | naturalwomen 1111 | naturists 1112 | naughtyalysha 1113 | naughtyatwork 1114 | naughtyfromneglect 1115 | nekomimi 1116 | nessadevil 1117 | neveruntieme 1118 | nextdoorasians 1119 | nibblemynipples 1120 | nightlysex 1121 | nippleripple 1122 | nipples 1123 | nobra 1124 | nobsnsfw 1125 | nomorals 1126 | nopanties 1127 | no_pants_party 1128 | nordicwomen 1129 | normalnudes 1130 | nosecum 1131 | notop 1132 | notorso 1133 | notreallyenjoyingit 1134 | notsafefornature 1135 | NotSafeForNature 1136 | nsfl 1137 | nsfw 1138 | nsfw2 1139 | nsfw2048 1140 | nsfw411 1141 | nsfw_amateurs 1142 | nsfwanimegifs 1143 | nsfwart 1144 | nsfwasmr 1145 | nsfwbarista 1146 | nsfw_bdsm 1147 | nsfwbikini 1148 | nsfwblackpeoplegifs 1149 | nsfwbraids 1150 | nsfw_busted 1151 | nsfw_bw 1152 | nsfwcelebarchive 1153 | nsfwcelebarchive /r/celebritypussy 1154 | nsfwcelebgifs 1155 | nsfw_celebrity 1156 | nsfwcelebs 1157 | nsfw_china 1158 | nsfwcloseups 1159 | nsfwcomics 1160 | 
nsfwcosplay 1161 | nsfwcostumes 1162 | nsfwcringe 1163 | nsfwcute 1164 | nsfwdumps 1165 | nsfwfashion 1166 | nsfwfunny 1167 | nsfw_funny 1168 | NSFWFunny 1169 | nsfwgaming 1170 | nsfw_gay 1171 | nsfw_gfy 1172 | nsfwgif 1173 | nsfw_gif 1174 | NSFW_GIF 1175 | nsfw_gifs 1176 | nsfw_gifsound 1177 | nsfw_girlfriendvideos 1178 | nsfw_hardbodies 1179 | nsfwhardcore 1180 | nsfw_hardcore 1181 | nsfw_hd 1182 | nsfwhtml5 1183 | nsfw_html5 1184 | NSFW_HTML5 1185 | nsfwiama 1186 | nsfw_japan 1187 | NSFW_Japan 1188 | nsfw_korea 1189 | NSFW_Korea 1190 | nsfwnew 1191 | nsfwnonporn 1192 | nsfw_nospam 1193 | nsfwonly 1194 | nsfw_outdoors 1195 | nsfwoutfits 1196 | nsfwphotoshopbattles 1197 | nsfw_plowcam 1198 | nsfw_porn_only 1199 | nsfwsector 1200 | nsfw_sets 1201 | nsfw_sexy_gif 1202 | nsfwskirts 1203 | nsfwskype 1204 | nsfwskyrim 1205 | nsfw_snapchat 1206 | nsfwvideos 1207 | nsfw_videos 1208 | nsfw_vids 1209 | nsfwvine 1210 | nsfw_wallpapers 1211 | nsfw_wtf 1212 | nubilefilms 1213 | nude 1214 | nudeamateurporn 1215 | nudebeach 1216 | nudebeauty 1217 | nudeselfies 1218 | nudevines 1219 | nudist_beach 1220 | nugporn 1221 | nutsbabes 1222 | nyr4r 1223 | objects 1224 | oculusnsfw 1225 | o_face 1226 | o_faces 1227 | O_faces 1228 | ohcumon 1229 | ohlympics 1230 | Ohlympics 1231 | oilporn 1232 | oily 1233 | oldenporn 1234 | oldschoolcoolnsfw 1235 | oldschoolcoolNSFW 1236 | oliveskin 1237 | omgbeckylookathiscock 1238 | onallfours 1239 | oneinoneout 1240 | onherknees 1241 | onherstomach 1242 | onlyblondes 1243 | onlygoodporn 1244 | onmmww 1245 | onoff 1246 | OnOff 1247 | onoffceleb 1248 | onoffcelebs 1249 | onoffcollages 1250 | onon 1251 | openbra 1252 | openholes 1253 | openshirt 1254 | oppailove 1255 | oralcreampie 1256 | oralcreampie 1257 | oralsex 1258 | orgasmcontrol 1259 | orgasms 1260 | oriental 1261 | outdoorsex 1262 | outercourse 1263 | overwatch_porn 1264 | oviposition 1265 | paag 1266 | paag 1267 | page3glamour 1268 | painal 1269 | paizuri 1270 | pajamas 1271 | palegirls 1272 | 
paleskin 1273 | panties 1274 | pantiesdown 1275 | pantiestotheside 1276 | pantsinggirls 1277 | pantsu 1278 | pantyfetish 1279 | pantyhose 1280 | pantyhosedgirls 1281 | pantyslide 1282 | passionpics 1283 | passionsex 1284 | passionx 1285 | passionx 1286 | passionyz 1287 | pasties 1288 | patriciacaprice 1289 | pawg 1290 | pawgtastic 1291 | peachfuzz 1292 | peachlips 1293 | pearlgirls 1294 | pee 1295 | peegonewild 1296 | pegging 1297 | pegging_unkinked 1298 | pelfie 1299 | penetration_gifs 1300 | penis 1301 | perfectpussies 1302 | perfectthighs 1303 | perfecttits 1304 | perky 1305 | perkychubby 1306 | personallyyours 1307 | petite 1308 | petitegirls 1309 | petitegonewild 1310 | petitegonewild /r/xsmallgirls 1311 | petplay 1312 | photoplunder 1313 | picsofdeadkids 1314 | picsofdeadtoasters 1315 | picsofhorsedicks 1316 | piercednsfw 1317 | piercedtits 1318 | pigtails 1319 | pillowtalkaudio 1320 | pink 1321 | pinkandbare 1322 | pinupstyle 1323 | playboy 1324 | plugged 1325 | plumper 1326 | plumpers 1327 | pm_me_your_tits_girl 1328 | pokeporn 1329 | pokies 1330 | polishnsfw 1331 | polistan 1332 | ponytails 1333 | porn 1334 | pornbattles 1335 | porngif 1336 | porngifs 1337 | porn_gifs 1338 | porngifsbybot 1339 | pornhighlights 1340 | pornhubcomments 1341 | pornid 1342 | porninfifteenseconds 1343 | pornlovers 1344 | pornparody 1345 | pornpedia 1346 | pornpleasure 1347 | porn_plots 1348 | pornstarlethq 1349 | pornstars 1350 | pornstarsafterhours 1351 | porn_tubes 1352 | pornvids 1353 | portalporn 1354 | pounding 1355 | pov 1356 | povpornpics 1357 | povpornvids 1358 | povtranny 1359 | preggo 1360 | preggoporn 1361 | pregporn 1362 | prematurecumshots 1363 | presenting 1364 | primes 1365 | pronebone 1366 | ps34 1367 | public 1368 | publicboys 1369 | publicflashing 1370 | publicupskirts 1371 | publicvideos 1372 | puffies 1373 | puffypussy 1374 | pulsatingcumshots 1375 | punkgirls 1376 | punklovers 1377 | puremilf 1378 | pushing 1379 | pussy 1380 | pussyflashing 1381 | pussyjobs 
1382 | pussymound 1383 | pussymound 1384 | pussyslip 1385 | qualitynsfw 1386 | quiver 1387 | rainbowbar 1388 | randomactsofblowjob 1389 | randomactsofmuffdive 1390 | randomactsoforalsex 1391 | randomsexiness 1392 | randomsexygifs 1393 | ranked_girls 1394 | rape_roleplay 1395 | rapingwomen 1396 | rapunzel 1397 | ratemynudebody 1398 | ravegirls 1399 | ravenhaired 1400 | ravergirl 1401 | rawcelebs 1402 | realamateurpics 1403 | realasians 1404 | realbeauties 1405 | realbikinis 1406 | realgirls 1407 | realgirlsphotoalbums 1408 | realmilf 1409 | realmoms 1410 | realolderwomen 1411 | realpublicnudity 1412 | realrule34 1413 | realthreesomes 1414 | rearpussy 1415 | recoilboobs 1416 | redditorcum 1417 | redheadass 1418 | redheads 1419 | redpassword 1420 | reflections 1421 | remylacroix 1422 | repressedgonewild 1423 | reversableshirtgirls 1424 | ribcage 1425 | rileyreid 1426 | roadhead 1427 | rosiejones 1428 | rosie_jones 1429 | roughanal 1430 | rubberneckers 1431 | rud_fuckers 1432 | ruinedorgasms 1433 | rule34 1434 | rule34_albums 1435 | rule34_ass 1436 | rule34cartoons 1437 | rule34_comics 1438 | rule34feet 1439 | rule34gifs 1440 | rule34lol 1441 | rule34overwatch 1442 | rule34pinups 1443 | rule34requests 1444 | runwild 1445 | rush_boobs 1446 | sacrilicious 1447 | saggy 1448 | samespecies 1449 | sanic 1450 | sapphicgifs 1451 | sappho 1452 | sarah_xxx 1453 | sarajay 1454 | sarajunderwood 1455 | sarinavalentina 1456 | sashagrey 1457 | satinpanties 1458 | saturdaymorninggirls 1459 | scandinaviangirls 1460 | scaredshitless 1461 | scarybilbo 1462 | sceneboys 1463 | scenegirls 1464 | scissoring 1465 | sea_girls 1466 | seasquared 1467 | seethroughleggings 1468 | seethru 1469 | self_perfection 1470 | selfpix 1471 | selfportraitnsfw 1472 | selfservice 1473 | selfshotgirls 1474 | selfshots 1475 | serafuku 1476 | sex_comics 1477 | sexdolls 1478 | sex_games 1479 | sexgifs 1480 | sexinfrontofothers 1481 | sexmachines 1482 | sexselfies 1483 | sexsells 1484 | sext 1485 | sexting 1486 | 
sextoys 1487 | sexwithdogs 1488 | sexy 1489 | sexyandshocking 1490 | sexybunnies 1491 | sexybutnotporn 1492 | sexy_ed 1493 | sexyflowerwater 1494 | sexyfrex 1495 | sexygirlsinboots 1496 | sexygirlsinjeans 1497 | sexyness 1498 | sexynsfw 1499 | sexyshemales 1500 | sexystarwars 1501 | sexystories 1502 | sexysuperheroines 1503 | sexytgirls 1504 | sexytimechat 1505 | sexytummies 1506 | SexyTummies 1507 | sexyuniforms 1508 | sexywallpapers 1509 | sexzone 1510 | shareafap 1511 | sharktits 1512 | shatter 1513 | shavedgirls 1514 | shavedpussies 1515 | shay_laren 1516 | sheerpanties 1517 | shelikesitrough 1518 | SheLikesItRough 1519 | shemale 1520 | shemale_gifs 1521 | shemales 1522 | shewantstofuck 1523 | shingekinohentai 1524 | shinybondage 1525 | shinyfetish 1526 | shinypants 1527 | shinyporn 1528 | shorthairchicks 1529 | shortshorts 1530 | showerbeergonewild 1531 | sideboob 1532 | sillygirls 1533 | simplynaked 1534 | simps 1535 | singlets 1536 | sissies 1537 | sissies 1538 | sissy_humiliation 1539 | sissyhypno 1540 | sizecomparison 1541 | skaro 1542 | skindiamond 1543 | skinnyanal 1544 | skinnytail 1545 | skirtriding 1546 | skivvies 1547 | slimegirls 1548 | slimgirls 1549 | SlimThick 1550 | slingbikini 1551 | slippery 1552 | slomoboobs 1553 | slothporn 1554 | slutbusty 1555 | sluttyconfessions 1556 | sluttyhalloween 1557 | sluttystrangers 1558 | slutwife 1559 | slutwives 1560 | smalldickproblems 1561 | smalldicks 1562 | smallnipples 1563 | smashbros34 1564 | smokin 1565 | snapchatboobs 1566 | snapchatgw 1567 | snapchat_sluts 1568 | snapleaks 1569 | sneakersgonewild 1570 | snowgirls 1571 | snowwhites 1572 | socialmediasluts 1573 | sockgirls 1574 | socksgonewild 1575 | softcorejapan 1576 | softcorenights 1577 | softies 1578 | spacedicks 1579 | spanking 1580 | spermjoy 1581 | spitroasted 1582 | spod 1583 | spreadeagle 1584 | spreadem 1585 | spreading 1586 | springbreakers 1587 | squidsgonewild 1588 | squirting 1589 | srssex 1590 | ssbbw 1591 | ssbbw_love 1592 | stacked 
1593 | standingasshole 1594 | starwarsnsfw 1595 | stealthvibes 1596 | stockings 1597 | stocking_vids 1598 | stomachdownfeetup 1599 | stoyaxxx 1600 | straightgirlsplaying 1601 | StraightGirlsPlaying 1602 | strapon 1603 | streamersgonewild 1604 | streetskateboarding 1605 | strippersonthejob 1606 | strugglefucking 1607 | struggleporn 1608 | stuffers 1609 | stupidslutsclub 1610 | submissive 1611 | subsanctuary 1612 | subtlenudity 1613 | subwife 1614 | suckingitdry 1615 | suctiondildos 1616 | suicidegirls 1617 | sukebei 1618 | summertimeheat 1619 | surfinggirls 1620 | sweatermeat 1621 | sweatysex 1622 | sweet_sexuality 1623 | swensfw 1624 | swingcommunity 1625 | swingersgw 1626 | swordswallowers 1627 | sybian 1628 | tailbait 1629 | tailplug 1630 | talesfromyourstripper 1631 | tallgonewild 1632 | tanlines 1633 | tbulges 1634 | teagirls 1635 | teamvrb 1636 | teaseanddenial 1637 | teasemepleaseme 1638 | teenkasia 1639 | teensexcum 1640 | teentitansporn 1641 | tentai 1642 | tessafowler 1643 | tflop 1644 | tgifs 1645 | tgirlgifs 1646 | tgirls 1647 | Tgirls 1648 | tgirl_tube 1649 | thapt 1650 | thatrearview 1651 | thebackdoor 1652 | the_best_nsfw_gifs 1653 | thefullbush 1654 | thegap 1655 | thegoldstandard 1656 | thehangingboobs 1657 | thelandingstrip 1658 | thelostwoods 1659 | theplayroom 1660 | theratio 1661 | theratio 1662 | therearpussy 1663 | theredfox 1664 | theresatata 1665 | theunderboob 1666 | theunderbun 1667 | thick 1668 | thickasians 1669 | thickchixxx 1670 | thicker 1671 | thickloads 1672 | thicklogic 1673 | thickthighs 1674 | thighdeology 1675 | thighhighhentai 1676 | thighhighs 1677 | thighs 1678 | thinchickswithtits 1679 | thinclothing 1680 | thirdgender 1681 | thongs 1682 | threesome 1683 | throatfucking 1684 | throatpies 1685 | tickling 1686 | tiffanythompson 1687 | tift 1688 | tightdresses 1689 | tights 1690 | tightshirts 1691 | tightshorts 1692 | tight_shorts 1693 | tightsqueeze 1694 | tilgonewild 1695 | tinyasiantits 1696 | tinydick 1697 | tinytits 1698 | 
tinyvulgarunicorn 1699 | tipofmypenis 1700 | titfuck 1701 | titler 1702 | tits 1703 | titsagainstglass 1704 | titsassandnoclass 1705 | titsgalore 1706 | titstouchingtits 1707 | titties_n_kitties 1708 | tittydrop 1709 | tokyogirl 1710 | tongueoutbjs 1711 | toobig 1712 | toplessinjeans 1713 | topless_vixens 1714 | topsandbottoms 1715 | top_tier_asses 1716 | tori_black 1717 | torpedotits 1718 | totallystraight 1719 | transex 1720 | transformation 1721 | translucent_porn 1722 | trapgifs 1723 | traphentai 1724 | traps 1725 | trashyboners 1726 | treatemright 1727 | treesgonewild 1728 | tributeme 1729 | trim 1730 | triplepenetration 1731 | truebukkake 1732 | trueclop 1733 | truedownblouse 1734 | truehorror 1735 | tsexual 1736 | tshirtsandtanktops 1737 | ttotm 1738 | tumblrarchives 1739 | tumblrporn 1740 | turnsmeon 1741 | tushy 1742 | twerking 1743 | twingirls 1744 | twinklove 1745 | twinks 1746 | twistys 1747 | uhdnsfw 1748 | unashamed 1749 | unashamedguys 1750 | uncensoredhentai 1751 | underarms 1752 | underboob 1753 | underbun 1754 | underwaterbabes 1755 | underweargw 1756 | undies 1757 | unrealgirls 1758 | upherbutt 1759 | upskirt 1760 | upskirt 1761 | upskirtpics 1762 | usedpanties 1763 | utsm 1764 | vagina 1765 | veins 1766 | victoriansluts 1767 | victoriaraeblack 1768 | victoriasecret 1769 | victory_girls 1770 | vintageamateurs 1771 | vintagebabes 1772 | vintagecelebsnsfw 1773 | vintageerotica 1774 | vintagesmut 1775 | volleyballgirls 1776 | voluptuous 1777 | vore 1778 | voyeurs 1779 | vulva 1780 | waif 1781 | WatchItForThePlot 1782 | watchpeopledie 1783 | waterpoloboobs 1784 | watersports 1785 | weddingringsshowing 1786 | weddingsgonewild 1787 | wedgiegirls 1788 | weeklynsfw 1789 | wellwornbimbos 1790 | wendyfiore 1791 | westernhentai 1792 | wet 1793 | wetandmessy 1794 | wetbabes 1795 | wetfetish 1796 | wetontheoutside 1797 | wetspot 1798 | wettshirts 1799 | whatwouldyourateher 1800 | whenitgoesin 1801 | wheredidthesodagowild 1802 | whichonewouldyoupick 1803 | 
whiskey_bent 1804 | whooties 1805 | whywouldyoufuckthat 1806 | wifeporn 1807 | wifeshare 1808 | wifesharing 1809 | wincest 1810 | wincest 1811 | woahpoon 1812 | womeninuniform 1813 | womenlookingdown 1814 | womenofcolor 1815 | WomenOfColor 1816 | womenofcolorgifs 1817 | womenofcolorrisque 1818 | womenofcolorxxx 1819 | womenofcolour 1820 | womenorgasm 1821 | womenwearingshirts 1822 | woodnymphs 1823 | workgonewild 1824 | workoutgirls 1825 | workoutgonewild 1826 | worldclassporn 1827 | worldcupgirls 1828 | world_of_cum 1829 | worldstaruncut 1830 | wouldnotbang 1831 | wouldtotallyfuck 1832 | wouldyoufuckmywife 1833 | wowthissubexistsnsfw 1834 | wtf_porn_gifs 1835 | wtsstadamit 1836 | WtSSTaDaMiT 1837 | xart 1838 | xartbabes 1839 | xposing 1840 | xsmallgirls 1841 | xsome 1842 | xxx_animated_gifs 1843 | xxxcaptions 1844 | yaoi 1845 | yiff 1846 | yiffcomics 1847 | yiffgif 1848 | yogapants 1849 | youngmaleporn 1850 | youngmonroe 1851 | youngporn 1852 | youtubefakes 1853 | youtubetitties 1854 | yuri 1855 | zelda_romance -------------------------------------------------------------------------------- /templates/index_nsfw.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% set title = "{NSFW} Reddit reverse image search" %} 4 | {% set description = "Search NSFW subreddits by image. Easily find the source for some photos (and more) by finding the relevant posts on reddit." %} 5 | 6 | {% set tabs_style = "" %} 7 | {% set footer_style = "" %} 8 | {% set help_style = "deep-orange lighten-1" %} 9 | 10 | {% block header %} 11 | 12 |

    13 | 14 | { 15 | NSFW 16 | } 17 | 18 | reddit reverse image search 19 |

    20 |
    21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /templates/index_sfw.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% set title = "{SFW} Reddit reverse image search" %} 4 | {% set description = "Search SFW subreddits by image. Easily find the source for some photos (and more) by finding the relevant posts on reddit." %} 5 | 6 | {% set tabs_style = "sfw_tabs" %} 7 | {% set footer_style = "sfw_footer" %} 8 | {% set help_style = "pink" %} 9 | 10 | {% block header %} 11 | 12 |

    13 | 14 | { 15 | SFW 16 | } 17 | 18 | reddit reverse image search 19 |

    20 |
    21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{ title }} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | {#Header#} 17 | {% block header %}{% endblock %} 18 | 19 |
    20 | 21 | {# Search bar #} 22 |
    23 |
    24 |
    25 |
    26 | 27 | 28 |
    29 |
    30 |
    31 |
    32 | 33 | {# Search results #} 34 |
    35 | 36 | {# Search results #} 37 |
    38 | 39 | {# menu #} 40 |
    41 |
    42 | 43 | 48 | 49 | {# Database stats #} 50 |
    51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 |
    images...
    video frames...
    comments...
    albums...
    posts...
    subreddits...
    79 |
    80 | 81 | {# Subreddits #} 82 |
    83 |
    84 |
    85 |
    86 | 87 | {# About #} 88 |
    89 |
    90 |
    Who?
    91 |
    Originally created by 4_pr0n, resurrected by simon987
    92 |
    93 |
    94 |
    What?
    95 |
    Reverse image & video search
    96 |
    97 |
    98 |
    Why?
    99 |
    The greater good
    100 |
    101 |
    102 |
    How?
    103 |
    Image comparison, Python and C. See the source code on 104 | GitHub
    105 |
    106 |
    107 |
    108 |
    109 |
    110 | 111 |
    112 |
    113 |
    114 | Suggestions or comments? Contact me by email here 115 | [pubkey] 116 |
    117 |
    118 | 121 |
    122 | 123 |
    124 | 125 | ? 126 | 127 |
    128 | 129 | 130 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /update_clean_url.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # alter table imageurls add clean_url TEXT; 4 | # drop index imageurls_url_index; 5 | # create index imageurls_clean_url_index on imageurls (clean_url); 6 | 7 | from DB import DB 8 | from common import DBFILE 9 | from util import clean_url 10 | 11 | db = DB(DBFILE) 12 | with db.get_conn() as conn: 13 | update_map = list() 14 | for row in conn.query("SELECT id, url FROM ir.public.imageurls"): 15 | update_map.append((row[0], clean_url(row[1]))) 16 | 17 | print("Updating %s imageurls" % len(update_map)) 18 | input("Continue?") 19 | 20 | with db.get_conn() as conn: 21 | for i, update in enumerate(update_map): 22 | conn.exec("UPDATE imageurls SET clean_url = %s WHERE id=%s", (update[1], update[0])) 23 | print("%08d/%08d" % (i, len(update_map))) 24 | 25 | -------------------------------------------------------------------------------- /update_hash.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # alter table images add hash2 bytea; 4 | 5 | from DB import DB 6 | from common import DBFILE 7 | 8 | 9 | db = DB(DBFILE) 10 | with db.get_conn() as conn: 11 | update_map = list() 12 | for row in conn.query("SELECT id, hash FROM videoframes"): 13 | update_map.append((row[0], bytes(row[1]))) 14 | 15 | 16 | print("Updating %s images" % len(update_map)) 17 | input("Continue?") 18 | cnt = len(update_map) 19 | 20 | with db.get_conn() as conn: 21 | for i, update in enumerate(update_map): 22 | conn.exec("UPDATE videoframes SET hash = %s WHERE id=%s", (update[1], update[0])) 23 | print("%08d/%08d" % (i, cnt)) 24 | -------------------------------------------------------------------------------- /upload.py: -------------------------------------------------------------------------------- 
@upload_page.route("/upload", methods=["POST"])
def upload():
    """Reverse-image-search endpoint for pasted/uploaded images.

    Expects a form POST with:
      fname: must be the literal string "image"
      data:  a data-URI, e.g. "data:image/png;base64,AAAA..."
      d:     optional maximum hamming distance, clamped to MAX_DISTANCE

    Returns JSON search results on success, or an HTTP 400 JSON error
    when the request is missing fields or is not a data URI.
    """
    if "data" in request.form \
            and "fname" in request.form \
            and request.form["fname"] == "image" \
            and "," in request.form["data"]:

        if "d" in request.form:
            try:
                distance = min(int(request.form["d"]), MAX_DISTANCE)
            except (ValueError, TypeError):
                # Non-numeric "d" parameter: fall back to exact match
                distance = 0
        else:
            distance = 0
        logger.info("Paste upload with distance %d" % (distance, ))

        data = request.form["data"]
        # The base64 payload starts AFTER the comma that terminates the
        # data-URI header ("data:image/png;base64,...").  The original code
        # included the comma itself, which only decoded because b64decode
        # ignores non-alphabet characters when validate=False.
        image_buffer = base64.b64decode(data[data.index(",") + 1:])
        image = image_from_buffer(image_buffer)
        image_hash = get_hash(image)

        images = db.get_similar_images(image_hash, distance)
        if images:
            results = SearchResults(db.build_result_for_images(images),
                                    url="hash:" + binascii.hexlify(image_hash).decode('ascii')
                                    )
        else:
            results = SearchResults([])

        return Response(results.json(), mimetype="application/json")

    # Malformed request (missing fields or not a data URI): previously this
    # fell through returning None, which Flask turns into a 500.
    return Response(json.dumps({"error": "bad request"}),
                    status=400, mimetype="application/json")
def load_list(filename):
    """Read *filename* and return its lines, stripped and lower-cased.

    Blank and whitespace-only lines are skipped entirely.  The previous
    implementation only filtered lines that were exactly "\n", so a line
    containing just spaces produced an empty-string entry in the result.
    """
    with open(filename) as f:
        # line.strip() is falsy for "", "\n" and whitespace-only lines
        return [line.strip().lower() for line in f if line.strip()]
def clean_url(url):
    """Normalize a URL for DB storage and lookup.

    Percent-encodes quote characters, removes the scheme so http/https
    duplicates collapse together, trims trailing slashes, drops the query
    string and fragment, then re-adds a canonical "http://" prefix.
    """
    # Escape quote characters that would break quoting downstream.
    sanitized = url.replace('"', '%22').replace("'", '%27')
    # Strip scheme markers wherever they appear (matches legacy behavior).
    sanitized = sanitized.replace('http://', '').replace('https://', '')
    # Trailing slashes go first, then everything after '?' or '#'.
    sanitized = sanitized.rstrip('/')
    sanitized = sanitized.split('?', 1)[0]
    sanitized = sanitized.split('#', 1)[0]
    return 'http://' + sanitized
def feed_buffer_to_process(buffer, p):
    """Write *buffer* to subprocess *p*'s stdin, then close the pipe.

    Intended to run in a separate thread so the parent can read the
    child's stdout concurrently without deadlocking.  Errors are ignored
    because the child (ffmpeg/ffprobe) may legitimately exit before it
    has consumed all of its input.
    """
    try:
        p.stdin.write(buffer)
        p.stdin.close()
    except (OSError, ValueError):
        # OSError covers BrokenPipeError (child exited early); ValueError
        # is raised when writing to an already-closed file object.  The
        # old bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        pass
def get_video_info_buffer(video_buffer):
    """Run ffprobe over an in-memory video and return its parsed JSON metadata.

    *video_buffer* is the raw video bytes; they are piped to ffprobe's
    stdin from a helper thread (see feed_buffer_to_process) so reading
    stdout here cannot deadlock on a full pipe.

    Returns the dict produced by ffprobe's ``-print_format json``
    output (``format`` / ``streams`` keys).  Raises whatever
    ``json.loads`` raises if ffprobe produced no usable output.
    """
    p = subprocess.Popen([
        "ffprobe", "-v", "quiet", "-print_format", "json=c=1",
        "-show_format", "-show_streams", "pipe:"
    ],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL
    )

    feeding_thread = Thread(target=feed_buffer_to_process, args=(video_buffer, p))
    feeding_thread.start()

    try:
        result = p.stdout.read()
    finally:
        # Previously the thread was never joined, stdout never closed
        # and the child never reaped -- every call leaked a zombie
        # process.  Clean all three up even if read()/loads() raises.
        p.stdout.close()
        feeding_thread.join()
        p.wait()

    return json.loads(result.decode())
def try_remove(name):
    """Delete file *name* from disk, ignoring failure to do so.

    Used to clean up temporary video files; a missing file or a
    permission problem is non-fatal here.  Catches OSError only --
    the previous bare ``except:`` would also have hidden
    KeyboardInterrupt and SystemExit.
    """
    try:
        os.remove(name)
    except OSError:
        pass