├── subs_all.txt ├── subs_month.txt ├── l ├── .gitignore ├── favicon.ico ├── images ├── up.png ├── down.png ├── spinner_dark.gif └── spinner_light.gif ├── scrape.sh ├── .htaccess ├── subreddits.cgi ├── README.md ├── subs_week.txt ├── add_sub.cgi ├── status.cgi ├── subs.txt ├── ImageHash.py ├── ClientDB.py ├── DB.py ├── Httpy.py ├── index.html ├── light.css ├── dark.css ├── scan.py ├── search.cgi ├── search.js └── ReddiWrap.py /subs_all.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /subs_month.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /l: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | tail -f log.txt 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | reddit.db 2 | thumbs/ 3 | *.pyc 4 | -------------------------------------------------------------------------------- /favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4pr0n/irarchives/HEAD/favicon.ico -------------------------------------------------------------------------------- /images/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4pr0n/irarchives/HEAD/images/up.png -------------------------------------------------------------------------------- /images/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4pr0n/irarchives/HEAD/images/down.png -------------------------------------------------------------------------------- /images/spinner_dark.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4pr0n/irarchives/HEAD/images/spinner_dark.gif -------------------------------------------------------------------------------- /images/spinner_light.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4pr0n/irarchives/HEAD/images/spinner_light.gif -------------------------------------------------------------------------------- /scrape.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Launches scraper script 'scan.py', stores all output in 'log.txt' 4 | # Runs script in background! 
5 | python scan.py > log.txt 2>&1 & 6 | -------------------------------------------------------------------------------- /.htaccess: -------------------------------------------------------------------------------- 1 | AllowOverride Options 2 | Options +ExecCGI 3 | RedirectMatch 404 ^/thumbs/*$ 4 | RedirectMatch 404 ^/images/*$ 5 | RewriteEngine on 6 | RewriteRule \.(txt|db|py)$ - [R=404] 7 | -------------------------------------------------------------------------------- /subreddits.cgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import cgitb; cgitb.enable() # for debugging 4 | import cgi # for getting query keys/values 5 | 6 | import json # simplejson can be substituted on Python < 2.6 7 | 8 | def get_subs(): 9 | f = open('subs.txt', 'r') 10 | subs = f.read().lower().split('\n') 11 | f.close() 12 | while subs.count("") > 0: 13 | subs.remove("") 14 | print json.dumps({'subreddits': subs}) 15 | 16 | def get_keys(): 17 | """ Retrieves key/value pairs from query, puts in dict """ 18 | form = cgi.FieldStorage() 19 | keys = {} 20 | for key in form.keys(): 21 | keys[key] = form[key].value 22 | return keys 23 | 24 | def main(): 25 | keys = get_keys() 26 | if 'get' in keys: 27 | get_subs() 28 | return 29 | print json.dumps({'error': 'no valid keys given'}) 30 | 31 | if __name__ == '__main__': 32 | print "Content-Type: application/json" 33 | print "" 34 | main() 35 | print '\n\n' 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | irarchives 2 | ========== 3 | 4 | Summary 5 | ------- 6 | NSFW reverse image search for reddit 7 | 8 | Implementation 9 | -------------- 10 | http://i.rarchives.com (Warning: **NSFW**) 11 | 12 | Overview 13 | -------- 14 | Many NSFW reddit posts contain more information about an image than the image alone provides; this project makes those posts searchable by the image itself. 15 | 16 | The repo contains: 17 | * a script to scrape images from reddit posts and store the data in a database. 18 | * a web interface for searching the database. 19 | 20 | Requirements 21 | ------------ 22 | Tested with Python 2.6.x. Should work with 2.7 and possibly 2.5 (not compatible with 2.4 due to JSON limitations). 23 | 24 | The image calculations require the Python Imaging Library, or [PIL](http://www.pythonware.com/products/pil/). 25 | 26 | Notes 27 | ----- 28 | There is no database included with the repo for obvious reasons. 29 | 30 | The database will have to be generated by the user, or, if you ask nicely, you can get a copy of the current database, which contains over 1.5 million hashed images.
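To build the database, run `sh scrape.sh` (it backgrounds `scan.py` and writes all output to `log.txt`; the `l` helper simply tails that log). The search side is handled by `search.cgi` (listed in the file tree above), but the schema that `scan.py` creates already shows the shape of a lookup: hash the query image with `avhash` from `ImageHash.py`, find that hash in the `Hashes` table, and join through `Images` to `Posts`. Below is a minimal sketch of such a lookup, assuming a `reddit.db` built by the scraper; the hash-to-text conversion and the exact query used by `search.cgi` are assumptions, not taken from the repo.

```python
# Minimal reverse-image lookup against the database built by scan.py.
# Assumes reddit.db exists; search.cgi's real query may differ from this sketch.
import sqlite3
from ImageHash import avhash

def find_posts(image_path, db_path='reddit.db'):
    # Hashes.hash is a TEXT column (see the SCHEMA in scan.py), so the
    # integer hash is compared as text here; this is an assumption.
    query_hash = str(avhash(image_path))
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()
    cur.execute(
        'SELECT Posts.permalink, Posts.title '
        '  FROM Hashes '
        '  JOIN Images ON Images.hashid = Hashes.id '
        '  JOIN Posts  ON Posts.id = Images.postid '
        ' WHERE Hashes.hash = ?', (query_hash,))
    rows = cur.fetchall()
    conn.close()
    return rows

if __name__ == '__main__':
    # 'query.jpg' is a placeholder path to the image being searched for.
    for permalink, title in find_posts('query.jpg'):
        print '%s  %s' % (permalink, title)
```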
31 | -------------------------------------------------------------------------------- /subs_week.txt: -------------------------------------------------------------------------------- 1 | penis 2 | asiannsfw 3 | nsfwfunny 4 | girlsflashing 5 | highheels 6 | amateurarchives 7 | punkgirls 8 | femalesgonewild 9 | facialfun 10 | bdsmgw 11 | girlsdoingnerdythings 12 | ultimate_nsfw 13 | femalepov 14 | gaymersgonewild 15 | stockings 16 | girlswithneonhair 17 | cuckold 18 | tipofmypenis 19 | thinspo 20 | legs 21 | dykesgonewild 22 | clopclop 23 | vagina 24 | pregnant 25 | facesitting 26 | pegging 27 | bustybabes 28 | realasians 29 | a_cups 30 | lipsthatgrip 31 | onherknees 32 | tightdresses 33 | sexyfrex 34 | bustypetite 35 | blonde 36 | beachgirls 37 | pornid 38 | curls 39 | suicidegirls 40 | tinytits 41 | palegirls 42 | christiangirls 43 | collegeamateurs 44 | juicyasians 45 | amateurselfshot 46 | stacked 47 | asstastic 48 | happyembarrassedgirls 49 | twingirls 50 | bonermaterial 51 | happygaps 52 | incest 53 | girlsinschooluniforms 54 | groupofnudegirls 55 | victoriaraeblack 56 | doggystyle 57 | tittydrop 58 | festivalsluts 59 | asshole 60 | nsfwcosplay 61 | collared 62 | shinyporn 63 | boobbounce 64 | unrealgirls 65 | hardbodies 66 | photoplunder 67 | cellshots 68 | -------------------------------------------------------------------------------- /add_sub.cgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import cgitb; cgitb.enable() # for debugging 4 | import cgi # for getting query keys/values 5 | 6 | import json # simplejson can be substituted on Python < 2.6 7 | 8 | def get_subs(): 9 | f = open('subs.txt', 'r') 10 | subs = f.read().lower().split('\n') 11 | f.close() 12 | while subs.count("") > 0: 13 | subs.remove("") 14 | return subs 15 | 16 | def save_subs(subs): 17 | f = open('subs.txt', 'w') 18 | for sub in subs: 19 | f.write(sub + '\n') 20 | f.close() 21 | 22 | def get_keys(): 23 | """ Retrieves key/value pairs from query, puts in dict """ 24 | form = cgi.FieldStorage() 25 | keys = {} 26 | for key in form.keys(): 27 | keys[key] = form[key].value 28 | return keys 29 | 30 | def main(): 31 | keys = get_keys() 32 | if not 'subreddit' in keys: 33 | err = {} 34 | err['result'] = "no subreddit given" 35 | print json.dumps(err) 36 | return 37 | sub = keys['subreddit'].lower() 38 | sub = sub.replace(' ', '') # strip any spaces from the subreddit name 39 | 40 | subs = get_subs() 41 | if sub in subs: 42 | err = {} 43 | err['result'] = "subreddit already exists" 44 | print json.dumps(err) 45 | return 46 | 47 | subs.append(sub) 48 | save_subs(subs) 49 | 50 | j = {} 51 | j['result'] = "subreddit added" 52 | print json.dumps(j) 53 | 54 | if __name__ == '__main__': 55 | print "Content-Type: application/json" 56 | print "" 57 | main() 58 | print '\n\n' 59 | 60 | -------------------------------------------------------------------------------- /status.cgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | 4 | ############### 5 | # IMPORTS 6 | 7 | import cgitb; cgitb.enable() # for debugging 8 | #import cgi # for getting query keys/values 9 | 10 | from ClientDB import DB 11 | db = DB('reddit.db') 12 | 13 | import_failed = True 14 | try: 15 | import json 16 | import_failed = False 17 | except ImportError: pass 18 | 19 | if import_failed: 20 | # Older versions of python don't work with 'json', so we use simplejson 21 | try: 22 | import simplejson as json 23 | import_failed = False 24 | except ImportError: pass 25 | 26 | if import_failed: 27 | # If
simplejson isn't found, we must be on Python 2.5 or below 28 | try: 29 | from JSON import json 30 | except ImportError: 31 | print '\un Unable to load JSON library! exiting' 32 | exit(1) 33 | 34 | 35 | ###################### 36 | # METHODS 37 | 38 | def start(): 39 | print to_json() 40 | 41 | def get_count(table): 42 | return db.select("count(*)", table)[0][0] 43 | 44 | def count_subs_db(): 45 | return db.select("count(distinct subreddit)", "Posts")[0][0] 46 | 47 | def count_subs_txt(): 48 | f = open('subs.txt', 'r') 49 | subs = f.read().split('\n') 50 | f.close() 51 | while subs.count("") > 0: 52 | subs.remove("") 53 | return len(subs) 54 | 55 | def to_json(): 56 | dict = { 57 | 'status' : { 58 | 'posts' : get_count('Posts'), 59 | 'comments' : get_count('Comments'), 60 | 'albums' : get_count('Albums'), 61 | 'images' : get_count('Images'), 62 | 'subreddits' : count_subs_txt(), 63 | 'subreddits_pending' : count_subs_txt() 64 | }, 65 | } 66 | return json.dumps(dict) 67 | 68 | if __name__ == '__main__': 69 | print "Content-Type: application/json" 70 | print "" 71 | start() 72 | print '\n\n' 73 | 74 | -------------------------------------------------------------------------------- /subs.txt: -------------------------------------------------------------------------------- 1 | gonewild 2 | nsfw 3 | realgirls 4 | boobies 5 | nsfw_gif 6 | legalteens 7 | ass 8 | amateur 9 | ginger 10 | girlsinyogapants 11 | milf 12 | onoff 13 | nsfw_gifs 14 | randomsexiness 15 | treesgonewild 16 | cumsluts 17 | gonewildplus 18 | dirtysmall 19 | girlswithglasses 20 | nsfw_nospam 21 | hotchickswithtattoos 22 | voluptuous 23 | redheads 24 | thick 25 | curvy 26 | passionx 27 | asianhotties 28 | blowjobs 29 | o_faces 30 | hugeboobs 31 | upskirt 32 | nsfw2 33 | nsfwoutfits 34 | gonewildstories 35 | scenegirls 36 | collegesluts 37 | camwhores 38 | rule34 39 | unashamed 40 | nsfw_wtf 41 | ladybonersgw 42 | pornography 43 | bondage 44 | nsfwhardcore 45 | bdsmcommunity 46 | hentai 47 | wtsstadamit 48 | sexybutnotporn 49 | boltedontits 50 | grool 51 | metart 52 | porn 53 | bottomless_vixens 54 | girlsfinishingthejob 55 | notsafefornature 56 | bdsm 57 | pussy 58 | tightshorts 59 | nude 60 | datgap 61 | nsfw_wallpapers 62 | girlskissing 63 | lesbians 64 | womenofcolor 65 | homemadexxx 66 | dirtygaming 67 | photobucketplunder 68 | nsfwvideos 69 | indianbabes 70 | facedownassup 71 | gonewildcurvy 72 | beach 73 | girlsinstripedsocks 74 | nipples 75 | cumfetish 76 | creampies 77 | downblouse 78 | tgirls 79 | burstingout 80 | dyke 81 | penis 82 | asiannsfw 83 | nsfwfunny 84 | highheels 85 | amateurarchives 86 | punkgirls 87 | femalesgonewild 88 | facialfun 89 | bdsmgw 90 | girlsdoingnerdythings 91 | ultimate_nsfw 92 | femalepov 93 | gaymersgonewild 94 | stockings 95 | girlswithneonhair 96 | cuckold 97 | tipofmypenis 98 | thinspo 99 | legs 100 | dykesgonewild 101 | clopclop 102 | vagina 103 | pregnant 104 | facesitting 105 | pegging 106 | bustybabes 107 | realasians 108 | a_cups 109 | lipsthatgrip 110 | onherknees 111 | tightdresses 112 | sexyfrex 113 | bustypetite 114 | blonde 115 | beachgirls 116 | pornid 117 | curls 118 | suicidegirls 119 | tinytits 120 | palegirls 121 | christiangirls 122 | collegeamateurs 123 | juicyasians 124 | stacked 125 | asstastic 126 | happyembarrassedgirls 127 | twingirls 128 | bonermaterial 129 | happygaps 130 | incest 131 | girlsinschooluniforms 132 | groupofnudegirls 133 | victoriaraeblack 134 | doggystyle 135 | tittydrop 136 | festivalsluts 137 | asshole 138 | nsfwcosplay 139 | collared 140 | 
shinyporn 141 | boobbounce 142 | unrealgirls 143 | hardbodies 144 | photoplunder 145 | cellshots 146 | hannahilton 147 | ariagiovanni 148 | page3glamour 149 | theunderboob 150 | theunderbun 151 | pokies 152 | gonewildcolor 153 | athleticgirls 154 | girlsontheirbacks 155 | bikinis 156 | sexygirlsinboots 157 | wet 158 | thehangingboobs 159 | photoedited 160 | -------------------------------------------------------------------------------- /ImageHash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # From http://sprunge.us/WcVJ?py 4 | # All credit goes to original author 5 | 6 | from os import path, mkdir, sep, remove 7 | from sys import exit, argv 8 | from Httpy import Httpy 9 | 10 | from PIL import Image 11 | 12 | def avhash(im): 13 | """ 14 | Shrinks image to 16x16 pixels, 15 | Finds average amongst the pixels, 16 | Iterates over every pixel, comparing to average. 17 | 1 if above avg, 0 if below. 18 | Returns resulting integer. (hash of the image 'im') 19 | Updated to not use ternary operator (not available in python 2.4.x) 20 | """ 21 | if not isinstance(im, Image.Image): 22 | im = Image.open(im) 23 | im = im.convert('L').resize((16, 16), Image.ANTIALIAS) 24 | ttl = 0 25 | for gd in im.getdata(): ttl += gd 26 | avg = ttl / 256 27 | result = 0 28 | for i, gd in enumerate(im.getdata()): 29 | if gd > avg: 30 | result += (1 << i) 31 | del im 32 | return result 33 | 34 | def avhash_dict(im): 35 | """ 36 | Generate hashes for the image, including variations of the image 37 | * Regular image 38 | * Mirrored (left-right) 39 | * Rotated left (90deg) 40 | * Rotated right (270deg) 41 | """ 42 | if not isinstance(im, Image.Image): 43 | im = Image.open(im) 44 | im = im.resize((16, 16), Image.ANTIALIAS).convert('L') 45 | ttl = 0 46 | for gd in im.getdata(): ttl += gd 47 | avg = ttl / 256 48 | result = {} 49 | 50 | # Regular hash 51 | regular_hash = 0 52 | for i, gd in enumerate(im.getdata()): 53 | if gd > avg: 54 | regular_hash += (1 << i) 55 | result['hash'] = regular_hash 56 | 57 | # Mirror hash 58 | mirror_im = im.transpose(Image.FLIP_LEFT_RIGHT) 59 | mirror_hash = 0 60 | for i, gd in enumerate(mirror_im.getdata()): 61 | if gd > avg: 62 | mirror_hash += (1 << i) 63 | result['mirror'] = mirror_hash 64 | 65 | # Rotated 90deg hash 66 | left_im = im.transpose(Image.ROTATE_90) 67 | left_hash = 0 68 | for i, gd in enumerate(left_im.getdata()): 69 | if gd > avg: 70 | left_hash += (1 << i) 71 | result['left'] = left_hash 72 | 73 | # Rotated 270deg hash 74 | right_im = im.transpose(Image.ROTATE_270) 75 | right_hash = 0 76 | for i, gd in enumerate(right_im.getdata()): 77 | if gd > avg: 78 | right_hash += (1 << i) 79 | result['right'] = right_hash 80 | del im 81 | return result 82 | 83 | def dimensions(im): 84 | """ Returns tuple (Width, Height) for given image. """ 85 | if not isinstance(im, Image.Image): 86 | im = Image.open(im) 87 | result = im.size 88 | del im 89 | return result 90 | 91 | def create_thumb(im, num): 92 | """ 93 | Creates a thumbnail for a given image file. 
94 | Saves to 'thumbs' directory, named .jpg 95 | """ 96 | try: mkdir('thumbs') 97 | except OSError: pass 98 | 99 | if not isinstance(im, Image.Image): 100 | im = Image.open(im) 101 | # Convert to RGB if not already 102 | if im.mode != "RGB": im = im.convert("RGB") 103 | im.thumbnail( (100, 100), Image.ANTIALIAS) 104 | im.save('thumbs%s%d.jpg' % (sep, num), 'JPEG') 105 | del im 106 | 107 | 108 | if __name__ == '__main__': 109 | args = argv[1:] 110 | if len(args) == 0: 111 | print 'argument required: image file location' 112 | exit(1) 113 | filename = ' '.join(args) 114 | remove_file = False 115 | if not path.exists(filename): 116 | if '://' in filename: 117 | web = Httpy() 118 | web.download(filename, 'img.jpg') 119 | filename = 'img.jpg' 120 | remove_file = True 121 | else: 122 | print 'file not found: %s' % (filename) 123 | exit(1) 124 | 125 | print 'Hash:\t\t%d' % avhash(filename) 126 | 127 | print '' 128 | d = avhash_dict(filename) 129 | for key in d: 130 | print 'Hash[%s] = \t%d' % (key, d[key]) 131 | print '' 132 | 133 | dim = dimensions(filename) 134 | print 'Dimensions:\t%dx%d' % (dim[0], dim[1]) 135 | 136 | #create_thumb(filename, 1) 137 | if remove_file: 138 | remove(filename) 139 | -------------------------------------------------------------------------------- /ClientDB.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | try: 5 | import sqlite3 6 | except ImportError: 7 | import sqlite as sqlite3 8 | 9 | from time import sleep 10 | 11 | class DB: 12 | """ 13 | Database object. 14 | Used for communicating with the SQLite database. 15 | """ 16 | 17 | def __init__(self, db_file): 18 | """ 19 | Initializes database. 20 | Attempts to creates tables with schemas if needed. 21 | * db_file - Name of the database file. 22 | 23 | For example: 24 | db = DB('file.db') 25 | """ 26 | self.conn = None 27 | 28 | self.conn = sqlite3.connect(db_file) # TODO CHANGE BACK, encoding='utf-8') 29 | self.conn.text_factory = lambda x: unicode(x, "utf-8", "ignore") 30 | 31 | def get_cursor(self): 32 | return self.conn.cursor() 33 | 34 | def count(self, table, where): 35 | """ 36 | Counts the number of tuples in 'table' where the 'where' condition holds 37 | * table - The table name, a string 38 | * where - A condition, such as "year == 1999" 39 | 40 | Returns # of tuples found in query. 41 | """ 42 | cur = self.conn.cursor() 43 | result = cur.execute('''select count(*) from %s where %s''' % (table, where, )).fetchall() 44 | #self.conn.commit() 45 | cur.close() 46 | return result[0][0] 47 | 48 | 49 | def select(self, what, table, where=''): 50 | """ 51 | Executes a SQL SELECT command. Returns tuples 52 | Type the entire SELECT statement. 53 | For example: 54 | 55 | db = DB('file.db', {'table_name': 'id int primary key'} ) 56 | tuples = db.select('''SELECT * FROM table WHERE id > 0''') 57 | for result in tuples: 58 | print result[0] # prints first attribute 59 | print result[1] # prints second attribute 60 | ... 
61 | """ 62 | cur = self.conn.cursor() 63 | query_string = '''SELECT %s FROM %s''' % (what, table) 64 | if where != '': 65 | query_string += ''' WHERE %s''' % (where) 66 | # Great for debugging; print every sql query 67 | #print query_string 68 | 69 | try_again = True 70 | while try_again: 71 | try: 72 | cur.execute(query_string) 73 | try_again = False 74 | except Exception: 75 | sleep(0.1) 76 | 77 | results = [] 78 | for result in cur: 79 | results.append(result) 80 | cur.close() 81 | return results 82 | 83 | 84 | def insert(self, table, values): 85 | """ 86 | Inserts tuple of values into database. 87 | * table - The table name, a string 88 | * values - The tuple to insert into the database. 89 | Returns row id of tuple inserted, or -1 if error occurred. 90 | """ 91 | cur = self.conn.cursor() 92 | try: 93 | questions = '' 94 | for i in xrange(0, len(values)): 95 | if questions != '': questions += ',' 96 | questions += '%s' 97 | exec_string = '''insert into %s values (%s)''' % (table, questions) 98 | result = cur.execute(exec_string, values) 99 | last_row_id = cur.lastrowid 100 | cur.close() 101 | return last_row_id 102 | except sqlite3.IntegrityError: 103 | cur.close() 104 | return -1 105 | 106 | 107 | def commit(self): 108 | """ 109 | Commits any changes to the database. 110 | CHANGES WILL NOT HAPPEN UNLEsS THIS COMMAND IS EXECUTED AFTERWARD! 111 | """ 112 | try_again = True 113 | while try_again: 114 | try: 115 | self.conn.commit() 116 | try_again = False 117 | except Exception: 118 | sleep(0.1) 119 | 120 | 121 | def execute(self, statement, values=None): 122 | """ 123 | Executes a statement. Similar to the 'select' method, but does not return anything. 124 | """ 125 | cur = self.conn.cursor() 126 | try_again = True 127 | while try_again: 128 | try: 129 | if values == None: 130 | result = cur.execute(statement) 131 | else: 132 | result = cur.execute(statement, values) 133 | try_again = False 134 | except: 135 | sleep(0.1) 136 | return result 137 | 138 | -------------------------------------------------------------------------------- /DB.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | try: 5 | import sqlite3 6 | except ImportError: 7 | import sqlite as sqlite3 8 | 9 | from time import sleep 10 | 11 | class DB: 12 | """ 13 | Database object. 14 | Used for communicating with the SQLite database. 15 | """ 16 | 17 | def __init__(self, db_file, **schemas): 18 | """ 19 | Initializes database. 20 | Attempts to creates tables with schemas if needed. 21 | * db_file - Name of the database file. 22 | * schemas - A python dictionary where: 23 | KEY is the table name, 24 | VALUE is that table's schema. 25 | 26 | For example: 27 | db = DB('file.db', { 28 | 'Customer': 'name text, phone int, address text', 29 | 'Order': 'id int primary key, customer_name text, cost real'}) 30 | # This would open the 'file.db' file and create two tables with the respective schemas. 31 | If the tables already exist, the existing tables remain unaltered. 32 | """ 33 | self.conn = None 34 | 35 | self.conn = sqlite3.connect(db_file) #TODO CHANGE BACK, encoding='utf-8') 36 | self.conn.text_factory = lambda x: unicode(x, "utf-8", "ignore") 37 | 38 | # Don't create tables if not supplied. 39 | if schemas != None and schemas != {} and len(schemas) > 0: 40 | 41 | # Create table for every schema given. 
42 | for key in schemas: 43 | self.create_table(key, schemas[key]) 44 | 45 | def create_table(self, table_name, schema): 46 | """ 47 | Creates new table with schema 48 | """ 49 | cur = self.conn.cursor() 50 | try: 51 | cur.execute('''CREATE TABLE IF NOT EXISTS %s (%s)''' % (table_name, schema) ) 52 | self.conn.commit() 53 | except sqlite3.OperationalError, e: 54 | # Ignore if table already exists, otherwise print error 55 | if str(e).find('already exists') == -1: 56 | print ' ***', e 57 | cur.close() 58 | 59 | 60 | 61 | def commit(self): 62 | """ 63 | Commits any changes to the database. 64 | CHANGES WILL NOT HAPPEN UNLEsS THIS COMMAND IS EXECUTED AFTERWARD! 65 | """ 66 | try_again = True 67 | while try_again: 68 | try: 69 | self.conn.commit() 70 | try_again = False 71 | except: 72 | sleep(1) 73 | 74 | 75 | def insert(self, table, values): 76 | """ 77 | Inserts tuple of values into database. 78 | * table - The table name, a string 79 | * values - The tuple to insert into the database. 80 | 81 | Returns row id of tuple inserted, or -1 if error occurred. 82 | """ 83 | cur = self.conn.cursor() 84 | try: 85 | questions = '' 86 | for i in xrange(0, len(values)): 87 | if questions != '': questions += ',' 88 | questions += '?' 89 | exec_string = '''insert into %s values (%s)''' % (table, questions) 90 | result = cur.execute(exec_string, values) 91 | #self.conn.commit() 92 | last_row_id = cur.lastrowid 93 | cur.close() 94 | return last_row_id 95 | except sqlite3.IntegrityError: 96 | cur.close() 97 | return -1 98 | 99 | 100 | def get_cursor(self): 101 | return self.conn.cursor() 102 | 103 | def count(self, table, where): 104 | """ 105 | Counts the number of tuples in 'table' where the 'where' condition holds 106 | * table - The table name, a string 107 | * where - A condition, such as "year == 1999" 108 | 109 | Returns # of tuples found in query. 110 | """ 111 | cur = self.conn.cursor() 112 | result = cur.execute('''select count(*) from %s where %s''' % (table, where, )).fetchall() 113 | #self.conn.commit() 114 | cur.close() 115 | return result[0][0] 116 | 117 | 118 | def select(self, what, table, where=''): 119 | """ 120 | Executes a SQL SELECT command. Returns tuples 121 | Type the entire SELECT statement. 122 | For example: 123 | 124 | db = DB('file.db', {'table_name': 'id int primary key'} ) 125 | tuples = db.select('''SELECT * FROM table WHERE id > 0''') 126 | for result in tuples: 127 | print result[0] # prints first attribute 128 | print result[1] # prints second attribute 129 | ... 130 | """ 131 | cur = self.conn.cursor() 132 | query_string = '''SELECT %s FROM %s''' % (what, table) 133 | if where != '': 134 | query_string += ''' WHERE %s''' % (where) 135 | cur.execute(query_string) 136 | results = [] 137 | for result in cur: 138 | results.append(result) 139 | #self.conn.commit() 140 | cur.close() 141 | return results 142 | 143 | 144 | def execute(self, statement): 145 | """ 146 | Executes a statement. Similar to the 'select' method, but does not return anything. 
147 | """ 148 | cur = self.conn.cursor() 149 | result = cur.execute(statement) 150 | #self.conn.commit() 151 | return result 152 | 153 | -------------------------------------------------------------------------------- /Httpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from cookielib import LWPCookieJar as CookieJar 4 | from urllib2 import build_opener, HTTPCookieProcessor, Request 5 | from urllib import urlencode 6 | 7 | DEFAULT_USERAGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:19.0) Gecko/20100101 Firefox/19.0' 8 | DEFAULT_TIMEOUT = 15 9 | 10 | class Httpy: 11 | """ 12 | Easily perform GET and POST requests with web servers. 13 | Keeps cookies to retain web sessions. 14 | Includes helpful methods that go beyond GET and POST: 15 | * get_meta - retrieves meta info about a URL 16 | * unshorten - returns (some) redirected URLs 17 | """ 18 | 19 | def __init__(self, user_agent=DEFAULT_USERAGENT, timeout=DEFAULT_TIMEOUT): 20 | self.cj = CookieJar() 21 | self.opener = build_opener(HTTPCookieProcessor(self.cj)) 22 | self.urlopen = self.opener.open 23 | self.user_agent = user_agent 24 | self.timeout = timeout 25 | 26 | def get(self, url, timeout=DEFAULT_TIMEOUT, raise_exception=False): 27 | """ GET request """ 28 | result = '' 29 | headers = self.get_headers() 30 | try: 31 | req = Request(url, headers=headers) 32 | handle = self.urlopen(req, timeout=timeout) 33 | result = handle.read() 34 | except Exception, e: 35 | if raise_exception: 36 | raise e 37 | return result 38 | 39 | def post(self, url, postdata=None, timeout=DEFAULT_TIMEOUT, raise_exception=False): 40 | """ 41 | Submits a POST request to URL. Posts 'postdata' if 42 | not None. URL-encodes postdata and strips Unicode chars. 43 | """ 44 | result = '' 45 | headers = self.get_headers() 46 | if postdata == None: 47 | encoded_data = '' 48 | else: 49 | encoded_data = urlencode(postdata) 50 | try: 51 | req = Request(url, encoded_data, headers) 52 | handle = self.urlopen(req, timeout=timeout) 53 | result = handle.read() 54 | except Exception, e: 55 | if raise_exception: raise e 56 | return result 57 | 58 | def download(self, url, save_as, timeout=DEFAULT_TIMEOUT, raise_exception=False): 59 | """ Downloads file from URL to save_as path. """ 60 | result = False 61 | headers = self.get_headers() 62 | outfile = open(save_as, 'w') 63 | try: 64 | req = Request(url, headers=headers) 65 | handle = self.urlopen(req, timeout=timeout) 66 | while True: 67 | buf = handle.read(65536) 68 | if len(buf) == 0: break 69 | outfile.write(buf) 70 | result = True 71 | except Exception, e: 72 | if raise_exception: raise e 73 | outfile.close() 74 | return result 75 | 76 | def check_url(self, url): 77 | """ Returns True if URL is valid and can be opened. """ 78 | try: 79 | req = Request(url) 80 | self.urlopen(url) 81 | except Exception: 82 | return False 83 | return True 84 | 85 | def get_meta(self, url, raise_exception=False, timeout=DEFAULT_TIMEOUT): 86 | """ 87 | Returns a dict containing info about the URL. 88 | Such as Content-Type, Content-Length, etc. 89 | """ 90 | try: 91 | headers = self.get_headers() 92 | req = Request(url, headers=headers) 93 | handle = self.urlopen(req, timeout=timeout) 94 | return handle.info() 95 | except Exception, e: 96 | if raise_exception: raise e 97 | return {} 98 | 99 | def unshorten(self, url, timeout=DEFAULT_TIMEOUT): 100 | """ 101 | Attempts to resolve redirected URL. 102 | Returns new resolved URL if found, 103 | otherwise returns original URL. 
104 | """ 105 | try: 106 | headers = self.get_headers() 107 | req = Request(url, headers=headers) 108 | handle = urlopen(req, timeout=timeout) 109 | return handle.url 110 | except Exception: 111 | return url 112 | 113 | # SETTERS 114 | def clear_cookies(self): 115 | self.cj.clear() 116 | def set_user_agent(self, user_agent): 117 | self.user_agent = user_agent 118 | 119 | # HELPER METHODS 120 | def get_headers(self): 121 | """ Returns default headers for URL requests """ 122 | return {'User-agent' : self.user_agent} 123 | 124 | def between(self, source, start, finish): 125 | """ 126 | Useful when parsing responses from web servers. 127 | 128 | Looks through a given source string for all items between two other strings, 129 | returns the list of items (or empty list if none are found). 130 | 131 | Example: 132 | test = 'hello >30< test >20< asdf >>10<< sadf>' 133 | print between(test, '>', '<') 134 | 135 | would print the list: 136 | ['30', '20', '>10'] 137 | """ 138 | result = [] 139 | i = source.find(start) 140 | j = source.find(finish, i + len(start) + 1) 141 | 142 | while i >= 0 and j >= 0: 143 | i = i + len(start) 144 | result.append(source[i:j]) 145 | i = source.find(start, j + len(finish)) 146 | j = source.find(finish, i + len(start) + 1) 147 | 148 | return result 149 | 150 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | {NSFW} reddit reverse image search 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
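The stripped `index.html` appears to have tied a search box, a subreddit list, and a database-status panel (note the `.db_stats`, `.status_refresh`, and `#subreddit_dropdown` rules in the stylesheets that follow) to the CGI scripts shown earlier. Those scripts emit plain JSON, so they can also be exercised without the page; a small sketch, assuming the `.cgi` files are served from a placeholder host:

```python
# Hedged sketch: query the JSON CGI endpoints directly.
# BASE is a placeholder; point it at wherever the .cgi scripts are served.
import json
import urllib2

BASE = 'http://localhost'

# status.cgi reports row counts for the main tables in reddit.db.
status = json.load(urllib2.urlopen(BASE + '/status.cgi'))['status']
print 'posts: %(posts)d, comments: %(comments)d, images: %(images)d' % status

# subreddits.cgi?get=1 returns the list of subreddits from subs.txt.
subs = json.load(urllib2.urlopen(BASE + '/subreddits.cgi?get=1'))['subreddits']
print '%d subreddits indexed' % len(subs)
```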
201 | 202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /light.css: -------------------------------------------------------------------------------- 1 | div.top_bar { 2 | font-size: 0.8em; 3 | padding: 0px; 4 | border-bottom: solid 2px #f00; 5 | position: fixed; 6 | display: block; 7 | top: 0; 8 | left: 0; 9 | width: 100%; 10 | z-index: 5; 11 | } 12 | 13 | div.div_bar { 14 | display: none; 15 | font-size: 0.8em; 16 | color: #222; 17 | background-color: #fff; 18 | padding: 2px; 19 | padding-left: 10px; 20 | z-index: 5; 21 | } 22 | 23 | div.div_bar_warning { 24 | font-size: 1.2em; 25 | color: #f00; 26 | background-color: #fff; 27 | text-align: center; 28 | padding: 3px; 29 | z-index: 5; 30 | } 31 | 32 | a.bookmarklet { 33 | text-decoration: none; 34 | font-weight: bold; 35 | color: #222; 36 | background-color: #fff; 37 | border: solid 1px #222; 38 | border-radius: 5px; 39 | padding: 3px; 40 | padding-top: 1px; 41 | padding-bottom: 0px; 42 | } 43 | a.bookmarklet:hover { 44 | background-color: #f00; 45 | color: #fff; 46 | } 47 | 48 | a.theme { 49 | color: #222; 50 | font-weight: bold; 51 | } 52 | a.theme:hover { 53 | color: #000; 54 | text-decoration: underline; 55 | } 56 | 57 | table.page { 58 | max-width: 900px; 59 | padding: 0px; 60 | padding-left: 20px; 61 | padding-right: 20px; 62 | } 63 | 64 | body { 65 | padding: 0px; 66 | background-color: #fefefe; 67 | color: #222; 68 | font-family: Helvetica, Arial, Verdana; /*Consolas, Verdana, monospace;*/ 69 | font-size: 1.2em; 70 | } 71 | 72 | a { 73 | text-decoration: none; 74 | color: #00f; 75 | } 76 | a:visited { color: #606; } 77 | a:hover { color: #505; } 78 | 79 | a.no_color { 80 | text-decoration: underline; 81 | color: #fff; 82 | } 83 | a.no_color:hover { color: #ddd; } 84 | a.no_color:visited { color: #ddd; } 85 | 86 | a.nsfw { 87 | font-size: 2.5em; 88 | letter-spacing: -0.02em; 89 | font-weight: bold; 90 | } 91 | .nsfw_red { 92 | color: #b00; 93 | letter-spacing: -0.05em; 94 | padding: 0px; 95 | text-shadow: 4px 0px 10px rgba(200, 0, 0, 0.3), 96 | 0px 4px 10px rgba(200, 0, 0, 0.3), 97 | -4px 0px 10px rgba(200, 0, 0, 0.3), 98 | 0px -4px 10px rgba(200, 0, 0, 0.3); 99 | } 100 | a.nsfw:hover .nsfw_red { 101 | color: #f00; 102 | text-shadow: 4px 0px 10px rgba(255, 0, 0, 0.3), 103 | 0px 4px 10px rgba(255, 0, 0, 0.3), 104 | -4px 0px 10px rgba(255, 0, 0, 0.3), 105 | 0px -4px 10px rgba(255, 0, 0, 0.3); 106 | } 107 | .nsfw_bracket { 108 | color: #b00; 109 | font-size: 1.2em; 110 | letter-spacing: -0.15em; 111 | position: relative; 112 | text-shadow: 4px 0px 10px rgba(200, 0, 0, 0.3), 113 | 0px 4px 10px rgba(200, 0, 0, 0.3), 114 | -4px 0px 10px rgba(200, 0, 0, 0.3), 115 | 0px -4px 10px rgba(200, 0, 0, 0.3); 116 | } 117 | a.nsfw:hover .nsfw_bracket { 118 | color: #f00; 119 | letter-spacing: -0.15em; 120 | text-shadow: 4px 0px 10px rgba(255, 0, 0, 0.3), 121 | 0px 4px 10px rgba(255, 0, 0, 0.3), 122 | -4px 0px 10px rgba(255, 0, 0, 0.3), 123 | 0px -4px 10px rgba(255, 0, 0, 0.3); 124 | } 125 | #nsfw_bracket_left { 126 | left: 5px; 127 | } 128 | #nsfw_bracket_right { 129 | left: -7px; 130 | } 131 | .nsfw_white { 132 | color: #666; 133 | position: relative; 134 | top: -2px; 135 | padding: 0px; 136 | letter-spacing: -0.05em; 137 | } 138 | a.nsfw:hover .nsfw_white { 139 | color: #999; 140 | } 141 | 142 | table { 143 | border-width: 0px; 144 | padding: 10px; 145 | border-spacing: 10px; 146 | border-style: outset; 147 | } 148 | 149 | table.invisible { 150 | border-width: 0px; 151 | padding: 0px; 
152 | border-spacing: 0px; 153 | } 154 | 155 | table.search { 156 | padding: 0px; 157 | padding-left: 25px; 158 | } 159 | 160 | td { 161 | padding: 0px; 162 | } 163 | 164 | td.search_url { 165 | font-size: 1.5em; 166 | cursor: default; 167 | text-align: right; 168 | vertical-align: bottom; 169 | padding-bottom: 2px; 170 | } 171 | 172 | input.search_text { 173 | font-size: 1.2em; 174 | background-color: #fff; 175 | color: #222; 176 | cursor: default; 177 | border: solid 1px #222; 178 | border-radius: 5px; 179 | width: 100%; 180 | padding: 4px; 181 | letter-spacing: -0.05em; 182 | -webkit-box-sizing: border-box; 183 | -moz-box-sizing: border-box; 184 | } 185 | input.search_text:focus { 186 | color: #222; 187 | background-color: #eee; 188 | } 189 | input.search_text:hover { 190 | color: #222; 191 | background-color: #eee; 192 | } 193 | 194 | input.search_button { 195 | font-size: 1.5em; 196 | font-weight: bold; 197 | background-color: #b00; 198 | color: #fff; 199 | border: solid 1px #222; 200 | border-radius: 5px; 201 | } 202 | input.search_button:hover { 203 | background-color: #f00; 204 | } 205 | 206 | td.search_status { 207 | padding-top: 15px; 208 | padding-bottom: 15px; 209 | padding-left: 0px; 210 | text-align: right; 211 | } 212 | 213 | td.search_status_result { 214 | padding-top: 15px; 215 | padding-bottom: 0px; 216 | padding-left: 10px; 217 | padding-right: 10px; 218 | text-align: center; 219 | } 220 | 221 | .search_count { 222 | color: #222; 223 | font-weight: bold; 224 | font-size: 1.5em; 225 | } 226 | 227 | .search_count_empty { 228 | color: #222; 229 | font-weight: bold; 230 | font-size: 1.5em; 231 | } 232 | 233 | .search_count_subtext { 234 | color: #222; 235 | font-weight: bold; 236 | font-size: 1.2em; 237 | } 238 | 239 | .search_result_title { 240 | color: #222; 241 | font-weight: bold; 242 | font-size: 1.5em; 243 | text-align: center; 244 | border-width: 0px; 245 | border: none; 246 | } 247 | 248 | img.result_thumbnail { 249 | padding: 0px; 250 | padding-left: 5px; 251 | display: block; 252 | } 253 | 254 | td.result_arrow { 255 | display: block; 256 | font-size: 1.5em; 257 | padding: 0px; 258 | padding-right: 5px; 259 | padding-left: 5px; 260 | text-align: center; 261 | } 262 | 263 | td.result_score { 264 | display: block; 265 | padding: 0px; 266 | padding-left: 5px; 267 | padding-right: 5px; 268 | text-align: center; 269 | } 270 | 271 | span.result_score { 272 | padding: 0px; 273 | font-size: 1.2em; 274 | font-weight: bold; 275 | vertical-align: middle; 276 | color: #ee3400; 277 | } 278 | 279 | img.vote { 280 | width: 20px; 281 | height: 18px; 282 | display: inline-block; 283 | } 284 | 285 | img.result_thumbnail { 286 | 287 | } 288 | 289 | a.result_link { 290 | font-size: 1.5em; 291 | font-weight: none; 292 | } 293 | 294 | span.post_domain { 295 | color: #666; 296 | } 297 | a.post_author { 298 | color: #20b; 299 | } 300 | span.post_ups { 301 | color: #ff4500; 302 | padding: 1px; 303 | padding-right: 2px; 304 | } 305 | span.post_downs { 306 | color: #00f; 307 | padding: 1px; 308 | } 309 | 310 | a.comment_author { 311 | padding-right: 5px; 312 | font-weight: bold; 313 | } 314 | span.comment_ups { 315 | color: #ff4500; 316 | padding: 1px; 317 | padding-right: 2px; 318 | font-weight: bold; 319 | } 320 | span.comment_downs { 321 | color: #00f; 322 | padding: 1px; 323 | font-weight: bold; 324 | } 325 | a.relevant_url, a.relevant_url:visited { 326 | color: #33f; 327 | background-color: #bbb; 328 | border: none; /*outset 2px #f00;*/ 329 | border-radius: 3px; 330 | padding: 2px; 331 
| padding-bottom: 0px; 332 | } 333 | a.relevant_url:hover { 334 | color: #00f; 335 | background-color: #ddd; 336 | } 337 | 338 | td.result_info { 339 | color: #666; 340 | padding-top: 3px; 341 | padding-bottom: 0px; 342 | border-spacing: 0px; 343 | } 344 | 345 | span.result_date { 346 | color: #666; 347 | padding-top: 0px; 348 | padding-bottom: 0px; 349 | } 350 | 351 | a.result_comments { 352 | color: #222; 353 | text-decoration: underline; 354 | padding-top: 0px; 355 | padding-bottom: 0px; 356 | } 357 | 358 | td.result_comment_info { 359 | padding-bottom: 5px; 360 | } 361 | 362 | td.result_comment_body { 363 | padding-bottom: 5px; 364 | line-height: 150%; 365 | } 366 | 367 | a.result_comment_link { 368 | color: #555; 369 | font-weight: bold; 370 | text-decoration: none; 371 | padding-right: 5px; 372 | } 373 | a.result_comment_link:hover { 374 | text-decoration: underline; 375 | } 376 | 377 | li { 378 | color: #222; 379 | } 380 | a.external_link { 381 | font-size: 1.1em; 382 | color: #000; 383 | font-weight: bold; 384 | } 385 | a.external_link:hover { 386 | color: #f00; 387 | } 388 | 389 | .db_stats { 390 | font-weight: bold; 391 | font-size: 1.3em; 392 | } 393 | 394 | .db_status { 395 | text-align: left; 396 | font-weight: bold; 397 | } 398 | 399 | .status_refresh { 400 | font-size: 1.3em; 401 | font-weight: bold; 402 | background-color: #b00; 403 | padding: 0px; 404 | padding-left: 3px; 405 | padding-right: 3px; 406 | color: #fff; 407 | border: none; 408 | border-radius: 5px; 409 | } 410 | 411 | p.footer { 412 | cursor: default; 413 | font-size: 0.6em; 414 | text-align: left; 415 | } 416 | a.footer { 417 | text-decoration: none; 418 | font-weight: bold; 419 | border: solid 1px #222; 420 | background-color: #f00; 421 | color: #fff; 422 | border-radius: 2px; 423 | padding: 3px; 424 | padding-bottom: 1px; 425 | padding-top: 3px; 426 | } 427 | a.footer:hover { 428 | color: #555; 429 | text-decoration: underline; 430 | } 431 | 432 | table.menu { 433 | width: 100%; 434 | padding: 0px; 435 | border-collapse: collapse; 436 | border-spacing: 0; 437 | border-radius: 20px; 438 | } 439 | 440 | tr.menu { 441 | -moz-border-radius-topleft: 10px; 442 | -webkit-border-top-left-radius: 10px; 443 | -khtml-border-top-left-radius: 10px; 444 | border-top-left-radius: 10px; 445 | -moz-border-radius-topright: 10px; 446 | -webkit-border-top-right-radius: 10px; 447 | -khtml-border-top-right-radius: 10px; 448 | border-top-right-radius: 10px; 449 | } 450 | 451 | td.menu { 452 | background-color: #b00; 453 | color: #fff; 454 | font-size: 1.3em; 455 | font-weight: bold; 456 | cursor: pointer; 457 | width: 20%; 458 | padding: 10px; 459 | text-align: center; 460 | white-space: nowrap; 461 | -moz-border-radius-topleft: 10px; 462 | -webkit-border-top-left-radius: 10px; 463 | -khtml-border-top-left-radius: 10px; 464 | border-top-left-radius: 10px; 465 | -moz-border-radius-topright: 10px; 466 | -webkit-border-top-right-radius: 10px; 467 | -khtml-border-top-right-radius: 10px; 468 | border-top-right-radius: 10px; 469 | } 470 | td.menu:hover { 471 | background-color: #d00; 472 | } 473 | td.menuActive { 474 | background-color: #f00; 475 | color: #fff; 476 | font-size: 1.3em; 477 | font-weight: bold; 478 | width: 20%; 479 | cursor: default; 480 | padding: 10px; 481 | text-align: center; 482 | white-space: nowrap; 483 | -moz-border-radius-topleft: 10px; 484 | -webkit-border-top-left-radius: 10px; 485 | -khtml-border-top-left-radius: 10px; 486 | border-top-left-radius: 10px; 487 | -moz-border-radius-topright: 10px; 488 | 
-webkit-border-top-right-radius: 10px; 489 | -khtml-border-top-right-radius: 10px; 490 | border-top-right-radius: 10px; 491 | } 492 | td.menuActive:hover { 493 | background-color: #f00; 494 | } 495 | 496 | .menu_dropdown { 497 | background-color: #f00; 498 | display: none; 499 | padding: 0px; 500 | text-align: center; 501 | -moz-border-radius-bottomleft: 10px; 502 | -webkit-border-bottom-left-radius: 10px; 503 | -khtml-border-bottom-left-radius: 10px; 504 | border-bottom-left-radius: 10px; 505 | 506 | -moz-border-radius-bottomright: 10px; 507 | -webkit-border-bottom-right-radius: 10px; 508 | -khtml-border-bottom-right-radius: 10px; 509 | border-bottom-right-radius: 10px; 510 | } 511 | 512 | #database_dropdown { 513 | display: none; 514 | color: #fff; 515 | } 516 | 517 | #subreddit_dropdown { 518 | display: none; 519 | max-width: 900px; 520 | } 521 | div.subreddits_header { 522 | color: #fff; 523 | padding-top: 15px; 524 | padding-bottom: 10px; 525 | font-size: 1.5em; 526 | font-weight: bold; 527 | } 528 | div#subreddits { 529 | text-align: center; 530 | font-size: 0.8em; 531 | padding: 10px; 532 | } 533 | a.subreddit { 534 | color: #eee; 535 | } 536 | a.subreddit:hover { 537 | color: #fff; 538 | text-decoration: underline; 539 | } 540 | a.subreddit:visited { 541 | color: #aaa; 542 | } 543 | 544 | #about_dropdown { 545 | color: #fff; 546 | display: none; 547 | max-width: 900px; 548 | padding-left: 40px; 549 | padding-right: 40px; 550 | font-size: 0.8em; 551 | } 552 | 553 | table.about_content { 554 | border-collapse: collapse; 555 | margin-left: auto; 556 | margin-right: auto; 557 | margin-top: 20px; 558 | margin-bottom: 15px; 559 | text-align: left; 560 | } 561 | 562 | td.about_left { 563 | text-align: right; 564 | font-size: 1.5em; 565 | font-weight: bold; 566 | height: 40px; 567 | padding-right: 10px; 568 | padding-left: 0px; 569 | } 570 | 571 | div.over18 { 572 | margin: 20px; 573 | cursor: default; 574 | } 575 | h1.over18 { 576 | color: #f00; 577 | text-shadow: 4px 0px 10px rgba(250, 100, 100, 0.3), 578 | 0px 4px 10px rgba(250, 100, 100, 0.3), 579 | -4px 0px 10px rgba(250, 100, 100, 0.3), 580 | 0px -4px 10px rgba(250, 100, 100, 0.3); 581 | } 582 | input.over18 { 583 | font-size: 1.2em; 584 | font-weight: bold; 585 | color: #eee; 586 | background-color: #e00; 587 | padding: 10px; 588 | border-radius: 5px; 589 | border: none; 590 | margin: 10px; 591 | } 592 | input.over18:hover { 593 | cursor: pointer; 594 | box-shadow: 4px 0px 10px rgba(200, 0, 0, 0.3), 595 | 0px 4px 10px rgba(200, 0, 0, 0.3), 596 | -4px 0px 10px rgba(200, 0, 0, 0.3), 597 | 0px -4px 10px rgba(200, 0, 0, 0.3); 598 | } 599 | -------------------------------------------------------------------------------- /dark.css: -------------------------------------------------------------------------------- 1 | div.top_bar { 2 | font-size: 0.8em; 3 | padding: 0px; 4 | border-bottom: solid 2px #f00; 5 | position: fixed; 6 | display: block; 7 | top: 0; 8 | left: 0; 9 | width: 100%; 10 | z-index: 5; 11 | } 12 | 13 | div.div_bar { 14 | display: none; 15 | font-size: 0.8em; 16 | color: #ccc; 17 | background-color: #000; 18 | padding: 2px; 19 | padding-left: 10px; 20 | z-index: 5; 21 | } 22 | 23 | div.div_bar_warning { 24 | font-size: 1.2em; 25 | color: #fff; 26 | background-color: #f00; 27 | text-align: center; 28 | padding: 3px; 29 | z-index: 5; 30 | } 31 | 32 | a.bookmarklet { 33 | text-decoration: none; 34 | font-weight: bold; 35 | color: #fff; 36 | background-color: #000; 37 | border: solid 1px #ccc; 38 | border-radius: 5px; 39 | 
padding: 3px; 40 | padding-top: 1px; 41 | padding-bottom: 0px; 42 | } 43 | a.bookmarklet:visited { 44 | color: #fff; 45 | background-color: #000; 46 | } 47 | a.bookmarklet:hover { 48 | color: #fff; 49 | background-color: #f00; 50 | } 51 | a.bookmarklet:hover:visited { 52 | color: #fff; 53 | background-color: #f00; 54 | } 55 | 56 | 57 | a.theme { 58 | color: #ccc; 59 | font-weight: bold; 60 | } 61 | a.theme:hover { 62 | color: #aaa; 63 | text-decoration: underline; 64 | } 65 | 66 | table.page { 67 | max-width: 900px; 68 | padding: 0px; 69 | padding-left: 20px; 70 | padding-right: 20px; 71 | } 72 | 73 | body { 74 | padding: 0px; 75 | background-color: #030303; 76 | color: #ccc; 77 | font-family: Helvetica, Arial, Verdana; /*Consolas, Verdana, monospace;*/ 78 | font-size: 1.2em; 79 | } 80 | 81 | a { 82 | text-decoration: none; 83 | color: #00f; 84 | } 85 | a:visited { color: #70d; } 86 | a:hover { color: #66f; } 87 | 88 | a.no_color { 89 | text-decoration: underline; 90 | color: #ccc; 91 | } 92 | a.no_color:hover { color: #fff; } 93 | a.no_color:visited { color: #aaa; } 94 | 95 | a.nsfw { 96 | font-size: 2.5em; 97 | letter-spacing: -0.02em; 98 | font-weight: bold; 99 | } 100 | .nsfw_red { 101 | color: #b00; 102 | letter-spacing: -0.05em; 103 | padding: 0px; 104 | text-shadow: 4px 0px 10px rgba(200, 0, 0, 0.3), 105 | 0px 4px 10px rgba(200, 0, 0, 0.3), 106 | -4px 0px 10px rgba(200, 0, 0, 0.3), 107 | 0px -4px 10px rgba(200, 0, 0, 0.3); 108 | } 109 | a.nsfw:hover .nsfw_red { 110 | color: #f00; 111 | text-shadow: 4px 0px 10px rgba(255, 0, 0, 0.3), 112 | 0px 4px 10px rgba(255, 0, 0, 0.3), 113 | -4px 0px 10px rgba(255, 0, 0, 0.3), 114 | 0px -4px 10px rgba(255, 0, 0, 0.3); 115 | } 116 | .nsfw_bracket { 117 | color: #b00; 118 | font-size: 1.2em; 119 | letter-spacing: -0.15em; 120 | position: relative; 121 | text-shadow: 4px 0px 10px rgba(200, 0, 0, 0.3), 122 | 0px 4px 10px rgba(200, 0, 0, 0.3), 123 | -4px 0px 10px rgba(200, 0, 0, 0.3), 124 | 0px -4px 10px rgba(200, 0, 0, 0.3); 125 | } 126 | a.nsfw:hover .nsfw_bracket { 127 | color: #f00; 128 | letter-spacing: -0.15em; 129 | text-shadow: 4px 0px 10px rgba(255, 0, 0, 0.3), 130 | 0px 4px 10px rgba(255, 0, 0, 0.3), 131 | -4px 0px 10px rgba(255, 0, 0, 0.3), 132 | 0px -4px 10px rgba(255, 0, 0, 0.3); 133 | } 134 | #nsfw_bracket_left { 135 | left: 5px; 136 | } 137 | #nsfw_bracket_right { 138 | left: -7px; 139 | } 140 | .nsfw_white { 141 | color: #ddd; 142 | position: relative; 143 | top: -2px; 144 | padding: 0px; 145 | letter-spacing: -0.05em; 146 | } 147 | a.nsfw:hover .nsfw_white { 148 | color: #fff; 149 | } 150 | 151 | table { 152 | border-color: grey; 153 | border-width: 0px; 154 | border-spacing: 10px; 155 | border-style: outset; 156 | } 157 | 158 | table.invisible { 159 | border-width: 0px; 160 | padding: 0px; 161 | border-spacing: 0px; 162 | } 163 | 164 | table.search { 165 | padding: 0px; 166 | padding-left: 25px; 167 | } 168 | 169 | td { 170 | padding: 0px; 171 | } 172 | 173 | td.search_url { 174 | font-size: 1.5em; 175 | cursor: default; 176 | text-align: right; 177 | vertical-align: bottom; 178 | padding-bottom: 2px; 179 | } 180 | 181 | input.search_text { 182 | font-size: 1.2em; 183 | background-color: #444; 184 | color: #eee; 185 | cursor: default; 186 | border: 0px; 187 | border-radius: 5px; 188 | width: 100%; 189 | padding: 4px; 190 | -webkit-box-sizing: border-box; 191 | -moz-box-sizing: border-box; 192 | } 193 | input.search_text:focus { 194 | color: #f00; 195 | background-color: #303030; 196 | } 197 | input.search_text:hover { 198 | color: 
#f00; 199 | background-color: #303030; 200 | } 201 | 202 | input.search_button { 203 | font-size: 1.5em; 204 | font-weight: bold; 205 | background-color: #b00; 206 | color: #fcfcfc; 207 | border: none; 208 | border-radius: 5px; 209 | } 210 | input.search_button:hover { 211 | background-color: #f00; 212 | } 213 | 214 | td.search_status { 215 | padding-top: 15px; 216 | padding-bottom: 15px; 217 | padding-left: 0px; 218 | text-align: right; 219 | } 220 | 221 | td.search_status_result { 222 | padding-top: 15px; 223 | padding-bottom: 0px; 224 | padding-left: 10px; 225 | padding-right: 10px; 226 | text-align: center; 227 | } 228 | 229 | .search_count { 230 | color: #f00; 231 | font-weight: bold; 232 | font-size: 1.5em; 233 | } 234 | 235 | .search_count_empty { 236 | color: #f00; 237 | font-weight: bold; 238 | font-size: 1.5em; 239 | } 240 | 241 | .search_count_subtext { 242 | color: #f00; 243 | font-weight: bold; 244 | font-size: 1.2em; 245 | } 246 | 247 | a.external_link { 248 | font-size: 1.1em; 249 | font-weight: bold; 250 | } 251 | 252 | .search_result_title { 253 | color: #f00; 254 | font-weight: bold; 255 | font-size: 1.5em; 256 | text-align: center; 257 | border-width: 0px; 258 | border: none; 259 | } 260 | 261 | img.result_thumbnail { 262 | padding: 0px; 263 | padding-left: 5px; 264 | display: block; 265 | } 266 | 267 | td.result_arrow { 268 | display: block; 269 | font-size: 1.5em; 270 | padding: 0px; 271 | padding-right: 5px; 272 | padding-left: 5px; 273 | text-align: center; 274 | } 275 | 276 | td.result_score { 277 | display: block; 278 | padding: 0px; 279 | padding-left: 5px; 280 | padding-right: 5px; 281 | text-align: center; 282 | } 283 | 284 | span.result_score { 285 | padding: 0px; 286 | font-size: 1.2em; 287 | font-weight: bold; 288 | vertical-align: middle; 289 | color: #ff4500; 290 | } 291 | 292 | img.vote { 293 | width: 20px; 294 | height: 18px; 295 | display: inline-block; 296 | } 297 | 298 | img.result_thumbnail{ 299 | 300 | } 301 | 302 | a.result_link { 303 | font-size: 1.5em; 304 | } 305 | 306 | span.post_domain { 307 | color: #666; 308 | padding-left: 5px; 309 | } 310 | a.post_author { 311 | color: #00f; 312 | } 313 | span.post_ups { 314 | color: #ff4500; 315 | padding: 1px; 316 | } 317 | span.post_downs { 318 | color: #00f; 319 | padding: 1px; 320 | } 321 | a.result_image_link { 322 | color: #666; 323 | padding: 3px; 324 | } 325 | a.result_image_link:hover { 326 | color: #999; 327 | } 328 | 329 | a.comment_author { 330 | padding-right: 5px; 331 | font-weight: bold; 332 | } 333 | span.comment_ups { 334 | color: #ff4500; 335 | padding: 1px; 336 | } 337 | span.comment_downs { 338 | color: #00f; 339 | padding: 1px; 340 | } 341 | span.relevant_link { 342 | color: #f00; 343 | } 344 | a.relevant_url, a.relevant_url:visited { 345 | color: #33f; 346 | background-color: #333; 347 | border: none; /*outset 2px #f00;*/ 348 | border-radius: 3px; 349 | padding: 2px; 350 | padding-bottom: 0px; 351 | } 352 | a.relevant_url:hover { 353 | color: #66f; 354 | background-color: #666; 355 | } 356 | 357 | td.result_info { 358 | padding-top: 3px; 359 | padding-bottom: 0px; 360 | border-spacing: 0px; 361 | } 362 | 363 | span.result_date { 364 | color: #cc0; 365 | padding-top: 0px; 366 | padding-bottom: 0px; 367 | } 368 | 369 | a.result_comments { 370 | color: #ffffff; 371 | text-decoration: underline; 372 | padding-top: 0px; 373 | padding-bottom: 0px; 374 | } 375 | 376 | td.result_comment_info { 377 | padding-bottom: 5px; 378 | } 379 | 380 | td.result_comment_body { 381 | padding-bottom: 5px; 
382 | line-height: 150%; 383 | white-space: pre; /* CSS 2.0 */ 384 | white-space: pre-wrap; /* CSS 2.1 */ 385 | white-space: pre-line; /* CSS 3.0 */ 386 | white-space: -pre-wrap; /* Opera 4-6 */ 387 | white-space: -o-pre-wrap; /* Opera 7 */ 388 | white-space: -moz-pre-wrap; /* Mozilla */ 389 | white-space: -hp-pre-wrap; /* HP Printers */ 390 | word-wrap: break-word; /* IE 5+ */ 391 | } 392 | 393 | a.result_comment_link { 394 | color: #00f; 395 | font-weight: bold; 396 | text-decoration: none; 397 | padding-right: 5px; 398 | } 399 | a.result_comment_link:hover { 400 | text-decoration: underline; 401 | } 402 | 403 | li { 404 | color: #ccc; 405 | } 406 | a.external_link { 407 | color: #c00; 408 | font-weight: bold; 409 | } 410 | a.external_link:hover { 411 | color: #f00; 412 | } 413 | 414 | .db_stats { 415 | font-weight: bold; 416 | font-size: 1.3em; 417 | } 418 | 419 | .db_status { 420 | text-align: left; 421 | font-weight: bold; 422 | } 423 | 424 | .status_refresh { 425 | font-size: 1.3em; 426 | font-weight: bold; 427 | background-color: #b00; 428 | padding: 0px; 429 | padding-left: 3px; 430 | padding-right: 3px; 431 | color: #fff; 432 | border: none; 433 | border-radius: 5px; 434 | } 435 | 436 | p.footer { 437 | cursor: default; 438 | font-size: 0.6em; 439 | text-align: left; 440 | } 441 | a.footer { 442 | text-decoration: none; 443 | font-weight: bold; 444 | color: #ccc; 445 | } 446 | a.footer:hover { 447 | color: #f00; 448 | text-decoration: underline; 449 | } 450 | 451 | table.menu { 452 | width: 100%; 453 | padding: 0px; 454 | border-collapse: collapse; 455 | border-spacing: 0; 456 | border-radius: 20px; 457 | } 458 | 459 | tr.menu { 460 | -moz-border-radius-topleft: 10px; 461 | -webkit-border-top-left-radius: 10px; 462 | -khtml-border-top-left-radius: 10px; 463 | border-top-left-radius: 10px; 464 | -moz-border-radius-topright: 10px; 465 | -webkit-border-top-right-radius: 10px; 466 | -khtml-border-top-right-radius: 10px; 467 | border-top-right-radius: 10px; 468 | } 469 | 470 | td.menu { 471 | background-color: #300; 472 | font-size: 1.3em; 473 | font-weight: bold; 474 | cursor: pointer; 475 | width: 20%; 476 | padding: 10px; 477 | text-align: center; 478 | white-space: nowrap; 479 | -moz-border-radius-topleft: 10px; 480 | -webkit-border-top-left-radius: 10px; 481 | -khtml-border-top-left-radius: 10px; 482 | border-top-left-radius: 10px; 483 | -moz-border-radius-topright: 10px; 484 | -webkit-border-top-right-radius: 10px; 485 | -khtml-border-top-right-radius: 10px; 486 | border-top-right-radius: 10px; 487 | } 488 | td.menu:hover { 489 | background-color: #400; 490 | } 491 | td.menuActive { 492 | background-color: #700; 493 | font-size: 1.3em; 494 | font-weight: bold; 495 | width: 20%; 496 | cursor: default; 497 | padding: 10px; 498 | text-align: center; 499 | white-space: nowrap; 500 | -moz-border-radius-topleft: 10px; 501 | -webkit-border-top-left-radius: 10px; 502 | -khtml-border-top-left-radius: 10px; 503 | border-top-left-radius: 10px; 504 | -moz-border-radius-topright: 10px; 505 | -webkit-border-top-right-radius: 10px; 506 | -khtml-border-top-right-radius: 10px; 507 | border-top-right-radius: 10px; 508 | } 509 | td.menuActive { 510 | background-color: #700; 511 | } 512 | td.menuActive:hover { 513 | background-color: #700; 514 | } 515 | 516 | .menu_dropdown { 517 | background-color: #700; 518 | display: none; 519 | padding: 0px; 520 | text-align: center; 521 | -moz-border-radius-bottomleft: 10px; 522 | -webkit-border-bottom-left-radius: 10px; 523 | -khtml-border-bottom-left-radius: 
10px; 524 | border-bottom-left-radius: 10px; 525 | 526 | -moz-border-radius-bottomright: 10px; 527 | -webkit-border-bottom-right-radius: 10px; 528 | -khtml-border-bottom-right-radius: 10px; 529 | border-bottom-right-radius: 10px; 530 | } 531 | 532 | #database_dropdown { 533 | display: none; 534 | } 535 | 536 | #subreddit_dropdown { 537 | display: none; 538 | max-width: 900px; 539 | } 540 | div.subreddits_header { 541 | padding-top: 15px; 542 | padding-bottom: 10px; 543 | font-size: 1.5em; 544 | font-weight: bold; 545 | } 546 | div#subreddits { 547 | text-align: center; 548 | font-size: 0.8em; 549 | padding: 10px; 550 | } 551 | a.subreddit { 552 | color: #ccc; 553 | } 554 | a.subreddit:hover { 555 | color: #eee; 556 | text-decoration: underline; 557 | } 558 | a.subreddit:visited { 559 | color: #888; 560 | } 561 | 562 | #about_dropdown { 563 | display: none; 564 | max-width: 900px; 565 | padding-left: 40px; 566 | padding-right: 40px; 567 | font-size: 0.8em; 568 | } 569 | 570 | table.about_content { 571 | border-collapse: collapse; 572 | margin-left: auto; 573 | margin-right: auto; 574 | margin-top: 20px; 575 | margin-bottom: 15px; 576 | text-align: left; 577 | } 578 | 579 | td.about_left { 580 | text-align: right; 581 | font-size: 1.5em; 582 | font-weight: bold; 583 | height: 40px; 584 | padding-right: 10px; 585 | padding-left: 0px; 586 | } 587 | 588 | div.over18 { 589 | margin: 20px; 590 | cursor: default; 591 | } 592 | h1.over18 { 593 | color: #f00; 594 | text-shadow: 4px 0px 10px rgba(200, 0, 0, 0.3), 595 | 0px 4px 10px rgba(200, 0, 0, 0.3), 596 | -4px 0px 10px rgba(200, 0, 0, 0.3), 597 | 0px -4px 10px rgba(200, 0, 0, 0.3); 598 | } 599 | input.over18 { 600 | font-size: 1.2em; 601 | font-weight: bold; 602 | color: #eee; 603 | background-color: #e00; 604 | padding: 10px; 605 | border-radius: 5px; 606 | border: none; 607 | margin: 10px; 608 | } 609 | input.over18:hover { 610 | cursor: pointer; 611 | box-shadow: 4px 0px 10px rgba(200, 0, 0, 0.3), 612 | 0px 4px 10px rgba(200, 0, 0, 0.3), 613 | -4px 0px 10px rgba(200, 0, 0, 0.3), 614 | 0px -4px 10px rgba(200, 0, 0, 0.3); 615 | } 616 | -------------------------------------------------------------------------------- /scan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | What this script does: 5 | 1. Scans reddit.com subreddits for new posts/comments 6 | 2. Retrieves images from day-old posts/comments 7 | 3. Stores image information (hash, size, etc) in a database 8 | 4. 
If post/comment contains image/link, stores post/comment info in database 9 | ''' 10 | 11 | ############################## 12 | # Standard libraries 13 | from os import path, close, remove 14 | from sys import exit, stdout 15 | import time, tempfile 16 | ############################## 17 | # Reddit 18 | import ReddiWrap 19 | reddit = ReddiWrap.ReddiWrap() 20 | ############################## 21 | # Image hash 22 | from ImageHash import avhash, dimensions, create_thumb 23 | ############################## 24 | # WEB 25 | from Httpy import Httpy 26 | web = Httpy() 27 | ############################## 28 | # Database 29 | from DB import DB 30 | ############################## 31 | # Command-line output 32 | from commands import getstatusoutput 33 | 34 | ################# 35 | # Globals 36 | SCHEMA = { 37 | 'Posts' : 38 | '\n\t' + 39 | 'id INTEGER PRIMARY KEY, \n\t' + 40 | 'hexid TEXT UNIQUE, \n\t' + # base36 reddit id to comment 41 | 'title TEXT, \n\t' + 42 | 'url TEXT, \n\t' + 43 | 'text TEXT, \n\t' + # self-text 44 | 'author TEXT, \n\t' + 45 | 'permalink TEXT, \n\t' + # /r/Subreddit/comments/id/title 46 | 'subreddit TEXT, \n\t' + 47 | 'comments INTEGER, \n\t' + # Number of comment 48 | 'ups INTEGER, \n\t' + 49 | 'downs INTEGER, \n\t' + 50 | 'score INTEGER, \n\t' + 51 | 'created INTEGER, \n\t' + # Time in UTC 52 | 'is_self NUMERIC, \n\t' + 53 | 'over_18 NUMERIC', 54 | 55 | 'Comments' : 56 | '\n\t' + 57 | 'id INTEGER PRIMARY KEY, \n\t' + 58 | 'postid INTEGER, \n\t' + # Reference to Posts table 59 | 'hexid TEXT UNIQUE, \n\t' + # base36 reddit id to comment 60 | 'author TEXT, \n\t' + 61 | 'body TEXT, \n\t' + 62 | 'ups INTEGER, \n\t' + 63 | 'downs INTEGER, \n\t' + 64 | 'created INTEGER, \n\t' + # Time in UTC 65 | 'FOREIGN KEY(postid) REFERENCES Posts(id)', 66 | 67 | 'Hashes' : 68 | '\n\t' + 69 | 'id INTEGER PRIMARY KEY, \n\t' + 70 | 'hash TEXT UNIQUE', 71 | 72 | 'ImageURLs' : 73 | '\n\t' + 74 | 'id INTEGER PRIMARY KEY, \n\t' + 75 | 'url TEXT UNIQUE, \n\t' + 76 | 'hashid INTEGER, \n\t' + # Reference to Hashes table 77 | 'width INTEGER, \n\t' + 78 | 'height INTEGER, \n\t' + 79 | 'bytes INTEGER, \n\t' + 80 | 'FOREIGN KEY(hashid) REFERENCES Hashes(id)', 81 | 82 | 'Albums' : 83 | '\n\t' + 84 | 'id INTEGER PRIMARY KEY, \n\t' + 85 | 'url TEXT UNIQUE', 86 | 87 | 'Images' : 88 | '\n\t' + 89 | 'urlid INTEGER, \n\t' + # Reference to ImageURLs table 90 | 'hashid INTEGER, \n\t' + # Reference to Hashes table 91 | 'albumid INTEGER, \n\t' + # Reference to Albums table (0 if none) 92 | 'postid INTEGER, \n\t' + # Reference to Posts table 93 | 'commentid INTEGER, \n\t' + # Reference to Comments table (0 if post) 94 | 'FOREIGN KEY(urlid) REFERENCES ImageURLs(id), \n\t' + 95 | 'FOREIGN KEY(hashid) REFERENCES Hashes(id), \n\t' + 96 | 'FOREIGN KEY(albumid) REFERENCES Albums(id), \n\t' + 97 | 'FOREIGN KEY(postid) REFERENCES Posts(id), \n\t' + 98 | 'FOREIGN KEY(commentid) REFERENCES Comments(id), \n\t' + 99 | 'PRIMARY KEY(urlid, postid, commentid)' # Prevent a post or comment from having more than two of the same exact image 100 | } 101 | db = DB('reddit.db', **SCHEMA) 102 | 103 | CONSOLE_WIDTH = 150 # With of console (number of characters across) 104 | 105 | 106 | def main(): 107 | """ 108 | Main loop of program. 109 | Infinitely iterates over the list of subreddits 110 | """ 111 | exit_if_already_started() 112 | # Login to reddit acct or die 113 | if not login(): return 114 | while True: 115 | # Subreddits are added to "subs_all.txt", "subs_month.txt", and 116 | # "subs_week.txt", and "subs.txt" (master list). 
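#
# For illustration, a minimal self-contained sketch of the list round-trip the
# comments above describe: one subreddit per line, entries are popped as they
# are scraped, and the remainder is written back so a restart resumes where it
# left off. The helper names and the 'subs_week.txt' example are hypothetical;
# scan.py's own load_list()/save_list() further down do the equivalent work.
#
#   def load_queue(filename):
#       with open(filename) as f:
#           return [line for line in f.read().split('\n') if line != '']
#
#   def save_queue(queue, filename):
#       with open(filename, 'w') as f:
#           f.write('\n'.join(queue) + '\n')
#
#   queue = load_queue('subs_week.txt')
#   while queue:
#       subreddit = queue.pop(0)            # scrape this subreddit, then ...
#       save_queue(queue, 'subs_week.txt')  # ... persist the remaining work
#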
117 | # These lists tell the script which top?t=timeperiod to grab 118 | # After grabbing the top from all/month, the script continues to 119 | # check the subreddit's top weekly posts 120 | for timeframe in ['all', 'month', 'week']: 121 | if timeframe == 'week': 122 | # Load subreddits to check the top?t=week of, or load 123 | # all subs from the masterlist if found to be empty. 124 | subreddits = load_list('subs_%s.txt' % timeframe, load_subs=True) 125 | else: 126 | # Only load subs from all/month, don't load more if the 127 | # lists are found to be empty 128 | subreddits = load_list('subs_%s.txt' % timeframe) 129 | while len(subreddits) > 0: 130 | # Grab all images/comments from sub, remove from list 131 | parse_subreddit(subreddits.pop(0), timeframe) 132 | # Save current list in case script needs to be restarted 133 | save_list(subreddits, 'subs_%s.txt' % timeframe) 134 | time.sleep(2) 135 | 136 | def exit_if_already_started(): 137 | (status, output) = getstatusoutput('ps aux') 138 | running_processes = 0 139 | for line in output.split('\n'): 140 | if 'python' in line and 'scan.py' in line and not '/bin/sh -c' in line: 141 | running_processes += 1 142 | if running_processes > 1: 143 | print "process is already running, exiting" 144 | exit(0) # Quit if the bot is already running 145 | 146 | def login(): 147 | """ Logs into reddit. Returns false if it can't """ 148 | if path.exists('login_credentials.txt'): 149 | login_file = open('login_credentials.txt') 150 | login_list = login_file.read().split('\n') 151 | login_file.close() 152 | if len(login_list) >= 2: 153 | user = login_list[0] 154 | password = login_list[1] 155 | print ' [+] logging in to %s...' % user, 156 | stdout.flush() 157 | result = reddit.login(user=user, password=password) 158 | if result == 0: 159 | print 'success' 160 | return True 161 | else: 162 | print 'failed (status code %d)' % result 163 | return False 164 | print '\n [!] unable to find/validate user/pass' 165 | print ' credentials need to be in login_credentials.txt' 166 | print ' expecting: username and password separated by new lines' 167 | return False 168 | 169 | def parse_subreddit(subreddit, timeframe): 170 | """ Parses top 1,000 posts from subreddit within timeframe. """ 171 | total_post_count = 0 172 | current_post_index = 0 173 | while True: 174 | # Check if there are pending albums to be indexed 175 | check_and_drain_queue() 176 | query_text = '/r/%s/top?t=%s' % (subreddit, timeframe) 177 | if total_post_count == 0: 178 | prntln(' [+] loading first page of %s' % query_text) 179 | stdout.flush() 180 | posts = reddit.get(query_text) 181 | elif reddit.has_next(): 182 | prnt(' [+] loading next page of %s' % query_text) 183 | stdout.flush() 184 | posts = reddit.get_next() 185 | else: 186 | # No more pages to load 187 | return 188 | if posts == None or len(posts) == 0: 189 | print ' [!] no posts found' 190 | return 191 | total_post_count += len(posts) 192 | for post in posts: 193 | current_post_index += 1 194 | prnt('[%3d/%3d] scraping http://redd.it/%s %s' % \ 195 | (current_post_index, total_post_count, post.id, post.url[:50])) 196 | stdout.flush() 197 | if parse_post(post): # Returns True if we made a request to reddit 198 | time.sleep(2) # Sleep to stay within rate limit 199 | 200 | time.sleep(2) 201 | 202 | def parse_post(post): 203 | """ Scrapes and indexes a post and it's comments. 
""" 204 | # Ignore posts less than 24 hours old 205 | if time.time() - post.created < 60 * 60 * 24: return False 206 | 207 | # Add post to database 208 | postid_db = db.insert('Posts', \ 209 | (None, \ 210 | post.id, \ 211 | post.title, \ 212 | post.url, \ 213 | post.selftext, \ 214 | post.author, \ 215 | post.permalink, \ 216 | post.subreddit, \ 217 | post.num_comments, \ 218 | post.upvotes, \ 219 | post.downvotes, \ 220 | post.score, \ 221 | post.created_utc, \ 222 | int(post.is_self), \ 223 | int(post.over_18))) 224 | # If post already exists, we've already indexed it; skip! 225 | if postid_db == -1: return False 226 | # Write post to DB so we don't hit it again 227 | 228 | # NOTE: postid_db is the ID of the post in the database; NOT on reddit 229 | 230 | # Check for self-post 231 | if post.selftext != '': 232 | urls = get_links_from_body(post.selftext) 233 | for url in urls: 234 | parse_url(url, postid=postid_db) 235 | else: 236 | # Attempt to retrieve hash(es) from link 237 | parse_url(post.url, postid=postid_db) 238 | 239 | # Iterate over top-level comments 240 | if post.num_comments > 0: 241 | reddit.fetch_comments(post) 242 | for comment in post.comments: 243 | parse_comment(comment, postid_db) 244 | db.commit() 245 | 246 | def parse_comment(comment, postid): 247 | """ 248 | Parses links from a comment. Populates DB. 249 | Recursively parses child comments. 250 | """ 251 | urls = get_links_from_body(comment.body) 252 | if len(urls) > 0: 253 | # Only insert comment into DB if it contains a link 254 | comid_db = db.insert('Comments', \ 255 | (None, \ 256 | postid, \ 257 | comment.id, \ 258 | comment.author, \ 259 | comment.body, \ 260 | comment.upvotes, \ 261 | comment.downvotes, \ 262 | comment.created_utc)) 263 | for url in urls: 264 | parse_url(url, postid=postid, commentid=comid_db) 265 | # Recurse over child comments 266 | for child in comment.children: 267 | parse_comment(child, postid) 268 | 269 | def get_links_from_body(body): 270 | """ Returns list of URLs found in body (e.g. selfpost or comment). """ 271 | result = [] 272 | i = -1 # Starting index 273 | while True: 274 | i = body.find('http://', i + 1) # Find next link 275 | if i == -1: break 276 | j = i 277 | # Iterate forward until we hit the end of the URL 278 | while j < len(body) and \ 279 | body[j] != ')' and \ 280 | body[j] != ']' and \ 281 | body[j] != ' ' and \ 282 | body[j] != '"' and \ 283 | body[j] != '\n' and \ 284 | body[j] != '\t': 285 | j += 1 286 | result.append(body[i:j]) # Add to list 287 | i = j 288 | result = list(set(result)) # Remove duplicates 289 | return result 290 | 291 | def sanitize_url(url): 292 | """ Sanitizes URLs for DB input, strips excess chars """ 293 | url = url.replace('"', '%22') 294 | url = url.replace("'", '%27') 295 | if '?' in url: url = url[:url.find('?')] 296 | if '#' in url: url = url[:url.find('#')] 297 | return url 298 | 299 | def parse_url(url, postid=0, commentid=0): 300 | """ Gets image hash(es) from URL, populates database """ 301 | while url.endswith('/'): url = url[:-1] 302 | if 'imgur.com' in url: 303 | url = url.replace('/m.imgur.com/', '/imgur.com/') 304 | if '?' 
in url: url = url[:url.find('?')]
305 |         if '.com/a/' in url:
306 |             # Album
307 |             print ''
308 |             result = parse_album(url, postid=postid, commentid=commentid)
309 |             db.commit()
310 |             return result
311 |         elif url.lower().endswith('.jpg') or \
312 |              url.lower().endswith('.jpeg') or \
313 |              url.lower().endswith('.png') or \
314 |              url.lower().endswith('.gif'):
315 |             # Direct imgur link, find highest res
316 |             url = imgur_get_highest_res(url)
317 |             # Drop out of if statement & parse image
318 |         else:
319 |             # Indirect imgur link (e.g. "imgur.com/abcde")
320 |             r = web.get(url)
321 |             if '"image_src"' in r:
322 |                 chunk = web.between(r, '"image_src"', '>')[0]
323 |                 url = web.between(chunk, 'href="', '"')[0]
324 |             else:
325 |                 print '\n [!] unable to find direct imgur link for %s (404?)' % url
326 |                 return False
327 | 
328 |     elif url.lower().endswith('.jpg') or \
329 |          url.lower().endswith('.jpeg') or \
330 |          url.lower().endswith('.png') or \
331 |          url.lower().endswith('.gif'):
332 |         # Direct link to non-imgur image
333 |         pass # Drop out of if statement & parse image
334 | 
335 |     elif 'gfycat.com' in url:
336 |         r = web.get(url)
337 |         if "og:image' content='" in r:
338 |             url = web.between(r, "og:image' content='", "'")[-1]
339 |         else:
340 |             print '\n [!] unable to find gfycat image for %s' % url
341 |             return False
342 | 
343 |     elif 'mediacru.sh' in url:
344 |         r = web.get(url); content = ''
345 |         if 'property="og:type" content="' in r:
346 |             content = web.between(r, 'property="og:type" content="', '"')[0]
347 |             if not content.startswith('image'):
348 |                 print '\n [!] got non-image content "%s" for %s ' % (content, url)
349 |                 return False
350 |         if content == '':
351 |             # Album (?)
352 |             print ''
353 |             result = parse_album_mediacrush(url, postid=postid, commentid=commentid)
354 |             db.commit()
355 |             return result
356 |         else:
357 |             # Single image (?)
358 |             if 'property="og:image" content="' in r:
359 |                 url = web.between(r, '"og:image" content="', '"')[0]
360 |             else:
361 |                 print '\n [!] unable to find mediacru.sh image for %s' % url
362 |                 return False
363 |     else:
364 |         # Not imgur, not a direct link; no way to parse
365 |         # TODO Develop a way to find images in other websites?
366 |         return False
367 |     print ''
368 |     result = parse_image(url, postid=postid, commentid=commentid)
369 |     db.commit()
370 |     return result
371 | 
372 | def parse_album_mediacrush(url, postid=0, commentid=0):
373 |     """ Indexes every image in a mediacru.sh album """
374 |     from json import loads
375 |     json = loads(web.get('%s.json' % url))
376 |     files = json['files']
377 |     for fil in files:
378 |         parse_image(fil['url'], postid=postid, commentid=commentid) # mediacru.sh albums are not stored in 'Albums', so albumid defaults to 0
379 |     if len(files) == 0:
380 |         print ' [!] no images found in album!'
381 |         return False
382 |     else:
383 |         return True
384 | 
385 | def parse_album(url, postid=0, commentid=0):
386 |     """ Indexes every image in an imgur album """
387 |     # cleanup URL
388 |     url = url.replace('http://', '').replace('https://', '')
389 |     while url.endswith('/'): url = url[:-1]
390 |     while url.count('/') > 2: url = url[:url.rfind('/')]
391 |     if '?'
in url: url = url[:url.find('?')] 392 | if '#' in url: url = url[:url.find('#')] 393 | url = 'http://%s' % url # How the URL will be stored in the DB 394 | albumid = db.insert('Albums', (None, url)) 395 | if albumid == -1: 396 | albumids = db.select('id', 'Albums', 'url = "%s"' % url) 397 | if len(albumids) == 0: return 398 | albumid = albumids[0][0] 399 | # Download album 400 | url = url + '/noscript' 401 | r = web.get(url) 402 | links = web.between(r, 'img src="//i.', '"') 403 | for link in links: 404 | link = 'http://i.%s' % link 405 | if '?' in link: link = link[:link.find('?')] 406 | if '#' in link: link = link[:link.find('#')] 407 | link = imgur_get_highest_res(link) 408 | # Parse each image 409 | parse_image(link, postid=postid, commentid=commentid, albumid=albumid) 410 | if len(links) == 0: 411 | print ' [!] no images found in album!' 412 | return False 413 | else: 414 | return True 415 | 416 | def parse_image(url, postid=0, commentid=0, albumid=0): 417 | """ 418 | Downloads & indexes image. 419 | Populates 'Hashes', 'ImageURLs', and 'Images' tables 420 | """ 421 | try: 422 | (hashid, urlid, downloaded) = get_hashid_and_urlid(url) 423 | except Exception, e: 424 | print '\n [!] failed to calculate hash for %s' % url 425 | print ' [!] Exception: %s' % str(e) 426 | return False 427 | # 'Images' table is used for linking reddit posts/comments to images 428 | # If there is no post/comment, don't bother linking 429 | if postid != 0 or commentid != 0: 430 | imageid = db.insert('Images', (urlid, hashid, albumid, postid, commentid)) 431 | return True 432 | 433 | 434 | def get_hashid_and_urlid(url, verbose=True): 435 | """ 436 | Retrieves hash ID ('Hashes' table) and URL ID 437 | ('ImageURLs' table) for an image at a given URL. 438 | Populates 'Hashes' and 'ImageURLs' if needed. 439 | 3rd tuple is True if downloading of image was required 440 | """ 441 | existing = db.select('id, hashid', 'ImageURLs', 'url = "%s"' % url) 442 | if len(existing) > 0: 443 | urlid = existing[0][0] 444 | hashid = existing[0][1] 445 | return (hashid, urlid, False) 446 | 447 | # Download image 448 | (file, temp_image) = tempfile.mkstemp(prefix='redditimg', suffix='.jpg') 449 | close(file) 450 | if url.startswith('//'): url = 'http:%s' % url 451 | if verbose: print ' [+] downloading %s ...' % url, 452 | stdout.flush() 453 | if not web.download(url, temp_image): 454 | if verbose: print 'failed' 455 | raise Exception('unable to download image at %s' % url) 456 | # Get image hash 457 | try: 458 | if verbose: print 'hashing ...', 459 | stdout.flush() 460 | (width, height) = dimensions(temp_image) 461 | if width > 4000 or height > 4000: 462 | print '\n[!] 
image too large to hash (%dx%d)' % (width, height) 463 | raise Exception('too large to hash (%dx%d)' % (width, height)) 464 | if width == 161 and height == 81: 465 | # Size of empty imgur image ('not found!') 466 | raise Exception('Found 404 image dimensions (161x81)') 467 | image_hash = str(avhash(temp_image)) 468 | except Exception, e: 469 | # Failed to get hash, delete image & raise exception 470 | if verbose: print 'failed' 471 | try: remove(temp_image) 472 | except: pass 473 | raise e 474 | if verbose: print 'indexing ...', 475 | stdout.flush() 476 | 477 | # Insert image hash into Hashes table 478 | hashid = db.insert('Hashes', (None, image_hash)) 479 | if hashid == -1: 480 | # Already exists, need to lookup existing hash 481 | hashids = db.select('id', 'Hashes', 'hash = "%s"' % (image_hash)) 482 | if len(hashids) == 0: 483 | try: remove(temp_image) 484 | except: pass 485 | raise Exception('unable to add hash to table, or find hash (wtf?)') 486 | hashid = hashids[0][0] 487 | 488 | # Image attributes 489 | try: 490 | filesize = path.getsize(temp_image) 491 | urlid = db.insert('ImageURLs', (None, url, hashid, width, height, filesize)) 492 | db.commit() 493 | create_thumb(temp_image, urlid) # Make a thumbnail! 494 | if verbose: print 'done' 495 | except Exception, e: 496 | try: remove(temp_image) 497 | except: pass 498 | raise e 499 | remove(temp_image) 500 | return (hashid, urlid, True) 501 | 502 | def imgur_get_highest_res(url): 503 | """ Retrieves highest-res imgur image """ 504 | if not 'h.' in url: 505 | return url 506 | temp = url.replace('h.', '.') 507 | m = web.get_meta(temp) 508 | if 'Content-Type' in m and 'image' in m['Content-Type'].lower() and \ 509 | 'Content-Length' in m and m['Content-Length'] != '503': 510 | return temp 511 | else: 512 | return url 513 | 514 | def save_subs(filename): 515 | """ Copies list of subreddits to filename """ 516 | sub_list = load_list('subs.txt') 517 | save_list(sub_list, filename) 518 | return sub_list 519 | 520 | def save_list(lst, filename): 521 | """ Saves list to filename """ 522 | f = open(filename, 'w') 523 | for item in lst: 524 | f.write(item + '\n') 525 | f.close() 526 | 527 | def load_list(filename, load_subs=False): 528 | """ 529 | Loads list from filename 530 | If 'load_subs' is true and the list is empty, 531 | automatically load full list of subs & save to file 532 | """ 533 | if not path.exists(filename): 534 | return save_subs(filename) 535 | f = open(filename, 'r') 536 | result = f.read().split('\n') 537 | f.close() 538 | while result.count("") > 0: 539 | result.remove("") 540 | if len(result) == 0 and load_subs: 541 | return save_subs(filename) 542 | return result 543 | 544 | def check_and_drain_queue(): 545 | """ 546 | Indexes & empties file containing list of URLs to index 547 | File is populated via front-end requests. 548 | """ 549 | if not path.exists('index_queue.lst'): return 550 | # Read URLs 551 | f = open('index_queue.lst', 'r') 552 | queue_lines = f.read() 553 | f.close() 554 | # Delete 555 | #remove('index_queue.lst') 556 | f = open('index_queue.lst', 'w') 557 | f.write('') 558 | f.close() 559 | queue = queue_lines.split('\n') 560 | while queue.count('') > 0: queue.remove('') 561 | if len(queue) == 0: return 562 | queue = list(set(queue)) # remove duplicates 563 | print '\n [!] 
found %d images to index' % len(queue) 564 | for url in queue: 565 | url = url.strip() 566 | if url == '': continue 567 | parse_url(url) 568 | 569 | ################## 570 | # Print methods 571 | # Useful for overwriting one-liners 572 | def prnt(text): 573 | try: 574 | print '\r%s%s' % (text, ' ' * (CONSOLE_WIDTH - len(text))), 575 | except: pass 576 | def prntln(text): 577 | try: 578 | print '\r%s%s' % (text, ' ' * (CONSOLE_WIDTH - len(text))) 579 | except: pass 580 | 581 | if __name__ == '__main__': 582 | """ only run when executed (not imported) """ 583 | try: 584 | main() 585 | except KeyboardInterrupt: 586 | print '\n\n Interrupted (^C)' 587 | -------------------------------------------------------------------------------- /search.cgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ###################### 4 | # Standard library # 5 | import cgitb; cgitb.enable() # for debugging 6 | import cgi 7 | import tempfile 8 | from os import path, close, remove 9 | from sys import argv 10 | from time import sleep, time 11 | import json 12 | from threading import Thread, current_thread 13 | 14 | ###################### 15 | # Database # 16 | from DB import DB 17 | ###################### 18 | # Image hashing # 19 | from ImageHash import avhash 20 | ###################### 21 | # Web # 22 | from Httpy import Httpy 23 | 24 | ###################### 25 | # Globals 26 | db = DB('reddit.db') # Access to database 27 | web = Httpy() # Web functionality 28 | # Constants 29 | TRUSTED_AUTHORS = [ \ 30 | '4_pr0n', \ 31 | 'pervertedbylanguage', \ 32 | 'WakingLife'] 33 | TRUSTED_SUBREDDITS = [ \ 34 | 'AmateurArchives', \ 35 | 'gonewild', \ 36 | 'pornID', \ 37 | 'tipofmypenis', \ 38 | 'UnrealGirls'] 39 | MAX_ALBUM_SEARCH_DEPTH = 3 # Number of images to download from album 40 | MAX_ALBUM_SEARCH_TIME = 10 # Max time to search album in seconds 41 | MAX_GOOGLE_SEARCH_TIME = 10 # Max time to spend retrieving & searching google results 42 | 43 | #################### 44 | # MAIN 45 | def main(): 46 | """ Gets keys from query, performs search, prints results """ 47 | keys = get_keys() 48 | func_map = { 49 | 'url' : search_url, 50 | 'user' : search_user, 51 | 'cache' : search_cache, 52 | 'text' : search_text, 53 | 'google': search_google 54 | } 55 | for key in func_map: 56 | if key in keys: 57 | func_map[key](keys[key]) 58 | return 59 | print_error('did not receive expected key: url, user, cache, or text') 60 | 61 | ################### 62 | # Primary methods 63 | def search_url(url): 64 | """ Searches for a single URL, prints results """ 65 | if url.lower().startswith('cache:'): 66 | search_cache(url[len('cache:'):]) 67 | return 68 | elif 'imgur.com/a/' in url: 69 | search_album(url) # Searching album 70 | return 71 | elif url.lower().startswith('user:'): 72 | search_user(url[len('user:'):]) 73 | return 74 | elif url.lower().startswith('text:'): 75 | search_text(url[len('text:'):]) 76 | return 77 | elif 'reddit.com/u/' in url: 78 | search_user(url[url.find('/u/')+3:]) 79 | return 80 | elif 'reddit.com/user/' in url: 81 | search_user(url[url.find('/user/')+6:]) 82 | return 83 | elif 'reddit.com/r/' in url and '/comments/' in url: 84 | # Reddit post 85 | if not url.endswith('.json'): url += '.json' 86 | r = web.get(url) 87 | if '"url": "' in r: 88 | url = web.between(r, '"url": "', '"')[0] 89 | if ' ' in url: url = url.replace(' ', '%20') 90 | try: 91 | (url, posts, comments, related, downloaded) = \ 92 | get_results_tuple_for_image(url) 93 | except Exception, 
e: 94 | print_error(str(e)) 95 | return 96 | print json.dumps( { 97 | 'posts' : posts, 98 | 'comments' : comments, 99 | 'url' : url, 100 | 'related' : related 101 | } ) 102 | 103 | def search_album(url): 104 | url = url.replace('http://', '').replace('https://', '').replace('m.imgur.com', 'imgur.com') 105 | while url.endswith('/'): url = url[:-1] 106 | while url.count('/') > 2: url = url[:url.rfind('/')] 107 | if '?' in url: url = url[:url.find('?')] 108 | if '#' in url: url = url[:url.find('#')] 109 | url = 'http://%s' % url # How the URL will be stored in the DB 110 | posts = [] 111 | comments = [] 112 | related = [] 113 | checked_count = 0 114 | time_started = time() 115 | albumids = db.select('id', 'Albums', 'url = "%s"' % url) 116 | if len(albumids) > 0: 117 | # Album is already indexed 118 | albumid = albumids[0][0] 119 | query_text = 'id IN ' 120 | query_text += '(SELECT DISTINCT urlid FROM Images ' 121 | query_text += 'WHERE albumid = %d)' % albumid 122 | image_urls = db.select('url', 'ImageURLs', query_text) 123 | for image_url in image_urls: 124 | image_url = image_url[0] 125 | if time() - time_started > MAX_ALBUM_SEARCH_TIME: break 126 | checked_count += 1 127 | try: 128 | (imgurl, resposts, rescomments, resrelated, downloaded) = \ 129 | get_results_tuple_for_image(image_url) 130 | merge_results(posts, resposts) 131 | merge_results(comments, rescomments) 132 | merge_results(related, resrelated) 133 | except Exception, e: 134 | continue 135 | else: 136 | # Album is not indexed; need to scrape images 137 | r = web.get('%s/noscript' % url) 138 | image_urls = web.between(r, 'img src="//i.', '"') 139 | if len(image_urls) == 0: 140 | print_error('empty imgur album (404?)') 141 | return 142 | # Search stats 143 | downloaded_count = 0 144 | for link in image_urls: 145 | if downloaded_count >= MAX_ALBUM_SEARCH_DEPTH: break 146 | if time() - time_started > MAX_ALBUM_SEARCH_TIME: break 147 | link = 'http://i.%s' % link 148 | if '?' in link: link = link[:link.find('?')] 149 | if '#' in link: link = link[:link.find('#')] 150 | link = imgur_get_highest_res(link) 151 | checked_count += 1 152 | try: 153 | (imgurl, resposts, rescomments, resrelated, downloaded) = \ 154 | get_results_tuple_for_image(link) 155 | if downloaded: downloaded_count += 1 156 | merge_results(posts, resposts) 157 | merge_results(comments, rescomments) 158 | merge_results(related, resrelated) 159 | except Exception, e: 160 | continue 161 | # Add album images to queue, to be parsed by backend scraper 162 | f = open('index_queue.lst', 'a') 163 | f.write('http://i.%s\n' % '\nhttp://i.'.join(image_urls)) 164 | f.flush() 165 | f.close() 166 | print json.dumps( { 167 | 'url' : url, 168 | 'checked' : checked_count, 169 | 'total' : len(image_urls), 170 | 'cached' : len(albumids) > 0, 171 | 'posts' : posts, 172 | 'comments' : comments, 173 | 'related' : related 174 | } ) 175 | 176 | def search_user(user): 177 | """ Returns posts/comments by a reddit user """ 178 | if user.strip() == '' or not is_user_valid(user): 179 | print_error('invalid username') 180 | return 181 | posts = [] 182 | comments = [] 183 | related = [] 184 | # This search will pull up all posts and comments by the user 185 | # NOTE It will also grab all comments containing links in the user's posts (!) 
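# For illustration, the string-building below assembles (roughly) the filter
# shown here against the 'Images' table; USERNAME is a placeholder for the
# query value and the name EXAMPLE_USER_FILTER exists only for this sketch:

EXAMPLE_USER_FILTER = (
    'postid IN (SELECT DISTINCT id FROM Posts'
    ' WHERE author LIKE "USERNAME" ORDER BY ups DESC LIMIT 50)'
    ' OR commentid IN (SELECT DISTINCT id FROM Comments'
    ' WHERE author LIKE "USERNAME" ORDER BY ups DESC LIMIT 50)'
    ' GROUP BY postid, commentid'
)

# Because an Images row qualifies when EITHER its postid is one of the user's
# posts OR its commentid is one of the user's comments, rows created from
# links in other people's comments on the user's posts are also returned --
# which is exactly what the NOTE above warns about.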
186 | query_text = 'postid IN ' 187 | query_text += '(SELECT DISTINCT id FROM Posts ' 188 | query_text += 'WHERE author LIKE "%s" ' % user 189 | query_text += 'ORDER BY ups DESC LIMIT 50) ' 190 | query_text += 'OR ' 191 | query_text += 'commentid IN ' 192 | query_text += '(SELECT DISTINCT id FROM Comments ' 193 | query_text += 'WHERE author LIKE "%s" ' % user 194 | query_text += 'ORDER BY ups DESC LIMIT 50) ' 195 | query_text += 'GROUP BY postid, commentid' #LIMIT 50' 196 | # To avoid comments not created by the author, use this query: 197 | #query_text = 'commentid = 0 AND postid IN (SELECT DISTINCT id FROM Posts WHERE author LIKE "%s" ORDER BY ups DESC LIMIT 50) OR commentid IN (SELECT DISTINCT id FROM Comments WHERE author LIKE "%s" ORDER BY ups DESC LIMIT 50) GROUP BY postid, commentid LIMIT 50' % (user, user) 198 | images = db.select('urlid, albumid, postid, commentid', 'Images', query_text) 199 | for (urlid, albumid, postid, commentid) in images: 200 | # Get image's URL, dimensions & size 201 | if commentid != 0: 202 | # Comment 203 | try: 204 | comment_dict = build_comment(commentid, urlid, albumid) 205 | comments.append(comment_dict) 206 | except: pass 207 | else: 208 | # Post 209 | try: 210 | post_dict = build_post(postid, urlid, albumid) 211 | posts.append(post_dict) 212 | related += build_related_comments(postid, urlid, albumid) 213 | except: pass 214 | posts = sort_by_ranking(posts) 215 | comments = sort_by_ranking(comments) 216 | print json.dumps( { 217 | 'url' : 'user:%s' % user, #'http://reddit.com/user/%s' % user, 218 | 'posts' : posts, 219 | 'comments' : comments, 220 | 'related' : related 221 | } ) 222 | 223 | def search_cache(url): 224 | """ 225 | Prints list of images inside of an album 226 | The images are stored in the database, so 404'd albums 227 | can be retrieved via this method (sometimes) 228 | """ 229 | try: 230 | url = sanitize_url(url) 231 | except Exception, e: 232 | print_error(str(e)) 233 | return 234 | images = [] 235 | query_text = 'id IN (SELECT urlid FROM Images WHERE albumid IN (SELECT DISTINCT id FROM albums WHERE url = "%s"))' % (url) 236 | image_tuples = db.select('id, url', 'ImageURLs', query_text) 237 | for (urlid, imageurl) in image_tuples: 238 | image = { 239 | 'thumb' : 'thumbs/%d.jpg' % urlid, 240 | 'url' : imageurl 241 | } 242 | images.append(image) 243 | print json.dumps( { 244 | 'url' : 'cache:%s' % url, 245 | 'images' : images 246 | } ) 247 | 248 | def search_text(text): 249 | """ Prints posts/comments containing text in title/body. 
""" 250 | posts = [] 251 | comments = [] 252 | related = [] 253 | query_text = 'commentid = 0 AND postid IN (SELECT DISTINCT id FROM Posts WHERE title LIKE "%%%s%%" or text LIKE "%%%s%%" ORDER BY ups DESC LIMIT 50) OR commentid IN (SELECT DISTINCT id FROM Comments WHERE body LIKE "%%%s%%" ORDER BY ups DESC LIMIT 50) GROUP BY postid, commentid LIMIT 50' % (text, text, text) 254 | images = db.select('urlid, albumid, postid, commentid', 'Images', query_text) 255 | for (urlid, albumid, postid, commentid) in images: 256 | # Get image's URL, dimensions & size 257 | if commentid != 0: 258 | # Comment 259 | try: 260 | comment_dict = build_comment(commentid, urlid, albumid) 261 | comments.append(comment_dict) 262 | except: pass 263 | else: 264 | # Post 265 | try: 266 | post_dict = build_post(postid, urlid, albumid) 267 | posts.append(post_dict) 268 | related += build_related_comments(postid, urlid, albumid) 269 | except: pass 270 | posts = sort_by_ranking(posts) 271 | comments = sort_by_ranking(comments) 272 | print json.dumps( { 273 | 'url' : 'text:%s' % text, 274 | 'posts' : posts, 275 | 'comments' : comments, 276 | 'related' : related 277 | } ) 278 | 279 | GOOGLE_RESULTS = [] 280 | GOOGLE_THREAD_COUNT = 0 281 | GOOGLE_THREAD_MAX = 3 282 | 283 | def search_google(url): 284 | """ 285 | Searches google reverse image search, 286 | gets URL of highest-res image, 287 | searches that. 288 | """ 289 | # No country redirect 290 | web.get('http://www.google.com/ncr') 291 | sleep(0.2) 292 | 293 | time_started = time() 294 | time_to_stop = time_started + MAX_GOOGLE_SEARCH_TIME 295 | # Get image results 296 | u = 'http://images.google.com/searchbyimage?hl=en&safe=off&image_url=%s' % url 297 | r = web.get(u) 298 | total_searched = 0 299 | start = 10 300 | while True: 301 | if 'that include matching images' in r: 302 | chunk = r[r.find('that include matching images'):] 303 | elif start == 10: 304 | break 305 | else: 306 | chunk = r 307 | if 'Visually similar images' in chunk: 308 | chunk = chunk[:chunk.find('Visually similar images')] 309 | images = web.between(chunk, '/imgres?imgurl=', '&imgref') 310 | for image in images: 311 | if time() > time_to_stop: break 312 | splits = image.split('&') 313 | image = '' 314 | for split in splits: 315 | if split.startswith('amp;'): break 316 | if image != '': image += '&' 317 | image += split 318 | # Launch thread 319 | while GOOGLE_THREAD_COUNT >= GOOGLE_THREAD_MAX: sleep(0.1) 320 | if time() < time_to_stop: 321 | args = (image, time_to_stop) 322 | t = Thread(target=handle_google_result, args=args) 323 | t.start() 324 | else: 325 | break 326 | 327 | if time() > time_to_stop: break 328 | if '>Next<' not in r: break 329 | sleep(1) 330 | r = web.get('%s&start=%s' % (u, start)) 331 | start += 10 332 | 333 | posts = [] 334 | comments = [] 335 | related = [] 336 | # Wait for threads to finish 337 | while GOOGLE_THREAD_COUNT > 0: sleep(0.1) 338 | # Iterate over results 339 | for (image_url, image_hash, downloaded) in GOOGLE_RESULTS: 340 | #hashid = get_hashid_from_hash(image_hash) 341 | try: 342 | (t_url, t_posts, t_comments, t_related, t_downloaded) = \ 343 | get_results_tuple_for_hash(image_url, image_hash, downloaded) 344 | except Exception, e: 345 | continue 346 | total_searched += 1 347 | merge_results(posts, t_posts) 348 | merge_results(comments, t_comments) 349 | merge_results(related, t_related) 350 | if len(posts) + len(comments) + len(related) == 0: 351 | print_error('no results - searched %d google images' % total_searched) 352 | return 353 | print json.dumps( { 354 
|         'posts' : posts,
355 |         'comments' : comments,
356 |         'url' : 'google:%s' % url,
357 |         'related' : related
358 |     } )
359 | 
360 | def handle_google_result(url, time_to_stop):
361 |     global GOOGLE_RESULTS, GOOGLE_THREAD_MAX, GOOGLE_THREAD_COUNT
362 |     if time() > time_to_stop: return
363 |     GOOGLE_THREAD_COUNT += 1
364 |     url = web.unshorten(url, timeout=3)
365 |     if time() > time_to_stop:
366 |         GOOGLE_THREAD_COUNT -= 1
367 |         return
368 |     m = web.get_meta(url, timeout=3)
369 |     if 'Content-Type' not in m or \
370 |        'image' not in m['Content-Type'].lower() or \
371 |        time() > time_to_stop:
372 |         GOOGLE_THREAD_COUNT -= 1
373 |         return
374 |     try:
375 |         image_hash = get_hash(url, timeout=4)
376 |         GOOGLE_RESULTS.append( (url, image_hash, True) )
377 |     except Exception, e:
378 |         # Failed to get hash; fall through to the single decrement below
379 |         pass
380 |     GOOGLE_THREAD_COUNT -= 1
381 | 
382 | ###################
383 | # Helper methods
384 | def get_results_tuple_for_image(url):
385 |     """ Returns tuple of posts, comments, related for an image """
386 |     url = sanitize_url(url)
387 | 
388 |     try:
389 |         (hashid, downloaded) = get_hashid(url)
390 |         if hashid == -1 or hashid == 870075: # No hash matches
391 |             return (url, [], [], [], downloaded)
392 |         image_hashes = db.select('hash', 'Hashes', 'id = %d' % hashid)
393 |         if len(image_hashes) == 0: raise Exception('could not get hash for %s' % url)
394 |         image_hash = image_hashes[0][0]
395 |     except Exception, e:
396 |         raise e
397 | 
398 |     return get_results_tuple_for_hash(url, image_hash, downloaded)
399 | 
400 | def get_results_tuple_for_hash(url, image_hash, downloaded):
401 |     posts = []
402 |     comments = []
403 |     related = [] # Comments containing links found in posts
404 | 
405 |     # Get matching hashes in 'Images' table.
406 |     # This shows all of the posts, comments, and albums containing the hash
407 |     query_text = 'hashid IN'
408 |     query_text += ' (SELECT id FROM Hashes WHERE hash = "%s")' % (image_hash)
409 |     query_text += ' GROUP BY postid, commentid'
410 |     query_text += ' LIMIT 50'
411 |     images = db.select('urlid, albumid, postid, commentid', 'Images', query_text)
412 |     for (urlid, albumid, postid, commentid) in images:
413 |         # Get image's URL, dimensions & size
414 |         if commentid != 0:
415 |             # Comment
416 |             try:
417 |                 comment_dict = build_comment(commentid, urlid, albumid)
418 |                 if comment_dict['author'] == 'rarchives': continue
419 |                 comments.append(comment_dict)
420 |             except: pass
421 |         else:
422 |             # Post
423 |             try:
424 |                 post_dict = build_post(postid, urlid, albumid)
425 |                 posts.append(post_dict)
426 | 
427 |                 for rel in build_related_comments(postid, urlid, albumid):
428 |                     if rel['author'] == 'rarchives': continue
429 |                     related.append(rel)
430 |             except: pass
431 | 
432 |     for com in comments:
433 |         for rel in related:
434 |             if rel['hexid'] == com['hexid']:
435 |                 related.remove(rel)
436 |                 break
437 | 
438 |     posts = sort_by_ranking(posts)
439 |     comments = sort_by_ranking(comments)
440 |     return (url, posts, comments, related, downloaded)
441 | 
442 | def get_hash(url, timeout=10):
443 |     """
444 |     Downloads the image at 'url' and returns its hash (avhash) as a string.
445 |     Raises an exception if the image cannot be downloaded or hashed.
446 |     Does not modify DB!
(read only) 447 | """ 448 | # Download image 449 | (file, temp_image) = tempfile.mkstemp(prefix='redditimg', suffix='.jpg') 450 | close(file) 451 | if not web.download(url, temp_image, timeout=timeout): 452 | raise Exception('unable to download image at %s' % url) 453 | 454 | # Get image hash 455 | try: 456 | image_hash = str(avhash(temp_image)) 457 | try: remove(temp_image) 458 | except: pass 459 | return image_hash 460 | except Exception, e: 461 | # Failed to get hash, delete image & raise exception 462 | try: remove(temp_image) 463 | except: pass 464 | raise e 465 | 466 | def get_hashid_from_hash(image_hash): 467 | hashids = db.select('id', 'Hashes', 'hash = "%s"' % (image_hash)) 468 | if len(hashids) == 0: 469 | return -1 470 | return hashids[0][0] 471 | 472 | 473 | def get_hashid(url, timeout=10): 474 | """ 475 | Retrieves hash ID ('Hashes' table) for image. 476 | Returns -1 if the image's hash was not found in the table. 477 | Does not modify DB! (read only) 478 | """ 479 | existing = db.select('hashid', 'ImageURLs', 'url = "%s"' % url) 480 | if len(existing) > 0: 481 | return (existing[0][0], False) 482 | 483 | # Download image 484 | (file, temp_image) = tempfile.mkstemp(prefix='redditimg', suffix='.jpg') 485 | close(file) 486 | if not web.download(url, temp_image, timeout=timeout): 487 | raise Exception('unable to download image at %s' % url) 488 | 489 | # Get image hash 490 | try: 491 | image_hash = str(avhash(temp_image)) 492 | try: remove(temp_image) 493 | except: pass 494 | except Exception, e: 495 | # Failed to get hash, delete image & raise exception 496 | try: remove(temp_image) 497 | except: pass 498 | raise e 499 | 500 | hashids = db.select('id', 'Hashes', 'hash = "%s"' % (image_hash)) 501 | if len(hashids) == 0: 502 | return (-1, True) 503 | return (hashids[0][0], True) 504 | 505 | def merge_results(source_list, to_add): 506 | """ 507 | Adds posts/comments from to_add list to source_list 508 | Ensures source_list is free fo duplicates. 
509 | """ 510 | for target in to_add: 511 | should_add = True 512 | # Check for duplicates 513 | for source in source_list: 514 | if target['hexid'] == source['hexid']: 515 | should_add = False 516 | break 517 | if should_add: source_list.append(target) 518 | 519 | 520 | 521 | ################### 522 | # "Builder" methods 523 | 524 | def build_post(postid, urlid, albumid): 525 | """ Builds dict containing attributes about a post """ 526 | item = {} # Dict to return 527 | # Thumbnail 528 | item['thumb'] = 'thumbs/%d.jpg' % urlid 529 | if not path.exists(item['thumb']): item['thumb'] = '' 530 | 531 | # Get info about post 532 | ( postid, \ 533 | item['hexid'], \ 534 | item['title'], \ 535 | item['url'], \ 536 | item['text'], \ 537 | item['author'], \ 538 | item['permalink'], \ 539 | item['subreddit'], \ 540 | item['comments'], \ 541 | item['ups'], \ 542 | item['downs'], \ 543 | item['score'], \ 544 | item['created'], \ 545 | item['is_self'], \ 546 | item['over_18']) \ 547 | = db.select('*', 'Posts', 'id = %d' % (postid))[0] 548 | # Get info about image 549 | ( item['imageurl'], \ 550 | item['width'], \ 551 | item['height'], \ 552 | item['size']) \ 553 | = db.select('url, width, height, bytes', 'ImageURLs', 'id = %d' % urlid)[0] 554 | # Set URL to be the album (if it's an album) 555 | if albumid != 0: 556 | item['url'] = db.select("url", "Albums", "id = %d" % albumid)[0][0] 557 | return item 558 | 559 | def build_comment(commentid, urlid, albumid): 560 | """ Builds dict containing attributes about a comment """ 561 | item = {} # Dict to return 562 | 563 | # Thumbnail 564 | item['thumb'] = 'thumbs/%d.jpg' % urlid 565 | if not path.exists(item['thumb']): item['thumb'] = '' 566 | 567 | # Get info about comment 568 | ( comid, \ 569 | postid, \ 570 | item['hexid'], \ 571 | item['author'], \ 572 | item['body'], \ 573 | item['ups'], \ 574 | item['downs'], \ 575 | item['created']) \ 576 | = db.select('*', 'Comments', 'id = %d' % commentid)[0] 577 | 578 | # Get info about post comment is replying to 579 | ( item['subreddit'], \ 580 | item['permalink'], \ 581 | item['postid']) \ 582 | = db.select('subreddit, permalink, hexid', 'Posts', 'id = %d' % (postid))[0] 583 | # Get info about image 584 | ( item['imageurl'], \ 585 | item['width'], \ 586 | item['height'], \ 587 | item['size']) \ 588 | = db.select('url, width, height, bytes', 'ImageURLs', 'id = %d' % urlid)[0] 589 | if albumid != 0: 590 | item['url'] = db.select("url", "Albums", "id = %d" % albumid)[0][0] 591 | return item 592 | 593 | def build_related_comments(postid, urlid, albumid): 594 | """ Builds dict containing attributes about a comment related to a post""" 595 | items = [] # List to return 596 | #return items 597 | 598 | # Get info about post comment is replying to 599 | ( postsubreddit, \ 600 | postpermalink, \ 601 | posthex) \ 602 | = db.select('subreddit, permalink, hexid', 'Posts', 'id = %d' % postid)[0] 603 | 604 | # Get & iterate over comments 605 | for ( comid, \ 606 | postid, \ 607 | comhexid, \ 608 | comauthor, \ 609 | combody, \ 610 | comups, \ 611 | comdowns, \ 612 | comcreated) \ 613 | in db.select('*', 'Comments', 'postid = %d' % postid): 614 | item = { 615 | # Post-specific attributes 616 | 'subreddit' : postsubreddit, 617 | 'permalink' : postpermalink, 618 | 'postid' : posthex, 619 | # Comment-specific attributes 620 | 'hexid' : comhexid, 621 | 'author' : comauthor, 622 | 'body' : combody, 623 | 'ups' : comups, 624 | 'downs' : comdowns, 625 | 'created' : comcreated, 626 | 'thumb' : '', 627 | # Image-specific attributes 
(irrelevant) 628 | 'imageurl': '', 629 | 'width' : 0, 630 | 'height' : 0, 631 | 'size' : 0 632 | } 633 | items.append(item) 634 | return items 635 | 636 | ######################## 637 | # Helper methods 638 | 639 | def print_error(text): 640 | print json.dumps({'error': text}) 641 | 642 | def get_keys(): 643 | """ Returns key/value pairs from query, uses CLI args if none found. """ 644 | form = cgi.FieldStorage() 645 | keys = {} 646 | for key in form.keys(): 647 | keys[key] = form[key].value 648 | if len(keys) == 0 and len(argv) > 2: 649 | keys = { argv[1] : argv[2] } 650 | return keys 651 | 652 | def sort_by_ranking(objs): 653 | """ Sorts list of posts/comments based on heuristic. """ 654 | for obj in objs: 655 | if 'comments' in obj: 656 | obj['ranking'] = int(obj['comments']) 657 | obj['ranking'] += int(obj['ups']) 658 | else: 659 | obj['ranking'] = int(obj['ups']) 660 | if 'url' in obj and 'imgur.com/a/' in obj['url'] \ 661 | or 'imageurl' in obj and 'imgur.com/a/' in obj['imageurl']: 662 | obj['ranking'] += 600 663 | if obj['author'] in TRUSTED_AUTHORS: 664 | obj['ranking'] += 500 665 | if obj['subreddit'] in TRUSTED_SUBREDDITS: 666 | obj['ranking'] += 400 667 | return sorted(objs, reverse=True, key=lambda tup: tup['ranking']) 668 | 669 | def sanitize_url(url): 670 | """ 671 | Retrieves direct link to image based on URL, 672 | Strips excess data from imgur albums, 673 | Throws Exception if unable to find direct image. 674 | """ 675 | url = url.strip() 676 | if '?' in url: url = url[:url.find('?')] 677 | if '#' in url: url = url[:url.find('#')] 678 | if url == '' or not '.' in url: 679 | raise Exception('invalid URL') 680 | 681 | if not '://' in url: url = 'http://%s' % url # Fix for what'shisface who forgets to prepend http:// 682 | 683 | while url.endswith('/'): url = url[:-1] 684 | if 'imgur.com' in url: 685 | if '.com/a/' in url: 686 | # Album 687 | url = url.replace('http://', '').replace('https://', '') 688 | while url.endswith('/'): url = url[:-1] 689 | while url.count('/') > 2: url = url[:url.rfind('/')] 690 | if '?' in url: url = url[:url.find('?')] 691 | if '#' in url: url = url[:url.find('#')] 692 | url = 'http://%s' % url # How the URL will be stored in the DB 693 | return url 694 | 695 | elif url.lower().endswith('.jpeg') or \ 696 | url.lower().endswith('.jpg') or \ 697 | url.lower().endswith('.png') or \ 698 | url.lower().endswith('.gif'): 699 | # Direct imgur link, find highest res 700 | url = imgur_get_highest_res(url) 701 | # Drop out of if statement & parse image 702 | else: 703 | # Indirect imgur link (e.g. "imgur.com/abcde") 704 | r = web.get(url) 705 | if '"image_src" href="' in r: 706 | url = web.between(r, '"image_src" href="', '"')[0] 707 | else: 708 | raise Exception("unable to find imgur image (404?)") 709 | elif 'gfycat.com' in url and not 'thumbs.gfycat.com' in url: 710 | r = web.get(url) 711 | if "og:image' content='" in r: 712 | url = web.between(r, "og:image' content='", "'")[-1] 713 | else: 714 | raise Exception("unable to find gfycat poster image") 715 | elif url.lower().endswith('.jpg') or \ 716 | url.lower().endswith('.jpeg') or \ 717 | url.lower().endswith('.png') or \ 718 | url.lower().endswith('.gif'): 719 | # Direct link to non-imgur image 720 | pass # Drop out of if statement & parse image 721 | else: 722 | # Not imgur, not a direct link; no way to parse 723 | raise Exception("unable to parse non-direct, non-imgur link") 724 | return url 725 | 726 | def imgur_get_highest_res(url): 727 | """ Retrieves highest-res imgur image """ 728 | if not 'h.' 
in url: 729 | return url 730 | temp = url.replace('h.', '.') 731 | m = web.get_meta(temp) 732 | if 'Content-Type' in m and 'image' in m['Content-Type'].lower() and \ 733 | 'Content-Length' in m and m['Content-Length'] != '503': 734 | return temp 735 | else: 736 | return url 737 | 738 | def is_user_valid(username): 739 | """ Checks if username is valid reddit name, assumes lcase/strip """ 740 | allowed = 'abcdefghijklmnopqrstuvwxyz1234567890_-' 741 | valid = True 742 | for c in username.lower(): 743 | if not c in allowed: 744 | valid = False 745 | break 746 | return valid 747 | 748 | if __name__ == '__main__': 749 | """ Entry point. Only run when executed; not imported. """ 750 | #search_google('http://fap.to/images/full/45/465/465741907.jpg') 751 | #search_google('http://i.imgur.com/TgYeS8u.png') 752 | #search_google('http://i.imgur.com/T4Wtb6f.jpg') 753 | print "Content-Type: application/json" 754 | print "" 755 | main() # Main & it's called functions will print as needed 756 | print '\n' 757 | -------------------------------------------------------------------------------- /search.js: -------------------------------------------------------------------------------- 1 | /* Everything related to searching, displaying, fancy UI tweaks, etc. 2 | I Should probably split this into separate JS files... */ 3 | 4 | // Shortened version of getElementById 5 | function gebi(id) { return document.getElementById(id); } 6 | 7 | // Statusbar (overwrites previous text) 8 | function statusbar(text) { gebi("status").innerHTML = text; } 9 | 10 | // Output (appends text) 11 | function output(text) { gebi("output").innerHTML += text + "
"; } 12 | function output_posts(text) { gebi("output_posts").innerHTML = text; } 13 | function output_comments(text) { gebi("output_comments").innerHTML = text; } 14 | function output_related(text) { gebi("output_related").innerHTML = text; } 15 | 16 | // Redirect to the page so the URL changes & we know what image is being searched 17 | function redirect_search() { 18 | var url = gebi("url").value; 19 | url = url.replace(/[.]/g, '%2E'); 20 | url = encodeURIComponent(url); 21 | document.location.href = document.location.pathname + '?url=' + url; 22 | } 23 | 24 | function search_click() { 25 | var url = gebi("url").value; 26 | // Handle modifiers 27 | if (url.indexOf('text:') == 0) { 28 | sendSearchRequest('search.cgi?text=' + url.substr(5)); 29 | } else if (url.indexOf('cache:') == 0) { 30 | sendSearchRequest('search.cgi?cache=' + url.substr(6)); 31 | } else if (url.indexOf('user:') == 0) { 32 | sendSearchRequest('search.cgi?user=' + url.substr(5)); 33 | } else if (url.indexOf('google:') == 0) { 34 | sendSearchRequest('search.cgi?google=' + url.substr(7)); 35 | } else if (url.indexOf('.') == -1) { 36 | // No period, assume username 37 | sendSearchRequest('search.cgi?user=' + url); 38 | } else { 39 | // Assume URL search 40 | if (url.indexOf('://') == -1) { 41 | url = 'http://' + url; 42 | } 43 | sendSearchRequest('search.cgi?url=' + url); 44 | gebi('url').blur(); 45 | } 46 | } 47 | 48 | function redirect_user() { 49 | var user = gebi("user").value; 50 | document.location.href = document.location.pathname + '?user=' + user; 51 | } 52 | 53 | function user_click() { 54 | var user = gebi("user"); 55 | sendSearchRequest('search.cgi?user=' + user.value); 56 | user.blur(); 57 | } 58 | 59 | function searchKeyDown(evt) { 60 | var theEvent = evt || window.event; 61 | var key = theEvent.keyCode || theEvent.which; 62 | key = String.fromCharCode( key ); 63 | if (theEvent.keyCode == 13) { 64 | redirect_search(); // search_click(); 65 | } 66 | } 67 | 68 | function userKeyDown(evt) { 69 | var theEvent = evt || window.event; 70 | var key = theEvent.keyCode || theEvent.which; 71 | key = String.fromCharCode( key ); 72 | if (theEvent.keyCode == 13) { 73 | redirect_user(); //user_click(); 74 | } 75 | } 76 | 77 | function getExternalSearchLinks(url) { 78 | var out = ''; 79 | out += '
'; 80 | out += ''; 106 | out += '
'; 107 | return out; 108 | } 109 | 110 | // Sends asynchronous XML request, handles response 111 | function sendSearchRequest(query) { 112 | var request = makeHttpObject(); 113 | statusbar(' searching...'); 114 | setTimeout( function() { 115 | var status = gebi("status"); 116 | if (status.innerHTML.indexOf('searching...') >= 0) { 117 | status.innerHTML += '
some searches may take up to 20 seconds. please be patient.'; 118 | var url = gebi('url').value.replace(//g, ''); 119 | if (url.indexOf('imgur.com/a/') == -1 && url.indexOf('text:') == -1 && url.indexOf('user:') == -1 && url.indexOf('cache:') == -1) { 120 | var out = getExternalSearchLinks(url); 121 | status.innerHTML += out; 122 | } 123 | } 124 | }, 5000); 125 | gebi("output").innerHTML = ''; 126 | output_posts(''); 127 | output_comments(''); 128 | request.open("GET", query, true); 129 | request.send(null); 130 | request.onreadystatechange = function() { 131 | if (request.readyState == 4) { 132 | if (request.status == 200) { 133 | // success 134 | handleSearchResponse(request.responseText); 135 | } else { 136 | // error 137 | statusbar('error: status ' + request.status + ''); 138 | } 139 | } 140 | } 141 | } 142 | 143 | function handleSearchResponse(responseText) { 144 | if (responseText == null || responseText == '') { 145 | statusbar('invalid URL') 146 | return; 147 | } 148 | var resp = JSON.parse(responseText); 149 | if (resp['error'] != null) { 150 | statusbar('error: ' + resp['error'] + ''); 151 | return; 152 | } 153 | if (resp['err'] != null) { 154 | statusbar('' + resp['err'] + ''); 155 | return; 156 | } 157 | if (resp['url'] != null) { 158 | gebi('url').value = resp['url'] 159 | } 160 | if (resp['images'] != null) { 161 | // Image results for (cached) album 162 | var out = '
'; 163 | out += ''; 164 | out += ''; 165 | for (var i = 0; i < resp.images.length; i++) { 166 | var url = resp.images[i].url; 167 | var thumb = ''; 168 | var USE_IMGUR_BY_DEFAULT = true; 169 | if (USE_IMGUR_BY_DEFAULT || resp.images[i].thumb == null) { 170 | var tempi = url.lastIndexOf('.'); 171 | thumb = url.substr(0, tempi) + 's' + url.substr(tempi); 172 | } else { 173 | thumb = resp.images[i].thumb; 174 | console.log(thumb); 175 | } 176 | out += ''; 182 | if (i % 5 == 4) { 183 | out += ''; 184 | } 185 | } 186 | out += ''; 187 | out += ''; 188 | out += ''; 189 | for (var i = 0; i < resp.images.length; i++) { 190 | if (resp.images[i].thumb) { 191 | out += ''; 192 | } else { 193 | out += ''; 194 | if (i % 5 == 4) { 195 | out += ''; 196 | } 197 | } 198 | } 199 | out += ''; 200 | out += '
' + resp.images.length + ' album images (imgur)
'; 177 | out += ''; 178 | out += ''; 179 | out += ''; 180 | //out += resp.images[i]; 181 | out += '
' + resp.images.length + ' thumbnails (rarchives)
(none)
'; 201 | out += '
' + resp.images.length + ' album links
'; 202 | out += '
'; 203 | for (var i = 0; i < resp.images.length; i++) { 204 | out += ''; 205 | out += resp.images[i].url; 206 | out += '
'; 207 | } 208 | out += '
'; 209 | output(out); 210 | statusbar(''); 211 | return; 212 | } 213 | statusbar(''); 214 | if (resp.posts.length == 0 && resp.comments.length == 0) { 215 | // No results 216 | statusbar('no results'); 217 | } 218 | if (resp.checked != null && resp.total != null) { 219 | // Album search; number of results 220 | var stat = gebi('status'); 221 | if (stat.innerHTML !== '') { 222 | stat.innerHTML += '

'; 223 | } 224 | stat.innerHTML += 'searched ' + resp.checked + ' of ' + resp.total + ' images'; 225 | if (resp.cached) { 226 | stat.innerHTML += ' (cached)'; 227 | } 228 | } 229 | 230 | // POSTS 231 | if (resp.posts.length > 0) { 232 | var result = []; 233 | result.push(''); 234 | var s = (resp.posts.length == 1) ? '' : 's'; 235 | result.push(''); 236 | for (var i in resp['posts']) { 237 | var post = resp['posts'][i]; 238 | result.push(display_post(post)); 239 | } 240 | result.push('
' + resp.posts.length + ' post' + s + '
'); 241 | output_posts(result.join('')); 242 | } 243 | 244 | // COMMENTS 245 | if (resp.comments.length > 0) { 246 | var result = []; 247 | result.push(''); 248 | var s = (resp.comments.length == 1) ? '' : 's'; 249 | result.push(''); 250 | for (var i in resp['comments']) { 251 | var comment = resp['comments'][i]; 252 | result.push(display_comment(comment)); 253 | } 254 | result.push('
' + resp.comments.length + ' comment' + s + '
'); 255 | output_comments(result.join('')); 256 | } 257 | 258 | // RELATED COMMENTS 259 | for (var i = resp.related.length - 1; i >= 0 && resp.related.length; i--) { 260 | // Remove comments that don't contain imgur albums 261 | if (resp.related[i].body.indexOf('imgur.com/a/') == -1) { 262 | resp.related.splice(i, 1); 263 | } 264 | } 265 | if (resp.related.length > 0) { 266 | var result = []; 267 | result.push(''); 268 | var s = (resp.related.length == 1) ? '' : 's'; 269 | result.push(''); 270 | for (var i in resp.related) { 271 | var related = resp.related[i]; 272 | result.push(display_comment(related)); 273 | } 274 | result.push('
' + resp.related.length + ' related comment' + s + '
'); 275 | output_related(result.join('')); 276 | } 277 | var url = gebi('url').value.replace(//g, ''); 278 | if (url.indexOf('imgur.com/a/') == -1 && url.indexOf('text:') == -1 && url.indexOf('user:') == -1 && url.indexOf('cache:') == -1) { 279 | var out = getExternalSearchLinks(url); 280 | output(out); 281 | } 282 | 283 | } 284 | 285 | function display_post(post) { 286 | var txt; 287 | var url = post.url; var score = post.score; var ups = post.ups; var downs = post.downs; 288 | var title = post.title; var permalink = post.permalink; var created = post.created; 289 | var author = post.author; var thumb = post.thumb; var subreddit = post.subreddit; 290 | var comments = post.comments; var width = post.width; var height = post.height; var size = post.size; 291 | var imageurl = post.imageurl; 292 | var date = new Date(0); 293 | date.setUTCSeconds(created); 294 | txt = ''; 295 | txt += ''; 296 | txt += ''; 297 | txt += ''; 299 | txt += ''; 301 | txt += '' 302 | txt += ''; 303 | txt += '' 304 | txt += ''; 305 | txt += ''; 306 | txt += ''; 307 | txt += ''; 308 | txt += ''; 309 | txt += ''; 314 | txt += '' 320 | txt += '' 333 | txt += ''; 334 | txt +=' '; 335 | return txt; 336 | } 337 | 338 | function display_comment(comment) { 339 | var txt = ''; 340 | var score = comment.ups - comment.downs; 341 | var hexid = comment.hexid; var postid = comment.postid; 342 | var created = comment.created; var author = comment.author; 343 | var body = comment.body; var imageurl = comment.imageurl; 344 | var width = comment.width; var height = comment.height; var size = comment.size; 345 | var date = new Date(0); 346 | date.setUTCSeconds(created); 347 | if (comment.url != null) { 348 | body = markdown_to_html(body, comment.url); 349 | } else { 350 | body = markdown_to_html(body, comment.imageurl); 351 | } 352 | txt = ''; 353 | txt += ''; 354 | txt += ''; 355 | txt += ''; 356 | txt += ''; // '; 357 | txt += ''; 358 | txt += ''; 359 | txt += ''; 360 | txt += ''; 361 | txt += ''; 362 | txt += ''; 369 | txt += '' 372 | txt += '' 380 | txt += ''; 381 | txt +=' '; 382 | return txt; 383 | } 384 | 385 | function markdown_to_html(text, relevant_url) { 386 | var h = text; 387 | h = h.replace(/\n /g, '\n').replace(/ \n/g, '\n').replace(/\n\n/g, '\n').replace(/\n/g, '
') 388 | var result = ''; 389 | var i = h.indexOf("http://"); 390 | var j; var url; var re; var previous = 0; 391 | while (i >= 0) { 392 | result += h.substring(previous, i); 393 | j = i + 7; 394 | while (j < h.length && h.charAt(j) != '\n' && h.charAt(j) != ' ' && h.charAt(j) != ')' && h.charAt(j) != '<' && h.charAt(j) != ']') { 395 | j++; 396 | } 397 | url = h.substring(i, j); 398 | if (url == relevant_url) { 399 | result += '' + url + ''; 400 | } else { 401 | result += '' + url + ''; 402 | } 403 | previous = j; 404 | i = h.indexOf("http://", previous); 405 | } 406 | result += h.substring(previous); 407 | return result; 408 | } 409 | 410 | function get_time(seconds) { 411 | var diff = Math.round(new Date().getTime() / 1000) - seconds; 412 | var d = { 413 | 'second' : 60, 414 | 'minute' : 60, 415 | 'hour' : 24, 416 | 'day' : 30, 417 | 'month' : 12, 418 | 'year' : 1000 419 | }; 420 | for (var key in d) { 421 | if (diff <= d[key]) { 422 | diff = diff.toFixed(0); 423 | var result = diff + ' '; 424 | result += key; 425 | if (diff != 1) 426 | result += 's'; 427 | result += ' ago'; 428 | return result; 429 | } 430 | diff /= d[key]; 431 | } 432 | return '? days ago'; 433 | } 434 | 435 | function bytes_to_readable(bytes) { 436 | var scale = ['B', 'kB', 'mB']; 437 | for (var i = scale.length - 1; i >= 0; i--) { 438 | var cur = Math.pow(1024, i); 439 | if (cur < bytes) { 440 | return (bytes / cur).toFixed(1) + scale[i]; 441 | } 442 | } 443 | return '?bytes' 444 | } 445 | 446 | function get_subreddits() { 447 | var request = makeHttpObject(); 448 | gebi("subreddits").innerHTML = 'loading...'; 449 | request.open("GET", 'subreddits.cgi?get=true', true); 450 | request.send(null); 451 | request.onreadystatechange = function() { 452 | if (request.readyState == 4) { 453 | if (request.status == 200) { 454 | // success 455 | handleGetSubredditsResponse(request.responseText); 456 | } else { 457 | // error 458 | gebi('subreddits').innerHTML = "error: " + request.status + ""; 459 | } 460 | } 461 | } 462 | } 463 | function handleGetSubredditsResponse(responseText) { 464 | var json = JSON.parse(responseText); 465 | if (json['error'] != null) { 466 | gebi('subreddits').innerHTML = 'error: ' + error; 467 | return; 468 | } 469 | var subreddits = json['subreddits']; 470 | var output = '
monitoring ' + subreddits.length + ' subreddits
'; 471 | for (var i in subreddits) { 472 | output += ''; 473 | output += '' + subreddits[i] + ' '; 474 | } 475 | gebi('subreddits').innerHTML = output; 476 | } 477 | 478 | function sendStatusRequest() { 479 | var request = makeHttpObject(); 480 | request.open("GET", 'status.cgi', true); 481 | request.send(null); 482 | request.onreadystatechange = function() { 483 | if (request.readyState == 4) { 484 | if (request.status == 200) { 485 | // success 486 | handleStatusResponse(request.responseText); 487 | } else { 488 | // error 489 | gebi('database_status').innerHTML = "error! async request status code: " + request.status; 490 | } 491 | } 492 | } 493 | } 494 | function handleStatusResponse(responseText) { 495 | var resp = JSON.parse(responseText)["status"]; 496 | gebi("db_images").innerHTML = number_commas(resp['images']); 497 | gebi("db_posts").innerHTML = number_commas(resp['posts']); 498 | gebi("db_comments").innerHTML = number_commas(resp['comments']); 499 | gebi("db_albums").innerHTML = number_commas(resp['albums']); 500 | gebi("db_subreddits").innerHTML = number_commas(resp['subreddits']); 501 | } 502 | 503 | // Add commas to the thousands places in a number 504 | function number_commas(x) { 505 | return x.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ","); 506 | } 507 | 508 | // Create new XML request object 509 | function makeHttpObject() { 510 | try { return new XMLHttpRequest(); 511 | } catch (error) {} 512 | try { return new ActiveXObject("Msxml2.XMLHTTP"); 513 | } catch (error) {} 514 | try { return new ActiveXObject("Microsoft.XMLHTTP"); 515 | } catch (error) {} 516 | throw new Error("Could not create HTTP request object."); 517 | } 518 | 519 | // Check URL to see if a query was passed and we need to search immediately 520 | function checkURL() { 521 | var query = parent.document.URL; 522 | if (query.indexOf('?url=') >= 0) { 523 | var url = query.substring(query.indexOf('?url=') + 5); 524 | url = decodeURIComponent(url); 525 | url = decodeURIComponent(url); 526 | gebi("url").value = url; 527 | search_click(); 528 | return true; 529 | } else if (query.indexOf('?user=') >= 0) { 530 | var user = query.substring(query.indexOf('?user=') + 6); 531 | gebi("url").value = 'user:' + user; 532 | search_click(); 533 | return true; 534 | } 535 | return false; 536 | } 537 | 538 | function setTheme() { 539 | var theme = getCookie('theme'); 540 | if (theme == '') 541 | theme = 'dark'; 542 | var oldlink = document.getElementsByTagName("link")[0]; 543 | 544 | var newlink = document.createElement("link") 545 | newlink.setAttribute("rel", "stylesheet"); 546 | newlink.setAttribute("type", "text/css"); 547 | newlink.setAttribute("href", theme + '.css'); 548 | 549 | document.getElementsByTagName("head")[0].replaceChild(newlink, oldlink); 550 | } 551 | 552 | function setCookie(key, value) { 553 | document.cookie = key + '=' + value + '; expires=Fri, 27 Dec 2999 00:00:00 UTC; path=/'; 554 | } 555 | function getCookie(key) { 556 | var cookies = document.cookie.split('; '); 557 | for (var i in cookies) { 558 | var pair = cookies[i].split('='); 559 | if (pair[0] == key) 560 | return pair[1]; 561 | } 562 | return ""; 563 | } 564 | 565 | function over18() { 566 | if (getCookie('over18') != 'true') { 567 | var body = document.body; 568 | var out = ''; 569 | out += '
'; 570 | out += '
'; 571 | out += 'this website contains explicit content
'; 572 | out += 'this website contains material that is not suitable for persons under the age of 18.
'; 573 | out += '
'; 574 | out += ''; 575 | out += ''; 576 | out += '
'; 577 | body.innerHTML = out; 578 | } 579 | } 580 | 581 | function menu_database_click() { 582 | var menu = gebi("database_menu"); 583 | if (menu.className == 'menuActive') { 584 | collapseMenu(); 585 | return; 586 | } 587 | if (!menu.alreadyRequested) { 588 | sendStatusRequest(); 589 | } 590 | gebi('database_dropdown').style.display = 'table-cell'; 591 | gebi('subreddit_dropdown').style.display = 'none'; 592 | gebi('about_dropdown').style.display = 'none'; 593 | menu.className = 'menuActive'; 594 | gebi('subreddit_menu').className = 'menu'; 595 | gebi('about_menu').className = 'menu'; 596 | // Disable further requests for updates after 1 597 | menu.alreadyRequested = true; 598 | } 599 | function menu_subreddit_click() { 600 | var menu = gebi("subreddit_menu"); 601 | if (menu.className == 'menuActive') { 602 | collapseMenu(); 603 | return; 604 | } 605 | if (!menu.alreadyRequested) { 606 | get_subreddits(); 607 | } 608 | gebi('database_dropdown').style.display = 'none'; 609 | gebi('subreddit_dropdown').style.display = 'table-cell'; 610 | gebi('about_dropdown').style.display = 'none'; 611 | gebi('database_menu').className = 'menu'; 612 | menu.className = 'menuActive'; 613 | gebi('about_menu').className = 'menu'; 614 | // Disable further updates 615 | menu.alreadyRequested = true; 616 | } 617 | function menu_about_click() { 618 | if (gebi('about_menu').className == 'menuActive') { 619 | collapseMenu(); 620 | return; 621 | } 622 | gebi('database_dropdown').style.display = 'none'; 623 | gebi('subreddit_dropdown').style.display = 'none'; 624 | gebi('about_dropdown').style.display = 'table-cell'; 625 | gebi('database_menu').className = 'menu'; 626 | gebi('subreddit_menu').className = 'menu'; 627 | gebi('about_menu').className = 'menuActive'; 628 | } 629 | function collapseMenu() { 630 | gebi('database_dropdown').style.display = 'none'; 631 | gebi('subreddit_dropdown').style.display = 'none'; 632 | gebi('about_dropdown').style.display = 'none'; 633 | gebi('database_menu').className = 'menu'; 634 | gebi('subreddit_menu').className = 'menu'; 635 | gebi('about_menu').className = 'menu'; 636 | } 637 | 638 | function gotoRoot() { 639 | window.location = document.location.pathname; 640 | } 641 | 642 | function user_redirect_check(user) { 643 | var re = /^([-_]?[A-Za-z0-9])*$/; 644 | if (!re.test(user)) { return false; } 645 | gebi("user").value = user; 646 | redirect_user(); 647 | return true; 648 | } 649 | 650 | // Function to run after window has loaded 651 | function init() { 652 | over18(); 653 | setTheme(); 654 | if (!checkURL()) { 655 | // Not loading an image; randomly pick a url to display 656 | var urls = ['http://i.imgur.com/Xz42HQa.jpg', 'http://i.imgur.com/IFdWn.jpg', 'http://i.imgur.com/3qrBM.jpg', 'http://i.minus.com/ibu7TXSVaN73Nn.gif', 'http://i.imgur.com/O1IXj.jpg', 'http://i.imgur.com/QNj8w.jpg', 'http://i.imgur.com/xA1wr.jpg', 'http://i.imgur.com/54SAK.jpg', 'http://i.imgur.com/EpMv9.jpg', 'http://i.imgur.com/9VAfG.jpg', 'http://i.imgur.com/OaSfh.gif', 'http://i.imgur.com/iHjXO.jpg', 'http://i.imgur.com/IDLu8.jpg', 'http://i.imgur.com/ReKZC.jpg', 'http://i.imgur.com/mhvSa.jpg', 'http://i.imgur.com/qfzpA.jpg']; 657 | if (gebi('url') != null) { 658 | gebi('url').value = urls[Math.floor(Math.random() * urls.length)]; 659 | } 660 | } 661 | } 662 | 663 | window.onload = init; 664 | 665 | -------------------------------------------------------------------------------- /ReddiWrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 
| """ 4 | Reddit.com API Wrapper (ReddiWrap) 5 | 6 | Intuitive middleware between you and reddit.com 7 | 8 | (C) 2012 Derv Merkler @ github.com/derv82/reddiwrap 9 | 10 | TODO: 11 | include 'depth' in comments to know how deep into replies we are. 12 | test all use cases (user about page, /r/none, etc) 13 | 14 | throw exceptions when receiving errors from server (403) 15 | 16 | """ 17 | 18 | from Httpy import Httpy # Class for communicating with the web server. 19 | 20 | from sys import exit 21 | 22 | import json 23 | 24 | from datetime import date # For converting unix epoch time in seconds to date/time 25 | from time import time # For getting current... and possibly throttling requests 26 | 27 | 28 | def pretty_string(dict, indent=0): 29 | """ Returns string containing all keys and values in a dict. Makes it 'Pretty'. """ 30 | result = [] 31 | for key, value in dict.iteritems(): 32 | if isinstance(value, unicode): 33 | result.append('\t' * indent + ('%s:\t "%s"' % (key, value)).encode('ascii', 'ignore')) 34 | elif isinstance(value, list): 35 | if len(value) == 0: 36 | result.append('\t' * indent + '%s:\t []' % key) 37 | else: 38 | result.append('\t' * indent + '%s:\t' % key) 39 | for element in dict[key]: 40 | result.append(pretty_string(element.__dict__, indent + 1)) 41 | else: 42 | result.append('\t' * indent + '%s:\t "%s"' % (key, value)) 43 | return '\n'.join(result) 44 | 45 | 46 | class Post(object): 47 | """ 48 | Stores information and logic about reddit "post"s. 49 | A reddit post is a submission that contains either a link or self-text. 50 | Posts contain child comments. 51 | """ 52 | def __init__(self): 53 | self.modhash = '' # base36 string for communicating with account 54 | self.id = '' # base36 id for a post (usually 5 characters) 55 | self.name = '' # example: t1_czwe3. t# is content type, the rest is the ID 56 | self.title = '' # Title of post 57 | self.url = '' # URL to post 58 | self.author = '' # Username of author 59 | self.domain = '' # Domain posted ot 60 | self.subreddit = '' # Subreddit posted to 61 | self.subreddit_id = '' # base36 ID for subreddit. E.g. t5_2uehl 62 | self.permalink = '' # Link to the post (including comments) 63 | self.is_self = False # Self-post 64 | self.selftext = '' # Self-post text 65 | self.selftext_html = '' # HTML for self-post text 66 | self.num_comments = '' # Number of comments 67 | self.score = 0 # upvotes - downvotes * crazy reddit vote fuzzing constant 68 | self.upvotes = 0 69 | self.downvotes = 0 70 | self.over_18 = False # NSFW post 71 | self.hidden = False 72 | self.saved = False 73 | self.edited = False 74 | self.created = 0 75 | self.created_utc = 0 76 | self.comments = [] # List of Comment objects that are replies to the Post 77 | self.has_more_comments = False # Contains comments that have not been loaded 78 | self.more_comments = '' # JSON data containing information about comments to load 79 | self.num_reports = 0 80 | self.banned_by = False 81 | self.approved_by = None 82 | self.media_embed = {} 83 | self.media = None 84 | self.thumbnail = '' 85 | self.link_flair_text = '' 86 | self.link_flair_class = '' # link_flair_css_class": null, 87 | self.author_flair_text = '' # "author_flair_css_class": null, 88 | self.author_flair_class = '' 89 | 90 | def set_using_json_data(self, data): 91 | """ Sets fields using json data. Assumes all fields in JSON exist. 
""" 92 | self.id = data['id'] 93 | self.name = data['name'] 94 | self.title = data['title'] 95 | self.url = data['url'] 96 | self.author = data['author'] 97 | self.domain = data['domain'] 98 | self.subreddit = data['subreddit'] 99 | self.subreddit_id = data['subreddit_id'] 100 | self.permalink = data['permalink'] 101 | self.is_self = data['is_self'] 102 | self.selftext = data['selftext'] 103 | self.selftext_html = data['selftext_html'] 104 | self.num_comments = data['num_comments'] 105 | self.score = data['score'] 106 | self.upvotes = data['ups'] 107 | self.downvotes = data['downs'] 108 | self.over_18 = data['over_18'] 109 | self.hidden = data['hidden'] 110 | self.saved = data['saved'] 111 | self.edited = data['edited'] 112 | self.created = data['created'] 113 | self.created_utc = data['created_utc'] 114 | self.num_reports = data['num_reports'] 115 | self.banned_by = data['banned_by'] 116 | self.approved_by = data['approved_by'] 117 | self.media_embed = data['media_embed'] 118 | self.media = data['media'] 119 | self.thumbnail = data['thumbnail'] 120 | self.link_flair_text = data['link_flair_text'] 121 | self.link_flair_class = data['link_flair_css_class'] 122 | self.author_flair_text = data['author_flair_text'] 123 | self.author_flair_class = data['author_flair_css_class'] 124 | 125 | 126 | def __str__(self): 127 | """ STRING summary of comment; author and body. """ 128 | return ('"%s" by %s in /r/%s' % (self.title, self.author, self.subreddit)).encode('ascii', 'ignore') 129 | 130 | def __repr__(self): 131 | return self.__str__() 132 | 133 | def verbose(self): 134 | """ Returns string containing all fields and their values. Verbose. """ 135 | return pretty_string(self.__dict__) 136 | 137 | 138 | class Comment(object): 139 | """ 140 | Stores information and logic about a comment. 141 | Comments are either direct replies to a Post or replies to other Comments. 142 | """ 143 | def __init__(self): 144 | self.modhash = '' # Modhash included with this comment 145 | self.id = '' 146 | self.name = '' 147 | self.link_id = '' 148 | self.parent_id = '' 149 | self.author = '' 150 | self.body = '' 151 | self.body_html = '' 152 | self.subreddit = '' 153 | self.upvotes = 0 154 | self.downvotes = 0 155 | self.score = 0 156 | self.created = 0 157 | self.created_utc = 0 158 | self.edited = False 159 | self.children = [] 160 | self.has_more_comments = False 161 | self.more_comments = '' 162 | self.num_reports = 0 163 | self.banned_by = '' 164 | self.approved_by = '' 165 | self.flair_class = '' 166 | self.flair_text = '' 167 | 168 | def set_using_json_data(self, data): 169 | """ Initializes object using JSON data. Assumes fields in JSON exist. 
""" 170 | self.id = data['id'] 171 | self.name = data['name'] 172 | if data.get('link_id') != None: 173 | self.link_id = data['link_id'] 174 | if data.get('parent_id') != None: 175 | self.parent_id = data['parent_id'] 176 | self.author = data['author'] 177 | self.body = data['body'] 178 | self.body_html = data['body_html'] 179 | self.subreddit = data['subreddit'] 180 | self.subreddit_id= data['subreddit_id'] 181 | self.upvotes = data['ups'] 182 | self.downvotes = data['downs'] 183 | if data.get('score') != None: 184 | self.score = data['score'] 185 | self.created = data['created'] 186 | self.created_utc = data['created_utc'] 187 | self.edited = data['edited'] 188 | self.num_reports = data['num_reports'] 189 | self.banned_by = data['banned_by'] 190 | self.approved_by = data['approved_by'] 191 | self.flair_class = data['author_flair_css_class'] 192 | self.flair_text = data['author_flair_text'] 193 | 194 | # Adding other comments / more 195 | if data.get('replies') == None: return 196 | replies = data['replies'] 197 | if replies == '' or replies.get('data') == None: return 198 | repdata = replies['data'] 199 | if repdata.get('children') == None: return 200 | for child in repdata['children']: 201 | cdata = child['data'] 202 | ckind = child['kind'] 203 | if ckind == 'more': 204 | self.has_more_comments = True 205 | self.more_comments = cdata 206 | continue 207 | comment = Comment() 208 | comment.set_using_json_data(cdata) 209 | # Recursive call! Parses and stores child comments 210 | self.children.append(comment) 211 | 212 | def __str__(self): 213 | """ STRING summary of comment; author and body. """ 214 | return ('%s: "%s"' % (self.author, self.body)).encode('ascii', 'ignore') 215 | 216 | def __repr__(self): 217 | return self.__str__() 218 | 219 | def verbose(self): 220 | """ Returns string containing all fields and their values. Verbose. """ 221 | return pretty_string(self.__dict__) 222 | 223 | 224 | class UserInfo(object): 225 | """ Contains information about the currently-logged-in reddit user. See user_info() """ 226 | def __init__(self, json_data): 227 | if json_data.get('error') == 404: 228 | self.error = 404 229 | else: 230 | self.id = json_data['id'] 231 | self.has_mail = json_data['has_mail'] # Boolean, True if user has unread mail. 232 | self.name = json_data['name'] # String, username 233 | self.created = json_data['created'] # Time since 1/1/1970 when acct was created 234 | self.created_utc = json_data['created_utc'] # Same as 'created', but in UTC 235 | #self.modhash = json_data['modhash'] # Unique hash for interacting with account 236 | self.link_karma = json_data['link_karma'] # Integer, total score of submissions 237 | self.comment_karma = json_data['comment_karma'] # Integer, total score of comments 238 | self.is_gold = json_data['is_gold'] # Boolean 239 | self.has_mod_mail = json_data['has_mod_mail'] # Boolean 240 | self.is_mod = json_data['is_mod'] # Boolean 241 | 242 | def __repr__(self): 243 | """ Returns string containing all fields and their values. Verbose. """ 244 | return pretty_string(self.__dict__) 245 | 246 | 247 | class Subreddit(object): 248 | """ 249 | Contains information about a single subreddit. 
250 | Used by get_reddits() 251 | """ 252 | def __init__(self, json_data): 253 | self.id = json_data['id'] # 2qh0u 254 | self.name = json_data['name'] # t5_2qh0u 255 | self.display_name = json_data['display_name'] # pics 256 | self.header_img = json_data['header_img'] # .png 257 | self.title = json_data['title'] # /r/Pics 258 | self.url = json_data['url'] # /r/pics/ 259 | self.description = json_data['description'] # 260 | self.created = json_data['created'] # time since 1/1/1970, local 261 | self.created_utc = json_data['created_utc'] # time since 1/1/1970, UTC 262 | self.over18 = json_data['over18'] # false 263 | self.subscribers = json_data['subscribers'] # 1979507 264 | self.public_desc = json_data['public_description'] # 265 | self.header_title = json_data['header_title'] # "Pictures and Images" 266 | 267 | def __repr__(self): 268 | """ Returns string containing all fields and their values. Verbose. """ 269 | return pretty_string(self.__dict__) 270 | 271 | 272 | class Message(object): 273 | """ 274 | Contains information about a single message (PM). 275 | """ 276 | def __init__(self, json_data): 277 | self.id = json_data['id'] # base36 ID for comment/message 278 | self.name = json_data['name'] # t4_c51d3 for message, t1_c52351 for comment reply 279 | self.author = json_data['author'] # Username of author of message 280 | self.subject = json_data['subject'] # Subject of message, or "comment reply" if comment 281 | self.body = json_data['body'] # Text of message 282 | self.body_html = json_data['body_html'] # Text of message, including HTML markup 283 | self.new = json_data['new'] # True if message/comment is unread, False otherwise 284 | self.was_comment = json_data['was_comment'] # True if message is comment, False otherwise 285 | self.first_message = json_data['first_message'] # None of first message, otherwise ID of first msg 286 | self.created = json_data['created'] # Time since 1/1/1970, local time 287 | self.created_utc = json_data['created_utc'] # Time since 1/1/1970, UTC 288 | self.parent_id = json_data['parent_id'] # base36 ID of parent of message 289 | self.context = json_data['context'] # Permalink to comment with context, "" if message 290 | self.dest = json_data['dest'] # Destination username 291 | self.subreddit = json_data['subreddit'] # Subreddit comment was made in, None if message 292 | # Messages with no replies have an empty list for 'replies' [] 293 | # Otherwise, the replies contain the actual replied Message object 294 | self.replies = [] 295 | jreplies = json_data.get('replies') 296 | if jreplies != None and isinstance(jreplies, dict): 297 | jdata = jreplies.get('data') 298 | if jdata != None: 299 | jchildren = jdata.get('children') 300 | if jchildren != None and isinstance(jchildren, list): 301 | for jreply in jchildren: 302 | cdata = jreply.get('data') 303 | ckind = jreply.get('kind') 304 | if cdata == None: continue 305 | # Recursive call 306 | msg = Message(cdata) 307 | self.replies.append(msg) 308 | 309 | def __repr__(self): 310 | """ Returns brief summary of message. """ 311 | return '%s sent PM: "%s"' % (self.author, self.body) 312 | 313 | def verbose(self): 314 | """ Returns string containing all fields and their values. Verbose. """ 315 | return pretty_string(self.__dict__) 316 | 317 | 318 | class ReddiWrap: 319 | """ 320 | Class for interacting with reddit.com 321 | Uses reddit's API. 322 | """ 323 | 324 | def __init__(self, user='', password='', user_agent=None): 325 | """ 326 | Initializes instance fields, sets user agent. 
327 | Logs into reddit if user and password are given. 328 | """ 329 | 330 | # Default user agent is awesome! 331 | if user_agent == None: 332 | user_agent = 'ReddiWrap' 333 | 334 | # Create object we will use to communicate with reddit's servers 335 | self.web = Httpy(user_agent=user_agent) 336 | 337 | self.modhash = '' # Hash used to authenticate/interact with user account 338 | self.last_url = '' # The last URL retrieved 339 | self.before = None # ID pointing to 'previous' page 340 | self.after = None # ID pointing to 'next' page 341 | self.logged_in = False # Flag to detect if we are logged in or not 342 | 343 | # Sets instance fields, logs in user if needed. 344 | self.login(user, password) 345 | 346 | 347 | 348 | #################### 349 | # LOGGING IN & OUT # 350 | #################### 351 | 352 | def login(self, user='', password=''): 353 | """ 354 | Clears cookies/modhash, then logs into reddit if applicable. 355 | Logs out user if user or password is '' or None 356 | 357 | Returns 0 if login (or logout) is successful, 358 | Returns 1 if user/pass is invalid, 359 | Returns 2 if login rate limit is reached, 360 | Returns -1 if some unknown error is encountered 361 | """ 362 | 363 | self.web.clear_cookies() # Removes any traces of previous activity 364 | self.modhash = '' 365 | self.logged_in = False 366 | 367 | if user == '' or user == None or \ 368 | password == '' or password == None: 369 | # "Log out" 370 | self.user = '' 371 | self.password = '' 372 | return 0 373 | 374 | self.user = user 375 | self.password = password 376 | 377 | dict = {} 378 | dict['user'] = self.user 379 | dict['passwd'] = self.password 380 | dict['api_type'] = 'json' 381 | 382 | r = self.web.post('http://www.reddit.com/api/login/%s' % self.user, dict) 383 | if "WRONG_PASSWORD" in r: 384 | # Invalid password 385 | return 1 386 | elif 'RATELIMIT' in r: 387 | # Rate limit reached. 388 | return 2 389 | else: #if 'redirect' in r: 390 | js = json.loads(r) 391 | if js.get('json') == None or js['json'].get('data') == None: 392 | return -1 393 | # Correct password. 394 | self.logged_in = True 395 | self.modhash = js['json']['data']['modhash'] 396 | return 0 397 | # Unexpected response. 398 | return -1 399 | 400 | 401 | def logout(self): 402 | """ 403 | "Logs out": Clears cookies, resets modhash. 404 | """ 405 | self.switch_user('', '') 406 | 407 | 408 | ################ 409 | # WEB REQUESTS # 410 | ################ 411 | 412 | @staticmethod 413 | def fix_url(url): 414 | """ 415 | 'Corrects' a given URL as needed. Ensures URL will function with API properly. 416 | 417 | Ensures: 418 | * URL begins with http:// 419 | * 'reddit.com' is used instead of 'www.reddit.com' 420 | * URL contains '.json' 421 | * URLs that are relative (start with '/') start with 'reddit.com' 422 | """ 423 | result = url 424 | if result == '': result = '/' 425 | 426 | if result.startswith('/'): 427 | result = 'http://reddit.com' + result 428 | 429 | if not result.startswith('http://'): 430 | result = 'http://' + result 431 | 432 | # Get does not like 'www.' for some reason. 433 | result = result.replace('www.reddit.com', 'reddit.com') 434 | 435 | if not '.json' in result: 436 | q = result.find('?') 437 | if q == -1: 438 | result += '.json' 439 | else: 440 | result = result[:q] + '.json' + result[q:] 441 | return result 442 | 443 | 444 | def get(self, url): 445 | """ 446 | Returns a list of Post and/or Comment and/or Message and/or Subreddit objects. 447 | 448 | Requesting comments will return a list of Comments. 
Examples: 449 | * .get('/r/all/comments') 450 | * .get('/user/godofatheism/comments') 451 | Requesting front pages and the like (/top) will return lists of Posts. Examples: 452 | * .get('') 453 | * .get('/r/all') 454 | * .get('/user/blackstar9000/submitted') 455 | Requesting user pages will return lists of Posts AND Comments. Example: 456 | * .get('/user/violentacrez') 457 | Requesting "reddits" will return a list of Subreddit objects. Example: 458 | * .get('/reddits') 459 | Requesting messages will return a list of Comment and/or Message objects. Examples: 460 | * .get('/message/inbox') 461 | 462 | Returns None if unable to get data from URL. 463 | Returns empty list [] if no results are found. 464 | 465 | 'url' must be within reddit.com domain. 466 | 467 | This method automatically updates self.modhash so you don't have to. 468 | 469 | """ 470 | 471 | # "Fix" URL to ensure it is formatted for reddit queries 472 | url = self.fix_url(url) 473 | 474 | r = self.web.get(url) # Get the response 475 | 476 | if r == '' or r == '""' or r == '"{}"': 477 | return None # Server gave null response. 478 | 479 | try: 480 | js = json.loads(r) 481 | except ValueError: 482 | # If it's not JSON, we don't want to parse it. 483 | return None 484 | except TypeError: 485 | # Parsing JSON led to a TypeError (probably unpack non-sequence) 486 | return None 487 | 488 | posts = [] 489 | # If the response json contains a LIST of objects: post (0) & comments (1) 490 | if isinstance(js, list): 491 | if len(js) < 2: return None 492 | # Main Post 493 | data = js[0]['data'] 494 | for child in data.get('children'): 495 | cdata = child['data'] 496 | post = Post() 497 | post.modhash = data['modhash'] 498 | post.set_using_json_data(cdata) 499 | posts.append(post) 500 | # Comment 501 | data = js[1]['data'] 502 | for child in data.get('children'): 503 | cdata = child['data'] 504 | ckind = child['kind'] 505 | if ckind == 'more': 506 | post.has_more_comments = True 507 | post.more_comments = cdata 508 | continue 509 | comment = Comment() 510 | comment.set_using_json_data(cdata) 511 | post.comments.append(comment) 512 | 513 | # Or simply the data object (subreddit page, user page, etc) 514 | elif isinstance(js, dict): 515 | data = js.get('data') 516 | if data == None or data.get('children') == None: 517 | return posts 518 | for child in data.get('children'): 519 | cdata = child['data'] 520 | if child['kind'] == 't3': 521 | # Post 522 | post = Post() 523 | post.modhash = data['modhash'] 524 | post.set_using_json_data(cdata) 525 | posts.append(post) 526 | elif child['kind'] == 't1': 527 | # Comment 528 | comment = Comment() 529 | comment.modhash = data['modhash'] 530 | comment.set_using_json_data(cdata) 531 | posts.append(comment) 532 | elif child['kind'] == 't4': 533 | # Message/PM (inbox) 534 | msg = Message(cdata) 535 | posts.append(msg) 536 | elif child['kind'] == 't5': 537 | # Subreddit 538 | subr = Subreddit(cdata) 539 | posts.append(subr) 540 | 541 | # Set the variables to keep track of the user hash and current page. 
542 | self.modhash = data.get('modhash') 543 | if not '/comments/' in url: 544 | # Only set before/after (get_next()/get_prev()) if we 545 | # loaded something OTHER than a post's comments 546 | # This allows us to continue to use .get_prev/.get_next 547 | self.before = data.get('before') 548 | self.after = data.get('after') 549 | # Save last URL in case user wants to get_next() or get_previous() 550 | self.last_url = url 551 | 552 | return posts 553 | 554 | 555 | def fetch_comments(self, post, limit=0): 556 | """ 557 | Retrieves comments for a given Post. 558 | Sets the comments to the given Post object. 559 | Can be used to "refresh" comments for a Post. 560 | "limit" is the number of posts to grab, uses account's preference as default. 561 | """ 562 | # Retrieve Post 563 | url = '/r/%s/comments/%s' % (post.subreddit, post.id) 564 | if limit != 0: url += '?limit=%d' % (limit) 565 | posts = self.get(url) 566 | # We only expect 1 result: posts[0] 567 | if posts == None or len(posts) == 0: return 568 | post.comments = posts[0].comments 569 | post.num_comments = posts[0].num_comments 570 | 571 | 572 | ########## 573 | # VOTING # 574 | ########## 575 | 576 | def vote(self, post, direction): 577 | """ 578 | Votes for a post or comment. 579 | "post" is the Post/Comment object to vote on. 580 | "direction" is vote type: 1 to upvote, -1 to downvote, 0 to rescind vote. 581 | Returns True if vote was casted successful, False otherwise. 582 | """ 583 | if self.modhash == '': return False # Modhash required to vote 584 | dict = {} 585 | if isinstance(post, Post): 586 | dict['id'] = 't3_%s' % post.id 587 | else: 588 | dict['id'] = 't1_%s' % post.id 589 | dict['dir'] = str(direction) 590 | dict['uh'] = self.modhash 591 | response = self.web.post('http://www.reddit.com/api/vote', dict) 592 | # Reddit should respond with '{}' if vote was successful. 593 | return (response == '{}') 594 | 595 | def upvote(self, post): 596 | return self.vote(post, 1) 597 | def downvote(self, post): 598 | return self.vote(post, -1) 599 | def novote(self, post): 600 | return self.vote(post, 0) 601 | 602 | 603 | ############## 604 | # COMMENTING # 605 | ############## 606 | 607 | 608 | def get_user_comments(self, user, sort=''): 609 | """ 610 | Returns list of Comments made by "user". 611 | "sort" changes the order of comments; use "new", "old" or "top" 612 | Returns None if unable to retrieve. 613 | """ 614 | return self.get('/user/%s/comments/' % user) 615 | 616 | def get_user_posts(self, user, sort=''): 617 | """ 618 | Returns list of Posts made by "user". 619 | "sort" changes the order of posts; use "new", "old" or "top" 620 | Returns None if unable to retrieve. 621 | """ 622 | return self.get('/user/%s/submitted/' % user) 623 | 624 | 625 | def reply(self, post, text): 626 | """ 627 | Reply to given Post, Comment, or Message. 628 | "post" is the Post, Comment, or Message object to reply to. 629 | "text" is the text to reply with. 630 | 631 | Returns empty dict {} if unable to reply. 632 | Otherwise, returns dict containing reply information: 633 | 'content': javascript for updating layout on main site 634 | 'contentText': Plaintext of reply's body. Probably identical to 'text' parameter 635 | 'contentHTML': HTML-formatted text of reply's body 636 | 'id': base36 ID of reply E.g. t1_c58sfuc (Comment) or t4_cqug9 (Message) 637 | 'parent': base36 ID of parent E.g. t1_c58sfog (Comment) or t4_cpgyw (Message) 638 | Comments/Posts have additional keys in dict: 639 | 'replies': List of replies to reply (?) 
probably empty everytime... 640 | 'link': base36 ID of post reply was inside of. E.g. t3_vvtts 641 | 642 | TODO Return a new Comment/Message object, containing expected values. 643 | """ 644 | result = {} 645 | dict = {} 646 | dict['uh'] = self.modhash 647 | dict['text'] = text 648 | 649 | if isinstance(post, Post): 650 | dict['thing_id'] = 't3_%s' % post.id 651 | elif isinstance(post, Comment): 652 | dict['parent'] = 't1_%s' % post.id 653 | elif isinstance(post, Message): 654 | dict['thing_id'] = post.name 655 | 656 | response = self.web.post('http://www.reddit.com/api/comment', dict) 657 | if '".error.USER_REQUIRED"' in response: return result 658 | # Extract appropriate dict out of response 659 | jres = json.loads(response) 660 | jquery = jres.get('jquery') 661 | if jquery == None: 662 | return result 663 | 664 | for i in xrange(0, len(jquery)): 665 | if not isinstance(jquery[i][3], list) or len(jquery[i][3]) == 0: continue 666 | if not isinstance(jquery[i][3][0], list) or len(jquery[i][3][0]) == 0: continue 667 | jdict = jquery[i][3][0][0] 668 | result = jdict.get('data') 669 | break 670 | return result 671 | 672 | 673 | 674 | ############# 675 | # SEARCHING # 676 | ############# 677 | 678 | def search(self, query, subreddit='', sort=''): 679 | """ 680 | Searches reddit, returns list of results. 681 | "query" is the text to search for on reddit 682 | "subreddit" is the subreddit to restrict the search to. Use '' to search all of reddit. 683 | "sort" is the order of results. Use "new", "top" or "relevance" (default) 684 | 685 | Examples: 686 | results = reddit.search('girlfriend') 687 | results = reddit.search('skateboard', subreddit='pics') 688 | results = reddit.search('birthday', subreddit='pics', sort='new') 689 | After calling search(), you can call get_next() and get_previous() to navigate. 690 | """ 691 | url = '/search?q=' + query 692 | if sort != '': 693 | url += '&sort=' + sort 694 | if subreddit != '': 695 | url = '/r/' + subreddit + url + '&restrict_sr=on' 696 | return self.get(url) 697 | 698 | 699 | 700 | ############## 701 | # NAVIGATING # 702 | ############## 703 | 704 | """ 705 | Notice that inside of the 'get()' method, we store: 706 | * the last URL retrieved (self.last_url) 707 | * the 'before' tag which links to the previous page (self.before) 708 | * the 'after' tag which links to the next page (self.after) 709 | Because of this, we can load the 'next' or 'previous' pages of some results. 710 | This will only go to the 'next' or 'previous' page of the LAST PAGE RETRIEVED using get() 711 | This means get_next() and get_previous() will only be operational AFTER retrieving: 712 | * subreddits: .get('/r/subreddit') 713 | * the main page: .get('') 714 | * search results: .search('my face when') 715 | * user pages: .get('/user/krispykrackers') 716 | * ...possibly others? 717 | """ 718 | def navigate(self, after=True): 719 | """ 720 | Helper method, used by get_next() and get_previous(). 721 | Used to retrieve the 'next' (or 'previous') page on reddit. 722 | If "after" == True, it loads the next page; otherwise, loads the previous 723 | Returns the same format of information as get(): 724 | * None if unable to retrieve, 725 | * [] if no results 726 | * Otherwise, list of relevantPost and/or Comment objects 727 | """ 728 | if after: 729 | nav_text = 'after' 730 | nav_id = self.after 731 | else: 732 | nav_text = 'before' 733 | nav_id = self.before 734 | if nav_id == None: return [] # No previous/next link to navigate with. 
735 | url = self.last_url 736 | # Strip out after/before params from the previous URL. 737 | if '?before' in url: url = url[:url.find('?before')] 738 | if '&before' in url: url = url[:url.find('&before')] 739 | if '?after' in url: url = url[:url.find('?after')] 740 | if '&after' in url: url = url[:url.find('&after')] 741 | 742 | if '?' in url: 743 | url += '&%s=%s' % (nav_text, nav_id) 744 | else: 745 | url += '?%s=%s' % (nav_text, nav_id) 746 | url += '&count=25' # Include "count=#" the navigation to work properly! 747 | return self.get(url) 748 | 749 | 750 | def get_previous(self): 751 | """ 752 | Go "back" -- that is, retrieve previous 25/50/100 posts. See navigate() 753 | Returns None if unable to retrieve, or [] if no results are found. 754 | """ 755 | return self.navigate(after=False) 756 | 757 | def get_next(self): 758 | """ 759 | Go "next" -- retrieve the next 25/50/100 posts. See navigate() 760 | Returns None if unable to retrieve, or [] if no results are found. 761 | """ 762 | return self.navigate(after=True) 763 | 764 | 765 | def has_previous(self): 766 | """ Returns True if there is a 'previous' page, False otherwise. """ 767 | return (self.before != None) 768 | 769 | 770 | def has_next(self): 771 | """ Returns True if there is a 'next' page, False otherwise. """ 772 | return (self.after != None) 773 | 774 | 775 | 776 | ########### 777 | # POSTING # 778 | ########### 779 | 780 | def post_link(self, title, link, subreddit): 781 | """ 782 | Submits a new link (URL) to reddit. 783 | No captcha support! User must have verified email address 784 | "title" is the title of the submission, "link" is the submission's URL. 785 | "subreddit" is the NAME of the subreddit to post to, e.g. 'funny' NOT '/r/funny'. 786 | Returns permalink to post if successful, e.g. 'r/Subreddit/comments/id/title' 787 | Returns permalink of EXISTING link (with ?already_submitted=true) if the link already exists. 788 | Returns '' if unable to post (not logged in, unverified email). 789 | """ 790 | if not self.logged_in: return '' 791 | dict = {} 792 | dict['uh'] = self.modhash 793 | dict['kind'] = 'link' 794 | dict['url'] = link 795 | dict['sr'] = subreddit 796 | dict['title'] = title 797 | dict['r'] = subreddit 798 | dict['renderstyle'] = 'html' 799 | response = self.web.post('http://www.reddit.com/api/submit', dict) 800 | if "You haven't verified your email address" in response: 801 | return '' 802 | 803 | if 'already_submitted=true' in response: 804 | # Link already exists in that subreddit! 805 | jres = json.loads(response) 806 | existing_link = jres['jquery'][10][3][0] 807 | # Return existing link 808 | return existing_link 809 | link = self.web.between(response, 'call", ["http://www.reddit.com/', '"]')[0] 810 | return link 811 | 812 | 813 | def post_self(self, title, text, subreddit): 814 | """ 815 | Submits a new "self-post" (text-based post) reddit. 816 | "title" is the title of the submission. "text" is the self-text. 817 | "subreddit" is the NAME of the subreddit to post to, e.g. 'funny' NOT '/r/funny'. 818 | Returns permalink to post if successful, e.g. 
'r/Subreddit/comments/id/title' 819 | Returns '' if unable to post (not logged in, unverified email) 820 | """ 821 | dict = {} 822 | dict['uh'] = self.modhash 823 | dict['title'] = title 824 | dict['kind'] = 'self' 825 | dict['thing_id'] = '' 826 | dict['text'] = text 827 | dict['sr'] = subreddit 828 | dict['id'] = '#newlink' 829 | dict['r'] = subreddit 830 | dict['renderstyle'] = 'html' 831 | response = self.web.post('http://www.reddit.com/api/submit', dict) 832 | if "You haven't verified your email address" in response: 833 | return '' 834 | link = self.web.between(response, 'call", ["http://www.reddit.com/', '"]')[0] 835 | return link 836 | 837 | 838 | ############ 839 | # MESSAGES # 840 | ############ 841 | def compose(self, recipient, subject, message): 842 | """ 843 | Sends PM to recipient. 844 | Returns True if message was sent successfully, False otherwise. 845 | """ 846 | dict = {} 847 | dict['id'] = '#compose-message' 848 | dict['uh'] = self.modhash 849 | dict['to'] = recipient 850 | dict['text'] = message 851 | dict['subject'] = subject 852 | dict['thing-id'] = '' 853 | dict['renderstyle'] = 'html' 854 | r = self.web.post('http://www.reddit.com/api/compose', dict) 855 | return ('your message has been delivered' in r) 856 | 857 | def mark_message(self, message, mark_as_read=True): 858 | """ Marks passed message as either 'read' or 'unread' depending on mark_as_read's value """ 859 | dict = {} 860 | dict['id'] = message.name 861 | dict['uh'] = self.modhash 862 | dict['renderstyle'] = 'html' 863 | r = self.web.post('http://www.reddit.com/api/read_message', dict) 864 | message.new = not mark_as_read 865 | 866 | 867 | ######################## 868 | # USER-RELATED METHODS # 869 | ######################## 870 | 871 | def user_info(self, username=None): 872 | """ 873 | If username is unset (None), returns UserInfo object for the currently-logged-in user. 874 | If username is set (String), returns UserInfo object for the given 'username' 875 | 876 | Returns a userinfo with .error = 404 if user page is not found. example: 877 | uinfo = reddit.user_info('violentacres') 878 | if uinfo.error == 404: print 'violentacres is still gone!' 879 | else: print 'Who unbanned him?' 880 | 881 | Returns None object if unable to retrieve data. 882 | """ 883 | if username == None: 884 | if not self.logged_in: return None 885 | url = 'http://reddit.com/api/me.json' 886 | else: 887 | url = 'http://reddit.com/user/%s/about.json' % username 888 | r = self.web.get(url) 889 | if r == '' or r == '""': return None # Server gave null response. 890 | try: 891 | js = json.loads(r) 892 | except ValueError: return None # If it's not JSON, we can't parse it. 893 | if js == None: return None 894 | return UserInfo(js.get('data')) 895 | 896 | 897 | def save(self, post): 898 | """ Saves Post to user account. "post" is the actual Post object to save. """ 899 | dict = {} 900 | dict['id'] = post.id 901 | dict['uh'] = self.modhash 902 | response = self.web.post('http://www.reddit.com/api/save', dict) 903 | return (response == '{}') 904 | 905 | def unsave(self, post): 906 | """ Un-saves Post from user account. "post" is the actual Post object to un-save. """ 907 | dict = {} 908 | dict['id'] = post.id 909 | dict['uh'] = self.modhash 910 | response = self.web.post('http://www.reddit.com/api/unsave', dict) 911 | return (response == '{}') 912 | 913 | def hide(self, post): 914 | """ Hides Post from user's visibility. "post" is the actual Post object to hide. 
""" 915 | dict = {} 916 | dict['id'] = post.id 917 | dict['uh'] = self.modhash 918 | dict['executed'] = 'hidden' 919 | response = self.web.post('http://www.reddit.com/api/hide', dict) 920 | return (response == '{}') 921 | 922 | def unhide(self, post): 923 | """ Un-hides Post from user's visibility. "post" is the actual Post object to un-hide. """ 924 | dict = {} 925 | dict['id'] = post.id 926 | dict['uh'] = self.modhash 927 | dict['executed'] = 'unhidden' 928 | response = self.web.post('http://www.reddit.com/api/unhide', dict) 929 | return (response == '{}') 930 | 931 | def report(self, post): 932 | """ Reports a post or comment to the mods of the current subreddit. """ 933 | dict = {} 934 | dict['id'] = post.name 935 | dict['uh'] = self.modhash 936 | dict['r'] = post.subreddit 937 | dict['executed'] = 'reported' 938 | dict['renderstyle'] = 'html' 939 | r = self.web.post('http://www.reddit.com/api/report', dict) 940 | return (r == '{}') 941 | 942 | def share(self, post, from_username, from_email, to_email, message): 943 | """ Share a post with someone via email. """ 944 | dict = {} 945 | dict['id'] = '#sharelink_' + post.name 946 | dict['uh'] = self.modhash 947 | dict['r'] = post.subreddit 948 | dict['parent'] = post.name 949 | dict['message'] = message 950 | dict['replyto'] = from_email 951 | dict['share_to'] = to_email 952 | dict['share_from'] = from_username 953 | dict['renderstyle'] = 'html' 954 | r = self.web.post('http://www.reddit.com/api/share', dict) 955 | return ('your link has been shared' in r) 956 | 957 | def mark_nsfw(self, post): 958 | """ Marks a Post as NSFW. """ 959 | dict = {} 960 | dict['id'] = post.name 961 | dict['uh'] = self.modhash 962 | dict['r'] = post.subreddit 963 | dict['renderstyle'] = 'html' 964 | r = self.web.post('http://www.reddit.com/api/marknsfw', dict) 965 | return (r == '{}') 966 | 967 | def unmark_nsfw(self, post): 968 | """ Removes NSFW mark from a Post. """ 969 | dict = {} 970 | dict['id'] = post.name 971 | dict['uh'] = self.modhash 972 | dict['r'] = post.subreddit 973 | dict['renderstyle'] = 'html' 974 | r = self.web.post('http://www.reddit.com/api/unmarknsfw', dict) 975 | return (r == '{}') 976 | 977 | def subscribe(self, subreddit, unsub=False): 978 | """ Subscribes (or unsubscribes) user to/from subreddit. """ 979 | dict = {} 980 | dict['sr'] = subreddit.name 981 | dict['uh'] = self.modhash 982 | dict['r'] = subreddit.display_name 983 | dict['renderstyle'] = 'html' 984 | if not unsub: dict['action'] = 'sub' 985 | else: dict['action'] = 'unsub' 986 | r = self.web.post('http://www.reddit.com/api/subscribe', dict) 987 | return (r == '{}') 988 | 989 | 990 | 991 | ############# 992 | # MODERATOR # 993 | ############# 994 | 995 | def spam(self, post): 996 | """ Marks a Post (or Comment) as 'spam'. """ 997 | dict = {} 998 | dict['id'] = post.name 999 | dict['uh'] = self.modhash 1000 | dict['r'] = post.subreddit 1001 | dict['renderstyle'] = 'html' 1002 | r = self.web.post('http://www.reddit.com/api/remove', dict) 1003 | return (r == '{}') 1004 | 1005 | def approve(self, post): 1006 | """ Un-removes ('approves') a Post or Comment. """ 1007 | dict = {} 1008 | dict['id'] = post.name 1009 | dict['uh'] = self.modhash 1010 | dict['r'] = post.subreddit 1011 | dict['renderstyle'] = 'html' 1012 | r = self.web.post('http://www.reddit.com/api/approve', dict) 1013 | return (r == '{}') 1014 | 1015 | def remove(self, post): 1016 | """ Removes a Post or Comment from public view. 
""" 1017 | dict = {} 1018 | dict['id'] = post.name 1019 | dict['uh'] = self.modhash 1020 | dict['r'] = post.subreddit 1021 | dict['spam'] = 'False' 1022 | dict['renderstyle'] = 'html' 1023 | r = self.web.post('http://www.reddit.com/api/remove', dict) 1024 | return (r == '{}') 1025 | 1026 | def distinguish(self, post, turn_on=True): 1027 | """ Distinguishes a Post or Comment with moderator flair. """ 1028 | dict = {} 1029 | dict['id'] = post.name 1030 | dict['uh'] = self.modhash 1031 | dict['r'] = post.subreddit 1032 | dict['renderstyle'] = 'html' 1033 | url = 'http://www.reddit.com/api/distinguish/' 1034 | if turn_on: 1035 | url += 'yes' 1036 | else: 1037 | url += 'no' 1038 | r = self.web.post(url, dict) 1039 | return (r != '') 1040 | 1041 | def approved_submitter(self, subreddit, username, add_user=True): 1042 | """ 1043 | Add/remove user as an Approved Submitter for a given Subreddit. 1044 | subreddit is a Subreddit object! Must have .name and .display_name 1045 | Must be logged in as a moderator of the Subreddit. 1046 | """ 1047 | dict = {} 1048 | dict['id'] = '#contributor' 1049 | dict['uh'] = self.modhash 1050 | dict['r'] = subreddit.display_name 1051 | dict['name'] = username 1052 | dict['type'] = 'contributor' 1053 | dict['action'] = 'add' 1054 | dict['container'] = subreddit.name 1055 | dict['renderstyle'] = 'html' 1056 | url = 'http://www.reddit.com/api/' 1057 | if add_user: url += 'friend' 1058 | else: url += 'unfriend' 1059 | r = self.web.post(url, dict) 1060 | return (r != '') 1061 | 1062 | def moderator(self, subreddit, username): 1063 | """ 1064 | Add/remove user as a moderator of a given Subreddit 1065 | subreddit is a Subreddit object! Must have .name and .display_name 1066 | Must be logged in as a moderator of the Subreddit. 1067 | """ 1068 | dict = {} 1069 | dict['id'] = '#moderator' 1070 | dict['uh'] = self.modhash 1071 | dict['r'] = subreddit.display_name 1072 | dict['name'] = username 1073 | dict['type'] = 'moderator' 1074 | dict['action'] = 'add' 1075 | dict['container'] = subreddit.name 1076 | dict['renderstyle'] = 'html' 1077 | url = 'http://www.reddit.com/api/' 1078 | if add_user: url += 'friend' 1079 | else: url += 'unfriend' 1080 | r = self.web.post(url, dict) 1081 | return (r != '') 1082 | 1083 | def time_to_date(self, seconds): 1084 | """ Returns date object based on given seconds. """ 1085 | return date.fromtimestamp(seconds) 1086 | 1087 | def time_since(self, seconds): 1088 | """ Returns time elapsed since current time in human-readable format. """ 1089 | delta = time() - seconds 1090 | factors = [ 1091 | ('second', 60), 1092 | ('minute', 60), 1093 | ('hour', 24), 1094 | ('day', 365), 1095 | ('year', 10), 1096 | ('decade',100) 1097 | ] 1098 | current = delta 1099 | for (unit, factor) in factors: 1100 | if current < factor: 1101 | plural = 's' 1102 | if current == 1: plural = '' 1103 | return '%d %s%s' % (current, unit, plural) 1104 | current /= factor 1105 | current /= 365 1106 | plural = 's' 1107 | if current == 1: plural = '' 1108 | return '%d %s%s' % (current, 'year', plural) 1109 | --------------------------------------------------------------------------------