├── README.md ├── acadgrade.py ├── drcalc.py ├── gif_maker.py ├── mail.py └── redditScrape.py /README.md: -------------------------------------------------------------------------------- 1 | RedditScrape: A general purpose reddit scraper 2 | ------------------------------------------------------------------------------------- 3 | 4 | Downloads images, images from imgur, all types of gifs, and videos via youtube-dl. 5 | 6 | Requirements: 7 | 8 | praw , bs4 , youtube-dl 9 | 10 | `sudo apt-get install youtube-dl` 11 | 12 | `sudo pip install praw` 13 | 14 | `sudo pip install bs4` 15 | 16 | Run: 17 | 18 | `python redditScrape.py` 19 | 20 | Acad Grade: IITG Status detector 21 | ------------------------------------------------------------------------------------- 22 | Shows your current course status. 23 | 24 | Requirements - 25 | mechanize, Python 3.6 26 | 27 | Steps: 28 | 29 | `virtualenv -p python3.6 venv` 30 | 31 | `source venv/bin/active` 32 | 33 | `pip install mechanize` 34 | 35 | `python acadgrade.py` 36 | 37 | ------------------------------------------------------------------------------------- 38 | 39 | DRCalc: DR Calculator from GradeCards 40 | ------------------------------------------------------------------------------------- 41 | 42 | Calculates Department Ranks from PDFs of Grade Cards and outputs a sorted CSV. 43 | 44 | Requirements: 45 | pdfminer,slate 46 | 47 | `pip install pdfminer` 48 | 49 | `pip install slate` 50 | 51 | Run: 52 | 53 | `python drcalc.py` 54 | 55 | ------------------------------------------------------------------------------------- 56 | 57 | Gif Maker: A HQ gif generator 58 | ------------------------------------------------------------------------------------- 59 | 60 | Finds the scene related to the query and generates a HQ gif from it. 61 | 62 | Requirements: 63 | ffmpeg, pysrt 64 | 65 | `pip install pysrt` 66 | 67 | [GET ffmpeg](https://ffmpeg.org/download.html#build-linux) 68 | 69 | 70 | Run: 71 | 72 | `python gif_maker.py -q **query** -f **input video file** -s **input srt file** -o **output file**` 73 | 74 | ------------------------------------------------------------------------------------- 75 | -------------------------------------------------------------------------------- /acadgrade.py: -------------------------------------------------------------------------------- 1 | import json 2 | from getpass import getpass 3 | from pathlib import Path 4 | 5 | import mechanicalsoup 6 | 7 | 8 | home_dir = Path.home() 9 | data_file = home_dir.joinpath('.acadgrade') 10 | 11 | if Path.exists(data_file): 12 | with open(data_file) as f: 13 | saved_data = json.load(f) 14 | else: 15 | saved_data = {} 16 | 17 | browser = mechanicalsoup.StatefulBrowser() 18 | 19 | browser.open('https://auto.iitg.ernet.in/acadgrade/index.jsp') 20 | browser.select_form() 21 | 22 | if 'uid' not in saved_data or 'pass' not in saved_data: 23 | saved_data['uid'] = input('Please enter your webmail username:\n') 24 | saved_data['pass'] = getpass('Please enter your webmail password:\n') 25 | with open(data_file, 'w') as f: 26 | json.dump(saved_data, f) 27 | 28 | browser['uid'] = saved_data['uid'] 29 | browser['pass'] = saved_data['pass'] 30 | 31 | browser.submit_selected() 32 | 33 | page = browser.get_current_page() 34 | data = page.find_all('tr')[1:] 35 | 36 | print(f"Hello {saved_data['uid']}, your current course status is as follows:") 37 | 38 | course_status = {} 39 | for row in data: 40 | row_data = list(map(lambda cell: cell.text, row.find_all('td'))) 41 | name, status = row_data[1], row_data[3] 42 | print(f'Course: {name}, Status: {status}') 43 | -------------------------------------------------------------------------------- /drcalc.py: -------------------------------------------------------------------------------- 1 | import os 2 | x = "/home/sam/Desktop/nf/" 3 | 4 | import slate 5 | def listdir_fullpath(d): 6 | return [os.path.join(d, f) for f in os.listdir(d)] 7 | 8 | a = listdir_fullpath(x) 9 | pdfs = [] 10 | for i in a: 11 | pdfs.append(listdir_fullpath(i)[0]) 12 | 13 | final = [] 14 | for i in pdfs: 15 | with open(i) as f: 16 | x = slate.PDF(f) 17 | try: 18 | txt = x[0].split() 19 | cpi=x[0].split()[-8] 20 | 21 | roll=x[0].split()[19][1:] 22 | if len(cpi)!=4: 23 | print i 24 | # print x[0].split() 25 | ns= 0 26 | ne = 0 27 | for j in range(len(txt)): 28 | if "NAME" in txt[j]: 29 | ns=j+1 30 | if "DISCIPLINE" in txt[j]: 31 | ne= j 32 | txt[ns] = txt[ns][1:] 33 | name = " ".join(txt[ns:ne]) 34 | final.append([cpi,roll,name]) 35 | 36 | except: 37 | print i 38 | final.sort(reverse=True) 39 | f = open("sorted1.csv","w") 40 | for i in final: 41 | f.write(i[0]+","+i[1]+","+i[2]+"\n") -------------------------------------------------------------------------------- /gif_maker.py: -------------------------------------------------------------------------------- 1 | import pysrt 2 | from datetime import datetime 3 | import argparse 4 | import subprocess 5 | 6 | 7 | def find_gif(subsfile,query): 8 | subs = pysrt.open(subsfile) 9 | start = 0 10 | duration = 0 11 | print(query) 12 | for sub in subs: 13 | text = " ".join(sub.text.lower().split()) 14 | if query in text: 15 | start = datetime.strptime(str(sub.start),"%H:%M:%S,%f").strftime("%H:%M:%S") 16 | duration = (datetime.strptime(str(sub.end),"%H:%M:%S,%f")-datetime.strptime(str(sub.start),"%H:%M:%S,%f")).seconds 17 | break 18 | return start,duration 19 | 20 | def make_gif(video,subs,query,output): 21 | start,duration = find_gif(subs,query) 22 | filters="fps=15,scale=320:-1:flags=lanczos" 23 | pallete_generate = "ffmpeg -y -ss {} -t {} -i {} -vf fps=10,scale=320:-1:flags=lanczos,palettegen /tmp/palette.png".format(start,duration,video) 24 | gif_generate = 'ffmpeg -ss {} -t {} -i {} -i /tmp/palette.png -filter_complex "fps=10,scale=320:-1:flags=lanczos[x];[x][1:v]paletteuse" {}'.format(start,duration,video,output) 25 | # print(pallete_generate) 26 | subprocess.call(pallete_generate,shell=True) 27 | subprocess.call(gif_generate,shell=True) 28 | 29 | 30 | def main(): 31 | parser = argparse.ArgumentParser(prog="Gif Maker") 32 | parser.add_argument('-f','--file',required=True) 33 | parser.add_argument('-s','--subs',required=True) 34 | parser.add_argument('-q','--query',required=True) 35 | parser.add_argument('-o','--output',required=True) 36 | parsed = parser.parse_args() 37 | make_gif(parsed.file,parsed.subs,parsed.query,parsed.output) 38 | 39 | if __name__ == "__main__": 40 | main() -------------------------------------------------------------------------------- /mail.py: -------------------------------------------------------------------------------- 1 | import poplib 2 | from email import parser 3 | import requests 4 | import os 5 | import json 6 | 7 | import getpass 8 | 9 | class MailGun(object): 10 | """docstring for MainGun""" 11 | def __init__(self, api_key, domain, recipient): 12 | super(MailGun, self).__init__() 13 | self.api_key = api_key 14 | self.domain = domain 15 | self.recipient = recipient 16 | self.url = "https://api.mailgun.net/v3/"+self.domain+"/messages" 17 | 18 | def send_message(self,sender,subject,body): 19 | return requests.post(self.url,auth=("api",self.api_key),data={"from":sender,"to":self.recipient,"subject":subject,"text":body}) 20 | 21 | 22 | import inspect, os 23 | script_path = os.path.abspath(inspect.getfile(inspect.currentframe())) # script directory 24 | import shutil 25 | home_dir = os.path.expanduser('~') 26 | cred_dir = os.path.join(home_dir, '.webmail') 27 | final_path = os.path.join(cred_dir,'mail.py') 28 | log_path = os.path.join(cred_dir,'logs') 29 | credential_path = os.path.join(cred_dir,'creds.txt') 30 | if not os.path.exists(credential_path): 31 | if not os.path.exists(cred_dir): 32 | os.makedirs(cred_dir) 33 | 34 | shutil.copy2(script_path, final_path) 35 | creds = {} 36 | creds['webmail'] = raw_input("Please enter your webmail username:\n") 37 | creds['password'] = getpass.getpass() 38 | print("\n\nSelect your webmail server:\n[1] Teesta\n[2] Naambor\n[3] Disang\n[4] Tamdil\n[5] Dikrong") 39 | server_in = input("Enter the number corresponding to the server:\n")-1 40 | server_list = ['202.141.80.12','202.141.80.9','202.141.80.10','202.141.80.11','202.141.80.13'] 41 | creds['server'] = server_list[server_in] 42 | creds['api'] = raw_input("Please enter you mailgun api-key:\n") 43 | creds['domain'] = raw_input("Please enter your mailgun sandbox url:\n") 44 | creds['to'] = raw_input("Please enter the email address you want to forward to:\n") 45 | creds['flast'] = input("Since this is your first time running the script, \nplease enter the number of existing emails you want to forward:\n") 46 | json.dump(creds, open(credential_path,'w')) 47 | print("Paste the following line to crontab -e :\n\n\n*/2 * * * * /usr/bin/python {} >> {}\n\n\n").format(final_path,log_path) 48 | 49 | else: 50 | creds = json.load(open(credential_path,'r')) 51 | 52 | 53 | 54 | pop_conn = poplib.POP3_SSL(creds['server']) 55 | pop_conn.user(creds['webmail']) 56 | pop_conn.pass_(creds['password']) 57 | total = len(pop_conn.list()[1]) 58 | if creds['flast']!=-1: 59 | creds['last'] = total - creds['flast'] 60 | creds['flast'] = -1 61 | # print total 62 | #Get messages from server: 63 | # print total 64 | messages = [pop_conn.retr(i) for i in range(creds['last']+1,total+1)] 65 | 66 | # print messages 67 | # Concat message pieces: 68 | messages = ["\n".join(mssg[1]) for mssg in messages] 69 | # print messages[0] 70 | #Parse message intom an email object: 71 | mailSender = MailGun(creds['api'],creds['domain'],[creds['to']]) 72 | messages = [parser.Parser().parsestr(mssg) for mssg in messages] 73 | succ = 0 74 | for i,b in enumerate(messages): 75 | body = "" 76 | if b.is_multipart(): 77 | for part in b.walk(): 78 | ctype = part.get_content_type() 79 | cdispo = str(part.get('Content-Disposition')) 80 | if ctype == 'text/plain' and 'attachment' not in cdispo: 81 | body += part.get_payload(decode=True) # decode 82 | break 83 | else: 84 | body += b.get_payload(decode=True) 85 | mailSender.send_message(b['from'],b['subject'],body) 86 | succ+=1 87 | 88 | pop_conn.quit() 89 | # print str(succ)+" new mails forwarded" 90 | creds['last'] = total 91 | json.dump(creds, open(credential_path,'w')) 92 | 93 | -------------------------------------------------------------------------------- /redditScrape.py: -------------------------------------------------------------------------------- 1 | import urllib2 2 | import praw 3 | import urllib 4 | from urlparse import urljoin 5 | from os.path import expanduser 6 | from bs4 import BeautifulSoup as bs 7 | import os 8 | import requests 9 | from subprocess import call 10 | 11 | home = expanduser("~")+"/Desktop/" 12 | def dl(each): 13 | filename=each.split('/')[-1] 14 | urllib.urlretrieve(each, filename) 15 | 16 | def dligifv(each): 17 | filename=each.split('/')[-1] 18 | each1 = each.split(".") 19 | each1[-1] = "webm" 20 | if not os.path.exists(home+filename): 21 | urllib.urlretrieve(".".join(each1), home+filename) 22 | 23 | def dlalbum(x): 24 | response = requests.get(x+"/all") 25 | soup = bs(response.text,"lxml") 26 | listimg = soup.find(id="imagelist") 27 | directory = home+"/"+x.split("/")[-1] 28 | if not os.path.exists(directory): 29 | os.makedirs(directory) 30 | images = listimg.find_all("a",href=True) 31 | for i in range(len(images)): 32 | try: 33 | print "Downloading album image "+str(i+1)+" of "+str(len(images)) 34 | # print images[i]['href'] 35 | iimgurdl(urljoin(x,images[i]['href']),directory,i) 36 | except: 37 | pass 38 | 39 | def gfydl(link): 40 | response = requests.get(link) 41 | soup = bs(response.text,"lxml") 42 | each = soup.find(id="mp4Source")['src'] 43 | filename=each.split('/')[-1] 44 | mp3file = urllib2.urlopen(each) 45 | if not os.path.exists(home+filename): 46 | with open(home+filename,'wb') as f: 47 | f.write(mp3file.read()) 48 | 49 | 50 | def iimgurdl(link,directory=None,i=None): 51 | # print link,directory,i 52 | if "gifv" in link: 53 | dligifv(link) 54 | 55 | else: 56 | if directory and i: 57 | filename=link.split('/')[-1] 58 | if len(filename.split(".")[0])>8 and filename.split(".")[0][-1]=="b": 59 | pass 60 | else: 61 | fpath = directory+"/"+str(i)+"_"+filename 62 | if not os.path.exists(fpath): 63 | 64 | urllib.urlretrieve(link,fpath) 65 | else: 66 | filename=link.split('/')[-1] 67 | fpath = home+filename 68 | if not os.path.exists(fpath): 69 | 70 | urllib.urlretrieve(link,fpath) 71 | 72 | def imgurDL(link): 73 | if "i.imgur" in link: 74 | iimgurdl(link) 75 | elif "imgur.com/a/" in link: 76 | dlalbum(link) 77 | elif "imgur" in link: 78 | pass 79 | def typeget(link): 80 | if "imgur" in link: 81 | imgurDL(link) 82 | elif "gfycat" in link: 83 | gfydl(link) 84 | else: 85 | viddl(link) 86 | def viddl(link): 87 | 88 | command = "youtube-dl --output " + home +"%(title)s.%(ext)s"+ " "+link 89 | try: 90 | call(command.split(), shell=False) 91 | except: 92 | pass 93 | 94 | r = praw.Reddit(user_agent='Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36') 95 | 96 | linkorsub = raw_input("Single Link or Subreddit? 1 , 2 :\n") 97 | if linkorsub == "1": 98 | home = expanduser("~") + "/.scrape/"+"common"+"/" 99 | link = raw_input("Enter link:\n") 100 | typeget(link) 101 | else: 102 | subr = raw_input("Enter Subreddit of choice:\n") 103 | top = input("Enter post limitation(integer):\n") 104 | fromt = raw_input("Enter time period: hour,day,week,month,year,all\n") 105 | 106 | home = expanduser("~") + "/.scrape/"+subr+"/" 107 | 108 | if not os.path.exists(home): 109 | os.makedirs(home) 110 | 111 | print "Your files will be stored in " +home 112 | 113 | submissions = r.get_subreddit(subr).get_top(limit=top,params={"t":fromt}) 114 | count=1 115 | for i in submissions: 116 | print "Downloading "+str(count)+": " +i.url +" ... " 117 | try: 118 | typeget(i.url) 119 | 120 | except: 121 | pass 122 | count+=1 123 | --------------------------------------------------------------------------------