├── .gitignore ├── celeryconfig.py ├── chromedriver.exe ├── config_change_me.py ├── newQuestionsBot.py └── tasks.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | celerybeat-schedule 3 | *.*~ 4 | -------------------------------------------------------------------------------- /celeryconfig.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | CELERYBEAT_SCHEDULE = { 4 | 'fetchAndMailNewQuoraQuestions': { 5 | 'task': 'tasks.fetchAndMailNewQuoraQuestions', 6 | 'schedule': timedelta(seconds=60*30) 7 | }, 8 | } 9 | 10 | CELERY_TIMEZONE = 'Asia/Calcutta' 11 | -------------------------------------------------------------------------------- /chromedriver.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/praveen97uma/QuoraBot/6017cfa4ad9ff38fa6c61eb263e23a76887b7f13/chromedriver.exe -------------------------------------------------------------------------------- /config_change_me.py: -------------------------------------------------------------------------------- 1 | """ 2 | Change the name of this file to config.py 3 | """ 4 | 5 | #Number of page down key press events selenium should send on quora page 6 | NO_OF_PAGEDOWNS = 10 7 | 8 | QUORA_USERNAME = "" 9 | QUORA_PASSWORD = "" 10 | 11 | GMAIL_USERNAME = "" 12 | GMAIL_PASSWORD = "" 13 | 14 | GMAIL_SMTP_HOST = 'smtp.gmail.com' 15 | GMAIL_SMTP_PORT = 587 16 | 17 | EMAIL_SUBJECT = "New Quora Questions" 18 | RECIPIENT_ADDRESS = "" 19 | 20 | EMAIL_HEADERS = "\r\n".join(["from: My Quora Bot", 21 | "subject: %s"%(EMAIL_SUBJECT), 22 | "to: %s"%(RECIPIENT_ADDRESS), 23 | "mime-version: 1.0", 24 | "content-type: text/html"]) 25 | 26 | -------------------------------------------------------------------------------- /newQuestionsBot.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.common.keys import Keys 3 | 4 | from datetime import datetime 5 | 6 | import time 7 | import smtplib 8 | from pyvirtualdisplay import Display 9 | import config 10 | 11 | def loginIntoQuora(email, password): 12 | print "Opening browser" 13 | browser = webdriver.Chrome() 14 | browser.get("http://www.quora.com") 15 | print "Logging into Quora" 16 | elem = browser.find_element_by_name("email") 17 | elem.send_keys(email) 18 | elem = browser.find_element_by_name("password") 19 | elem.send_keys(password) 20 | elem.send_keys(Keys.RETURN) 21 | print "Logged into quora" 22 | time.sleep(5) 23 | return browser 24 | 25 | def sendPageDownPressEvents(browser, no_of_page_downs): 26 | print "Scrolling down" 27 | body = browser.find_element_by_tag_name("body") 28 | while(no_of_page_downs>0): 29 | body.send_keys(Keys.PAGE_DOWN) 30 | time.sleep(0.4) 31 | no_of_page_downs-=1 32 | time.sleep(5) 33 | 34 | def getNewQuestions(browser): 35 | print "Retrieving new questions" 36 | questions = [] 37 | question_elems = browser.find_elements_by_class_name("pagedlist_item") 38 | for q_elem in question_elems: 39 | question_tip_elem = q_elem.find_element_by_class_name("feed_item_title") 40 | if "Question added" in question_tip_elem.text: 41 | ques_link_elem = q_elem.find_element_by_class_name("question_link") 42 | ques_link = ques_link_elem.get_attribute("href") 43 | ques_text_elem = q_elem.find_element_by_class_name("link_text") 44 | ques_text = ques_text_elem.text 45 | print ques_text 46 | topics = [] 47 | topics_elem = q_elem.find_elements_by_class_name("topic_name") 48 | for topic_elem in topics_elem: 49 | topic_link = topic_elem.get_attribute("href") 50 | topics.append(topic_link) 51 | 52 | questions.append({"link":ques_link, "text": ques_text, 'topics':topics}) 53 | return questions 54 | 55 | def prepareEmailBody(data): 56 | body = "" 57 | for question_data in data: 58 | topics = question_data["topics"] 59 | topics = ['%s'%(topic, topic.split("/")[-1]) for topic in topics] 60 | topics = ', '.join(topics) +"
Time: %s
"%(datetime.now().strftime('%Y-%m-%d %H:%M:%S')) 75 | body = current_time + body 76 | headers = config.EMAIL_HEADERS 77 | email_content = headers + "\r\n\r\n" + body 78 | print "Sending mail" 79 | mail_server.sendmail(config.GMAIL_USERNAME, config.RECIPIENT_ADDRESS, email_content) 80 | mail_server.quit() 81 | 82 | def fetchAndMailNewQuestions(disable_display=False): 83 | if disable_display: 84 | display = Display(visible=0, size=(1024, 768)) 85 | display.start() 86 | print "Fetching questions" 87 | browser = loginIntoQuora(config.QUORA_USERNAME, config.QUORA_PASSWORD) 88 | sendPageDownPressEvents(browser, config.NO_OF_PAGEDOWNS) 89 | questions = getNewQuestions(browser) 90 | browser.quit() 91 | send_mail(questions) 92 | 93 | if __name__=='__main__': 94 | fetchAndMailNewQuestions() 95 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | 3 | import newQuestionsBot 4 | 5 | 6 | app = Celery('tasks', broker='amqp://guest@localhost//') 7 | app.config_from_object('celeryconfig') 8 | 9 | 10 | @app.task 11 | def fetchAndMailNewQuoraQuestions(): 12 | newQuestionsBot.fetchAndMailNewQuestions() 13 | --------------------------------------------------------------------------------