├── README.md ├── .gitignore ├── K9py ├── aggrtest.py ├── find_test.py ├── darenet │ └── K9_darenet.py ├── K9.py └── rizon │ └── K9_rizon.py ├── bugs.txt~ └── bugs.txt /README.md: -------------------------------------------------------------------------------- 1 | K9NewsIrcBot 2 | ============ 3 | 4 | K9NewsIrcBot Development and Release Repository -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # NoHup 60 | nohup.out 61 | -------------------------------------------------------------------------------- /K9py/aggrtest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # used for testing rss feeds to see if feedparser can read them 3 | 4 | from concurrent import futures 5 | import feedparser 6 | import sched, time 7 | 8 | rss_urls = ['http://feeds.feedburner.com/ImgurGallery?format=xml'] 9 | entries = [] 10 | s = sched.scheduler(time.time, time.sleep) 11 | 12 | def getentries(): 13 | with futures.ThreadPoolExecutor(max_workers=2) as executor: 14 | future_to_url = dict((executor.submit(feedparser.parse, url), url) for url in rss_urls) 15 | 16 | feeds = [future.result() for future in futures.as_completed(future_to_url)] 17 | #print [future.result() for future in futures.as_completed(future_to_url)] 18 | for feed in feeds: 19 | for item in feed: 20 | print "\n" 21 | print item 22 | print feed[item] 23 | print "\n" 24 | entries.extend(feed["feed"]) 25 | print entries.extend(feed["feed"]) 26 | entries.extend(feed["items"]) 27 | sorted_entries = sorted(entries, key=lambda entry: entry["date_parsed"], reverse=True) 28 | sorted_entries = sorted_entries.reverse() 29 | s.enter(5, 1, getentries, ()) 30 | print entries 31 | #entsize = len(sorted_entries) 32 | #for i in range(0, entsize): 33 | # if sorted_entries[i]['link'].find("bbc") != -1: 34 | # print "bbc" 35 | # elif sorted_entries[i]['link'].find("jazeera") != -1: 36 | # print "aje" 37 | #print sorted_entries[0]['link'].find("bbc") 38 | 39 | #def displayentries(): 40 | #print getentries() 41 | #entsize = len(sentries) 42 | #for i in range(0, entsize): 43 | # if sentries[i]['link'].find("bbc") != -1: 44 | # print "bbc" 45 | # elif sentries[i]['link'].find("jazeera") != -1: 46 | # print "aje" 47 | #print sentries[0]['link'].find("bbc") 48 | 49 | s.enter(5, 1, getentries, ()) 50 | s.run() 51 | #displayentries() 52 | -------------------------------------------------------------------------------- /bugs.txt~: -------------------------------------------------------------------------------- 1 | Traceback (most recent call last): 2 | File "./K9.py", line 130, in 3 | connection.next() 4 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/client.py", line 149, in connect 5 | self.connect_cb(self) 6 | File "./K9.py", line 113, in connect_callback 7 | rsched.run() 8 | File "/usr/lib/python2.7/sched.py", line 117, in run 9 | action(*argument) 10 | File "./K9.py", line 97, in news_stream 11 | helpers.msg(cli, channel, "{%s} %s - %s\n" % (name,sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']))) 12 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/helpers.py", line 24, in msg 13 | cli.send("PRIVMSG", user, ":%s" % line) 14 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/client.py", line 127, in send 15 | self.socket.send(msg + bytes("\r\n", "ascii")) 16 | socket.error: [Errno 11] Resource temporarily unavailable 17 | 18 | Traceback (most recent call last): 19 | File "./K9.py", line 125, in 20 | connection.next() 21 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/client.py", line 149, in connect 22 | self.connect_cb(self) 23 | File "./K9.py", line 113, in connect_callback 24 | rsched.run() 25 | File "/usr/lib/python2.7/sched.py", line 117, in run 26 | action(*argument) 27 | File "./K9.py", line 91, in news_stream 28 | helpers.msg(cli, channel, "{hacker news 100} %s - %s\n" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']))) 29 | File "/usr/local/lib/python2.7/dist-packages/tinyurl.py", line 32, in create_one 30 | ret = urllib.urlopen(API_CREATE, data=url_data).read().strip() 31 | File "/usr/lib/python2.7/urllib.py", line 88, in urlopen 32 | return opener.open(url, data) 33 | File "/usr/lib/python2.7/urllib.py", line 209, in open 34 | return getattr(self, name)(url, data) 35 | File "/usr/lib/python2.7/urllib.py", line 344, in open_http 36 | h.endheaders(data) 37 | File "/usr/lib/python2.7/httplib.py", line 958, in endheaders 38 | self._send_output(message_body) 39 | File "/usr/lib/python2.7/httplib.py", line 818, in _send_output 40 | self.send(msg) 41 | File "/usr/lib/python2.7/httplib.py", line 780, in send 42 | self.connect() 43 | File "/usr/lib/python2.7/httplib.py", line 761, in connect 44 | self.timeout, self.source_address) 45 | File "/usr/lib/python2.7/socket.py", line 571, in create_connection 46 | raise err 47 | IOError: [Errno socket error] [Errno 110] Connection timed out 48 | -------------------------------------------------------------------------------- /bugs.txt: -------------------------------------------------------------------------------- 1 | Traceback (most recent call last): 2 | File "./K9.py", line 130, in 3 | connection.next() 4 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/client.py", line 149, in connect 5 | self.connect_cb(self) 6 | File "./K9.py", line 113, in connect_callback 7 | rsched.run() 8 | File "/usr/lib/python2.7/sched.py", line 117, in run 9 | action(*argument) 10 | File "./K9.py", line 97, in news_stream 11 | helpers.msg(cli, channel, "{%s} %s - %s\n" % (name,sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']))) 12 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/helpers.py", line 24, in msg 13 | cli.send("PRIVMSG", user, ":%s" % line) 14 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/client.py", line 127, in send 15 | self.socket.send(msg + bytes("\r\n", "ascii")) 16 | socket.error: [Errno 11] Resource temporarily unavailable 17 | 18 | Traceback (most recent call last): 19 | File "./K9.py", line 125, in 20 | connection.next() 21 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/client.py", line 149, in connect 22 | self.connect_cb(self) 23 | File "./K9.py", line 113, in connect_callback 24 | rsched.run() 25 | File "/usr/lib/python2.7/sched.py", line 117, in run 26 | action(*argument) 27 | File "./K9.py", line 91, in news_stream 28 | helpers.msg(cli, channel, "{hacker news 100} %s - %s\n" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']))) 29 | File "/usr/local/lib/python2.7/dist-packages/tinyurl.py", line 32, in create_one 30 | ret = urllib.urlopen(API_CREATE, data=url_data).read().strip() 31 | File "/usr/lib/python2.7/urllib.py", line 88, in urlopen 32 | return opener.open(url, data) 33 | File "/usr/lib/python2.7/urllib.py", line 209, in open 34 | return getattr(self, name)(url, data) 35 | File "/usr/lib/python2.7/urllib.py", line 344, in open_http 36 | h.endheaders(data) 37 | File "/usr/lib/python2.7/httplib.py", line 958, in endheaders 38 | self._send_output(message_body) 39 | File "/usr/lib/python2.7/httplib.py", line 818, in _send_output 40 | self.send(msg) 41 | File "/usr/lib/python2.7/httplib.py", line 780, in send 42 | self.connect() 43 | File "/usr/lib/python2.7/httplib.py", line 761, in connect 44 | self.timeout, self.source_address) 45 | File "/usr/lib/python2.7/socket.py", line 571, in create_connection 46 | raise err 47 | IOError: [Errno socket error] [Errno 110] Connection timed out 48 | 49 | Traceback (most recent call last): 50 | File "./K9.py", line 130, in 51 | connection.next() 52 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/client.py", line 149, in connect 53 | self.connect_cb(self) 54 | File "./K9.py", line 113, in connect_callback 55 | rsched.run() 56 | File "/usr/lib/python2.7/sched.py", line 117, in run 57 | action(*argument) 58 | File "./K9.py", line 91, in news_stream 59 | helpers.msg(cli, channel, "{hacker news 100} %s - %s\n" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']))) 60 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/helpers.py", line 24, in msg 61 | cli.send("PRIVMSG", user, ":%s" % line) 62 | File "/usr/local/lib/python2.7/dist-packages/oyoyo/client.py", line 127, in send 63 | self.socket.send(msg + bytes("\r\n", "ascii")) 64 | socket.error: [Errno 11] Resource temporarily unavailable 65 | 66 | -------------------------------------------------------------------------------- /K9py/find_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #suggestions 4 | # another suggestion 5 | # would be like PMs 6 | # say I want news with a keyword 7 | # examples "bulldozer" for amd bulldozer specific news 8 | # !subscribe "bulldozer" or some crap 9 | # search or subscribe, really 10 | # I think search would be smarter, yes 11 | 12 | import time 13 | import sched 14 | import feedparser 15 | from datetime import datetime 16 | from concurrent import futures 17 | 18 | rss_urls = ['http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989', 19 | 'http://feeds.bbci.co.uk/news/rss.xml', 20 | 'http://rss.cnn.com/rss/cnn_topstories.rss', 21 | 'http://rss.slashdot.org/Slashdot/slashdot', 22 | 'http://www.hackinthebox.org/backend.php', 23 | 'http://rss.dw-world.de/rdf/rss-de-all', 24 | 'http://www3.nhk.or.jp/rss/news/cat0.xml', 25 | 'http://www.france24.com/fr/monde/rss', 26 | 'http://www.npr.org/rss/rss.php?id=1001', 27 | 'http://feeds.reuters.com/reuters/topNews', 28 | 'http://feeds.feedburner.com/newsyc150'] 29 | 30 | rss_sources = set(['aljazeera','bbc','cnn','reuters','slashdot','hitb','dw-world','nhk','france24','npr','reuters']) 31 | 32 | rsched = sched.scheduler(time.time, time.sleep) 33 | 34 | def getentries(): 35 | entries = [] 36 | with futures.ThreadPoolExecutor(max_workers=2) as executor: 37 | future_to_url = dict((executor.submit(feedparser.parse, url), url) for url in rss_urls) 38 | feeds = [future.result() for future in futures.as_completed(future_to_url)] 39 | #print [future.result() for future in futures.as_completed(future_to_url)] 40 | for feed in feeds: 41 | entries.extend(feed["items"]) 42 | try: 43 | sorted_entries = sorted(entries, key=lambda entry: entry["date"], reverse=True) 44 | except KeyError: 45 | sorted_entries = sorted(entries, key=lambda entry: entry["updated"], reverse=True) 46 | print "========================================================================" 47 | print "feed items loaded @ " + datetime.now().strftime("%m/%w/%Y %H:%M:%S %Z") 48 | print "========================================================================" 49 | #print rss feed to channel slowly 50 | entsize = len(sorted_entries) 51 | print "========================================================================" 52 | print "Entry Size: " + str(entsize) 53 | print "========================================================================" 54 | print "\n" 55 | 56 | for i in range(0, entsize): 57 | #print sorted_entries[i] 58 | #print "\n" 59 | if sorted_entries[i]['summary_detail']['base'] == u'http://feeds.feedburner.com/newsyc150': 60 | print "{HACKER NEWS 100} %s - %s" % (sorted_entries[i]['title'],sorted_entries[i]['link']) 61 | else: 62 | for name in rss_sources: 63 | if sorted_entries[i]['link'].find(name) != -1: 64 | print "{%s} %s - %s" % (name.upper(),sorted_entries[i]['title'],sorted_entries[i]['link']) 65 | else: 66 | continue 67 | 68 | 69 | #if sorted_entries[i]['link'].find("bbc") != -1: 70 | # print "[BBC WORLD NEWS] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 71 | #elif sorted_entries[i]['link'].find("jazeera") != -1: 72 | # print "[AJE] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 73 | #elif sorted_entries[i]['link'].find("cnn") != -1: 74 | # print "[CNN] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 75 | #elif sorted_entries[i]['link'].find("reuters") != -1: 76 | # print "[REUTERS BUSINESS] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 77 | #elif sorted_entries[i]['link'].find("slashdot") != -1: 78 | # print "[\.] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 79 | #elif sorted_entries[i]['link'].find("wired") != -1: 80 | # print "[WIRED] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 81 | #elif sorted_entries[i]['link'].find("popsci") != -1: 82 | # print "[POPULAR SCIENCE] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 83 | #elif sorted_entries[i]['link'].find("telegraph") != -1: 84 | # print "[TELEGRAPH] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 85 | #elif sorted_entries[i]['link'].find("google") != -1: 86 | # print "[GOOGLE NEWS] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 87 | #elif sorted_entries[i]['link'].find("hitb") != -1: 88 | # print "[HACK IN THE BOX] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 89 | # elif sorted_entries[i]['link'].find("dw-world") != -1: 90 | # print "[DEUTSCHE WELLE ENG] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 91 | # elif sorted_entries[i]['link'].find("pcgamer") != -1: 92 | # print "[PC GAMER] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 93 | # elif sorted_entries[i]['link'].find("eluniversal") != -1: 94 | # print "[El Universal] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 95 | # elif sorted_entries[i]['link'].find("nhk") != -1: 96 | # print "[NHK] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 97 | # elif sorted_entries[i]['link'].find("france24") != -1: 98 | # print "[FRANCE24] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 99 | # elif sorted_entries[i]['link'].find("chosun") != -1: 100 | # print "[CHOSUN] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 101 | # elif sorted_entries[i]['link'].find("theregister") != -1: 102 | # print "[THEREGISTER] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 103 | # elif sorted_entries[i]['link'].find("gameinformer") != -1: 104 | # print "[GAME INFORMER] %s - %s" % (sorted_entries[i]['title'],tinyurl.create_one(sorted_entries[i]['link']) 105 | time.sleep(15) 106 | rsched.enter(5, 1, getentries, ()) 107 | 108 | rsched.enter(5, 1, getentries, ()) 109 | rsched.run() 110 | -------------------------------------------------------------------------------- /K9py/darenet/K9_darenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | #--------------------------------------------------- 4 | # K-9 News Irc Stream Bot | 5 | # drwho ~ I Love To Party Fake Industries © | 6 | #--------------------------------------------------- 7 | # Version 0.3 | 8 | #--------------------------------------------------- 9 | # add in python twitter and make k-9news account 10 | # change: no oyoyo 11 | 12 | import time, threading, feedparser, tinyurl 13 | import socket, re, sys, os, getpass, time, codecs 14 | from datetime import datetime 15 | from concurrent import futures 16 | from HTMLParser import HTMLParser 17 | from random import shuffle 18 | from pymongo import MongoClient 19 | from bson.objectid import ObjectId 20 | 21 | import threading 22 | 23 | class MLStripper(HTMLParser): 24 | def __init__(self): 25 | self.reset() 26 | self.fed = [] 27 | def handle_data(self, d): 28 | self.fed.append(d) 29 | def get_data(self): 30 | return ''.join(self.fed) 31 | 32 | def strip_tags(html): 33 | s = MLStripper() 34 | try: 35 | s.feed(html) 36 | return s.get_data() 37 | except: 38 | return html 39 | 40 | class RSSStream: 41 | def __init__(self, client): 42 | # RSS information 43 | self.rss_urls = ['http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989', 44 | 'http://www.theregister.co.uk/headlines.atom', 45 | 'http://feeds.bbci.co.uk/news/rss.xml', 46 | 'http://rss.slashdot.org/Slashdot/slashdot', 47 | 'http://www.hackinthebox.org/backend.php', 48 | 'http://www.npr.org/rss/rss.php?id=1001', 49 | 'http://feeds.reuters.com/reuters/topNews', 50 | 'http://feeds.feedburner.com/newsyc150', 51 | 'http://rt.com/rss/news/', 52 | 'http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml', 53 | 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories', 54 | 'http://www.washingtonpost.com/rss/world', 55 | 'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml?SITE=SCAND&SECTION=HOME', 56 | 'https://news.google.com/?output=rss', 57 | 'http://www.drudgereportfeed.com/', 58 | 'http://edition.presstv.ir/rss/', 59 | 'http://www.scmp.com/rss/91/feed', 60 | 'http://feeds.arstechnica.com/arstechnica/index', 61 | 'http://feeds.feedburner.com/TheHackersNews', 62 | 'http://english.chosun.com/site/data/rss/rss.xml', 63 | 'http://vuxml.freebsd.org/freebsd/rss.xml'] 64 | 65 | self.rss_sources = set(['aljazeera.net', 66 | 'bbc.com', 67 | 'bbc.co.uk', 68 | 'reuters.com', 69 | 'slashdot.org', 70 | 'npr.org', 71 | 'theregister.co.uk', 72 | 'theregister.com', 73 | 'rt.com', 74 | 'hitb', 75 | 'nytimes.com', 76 | 'usatoday.com', 77 | 'washingtonpost.com', 78 | 'ap.org', 79 | 'arstechnica.com', 80 | 'chosun.com', 81 | 'scmp.com']) 82 | 83 | self.rss_titles = { 84 | 'aljazeera.net': 'Al Jazeera', 85 | 'aljazeera.com': 'Al Jazeera', 86 | 'bbc.com': 'BBC News', 87 | 'bbc.co.uk': 'BBC News', 88 | 'bbci.co.uk': 'BBC News', 89 | 'reuters.com': 'Reuters', 90 | 'slashdot.org': 'Slashdot', 91 | 'npr.org': 'National Public Radio', 92 | 'theregister.co.uk': 'The Register', 93 | 'theregister.com': 'The Register', 94 | 'rt.com': 'Russia Today', 95 | 'hitb': 'HITB', 96 | 'nytimes.com': 'New York Times', 97 | 'usatoday.com': 'USA Today', 98 | 'washingtonpost.com': 'Washington Post', 99 | 'ap.org': 'Associated Press', 100 | 'arstechnica.com': 'Ars Technica', 101 | 'chosun.com': 'The Chosun Ilbo', 102 | 'scmp.com': 'South China Morning Post', 103 | } 104 | 105 | self.client = client 106 | 107 | def get_feeds(self): 108 | with futures.ThreadPoolExecutor(max_workers=13) as executor: 109 | future_to_url = dict((executor.submit(feedparser.parse, url), url) for url in self.rss_urls) 110 | feeds = [future.result() for future in futures.as_completed(future_to_url)] 111 | for feed in feeds: 112 | self.client.entries.extend(feed["items"]) 113 | try: 114 | self.client.sorted_entries = sorted(self.client.entries, key=lambda entry: entry["date"], reverse=True) 115 | except KeyError: 116 | self.client.sorted_entries = sorted(self.client.entries, key=lambda entry: entry["updated"], reverse=True) 117 | 118 | shuffle(self.client.sorted_entries) 119 | 120 | print "========================================================================" 121 | print "feed items loaded @ " + datetime.now().strftime("%Y-%m-%d %H:%M:%S") 122 | print "========================================================================" 123 | 124 | self.client.entsize = len(self.client.sorted_entries) 125 | print "========================================================================" 126 | print "Entry Size: " + str(self.client.entsize) 127 | print "========================================================================" 128 | 129 | def print_article(self): 130 | rssitem = {} 131 | i = 0 132 | if not self.client.sorted_entries: 133 | self.get_feeds() 134 | 135 | summary_detail = self.client.sorted_entries[i].get('summary_detail', None) 136 | if summary_detail is not None and summary_detail['base'] == u'http://feeds.feedburner.com/newsyc150': 137 | if 'published_parsed' in self.client.sorted_entries[i]: 138 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 139 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 140 | else: 141 | rssitem['date_published'] = None 142 | 143 | rssitem['news_source'] = "hacker news 100" 144 | rssitem['title'] = self.client.sorted_entries[i]['title'] 145 | rssitem['url'] = tinyurl.create_one(self.client.sorted_entries[i]['link']) 146 | rssitem['description'] = strip_tags(self.client.sorted_entries[i]['description']) 147 | rssitem['rss_raw'] = self.client.sorted_entries[i] 148 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 149 | 150 | self.client.sorted_entries.pop(i) 151 | 152 | self.client.entsize = len(self.client.sorted_entries) 153 | print "========================================================================" 154 | print "Entry Size: " + str(self.client.entsize) 155 | print "========================================================================" 156 | 157 | else: 158 | namefound = False 159 | for name in self.rss_sources: 160 | print name 161 | if self.client.sorted_entries[i]['link'].find(name) != -1: 162 | if 'author_detail' in self.client.sorted_entries[i]: 163 | try: 164 | rssitem['news_author'] = self.client.sorted_entries[i]['author_detail']['name'] 165 | except KeyError: 166 | rssitem['news_author'] = None 167 | else: 168 | rssitem['news_author'] = None 169 | 170 | if 'published_parsed' in self.client.sorted_entries[i]: 171 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 172 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 173 | else: 174 | rssitem['date_published'] = None 175 | 176 | if 'media_thumbnail' in self.client.sorted_entries[i]: 177 | rssitem['media_thumbnail'] = self.client.sorted_entries[i]['media_thumbnail'] 178 | else: 179 | rssitem['media_thumbnail'] = None 180 | 181 | rssitem['news_source'] = self.rss_titles[name] 182 | if self.client.sorted_entries[i]['description'] != None: 183 | rssitem['description'] = strip_tags(self.client.sorted_entries[i]['description']) 184 | else: 185 | rssitem['description'] = self.client.sorted_entries[i]['description'] 186 | 187 | rssitem['title'] = self.client.sorted_entries[i]['title'] 188 | rssitem['url'] = tinyurl.create_one(self.client.sorted_entries[i]['link']) 189 | rssitem['rss_raw'] = self.client.sorted_entries[i] 190 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 191 | 192 | # save rss item 193 | rsstmp = { 194 | 'news_source': rssitem['news_source'], 195 | 'description': rssitem['description'], 196 | 'title': rssitem['title'], 197 | 'url': rssitem['url'], 198 | 'scrape_date': rssitem['scrape_date'], 199 | 'news_author': rssitem['news_author'], 200 | 'date_published': rssitem['date_published'], 201 | 'media_thumbnail': rssitem['media_thumbnail'] 202 | } 203 | 204 | self.save_rss(rsstmp) 205 | print rsstmp 206 | del rsstmp 207 | 208 | for c in self.client.channels: 209 | self.client.say("\002[%s] %s \037%s" % (rssitem['news_source'], rssitem['title'], rssitem['url']), c) 210 | self.client.rssitem = rssitem 211 | print rssitem 212 | print "\n\n" 213 | namefound = True 214 | time.sleep(60) 215 | break 216 | else: 217 | namefound = False 218 | continue 219 | 220 | if namefound == False: 221 | if 'author_detail' in self.client.sorted_entries[i]: 222 | rssitem['news_author'] = self.client.sorted_entries[i]['author_detail']['name'] 223 | else: 224 | rssitem['news_author'] = None 225 | 226 | if 'published_parsed' in self.client.sorted_entries[i]: 227 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 228 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 229 | else: 230 | rssitem['date_published'] = None 231 | 232 | if 'media_thumbnail' in self.client.sorted_entries[i]: 233 | rssitem['media_thumbnail'] = self.client.sorted_entries[i]['media_thumbnail'] 234 | else: 235 | rssitem['media_thumbnail'] = None 236 | 237 | rssitem['news_source'] = "K9 World News" 238 | 239 | rss_description = self.client.sorted_entries[i].get('summary_detail', None) 240 | if rss_description is not None: 241 | rssitem['description'] = strip_tags(rss_description) 242 | else: 243 | rssitem['description'] = rss_description 244 | 245 | rssitem['title'] = self.client.sorted_entries[i]['title'] 246 | rssitem['url'] = self.client.sorted_entries[i]['link'] 247 | rssitem['rss_raw'] = self.client.sorted_entries[i] 248 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 249 | 250 | # save rss item 251 | rsstmp = { 252 | 'news_source': rssitem['news_source'], 253 | 'description': rssitem['description'], 254 | 'title': rssitem['title'], 255 | 'url': rssitem['url'], 256 | 'scrape_date': rssitem['scrape_date'], 257 | 'news_author': rssitem['news_author'], 258 | 'date_published': rssitem['date_published'], 259 | 'media_thumbnail': rssitem['media_thumbnail'] 260 | } 261 | 262 | self.save_rss(rsstmp) 263 | print rsstmp 264 | del rsstmp 265 | 266 | for c in self.client.channels: 267 | self.client.say("\002[%s] %s \037%s" % (rssitem['news_source'], rssitem['title'], rssitem['url']), c) 268 | self.client.rssitem = rssitem 269 | print rssitem 270 | print "\n\n" 271 | 272 | time.sleep(60) 273 | 274 | self.client.sorted_entries.pop(i) 275 | self.client.entsize = len(self.client.sorted_entries) 276 | print "========================================================================" 277 | print "Entry Size: " + str(self.client.entsize) 278 | print "========================================================================" 279 | 280 | def save_rss(self, rssdata): 281 | mg_cnn = MongoClient() 282 | mg_db = mg_cnn["qnews"] 283 | 284 | if "rss_data" not in mg_db.collection_names(): 285 | mg_db.create_collection("rss_data") 286 | 287 | if mg_db["rss_data"].find_one(rssdata): 288 | mgtd = mg_db["rss_data"].find_one(rssdata) 289 | mg_db["rss_data"].update({"_id": ObjectId(mgtd["_id"])}, {"$set": rssdata}) 290 | else: 291 | mg_db["rss_data"].insert(rssdata) 292 | 293 | def wait_print(self): 294 | print datetime.now() 295 | 296 | class IRCClient: 297 | 298 | # irc information 299 | socket = None 300 | connected = False 301 | joined = False 302 | registered = False 303 | nickname = 'WNews' 304 | channels = ['#worldnews'] 305 | network = 'eth.ca.us.darenet.org' 306 | 307 | # mysql information 308 | host = 'localhost' 309 | dbuser = '' 310 | chatdb = None 311 | dbpassw = None 312 | dbname = None 313 | 314 | def __init__(self): 315 | #self.dbpassw = getpass.getpass('[MySQL Pass] ') 316 | self.socket = socket.socket() 317 | self.socket.connect((self.network, 6667)) 318 | self.send("NICK %s" % self.nickname) 319 | self.send("USER %(nick)s %(nick)s %(nick)s :%(nick)s" % {'nick':self.nickname}) 320 | self.rss_stream = RSSStream(self) 321 | self.stream_started = False 322 | self.ctx = {} 323 | self.entries = [] 324 | self.entsize = None 325 | self.sorted_entries = None 326 | self.rssitem = None 327 | self.rss_loaded = False 328 | 329 | 330 | while True: 331 | buf = self.socket.recv(4096) 332 | lines = buf.split("\n") 333 | for data in lines: 334 | data = str(data).strip() 335 | if data == '': 336 | continue 337 | #print "I<", data 338 | 339 | # server ping/pong? 340 | if data.find('PING') != -1: 341 | n = data.split(':')[1] 342 | print self.rss_stream.wait_print() 343 | self.send('PONG :' + n) 344 | if self.connected == False: 345 | self.perform() 346 | self.connected = True 347 | if self.connected and self.stream_started: 348 | self.rss_stream.print_article() 349 | continue 350 | 351 | args = data.split(None, 3) 352 | if len(args) != 4: 353 | continue 354 | self.ctx['sender'] = args[0][1:] 355 | self.ctx['type'] = args[1] 356 | self.ctx['target'] = args[2] 357 | self.ctx['msg'] = args[3][1:] 358 | 359 | # check to start rss 360 | if self.ctx['type'] == '366': 361 | self.joined = True 362 | 363 | # register 364 | print self.ctx['type'] 365 | print self.ctx['sender'] 366 | print self.ctx['target'] 367 | print self.ctx['msg'] 368 | #if self.ctx['type'] == '332' and self.registered is False: 369 | #print 'PRIVMSG NickServ@network.net :IDENTIFY %s' % self.register_pass 370 | #self.send('PRIVMSG NickServ@network.net :IDENTIFY %s' % self.register_pass) 371 | #self.registered = True 372 | 373 | # whom to reply? 374 | target = self.ctx['target'] 375 | if self.ctx['target'] == self.nickname: 376 | target = self.ctx['sender'].split("!")[0] 377 | 378 | 379 | # some basic commands 380 | if self.ctx['msg'] == '!test': 381 | self.say('fuck off', target) 382 | 383 | 384 | # directed to the bot? 385 | if self.ctx['type'] == 'PRIVMSG' and (self.ctx['msg'].lower()[0:len(self.nickname)] == self.nickname.lower() or self.ctx['target'] == self.nickname): 386 | # something is speaking to the bot 387 | query = self.ctx['msg'] 388 | if self.ctx['target'] != self.nickname: 389 | query = query[len(self.nickname):] 390 | query = query.lstrip(':,;. ') 391 | 392 | # do something intelligent here, like query a chatterbot 393 | #print 'someone spoke to us: ', query 394 | #self.say('alright :|', target) 395 | 396 | if self.connected and self.joined and not self.stream_started: 397 | self.rss_stream.get_feeds() 398 | self.stream_started = True 399 | self.rss_stream.print_article() 400 | continue 401 | 402 | if self.connected and self.joined and self.stream_started: 403 | self.rss_stream.print_article() 404 | continue 405 | 406 | # IRC message protocol methods 407 | def send(self, msg): 408 | print "I>",msg.encode('utf-8') 409 | self.socket.send(bytearray(msg+"\r\n", "utf-8")) 410 | 411 | def say(self, msg, to): 412 | self.send("PRIVMSG %s :%s" % (to, msg)) 413 | 414 | # long text chunker 415 | def chunks(s, n): 416 | """Produce `n`-character chunks from `s`.""" 417 | for start in range(0, len(s), n): 418 | yield s[start:start+n] 419 | 420 | # MySQL methods 421 | def mysql_connect(self): 422 | #db=mysql.connector.connect(user=self.dbuser, passwd=self.dbpassw, database=self.dbname, use_unicode=True, charset='utf8') 423 | self.chatdb = db 424 | return db 425 | 426 | # bot methods 427 | def shutdown(self, channel=None): 428 | if channel: 429 | self.send("QUIT %s" % channel) 430 | 431 | if self.chatdb: 432 | self.chatdb.close() 433 | 434 | self.socket.close() 435 | sys.exit() 436 | 437 | def perform(self): 438 | #self.send("PRIVMSG R : Register <>" 439 | #self.send("PRIVMSG R : Login <>") 440 | self.send("MODE %s +x" % self.nickname) 441 | for c in self.channels: 442 | self.send("JOIN %s" % c) 443 | print 'News Stream Started' 444 | 445 | 446 | if __name__ == '__main__': 447 | IRCClient() 448 | -------------------------------------------------------------------------------- /K9py/K9.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | #--------------------------------------------------- 4 | # K-9 News Irc Stream Bot | 5 | # drwho ~ I Love To Party Fake Industries © | 6 | #--------------------------------------------------- 7 | # Version 0.3 | 8 | #--------------------------------------------------- 9 | # add in python twitter and make k-9news account 10 | # change: no oyoyo 11 | 12 | import time, threading, feedparser, tinyurl 13 | import socket, re, sys, os, getpass, time, codecs 14 | from datetime import datetime 15 | from concurrent import futures 16 | from HTMLParser import HTMLParser 17 | from random import shuffle 18 | from pymongo import MongoClient 19 | from bson.objectid import ObjectId 20 | 21 | import threading 22 | 23 | class MLStripper(HTMLParser): 24 | def __init__(self): 25 | self.reset() 26 | self.fed = [] 27 | def handle_data(self, d): 28 | self.fed.append(d) 29 | def get_data(self): 30 | return ''.join(self.fed) 31 | 32 | def strip_tags(html): 33 | s = MLStripper() 34 | try: 35 | s.feed(html) 36 | return s.get_data() 37 | except: 38 | return html 39 | 40 | class RSSStream: 41 | def __init__(self, client): 42 | # RSS information 43 | self.rss_urls = ['http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989', 44 | 'http://www.theregister.co.uk/headlines.atom', 45 | 'http://feeds.bbci.co.uk/news/rss.xml', 46 | 'http://rss.slashdot.org/Slashdot/slashdot', 47 | 'http://www.hackinthebox.org/backend.php', 48 | 'http://www.npr.org/rss/rss.php?id=1001', 49 | 'http://feeds.reuters.com/reuters/topNews', 50 | 'http://feeds.feedburner.com/newsyc150', 51 | 'http://rt.com/rss/news/', 52 | 'http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml', 53 | 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories', 54 | 'http://www.washingtonpost.com/rss/world', 55 | 'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml?SITE=SCAND&SECTION=HOME', 56 | 'https://news.google.com/?output=rss', 57 | 'http://www.drudgereportfeed.com/', 58 | 'http://edition.presstv.ir/rss/', 59 | 'http://www.scmp.com/rss/91/feed', 60 | 'http://feeds.arstechnica.com/arstechnica/index', 61 | 'http://feeds.feedburner.com/TheHackersNews', 62 | 'http://english.chosun.com/site/data/rss/rss.xml', 63 | 'http://vuxml.freebsd.org/freebsd/rss.xml'] 64 | 65 | self.rss_sources = set(['aljazeera.net', 66 | 'bbc.com', 67 | 'bbc.co.uk', 68 | 'reuters.com', 69 | 'slashdot.org', 70 | 'npr.org', 71 | 'theregister.co.uk', 72 | 'theregister.com', 73 | 'rt.com', 74 | 'hitb', 75 | 'nytimes.com', 76 | 'usatoday.com', 77 | 'washingtonpost.com', 78 | 'ap.org', 79 | 'arstechnica.com', 80 | 'chosun.com', 81 | 'scmp.com']) 82 | 83 | self.rss_titles = { 84 | 'aljazeera.net': 'Al Jazeera', 85 | 'aljazeera.com': 'Al Jazeera', 86 | 'bbc.com': 'BBC News', 87 | 'bbc.co.uk': 'BBC News', 88 | 'bbci.co.uk': 'BBC News', 89 | 'reuters.com': 'Reuters', 90 | 'slashdot.org': 'Slashdot', 91 | 'npr.org': 'National Public Radio', 92 | 'theregister.co.uk': 'The Register', 93 | 'theregister.com': 'The Register', 94 | 'rt.com': 'Russia Today', 95 | 'hitb': 'HITB', 96 | 'nytimes.com': 'New York Times', 97 | 'usatoday.com': 'USA Today', 98 | 'washingtonpost.com': 'Washington Post', 99 | 'ap.org': 'Associated Press', 100 | 'arstechnica.com': 'Ars Technica', 101 | 'chosun.com': 'The Chosun Ilbo', 102 | 'scmp.com': 'South China Morning Post', 103 | } 104 | 105 | self.client = client 106 | 107 | def get_feeds(self): 108 | with futures.ThreadPoolExecutor(max_workers=13) as executor: 109 | future_to_url = dict((executor.submit(feedparser.parse, url), url) for url in self.rss_urls) 110 | feeds = [future.result() for future in futures.as_completed(future_to_url)] 111 | for feed in feeds: 112 | self.client.entries.extend(feed["items"]) 113 | try: 114 | self.client.sorted_entries = sorted(self.client.entries, key=lambda entry: entry["date"], reverse=True) 115 | except KeyError: 116 | self.client.sorted_entries = sorted(self.client.entries, key=lambda entry: entry["updated"], reverse=True) 117 | 118 | shuffle(self.client.sorted_entries) 119 | 120 | print "========================================================================" 121 | print "feed items loaded @ " + datetime.now().strftime("%Y-%m-%d %H:%M:%S") 122 | print "========================================================================" 123 | 124 | self.client.entsize = len(self.client.sorted_entries) 125 | print "========================================================================" 126 | print "Entry Size: " + str(self.client.entsize) 127 | print "========================================================================" 128 | 129 | def print_article(self): 130 | rssitem = {} 131 | i = 0 132 | if not self.client.sorted_entries: 133 | self.get_feeds() 134 | 135 | summary_detail = self.client.sorted_entries[i].get('summary_detail', None) 136 | if summary_detail is not None and summary_detail['base'] == u'http://feeds.feedburner.com/newsyc150': 137 | if 'published_parsed' in self.client.sorted_entries[i]: 138 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 139 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 140 | else: 141 | rssitem['date_published'] = None 142 | 143 | rssitem['news_source'] = "hacker news 100" 144 | rssitem['title'] = self.client.sorted_entries[i]['title'] 145 | rssitem['url'] = tinyurl.create_one(self.client.sorted_entries[i]['link']) 146 | rssitem['description'] = strip_tags(self.client.sorted_entries[i]['description']) 147 | rssitem['rss_raw'] = self.client.sorted_entries[i] 148 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 149 | 150 | self.client.sorted_entries.pop(i) 151 | 152 | self.client.entsize = len(self.client.sorted_entries) 153 | print "========================================================================" 154 | print "Entry Size: " + str(self.client.entsize) 155 | print "========================================================================" 156 | 157 | else: 158 | namefound = False 159 | for name in self.rss_sources: 160 | print name 161 | if self.client.sorted_entries[i]['link'].find(name) != -1: 162 | if 'author_detail' in self.client.sorted_entries[i]: 163 | try: 164 | rssitem['news_author'] = self.client.sorted_entries[i]['author_detail']['name'] 165 | except KeyError: 166 | rssitem['news_author'] = None 167 | else: 168 | rssitem['news_author'] = None 169 | 170 | if 'published_parsed' in self.client.sorted_entries[i]: 171 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 172 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 173 | else: 174 | rssitem['date_published'] = None 175 | 176 | if 'media_thumbnail' in self.client.sorted_entries[i]: 177 | rssitem['media_thumbnail'] = self.client.sorted_entries[i]['media_thumbnail'] 178 | else: 179 | rssitem['media_thumbnail'] = None 180 | 181 | rssitem['news_source'] = self.rss_titles[name] 182 | if self.client.sorted_entries[i]['description'] != None: 183 | rssitem['description'] = strip_tags(self.client.sorted_entries[i]['description']) 184 | else: 185 | rssitem['description'] = self.client.sorted_entries[i]['description'] 186 | 187 | rssitem['title'] = self.client.sorted_entries[i]['title'] 188 | rssitem['url'] = tinyurl.create_one(self.client.sorted_entries[i]['link']) 189 | rssitem['rss_raw'] = self.client.sorted_entries[i] 190 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 191 | 192 | # save rss item 193 | rsstmp = { 194 | 'news_source': rssitem['news_source'], 195 | 'description': rssitem['description'], 196 | 'title': rssitem['title'], 197 | 'url': rssitem['url'], 198 | 'scrape_date': rssitem['scrape_date'], 199 | 'news_author': rssitem['news_author'], 200 | 'date_published': rssitem['date_published'], 201 | 'media_thumbnail': rssitem['media_thumbnail'] 202 | } 203 | 204 | self.save_rss(rsstmp) 205 | print rsstmp 206 | del rsstmp 207 | 208 | for c in self.client.channels: 209 | self.client.say("\002[%s] %s" % (rssitem['news_source'], rssitem['title']), c) 210 | self.client.say("\035%s" % rssitem['description'], c) 211 | self.client.say("\037%s" % rssitem['url'], c) 212 | self.client.rssitem = rssitem 213 | print rssitem 214 | print "\n\n" 215 | namefound = True 216 | time.sleep(60) 217 | break 218 | else: 219 | namefound = False 220 | continue 221 | 222 | if namefound == False: 223 | if 'author_detail' in self.client.sorted_entries[i]: 224 | rssitem['news_author'] = self.client.sorted_entries[i]['author_detail']['name'] 225 | else: 226 | rssitem['news_author'] = None 227 | 228 | if 'published_parsed' in self.client.sorted_entries[i]: 229 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 230 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 231 | else: 232 | rssitem['date_published'] = None 233 | 234 | if 'media_thumbnail' in self.client.sorted_entries[i]: 235 | rssitem['media_thumbnail'] = self.client.sorted_entries[i]['media_thumbnail'] 236 | else: 237 | rssitem['media_thumbnail'] = None 238 | 239 | rssitem['news_source'] = "K9 World News" 240 | 241 | rss_description = self.client.sorted_entries[i].get('summary_detail', None) 242 | if rss_description is not None: 243 | rssitem['description'] = strip_tags(rss_description) 244 | else: 245 | rssitem['description'] = rss_description 246 | 247 | rssitem['title'] = self.client.sorted_entries[i]['title'] 248 | rssitem['url'] = self.client.sorted_entries[i]['link'] 249 | rssitem['rss_raw'] = self.client.sorted_entries[i] 250 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 251 | 252 | # save rss item 253 | rsstmp = { 254 | 'news_source': rssitem['news_source'], 255 | 'description': rssitem['description'], 256 | 'title': rssitem['title'], 257 | 'url': rssitem['url'], 258 | 'scrape_date': rssitem['scrape_date'], 259 | 'news_author': rssitem['news_author'], 260 | 'date_published': rssitem['date_published'], 261 | 'media_thumbnail': rssitem['media_thumbnail'] 262 | } 263 | 264 | self.save_rss(rsstmp) 265 | print rsstmp 266 | del rsstmp 267 | 268 | for c in self.client.channels: 269 | self.client.say("\002[%s] %s" % (rssitem['news_source'], rssitem['title']), c) 270 | self.client.say("\035%s" % rssitem['description'], c) 271 | self.client.say("\037%s" % rssitem['url'], c) 272 | self.client.rssitem = rssitem 273 | print rssitem 274 | print "\n\n" 275 | 276 | time.sleep(60) 277 | 278 | self.client.sorted_entries.pop(i) 279 | self.client.entsize = len(self.client.sorted_entries) 280 | print "========================================================================" 281 | print "Entry Size: " + str(self.client.entsize) 282 | print "========================================================================" 283 | 284 | def save_rss(self, rssdata): 285 | mg_cnn = MongoClient() 286 | mg_db = mg_cnn["qnews"] 287 | 288 | if "rss_data" not in mg_db.collection_names(): 289 | mg_db.create_collection("rss_data") 290 | 291 | if mg_db["rss_data"].find_one(rssdata): 292 | mgtd = mg_db["rss_data"].find_one(rssdata) 293 | mg_db["rss_data"].update({"_id": ObjectId(mgtd["_id"])}, {"$set": rssdata}) 294 | else: 295 | mg_db["rss_data"].insert(rssdata) 296 | 297 | def wait_print(self): 298 | print datetime.now() 299 | 300 | class IRCClient: 301 | 302 | # irc information 303 | socket = None 304 | connected = False 305 | joined = False 306 | registered = False 307 | nickname = '' # bot nick name string 308 | channels = [] # channel(s) name string ex: ['#chan1', '#chan2'] 309 | network = '' # irc server address string 310 | 311 | # mysql information 312 | host = 'localhost' 313 | dbuser = '' 314 | chatdb = None 315 | dbpassw = None 316 | dbname = None 317 | 318 | def __init__(self): 319 | #self.dbpassw = getpass.getpass('[MySQL Pass] ') 320 | self.socket = socket.socket() 321 | self.socket.connect((self.network, 6667)) 322 | self.send("NICK %s" % self.nickname) 323 | self.send("USER %(nick)s %(nick)s %(nick)s :%(nick)s" % {'nick':self.nickname}) 324 | self.rss_stream = RSSStream(self) 325 | self.stream_started = False 326 | self.ctx = {} 327 | self.entries = [] 328 | self.entsize = None 329 | self.sorted_entries = None 330 | self.rssitem = None 331 | self.rss_loaded = False 332 | 333 | 334 | while True: 335 | buf = self.socket.recv(4096) 336 | lines = buf.split("\n") 337 | for data in lines: 338 | data = str(data).strip() 339 | if data == '': 340 | continue 341 | #print "I<", data 342 | 343 | # server ping/pong? 344 | if data.find('PING') != -1: 345 | n = data.split(':')[1] 346 | print self.rss_stream.wait_print() 347 | self.send('PONG :' + n) 348 | if self.connected == False: 349 | self.perform() 350 | self.connected = True 351 | if self.connected and self.stream_started: 352 | self.rss_stream.print_article() 353 | continue 354 | 355 | args = data.split(None, 3) 356 | if len(args) != 4: 357 | continue 358 | self.ctx['sender'] = args[0][1:] 359 | self.ctx['type'] = args[1] 360 | self.ctx['target'] = args[2] 361 | self.ctx['msg'] = args[3][1:] 362 | 363 | # check to start rss 364 | if self.ctx['type'] == '366': 365 | self.joined = True 366 | 367 | # register 368 | print self.ctx['type'] 369 | print self.ctx['sender'] 370 | print self.ctx['target'] 371 | print self.ctx['msg'] 372 | #if self.ctx['type'] == '332' and self.registered is False: 373 | #print 'PRIVMSG NickServ@network.net :IDENTIFY %s' % self.register_pass 374 | #self.send('PRIVMSG NickServ@network.net :IDENTIFY %s' % self.register_pass) 375 | #self.registered = True 376 | 377 | # whom to reply? 378 | target = self.ctx['target'] 379 | if self.ctx['target'] == self.nickname: 380 | target = self.ctx['sender'].split("!")[0] 381 | 382 | 383 | # some basic commands 384 | if self.ctx['msg'] == '!test': 385 | self.say('fuck off', target) 386 | 387 | 388 | # directed to the bot? 389 | if self.ctx['type'] == 'PRIVMSG' and (self.ctx['msg'].lower()[0:len(self.nickname)] == self.nickname.lower() or self.ctx['target'] == self.nickname): 390 | # something is speaking to the bot 391 | query = self.ctx['msg'] 392 | if self.ctx['target'] != self.nickname: 393 | query = query[len(self.nickname):] 394 | query = query.lstrip(':,;. ') 395 | 396 | # do something intelligent here, like query a chatterbot 397 | #print 'someone spoke to us: ', query 398 | #self.say('alright :|', target) 399 | 400 | if self.connected and self.joined and not self.stream_started: 401 | self.rss_stream.get_feeds() 402 | self.stream_started = True 403 | self.rss_stream.print_article() 404 | continue 405 | 406 | if self.connected and self.joined and self.stream_started: 407 | self.rss_stream.print_article() 408 | continue 409 | 410 | # IRC message protocol methods 411 | def send(self, msg): 412 | print "I>",msg.encode('utf-8') 413 | self.socket.send(bytearray(msg+"\r\n", "utf-8")) 414 | 415 | def say(self, msg, to): 416 | self.send("PRIVMSG %s :%s" % (to, msg)) 417 | 418 | # long text chunker 419 | def chunks(s, n): 420 | """Produce `n`-character chunks from `s`.""" 421 | for start in range(0, len(s), n): 422 | yield s[start:start+n] 423 | 424 | # MySQL methods 425 | def mysql_connect(self): 426 | #db=mysql.connector.connect(user=self.dbuser, passwd=self.dbpassw, database=self.dbname, use_unicode=True, charset='utf8') 427 | self.chatdb = db 428 | return db 429 | 430 | # bot methods 431 | def shutdown(self, channel=None): 432 | if channel: 433 | self.send("QUIT %s" % channel) 434 | 435 | if self.chatdb: 436 | self.chatdb.close() 437 | 438 | self.socket.close() 439 | sys.exit() 440 | 441 | def perform(self): 442 | #self.send("PRIVMSG R : Register <>" 443 | #self.send("PRIVMSG R : Login <>") 444 | self.send("MODE %s +x" % self.nickname) 445 | for c in self.channels: 446 | self.send("JOIN %s" % c) 447 | print 'News Stream Started' 448 | 449 | 450 | if __name__ == '__main__': 451 | IRCClient() -------------------------------------------------------------------------------- /K9py/rizon/K9_rizon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | #--------------------------------------------------- 4 | # K-9 News Irc Stream Bot | 5 | # drwho ~ I Love To Party Fake Industries © | 6 | #--------------------------------------------------- 7 | # Version 0.3 | 8 | #--------------------------------------------------- 9 | # add in python twitter and make k-9news account 10 | # change: no oyoyo 11 | 12 | import time, threading, feedparser, tinyurl 13 | import socket, re, sys, os, getpass, time, codecs 14 | from datetime import datetime 15 | from concurrent import futures 16 | from HTMLParser import HTMLParser 17 | from random import shuffle 18 | from pymongo import MongoClient 19 | from bson.objectid import ObjectId 20 | 21 | import threading 22 | 23 | class MLStripper(HTMLParser): 24 | def __init__(self): 25 | self.reset() 26 | self.fed = [] 27 | def handle_data(self, d): 28 | self.fed.append(d) 29 | def get_data(self): 30 | return ''.join(self.fed) 31 | 32 | def strip_tags(html): 33 | s = MLStripper() 34 | try: 35 | s.feed(html) 36 | return s.get_data() 37 | except: 38 | return html 39 | 40 | class RSSStream: 41 | def __init__(self, client): 42 | # RSS information 43 | self.rss_urls = ['http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989', 44 | 'http://www.theregister.co.uk/headlines.atom', 45 | 'http://feeds.bbci.co.uk/news/rss.xml', 46 | 'http://rss.slashdot.org/Slashdot/slashdot', 47 | 'http://www.hackinthebox.org/backend.php', 48 | 'http://www.npr.org/rss/rss.php?id=1001', 49 | 'http://feeds.reuters.com/reuters/topNews', 50 | 'http://feeds.feedburner.com/newsyc150', 51 | 'http://rt.com/rss/news/', 52 | 'http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml', 53 | 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories', 54 | 'http://www.washingtonpost.com/rss/world', 55 | 'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml?SITE=SCAND&SECTION=HOME', 56 | 'https://news.google.com/?output=rss', 57 | 'http://www.drudgereportfeed.com/', 58 | 'http://edition.presstv.ir/rss/', 59 | 'http://www.scmp.com/rss/91/feed', 60 | 'http://feeds.arstechnica.com/arstechnica/index', 61 | 'http://feeds.feedburner.com/TheHackersNews', 62 | 'http://english.chosun.com/site/data/rss/rss.xml', 63 | 'http://vuxml.freebsd.org/freebsd/rss.xml'] 64 | 65 | self.rss_sources = set(['aljazeera.net', 66 | 'bbc.com', 67 | 'bbc.co.uk', 68 | 'reuters.com', 69 | 'slashdot.org', 70 | 'npr.org', 71 | 'theregister.co.uk', 72 | 'theregister.com', 73 | 'rt.com', 74 | 'hitb', 75 | 'nytimes.com', 76 | 'usatoday.com', 77 | 'washingtonpost.com', 78 | 'ap.org', 79 | 'arstechnica.com', 80 | 'chosun.com', 81 | 'scmp.com']) 82 | 83 | self.rss_titles = { 84 | 'aljazeera.net': 'Al Jazeera', 85 | 'aljazeera.com': 'Al Jazeera', 86 | 'bbc.com': 'BBC News', 87 | 'bbc.co.uk': 'BBC News', 88 | 'bbci.co.uk': 'BBC News', 89 | 'reuters.com': 'Reuters', 90 | 'slashdot.org': 'Slashdot', 91 | 'npr.org': 'National Public Radio', 92 | 'theregister.co.uk': 'The Register', 93 | 'theregister.com': 'The Register', 94 | 'rt.com': 'Russia Today', 95 | 'hitb': 'HITB', 96 | 'nytimes.com': 'New York Times', 97 | 'usatoday.com': 'USA Today', 98 | 'washingtonpost.com': 'Washington Post', 99 | 'ap.org': 'Associated Press', 100 | 'arstechnica.com': 'Ars Technica', 101 | 'chosun.com': 'The Chosun Ilbo', 102 | 'scmp.com': 'South China Morning Post', 103 | } 104 | 105 | self.client = client 106 | 107 | def get_feeds(self): 108 | with futures.ThreadPoolExecutor(max_workers=13) as executor: 109 | future_to_url = dict((executor.submit(feedparser.parse, url), url) for url in self.rss_urls) 110 | feeds = [future.result() for future in futures.as_completed(future_to_url)] 111 | for feed in feeds: 112 | self.client.entries.extend(feed["items"]) 113 | try: 114 | self.client.sorted_entries = sorted(self.client.entries, key=lambda entry: entry["date"], reverse=True) 115 | except KeyError: 116 | self.client.sorted_entries = sorted(self.client.entries, key=lambda entry: entry["updated"], reverse=True) 117 | 118 | shuffle(self.client.sorted_entries) 119 | 120 | print "========================================================================" 121 | print "feed items loaded @ " + datetime.now().strftime("%Y-%m-%d %H:%M:%S") 122 | print "========================================================================" 123 | 124 | self.client.entsize = len(self.client.sorted_entries) 125 | print "========================================================================" 126 | print "Entry Size: " + str(self.client.entsize) 127 | print "========================================================================" 128 | 129 | def print_article(self): 130 | rssitem = {} 131 | i = 0 132 | if not self.client.sorted_entries: 133 | self.get_feeds() 134 | 135 | summary_detail = self.client.sorted_entries[i].get('summary_detail', None) 136 | if summary_detail is not None and summary_detail['base'] == u'http://feeds.feedburner.com/newsyc150': 137 | if 'published_parsed' in self.client.sorted_entries[i]: 138 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 139 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 140 | else: 141 | rssitem['date_published'] = None 142 | 143 | rssitem['news_source'] = "hacker news 100" 144 | rssitem['title'] = self.client.sorted_entries[i]['title'] 145 | rssitem['url'] = tinyurl.create_one(self.client.sorted_entries[i]['link']) 146 | rssitem['description'] = strip_tags(self.client.sorted_entries[i]['description']) 147 | rssitem['rss_raw'] = self.client.sorted_entries[i] 148 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 149 | 150 | self.client.sorted_entries.pop(i) 151 | 152 | self.client.entsize = len(self.client.sorted_entries) 153 | print "========================================================================" 154 | print "Entry Size: " + str(self.client.entsize) 155 | print "========================================================================" 156 | 157 | else: 158 | namefound = False 159 | for name in self.rss_sources: 160 | print name 161 | if self.client.sorted_entries[i]['link'].find(name) != -1: 162 | if 'author_detail' in self.client.sorted_entries[i]: 163 | try: 164 | rssitem['news_author'] = self.client.sorted_entries[i]['author_detail']['name'] 165 | except KeyError: 166 | rssitem['news_author'] = None 167 | else: 168 | rssitem['news_author'] = None 169 | 170 | if 'published_parsed' in self.client.sorted_entries[i]: 171 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 172 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 173 | else: 174 | rssitem['date_published'] = None 175 | 176 | if 'media_thumbnail' in self.client.sorted_entries[i]: 177 | rssitem['media_thumbnail'] = self.client.sorted_entries[i]['media_thumbnail'] 178 | else: 179 | rssitem['media_thumbnail'] = None 180 | 181 | rssitem['news_source'] = self.rss_titles[name] 182 | if self.client.sorted_entries[i]['description'] != None: 183 | rssitem['description'] = strip_tags(self.client.sorted_entries[i]['description']) 184 | else: 185 | rssitem['description'] = self.client.sorted_entries[i]['description'] 186 | 187 | rssitem['title'] = self.client.sorted_entries[i]['title'] 188 | rssitem['url'] = tinyurl.create_one(self.client.sorted_entries[i]['link']) 189 | rssitem['rss_raw'] = self.client.sorted_entries[i] 190 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 191 | 192 | # save rss item 193 | rsstmp = { 194 | 'news_source': rssitem['news_source'], 195 | 'description': rssitem['description'], 196 | 'title': rssitem['title'], 197 | 'url': rssitem['url'], 198 | 'scrape_date': rssitem['scrape_date'], 199 | 'news_author': rssitem['news_author'], 200 | 'date_published': rssitem['date_published'], 201 | 'media_thumbnail': rssitem['media_thumbnail'] 202 | } 203 | 204 | self.save_rss(rsstmp) 205 | print rsstmp 206 | del rsstmp 207 | 208 | for c in self.client.channels: 209 | self.client.say("\002[%s]\002 %s \037\00307%s" % (rssitem['news_source'], rssitem['title'], rssitem['url']), c) 210 | self.client.rssitem = rssitem 211 | print rssitem 212 | print "\n\n" 213 | namefound = True 214 | time.sleep(60) 215 | break 216 | else: 217 | namefound = False 218 | continue 219 | 220 | if namefound == False: 221 | if 'author_detail' in self.client.sorted_entries[i]: 222 | rssitem['news_author'] = self.client.sorted_entries[i]['author_detail']['name'] 223 | else: 224 | rssitem['news_author'] = None 225 | 226 | if 'published_parsed' in self.client.sorted_entries[i]: 227 | dt = datetime.fromtimestamp(time.mktime(self.client.sorted_entries[i]['published_parsed'])) 228 | rssitem['date_published'] = dt.strftime('%Y-%m-%d %H:%M:%S') 229 | else: 230 | rssitem['date_published'] = None 231 | 232 | if 'media_thumbnail' in self.client.sorted_entries[i]: 233 | rssitem['media_thumbnail'] = self.client.sorted_entries[i]['media_thumbnail'] 234 | else: 235 | rssitem['media_thumbnail'] = None 236 | 237 | rssitem['news_source'] = "K9 World News" 238 | 239 | rss_description = self.client.sorted_entries[i].get('summary_detail', None) 240 | if rss_description is not None: 241 | rssitem['description'] = strip_tags(rss_description) 242 | rssitem['description'] = rssitem['description'][:100] + '..' if len(rssitem['description']) > 100 else rssitem['description'] 243 | else: 244 | rssitem['description'] = rss_description 245 | 246 | rssitem['title'] = self.client.sorted_entries[i]['title'] 247 | rssitem['url'] = self.client.sorted_entries[i]['link'] 248 | rssitem['rss_raw'] = self.client.sorted_entries[i] 249 | rssitem['scrape_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 250 | 251 | # save rss item 252 | rsstmp = { 253 | 'news_source': rssitem['news_source'], 254 | 'description': rssitem['description'], 255 | 'title': rssitem['title'], 256 | 'url': rssitem['url'], 257 | 'scrape_date': rssitem['scrape_date'], 258 | 'news_author': rssitem['news_author'], 259 | 'date_published': rssitem['date_published'], 260 | 'media_thumbnail': rssitem['media_thumbnail'] 261 | } 262 | 263 | self.save_rss(rsstmp) 264 | print rsstmp 265 | del rsstmp 266 | 267 | for c in self.client.channels: 268 | self.client.say("\002[%s]\002 %s \037\00307%s" % (rssitem['news_source'], rssitem['title'], rssitem['url']), c) 269 | self.client.rssitem = rssitem 270 | print rssitem 271 | print "\n\n" 272 | 273 | time.sleep(60) 274 | 275 | self.client.sorted_entries.pop(i) 276 | self.client.entsize = len(self.client.sorted_entries) 277 | print "========================================================================" 278 | print "Entry Size: " + str(self.client.entsize) 279 | print "========================================================================" 280 | 281 | def save_rss(self, rssdata): 282 | mg_cnn = MongoClient() 283 | mg_db = mg_cnn["qnews"] 284 | 285 | if "rss_data" not in mg_db.collection_names(): 286 | mg_db.create_collection("rss_data") 287 | 288 | if mg_db["rss_data"].find_one(rssdata): 289 | mgtd = mg_db["rss_data"].find_one(rssdata) 290 | mg_db["rss_data"].update({"_id": ObjectId(mgtd["_id"])}, {"$set": rssdata}) 291 | else: 292 | mg_db["rss_data"].insert(rssdata) 293 | 294 | def wait_print(self): 295 | print datetime.now() 296 | 297 | class IRCClient: 298 | 299 | # irc information 300 | socket = None 301 | connected = False 302 | joined = False 303 | registered = False 304 | nickname = 'WNews' 305 | channels = ['#worldnews'] 306 | network = 'irc.rizon.net' 307 | 308 | # mysql information 309 | host = 'localhost' 310 | dbuser = '' 311 | chatdb = None 312 | dbpassw = None 313 | dbname = None 314 | 315 | def __init__(self): 316 | #self.dbpassw = getpass.getpass('[MySQL Pass] ') 317 | self.socket = socket.socket() 318 | self.socket.connect((self.network, 6667)) 319 | self.send("NICK %s" % self.nickname) 320 | self.send("USER %(nick)s %(nick)s %(nick)s :%(nick)s" % {'nick':self.nickname}) 321 | self.rss_stream = RSSStream(self) 322 | self.stream_started = False 323 | self.ctx = {} 324 | self.entries = [] 325 | self.entsize = None 326 | self.sorted_entries = None 327 | self.rssitem = None 328 | self.rss_loaded = False 329 | 330 | 331 | while True: 332 | buf = self.socket.recv(4096) 333 | lines = buf.split("\n") 334 | for data in lines: 335 | data = str(data).strip() 336 | if data == '': 337 | continue 338 | #print "I<", data 339 | 340 | # server ping/pong? 341 | if data.find('PING') != -1: 342 | n = data.split(':')[1] 343 | print self.rss_stream.wait_print() 344 | self.send('PONG :' + n) 345 | if self.connected == False: 346 | self.perform() 347 | self.connected = True 348 | if self.connected and self.stream_started: 349 | self.rss_stream.print_article() 350 | continue 351 | 352 | args = data.split(None, 3) 353 | if len(args) != 4: 354 | continue 355 | self.ctx['sender'] = args[0][1:] 356 | self.ctx['type'] = args[1] 357 | self.ctx['target'] = args[2] 358 | self.ctx['msg'] = args[3][1:] 359 | 360 | # check to start rss 361 | if self.ctx['type'] == '366': 362 | self.joined = True 363 | 364 | # register 365 | print self.ctx['type'] 366 | print self.ctx['sender'] 367 | print self.ctx['target'] 368 | print self.ctx['msg'] 369 | #if self.ctx['type'] == '332' and self.registered is False: 370 | #print 'PRIVMSG NickServ@network.net :IDENTIFY %s' % self.register_pass 371 | #self.send('PRIVMSG NickServ@network.net :IDENTIFY %s' % self.register_pass) 372 | #self.registered = True 373 | 374 | # whom to reply? 375 | target = self.ctx['target'] 376 | if self.ctx['target'] == self.nickname: 377 | target = self.ctx['sender'].split("!")[0] 378 | 379 | 380 | # some basic commands 381 | if self.ctx['msg'] == '!test': 382 | self.say('fuck off', target) 383 | 384 | 385 | # directed to the bot? 386 | if self.ctx['type'] == 'PRIVMSG' and (self.ctx['msg'].lower()[0:len(self.nickname)] == self.nickname.lower() or self.ctx['target'] == self.nickname): 387 | # something is speaking to the bot 388 | query = self.ctx['msg'] 389 | if self.ctx['target'] != self.nickname: 390 | query = query[len(self.nickname):] 391 | query = query.lstrip(':,;. ') 392 | 393 | # do something intelligent here, like query a chatterbot 394 | #print 'someone spoke to us: ', query 395 | #self.say('alright :|', target) 396 | 397 | if self.connected and self.joined and not self.stream_started: 398 | self.rss_stream.get_feeds() 399 | self.stream_started = True 400 | self.rss_stream.print_article() 401 | continue 402 | 403 | if self.connected and self.joined and self.stream_started: 404 | self.rss_stream.print_article() 405 | continue 406 | 407 | # IRC message protocol methods 408 | def send(self, msg): 409 | print "I>",msg.encode('utf-8') 410 | self.socket.send(bytearray(msg+"\r\n", "utf-8")) 411 | 412 | def say(self, msg, to): 413 | self.send("PRIVMSG %s :%s" % (to, msg)) 414 | 415 | # long text chunker 416 | def chunks(s, n): 417 | """Produce `n`-character chunks from `s`.""" 418 | for start in range(0, len(s), n): 419 | yield s[start:start+n] 420 | 421 | # MySQL methods 422 | def mysql_connect(self): 423 | #db=mysql.connector.connect(user=self.dbuser, passwd=self.dbpassw, database=self.dbname, use_unicode=True, charset='utf8') 424 | self.chatdb = db 425 | return db 426 | 427 | # bot methods 428 | def shutdown(self, channel=None): 429 | if channel: 430 | self.send("QUIT %s" % channel) 431 | 432 | if self.chatdb: 433 | self.chatdb.close() 434 | 435 | self.socket.close() 436 | sys.exit() 437 | 438 | def perform(self): 439 | #self.send("PRIVMSG R : Register <>" 440 | #self.send("PRIVMSG R : Login <>") 441 | self.send("MODE %s +x" % self.nickname) 442 | for c in self.channels: 443 | self.send("JOIN %s" % c) 444 | print 'News Stream Started' 445 | 446 | 447 | if __name__ == '__main__': 448 | IRCClient() 449 | --------------------------------------------------------------------------------