├── .gitignore ├── LICENSE ├── README.md ├── cyberprefixer.py └── offensive.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | .idea/* 38 | secrets.py 39 | *.log 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013-2016 Molly White 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CyberPrefixer 2 | ============= 3 | 4 | Twitter bot to prefix "cyber" to news headlines. Idea shamelessly stolen from 5 | [cyberfiction](https://github.com/dariusk/cyberfiction) by [@dariusk](https://github.com/dariusk). 6 | 7 | ## Requires ## 8 | * [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/) for HTML parsing 9 | * [topia.termextract](https://pypi.python.org/pypi/topia.termextract/) for POS tagging 10 | * [tweepy](https://github.com/tweepy/tweepy) for Tweeting 11 | -------------------------------------------------------------------------------- /cyberprefixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016 Molly White 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 
12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | import codecs 22 | import HTMLParser 23 | import os 24 | import tweepy 25 | import urllib2 26 | from offensive import tact 27 | from secrets import * 28 | from bs4 import BeautifulSoup 29 | from topia.termextract import tag 30 | from time import gmtime, strftime 31 | 32 | __location__ = os.path.realpath( 33 | os.path.join(os.getcwd(), os.path.dirname(__file__))) 34 | tagger = tag.Tagger() 35 | tagger.initialize() 36 | hparser = HTMLParser.HTMLParser() 37 | 38 | auth = tweepy.OAuthHandler(C_KEY, C_SECRET) 39 | auth.set_access_token(A_TOKEN, A_TOKEN_SECRET) 40 | api = tweepy.API(auth) 41 | tweets = api.user_timeline('CyberPrefixer') 42 | 43 | 44 | def get(): 45 | try: 46 | request = urllib2.Request( 47 | "http://news.google.com/news?pz=1&cf=all&ned=us&hl=en&output=rss") 48 | response = urllib2.urlopen(request) 49 | except urllib2.URLError as e: 50 | print e.reason 51 | else: 52 | html = BeautifulSoup(response.read(), features="html.parser") 53 | items = html.find_all('item') 54 | for item in items: 55 | headline = item.title.string 56 | h_split = headline.split() 57 | 58 | # We don't want to use incomplete headlines 59 | if "..." 
in headline: 60 | continue 61 | 62 | # Try to weed out all-caps headlines 63 | if count_caps(h_split) >= len(h_split) - 3: 64 | continue 65 | 66 | # Skip anything too offensive 67 | if not tact(headline): 68 | continue 69 | 70 | # Remove attribution string 71 | if "-" in headline: 72 | headline = headline.split("-")[:-1] 73 | headline = ' '.join(headline).strip() 74 | 75 | if process(headline): 76 | break 77 | else: 78 | continue 79 | 80 | 81 | def process(headline): 82 | headline = hparser.unescape(headline).strip() 83 | tagged = tagger(headline) 84 | for i, word in enumerate(tagged): 85 | # Avoid having two "cybers" in a row 86 | if is_replaceable(word) and not is_replaceable(tagged[i-1]): 87 | headline = headline.replace(" " + word[0], " cyber" + word[0], 1) 88 | 89 | # Don't tweet anything that's too long 90 | if len(headline) > 280: 91 | return False 92 | 93 | # Don't tweet anything where a replacement hasn't been made 94 | if "cyber" not in headline: 95 | return False 96 | else: 97 | return tweet(headline) 98 | 99 | 100 | def tweet(headline): 101 | # Check that we haven't tweeted this before 102 | for tweet in tweets: 103 | if headline == tweet.text: 104 | return False 105 | 106 | # Log tweet to file 107 | f = codecs.open(os.path.join(__location__, "cyberprefixer.log"), 'a', encoding='utf-8') 108 | t = strftime("%d %b %Y %H:%M:%S", gmtime()) 109 | f.write("\n" + t + " " + headline) 110 | f.close() 111 | 112 | # Post tweet 113 | api.update_status(headline) 114 | return True 115 | 116 | 117 | def count_caps(headline): 118 | count = 0 119 | for word in headline: 120 | if word[0].isupper(): 121 | count += 1 122 | return count 123 | 124 | 125 | def is_replaceable(word): 126 | # Prefix any noun (singular or plural) that begins with a lowercase letter 127 | if (word[1] == 'NN' or word[1] == 'NNS') and word[0][0].isalpha \ 128 | and word[0][0].islower() and len(word[0]) > 1: 129 | return True 130 | else: 131 | return False 132 | 133 | if __name__ == "__main__": 134 | 
get() 135 | -------------------------------------------------------------------------------- /offensive.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016 Molly White 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | #============================================================================== 22 | # NOTE/CAUTION/WARNING: 23 | # 24 | # This file is used to store offensive terms so that triggering or offensive 25 | # headlines are not satirized by CyberPrefixer and other bots. Accordingly, it 26 | # contains a lot of terms that are triggering, be they sexual, violent or 27 | # otherwise. 
#==============================================================================

import re

# Case-insensitive pattern matching any term considered too sensitive for a
# headline to be satirized.
offensive = re.compile(
    r"\b(deaths?|dead(ly)?|die(s|d)?|hurts?|(sex(ual(ly)?)?|child)[ -]?(abused?|trafficking|"
    r"assault(ed|s)?)|injur(e|i?es|ed|y)|kill(ing|ed|er|s)?s?|wound(ing|ed|s)?|fatal(ly|ity)?|"
    r"shoo?t(s|ing|er)?s?|crash(es|ed|ing)?|attack(s|ers?|ing|ed)?|murder(s|er|ed|ing)?s?|"
    r"hostages?|(gang)?rap(e|es|ed|ist|ists|ing)|assault(s|ed)?|pile-?ups?|massacre(s|d)?|"
    r"assassinate(d|s)?|sla(y|in|yed|ys|ying|yings)|victims?|tortur(e|ed|ing|es)|"
    r"execut(e|ion|ed|ioner)s?|gun(man|men|ned)|suicid(e|al|es)|bomb(s|ed|ing|ings|er|ers)?|"
    r"mass[- ]?graves?|bloodshed|state[- ]?of[- ]?emergency|al[- ]?Qaeda|blasts?|violen(t|ce)|"
    r"lethal|cancer(ous)?|stab(bed|bing|ber)?s?|casualt(y|ies)|sla(y|ying|yer|in)|"
    r"drown(s|ing|ed|ings)?|bod(y|ies)|kidnap(s|ped|per|pers|ping|pings)?|rampage|beat(ings?|en)|"
    r"terminal(ly)?|abduct(s|ed|ion)?s?|missing|behead(s|ed|ings?)?|homicid(e|es|al)|"
    r"burn(s|ed|ing)? alive|decapitated?s?|jihadi?s?t?|hang(ed|ing|s)?|funerals?|traged(y|ies)|"
    r"autops(y|ies)|child sex|sob(s|bing|bed)?|pa?edophil(e|es|ia)|9(/|-)11|Sept(ember|\.)? 11|"
    r"genocide)\W?\b",
    flags=re.IGNORECASE)


def tact(headline):
    """Return True when *headline* contains none of the offensive terms.

    A False result means the ``offensive`` pattern matched somewhere in
    the headline.
    """
    # Avoid producing particularly tactless tweets
    return offensive.search(headline) is None