├── .gitignore ├── Gemfile ├── Gemfile.lock ├── Procfile ├── README.md ├── app.json ├── boodoo.rb ├── bots.rb ├── corpus └── .gitignore ├── defaults.env └── model └── .gitignore /.gitignore: -------------------------------------------------------------------------------- 1 | secret.env 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'http://rubygems.org' 2 | ruby '2.1.3' 3 | 4 | gem 'twitter_ebooks', '3.0.6' 5 | gem 'dotenv' 6 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | addressable (2.3.6) 5 | awesome_print (1.6.1) 6 | bloomfilter-rb (2.1.1) 7 | redis 8 | buftok (0.2.0) 9 | coderay (1.1.0) 10 | dotenv (1.0.2) 11 | engtagger (0.2.0) 12 | equalizer (0.0.9) 13 | eventmachine (1.0.4) 14 | faraday (0.9.1) 15 | multipart-post (>= 1.2, < 3) 16 | fast-stemmer (1.0.2) 17 | gingerice (1.2.2) 18 | addressable 19 | awesome_print 20 | highscore (1.2.0) 21 | bloomfilter-rb (>= 2.1.1) 22 | whatlanguage (>= 1.0.0) 23 | htmlentities (4.3.3) 24 | http (0.6.3) 25 | http_parser.rb (~> 0.6.0) 26 | http_parser.rb (0.6.0) 27 | json (1.8.1) 28 | memoizable (0.4.2) 29 | thread_safe (~> 0.3, >= 0.3.1) 30 | method_source (0.8.2) 31 | multipart-post (2.0.0) 32 | naught (1.0.0) 33 | oauth (0.4.7) 34 | pry (0.10.1) 35 | coderay (~> 1.1.0) 36 | method_source (~> 0.8.1) 37 | slop (~> 3.4) 38 | redis (3.2.0) 39 | rufus-scheduler (3.0.9) 40 | tzinfo 41 | simple_oauth (0.3.1) 42 | slop (3.6.0) 43 | thread_safe (0.3.4) 44 | twitter (5.13.0) 45 | addressable (~> 2.3) 46 | buftok (~> 0.2.0) 47 | equalizer (~> 0.0.9) 48 | faraday (~> 0.9.0) 49 | http (~> 0.6.0) 50 | http_parser.rb (~> 0.6.0) 51 | json (~> 1.8) 52 | memoizable (~> 0.4.0) 53 | naught (~> 1.0) 54 | simple_oauth (~> 0.3.0) 55 | twitter_ebooks (3.0.6) 56 | engtagger 57 | eventmachine (~> 1.0.3) 58 | fast-stemmer 59 | gingerice 60 | highscore 61 | htmlentities 62 | oauth 63 | pry 64 | rufus-scheduler 65 | twitter (~> 5.0) 66 | tzinfo (1.2.2) 67 | thread_safe (~> 0.1) 68 | whatlanguage (1.0.5) 69 | 70 | PLATFORMS 71 | ruby 72 | 73 | DEPENDENCIES 74 | dotenv 75 | twitter_ebooks (= 3.0.6) 76 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | worker: bundle exec ebooks start 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # boodoo_ebooks 2 | 3 | A turn-key, beginner-friendly, ready-to-deploy implementation of a traditional \_ebooks bot using Mispy's [twitter_ebooks](https://github.com/mispy/twitter_ebooks) library. 4 | 5 | ## Usage 6 | 7 | Create your [Twitter app](https://apps.twitter.com) and generate access tokens with *Read, Write and Direct Messages* privileges. 8 | 9 | [![Deploy](https://www.herokucdn.com/deploy/button.png)](https://heroku.com/deploy?template=https://github.com/BooDoo/ebooks_example/tree/deploy) 10 | 11 | Put your BOT_NAME, SOURCE_USERNAME, and API secrets into Heroku Config Vars using the web dashboard. 12 | 13 | Scale your app to 1 dyno using the Heroku web dashboard. 14 | 15 | Bob's your uncle. 16 | 17 | ## Default Behavior 18 | Tweets once on startup. 19 | Has 80% chance of tweeting every 2 hours. 20 | Responds to mentions/DMs 21 | Favorites tweets that it likes. 22 | 23 | ## Special Features 24 | - **BLACKLIST**: accounts to not interact with 25 | - **BANNED_TERMS**: words or phrases to obscure/censor 26 | - DM commands (tweet, follow, unfollow, block, mention...) 27 | - Follower parity (periodically compares following/followers and follows/unfollows as needed) 28 | - Want something else? Create an [issue](https://github.com/BooDoo/ebooks_example/issues). No promises. 29 | 30 | ## TODO: 31 | - Support full archive (via CSV uploaded elsewhere) 32 | 33 | # DISCLAIMER: 34 | I'm making this because I wrote a two-part tutorial for an older version of the twitter_ebooks gem and my mentions turned into a tech support hellscape for months. 35 | Please [create issues](https://github.com/BooDoo/ebooks_example/issues) if you have trouble. 🙏 Please do not tweet at me. 🙏 36 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "BooDooBooks", 3 | "description": "Turn-key _ebooks deployment using twitter_ebooks", 4 | "repository": "https://github.com/BooDoo/ebooks_example/tree/deploy", 5 | "keywords": [ 6 | "twitter", 7 | "bot", 8 | "ebooks" 9 | ], 10 | "addons": [ 11 | "papertrail:choklad" 12 | ], 13 | "env": { 14 | "LANG": "en_US.UTF-8", 15 | "BOT_NAME": "", 16 | "SOURCE_USERNAME": "", 17 | "CONSUMER_KEY": "", 18 | "CONSUMER_SECRET": "", 19 | "ACCESS_TOKEN": "", 20 | "ACCESS_TOKEN_SECRET": "", 21 | "TWEET_INTERVAL": "2h", 22 | "TWEET_CHANCE": "0.8", 23 | "BLACKLIST": { 24 | "required": false, 25 | "value": "tinysubversions, dril" 26 | }, 27 | "BANNED_TERMS": { 28 | "required": false, 29 | "value": "voldemort, evgeny morozov, heroku" 30 | }, 31 | "SPECIAL_TERMS": { 32 | "required": false, 33 | "value": "" 34 | }, 35 | "DEFAULT_DELAY": "5..60", 36 | "DM_DELAY": { 37 | "required": false, 38 | "value": "0" 39 | }, 40 | "MENTION_DELAY": { 41 | "required": false, 42 | "value": "5..60" 43 | }, 44 | "TIMELINE_DELAY": { 45 | "required": false, 46 | "value": "10..600" 47 | }, 48 | "TIMEOUT_SLEEP": "5", 49 | "MAX_ERROR_RETRIES": "10", 50 | "UPDATE_FOLLOWS_INTERVAL": "90m" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /boodoo.rb: -------------------------------------------------------------------------------- 1 | require 'twitter_ebooks' 2 | include Ebooks 3 | 4 | module Ebooks::Boodoo 5 | # supports Ruby Range literal, Fixnum, or Float as string 6 | def parse_num(value) 7 | eval(value.to_s[/^\d+(?:\.{1,3})?\d*$/].to_s) 8 | end 9 | 10 | # Make expected/possible Range 11 | def parse_range(value) 12 | value = parse_num(value) 13 | if value.nil? 14 | value = nil 15 | elsif !value.respond_to?(:to_a) 16 | value = Range.new(value, value) 17 | end 18 | value 19 | end 20 | 21 | def obscure_curse(len) 22 | s = [] 23 | c = ['!', '@', '$', '%', '^', '&', '*'] 24 | len.times do 25 | s << c.sample 26 | end 27 | s.join('') 28 | end 29 | 30 | def obscure_curses(tweet) 31 | # TODO: Ignore banned terms that are part of @-mentions 32 | $banned_terms.each do |term| 33 | re = Regexp.new("\\b#{term}\\b", "i") 34 | tweet.gsub!(re, Ebooks::Boodoo.obscure_curse(term.size)) 35 | end 36 | tweet 37 | end 38 | 39 | def parse_array(value, array_splitter=nil) 40 | array_splitter ||= / *[,;]+ */ 41 | value.split(array_splitter).map(&:strip) 42 | end 43 | 44 | def make_client 45 | Twitter::REST::Client.new do |config| 46 | config.consumer_key = @consumer_key 47 | config.consumer_secret = @consumer_secret 48 | config.access_token = @access_token 49 | config.access_token_secret = @access_token_secret 50 | end 51 | end 52 | end 53 | 54 | ## Retweet check based on Really-Existing-RT practices 55 | class Ebooks::TweetMeta 56 | def is_retweet? 57 | tweet.retweeted_status? || !!tweet.text[/[RM]T ?[@:]/i] 58 | end 59 | end 60 | 61 | class Ebooks::Boodoo::BoodooBot < Ebooks::Bot 62 | $required_fields = ['consumer_key', 'consumer_secret', 63 | 'access_token', 'access_token_secret', 64 | 'bot_name', 'original'] 65 | 66 | # Unfollow a user -- OVERRIDE TO FIX TYPO 67 | # @param user [String] username or user id 68 | def unfollow(user, *args) 69 | log "Unfollowing #{user}" 70 | twitter.unfollow(user, *args) 71 | end 72 | 73 | # A rough error-catch/retry for rate limit, dupe fave, server timeouts 74 | def catch_twitter 75 | begin 76 | yield 77 | rescue Twitter::Error => error 78 | @retries += 1 79 | raise if @retries > @max_error_retries 80 | if error.class == Twitter::Error::TooManyRequests 81 | reset_in = error.rate_limit.reset_in 82 | log "RATE: Going to sleep for ~#{reset_in / 60} minutes..." 83 | sleep reset_in 84 | retry 85 | elsif error.class == Twitter::Error::Forbidden 86 | # don't count "Already faved/followed" message against attempts 87 | @retries -= 1 if error.to_s.include?("already") 88 | log "WARN: #{error.to_s}" 89 | return true 90 | elsif ["execution", "capacity"].any?(&error.to_s.method(:include?)) 91 | log "ERR: Timeout?\n\t#{error}\nSleeping for #{@timeout_sleep} seconds..." 92 | sleep @timeout_sleep 93 | retry 94 | else 95 | log "Unhandled exception from Twitter: #{error.to_s}" 96 | raise 97 | end 98 | end 99 | end 100 | 101 | # Override Ebooks::Bot#blacklisted? to ensure lower<=>lower check 102 | def blacklisted?(username) 103 | if @blacklist.map(&:downcase).include?(username.downcase) 104 | true 105 | else 106 | false 107 | end 108 | end 109 | 110 | # Follow new followers, unfollow lost followers 111 | def follow_parity 112 | followers = catch_twitter { twitter.followers(:count=>200).map(&:screen_name) } 113 | following = catch_twitter { twitter.following(:count=>200).map(&:screen_name) } 114 | to_follow = followers - following 115 | to_unfollow = following - followers 116 | twitter.follow(to_follow) unless to_follow.empty? 117 | twitter.unfollow(to_unfollow) unless to_unfollow.empty? 118 | @followers = followers 119 | @following = following - to_unfollow 120 | if !(to_follow.empty? || to_unfollow.empty?) 121 | log "Followed #{to_follow.size}; unfollowed #{to_unfollow.size}." 122 | end 123 | end 124 | 125 | def has_model? 126 | File.exists? @model_path 127 | end 128 | 129 | def has_archive? 130 | File.exists? @archive_path 131 | end 132 | 133 | def get_archive! 134 | @archive = Archive.new(@original, @archive_path, make_client).sync 135 | end 136 | 137 | def block(*args) 138 | twitter.block(*args) 139 | end 140 | 141 | def make_model! 142 | log "Updating model: #{@model_path}" 143 | Ebooks::Model.consume(@archive_path).save(@model_path) 144 | log "Loading model..." 145 | @model = Ebooks::Model.load(@model_path) 146 | end 147 | 148 | def can_run? 149 | missing_fields.empty? 150 | end 151 | 152 | def missing_fields 153 | $required_fields.select { |field| 154 | # p "#{field} = #{send(field)}" 155 | send(field).nil? || send(field).empty? 156 | } 157 | end 158 | end 159 | -------------------------------------------------------------------------------- /bots.rb: -------------------------------------------------------------------------------- 1 | require 'twitter_ebooks' 2 | require_relative 'boodoo' 3 | require 'dotenv' 4 | 5 | include Ebooks::Boodoo 6 | 7 | # Read defaults and lay env vars on top: 8 | SETTINGS = Dotenv.load('defaults.env').merge(ENV) 9 | 10 | 11 | # Information about a particular Twitter user we know 12 | class UserInfo 13 | attr_reader :username 14 | 15 | # @return [Integer] how many times we can pester this user unprompted 16 | attr_accessor :pesters_left 17 | 18 | # @param username [String] 19 | def initialize(username) 20 | @username = username 21 | @pesters_left = parse_num(SETTINGS['PESTER_COUNT']) || 1 22 | end 23 | end 24 | 25 | class BoodooBot 26 | attr_accessor :original, :model, :model_path, :auth_name, :archive_path, :archive 27 | attr_accessor :followers, :following 28 | def configure 29 | # create attr_accessors for all SETTINGS fields 30 | SETTINGS.keys.map(&:to_s).map(&:downcase).each(&Ebooks::Bot.method(:attr_accessor)) 31 | 32 | # String fields taken as-is: 33 | @consumer_key = SETTINGS['CONSUMER_KEY'] 34 | @consumer_secret = SETTINGS['CONSUMER_SECRET'] 35 | @access_token = SETTINGS['ACCESS_TOKEN'] 36 | @access_token_secret =SETTINGS['ACCESS_TOKEN_SECRET'] 37 | @tweet_interval = SETTINGS['TWEET_INTERVAL'] 38 | @tweet_on_hour = to_boolean(SETTINGS['TWEET_ON_HOUR']) 39 | @update_follows_interval = SETTINGS['UPDATE_FOLLOWS_INTERVAL'] 40 | @refresh_model_interval = SETTINGS['REFRESH_MODEL_INTERVAL'] 41 | 42 | # String fields forced to downcase: 43 | @bot_name = SETTINGS['BOT_NAME'] 44 | @original = SETTINGS['SOURCE_USERNAME'] 45 | 46 | # Array fields are CSV or SSV 47 | @blacklist = parse_array(SETTINGS['BLACKLIST']) 48 | @banned_terms = parse_array(SETTINGS['BANNED_TERMS']) 49 | $banned_terms = @banned_terms 50 | @special_terms = parse_array(SETTINGS['SPECIAL_TERMS']) 51 | 52 | # Fields parsed as Fixnum, Float, or Range: 53 | @default_delay = parse_range(SETTINGS['DEFAULT_DELAY']) 54 | @dm_delay = parse_range(SETTINGS['DM_DELAY']) || parse_range(SETTINGS['DEFAULT_DELAY']) 55 | @mention_delay = parse_range(SETTINGS['MENTION_DELAY']) || parse_range(SETTINGS['DEFAULT_DELAY']) 56 | @timeline_delay = parse_range(SETTINGS['TIMELINE_DELAY']) || parse_range(SETTINGS['DEFAULT_DELAY']) 57 | @tweet_chance = parse_num(SETTINGS['TWEET_CHANCE']) 58 | @timeout_sleep = parse_num(SETTINGS['TIMEOUT_SLEEP']) 59 | 60 | # from upstream example 61 | @userinfo = {} 62 | 63 | # added for BooDoo variant 64 | @attempts = 0 65 | @followers = [] 66 | @following = [] 67 | @archive_path = "corpus/#{@original}.json" 68 | @model_path = "model/#{@original}.model" 69 | 70 | if can_run? 71 | get_archive! 72 | make_model! 73 | else 74 | missing_fields.each {|missing| 75 | log "Can't run without #{missing}" 76 | } 77 | log "Heroku will automatically try again immediately or in 10 minutes..." 78 | Kernel.exit(1) 79 | end 80 | end 81 | 82 | def top100; @top100 ||= model.keywords.take(100); end 83 | def top20; @top20 ||= model.keywords.take(20); end 84 | 85 | # Overwrites Ebooks::Bot#delay, but do we gain anything doing so? 86 | def delay(d, &b) 87 | d ||= default_delay 88 | sleep (d || [0]).to_a.sample 89 | b.call 90 | end 91 | 92 | def on_startup 93 | log "I started up!" 94 | tweet(model.make_statement) 95 | 96 | scheduler.interval @tweet_interval do 97 | if rand < @tweet_chance 98 | if @tweet_on_hour 99 | tweet(model.make_statement) 100 | else 101 | # schedule tweet to happen at a random minute this hour 102 | this_many_min = rand(1..59).to_s + 'm' 103 | log "Scheduling tweet in #{this_many_min} min!" 104 | scheduler.in.this_many_min do 105 | tweet(model.make_statement) 106 | end 107 | end 108 | end 109 | end 110 | 111 | scheduler.interval @update_follows_interval do 112 | follow_parity 113 | end 114 | 115 | scheduler.interval @refresh_model_interval do 116 | log "Refreshing archive/model..." 117 | get_archive! 118 | make_model! 119 | end 120 | end 121 | 122 | def on_message(dm) 123 | from_owner = dm.sender.screen_name.downcase == @original.downcase 124 | log "[DM from owner? #{from_owner}]" 125 | if from_owner 126 | action = dm.text.split.first.downcase 127 | strip_re = Regexp.new("^#{action}\s*", "i") 128 | payload = dm.text.sub(strip_re, "") 129 | #TODO: Add blacklist/whitelist/reject(banned phrase) 130 | #TODO? Move this into a DMController class or equivalent? 131 | case action 132 | when "tweet" 133 | tweet model.make_response(payload, 140) 134 | when "follow", "unfollow", "block" 135 | payload = parse_array(payload.gsub("@", ''), / *[,; ]+ */) # Strip @s and make array 136 | send(action.to_sym, payload) 137 | when "mention" 138 | pre = payload + " " 139 | limit = 140 - pre.size 140 | message = "#{pre}#{model.make_statement(limit)}" 141 | tweet message 142 | when "cheating" 143 | tweet payload 144 | else 145 | log "Don't have behavior for action: #{action}" 146 | reply(dm, model.make_response(dm.text)) 147 | end 148 | else 149 | #otherwise, just reply like a mention 150 | delay(dm_delay) do 151 | reply(dm, model.make_response(dm.text)) 152 | end 153 | end 154 | end 155 | 156 | def on_mention(tweet) 157 | # Become more inclined to pester a user when they talk to us 158 | userinfo(tweet.user.screen_name).pesters_left += 1 159 | 160 | delay(mention_delay) do 161 | reply(tweet, model.make_response(meta(tweet).mentionless, meta(tweet).limit)) 162 | end 163 | end 164 | 165 | def on_timeline(tweet) 166 | return if tweet.retweeted_status? 167 | return unless can_pester?(tweet.user.screen_name) 168 | 169 | tokens = Ebooks::NLP.tokenize(tweet.text) 170 | 171 | interesting = tokens.find { |t| top100.include?(t.downcase) } 172 | very_interesting = tokens.find_all { |t| top20.include?(t.downcase) }.length > 2 173 | 174 | delay(timeline_delay) do 175 | if very_interesting 176 | favorite(tweet) if rand < 0.5 177 | retweet(tweet) if rand < 0.01 178 | if rand < 0.01 179 | userinfo(tweet.user.screen_name).pesters_left -= 1 180 | reply(tweet, model.make_response(meta(tweet).mentionless, meta(tweet).limit)) 181 | end 182 | elsif interesting 183 | favorite(tweet) if rand < 0.05 184 | reply(tweet, model.make_response(meta(tweet).mentionless, meta(tweet).limit)) if rand < 0.01 185 | end 186 | end 187 | end 188 | 189 | # Find information we've collected about a user 190 | # @param username [String] 191 | # @return [Ebooks::UserInfo] 192 | def userinfo(username) 193 | @userinfo[username] ||= UserInfo.new(username) 194 | end 195 | 196 | # Check if we're allowed to send unprompted tweets to a user 197 | # @param username [String] 198 | # @return [Boolean] 199 | def can_pester?(username) 200 | userinfo(username).pesters_left > 0 201 | end 202 | 203 | # Only follow our original user or people who are following our original user 204 | # @param user [Twitter::User] 205 | def can_follow?(username) 206 | @original.nil? || username == @original || twitter.friendship?(username, @original) || twitter.friendship?(username, @username) 207 | end 208 | 209 | def favorite(tweet) 210 | if can_follow?(tweet.user.screen_name) 211 | super(tweet) 212 | else 213 | log "Unfollowing @#{tweet.user.screen_name}" 214 | twitter.unfollow(tweet.user.screen_name) 215 | end 216 | end 217 | 218 | def on_follow(user) 219 | if can_follow?(user.screen_name) 220 | follow(user.screen_name) 221 | else 222 | log "Not following @#{user.screen_name}" 223 | end 224 | end 225 | 226 | # Prefilter for banned terms before tweeting 227 | def tweet(text, *args) 228 | text = obscure_curses(text) 229 | super(text, *args) 230 | end 231 | 232 | # Prefilter for banned terms before replying 233 | def reply(ev, text, opts={}) 234 | text = obscure_curses(text) 235 | super(ev, text, opts) 236 | end 237 | 238 | # Helps us convert usage of "true" and "false" strings in .env files to booleans 239 | def to_boolean(str) 240 | str == 'true' 241 | end 242 | 243 | private 244 | def load_model! 245 | return if @model 246 | 247 | @model_path ||= "model/#{original}.model" 248 | 249 | log "Loading model #{model_path}" 250 | @model = Ebooks::Model.load(model_path) 251 | end 252 | end 253 | 254 | BoodooBot.new(SETTINGS['BOT_NAME']) do |bot| 255 | # BoodooBot#configure does everything! 256 | bot 257 | end 258 | -------------------------------------------------------------------------------- /corpus/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BooDoo/ebooks_example/640ff28357e54cd92f00b7c067c9b2fcbb6d54ee/corpus/.gitignore -------------------------------------------------------------------------------- /defaults.env: -------------------------------------------------------------------------------- 1 | BOT_NAME= 2 | SOURCE_USERNAME= 3 | CONSUMER_KEY= 4 | CONSUMER_SECRET= 5 | ACCESS_TOKEN= 6 | ACCESS_TOKEN_SECRET= 7 | TWEET_INTERVAL=2h 8 | TWEET_CHANCE=0.8 9 | TWEET_ON_HOUR=true 10 | BLACKLIST=tinysubversions, dril 11 | BANNED_TERMS=voldemort, evgeny morozov, heroku 12 | SPECIAL_TERMS= 13 | DEFAULT_DELAY=5..60 14 | DM_DELAY=0 15 | MENTION_DELAY=5..60 16 | TIMELINE_DELAY=10..600 17 | TIMEOUT_SLEEP=5 18 | MAX_ERROR_RETRIES=10 19 | UPDATE_FOLLOWS_INTERVAL=90m 20 | REFRESH_MODEL_INTERVAL=8h 21 | -------------------------------------------------------------------------------- /model/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BooDoo/ebooks_example/640ff28357e54cd92f00b7c067c9b2fcbb6d54ee/model/.gitignore --------------------------------------------------------------------------------