├── .dockerignore ├── .gitignore ├── .ruby-version ├── .travis.yml ├── Dockerfile ├── Gemfile ├── Gemfile.lock ├── LICENSE ├── Procfile ├── README.md ├── Rakefile ├── app.rb ├── config.ru ├── data └── tweets │ └── tweets.csv ├── lib ├── post_tweet.rb ├── reply_daemon.rb └── twitter_bot │ ├── tweet.rb │ └── tweet_generator.rb └── scripts └── dict.rb /.dockerignore: -------------------------------------------------------------------------------- 1 | scripts 2 | ipadic 3 | tweet.markov 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.swp 3 | 4 | ipadic/ 5 | *.markov 6 | .envrc 7 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.5.0 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | services: 4 | - docker 5 | 6 | language: ruby 7 | 8 | env: 9 | - TAG=registry.heroku.com/takuti-twitter-bot/web 10 | 11 | before_install: 12 | - docker build -t $TAG . 13 | - docker run -it -d -p 5000:5000 $TAG 14 | - docker ps -a 15 | - docker run -it $TAG /bin/sh -c "bundle exec rake test" 16 | 17 | script: 18 | - curl localhost:5000 19 | 20 | after_success: 21 | - if [ "$TRAVIS_BRANCH" == "master" ]; then 22 | docker login -u "$HEROKU_USERNAME" -p "$HEROKU_API_KEY" registry.heroku.com; 23 | docker push $TAG; 24 | fi 25 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:2.5.0-alpine 2 | 3 | ENV APP_ROOT='/src/github.com/takuti/twitter-bot' 4 | 5 | ADD . 
${APP_ROOT} 6 | WORKDIR ${APP_ROOT} 7 | 8 | RUN apk update && \ 9 | apk add --no-cache --virtual .builddeps ca-certificates wget openjdk7-jre build-base libxml2-dev libxslt-dev && \ 10 | update-ca-certificates && \ 11 | wget 'http://osdn.jp/frs/redir.php?m=jaist&f=%2Figo%2F52344%2Figo-0.4.3.jar' -O igo.jar && \ 12 | wget 'https://drive.google.com/uc?export=download&id=0B4y35FiV1wh7MWVlSDBCSXZMTXM' -O mecab-ipadic.tar.gz && \ 13 | tar zxfv mecab-ipadic.tar.gz && \ 14 | java -cp igo.jar net.reduls.igo.bin.BuildDic ipadic mecab-ipadic-2.7.0-20070801 EUC-JP && \ 15 | rm -rf igo.jar mecab-ipadic-2.7.0-20070801 mecab-ipadic.tar.gz && \ 16 | bundle install && \ 17 | apk del .builddeps 18 | 19 | CMD ["bundle", "exec", "foreman", "start"] 20 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | ruby '2.5.0' 4 | 5 | gem 'sinatra', '~> 2.0.2' 6 | 7 | gem 'twitter', '~> 5.15.0' 8 | gem 'tweetstream', '~> 2.6.1' 9 | gem 'kusari', '~> 0.2.0' 10 | gem 'json', '~> 1.8.3' 11 | gem 'moji', '~> 1.6.0' 12 | gem 'rake', '~> 10.4.2' 13 | 14 | gem 'foreman' 15 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | addressable (2.5.2) 5 | public_suffix (>= 2.0.2, < 4.0) 6 | buftok (0.2.0) 7 | cookiejar (0.3.3) 8 | daemons (1.2.6) 9 | domain_name (0.5.20180417) 10 | unf (>= 0.0.5, < 1.0.0) 11 | em-http-request (1.1.5) 12 | addressable (>= 2.3.4) 13 | cookiejar (!= 0.3.1) 14 | em-socksify (>= 0.3) 15 | eventmachine (>= 1.0.3) 16 | http_parser.rb (>= 0.6.0) 17 | em-socksify (0.3.2) 18 | eventmachine (>= 1.0.0.beta.4) 19 | em-twitter (0.3.5) 20 | buftok (~> 0.2) 21 | eventmachine (~> 1.0) 22 | http_parser.rb (~> 0.6) 23 | simple_oauth (~> 0.2) 24 | 
equalizer (0.0.10) 25 | eventmachine (1.2.6) 26 | faraday (0.9.2) 27 | multipart-post (>= 1.2, < 3) 28 | foreman (0.84.0) 29 | thor (~> 0.19.1) 30 | http (0.9.9) 31 | addressable (~> 2.3) 32 | http-cookie (~> 1.0) 33 | http-form_data (~> 1.0.1) 34 | http_parser.rb (~> 0.6.0) 35 | http-cookie (1.0.3) 36 | domain_name (~> 0.5) 37 | http-form_data (1.0.3) 38 | http_parser.rb (0.6.0) 39 | igo-ruby (0.1.5) 40 | json (1.8.6) 41 | kusari (0.2.0) 42 | igo-ruby (~> 0.1.5) 43 | msgpack 44 | memoizable (0.4.2) 45 | thread_safe (~> 0.3, >= 0.3.1) 46 | moji (1.6) 47 | msgpack (1.2.4) 48 | multi_json (1.13.1) 49 | multipart-post (2.0.0) 50 | mustermann (1.0.2) 51 | naught (1.1.0) 52 | public_suffix (3.0.2) 53 | rack (2.0.5) 54 | rack-protection (2.0.3) 55 | rack 56 | rake (10.4.2) 57 | simple_oauth (0.3.1) 58 | sinatra (2.0.3) 59 | mustermann (~> 1.0) 60 | rack (~> 2.0) 61 | rack-protection (= 2.0.3) 62 | tilt (~> 2.0) 63 | thor (0.19.4) 64 | thread_safe (0.3.6) 65 | tilt (2.0.8) 66 | tweetstream (2.6.1) 67 | daemons (~> 1.1) 68 | em-http-request (>= 1.1.1) 69 | em-twitter (~> 0.3) 70 | multi_json (~> 1.3) 71 | twitter (~> 5.5) 72 | twitter (5.15.0) 73 | addressable (~> 2.3) 74 | buftok (~> 0.2.0) 75 | equalizer (= 0.0.10) 76 | faraday (~> 0.9.0) 77 | http (>= 0.4, < 0.10) 78 | http_parser.rb (~> 0.6.0) 79 | json (~> 1.8) 80 | memoizable (~> 0.4.0) 81 | naught (~> 1.0) 82 | simple_oauth (~> 0.3.0) 83 | unf (0.1.4) 84 | unf_ext 85 | unf_ext (0.0.7.5) 86 | 87 | PLATFORMS 88 | ruby 89 | 90 | DEPENDENCIES 91 | foreman 92 | json (~> 1.8.3) 93 | kusari (~> 0.2.0) 94 | moji (~> 1.6.0) 95 | rake (~> 10.4.2) 96 | sinatra (~> 2.0.2) 97 | tweetstream (~> 2.6.1) 98 | twitter (~> 5.15.0) 99 | 100 | RUBY VERSION 101 | ruby 2.5.0p0 102 | 103 | BUNDLED WITH 104 | 1.16.1 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 
2016: Takuya Kitazawa. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: bundle exec rackup config.ru -p $PORT -o 0.0.0.0 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Markov-Chain-Based Japanese Twitter Bot 2 | === 3 | 4 | [![Build Status](https://travis-ci.org/takuti/twitter-bot.svg)](https://travis-ci.org/takuti/twitter-bot) 5 | 6 | ***Since this project is strongly optimized for Japanese, other languages are not supported :sushi:*** 7 | 8 | ## Description 9 | 10 | - Generate a tweet based on so-called **Markov Chain** from particular user's tweet history 11 | - Sample: [@yootakuti](https://twitter.com/yootakuti) 12 | - My Japanese article: [マルコフ連鎖でTwitter Botをつくりました | takuti.me](http://takuti.me/note/twitter-bot/) 13 | 14 | ## Installation 15 | 16 | If you want to host this bot directly on your local or server machine, you first need to install Ruby gems: 17 | 18 | $ gem install bundle 19 | $ bundle install 20 | 21 | Note that this application specially depends on [***kusari***](https://github.com/takuti/kusari), a gem for Japanese Markov chain. 22 | 23 | Next, you should generate a directory **ipadic/**, the IPA dictionary for Japanese tokenization, as described in the [Igo documentation](http://igo.osdn.jp/index.html#usage). 
24 | 25 | Additionally, in order to connect to a Twitter account, the following environment variables need to be appropriately set: 26 | 27 | ```sh 28 | export SCREEN_NAME=yootakuti 29 | 30 | export CONSUMER_KEY=foo 31 | export CONSUMER_SECRET=bar 32 | export OAUTH_TOKEN=hoge 33 | export OAUTH_TOKEN_SECRET=piyo 34 | ``` 35 | 36 | FYI: we can use [direnv](https://github.com/direnv/direnv) to flexibly configure project-specific environment variables: 37 | 38 | $ brew install direnv 39 | $ touch .envrc # and write the above `export` statements 40 | $ direnv allow 41 | 42 | ## Usage 43 | 44 | Before enjoying this bot, you must download your tweet history from [the Twitter setting page](https://twitter.com/settings/account). The downloaded folder must be placed under **/path/to/twitter-bot/data/**, and the bot will use *text* column of **data/tweets/tweets.csv**. Note that this repository contains [sample tweets.csv file](data/tweets/tweets.csv). 45 | 46 | ### Post on Twitter 47 | 48 | After setting the environment variables, we can generate and post a markov tweet as: 49 | 50 | $ ruby lib/post_tweet.rb 51 | 52 | If you just want to check if a markov tweet is generated correctly, `dry-run` option is available. 53 | 54 | $ ruby lib/post_tweet.rb dry-run 55 | 56 | #### Hourly post by cron 57 | 58 | Set your crontab as: 59 | 60 | $ echo "01 * * * * /usr/local/rvm/wrappers/ruby-2.2.3/ruby /path/to/twitter-bot/lib/post_tweet.rb" > cron.txt 61 | $ crontab cron.txt 62 | 63 | For more detail of RVM+cron setting: [RVM: Ruby Version Manager - Using Cron with RVM](https://rvm.io/deployment/cron) 64 | 65 | #### Build API server 66 | 67 | This repository implements a tiny Sinatra-based API server. 68 | 69 | Run: 70 | 71 | ```sh 72 | $ bundle exec foreman start # PORT=5000 by default 73 | ``` 74 | 75 | Eventually, http://localhost:5000/ and http://localhost:5000/tweet respectively execute `lib/post_tweet.rb dry-run` and `lib/post_tweet.rb`. 
76 | 77 | In case that you publicly build this API server, scheduling a request to `/tweet` would be an alternative choice to periodically post Markov-chain-based tweet. 78 | 79 | ### Reply daemon 80 | 81 | `reply_daemon` tracks tweets which contain `SCREEN_NAME` of your bot and replies to all of them: 82 | 83 | $ ruby lib/reply_daemon.rb start 84 | 85 | Stop the process: 86 | 87 | $ ruby lib/reply_daemon.rb stop 88 | 89 | ## Docker 90 | 91 | You can easily setup this application as a Docker image: 92 | 93 | ```sh 94 | $ docker build -t takuti/twitter-bot 95 | ``` 96 | 97 | Once the image has been created, running the scripts in container is straightforward: 98 | 99 | ```sh 100 | $ docker run -it takuti/twitter-bot /bin/sh -c "ruby lib/post_tweet.rb" 101 | $ docker run -it takuti/twitter-bot /bin/sh -c "ruby lib/post_tweet.rb dry-run" 102 | ``` 103 | 104 | By default, container automatically launches the API sever on port 5000 via `bundle exec foreman start`, so you can get access to http://localhost:5000/ once a container started running: 105 | 106 | ```sh 107 | $ docker run -it -d -p 5000:5000 takuti/twitter-bot 108 | ``` 109 | 110 | Notice that, as long as the required environmental variables are properly set in container, http://localhost:5000/tweet also works as we expected. 111 | 112 | ### Deploy on Heroku 113 | 114 | Our Docker image enables us to make the API server public on Heroku: 115 | 116 | ```sh 117 | $ heroku create takuti-twitter-bot 118 | $ heroku container:push web 119 | ``` 120 | 121 | See https://takuti-twitter-bot.herokuapp.com/, for example. 122 | 123 | While https://takuti-twitter-bot.herokuapp.com/tweet currently returns an error, you can make it available by [configuration of variables](https://devcenter.heroku.com/articles/config-vars#setting-up-config-vars-for-a-deployed-application). 
124 | 125 | ### Deploy on Dokku 126 | 127 | [Dokku](https://github.com/dokku/dokku) is a Docker-powered OSS PaaS which enables you to build mini-Heroku-like platform on your own server. Similarly to the deployment on Heroku, once you have set up a server with Dokku, the API server can be easily deployed as a Docker image. 128 | 129 | Server: 130 | 131 | ```sh 132 | $ dokku apps:create twitter-bot 133 | $ dokku config:set twitter-bot DOKKU_DOCKERFILE_START_CMD="bundle exec foreman start" 134 | ``` 135 | 136 | Note that setting a way to start running a container to `DOKKU_DOCKERFILE_START_CMD` is important, because Dokku directly [executes a task defined as the `web` process in `Procfile`](http://dokku.viewdocs.io/dokku/deployment/methods/dockerfiles/#procfiles-and-multiple-processes) by default. Consequently, your container launches differently from what `CMD` defines in `Dockerfile`. 137 | 138 | Local: 139 | 140 | ```sh 141 | $ cd /path/to/takuti/twitter-bot 142 | $ git remote add dokku dokku@dokku.example.com:twitter-bot 143 | $ git push dokku master 144 | ``` 145 | 146 | Eventually, an "Application deployed" message shows up on your local screen with corresponding URL, and you can get access to the API server. -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | task :test do 2 | # save markov table on local as "tweet.markov" 3 | system "ruby lib/post_tweet.rb dry-run" 4 | 5 | # load "tweet.markov"; at the second time, generating tweet will be much faster 6 | system "ruby lib/post_tweet.rb dry-run" 7 | end 8 | -------------------------------------------------------------------------------- /app.rb: -------------------------------------------------------------------------------- 1 | require 'sinatra' 2 | require 'json' 3 | require 'twitter' 4 | require_relative 'lib/twitter_bot/tweet_generator' 5 | 6 | # NOTE: This workaround might be danger. 
7 | # https://stackoverflow.com/a/16125324 8 | set :protection, :except => [:json_csrf] 9 | 10 | before do # every response is declared as JSON 11 | content_type :json 12 | end 13 | 14 | after do # serialize the response body as JSON 15 | response.body = JSON.dump(response.body) 16 | end 17 | 18 | get '/' do # generate a tweet and return it without posting 19 | begin 20 | {'tweet' => TwitterBot::TweetGenerator.new.generate} 21 | rescue 22 | halt 500, {'error' => 'Failed to generate a tweet. Make sure a directory /ipadic exists.'} 23 | end 24 | end 25 | 26 | get '/tweet' do # generate a tweet and post it through the Twitter REST client 27 | begin 28 | tweet = TwitterBot::TweetGenerator.new.generate 29 | rescue 30 | halt 500, {'error' => 'Failed to generate a tweet. Make sure a directory /ipadic exists.'} 31 | end 32 | 33 | rest = Twitter::REST::Client.new do |config| # credentials are read from environment variables 34 | config.consumer_key = ENV['CONSUMER_KEY'] 35 | config.consumer_secret = ENV['CONSUMER_SECRET'] 36 | config.access_token = ENV['OAUTH_TOKEN'] 37 | config.access_token_secret = ENV['OAUTH_TOKEN_SECRET'] 38 | end 39 | 40 | begin 41 | rest.update(tweet) 42 | rescue 43 | halt 500, {'error' => "Failed to post a generated tweet: #{tweet}"} 44 | end 45 | {'success' => "Tweeted: #{tweet}"} 46 | end 47 | -------------------------------------------------------------------------------- /config.ru: -------------------------------------------------------------------------------- 1 | require './app.rb' 2 | run Sinatra::Application 3 | -------------------------------------------------------------------------------- /data/tweets/tweets.csv: -------------------------------------------------------------------------------- 1 | "tweet_id","in_reply_to_status_id","in_reply_to_user_id","retweeted_status_id","retweeted_status_user_id","timestamp","source","text","expanded_urls" 2 | "354627056144896000","","","","","2013-07-09 15:44:13 +0000","foursquare","I'm at 幸楽苑 一箕町店 (会津若松市, 福島県) http://t.co/qNFl3eWUIY","http://4sq.com/12oJK9o" 3 | "354569798761459712","354569588199002112","1468580610","","","2013-07-09 11:56:42 +0000","web","@araco_uu 何もしてないのに全部消えた" 4 | "354569513624276993","","","","","2013-07-09 
11:55:34 +0000","さらそば岬","死" 5 | "354569483664375808","","","","","2013-07-09 11:55:27 +0000","さらそば岬","Dropboxの共有フォルダ全部吹っ飛んだヾ(。╹ω╹。)ノ" 6 | "354564520481525761","","","","","2013-07-09 11:35:43 +0000","Buffer","WordPressでLaTeXを使おう | ヨーグルト大学情報学部現代メディアコンテンツ科 http://t.co/iphxWpXpn3 やったぜ。","http://buff.ly/15tfoIa" 7 | "354177435740286976","354177032294379520","130838979","","","2013-07-08 09:57:35 +0000","web","@Takabatake_Y んま" 8 | "354176890304602115","","","","","2013-07-08 09:55:25 +0000","Instagram","ちゃんぷる http://t.co/o9bwTrD4xc","http://instagram.com/p/bgC29vRWOb/" 9 | "353673668808818688","","","","","2013-07-07 00:35:48 +0000","foursquare","真夏のゆゆ式みます (@ 郡山テアトル) http://t.co/PJQUgKxplQ","http://4sq.com/1a2ctYb" 10 | "353192479434092548","","","","","2013-07-05 16:43:43 +0000","web","Cおもしろかった。TOEICなどない。" 11 | "353157590290665472","353156774808924160","263691332","","","2013-07-05 14:25:05 +0000","web","@Araraak_xx いきます(^^ゞ" 12 | "353156711382646784","","","","","2013-07-05 14:21:35 +0000","さらそば岬","Cがおもしろいから明日のTOEIC休みたい" 13 | "353120873886130178","","","","","2013-07-05 11:59:11 +0000","Buffer","パソコンについて バブルソートやクイックソート、ヒープソート、マージソート、バケットソート、基数ソートの特徴... - Yahoo!知恵袋 http://t.co/h1IPYOg5rs よくわかんないけど狂気を感じた。","http://buff.ly/15k19p4" 14 | "353068934930501632","353068804839981056","41596696","","","2013-07-05 08:32:48 +0000","web","@korakoi !!!さあ!!!はやくおにいちゃんのところへ!!!もどっておいで!!!!!!!1" 15 | "353068684580896769","353067622461489152","37958429","","","2013-07-05 08:31:48 +0000","web","@carme16d Japanese Tradition..." 16 | "353068538459717633","353068421438644224","45116715","","","2013-07-05 08:31:13 +0000","web","@stanihara お前だったのかー!!!!" 
17 | "353068482553851904","","","353058164129796096","14052309","2013-07-05 08:31:00 +0000","web","RT @ui_nyan: Reading : 「株式会社○○を退職しました」という記事を見る度に思うこと http://t.co/vHitV6rFEp","http://anond.hatelabo.jp/20130624224858" 18 | "353068279872491520","","","","","2013-07-05 08:30:12 +0000","web","4時間くらい前に信号待ちの僕に向かって車から「たくち〜〜〜〜〜〜」って叫んで去っていったのは一体誰だったんだろう。" 19 | "353067685631889408","353048711842107393","1468580610","","","2013-07-05 08:27:50 +0000","web","@araco_uu 「家にDVDプレーヤーとかないの?」「ないです・・・」「テレビは?」「あっあります」「それについてないの?」「ついてないです・・・DVDみるならパソコンですね・・・」「パソコンは画面ちいせぇよなぁ」" 20 | "353047957341339648","","","","","2013-07-05 07:09:27 +0000","twicca","「君勉強しかしてないの?」いや、勉強すらしてないです…" 21 | "353047484580368389","","","","","2013-07-05 07:07:34 +0000","twicca","目医者さんきたら、「君もっと本読んで映画やテレビ見たほうがいいねぇ」といわれました。がんばります。" 22 | "353005540328210432","","","","","2013-07-05 04:20:53 +0000","foursquare","天国 (@ 会津図書館) http://t.co/OP6f8v2nJJ","http://4sq.com/1cXkbCp" 23 | "352946046399287298","","","","","2013-07-05 00:24:29 +0000","さらそば岬","それはプレゼンでも同じだからね!?(悲痛)" 24 | "352945470038999041","","","","","2013-07-05 00:22:12 +0000","Buffer","これはウェブページです。 http://t.co/vYr7Fg9RFN デザインは問題解決だというけれど、それだけで片付けちゃいけませんですね。","http://buff.ly/12genxL" 25 | "352766767703146497","","","","","2013-07-04 12:32:06 +0000","さらそば岬","万華鏡写輪眼がいい。" 26 | "352766296577933312","","","","","2013-07-04 12:30:13 +0000","さらそば岬","会津若松市内の眼科、一番つよそうなのどこだろう" 27 | "352763936480825345","","","","","2013-07-04 12:20:51 +0000","Buffer","リーダーシップとかチームマネジメントなんかより大切なもの | ヨーグルト大学情報学部現代メディアコンテンツ科 http://t.co/pgmlIp6zy3 けっこう本音","http://buff.ly/12fif24" 28 | "352690683368308736","352669417592729600","176721126","","","2013-07-04 07:29:46 +0000","web","@labyrinth17 @Takabatake_Y @motoki_uzu 21時からでいいんじゃねー" 29 | "352591251905839106","","","","","2013-07-04 00:54:39 +0000","さらそば岬","真夏のゆゆ式(福山雅治でません)" 30 | "352370561843859456","","","","","2013-07-03 10:17:43 +0000","Buffer","逆求人イベントに参加してみたら想像以上に良かった | ヨーグルト大学情報学部現代メディアコンテンツ科 http://t.co/oNfCqIo705 
すばらっ","http://buff.ly/12dhFlp" 31 | "352355700837597184","","","","","2013-07-03 09:18:40 +0000","さらそば岬","コーヒー飲まないと頭痛くなる。" 32 | "352000457234841600","","","","","2013-07-02 09:47:03 +0000","さらそば岬","グリーンスムージーはじめたい" 33 | "351960631798284288","","","","","2013-07-02 07:08:48 +0000","IFTTT","意図せず運動不足型からやせ型に進化してて地味に嬉しい。 http://t.co/wgldgoQYct","http://twitter.com/takuti/status/351960631798284288/photo/1" 34 | "351454919968043010","","","","","2013-06-30 21:39:17 +0000","Tweetbot for iOS","ねむすぎぽよよん" 35 | "351336461754314752","","","","","2013-06-30 13:48:34 +0000","twicca","さすがに5,6月は疲れましたわ(ヽ´ω`)" 36 | "351327010536173568","","","","","2013-06-30 13:11:01 +0000","IFTTT","緊急事態!!! http://t.co/HbPKTv8rx3","http://twitter.com/takuti/status/351327010536173568/photo/1" 37 | "351313462397374464","","","350604321408315392","203143855","2013-06-30 12:17:11 +0000","twicca","RT @makkuro_megane: パンチングマシーンやった http://t.co/HmjZkUbbnB","http://twitter.com/makkuro_megane/status/350604321408315392/photo/1" 38 | "351288546776195072","","","","","2013-06-30 10:38:10 +0000","foursquare","I'm at ジュンク堂書店 池袋本店 (豊島区, 東京都) w/ 5 others http://t.co/q0jbr8N4Xq","http://4sq.com/15U7ep7" 39 | "351266013096050689","","","","","2013-06-30 09:08:38 +0000","IFTTT","ふむ http://t.co/zlKqYNqKxd","http://twitter.com/takuti/status/351266013096050689/photo/1" 40 | "351201819164295168","","","","","2013-06-30 04:53:33 +0000","foursquare","I'm at 神田駅 (Kanda Sta.) (千代田区, 東京都) w/ 2 others http://t.co/aZPUv9VcqX","http://4sq.com/1aqKC6B" 41 | "351162429507530752","","","","","2013-06-30 02:17:02 +0000","foursquare","I'm at ゴーゴーカレー 六本木スタジアム - @gogo_curry (港区, 東京都) http://t.co/PKk2qMwQ5o","http://4sq.com/14i1J5G" 42 | "351132626591031296","","","","","2013-06-30 00:18:36 +0000","foursquare","I'm at 六本木駅 (Roppongi Sta.) (港区, 東京都) http://t.co/leqjs94iuz","http://4sq.com/18nvw0Z" 43 | "351124319914577920","","","","","2013-06-29 23:45:36 +0000","foursquare","I'm at 森下駅 (Morishita Sta.) 
(江東区, 東京都) http://t.co/fSGyr3V2up","http://4sq.com/15SMqhI" 44 | "350960716343308288","","","","","2013-06-29 12:55:29 +0000","foursquare","I'm at ナンクルナイサ きばいやんせー 森下店 http://t.co/9TxCzL1P77","http://4sq.com/13dQRJe" 45 | "350868902324076544","","","","","2013-06-29 06:50:39 +0000","さらそば岬","「""国の""セキュリティ人材育成合宿」" 46 | "350868837719212032","","","","","2013-06-29 06:50:24 +0000","さらそば岬","セプキャンを知らない人に簡潔に説明する時みなさんどうしてますか" 47 | "350630725743157248","350630553269190659","19161647","","","2013-06-28 15:04:14 +0000","web","@grapswiz おはよう^ー^" 48 | "350630433039466496","","","","","2013-06-28 15:03:04 +0000","web","寝るね;;" 49 | "350630344669671425","","","","","2013-06-28 15:02:43 +0000","web","やべぇプリンターあんじゃん最強!!!!!と思って調子乗って6枚くらい印刷したら、紙1枚10円ですって言われた。" 50 | "350630081653256193","350629605612335105","225819818","","","2013-06-28 15:01:40 +0000","web","@century_sho Wさんと行きました!間違いないですね!!" 51 | "350630001416216576","350629495847391236","273291712","","","2013-06-28 15:01:21 +0000","web","@kamoti_Lm 会津若松市民になったばかりなのに・・・。" 52 | "350629730023768065","","","","","2013-06-28 15:00:16 +0000","さらそば岬","ただ、ラウンジで周りにいる人3人くらいみんな履歴書書いてて静かな戦いの幕開けを感じさせる。" 53 | "350628434029985792","","","","","2013-06-28 14:55:07 +0000","web","ここのカプセルホテル飲食物持ち込み可だし全館無線LAN完備だしプリンターあるし最強な感じする。" 54 | "350623677991239682","","","","","2013-06-28 14:36:13 +0000","さらそば岬","焼き鳥おいしかった。カプセルホテル童貞捨てた。" 55 | "350576722502103041","","","","","2013-06-28 11:29:38 +0000","foursquare","I'm at 四文屋 秋葉原店 (千代田区, 東京都) http://t.co/CJUibHGQ7J","http://4sq.com/1520bO2" 56 | "350571554939228160","","","","","2013-06-28 11:09:06 +0000","IFTTT","やば http://t.co/o540Adrznz","http://twitter.com/takuti/status/350571554939228160/photo/1" 57 | "350562074855866370","","","","","2013-06-28 10:31:26 +0000","twicca","ここで携帯の電池切れてる人探すってどんなクエストだよ…" 58 | "350561806382673920","350561096492531712","19161647","","","2013-06-28 10:30:22 +0000","twicca","@grapswiz" 59 | "350561073449009152","","","","","2013-06-28 10:27:27 +0000","foursquare","I'm at 秋葉原駅 電気街口 
(千代田区, 東京都) w/ 10 others http://t.co/1K85a5tVTl","http://4sq.com/111vA44" 60 | "350557945383223296","","","","","2013-06-28 10:15:01 +0000","twicca","セプキャンのアンケート、ファイル名文字化けから推察するにかなり高度な問題だと考えてまだ触れていない。" 61 | "350557247237144577","","","","","2013-06-28 10:12:15 +0000","twicca","リマ…" 62 | "350555787027947522","","558569844","","","2013-06-28 10:06:27 +0000","twicca","@lis2501 @xemdmx そそそそんなにさきなんですね" 63 | "350555637018660866","","","","","2013-06-28 10:05:51 +0000","twicca","駅のホームで僕がフリスクをぶちまける事案が発生(2粒くらい)" 64 | "350554217909460993","","","","","2013-06-28 10:00:13 +0000","foursquare","へい (@ 池袋駅 (Ikebukuro Sta.) w/ 39 others) http://t.co/9TO6KLoDvD","http://4sq.com/1aTnAE0" 65 | "350487510243213312","","","","","2013-06-28 05:35:08 +0000","twicca","かわりゆくまちなみ" 66 | "350487425518288896","350487194659598338","173464519","","","2013-06-28 05:34:48 +0000","twicca","@xemdmx わわわわかりません…" 67 | "350487087100854273","","","","","2013-06-28 05:33:27 +0000","twicca","駅前にステーキ宮できてるしらなかった…" 68 | "350485692540923904","","","","","2013-06-28 05:27:55 +0000","foursquare","地獄 (@ 若松駅前 バスターミナル) http://t.co/1h7ASGXzm8","http://4sq.com/1aSN58v" 69 | "350473098589503490","","","","","2013-06-28 04:37:52 +0000","さらそば岬","????????" 70 | "350326599167639552","350224600015192064","1189412966","","","2013-06-27 18:55:44 +0000","web","@chinosque 北海道を閉じ込めたらしい。" 71 | "350324420289634304","","","","","2013-06-27 18:47:05 +0000","さらそば岬","マジかよ・・・・" 72 | "350224206627233792","","","","","2013-06-27 12:08:52 +0000","IFTTT","わけわかんねぇ買い物しちまった…(罪悪感 http://t.co/MX3nNP53tF","http://twitter.com/takuti/status/350224206627233792/photo/1" 73 | "350222977830359041","350222581502197760","1261308476","","","2013-06-27 12:03:59 +0000","web","@alpaca_p 死罪!!!" 
74 | "350221904621215747","","","","","2013-06-27 11:59:43 +0000","web","TENPURA" 75 | "350221745485123584","350221659896168448","1468580610","","","2013-06-27 11:59:05 +0000","web","@araco_uu 天使" 76 | "350221627436441600","","","","","2013-06-27 11:58:37 +0000","web","天" 77 | "350221559186722817","","","","","2013-06-27 11:58:21 +0000","web","天ぷら" 78 | "350221543307096064","","","","","2013-06-27 11:58:17 +0000","web","天ぷら食べたい" 79 | "350217999065690113","","","","","2013-06-27 11:44:12 +0000","web","みたらし団子のおいしさに震えて会いたい" 80 | "350217903397814275","350124523422629888","262247094","","","2013-06-27 11:43:49 +0000","web","@kyohon123 あつめてない・・・" 81 | "350118392532983808","","","","","2013-06-27 05:08:24 +0000","IFTTT","銀なら五枚 http://t.co/M6OWonII4r","http://twitter.com/takuti/status/350118392532983808/photo/1" 82 | "350118382424715264","","","","","2013-06-27 05:08:22 +0000","IFTTT","金なら一枚 http://t.co/u2i07GhEP7","http://twitter.com/takuti/status/350118382424715264/photo/1" 83 | "349800985595621376","","","","","2013-06-26 08:07:08 +0000","さらそば岬","ヨーグルト" 84 | "349798558058287104","","","","","2013-06-26 07:57:29 +0000","さらそば岬","いろいろやりすぎて自己PRで話の軸を持たせるのがツラいんだけどみんなどうやってんの" 85 | "349793348485251072","","","","","2013-06-26 07:36:47 +0000","さらそば岬","セブンの唐揚げ棒食べなきゃだめだめ病が" 86 | "349519476376612865","","","","","2013-06-25 13:28:31 +0000","Tweetbot for iOS","大学の汚点それすなわち自虐ネタ" 87 | "349490415642230786","","","","","2013-06-25 11:33:02 +0000","さらそば岬","もずく とても うまい" 88 | "349461715156602881","","","","","2013-06-25 09:39:00 +0000","web","ぐっちょぐちょで恥ずかしすぎる汚点だよね。でも僕そういうとこも結構好きだよ。" 89 | "349461387963154432","","","349431592978104321","176721126","2013-06-25 09:37:42 +0000","web","RT @labyrinth17: 正直こういう話題Twitterでぐちぐちやるの、会津大学の汚点だからやめてくれませんかね、話があるなら直接どうぞ。敵対的な意味でもなく、疑問や提案に対してこちらで考慮したり回答する準備はございますので。" 90 | "349015498018668545","","","","","2013-06-24 04:05:53 +0000","さらそば岬","「時間をたくさん費やした=頑張った」みたいな勘違いはやめましょう。" 91 | "348830364149624832","","","","","2013-06-23 15:50:14 +0000","Tweetbot for 
iOS","頭弱そうだよね" 92 | "348819209381351426","","","","","2013-06-23 15:05:55 +0000","IFTTT","昨日の深夜ラーメン速報です(忘れてた) http://t.co/CuB2tkMcoA","http://twitter.com/takuti/status/348819209381351426/photo/1" 93 | "348796982489387008","","","","","2013-06-23 13:37:35 +0000","twicca","目覚めたら新幹線が停車してて窓の外には郡山の文字が見えるこのスーパーエキサイティングな感じ。" 94 | "348795605696536576","","","","","2013-06-23 13:32:07 +0000","foursquare","新幹線乗り過ごし寸前を体感した。 (@ 郡山駅 (Koriyama Sta.) w/ 3 others) http://t.co/DlOI8UBTwr","http://4sq.com/171I5f9" 95 | "348784512458706944","","","","","2013-06-23 12:48:02 +0000","twicca","やまびこ快適すぎ" 96 | "348783733337387008","","","","","2013-06-23 12:44:56 +0000","foursquare","I'm at 仙台駅 (Sendai Sta.) (仙台市青葉区, 宮城県) w/ 11 others http://t.co/yOvwHg8SDm","http://4sq.com/10d47tt" 97 | "348752265240248321","","","","","2013-06-23 10:39:54 +0000","foursquare","I'm at SK7 Bistro & Bar (仙台市宮城野区, 宮城県) http://t.co/z4BKKswZoz","http://4sq.com/10cQ3QK" 98 | "348483543963418625","348335275891630080","176721126","","","2013-06-22 16:52:06 +0000","twicca","@labyrinth17 @Takabatake_Y @motoki_uzu ごめん次の土日は東京だ…" 99 | "348065303860768768","","","","","2013-06-21 13:10:09 +0000","IFTTT","たんたん http://t.co/d1vEpj8BP2","http://twitter.com/takuti/status/348065303860768768/photo/1" 100 | "348065283191234560","","","","","2013-06-21 13:10:05 +0000","IFTTT","牛たん http://t.co/JET5pKcLm5","http://twitter.com/takuti/status/348065283191234560/photo/1" 101 | -------------------------------------------------------------------------------- /lib/post_tweet.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | require 'twitter' 4 | require_relative 'twitter_bot/tweet_generator' 5 | 6 | generator = TwitterBot::TweetGenerator.new 7 | 8 | case ARGV.first # 'dry-run' only prints the generated tweet; any other invocation posts it 9 | when 'dry-run' 10 | puts "[tweet] #{generator.generate}" 11 | else 12 | rest = Twitter::REST::Client.new do |config| # credentials are read from environment variables 13 | config.consumer_key = ENV['CONSUMER_KEY'] 14 | config.consumer_secret = 
ENV['CONSUMER_SECRET'] 15 | config.access_token = ENV['OAUTH_TOKEN'] 16 | config.access_token_secret = ENV['OAUTH_TOKEN_SECRET'] 17 | end 18 | rest.update(generator.generate) 19 | end 20 | -------------------------------------------------------------------------------- /lib/reply_daemon.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | require 'tweetstream' 4 | require 'twitter' 5 | require_relative 'twitter_bot/tweet_generator' 6 | 7 | # Since the streaming feature of the Twitter gem is still experimental, 8 | # the TweetStream gem is used to track tweets on the stream 9 | TweetStream.configure do |config| 10 | config.consumer_key = ENV['CONSUMER_KEY'] 11 | config.consumer_secret = ENV['CONSUMER_SECRET'] 12 | config.oauth_token = ENV['OAUTH_TOKEN'] 13 | config.oauth_token_secret = ENV['OAUTH_TOKEN_SECRET'] 14 | config.auth_method = :oauth 15 | end 16 | 17 | rest = Twitter::REST::Client.new do |config| # REST client used to post the replies 18 | config.consumer_key = ENV['CONSUMER_KEY'] 19 | config.consumer_secret = ENV['CONSUMER_SECRET'] 20 | config.access_token = ENV['OAUTH_TOKEN'] 21 | config.access_token_secret = ENV['OAUTH_TOKEN_SECRET'] 22 | end 23 | 24 | generator = TwitterBot::TweetGenerator.new 25 | 26 | # non-daemonized tracking: 27 | # TweetStream::Client.new.track(ENV['SCREEN_NAME']) do |status| 28 | 29 | TweetStream::Daemon.new('tracker').track(ENV['SCREEN_NAME']) do |status| # reply to each tracked status with a generated tweet 30 | t = "@#{status.user.screen_name} #{generator.generate}" 31 | rest.update(t, in_reply_to_status_id: status.id) 32 | end 33 | -------------------------------------------------------------------------------- /lib/twitter_bot/tweet.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | require 'nkf' 4 | 5 | module TwitterBot 6 | class Tweet 7 | attr_accessor :text 8 | 9 | def initialize(text) 10 | @text = text.to_s # numeric-only tweets make unpack('U*') raise an error, so always convert to String 11 | @text = nil if NKF.guess(@text) != NKF::UTF8 12 
| end 13 | 14 | def normalize! 15 | @text.gsub!(/[^\u{0}-\u{FFFF}]/, '?') # 絵文字は ? に置換 16 | @text.gsub!(/\.?\s*@[0-9A-Za-z_]+/, '') # リプライをすべて削除 17 | @text.gsub!(/(RT|QT)\s*@?[0-9A-Za-z_]+.*$/, '') # RT/QT以降行末まで削除 18 | @text.gsub!(/http:\/\/\S+/, '') # URLを削除 スペースが入るまで消える 19 | @text.gsub!(/#[0-9A-Za-z_]+/, '') # ハッシュタグを削除 20 | end 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /lib/twitter_bot/tweet_generator.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | require 'kusari' 4 | require 'open-uri' 5 | require 'json' 6 | require 'csv' 7 | 8 | require_relative 'tweet' 9 | 10 | module TwitterBot 11 | class TweetGenerator 12 | def initialize 13 | @root = File.expand_path('../../../', __FILE__) 14 | @generator = Kusari::Generator.new(3, "#{@root}/ipadic") 15 | create_markov if @generator.load("#{@root}/tweet.markov") == false 16 | end 17 | 18 | def generate 19 | t = @generator.generate(140) 20 | t.length > 70 ? get_kaomoji : t # avoid very long random sentence 21 | end 22 | 23 | private 24 | 25 | def create_markov 26 | CSV.foreach("#{@root}/data/tweets/tweets.csv", :headers => true) do |row| 27 | t = Tweet.new(row['text']) 28 | next if t.text.nil? 29 | t.normalize! 
30 | @generator.add_string(t.text) 31 | end 32 | @generator.save("#{@root}/tweet.markov") 33 | end 34 | 35 | def get_kaomoji 36 | begin 37 | open('http://kaomoji.n-at.me/random.json') { |f| JSON.load(f)['record']['text'] } 38 | rescue 39 | @generator.generate(70) # alternative short tweet 40 | end 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /scripts/dict.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | require 'moji' 4 | require 'igo-ruby' 5 | require 'csv' 6 | 7 | module CustomDictionary 8 | 9 | class Writer 10 | attr_accessor :count 11 | 12 | def initialize(filename) 13 | # 元々のMeCab辞書のエンコーディングであるEUCに合わせる 14 | @dictionary_file = CSV.open(filename, 'w', encoding: 'utf-8:euc-jp') 15 | @count = 0 16 | end 17 | 18 | def write(word, furigana='*') 19 | @dictionary_file << [word, 0, 0, cost(word), '名詞', '一般', '*', '*', '*', '*', word, furigana, furigana] 20 | @count += 1 21 | end 22 | 23 | def close 24 | @dictionary_file.close 25 | end 26 | 27 | private 28 | 29 | def cost(word) 30 | [-32768, 6000 - 200 * (word.length**1.3)].max.to_i 31 | end 32 | end 33 | 34 | class << self 35 | def hatena(tagger, writer) 36 | File.open('keywordlist_furigana.csv', encoding: 'euc-jp:utf-8', undef: :replace) do |f| 37 | CSV.new(f, :col_sep => "\t").each do |row| 38 | word = row[1] 39 | 40 | next if /^[0-9]{1,4}-[0-9]{2}-[0-9]{2}$/ =~ word # 2009-09-04 のような年月は飛ばす 41 | next if /^[0-9]{1,4}年$/ =~ word # 1945年 のような年は飛ばす 42 | next if word.include?(',') # 単語そのものにカンマが含まれるものは飛ばす(応急処置) 43 | next if tagger.wakati(word).size == 1 # すでに1単語として認識されるものは飛ばす 44 | 45 | furigana = row[0] ? 
Moji.hira_to_kata(row[0]) : String.new 46 | writer.write(word, furigana) 47 | end 48 | end 49 | end 50 | 51 | def wikipedia(tagger, writer) 52 | File.open('jawiki-latest-all-titles-in-ns0', encoding: 'euc-jp:utf-8', undef: :replace) do |f| 53 | CSV.new(f, :col_sep => "\t").each do |row| 54 | word = row[0] 55 | 56 | next if /^[0-9]{2}月[0-9]{2}日$/ =~ word # skip month-day entries such as 09月04日 57 | next if /^[0-9\*\-]{1,4}年.*$/ =~ word # entries that start with a year are almost never useful 58 | next if word.include?('_') # skip words containing an underscore: they are titles with spaces and are complicated (stopgap) 59 | next if word.include?(',') # skip words that themselves contain a comma (stopgap) 60 | next if tagger.wakati(word).size == 1 # skip words already recognized as a single token 61 | 62 | writer.write(word) 63 | end 64 | end 65 | end 66 | end 67 | 68 | end 69 | 70 | 71 | if !['hatena', 'wikipedia'].include?(ARGV.first) 72 | abort('`hatena` or `wikipedia` must be specified') 73 | end 74 | 75 | tagger = Igo::Tagger.new('../ipadic') 76 | writer = CustomDictionary::Writer.new("#{ARGV.first}.csv") 77 | 78 | case ARGV.first 79 | when 'hatena' 80 | CustomDictionary::hatena(tagger, writer) 81 | when 'wikipedia' 82 | CustomDictionary::wikipedia(tagger, writer) 83 | end 84 | 85 | writer.close 86 | 87 | puts "Wrote #{writer.count} words" 88 | --------------------------------------------------------------------------------