├── .dockerignore
├── .gitignore
├── .ruby-version
├── .travis.yml
├── Dockerfile
├── Gemfile
├── Gemfile.lock
├── LICENSE
├── Procfile
├── README.md
├── Rakefile
├── app.rb
├── config.ru
├── data
└── tweets
│ └── tweets.csv
├── lib
├── post_tweet.rb
├── reply_daemon.rb
└── twitter_bot
│ ├── tweet.rb
│ └── tweet_generator.rb
└── scripts
└── dict.rb
/.dockerignore:
--------------------------------------------------------------------------------
1 | scripts
2 | ipadic
3 | tweet.markov
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.swp
3 |
4 | ipadic/
5 | *.markov
6 | .envrc
7 |
--------------------------------------------------------------------------------
/.ruby-version:
--------------------------------------------------------------------------------
1 | 2.5.0
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 |
3 | services:
4 | - docker
5 |
6 | language: ruby
7 |
8 | env:
9 | - TAG=registry.heroku.com/takuti-twitter-bot/web
10 |
11 | before_install:
12 | - docker build -t $TAG .
13 | - docker run -it -d -p 5000:5000 $TAG
14 | - docker ps -a
15 | - docker run -it $TAG /bin/sh -c "bundle exec rake test"
16 |
17 | script:
18 | - curl localhost:5000
19 |
20 | after_success:
21 | - if [ "$TRAVIS_BRANCH" == "master" ]; then
22 | docker login -u "$HEROKU_USERNAME" -p "$HEROKU_API_KEY" registry.heroku.com;
23 | docker push $TAG;
24 | fi
25 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ruby:2.5.0-alpine
2 |
3 | ENV APP_ROOT='/src/github.com/takuti/twitter-bot'
4 |
5 | ADD . ${APP_ROOT}
6 | WORKDIR ${APP_ROOT}
7 |
8 | RUN apk update && \
9 | apk add --no-cache --virtual .builddeps ca-certificates wget openjdk7-jre build-base libxml2-dev libxslt-dev && \
10 | update-ca-certificates && \
11 | wget 'http://osdn.jp/frs/redir.php?m=jaist&f=%2Figo%2F52344%2Figo-0.4.3.jar' -O igo.jar && \
12 | wget 'https://drive.google.com/uc?export=download&id=0B4y35FiV1wh7MWVlSDBCSXZMTXM' -O mecab-ipadic.tar.gz && \
13 | tar zxfv mecab-ipadic.tar.gz && \
14 | java -cp igo.jar net.reduls.igo.bin.BuildDic ipadic mecab-ipadic-2.7.0-20070801 EUC-JP && \
15 | rm -rf igo.jar mecab-ipadic-2.7.0-20070801 mecab-ipadic.tar.gz && \
16 | bundle install && \
17 | apk del .builddeps
18 |
19 | CMD ["bundle", "exec", "foreman", "start"]
20 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 |
3 | ruby '2.5.0'
4 |
5 | gem 'sinatra', '~> 2.0.2'
6 |
7 | gem 'twitter', '~> 5.15.0'
8 | gem 'tweetstream', '~> 2.6.1'
9 | gem 'kusari', '~> 0.2.0'
10 | gem 'json', '~> 1.8.3'
11 | gem 'moji', '~> 1.6.0'
12 | gem 'rake', '~> 10.4.2'
13 |
14 | gem 'foreman'
15 |
--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
1 | GEM
2 | remote: https://rubygems.org/
3 | specs:
4 | addressable (2.5.2)
5 | public_suffix (>= 2.0.2, < 4.0)
6 | buftok (0.2.0)
7 | cookiejar (0.3.3)
8 | daemons (1.2.6)
9 | domain_name (0.5.20180417)
10 | unf (>= 0.0.5, < 1.0.0)
11 | em-http-request (1.1.5)
12 | addressable (>= 2.3.4)
13 | cookiejar (!= 0.3.1)
14 | em-socksify (>= 0.3)
15 | eventmachine (>= 1.0.3)
16 | http_parser.rb (>= 0.6.0)
17 | em-socksify (0.3.2)
18 | eventmachine (>= 1.0.0.beta.4)
19 | em-twitter (0.3.5)
20 | buftok (~> 0.2)
21 | eventmachine (~> 1.0)
22 | http_parser.rb (~> 0.6)
23 | simple_oauth (~> 0.2)
24 | equalizer (0.0.10)
25 | eventmachine (1.2.6)
26 | faraday (0.9.2)
27 | multipart-post (>= 1.2, < 3)
28 | foreman (0.84.0)
29 | thor (~> 0.19.1)
30 | http (0.9.9)
31 | addressable (~> 2.3)
32 | http-cookie (~> 1.0)
33 | http-form_data (~> 1.0.1)
34 | http_parser.rb (~> 0.6.0)
35 | http-cookie (1.0.3)
36 | domain_name (~> 0.5)
37 | http-form_data (1.0.3)
38 | http_parser.rb (0.6.0)
39 | igo-ruby (0.1.5)
40 | json (1.8.6)
41 | kusari (0.2.0)
42 | igo-ruby (~> 0.1.5)
43 | msgpack
44 | memoizable (0.4.2)
45 | thread_safe (~> 0.3, >= 0.3.1)
46 | moji (1.6)
47 | msgpack (1.2.4)
48 | multi_json (1.13.1)
49 | multipart-post (2.0.0)
50 | mustermann (1.0.2)
51 | naught (1.1.0)
52 | public_suffix (3.0.2)
53 | rack (2.0.5)
54 | rack-protection (2.0.3)
55 | rack
56 | rake (10.4.2)
57 | simple_oauth (0.3.1)
58 | sinatra (2.0.3)
59 | mustermann (~> 1.0)
60 | rack (~> 2.0)
61 | rack-protection (= 2.0.3)
62 | tilt (~> 2.0)
63 | thor (0.19.4)
64 | thread_safe (0.3.6)
65 | tilt (2.0.8)
66 | tweetstream (2.6.1)
67 | daemons (~> 1.1)
68 | em-http-request (>= 1.1.1)
69 | em-twitter (~> 0.3)
70 | multi_json (~> 1.3)
71 | twitter (~> 5.5)
72 | twitter (5.15.0)
73 | addressable (~> 2.3)
74 | buftok (~> 0.2.0)
75 | equalizer (= 0.0.10)
76 | faraday (~> 0.9.0)
77 | http (>= 0.4, < 0.10)
78 | http_parser.rb (~> 0.6.0)
79 | json (~> 1.8)
80 | memoizable (~> 0.4.0)
81 | naught (~> 1.0)
82 | simple_oauth (~> 0.3.0)
83 | unf (0.1.4)
84 | unf_ext
85 | unf_ext (0.0.7.5)
86 |
87 | PLATFORMS
88 | ruby
89 |
90 | DEPENDENCIES
91 | foreman
92 | json (~> 1.8.3)
93 | kusari (~> 0.2.0)
94 | moji (~> 1.6.0)
95 | rake (~> 10.4.2)
96 | sinatra (~> 2.0.2)
97 | tweetstream (~> 2.6.1)
98 | twitter (~> 5.15.0)
99 |
100 | RUBY VERSION
101 | ruby 2.5.0p0
102 |
103 | BUNDLED WITH
104 | 1.16.1
105 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016: Takuya Kitazawa.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: bundle exec rackup config.ru -p $PORT -o 0.0.0.0
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Markov-Chain-Based Japanese Twitter Bot
2 | ===
3 |
4 | [![Build Status](https://travis-ci.org/takuti/twitter-bot.svg?branch=master)](https://travis-ci.org/takuti/twitter-bot)
5 |
6 | ***Since this project is strongly optimized for Japanese, other languages are not supported :sushi:***
7 |
8 | ## Description
9 |
10 | - Generate a tweet based on a so-called **Markov Chain** built from a particular user's tweet history
11 | - Sample: [@yootakuti](https://twitter.com/yootakuti)
12 | - My Japanese article: [マルコフ連鎖でTwitter Botをつくりました | takuti.me](http://takuti.me/note/twitter-bot/)
13 |
14 | ## Installation
15 |
16 | If you want to host this bot directly on your local or server machine, you first need to install Ruby gems:
17 |
18 | $ gem install bundler
19 | $ bundle install
20 |
21 | Note that this application specially depends on [***kusari***](https://github.com/takuti/kusari), a gem for Japanese Markov chain.
22 |
23 | Next, you should generate a directory **ipadic/**, the IPA dictionary for Japanese tokenization, as described in the [Igo documentation](http://igo.osdn.jp/index.html#usage).
24 |
25 | Additionally, in order to connect to a Twitter account, the following environment variables need to be appropriately set:
26 |
27 | ```sh
28 | export SCREEN_NAME=yootakuti
29 |
30 | export CONSUMER_KEY=foo
31 | export CONSUMER_SECRET=bar
32 | export OAUTH_TOKEN=hoge
33 | export OAUTH_TOKEN_SECRET=piyo
34 | ```
35 |
36 | FYI: we can use [direnv](https://github.com/direnv/direnv) to flexibly configure project-specific environment variables:
37 |
38 | $ brew install direnv
39 | $ touch .envrc # and write the above `export` statements
40 | $ direnv allow
41 |
42 | ## Usage
43 |
44 | Before enjoying this bot, you must download your tweet history from [the Twitter setting page](https://twitter.com/settings/account). The downloaded folder must be placed under **/path/to/twitter-bot/data/**, and the bot will use *text* column of **data/tweets/tweets.csv**. Note that this repository contains [sample tweets.csv file](data/tweets/tweets.csv).
45 |
46 | ### Post on Twitter
47 |
48 | After setting the environment variables, we can generate and post a markov tweet as:
49 |
50 | $ ruby lib/post_tweet.rb
51 |
52 | If you just want to check if a markov tweet is generated correctly, `dry-run` option is available.
53 |
54 | $ ruby lib/post_tweet.rb dry-run
55 |
56 | #### Hourly post by cron
57 |
58 | Set your crontab as:
59 |
60 | $ echo "01 * * * * /usr/local/rvm/wrappers/ruby-2.2.3/ruby /path/to/twitter-bot/lib/post_tweet.rb" > cron.txt
61 | $ crontab cron.txt
62 |
63 | For more detail of RVM+cron setting: [RVM: Ruby Version Manager - Using Cron with RVM](https://rvm.io/deployment/cron)
64 |
65 | #### Build API server
66 |
67 | This repository implements a tiny Sinatra-based API server.
68 |
69 | Run:
70 |
71 | ```sh
72 | $ bundle exec foreman start # PORT=5000 by default
73 | ```
74 |
75 | Eventually, http://localhost:5000/ and http://localhost:5000/tweet respectively execute `lib/post_tweet.rb dry-run` and `lib/post_tweet.rb`.
76 |
77 | If you host this API server publicly, scheduling a request to `/tweet` is an alternative way to periodically post a Markov-chain-based tweet.
78 |
79 | ### Reply daemon
80 |
81 | `reply_daemon` tracks tweets which contain `SCREEN_NAME` of your bot and replies to all of them:
82 |
83 | $ ruby lib/reply_daemon.rb start
84 |
85 | Stop the process:
86 |
87 | $ ruby lib/reply_daemon.rb stop
88 |
89 | ## Docker
90 |
91 | You can easily setup this application as a Docker image:
92 |
93 | ```sh
94 | $ docker build -t takuti/twitter-bot .
95 | ```
96 |
97 | Once the image has been created, running the scripts in container is straightforward:
98 |
99 | ```sh
100 | $ docker run -it takuti/twitter-bot /bin/sh -c "ruby lib/post_tweet.rb"
101 | $ docker run -it takuti/twitter-bot /bin/sh -c "ruby lib/post_tweet.rb dry-run"
102 | ```
103 |
104 | By default, the container automatically launches the API server on port 5000 via `bundle exec foreman start`, so you can access http://localhost:5000/ once a container has started running:
105 |
106 | ```sh
107 | $ docker run -it -d -p 5000:5000 takuti/twitter-bot
108 | ```
109 |
110 | Notice that, as long as the required environmental variables are properly set in container, http://localhost:5000/tweet also works as we expected.
111 |
112 | ### Deploy on Heroku
113 |
114 | Our Docker image enables us to make the API server public on Heroku:
115 |
116 | ```sh
117 | $ heroku create takuti-twitter-bot
118 | $ heroku container:push web
119 | ```
120 |
121 | See https://takuti-twitter-bot.herokuapp.com/, for example.
122 |
123 | While https://takuti-twitter-bot.herokuapp.com/tweet currently returns an error, you can make it available by [configuration of variables](https://devcenter.heroku.com/articles/config-vars#setting-up-config-vars-for-a-deployed-application).
124 |
125 | ### Deploy on Dokku
126 |
127 | [Dokku](https://github.com/dokku/dokku) is a Docker-powered OSS PaaS which enables you to build mini-Heroku-like platform on your own server. Similarly to the deployment on Heroku, once you have set up a server with Dokku, the API server can be easily deployed as a Docker image.
128 |
129 | Server:
130 |
131 | ```sh
132 | $ dokku apps:create twitter-bot
133 | $ dokku config:set twitter-bot DOKKU_DOCKERFILE_START_CMD="bundle exec foreman start"
134 | ```
135 |
136 | Note that setting a way to start running a container to `DOKKU_DOCKERFILE_START_CMD` is important, because Dokku directly [executes a task defined as the `web` process in `Procfile`](http://dokku.viewdocs.io/dokku/deployment/methods/dockerfiles/#procfiles-and-multiple-processes) by default. Consequently, your container launches differently from what `CMD` defines in `Dockerfile`.
137 |
138 | Local:
139 |
140 | ```sh
141 | $ cd /path/to/takuti/twitter-bot
142 | $ git remote add dokku dokku@dokku.example.com:twitter-bot
143 | $ git push dokku master
144 | ```
145 |
146 | Eventually, an "Application deployed" message shows up on your local screen with corresponding URL, and you can get access to the API server.
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
# Smoke test: run the dry-run tweet generation twice.
#
# `sh` is used instead of `system` because `system` only returns false on
# failure, which this task ignored; `sh` raises when the command exits
# non-zero, so a broken generator now fails the task (and the CI build).
task :test do
  # save markov table on local as "tweet.markov"
  sh 'ruby lib/post_tweet.rb dry-run'

  # load "tweet.markov"; at the second time, generating tweet will be much faster
  sh 'ruby lib/post_tweet.rb dry-run'
end
8 |
--------------------------------------------------------------------------------
/app.rb:
--------------------------------------------------------------------------------
1 | require 'sinatra'
2 | require 'json'
3 | require 'twitter'
4 | require_relative 'lib/twitter_bot/tweet_generator'
5 |
# NOTE: This workaround might be dangerous.
# https://stackoverflow.com/a/16125324
set :protection, :except => [:json_csrf]

helpers do
  # Generates a tweet, or halts the request with a 500 JSON error.
  # The underlying exception is written to stderr so the real cause is not
  # lost behind the generic error message returned to the client.
  def generate_tweet_or_halt
    TwitterBot::TweetGenerator.new.generate
  rescue StandardError => e
    warn "[app] failed to generate a tweet: #{e.class}: #{e.message}"
    halt 500, {'error' => 'Failed to generate a tweet. Make sure a directory /ipadic exists.'}
  end

  # Builds a Twitter REST client from the credentials in the environment.
  def twitter_client
    Twitter::REST::Client.new do |config|
      config.consumer_key = ENV['CONSUMER_KEY']
      config.consumer_secret = ENV['CONSUMER_SECRET']
      config.access_token = ENV['OAUTH_TOKEN']
      config.access_token_secret = ENV['OAUTH_TOKEN_SECRET']
    end
  end
end

# Every response is declared as JSON ...
before do
  content_type :json
end

# ... and whatever body the route (or `halt`) produced is JSON-encoded.
after do
  response.body = JSON.dump(response.body)
end

# Dry run: generate a tweet and return it without posting.
get '/' do
  {'tweet' => generate_tweet_or_halt}
end

# Generate a tweet and post it to Twitter.
get '/tweet' do
  tweet = generate_tweet_or_halt
  client = twitter_client

  begin
    client.update(tweet)
  rescue StandardError => e
    warn "[app] failed to post a tweet: #{e.class}: #{e.message}"
    halt 500, {'error' => "Failed to post a generated tweet: #{tweet}"}
  end
  {'success' => "Tweeted: #{tweet}"}
end
47 |
--------------------------------------------------------------------------------
/config.ru:
--------------------------------------------------------------------------------
# Rack entry point: load app.rb (path is relative to the working directory)
# and hand the classic-style Sinatra application to the Rack server.
require './app.rb'
run Sinatra::Application
3 |
--------------------------------------------------------------------------------
/data/tweets/tweets.csv:
--------------------------------------------------------------------------------
1 | "tweet_id","in_reply_to_status_id","in_reply_to_user_id","retweeted_status_id","retweeted_status_user_id","timestamp","source","text","expanded_urls"
2 | "354627056144896000","","","","","2013-07-09 15:44:13 +0000","foursquare","I'm at 幸楽苑 一箕町店 (会津若松市, 福島県) http://t.co/qNFl3eWUIY","http://4sq.com/12oJK9o"
3 | "354569798761459712","354569588199002112","1468580610","","","2013-07-09 11:56:42 +0000","web","@araco_uu 何もしてないのに全部消えた"
4 | "354569513624276993","","","","","2013-07-09 11:55:34 +0000","さらそば岬","死"
5 | "354569483664375808","","","","","2013-07-09 11:55:27 +0000","さらそば岬","Dropboxの共有フォルダ全部吹っ飛んだヾ(。╹ω╹。)ノ"
6 | "354564520481525761","","","","","2013-07-09 11:35:43 +0000","Buffer","WordPressでLaTeXを使おう | ヨーグルト大学情報学部現代メディアコンテンツ科 http://t.co/iphxWpXpn3 やったぜ。","http://buff.ly/15tfoIa"
7 | "354177435740286976","354177032294379520","130838979","","","2013-07-08 09:57:35 +0000","web","@Takabatake_Y んま"
8 | "354176890304602115","","","","","2013-07-08 09:55:25 +0000","Instagram","ちゃんぷる http://t.co/o9bwTrD4xc","http://instagram.com/p/bgC29vRWOb/"
9 | "353673668808818688","","","","","2013-07-07 00:35:48 +0000","foursquare","真夏のゆゆ式みます (@ 郡山テアトル) http://t.co/PJQUgKxplQ","http://4sq.com/1a2ctYb"
10 | "353192479434092548","","","","","2013-07-05 16:43:43 +0000","web","Cおもしろかった。TOEICなどない。"
11 | "353157590290665472","353156774808924160","263691332","","","2013-07-05 14:25:05 +0000","web","@Araraak_xx いきます(^^ゞ"
12 | "353156711382646784","","","","","2013-07-05 14:21:35 +0000","さらそば岬","Cがおもしろいから明日のTOEIC休みたい"
13 | "353120873886130178","","","","","2013-07-05 11:59:11 +0000","Buffer","パソコンについて バブルソートやクイックソート、ヒープソート、マージソート、バケットソート、基数ソートの特徴... - Yahoo!知恵袋 http://t.co/h1IPYOg5rs よくわかんないけど狂気を感じた。","http://buff.ly/15k19p4"
14 | "353068934930501632","353068804839981056","41596696","","","2013-07-05 08:32:48 +0000","web","@korakoi !!!さあ!!!はやくおにいちゃんのところへ!!!もどっておいで!!!!!!!1"
15 | "353068684580896769","353067622461489152","37958429","","","2013-07-05 08:31:48 +0000","web","@carme16d Japanese Tradition..."
16 | "353068538459717633","353068421438644224","45116715","","","2013-07-05 08:31:13 +0000","web","@stanihara お前だったのかー!!!!"
17 | "353068482553851904","","","353058164129796096","14052309","2013-07-05 08:31:00 +0000","web","RT @ui_nyan: Reading : 「株式会社○○を退職しました」という記事を見る度に思うこと http://t.co/vHitV6rFEp","http://anond.hatelabo.jp/20130624224858"
18 | "353068279872491520","","","","","2013-07-05 08:30:12 +0000","web","4時間くらい前に信号待ちの僕に向かって車から「たくち〜〜〜〜〜〜」って叫んで去っていったのは一体誰だったんだろう。"
19 | "353067685631889408","353048711842107393","1468580610","","","2013-07-05 08:27:50 +0000","web","@araco_uu 「家にDVDプレーヤーとかないの?」「ないです・・・」「テレビは?」「あっあります」「それについてないの?」「ついてないです・・・DVDみるならパソコンですね・・・」「パソコンは画面ちいせぇよなぁ」"
20 | "353047957341339648","","","","","2013-07-05 07:09:27 +0000","twicca","「君勉強しかしてないの?」いや、勉強すらしてないです…"
21 | "353047484580368389","","","","","2013-07-05 07:07:34 +0000","twicca","目医者さんきたら、「君もっと本読んで映画やテレビ見たほうがいいねぇ」といわれました。がんばります。"
22 | "353005540328210432","","","","","2013-07-05 04:20:53 +0000","foursquare","天国 (@ 会津図書館) http://t.co/OP6f8v2nJJ","http://4sq.com/1cXkbCp"
23 | "352946046399287298","","","","","2013-07-05 00:24:29 +0000","さらそば岬","それはプレゼンでも同じだからね!?(悲痛)"
24 | "352945470038999041","","","","","2013-07-05 00:22:12 +0000","Buffer","これはウェブページです。 http://t.co/vYr7Fg9RFN デザインは問題解決だというけれど、それだけで片付けちゃいけませんですね。","http://buff.ly/12genxL"
25 | "352766767703146497","","","","","2013-07-04 12:32:06 +0000","さらそば岬","万華鏡写輪眼がいい。"
26 | "352766296577933312","","","","","2013-07-04 12:30:13 +0000","さらそば岬","会津若松市内の眼科、一番つよそうなのどこだろう"
27 | "352763936480825345","","","","","2013-07-04 12:20:51 +0000","Buffer","リーダーシップとかチームマネジメントなんかより大切なもの | ヨーグルト大学情報学部現代メディアコンテンツ科 http://t.co/pgmlIp6zy3 けっこう本音","http://buff.ly/12fif24"
28 | "352690683368308736","352669417592729600","176721126","","","2013-07-04 07:29:46 +0000","web","@labyrinth17 @Takabatake_Y @motoki_uzu 21時からでいいんじゃねー"
29 | "352591251905839106","","","","","2013-07-04 00:54:39 +0000","さらそば岬","真夏のゆゆ式(福山雅治でません)"
30 | "352370561843859456","","","","","2013-07-03 10:17:43 +0000","Buffer","逆求人イベントに参加してみたら想像以上に良かった | ヨーグルト大学情報学部現代メディアコンテンツ科 http://t.co/oNfCqIo705 すばらっ","http://buff.ly/12dhFlp"
31 | "352355700837597184","","","","","2013-07-03 09:18:40 +0000","さらそば岬","コーヒー飲まないと頭痛くなる。"
32 | "352000457234841600","","","","","2013-07-02 09:47:03 +0000","さらそば岬","グリーンスムージーはじめたい"
33 | "351960631798284288","","","","","2013-07-02 07:08:48 +0000","IFTTT","意図せず運動不足型からやせ型に進化してて地味に嬉しい。 http://t.co/wgldgoQYct","http://twitter.com/takuti/status/351960631798284288/photo/1"
34 | "351454919968043010","","","","","2013-06-30 21:39:17 +0000","Tweetbot for iOS","ねむすぎぽよよん"
35 | "351336461754314752","","","","","2013-06-30 13:48:34 +0000","twicca","さすがに5,6月は疲れましたわ(ヽ´ω`)"
36 | "351327010536173568","","","","","2013-06-30 13:11:01 +0000","IFTTT","緊急事態!!! http://t.co/HbPKTv8rx3","http://twitter.com/takuti/status/351327010536173568/photo/1"
37 | "351313462397374464","","","350604321408315392","203143855","2013-06-30 12:17:11 +0000","twicca","RT @makkuro_megane: パンチングマシーンやった http://t.co/HmjZkUbbnB","http://twitter.com/makkuro_megane/status/350604321408315392/photo/1"
38 | "351288546776195072","","","","","2013-06-30 10:38:10 +0000","foursquare","I'm at ジュンク堂書店 池袋本店 (豊島区, 東京都) w/ 5 others http://t.co/q0jbr8N4Xq","http://4sq.com/15U7ep7"
39 | "351266013096050689","","","","","2013-06-30 09:08:38 +0000","IFTTT","ふむ http://t.co/zlKqYNqKxd","http://twitter.com/takuti/status/351266013096050689/photo/1"
40 | "351201819164295168","","","","","2013-06-30 04:53:33 +0000","foursquare","I'm at 神田駅 (Kanda Sta.) (千代田区, 東京都) w/ 2 others http://t.co/aZPUv9VcqX","http://4sq.com/1aqKC6B"
41 | "351162429507530752","","","","","2013-06-30 02:17:02 +0000","foursquare","I'm at ゴーゴーカレー 六本木スタジアム - @gogo_curry (港区, 東京都) http://t.co/PKk2qMwQ5o","http://4sq.com/14i1J5G"
42 | "351132626591031296","","","","","2013-06-30 00:18:36 +0000","foursquare","I'm at 六本木駅 (Roppongi Sta.) (港区, 東京都) http://t.co/leqjs94iuz","http://4sq.com/18nvw0Z"
43 | "351124319914577920","","","","","2013-06-29 23:45:36 +0000","foursquare","I'm at 森下駅 (Morishita Sta.) (江東区, 東京都) http://t.co/fSGyr3V2up","http://4sq.com/15SMqhI"
44 | "350960716343308288","","","","","2013-06-29 12:55:29 +0000","foursquare","I'm at ナンクルナイサ きばいやんせー 森下店 http://t.co/9TxCzL1P77","http://4sq.com/13dQRJe"
45 | "350868902324076544","","","","","2013-06-29 06:50:39 +0000","さらそば岬","「""国の""セキュリティ人材育成合宿」"
46 | "350868837719212032","","","","","2013-06-29 06:50:24 +0000","さらそば岬","セプキャンを知らない人に簡潔に説明する時みなさんどうしてますか"
47 | "350630725743157248","350630553269190659","19161647","","","2013-06-28 15:04:14 +0000","web","@grapswiz おはよう^ー^"
48 | "350630433039466496","","","","","2013-06-28 15:03:04 +0000","web","寝るね;;"
49 | "350630344669671425","","","","","2013-06-28 15:02:43 +0000","web","やべぇプリンターあんじゃん最強!!!!!と思って調子乗って6枚くらい印刷したら、紙1枚10円ですって言われた。"
50 | "350630081653256193","350629605612335105","225819818","","","2013-06-28 15:01:40 +0000","web","@century_sho Wさんと行きました!間違いないですね!!"
51 | "350630001416216576","350629495847391236","273291712","","","2013-06-28 15:01:21 +0000","web","@kamoti_Lm 会津若松市民になったばかりなのに・・・。"
52 | "350629730023768065","","","","","2013-06-28 15:00:16 +0000","さらそば岬","ただ、ラウンジで周りにいる人3人くらいみんな履歴書書いてて静かな戦いの幕開けを感じさせる。"
53 | "350628434029985792","","","","","2013-06-28 14:55:07 +0000","web","ここのカプセルホテル飲食物持ち込み可だし全館無線LAN完備だしプリンターあるし最強な感じする。"
54 | "350623677991239682","","","","","2013-06-28 14:36:13 +0000","さらそば岬","焼き鳥おいしかった。カプセルホテル童貞捨てた。"
55 | "350576722502103041","","","","","2013-06-28 11:29:38 +0000","foursquare","I'm at 四文屋 秋葉原店 (千代田区, 東京都) http://t.co/CJUibHGQ7J","http://4sq.com/1520bO2"
56 | "350571554939228160","","","","","2013-06-28 11:09:06 +0000","IFTTT","やば http://t.co/o540Adrznz","http://twitter.com/takuti/status/350571554939228160/photo/1"
57 | "350562074855866370","","","","","2013-06-28 10:31:26 +0000","twicca","ここで携帯の電池切れてる人探すってどんなクエストだよ…"
58 | "350561806382673920","350561096492531712","19161647","","","2013-06-28 10:30:22 +0000","twicca","@grapswiz"
59 | "350561073449009152","","","","","2013-06-28 10:27:27 +0000","foursquare","I'm at 秋葉原駅 電気街口 (千代田区, 東京都) w/ 10 others http://t.co/1K85a5tVTl","http://4sq.com/111vA44"
60 | "350557945383223296","","","","","2013-06-28 10:15:01 +0000","twicca","セプキャンのアンケート、ファイル名文字化けから推察するにかなり高度な問題だと考えてまだ触れていない。"
61 | "350557247237144577","","","","","2013-06-28 10:12:15 +0000","twicca","リマ…"
62 | "350555787027947522","","558569844","","","2013-06-28 10:06:27 +0000","twicca","@lis2501 @xemdmx そそそそんなにさきなんですね"
63 | "350555637018660866","","","","","2013-06-28 10:05:51 +0000","twicca","駅のホームで僕がフリスクをぶちまける事案が発生(2粒くらい)"
64 | "350554217909460993","","","","","2013-06-28 10:00:13 +0000","foursquare","へい (@ 池袋駅 (Ikebukuro Sta.) w/ 39 others) http://t.co/9TO6KLoDvD","http://4sq.com/1aTnAE0"
65 | "350487510243213312","","","","","2013-06-28 05:35:08 +0000","twicca","かわりゆくまちなみ"
66 | "350487425518288896","350487194659598338","173464519","","","2013-06-28 05:34:48 +0000","twicca","@xemdmx わわわわかりません…"
67 | "350487087100854273","","","","","2013-06-28 05:33:27 +0000","twicca","駅前にステーキ宮できてるしらなかった…"
68 | "350485692540923904","","","","","2013-06-28 05:27:55 +0000","foursquare","地獄 (@ 若松駅前 バスターミナル) http://t.co/1h7ASGXzm8","http://4sq.com/1aSN58v"
69 | "350473098589503490","","","","","2013-06-28 04:37:52 +0000","さらそば岬","????????"
70 | "350326599167639552","350224600015192064","1189412966","","","2013-06-27 18:55:44 +0000","web","@chinosque 北海道を閉じ込めたらしい。"
71 | "350324420289634304","","","","","2013-06-27 18:47:05 +0000","さらそば岬","マジかよ・・・・"
72 | "350224206627233792","","","","","2013-06-27 12:08:52 +0000","IFTTT","わけわかんねぇ買い物しちまった…(罪悪感 http://t.co/MX3nNP53tF","http://twitter.com/takuti/status/350224206627233792/photo/1"
73 | "350222977830359041","350222581502197760","1261308476","","","2013-06-27 12:03:59 +0000","web","@alpaca_p 死罪!!!"
74 | "350221904621215747","","","","","2013-06-27 11:59:43 +0000","web","TENPURA"
75 | "350221745485123584","350221659896168448","1468580610","","","2013-06-27 11:59:05 +0000","web","@araco_uu 天使"
76 | "350221627436441600","","","","","2013-06-27 11:58:37 +0000","web","天"
77 | "350221559186722817","","","","","2013-06-27 11:58:21 +0000","web","天ぷら"
78 | "350221543307096064","","","","","2013-06-27 11:58:17 +0000","web","天ぷら食べたい"
79 | "350217999065690113","","","","","2013-06-27 11:44:12 +0000","web","みたらし団子のおいしさに震えて会いたい"
80 | "350217903397814275","350124523422629888","262247094","","","2013-06-27 11:43:49 +0000","web","@kyohon123 あつめてない・・・"
81 | "350118392532983808","","","","","2013-06-27 05:08:24 +0000","IFTTT","銀なら五枚 http://t.co/M6OWonII4r","http://twitter.com/takuti/status/350118392532983808/photo/1"
82 | "350118382424715264","","","","","2013-06-27 05:08:22 +0000","IFTTT","金なら一枚 http://t.co/u2i07GhEP7","http://twitter.com/takuti/status/350118382424715264/photo/1"
83 | "349800985595621376","","","","","2013-06-26 08:07:08 +0000","さらそば岬","ヨーグルト"
84 | "349798558058287104","","","","","2013-06-26 07:57:29 +0000","さらそば岬","いろいろやりすぎて自己PRで話の軸を持たせるのがツラいんだけどみんなどうやってんの"
85 | "349793348485251072","","","","","2013-06-26 07:36:47 +0000","さらそば岬","セブンの唐揚げ棒食べなきゃだめだめ病が"
86 | "349519476376612865","","","","","2013-06-25 13:28:31 +0000","Tweetbot for iOS","大学の汚点それすなわち自虐ネタ"
87 | "349490415642230786","","","","","2013-06-25 11:33:02 +0000","さらそば岬","もずく とても うまい"
88 | "349461715156602881","","","","","2013-06-25 09:39:00 +0000","web","ぐっちょぐちょで恥ずかしすぎる汚点だよね。でも僕そういうとこも結構好きだよ。"
89 | "349461387963154432","","","349431592978104321","176721126","2013-06-25 09:37:42 +0000","web","RT @labyrinth17: 正直こういう話題Twitterでぐちぐちやるの、会津大学の汚点だからやめてくれませんかね、話があるなら直接どうぞ。敵対的な意味でもなく、疑問や提案に対してこちらで考慮したり回答する準備はございますので。"
90 | "349015498018668545","","","","","2013-06-24 04:05:53 +0000","さらそば岬","「時間をたくさん費やした=頑張った」みたいな勘違いはやめましょう。"
91 | "348830364149624832","","","","","2013-06-23 15:50:14 +0000","Tweetbot for iOS","頭弱そうだよね"
92 | "348819209381351426","","","","","2013-06-23 15:05:55 +0000","IFTTT","昨日の深夜ラーメン速報です(忘れてた) http://t.co/CuB2tkMcoA","http://twitter.com/takuti/status/348819209381351426/photo/1"
93 | "348796982489387008","","","","","2013-06-23 13:37:35 +0000","twicca","目覚めたら新幹線が停車してて窓の外には郡山の文字が見えるこのスーパーエキサイティングな感じ。"
94 | "348795605696536576","","","","","2013-06-23 13:32:07 +0000","foursquare","新幹線乗り過ごし寸前を体感した。 (@ 郡山駅 (Koriyama Sta.) w/ 3 others) http://t.co/DlOI8UBTwr","http://4sq.com/171I5f9"
95 | "348784512458706944","","","","","2013-06-23 12:48:02 +0000","twicca","やまびこ快適すぎ"
96 | "348783733337387008","","","","","2013-06-23 12:44:56 +0000","foursquare","I'm at 仙台駅 (Sendai Sta.) (仙台市青葉区, 宮城県) w/ 11 others http://t.co/yOvwHg8SDm","http://4sq.com/10d47tt"
97 | "348752265240248321","","","","","2013-06-23 10:39:54 +0000","foursquare","I'm at SK7 Bistro & Bar (仙台市宮城野区, 宮城県) http://t.co/z4BKKswZoz","http://4sq.com/10cQ3QK"
98 | "348483543963418625","348335275891630080","176721126","","","2013-06-22 16:52:06 +0000","twicca","@labyrinth17 @Takabatake_Y @motoki_uzu ごめん次の土日は東京だ…"
99 | "348065303860768768","","","","","2013-06-21 13:10:09 +0000","IFTTT","たんたん http://t.co/d1vEpj8BP2","http://twitter.com/takuti/status/348065303860768768/photo/1"
100 | "348065283191234560","","","","","2013-06-21 13:10:05 +0000","IFTTT","牛たん http://t.co/JET5pKcLm5","http://twitter.com/takuti/status/348065283191234560/photo/1"
101 |
--------------------------------------------------------------------------------
/lib/post_tweet.rb:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | require 'twitter'
4 | require_relative 'twitter_bot/tweet_generator'
5 |
# Build the generator up front; both modes below need exactly one tweet from it.
generator = TwitterBot::TweetGenerator.new

if ARGV.first == 'dry-run'
  # Dry run: only print the generated tweet, never contact Twitter.
  puts "[tweet] #{generator.generate}"
else
  # Any other invocation posts the tweet using credentials from the environment.
  client = Twitter::REST::Client.new do |c|
    c.consumer_key = ENV['CONSUMER_KEY']
    c.consumer_secret = ENV['CONSUMER_SECRET']
    c.access_token = ENV['OAUTH_TOKEN']
    c.access_token_secret = ENV['OAUTH_TOKEN_SECRET']
  end
  client.update(generator.generate)
end
20 |
--------------------------------------------------------------------------------
/lib/reply_daemon.rb:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | require 'tweetstream'
4 | require 'twitter'
5 | require_relative 'twitter_bot/tweet_generator'
6 |
# Since the streaming feature of the Twitter gem is still experimental,
# the TweetStream gem is used to track tweets on the stream.
TweetStream.configure do |config|
  config.consumer_key = ENV['CONSUMER_KEY']
  config.consumer_secret = ENV['CONSUMER_SECRET']
  config.oauth_token = ENV['OAUTH_TOKEN']
  config.oauth_token_secret = ENV['OAUTH_TOKEN_SECRET']
  config.auth_method = :oauth
end

# REST client used to post the replies.
rest = Twitter::REST::Client.new do |config|
  config.consumer_key = ENV['CONSUMER_KEY']
  config.consumer_secret = ENV['CONSUMER_SECRET']
  config.access_token = ENV['OAUTH_TOKEN']
  config.access_token_secret = ENV['OAUTH_TOKEN_SECRET']
end

generator = TwitterBot::TweetGenerator.new
bot_name = ENV['SCREEN_NAME'].to_s

# non-daemonized tracking:
# TweetStream::Client.new.track(ENV['SCREEN_NAME']) do |status|

TweetStream::Daemon.new('tracker').track(ENV['SCREEN_NAME']) do |status|
  # Never answer the bot's own tweets: a reply to itself would start with
  # "@<bot name>", contain the tracked keyword again and trigger yet another
  # reply, looping forever.
  next if status.user.screen_name.to_s.casecmp?(bot_name)

  t = "@#{status.user.screen_name} #{generator.generate}"
  rest.update(t, in_reply_to_status_id: status.id)
end
33 |
--------------------------------------------------------------------------------
/lib/twitter_bot/tweet.rb:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | require 'nkf'
4 |
module TwitterBot
  # Text of a single tweet, cleaned up before it is fed to the Markov generator.
  class Tweet
    attr_accessor :text

    # @param text [#to_s] raw tweet text. It is converted with to_s because
    #   numeric-only tweets made unpack('U*') raise an error.
    #
    # After construction, +text+ is nil when NKF does not guess the string to
    # be UTF-8; callers are expected to skip such tweets.
    def initialize(text)
      # Work on a private copy so normalize! never mutates the caller's string
      # and never fails on a frozen one.
      @text = text.to_s.dup
      @text = nil if NKF.guess(@text) != NKF::UTF8
    end

    # Strips, in place, everything that should not end up in generated tweets.
    # Does nothing when +text+ is nil.
    def normalize!
      return if @text.nil?

      @text.gsub!(/[^\u{0}-\u{FFFF}]/, '?') # replace characters outside U+0000-U+FFFF (emoji) with ?
      # RT/QT must be handled before mentions are stripped: otherwise
      # "RT @user: ..." becomes "RT: ..." and no longer matches this pattern.
      @text.gsub!(/(RT|QT)\s*@?[0-9A-Za-z_]+.*$/, '') # drop everything from RT/QT to the end of the line
      @text.gsub!(/\.?\s*@[0-9A-Za-z_]+/, '') # drop every @mention
      @text.gsub!(%r{https?://\S+}, '') # drop http and https URLs, up to the next whitespace
      @text.gsub!(/#[0-9A-Za-z_]+/, '') # drop hashtags
    end
  end
end
23 |
--------------------------------------------------------------------------------
/lib/twitter_bot/tweet_generator.rb:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | require 'kusari'
4 | require 'open-uri'
5 | require 'json'
6 | require 'csv'
7 |
8 | require_relative 'tweet'
9 |
module TwitterBot
  # Generates tweets from a Kusari Markov table built out of data/tweets/tweets.csv.
  class TweetGenerator
    # Loads the saved table from <root>/tweet.markov, building (and saving) it
    # from the CSV when loading reports false.
    def initialize
      # Repository root: three levels up from lib/twitter_bot/tweet_generator.rb.
      @root = File.expand_path('../../../', __FILE__)
      # NOTE(review): 3 is presumably the N-gram order; the second argument is
      # the Igo dictionary directory -- confirm against the kusari gem.
      @generator = Kusari::Generator.new(3, "#{@root}/ipadic")
      # Explicit comparison kept on purpose: only a literal `false` from load
      # triggers a rebuild.
      create_markov if @generator.load("#{@root}/tweet.markov") == false
    end

    # @return [String] a generated sentence, or a kaomoji (falling back to a
    #   shorter generated sentence) when the first attempt exceeds 70 characters.
    def generate
      t = @generator.generate(140)
      t.length > 70 ? get_kaomoji : t # avoid very long random sentence
    end

    private

    # Feeds the normalized `text` column of every usable CSV row into the
    # generator and saves the resulting table as tweet.markov.
    def create_markov
      CSV.foreach("#{@root}/data/tweets/tweets.csv", :headers => true) do |row|
        t = Tweet.new(row['text'])
        next if t.text.nil?
        t.normalize!
        @generator.add_string(t.text)
      end
      @generator.save("#{@root}/tweet.markov")
    end

    # Fetches a random kaomoji from the remote service; on any failure (or a
    # response without text) falls back to a short generated tweet.
    #
    # URI.open replaces Kernel#open, which no longer opens URLs on Ruby 3+.
    # JSON.parse replaces JSON.load because the payload is untrusted remote
    # data. The timeouts keep a slow remote host from blocking the caller.
    def get_kaomoji
      kaomoji = URI.open('http://kaomoji.n-at.me/random.json', open_timeout: 5, read_timeout: 5) do |f|
        JSON.parse(f.read).dig('record', 'text')
      end
      kaomoji || @generator.generate(70)
    rescue StandardError
      @generator.generate(70) # alternative short tweet
    end
  end
end
44 |
--------------------------------------------------------------------------------
/scripts/dict.rb:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | require 'moji'
4 | require 'igo-ruby'
5 | require 'csv'
6 |
# Builds custom dictionary CSV files from Hatena keyword and Wikipedia title lists.
module CustomDictionary

  # Appends dictionary entries to a CSV file and counts them.
  class Writer
    attr_accessor :count

    # @param filename [String] path of the CSV file to create
    def initialize(filename)
      # Match EUC, the encoding of the original MeCab dictionary.
      # NOTE(review): 'utf-8:euc-jp' names UTF-8 as the external encoding --
      # confirm the written file really ends up in EUC-JP.
      @dictionary_file = CSV.open(filename, 'w', encoding: 'utf-8:euc-jp')
      @count = 0
    end

    # Writes one entry; the fixed columns are 名詞 (noun) / 一般 (general).
    #
    # @param word [String] surface form
    # @param furigana [String] reading; '*' when unknown
    def write(word, furigana='*')
      @dictionary_file << [word, 0, 0, cost(word), '名詞', '一般', '*', '*', '*', '*', word, furigana, furigana]
      @count += 1
    end

    def close
      @dictionary_file.close
    end

    private

    # Longer words get a smaller cost; the value never drops below -32768.
    def cost(word)
      [-32768, 6000 - 200 * (word.length**1.3)].max.to_i
    end
  end

  class << self
    # Reads keywordlist_furigana.csv (tab separated: reading, word) from the
    # current directory and writes every usable word to +writer+.
    #
    # @param tagger [#wakati] tokenizer used to detect already-known words
    # @param writer [#write] destination for accepted words
    def hatena(tagger, writer)
      File.open('keywordlist_furigana.csv', encoding: 'euc-jp:utf-8', undef: :replace) do |f|
        CSV.new(f, :col_sep => "\t").each do |row|
          word = row[1]

          next if word.nil? || word.empty?                 # blank or short row: nothing to register
          next if /^[0-9]{1,4}-[0-9]{2}-[0-9]{2}$/ =~ word # skip dates such as 2009-09-04
          next if /^[0-9]{1,4}年$/ =~ word                 # skip years such as 1945年
          next if word.include?(',')                       # skip words containing a comma (stopgap)
          next if tagger.wakati(word).size == 1            # skip words already recognized as one token

          furigana = row[0] ? Moji.hira_to_kata(row[0]) : String.new
          writer.write(word, furigana)
        end
      end
    end

    # Reads jawiki-latest-all-titles-in-ns0 (one title per row) from the
    # current directory and writes every usable title to +writer+.
    #
    # @param tagger [#wakati] tokenizer used to detect already-known words
    # @param writer [#write] destination for accepted words
    def wikipedia(tagger, writer)
      File.open('jawiki-latest-all-titles-in-ns0', encoding: 'euc-jp:utf-8', undef: :replace) do |f|
        CSV.new(f, :col_sep => "\t").each do |row|
          word = row[0]

          next if word.nil? || word.empty?          # blank line: nothing to register
          next if /^[0-9]{2}月[0-9]{2}日$/ =~ word  # skip month-day titles such as 09月04日
          next if /^[0-9\*\-]{1,4}年.*$/ =~ word    # titles starting with a year are almost never useful
          next if word.include?('_')                # underscore means a title with spaces; too complex (stopgap)
          next if word.include?(',')                # skip words containing a comma (stopgap)
          next if tagger.wakati(word).size == 1     # skip words already recognized as one token

          writer.write(word)
        end
      end
    end
  end

end
69 |
70 |
# Command-line driver: `ruby dict.rb hatena` or `ruby dict.rb wikipedia`.
unless %w[hatena wikipedia].include?(ARGV.first)
  abort('`hatena` or `wikipedia` must be specified')
end

tagger = Igo::Tagger.new('../ipadic')
# The output file is named after the chosen source: hatena.csv or wikipedia.csv.
writer = CustomDictionary::Writer.new("#{ARGV.first}.csv")

begin
  case ARGV.first
  when 'hatena'
    CustomDictionary::hatena(tagger, writer)
  when 'wikipedia'
    CustomDictionary::wikipedia(tagger, writer)
  end
ensure
  # Close the CSV even when generation raises, so rows written so far are flushed.
  writer.close
end

puts "Wrote #{writer.count} words"
88 |
--------------------------------------------------------------------------------