├── data └── .gitkeep ├── .gitignore ├── static ├── key.jpg ├── followee.jpg ├── verified.jpg ├── post.js ├── get.js ├── style.css └── fbutton.css ├── requirements.txt ├── templates ├── index.html ├── layout.html └── table.html ├── app.py ├── utils.py ├── README.md └── data_manager.py /data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .idea/ 3 | data/** 4 | !data/.gitkeep 5 | -------------------------------------------------------------------------------- /static/key.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaaaaanquish/twitter_manager/HEAD/static/key.jpg -------------------------------------------------------------------------------- /static/followee.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaaaaanquish/twitter_manager/HEAD/static/followee.jpg -------------------------------------------------------------------------------- /static/verified.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaaaaanquish/twitter_manager/HEAD/static/verified.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tweepy<4.0.0 2 | tqdm 3 | pandas 4 | feather-format 5 | pyarrow 6 | dash 7 | dash-daq 8 | dash-renderer 9 | dash-html-components 10 | dash-core-components 11 | plotly -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block content %} 4 |
5 |
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 | 16 | 17 | 18 |
19 |
20 |
21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /static/post.js: -------------------------------------------------------------------------------- 1 | $('.lsubmit').on('click', function(e){ 2 | var j = $(this).parent('div').find("input[name='uls']").map( 3 | function () { 4 | return JSON.stringify({"key":$(this).val(), 'checked': $(this).prop('checked')}); 5 | } 6 | ).get(); 7 | var jData = JSON.stringify({'id':$(this).parent('div').attr('id'),'ul':j}); 8 | $.ajax({ 9 | url: $(this).parent('div').attr('action'), 10 | type: 'post', 11 | data: jData, 12 | dataType: 'json', 13 | contentType: 'application/json', 14 | dataType: 'text', 15 | }).done(function(data, textStatus, jqXHR) { 16 | toastr.info(jqXHR.responseText); 17 | }).fail(function(jqXHR, textStatus, errorThrown){ 18 | toastr.warning('error status:'+jqXHR.status); 19 | }); 20 | }); 21 | 22 | 23 | 24 | $('.swInput').on('click', function(e){ 25 | var jData = JSON.stringify( 26 | {'id':$(this).parent('div').find('input').attr('id'), 27 | 'follow':$(this).parent('div').find('input').prop('checked')}); 28 | $.ajax({ 29 | url: '/follow', 30 | type: 'post', 31 | data: jData, 32 | dataType: 'json', 33 | contentType: 'application/json', 34 | dataType: 'text', 35 | }).done(function(data, textStatus, jqXHR) { 36 | toastr.info(jqXHR.responseText); 37 | }).fail(function(jqXHR, textStatus, errorThrown){ 38 | toastr.warning('error status:'+jqXHR.status); 39 | }); 40 | }); 41 | 42 | -------------------------------------------------------------------------------- /templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | FollowTools 9 | 10 | 11 | {% block content %} 12 | {% endblock %} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /static/get.js: -------------------------------------------------------------------------------- 1 | $('.qsubmit').on('click', function(e){ 2 | var jData = { 3 | 'query':$(this).parent('div').find('.nq').val(), 4 | 'sample': $(this).parent('div').find('.samq').val(), 5 | 'tocreated': $(this).parent('div').find('.tcrq').val(), 6 | 'tolasttw': $(this).parent('div').find('.tltq').val(), 7 | 'fromcreated': $(this).parent('div').find('.fcrq').val(), 8 | 'fromlasttw': $(this).parent('div').find('.fltq').val(), 9 | 'sort': $(this).parent('div').find('.sorq').val(), 10 | 'ascend': $(this).parent('div').find('.asq').val(), 11 | }; 12 | $.ajax({ 13 | url: '/get_table', 14 | type: 'get', 15 | data: jData, 16 | dataType: 'html', 17 | }).done(function(data, textStatus, jqXHR) { 18 | $('#table').html(data); 19 | }).fail(function(jqXHR, textStatus, errorThrown){ 20 | toastr.warning('error status:'+jqXHR.status); 21 | }); 22 | }); 23 | 24 | 25 | $('.qmake').on('click', function(e){ 26 | var jData = 27 | '?nq=' + $(this).parent('div').find('.nq').val() + 28 | '&tcrq='+ $(this).parent('div').find('.tcrq').val() + 29 | '&tltq='+ $(this).parent('div').find('.tltq').val() + 30 | '&fcrq='+ $(this).parent('div').find('.fcrq').val() + 31 | '&fltq='+ $(this).parent('div').find('.fltq').val() + 32 | '&sorq='+ $(this).parent('div').find('.sorq').val() + 33 | '&asq='+ $(this).parent('div').find('.asq').val() + 34 | '&samq='+ $(this).parent('div').find('.samq').val(); 35 | $(this).parent('div').append('

http://127.0.0.1:5000'+jData+'

'); 36 | }); 37 | 38 | -------------------------------------------------------------------------------- /static/style.css: -------------------------------------------------------------------------------- 1 | /* main */ 2 | body,html { 3 | border: 0px; 4 | } 5 | 6 | .main-container tr { 7 | border: dashed; 8 | width: 60% 9 | } 10 | 11 | .main-container { 12 | margin-right: auto; 13 | margin-left: auto; 14 | display: table; 15 | } 16 | 17 | /* query form */ 18 | .query-form{ 19 | width:60%; 20 | margin-top: 50px; 21 | margin-bottom: 50px; 22 | margin-right: auto; 23 | margin-left: auto; 24 | display: table; 25 | } 26 | .querys{ 27 | width: 84%; 28 | } 29 | .qlabel{ 30 | width:15%; 31 | margin-right: 1%; 32 | text-align: right; 33 | } 34 | .query-form button{ 35 | margin:10px; 36 | margin-right: auto; 37 | margin-left: auto; 38 | display: table; 39 | } 40 | 41 | 42 | /* table */ 43 | .main-table{ 44 | width:60%; 45 | margin-right: auto; 46 | margin-left: auto; 47 | } 48 | 49 | /* list form*/ 50 | .ulist{ 51 | width: 150px; 52 | font-size: 11px; 53 | display: grid; 54 | padding: .30rem; 55 | } 56 | .ulist label{ 57 | margin: 0px; 58 | } 59 | .ulist button{ 60 | margin-top: 5px; 61 | display: inline-block; 62 | padding: 0.3em 1em; 63 | text-decoration: none; 64 | color: #021901; 65 | border: solid 2px #021901; 66 | border-radius: 3px; 67 | transition: .4s; 68 | } 69 | 70 | .ulist button:hover { 71 | background: #021901; 72 | color: white; 73 | } 74 | 75 | /* prof images */ 76 | .uimages { 77 | border: 0px; 78 | padding-right: 0px; 79 | } 80 | 81 | .images{ 82 | display: inline-flex; 83 | padding: .30rem; 84 | } 85 | 86 | .icon { 87 | height: 90px; 88 | width: 90px; 89 | } 90 | 91 | .banner { 92 | height: 90px; 93 | width: 180px; 94 | } 95 | .keyicon{ 96 | width: 35px; 97 | height: 35px; 98 | padding: 5px; 99 | } 100 | .fkey{ 101 | padding-bottom: 4px; 102 | padding-top: 7px; 103 | } 104 | .vericon{ 105 | width: 35px; 106 | height: 35px; 107 | padding: 5px; 108 | } 109 | 110 | /* prof Info*/ 111 | .uinfo tr{ 112 | font-size: 11px; 113 | border: 0px; 114 | } 115 | .uinfo td{ 116 | border: 0px; 117 | padding: 1px; 118 | word-break: break-all; 119 | } 120 | .uinfo{ 121 | padding: .30rem; 122 | width: 250px; 123 | display: inline-block; 124 | } 125 | .keys{ 126 | width:50px; 127 | text-align: right; 128 | } 129 | 130 | /* tweet */ 131 | .tweet{ 132 | font-size: 11px; 133 | width: 200px; 134 | } 135 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, request, Response 2 | from data_manager import DataManager 3 | from utils import get_users, reconv_list 4 | import json 5 | import os 6 | import argparse 7 | import sys 8 | app = Flask(__name__) 9 | 10 | # env 11 | config = {} 12 | try: 13 | config['CONSUMER_KEY'] = os.environ['CONSUMER_KEY'] 14 | config['CONSUMER_SECRET'] = os.environ['CONSUMER_SECRET'] 15 | config['ACCESS_TOKEN'] = os.environ['ACCESS_TOKEN'] 16 | config['ACCESS_TOKEN_SECRET'] = os.environ['ACCESS_TOKEN_SECRET'] 17 | my_id = os.environ['TWITTER_ACCOUNT'] 18 | except Exception as e: 19 | print(e) 20 | print('[[ Please set twitter token in environment variables !!]]') 21 | sys.exit() 22 | dm = DataManager(my_id, config) 23 | 24 | 25 | @app.route('/') 26 | def main(): 27 | r = request.args 28 | req = {} 29 | req['nq'] = r.get('nq', 'followed') 30 | req['fcrq'] = r.get('fcrq', 'yy-mm-dd') 31 | req['tcrq'] = r.get('tcrq', 'yy-mm-dd') 32 | req['fltq'] = r.get('fltq', 'yy-mm-dd') 33 | req['tltq'] = r.get('tltq', 'yy-mm-dd') 34 | req['sorq'] = r.get('sorq', 'follower_number') 35 | req['asq'] = r.get('asq', 'True') 36 | req['samq'] = r.get('samq', '100') 37 | return render_template("index.html", req=req) 38 | 39 | 40 | @app.route('/get_table') 41 | def get_table(): 42 | req = request.args 43 | users = get_users(dm, req) 44 | return render_template("table.html", users=users) 45 | 46 | 47 | @app.route("/list_submit", methods=['POST']) 48 | def list_update(): 49 | d = request.json 50 | uid = d['id'].split('_')[1] 51 | ul = [json.loads(x)['key'] for x in d['ul'] if json.loads(x)['checked']] 52 | try: 53 | dm.list_manage(uid, reconv_list(dm, ul)) 54 | response = Response('checked: ' + ','.join(ul), mimetype='text') 55 | response.status_code = 200 56 | return response 57 | except Exception as e: 58 | print(e) 59 | return Response(str(e), 500) 60 | 61 | 62 | @app.route("/follow", methods=['POST']) 63 | def follow(): 64 | d = request.json 65 | uid = d['id'].split('_')[1] 66 | try: 67 | if d['follow']: 68 | dm.unfollow(uid) 69 | t = 'unfollow' 70 | else: 71 | dm.follow(uid) 72 | t = 'follow' 73 | except Exception as e: 74 | print(e) 75 | return Response(str(e), 500) 76 | response = Response(f'success: {t}', mimetype='text') 77 | response.status_code = 200 78 | return response 79 | 80 | 81 | if __name__ == "__main__": 82 | try: 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument('--host', default=None, type=str) 85 | parser.add_argument('--port', default=None, type=str) 86 | parser.add_argument('--debug', default=True, type=bool) 87 | args = parser.parse_args() 88 | print(args) 89 | app.run(debug=args.debug, host=args.host, port=args.port) 90 | except KeyboardInterrupt: 91 | dm.save() 92 | except Exception: 93 | raise 94 | -------------------------------------------------------------------------------- /static/fbutton.css: -------------------------------------------------------------------------------- 1 | 80 | 81 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | date_reg = re.compile(r'(\d{4})-(\d{1,2})-(\d{1,2})') 3 | 4 | 5 | def convert_list(dm, l): 6 | return [{'key': x[0], 'flag': x[0] in l} for x in dm.user_list] 7 | 8 | 9 | def reconv_list(dm, l): 10 | return [y[1] for x in l for y in dm.user_list if y[0] == x] 11 | 12 | 13 | def split_tag(x): 14 | if x is None: 15 | return '' 16 | if type(x) != str: 17 | return str(x) 18 | return x.split('<')[1].split('>')[1] if '<' in x else x 19 | 20 | 21 | def convert_url(url): 22 | return url if url is None else url.replace('_normal', '') 23 | 24 | 25 | def get_url(row): 26 | if type(row['expanded_url_0']) != str: 27 | return '' 28 | if len(row['expanded_url_0']) > 70: 29 | return row['url'] 30 | return row['expanded_url_0'] 31 | 32 | 33 | def shortu(url): 34 | if len(url) > 33: 35 | return url[:32] + '…' 36 | return url 37 | 38 | 39 | def req2df(dm, req): 40 | df = dm.data_df 41 | query = req.get('query', '') 42 | if query: 43 | df = df.query(query) 44 | at = req.get('fromcreated', '') 45 | if at and date_reg.search(at): 46 | df = df[df['created_at'] >= at] 47 | at = req.get('tocreated', '') 48 | if at and date_reg.search(at): 49 | df = df[df['created_at'] <= at] 50 | at = req.get('fromlasttw', '') 51 | if at and date_reg.search(at): 52 | df = df[df['toptweet_created_at'] >= at] 53 | at = req.get('tolasttw', '') 54 | if at and date_reg.search(at): 55 | df = df[df['toptweet_created_at'] <= at] 56 | sort = req.get('sort', '') 57 | if sort and sort in df.columns: 58 | ascend = req.get('ascend', '').lower() == 'true' 59 | df = df.sort_values(sort, ascending=ascend) 60 | sample = req.get('sample', '') 61 | if sample: 62 | sample = int(sample) 63 | if len(df) >= sample: 64 | df = df.head(sample) 65 | return df 66 | 67 | 68 | def get_users(dm, req): 69 | print(req) 70 | df = req2df(dm, req) 71 | users = [] 72 | for _, row in df.iterrows(): 73 | users.append({ 74 | 'idstr': row['id_str'], 75 | 'ulist': convert_list(dm, row['joined_list']), 76 | 'images': { 77 | 'name': row['name'], 78 | 'sn': f'@{row["screen_name"]}', 79 | 'profurl': f'https://twitter.com/{row["screen_name"]}', 80 | 'banner': row['profile_banner_url'], 81 | 'icon': convert_url(row['profile_image_url']), 82 | 'protect': row['protected'], 83 | 'verified': row['verified'], 84 | 'following': row['following'], 85 | 'followed': row['followed'] 86 | }, 87 | 'info': { 88 | 'created': row["created_at"], 89 | 'lang': row["lang"], 90 | 'location': row["location"], 91 | 'url': get_url(row), 92 | 'urlt': shortu(get_url(row)), 93 | 'tweet': row["statuses_count"], 94 | 'follower': row["followers_count"], 95 | 'follow': row["friends_count"], 96 | 'fav': row["favourites_count"], 97 | 'inlist': row["listed_count"], 98 | 'bio': row['description'] 99 | }, 100 | 'tweet': { 101 | 'text': row['toptweet_text'], 102 | 'created': row['toptweet_created_at'], 103 | 'source': split_tag(row['toptweet_source']) 104 | } 105 | }) 106 | return users 107 | -------------------------------------------------------------------------------- /templates/table.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block content %} 4 |
5 |
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | {% for user in users %} 17 | 18 | 19 | 31 | 32 | 33 | 66 | 67 | 68 | 82 | 83 | 84 | 89 | 90 | {% endfor %} 91 | 92 |
ListImagesInfoTweet
20 |
21 | {% for u in user.ulist %} 22 | {% if u.flag %} 23 | 24 | {% else %} 25 | 26 | {% endif %} 27 | {% endfor %} 28 | 29 |
30 |
34 | 35 | 38 | 42 | 64 |
36 | {{ user.images.name }} {{ user.images.sn }} 37 |
39 | 40 | 41 |
43 |
44 | {% if user.images.following %} 45 | 46 | {% else %} 47 | 48 | {% endif %} 49 | 50 |
51 |
52 | 53 | {% if user.images.followed %} 54 | 55 | {% endif %} 56 | {% if user.images.protect %} 57 | 58 | {% endif %} 59 | {% if user.images.verified %} 60 | 61 | {% endif %} 62 | 63 |
65 |
69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 |
created:{{ user.info.created }}
lang:{{ user.info.lang }}
location:{{ user.info.location }}
url:{{ user.info.urlt }}
tweet:{{ user.info.tweet }}
follower:{{ user.info.follower }}
follow:{{ user.info.follow }}
fav:{{ user.info.fav }}
list:{{ user.info.inlist }}
bio:{{ user.info.bio }}
81 |
85 |

{{ user.tweet.text }}

86 |

{{ user.tweet.created }}

87 |

{{ user.tweet.source }}

88 |
93 |
94 |
95 | {% endblock %} 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | フォロワーを管理したりデータ見たりするやつ 4 | 5 | # なんかできること 6 | - APIを利用したデータの取得 7 | - jsonからpandas.DataFrameへの変換 8 | - Flaskによるフォロワー管理App 9 | 10 | # いるやつ 11 | 12 | - pyhton 3.x 13 | - Twitter API key 14 | - Twitter Consumer Key 15 | 16 | python 3.x環境に各パッケージをインストール 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | [Twitter Developer](https://developer.twitter.com/)よりTwitter API Key、Consumer Keyを取得 21 | アクセストークン情報と自身のTwitterIDを環境変数に 22 | ``` 23 | export ACCESS_TOKEN=100000-piyopiyo 24 | export ACCESS_TOKEN_SECRET=hogehoge 25 | export CONSUMER_KEY=fugafuga 26 | export CONSUMER_SECRET=hogerahogera 27 | export TWITTER_ACCOUNT=vaaaaanquish 28 | ``` 29 | 30 | # うごかす 31 | data_managerでAPIを利用しjsonを手元に落として、pandas化 32 | ゼロから10000アカウントでAPI規制で止まり止まり動作して半日くらい 33 | ``` 34 | python data_manager.py --follower_update --list_update --list_user_update --df_update --user_json_update=1 35 | ``` 36 | 37 | Excel等の他のアプリで読みたい場合はCSVファイルを出力 38 | ``` 39 | python data_manager.py --output_csv 40 | ``` 41 | 42 | Flask Appを立ててフォロワー管理 43 | ``` 44 | python app.py 45 | ``` 46 | 47 | リモートマシンやDockerコンテナ内で動かす時には、ホストIPアドレスを `0.0.0.0` にしておけばアクセスできる 48 | (デフォルトの`127.0.0.1`だと外部のクライアントからアクセスできない) 49 | ``` 50 | python app.py --host 0.0.0.0 51 | ``` 52 | 53 | # data manager args 54 | |args|description| 55 | |---|---| 56 | |--follower_update|フォロー,フォロワー情報をアップデート| 57 | |--user_json_update|0: userデータのアップデートはしない(default)| 58 | | |1: userデータをゼロベースで取得| 59 | | |2: 未取得の差分userのみ取得| 60 | |--df_update|全userデータをロードし直してdf作り直す| 61 | |--list_update|自分が作っているリスト情報を更新| 62 | |--list_user_update|各リスト内のユーザ情報を更新| 63 | 64 |   65 | 新しいフォロワーだけ更新したい時はこんな感じ 66 | ``` 67 | python data_manager.py --follower_update --list_user_update --df_update --user_json_update=2 68 | ``` 69 | 70 | # form 71 | |label|default|description| 72 | |---|---|---| 73 | |query|followed|pandas.query| 74 | |from|yy-mm-dd|from:アカウント作成日| 75 | |to|yy-mm-dd|to:アカウント作成日| 76 | |fromtw|yy-mm-dd|from:最新ツイートの日| 77 | |totw|yy-mm-dd|to:最新ツイート日| 78 | |sort|follower_number|ソートに利用するColumn名| 79 | |ascend|True|ソート逆順か| 80 | |sample|100|表示数| 81 | |submit| |form条件でtable表示| 82 | |gen url| |form情報をURLパラメータとして表示| 83 | |reset| |formリセット| 84 | 85 | # default column 86 | 87 | |column|discription|type|sample| 88 | |----|----|----|----| 89 | |contributors_enabled|tweet_deckで共有アカウント設定してるかどうか|bool|False| 90 | |created_at|アカウント作成日|datetime|Fri Jun 07 03:09:31 +0000 2013| 91 | |default_profile|デフォルトのプロフィールのままか|bool|True| 92 | |default_profile_image|デフォルトの画像のままか|bool|False| 93 | |description|bioに書いてある説明文|str|冷やし中華始めました| 94 | |description_expanded_url_0|description内のURL展開結果|str|http~| 95 | |description_expanded_url_1|description内のURL展開結果|str|http~| 96 | |description_expanded_url_2|description内のURL展開結果|str|http~| 97 | |description_expanded_url_3|description内のURL展開結果|str|http~| 98 | |description_expanded_url_num|description内のURL数|int|3| 99 | |expanded_url_0|bio内のURL展開結果|str|http~| 100 | |expanded_url_num|bio内のURL数|int|1| 101 | |favourites_count|お気に入りの数|int|28878| 102 | |follow_request_sent|フォローのリクエストを投げているか|bool|False| 103 | |followers_count|フォロワー数|int|683| 104 | |following|フォローしているか|bool|True| 105 | |followed|フォローされているか|bool|True| 106 | |friends_count|フォロー数|int|703| 107 | |get_date|データの取得日(API叩いた時間)|datetime|Mon Feb 03 04:34:50 +0000 2019| 108 | |id_str|ユニークID|str|231~| 109 | |lang|言語設定|str|ja| 110 | |listed_count|オープンなリストに入ってる数|int|14| 111 | |location|位置情報|str|日本 東京| 112 | |name|twitterアカウントの名前|str|ばんくし| 113 | |profile_banner_url|ヘッダーの画像|str|https://pbs.twimg.com~| 114 | |profile_image_url|プロフィールの画像|str|http://pbs.twimg.com~| 115 | |protected|鍵垢かどうかのbool|bool|False| 116 | |screen_name|twitterアカウントのSN|str|vaaaaanquish| 117 | |statuses_count|ツイートの数|int|53045| 118 | |time_zone|タイムゾーン|str|None or tokyo| 119 | |toptweet_created_at|topのツイートの投稿日|datetime|Mon Feb 03 04:34:50 +0000 2014| 120 | |toptweet_id|topのツイートのID|str|123~| 121 | |toptweet_lang|topのツイートの言語|str|ja| 122 | |toptweet_retweet_flag|topのツイートがリツイートかどうかのフラグ|bool|False| 123 | |toptweet_source|topのツイートの投稿アプリ|str|TweetDeck| 124 | |toptweet_text|topのツイートのテキスト|str|冷やし中華がうまい| 125 | |url|bioに書いてあるURL|str|http~| 126 | |verified|認証アカウントかのbool|bool|False| 127 | |description_length|bioの長さ|int|30| 128 | |diff_created_at|アカウント作成日から取得日までの日数|int|2000| 129 | |diff_toptweet_created_at|最新ツイートから取得日までの日数|int|2000| 130 | |sn_length|SNの長さ|int|12| 131 | |joined_list|入っているリスト|list|[hoge, piyo]| 132 | |follower_number|フォロワーになった順にならべた時のid|int|20| 133 | |followee_number|フォローした順にならべた時のid|int|20| 134 | 135 |   136 | Columnを追加する場合は `data_manager._add_data` 辺りをよしなに編集 137 | ``` 138 | python data_manager.py --df_update 139 | ``` 140 | 141 | # file tree 142 | - app.py: flaskでフォロワー管理アプリ建てるやつ 143 | - data_manager.py: フォロワー情報のダウンロード、書き込み、読み込み 144 | - utils.py: flaskに渡すテーブルとか作ったりするやつ 145 | - static 146 | - followee.jpg: フォローされてたら出るやつ 147 | - key.jpg: 鍵垢だったら出るやつ 148 | - verified.jpg: 認証アカウントだったら出るやつ 149 | - get.js: なんかget周りをAjaxしてくれる色々 150 | - post.js: 同上 151 | - style.css: 全体的ないろいろ 152 | - fbutton.css: なんかフォローボタンのやつ 153 | - templetes 154 |  - layout.html: ベースのやつ 155 | - index.html: Formとかtable読み込んだりとか 156 |  - table.html: フォロワーテーブルのとこ 157 | - ipynb: ちょっと分析してみるだけのjupyter notebook 158 | - README.md: お前が読んでるこれ 159 | - requirements.txt: pip 160 | 161 | 162 | # 将来的になんかできたらいいかも 163 | - バックエンドにSQLかもうちょっと簡易な何か 164 | - JSライブラリ何か使う 165 | - 統計情報も見れるようにする 166 | 167 | # ヒント 168 | [@vaaaaanquish](https://vaaaaanquish.jp/)に聞いたら多分わかる 169 | -------------------------------------------------------------------------------- /data_manager.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import os 3 | import tweepy 4 | import json 5 | from datetime import datetime 6 | from tqdm import tqdm 7 | from pathlib import Path 8 | import pandas as pd 9 | from collections import defaultdict 10 | import pickle 11 | import sys 12 | import argparse 13 | 14 | must_key = [ 15 | 'id_str', 'following', 'follow_request_sent', 'name', 'screen_name', 'location', 'time_zone', 'description', 'url', 'protected', 'created_at', 16 | 'followers_count', 'friends_count', 'listed_count', 'favourites_count', 'statuses_count', 'verified', 'lang', 'contributors_enabled', 'profile_image_url', 17 | 'profile_banner_url', 'default_profile_image', 'default_profile' 18 | ] 19 | must_json_key = ['entities', 'status'] 20 | 21 | 22 | class DataManager(): 23 | def __init__(self, my_id, config, follower_update=False, user_json_update=False, df_update=False, list_update=False, list_user_update=False): 24 | """ 25 | follower_update: フォロー情報をアップデート 26 | user_json_update: 27 | - 0: jsonデータのアップデートはしない 28 | - 1: ユーザ情報をゼロからアップデート 29 | - 2: json未取得の差分のみアップデート 30 | df_update: jsonファイルをロードし直してdf作り直すか 31 | list_update: 自分が作っているリストを更新するか 32 | list_user_update: リストに入っているユーザを更新するか 33 | """ 34 | # make dir 35 | self.data_path = './data' 36 | if not os.path.exists('./data'): 37 | os.mkdir('./data') 38 | self.tsv_path = 'tsv_files' 39 | if not os.path.exists(os.path.join(self.data_path, self.tsv_path)): 40 | os.mkdir(os.path.join('./data', self.tsv_path)) 41 | self.json_path = 'json_files' 42 | if not os.path.exists(os.path.join(self.data_path, self.json_path)): 43 | os.mkdir(os.path.join('./data', self.json_path)) 44 | self.my_id = my_id 45 | # tweepy API 46 | auth = tweepy.OAuthHandler(config['CONSUMER_KEY'], config['CONSUMER_SECRET']) 47 | auth.set_access_token(config['ACCESS_TOKEN'], config['ACCESS_TOKEN_SECRET']) 48 | self.api = tweepy.API(auth, wait_on_rate_limit=True) 49 | # follower, followee 50 | self.follower_update = follower_update 51 | self.follower, self.follower_num = [], {} 52 | self.followee, self.followee_num = [], {} 53 | self._get_follower() 54 | self._get_followee() 55 | # json getter 56 | self.user_json_update = user_json_update 57 | self._download_user_info(list(set(self.follower) | set(self.followee))) 58 | # user list 59 | self.list_update = list_update 60 | self.list_user_update = list_user_update 61 | self.user_list = [] 62 | self.list_dic = defaultdict(list) 63 | self._get_list() 64 | for x in self.user_list: 65 | self._get_list_users(x) 66 | # all json convert pandas 67 | self.df_update = df_update 68 | self.data_df = self._convert_pandas() 69 | self.user_list_dic = self._user_list() 70 | 71 | def _update_file(self, l, flag): 72 | for x in l: 73 | if os.path.exists(os.path.join(self.data_path, self.json_path, f'{x}.json')): 74 | with open(os.path.join(self.data_path, self.json_path, f'{x}.json'), 'r') as f: 75 | j = json.load(f) 76 | j['followering'] = flag 77 | with open(os.path.join(self.data_path, self.json_path, f'{x}.json'), 'w') as f: 78 | json.dump(j, f) 79 | 80 | def _get_follower(self): 81 | '''フォロワーのID取得''' 82 | if self.follower_update: 83 | print('_get_follower') 84 | tmp = list() 85 | if os.path.exists(os.path.join(self.data_path, self.tsv_path, 'follower.tsv')): 86 | with open(os.path.join(self.data_path, self.tsv_path, 'follower.tsv'), 'r') as f: 87 | tmp = set([x.strip() for x in f if x != '']) 88 | followers_ids = tweepy.Cursor(self.api.followers_ids, id=self.my_id, cursor=-1).items() 89 | for i, followers_id in enumerate(followers_ids): 90 | self.follower.append(str(followers_id)) 91 | self.follower_num.update({str(followers_id): str(i)}) 92 | # diff 93 | newfd = set(self.follower) - set(tmp) 94 | self._update_file(list(newfd), True) 95 | removed = set(tmp) - set(self.follower) 96 | self._update_file(list(removed), False) 97 | # save 98 | with open(os.path.join(self.data_path, self.tsv_path, 'follower.tsv'), 'w') as f: 99 | for x in self.follower: 100 | f.write(f'{x}\n') 101 | with open(os.path.join(self.data_path, self.tsv_path, 'follower_num.tsv'), 'w') as f: 102 | for k, v in self.follower_num.items(): 103 | f.write(f'{k}\t{v}\n') 104 | else: 105 | with open(os.path.join(self.data_path, self.tsv_path, 'follower.tsv'), 'r') as f: 106 | self.follower = [x.strip() for x in f if x != ''] 107 | with open(os.path.join(self.data_path, self.tsv_path, 'follower_num.tsv'), 'r') as f: 108 | self.follower_num = {x.strip().split('\t')[0]: x.strip().split('\t')[1] for x in f if x != ''} 109 | 110 | def _get_followee(self): 111 | '''フォロイーのID取得''' 112 | if self.follower_update: 113 | print('_get_followee') 114 | followee_ids = tweepy.Cursor(self.api.friends_ids, id=self.my_id, cursor=-1).items() 115 | for i, followee_id in enumerate(followee_ids): 116 | self.followee.append(str(followee_id)) 117 | self.followee_num.update({str(followee_id): str(i)}) 118 | with open(os.path.join(self.data_path, self.tsv_path, 'followee.tsv'), 'w') as f: 119 | for x in self.followee: 120 | f.write(f'{x}\n') 121 | with open(os.path.join(self.data_path, self.tsv_path, 'followee_num.tsv'), 'w') as f: 122 | for k, v in self.followee_num.items(): 123 | f.write(f'{k}\t{v}\n') 124 | else: 125 | with open(os.path.join(self.data_path, self.tsv_path, 'followee.tsv'), 'r') as f: 126 | self.followee = [x.strip() for x in f if x != ''] 127 | with open(os.path.join(self.data_path, self.tsv_path, 'followee_num.tsv'), 'r') as f: 128 | self.followee_num = {x.strip().split('\t')[0]: x.strip().split('\t')[1] for x in f if x != ''} 129 | 130 | def _download_user_info(self, l): 131 | '''ユーザ情報を取得しjsonにdumpしつつログを取る''' 132 | if int(self.user_json_update) > 0: 133 | print('_download_user_info') 134 | with open(os.path.join(self.data_path, 'twlog.txt'), 'a') as fl: 135 | for i, u in enumerate(tqdm(l)): 136 | if int(self.user_json_update) == 2: 137 | # non update 138 | if os.path.exists(os.path.join(self.data_path, self.json_path, f'{u}.json')): 139 | continue 140 | fl.write('[{}][{}]user:{}\n'.format(datetime.now(), i, u)) 141 | try: 142 | user = self.api.get_user(u) 143 | except tweepy.error.TweepError: 144 | # 凍結されたアカウント等 145 | continue 146 | with open(os.path.join(self.data_path, self.json_path, f'{u}.json'), 'w') as f: 147 | json.dump(user._json, f) 148 | 149 | def _get_twlog(self): 150 | '''_download_user_infoで生成されるログの雑parser''' 151 | twlog = {} 152 | with open(os.path.join(self.data_path, './twlog.txt'), 'r') as f: 153 | for row in f: 154 | x = row.strip().split(']') 155 | twlog[x[2].split(':')[1]] = x[0].replace('[', '') 156 | return twlog 157 | 158 | def _list_check(self, user_id): 159 | '''idが入っているListのリストを返す''' 160 | user_id = str(user_id) 161 | return [x[1] for x in self.user_list if user_id in self.list_dic[x[0]]] 162 | 163 | def _add_data(self, data_df): 164 | '''pandasに追加するデータ''' 165 | # descriptionの長さ 166 | data_df['description_length'] = data_df.description.apply(lambda x: len(x) if type(x) == str else 0) 167 | # アカウント作成から今日までの日数 168 | data_df['diff_created_at'] = data_df.created_at.apply(lambda x: (pd.Timestamp.utcnow() - x).days) 169 | # 最新ツイート投稿から今日までの日数 170 | data_df['diff_toptweet_created_at'] = data_df.toptweet_created_at.apply(lambda x: (pd.Timestamp.utcnow() - x).days) 171 | # SNの長さ 172 | data_df['sn_length'] = data_df.screen_name.apply(lambda x: len(x)) 173 | # followerかどうか(フォローしているかどうかはAPIが返してくれる) 174 | data_df['followed'] = data_df.id_str.apply(lambda x: x in self.follower) 175 | # 自分が入れたリスト 176 | data_df['joined_list'] = data_df.id_str.apply(self._list_check) 177 | # フォロー、フォロワーとなった順番 178 | data_df['follower_number'] = data_df.id_str.apply(lambda x: int(self.follower_num.get(x, 9999999))) 179 | data_df['followee_number'] = data_df.id_str.apply(lambda x: int(self.followee_num.get(x, 9999999))) 180 | return data_df 181 | 182 | def _convert_pandas(self): 183 | '''jsonを全てロードしてdfにしてdump''' 184 | if not self.df_update: 185 | with open(os.path.join(self.data_path, 'twdata.pkl'), 'rb') as f: 186 | df = pickle.load(f) 187 | return df 188 | print('_convert_pandas') 189 | twlog = self._get_twlog() 190 | all_data_list = [] 191 | for x in tqdm(Path(os.path.join(self.data_path, self.json_path)).glob('*.json')): 192 | j = json.load(x.open('r')) 193 | d = {} 194 | for k, v in j.items(): 195 | if k in must_key: 196 | d[k] = v 197 | elif k in must_json_key: 198 | if k == 'entities': 199 | # urlのURL展開結果を保存 200 | for i, url_data in enumerate(v.get('url', {}).get('urls', [])): 201 | for urlk, urlv in url_data.items(): 202 | if urlk == 'expanded_url': 203 | d['expanded_url_{}'.format(i)] = urlv 204 | # bio内のURL展開結果を保存 205 | for i, url_data in enumerate(v.get('description', {}).get('urls', [])): 206 | for urlk, urlv in url_data.items(): 207 | if urlk == 'expanded_url': 208 | d['description_expanded_url_{}'.format(i)] = urlv 209 | # 個数も保存 210 | d['expanded_url_num'] = len(v.get('url', {}).get('urls', [])) 211 | d['description_expanded_url_num'] = len(v.get('description', {}).get('urls', [])) 212 | elif k == 'status': 213 | # 最新ツイートの情報も保存() 214 | d['toptweet_created_at'] = v['created_at'] 215 | d['toptweet_id'] = v['id_str'] 216 | d['toptweet_text'] = v['text'] 217 | d['toptweet_created_at'] = v['created_at'] 218 | d['toptweet_source'] = v['source'] 219 | d['toptweet_lang'] = v['lang'] 220 | # 公式RTかどうか 221 | if 'retweeted_status' in v.keys(): 222 | d['toptweet_retweet_flag'] = True 223 | else: 224 | d['toptweet_retweet_flag'] = False 225 | # twlogに記録されたデータを取得した日時 226 | d['get_date'] = twlog[j['id_str']] 227 | all_data_list.append(d) 228 | # convert pandas 229 | data_df = pd.DataFrame(all_data_list) 230 | data_df['created_at'] = pd.to_datetime(data_df['created_at'], utc=True) 231 | data_df['get_date'] = pd.to_datetime(data_df['get_date'], utc=True) 232 | data_df['toptweet_created_at'] = pd.to_datetime(data_df['toptweet_created_at'], utc=True) 233 | for x in ['toptweet_id', 'id_str']: 234 | data_df[x] = data_df[x].astype('object') 235 | data_df = self._add_data(data_df) 236 | print('dump pkl...') 237 | with open(os.path.join(self.data_path, 'twdata.pkl'), 'wb') as f: 238 | pickle.dump(data_df, f) 239 | return data_df 240 | 241 | def _get_list(self): 242 | if self.list_update: 243 | print('_get_list') 244 | with open(os.path.join(self.data_path, self.tsv_path, 'user_list.tsv'), 'w') as f: 245 | for twilist in self.api.lists_all(screen_name=self.my_id): 246 | f.write(f'{twilist.name}\t{twilist.slug}\n') 247 | self.user_list.append([twilist.name, twilist.slug]) 248 | else: 249 | with open(os.path.join(self.data_path, self.tsv_path, 'user_list.tsv'), 'r') as f: 250 | for x in f: 251 | self.user_list.append(x.strip().split('\t')) 252 | 253 | def _get_list_users(self, slug): 254 | if self.list_user_update: 255 | print(f'_get_list_users:{slug[0]}') 256 | with open(os.path.join(self.data_path, self.tsv_path, f'list_{slug[1]}.tsv'), 'w') as f: 257 | for member in tweepy.Cursor(self.api.list_members, slug=slug[1], owner_screen_name=self.my_id).items(): 258 | self.list_dic[slug[1]].append(member.id_str) 259 | f.write(f'{member.id_str}\n') 260 | else: 261 | with open(os.path.join(self.data_path, self.tsv_path, f'list_{slug[1]}.tsv'), 'r') as f: 262 | for x in f: 263 | self.list_dic[slug[1]].append(x.strip()) 264 | 265 | def _user_list(self): 266 | dic = {} 267 | for row in self.data_df.itertuples(): 268 | tmp = [] 269 | for x in row.joined_list: 270 | for y in self.user_list: 271 | if x == y[1]: 272 | tmp.append(y[0]) 273 | dic[row.id_str] = tmp 274 | return dic 275 | 276 | def unfollow(self, x): 277 | x = str(x) 278 | self.api.destroy_friendship(x) 279 | self.data_df.loc[self.data_df['id_str'] == x, 'following'] = False 280 | self.save() 281 | 282 | def follow(self, x): 283 | x = str(x) 284 | self.api.create_friendship(x, True) 285 | self.data_df.loc[self.data_df['id_str'] == x, 'following'] = True 286 | self.save() 287 | 288 | def list_manage(self, uid, l): 289 | '''リストに追加削除''' 290 | uid = str(uid) 291 | slugs = set(self.user_list_dic[uid]) 292 | ls = set(l) 293 | # 追加 294 | for x in list(ls - slugs): 295 | self.api.add_list_member(user_id=uid, slug=x, owner_screen_name=self.my_id) 296 | self.user_list_dic[uid].append(x) 297 | # 削除 298 | for x in list(slugs - ls): 299 | self.api.remove_list_member(user_id=uid, slug=x, owner_screen_name=self.my_id) 300 | self.user_list_dic[uid].remove(x) 301 | self.save() 302 | 303 | def save(self): 304 | self.data_df['joined_list'] = self.data_df.id_str.apply(lambda x: self.user_list_dic[x]) 305 | with open(os.path.join(self.data_path, 'twdata.pkl'), 'wb') as f: 306 | pickle.dump(self.data_df, f) 307 | 308 | 309 | if __name__ == '__main__': 310 | # env 311 | config = {} 312 | try: 313 | config['CONSUMER_KEY'] = os.environ['CONSUMER_KEY'] 314 | config['CONSUMER_SECRET'] = os.environ['CONSUMER_SECRET'] 315 | config['ACCESS_TOKEN'] = os.environ['ACCESS_TOKEN'] 316 | config['ACCESS_TOKEN_SECRET'] = os.environ['ACCESS_TOKEN_SECRET'] 317 | my_id = os.environ['TWITTER_ACCOUNT'] 318 | except Exception as e: 319 | print(e) 320 | print('[[ Please set twitter token in environment variables !!]]') 321 | sys.exit() 322 | parser = argparse.ArgumentParser() 323 | parser.add_argument('--follower_update', action='store_true') 324 | parser.add_argument('--list_update', action='store_true') 325 | parser.add_argument('--list_user_update', action='store_true') 326 | parser.add_argument('--df_update', action='store_true') 327 | parser.add_argument('--user_json_update', default=0, type=int) 328 | parser.add_argument('--output_csv', action='store_true') 329 | args = parser.parse_args() 330 | print(args) 331 | print('data making') 332 | DM = DataManager(my_id, config, args.follower_update, args.user_json_update, args.df_update, args.list_update, args.list_user_update) 333 | if args.output_csv: 334 | with open(os.path.join(DM.data_path, 'twdata.pkl'), 'rb') as f: 335 | df = pickle.load(f) 336 | df.to_csv(os.path.join(DM.data_path, 'twdata.csv'), index=False) 337 | print('data managed') 338 | --------------------------------------------------------------------------------