├── .gitignore
├── LICENSE
├── README.md
├── conf.sample.ini
├── requirements.txt
├── run.py
└── utils
├── __init__.py
├── feed2toot.py
├── feed_parser.py
├── get_config.py
├── media_downloader.py
├── toot_poster.py
└── tweet_decoder.py
/.gitignore:
--------------------------------------------------------------------------------
1 | temp
2 | ffmpeg.exe
3 | conf.ini
4 | db.txt
5 | __pycache__
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Mashiro / tweet2toot contributors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tweet2toot
2 |
3 | A simple script that transports tweets from Twitter to Mastodon, based on the Twitter RSS feed provided by [RSSHub](https://rsshub.app).
4 |
5 | ```
6 | pip3 install -r requirements.txt
7 | cp conf.sample.ini conf.ini
8 | nano conf.ini
9 | python3 run.py
10 | ```
11 |
12 | Please install FFmpeg if you need support for Twitter's GIFs.
13 |
14 | ```
15 | sudo apt install ffmpeg
16 | ```
17 |
18 | Set up a cron job:
19 | ```
20 | crontab -e
21 | ```
22 | or edit the system crontab directly (Ubuntu 18.04):
23 | ```
24 | nano /etc/crontab
25 | /etc/init.d/cron restart
26 | ```
27 |
28 | Running the job hourly is recommended:
29 | ```
30 | #m h dom mon dow user command
31 | 13 * * * * root cd /tweet2toot && python3 run.py
32 | ```
33 |
34 | A script that mirrors tweets from Twitter to Mastodon, based on the Twitter RSS feed generated by [RSSHub](https://rsshub.app).
35 | 
36 | A Twitter developer account application has been pending for a long time, so RSSHub has to serve as the data source for now. 😢
--------------------------------------------------------------------------------
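A quick way to confirm that an RSSHub route is reachable before writing it into `conf.ini` is to parse it with `feedparser` (already listed in `requirements.txt`). A minimal sketch, using the sample route from `conf.sample.ini` as a stand-in for your own:

```
import feedparser

# the URL below is just the sample route from conf.sample.ini; substitute your own
feed = feedparser.parse('https://rsshub.app/twitter/user/ruanyf')
print(len(feed.entries), 'entries fetched')
if feed.entries:
    print(feed.entries[0].title)
```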
/conf.sample.ini:
--------------------------------------------------------------------------------
1 | [PROXY]
2 | ProxyOn = false
3 | HttpProxy = http://127.0.0.1:7890
4 | HttpsProxy = https://127.0.0.1:7890
5 |
6 | [MASTODON]
7 | BaseUrl = https://hello.2heng.xin/
8 | # register your application here: https://hello.2heng.xin/settings/applications
9 | AccessToken = your_app_token
10 | # add your custom filter here; only a single regular expression is accepted, see: https://docs.python.org/3/library/re.html
11 | Filter = None
12 | # 'direct' - post will be visible only to mentioned users
13 | # 'private' - post will be visible only to followers
14 | # 'unlisted' - post will be public but not appear on the public timeline
15 | # 'public' - post will be public
16 | TootVisibility = unlisted
17 | ShowSource = true
18 | # Icon/prefix shows before tweet's source link
19 | TweetSourcePrefix = :sys_twitter:
20 | # Also transport video?
21 | IncludeVideo = true
22 | # The setting below is still required even if you set 'false' above
23 | VideoSourcePrefix = :sys_video:
24 |
25 | [TWITTER]
26 | TwitterRss = https://rss.zeka.cloud/twitter/user/ruanyf
27 | BackupRss = https://rsshub.app/twitter/user/ruanyf
28 | # you can also point these at an Nginx-style reverse proxy
29 | ImageProxy = https://pbs.twimg.com/
30 | VideoProxy = https://video.twimg.com/
--------------------------------------------------------------------------------
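To illustrate the `Filter` option above: `utils/feed2toot.py` applies it with `re.match()` against each entry's HTML summary, so the pattern is anchored at the start of the summary and usually needs a leading `.*`. A small sketch with a hypothetical pattern:

```
import re

# hypothetical Filter value that keeps only tweets mentioning a hashtag
pat = re.compile('.*#Arknights.*')
print(bool(pat.match('<p>New event! #Arknights</p>')))  # True  -> entry is kept
print(bool(pat.match('<p>unrelated tweet</p>')))        # False -> entry is dropped
```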
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.9.1
2 | Pillow==9.0.1
3 | ffmpy==0.2.3
4 | feedparser==5.2.1
5 | Mastodon.py==1.5.1
6 | filetype==1.0.7
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on May 29, 2020
5 | Desc: Twitter feed to toot (based on RSSHub's feed)
6 | Author: Mashiro
7 | URL: https://2heng.xin
8 | License: MIT
9 | """
10 | from utils.feed_parser import FeedParaser
11 | from utils.feed2toot import Feed2Toot
12 | from utils.get_config import GetConfig
13 | import os
14 |
15 | config = GetConfig()
16 |
17 | if __name__ == '__main__':
18 | if config['PROXY']['ProxyOn'] == 'true':
19 | os.environ['HTTP_PROXY'] = config['PROXY']['HttpProxy']
20 | os.environ['HTTPS_PROXY'] = config['PROXY']['HttpsProxy']
21 | rss_url = config['TWITTER']['TwitterRss']
22 |     RSS_dict = FeedParaser(rss_url)
23 |     # feedparser does not raise on network errors, so check for an empty result instead
24 |     if not RSS_dict:
25 |         backup_rss = config['TWITTER'].get('BackupRss', None)
26 |         print('WARN: source {} seems unavailable, switching to {}'.format(rss_url, backup_rss))
27 |         if backup_rss:
28 |             RSS_dict = FeedParaser(backup_rss)
29 | else:
30 | print('ERROR: no backup source found, exit')
31 | exit(-1)
32 | Feed2Toot(RSS_dict)
33 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mashirozx/tweet2toot/636e7133f611deac5a259b5949ae33f20391e03a/utils/__init__.py
--------------------------------------------------------------------------------
/utils/feed2toot.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on May 29, 2020
5 | Desc: feed to toot
6 | Author: Mashiro
7 | URL: https://2heng.xin
8 | License: MIT
9 | """
10 | from os import path, makedirs
11 | import re
12 | import shutil
13 | from .tweet_decoder import TweetDecoder
14 | from .media_downloader import MediaDownloader
15 | from .toot_poster import TootPoster
16 | from .get_config import GetConfig
17 |
18 | config = GetConfig()
19 |
20 |
21 | def TwitterFilter(feed_data):
22 |     if 'TWITTER' not in config:
23 |         return feed_data
24 |     try:
25 |         twitter_filter = config['TWITTER']['Filter']
26 |     except KeyError:
27 |         twitter_filter = config['MASTODON'].get('Filter')  # conf.sample.ini keeps Filter under [MASTODON]
28 | if (twitter_filter is None) or (twitter_filter == 'False') or (twitter_filter == 'None'):
29 | return feed_data
30 | pat = re.compile(twitter_filter)
31 | result = []
32 | for feed in feed_data:
33 | if pat.match(feed['summary']):
34 | result.append(feed)
35 | return result
36 |
37 |
38 | def Feed2Toot(feed_data):
39 | feed_data = TwitterFilter(feed_data)
40 | if path.exists('db.txt'):
41 | historyList = [line.rstrip('\n') for line in open('db.txt')]
42 | else:
43 | historyList = []
44 |
45 | for tweet in reversed(feed_data):
46 | if not path.exists('temp'):
47 | makedirs('temp')
48 |
49 | if tweet['id'] not in historyList:
50 | print('INFO: decode ' + tweet['id'])
51 | tweet_decoded = TweetDecoder(tweet)
52 | print('INFO: download ' + tweet['id'])
53 | try:
54 | toot_content = MediaDownloader(tweet_decoded)
55 | print('INFO: download succeed ' + tweet['id'])
56 |             except Exception:
57 |                 print('ERRO: download failed ' + tweet['id'])
58 |                 # skip posting and leave this id out of the history so it is retried next run
59 |                 continue
60 |             print('INFO: post toot ' + tweet['id'])
61 |             try:
62 |                 TootPoster(toot_content)
63 |                 print('INFO: post succeed ' + tweet['id'])
64 |             except Exception:
65 |                 print('ERRO: post failed ' + tweet['id'])
66 |             historyList.append(tweet['id'])
67 | 
68 |         if path.exists('temp'):
69 |             shutil.rmtree('temp')
70 | 
71 |     print('INFO: save history to db.txt')
72 |     with open('db.txt', 'w+') as db:
73 |         for row in historyList:
74 |             db.write(str(row) + '\n')
75 | 
76 | if __name__ == '__main__':
77 |     test_feed = [{
78 |         'title': "content",
79 |         'summary': 'content',
80 |         'id': 'https://twitter.com/zlj517/status/1266540485973180416',
81 |         'link': 'https://twitter.com/zlj517/status/1266540485973180416',
82 |     }]
83 |     Feed2Toot(test_feed)
--------------------------------------------------------------------------------
/utils/feed_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on May 29, 2020
5 | Desc: RSS feed parser
6 | Author: Mashiro
7 | URL: https://2heng.xin
8 | License: MIT
9 | """
10 | import feedparser
11 |
12 | def FeedParaser(rss_link):
13 |     """
14 |     :param rss_link: RSS feed URL
15 |     :return: list of dicts with 'title', 'summary', 'id' and 'link' for each entry
16 |     """
17 | RssHubFeed = feedparser.parse(rss_link)
18 |
19 | rss = []
20 |
21 | for item in RssHubFeed.entries:
22 |         data = {}
23 |         # for detail in item.keys():
24 |         #     data[detail] = item[detail]
25 |         data['title'] = item['title']
26 |         data['summary'] = item['summary']
27 |         data['id'] = item['id']
28 |         data['link'] = item['link']
29 |         rss.append(data)
30 |
31 | # print(rss)
32 | return rss
33 |
34 | if __name__ == '__main__':
35 | print(str(FeedParaser("https://rsshub.app/twitter/user/jk_rowling")))
--------------------------------------------------------------------------------
/utils/get_config.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | 
3 | # conf.ini is parsed once at import time; every module shares this instance
4 | config = configparser.ConfigParser()
5 | config.read('conf.ini')
6 | 
7 | def GetConfig():
8 |     """Return the shared ConfigParser instance."""
9 |     return config
--------------------------------------------------------------------------------
/utils/media_downloader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on May 29, 2020
5 | Desc: Media file downloader
6 | Author: Mashiro
7 | URL: https://2heng.xin
8 | License: MIT
9 | """
10 | import urllib.request
11 | import ffmpy
12 | from .get_config import GetConfig
13 |
14 | config = GetConfig()
15 |
16 | def MediaDownloader(data):
17 |     """
18 |     :param data: dict returned by TweetDecoder
19 |     :return: dict with 'gif_count', 'video_count', 'image_count', 'plain' and 'video_link'; each *_count is one past the number of files saved to temp/
20 |     """
21 | # set header
22 | opener = urllib.request.build_opener()
23 |     opener.addheaders = [
24 | ('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36')]
25 | urllib.request.install_opener(opener)
26 |
27 | res = {'gif_count': None, 'video_count': None, 'image_count': None, 'plain': None, 'video_link': None}
28 |
29 | if data['image']:
30 | img_id = 1
31 | for url in data['image']:
32 | if (img_id <= 4):
33 | try:
34 | urllib.request.urlretrieve(url.replace('https://pbs.twimg.com/',config['TWITTER']['ImageProxy']), 'temp/img'+str(img_id)+'.png')
35 | img_id = img_id+1
36 | except Exception:
37 | print(f'ERRO: failed[img]: {url}')
38 | # for e in Exception:
39 | # print(e)
40 |
41 | res['image_count']=img_id
42 |
43 | if data['gif']:
44 | gif_id = 1
45 | for url in data['gif']:
46 | if (gif_id <= 4):
47 | try:
48 | urllib.request.urlretrieve(url.replace('https://video.twimg.com/',config['TWITTER']['VideoProxy']), 'temp/gif'+str(gif_id)+'.mp4')
49 | ff = ffmpy.FFmpeg(
50 | inputs={'temp/gif'+str(gif_id)+'.mp4': None},
51 | outputs={'temp/gif'+str(gif_id)+'.gif': ['-vf', 'fps=10,scale=\'min(600,iw)\':-1:flags=lanczos', '-y']}
52 | )
53 | ff.run()
54 | gif_id = gif_id+1
55 | except Exception:
56 | print(f'ERRO: failed[gif]: {url}')
57 | try:
58 | urllib.request.urlretrieve(data['gif_poster'][gif_id-1].replace('https://pbs.twimg.com/',config['TWITTER']['ImageProxy']), 'temp/gif'+str(gif_id)+'.gif')
59 | gif_id = gif_id+1
60 | except Exception:
61 | print(f'ERRO: failed[gif]: {url}')
62 | # for e in Exception:
63 | # print(e)
64 |
65 | res['gif_count']=gif_id
66 |
67 | if data['video']:
68 | video_id = 1
69 | for url in data['video']:
70 | if (video_id <= 1):
71 | try:
72 | if config['MASTODON']['IncludeVideo'] != 'false':
73 | urllib.request.urlretrieve(url.replace('https://video.twimg.com/',config['TWITTER']['VideoProxy']), 'temp/video'+str(video_id)+'.mp4')
74 |
75 | urllib.request.urlretrieve(data['video_poster'][video_id-1].replace('https://pbs.twimg.com/',config['TWITTER']['ImageProxy']), 'temp/video'+str(video_id)+'.png')
76 | res['video_link']=url
77 | video_id = video_id+1
78 | except Exception:
79 | print(f'ERRO: failed[vid]: {url}')
80 | # for e in Exception:
81 | # print(e)
82 |
83 | res['video_count']=video_id
84 |
85 | res['plain']=data['plain']
86 |
87 | return res
88 |
89 | if __name__ == '__main__':
90 | test_data = {'gif': ['https://video.twimg.com/tweet_video/EZLxKmTUMAARbSa.mp4'], 'gif_poster': ['https://pbs.twimg.com/tweet_video_thumb/EZLxKmTUMAARbSa.jpg'], 'video': ['https://video.twimg.com/ext_tw_video/1265470079203827712/pu/vid/1280x720/B-BRCBM0djUAqJl0.mp4?tag=10'], 'video_poster': ['https://pbs.twimg.com/ext_tw_video_thumb/1265470079203827712/pu/img/VujsmqbQORfHDeCP.jpg'], 'image': ['https://pbs.twimg.com/media/EZJh5RPUMAEz4aS?format=jpg&name=orig','https://s3-view.2heng.xin/aws_cached/2019/07/14/53c2adbc381e3aa17968d5d36feee002.md.png', 'https://s3-view.2heng.xin/aws_cached/2020/05/19/b1a7d8ff391616ad152f9958c6302ba0.md.jpg', 'https://s3-view.2heng.xin/aws_cached/2020/05/18/671a82563dfe40885196166683bf6f0b.md.jpg'], 'plain': '流程图工具 Excalidraw 可以做出下面这样的图示效果,可惜中文没有手写效果。 https://excalidraw.com/ '}
91 | MediaDownloader(test_data)
--------------------------------------------------------------------------------
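For reference, the `ffmpy.FFmpeg` call in `MediaDownloader` simply wraps an `ffmpeg` command line. A standalone sketch of the same GIF conversion step, assuming an `ffmpeg` binary on `PATH` (see the README) and a downloaded `temp/gif1.mp4`, mirroring the paths used above:

```
import ffmpy

ff = ffmpy.FFmpeg(
    inputs={'temp/gif1.mp4': None},
    outputs={'temp/gif1.gif': ['-vf', "fps=10,scale='min(600,iw)':-1:flags=lanczos", '-y']},
)
print(ff.cmd)  # prints the underlying ffmpeg command line
# ff.run()     # runs the conversion; requires ffmpeg on PATH
```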
/utils/toot_poster.py:
--------------------------------------------------------------------------------
1 | from mastodon import Mastodon
2 | import filetype
3 | from .get_config import GetConfig
4 |
5 | config = GetConfig()
6 |
7 | mastodon = Mastodon(
8 | access_token = config['MASTODON']['AccessToken'],
9 | api_base_url = config['MASTODON']['BaseUrl']
10 | )
11 |
12 | def media_post(file):
13 | kind = filetype.guess(file)
14 | # print('File extension: %s' % kind.extension)
15 | # print('File MIME type: %s' % kind.mime)
16 | return mastodon.media_post(file, kind.mime)
17 |
18 | def TootPoster(data):
19 |     """
20 |     :param data: dict returned by MediaDownloader
21 |     :return: None
22 |     """
23 | media_ids_arr = []
24 |
25 | if data['video_count'] is not None:
26 | id=1
27 | if config['MASTODON']['IncludeVideo'] == 'false':
28 | media_ids_arr.append(media_post('temp/video%d.png' % id))
29 | data['plain'] = data['plain'] + '\n'+config['MASTODON']['VideoSourcePrefix']+' ' + data['video_link']
30 | else:
31 | try:
32 | media_ids_arr.append(media_post('temp/video%d.mp4' % id))
33 | except Exception:
34 | media_ids_arr.append(media_post('temp/video%d.png' % id))
35 | data['plain'] = data['plain'] + '\n'+config['MASTODON']['VideoSourcePrefix']+' ' + data['video_link']
36 |
37 | else:
38 | if data['image_count'] is not None:
39 | for id in range(1, data['image_count']):
40 | media_ids_arr.append(media_post('temp/img%d.png' % id))
41 |
42 | if data['gif_count'] is not None:
43 | for id in range(1, data['gif_count']):
44 | media_ids_arr.append(media_post('temp/gif%d.gif' % id))
45 |
46 |     while len(media_ids_arr) > 4:  # Mastodon allows at most 4 attachments per status
47 | media_ids_arr.pop()
48 |
49 | mastodon.status_post(status=data['plain'], media_ids=media_ids_arr, visibility=config['MASTODON']['TootVisibility'])
50 |
51 | if __name__ == '__main__':
52 | test_data = {'gif_count': 1, 'video_count': None, 'image_count': 3, 'plain': 'Tooting from python using `status_post` #mastodonpy !'}
53 | TootPoster(test_data)
--------------------------------------------------------------------------------
/utils/tweet_decoder.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on May 29, 2020
5 | Desc: Twitter HTML parser
6 | Author: Mashiro
7 | URL: https://2heng.xin
8 | License: MIT
9 | """
10 | from bs4 import BeautifulSoup
11 | from html import unescape
12 | from .get_config import GetConfig
13 |
14 | config = GetConfig()
15 |
16 | def TweetDecoder(rss_data):
17 |     """
18 |     :param rss_data: a single entry dict from FeedParaser
19 |     :return: dict with media URLs ('gif', 'gif_poster', 'video', 'video_poster', 'image') and the plain text ('plain')
20 |     """
21 | soup = BeautifulSoup(rss_data['summary'], features='html.parser')
22 |
23 | data = {
24 | 'gif': [],
25 | 'gif_poster': [],
26 | 'video': [],
27 | 'video_poster': [],
28 | 'image': [],
29 | 'plain': None
30 | }
31 |
32 | for link in soup.find_all('a'):
33 | link.replace_with(' ' + link.get('href') + ' ')
34 |
35 | for video in soup.find_all('video'):
36 | # print(video.get('src'))
37 | if ('https://video.twimg.com/tweet_video' in video.get('src')):
38 | data['gif'].append(video.get('src'))
39 | data['gif_poster'].append(video.get('poster'))
40 | video.replace_with('')
41 | if ('https://video.twimg.com/ext_tw_video' in video.get('src')):
42 | data['video'].append(video.get('src'))
43 | data['video_poster'].append(video.get('poster'))
44 | video.replace_with('')
45 | if ('https://video.twimg.com/amplify_video' in video.get('src')):
46 | data['video'].append(video.get('src'))
47 | data['video_poster'].append(video.get('poster'))
48 | video.replace_with('')
49 |
50 | for image in soup.find_all('img'):
51 |         # print(image.get('src'))
52 | data['image'].append(image.get('src'))
53 | image.replace_with('')
54 |
55 | for br in soup.find_all('br'):
56 | br.replace_with('\n')
57 |
58 | # print(soup.prettify())
59 | # print(str(data))
60 | if config['MASTODON']['ShowSource'] == 'true':
61 | data['plain'] = unescape(soup.prettify()) + '\n'+config['MASTODON']['TweetSourcePrefix']+' ' + rss_data['link']
62 | else:
63 | data['plain'] = unescape(soup.prettify())
64 | return data
65 |
66 | if __name__ == '__main__':
67 | test_normal = """
68 | 流程图工具 Excalidraw 可以做出下面这样的图示效果,可惜中文没有手写效果。https://excalidraw.com/https://2heng.xin/



69 | """
70 |
71 | test_gif = """
72 | 【Vitafield Rewilder Series - Wilted Cypress - Firewatch】
Now available at the Store until June 10, 03:59(UTC-7)!
#Arknights #Yostar
73 | """
74 |
75 | test_video = """
76 | Arknights Official Trailer – Code of Brawl
"Doctor, relying on me isn't a very wise decision"
HD version:
#Arknights #Yostar http://youtu.be/SJ1qvqEmkVQ
77 | """
78 | print(TweetDecoder(test_video))
--------------------------------------------------------------------------------