├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── weibo2toot.iml
├── LICENSE
├── README.md
├── conf.sample.ini
├── requirements.txt
├── run.py
└── utils
    ├── __init__.py
    ├── feed2toot.py
    ├── feed_decoder.py
    ├── feed_parser.py
    ├── get_config.py
    ├── media_downloader.py
    └── toot_poster.py

/.gitignore:
--------------------------------------------------------------------------------
temp
ffmpeg.exe
conf.ini
db.txt
__pycache__
venv

--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
(XML content not preserved)

--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
(XML content not preserved)

--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
(XML content not preserved)

--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
(XML content not preserved)

--------------------------------------------------------------------------------
/.idea/weibo2toot.iml:
--------------------------------------------------------------------------------
(XML content not preserved)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 Mashiro / bili2toot contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Moved to
项目已重构 (the project has been refactored):

# weibo2toot

A simple script that transports Weibo content to Mastodon, based on the Weibo RSS feed provided by [RSSHub](https://rsshub.app).

一个将微博搬运到长毛象的脚本——基于[RSSHub](https://rsshub.app)生成的微博RSS。
## Notes

1. Weibo videos sit behind strict anti-hotlinking measures, and too many requests within a short period can start returning 403. A "video download failed" message in the output is therefore harmless: by default the video thumbnail is attached instead, and the original video link is written into the toot (preserving the Weibo style). A minimal download-with-fallback sketch follows these notes.

2. `TypeError: Cannot read property 'screen_name' of undefined`
   This error comes from the RSSHub side: some Weibo accounts are only visible after logging in and cannot be subscribed to. You can verify a given account by opening https://m.weibo.cn/u/:uid.

3. Emoticon handling: there is currently no good way to index and batch-process Weibo emoticons (they would first have to be scraped in bulk and uploaded as Mastodon custom emoji), so emoticons are not handled for now.
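For readers curious what the fallback in note 1 amounts to in code, here is a minimal, hedged sketch in the spirit of `utils/media_downloader.py`. The function name `fetch_video_or_poster` and the destination paths are illustrative assumptions, not identifiers used by this project:

```python
# Hedged sketch of the thumbnail fallback described in note 1; not part of the repo.
import urllib.request
import urllib.error


def fetch_video_or_poster(video_url: str, poster_url: str, dest: str) -> str:
    """Try to download the video; on an HTTP error (typically 403) save the poster instead.

    Returns the path of whichever file was actually written.
    """
    # Weibo's CDN checks the Referer header, so mimic a browser request,
    # much like utils/media_downloader.py installs its opener.
    opener = urllib.request.build_opener()
    opener.addheaders = [
        ('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'),
        ('Referer', 'https://weibo.com/'),
    ]
    urllib.request.install_opener(opener)

    try:
        path = dest + '/video1.mp4'
        urllib.request.urlretrieve(video_url, path)
        return path
    except urllib.error.HTTPError as e:
        # Usually 403 once the anti-hotlinking rate limit kicks in.
        print(f'video download failed ({e.code}), falling back to thumbnail')
        path = dest + '/video1.png'
        urllib.request.urlretrieve(poster_url, path)
        return path
```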
## Usage

```
pip3 install -r requirements.txt
cp conf.sample.ini conf.ini
nano conf.ini
python3 run.py
```

Crontab job setup:
```
crontab -e
```
or (Ubuntu 18.04):
```
nano /etc/crontab
/etc/init.d/cron restart
```

It is recommended to run the job every 20 minutes (RSSHub's default cache period):
```
#m h dom mon dow user command
*/20 * * * * root cd /weibo2toot && python3 run.py
```

--------------------------------------------------------------------------------
/conf.sample.ini:
--------------------------------------------------------------------------------
[PROXY]
ProxyOn = false
HttpProxy = http://127.0.0.1:7890
HttpsProxy = https://127.0.0.1:7890

[MASTODON]
BaseUrl = https://hello.2heng.xin/
# register your application here: https://hello.2heng.xin/settings/applications
AccessToken = your_app_token
# 'direct'   - post will be visible only to mentioned users
# 'private'  - post will be visible only to followers
# 'unlisted' - post will be public but not appear on the public timeline
# 'public'   - post will be public
TootVisibility = unlisted
IncludeVideo = false
SourcePrefix = :icon_weibo:
ExternalLinkPrefix = :sys_link:
VideoSourcePrefix = :sys_video:

[WEIBO]
WeiboRss = https://rsshub.app/weibo/user/3264072325
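Before wiring `run.py` into cron, it can be worth confirming that the `[MASTODON]` values in `conf.ini` actually work. The snippet below is a hedged, standalone sketch (not part of this repository) that reuses the same Mastodon.py client the project already depends on; it assumes `conf.ini` has been filled in and sits in the working directory:

```python
# Hedged sketch: sanity-check the BaseUrl / AccessToken pair before scheduling run.py.
import configparser
from mastodon import Mastodon

config = configparser.ConfigParser()
config.read('conf.ini')

client = Mastodon(
    access_token=config['MASTODON']['AccessToken'],
    api_base_url=config['MASTODON']['BaseUrl'],
)

# If the token is invalid, Mastodon.py should raise an error here
# (e.g. MastodonUnauthorizedError) instead of returning the account.
me = client.account_verify_credentials()
print('Token OK, posting as:', me['username'])
```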
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
beautifulsoup4==4.9.1
Pillow==9.0.1
ffmpy==0.2.3
feedparser==5.2.1
Mastodon.py==1.5.1
filetype==1.0.7

--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Created on May 29, 2020
Desc: Weibo feed to toot (based on RSSHub's feed)
Author: Mashiro
URL: https://2heng.xin
License: MIT
"""
from utils.feed_parser import FeedParaser
from utils.feed2toot import Feed2Toot
from utils.get_config import GetConfig
import os

config = GetConfig()

if __name__ == '__main__':
    # Route all requests through the configured proxy when ProxyOn is enabled.
    if config['PROXY']['ProxyOn'] == 'true':
        os.environ['HTTP_PROXY'] = config['PROXY']['HttpProxy']
        os.environ['HTTPS_PROXY'] = config['PROXY']['HttpsProxy']

    RSS_dict = FeedParaser(config['WEIBO']['WeiboRss'])
    Feed2Toot(RSS_dict)

--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mashirozx/weibo2toot/32f0d3a4baf0bae589277b3c2b2136f0940fd5b7/utils/__init__.py

--------------------------------------------------------------------------------
/utils/feed2toot.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Created on May 29, 2020
Desc: feed to toot
Author: Mashiro
URL: https://2heng.xin
License: MIT
"""
from os import path, makedirs
import shutil
from .feed_decoder import TweetDecoder
from .media_downloader import MediaDownloader
from .toot_poster import TootPoster

def Feed2Toot(feed_data):
    # db.txt keeps the ids of entries that have already been posted.
    if path.exists('db.txt'):
        with open('db.txt') as db:
            historyList = [line.rstrip('\n') for line in db]
    else:
        historyList = []

    for tweet in reversed(feed_data):
        if not path.exists('temp'):
            makedirs('temp')

        if tweet['id'] not in historyList:
            print('INFO: decode ' + tweet['id'])
            tweet_decoded = TweetDecoder(tweet)
            print('INFO: download ' + tweet['id'])
            try:
                toot_content = MediaDownloader(tweet_decoded)
                print('INFO: download succeed ' + tweet['id'])
            except Exception as e:
                print('ERRO: download failed ' + tweet['id'])
                print(e)
                # skip this entry; it is not added to the history, so it will be retried on the next run
                continue
            print('INFO: post toot ' + tweet['id'])
            try:
                TootPoster(toot_content)
                print('INFO: post succeed ' + tweet['id'])
            except Exception as e:
                print('ERRO: post failed ' + tweet['id'])
                print(e)
            historyList.append(tweet['id'])

            if path.exists('temp'):
                shutil.rmtree('temp')

            print('INFO: save to db ' + tweet['id'])
            with open('db.txt', 'w+') as db:
                for row in historyList:
                    db.write(str(row) + '\n')

if __name__ == '__main__':
    test_feed = [{
        'title': "content",
        'summary': 'content',
        'id': 'https://twitter.com/zlj517/status/1266540485973180416',
        'link': 'https://twitter.com/zlj517/status/1266540485973180416',
    }]
    Feed2Toot(test_feed)
--------------------------------------------------------------------------------
/utils/feed_decoder.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Created on May 29, 2020
Desc: Weibo RSS summary (HTML) parser
Author: Mashiro
URL: https://2heng.xin
License: MIT
"""
from bs4 import BeautifulSoup
from html import unescape
import re
from .get_config import GetConfig

config = GetConfig()

def TweetDecoder(rss_data):
    """
    :param rss_data: a feed entry dict from FeedParaser (uses 'summary' and 'link')
    :return: dict with 'video', 'video_poster' and 'image' URL lists plus a 'plain' text body
    """
    soup = BeautifulSoup(rss_data['summary'], features='html.parser')

    data = {
        'video': [],
        'video_poster': [],
        'image': [],
        'plain': None
    }

    # Rewrite anchors into plain text, keeping short-link targets visible in the toot.
    for link in soup.find_all('a'):
        # link.replace_with(' ' + link.get('href') + ' ')
        if (link.has_attr('data-url')):
            if ('://t.cn/' in link.get('data-url')):
                if ('微博视频' in link.getText()):
                    link.replace_with(f'''[?bs4_replace_flag?] {config['MASTODON']['VideoSourcePrefix']} {link.getText()} {link.get('data-url')} [?bs4_replace_flag?]''')
                else:
                    link.replace_with(f'''[?bs4_replace_flag?] {config['MASTODON']['ExternalLinkPrefix']} {link.getText()} {link.get('data-url')} [?bs4_replace_flag?]''')
            else:
                link.replace_with(f'''[?bs4_replace_flag?] {config['MASTODON']['ExternalLinkPrefix']} {link.getText()} {link.get('href')} [?bs4_replace_flag?]''')
        elif (link.getText()[-1] == '#'):
            link.replace_with(f'''[?bs4_replace_flag?] {link.getText()[:-1]} [?bs4_replace_flag?]''')
        else:
            link.replace_with('[?bs4_replace_flag?]' + link.getText() + '[?bs4_replace_flag?]')

    # Weibo emoticons arrive as <span class="url-icon"><img alt="[...]"></span>;
    # keep the alt text instead of the image.
    for span in soup.find_all('span'):
        if (span.get('class') and 'url-icon' in span.get('class')):
            # print(span)
            img = span.find('img')
            img.replace_with(img.get('alt'))

    for video in soup.find_all('video'):
        # print(video.get('src'))
        if ('://f.video.weibocdn.com' in video.get('src')):
            # need to add a referer, i guess.
            data['video'].append(video.get('src'))
            data['video_poster'].append(video.get('poster'))
            video.replace_with('')

    for image in soup.find_all('img'):
        # print(image.get('src'))
        data['image'].append(image.get('src'))
        image.replace_with('')

    # Mark explicit line breaks so they survive the later prettify/strip pass.
    for br in soup.find_all('br'):
        br.replace_with('<|n>')

    for span in soup.find_all('span'):
        span.replace_with('')
        # span.replace_with(span.text)

    for div in soup.find_all('div'):
        div.replace_with('')

    for blockquote in soup.find_all('blockquote'):
        blockquote.unwrap()

    # print(soup.prettify())
    # print(str(data))
    plain_content = unescape(soup.prettify())
    plain_content = plain_content.replace('\xa0', ' ')  # non-breaking spaces left over from &nbsp;
    plain_content = (
        plain_content.replace('\n[?bs4_replace_flag?]', ' ')
        .replace('[?bs4_replace_flag?]\n', ' ')
        .replace('[?bs4_replace_flag?]', '')
        .replace('\n- ', '\n\\- ')
        .replace('<|n>', '\n')
    )
    # plain_content = re.sub(r'(#[^#]+)#', lambda m: m.group(1) + ' ', plain_content)
    data['plain'] = plain_content + '\n' + config['MASTODON']['SourcePrefix'] + ' ' + rss_data['link']
    return data

if __name__ == '__main__':
    # TweetDecoder expects a feed entry dict, so wrap the sample summary accordingly.
    test_video = """
    Xin Chun Kuai Le to my dear friends in China! Follow @GranityStudios to discover all the New Year gifts from me. KobeBryant的微博视频
    """
    print(TweetDecoder({'summary': test_video, 'link': 'https://weibo.com/'}))
--------------------------------------------------------------------------------
/utils/feed_parser.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Created on May 29, 2020
Desc: RSS feed parser
Author: Mashiro
URL: https://2heng.xin
License: MIT
"""
import feedparser

def FeedParaser(rss_link):
    """
    :param rss_link: RSS feed URL
    :return: list of entry dicts with 'title', 'summary', 'id' and 'link'
    """
    RssHubFeed = feedparser.parse(rss_link)

    rss = []

    for item in RssHubFeed.entries:
        data = {}
        # for detail in item.keys():
        #     data[detail] = item[detail]
        data['title'] = item['title']
        data['summary'] = item['summary']
        data['id'] = item['id']
        data['link'] = item['link']
        rss.append(data)

    # print(rss)
    return rss

if __name__ == '__main__':
    print(str(FeedParaser("https://rsshub.app/bilibili/user/dynamic/161775300")))

--------------------------------------------------------------------------------
/utils/get_config.py:
--------------------------------------------------------------------------------
import configparser

# conf.ini is read once at import time; GetConfig() simply hands the parsed object around.
config = configparser.ConfigParser()
config.read('conf.ini')

def GetConfig():
    return config

--------------------------------------------------------------------------------
/utils/media_downloader.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Created on May 29, 2020
Desc: Media file downloader
Author: Mashiro
URL: https://2heng.xin
License: MIT
"""
import urllib.request
# import ffmpy
from .get_config import GetConfig

config = GetConfig()

def MediaDownloader(data):
    """
    :param data: dict returned by TweetDecoder
    :return: {'video_count': ..., 'image_count': ..., 'plain': str, 'video_link': ...}
             Note: the *_count values are stored as (number of files downloaded + 1),
             matching the 1-based temp file names that TootPoster expects.
    """
    # Weibo's CDN rejects anonymous requests, so install an opener that
    # mimics a browser and sends a weibo.com Referer.
    opener = urllib.request.build_opener()
    opener.addheaders = []
    opener.addheaders.append(('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'))
    opener.addheaders.append(('Referer', 'https://weibo.com/'))
    urllib.request.install_opener(opener)

    res = {'video_count': None, 'image_count': None, 'plain': None, 'video_link': None}

    if data['image']:
        img_id = 1
        for url in data['image']:
            if (img_id <= 4):  # Mastodon allows at most four attachments per toot
                try:
                    urllib.request.urlretrieve(url, 'temp/img' + str(img_id) + '.png')
                    img_id = img_id + 1
                except Exception as e:
                    print(f'ERRO: failed[img]: {url}')
                    print(e)

        res['image_count'] = img_id

    if data['video']:
        video_id = 1
        for url in data['video']:
            if (video_id <= 1):  # only the first video (or its poster image) is kept
                try:
                    if config['MASTODON']['IncludeVideo'] != 'false':
                        urllib.request.urlretrieve(url, 'temp/video' + str(video_id) + '.mp4')

                    urllib.request.urlretrieve(data['video_poster'][video_id - 1], 'temp/video' + str(video_id) + '.png')
                    res['video_link'] = url
                    video_id = video_id + 1
                except Exception as e:
                    print(f'ERRO: failed[vid]: {url}')
                    print(e)

        res['video_count'] = video_id

    res['plain'] = data['plain']

    return res

if __name__ == '__main__':
    test_data = {'gif': ['https://video.twimg.com/tweet_video/EZLxKmTUMAARbSa.mp4'], 'gif_poster': ['https://pbs.twimg.com/tweet_video_thumb/EZLxKmTUMAARbSa.jpg'], 'video': ['https://video.twimg.com/ext_tw_video/1265470079203827712/pu/vid/1280x720/B-BRCBM0djUAqJl0.mp4?tag=10'], 'video_poster': ['https://pbs.twimg.com/ext_tw_video_thumb/1265470079203827712/pu/img/VujsmqbQORfHDeCP.jpg'], 'image': ['https://pbs.twimg.com/media/EZJh5RPUMAEz4aS?format=jpg&name=orig', 'https://s3-view.2heng.xin/aws_cached/2019/07/14/53c2adbc381e3aa17968d5d36feee002.md.png', 'https://s3-view.2heng.xin/aws_cached/2020/05/19/b1a7d8ff391616ad152f9958c6302ba0.md.jpg', 'https://s3-view.2heng.xin/aws_cached/2020/05/18/671a82563dfe40885196166683bf6f0b.md.jpg'], 'plain': '流程图工具 Excalidraw 可以做出下面这样的图示效果,可惜中文没有手写效果。 https://excalidraw.com/ '}
    MediaDownloader(test_data)
--------------------------------------------------------------------------------
/utils/toot_poster.py:
--------------------------------------------------------------------------------
from mastodon import Mastodon
import filetype
from .get_config import GetConfig

config = GetConfig()

mastodon = Mastodon(
    access_token = config['MASTODON']['AccessToken'],
    api_base_url = config['MASTODON']['BaseUrl']
)

def media_post(file):
    # Detect the real MIME type; the temp files keep a .png/.mp4 name
    # regardless of what was actually downloaded.
    kind = filetype.guess(file)
    # print('File extension: %s' % kind.extension)
    # print('File MIME type: %s' % kind.mime)
    mime = kind.mime if kind is not None else None
    return mastodon.media_post(file, mime)

def TootPoster(data):
    """
    :param data: dict returned by MediaDownloader
    :return: None
    """
    media_ids_arr = []

    if data['video_count'] is not None:
        id = 1
        if config['MASTODON']['IncludeVideo'] == 'false':
            media_ids_arr.append(media_post('temp/video%d.png' % id))
            # data['plain'] = data['plain'] + '\n' + config['MASTODON']['VideoSourcePrefix'] + ' ' + data['video_link']
        else:
            try:
                media_ids_arr.append(media_post('temp/video%d.mp4' % id))
            except Exception:
                media_ids_arr.append(media_post('temp/video%d.png' % id))
                # data['plain'] = data['plain'] + '\n' + config['MASTODON']['VideoSourcePrefix'] + ' ' + data['video_link']

    if data['image_count'] is not None:
        # image_count is stored as (number of images + 1), so this attaches img1..imgN.
        for id in range(1, min(data['image_count'], 5)):
            media_ids_arr.append(media_post('temp/img%d.png' % id))

    try:
        mastodon.status_post(status=data['plain'], media_ids=media_ids_arr, visibility=config['MASTODON']['TootVisibility'])
    except Exception as e:
        print('ERRO: failed[mastodon.status_post]')
        print(e)

if __name__ == '__main__':
    test_data = {'gif_count': 1, 'video_count': None, 'image_count': 3, 'plain': 'Tooting from python using `status_post` #mastodonpy !'}
    TootPoster(test_data)

--------------------------------------------------------------------------------