├── .coveragerc ├── .dockerignore ├── .flaskenv ├── .github └── FUNDING.yml ├── .gitignore ├── Dockerfile ├── Pipfile ├── Pipfile.lock ├── README.md ├── app.json ├── flask.conf ├── gunicorn.conf ├── main.py ├── requirements.txt ├── rsshub ├── __init__.py ├── blueprints │ ├── __init__.py │ └── main.py ├── config.py ├── extensions.py ├── google_analytics.py ├── spiders │ ├── __init__.py │ ├── aisixiang │ │ └── search.py │ ├── appstore │ │ └── top.py │ ├── baidu │ │ └── suggest.py │ ├── bbwc │ │ └── realtime.py │ ├── benzinga │ │ └── ratings.py │ ├── bjnews │ │ └── channel.py │ ├── caixin │ │ └── scroll.py │ ├── chaindd │ │ └── column.py │ ├── chouti │ │ ├── search.py │ │ ├── section.py │ │ └── user.py │ ├── chuansongme │ │ └── articles.py │ ├── cls │ │ ├── subject.py │ │ └── telegraph.py │ ├── cninfo │ │ └── announcement.py │ ├── csrc │ │ └── audit.py │ ├── ctolib │ │ └── topics.py │ ├── dxzg │ │ └── notice.py │ ├── earningsdate │ │ ├── businesswire.py │ │ ├── globenewswire.py │ │ └── prnewswire.py │ ├── eastmoney │ │ └── report.py │ ├── economist │ │ └── worldbrief.py │ ├── futu │ │ └── live.py │ ├── hnzcy │ │ └── bidding.py │ ├── infoq │ │ ├── profile.py │ │ ├── recommend.py │ │ ├── search.py │ │ └── topic.py │ ├── interotc │ │ └── cpgg.py │ ├── jiemian │ │ └── newsflash.py │ ├── jintiankansha │ │ └── column.py │ ├── mp │ │ ├── gh.py │ │ ├── rtag.py │ │ ├── tag.py │ │ └── youwuqiong.py │ ├── nasdaq │ │ └── symbol_change.py │ ├── netease │ │ └── comment.py │ ├── nhk │ │ ├── newseasy.py │ │ └── topic.py │ ├── pgyer │ │ └── app.py │ ├── producthunt │ │ └── search.py │ ├── readhub │ │ └── topic.py │ ├── rssfilter │ │ └── filter.py │ ├── sysu │ │ └── ifcen.py │ ├── tadoku │ │ └── books.py │ ├── techcrunch │ │ └── tag.py │ ├── weiyangx │ │ ├── express.py │ │ ├── home.py │ │ └── tag.py │ ├── word │ │ ├── Pipfile │ │ ├── ieltswords_cn.txt │ │ ├── ieltswords_同义词.txt │ │ ├── toeflwords.txt │ │ └── word.py │ ├── xinhuanet │ │ ├── shizhenglianbo.py │ │ ├── utils.py │ │ ├── world.py │ │ ├── yaodianjujiao.py │ │ └── zuixinbobao.py │ ├── xuangubao │ │ └── xuangubao.py │ ├── yfchuhai │ │ └── express.py │ ├── zaobao │ │ └── realtime.py │ └── zhihu │ │ ├── article.py │ │ ├── collection.py │ │ ├── explore.py │ │ └── roundtable.py ├── static │ ├── css │ │ └── style.css │ └── favicon.ico ├── templates │ ├── errors │ │ ├── 400.html │ │ ├── 404.html │ │ └── 500.html │ ├── layout.html │ └── main │ │ ├── atom.xml │ │ ├── feeds.html │ │ ├── index.html │ │ └── word.html └── utils.py ├── setup.py ├── supervisord.conf ├── tests ├── __init__.py ├── base.py ├── test_cli.py ├── test_errors.py └── test_main.py └── vercel.json /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = rsshub -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | vercel.json 2 | tests 3 | bin 4 | Pipfile 5 | Pipfile.lock -------------------------------------------------------------------------------- /.flaskenv: -------------------------------------------------------------------------------- 1 | FLASK_ENV=development 2 | FLASK_APP=rsshub 3 | FLASK_DEBUG=1 -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames 
e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: hillerliao 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | .pytest_cache 5 | 6 | # Distribution / packaging 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | .idea 11 | venv 12 | 13 | # Others 14 | .vscode 15 | .coverage 16 | htmlcov/ 17 | data-dev.db 18 | 19 | .env 20 | #Dockerfile 21 | # google_analytics.py 22 | .deta 23 | .vercel -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用官方的 Python 镜像作为基础镜像 2 | FROM python:3.8-slim 3 | 4 | # 设置工作目录 5 | WORKDIR /app 6 | 7 | # 复制应用程序代码 8 | COPY . . 9 | 10 | # 安装Python依赖 11 | RUN pip install --no-cache-dir -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple 12 | 13 | # 暴露端口 14 | EXPOSE 5000 15 | 16 | # 启动应用程序 17 | CMD ["gunicorn", "-b", "0.0.0.0:5000", "main:app"] 18 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://mirrors.aliyun.com/pypi/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | beautifulsoup4 = "*" 8 | bootstrap-flask = "*" 9 | feedparser = "6.0.8" 10 | flask-debugtoolbar = "*" 11 | flask-moment = "*" 12 | Flask = "*" 13 | python-dotenv = "*" 14 | markupsafe = "2.0.1" 15 | gunicorn = "*" 16 | requests = "*" 17 | parsel = "*" 18 | flask-script = "*" 19 | icecream = "*" 20 | flask-analytics = "*" 21 | arrow = "1.2.2" 22 | undetected-chromedriver = "*" 23 | pyjsparser = "*" 24 | charset-normalizer = "*" 25 | pyppeteer = "*" 26 | flask-caching = "*" 27 | 28 | [dev-packages] 29 | coverage = "*" 30 | pdir2 = "*" 31 | ptpython = "*" 32 | yapf = "*" 33 | pylint = "*" 34 | 35 | [scripts] 36 | test = "python -m unittest discover" 37 | coverage = "coverage run -m unittest discover" 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RSSHub 2 | 3 | > 🍰 万物皆可 RSS 4 | 5 | RSSHub 是一个轻量、易于扩展的 RSS 生成器,可以给任何奇奇怪怪的内容生成 RSS 订阅源 6 | 7 | 本项目是[原RSSHub](https://github.com/DIYgod/RSSHub)的Python实现。 8 | 9 | 10 | **其实用Python写爬虫要比JS更方便:p** 11 | 12 | DEMO地址:https://pyrsshub.vercel.app 13 | 14 | 15 | ## 交流 16 | 17 | Discord Server: [https://discord.gg/4BZBZuyx7p](https://discord.gg/4BZBZuyx7p) 18 | 19 | ## RSS过滤 20 | 21 | 你可以通过以下查询字符串来过滤RSS的内容: 22 | 23 | - include_title: 搜索标题,支持多关键词 24 | - include_description: 搜索描述 25 | - exclude_title: 排除标题 26 | - exclude_description: 排除描述 27 | - limit: 限制条数 28 | 29 | ## 贡献 RSS 方法 30 | 31 | 1. 
fork这份仓库 32 | 2. 在spiders文件夹下创建新的爬虫目录和脚本,编写爬虫,参考我的[爬虫教程](https://juejin.cn/post/6953881777756700709) 33 | 3. 在blueprints的main.py中添加对应的路由(按照之前路由的格式) 34 | 4. 在templates中的main目录下的feeds.html上写上说明文档,同样可参照格式写 35 | 5. 提pr 36 | 37 | ## 部署 38 | 39 | ### 本地测试 40 | 41 | 首先确保安装了[pipenv](https://github.com/pypa/pipenv) 42 | 43 | ``` bash 44 | git clone https://github.com/alphardex/RSSHub-python 45 | cd RSSHub-python 46 | pipenv install --dev 47 | pipenv shell 48 | flask run 49 | ``` 50 | 51 | ### 生产环境 52 | 53 | ``` bash 54 | gunicorn main:app -b 0.0.0.0:5000 55 | ``` 56 | 57 | ### 部署到 Vercel 58 | 59 | [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fhillerliao%2Frsshub-python) 60 | 61 | ### Docker 部署 62 | 63 | 创建docker容器 `docker run -dt --name pyrsshub -p 5000:5000 hillerliao/pyrsshub:latest` 64 | 65 | ## Requirements 66 | 67 | - Python 3.8 68 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "RSSHub", 3 | "description": "A rsshub powered by flask.", 4 | "repository": "https://github.com/alphardex/RSSHub-python", 5 | "keywords": [ 6 | "python", 7 | "flask" 8 | ] 9 | } -------------------------------------------------------------------------------- /flask.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80; 3 | location / { 4 | proxy_pass http://localhost:5000/; 5 | proxy_set_header Host $host; 6 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 7 | } 8 | } -------------------------------------------------------------------------------- /gunicorn.conf: -------------------------------------------------------------------------------- 1 | [program:gunicorn] 2 | command=/usr/local/bin/gunicorn main:app -b localhost:5000 3 | directory=/app -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | 4 | dotenv_path = os.path.join(os.path.dirname(__file__), '.env') 5 | if os.path.exists(dotenv_path): 6 | load_dotenv(dotenv_path) 7 | 8 | from rsshub import create_app 9 | 10 | app = create_app('production') 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | arrow==1.2.2 2 | asttokens==2.0.5 3 | beautifulsoup4==4.11.1 4 | blinker==1.4 5 | Bootstrap-Flask==1.8.0 6 | certifi==2021.10.8 7 | charset-normalizer==2.0.9 8 | click==8.0.3 9 | colorama==0.4.4 10 | cssselect==1.1.0 11 | executing==0.8.2 12 | feedparser==6.0.8 13 | Flask==2.0.2 14 | Flask-Analytics==0.6.0 15 | Flask-Caching==2.0.2 16 | Flask-DebugToolbar==0.11.0 17 | Flask-Moment==1.0.2 18 | Flask-Script==2.0.6 19 | gunicorn==20.1.0 20 | icecream==2.1.1 21 | idna==3.3 22 | itsdangerous==2.0.1 23 | Jinja2==3.0.3 24 | lxml==4.6.3 25 | MarkupSafe==2.0.1 26 | parsel==1.6.0 27 | pyppeteer==1.0.2 28 | Pygments==2.11.1 29 | python-dotenv==0.19.2 30 | requests==2.26.0 31 | six==1.16.0 32 | undetected-chromedriver==3.1.7 33 | urllib3==1.26.7 34 | w3lib==1.22.0 35 | Werkzeug==2.1.2 36 | zipp==3.6.0 37 | -------------------------------------------------------------------------------- /rsshub/__init__.py: -------------------------------------------------------------------------------- 1 
| import os 2 | from datetime import datetime 3 | import click 4 | from flask import Flask, render_template 5 | from flask.cli import with_appcontext 6 | from rsshub.config import config 7 | from rsshub.extensions import * 8 | from rsshub.blueprints.main import bp as main_bp 9 | from rsshub.utils import XMLResponse 10 | from rsshub.extensions import cache 11 | 12 | 13 | def create_app(config_name=None): 14 | if config_name is None: 15 | # config_name = os.getenv('FLASK_CONFIG', 'development') 16 | config_name = os.getenv('FLASK_CONFIG', 'production') 17 | 18 | app = Flask(__name__) 19 | app.config.from_object(config[config_name]) 20 | app.response_class = XMLResponse 21 | cache.init_app(app) 22 | 23 | # Add analytics 24 | from flask_analytics import Analytics 25 | from rsshub.google_analytics import ga_account 26 | 27 | Analytics(app) 28 | app.config['ANALYTICS']['GOOGLE_UNIVERSAL_ANALYTICS']['ACCOUNT'] = ga_account 29 | app.config['ANALYTICS']['ENABLED'] = True 30 | 31 | register_blueprints(app) 32 | register_extensions(app) 33 | register_errors(app) 34 | register_context_processors(app) 35 | register_cli(app) 36 | 37 | return app 38 | 39 | 40 | def register_extensions(app): 41 | bootstrap.init_app(app) 42 | debugtoolbar.init_app(app) 43 | moment.init_app(app) 44 | 45 | 46 | def register_blueprints(app): 47 | app.register_blueprint(main_bp) 48 | 49 | 50 | def register_errors(app): 51 | @app.errorhandler(400) 52 | def bad_request(e): 53 | return render_template('errors/400.html'), 400 54 | 55 | @app.errorhandler(404) 56 | def page_not_found(e): 57 | return render_template('errors/404.html'), 404 58 | 59 | @app.errorhandler(500) 60 | def internal_server_error(e): 61 | return render_template('errors/500.html'), 500 62 | 63 | 64 | def register_context_processors(app): 65 | @app.context_processor 66 | def inject_date_now(): 67 | now = datetime.utcnow() 68 | return {'now': now} 69 | 70 | 71 | def register_cli(app): 72 | @app.cli.command() 73 | @with_appcontext 74 | def ptshell(): 75 | """Use ptpython as shell.""" 76 | try: 77 | from ptpython.repl import embed 78 | if not app.config['TESTING']: 79 | embed(app.make_shell_context()) 80 | except ImportError: 81 | click.echo('ptpython not installed! 
Use the default shell instead.') 82 | -------------------------------------------------------------------------------- /rsshub/blueprints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hillerliao/RSSHub-python/a51a80606a4148224867f9f77e5411fe99ac4d5e/rsshub/blueprints/__init__.py -------------------------------------------------------------------------------- /rsshub/blueprints/main.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, render_template, request 2 | from rsshub.extensions import cache 3 | 4 | bp = Blueprint('main', __name__) 5 | 6 | @bp.route('/word/') 7 | @bp.route('/') 8 | @cache.cached(timeout=3600) 9 | def word(category=''): 10 | from rsshub.spiders.word.word import ctx 11 | return render_template('main/word.html', **ctx(category)) 12 | 13 | @bp.route('/index') 14 | def index(): 15 | return render_template('main/index.html') 16 | 17 | @bp.route('/feeds') 18 | def feeds(): 19 | return render_template('main/feeds.html') 20 | 21 | 22 | @bp.app_template_global() 23 | def filter_content(ctx): 24 | include_title = request.args.get('include_title') 25 | include_description = request.args.get('include_description') 26 | exclude_title = request.args.get('exclude_title') 27 | exclude_description = request.args.get('exclude_description') 28 | limit = request.args.get('limit', type=int) 29 | items = ctx['items'].copy() 30 | 31 | if include_title: 32 | include_keywords = include_title.split('|') if '|' in include_title else [include_title] 33 | items = [item for item in items if any(keyword in item['title'] for keyword in include_keywords)] 34 | 35 | if include_description: 36 | include_keywords = include_description.split('|') if '|' in include_description else [include_description] 37 | items = [item for item in items if any(keyword in item['description'] for keyword in include_keywords)] 38 | 39 | if exclude_title: 40 | exclude_keywords = exclude_title.split('|') if '|' in exclude_title else [exclude_title] 41 | items = [item for item in items if all(keyword not in item['title'] for keyword in exclude_keywords)] 42 | 43 | if exclude_description: 44 | exclude_keywords = exclude_description.split('|') if '|' in exclude_description else [exclude_description] 45 | items = [item for item in items if all(keyword not in item['description'] for keyword in exclude_keywords)] 46 | 47 | if limit: 48 | items = items[:limit] 49 | 50 | ctx = ctx.copy() 51 | ctx['items'] = items 52 | return ctx 53 | 54 | 55 | 56 | 57 | #---------- feed路由从这里开始 -----------# 58 | @bp.route('/cninfo/announcement//') 59 | @bp.route('/cninfo/announcement') 60 | def cninfo_announcement(stock_id='', category=''): 61 | from rsshub.spiders.cninfo.announcement import ctx 62 | return render_template('main/atom.xml', **filter_content(ctx(stock_id,category))) 63 | 64 | 65 | @bp.route('/chuansongme/articles/') 66 | @bp.route('/chuansongme/articles') 67 | def chuansongme_articles(category=''): 68 | from rsshub.spiders.chuansongme.articles import ctx 69 | return render_template('main/atom.xml', **filter_content(ctx(category))) 70 | 71 | 72 | @bp.route('/ctolib/topics/') 73 | @bp.route('/ctolib/topics') 74 | def ctolib_topics(category=''): 75 | from rsshub.spiders.ctolib.topics import ctx 76 | return render_template('main/atom.xml', **filter_content(ctx(category))) 77 | 78 | @bp.route('/bbwc/realtime') 79 | def bbwc_realtime(category=''): 80 | from rsshub.spiders.bbwc.realtime import ctx 
81 | return render_template('main/atom.xml', **filter_content(ctx(category))) 82 | 83 | 84 | @bp.route('/infoq/recommend') 85 | def infoq_recommend(): 86 | from rsshub.spiders.infoq.recommend import ctx 87 | return render_template('main/atom.xml', **filter_content(ctx())) 88 | 89 | 90 | @bp.route('/infoq/topic/') 91 | def infoq_topic(category=''): 92 | from rsshub.spiders.infoq.topic import ctx 93 | return render_template('main/atom.xml', **filter_content(ctx(category))) 94 | 95 | @bp.route('/readhub/topic//') 96 | def readhub_topic(type='', uid=''): 97 | from rsshub.spiders.readhub.topic import ctx 98 | return render_template('main/atom.xml', **filter_content(ctx(type,uid))) 99 | 100 | @bp.route('/infoq/profile/') 101 | def infoq_profile(category=''): 102 | from rsshub.spiders.infoq.profile import ctx 103 | return render_template('main/atom.xml', **filter_content(ctx(category))) 104 | 105 | @bp.route('/infoq/search//') 106 | def infoq_search(category='', type=''): 107 | from rsshub.spiders.infoq.search import ctx 108 | return render_template('main/atom.xml', **filter_content(ctx(category, type))) 109 | 110 | @bp.route('/dxzg/notice') 111 | def dxzg_notice(): 112 | from rsshub.spiders.dxzg.notice import ctx 113 | return render_template('main/atom.xml', **filter_content(ctx())) 114 | 115 | 116 | @bp.route('/earningsdate/prnewswire') 117 | def earningsdate_prnewswire(): 118 | from rsshub.spiders.earningsdate.prnewswire import ctx 119 | return render_template('main/atom.xml', **filter_content(ctx())) 120 | 121 | @bp.route('/earningsdate/globenewswire') 122 | def earningsdate_globenewswire(): 123 | from rsshub.spiders.earningsdate.globenewswire import ctx 124 | return render_template('main/atom.xml', **filter_content(ctx())) 125 | 126 | @bp.route('/earningsdate/businesswire') 127 | def earningsdate_businesswire(): 128 | from rsshub.spiders.earningsdate.businesswire import ctx 129 | return render_template('main/atom.xml', **filter_content(ctx())) 130 | 131 | @bp.route('/jiemian/newsflash/') 132 | def jiemian_newsflash(category=''): 133 | from rsshub.spiders.jiemian.newsflash import ctx 134 | return render_template('main/atom.xml', **filter_content(ctx(category))) 135 | 136 | @bp.route('/csrc/audit/') 137 | def csrc_audit(category=''): 138 | from rsshub.spiders.csrc.audit import ctx 139 | return render_template('main/atom.xml', **filter_content(ctx(category))) 140 | 141 | @bp.route('/caixin/scroll/') 142 | def caixin_scroll(category=''): 143 | from rsshub.spiders.caixin.scroll import ctx 144 | return render_template('main/atom.xml', **filter_content(ctx(category))) 145 | 146 | @bp.route('/eastmoney/report//') 147 | def eastmoney_report(category='', type=''): 148 | from rsshub.spiders.eastmoney.report import ctx 149 | return render_template('main/atom.xml', **filter_content(ctx(type,category))) 150 | 151 | @bp.route('/xuangubao//') 152 | def xuangubao_xuangubao(type='', category=''): 153 | from rsshub.spiders.xuangubao.xuangubao import ctx 154 | return render_template('main/atom.xml', **filter_content(ctx(type, category))) 155 | 156 | @bp.route('/cls/subject/') 157 | def cls_subject(category=''): 158 | from rsshub.spiders.cls.subject import ctx 159 | return render_template('main/atom.xml', **filter_content(ctx(category))) 160 | 161 | @bp.route('/cls/telegraph/') 162 | def cls_telegraph(): 163 | from rsshub.spiders.cls.telegraph import ctx 164 | return render_template('main/atom.xml', **filter_content(ctx())) 165 | 166 | @bp.route('/chaindd/column/') 167 | def chaindd_column(category=''): 168 | 
from rsshub.spiders.chaindd.column import ctx 169 | return render_template('main/atom.xml', **filter_content(ctx(category))) 170 | 171 | @bp.route('/techcrunch/tag/') 172 | def techcrunch_tag(category=''): 173 | from rsshub.spiders.techcrunch.tag import ctx 174 | return render_template('main/atom.xml', **filter_content(ctx(category))) 175 | 176 | @bp.route('/weiyangx/home') 177 | def weiyangx_home(): 178 | from rsshub.spiders.weiyangx.home import ctx 179 | return render_template('main/atom.xml', **filter_content(ctx())) 180 | 181 | @bp.route('/weiyangx/express/') 182 | def weiyangx_express(): 183 | from rsshub.spiders.weiyangx.express import ctx 184 | return render_template('main/atom.xml', **filter_content(ctx())) 185 | 186 | @bp.route('/weiyangx/tag/') 187 | def weiyangx_tag(category=''): 188 | from rsshub.spiders.weiyangx.tag import ctx 189 | return render_template('main/atom.xml', **filter_content(ctx(category))) 190 | 191 | @bp.route('/jintiankansha/column/') 192 | def jintiankansha_column(category=''): 193 | from rsshub.spiders.jintiankansha.column import ctx 194 | return render_template('main/atom.xml', **filter_content(ctx(category))) 195 | 196 | @bp.route('/interotc/cpgg/') 197 | def interotc_cpgg(category=''): 198 | from rsshub.spiders.interotc.cpgg import ctx 199 | return render_template('main/atom.xml', **filter_content(ctx(category))) 200 | 201 | @bp.route('/benzinga/ratings/') 202 | def benzinga_ratings(category=''): 203 | from rsshub.spiders.benzinga.ratings import ctx 204 | return render_template('main/atom.xml', **filter_content(ctx(category))) 205 | 206 | @bp.route('/chouti/section/') 207 | def chouti_section(category=''): 208 | from rsshub.spiders.chouti.section import ctx 209 | return render_template('main/atom.xml', **filter_content(ctx(category))) 210 | 211 | @bp.route('/chouti/search/') 212 | def chouti_search(category=''): 213 | from rsshub.spiders.chouti.search import ctx 214 | return render_template('main/atom.xml', **filter_content(ctx(category))) 215 | 216 | @bp.route('/chouti/user/') 217 | def chouti_user(category=''): 218 | from rsshub.spiders.chouti.user import ctx 219 | return render_template('main/atom.xml', **filter_content(ctx(category))) 220 | 221 | @bp.route('/zaobao/realtime/') 222 | def zaobao_realtime(category=''): 223 | from rsshub.spiders.zaobao.realtime import ctx 224 | return render_template('main/atom.xml', **filter_content(ctx(category))) 225 | 226 | @bp.route('/mp/tag//') 227 | def mp_tag(mp='', tag=''): 228 | from rsshub.spiders.mp.tag import ctx 229 | return render_template('main/atom.xml', **filter_content(ctx(mp,tag))) 230 | 231 | @bp.route('/mp/rtag//') 232 | def mp_rtag(c1='', tag=''): 233 | from rsshub.spiders.mp.rtag import ctx 234 | return render_template('main/atom.xml', **filter_content(ctx(c1, tag))) 235 | 236 | @bp.route('/producthunt/search//') 237 | def producthunt_search(keyword='', period=''): 238 | from rsshub.spiders.producthunt.search import ctx 239 | return render_template('main/atom.xml', **filter_content(ctx(keyword,period))) 240 | 241 | @bp.route('/pgyer/') 242 | def pgyer_app(category=''): 243 | from rsshub.spiders.pgyer.app import ctx 244 | return render_template('main/atom.xml', **filter_content(ctx(category))) 245 | 246 | @bp.route('/economist/worldbrief') 247 | def economist_wordlbrief(category=''): 248 | from rsshub.spiders.economist.worldbrief import ctx 249 | return render_template('main/atom.xml', **filter_content(ctx(category))) 250 | 251 | @bp.route('/nasdaq/symbol_change') 252 | @cache.cached(timeout=3600) 
253 | def nasdaq_symbol_change(category=''): 254 | from rsshub.spiders.nasdaq.symbol_change import ctx 255 | return render_template('main/atom.xml', **filter_content(ctx(category))) 256 | 257 | @bp.route('/futu/live/') 258 | def futu_live(lang=''): 259 | from rsshub.spiders.futu.live import ctx 260 | return render_template('main/atom.xml', **filter_content(ctx(lang))) 261 | 262 | @bp.route('/baidu/suggest/') 263 | def baidu_suggest(category=''): 264 | from rsshub.spiders.baidu.suggest import ctx 265 | return render_template('main/atom.xml', **filter_content(ctx(category))) 266 | 267 | @bp.route('/mp/gh/') 268 | def mp_gh(gh=''): 269 | from rsshub.spiders.mp.gh import ctx 270 | return render_template('main/atom.xml', **filter_content(ctx(gh))) 271 | 272 | @bp.route('/mp/youwuqiong/') 273 | def mp_youwuqiong(author=''): 274 | from rsshub.spiders.mp.youwuqiong import ctx 275 | return render_template('main/atom.xml', **filter_content(ctx(author))) 276 | 277 | 278 | @bp.route('/xinhuanet/zuixinbobao') 279 | def xinhuanet_zuixinbobao(): 280 | from rsshub.spiders.xinhuanet.zuixinbobao import ctx 281 | return render_template('main/atom.xml', **filter_content(ctx())) 282 | 283 | 284 | @bp.route('/xinhuanet/shizhenglianbo') 285 | def xinhuanet_shizhenglianbo(): 286 | from rsshub.spiders.xinhuanet.shizhenglianbo import ctx 287 | return render_template('main/atom.xml', **filter_content(ctx())) 288 | 289 | 290 | @bp.route('/xinhuanet/yaodianjujiao') 291 | def xinhuanet_yaodianjujiao(): 292 | from rsshub.spiders.xinhuanet.yaodianjujiao import ctx 293 | return render_template('main/atom.xml', **filter_content(ctx())) 294 | 295 | 296 | @bp.route('/xinhuanet/world') 297 | def xinhuanet_world(): 298 | from rsshub.spiders.xinhuanet.world import ctx 299 | return render_template('main/atom.xml', **filter_content(ctx())) 300 | 301 | 302 | @bp.route('/yfchuhai/express/') 303 | def yfchuhai_express(): 304 | from rsshub.spiders.yfchuhai.express import ctx 305 | return render_template('main/atom.xml', **filter_content(ctx())) 306 | 307 | @bp.route('/bjnews/') 308 | def bjnews_channel(category=''): 309 | from rsshub.spiders.bjnews.channel import ctx 310 | return render_template('main/atom.xml', **filter_content(ctx(category))) 311 | 312 | @bp.route('/appstore/top//') 313 | def appstore_top(cc='', genreid=''): 314 | from rsshub.spiders.appstore.top import ctx 315 | return render_template('main/atom.xml', **filter_content(ctx(cc,genreid))) 316 | 317 | @bp.route('/netease/comment/') 318 | def netease_comment(category=''): 319 | from rsshub.spiders.netease.comment import ctx 320 | return render_template('main/atom.xml', **filter_content(ctx(category))) 321 | 322 | @bp.route('/aisixiang/search//') 323 | def aisixiang_search(category='', keywords=''): 324 | from rsshub.spiders.aisixiang.search import ctx 325 | return render_template('main/atom.xml', **filter_content(ctx(category, keywords))) 326 | 327 | @bp.route('/hnzcy/bidding/') 328 | @cache.cached(timeout=3600) 329 | def hnzcy_bidding(type=''): 330 | from rsshub.spiders.hnzcy.bidding import ctx 331 | return render_template('main/atom.xml', **filter_content(ctx(type))) 332 | 333 | @bp.route('/sysu/ifcen') 334 | @cache.cached(timeout=3600) 335 | def sysu_ifcen(category='', keywords=''): 336 | from rsshub.spiders.sysu.ifcen import ctx 337 | return render_template('main/atom.xml', **filter_content(ctx(category))) 338 | 339 | @bp.route('/nhk/newseasy') 340 | @cache.cached(timeout=3600) 341 | def nhk_newseasy(category='', keywords=''): 342 | from rsshub.spiders.nhk.newseasy 
import ctx 343 | return render_template('main/atom.xml', **filter_content(ctx(category))) 344 | 345 | @bp.route('/nhk/topic/') 346 | @cache.cached(timeout=3600) 347 | def nhk_topic(category='', keywords=''): 348 | from rsshub.spiders.nhk.topic import ctx 349 | return render_template('main/atom.xml', **filter_content(ctx(category))) 350 | 351 | @bp.route('/tadoku/books/') 352 | @cache.cached(timeout=3600) 353 | def tadoku_books(category=''): 354 | from rsshub.spiders.tadoku.books import ctx 355 | return render_template('main/atom.xml', **filter_content(ctx(category))) 356 | 357 | @bp.route('/filter/') 358 | def rss_filter(): 359 | from rsshub.spiders.rssfilter.filter import ctx 360 | feed_url = request.args.get("feed") 361 | return render_template('main/atom.xml', **filter_content(ctx(feed_url))) 362 | 363 | @bp.route('/zhihu/explore') 364 | def zhihu_explore(): 365 | from rsshub.spiders.zhihu.explore import ctx 366 | return render_template('main/atom.xml', **filter_content(ctx())) 367 | 368 | @bp.route('/zhihu/question/') 369 | def zhihu_question(qid): 370 | from rsshub.spiders.zhihu.article import ctx_question 371 | return render_template('main/atom.xml', **filter_content(ctx_question(qid))) 372 | -------------------------------------------------------------------------------- /rsshub/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | basedir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) 6 | 7 | 8 | class BaseConfig: 9 | SITE_NAME = 'RSSHub' 10 | GITHUB_USERNAME = 'alphardex' 11 | EMAIL = '2582347430@qq.com' 12 | SECRET_KEY = os.environ.get('SECRET_KEY') or 'f43hrt53et53' 13 | DEBUG_TB_INTERCEPT_REDIRECTS = False 14 | 15 | 16 | class DevelopmentConfig(BaseConfig): 17 | pass 18 | 19 | 20 | class TestingConfig(BaseConfig): 21 | TESTING = True 22 | 23 | 24 | class ProductionConfig(BaseConfig): 25 | pass 26 | 27 | 28 | config = { 29 | 'development': DevelopmentConfig, 30 | 'testing': TestingConfig, 31 | 'production': ProductionConfig 32 | } 33 | -------------------------------------------------------------------------------- /rsshub/extensions.py: -------------------------------------------------------------------------------- 1 | from flask_bootstrap import Bootstrap 2 | from flask_debugtoolbar import DebugToolbarExtension 3 | from flask_moment import Moment 4 | from flask_caching import Cache 5 | 6 | 7 | bootstrap = Bootstrap() 8 | debugtoolbar = DebugToolbarExtension() 9 | moment = Moment() 10 | 11 | cache = Cache(config={ 12 | "DEBUG": True, # some Flask specific configs 13 | "CACHE_TYPE": "simple", # Flask-Caching related configs 14 | "CACHE_DEFAULT_TIMEOUT": 3600 # cache half hour 15 | }) -------------------------------------------------------------------------------- /rsshub/google_analytics.py: -------------------------------------------------------------------------------- 1 | ga_account = 'UA-79917414-1' -------------------------------------------------------------------------------- /rsshub/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hillerliao/RSSHub-python/a51a80606a4148224867f9f77e5411fe99ac4d5e/rsshub/spiders/__init__.py -------------------------------------------------------------------------------- /rsshub/spiders/aisixiang/search.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import quote, unquote 2 | from rsshub.utils import 
fetch, DEFAULT_HEADERS 3 | 4 | 5 | domain = 'https://www.aisixiang.com' 6 | 7 | 8 | def parse(post): 9 | item = {} 10 | item['description'] = item['title'] = post.css('a::text').getall()[-1] 11 | item['link'] = f"{domain}{post.css('a::attr(href)').getall()[-1]}" 12 | item['pubDate'] = post.css('span::text').extract_first() 13 | return item 14 | 15 | 16 | def ctx(category='', keywords=''): 17 | keywords = unquote(keywords,encoding='utf-8') 18 | keywords_gbk = quote(keywords, encoding='gbk') 19 | url = f"{domain}/data/search.php?keyWords={keywords_gbk}&searchfield={category}" 20 | tree = fetch(url, headers=DEFAULT_HEADERS) 21 | posts = tree.css('.search_list').css('li') 22 | return { 23 | 'title': f'{keywords} - {category}搜索 - 爱思想', 24 | 'link': url, 25 | 'description': f'{keywords} - {category}搜索 - 爱思想', 26 | 'author': 'hillerliao', 27 | 'items': list(map(parse, posts)) 28 | } 29 | -------------------------------------------------------------------------------- /rsshub/spiders/appstore/top.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | domain = 'https://itunes.apple.com' 5 | 6 | countries = {"CN": "143465-19", 7 | "US": "143441-1", 8 | "JP": "143462-9", 9 | "KR": "143466-13", 10 | "HK": "143463-18", 11 | "AU": "143460", 12 | "TW": "143470-18", 13 | "CA": "143455-6", 14 | "DK": "143458-2", 15 | "RU": "143469-16", 16 | "ID": "143476-2", 17 | "TR": "143480-2", 18 | "GR": "143448-2", 19 | "DE": "143443-4", 20 | "IT": "143450-7", 21 | "NO": "143457-2", 22 | "FR": "143442-3", 23 | "TH": "143475-2", 24 | "SE": "143456-17", 25 | "FI": "143447-2", 26 | "GB": "143444", 27 | "NL": "143452-10", 28 | "BR": "143503-15", 29 | "PT": "143453-24", 30 | "MX": "143468-28", 31 | "ES": "143454-8", 32 | "VN": "143471-2"} 33 | 34 | def gen_headers(cc=''): 35 | headers = { 36 | "Accept-Language": f"{cc}", 37 | "User-Agent": "AppStore/2.0 iOS/10.2 model/iPhone6,1 hwp/s5l8960x build/14C92 (6; dt:89)", 38 | 'Accept': '*/*' , 39 | 'X-Apple-Store-Front': f'{countries[cc.upper()]},29' , 40 | } 41 | return headers 42 | 43 | def parse(post): 44 | print(post) 45 | item = {} 46 | subtitle = post['name'] + '
<br><br>
' + post['subtitle'] if post.__contains__('subtitle') else post['name'] 47 | item['title'] = post['name'] 48 | item['description'] = subtitle + '
<br><br>
开发者: ' + '' + post['artistName'] + ' ' \ 49 | + '
<br><br>
Rating: ' + str( post['userRating']['value'] ) \ 50 | + ',数量:' + str( post['userRating']['ratingCount'] ) 51 | url_paths = post['shortUrl'].split('/') 52 | item['author'] = post['artistName'] 53 | del url_paths[-2] 54 | item['link'] = '/'.join(url_paths) 55 | return item 56 | 57 | def ctx(cc='', genreid=''): 58 | top_url = f"{domain}/WebObjects/MZStore.woa/wa/viewTop?cc={cc}&genreId={genreid}&l=en" 59 | res = requests.get(top_url, headers=gen_headers(cc)).json() 60 | posts = res['storePlatformData']['lockup']['results'].values() 61 | 62 | return { 63 | 'title': f'Top Apps in {cc} - App Store', 64 | 'link': top_url, 65 | 'description': f'Top Apps in {cc} - App Store', 66 | 'author': 'hillerliao', 67 | 'items': list(map(parse, posts)) 68 | } -------------------------------------------------------------------------------- /rsshub/spiders/baidu/suggest.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import arrow 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | domain = 'https://baidu.com' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post['q'] 12 | item['description'] = post['q'] 13 | item['link'] = f'{domain}/s?ie=UTF-8&wd=' + post['q'] 14 | item['pubDate'] = arrow.now().isoformat() 15 | item['author'] = '百度' 16 | return item 17 | 18 | 19 | def ctx(category=''): 20 | url = f'{domain}/sugrec?wd={category}&pre=1&p=3&ie=utf-8&json=1&prod=pc&from=pc_web&sugsid=37858,36557,37691,37908,37919,37758,37903,26350,37957,37881&req=2&csor=3&pwd=ruhe%20&cb=jQuery110209380107568499061_1671113820948&_=1671113820958' 21 | posts = requests.get(url).text.split('(')[-1].split(')')[0] 22 | posts = json.loads(posts)['g'] 23 | return { 24 | 'title': f'{category} - 搜索提示 - 百度', 25 | 'link': f'https://www.baidu.com/s?ie=UTF-8&wd={category}', 26 | 'description': f'百度搜索提示', 27 | 'author': 'hillerliao', 28 | 'items': list(map(parse, posts)) 29 | } -------------------------------------------------------------------------------- /rsshub/spiders/bbwc/realtime.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import arrow 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | domain = 'https://api.bbwc.cn' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post['title'] 12 | item['description'] = post['outline'] 13 | item['link'] = post['url'] 14 | item['pubDate'] = arrow.get(int(post['inputtime'])).isoformat() 15 | item['author'] = 'Bloomberg' 16 | return item 17 | 18 | 19 | def ctx(category=''): 20 | url = f'{domain}/web/home/articlelist/device/30/p/1' 21 | posts = requests.get(url) 22 | print(posts) 23 | posts = json.loads(posts.text)['data']['list'] 24 | return { 25 | 'title': f'即时新闻 - 商业周刊', 26 | 'link': f'{domain}/realtime/index.html', 27 | 'description': f'抓取彭博商业周刊即时新闻栏目的快讯', 28 | 'author': 'hillerliao', 29 | 'items': list(map(parse, posts)) 30 | } -------------------------------------------------------------------------------- /rsshub/spiders/benzinga/ratings.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import DEFAULT_HEADERS 2 | from rsshub.utils import fetch 3 | 4 | domain = 'https://www.benzinga.com' 5 | 6 | def ctx(category=''): 7 | 8 | stock = category 9 | 10 | def parse(post): 11 | item = {} 12 | item['description'] = item['title'] = stock.upper() + '的评级:' + ', '.join(post.css('td::text').extract()) 13 | item['link'] = url 14 | return item 15 | 16 | 17 | url = 
f'{domain}/stock/{category}/ratings' 18 | tree = fetch(url, headers=DEFAULT_HEADERS) 19 | posts = tree.css('tbody tr') 20 | items = list(map(parse, posts)) 21 | 22 | column_title = tree.css('title::text').extract_first() 23 | return { 24 | 'title': f'{column_title} - benzinga', 25 | 'description': f'{column_title} - benzinga', 26 | 'link': url, 27 | 'author': f'hillerliao', 28 | 'items': items 29 | } 30 | -------------------------------------------------------------------------------- /rsshub/spiders/bjnews/channel.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | 3 | domain = 'http://www.bjnews.com.cn' 4 | 5 | 6 | def parse(post): 7 | item = {} 8 | item['description'] = item['title'] = post.css('a::text').extract_first() 9 | item['link'] = post.css('a::attr(href)').extract_first() 10 | return item 11 | 12 | 13 | def ctx(category=''): 14 | r_url = f"{domain}/{category}" 15 | tree = fetch(r_url) 16 | html = tree.css('body') 17 | posts = tree.css('.list-a').css('li') 18 | channel_title = html.css('a.cur::text').extract_first().strip() 19 | return { 20 | 'title': f'{channel_title} - 新京报', 21 | 'link': r_url, 22 | 'description': f'新京报「{channel_title}」频道新闻', 23 | 'author': 'hillerliao', 24 | 'items': list(map(parse, posts)) 25 | } 26 | -------------------------------------------------------------------------------- /rsshub/spiders/caixin/scroll.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | 3 | domain = 'http://www.caixin.com' 4 | 5 | 6 | def parse(post): 7 | item = {} 8 | item['title'] = post.css('a::text').extract_first() 9 | item['description'] = post.css('p::text').extract_first() 10 | item['link'] = post.css('a::attr(href)').extract_first() 11 | item['pubDate'] = post.css('span::text').extract_first() 12 | return item 13 | 14 | 15 | def ctx(category=''): 16 | tree = fetch(f"{domain}/search/scroll/{category}.jsp") 17 | posts = tree.css('dl') 18 | channel_title = tree.css('b').css('b::text').extract_first() 19 | return { 20 | 'title': channel_title, 21 | 'link': f'{domain}/search/scroll/{category}.jsp', 22 | 'description': '财新网滚动新闻', 23 | 'author': 'hillerliao', 24 | 'items': list(map(parse, posts)) 25 | } -------------------------------------------------------------------------------- /rsshub/spiders/chaindd/column.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'https://www.chaindd.com' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post.css('a::text').extract_first() 10 | item['description'] = post.css('p::text').extract_first() 11 | item['link'] = f"{domain}{post.css('a::attr(href)').extract_first()}" 12 | item['author'] = post.css('a.name::text').extract_first() 13 | return item 14 | 15 | 16 | def ctx(category=''): 17 | DEFAULT_HEADERS.update({'Referer': f'https://www.chaindd.com/column/{category}'}) 18 | tree = fetch(f"{domain}/column/{category}") 19 | posts = tree.css('li .cont') 20 | return { 21 | 'title': f'链得得栏目{category}最新文章', 22 | 'link': f'{domain}/column/{category}', 23 | 'description': f'链得得栏目{category}最新文章', 24 | 'author': 'hillerliao', 25 | 'items': list(map(parse, posts)) 26 | } -------------------------------------------------------------------------------- /rsshub/spiders/chouti/search.py: -------------------------------------------------------------------------------- 1 | 
import re 2 | import requests 3 | import arrow 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | domain = 'https://dig.ichouti.cn' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = re.sub(r'<[^>]*>', '', post['title']).strip() 12 | chouti_link = domain + '/link/' + str(post['id']) 13 | item['description'] = post['title'] + '
<br><br>
' + f'抽屉链接' 14 | item['link'] = post['originalUrl'] 15 | item['pubDate'] = arrow.get(post['created_time']).isoformat() 16 | item['author'] = post['submitted_user']['nick'] 17 | return item 18 | 19 | 20 | def ctx(category=''): 21 | DEFAULT_HEADERS.update({'Referer': domain}) 22 | from urllib.parse import unquote 23 | category = unquote(category, 'utf-8') 24 | r_url = f'{domain}/search/show' 25 | post_data = {'words':category,'searchType':'2','linkType':'ALL', 'subjectId':'-1'} 26 | posts = requests.post(r_url, data=post_data, headers=DEFAULT_HEADERS).json()['data']['linksList'] 27 | return { 28 | 'title': f'{category} - 抽屉热榜', 29 | 'link': r_url, 30 | 'description': f'抽屉热榜 - {r_url}', 31 | 'author': 'hillerliao', 32 | 'items': list(map(parse, posts)) 33 | } 34 | -------------------------------------------------------------------------------- /rsshub/spiders/chouti/section.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import arrow 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | domain = 'https://dig.ichouti.cn' 7 | 8 | def parse(post): 9 | item = {} 10 | item['title'] = re.sub(r'<[^>]*>', '', post['title']).strip() 11 | chouti_link = domain + '/link/' + str(post['id']) 12 | item['description'] = post['title'] + '
<br><br>
' + f'抽屉链接' 13 | item['link'] = post['originalUrl'] 14 | item['pubDate'] = arrow.get(post['created_time']).isoformat() 15 | item['author'] = post['submitted_user']['nick'] 16 | return item 17 | 18 | def ctx(category=''): 19 | DEFAULT_HEADERS.update({'Referer': domain}) 20 | post_data = {'sectionId':category} 21 | r_url = f'{domain}/section/links' 22 | posts = requests.post(r_url, data=post_data, headers=DEFAULT_HEADERS).json()['data'] 23 | return { 24 | 'title': f'{category} - 抽屉热榜', 25 | 'link': r_url, 26 | 'description': f'抽屉热榜 - {r_url}', 27 | 'author': 'hillerliao', 28 | 'items': list(map(parse, posts)) 29 | } -------------------------------------------------------------------------------- /rsshub/spiders/chouti/user.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import arrow 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | domain = 'https://dig.ichouti.cn' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = re.sub(r'<[^>]*>', '', post['title']).strip() 12 | chouti_link = domain + '/link/' + str(post['id']) 13 | item['description'] = post['title'] + '
<br><br>
' + f'抽屉链接' 14 | item['link'] = post['originalUrl'] 15 | item['pubDate'] = arrow.get(post['created_time']).isoformat() 16 | item['author'] = post['submitted_user']['nick'] 17 | return item 18 | 19 | 20 | def ctx(category=''): 21 | DEFAULT_HEADERS.update({'Referer': domain}) 22 | r_url = f'{domain}/publish/links/ajax?userId={category}' 23 | posts = requests.get(r_url, headers=DEFAULT_HEADERS).json()['data'] 24 | user_name = posts[0]['submitted_user']['nick'] 25 | return { 26 | 'title': f'{user_name} - 个人主页 - 抽屉热榜', 27 | 'link': f'{domain}/publish/links/ctu_{category}', 28 | 'description': f'{user_name} - 个人主页 - 抽屉热榜', 29 | 'author': 'hillerliao', 30 | 'items': list(map(parse, posts)) 31 | } -------------------------------------------------------------------------------- /rsshub/spiders/chuansongme/articles.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | 3 | domain = 'https://chuansongme.com' 4 | 5 | 6 | def parse(post): 7 | item = {} 8 | item['title'] = post.css('a.question_link::text').extract()[-1].strip() 9 | link = f"{domain}{post.css('a.question_link::attr(href)').extract_first()}" 10 | item['link'] = link 11 | return item 12 | 13 | 14 | def ctx(category=''): 15 | tree = fetch(f"{domain}/{category}") 16 | posts = tree.css('.feed_body .pagedlist_item') 17 | return { 18 | 'title': '传送门', 19 | 'link': domain, 20 | 'description': '传送门:微信公众号订阅', 21 | 'author': 'alphardex', 22 | 'items': list(map(parse, posts)) 23 | } -------------------------------------------------------------------------------- /rsshub/spiders/cls/subject.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import arrow 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post['ArticleTitle'] 10 | item['description'] = post['ArticleBrief'] 11 | articleid = post['ArticleId'] 12 | item['link'] = f'https://m.cls.cn/detail/{articleid}' 13 | item['author'] = post['ArticleAuthor'] 14 | item['pubDate'] = arrow.get(int(post['ArticleTime'])).isoformat() 15 | return item 16 | 17 | 18 | def ctx(category=''): 19 | url = f'https://i.cls.cn/articles/subject/v1/{category}?sign=ab07b305da92f72ea5e509ba6d1216ff&app=cailianpress&LastTime=&PageNum=20&os=android&sv=734' 20 | res = requests.get(url, headers=DEFAULT_HEADERS) 21 | res = json.loads(res.text) 22 | posts = res 23 | items = list(map(parse, posts)) 24 | return { 25 | 'title': f'{category} - 主题 - 财联社', 26 | 'link': f'https://www.cls.cn/subject/{category}', 27 | 'description': f'{category} - 主题 - 财联社', 28 | 'author': 'hillerliao', 29 | 'items': items 30 | } 31 | -------------------------------------------------------------------------------- /rsshub/spiders/cls/telegraph.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from rsshub.utils import DEFAULT_HEADERS 3 | import arrow 4 | 5 | def parse(post): 6 | item = {} 7 | item['title'] = post['title'] if post['title'] != '' else post['content'] 8 | item['description'] = post['content'] 9 | item['link'] = post['shareurl'] 10 | item['pubDate'] = arrow.get(int(post['ctime'])).isoformat() 11 | return item 12 | 13 | 14 | def ctx(): 15 | url = f'https://www.cls.cn/nodeapi/telegraphList' 16 | res = requests.get(url, headers=DEFAULT_HEADERS) 17 | posts = res.json()['data']['roll_data'] 18 | items = list(map(parse, posts)) 19 | return { 20 | 'title': f'电报 - 财联社', 21 | 'link': 
f'https://www.cls.cn/telegraph', 22 | 'description': f'财联社电报', 23 | 'author': 'hillerliao', 24 | 'items': items 25 | } 26 | -------------------------------------------------------------------------------- /rsshub/spiders/cninfo/announcement.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'http://www.cninfo.com.cn' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post['secName'] + '(' + post['secCode'] + ')' + ': ' + post['announcementTitle'] 10 | item['description'] = item['title'] 11 | item['link'] = 'http://static.cninfo.com.cn/' + post['adjunctUrl'] 12 | item['pubDate'] = post['announcementTime'] 13 | return item 14 | 15 | 16 | def ctx(stock_id='', category=''): 17 | stock_id = '' if stock_id == 'all' else stock_id 18 | stock_name = '' 19 | stock_list = requests.get('http://www.cninfo.com.cn/new/data/szse_stock.json', headers=DEFAULT_HEADERS).json()['stockList'] 20 | for stock in stock_list: 21 | if stock['code'] == stock_id : 22 | stock_id = stock['orgId'] 23 | stock_name = stock['zwjc'] 24 | break 25 | 26 | import datetime 27 | nowtime = datetime.datetime.now() 28 | deltaday=datetime.timedelta(days=1) 29 | start_date = datetime.datetime.strftime(nowtime- 700 * deltaday, '%Y-%m-%d') 30 | end_date = datetime.datetime.strftime(nowtime + 2 * deltaday, '%Y-%m-%d') 31 | seDate = start_date + '~' + end_date 32 | 33 | searchkey = '' 34 | column = '' 35 | if '_' in category: 36 | searchkey = category.split('_')[-1] 37 | category = category.split('_')[0] 38 | category = '' if category == 'all' else f'category_{category}_szsh' 39 | # column = 'szse' 40 | 41 | 42 | DEFAULT_HEADERS.update({'Referer': domain}) 43 | post_data = {'pageNum':'1', 'pageSize': '30','column': column, 'tabName':'fulltext', 'plate': '', \ 44 | 'category': category, 'secid': stock_id, 'seDate': seDate, 'searchkey': searchkey } 45 | print(post_data) 46 | posts = requests.post(f'{domain}/new/hisAnnouncement/query', \ 47 | data=post_data, headers=DEFAULT_HEADERS).json()['announcements'] 48 | return { 49 | 'title': f'{stock_name}-{category}-公告-巨潮资讯', 50 | 'link': f'{domain}/new/commonUrl/pageOfSearch?url=disclosure/list/search&checkedCategory=category_{category}_szsh&searchkey={searchkey}', 51 | 'description': f'{stock_name}关于{category}的公告-巨潮资讯', 52 | 'author': 'hillerliao', 53 | 'items': list(map(parse, posts)) 54 | } -------------------------------------------------------------------------------- /rsshub/spiders/csrc/audit.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from parsel import Selector 3 | 4 | domain = 'https://neris.csrc.gov.cn' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post.css('li.templateTip').css('li::text').extract_first() 10 | audit_status = post.css('td[style="font-weight:100 ;color: black ;position: relative;left:20px"]').css('td::text').extract() 11 | audit_date = post.css('td[style="font-weight:100 ;color:black;position: relative; "]').css('td::text').extract() 12 | 13 | description = item['title'] + ';' 14 | for i in range(len(audit_status)): 15 | description += '<' + audit_date[i] + ' ' + audit_status[i] + '>\n' 16 | 17 | item['title'] += ',' + audit_status[-1] 18 | item['description'] = description 19 | item['pubDate'] = audit_date[-1] 20 | return item 21 | 22 | 23 | def ctx(category=''): 24 | q_url = f"{domain}/alappl/home1/onlinealog.do" 25 | items = [] 26 | for i in range(1,4): 27 | 
q_data = {"appMatrCde": category, "pageNo": str(i), "pageSize": "10"} 28 | res = requests.post(q_url,data=q_data, verify=False) 29 | tree = Selector(res.text) 30 | posts = tree.css('tr[height="50"]') 31 | items.extend(list(map(parse, posts))) 32 | return { 33 | 'title': f'申请事项进度查询 - {category} - 中国证监会', 34 | 'link': f'{domain}/alappl/home1/onlinealog?appMatrCde={category}', 35 | 'description': f'{category} 申请事项进度查询 - 中国证监会', 36 | 'author': 'hillerliao', 37 | 'items': items 38 | } 39 | -------------------------------------------------------------------------------- /rsshub/spiders/ctolib/topics.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | 3 | domain = 'https://www.ctolib.com' 4 | 5 | 6 | def parse(post): 7 | item = {} 8 | item['title'] = post.css('a.title::text').extract_first() 9 | item['description'] = post.css('p.abstract::text').extract_first() 10 | item['link'] = f"{domain}{post.css('a.title::attr(href)').extract_first()}" 11 | return item 12 | 13 | 14 | def ctx(category=''): 15 | tree = fetch(f'{domain}/python/topics/{category}') 16 | posts = tree.css('ul.note-list li') 17 | return { 18 | 'title': 'CTOLib码库', 19 | 'link': domain, 20 | 'description': 'Python开发社区', 21 | 'author': 'alphardex', 22 | 'items': list(map(parse, posts)) 23 | } -------------------------------------------------------------------------------- /rsshub/spiders/dxzg/notice.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | 3 | domain = 'http://www.dxzq.net' 4 | 5 | 6 | def parse(post): 7 | item = {} 8 | item['description'] = item['title'] = post.css('a::text').extract_first() 9 | link = f"{domain}{post.css('a::attr(href)').extract_first()}" 10 | item['link'] = link 11 | item['pubDate'] = post.css('span.time::text').extract_first() 12 | return item 13 | 14 | 15 | def ctx(category=''): 16 | tree = fetch(f"{domain}/main/zcgl/zxgg/index.shtml?catalogId=1,5,228") 17 | posts = tree.css('.news_list li') 18 | return { 19 | 'title': '东兴资管产品最新公告', 20 | 'link': f'{domain}/main/zcgl/zxgg/index.shtml?catalogId=1,5,228', 21 | 'description': '东兴资管产品最新公告', 22 | 'author': 'hillerliao', 23 | 'items': list(map(parse, posts)) 24 | } -------------------------------------------------------------------------------- /rsshub/spiders/earningsdate/businesswire.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch, filter_content 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'businesswire.com' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post.css('title::text').extract_first().strip() 10 | item['description'] = post.css('description::text').extract_first() 11 | item['link'] = post.extract().split(' ')[-2].split('>')[-1].strip() 12 | item['pubDate'] = post.css('pubDate::text').extract_first() 13 | return item 14 | 15 | 16 | def ctx(category=''): 17 | tree = fetch(f"https://feed.{domain}/rss/home/?rss=G1QFDERJXkJeGVtYWA==", 18 | headers=DEFAULT_HEADERS) 19 | posts = tree.css('item') 20 | items = list(map(parse, posts)) 21 | items = filter_content(items) 22 | return { 23 | 'title': 'Earnings Date - Businesswire', 24 | 'link': f'https://www.{domain}/portal/site/home/news/subject/?vnsId=31407', 25 | 'description': 'Earnings Date - Businesswire', 26 | 'author': 'hillerliao', 27 | 'items': items 28 | } 29 | -------------------------------------------------------------------------------- 
/rsshub/spiders/earningsdate/globenewswire.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch, filter_content 2 | 3 | domain = 'https://www.globenewswire.com' 4 | 5 | def parse(post): 6 | item = {} 7 | item['title'] = post.css('title::text').extract_first().strip() 8 | item['description'] = post.css('description::text').extract_first().strip(']]>') 9 | item['link'] = post.css('guid::text').extract_first() 10 | item['pubDate'] = post.css('pubDate::text').extract_first() 11 | return item 12 | 13 | def ctx(category=''): 14 | tree = fetch(f"{domain}/RssFeed/subjectcode/13-Earnings%20Releases%20And%20Operating%20Results/feedTitle/GlobeNewswire%20-%20Earnings%20Releases%20And%20Operating%20Results") 15 | posts = tree.css('item') 16 | items = list(map(parse, posts)) 17 | items = filter_content(items) 18 | return { 19 | 'title': 'Earnings Date - Globenewswire', 20 | 'link': f'{domain}/RssFeed/subjectcode/13-Earnings%20Releases%20And%20Operating%20Results/feedTitle/GlobeNewswire%20-%20Earnings%20Releases%20And%20Operating%20Results', 21 | 'description': 'Earnings Date - Globenewswire', 22 | 'author': 'hillerliao', 23 | 'items': items 24 | } -------------------------------------------------------------------------------- /rsshub/spiders/earningsdate/prnewswire.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch, filter_content, DEFAULT_HEADERS 2 | 3 | domain = 'https://www.prnewswire.com' 4 | 5 | def parse(post): 6 | item = {} 7 | item['title'] = post.css('h3::text').getall()[1] 8 | item['description'] = post.css('p::text').extract_first() 9 | item['link'] = f"{domain}{post.css('a::attr(href)').extract_first()}" 10 | item['pubDate'] = post.css('small::text').extract_first() 11 | return item 12 | 13 | def ctx(category=''): 14 | # DEFAULT_HEADERS.update({'upgrade-insecure-requests': 1}) 15 | url = f"{domain}/news-releases/financial-services-latest-news/earnings-list/?page=1&pagesize=100" 16 | tree = fetch(url, headers=DEFAULT_HEADERS) 17 | posts = tree.css('.card-list-hr .row') 18 | items = list(map(parse, posts)) 19 | items = filter_content(items) 20 | return { 21 | 'title': 'Earnings Date - Prnewswire', 22 | 'link': f'{domain}/news-releases/financial-services-latest-news/earnings-list/', 23 | 'description': 'Earnings Date - Prnewswire', 24 | 'author': 'hillerliao', 25 | 'items': items 26 | } 27 | -------------------------------------------------------------------------------- /rsshub/spiders/eastmoney/report.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from parsel import Selector 4 | from datetime import datetime, date 5 | from rsshub.utils import DEFAULT_HEADERS 6 | 7 | 8 | def parse(post): 9 | item = {} 10 | if post['stockName']!='': 11 | post['stockName'] = '[' + post['stockName'] + '] ' 12 | item['title'] = post['stockName'] + ' ' + post['title'] 13 | item['title'] = item['title'].strip() 14 | item['description'] = item['title'] 15 | item['link'] = f"http://data.eastmoney.com/report/zw_industry.jshtml?encodeUrl={post['encodeUrl']}" 16 | item['author'] = post['orgSName'] + ' ' + post['researcher'] 17 | item['pubDate'] = post['publishDate'] 18 | return item 19 | 20 | 21 | def ctx(type='', category=''): 22 | qTypes = {'industry': '1', 'stock': '0'} 23 | qType = qTypes[type] 24 | url = f'http://reportapi.eastmoney.com/report/list?\ 25 | cb=&industryCode={category}\ 26 | 
&pageSize=50&industry=*&rating=*&ratingChange=*\ 27 | &beginTime=&endTime=&pageNo=1&fields=&qType={qType}&orgCode=&rcode=&_=1583647953800' 28 | res = requests.get(url) 29 | posts = json.loads(res.text)['data'] 30 | items = list(map(parse, posts)) 31 | return { 32 | 'title': f'{category} {type}研报 - 东方财富网', 33 | 'link': f'http://data.eastmoney.com/report/{type}.jshtml?hyid={category}', 34 | 'description': f'{category} {type} 研报 - 东方财富网', 35 | 'author': 'hillerliao', 36 | 'items': items 37 | } 38 | -------------------------------------------------------------------------------- /rsshub/spiders/economist/worldbrief.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from bs4 import BeautifulSoup 4 | from rsshub.utils import DEFAULT_HEADERS 5 | from rsshub.utils import fetch 6 | 7 | domain = 'https://www.economist.com' 8 | 9 | def extract_text(node): 10 | if isinstance(node, dict): 11 | if 'data' in node: 12 | return node['data'] 13 | elif 'children' in node: 14 | return ''.join(extract_text(child) for child in node['children']) 15 | elif isinstance(node, list): 16 | return ''.join(extract_text(child) for child in node) 17 | return '' 18 | 19 | def parse_news(gobbet): 20 | """ 21 | 生成单条 news 的新闻内容,提取标题和正文。 22 | """ 23 | title = re.sub(r'<[^>]+>', '', gobbet.strip()) 24 | item = { 25 | 'title': title, 26 | 'description': gobbet, # 简单设置正文为描述 27 | 'link': f"{domain}/the-world-in-brief?from={title[:100]}" # 生成链接 28 | } 29 | return item 30 | 31 | def ctx(category=''): 32 | """ 33 | 解析 JSON 数据,提取所有brief news的内容。 34 | """ 35 | url = f"{domain}/the-world-in-brief" 36 | html = fetch(url, headers=DEFAULT_HEADERS).get() 37 | soup = BeautifulSoup(html, 'html.parser') 38 | script_tag = soup.find('script', id="__NEXT_DATA__", type="application/json") 39 | 40 | if not script_tag: 41 | raise ValueError("Could not find __NEXT_DATA__ script tag.") 42 | 43 | # Load JSON content 44 | data = json.loads(script_tag.string) 45 | 46 | news_list = data.get('props', {}).get('pageProps', {}).get('content', {}).get('gobbets', []) 47 | 48 | # 使用 parse_gobbet 解析每一条新闻 49 | items = [parse_news(news) for news in news_list] 50 | 51 | return { 52 | 'title': 'World Brief - Economist', 53 | 'link': url, 54 | 'description': 'The world in brief: Catch up quickly on the global stories that matter', 55 | 'author': 'hillerliao', 56 | 'items': items 57 | } 58 | -------------------------------------------------------------------------------- /rsshub/spiders/futu/live.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import requests 4 | from datetime import datetime 5 | from rsshub.utils import DEFAULT_HEADERS 6 | 7 | domain = 'https://news.futunn.com' 8 | 9 | def parse_news(news): 10 | title = news.get('content', '') if news.get('title', '')=='' else news.get('title', '') 11 | 12 | content = news.get('content', '') 13 | detail_url = news.get('detailUrl', '') 14 | time = datetime.utcfromtimestamp(int(news['time'])).strftime('%Y-%m-%dT%H:%M:%SZ') 15 | 16 | item = { 17 | 'title': title, 18 | 'description': content, 19 | 'link': detail_url, 20 | 'pubDate': time 21 | } 22 | 23 | return item 24 | 25 | def ctx(lang=''): 26 | """ 27 | 解析 JSON 数据,提取所有live news的内容。 28 | """ 29 | url = f"{domain}/news-site-api/main/get-flash-list?pageSize=50&lang={lang}" 30 | response = requests.get(url, headers=DEFAULT_HEADERS) 31 | data = response.json() 32 | 33 | # 检查数据是否有效 34 | if data['code'] != 0 or not 
data['data']['data']['news']: 35 | return Response("No data available", mimetype='text/plain') 36 | 37 | news_list = data.get('data', {}).get('data', {}).get('news', []) 38 | print(news_list) 39 | 40 | # 使用 parse_gobbet 解析每一条新闻 41 | items = [parse_news(news) for news in news_list] 42 | 43 | return { 44 | 'title': 'Futunn Live News', 45 | 'link': url, 46 | 'description': 'Futunn Live News', 47 | 'author': 'hillerliao', 48 | 'items': items 49 | } 50 | -------------------------------------------------------------------------------- /rsshub/spiders/hnzcy/bidding.py: -------------------------------------------------------------------------------- 1 | import json 2 | import datetime 3 | import arrow 4 | import requests 5 | from rsshub.utils import DEFAULT_HEADERS 6 | 7 | domain = 'https://hunan.zcygov.cn' 8 | 9 | headers = { 10 | 'Accept': 'application/json, text/plain, */*', 11 | 'Accept-Language': 'ja,en-US;q=0.9,en;q=0.8', 12 | 'Connection': 'keep-alive', 13 | 'Content-Type': 'application/json;charset=UTF-8', 14 | 'DNT': '1', 15 | 'Origin': 'https://hunan.zcygov.cn', 16 | 'Referer': 'https://hunan.zcygov.cn/bidding/newest?tradeModel=BIDDING&utm=luban.luban-PC-64.82-hunan-bidding-pc.1.7b6b17b01d7111ee97b97bde6f3bef69', 17 | 'Sec-Fetch-Dest': 'empty', 18 | 'Sec-Fetch-Mode': 'cors', 19 | 'Sec-Fetch-Site': 'same-origin', 20 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.67', 21 | 'X-Requested-With': 'XMLHttpRequest', 22 | 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Microsoft Edge";v="114"', 23 | 'sec-ch-ua-mobile': '?0', 24 | 'sec-ch-ua-platform': '"macOS"', 25 | } 26 | 27 | 28 | def parse(post): 29 | item = {} 30 | item['title'] = f'[{post["districtName"]}] {post["title"]} ' 31 | budget = "{:.2f}".format( 32 | round ( post["budget"] / 100.0 , 2) 33 | ) 34 | end_time = post['endTimestamp'] / 1000 35 | 36 | dt_object = datetime.datetime.fromtimestamp(end_time) 37 | end_time = dt_object.strftime("%Y-%m-%d %H:%M:%S") 38 | 39 | item['description'] = f'{item["title"]};采购单位:{post["orgName"]}; 金额:{budget}元;截止:{end_time}' 40 | item['link'] = f"{domain}/bidding/detail?requisitionId={post['requisitionId']}&type={post['type']}" 41 | item['author'] = post['orgName'] 42 | item['pubDate'] = arrow.get(post['pubTimestamp']).isoformat() 43 | return item 44 | 45 | 46 | def ctx(type=''): 47 | url = f'{domain}/front/api/sparta/announcement/list{type}' 48 | json_data = { 49 | 'backCategoryName': '', 50 | 'pageNo': 1, 51 | 'pageSize': 16, 52 | 'stateList': [], 53 | 'otherSearch': '', 54 | 'instanceCode': 'HNDZMC', 55 | 'sortField': 'GMT_MODIFIED', 56 | 'sortMethod': 'DESC', 57 | 'districtCodeList': [], 58 | 'administrativeDistrictCodeList': [], 59 | 'tradeModel': 'BIDDING', 60 | } 61 | 62 | response = requests.post( 63 | url, 64 | headers=headers, 65 | json=json_data, 66 | ) 67 | 68 | posts = json.loads(response.text)['result']['list'] 69 | items = list(map(parse, posts)) 70 | 71 | 72 | return { 73 | 'title': f'{type} - 湖南竞价', 74 | 'link': f'{domain}/bidding/', 75 | 'description': f'政采云”是政府采购云计算服务平台的简称。以互联⽹为基础,充分运⽤云计算和⼤数据技术,以政府采购电⼦化交易和管理为重点,涉及政府采购全流程、各领域、多用户,集政府采购、网上交易、⽹上监管和⽹上服务为一体的综合性云服务平台。', 76 | 'author': 'hillerliao', 77 | 'items': items 78 | } 79 | -------------------------------------------------------------------------------- /rsshub/spiders/infoq/profile.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from rsshub.utils 
import DEFAULT_HEADERS 4 | from rsshub.utils import fetch 5 | 6 | domain = 'https://www.infoq.cn' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post['article_title'] 12 | item['description'] = f"{post['article_summary']}
" 13 | item['link'] = f"{domain}/article/{post['uuid']}" 14 | item['pubDate'] = post['publish_time'] 15 | return item 16 | 17 | 18 | def ctx(category=''): 19 | referer = f'{domain}/profile/{category}/publish' 20 | DEFAULT_HEADERS.update({'Referer': referer}) 21 | url = f'{domain}/public/v1/user/getListByAuthor' 22 | posts = requests.post(url, json={'size': 12, 'id': category, 'type': 0}, headers=DEFAULT_HEADERS) 23 | tree = fetch(referer,headers=DEFAULT_HEADERS) 24 | feed_title = tree.css('title::text').get() 25 | posts = json.loads(posts.text)['data'] 26 | return { 27 | 'title': f'{feed_title} - Profile - InfoQ', 28 | 'link': referer, 29 | 'description': 'InfoQ - 促进软件开发领域知识与创新的传播', 30 | 'author': 'hillerliao', 31 | 'items': list(map(parse, posts)) 32 | } -------------------------------------------------------------------------------- /rsshub/spiders/infoq/recommend.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'https://www.infoq.cn' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post['article_title'] 10 | item['description'] = f"{post['article_summary']}
" 11 | item['link'] = f"{domain}/article/{post['uuid']}" 12 | return item 13 | 14 | 15 | def ctx(): 16 | DEFAULT_HEADERS.update({'Referer': 'https://www.infoq.cn'}) # 必须设置Referer,不然会451错误 17 | import json 18 | posts = requests.post(f'{domain}/public/v1/my/recommond', json={'size': 20}, headers=DEFAULT_HEADERS) 19 | posts = json.loads(posts.text)['data'] 20 | return { 21 | 'title': 'infoq', 22 | 'link': domain, 23 | 'description': 'InfoQ - 促进软件开发领域知识与创新的传播', 24 | 'author': 'alphardex', 25 | 'items': list(map(parse, posts)) 26 | } -------------------------------------------------------------------------------- /rsshub/spiders/infoq/search.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import re 4 | from urllib.parse import unquote 5 | from rsshub.utils import DEFAULT_HEADERS 6 | from rsshub.utils import fetch 7 | 8 | domain = 'https://s.geekbang.org' 9 | 10 | 11 | def parse(post): 12 | item = {} 13 | item['title'] = re.sub(r'<[^>]*>', '', post['title']).strip() 14 | item['description'] = re.sub(r'<[^>]*>', '', post['simple_content']).strip() 15 | item['link'] = post['content_url'] 16 | item['author'] = post['author'] 17 | item['pubDate'] = post['release_time'] 18 | return item 19 | 20 | 21 | def ctx(category='', type=''): 22 | category1 = category.encode("utf-8").decode("latin-1") 23 | referer = f'{domain}/search/c=0/k={category1}/t={type}' 24 | DEFAULT_HEADERS.update({'Referer': referer}) 25 | url = f'{domain}/api/gksearch/search' 26 | category = unquote(category, 'utf-8') 27 | payload = {"q":category,"t": type,"s":20,"p":1} 28 | posts = requests.post(url, json=payload, headers=DEFAULT_HEADERS) 29 | posts = json.loads(posts.text)['data']['list'] 30 | return { 31 | 'title': f'{category} - 搜索 - InfoQ', 32 | 'link': f'{domain}/search/c=0/k={category}/t=0', 33 | 'description': f'{category} - 极客邦搜索 - InfoQ', 34 | 'author': 'hillerliao', 35 | 'items': list(map(parse, posts)) 36 | } -------------------------------------------------------------------------------- /rsshub/spiders/infoq/topic.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'https://www.infoq.cn' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post['article_title'] 10 | item['description'] = f"{post['article_summary']}
" 11 | item['link'] = f"{domain}/article/{post['uuid']}" 12 | item['pubDate'] = post['publish_time'] 13 | return item 14 | 15 | 16 | def ctx(category=''): 17 | referer = f'{domain}/topic/{category}' 18 | DEFAULT_HEADERS.update({'Referer': referer}) 19 | url = f'{domain}/public/v1/article/getList' 20 | import json 21 | posts = requests.post(url, json={'size': 20, 'id': category, 'type': 0}, headers=DEFAULT_HEADERS) 22 | 23 | posts = json.loads(posts.text)['data'] 24 | return { 25 | 'title': f'{category} - Topic - InfoQ', 26 | 'link': referer, 27 | 'description': 'InfoQ - 促进软件开发领域知识与创新的传播', 28 | 'author': 'hillerliao', 29 | 'items': list(map(parse, posts)) 30 | } -------------------------------------------------------------------------------- /rsshub/spiders/interotc/cpgg.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'https://www.interotc.com.cn' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | end_date = '' 10 | if '东兴证券' in post['TITLE']: 11 | end_date = post['CONTENT'].split('存续期到期日')[1].split('。')[0] 12 | item['title'] = post['TITLE'] + ' (' + post['CPDM'] + ', ' + end_date + ', ' + post['CPMC'] + ')' 13 | item['description'] = post['CONTENT'] 14 | item['link'] = f'{domain}/portal/newportal/cpggDetail.html?bdid=' + str(post['BDID']) 15 | item['pubDate'] = post['FBSJ'] 16 | return item 17 | 18 | 19 | def ctx(category=''): 20 | DEFAULT_HEADERS.update({'Host': 'www.interotc.com.cn'}) 21 | url = f'{domain}/zzjsInterface/interface/fixedIncome/lettersListNew.json' 22 | # req_params = {'pageSize': '10','startDate':'-1', 'keyword': category, 'pageIndex': '1'} 23 | # posts = requests.post(url, \ 24 | # data=req_params, headers=DEFAULT_HEADERS) 25 | req_params = f'?keyword={category}&pageSize=150' 26 | posts = requests.get(url+req_params) 27 | import json 28 | posts = json.loads(posts.text)['resultSet'] 29 | return { 30 | 'title': f'{category} - 产品公告 - 机构间市场', 31 | 'link': f'{domain}/portal/newportal/cpgg.html', 32 | 'description': f'{category}的产品公告', 33 | 'author': 'hillerliao', 34 | 'items': list(map(parse, posts)) 35 | } -------------------------------------------------------------------------------- /rsshub/spiders/jiemian/newsflash.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from parsel import Selector 4 | from datetime import datetime, date 5 | 6 | domain = 'https://jiemian.com' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post.css('a::text').extract_first() 12 | item['description'] = post.css('p::text').extract()[-1]\ 13 | .strip('】\n\t\t\t\t\t') 14 | item['link'] = post.css('a::attr(href)').extract_first() 15 | pubdate = post.xpath('//div[@class="item-news "]/\ 16 | preceding::div[@class="col-date"][last()-1]')\ 17 | .css('div::text').extract_first() 18 | cur_t = datetime.now().time().strftime("%H%M") 19 | pub_t = post.css('.item-date').css('div::text').extract_first() 20 | if pub_t.replace(':', '') < cur_t: 21 | pubdate = date.today().isoformat() 22 | item['pubDate'] = pubdate + ' ' + pub_t 23 | return item 24 | 25 | 26 | def ctx(category=''): 27 | res = requests.get(f"https://a.jiemian.com/index.php?\ 28 | m=lists&a=ajaxNews&page=1&cid={category}") 29 | res = res.text[1:-1] 30 | res = json.loads(res)['rst'] 31 | tree = Selector(text=res) 32 | posts = tree.css('.item-news') 33 | items = list(map(parse, posts)) 34 | return { 35 | 'title': f'{category} 快讯 - 界面新闻', 36 | 
'link': f'{domain}//lists/{category}.html', 37 | 'description': f'{category} 快讯 - 界面新闻', 38 | 'author': 'hillerliao', 39 | 'items': items 40 | } 41 | -------------------------------------------------------------------------------- /rsshub/spiders/jintiankansha/column.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import DEFAULT_HEADERS 2 | from rsshub.utils import fetch 3 | 4 | domain = 'http://www.jintiankansha.me' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['description'] = item['title'] = post.css('a::text').extract_first() 10 | item['link'] = post.css('a::attr(href)').extract_first() 11 | return item 12 | 13 | 14 | def ctx(category=''): 15 | url = f'{domain}/column/{category}' 16 | DEFAULT_HEADERS.update({'Host': 'www.jintiankansha.me'}) 17 | tree = fetch(url, headers=DEFAULT_HEADERS) 18 | # posts = tree.css('.cell.item') 19 | posts = tree.css('.item_title') 20 | items = list(map(parse, posts)) 21 | 22 | column_title = tree.css('title::text').extract_first() 23 | return { 24 | 'title': f'{column_title}', 25 | 'description': f'{category}', 26 | 'link': url, 27 | 'author': f'hillerliao', 28 | 'items': items 29 | } 30 | -------------------------------------------------------------------------------- /rsshub/spiders/mp/gh.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'https://weixin.sogou.com' 5 | 6 | def parse(post): 7 | item = {} 8 | 9 | if dd_num > 1: 10 | item['description'] = item['title'] = post.css('a::text').get() 11 | item['link'] = domain + post.css('a::attr(href)').get() 12 | item['pubDate'] = post.css('script::text').get().split('\'')[-2] 13 | else: 14 | item['description'] = item['title'] = '近期没有新文章' 15 | item['link'] = url 16 | return item 17 | 18 | def ctx(gh=''): 19 | global url 20 | url = f"{domain}/weixin?type=1&s_from=input&query={gh}&ie=utf8&_sug_=n&_sug_type_=&w=01019900&sut=1554&sst0=1628603087755&lkt=0%2C0%2C0" 21 | tree = fetch(url=url, headers=DEFAULT_HEADERS) 22 | global dd_num 23 | dd_num = len( tree.css('dd') ) 24 | posts = [ tree.css('dd')[-1] ] 25 | mp_name = tree.css('p.tit a::text').get() 26 | mp_description = tree.css('dd::text')[0].get() 27 | return { 28 | 'title': f'{mp_name}-公众号', 29 | 'link': url, 30 | 'description': mp_description, 31 | 'author': 'hillerliao', 32 | 'items': list(map(parse, posts)) 33 | } -------------------------------------------------------------------------------- /rsshub/spiders/mp/rtag.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import pyjsparser 4 | import arrow 5 | 6 | domain = 'https://mp.weixin.qq.com' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post['properties'][4]['value']['value'] 12 | item['description'] = post['properties'][5]['value']['value'] 13 | item['link'] = post['properties'][6]['value']['value'] 14 | item['pubDate'] = post['properties'][16]['value']['left']['value'] 15 | item['pubDate'] = arrow.get(int(item['pubDate'])).isoformat() 16 | test = item['author'] = post['properties'][1]['value']['properties'][0]['value']['value'] 17 | return item 18 | 19 | def ctx(c1='', tag=''): 20 | url = f"{domain}/mp/recommendtag?c1={c1}&tag={tag}&msg_type=1&sn=2fGf6B-xDlazPj5_t_KgEH0Gpkw" 21 | r = requests.get(url) 22 | soup = BeautifulSoup(r.text, 'html.parser') 23 | scripts = soup.findAll("script")[12].text 24 
| scripts = scripts.split('mp_msgs: ')[-1].split('isSubscribed')[0][:-6] 25 | posts = pyjsparser.parse(scripts)['body'][0]['expression']['elements'] 26 | 27 | return { 28 | 'title': f'{tag} - 微信公众号推荐话题', 29 | 'link': url, 30 | 'description': f'{tag} - 微信公众号推荐话题', 31 | 'author': 'hillerliao', 32 | 'items': list(map(parse, posts)) 33 | } -------------------------------------------------------------------------------- /rsshub/spiders/mp/tag.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | 3 | domain = 'https://mp.weixin.qq.com' 4 | 5 | 6 | def parse(post): 7 | item = {} 8 | item['description'] = item['title'] = post.css('span.album__item-title-wrp::text').extract_first() 9 | link = f"{post.css('li::attr(data-link)').extract_first()}" 10 | item['link'] = link 11 | item['pubDate'] = post.css('span.js_article_create_time::text').extract_first() 12 | return item 13 | 14 | 15 | def ctx(biz='', tag=''): 16 | url = f"{domain}/mp/appmsgalbum?__biz={biz}==&action=getalbum&album_id={tag}" 17 | tree = fetch(url) 18 | posts = tree.css('.js_album_list li') 19 | mp_name = tree.css('div.album__author-name::text').extract_first() 20 | tag_name = tree.css('div#js_tag_name::text').extract_first() 21 | return { 22 | 'title': f'{tag_name} - {mp_name}', 23 | 'link': url, 24 | 'description': f'{tag_name} - {mp_name}', 25 | 'author': 'hillerliao', 26 | 'items': list(map(parse, posts)) 27 | } -------------------------------------------------------------------------------- /rsshub/spiders/mp/youwuqiong.py: -------------------------------------------------------------------------------- 1 | from icecream import ic 2 | from rsshub.utils import fetch 3 | from rsshub.utils import DEFAULT_HEADERS 4 | 5 | 6 | domain = 'https://youwuqiong.com' 7 | 8 | def get_content(url): 9 | tree = fetch(url=url,headers=DEFAULT_HEADERS) 10 | content = tree.css('.single-content').get() 11 | return content 12 | 13 | def parse(post): 14 | item = {} 15 | item['description'] = post.css('p::text').get() 16 | item['title'] = post.css('a::text')[1].get() 17 | item['link'] = post.css('a::attr(href)')[1].get() 18 | item['pubDate'] = post.css('time::text').extract_first() 19 | # item['description'] = get_content(item['link']) 20 | # ic(item['description']) 21 | return item 22 | 23 | 24 | def ctx(author=''): 25 | url = f"{domain}/author/{author}" 26 | tree = fetch(url=url,headers=DEFAULT_HEADERS) 27 | html = tree.css('body') 28 | mp_name = html.css('h1::text').get().split(':')[-1] 29 | mp_description = html.css('.archive-description::text').get() 30 | posts = html.css('.entry-content-wrap') 31 | return { 32 | 'title': f'{mp_name} - 公众号', 33 | 'link': url, 34 | 'description': mp_description, 35 | 'author': 'hillerliao', 36 | 'items': list(map(parse, posts)) 37 | } -------------------------------------------------------------------------------- /rsshub/spiders/nasdaq/symbol_change.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | from rsshub.utils import DEFAULT_HEADERS 4 | 5 | domain = 'https://www.nasdaq.com' 6 | 7 | 8 | def parse(post): 9 | item = {} 10 | item['title'] = post['effective'] + ',' + post['oldSymbol'] + ' -> ' + post['newSymbol'] 11 | item['description'] = "代码变更:" + item['title'] + '。公司:' + post['companyName'] 12 | item['link'] = domain + post['url'] + f'?mark={post["oldSymbol"]}2{post["newSymbol"]}' 13 | return item 14 | 15 | 16 | def ctx(category=''): 17 | url = 
'https://api.nasdaq.com/api/quote/list-type-extended/symbolchangehistory' 18 | DEFAULT_HEADERS.update({ 19 | 'Referer': 'https://www.nasdaq.com/market-activity/stocks/symbol-change-history', 20 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' 21 | }) 22 | 23 | response = requests.get(url, headers=DEFAULT_HEADERS) 24 | response.raise_for_status() # 确保请求成功 25 | posts = json.loads(response.text)['data']['symbolChangeHistoryTable']['rows'] 26 | 27 | return { 28 | 'title': 'Stock Symbol Change History - Nasdaq', 29 | 'link': 'https://www.nasdaq.com/market-activity/stocks/symbol-change-history', 30 | 'description': 'View the history of stock symbol changes on Nasdaq. Stay informed on corporate actions, mergers, and rebrandings that result in symbol updates', 31 | 'author': 'hillerliao', 32 | 'items': list(map(parse, posts)), 33 | } 34 | -------------------------------------------------------------------------------- /rsshub/spiders/netease/comment.py: -------------------------------------------------------------------------------- 1 | from unicodedata import category 2 | import requests 3 | import json 4 | import arrow 5 | from rsshub.utils import DEFAULT_HEADERS 6 | 7 | domain = 'https://comment.api.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856' 8 | 9 | type = '' 10 | 11 | def parse(post): 12 | item = {} 13 | item['title'] = '【原文】' + post['thread']['title'] + ' → 【跟贴】' + post['comments'][0]['1']['content'] 14 | item['description'] = '【回帖】' + post['comments'][1]['1']['content'] if len(post['comments']) > 1 \ 15 | else '【回帖】' + post['comments'][0]['2']['content'] if '2' in post['comments'][0] \ 16 | else '' 17 | thread_link = post['thread']['url'] 18 | item['description'] = item['description'] + f' 原文链接' 19 | item['link'] = f"https://comment.tie.163.com/{post['thread']['docId']}.html" 20 | item['author'] = '' 21 | item['pubDate'] = arrow.now().isoformat() 22 | return item 23 | 24 | 25 | def ctx(category=''): 26 | type = category 27 | paths = {"heated":"/heatedList/allSite?ibc=newspc&page=1", 28 | "splendid":"/recommendList/single?ibc=newspc&offset=0&limit=30", 29 | "build":"/recommendList/build?ibc=newspc&offset=0&limit=15&showLevelThreshold=72"} 30 | url = domain + paths[category] 31 | res = requests.get(url, headers=DEFAULT_HEADERS) 32 | res = json.loads(res.text) 33 | posts = res 34 | items = list(map(parse, posts)) 35 | return { 36 | 'title': f'{category} - 网易跟贴', 37 | 'link': "https://comment.163.com/#/" + category, 38 | 'description': f'{category} - 网易跟贴', 39 | 'author': 'hillerliao', 40 | 'items': items 41 | } 42 | -------------------------------------------------------------------------------- /rsshub/spiders/nhk/newseasy.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | from rsshub.utils import DEFAULT_HEADERS 4 | 5 | domain = 'https://www3.nhk.or.jp' 6 | 7 | 8 | def parse(post): 9 | item = {} 10 | item['title'] = post['title'] 11 | item['description'] = post['title_with_ruby'] + '

' + post['outline_with_ruby'] 12 | item['link'] = f"{domain}/news/easy/{post['news_id']}/{post['news_id']}.html" 13 | return item 14 | 15 | 16 | def ctx(category=''): 17 | url = f'{domain}/news/easy/top-list.json' 18 | posts = requests.get( 19 | url, 20 | headers=DEFAULT_HEADERS, 21 | ).text 22 | posts = json.loads(posts) 23 | return { 24 | 'title': 'News Web Easy - NHK', 25 | 'link': f'{domain}/news/easy/', 26 | 'description': 'NEWS WEB EASYは、小学生・中学生の皆さんや、日本に住んでいる外国人のみなさんに、わかりやすいことば でニュースを伝えるウェブサイトです。', 27 | 'author': 'hillerliao', 28 | 'items': list(map(parse, posts)), 29 | } 30 | -------------------------------------------------------------------------------- /rsshub/spiders/nhk/topic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import arrow 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | domain = 'https://www3.nhk.or.jp' 7 | 8 | 9 | def date_format(pubDate): 10 | date = arrow.get(pubDate, 'ddd, DD MMM YYYY HH:mm:ss Z') 11 | iso = date.isoformat() 12 | return iso 13 | 14 | def parse(post): 15 | item = {} 16 | item['title'] = post['title'] 17 | item['description'] = post['title'] 18 | item['link'] = domain + post['link'] 19 | item['pubDate'] = date_format(post['pubDate']) 20 | item['author'] = 'NHK' 21 | return item 22 | 23 | 24 | def ctx(category=''): 25 | url = f'{domain}/news/json16/word/{category}_001.json?_=1705840617679' 26 | posts = requests.get(url) 27 | word = json.loads(posts.text)['channel']['word'] 28 | posts = json.loads(posts.text)['channel']['item'] 29 | return { 30 | 'title': f'{word} - NHK News', 31 | 'link': f'{domain}/news/word/{category}.html', 32 | 'description': f'{word}の最新ニュース・特集一覧', 33 | 'author': 'hillerliao', 34 | 'items': list(map(parse, posts)) 35 | } -------------------------------------------------------------------------------- /rsshub/spiders/pgyer/app.py: -------------------------------------------------------------------------------- 1 | import re 2 | from rsshub.utils import DEFAULT_HEADERS 3 | from rsshub.utils import fetch 4 | 5 | domain = 'https://www.pgyer.com' 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post.xpath('//meta[@property="og:description"]').attrib['content'] 10 | item['description'] = item['title'] 11 | if post.css('div.update-description').extract_first(): 12 | item['description'] = post.css('div.update-description').extract_first() 13 | item['description'] = re.sub(r'<[^>]*>', '', item['description'] )\ 14 | .split('备注信息:')[1].split('执行人')[0].strip() 15 | link = post.css('img.qrcode').attrib['src'].split('app/qrcode/') 16 | item['link'] = link[0] + link[1] 17 | return item 18 | 19 | def ctx(category=''): 20 | url = f"{domain}/{category}" 21 | tree = fetch(url,headers=DEFAULT_HEADERS) 22 | posts = tree.css('.container.content.pt-10') 23 | posts = tree.css('html') 24 | title = tree.xpath('//meta[@property="og:description"]').attrib['content'] 25 | app_name = tree.css('title::text').get() 26 | return { 27 | 'title': f'{title} - 蒲公英', 28 | 'link': url, 29 | 'description': f'{app_name} 安装包更新 - 蒲公英', 30 | 'author': 'hillerliao', 31 | 'items': list(map(parse, posts)) 32 | } 33 | 34 | -------------------------------------------------------------------------------- /rsshub/spiders/producthunt/search.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | 4 | from bs4 import BeautifulSoup 5 | import undetected_chromedriver as uc 6 | 7 | from rsshub.utils import DEFAULT_HEADERS 8 | 9 | 
domain = 'https://www.producthunt.com' 10 | 11 | 12 | def parse(post): 13 | item = {} 14 | item['title'] = post['name'] 15 | item['description'] = post['tagline'] 16 | item['link'] = post['url'] 17 | return item 18 | 19 | 20 | def ctx2(keyword='', period=''): 21 | DEFAULT_HEADERS.update({'Referer': domain}) 22 | r_url = f'{domain}' + f'/search?q={keyword}&postedAfter={period}:days' 23 | browser = uc.Chrome() 24 | browser.get(r_url) 25 | import time 26 | time.sleep(3) 27 | html = browser.page_source 28 | 29 | soup = BeautifulSoup(html, 'html.parser') 30 | script = soup.find('script', id='__NEXT_DATA__') 31 | data = json.loads(script.text)['props']['apolloState'] 32 | browser.quit() 33 | posts = [ v for k, v in data.items() if k.startswith('Product')] 34 | 35 | items = list(map(parse, posts)) 36 | 37 | return { 38 | 'title': f'{keyword} - Producthunt', 39 | 'link': r_url, 40 | 'description': f'{keyword} - Producthunt', 41 | 'author': 'hillerliao', 42 | 'items': items 43 | } -------------------------------------------------------------------------------- /rsshub/spiders/readhub/topic.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from rsshub.utils import DEFAULT_HEADERS 4 | 5 | domain = 'https://readhub.cn' 6 | api_domain = 'https://api.readhub.cn' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post['title'] 12 | item['description'] = post['summary'] 13 | item['link'] = f"{domain}/topic/{post['uid']}" 14 | item['author'] = post['siteNameDisplay'] 15 | item['pubDate'] = post['publishDate'] 16 | return item 17 | 18 | 19 | def ctx(type='', uid=''): 20 | referer = f'{domain}/entity_topics?type=22&uid={uid}&tb=0' 21 | DEFAULT_HEADERS.update({'Referer': referer}) 22 | type_name = 'entity' if type == '10' else 'tag' 23 | url = f'{api_domain}/topic/list_pro?{type_name}_id={uid}&size=10' 24 | posts = requests.get(url, headers=DEFAULT_HEADERS) 25 | topic_name = json.loads(posts.text)['data']['self'][f'{type_name}List'][0]['name'] 26 | 27 | posts = json.loads(posts.text)['data']['items'] 28 | return { 29 | 'title': f'{topic_name} - 主题 - Readhub', 30 | 'link': referer, 31 | 'description': f'"{topic_name}"动态', 32 | 'author': 'hillerliao', 33 | 'items': list(map(parse, posts)) 34 | } -------------------------------------------------------------------------------- /rsshub/spiders/rssfilter/filter.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import feedparser 3 | import arrow 4 | 5 | from rsshub.utils import DEFAULT_HEADERS 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post.title 10 | item['description'] = post.summary if hasattr(post,'summary') else post.title 11 | item['pubDate'] = post.published if post.has_key('published') else arrow.now().isoformat() 12 | item['link'] = post.link if hasattr(post,'link') else '' 13 | item['author'] = post.author if post.has_key('author') else '' 14 | return item 15 | 16 | def ctx(feed_url=''): 17 | res = requests.get(feed_url,headers=DEFAULT_HEADERS,verify=False) 18 | feed = feedparser.parse(res.text) 19 | title = feed.feed.title 20 | description = feed.feed.subtitle if feed.feed.has_key('subtitle') \ 21 | else feed.feed.title 22 | author = feed.feed.author if feed.feed.has_key('author') \ 23 | else feed.feed.generator if feed.feed.has_key('generator') \ 24 | else title 25 | posts = feed.entries 26 | 27 | return { 28 | 'title': title, 29 | 'link': feed_url, 30 | 'description': description, 31 | 
'author': author, 32 | 'items': list(map(parse, posts)) 33 | } -------------------------------------------------------------------------------- /rsshub/spiders/sysu/ifcen.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch_by_puppeteer 2 | import asyncio 3 | 4 | domain = 'https://ifcen.sysu.edu.cn/' 5 | 6 | 7 | def parse(selector): 8 | 9 | items = list() 10 | 11 | # 公告通知 12 | xpath = '//div[@id="news-2"]/ul//a' 13 | announces = selector.xpath(xpath + '/text()').getall() 14 | urls = selector.xpath(xpath + '/@href').getall() 15 | urls = [domain + i for i in urls] 16 | announces = ['公告通知 | ' + i for i in announces] 17 | for i in range(len(announces)): 18 | item = dict() 19 | item['title'] = announces[i] 20 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 21 | item['link'] = urls[i] 22 | items.append(item) 23 | 24 | # 学院新闻 25 | xpath = '//*[@id="news-1"]/ul/li/a' 26 | news = selector.xpath(xpath + '/text()').getall() 27 | urls = selector.xpath(xpath + '/@href').getall() 28 | urls = [domain + i for i in urls] 29 | for i in range(len(news)): 30 | item = dict() 31 | item['title'] = news[i] 32 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 33 | item['link'] = urls[i] 34 | items.append(item) 35 | 36 | # 人才工作 37 | xpath = '//*[@id="notice-1"]/div//a' 38 | works = selector.xpath(xpath + '/text()').getall() 39 | urls = selector.xpath(xpath + '/@href').getall() 40 | urls = [domain + i for i in urls] 41 | works = ['人才工作 | ' + i for i in works] 42 | for i in range(len(works)): 43 | item = dict() 44 | item['title'] = works[i] 45 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 46 | item['link'] = urls[i] 47 | items.append(item) 48 | 49 | # 本科生教育 50 | xpath = '//*[@id="notice-2"]/div//a' 51 | ues = selector.xpath(xpath + '/text()').getall() 52 | urls = selector.xpath(xpath + '/@href').getall() 53 | urls = [domain + i for i in urls] 54 | ues = ['本科生教育 | ' + i for i in ues] 55 | for i in range(len(ues)): 56 | item = dict() 57 | item['title'] = ues[i] 58 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 59 | item['link'] = urls[i] 60 | items.append(item) 61 | 62 | # 研究生教育 63 | xpath = '//*[@id="notice-3"]/div//a' 64 | pgs = selector.xpath(xpath + '/text()').getall() 65 | urls = selector.xpath(xpath + '/@href').getall() 66 | urls = [domain + i for i in urls] 67 | pgs = ['研究生教育 | ' + i for i in pgs] 68 | for i in range(len(pgs)): 69 | item = dict() 70 | item['title'] = pgs[i] 71 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 72 | item['link'] = urls[i] 73 | items.append(item) 74 | 75 | # 科研信息 76 | xpath = '//*[@id="notice-4"]/div//a' 77 | research = selector.xpath(xpath + '/text()').getall() 78 | urls = selector.xpath(xpath + '/@href').getall() 79 | urls = [domain + i for i in urls] 80 | research = ['科研信息 | ' + i for i in research] 81 | for i in range(len(research)): 82 | item = dict() 83 | item['title'] = research[i] 84 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 85 | item['link'] = urls[i] 86 | items.append(item) 87 | 88 | # 学工信息 89 | xpath = '//*[@id="notice-5"]/div//a' 90 | students = selector.xpath(xpath + '/text()').getall() 91 | urls = selector.xpath(xpath + '/@href').getall() 92 | urls = [domain + i for i in urls] 93 | students = ['学工信息 | ' + i for i in students] 94 | for i in range(len(students)): 95 | item = dict() 96 | item['title'] = students[i] 97 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 98 | item['link'] = urls[i] 99 | items.append(item) 100 | 101 | # 党建通知 102 | xpath = '//*[@id="notice-6"]/div//a' 103 | party = selector.xpath(xpath + 
'/text()').getall() 104 | urls = selector.xpath(xpath + '/@href').getall() 105 | urls = [domain + i for i in urls] 106 | party = ['党建通知 | ' + i for i in party] 107 | for i in range(len(party)): 108 | item = dict() 109 | item['title'] = party[i] 110 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 111 | item['link'] = urls[i] 112 | items.append(item) 113 | 114 | # 工会工作 115 | xpath = '//*[@id="notice-7"]/div//a' 116 | union = selector.xpath(xpath + '/text()').getall() 117 | urls = selector.xpath(xpath + '/@href').getall() 118 | urls = [domain + i for i in urls] 119 | union = ['工会工作 | ' + i for i in union] 120 | for i in range(len(union)): 121 | item = dict() 122 | item['title'] = union[i] 123 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 124 | item['link'] = urls[i] 125 | items.append(item) 126 | 127 | xpath = '//*[@id="event-1"]/li//a' 128 | report = selector.xpath(xpath + '/text()').getall() 129 | author = selector.xpath('//*[@id="event-1"]/li//span[@class="content"]/text()').getall() 130 | urls = selector.xpath(xpath + '/@href').getall() 131 | urls = [domain + i for i in urls] 132 | for i in range(len(report)) : 133 | report[i] = report[i] + author[i] 134 | report = ['学术报告 | ' + i for i in report] 135 | for i in range(len(report)): 136 | item = dict() 137 | item['title'] = report[i] 138 | item['description'] = "网站严格反爬,请进入网站查看具体内容" 139 | item['link'] = urls[i] 140 | items.append(item) 141 | 142 | return items 143 | 144 | def ctx(category=''): 145 | tree = asyncio.run(fetch_by_puppeteer(domain)) 146 | return { 147 | 'title': '中山大学中法核官网信息', 148 | 'link': domain, 149 | 'description': '中山大学中法核官网通知公告', 150 | 'author': 'echo', 151 | 'items': parse(tree) 152 | } -------------------------------------------------------------------------------- /rsshub/spiders/tadoku/books.py: -------------------------------------------------------------------------------- 1 | import re 2 | from rsshub.utils import DEFAULT_HEADERS 3 | from rsshub.utils import fetch 4 | 5 | domain = 'https://tadoku.org' 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post.css('.bl-title').css('a::text').extract_first() 10 | item['description'] = post.css('.bl-thumb').extract_first() + post.css('.bl-title').extract_first() 11 | item['link'] = post.css('.bl-title').css('a::attr(href)').extract_first() 12 | return item 13 | 14 | def ctx(category=''): 15 | category = category if category != '0' else '' 16 | url = f"{domain}/japanese/book-search?level={category}" 17 | tree = fetch(url,headers=DEFAULT_HEADERS) 18 | posts = tree.css('.col-6.col-sm-4.col-md-3.col-lg-2.bl-wrap-small') 19 | return { 20 | 'title': f'{category} Books - TADOKU.ORG', 21 | 'link': url, 22 | 'description': f'Book searching result - TADOKU.ORG', 23 | 'author': 'hillerliao', 24 | 'items': list(map(parse, posts)) 25 | } 26 | 27 | -------------------------------------------------------------------------------- /rsshub/spiders/techcrunch/tag.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from rsshub.utils import DEFAULT_HEADERS 4 | 5 | domain = 'https://techcrunch.com' 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post['title']['rendered'] 10 | item['description'] = post['content']['rendered'] 11 | item['link'] = post['link'] 12 | item['pubDate'] = post['date_gmt'] 13 | return item 14 | 15 | def ctx(category=''): 16 | url = f'{domain}/wp-json/tc/v1/magazine?tags={category}' 17 | res = requests.get(url, headers=DEFAULT_HEADERS) 18 | res = json.loads(res.text) 19 | posts = res 
20 | items = list(map(parse, posts)) 21 | return { 22 | 'title': f'{category} - tag - Techcrunch', 23 | 'description': f'{category} - tag - Techcrunch', 24 | 'link': f'{domain}/tag/{category}/', 25 | 'author': f'hillerliao', 26 | 'items': items 27 | } -------------------------------------------------------------------------------- /rsshub/spiders/weiyangx/express.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from parsel import Selector 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | domain = 'https://www.weiyangx.com' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post['post_title'] 12 | item['description'] = post['post_content'] 13 | post_id = post['post_id'] 14 | item['link'] = f'{domain}/{post_id}.html' 15 | item['pubDate'] = post['post_date'][0] + '-' + \ 16 | post['post_date'][1] + '-' + \ 17 | post['post_date'][2] 18 | return item 19 | 20 | 21 | def ctx(): 22 | url = f'https://www.weiyangx.com/category/express' 23 | res = requests.get(url, headers=DEFAULT_HEADERS) 24 | res = Selector(res.text) 25 | posts = res.css('script::text')[-4].extract().split('=')[-1] 26 | posts = json.loads(posts) 27 | items = list(map(parse, posts)) 28 | return { 29 | 'title': f'快讯 - 未央网', 30 | 'description': f'快讯 - 未央网', 31 | 'link': f'{domain}/category/express', 32 | 'author': f'hillerliao', 33 | 'items': items 34 | } 35 | -------------------------------------------------------------------------------- /rsshub/spiders/weiyangx/home.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from parsel import Selector 4 | from rsshub.utils import DEFAULT_HEADERS 5 | 6 | domain = 'https://www.weiyangx.com' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post['title'] 12 | item['description'] = post['content'] 13 | post_id = post['id'] 14 | item['link'] = f'{domain}/{post_id}.html' 15 | return item 16 | 17 | 18 | def ctx(): 19 | url = f'https://www.weiyangx.com/' 20 | res = requests.get(url, headers=DEFAULT_HEADERS) 21 | res = Selector(res.text) 22 | posts = res.css('script::text')[-5].extract().split('=')[-1] 23 | posts = json.loads(posts) 24 | items = list(map(parse, posts)) 25 | return { 26 | 'title': f'首页 - 未央网', 27 | 'description': f'首页推荐栏目 - 未央网', 28 | 'link': f'{domain}', 29 | 'author': f'hillerliao', 30 | 'items': items 31 | } 32 | -------------------------------------------------------------------------------- /rsshub/spiders/weiyangx/tag.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import DEFAULT_HEADERS 2 | import requests 3 | import json 4 | from parsel import Selector 5 | 6 | domain = 'https://www.weiyangx.com' 7 | 8 | 9 | def parse(post): 10 | item = {} 11 | item['title'] = post['post_title'] 12 | item['description'] = post['post_content'] 13 | post_id = post['post_id'] 14 | item['link'] = f'{domain}/{post_id}.html' 15 | return item 16 | 17 | 18 | def ctx(category=''): 19 | url = f'https://www.weiyangx.com/tag/{category}' 20 | res = requests.get(url, headers=DEFAULT_HEADERS) 21 | res = Selector(res.text) 22 | posts = res.css('script::text')[-4].extract().split('=')[-1] 23 | posts = json.loads(posts) 24 | # posts = tree.css('script::text')[-5].extract().split('=')[-1] 25 | items = list(map(parse, posts)) 26 | return { 27 | 'title': f'{category} - 文章 - 未央网', 28 | 'description': f'文章 - 未央网', 29 | 'link': f'{domain}/tag/{category}', 30 | 'author': 
f'hillerliao', 31 | 'items': items 32 | } 33 | -------------------------------------------------------------------------------- /rsshub/spiders/word/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | 10 | [requires] 11 | python_version = "2.7" 12 | -------------------------------------------------------------------------------- /rsshub/spiders/word/ieltswords_同义词.txt: -------------------------------------------------------------------------------- 1 | Amazing → Incredible, Fantastic, Fabulous, Astonishing, Extraordinary 2 | Answer → Respond 3 | Awful → Terrible, Abominable, Dreadful 4 | Bad → Evil, Spoiled, Imperfect, Infamous, Dismal 5 | Beautiful → Gorgeous, Ravishing, Dazzling, Exquisite, Stunning 6 | Begin → Initiate, Commence, Inaugurate 7 | Big → Huge, Enormous, Gigantic, Humongous, Substantial, Mammoth 8 | Break → Rupture, Fracture, Shatter 9 | Calm → Serene, Peace, Tranquil 10 | Come → Approach, Arrive 11 | Cool → Chilly, Frosty, Icy 12 | Cut → Chop, Slash, Slit 13 | Dangerous → Hazardous, Risky, Precarious 14 | Decide → Determine, Settle 15 | Definite → Certain, Positive, Obvious 16 | Delicious → Savoury, Titbit, Delectable 17 | Describe → Portray, Characterise 18 | Destroy → Demolish, Slay, Ruin, Raze 19 | Difference → Disagreement, Inequity, Dissimilarity 20 | Dull → Boring, Uninteresting, Monotonous, Humdrum, Dreary 21 | End → Terminate, Conclude, Cessation 22 | Explain → Elaborate, Interpret 23 | Fall → Drop, Descend, Topple 24 | Famous → Well-known, Renowned, Eminent, Illustrious 25 | Fast → Quick, Rapid, Hasty, Snappy, Swift 26 | Fat → Stout, Corpulent, Chubby, Bulky 27 | Funny → Amusing, Humorous, Droll, Hilarious 28 | Get → Acquire, Obtain, Secure, Procure, Gather 29 | Good → Excellent, Fine, Wonderful, Superior, Gracious, Superb, Splendid, Genuine, Sterling, Top-notch, 30 | Great → Worthy, Distinguished, Grand, Considerable, Mighty 31 | Happy → Pleased, Delighted, Elated, Joyful, Ecstatic, Jubilant, Jaunty 32 | Hate → Despise, Loathe, Abhor, Abominate 33 | Have → Possess, Own, Acquire, 34 | Help → Aid, Assist, Support, Encourage, Relieve 35 | Hide → Conceal, Cover, Mask, Veil 36 | Idea → Thought, Concept, Notion 37 | Important → Necessary, Vital, Critical, Indispensable, Valuable, Essential, Famous, Notable 38 | Interesting → Fascinating, Engaging, Spirited, Intriguing, Gripping, Enthralling, Captivating 39 | Little → Tiny, Diminutive, Exiguous, Dinky, Cramped 40 | Look → Gaze, Glance, Peek, Glimpse, Stare, Leer 41 | Love → Like, Admire, Fancy, Care for, Adore 42 | Make → Create, Originate, Invent, Construct, Manufacture, Produce, Compose 43 | Move → Plod, Creep, Crawl, Drag, Toddle, shuffle, Trot, Lumber, Meander 44 | Neat → Orderly, Tidy, Trim, Natty, Smart, Elegant 45 | New → Unique, Modern, Current, Recent 46 | Old → Feeble, Ancient, Aged, Veteran, Mature, Primitive, Stale 47 | Place → Draw, Map, Diagram, Procedure, Method, Blueprint 48 | Show → Display, Exhibit, Indicate, Reveal, Demonstrate 49 | Tell → Disclose, Reveal, Expose, Narrate, Inform, Divulge 50 | Use → Employ, Utilise, Exhaust, Spend 51 | Wrong → Incorrect, Inaccurate, Mistaken, Erroneous, Improper, Unsuitable 52 | -------------------------------------------------------------------------------- /rsshub/spiders/word/word.py: -------------------------------------------------------------------------------- 1 | import re 2 | import csv 
3 | import random 4 | import requests 5 | import linecache 6 | from os import path 7 | import requests 8 | from rsshub.utils import DEFAULT_HEADERS 9 | 10 | 11 | file_path = path.dirname(path.realpath(__file__)) 12 | 13 | def get_csv_line(url): 14 | response = requests.get(url) 15 | lines = response.text.splitlines() 16 | reader = csv.reader(lines) 17 | data = list(reader) 18 | data = data[1:] 19 | random_line = random.choice(data) 20 | return random_line 21 | 22 | def remove_html_tags(text): 23 | """Remove html tags from a string""" 24 | clean = re.compile('<.*?>') 25 | return re.sub(clean, '', text) 26 | 27 | def ctx(category=''): 28 | word = '' 29 | if category == 'ja': 30 | url = 'https://raw.githubusercontent.com/henrylovemiller/img/main/words.csv' 31 | res = get_csv_line(url) 32 | word = f"{res[1]} 〔{res[2]} {res[4]}〕 {res[3]} " 33 | elif category == 'jlpt3': 34 | url = 'https://raw.githubusercontent.com/henrylovemiller/img/main/hongbaoshu_N3.csv' 35 | res = get_csv_line(url) 36 | word = f"{res[0]}〔{res[1]} {res[2]}〕 ➡{res[3]} ➡{res[4]} ➡ {res[5]} ➡ {res[6]} " 37 | word = remove_html_tags(word) 38 | else: 39 | file = path.join(file_path,'toeflwords.txt') 40 | with open(file, encoding='utf-8') as inf: 41 | f = inf.readlines() 42 | count = len(f) 43 | wordnum = random.randrange(0, count, 1) 44 | word = linecache.getline(file, wordnum) 45 | return {"word": word} -------------------------------------------------------------------------------- /rsshub/spiders/xinhuanet/shizhenglianbo.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | 5 | from rsshub.utils import DEFAULT_HEADERS 6 | 7 | domain = 'http://www.news.cn' 8 | 9 | 10 | def parse(post): 11 | item = {} 12 | item['title'] = post['Title'] 13 | item['description'] = post['Abstract'] 14 | item['link'] = post['LinkUrl'] 15 | return item 16 | 17 | 18 | def ctx(): 19 | url = 'http://da.wa.news.cn/nodeart/page' 20 | posts = requests.get( 21 | url, 22 | params={'nid': '113351', 'pgnum': '1', 'cnt': '20'}, 23 | headers=DEFAULT_HEADERS, 24 | ).text 25 | posts = json.loads(posts)['data']['list'] 26 | return { 27 | 'title': '新华网 - 时政联播', 28 | 'link': url, 29 | 'description': '新华网 - 时政联播', 30 | 'author': 'flyingicedragon', 31 | 'items': list(map(parse, posts)), 32 | } 33 | -------------------------------------------------------------------------------- /rsshub/spiders/xinhuanet/utils.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import DEFAULT_HEADERS, fetch 2 | 3 | 4 | def parse_html(post): 5 | item = {} 6 | item['title'] = post.xpath('text()').extract_first() 7 | item['link'] = post.xpath('@href').extract_first() 8 | print(item['link']) 9 | item['description'] = ( 10 | fetch(item['link'], headers=DEFAULT_HEADERS) 11 | .xpath('//div[@id=\'detail\']') 12 | .get() 13 | ) 14 | return item 15 | -------------------------------------------------------------------------------- /rsshub/spiders/xinhuanet/world.py: -------------------------------------------------------------------------------- 1 | from rsshub.spiders.xinhuanet.utils import parse_html as parse 2 | from rsshub.utils import DEFAULT_HEADERS, fetch 3 | 4 | domain = 'http://www.news.cn/world/index.html' 5 | 6 | 7 | def ctx(): 8 | url = f'{domain}' 9 | tree = fetch(url, headers=DEFAULT_HEADERS) 10 | posts = tree.xpath('//div[@id=\'recommendDepth\']//a') 11 | return { 12 | 'title': '新华网 - 国际要闻', 13 | 'link': url, 14 | 'description': '新华网 - 国际要闻', 15 | 
'author': 'flyingicedragon', 16 | 'items': list(map(parse, posts)), 17 | } 18 | -------------------------------------------------------------------------------- /rsshub/spiders/xinhuanet/yaodianjujiao.py: -------------------------------------------------------------------------------- 1 | from rsshub.spiders.xinhuanet.utils import parse_html as parse 2 | from rsshub.utils import DEFAULT_HEADERS, fetch 3 | 4 | domain = 'http://www.news.cn' 5 | 6 | 7 | def ctx(): 8 | url = f'{domain}' 9 | tree = fetch(url, headers=DEFAULT_HEADERS) 10 | posts = tree.xpath('//div[@id=\'depth\']//li/a') 11 | return { 12 | 'title': '新华网 - 要点聚焦', 13 | 'link': url, 14 | 'description': '新华网 - 要点聚焦', 15 | 'author': 'flyingicedragon', 16 | 'items': list(map(parse, posts)), 17 | } 18 | -------------------------------------------------------------------------------- /rsshub/spiders/xinhuanet/zuixinbobao.py: -------------------------------------------------------------------------------- 1 | from rsshub.spiders.xinhuanet.utils import parse_html as parse 2 | from rsshub.utils import DEFAULT_HEADERS, fetch 3 | 4 | domain = 'http://www.news.cn' 5 | 6 | 7 | def ctx(): 8 | url = f'{domain}' 9 | tree = fetch(url, headers=DEFAULT_HEADERS) 10 | posts = tree.xpath('//div[@id=\'latest\']//li/a') 11 | return { 12 | 'title': '新华网 - 最新播报', 13 | 'link': url, 14 | 'description': '新华网 - 最新播报', 15 | 'author': 'flyingicedragon', 16 | 'items': list(map(parse, posts)), 17 | } 18 | -------------------------------------------------------------------------------- /rsshub/spiders/xuangubao/xuangubao.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from rsshub.utils import DEFAULT_HEADERS 4 | 5 | 6 | def parse(post): 7 | item = {} 8 | item['title'] = post['Title'] 9 | item['description'] = post['Summary'] if post['Summary'] != '' \ 10 | else post['Title'] 11 | item['link'] = post['OriginalUrl'] if post['OriginalUrl'] != '' else \ 12 | post['Url'] if post['Url'] != '' else post['ShareUrl2'] 13 | item['author'] = post['Source'] + post['DisplayAuthor'] 14 | item['pubDate'] = post['CreatedAt'] 15 | return item 16 | 17 | 18 | def ctx(type='', category=''): 19 | api_subpath = 'bkjMsgs' if type == 'theme' else 'subj' 20 | url = f'https://api.xuangubao.cn/api/pc/{api_subpath}/{category}?limit=20' 21 | print(url) 22 | res = requests.get(url, headers=DEFAULT_HEADERS) 23 | res = json.loads(res.text) 24 | posts = res['Messages'] 25 | theme_name = res['BkjName'] if type == 'theme' else res['Subject']['Title'] 26 | items = list(map(parse, posts)) 27 | sub_path = '/bkj' if type == 'theme' else '' 28 | return { 29 | 'title': f'{theme_name} - 主题 - 选股宝', 30 | 'link': f'https://xuangubao.cn/subject{sub_path}/{category}', 31 | 'description': f'{theme_name} 板块/主题动态 - 选股宝', 32 | 'author': 'hillerliao', 33 | 'items': items 34 | } 35 | -------------------------------------------------------------------------------- /rsshub/spiders/yfchuhai/express.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'https://www.yfchuhai.com' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['title'] = post['title'] 10 | item['description'] = post['content'] 11 | item['link'] = f"https://www.yfchuhai.com/news/{post['id']}.html" 12 | #item['pubDate'] = post['createTime'] 13 | item['author'] = post['source'] 14 | return item 15 | 16 | 17 | def ctx(category=''): 18 | 
DEFAULT_HEADERS.update({'Referer': 'https://www.yfchuhai.com/news/'}) 19 | r_url = f'{domain}/api/News/getList' 20 | print(r_url) 21 | posts = requests.get(r_url, headers=DEFAULT_HEADERS).json()['data']['list'] 22 | user_name = posts[0] 23 | return { 24 | 'title': '快讯 - 扬帆出海', 25 | 'link': 'https://www.yfchuhai.com/news/', 26 | 'description': '快讯 - 扬帆出海', 27 | 'author': 'hillerliao', 28 | 'items': list(map(parse, posts)) 29 | } -------------------------------------------------------------------------------- /rsshub/spiders/zaobao/realtime.py: -------------------------------------------------------------------------------- 1 | from rsshub.utils import fetch 2 | from rsshub.utils import DEFAULT_HEADERS 3 | 4 | domain = 'https://www.zaobao.com' 5 | 6 | 7 | def parse(post): 8 | item = {} 9 | item['description'] = item['title'] = post.css('div.f18.m-eps::text').extract_first() 10 | item['link'] = domain + post.css('a::attr(href)').extract_first() 11 | return item 12 | 13 | 14 | def ctx(category=''): 15 | url = f"{domain}/realtime/{category}" 16 | tree = fetch(url,headers=DEFAULT_HEADERS) 17 | posts = tree.css('.col-lg-4.col-12.list-block.no-gutters') 18 | # print(posts) 19 | return { 20 | 'title': f'{category} - 早报网即时新闻', 21 | 'link': url, 22 | 'description': f'{category} - 早报网即时新闻', 23 | 'author': 'hillerliao', 24 | 'items': list(map(parse, posts)) 25 | } -------------------------------------------------------------------------------- /rsshub/spiders/zhihu/article.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | 4 | from dataclasses import dataclass, field, asdict 5 | from datetime import datetime 6 | 7 | import requests 8 | from rsshub.utils import fetch 9 | 10 | 11 | def get_value(d): 12 | return list(d.values())[0] 13 | 14 | 15 | @dataclass 16 | class Feed: 17 | link: str 18 | title: str = '' 19 | author: str = '未知作者' 20 | description: str = '' 21 | items: list = field(default_factory=list) 22 | 23 | 24 | @dataclass 25 | class AtomEntry: 26 | link: str 27 | title: str = '' 28 | author: str = '未知作者' 29 | pubDate: datetime = datetime.now() 30 | updated_time: datetime = datetime.now() 31 | 32 | description: str = '' 33 | content: str = '' 34 | 35 | 36 | class ZhihuAnswer(AtomEntry): 37 | def get(self): 38 | tree = fetch(self.link) 39 | self.title = tree.css('h1::text').get() 40 | self.content = zhihu_figure_transfer(tree.css('.RichText').get()) 41 | self.description = self.content 42 | 43 | # author 44 | 45 | self.author = json.loads(tree.xpath('//div[@class="ContentItem AnswerItem"]/@data-zop').get())['authorName'] 46 | 47 | meta: dict = get_value(json.loads(tree.css("#js-initialData::text").get()) 48 | ['initialState']['entities']['questions']) 49 | 50 | self.pubDate = datetime.fromtimestamp(meta['created']) 51 | self.updated_time = datetime.fromtimestamp(meta['updatedTime']) 52 | 53 | 54 | class ZhihuZhuanlanArticle(AtomEntry): 55 | def get(self): 56 | tree = fetch(self.link) 57 | self.title = tree.css('h1::text').get() 58 | author = tree.xpath('//meta[@itemProp="name"]/@content').get() 59 | if author: 60 | self.author = author 61 | self.content = zhihu_figure_transfer(tree.css('article').css('.RichText').get()) 62 | self.description = self.content 63 | 64 | # 65 | data = json.loads(tree.css("#js-initialData::text").get()) 66 | metadata = list(data['initialState']['entities']['articles'].values())[0] 67 | self.pubDate = datetime.fromtimestamp(metadata['created']) 68 | self.updated_time = 
datetime.fromtimestamp(metadata['updated']) 69 | 70 | 71 | class ZhihuQuestion(Feed): 72 | 73 | def get_description(self): 74 | tree = fetch(self.link) 75 | self.title = tree.css('title::text').get() 76 | self.description = tree.xpath('//meta[@name="description"]/text()').get() 77 | 78 | data = json.loads(tree.css("#js-initialData::text").get()) 79 | for answer_id in list(data['initialState']['question']['answers'].values())[0]['ids']: 80 | assert answer_id['targetType'] == 'answer' 81 | item = ZhihuAnswer(f'{self.link}/answer/{answer_id["target"]}') 82 | item.get() 83 | self.items.append(item) 84 | 85 | self.next = list(data['initialState']['question']['answers'].values())[0]['next'] 86 | 87 | def get_all(self): 88 | if 'next' not in self.__dict__: 89 | self.get_description() 90 | 91 | while True: 92 | data = json.loads(requests.get(self.next).text) 93 | 94 | for d in data['data']: 95 | target = d['target'] 96 | author = target['author']['name'] 97 | content = zhihu_figure_transfer(target['content']) 98 | 99 | self.items.append(ZhihuAnswer( 100 | title=f'{author}的回答', 101 | author=author, 102 | link=f'{self.link}/answer/{target["id"]}', 103 | pubDate=datetime.fromtimestamp(target['created_time']), 104 | updated_time=datetime.fromtimestamp(target['updated_time']), 105 | description=zhihu_figure_transfer(content) 106 | )) 107 | 108 | if data['paging']['is_end']: 109 | del self.next 110 | break 111 | 112 | self.next = data['paging']['next'] 113 | 114 | 115 | def zhihu_figure_transfer(content): 116 | pattern = r'(.*?)(.*?)' 117 | return re.sub(pattern, lambda match: match.group(2), content) 118 | 119 | 120 | def ctx_question(qid): 121 | url = f'https://www.zhihu.com/question/{qid}' 122 | question = ZhihuQuestion(url) 123 | question.get_all() 124 | return asdict(question) -------------------------------------------------------------------------------- /rsshub/spiders/zhihu/collection.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | from .article import ZhihuAnswer, ZhihuZhuanlanArticle 5 | 6 | 7 | 8 | def get_metadata(collection_id): 9 | response = requests.get(f'https://api.zhihu.com/collections/{collection_id}') 10 | response.raise_for_status() 11 | data = json.loads(response.text)['collection'] 12 | 13 | metadata = dict() 14 | metadata['link'] = data['url'] 15 | metadata['title'] = data['title'] 16 | # metadata['created_time'] = data['created_time'] 17 | # metadata['updated_time'] = data['updated_time'] 18 | return metadata 19 | 20 | def ctx(collection_id): 21 | 22 | # meta 23 | metadata = get_metadata(collection_id) 24 | 25 | # content 26 | 27 | response = requests.get(f'https://www.zhihu.com/api/v4/collections/{collection_id}/items?limit=20&offset=0') 28 | response.raise_for_status() 29 | data = json.loads(response.text) 30 | items = [] 31 | 32 | for d in data['data']: 33 | if d['content']['type'] == 'answer': 34 | item = ZhihuAnswer(d['content']['url']) 35 | elif d['content']['type'] == 'article': 36 | item = ZhihuZhuanlanArticle(d['content']['url']) 37 | else: 38 | assert False 39 | item.get() 40 | items.append(item) 41 | 42 | metadata['items'] = items 43 | 44 | return metadata 45 | -------------------------------------------------------------------------------- /rsshub/spiders/zhihu/explore.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | 3 | from .article import * 4 | 5 | 6 | def ctx(): 7 | r_url = 'https://www.zhihu.com/explore' 8 | 
tree = fetch(r_url) 9 | items = {} 10 | channel = {} 11 | 12 | hot_question = tree.css('.css-1nd7dqm') 13 | newest_topic = tree.css('.ExploreSpecialCard-contentTitle') 14 | discussion = tree.css('.ExploreRoundtableCard-questionTitle') 15 | collection_card = tree.css('.ExploreCollectionCard-contentTitle') 16 | 17 | for post in chain(hot_question, collection_card, discussion): #, newest_topic): 18 | title = post.css('a::text').extract_first() 19 | link: str = post.css('a::attr(href)').extract_first() 20 | 21 | if link: 22 | if not (link.startswith('https://www.zhihu.com') 23 | or link.startswith('https://zhuanlan.zhihu.com')): 24 | link = f'https://www.zhihu.com{link}' 25 | 26 | if link.startswith('https://www.zhihu.com/question/'): 27 | item = ZhihuQuestion(link, title=title) 28 | channel[link] = item 29 | elif link.startswith('https://zhuanlan.zhihu.com/p'): 30 | item = ZhihuZhuanlanArticle(link) 31 | item.get() 32 | items[link] = item 33 | elif link.startswith('https://www.zhihu.com/answer/'): 34 | item = ZhihuAnswer(link) 35 | item.get() 36 | items[link] = item 37 | else: 38 | items[link] = { 39 | 'title': title, 40 | 'link': link, 41 | 'description': title 42 | } 43 | 44 | for c in channel.values(): 45 | c.get_description() 46 | for i in c.items: 47 | items[i.link] = i 48 | 49 | return { 50 | 'title': f'发现 - 知乎', 51 | 'link': r_url, 52 | 'items': list(items.values()) 53 | } 54 | -------------------------------------------------------------------------------- /rsshub/spiders/zhihu/roundtable.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | from .article import ZhihuQuestion 5 | 6 | def ctx(name): 7 | url = f'https://www.zhihu.com/api/v4/roundtables/{name}/hot-questions?include=data[*].question.relationship' 8 | response = requests.get(url) 9 | response.raise_for_status() 10 | 11 | data = json.loads(response.text) 12 | items = [] 13 | 14 | for d in data['data']: 15 | item = ZhihuQuestion(f'https://www.zhihu.com/question/{d["question"]["id"]}') 16 | item.get_description() 17 | 18 | items.append(item) 19 | 20 | return { 21 | 'title': 'roundtable', 22 | 'items': items 23 | } 24 | -------------------------------------------------------------------------------- /rsshub/static/css/style.css: -------------------------------------------------------------------------------- 1 | /* global */ 2 | 3 | nav { 4 | margin-bottom: 30px; 5 | } 6 | 7 | .jumbotron { 8 | margin-top: 20px; 9 | padding-top: 38px; 10 | padding-bottom: 38px; 11 | } 12 | 13 | .tip { 14 | /* from github.com */ 15 | position: relative; 16 | padding: 40px; 17 | text-align: center; 18 | background-color: #fafbfc; 19 | border: 1px solid #e1e4e8; 20 | border-radius: 3px; 21 | box-shadow: inset 0 0 10px rgba(27, 31, 35, 0.05); 22 | } 23 | 24 | .hide { 25 | display: none; 26 | } 27 | 28 | .inline { 29 | display: inline; 30 | } 31 | 32 | .page-header { 33 | padding-top: 20px; 34 | padding-bottom: 20px; 35 | } 36 | 37 | .page-footer { 38 | padding-top: 40px; 39 | } 40 | 41 | 42 | /* footer */ 43 | 44 | footer { 45 | margin: 30px 0; 46 | padding: 20px 0; 47 | border-top: 1px solid #e5e5e5; 48 | } 49 | -------------------------------------------------------------------------------- /rsshub/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hillerliao/RSSHub-python/a51a80606a4148224867f9f77e5411fe99ac4d5e/rsshub/static/favicon.ico 
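All of the spider modules above follow the same contract: a `ctx(...)` function (or helper such as `ctx_question`) returns a dict with `title`, `link`, `description`, `author` and an `items` list, which the Atom template later serialises. A minimal sketch of exercising one of them outside Flask — this is illustrative only and not part of the repo; it assumes the `rsshub` package is importable and the remote site is reachable:

```python
# Minimal local smoke test for one spider (illustrative only, not part of the repo).
# Assumes the rsshub package is on PYTHONPATH and zaobao.com is reachable;
# network errors are deliberately left unhandled to keep the sketch short.
from rsshub.spiders.zaobao import realtime

feed = realtime.ctx(category='china')   # mirrors the route /zaobao/realtime/china
print(feed['title'])                    # e.g. 'china - 早报网即时新闻'
for item in feed['items'][:5]:
    print(item['title'], '->', item['link'])
```

The zhihu spiders return the equivalent structure, built from the `Feed`/`AtomEntry` dataclasses instead of plain dicts.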
-------------------------------------------------------------------------------- /rsshub/templates/errors/400.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | {% block title %}400 错误{% endblock %} 4 | 5 | {% block content %} 6 |
7 |

400 Bad Request

8 |
9 | {% endblock %} -------------------------------------------------------------------------------- /rsshub/templates/errors/404.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | {% block title %}404 错误{% endblock %} 4 | 5 | {% block content %} 6 |
7 |

404 Not Found

8 |
9 | {% endblock %} -------------------------------------------------------------------------------- /rsshub/templates/errors/500.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | {% block title %}500 错误{% endblock %} 4 | 5 | {% block content %} 6 |
7 |

服务器出错

8 |
9 | {% endblock %} -------------------------------------------------------------------------------- /rsshub/templates/layout.html: -------------------------------------------------------------------------------- 1 | {% from 'bootstrap/nav.html' import render_nav_item %} 2 | 3 | 4 | 5 | 6 | {% block head %} 7 | 8 | 9 | {% block title %}{% endblock title %} 10 | 11 | {% block styles %} {{ bootstrap.load_css() }} 12 | 13 | {% endblock styles %} {% endblock head %} 14 | 15 | {{ analytics }} 16 | 29 | 30 | 46 | 47 | 48 | 49 | 50 | 51 | 53 | 54 | {% block nav %} 55 | 68 | {% endblock nav %} 69 |
70 | {% with messages = get_flashed_messages(with_categories=true) %} {% if messages %} {% for category, message in messages %} 71 | 74 | {% endfor %} {% endif %} {% endwith %} {% block content %}{% endblock %} {% block footer %} 75 | 86 | {% endblock footer %} 87 |
88 | {% block scripts %} {{ bootstrap.load_js() }} {{ moment.include_moment() }} {{ moment.locale('zh-cn') }} {% endblock %} 89 | 90 | 91 | -------------------------------------------------------------------------------- /rsshub/templates/main/atom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | {{config['SITE_NAME']}} 4 | {{config['EMAIL']}} 5 | zh-cn 6 | {{link}} 7 | <![CDATA[{{title|safe}}]]> 8 | 9 | 10 | 11 | 12 | 13 | {% for item in items %} 14 | 15 | {{item.link}} 16 | <![CDATA[{{item.title|safe}}]]> 17 | 18 | {{item.pubDate|default(now)}} 19 | {{item.pubDate|default(now)}} 20 | 21 | 22 | 23 | 24 | {% endfor %} 25 | -------------------------------------------------------------------------------- /rsshub/templates/main/feeds.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} {% block title %}All Feeds{% endblock title %} {% block content %} 2 | 3 | 4 |
5 |
6 |

RSS Filter 参数

7 |
RSS 过滤参数 by hillerliao
8 |

举例:https://pyrsshub.vercel.app/filter?feed=https://sspai.com/feed&include_title=征文|派早报

9 |

参数:include_title 匹配标题,支持多关键词,用 | 分隔。

10 |

参数:include_description 匹配摘要

11 |

参数:exclude_title 排除标题

12 |

参数:exclude_description 排除摘要

13 |

参数:limit 限制条数

14 | 15 |
16 |
17 |
18 | 19 | 20 | 21 |
22 |
23 |

RSS 代理

24 |
RSS 代理 by hillerliao
25 |

举例:https://pyrsshub.vercel.app/filter?feed=https://sspai.com/feed

26 |

路由:/filter?feed=:feed

27 |

参数:feed [rss 地址]

28 |
29 |
30 |
31 | 32 | 33 | 34 |
35 |
36 |

The world in brief - The Economist

37 |
World Brief by hillerliao
38 |

举例:https://pyrsshub.vercel.app/economist/worldbrief

39 |

路由:/economist/worldbrief

40 |
41 |
42 |
43 | 44 | 45 | 46 |
47 |
48 |

Symbol Change History - Nasdaq

49 |
Symbol Change History by hillerliao
50 |

举例:https://pyrsshub.vercel.app/nasdaq/symbol_change

51 |

路由:/nasdaq/symbol_change

52 |
53 |
54 |
55 | 56 | 57 | 58 |
59 |
60 |

NHK

61 |
NHK - Web News Easy by hillerliao
62 |

举例:https://pyrsshub.vercel.app/nhk/newseasy

63 |

路由:/nhk/newseasy

64 | 65 |
NHK - topic news by hillerliao
66 |

举例:https://pyrsshub.vercel.app/nhk/topic/0001595

67 |

路由:/nhk/topic/:category

68 |

参数:category, topic id

69 | 70 |
71 |
72 |
73 | 74 | 75 | 76 |
77 |
78 |

搜索提示 - 百度

79 |
搜索提示 - 百度 by hillerliao
80 |

举例:https://pyrsshub.vercel.app/baidu/suggest/weishenme

81 |

路由:/baidu/suggest/:category

82 |

参数:category,关键词

83 |
84 |
85 |
86 | 87 | 88 | 89 | 90 |
91 |
92 |

湖南竞价

93 |
湖南竞价 by hillerliao
94 |

举例:https://pyrsshub.vercel.app/hnzcy/bidding/Newest

95 |

路由:/hnzcy/bidding/:type

96 |

参数:type,类型,必填, Newest | Result,注意首字母大写

97 |
98 |
99 |
100 | 101 | 102 | 103 |
104 |
105 |

Realtime - 彭博商业周刊

106 |
Realtime by hillerliao
107 |

举例:https://pyrsshub.vercel.app/bbwc/realtime/2

108 |

路由:/bbwc/realtime/:category

109 |

参数:category,分类,1 - 中文、2 - 英文

110 |
111 |
112 |
113 | 114 | 115 | 116 |
117 |
118 |

Top App - App Store

119 |
Top App by hillerliao
120 |

举例:https://pyrsshub.vercel.app/appstore/top/us/36

121 |

路由:/appstore/top/:countrycode/:genreid

122 |

参数:countrycode,国家/地区代码, 如 us、cn、ru、br;genreid,类别,如 36, 6000,6014,6026,更多详见 https://t.ly/vYJI

123 |
124 |
125 |
126 | 127 | 128 |
129 |
130 |

传送门-失效

131 |
文章 by alphardex
132 |

举例:https://pyrsshub.vercel.app/chuansongme/articles

133 |

路由:/chuansongme/articles/:category

134 |

参数:category [默认为“最新”]

135 | 136 | 137 | 138 | {% for th in ['精选','区块链','汽车','创意科技','媒体达人','电影音乐','娱乐休闲','生活旅行','学习工具','历史读书','金融理财','美食菜谱'] %} 139 | 140 | {% endfor %} 141 | 142 | 143 | 144 | 145 | {% for td in ['select', 'blockchain', 'auto', 'ideatech', 'newsmedia', 'moviemusic', 'fun', 'lifejourney', 'utility', 'hisbook', 'finance', 'food']%} 146 | 147 | {% endfor %} 148 | 149 | 150 |
{{th}}
{{td}}
151 |
152 |
153 |
154 | 155 |
156 |
157 |

CTOLib-失效

158 |
话题 by alphardex
159 |

举例:https://pyrsshub.vercel.app/ctolib/topics

160 |

路由:/ctolib/topics/:category

161 |

参数:category [默认为“默认排序”]

162 | 163 | 164 | 165 | {% for th in ['最新发布', '优质主题'] %} 166 | 167 | {% endfor %} 168 | 169 | 170 | 171 | 172 | {% for td in ['last', 'popular']%} 173 | 174 | {% endfor %} 175 | 176 | 177 |
{{th}}
{{td}}
178 |
179 |
180 |
181 |
182 |
183 |

InfoQ

184 |
推荐内容 by alphardex & hillerliao
185 |

举例:https://pyrsshub.vercel.app/infoq/recommend

186 |

路由:/infoq/recommend

187 | 188 |
主题内容 by hillerliao
189 |

举例:https://pyrsshub.vercel.app/infoq/topic/159

190 |

路由:/infoq/topic/:category

191 | 192 |
作者内容 by hillerliao
193 |

举例:https://pyrsshub.vercel.app/infoq/profile/8D1F7C1F2C23FD

194 |

路由:/infoq/profile/:category

195 | 196 |
搜索结果 by hillerliao
197 |

举例:https://pyrsshub.vercel.app/infoq/search/金融/2

198 |

路由:/infoq/search/:category/:type

199 |

参数:category [必填,关键词],type [必填,类型,0 全部 | 1 微信公众号 | 2 infoQ网站 ]

200 |
201 |
202 |
203 | 204 |
205 |
206 |

Readhub

207 |
主题内容 by hillerliao
208 |

举例:https://pyrsshub.vercel.app/readhub/topic/10/0acaf84bdef38ea9

209 |

路由:/readhub/topic/:type/:category

210 |

参数:type [必填,类型,10 entity(实体) | 22 tag(标签)]

211 | 212 |
213 |
214 |
215 | 216 |
217 |
218 |

Futu

219 |
快讯 by hillerliao
220 |

举例:https://pyrsshub.vercel.app/futu/live/zh-cn

221 |

路由:/futu/live/:lang

222 |

参数:lang [必填,语言,zh-cn / en-us]

223 | 224 |
225 |
226 |
227 | 228 |
229 |
230 |

巨潮资讯

231 |
公司公告 by hillerliao
232 |

举例:https://pyrsshub.vercel.app/cninfo/announcement/all/gqjl

233 |

举例:https://pyrsshub.vercel.app/cninfo/announcement/all/gqbd_预披露,股权变动类公告中标题含有「预披露」的公告

234 |

路由:/cninfo/announcement/:stock_id/:category

235 |
236 |
237 |
238 | 239 |
240 |
241 |

东兴资管

242 |
产品公告 by hillerliao
243 |

举例:https://pyrsshub.vercel.app/dxzg/notice

244 |

路由:/dxzg/notice

245 |
246 |
247 |
248 | 249 | 250 | 251 |
252 |
253 |

Earnings Date

254 |
Earnings Date by hillerliao
255 |

举例:https://pyrsshub.vercel.app/earningsdate/businesswire

256 |

路由:/earningsdate/:category

257 |

参数:category [必填,可以为“businesswire、globenewswire、prnewswire”]

258 |
259 |
260 |
261 | 262 | 263 | 264 |
265 |
266 |

界面快讯

267 |
界面快讯 by hillerliao
268 |

举例:https://pyrsshub.vercel.app/jiemian/newsflash/166

269 |

路由:/jiemian/newsflash/:category

270 |

参数:category [必填,见界面快讯栏目https://www.jiemian.com/lists/4.html]

271 |
272 |
273 |
274 | 275 | 276 | 277 |
278 |
279 |

TADOKU.ORG

280 |
图书 by hillerliao
281 |

举例:https://pyrsshub.vercel.app/tadoku/books/l0

282 |

路由:/tadoku/books/:category

283 |

参数:category [必填,难度等级,l0、l1、l2、l3、l4、l5、0。0为不指定 ]

284 |
285 |
286 |
287 | 288 | 289 | 290 |
291 |
292 |

证监会审核进度

293 |
证监会审核进度 by hillerliao
294 |

举例:https://pyrsshub.vercel.app/csrc/audit/a1d50077cd7f4b15bd1c8d6163f32850

295 |

路由:/csrc/audit/:category

296 |

参数:category [必填,见证监会栏目 https://neris.csrc.gov.cn/alappl/home/gongshi]

297 |
298 |
299 |
300 | 301 | 302 | 303 |
304 |
305 |

财新网滚动新闻

306 |
财新网滚动新闻 by hillerliao
307 |

举例:https://pyrsshub.vercel.app/caixin/scroll/125

308 |

路由:/caixin/scroll/:category

309 |

参数:category [必填,见财新网滚动频道 http://www.caixin.com/search/scroll/0.jsp]

310 |
311 |
312 |
313 | 314 | 315 | 316 |
317 |
318 |

网易跟贴

319 |
网易跟贴 by hillerliao
320 |

举例:https://pyrsshub.vercel.app/netease/comment/heated

321 |

路由:/netease/comment/:category

322 |

参数:category [必填, heated|build|splendid]

323 |
324 |
325 |
326 | 327 | 328 | 329 |
330 |
331 |

爱思想搜索结果

332 |
爱思想搜索结果 by hillerliao
333 |

举例:https://pyrsshub.vercel.app/aisixiang/search/author/郑永年

334 |

路由:/aisixiang/search/:category/:keywords

335 |

参数:category [必填,author|title|keywords], keywords 必填

336 |
337 |
338 |
339 | 340 | 341 | 342 |
343 |
344 |

东方财富网行业/个股研报

345 |
东方财富网行业/个股研报 by hillerliao
346 |

举例:https://pyrsshub.vercel.app/eastmoney/report/stock/473

347 |

路由:/eastmoney/report/:type/:category

348 |

参数:type, category [必填,见东方财富网研报频道 ]

349 |
350 |
351 |
352 | 353 | 354 | 355 |
356 |
357 |

选股宝板块/主题动态

358 |
选股宝板块/主题动态 by hillerliao
359 |

举例:https://pyrsshub.vercel.app/xuangubao/theme/17006066

360 |

路由:/xuangubao/:type/:category

361 |

参数:type = theme|subject, category [必填,板块/主题ID]

362 |
363 |
364 |
365 | 366 | 367 | 368 |
369 |
370 |

财联社主题动态

371 |
财联社主题动态 by hillerliao
372 |

举例:https://pyrsshub.vercel.app/cls/subject/1345

373 |

路由:/cls/subject/:category

374 |

参数:category [必填,见财联社APP主题栏目]

375 |
376 |
377 |
378 | 379 | 380 | 381 |
382 |
383 |

财联社电报

384 |
财联社电报 by hillerliao
385 |

举例:https://pyrsshub.vercel.app/cls/telegraph

386 |

路由:/cls/telegraph

387 |
388 |
389 |
390 | 391 | 392 | 393 |
394 |
395 |

链得得栏目动态

396 |
链得得栏目动态 by hillerliao
397 |

举例:https://pyrsshub.vercel.app/chaindd/column/3158465

398 |

路由:/chaindd/column/:category

399 |

参数:category [必填,见链得得栏目url中的数字编号]

400 |
401 |
402 |
403 | 404 | 405 | 406 |
407 |
408 |

早报网 即时新闻文章列表

409 |
早报网 即时新闻文章列表 by hillerliao
410 |

举例:https://pyrsshub.vercel.app/zaobao/realtime/china

411 |

路由:/zaobao/realtime/:category

412 |

参数:category [必填,见 早报网 官网]

413 |
414 |
415 |
416 | 417 | 418 | 419 |
420 |
421 |

Techcrunch tag文章列表

422 |
Techcrunch tag文章列表 by hillerliao
423 |

举例:https://pyrsshub.vercel.app/techcrunch/tag/216504

424 |

路由:/techcrunch/tag/:category

425 |

参数:category [必填,见 techcrunch 官网]

426 |
427 |
428 |
429 | 430 | 431 | 432 |
433 |
434 |

未央网-首页

435 |
未央网-首页 by hillerliao
436 |

举例:https://pyrsshub.vercel.app/weiyangx/home/

437 |

路由:/weiyangx/home/

438 |
439 |
440 |
441 | 442 | 443 | 444 |
445 |
446 |

未央网-国际快讯

447 |
未央网-国际快讯 by hillerliao
448 |

举例:https://pyrsshub.vercel.app/weiyangx/express/

449 |

路由:/weiyangx/express/

450 |
451 |
452 |
453 | 454 | 455 | 456 |
457 |
458 |

扬帆出海-快讯

459 |
扬帆出海-快讯 by hillerliao
460 |

举例:https://pyrsshub.vercel.app/yfchuhai/express/

461 |

路由:/yfchuhai/express/

462 |
463 |
464 |
465 | 466 | 467 | 468 |
469 |
470 |

未央网 tag文章列表

471 |
未央网 tag文章列表 by hillerliao
472 |

举例:https://pyrsshub.vercel.app/weiyangx/tag/金融科技

473 |

路由:/weiyangx/tag/:category

474 |

参数:category [必填,见 weiyangx 官网]

475 |
476 |
477 |
478 | 479 | 480 | 481 |
482 |
483 |

今天看啥专栏文章列表

484 |
今天看啥专栏文章列表 by hillerliao
485 |

举例:https://pyrsshub.vercel.app/jintiankansha/column/KgkNwnDrsy

486 |

路由:/jintiankansha/column/:category

487 |

参数:category [必填,见 jintiankansha.me]

488 |
489 |
490 |
491 | 492 | 493 | 494 |
495 |
496 |

产品公告 - 机构间市场

497 |
产品公告 - 机构间市场 by hillerliao
498 |

举例:https://pyrsshub.vercel.app/interotc/cpgg/东兴证券

499 |

路由:/interotc/cpgg/:category

500 |

参数:category [必填,标题中的关键词]

501 |
502 |
503 |
504 | 505 | 506 | 507 |
508 |
509 |

股票评级 - Benzinga

510 |
股票评级 - Benzinga by hillerliao
511 |

举例:https://pyrsshub.vercel.app/benzinga/ratings/wb

512 |

路由:/benzinga/ratings/:category

513 |

参数:category [必填, 股票代码]

514 |
515 |
516 |
517 | 518 | 519 | 520 |
521 |
522 |

抽屉新热榜 - 用户

523 |
抽屉新热榜 - 用户 by hillerliao
524 |

举例:https://pyrsshub.vercel.app/chouti/user/wb_5517143496

525 |

路由:/chouti/user/:category

526 |

参数:category [必填, 用户id]

527 |
528 |
529 |
530 | 531 | 532 | 533 |
534 |
535 |

抽屉新热榜 - 话题

536 |
抽屉新热榜 - 话题 by hillerliao
537 |

举例:https://pyrsshub.vercel.app/chouti/section/1116

538 |

路由:/chouti/section/:category

539 |

参数:category [必填, 话题id]

540 |
541 |
542 |
543 | 544 | 545 | 546 |
547 |
548 |

抽屉新热榜 - 搜索

549 |
抽屉新热榜 - 搜索 by hillerliao
550 |

举例:https://pyrsshub.vercel.app/chouti/search/China

551 |

路由:/chouti/search/:category

552 |

参数:category [必填, 搜索关键词]

553 |
554 |
555 |
556 | 557 | 558 | 559 |
560 |
561 |

微信公众号 - 标签文章列表

562 |
微信公众号 - 标签文章列表 by hillerliao
563 |

举例:https://pyrsshub.vercel.app/mp/tag/MzI5MjM3OTA0MA/1500461858015772673

564 |

路由:/mp/tag/:biz/:tag

565 |

biz [必填, 公众号id],tag [必填, 标签 id]

566 |
567 |
568 |
569 | 570 | 571 | 572 |
573 |
574 |

微信公众号 - 推荐标签文章列表

575 |
微信公众号 - 推荐标签文章列表 by hillerliao
576 |

举例:https://pyrsshub.vercel.app/mp/rtag/科技/ChatGPT

577 |

路由:/mp/rtag/:c1/:tag

578 |

c1 [必填, 分类],tag [必填, 标签 id]

579 |
580 |
581 |
582 | 583 | 584 | 585 |
586 |
587 |

Producthunt - 搜索结果

588 |
Producthunt - 搜索结果 by hillerliao
589 |

举例:https://pyrsshub.vercel.app/producthunt/search/wechat/30

590 |

路由:/producthunt/search/:keyword/:period

591 |

keyword [必填, 搜索关键词],period [必填, 时间范围]

592 |
593 |
594 |
595 | 596 | 597 | 598 |
599 |
600 |

蒲公英 - App 更新日志

601 |
蒲公英 - App 更新日志 by hillerliao
602 |

举例:https://pyrsshub.vercel.app/pgyer/22bY

603 |

路由:/pgyer/:pageid

604 |

pageid [必填, 页面 id]

605 |
606 |
607 |
608 | 609 | 610 | 611 |
612 |
613 |

新京报 - 频道

614 |
新京报 - 频道 by hillerliao
615 |

举例:https://pyrsshub.vercel.app/bjnews/beijing

616 |

路由:/bjnews/:channelid

617 |

channelid [必填, 频道 id]

618 |
619 |
620 |
621 | 622 | 623 | 624 |
625 |
626 |

新华网

627 |
新华网 - 最新播报 by flyingicedragon
628 |

举例:https://pyrsshub.vercel.app/xinhuanet/zuixinbobao

629 |

路由:/xinhuanet/zuixinbobao

630 |
新华网 - 时政联播 by flyingicedragon
631 |

举例:https://pyrsshub.vercel.app/xinhuanet/shizhenglianbo

632 |

路由:/xinhuanet/shizhenglianbo

633 |
新华网 - 要点聚焦 by flyingicedragon
634 |

举例:https://pyrsshub.vercel.app/xinhuanet/yaodianjujiao

635 |

路由:/xinhuanet/yaodianjujiao

636 |
新华网 - 国际 by flyingicedragon
637 |

举例:https://pyrsshub.vercel.app/xinhuanet/world

638 |

路由:/xinhuanet/world

639 |
640 |
641 |
642 | 643 | 644 | 645 |
646 |
647 |

微信公众号 - 最新文章 - 搜狗方案

648 |
微信公众号 - 最新文章 by hillerliao
649 |

举例:https://pyrsshub.vercel.app/mp/gh/mao-talk

650 |

路由:/mp/gh/:gh

651 |

gh [必填, 公众号id]

652 |
653 |
654 |
655 | 656 | 657 | 658 |
659 |
660 |

微信公众号 - 最新文章 - 游无穷

661 |
微信公众号 - 最新文章 by hillerliao
662 |

举例:https://pyrsshub.vercel.app/mp/youwuqiong/maoyouhuashuo

663 |

路由:/mp/youwuqiong/:author

664 |

author [必填, 作者id,在 youwuqiong.com 文章列表页上找]

665 |
666 |
667 |
668 | 669 | 670 | 671 |
672 |
673 |

知乎

674 | 675 |
Explore by https://github.com/JeffersonYoung
676 |

举例:https://pyrsshub.vercel.app/zhihu/explore

677 |

路由:/zhihu/explore

678 | 679 |
问题 by https://github.com/JeffersonYoung
680 |

举例:https://pyrsshub.vercel.app/zhihu/question/509768617

681 |

路由:/zhihu/question/:qid

682 |

qid [必填, 问题id,如例所示,和网站url上的id相同]
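This route is backed by the `ZhihuQuestion` / `ctx_question` helpers in `rsshub/spiders/zhihu/article.py` shown earlier. A rough sketch of producing the same payload without going through Flask — assuming the package is importable and Zhihu does not block the unauthenticated requests:

```python
# Illustrative sketch: build the /zhihu/question/:qid payload directly.
# ctx_question comes from rsshub/spiders/zhihu/article.py; the question id
# is the one used in the example above. Zhihu may rate-limit or block calls.
from rsshub.spiders.zhihu.article import ctx_question

feed = ctx_question(509768617)
print(feed['title'])
for item in feed['items'][:3]:
    print(item['title'], item['link'])
```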

683 |
684 |
685 |
686 | 687 | 688 | {% endblock content %} -------------------------------------------------------------------------------- /rsshub/templates/main/index.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | {% block title %}Welcome to RSShub!{% endblock title %} 4 | 5 | {% block content %} 6 |
7 |

Welcome to RSSHub!

8 |

If you see this page, RSSHub has been successfully installed and is working.

9 |

10 | View Source 11 |

12 |
13 | {% endblock %} -------------------------------------------------------------------------------- /rsshub/templates/main/word.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} {% block content %} 2 |

{{ word }}

3 | {% endblock %} -------------------------------------------------------------------------------- /rsshub/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from flask import Response 3 | import requests 4 | from parsel import Selector 5 | 6 | DEFAULT_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'} 7 | 8 | 9 | class XMLResponse(Response): 10 | def __init__(self, response, **kwargs): 11 | if 'mimetype' not in kwargs and 'contenttype' not in kwargs: 12 | if response.startswith('