├── .gitignore ├── docker-compose.yml ├── publish_deprecated.py ├── readme.md ├── requirements.txt ├── server.py ├── standalone.py └── wxgzh_api └── updater ├── __init__.py └── exceptions.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .vscode/ 3 | cookies.json 4 | geckodriver.log -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | wxgzh-api: 4 | image: beautyyu/python-selenium:latest 5 | command: sh -c "pip3 install -r requirements.txt && pip3 install waitress && python3 -m waitress server:app" 6 | restart: always 7 | ports: 8 | - 11459:8080 9 | working_dir: /app 10 | volumes: 11 | - .:/app 12 | environment: 13 | COOKIE_FILE: cookies.json 14 | -------------------------------------------------------------------------------- /publish_deprecated.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.common.keys import Keys 3 | from selenium.webdriver.common.action_chains import ActionChains 4 | from selenium.webdriver.support.ui import Select 5 | from selenium.webdriver.common.by import By 6 | from selenium.webdriver.support.ui import WebDriverWait 7 | from selenium.webdriver.support import expected_conditions as EC 8 | from selenium.webdriver.firefox.options import Options 9 | import sys 10 | import time 11 | import json 12 | import urllib 13 | import os 14 | from xvfbwrapper import Xvfb 15 | delay = int(os.getenv('DELAY')) 16 | 17 | 18 | def get_by_css(driver, cssstr, multi=0, button=0): 19 | try: 20 | if button == 0: 21 | myElem = WebDriverWait(driver, 60).until( 22 | EC.presence_of_all_elements_located((By.CSS_SELECTOR, cssstr))) 23 | else: 24 | myElem = [WebDriverWait(driver, 60).until( 25 | EC.element_to_be_clickable((By.CSS_SELECTOR, cssstr)))] 26 | if multi == 1: 27 | return myElem 28 | else: 29 | return myElem[0] 30 | except: 31 | return 0 32 | 33 | 34 | def publish(appid=0): 35 | # load driver and cookies 36 | # vdis = Xvfb() 37 | # vdis.start() 38 | try: 39 | os.remove('geckodriver.log') 40 | except: 41 | pass 42 | options = Options() 43 | options.log.level = "trace" 44 | driver = webdriver.Firefox(options=options) 45 | with open('cookies.json', 'r') as f: 46 | cookies = f.read() 47 | cookies = json.loads(cookies) 48 | driver.get('https://mp.weixin.qq.com/') 49 | for i in cookies: 50 | driver.add_cookie(i) 51 | time.sleep(delay / 3) 52 | driver.get('https://mp.weixin.qq.com/') 53 | real_url = driver.current_url 54 | token = urllib.parse.parse_qs(real_url)['token'][0] 55 | # check list and get appid 56 | driver.get('https://mp.weixin.qq.com/cgi-bin/appmsg?begin=0&count=10&type=77&action=list_card&token={}&lang=zh_CN'.format(token)) 57 | appid = get_by_css( 58 | driver, 'div.publish_card_container:nth-child(2) > div:nth-child(1)').get_attribute('data-appid') 59 | # publish 60 | try: 61 | driver.get('https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit&action=edit&type=77&appmsgid={}&isMul=1&replaceScene=0&isSend=1&isFreePublish=0&token={}&lang=zh_CN'.format(appid, token)) 62 | get_by_css( 63 | driver, '.mass-send__footer .weui-desktop-btn_primary', button=1).click() 64 | al = get_by_css( 65 | driver, '#vue_app > div:nth-child(2) > div:nth-child(2) > div:nth-child(1) > div:nth-child(1) > div:nth-child(3) > div:nth-child(1) > div:nth-child(1) > button', button=1) 66 | print(al) 67 | al.click() 68 | time.sleep(delay * 3) 69 | except: 70 | print('error when publishing') 71 | return -1 72 | 73 | driver.close() 74 | # vdis.stop() 75 | return 0 76 | 77 | 78 | if __name__ == "__main__": 79 | publish() 80 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # wxgzh-api - 获取任意微信公众号的最近文章 2 | 3 | 本项目提供一种基于微信公众平台的方法,获取任意微信公众号的最近文章。 4 | 5 | ## 开始使用 6 | 7 | ### Step 1. 准备 8 | 9 | 0. 创建一个`微信公众平台订阅号` 10 | 1. 若使用`docker`和`docker-compose`部署, 则安装`docker`和`docker-compose` 11 | 12 | 若手动部署,则安装`python3.10+`, `firefox`, `geckodriver`, 以及`requirements.txt`中的依赖 13 | 2. 将项目克隆至本地 14 | ``` 15 | git clone https://github.com/BeautyYuYanli/wxgzh-api.git 16 | ``` 17 | 3. 登录`微信公众平台`, 将`mp.weixin.qq.com`域名下的cookie以`json`格式保存至`wxgzh-api/cookies.json`. 你可以使用这个插件:[chrome](https://chrome.google.com/webstore/detail/%E3%82%AF%E3%83%83%E3%82%AD%E3%83%BCjson%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB%E5%87%BA%E5%8A%9B-for-puppet/nmckokihipjgplolmcmjakknndddifde) [firefox](https://addons.mozilla.org/en-US/firefox/addon/%E3%82%AF%E3%83%83%E3%82%AD%E3%83%BCjson%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB%E5%87%BA%E5%8A%9B-for-puppeteer/) 18 | 19 | ### Step 2. 部署 20 | 21 | #### 方法一、 docker + docker-compose 22 | 23 | 1. `docker-compose up -d` 24 | 25 | #### 方法二、 手动部署 26 | 27 | 1. `pip3 install waitress` 28 | 2. `python3 -m waitress --port=11459 server:app` 29 | 30 | ### Step 3. 使用 31 | 32 | 已部署的服务默认在 `localhost:11459` 监听。 33 | 34 | `/json_feed`: `GET`, 多个参数 `target`, 返回 [RSS JSON Feed](https://www.jsonfeed.org/2020/08/07/json-feed-version.html) (`application/feed+json`)格式的数据: 35 | ``` 36 | http://127.0.0.1:11459/json_feeds?target=声动活泼&target=汪小喵爱大工 37 | ``` 38 | ```json 39 | { 40 | "title": "微信公众号", 41 | "version": "https://jsonfeed.org/version/1.1", 42 | "description": "微信公众号文章更新推送", 43 | "home_page_url": "https://github.com/BeautyyuYanli/wxgzh-api", 44 | "items": [ 45 | { 46 | "authors": [ 47 | { 48 | "name": "声动活泼" 49 | } 50 | ], 51 | "date_published": "2023-03-05T00:00:00+08:00", 52 | "title": "一年了!声动胡同有了这些新变化,邀请你来加入", 53 | "url": "http://mp.weixin.qq.com/s?__biz=MzIwMDczNTE3OQ==&mid=2247496071&idx=1&sn=7024a904a4cf6f448ebbcc2888cf282c&chksm=96fa1123a18d983599aba221d7d1a8cbd0c80d2773f9cdb1b13c2ead9cadcfe905f8ad94fe4f#rd" 54 | }, 55 | { 56 | "authors": [ 57 | { 58 | "name": "汪小喵爱大工" 59 | } 60 | ], 61 | "date_published": "2023-03-05T00:00:00+08:00", 62 | "title": "公告&记录 | 查询汪小喵精神状态", 63 | "url": "http://mp.weixin.qq.com/s?__biz=MzI4NzYwMTYxMQ==&mid=2247487382&idx=1&sn=7774bfc2e7fed6473982b191fac1c225&chksm=ebca6932dcbde024ed77f52f855fe61beb6fcfb49f3700482a52d7c6e0185ddab043968ab052#rd" 64 | } 65 | ] 66 | } 67 | 68 | ``` 69 | 70 | 也可以在不部署的情形下直接拉取数据: 71 | ``` 72 | python standalone.py -h 73 | ``` 74 | ``` 75 | usage: standalone.py [-h] [--cookiefile COOKIEFILE] [--target TARGET [TARGET ...]] 76 | 77 | options: 78 | -h, --help show this help message and exit 79 | --cookiefile COOKIEFILE 80 | --target TARGET [TARGET ...] 81 | 82 | ``` 83 | 84 | ## 开发 85 | 86 | 本项目还可以直接作为 Python 模块使用(TODO) 87 | 88 | ## 其他 89 | 90 | - 频繁请求可能导致账号被风控。 91 | - Cookies 有效期为 3 天,过期后需要重新获取。 92 | 93 | 本项目还可能对账号造成其它未知的影响, 请自行承担风险。 94 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.28.1 2 | selenium>=4.7.2 3 | python-dateutil>=2.8.2 4 | flask>=2.2.0 -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | import os 2 | from flask import Flask, request, make_response 3 | from wxgzh_api.updater import Updater 4 | from wxgzh_api.updater.exceptions import CookieException 5 | 6 | app = Flask(__name__) 7 | 8 | 9 | @app.route('/json_feeds', methods=['GET']) 10 | def json_feeds(): 11 | try: 12 | updater = Updater(cookiefile=os.getenv('COOKIE_FILE')) 13 | except CookieException as e: 14 | return make_response(str(e), 500) 15 | except Exception as e: 16 | return make_response(str(e), 500) 17 | result = updater.update(request.args.getlist('target')) 18 | content = { 19 | "version": "https://jsonfeed.org/version/1.1", 20 | "title": "微信公众号", 21 | "home_page_url": "https://github.com/BeautyyuYanli/wxgzh-api", 22 | "description": "微信公众号文章更新推送", 23 | "items": [item for sublist in result.values() for item in sublist] 24 | } 25 | return make_response(content, 200, {'Content-Type': 'application/feed+json'}) 26 | -------------------------------------------------------------------------------- /standalone.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from wxgzh_api.updater import Updater 4 | from wxgzh_api.updater.exceptions import CookieException 5 | 6 | if __name__ == "__main__": 7 | argparser = argparse.ArgumentParser() 8 | argparser.add_argument('--cookiefile', type=str, 9 | default=os.getenv('COOKIE_FILE')) 10 | argparser.add_argument('--target', type=str, nargs='+', default=[]) 11 | args = argparser.parse_args() 12 | updater = Updater(cookiefile=args.cookiefile) 13 | result = updater.update(args.target) 14 | print(result) 15 | -------------------------------------------------------------------------------- /wxgzh_api/updater/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from selenium import webdriver 3 | from selenium.webdriver.common.keys import Keys 4 | from selenium.webdriver.common.by import By 5 | from selenium.webdriver.support.ui import WebDriverWait 6 | from selenium.webdriver.support import expected_conditions as EC 7 | from selenium.webdriver.firefox.options import Options 8 | from selenium.webdriver.remote.webelement import WebElement 9 | import time 10 | import json 11 | import urllib 12 | from dateutil.parser import isoparse 13 | from .exceptions import CookieException 14 | 15 | 16 | class Updater: 17 | def __init__(self, cookiefile: str | None = None, cookies: dict | None = None, loglevel: str = "warn", headless: bool = True) -> None: 18 | # Load driver and cookies 19 | options = Options() 20 | options.log.level = loglevel 21 | options.headless = headless 22 | self.driver = webdriver.Firefox(options=options) 23 | if cookies == None: 24 | with open(cookiefile if cookiefile else "cookies.json", 'r') as f: 25 | cookies = f.read() 26 | cookies = json.loads(cookies) 27 | self.driver.get('https://mp.weixin.qq.com/') 28 | for i in cookies: 29 | self.driver.add_cookie(i) 30 | # Magic delay 31 | time.sleep(1) 32 | # Refresh the page 33 | self.driver.get('https://mp.weixin.qq.com/') 34 | self.get_by_css('#footer.mp-foot') 35 | real_url = self.driver.current_url 36 | if real_url.split('qq.com')[1] == '/': 37 | raise CookieException( 38 | "Not logged in. Maybe the cookie is expired?") 39 | token = urllib.parse.parse_qs(real_url)['token'][0] 40 | editor_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=10&createType=10&token=' + token + '&lang=zh_CN' 41 | self.driver.get(editor_url) 42 | self.get_by_css('#js_text_editor_tool_link').click() 43 | 44 | def __del__(self) -> None: 45 | self.driver.quit() 46 | pass 47 | 48 | def get_by_css(self, css: str, multi: bool = False, base_elem: WebElement | None = None) -> List[WebElement] | WebElement | None: 49 | try: 50 | myElem = WebDriverWait(base_elem if base_elem else self.driver, 10).until( 51 | EC.presence_of_all_elements_located((By.CSS_SELECTOR, css))) 52 | if multi == True: 53 | return myElem 54 | else: 55 | return myElem[0] 56 | except: 57 | None 58 | 59 | def update(self, subscribe_list: List[str]): 60 | update_pool = {} 61 | 62 | # search for articles 63 | for entry in subscribe_list: 64 | update_pool[entry] = [] 65 | othergzh_button = self.get_by_css( 66 | '.weui-desktop-btn.weui-desktop-btn_default') 67 | othergzh_button.click() 68 | 69 | input_box = self.get_by_css( 70 | '.weui-desktop-form__input_append-in > input') 71 | input_box.send_keys(entry) 72 | input_box.send_keys(Keys.ENTER) 73 | 74 | flag = 0 75 | for i in range(5): 76 | gzh_entry = self.get_by_css( 77 | 'ul.inner_link_account_list > li:nth-child({})'.format(i+1)) 78 | if gzh_entry == 0: 79 | break 80 | if self.get_by_css( 81 | 'ul.inner_link_account_list > li:nth-child({}) strong'.format( 82 | i+1) 83 | ).text == entry: 84 | flag = 1 85 | break 86 | if flag == 0: 87 | gzh_entry = self.get_by_css( 88 | 'ul.inner_link_account_list > li:nth-child(1)') 89 | print('no match for {}, got:'.format(entry)) 90 | print(self.get_by_css( 91 | 'ul.inner_link_account_list > li:nth-child(1) strong').text) 92 | gzh_entry.click() 93 | article_entries: List[WebElement] = self.get_by_css( 94 | '.inner_link_article_item', True) 95 | for article_entry in article_entries: 96 | link_element = self.get_by_css( 97 | 'label.inner_link_article_item > span:nth-child(3) > a', base_elem=article_entry) 98 | title_element = self.get_by_css( 99 | 'div.inner_link_article_title > span:nth-child(2)', base_elem=article_entry) 100 | date_element = self.get_by_css( 101 | 'div.inner_link_article_date', base_elem=article_entry) 102 | link = link_element.get_attribute('href') 103 | title = title_element.get_attribute('innerHTML') 104 | date = date_element.get_attribute('innerHTML') 105 | # it is compatible with JSON feeds 1.1 item 106 | update_pool[entry].append( 107 | { 108 | "url": link, 109 | "title": title, 110 | "authors": [{"name": entry}], 111 | "date_published": isoparse("2023-03-05 +08").isoformat() 112 | }) 113 | 114 | return (update_pool) 115 | 116 | 117 | if __name__ == "__main__": 118 | subscribe_list = [ 119 | '大连理工大学', 120 | ] 121 | updater = Updater(cookiefile='cookies.json', 122 | loglevel="trace", headless=True) 123 | msg = updater.update(subscribe_list) 124 | print(msg) 125 | -------------------------------------------------------------------------------- /wxgzh_api/updater/exceptions.py: -------------------------------------------------------------------------------- 1 | class CookieException(Exception): 2 | pass 3 | --------------------------------------------------------------------------------