├── Qt └── WeChat │ ├── ui转py.bat │ ├── AddMe.txt │ ├── main.cpp │ ├── mainwindow.cpp │ ├── mainwindow.h │ ├── mergy.spec │ ├── WeChat.pro │ ├── WeChat.py │ ├── WeChat.pro.user │ └── mainwindow.ui ├── yf.png ├── icon.ico ├── 效果图.png ├── 抓取文章图.png ├── requirements.txt ├── README.md ├── LICENSE ├── tk └── WeChat_tk.py ├── WeChat.py └── main.py /Qt/WeChat/ui转py.bat: -------------------------------------------------------------------------------- 1 | pyuic5 -o WeChat.py mainwindow.ui -------------------------------------------------------------------------------- /yf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1061700625/WeChat_Article/HEAD/yf.png -------------------------------------------------------------------------------- /icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1061700625/WeChat_Article/HEAD/icon.ico -------------------------------------------------------------------------------- /效果图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1061700625/WeChat_Article/HEAD/效果图.png -------------------------------------------------------------------------------- /抓取文章图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1061700625/WeChat_Article/HEAD/抓取文章图.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyQt5 2 | requests 3 | bs4 4 | selenium 5 | goto-statement 6 | configparser 7 | undetected_chromedriver 8 | lxml 9 | pyautogui 10 | pyinstaller 11 | webdriver_manager -------------------------------------------------------------------------------- /Qt/WeChat/AddMe.txt: 
-------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if __name__ == "__main__": 4 | app = QtWidgets.QApplication(sys.argv) 5 | MainWindow = QtWidgets.QMainWindow() 6 | ui = Ui_Form() 7 | ui.setupUi(MainWindow) 8 | MainWindow.show() 9 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /Qt/WeChat/main.cpp: -------------------------------------------------------------------------------- 1 | #include "mainwindow.h" 2 | #include 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | QApplication a(argc, argv); 7 | MainWindow w; 8 | w.show(); 9 | 10 | return a.exec(); 11 | } 12 | -------------------------------------------------------------------------------- /Qt/WeChat/mainwindow.cpp: -------------------------------------------------------------------------------- 1 | #include "mainwindow.h" 2 | #include "ui_mainwindow.h" 3 | 4 | MainWindow::MainWindow(QWidget *parent) : 5 | QMainWindow(parent), 6 | ui(new Ui::MainWindow) 7 | { 8 | ui->setupUi(this); 9 | } 10 | 11 | MainWindow::~MainWindow() 12 | { 13 | delete ui; 14 | } 15 | -------------------------------------------------------------------------------- /Qt/WeChat/mainwindow.h: -------------------------------------------------------------------------------- 1 | #ifndef MAINWINDOW_H 2 | #define MAINWINDOW_H 3 | 4 | #include 5 | 6 | namespace Ui { 7 | class MainWindow; 8 | } 9 | 10 | class MainWindow : public QMainWindow 11 | { 12 | Q_OBJECT 13 | 14 | public: 15 | explicit MainWindow(QWidget *parent = nullptr); 16 | ~MainWindow(); 17 | 18 | private: 19 | Ui::MainWindow *ui; 20 | }; 21 | 22 | #endif // MAINWINDOW_H 23 | -------------------------------------------------------------------------------- /Qt/WeChat/mergy.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python -*- 2 | 3 | block_cipher = None 4 | 5 | 6 | a = Analysis(['mergy.py'], 7 | 
pathex=['C:\\Users\\Administrator\\Desktop\\Qt\\WeChat'], 8 | binaries=[], 9 | datas=[], 10 | hiddenimports=[], 11 | hookspath=[], 12 | runtime_hooks=[], 13 | excludes=[], 14 | win_no_prefer_redirects=False, 15 | win_private_assemblies=False, 16 | cipher=block_cipher, 17 | noarchive=False) 18 | pyz = PYZ(a.pure, a.zipped_data, 19 | cipher=block_cipher) 20 | exe = EXE(pyz, 21 | a.scripts, 22 | a.binaries, 23 | a.zipfiles, 24 | a.datas, 25 | [], 26 | name='mergy', 27 | debug=False, 28 | bootloader_ignore_signals=False, 29 | strip=False, 30 | upx=True, 31 | runtime_tmpdir=None, 32 | console=True , icon='C:\\Users\\Administrator\\Desktop\\icon.ico') 33 | -------------------------------------------------------------------------------- /Qt/WeChat/WeChat.pro: -------------------------------------------------------------------------------- 1 | #------------------------------------------------- 2 | # 3 | # Project created by QtCreator 2019-02-23T11:54:42 4 | # 5 | #------------------------------------------------- 6 | 7 | QT += core gui widgets 8 | 9 | TARGET = WeChat 10 | TEMPLATE = app 11 | 12 | # The following define makes your compiler emit warnings if you use 13 | # any feature of Qt which has been marked as deprecated (the exact warnings 14 | # depend on your compiler). Please consult the documentation of the 15 | # deprecated API in order to know how to port your code away from it. 16 | DEFINES += QT_DEPRECATED_WARNINGS 17 | 18 | # You can also make your code fail to compile if you use deprecated APIs. 19 | # In order to do so, uncomment the following line. 20 | # You can also select to disable deprecated APIs only up to a certain version of Qt. 21 | #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 22 | 23 | CONFIG += c++11 24 | 25 | SOURCES += \ 26 | main.cpp \ 27 | mainwindow.cpp 28 | 29 | HEADERS += \ 30 | mainwindow.h 31 | 32 | FORMS += \ 33 | mainwindow.ui 34 | 35 | # Default rules for deployment. 
36 | qnx: target.path = /tmp/$${TARGET}/bin 37 | else: unix:!android: target.path = /opt/$${TARGET}/bin 38 | !isEmpty(target.path): INSTALLS += target 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WeChat_Article 2 | 爬取微信公众号文章 3 | 4 | > Bilibili视频演示:https://www.bilibili.com/video/BV1vN411D7Y3/ 5 | 6 | **注意,除非你要断点续传,否则删除目录下conf.ini和url.json再启动!!!!** 7 | 8 | ![image](https://user-images.githubusercontent.com/31002981/217465357-d0737b23-55ec-47d3-b12c-ee8973a04291.png) 9 | 10 | 11 | ## 使用方法: 12 | 1、下载[**Chrome.rar**](https://sxf1024.lanzouo.com/iJ2Rp0mwy50j)并解压为**Chrome**; 13 | 2、点击页面右上角绿色“**Code**”按钮,再点“**Download Zip**”按钮,解压为**default**; 14 | 3、将解压的**Chrome**放到解压的**default**里; 15 | 4、进入**default**目录,运行**main.exe**或者**main.py**; 16 | 5、填入信息,点击“**启动**”即可。 17 | 6、如果想修改UI,可以安装这个:[Qt Designer](https://build-system.fman.io/qt-designer-download) 18 | **************************************************************************************************** 19 | 20 | ## 背景知识: 21 | 利用公众号编辑文章时可以搜索其他公众号文章的功能,实现爬取指定公众号所有文章的目的。 22 | **************************************************************************************************** 23 | 24 | ## 程序原理: 25 | 通过selenium登录获取token和cookie,再自动爬取和下载 26 | * 使用前提: 27 | 1、申请一个免费的微信公众号,个人订阅号即可(https://mp.weixin.qq.com) 28 | **************************************************************************************************** 29 | 30 | ## 更新记录: 31 | 1. 下载文章文字内容到txt 32 | 2. 下载文章图片 33 | 3. 保存HTML文件,并将图片链接指向本地 34 | 4. 添加按时间范围下载 35 | 5. 添加cookie登录,不成功才用selenium浏览器登录 36 | 6. 增加记住密码功能 37 | 7. 修复一些问题,如requests卡死 38 | 8. 添加按关键词下载 39 | 9. 多线程优化下载速度 40 | 10. 增加断点续传功能(可能存在bug,推荐不要用) 41 | 11. 拟增加备用公众号功能(暂未完成) 42 | 12. 下载PDF格式 43 | 13. 
不需要再手动下载Chrome,启动时会自动下载 44 | **************************************************************************************************** 45 | 46 | ## 使用说明: 47 | 创建虚拟环境 48 | ```bash 49 | conda create -n wechat python=3.9 -y 50 | ``` 51 | 52 | 进入虚拟环境 53 | ```bash 54 | conda activate wechat 55 | ``` 56 | 57 | 安装第三方库 58 | ```bash 59 | pip install -r requirements.txt 60 | ``` 61 | > 对于mac用户,安装pyqt5可能会报错,可以尝试: 62 | > ```bash 63 | > brew install pyqt@5 64 | > cp -r /opt/homebrew/Cellar/pyqt@5/5.15.7_2/lib/python3.9/site-packages/* /Users/songxf/miniconda3/envs/wechat/lib/python3.9/site-packages/ 65 | > ``` 66 | > 然后就可以导入了: 67 | > ```python 68 | > import PyQt5 69 | > ``` 70 | 71 | 运行脚本 72 | ```bash 73 | python main.py 74 | ``` 75 | 76 | 打包exe(生成在dist下) 77 | ```bash 78 | pyinstaller -F -w -i icon.ico main.py 79 | ``` 80 | 81 | 82 | ## 其他说明: 83 | - 爬取间隔太短,容易遇到“**访问频繁**”或“**freq_control**”,这时候可以删除**cookie.json**,再重新运行软件,**换个号**继续运行; 84 | - Qt打包完实在是太大了,有大佬会转成Tkinter吗? 85 | 86 | 87 | 欢迎关注微信公众号:xfxuezhang 88 | 89 | # 相关项目 90 | > 感谢大佬们的贡献 ♪(・ω・)ノ 91 | - [web版](https://github.com/wechat-article/wechat-article-exporter) 92 | - [QT6版](https://github.com/wooodypan/WeChat_Article) 93 | 94 | 95 | 96 | --- 97 | ## 打赏 98 | 如果这个项目帮助到了你,欢迎请我喝杯阔落👏🏻 99 | ![yf](yf.png) 100 | 101 | 102 | ## Star History 103 | 104 | [![Star History Chart](https://api.star-history.com/svg?repos=1061700625/WeChat_Article&type=Date)](https://www.star-history.com/#1061700625/WeChat_Article&Date) 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /tk/WeChat_tk.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk 3 | import tkinter.font as tkFont 4 | from datetime import datetime 5 | 6 | class ToolTip: 7 | def __init__(self, widget, text): 8 | self.widget = widget 9 | self.text = text 10 | self.tooltip = None 11 | self.widget.bind("", self.show) 12 | self.widget.bind("", self.hide) 13 | 14 | def show(self, event=None): 15 | x, y, _, _ = self.widget.bbox("insert") 16 | x += self.widget.winfo_rootx() + 25 17 | y += self.widget.winfo_rooty() + 25 18 | self.tooltip = tk.Toplevel(self.widget) 19 | self.tooltip.wm_overrideredirect(True) 20 | self.tooltip.wm_geometry(f"+{x}+{y}") 21 | label = tk.Label(self.tooltip, text=self.text, background="#ffffe0", relief="solid", borderwidth=1) 22 | label.pack() 23 | 24 | def hide(self, event=None): 25 | if self.tooltip: 26 | self.tooltip.destroy() 27 | self.tooltip = None 28 | 29 | class PlaceholderEntry(tk.Entry): 30 | def __init__(self, master=None, placeholder="", 
color="grey", **kwargs): 31 | super().__init__(master, **kwargs) 32 | self.placeholder = placeholder 33 | self.placeholder_color = color 34 | self.default_fg_color = self["fg"] 35 | 36 | self.bind("", self.on_focus_in) 37 | self.bind("", self.on_focus_out) 38 | 39 | self.put_placeholder() 40 | 41 | def put_placeholder(self): 42 | self.delete(0, tk.END) 43 | self.insert(0, self.placeholder) 44 | self.config(fg=self.placeholder_color) 45 | 46 | def on_focus_in(self, event): 47 | if self.get() == self.placeholder: 48 | self.delete(0, tk.END) 49 | self.config(fg=self.default_fg_color) 50 | 51 | def on_focus_out(self, event): 52 | if not self.get(): 53 | self.put_placeholder() 54 | 55 | def get(self): 56 | content = super().get() 57 | if content == self.placeholder: 58 | return "" 59 | else: 60 | return content.strip() 61 | 62 | class Ui_MainWindow: 63 | def setupUi(self, root): 64 | root.title("微信公众号文章 by 小锋学长") 65 | root.geometry("800x700") # 固定窗口大小 66 | root.minsize(800, 700) # 最小尺寸与初始尺寸相同 67 | root.maxsize(800, 700) # 最大尺寸与初始尺寸相同 68 | root.resizable(False, False) # 禁止窗口缩放(宽度和高度都不可调整) 69 | root.configure(bg="#f0f0f0") 70 | try: 71 | root.iconbitmap("../../icon.jpg") 72 | except: 73 | pass 74 | 75 | self.header_font = tkFont.Font(family="华文楷体", size=12, weight="bold") 76 | self.label_font = tkFont.Font(family="宋体", size=10) 77 | self.entry_font = tkFont.Font(family="宋体", size=12) # 保持稍大的字体增加高度 78 | 79 | self.tab_control = ttk.Notebook(root) 80 | self.tab_control.grid(row=0, column=0, sticky='nsew') 81 | 82 | # 配置窗口权重,使内容随窗口大小调整 83 | root.grid_rowconfigure(0, weight=1) 84 | root.grid_columnconfigure(0, weight=1) 85 | 86 | self.tab1 = ttk.Frame(self.tab_control) 87 | self.tab2 = ttk.Frame(self.tab_control) 88 | self.tab_control.add(self.tab1, text=' 公众号搜文章 ') 89 | self.tab_control.add(self.tab2, text=' 关键词搜文章 ') 90 | 91 | self.setup_tab1() 92 | self.setup_tab2() 93 | 94 | def setup_tab1(self): 95 | # 顶部标题区域 96 | top_frame = tk.Frame(self.tab1, bg="#f0f0f0") 97 | 
top_frame.grid(row=0, column=0, columnspan=2, sticky='nsew', padx=5, pady=5) 98 | self.label_head = tk.Label(top_frame, text="****************************************************************************\n* 程序原理: 通过selenium登录获取token和cookie,再自动爬取和下载\n* 使用前提: 申请一个微信公众号 (https://mp.weixin.qq.com)\n开源链接: https://github.com/1061700625/WeChat_Article\n Copyright © SXF 本软件禁止一切形式的商业活动\n****************************************************************************", 99 | font=self.header_font, justify="left", bg="#e0f7fa", fg="#006064", padx=10, pady=10, relief="flat", wraplength=780) 100 | self.label_head.grid(row=0, column=0, sticky='nsew') 101 | 102 | # 配置顶部框架权重 103 | top_frame.grid_columnconfigure(0, weight=1) 104 | top_frame.grid_rowconfigure(0, weight=1) 105 | 106 | # 中间区域:表单 + 按钮(使用 grid 布局) 107 | middle_frame = tk.Frame(self.tab1, bg="#f0f0f0") 108 | middle_frame.grid(row=1, column=0, columnspan=2, sticky='nsew', padx=5, pady=5) 109 | 110 | # 左侧表单区域(垂直排列) 111 | form_frame = tk.Frame(middle_frame, bg="#f0f0f0") 112 | form_frame.grid(row=0, column=0, sticky='nsew', padx=(0, 5)) 113 | 114 | labels = ["目标公众号英文名", "个人公众号账号", "个人公众号密码", "查询间隔(s)", "微信uin和key", "时间范围(年)"] 115 | placeholders = [ 116 | "为空则默认新华社(xinhuashefabu1)", 117 | "为空则自动打开页面后手动输入", 118 | "为空则自动打开页面后手动输入", 119 | "为空则默认为5s,一页约10条", 120 | "URL全复制进来,通过Fiddler抓包", 121 | "" 122 | ] 123 | self.entries = {} 124 | label_width = 15 125 | for i, (label, placeholder) in enumerate(zip(labels, placeholders)): 126 | tk.Label(form_frame, text=label, font=self.label_font, bg="#f0f0f0", width=label_width, anchor='w').grid(row=i, column=0, padx=5, pady=2, sticky='w') 127 | if label == "时间范围(年)": 128 | time_frame = tk.Frame(form_frame, bg="#f0f0f0") 129 | time_frame.grid(row=i, column=1, sticky='w', pady=2) 130 | self.entries["timeStart"] = PlaceholderEntry(time_frame, placeholder="1999", width=8, font=self.entry_font) 131 | self.entries["timeStart"].grid(row=0, column=0, padx=2, pady=2) 132 | tk.Label(time_frame, text="-", 
bg="#f0f0f0").grid(row=0, column=1, padx=2) 133 | self.entries["timeEnd"] = PlaceholderEntry(time_frame, placeholder=str(datetime.now().year), width=8, font=self.entry_font) 134 | self.entries["timeEnd"].grid(row=0, column=2, padx=2, pady=2) 135 | tk.Label(time_frame, text="关键词", font=self.label_font, bg="#f0f0f0", width=8, anchor='w').grid(row=0, column=3, padx=5, pady=2, sticky='w') 136 | self.entries["keyword"] = tk.Entry(time_frame, width=20, font=self.entry_font) 137 | self.entries["keyword"].grid(row=0, column=4, padx=2, pady=2) 138 | else: 139 | self.entries[label] = PlaceholderEntry(form_frame, placeholder=placeholder, width=60, font=self.entry_font) 140 | self.entries[label].grid(row=i, column=1, padx=5, pady=2, sticky='w') 141 | if label == "个人公众号密码": 142 | self.entries[label].config(show='*') 143 | ToolTip(self.entries[label], placeholder) 144 | 145 | # 配置表单框架权重 146 | form_frame.grid_columnconfigure(1, weight=1) 147 | for i in range(len(labels)): 148 | form_frame.grid_rowconfigure(i, weight=0) 149 | 150 | # 右侧按钮区域(垂直排列) 151 | button_frame = tk.Frame(middle_frame, bg="#f0f0f0") 152 | button_frame.grid(row=0, column=1, sticky='ns', padx=5) 153 | 154 | self.pushButton_start = tk.Button(button_frame, text="启动 (*^▽^*)", command=self.Start_Run, bg="#4caf50", fg="white", font=self.label_font, relief="flat", padx=10, pady=5) 155 | self.pushButton_start.grid(row=0, column=0, pady=5, sticky='ew') 156 | self.CheckVar = tk.IntVar() # 创建变量 157 | self.checkBox = tk.Checkbutton(button_frame, text="记住密码", font=self.label_font, bg="#f0f0f0", variable=self.CheckVar) 158 | self.checkBox.grid(row=1, column=0, pady=5, sticky='w') 159 | self.pushButton_stop = tk.Button(button_frame, text="终止  ̄へ ̄", command=self.Stop_Run, bg="#f44336", fg="white", font=self.label_font, relief="flat", padx=10, pady=5) 160 | self.pushButton_stop.grid(row=2, column=0, pady=5, sticky='ew') 161 | 162 | # 配置按钮框架权重 163 | button_frame.grid_rowconfigure((0, 2), weight=0) 164 | 
button_frame.grid_rowconfigure(1, weight=1) 165 | button_frame.grid_columnconfigure(0, weight=1) 166 | 167 | # 底部区域:进度条 + 表格 + 调试信息 168 | bottom_frame = tk.Frame(self.tab1, bg="#f0f0f0") 169 | bottom_frame.grid(row=2, column=0, columnspan=2, sticky='nsew', padx=5, pady=5) 170 | 171 | self.progressBar = ttk.Progressbar(bottom_frame, orient='horizontal', mode='determinate', maximum=100) 172 | self.progressBar.grid(row=0, column=0, columnspan=2, sticky='ew', pady=(0, 5)) 173 | 174 | table_notes_frame = tk.Frame(bottom_frame, bg="#f0f0f0") 175 | table_notes_frame.grid(row=1, column=0, columnspan=2, sticky='nsew') 176 | 177 | self.tableWidget_result = ttk.Treeview(table_notes_frame, columns=('Title', 'URL'), show='headings', height=10) 178 | self.tableWidget_result.heading('Title', text='标题') 179 | self.tableWidget_result.heading('URL', text='链接') 180 | self.tableWidget_result.column('Title', width=300) # 确保不超过窗口 181 | self.tableWidget_result.column('URL', width=200) 182 | self.tableWidget_result.grid(row=0, column=0, sticky='nsew', padx=(0, 5)) 183 | 184 | self.label_notes = tk.Label(table_notes_frame, text="调试信息窗口", font=self.label_font, bg="#fff3e0", fg="#e65100", relief="sunken", anchor='nw', justify='left', wraplength=300, width=35, height=10, padx=5, pady=5) 185 | self.label_notes.grid(row=0, column=1, sticky='nsew', padx=5) 186 | 187 | # 配置底部框架权重 188 | bottom_frame.grid_rowconfigure(1, weight=1) 189 | bottom_frame.grid_columnconfigure(0, weight=1) 190 | table_notes_frame.grid_columnconfigure(0, weight=3) # 表格占更多空间 191 | table_notes_frame.grid_columnconfigure(1, weight=1) # 调试窗口占少部分 192 | table_notes_frame.grid_rowconfigure(0, weight=1) 193 | 194 | # 配置主窗口权重 195 | self.tab1.grid_rowconfigure((0, 1, 2), weight=0) 196 | self.tab1.grid_columnconfigure(0, weight=3) # 表单和表格占更多 197 | self.tab1.grid_columnconfigure(1, weight=1) # 按钮占少部分 198 | 199 | def setup_tab2(self): 200 | # 顶部标题区域 201 | top_frame = tk.Frame(self.tab2, bg="#f0f0f0") 202 | top_frame.grid(row=0, 
column=0, columnspan=2, sticky='nsew', padx=5, pady=5) 203 | self.label_head_2 = tk.Label(top_frame, text="****************************************************************************************************\n* Demo 说明: \n先在“公众号搜文章”页填完整信息,再在本页填入关键词,点击“启动”即可\n Copyright © SXF 本软件禁止一切形式的商业活动\n****************************************************************************************************", 204 | font=self.header_font, justify="left", bg="#e0f7fa", fg="#006064", padx=10, pady=10, relief="flat", wraplength=780) 205 | self.label_head_2.grid(row=0, column=0, sticky='nsew') 206 | 207 | # 配置顶部框架权重 208 | top_frame.grid_columnconfigure(0, weight=1) 209 | top_frame.grid_rowconfigure(0, weight=1) 210 | 211 | # 中间区域:关键词 + 按钮 212 | middle_frame = tk.Frame(self.tab2, bg="#f0f0f0") 213 | middle_frame.grid(row=1, column=0, columnspan=2, sticky='nsew', padx=5, pady=5) 214 | 215 | content_frame = tk.Frame(middle_frame, bg="#f0f0f0") 216 | content_frame.grid(row=0, column=0, sticky='w', pady=10) 217 | tk.Label(content_frame, text="关键词", font=self.label_font, bg="#f0f0f0", width=10, anchor='w').grid(row=0, column=0, padx=5, pady=2, sticky='w') # 减少标签宽度到100 218 | self.lineEdit_keyword_2 = tk.Entry(content_frame, width=35, font=self.entry_font) # 保持35,确保不超过窗口 219 | self.lineEdit_keyword_2.grid(row=0, column=1, padx=5, pady=2, sticky='w') 220 | 221 | button_frame = tk.Frame(middle_frame, bg="#f0f0f0") 222 | button_frame.grid(row=0, column=1, sticky='e', pady=10) 223 | self.pushButton_start_2 = tk.Button(button_frame, text="启动 (*^▽^*)", command=self.Start_Run_2, bg="#4caf50", fg="white", font=self.label_font, relief="flat", padx=10, pady=5) 224 | self.pushButton_start_2.grid(row=0, column=0, padx=5, pady=5, sticky='e') 225 | self.pushButton_stop_2 = tk.Button(button_frame, text="终止  ̄へ ̄", command=self.Stop_Run_2, bg="#f44336", fg="white", font=self.label_font, relief="flat", padx=10, pady=5) 226 | self.pushButton_stop_2.grid(row=1, column=0, padx=5, pady=5, sticky='e') 227 | 228 | # 
配置中间框架权重 229 | middle_frame.grid_columnconfigure(0, weight=3) # 关键词占更多 230 | middle_frame.grid_columnconfigure(1, weight=1) # 按钮占少部分 231 | middle_frame.grid_rowconfigure(0, weight=1) 232 | 233 | # 配置主窗口权重 234 | self.tab2.grid_rowconfigure((0, 1), weight=0) 235 | self.tab2.grid_columnconfigure((0, 1), weight=1) 236 | 237 | def Start_Run(self): 238 | self.progressBar['value'] = 20 239 | self.label_notes.config(text="正在启动...\n请稍候...") 240 | 241 | def Stop_Run(self): 242 | self.progressBar['value'] = 0 243 | self.label_notes.config(text="已终止") 244 | 245 | def Start_Run_2(self): 246 | pass 247 | 248 | def Stop_Run_2(self): 249 | pass 250 | 251 | if __name__ == "__main__": 252 | root = tk.Tk() 253 | ui = Ui_MainWindow() 254 | ui.setupUi(root) 255 | root.mainloop() -------------------------------------------------------------------------------- /WeChat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'mainwindow.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.15.10 6 | # 7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is 8 | # run again. Do not edit this file unless you know what you are doing. 
9 | 10 | 11 | from PyQt5 import QtCore, QtGui, QtWidgets 12 | 13 | 14 | class Ui_MainWindow(object): 15 | def setupUi(self, MainWindow): 16 | MainWindow.setObjectName("MainWindow") 17 | MainWindow.setEnabled(True) 18 | MainWindow.resize(787, 739) 19 | sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Preferred) 20 | sizePolicy.setHorizontalStretch(0) 21 | sizePolicy.setVerticalStretch(0) 22 | sizePolicy.setHeightForWidth(MainWindow.sizePolicy().hasHeightForWidth()) 23 | MainWindow.setSizePolicy(sizePolicy) 24 | MainWindow.setMinimumSize(QtCore.QSize(620, 520)) 25 | MainWindow.setMouseTracking(False) 26 | icon = QtGui.QIcon() 27 | icon.addPixmap(QtGui.QPixmap("../../icon.jpg"), QtGui.QIcon.Normal, QtGui.QIcon.Off) 28 | MainWindow.setWindowIcon(icon) 29 | self.centralWidget = QtWidgets.QWidget(MainWindow) 30 | self.centralWidget.setObjectName("centralWidget") 31 | self.gridLayout = QtWidgets.QGridLayout(self.centralWidget) 32 | self.gridLayout.setContentsMargins(11, 11, 11, 11) 33 | self.gridLayout.setSpacing(6) 34 | self.gridLayout.setObjectName("gridLayout") 35 | self.tabWidget = QtWidgets.QTabWidget(self.centralWidget) 36 | self.tabWidget.setObjectName("tabWidget") 37 | self.tab = QtWidgets.QWidget() 38 | self.tab.setObjectName("tab") 39 | self.layoutWidget = QtWidgets.QWidget(self.tab) 40 | self.layoutWidget.setGeometry(QtCore.QRect(10, 394, 715, 236)) 41 | self.layoutWidget.setObjectName("layoutWidget") 42 | self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.layoutWidget) 43 | self.horizontalLayout_2.setContentsMargins(11, 11, 11, 11) 44 | self.horizontalLayout_2.setSpacing(6) 45 | self.horizontalLayout_2.setObjectName("horizontalLayout_2") 46 | self.tableWidget_result = QtWidgets.QTableWidget(self.layoutWidget) 47 | sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding) 48 | sizePolicy.setHorizontalStretch(0) 49 | sizePolicy.setVerticalStretch(0) 50 | 
sizePolicy.setHeightForWidth(self.tableWidget_result.sizePolicy().hasHeightForWidth()) 51 | self.tableWidget_result.setSizePolicy(sizePolicy) 52 | self.tableWidget_result.viewport().setProperty("cursor", QtGui.QCursor(QtCore.Qt.IBeamCursor)) 53 | self.tableWidget_result.setAutoFillBackground(False) 54 | self.tableWidget_result.setFrameShape(QtWidgets.QFrame.StyledPanel) 55 | self.tableWidget_result.setFrameShadow(QtWidgets.QFrame.Sunken) 56 | self.tableWidget_result.setLineWidth(1) 57 | self.tableWidget_result.setMidLineWidth(1) 58 | self.tableWidget_result.setHorizontalScrollBarPolicy(QtCore.Qt.ScrollBarAsNeeded) 59 | self.tableWidget_result.setSizeAdjustPolicy(QtWidgets.QAbstractScrollArea.AdjustToContents) 60 | self.tableWidget_result.setAutoScroll(True) 61 | self.tableWidget_result.setAlternatingRowColors(True) 62 | self.tableWidget_result.setVerticalScrollMode(QtWidgets.QAbstractItemView.ScrollPerPixel) 63 | self.tableWidget_result.setHorizontalScrollMode(QtWidgets.QAbstractItemView.ScrollPerPixel) 64 | self.tableWidget_result.setGridStyle(QtCore.Qt.SolidLine) 65 | self.tableWidget_result.setRowCount(5) 66 | self.tableWidget_result.setColumnCount(2) 67 | self.tableWidget_result.setObjectName("tableWidget_result") 68 | item = QtWidgets.QTableWidgetItem() 69 | self.tableWidget_result.setHorizontalHeaderItem(0, item) 70 | item = QtWidgets.QTableWidgetItem() 71 | self.tableWidget_result.setHorizontalHeaderItem(1, item) 72 | self.tableWidget_result.horizontalHeader().setSortIndicatorShown(False) 73 | self.tableWidget_result.horizontalHeader().setStretchLastSection(True) 74 | self.tableWidget_result.verticalHeader().setCascadingSectionResizes(False) 75 | self.horizontalLayout_2.addWidget(self.tableWidget_result) 76 | spacerItem = QtWidgets.QSpacerItem(10, 20, QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum) 77 | self.horizontalLayout_2.addItem(spacerItem) 78 | self.label_notes = QtWidgets.QLabel(self.layoutWidget) 79 | 
self.label_notes.setMinimumSize(QtCore.QSize(200, 25)) 80 | self.label_notes.setMaximumSize(QtCore.QSize(200, 16777215)) 81 | font = QtGui.QFont() 82 | font.setFamily("华文楷体") 83 | font.setPointSize(10) 84 | self.label_notes.setFont(font) 85 | self.label_notes.setAutoFillBackground(False) 86 | self.label_notes.setFrameShape(QtWidgets.QFrame.Panel) 87 | self.label_notes.setFrameShadow(QtWidgets.QFrame.Sunken) 88 | self.label_notes.setText("") 89 | self.label_notes.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop) 90 | self.label_notes.setObjectName("label_notes") 91 | self.horizontalLayout_2.addWidget(self.label_notes) 92 | self.layoutWidget1 = QtWidgets.QWidget(self.tab) 93 | self.layoutWidget1.setGeometry(QtCore.QRect(10, 140, 715, 237)) 94 | self.layoutWidget1.setObjectName("layoutWidget1") 95 | self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget1) 96 | self.horizontalLayout.setSizeConstraint(QtWidgets.QLayout.SetDefaultConstraint) 97 | self.horizontalLayout.setContentsMargins(11, 11, 11, 11) 98 | self.horizontalLayout.setSpacing(6) 99 | self.horizontalLayout.setObjectName("horizontalLayout") 100 | self.formLayout = QtWidgets.QFormLayout() 101 | self.formLayout.setFormAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignVCenter) 102 | self.formLayout.setSpacing(6) 103 | self.formLayout.setObjectName("formLayout") 104 | self.Label_target = QtWidgets.QLabel(self.layoutWidget1) 105 | self.Label_target.setObjectName("Label_target") 106 | self.formLayout.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.Label_target) 107 | self.LineEdit_target = QtWidgets.QLineEdit(self.layoutWidget1) 108 | self.LineEdit_target.setMinimumSize(QtCore.QSize(200, 25)) 109 | self.LineEdit_target.setStatusTip("") 110 | self.LineEdit_target.setObjectName("LineEdit_target") 111 | self.formLayout.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.LineEdit_target) 112 | self.Label_user = QtWidgets.QLabel(self.layoutWidget1) 113 | 
self.Label_user.setLayoutDirection(QtCore.Qt.LeftToRight) 114 | self.Label_user.setObjectName("Label_user") 115 | self.formLayout.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.Label_user) 116 | self.LineEdit_user = QtWidgets.QLineEdit(self.layoutWidget1) 117 | self.LineEdit_user.setMinimumSize(QtCore.QSize(200, 25)) 118 | self.LineEdit_user.setObjectName("LineEdit_user") 119 | self.formLayout.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.LineEdit_user) 120 | self.Label_pwd = QtWidgets.QLabel(self.layoutWidget1) 121 | self.Label_pwd.setObjectName("Label_pwd") 122 | self.formLayout.setWidget(2, QtWidgets.QFormLayout.LabelRole, self.Label_pwd) 123 | self.LineEdit_pwd = QtWidgets.QLineEdit(self.layoutWidget1) 124 | self.LineEdit_pwd.setMinimumSize(QtCore.QSize(200, 25)) 125 | self.LineEdit_pwd.setText("") 126 | self.LineEdit_pwd.setEchoMode(QtWidgets.QLineEdit.Password) 127 | self.LineEdit_pwd.setObjectName("LineEdit_pwd") 128 | self.formLayout.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.LineEdit_pwd) 129 | self.gapLabel = QtWidgets.QLabel(self.layoutWidget1) 130 | self.gapLabel.setObjectName("gapLabel") 131 | self.formLayout.setWidget(3, QtWidgets.QFormLayout.LabelRole, self.gapLabel) 132 | self.LineEdit_timegap = QtWidgets.QLineEdit(self.layoutWidget1) 133 | self.LineEdit_timegap.setMinimumSize(QtCore.QSize(200, 25)) 134 | self.LineEdit_timegap.setObjectName("LineEdit_timegap") 135 | self.formLayout.setWidget(3, QtWidgets.QFormLayout.FieldRole, self.LineEdit_timegap) 136 | self.wechatLabel = QtWidgets.QLabel(self.layoutWidget1) 137 | self.wechatLabel.setObjectName("wechatLabel") 138 | self.formLayout.setWidget(4, QtWidgets.QFormLayout.LabelRole, self.wechatLabel) 139 | self.LineEdit_wechat = QtWidgets.QLineEdit(self.layoutWidget1) 140 | self.LineEdit_wechat.setMinimumSize(QtCore.QSize(200, 25)) 141 | self.LineEdit_wechat.setObjectName("LineEdit_wechat") 142 | self.formLayout.setWidget(4, QtWidgets.QFormLayout.FieldRole, self.LineEdit_wechat) 143 | 
self.Label_time = QtWidgets.QLabel(self.layoutWidget1) 144 | self.Label_time.setObjectName("Label_time") 145 | self.formLayout.setWidget(5, QtWidgets.QFormLayout.LabelRole, self.Label_time) 146 | self.horizontalLayout_3 = QtWidgets.QHBoxLayout() 147 | self.horizontalLayout_3.setSizeConstraint(QtWidgets.QLayout.SetDefaultConstraint) 148 | self.horizontalLayout_3.setSpacing(6) 149 | self.horizontalLayout_3.setObjectName("horizontalLayout_3") 150 | self.lineEdit_timeStart = QtWidgets.QLineEdit(self.layoutWidget1) 151 | sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed) 152 | sizePolicy.setHorizontalStretch(0) 153 | sizePolicy.setVerticalStretch(0) 154 | sizePolicy.setHeightForWidth(self.lineEdit_timeStart.sizePolicy().hasHeightForWidth()) 155 | self.lineEdit_timeStart.setSizePolicy(sizePolicy) 156 | self.lineEdit_timeStart.setMinimumSize(QtCore.QSize(20, 0)) 157 | self.lineEdit_timeStart.setAlignment(QtCore.Qt.AlignCenter) 158 | self.lineEdit_timeStart.setObjectName("lineEdit_timeStart") 159 | self.horizontalLayout_3.addWidget(self.lineEdit_timeStart) 160 | self.lineEdit_timeEnd = QtWidgets.QLineEdit(self.layoutWidget1) 161 | sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed) 162 | sizePolicy.setHorizontalStretch(0) 163 | sizePolicy.setVerticalStretch(0) 164 | sizePolicy.setHeightForWidth(self.lineEdit_timeEnd.sizePolicy().hasHeightForWidth()) 165 | self.lineEdit_timeEnd.setSizePolicy(sizePolicy) 166 | self.lineEdit_timeEnd.setMinimumSize(QtCore.QSize(50, 0)) 167 | self.lineEdit_timeEnd.setAlignment(QtCore.Qt.AlignCenter) 168 | self.lineEdit_timeEnd.setObjectName("lineEdit_timeEnd") 169 | self.horizontalLayout_3.addWidget(self.lineEdit_timeEnd) 170 | spacerItem1 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum) 171 | self.horizontalLayout_3.addItem(spacerItem1) 172 | self.label = QtWidgets.QLabel(self.layoutWidget1) 173 | 
self.label.setMinimumSize(QtCore.QSize(60, 0)) 174 | self.label.setAlignment(QtCore.Qt.AlignCenter) 175 | self.label.setObjectName("label") 176 | self.horizontalLayout_3.addWidget(self.label) 177 | self.lineEdit_keyword = QtWidgets.QLineEdit(self.layoutWidget1) 178 | self.lineEdit_keyword.setMinimumSize(QtCore.QSize(20, 0)) 179 | self.lineEdit_keyword.setObjectName("lineEdit_keyword") 180 | self.horizontalLayout_3.addWidget(self.lineEdit_keyword) 181 | self.formLayout.setLayout(5, QtWidgets.QFormLayout.FieldRole, self.horizontalLayout_3) 182 | self.progressBar = QtWidgets.QProgressBar(self.layoutWidget1) 183 | self.progressBar.setProperty("value", 0) 184 | self.progressBar.setObjectName("progressBar") 185 | self.formLayout.setWidget(7, QtWidgets.QFormLayout.SpanningRole, self.progressBar) 186 | self.label_total_Page = QtWidgets.QLabel(self.layoutWidget1) 187 | self.label_total_Page.setText("") 188 | self.label_total_Page.setObjectName("label_total_Page") 189 | self.formLayout.setWidget(6, QtWidgets.QFormLayout.LabelRole, self.label_total_Page) 190 | self.horizontalLayout.addLayout(self.formLayout) 191 | spacerItem2 = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum) 192 | self.horizontalLayout.addItem(spacerItem2) 193 | self.verticalLayout = QtWidgets.QVBoxLayout() 194 | self.verticalLayout.setContentsMargins(0, -1, -1, -1) 195 | self.verticalLayout.setSpacing(6) 196 | self.verticalLayout.setObjectName("verticalLayout") 197 | self.pushButton_start = QtWidgets.QPushButton(self.layoutWidget1) 198 | self.pushButton_start.setMinimumSize(QtCore.QSize(20, 50)) 199 | self.pushButton_start.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 200 | self.pushButton_start.setIconSize(QtCore.QSize(24, 24)) 201 | self.pushButton_start.setObjectName("pushButton_start") 202 | self.verticalLayout.addWidget(self.pushButton_start) 203 | self.checkBox = QtWidgets.QCheckBox(self.layoutWidget1) 204 | 
self.checkBox.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 205 | self.checkBox.setStatusTip("") 206 | self.checkBox.setAutoFillBackground(True) 207 | self.checkBox.setChecked(True) 208 | self.checkBox.setObjectName("checkBox") 209 | self.verticalLayout.addWidget(self.checkBox) 210 | self.pushButton_stop = QtWidgets.QPushButton(self.layoutWidget1) 211 | self.pushButton_stop.setMinimumSize(QtCore.QSize(20, 50)) 212 | self.pushButton_stop.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 213 | self.pushButton_stop.setObjectName("pushButton_stop") 214 | self.verticalLayout.addWidget(self.pushButton_stop) 215 | self.horizontalLayout.addLayout(self.verticalLayout) 216 | spacerItem3 = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum) 217 | self.horizontalLayout.addItem(spacerItem3) 218 | self.label_head = QtWidgets.QLabel(self.tab) 219 | self.label_head.setGeometry(QtCore.QRect(10, 10, 715, 124)) 220 | self.label_head.setMinimumSize(QtCore.QSize(0, 120)) 221 | self.label_head.setObjectName("label_head") 222 | self.tabWidget.addTab(self.tab, "") 223 | self.tab_2 = QtWidgets.QWidget() 224 | self.tab_2.setObjectName("tab_2") 225 | self.Label_time_2 = QtWidgets.QLabel(self.tab_2) 226 | self.Label_time_2.setGeometry(QtCore.QRect(90, 190, 36, 21)) 227 | self.Label_time_2.setObjectName("Label_time_2") 228 | self.lineEdit_keyword_2 = QtWidgets.QLineEdit(self.tab_2) 229 | self.lineEdit_keyword_2.setGeometry(QtCore.QRect(130, 190, 158, 20)) 230 | self.lineEdit_keyword_2.setMinimumSize(QtCore.QSize(20, 0)) 231 | self.lineEdit_keyword_2.setObjectName("lineEdit_keyword_2") 232 | self.label_head_2 = QtWidgets.QLabel(self.tab_2) 233 | self.label_head_2.setGeometry(QtCore.QRect(10, 0, 715, 124)) 234 | self.label_head_2.setMinimumSize(QtCore.QSize(0, 120)) 235 | self.label_head_2.setObjectName("label_head_2") 236 | self.pushButton_stop_2 = QtWidgets.QPushButton(self.tab_2) 237 | self.pushButton_stop_2.setGeometry(QtCore.QRect(520, 
230, 80, 50)) 238 | self.pushButton_stop_2.setMinimumSize(QtCore.QSize(20, 50)) 239 | self.pushButton_stop_2.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 240 | self.pushButton_stop_2.setObjectName("pushButton_stop_2") 241 | self.pushButton_start_2 = QtWidgets.QPushButton(self.tab_2) 242 | self.pushButton_start_2.setGeometry(QtCore.QRect(520, 154, 80, 50)) 243 | self.pushButton_start_2.setMinimumSize(QtCore.QSize(20, 50)) 244 | self.pushButton_start_2.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 245 | self.pushButton_start_2.setIconSize(QtCore.QSize(24, 24)) 246 | self.pushButton_start_2.setObjectName("pushButton_start_2") 247 | self.tabWidget.addTab(self.tab_2, "") 248 | self.gridLayout.addWidget(self.tabWidget, 0, 0, 1, 1) 249 | MainWindow.setCentralWidget(self.centralWidget) 250 | self.menuBar = QtWidgets.QMenuBar(MainWindow) 251 | self.menuBar.setGeometry(QtCore.QRect(0, 0, 787, 23)) 252 | self.menuBar.setObjectName("menuBar") 253 | MainWindow.setMenuBar(self.menuBar) 254 | self.mainToolBar = QtWidgets.QToolBar(MainWindow) 255 | self.mainToolBar.setObjectName("mainToolBar") 256 | MainWindow.addToolBar(QtCore.Qt.TopToolBarArea, self.mainToolBar) 257 | 258 | self.retranslateUi(MainWindow) 259 | self.tabWidget.setCurrentIndex(0) 260 | self.pushButton_start.clicked.connect(self.Start_Run) # type: ignore 261 | self.pushButton_stop.clicked.connect(self.Stop_Run) # type: ignore 262 | self.pushButton_start_2.clicked.connect(self.Start_Run_2) # type: ignore 263 | self.pushButton_stop_2.clicked.connect(self.Stop_Run_2) # type: ignore 264 | QtCore.QMetaObject.connectSlotsByName(MainWindow) 265 | 266 | def retranslateUi(self, MainWindow): 267 | _translate = QtCore.QCoreApplication.translate 268 | MainWindow.setWindowTitle(_translate("MainWindow", "微信公众号文章 by 小锋学长")) 269 | self.tableWidget_result.setSortingEnabled(False) 270 | item = self.tableWidget_result.horizontalHeaderItem(0) 271 | item.setText(_translate("MainWindow", "Title")) 272 | item = 
self.tableWidget_result.horizontalHeaderItem(1) 273 | item.setText(_translate("MainWindow", "URL")) 274 | self.label_notes.setWhatsThis(_translate("MainWindow", "调试窗口")) 275 | self.Label_target.setText(_translate("MainWindow", "目标公众号英文名")) 276 | self.LineEdit_target.setPlaceholderText(_translate("MainWindow", "为空则默认新华社(xinhuashefabu1)")) 277 | self.Label_user.setText(_translate("MainWindow", "个人公众号账号")) 278 | self.LineEdit_user.setPlaceholderText(_translate("MainWindow", "为空则自动打开页面后手动输入")) 279 | self.Label_pwd.setText(_translate("MainWindow", "个人公众号密码")) 280 | self.LineEdit_pwd.setPlaceholderText(_translate("MainWindow", "为空则自动打开页面后手动输入")) 281 | self.gapLabel.setText(_translate("MainWindow", "查询间隔(s)")) 282 | self.LineEdit_timegap.setPlaceholderText(_translate("MainWindow", "为空则默认为5s,一页约10条,越短越快被限制")) 283 | self.wechatLabel.setText(_translate("MainWindow", "微信uin和key")) 284 | self.LineEdit_wechat.setPlaceholderText(_translate("MainWindow", "URL全复制进来;下载评论和阅读数时需要,通过Fiddler抓包微信uin和key(约20分钟失效一次)")) 285 | self.Label_time.setText(_translate("MainWindow", "时间范围(年)")) 286 | self.lineEdit_timeStart.setPlaceholderText(_translate("MainWindow", "1999")) 287 | self.lineEdit_timeEnd.setPlaceholderText(_translate("MainWindow", "2019")) 288 | self.label.setText(_translate("MainWindow", "关键词")) 289 | self.pushButton_start.setText(_translate("MainWindow", "启动(*^▽^*)")) 290 | self.checkBox.setWhatsThis(_translate("MainWindow", "记住密码")) 291 | self.checkBox.setText(_translate("MainWindow", "记住密码")) 292 | self.pushButton_stop.setText(_translate("MainWindow", "终止 ̄へ ̄")) 293 | self.label_head.setText(_translate("MainWindow", "****************************************************************************************************\n" 294 | "* 程序原理:\n" 295 | ">> 通过selenium登录获取token和cookie,再自动爬取和下载\n" 296 | "* 使用前提: \n" 297 | ">> 申请一个微信公众号(https://mp.weixin.qq.com)\n" 298 | "开源链接:https://github.com/1061700625/WeChat_Article\n" 299 | " Copyright © SXF 本软件禁止一切形式的商业活动\n" 300 | 
"****************************************************************************************************")) 301 | self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab), _translate("MainWindow", " 公众号搜文章 ")) 302 | self.Label_time_2.setText(_translate("MainWindow", "关键词")) 303 | self.label_head_2.setText(_translate("MainWindow", "****************************************************************************************************\n" 304 | "* demo说明:\n" 305 | ">> 现在“公众号搜文章”页填完整信息\n" 306 | ">> 再在本页填入关键词\n" 307 | ">> 点击“启动”即可\n" 308 | " Copyright © SXF 本软件禁止一切形式的商业活动\n" 309 | "****************************************************************************************************")) 310 | self.pushButton_stop_2.setText(_translate("MainWindow", "终止 ̄へ ̄")) 311 | self.pushButton_start_2.setText(_translate("MainWindow", "启动(*^▽^*)")) 312 | self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_2), _translate("MainWindow", " 关键词搜文章 ")) 313 | -------------------------------------------------------------------------------- /Qt/WeChat/WeChat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'mainwindow.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.15.10 6 | # 7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is 8 | # run again. Do not edit this file unless you know what you are doing. 
9 | 10 | 11 | from PyQt5 import QtCore, QtGui, QtWidgets 12 | 13 | 14 | class Ui_MainWindow(object): 15 | def setupUi(self, MainWindow): 16 | MainWindow.setObjectName("MainWindow") 17 | MainWindow.setEnabled(True) 18 | MainWindow.resize(787, 739) 19 | sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Preferred) 20 | sizePolicy.setHorizontalStretch(0) 21 | sizePolicy.setVerticalStretch(0) 22 | sizePolicy.setHeightForWidth(MainWindow.sizePolicy().hasHeightForWidth()) 23 | MainWindow.setSizePolicy(sizePolicy) 24 | MainWindow.setMinimumSize(QtCore.QSize(620, 520)) 25 | MainWindow.setMouseTracking(False) 26 | icon = QtGui.QIcon() 27 | icon.addPixmap(QtGui.QPixmap("../../icon.jpg"), QtGui.QIcon.Normal, QtGui.QIcon.Off) 28 | MainWindow.setWindowIcon(icon) 29 | self.centralWidget = QtWidgets.QWidget(MainWindow) 30 | self.centralWidget.setObjectName("centralWidget") 31 | self.gridLayout = QtWidgets.QGridLayout(self.centralWidget) 32 | self.gridLayout.setContentsMargins(11, 11, 11, 11) 33 | self.gridLayout.setSpacing(6) 34 | self.gridLayout.setObjectName("gridLayout") 35 | self.tabWidget = QtWidgets.QTabWidget(self.centralWidget) 36 | self.tabWidget.setObjectName("tabWidget") 37 | self.tab = QtWidgets.QWidget() 38 | self.tab.setObjectName("tab") 39 | self.layoutWidget = QtWidgets.QWidget(self.tab) 40 | self.layoutWidget.setGeometry(QtCore.QRect(10, 394, 715, 236)) 41 | self.layoutWidget.setObjectName("layoutWidget") 42 | self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.layoutWidget) 43 | self.horizontalLayout_2.setContentsMargins(11, 11, 11, 11) 44 | self.horizontalLayout_2.setSpacing(6) 45 | self.horizontalLayout_2.setObjectName("horizontalLayout_2") 46 | self.tableWidget_result = QtWidgets.QTableWidget(self.layoutWidget) 47 | sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding) 48 | sizePolicy.setHorizontalStretch(0) 49 | sizePolicy.setVerticalStretch(0) 50 | 
sizePolicy.setHeightForWidth(self.tableWidget_result.sizePolicy().hasHeightForWidth()) 51 | self.tableWidget_result.setSizePolicy(sizePolicy) 52 | self.tableWidget_result.viewport().setProperty("cursor", QtGui.QCursor(QtCore.Qt.IBeamCursor)) 53 | self.tableWidget_result.setAutoFillBackground(False) 54 | self.tableWidget_result.setFrameShape(QtWidgets.QFrame.StyledPanel) 55 | self.tableWidget_result.setFrameShadow(QtWidgets.QFrame.Sunken) 56 | self.tableWidget_result.setLineWidth(1) 57 | self.tableWidget_result.setMidLineWidth(1) 58 | self.tableWidget_result.setHorizontalScrollBarPolicy(QtCore.Qt.ScrollBarAsNeeded) 59 | self.tableWidget_result.setSizeAdjustPolicy(QtWidgets.QAbstractScrollArea.AdjustToContents) 60 | self.tableWidget_result.setAutoScroll(True) 61 | self.tableWidget_result.setAlternatingRowColors(True) 62 | self.tableWidget_result.setVerticalScrollMode(QtWidgets.QAbstractItemView.ScrollPerPixel) 63 | self.tableWidget_result.setHorizontalScrollMode(QtWidgets.QAbstractItemView.ScrollPerPixel) 64 | self.tableWidget_result.setGridStyle(QtCore.Qt.SolidLine) 65 | self.tableWidget_result.setRowCount(5) 66 | self.tableWidget_result.setColumnCount(2) 67 | self.tableWidget_result.setObjectName("tableWidget_result") 68 | item = QtWidgets.QTableWidgetItem() 69 | self.tableWidget_result.setHorizontalHeaderItem(0, item) 70 | item = QtWidgets.QTableWidgetItem() 71 | self.tableWidget_result.setHorizontalHeaderItem(1, item) 72 | self.tableWidget_result.horizontalHeader().setSortIndicatorShown(False) 73 | self.tableWidget_result.horizontalHeader().setStretchLastSection(True) 74 | self.tableWidget_result.verticalHeader().setCascadingSectionResizes(False) 75 | self.horizontalLayout_2.addWidget(self.tableWidget_result) 76 | spacerItem = QtWidgets.QSpacerItem(10, 20, QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum) 77 | self.horizontalLayout_2.addItem(spacerItem) 78 | self.label_notes = QtWidgets.QLabel(self.layoutWidget) 79 | 
self.label_notes.setMinimumSize(QtCore.QSize(200, 25)) 80 | self.label_notes.setMaximumSize(QtCore.QSize(200, 16777215)) 81 | font = QtGui.QFont() 82 | font.setFamily("华文楷体") 83 | font.setPointSize(10) 84 | self.label_notes.setFont(font) 85 | self.label_notes.setAutoFillBackground(False) 86 | self.label_notes.setFrameShape(QtWidgets.QFrame.Panel) 87 | self.label_notes.setFrameShadow(QtWidgets.QFrame.Sunken) 88 | self.label_notes.setText("") 89 | self.label_notes.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop) 90 | self.label_notes.setObjectName("label_notes") 91 | self.horizontalLayout_2.addWidget(self.label_notes) 92 | self.layoutWidget1 = QtWidgets.QWidget(self.tab) 93 | self.layoutWidget1.setGeometry(QtCore.QRect(10, 140, 715, 237)) 94 | self.layoutWidget1.setObjectName("layoutWidget1") 95 | self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget1) 96 | self.horizontalLayout.setSizeConstraint(QtWidgets.QLayout.SetDefaultConstraint) 97 | self.horizontalLayout.setContentsMargins(11, 11, 11, 11) 98 | self.horizontalLayout.setSpacing(6) 99 | self.horizontalLayout.setObjectName("horizontalLayout") 100 | self.formLayout = QtWidgets.QFormLayout() 101 | self.formLayout.setFormAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignVCenter) 102 | self.formLayout.setSpacing(6) 103 | self.formLayout.setObjectName("formLayout") 104 | self.Label_target = QtWidgets.QLabel(self.layoutWidget1) 105 | self.Label_target.setObjectName("Label_target") 106 | self.formLayout.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.Label_target) 107 | self.LineEdit_target = QtWidgets.QLineEdit(self.layoutWidget1) 108 | self.LineEdit_target.setMinimumSize(QtCore.QSize(200, 25)) 109 | self.LineEdit_target.setStatusTip("") 110 | self.LineEdit_target.setObjectName("LineEdit_target") 111 | self.formLayout.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.LineEdit_target) 112 | self.Label_user = QtWidgets.QLabel(self.layoutWidget1) 113 | 
self.Label_user.setLayoutDirection(QtCore.Qt.LeftToRight) 114 | self.Label_user.setObjectName("Label_user") 115 | self.formLayout.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.Label_user) 116 | self.LineEdit_user = QtWidgets.QLineEdit(self.layoutWidget1) 117 | self.LineEdit_user.setMinimumSize(QtCore.QSize(200, 25)) 118 | self.LineEdit_user.setObjectName("LineEdit_user") 119 | self.formLayout.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.LineEdit_user) 120 | self.Label_pwd = QtWidgets.QLabel(self.layoutWidget1) 121 | self.Label_pwd.setObjectName("Label_pwd") 122 | self.formLayout.setWidget(2, QtWidgets.QFormLayout.LabelRole, self.Label_pwd) 123 | self.LineEdit_pwd = QtWidgets.QLineEdit(self.layoutWidget1) 124 | self.LineEdit_pwd.setMinimumSize(QtCore.QSize(200, 25)) 125 | self.LineEdit_pwd.setText("") 126 | self.LineEdit_pwd.setEchoMode(QtWidgets.QLineEdit.Password) 127 | self.LineEdit_pwd.setObjectName("LineEdit_pwd") 128 | self.formLayout.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.LineEdit_pwd) 129 | self.gapLabel = QtWidgets.QLabel(self.layoutWidget1) 130 | self.gapLabel.setObjectName("gapLabel") 131 | self.formLayout.setWidget(3, QtWidgets.QFormLayout.LabelRole, self.gapLabel) 132 | self.LineEdit_timegap = QtWidgets.QLineEdit(self.layoutWidget1) 133 | self.LineEdit_timegap.setMinimumSize(QtCore.QSize(200, 25)) 134 | self.LineEdit_timegap.setObjectName("LineEdit_timegap") 135 | self.formLayout.setWidget(3, QtWidgets.QFormLayout.FieldRole, self.LineEdit_timegap) 136 | self.wechatLabel = QtWidgets.QLabel(self.layoutWidget1) 137 | self.wechatLabel.setObjectName("wechatLabel") 138 | self.formLayout.setWidget(4, QtWidgets.QFormLayout.LabelRole, self.wechatLabel) 139 | self.LineEdit_wechat = QtWidgets.QLineEdit(self.layoutWidget1) 140 | self.LineEdit_wechat.setMinimumSize(QtCore.QSize(200, 25)) 141 | self.LineEdit_wechat.setObjectName("LineEdit_wechat") 142 | self.formLayout.setWidget(4, QtWidgets.QFormLayout.FieldRole, self.LineEdit_wechat) 143 | 
self.Label_time = QtWidgets.QLabel(self.layoutWidget1) 144 | self.Label_time.setObjectName("Label_time") 145 | self.formLayout.setWidget(5, QtWidgets.QFormLayout.LabelRole, self.Label_time) 146 | self.horizontalLayout_3 = QtWidgets.QHBoxLayout() 147 | self.horizontalLayout_3.setSizeConstraint(QtWidgets.QLayout.SetDefaultConstraint) 148 | self.horizontalLayout_3.setSpacing(6) 149 | self.horizontalLayout_3.setObjectName("horizontalLayout_3") 150 | self.lineEdit_timeStart = QtWidgets.QLineEdit(self.layoutWidget1) 151 | sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed) 152 | sizePolicy.setHorizontalStretch(0) 153 | sizePolicy.setVerticalStretch(0) 154 | sizePolicy.setHeightForWidth(self.lineEdit_timeStart.sizePolicy().hasHeightForWidth()) 155 | self.lineEdit_timeStart.setSizePolicy(sizePolicy) 156 | self.lineEdit_timeStart.setMinimumSize(QtCore.QSize(20, 0)) 157 | self.lineEdit_timeStart.setAlignment(QtCore.Qt.AlignCenter) 158 | self.lineEdit_timeStart.setObjectName("lineEdit_timeStart") 159 | self.horizontalLayout_3.addWidget(self.lineEdit_timeStart) 160 | self.lineEdit_timeEnd = QtWidgets.QLineEdit(self.layoutWidget1) 161 | sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed) 162 | sizePolicy.setHorizontalStretch(0) 163 | sizePolicy.setVerticalStretch(0) 164 | sizePolicy.setHeightForWidth(self.lineEdit_timeEnd.sizePolicy().hasHeightForWidth()) 165 | self.lineEdit_timeEnd.setSizePolicy(sizePolicy) 166 | self.lineEdit_timeEnd.setMinimumSize(QtCore.QSize(50, 0)) 167 | self.lineEdit_timeEnd.setAlignment(QtCore.Qt.AlignCenter) 168 | self.lineEdit_timeEnd.setObjectName("lineEdit_timeEnd") 169 | self.horizontalLayout_3.addWidget(self.lineEdit_timeEnd) 170 | spacerItem1 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum) 171 | self.horizontalLayout_3.addItem(spacerItem1) 172 | self.label = QtWidgets.QLabel(self.layoutWidget1) 173 | 
self.label.setMinimumSize(QtCore.QSize(60, 0)) 174 | self.label.setAlignment(QtCore.Qt.AlignCenter) 175 | self.label.setObjectName("label") 176 | self.horizontalLayout_3.addWidget(self.label) 177 | self.lineEdit_keyword = QtWidgets.QLineEdit(self.layoutWidget1) 178 | self.lineEdit_keyword.setMinimumSize(QtCore.QSize(20, 0)) 179 | self.lineEdit_keyword.setObjectName("lineEdit_keyword") 180 | self.horizontalLayout_3.addWidget(self.lineEdit_keyword) 181 | self.formLayout.setLayout(5, QtWidgets.QFormLayout.FieldRole, self.horizontalLayout_3) 182 | self.progressBar = QtWidgets.QProgressBar(self.layoutWidget1) 183 | self.progressBar.setProperty("value", 0) 184 | self.progressBar.setObjectName("progressBar") 185 | self.formLayout.setWidget(7, QtWidgets.QFormLayout.SpanningRole, self.progressBar) 186 | self.label_total_Page = QtWidgets.QLabel(self.layoutWidget1) 187 | self.label_total_Page.setText("") 188 | self.label_total_Page.setObjectName("label_total_Page") 189 | self.formLayout.setWidget(6, QtWidgets.QFormLayout.LabelRole, self.label_total_Page) 190 | self.horizontalLayout.addLayout(self.formLayout) 191 | spacerItem2 = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum) 192 | self.horizontalLayout.addItem(spacerItem2) 193 | self.verticalLayout = QtWidgets.QVBoxLayout() 194 | self.verticalLayout.setContentsMargins(0, -1, -1, -1) 195 | self.verticalLayout.setSpacing(6) 196 | self.verticalLayout.setObjectName("verticalLayout") 197 | self.pushButton_start = QtWidgets.QPushButton(self.layoutWidget1) 198 | self.pushButton_start.setMinimumSize(QtCore.QSize(20, 50)) 199 | self.pushButton_start.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 200 | self.pushButton_start.setIconSize(QtCore.QSize(24, 24)) 201 | self.pushButton_start.setObjectName("pushButton_start") 202 | self.verticalLayout.addWidget(self.pushButton_start) 203 | self.checkBox = QtWidgets.QCheckBox(self.layoutWidget1) 204 | 
self.checkBox.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 205 | self.checkBox.setStatusTip("") 206 | self.checkBox.setAutoFillBackground(True) 207 | self.checkBox.setChecked(True) 208 | self.checkBox.setObjectName("checkBox") 209 | self.verticalLayout.addWidget(self.checkBox) 210 | self.pushButton_stop = QtWidgets.QPushButton(self.layoutWidget1) 211 | self.pushButton_stop.setMinimumSize(QtCore.QSize(20, 50)) 212 | self.pushButton_stop.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 213 | self.pushButton_stop.setObjectName("pushButton_stop") 214 | self.verticalLayout.addWidget(self.pushButton_stop) 215 | self.horizontalLayout.addLayout(self.verticalLayout) 216 | spacerItem3 = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum) 217 | self.horizontalLayout.addItem(spacerItem3) 218 | self.label_head = QtWidgets.QLabel(self.tab) 219 | self.label_head.setGeometry(QtCore.QRect(10, 10, 715, 124)) 220 | self.label_head.setMinimumSize(QtCore.QSize(0, 120)) 221 | self.label_head.setObjectName("label_head") 222 | self.tabWidget.addTab(self.tab, "") 223 | self.tab_2 = QtWidgets.QWidget() 224 | self.tab_2.setObjectName("tab_2") 225 | self.Label_time_2 = QtWidgets.QLabel(self.tab_2) 226 | self.Label_time_2.setGeometry(QtCore.QRect(90, 190, 36, 21)) 227 | self.Label_time_2.setObjectName("Label_time_2") 228 | self.lineEdit_keyword_2 = QtWidgets.QLineEdit(self.tab_2) 229 | self.lineEdit_keyword_2.setGeometry(QtCore.QRect(130, 190, 158, 20)) 230 | self.lineEdit_keyword_2.setMinimumSize(QtCore.QSize(20, 0)) 231 | self.lineEdit_keyword_2.setObjectName("lineEdit_keyword_2") 232 | self.label_head_2 = QtWidgets.QLabel(self.tab_2) 233 | self.label_head_2.setGeometry(QtCore.QRect(10, 0, 715, 124)) 234 | self.label_head_2.setMinimumSize(QtCore.QSize(0, 120)) 235 | self.label_head_2.setObjectName("label_head_2") 236 | self.pushButton_stop_2 = QtWidgets.QPushButton(self.tab_2) 237 | self.pushButton_stop_2.setGeometry(QtCore.QRect(520, 
230, 80, 50)) 238 | self.pushButton_stop_2.setMinimumSize(QtCore.QSize(20, 50)) 239 | self.pushButton_stop_2.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 240 | self.pushButton_stop_2.setObjectName("pushButton_stop_2") 241 | self.pushButton_start_2 = QtWidgets.QPushButton(self.tab_2) 242 | self.pushButton_start_2.setGeometry(QtCore.QRect(520, 154, 80, 50)) 243 | self.pushButton_start_2.setMinimumSize(QtCore.QSize(20, 50)) 244 | self.pushButton_start_2.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) 245 | self.pushButton_start_2.setIconSize(QtCore.QSize(24, 24)) 246 | self.pushButton_start_2.setObjectName("pushButton_start_2") 247 | self.tabWidget.addTab(self.tab_2, "") 248 | self.gridLayout.addWidget(self.tabWidget, 0, 0, 1, 1) 249 | MainWindow.setCentralWidget(self.centralWidget) 250 | self.menuBar = QtWidgets.QMenuBar(MainWindow) 251 | self.menuBar.setGeometry(QtCore.QRect(0, 0, 787, 23)) 252 | self.menuBar.setObjectName("menuBar") 253 | MainWindow.setMenuBar(self.menuBar) 254 | self.mainToolBar = QtWidgets.QToolBar(MainWindow) 255 | self.mainToolBar.setObjectName("mainToolBar") 256 | MainWindow.addToolBar(QtCore.Qt.TopToolBarArea, self.mainToolBar) 257 | 258 | self.retranslateUi(MainWindow) 259 | self.tabWidget.setCurrentIndex(0) 260 | self.pushButton_start.clicked.connect(MainWindow.Start_Run) # type: ignore 261 | self.pushButton_stop.clicked.connect(MainWindow.Stop_Run) # type: ignore 262 | self.pushButton_start_2.clicked.connect(MainWindow.Start_Run_2) # type: ignore 263 | self.pushButton_stop_2.clicked.connect(MainWindow.Stop_Run_2) # type: ignore 264 | QtCore.QMetaObject.connectSlotsByName(MainWindow) 265 | 266 | def retranslateUi(self, MainWindow): 267 | _translate = QtCore.QCoreApplication.translate 268 | MainWindow.setWindowTitle(_translate("MainWindow", "微信公众号文章 by 小锋学长")) 269 | self.tableWidget_result.setSortingEnabled(False) 270 | item = self.tableWidget_result.horizontalHeaderItem(0) 271 | item.setText(_translate("MainWindow", 
"Title")) 272 | item = self.tableWidget_result.horizontalHeaderItem(1) 273 | item.setText(_translate("MainWindow", "URL")) 274 | self.label_notes.setWhatsThis(_translate("MainWindow", "调试窗口")) 275 | self.Label_target.setText(_translate("MainWindow", "目标公众号英文名")) 276 | self.LineEdit_target.setPlaceholderText(_translate("MainWindow", "为空则默认新华社(xinhuashefabu1)")) 277 | self.Label_user.setText(_translate("MainWindow", "个人公众号账号")) 278 | self.LineEdit_user.setPlaceholderText(_translate("MainWindow", "为空则自动打开页面后手动输入")) 279 | self.Label_pwd.setText(_translate("MainWindow", "个人公众号密码")) 280 | self.LineEdit_pwd.setPlaceholderText(_translate("MainWindow", "为空则自动打开页面后手动输入")) 281 | self.gapLabel.setText(_translate("MainWindow", "查询间隔(s)")) 282 | self.LineEdit_timegap.setPlaceholderText(_translate("MainWindow", "为空则默认为5s,一页约10条,越短越快被限制")) 283 | self.wechatLabel.setText(_translate("MainWindow", "微信uin和key")) 284 | self.LineEdit_wechat.setPlaceholderText(_translate("MainWindow", "URL全复制进来;下载评论和阅读数时需要,通过Fiddler抓包微信uin和key(约20分钟失效一次)")) 285 | self.Label_time.setText(_translate("MainWindow", "时间范围(年)")) 286 | self.lineEdit_timeStart.setPlaceholderText(_translate("MainWindow", "1999")) 287 | self.lineEdit_timeEnd.setPlaceholderText(_translate("MainWindow", "2019")) 288 | self.label.setText(_translate("MainWindow", "关键词")) 289 | self.pushButton_start.setText(_translate("MainWindow", "启动(*^▽^*)")) 290 | self.checkBox.setWhatsThis(_translate("MainWindow", "记住密码")) 291 | self.checkBox.setText(_translate("MainWindow", "记住密码")) 292 | self.pushButton_stop.setText(_translate("MainWindow", "终止 ̄へ ̄")) 293 | self.label_head.setText(_translate("MainWindow", "****************************************************************************************************\n" 294 | "* 程序原理:\n" 295 | ">> 通过selenium登录获取token和cookie,再自动爬取和下载\n" 296 | "* 使用前提: \n" 297 | ">> 申请一个微信公众号(https://mp.weixin.qq.com)\n" 298 | "开源链接:https://github.com/1061700625/WeChat_Article\n" 299 | " Copyright © SXF 本软件禁止一切形式的商业活动\n" 300 
| "****************************************************************************************************")) 301 | self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab), _translate("MainWindow", " 公众号搜文章 ")) 302 | self.Label_time_2.setText(_translate("MainWindow", "关键词")) 303 | self.label_head_2.setText(_translate("MainWindow", "****************************************************************************************************\n" 304 | "* demo说明:\n" 305 | ">> 现在“公众号搜文章”页填完整信息\n" 306 | ">> 再在本页填入关键词\n" 307 | ">> 点击“启动”即可\n" 308 | " Copyright © SXF 本软件禁止一切形式的商业活动\n" 309 | "****************************************************************************************************")) 310 | self.pushButton_stop_2.setText(_translate("MainWindow", "终止 ̄へ ̄")) 311 | self.pushButton_start_2.setText(_translate("MainWindow", "启动(*^▽^*)")) 312 | self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_2), _translate("MainWindow", " 关键词搜文章 ")) 313 | -------------------------------------------------------------------------------- /Qt/WeChat/WeChat.pro.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | EnvironmentId 7 | {a59e6a57-da79-4cae-a44b-15af72be8378} 8 | 9 | 10 | ProjectExplorer.Project.ActiveTarget 11 | 0 12 | 13 | 14 | ProjectExplorer.Project.EditorSettings 15 | 16 | true 17 | false 18 | true 19 | 20 | Cpp 21 | 22 | CppGlobal 23 | 24 | 25 | 26 | QmlJS 27 | 28 | QmlJSGlobal 29 | 30 | 31 | 2 32 | UTF-8 33 | false 34 | 4 35 | false 36 | 80 37 | true 38 | true 39 | 1 40 | true 41 | false 42 | 0 43 | true 44 | true 45 | 0 46 | 8 47 | true 48 | 1 49 | true 50 | true 51 | true 52 | false 53 | 54 | 55 | 56 | ProjectExplorer.Project.PluginSettings 57 | 58 | 59 | -fno-delayed-template-parsing 60 | 61 | true 62 | 63 | 64 | 65 | ProjectExplorer.Project.Target.0 66 | 67 | Desktop Qt 5.11.1 MinGW 32bit 68 | Desktop Qt 5.11.1 MinGW 32bit 69 | qt.qt5.5111.win32_mingw53_kit 70 | 0 71 | 0 72 | 0 73 | 74 | 
C:/Users/Administrator/Desktop/Qt/build-WeChat-Desktop_Qt_5_11_1_MinGW_32bit-Debug 75 | 76 | 77 | true 78 | qmake 79 | 80 | QtProjectManager.QMakeBuildStep 81 | true 82 | 83 | false 84 | false 85 | false 86 | 87 | 88 | true 89 | Make 90 | 91 | Qt4ProjectManager.MakeStep 92 | 93 | false 94 | 95 | 96 | 97 | 2 98 | Build 99 | 100 | ProjectExplorer.BuildSteps.Build 101 | 102 | 103 | 104 | true 105 | Make 106 | 107 | Qt4ProjectManager.MakeStep 108 | 109 | true 110 | clean 111 | 112 | 113 | 1 114 | Clean 115 | 116 | ProjectExplorer.BuildSteps.Clean 117 | 118 | 2 119 | false 120 | 121 | Debug 122 | Debug 123 | Qt4ProjectManager.Qt4BuildConfiguration 124 | 2 125 | true 126 | 127 | 128 | C:/Users/Administrator/Desktop/Qt/build-WeChat-Desktop_Qt_5_11_1_MinGW_32bit-Release 129 | 130 | 131 | true 132 | qmake 133 | 134 | QtProjectManager.QMakeBuildStep 135 | false 136 | 137 | false 138 | false 139 | true 140 | 141 | 142 | true 143 | Make 144 | 145 | Qt4ProjectManager.MakeStep 146 | 147 | false 148 | 149 | 150 | 151 | 2 152 | Build 153 | 154 | ProjectExplorer.BuildSteps.Build 155 | 156 | 157 | 158 | true 159 | Make 160 | 161 | Qt4ProjectManager.MakeStep 162 | 163 | true 164 | clean 165 | 166 | 167 | 1 168 | Clean 169 | 170 | ProjectExplorer.BuildSteps.Clean 171 | 172 | 2 173 | false 174 | 175 | Release 176 | Release 177 | Qt4ProjectManager.Qt4BuildConfiguration 178 | 0 179 | true 180 | 181 | 182 | C:/Users/Administrator/Desktop/Qt/build-WeChat-Desktop_Qt_5_11_1_MinGW_32bit-Profile 183 | 184 | 185 | true 186 | qmake 187 | 188 | QtProjectManager.QMakeBuildStep 189 | true 190 | 191 | false 192 | true 193 | true 194 | 195 | 196 | true 197 | Make 198 | 199 | Qt4ProjectManager.MakeStep 200 | 201 | false 202 | 203 | 204 | 205 | 2 206 | Build 207 | 208 | ProjectExplorer.BuildSteps.Build 209 | 210 | 211 | 212 | true 213 | Make 214 | 215 | Qt4ProjectManager.MakeStep 216 | 217 | true 218 | clean 219 | 220 | 221 | 1 222 | Clean 223 | 224 | ProjectExplorer.BuildSteps.Clean 225 | 226 | 2 227 
| false 228 | 229 | Profile 230 | Profile 231 | Qt4ProjectManager.Qt4BuildConfiguration 232 | 0 233 | true 234 | 235 | 3 236 | 237 | 238 | 0 239 | 部署 240 | 241 | ProjectExplorer.BuildSteps.Deploy 242 | 243 | 1 244 | Deploy Configuration 245 | 246 | ProjectExplorer.DefaultDeployConfiguration 247 | 248 | 1 249 | 250 | 251 | false 252 | false 253 | 1000 254 | 255 | true 256 | 257 | false 258 | false 259 | false 260 | false 261 | true 262 | 0.01 263 | 10 264 | true 265 | 1 266 | 25 267 | 268 | 1 269 | true 270 | false 271 | true 272 | valgrind 273 | 274 | 0 275 | 1 276 | 2 277 | 3 278 | 4 279 | 5 280 | 6 281 | 7 282 | 8 283 | 9 284 | 10 285 | 11 286 | 12 287 | 13 288 | 14 289 | 290 | 2 291 | 292 | WeChat 293 | 294 | Qt4ProjectManager.Qt4RunConfiguration:C:/Users/Administrator/Desktop/Qt/WeChat/WeChat.pro 295 | true 296 | 297 | WeChat.pro 298 | 299 | C:/Users/Administrator/Desktop/Qt/build-WeChat-Desktop_Qt_5_11_1_MinGW_32bit-Debug 300 | 3768 301 | false 302 | true 303 | false 304 | false 305 | true 306 | 307 | 1 308 | 309 | 310 | 311 | ProjectExplorer.Project.TargetCount 312 | 1 313 | 314 | 315 | ProjectExplorer.Project.Updater.FileVersion 316 | 18 317 | 318 | 319 | Version 320 | 18 321 | 322 | 323 | -------------------------------------------------------------------------------- /Qt/WeChat/mainwindow.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | MainWindow 4 | 5 | 6 | true 7 | 8 | 9 | 10 | 0 11 | 0 12 | 787 13 | 739 14 | 15 | 16 | 17 | 18 | 0 19 | 0 20 | 21 | 22 | 23 | 24 | 620 25 | 520 26 | 27 | 28 | 29 | false 30 | 31 | 32 | 微信公众号文章 by 小锋学长 33 | 34 | 35 | 36 | ../../icon.jpg../../icon.jpg 37 | 38 | 39 | 40 | 41 | 42 | 43 | 0 44 | 45 | 46 | 47 | 公众号搜文章 48 | 49 | 50 | 51 | 52 | 10 53 | 394 54 | 715 55 | 236 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 0 64 | 0 65 | 66 | 67 | 68 | IBeamCursor 69 | 70 | 71 | false 72 | 73 | 74 | QFrame::StyledPanel 75 | 76 | 77 | QFrame::Sunken 78 | 79 | 80 | 1 81 | 82 | 83 | 1 84 | 85 | 86 
| Qt::ScrollBarAsNeeded 87 | 88 | 89 | QAbstractScrollArea::AdjustToContents 90 | 91 | 92 | true 93 | 94 | 95 | true 96 | 97 | 98 | QAbstractItemView::ScrollPerPixel 99 | 100 | 101 | QAbstractItemView::ScrollPerPixel 102 | 103 | 104 | Qt::SolidLine 105 | 106 | 107 | false 108 | 109 | 110 | 5 111 | 112 | 113 | 2 114 | 115 | 116 | false 117 | 118 | 119 | true 120 | 121 | 122 | false 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | Title 132 | 133 | 134 | 135 | 136 | URL 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | Qt::Horizontal 145 | 146 | 147 | QSizePolicy::Preferred 148 | 149 | 150 | 151 | 10 152 | 20 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 200 162 | 25 163 | 164 | 165 | 166 | 167 | 200 168 | 16777215 169 | 170 | 171 | 172 | 173 | 华文楷体 174 | 10 175 | 176 | 177 | 178 | 调试窗口 179 | 180 | 181 | false 182 | 183 | 184 | QFrame::Panel 185 | 186 | 187 | QFrame::Sunken 188 | 189 | 190 | 191 | 192 | 193 | Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 10 203 | 140 204 | 715 205 | 237 206 | 207 | 208 | 209 | 210 | QLayout::SetDefaultConstraint 211 | 212 | 213 | 214 | 215 | Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter 216 | 217 | 218 | 219 | 220 | 目标公众号英文名 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 200 229 | 25 230 | 231 | 232 | 233 | 234 | 235 | 236 | 为空则默认新华社(xinhuashefabu1) 237 | 238 | 239 | 240 | 241 | 242 | 243 | Qt::LeftToRight 244 | 245 | 246 | 个人公众号账号 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 200 255 | 25 256 | 257 | 258 | 259 | 为空则自动打开页面后手动输入 260 | 261 | 262 | 263 | 264 | 265 | 266 | 个人公众号密码 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 200 275 | 25 276 | 277 | 278 | 279 | 280 | 281 | 282 | QLineEdit::Password 283 | 284 | 285 | 为空则自动打开页面后手动输入 286 | 287 | 288 | 289 | 290 | 291 | 292 | 查询间隔(s) 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 200 301 | 25 302 | 303 | 304 | 305 | 为空则默认为5s,一页约10条,越短越快被限制 306 | 307 | 308 | 309 | 310 | 311 | 312 | 微信uin和key 313 | 314 | 315 | 316 | 317 | 
318 | 319 | 320 | 200 321 | 25 322 | 323 | 324 | 325 | URL全复制进来;下载评论和阅读数时需要,通过Fiddler抓包微信uin和key(约20分钟失效一次) 326 | 327 | 328 | 329 | 330 | 331 | 332 | 时间范围(年) 333 | 334 | 335 | 336 | 337 | 338 | 339 | 6 340 | 341 | 342 | QLayout::SetDefaultConstraint 343 | 344 | 345 | 346 | 347 | 348 | 0 349 | 0 350 | 351 | 352 | 353 | 354 | 20 355 | 0 356 | 357 | 358 | 359 | Qt::AlignCenter 360 | 361 | 362 | 1999 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 0 371 | 0 372 | 373 | 374 | 375 | 376 | 50 377 | 0 378 | 379 | 380 | 381 | Qt::AlignCenter 382 | 383 | 384 | 2019 385 | 386 | 387 | 388 | 389 | 390 | 391 | Qt::Horizontal 392 | 393 | 394 | 395 | 40 396 | 20 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 60 406 | 0 407 | 408 | 409 | 410 | 关键词 411 | 412 | 413 | Qt::AlignCenter 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 20 422 | 0 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 0 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | Qt::Horizontal 449 | 450 | 451 | 452 | 20 453 | 20 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 0 462 | 463 | 464 | 465 | 466 | 467 | 20 468 | 50 469 | 470 | 471 | 472 | PointingHandCursor 473 | 474 | 475 | 启动(*^▽^*) 476 | 477 | 478 | 479 | 24 480 | 24 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | PointingHandCursor 489 | 490 | 491 | 492 | 493 | 494 | 记住密码 495 | 496 | 497 | true 498 | 499 | 500 | 记住密码 501 | 502 | 503 | true 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 20 512 | 50 513 | 514 | 515 | 516 | PointingHandCursor 517 | 518 | 519 | 终止 ̄へ ̄ 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | Qt::Horizontal 529 | 530 | 531 | 532 | 20 533 | 20 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 10 544 | 10 545 | 715 546 | 124 547 | 548 | 549 | 550 | 551 | 0 552 | 120 553 | 554 | 555 | 556 | **************************************************************************************************** 557 | * 程序原理: 558 | >> 通过selenium登录获取token和cookie,再自动爬取和下载 559 | * 使用前提: 
560 | >> 申请一个微信公众号(https://mp.weixin.qq.com) 561 | 开源链接:https://github.com/1061700625/WeChat_Article 562 | Copyright © SXF 本软件禁止一切形式的商业活动 563 | **************************************************************************************************** 564 | 565 | 566 | 567 | 568 | 569 | 关键词搜文章 570 | 571 | 572 | 573 | 574 | 90 575 | 190 576 | 36 577 | 21 578 | 579 | 580 | 581 | 关键词 582 | 583 | 584 | 585 | 586 | 587 | 130 588 | 190 589 | 158 590 | 20 591 | 592 | 593 | 594 | 595 | 20 596 | 0 597 | 598 | 599 | 600 | 601 | 602 | 603 | 10 604 | 0 605 | 715 606 | 124 607 | 608 | 609 | 610 | 611 | 0 612 | 120 613 | 614 | 615 | 616 | **************************************************************************************************** 617 | * demo说明: 618 | >> 现在“公众号搜文章”页填完整信息 619 | >> 再在本页填入关键词 620 | >> 点击“启动”即可 621 | Copyright © SXF 本软件禁止一切形式的商业活动 622 | **************************************************************************************************** 623 | 624 | 625 | 626 | 627 | 628 | 520 629 | 230 630 | 80 631 | 50 632 | 633 | 634 | 635 | 636 | 20 637 | 50 638 | 639 | 640 | 641 | PointingHandCursor 642 | 643 | 644 | 终止 ̄へ ̄ 645 | 646 | 647 | 648 | 649 | 650 | 520 651 | 154 652 | 80 653 | 50 654 | 655 | 656 | 657 | 658 | 20 659 | 50 660 | 661 | 662 | 663 | PointingHandCursor 664 | 665 | 666 | 启动(*^▽^*) 667 | 668 | 669 | 670 | 24 671 | 24 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 0 684 | 0 685 | 787 686 | 23 687 | 688 | 689 | 690 | 691 | 692 | TopToolBarArea 693 | 694 | 695 | false 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | pushButton_start 704 | clicked() 705 | MainWindow 706 | Start_Run() 707 | 708 | 709 | 707 710 | 282 711 | 712 | 713 | 396 714 | 104 715 | 716 | 717 | 718 | 719 | pushButton_stop 720 | clicked() 721 | MainWindow 722 | Stop_Run() 723 | 724 | 725 | 707 726 | 412 727 | 728 | 729 | 471 730 | 241 731 | 732 | 733 | 734 | 735 | pushButton_start_2 736 | clicked() 737 | MainWindow 738 | Start_Run_2() 739 | 740 | 741 | 593 742 | 251 743 
| 744 | 745 | 786 746 | 250 747 | 748 | 749 | 750 | 751 | pushButton_stop_2 752 | clicked() 753 | MainWindow 754 | Stop_Run_2() 755 | 756 | 757 | 587 758 | 312 759 | 760 | 761 | 801 762 | 301 763 | 764 | 765 | 766 | 767 | 768 | Start_Run() 769 | Stop_Run() 770 | Table_Update() 771 | Start_Run_2() 772 | Stop_Run_2() 773 | 774 | 775 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 设置文件编码为 UTF-8,支持中文字符处理 3 | 4 | import json # 用于处理 JSON 文件读写 5 | import os # 用于文件和目录操作 6 | import random # 用于生成随机数(如请求参数中的随机值) 7 | import re # 用于正则表达式匹配 8 | import threading # 用于多线程操作 9 | import time # 用于时间处理和延时 10 | from math import ceil # 用于计算分页数(向上取整) 11 | from pathlib import Path # 用于跨平台路径处理 12 | 13 | import configparser # 用于解析和写入配置文件(conf.ini) 14 | import pyautogui # 用于弹出提示框(如登录提醒) 15 | import requests # 用于发送 HTTP 请求 16 | from bs4 import BeautifulSoup # 用于解析 HTML 内容 17 | from PyQt5 import QtCore, QtGui, QtWidgets # PyQt5 核心模块,用于 GUI 界面 18 | from PyQt5.QtCore import Qt # PyQt5 核心常量(如对齐方式) 19 | from PyQt5.QtGui import QPixmap # 用于处理图片显示 20 | from selenium import webdriver # 用于自动化浏览器操作 21 | from selenium.webdriver.chrome.options import Options # 配置 Chrome 浏览器选项 22 | from selenium.webdriver.chrome.service import Service # 配置 ChromeDriver 服务 23 | from selenium.webdriver.common.by import By # 用于定位网页元素 24 | from selenium.webdriver.support import expected_conditions as EC # 用于等待条件判断 25 | from selenium.webdriver.support.ui import WebDriverWait # 用于显式等待 26 | from webdriver_manager.chrome import ChromeDriverManager # 自动管理 ChromeDriver 27 | 28 | import sys # 系统相关操作(如设置递归深度) 29 | import ctypes # 用于低级别线程控制(如强制终止线程) 30 | import inspect # 用于检查对象类型(如线程异常处理) 31 | import WeChat # 自定义模块,提供界面基础类 Ui_MainWindow 32 | 33 | # 设置递归深度为100万,避免递归调用过深导致栈溢出 34 | sys.setrecursionlimit(1_000_000) 35 | 36 | 37 | class MyMainWindow(WeChat.Ui_MainWindow): 38 | 
"""Main window of the WeChat crawler; derives from WeChat.Ui_MainWindow.

Holds the GUI glue and the crawling logic for WeChat official-account
articles: login, article listing, keyword search and content download.
"""


def __init__(self):
    """Initialise the HTTP session, paths, counters and resume state."""
    # One persistent HTTP session shared by every request.
    self.session = requests.Session()
    # Default request headers (browser-like User-Agent).
    self.headers = {
        "Host": "mp.weixin.qq.com",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0",
    }
    # Browser / driver paths; not referenced by the code visible in this
    # part of the file (kept for compatibility).
    self.browser_path = "Chrome/BitBrowser.exe"
    self.driver_path = "Chrome/chromedriver.exe"

    # Working directory and default output location.
    self.init_path = os.getcwd()
    self.root_path = os.path.join(self.init_path, "spider")
    self.time_gap = 5  # default wait between pages, in seconds
    self.time_start = 1999  # default first year
    self.year_now = time.localtime().tm_year
    self.time_end = self.year_now + 1  # default last year: next year

    # Thread bookkeeping and debug-label state.
    self.thread_list = []
    self.label_debug_string = ""
    self.label_debug_count = 0
    self.total_articles = 0
    self.keyword = ""
    self.keyword_search_mode = 0  # 0: account mode, 1: keyword mode
    self.keyword_2 = ""
    self.freq_control = 0
    self.download_count = 0
    self.link_buffer_count = 0
    self.download_end = 0
    # _check_config() may overwrite root_path and the counters above with
    # the values stored in conf.ini, so it must run after the defaults.
    self.is_resume = self._check_config()
    self._init_url_json()
    self.title_buffer = []
    self.link_buffer = []
    # Not referenced by the code visible in this part of the file.
    self.wechat_uin = None
    self.wechat_key = None


def _init_variables(self):
    """Reset the per-run state and the progress bar to their initial values."""
    self.root_path = os.path.join(os.getcwd(), "spider")
    self.thread_list = []
    self.label_debug_string = ""
    self.label_debug_count = 0
    self.total_articles = 0
    self.keyword = ""
    self.keyword_search_mode = 0
    self.keyword_2 = ""
    self._label_debug(" ")  # show a blank line in the debug label
    self.freq_control = 0
    self.download_count = 0
    self.link_buffer_count = 0
    self.download_end = 0
    self.title_buffer.clear()
    self.link_buffer.clear()
    self.progressBar.setMaximum(100)
    self.progressBar.setValue(0)


def _label_debug(self, message):
    """Append a line to the debug label, starting over after 12 lines.

    Args:
        message (str): text to append.
    """
    if self.label_debug_count >= 12:
        # Label is full: wipe it before appending the new line.
        self.label_debug_string = ""
        self.label_notes.setText(self.label_debug_string)
        self.label_debug_count = 0
    self.label_debug_string += f"\n{message}"
    self.label_notes.setText(self.label_debug_string)
    self.label_debug_count += 1


def _clear_label_debug(self):
    """Empty the debug label and reset its line counter."""
    self.label_debug_string = ""
    self.label_notes.clear()
    self.label_debug_count = 0


def setupUi(self, MainWindow):
    """Build the UI, pre-fill saved login data and show the QR image.

    Overrides the parent's setupUi and calls it first, which keeps the
    signal connections made there.

    Args:
        MainWindow (QtWidgets.QMainWindow): the main window object.
    """
    super().setupUi(MainWindow)
    try:
        login_file = os.path.join(self.init_path, "login.json")
        if os.path.exists(login_file):
            with open(login_file, "r", encoding="utf-8") as file:
                login_data = json.load(file)
            self._label_debug("登录文件读取成功")
            # Pre-fill the input fields from the saved login data.
            self.LineEdit_target.setText(login_data["target"])
            self.LineEdit_user.setText(login_data["user"])
            self.LineEdit_pwd.setText(login_data["pwd"])
            self.LineEdit_timegap.setText(str(login_data["timegap"]))
            self.lineEdit_timeEnd.setText(str(self.year_now + 1))
            self.lineEdit_timeStart.setText("1999")
            QtWidgets.QApplication.processEvents()  # refresh the UI

        # Download the QR image and show it in label_yf.
        image_url = "http://xfxuezhang.cn/web/share/donate/yf.png"
        response = requests.get(image_url, timeout=10)
        if response.status_code == 200:
            self.label_yf.setAlignment(Qt.AlignCenter)
            pixmap = QPixmap()
            pixmap.loadFromData(response.content)
            # Scale to the label size, keeping the aspect ratio.
            scaled_pixmap = pixmap.scaled(
                self.label_yf.size(), Qt.KeepAspectRatio, Qt.SmoothTransformation
            )
            self.label_yf.setPixmap(scaled_pixmap)
        else:
            self.label_yf.setText("图片URL未找到")
    except Exception as e:
        # Best effort: a bad login.json or a failed download must not
        # prevent the window from opening.
        print(f"UI设置错误: {e}")
        self._label_debug(f"UI设置错误: {e}")


def Start_Run(self):
    """Slot: start the crawl in a daemon thread."""
    self.total_articles = 0
    process_thread = threading.Thread(target=self._process, daemon=True)
    process_thread.start()
    self.thread_list.append(process_thread)


def Stop_Run(self):
    """Slot: stop every tracked worker thread and reset the run state.

    All entries of thread_list are drained (most recent first), not just
    the last two, so threads left over from repeated starts are stopped
    too. Threads that already finished are dropped without being passed
    to _stop_thread.
    """
    try:
        while self.thread_list:
            worker = self.thread_list.pop()
            if worker.is_alive():
                self._stop_thread(worker)
        self._init_variables()
        self._label_debug("终止成功!")
        print("终止成功!")
    except Exception as e:
        self._label_debug("终止失败!")
        print(f"停止失败: {e}")


def Start_Run_2(self):
    """Slot: start the crawl in keyword-search mode."""
    # parents=True: a resumed root_path may be nested (spider-N/<nickname>).
    Path(self.root_path).mkdir(parents=True, exist_ok=True)
    self.keyword_search_mode = 1
    self.total_articles = 0
    process_thread = threading.Thread(target=self._process, daemon=True)
    process_thread.start()
    self.thread_list.append(process_thread)


def Stop_Run_2(self):
    """Slot: leave keyword-search mode and stop the worker threads."""
    self.keyword_search_mode = 0
    # Same shutdown sequence and messages as the account-mode stop button.
    self.Stop_Run()


def _check_config(self):
    """Load conf.ini if it exists, otherwise create it with defaults.

    Returns:
        int: 1 when an existing configuration was loaded (resume),
        0 when a fresh configuration file was written.
    """
    self.config = configparser.ConfigParser()
    self.config_path = os.path.join(self.init_path, "conf.ini")
    if os.path.exists(self.config_path):
        self.config.read(self.config_path, encoding="utf-8")
        resume = dict(self.config["resume"])
        self.root_path = resume["rootpath"]
        self.page_num = int(resume["pagenum"])
        self.link_buffer_count = int(resume["linkbuf_cnt"])
        self.download_count = int(resume["download_cnt"])
        self.total_articles = int(resume["total_articles"])
        print(f"加载配置: {self.root_path}, {self.page_num}, {self.total_articles}")
        return 1

    # Fresh start: keep page_num defined on this path as well.
    self.page_num = 0
    with open(self.config_path, "w", encoding="utf-8") as f:
        self.config.add_section("resume")
        self.config.set("resume", "rootpath", self.init_path)
        self.config.set("resume", "pagenum", "0")
        self.config.set("resume", "linkbuf_cnt", "0")
        self.config.set("resume", "download_cnt", "0")
        self.config.set("resume", "total_articles", "0")
        self.config.write(f)
    return 0


def _process(self):
    """Worker entry point: read the form, log in, then crawl.

    Runs either the keyword search or the per-account article crawl,
    depending on keyword_search_mode. Any exception is reported in the
    debug label and on stdout instead of propagating out of the thread.
    """
    try:
        # Read the inputs from the form.
        username = self.LineEdit_user.text()
        password = self.LineEdit_pwd.text()
        query_name = self.LineEdit_target.text()
        # NOTE(review): the UI placeholder advertises a 5 s default, but
        # the fallback used here is 10 s - confirm which one is intended.
        self.time_gap = int(self.LineEdit_timegap.text() or 10)
        self.time_start = int(self.lineEdit_timeStart.text() or 1999)
        self.time_end = int(self.lineEdit_timeEnd.text() or self.year_now + 1)
        self.keyword = self.lineEdit_keyword.text()

        # Persist the login form when the checkbox is ticked and a
        # password was entered.
        # NOTE(review): the password is stored in plain text.
        if self.checkBox.isChecked() and password:
            login_data = {
                "target": query_name,
                "user": username,
                "pwd": password,
                "timegap": self.time_gap,
            }
            # Same encoding as the reader in setupUi.
            with open(os.path.join(self.init_path, "login.json"), "w", encoding="utf-8") as file:
                json.dump(login_data, file)

        # Log in and load the cookies into the session.
        token, cookies = self._login(username, password)
        self._add_cookies(cookies)
        if self.keyword_search_mode == 1:  # keyword mode
            self.keyword_2 = self.lineEdit_keyword_2.text()
            self._keyword_search(token, self.keyword_2)
        else:  # account mode
            fakeid, nickname = self._get_wechat_subscription(token, query_name)
            if not self.is_resume:  # not resuming: create an output directory
                index_count = 0
                while True:
                    try:
                        self.root_path = os.path.join(
                            self.init_path, f"spider-{index_count}", nickname
                        )
                        os.makedirs(self.root_path, exist_ok=True)
                        self.config.set("resume", "rootpath", self.root_path)
                        with open(self.config_path, "w", encoding="utf-8") as f:
                            self.config.write(f)
                        break
                    except FileExistsError:
                        # With exist_ok=True this is raised only when the
                        # path exists and is not a directory; try the
                        # next index.
                        index_count += 1
            self._get_articles(token, fakeid)
    except Exception as e:
        self._label_debug(f"处理错误: {e}")
        print(f"处理错误: {e}")


def _init_url_json(self):
    """Create (or, when not resuming, recreate) url.json and load it."""
    self.url_json_path = os.path.join(self.init_path, "url.json")
    if os.path.exists(self.url_json_path) and not self.is_resume:
        os.remove(self.url_json_path)  # fresh run: drop the old records
    if not os.path.exists(self.url_json_path):
        with open(self.url_json_path, "w") as f:
            json.dump([], f)  # start with an empty array
    with open(self.url_json_path, "r") as f:
        self.json_data = json.load(f)
    self.json_data_len = len(self.json_data)  # number of records at start-up


def _update_url_json(self, data):
    """Append one record to url.json.

    Args:
        data (dict): the record to append (built by the caller).
    """
    self.json_data.append(data)
    with open(self.url_json_path, "w") as f:
        json.dump(self.json_data, f)


def _login(self, username, password):
    """Return a token and cookies, from cookie.json or a manual browser login.

    First tries the cookies saved in cookie.json. If they are missing or
    rejected, opens Chrome and waits for the user to log in by hand.
    The browser is always closed, even when the login wait times out or
    another error occurs.

    Args:
        username (str): login user name (not used by this method).
        password (str): login password (not used by this method).

    Returns:
        tuple: (token, cookies) - the login token and the cookie list.

    Raises:
        RuntimeError: if no token can be found in the URL after login.
    """
    cookie_file = os.path.join(self.init_path, "cookie.json")
    try:
        if os.path.exists(cookie_file):
            with open(cookie_file, "r") as f:
                data = json.load(f)[0]  # first stored record
            cookies = data["COOKIES"]
            token = data["TOKEN"]
            if cookies and token:
                self._label_debug("cookie.json读取成功")
                self._add_cookies(cookies)
                response = self.session.get(
                    f"https://mp.weixin.qq.com/cgi-bin/home?t=home/index&lang=zh_CN&token={token}",
                    timeout=(30, 60),  # 30 s connect, 60 s read
                )
                # The response contains "登陆" when a new login is required.
                if "登陆" not in response.text:
                    self._label_debug("cookie有效,无需浏览器登录")
                    return token, cookies
    except Exception:
        # Deliberate best effort: any problem with the saved cookies
        # falls through to the manual login below.
        self._label_debug("cookie无效或缺失")

    # Saved cookies unusable: open a browser for a manual login.
    self._label_debug("正在打开浏览器,请稍等")
    options = Options()
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    browser = webdriver.Chrome(
        options=options, service=Service(ChromeDriverManager().install())
    )
    try:
        browser.maximize_window()
        browser.get("https://mp.weixin.qq.com")
        browser.implicitly_wait(60)

        pyautogui.alert(title="请手动完成登录", text="完成登录后,点击确认!", button="确认")
        # Wait up to 10 minutes for the logged-in page element.
        WebDriverWait(browser, 600, 0.5).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".weui-desktop_name"))
        )
        self._label_debug("登录成功")
        token_match = re.search(r"token=([^&]+)", browser.current_url)
        if token_match is None:
            raise RuntimeError("登录后未能从URL中获取token")
        token = token_match.group(1)
        cookies = browser.get_cookies()
        with open(cookie_file, "w") as f:  # save for the next run
            json.dump([{"COOKIES": cookies, "TOKEN": token}], f)
    finally:
        browser.quit()
    return token, cookies


def _add_cookies(self, cookies):
    """Copy browser cookies into the HTTP session.

    Args:
        cookies (list): cookie dicts, each with "name" and "value" keys.
    """
    cookie_jar = requests.cookies.RequestsCookieJar()
    for cookie in cookies:
        cookie_jar.set(cookie["name"], cookie["value"])
    # Two fixed cookies are always added on top of the browser ones.
    cookie_jar.set("wxtokenkey", "777")
    cookie_jar.set("payforreadsn", "EXPIRED")
    self.session.cookies.update(cookie_jar)


def _keyword_search(self, token, keyword):
    """Search articles by keyword, list them in the table and download them.

    Args:
        token (str): login token.
        keyword (str): search keyword.
    """
    self.url_buffer = []
    self.title_buffer = []
    headers = {
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        "Host": "mp.weixin.qq.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
        "Referer": f"https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit&action=edit&type=10&isMul=1&isNew=1&share=1&lang=zh_CN&token={token}",
    }
    url = "https://mp.weixin.qq.com/cgi-bin/operate_appmsg?sub=check_appmsg_copyright_stat"
    data = {
        "token": token,
        "lang": "zh_CN",
        "f": "json",
        "ajax": 1,
        "random": random.uniform(0, 1),
        "url": keyword,
        "allow_reprint": 0,
        "begin": 0,
        "count": 10,  # 10 results per page
    }
    # The first request yields both the total count and page 0, so page 0
    # is not requested a second time.
    first_page = self.session.post(url, data=data, headers=headers).json()
    total_pages = ceil(first_page["total"] / 10)

    table_index = 0  # next row of the result table
    for page in range(total_pages):
        if page == 0:
            page_json = first_page
        else:
            data["begin"] = page * 10
            data["random"] = random.uniform(0, 1)
            page_json = self.session.post(url, data=data, headers=headers).json()
        for item in page_json["list"]:
            self.url_buffer.append(item["url"])
            self.title_buffer.append(item["title"])
            self._update_table(table_index, item["title"], item["url"])
            table_index += 1
            self.total_articles += 1
            self._save_to_file(item["title"], item["url"], "")
        # Download this page's articles, then empty the buffers.
        self._get_content(self.title_buffer, self.url_buffer)
        self.url_buffer.clear()
        self.title_buffer.clear()


def _get_wechat_subscription(self, token, query):
    """Look up an official account and return its fakeid and nickname.

    Args:
        token (str): login token.
        query (str): account name; empty means "xinhuashefabu1".

    Returns:
        tuple: (fakeid, nickname) of the first search result.

    Raises:
        ValueError: if the search returns no account.
    """
    if not query:
        query = "xinhuashefabu1"
    # Passed as params so requests URL-encodes the query (non-ASCII names,
    # spaces, '&', '#').
    params = {
        "action": "search_biz",
        "token": token,
        "lang": "zh_CN",
        "f": "json",
        "ajax": "1",
        "random": "0.5182749224035845",
        "query": query,
        "begin": "0",
        "count": "5",
    }
    response = self.session.get(
        "https://mp.weixin.qq.com/cgi-bin/searchbiz",
        params=params,
        headers=self.headers,
        timeout=(30, 60),
    ).json()
    matches = response.get("list") or []
    if not matches:
        raise ValueError(f"未找到公众号: {query}")
    fakeid = matches[0]["fakeid"]  # first match
    nickname = matches[0]["nickname"]
    self._label_debug(f"公众号昵称: {nickname}")
    return fakeid, nickname


def _get_articles(self, token, fakeid):
    """Walk the article list of an official account page by page.

    Args:
        token (str): login token.
        fakeid (str): account id returned by the account search.
    """
    img_buffer = []
    total_buffer = []
    url = f"https://mp.weixin.qq.com/cgi-bin/appmsg?token={token}&lang=zh_CN&f=json&ajax=1&random={random.uniform(0, 1)}&action=list_ex&begin=0&count=5&query=&fakeid={fakeid}&type=9"
    response = self.session.get(url, headers=self.headers, timeout=(30, 60)).json()
    try:
        total_pages = ceil(int(response["app_msg_cnt"]) / 5)  # 5 articles per page
        self.progressBar.setMaximum(total_pages)
        QtWidgets.QApplication.processEvents()  # refresh the UI
    except Exception as e:
        self._label_debug(f"获取文章失败: {response['base_resp']['err_msg']}")
        return

    table_index = 0
    download_thread = threading.Thread(target=self._download_content, daemon=True)
    download_thread.start()
    self.thread_list.append(download_thread)

    buffer_index = 0
    for i in range(total_pages):
        if self.is_resume:  # resuming: continue from the stored page number
            i += self.page_num
        self._label_debug(f"第[{i + 1}/{total_pages}]页 url:{self.link_buffer_count}, article:{self.download_count}")
        begin = i * 5  # offset of the first article on this page
        url = f"https://mp.weixin.qq.com/cgi-bin/appmsg?token={token}&lang=zh_CN&f=json&ajax=1&random={random.uniform(0, 1)}&action=list_ex&begin={begin}&count=5&query=&fakeid={fakeid}&type=9"
        response = self._fetch_with_retry(url)
        if not response:
            break
        try:
            app_msg_list = response.json()["app_msg_list"]
        except Exception:
            self._label_debug("操作太频繁,5秒后重试")
            time.sleep(5)
            continue

        if not app_msg_list:  # empty page: nothing left to fetch
            self._label_debug("抓取结束")
break 529 | 530 | for j, item in enumerate(app_msg_list): # 遍历每页文章 531 | if not self._should_process_item(item, total_buffer): # 检查是否需要处理 532 | continue 533 | self.title_buffer.append(item["title"]) 534 | self.link_buffer.append(item["link"]) 535 | img_buffer.append(item["cover"]) 536 | total_buffer.append(item["title"]) 537 | self._update_table(table_index, item["title"], item["link"]) 538 | table_index += 1 539 | self.total_articles += 1 540 | self._save_to_file(item["title"], item["link"], item["cover"]) 541 | buffer_index += j 542 | self._update_progress(i, total_pages) # 更新进度 543 | time.sleep(self.time_gap) # 每页间隔等待 544 | 545 | self._clear_label_debug() 546 | self._label_debug(">> 列表抓取结束!!! <<") 547 | self.download_end = 1 # 标记下载结束 548 | 549 | def _should_process_item(self, item, total_buffer): 550 | """检查是否应处理当前文章 551 | 552 | 根据标题、关键词和时间范围判断是否处理该文章。 553 | 554 | Args: 555 | item (dict): 文章信息 556 | total_buffer (list): 已处理的标题列表 557 | 558 | Returns: 559 | bool: True 表示需要处理,False 表示跳过 560 | """ 561 | article_time = int(time.strftime("%Y", time.localtime(int(item["update_time"])))) 562 | if item["title"] in total_buffer: 563 | self._label_debug("文章已存在,跳过") 564 | return False 565 | if self.keyword and self.keyword not in item["title"]: 566 | self._label_debug(f"不匹配关键词[{self.keyword}],跳过") 567 | return False 568 | if self.time_start > article_time: 569 | self._label_debug(f"时间[{article_time}]不在范围[{self.time_start}-{self.time_end}]内,跳过") 570 | return False 571 | if article_time > self.time_end: 572 | self._label_debug("达到结束时间,退出") 573 | self.Stop_Run() 574 | return False 575 | return True 576 | 577 | def _update_table(self, index, title, url): 578 | """更新结果表格 579 | 580 | 在界面表格中添加一行文章信息。 581 | 582 | Args: 583 | index (int): 表格行索引 584 | title (str): 文章标题 585 | url (str): 文章链接 586 | """ 587 | table_count = self.tableWidget_result.rowCount() 588 | if index >= table_count: # 如果需要新行 589 | self.tableWidget_result.insertRow(table_count) 590 | 
self.tableWidget_result.setItem(index, 0, QtWidgets.QTableWidgetItem(title)) 591 | self.tableWidget_result.setItem(index, 1, QtWidgets.QTableWidgetItem(url)) 592 | 593 | def _save_to_file(self, title, link, img): 594 | """保存文章信息到文件 595 | 596 | 将文章信息保存到 spider.txt 和 url.json 中。 597 | 598 | Args: 599 | title (str): 文章标题 600 | link (str): 文章链接 601 | img (str): 文章封面图链接 602 | """ 603 | os.makedirs(self.root_path, exist_ok=True) # 确保目录存在 604 | data = {"Title": title, "Link": link, "Img": img} 605 | self._update_url_json(data) # 更新 JSON 文件 606 | with open(os.path.join(self.root_path, "spider.txt"), "a+", encoding="utf-8") as f: 607 | f.write( 608 | f"{'*' * 60}\n【{self.total_articles}】\n Title: {title}\n Link: {link}\n Img: {img}\n\n" 609 | ) # 写入文本文件 610 | self._label_debug(f">> 第{self.total_articles}条写入完成:{title}") 611 | self.config.set("resume", "total_articles", str(self.total_articles)) 612 | with open(self.config_path, "w", encoding="utf-8") as f: 613 | self.config.write(f) # 更新配置文件 614 | 615 | def _fetch_with_retry(self, url, retries=3): 616 | """带重试的网络请求 617 | 618 | 对网络请求进行重试机制以处理连接失败。 619 | 620 | Args: 621 | url (str): 请求的 URL 622 | retries (int): 最大重试次数,默认为3 623 | 624 | Returns: 625 | dict or None: JSON 响应数据,或 None 如果所有重试失败 626 | """ 627 | for _ in range(retries): 628 | try: 629 | 630 | resp = self.session.get(url, headers=self.headers, timeout=(30, 60)) 631 | return resp 632 | except Exception as e: 633 | # print(url, resp.text) 634 | self._label_debug(f"连接出错,稍等2秒: {e}") 635 | time.sleep(2) 636 | return None 637 | 638 | def _update_progress(self, current_page, total_pages): 639 | """更新进度信息 640 | 641 | 更新界面进度显示和配置文件中的计数。 642 | 643 | Args: 644 | current_page (int): 当前页码 645 | total_pages (int): 总页数 646 | """ 647 | self.link_buffer_count = ( 648 | len(self.link_buffer) + self.json_data_len if self.is_resume else len(self.link_buffer) 649 | ) # 计算链接总数 650 | self.config.set("resume", "linkbuf_cnt", str(self.link_buffer_count)) 651 | self.config.set("resume", 
"pagenum", str(current_page)) 652 | with open(self.config_path, "w", encoding="utf-8") as f: 653 | self.config.write(f) 654 | self.label_total_Page.setText( 655 | f"第[{current_page + 1}/{total_pages}]页 linkbuf_cnt:{self.link_buffer_count}, download_cnt:{self.download_count}" 656 | ) # 更新界面显示 657 | 658 | def _get_content(self, title_buffer, link_buffer): 659 | """下载文章内容 660 | 661 | 下载并保存文章的文本、图片、HTML 和评论。 662 | 663 | Args: 664 | title_buffer (list or str): 文章标题或标题列表 665 | link_buffer (list or str): 文章链接或链接列表 666 | """ 667 | length = len(title_buffer) if self.keyword_search_mode == 1 else 1 668 | for index in range(length): 669 | title = re.sub(r'[|/<>:*?"]', "_", title_buffer[index] if self.keyword_search_mode == 1 else title_buffer) 670 | filepath = os.path.join(self.root_path, title) 671 | os.makedirs(filepath, exist_ok=True) # 创建文章目录 672 | os.chdir(filepath) # 切换到文章目录 673 | 674 | url = link_buffer[index] if self.keyword_search_mode == 1 else link_buffer 675 | html = self._fetch_with_retry(url) # 获取文章页面 676 | if not html: continue 677 | 678 | soup = BeautifulSoup(html.text, "lxml") # 解析 HTML 679 | # print(soup) 680 | self._save_article_text(soup, title) # 保存文本 681 | self._save_article_images(soup, title) # 保存图片 682 | self._save_html(soup, title) # 保存 HTML 683 | self._save_comments(url, title) # 保存评论 684 | 685 | if self.keyword_search_mode == 1: # 关键词模式下每篇文章间隔等待 686 | self._label_debug(f">> 休息 {self.time_gap} 秒") 687 | time.sleep(self.time_gap) 688 | 689 | def _save_article_text(self, soup, title): 690 | """保存文章文本 691 | 692 | 提取并保存文章的正文内容到文本文件。 693 | 694 | Args: 695 | soup (BeautifulSoup): 解析后的 HTML 对象 696 | title (str): 文章标题 697 | """ 698 | try: 699 | article = soup.find(class_="rich_media_content").find_all("p") # 查找正文段落 700 | with open(f"{title}.txt", "a+", encoding="utf-8") as f: 701 | for p in article: 702 | text = p.get_text() 703 | if text: # 如果段落有文本内容 704 | f.write(f"{text}\n") 705 | self._label_debug(">> 保存文档 - 完毕!") 706 | except Exception as e: 707 | 
self._label_debug(f"未匹配到文字: {e}") 708 | 709 | def _save_article_images(self, soup, title): 710 | """保存文章图片 711 | 712 | 下载并保存文章中的所有图片。 713 | 714 | Args: 715 | soup (BeautifulSoup): 解析后的 HTML 对象 716 | title (str): 文章标题 717 | """ 718 | try: 719 | img_urls = soup.find(class_="rich_media_content").find_all("img") # 查找所有图片 720 | for i, img in enumerate(img_urls): 721 | for _ in range(3): # 每张图片尝试3次下载 722 | try: 723 | response = self.session.get(img["data-src"], timeout=(30, 60)) 724 | with open(f"{i}.jpeg", "ab+") as f: 725 | f.write(response.content) 726 | break 727 | except Exception as e: 728 | self._label_debug(f"图片下载超时: {e}") 729 | else: 730 | self._label_debug("放弃此图") # 重试失败后放弃 731 | self._label_debug(f">> 保存图片{len(img_urls)}张 - 完毕!") 732 | except Exception as e: 733 | self._label_debug(f"未匹配到图片: {e}") 734 | 735 | def _save_html(self, soup, title): 736 | """保存HTML文件 737 | 738 | 保存文章的完整 HTML 内容。 739 | 740 | Args: 741 | soup (BeautifulSoup): 解析后的 HTML 对象 742 | title (str): 文章标题 743 | """ 744 | with open(f"{title}.html", "w", encoding="utf-8") as f: 745 | f.write(str(soup)) 746 | self._label_debug(">> 保存html - 完毕!") 747 | 748 | def _save_comments(self, url, title): 749 | """保存文章评论 750 | 751 | 获取并保存文章的评论到文件。 752 | 753 | Args: 754 | url (str): 文章链接 755 | title (str): 文章标题 756 | """ 757 | comments = self._get_comments(url, self.wechat_uin, self.wechat_key) 758 | with open(f"{title}_comments.txt", "a+", encoding="utf-8") as f: 759 | f.write("\n".join(comments)) 760 | self._label_debug(">> 保存评论 - 完毕!") 761 | 762 | def _get_comments(self, article_url, uin, key, offset=0): 763 | """获取文章评论 764 | 765 | 从微信接口获取文章的评论内容。 766 | 767 | Args: 768 | article_url (str): 文章链接 769 | uin (str): 微信 UIN 770 | key (str): 微信 Key 771 | offset (int): 评论偏移量,默认为0 772 | 773 | Returns: 774 | list: 评论列表,每个元素为“昵称: 内容”格式 775 | """ 776 | comments = [] 777 | if not uin or not key: # 如果缺少 UIN 或 Key,返回空列表 778 | return comments 779 | biz = re.search(r"__biz=(.*?)&", article_url).group(1) # 提取公众号标识 780 | 
comment_id = self._get_comment_id(article_url) # 获取评论ID 781 | if not comment_id: 782 | return comments 783 | 784 | url = "https://mp.weixin.qq.com/mp/appmsg_comment?" 785 | params = { 786 | "action": "getcomment", 787 | "comment_id": comment_id, 788 | "uin": uin, 789 | "key": key, 790 | "__biz": biz, 791 | "offset": str(offset), 792 | "limit": "100", # 每次获取100条评论 793 | "f": "json", 794 | } 795 | try: 796 | response = self.session.get(url, params=params).json() 797 | if response.get("elected_comment_total_cnt"): # 如果有精选评论 798 | for item in response["elected_comment"]: 799 | comments.append(f"{item['nick_name']}: {item['content']}") 800 | except Exception: 801 | pass # 忽略评论获取失败 802 | return comments 803 | 804 | def _get_comment_id(self, article_url): 805 | """获取文章评论ID 806 | 807 | 从文章页面提取评论ID。 808 | 809 | Args: 810 | article_url (str): 文章链接 811 | 812 | Returns: 813 | str or None: 评论ID,或 None 如果提取失败 814 | """ 815 | try: 816 | response = requests.get(article_url).text 817 | pattern = re.compile(r'comment_id\s*=\s*"(?P\d+)"') 818 | return pattern.search(response)["id"] 819 | except Exception: 820 | return None 821 | 822 | def _download_content(self): 823 | """下载内容线程 824 | 825 | 持续检查缓冲区并下载文章内容,直到任务完成。 826 | """ 827 | while True: 828 | try: 829 | if self.download_count < self.link_buffer_count: # 如果有待下载的内容 830 | if self.is_resume: # 恢复模式下从 JSON 文件读取 831 | self.json_data = json.load(open(self.url_json_path, "r")) 832 | self._get_content( 833 | self.json_data[self.download_count]["Title"], 834 | self.json_data[self.download_count]["Link"], 835 | ) 836 | else: # 正常模式下从缓冲区读取 837 | self._get_content( 838 | self.title_buffer[self.download_count], 839 | self.link_buffer[self.download_count], 840 | ) 841 | self.download_count += 1 842 | self.config.set("resume", "download_cnt", str(self.download_count)) 843 | with open(self.config_path, "w", encoding="utf-8") as f: 844 | self.config.write(f) 845 | elif self.download_count >= self.link_buffer_count and self.download_end: 846 | 
self._clear_label_debug() 847 | self._label_debug(">> 程序结束, 欢迎再用!!! <<") 848 | break # 下载完成,退出循环 849 | elif self.download_count == self.link_buffer_count and not self.download_end: 850 | time.sleep(2) # 等待更多内容 851 | except Exception as e: 852 | self._label_debug(f"下载内容错误: {e}") 853 | 854 | def _async_raise(self, tid, exc_type): 855 | """强制抛出异常以终止线程 856 | 857 | 使用 ctypes 向指定线程抛出异常。 858 | 859 | Args: 860 | tid (int): 线程ID 861 | exc_type (type): 要抛出的异常类型 862 | 863 | Raises: 864 | ValueError: 如果线程ID无效 865 | SystemError: 如果线程状态设置失败 866 | """ 867 | tid = ctypes.c_long(tid) 868 | if not inspect.isclass(exc_type): 869 | exc_type = type(exc_type) 870 | res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exc_type)) 871 | if res == 0: 872 | raise ValueError("无效线程ID") 873 | elif res != 1: 874 | ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None) 875 | raise SystemError("线程状态设置失败") 876 | 877 | def _stop_thread(self, thread): 878 | """停止指定线程 879 | 880 | 调用 _async_raise 强制终止线程。 881 | 882 | Args: 883 | thread (threading.Thread): 要终止的线程对象 884 | """ 885 | self._async_raise(thread.ident, SystemExit) 886 | 887 | 888 | def main(): 889 | """程序入口 890 | 891 | 初始化 PyQt5 应用程序并显示主窗口。 892 | """ 893 | app = QtWidgets.QApplication(sys.argv) 894 | MainWindow = QtWidgets.QMainWindow() 895 | ui = MyMainWindow() 896 | ui.setupUi(MainWindow) 897 | MainWindow.show() 898 | sys.exit(app.exec_()) 899 | 900 | 901 | if __name__ == "__main__": 902 | main() 903 | 904 | --------------------------------------------------------------------------------