├── Access_articles.py
├── LICENSE
├── README.md
├── README
    ├── 20250316180200.jpg
    ├── function1.1.png
    ├── function1.png
    ├── function2.1.png
    ├── function2.png
    ├── function3.1.png
    ├── function3.png
    ├── function4.png
    ├── qrcode_1749894334903.jpg
    ├── wechat_article_drawio.png
    └── 程序流程图.drawio.png
├── main.py
└── requirements.txt


/Access_articles.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re  # 使用正则表达式
  3 | import json  # 用于json转码
  4 | import time
  5 | import random
  6 | import jsonpath
  7 | import requests
  8 | import pandas as pd  # 修改excel
  9 | from bs4 import BeautifulSoup
 10 | from fake_useragent import UserAgent  # 生成随机浏览器标识
 11 | import sys
 12 | import logging
 13 | 
 14 | logging.basicConfig(  # 配置日志记录
 15 |     filename='app.log',
 16 |     level=logging.ERROR,
 17 |     format='%(asctime)s - %(levelname)s - %(message)s'
 18 | )
 19 | requests.packages.urllib3.disable_warnings()  # 去除网络请求警告
 20 | 
 21 | 
 22 | class AccessPosts:
 23 |     """功能：
 24 |         1.根据URL访问原页面获取网页文本数据
 25 |         2.按保存规则进行存储（单个、批量）
 26 |         3.检测人机验证，并去除验证
 27 |     """
 28 | 
 29 |     def __init__(self):
 30 |         self.root_path = r'./all_data/'  # 数据存储目录
 31 |         self.official_names_head = '公众号----'  # 公众号保存目录开头，用以保存对应公众号的信息，公众号: xxx
 32 |         self.headers = {
 33 |             'User-Agent': UserAgent().random,  # 生成随机的浏览器标识头
 34 |         }
 35 |         self.cookies = {"poc_sid": ''}  # 用以保存设备ID，用来去除人机验证
 36 |         os.makedirs(self.root_path, exist_ok=True)  # 创建保存路径，如果文件夹已存在，则忽略，默认为r'./all_data'
 37 | 
 38 |     def save_one_article(self, article_content, img_save_flag=True, content_save_flag=True):
 39 |         """输入：文章文本内容，是否保存图片（默认保存），是否保存文章内容到文件（默认保存）
 40 |            输出：保存flag
 41 |            功能：整理文本内容，创建保存路径
 42 |         """
 43 |         # 整理文章关键信息
 44 |         nickname = re.search(r'var nickname.*"(.*?)".*', article_content).group(1)  # 公众号名称
 45 |         article_link = re.search(r'var msg_link = .*"(.*?)".*', article_content).group(1)  # 文章链接
 46 |         createTime = re.search(r"var createTime = '(.*?)'.*", article_content).group(1)  # 文章创建时间
 47 |         # year, month, day = createTime.split(" ")[0].split("-")      # 年，月，日
 48 |         # hour, minute = createTime.split(" ")[1].split(":")          # 小时，分钟
 49 |         author = re.search(r'var author = "(.*?)".*', article_content).group(1)  # 文章作者
 50 |         article_title = re.search(r"var title = '(.*?)'.*", article_content).group(1)  # 文章标题
 51 |         article_title_win = re.sub(r'[\\/*?:"<>|].', '_', article_title)  # Windows下标题
 52 |         article_title_win = article_title_win.replace('.', '')  # Windows下标题，去除小数点，防止自动省略报错
 53 | 
 54 |         # 创建公众号保存目录
 55 |         official_path = self.root_path + self.official_names_head + nickname  # 各种公众号存储根路径
 56 |         os.makedirs(official_path, exist_ok=True)
 57 | 
 58 |         """下载文章图片"""
 59 |         if img_save_flag:  # 类属性中开启保存选项！
 60 |             print('开启保存文章图片选项，准备下载文章图片')
 61 |             # 创建文章图片保存目录
 62 |             img_save_path = (self.root_path + self.official_names_head + nickname + '/'  # 图片保存路径
 63 |                              + createTime.replace(':', '：') + ' ' + article_title_win)
 64 |             os.makedirs(img_save_path, exist_ok=True)  # 创建图片保存目录
 65 | 
 66 |             # 保存该文章图片内容
 67 |             images = article_content.split('https://mmbiz.qpic.cn/')
 68 |             # print(images)
 69 |             for i in range(0, len(images) - 1):
 70 |                 image_url = 'https://mmbiz.qpic.cn/' + images[i + 1].split('"')[0]
 71 |                 # print('正在获取图片：' + image_url)
 72 |                 image_name = ''
 73 |                 response = requests.get(image_url, cookies=self.cookies, verify=False)
 74 |                 if response.status_code == 200:
 75 |                     # 图片命名
 76 |                     img_hz = ['gif', 'jpg', 'jpeg', 'png', 'webp']
 77 |                     for imghz in img_hz:
 78 |                         if imghz in image_url:
 79 |                             image_name = str(i + 1) + '.' + imghz
 80 |                     if image_name == '':  # 如果链接中没有标明图片属性
 81 |                         image_name = str(i + 1) + '.jpg'
 82 |                     file_path = img_save_path + '/' + image_name
 83 |                     # 保存图片
 84 |                     with open(file_path, 'wb') as f:
 85 |                         f.write(response.content)
 86 |                     print(f"已成功下载图片： {file_path}")
 87 |                 else:
 88 |                     print(f"无法下载图片，状态码: {response.status_code}")
 89 |             print('已保存文章图片>>>> ' + article_title)
 90 | 
 91 |         """保存文章文本内容"""
 92 |         if content_save_flag:
 93 |             # 将文字内容转换为列表形式存储
 94 |             soup = BeautifulSoup(article_content, 'html.parser')
 95 |             original_texts = soup.getText().split('\n')  # 将页面所有的文本内容提取，并转为列表形式
 96 |             article_texts = list(filter(lambda x: bool(x.strip()), original_texts))  # filter() 函数可以根据指定的函数对可迭代对象进行过滤
 97 | 
 98 |             # 创建 or 打开表格，检查文件是否存在，判断不存在时创建表格文件
 99 |             article_contents_path = official_path + '/' + '文章内容(article_contents).xlsx'  # 文章内容文件路径
100 |             if not os.path.exists(article_contents_path): pd.DataFrame().to_excel(article_contents_path, index=False)
101 |             frame_df = pd.read_excel(article_contents_path)  # 读取表格内容，默认打开DataFrame对象包含第一个工作表中的数据
102 | 
103 |             # 将新数据转换为 DataFrame 并添加到现有 DataFrame 的末尾
104 |             local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())  # 本地时间
105 |             columns = ['本地存储时间', '文章发布时间', '文章名称', '文章链接', '文章文本内容']  # 列名
106 |             new_data_df = pd.DataFrame([[local_time, createTime, article_title, str(article_link), str(article_texts)]],
107 |                                        columns=columns)
108 |             df = pd.concat([frame_df, new_data_df], ignore_index=True)
109 | 
110 |             # 将更新后的数据写入 Excel 文件
111 |             df.to_excel(article_contents_path, index=False)
112 |             print(local_time + ' 已保存文章>>>> ' + article_title)
113 |             print(local_time + ' 内容存储路径>>>> ' + article_contents_path)
114 | 
115 |     def get_one_article(self, url, img_save_flag=True, content_save_flag=True):
116 |         """
117 |             输入：微信文章链接（永久链接或短链接），是否保存图片（默认保存），是否保存文章内容到文件（默认保存）
118 |             输出：无（内容保存目录在终端显示）
119 |         """
120 |         res = requests.get(url, headers=self.headers, cookies=self.cookies, verify=False)  # 发起请求
121 |         # 验证请求
122 |         if 'var createTime = ' in res.text:  # 正常获取到文章内容
123 |             print('正常获取到文章内容，开始保存操作')
124 |             try:
125 |                 self.save_one_article(res.text, img_save_flag, content_save_flag)  # 开始保存单篇文章
126 |                 return {'content_flag': 1, 'content': res.text}  # 用来获取公众号主页链接
127 |             except:
128 |                 article_title = re.search(r"var title = '(.*?)'.*", res.text)  # 文章标题
129 |                 if article_title: article_title = article_title.group(1)
130 |                 print('检测到抓取出错，文章名>>>>    ' + article_title)
131 |                 print('检测到抓取出错，文章链接>>>>    ' + url)
132 |                 return {'content_flag': 0}
133 |         elif '>当前环境异常，完成验证后即可继续访问。<' in res.text:
134 |             print('当前环境异常，请检查链接后访问！！！')  # 代码访问遇到人机验证，需进行验证操作
135 |             return {'content_flag': 0}
136 |         elif '操作频繁，请稍后再试。' in res.text:
137 |             print('操作频繁了，等会再弄或换ip弄！！！')  # 遇到次数较少，如有遇到请前往GitHub留言
138 |             return {'content_flag': 0}
139 |         else:
140 |             print('出现其他问题，请查找原因后再试！！！！')  # 出现错误信息，如有遇到请前往GitHub留言
141 |             return {'content_flag': 0}
142 | 
143 |     def get_list_article(self, name_link, img_save_flag=True, content_save_flag=True):
144 |         """ 输入：公众号名称或公众号的一篇文章，是否保存图片（默认保存），是否保存文章内容到文件（默认保存）
145 |             输出：无（内容保存目录在终端显示）
146 |             功能：保存文章列表中所有内容
147 |         """
148 |         if 'http' in name_link:
149 |             print('检测到输入为链接，开始获取公众号名称')
150 |             content = self.get_one_article(name_link, False, False)
151 |             if content['content_flag'] == 1:
152 |                 nickname = re.search(r'var nickname.*"(.*?)".*', content['content']).group(1)  # 公众号名称
153 |             else:
154 |                 print('未获取到公众号名称')
155 |                 return None
156 |         else:
157 |             print('检测到输入为公众号名称')
158 |             nickname = name_link
159 | 
160 |         official_path = self.root_path + self.official_names_head + nickname  # 公众号存储根路径
161 |         # article_contents_path = official_path + '/' + '文章列表（article_list）_原始链接.xlsx'  # 文章内容文件路径
162 |         article_list_path = official_path + '/' + '文章列表（article_list）_直连链接.xlsx'  # 文章列表文件路径
163 |         if not os.path.exists(article_list_path):  # 如果文件不存在
164 |             print('文件不存在，请检查目录文件>>>>  文章列表（article_list）_直连链接.xlsx')
165 |         else:
166 |             frame_df = pd.read_excel(article_list_path)  # 读取表格内容，默认打开DataFrame对象包含第一个工作表中的数据
167 |             # 开始下载文章内容
168 |             for index, row in frame_df.iterrows():
169 |                 roll_url = row.iloc[4]  # 获取直连链接
170 |                 self.get_one_article(roll_url, img_save_flag, content_save_flag)
171 | 
172 |     # def verify_user(self, url, content):
173 |     #     """
174 |     #         输入：url=请求路径，content=网页内容，如：res.text，遇到此情况时使用：  >当前环境异常，完成验证后即可继续访问。<
175 |     #         输出：验证标志（1为有效），网页内容，cookie值
176 |     #             {'verify_flag': 1, 'content': res.text, 'poc_sid': poc_sid}
177 |     #         poc_sid == deviceID
178 |     #     """
179 |     #     print('开始验证，正在获取参数poc_sid')
180 |     #     poc_token = re.search(r'poc_token.*"(.*?)"', content).group(1)
181 |     #     poc_sid = re.search(r'poc_sid.*"(.*?)"', content).group(1)  # poc_sid为cookie参数
182 |     #     cap_appid = re.search(r'cap_appid.*"(.*?)"', content).group(1)
183 |     #     cap_sid = re.search(r'cap_sid.*"(.*?)"', content).group(1)
184 |     #     target_url = re.search(r'target_url.*"(.*?)"', content).group(1)
185 |     #
186 |     #     try:
187 |     #         '''验证请求第一步'''
188 |     #         verify1_url = ('https://t.captcha.qq.com/cap_union_prehandle?' + 'protocol=https&accver=1&showtype=popup&'
189 |     #                        'ua=TW96aWxsYS81LjAgKFdpbmRvd3MgTlQgMTAuMDsgV2luNjQ7IHg2NCkgQXBwbGVXZWJLaXQvNTM3LjM2IChLSFRNTCwgbGlrZSBHZWNrbykgQ2hyb21lLzEyNy4wLjAuMCBTYWZhcmkvNTM3LjM2IEVkZy8xMjcuMC4wLjA%3D&'
190 |     #                        'noheader=0&fb=1&aged=0&enableAged=0&enableDarkMode=1&grayscale=1&dyeid=0&clientype=2'
191 |     #                        '&aid=' + cap_appid + '&deviceID=' + poc_sid + '&sid=' + cap_sid +
192 |     #                        '&cap_cd=&uid=&lang=zh-cn&elder_captcha=0&js=%2Ftcaptcha-frame.8d77d8b0.js&login_appid='
193 |     #                        '&entry_url=https%3A%2F%2Fmp.weixin.qq.com%2Fmp%2Fwappoc_appmsgcaptcha&wb=1&version=1.1.0'
194 |     #                        '&subsid=1&callback=_aq_873604&sess=')
195 |     #         header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0'}
196 |     #         verify1 = requests.get(verify1_url, headers=header, verify=False)
197 |     #         # sess，pow_answer 为第三步中的请求数据
198 |     #         sess = re.search('sess":"(.*?)"', verify1.text).group(1)
199 |     #         pow_answer = re.search('prefix":"(.*?)"', verify1.text).group(1)
200 |     #
201 |     #         '''验证请求第二步'''
202 |     #         verify2_url = 'https://t.captcha.qq.com' + re.search('tdc_path":"(.*?)"', verify1.text).group(1)
203 |     #         verify2 = requests.get(verify2_url, headers=header, verify=False)
204 |     #         # eks 为第三步中的请求数据
205 |     #         eks = re.search(r"='(.*?)'", verify2.text).group(1)
206 |     #
207 |     #         '''验证请求第三步'''
208 |     #         verify3_url = 'https://t.captcha.qq.com/cap_union_new_verify'
209 |     #         verify3_data = {
210 |     #             'collect': 'F97A58z6EKA4CNUjzdxrYiPXOGxCX1E4UPbmPhuuy6vojKPeA0EUN5DJWtjE3y0eow298aaKR+wKb7f8wsB6K1uaS93BwGTk8a18UNChBgwMYPRdHERtNoHs66mCG3FRhfxgEi758hvugEzzsKyNStp8ChZa9NqJ0OEBsVqsaTAZoVzkIZ8KqUgoMUW9EhoXesF5tqB9arGi+ZkBPrw5w0HzVR8yx1ehQhjixIw5rjCXg98Z2Fq8P4knkq9epFQEgB6vpR7K8gZ0VhmRCLXNTM4FsHnMdHWBX7orOllWdusPAlCMnsXMj7ucO9aDyP1e2fYsJYwK9zeSi8zvQ4F/XP6a9NvOYY4dZR7HI2UaJwUG0xxPU14zymkk8CWHWG5i1kKYGUz6X/yISfEczLkCMHECgJDtMOJzb9WhkuyfD7PyvpL1rU1lgWApFJp3c46RvCTftmfhfu2IJMTZ5LwWtxJIX8zsUj42pWiWM7iiqSzoH9gBgLGyJSWKUXm4f4jIeMj4V8hECgrYT5E9Oz1zl3Yib74HV2R8NjM6e9VjI7fu3/GKVdkQP0CgnSbYJzvJpDsECdY1CSgwEtI2AaC8x2eECThJ2j/3X9pb4ypH6N6ZSDWD5I67rOUeHLi8L0NN1ISm/HiGD8mWDOGLyyFsEGuGGzuMqy+Fxehtr2uvyxRWtWadGhG34osn1aNKcJcMK4iSERJeZGBbpTQNaA626rxzjjxBEbuNyRXvSHHbB33WzGT/74wrkaTRpcpwo6IGd8Rw93kThxuEpb8SmFVDAcIexRBn/+AWPnpcbM1aS82k0aXKcKOiBqTRpcpwo6IG0wdvP4uHCcyi67Tdt1B0yTKhZOtZ+z2xwh7FEGf/4BYAswz/QxMnADq0ZSBjHYmU9mPAx29tLUZQG3YboJK5sX+m4Ga9XlnqGW7hZEE83B/5powR4JuSyIFd38tOJdxgJBM9r8WKWUcRsXosFi1NHy7U7yuIT+bQ4HsRrnHlh73wJO3rsC5ShXfIRH6l4Q/Zf5HY0ENCIULn//Cv2azN4xGZsuYK4mxG2jtSpNboVeiJruE9wXg798jpf7CJKPV+v/ffl9+s5AbTJ69l3LT3td/cgtlkCpKsxrRZbD7ZI53YESSMtjzw76PGoKGD+MFQRkucxPcXQBEBOtd9zxxMIIZiXFWsG62+HLeQVL86apMlSFzJ8zNsU5xeilajsVkeqEWZAmdfoskf5iXmDpEoxSabuF0xUPRdpNGlynCjogak0aXKcKOiBqTRpcpwo6IGJ5hRMBQehUwRZc+Z+lhesnYtjIlRt+75qu5cfqEIgUYtg2+DknP0YDRuxdVoPCN1',
211 |     #             'tlg': 1312,
212 |     #             'eks': eks,
213 |     #             'sess': sess,
214 |     #             'ans': '[{"elem_id":0,"type":"DynAnswerType_TIME","data":""}]',
215 |     #             'deviceID': poc_sid,
216 |     #             'pow_answer': pow_answer + '#104',
217 |     #             'pow_calc_time': 1,
218 |     #         }
219 |     #         header = {
220 |     #             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0',
221 |     #             'Accept': 'application / json, text / javascript, * / *; q = 0.01',
222 |     #             'Accept - Encoding': 'gzip, deflate, br, zstd',
223 |     #             'Accept-Language': 'zh-CN,zh;q=0.9',
224 |     #         }
225 |     #         verify3 = requests.post(verify3_url, headers=header, data=verify3_data, verify=False)
226 |     #         # ticket，randstr为第四步请求参数
227 |     #         ticket = json.loads(verify3.text)['ticket']
228 |     #         randstr = json.loads(verify3.text)["randstr"]
229 |     #         print(ticket)
230 |     #         print(randstr)
231 |     #         print(verify3.text)
232 |     #
233 |     #         '''验证请求第四步'''
234 |     #         verify4_url = 'https://mp.weixin.qq.com/mp/wappoc_appmsgcaptcha?action=Check&x5=0&f=json'
235 |     #         verify4_data = {
236 |     #             'target_url': target_url,
237 |     #             'poc_token': poc_token,
238 |     #             'appid': cap_appid,
239 |     #             'ticket': ticket,
240 |     #             'randstr': randstr,
241 |     #         }
242 |     #         self.cookies['poc_sid'] = poc_sid  # 重置类属性 cooikes的值
243 |     #         verify4 = requests.post(verify4_url, headers=header, cookies=self.cookies, data=verify4_data,
244 |     #                                 verify=False)
245 |     #         # print(verify4.text)
246 |     #         # print('发送成功后，poc_sid就可以正常使用了')
247 |     #
248 |     #         '''验证请求第五步'''
249 |     #         modify_url = url + '&poc_token=' + poc_token
250 |     #         res = requests.get(modify_url, headers=self.headers, cookies=self.cookies, verify=False)  # 发起请求
251 |     #         print('已完成验证请求，后续请求若仍存在异常，请检查！')
252 |     #         # print(res.text)
253 |     #         return {'verify_flag': 1, 'content': res.text}
254 |     #     except:
255 |     #         print('验证失败，请检查后再进行尝试')
256 |     #         return {'verify_flag': 0}
257 | 
258 | 
259 | class ArticleDetail(AccessPosts):
260 |     def __init__(self):
261 |         super().__init__()
262 |         self.biz = None
263 |         self.uin = None
264 |         self.key = None
265 |         self.pass_ticket = None
266 |         self.text = 'website'  # 预留位
267 | 
268 |     def get_article_link(self, url):
269 |         """
270 |             输入：公众号下任意一篇已发布的文章 短链接！！
271 |             功能：通过公众号内的文章获取到公众号的biz值，拼接出公众号主页链接
272 |         """
273 |         content = super().get_one_article(url, False, False)  # 获取网页文本内容
274 |         if content['content_flag'] == 1:
275 |             print('正在生成微信公众号主页链接……\n')
276 |             self.biz = re.search('var biz = "(.*?);', content['content']).group(1).replace('" || "', '').replace('"',
277 |                                                                                                                  '')
278 |             names = re.search(r'var nickname.*"(.*?)".*', content['content']).group(1)  # 公众号名称
279 |             main_url = ('https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=' + self.biz +
280 |                         '&scene=124#wechat_redirect')
281 |             print(names + '公众号主页链接为：' + main_url)
282 |             print('将此链接 （￣︶￣）↗　 粘贴发送到 ‘微信PC端-文件传输助手’')
283 |         else:
284 |             print('未获取到文章内容，请检查链接是否正确')
285 | 
286 |     def access_origin_list(self, access_token, pages=None, save_list=True, transform_list=True):
287 |         """ 输入：access_token(从fiddler获取的链接)，保存页数（默认全部），是否保存到文件（默认保存），是否转换链接（默认转换）
288 |             输出：无（获取的文章列表将保存在本地目录下）
289 |             功能：
290 |                 ① 请求得到文章信息（文章标题、文章链接、文章创建日期）
291 |                 ②以excel文件形式存储，文件名设置为对应公众号的名称
292 |         """
293 |         # 检验access_token是否合法
294 |         self.biz = str(re.search('biz=(.*?)&', access_token).group(1))
295 |         self.uin = str(re.search('uin=(.*?)&', access_token).group(1))
296 |         self.key = str(re.search('key=(.*?)&', access_token).group(1))
297 |         self.pass_ticket = str(re.search('pass_ticket=(.*?)&', access_token).group(1))
298 |         if self.biz and self.uin and self.pass_ticket and self.key:
299 |             print('参数齐全，开始获取文章信息，默认状态获取全部文章')
300 |         else:
301 |             print('\n※※※ 参数有误，请重新输入')
302 |             return None
303 | 
304 |         '''获取文章列表，格式化内容为一个二维数组：all_list'''
305 |         all_list = None  # 用来存储获取的文章列表
306 |         # 遍历公众号下所有文章链接
307 |         if not pages:
308 |             page = 0
309 |             passage_list = []
310 |             print('开始获取公众号下所有的文章列表')
311 |             while True:
312 |                 p_data = self.get_next_list(page)
313 |                 if p_data['m_flag'] == 1:
314 |                     for i in p_data['passage_list']:
315 |                         passage_list.append(i)
316 |                 else:
317 |                     print('请求结束，文章列表获取完毕！')
318 |                     break
319 |                 page = page + 1
320 |                 delay_time = random.uniform(1, 5)  # 延迟时间
321 |                 print('为预防被封禁,开始延时操作，延时时间：' + str(delay_time) + '秒')
322 |                 time.sleep(delay_time)  # 模拟手动操作，随机延时delay_time秒，预防被封禁
323 |             all_list = passage_list
324 |         # 获取公众号下指定页数的文章链接
325 |         else:
326 |             print('输入值为：' + str(pages) + '，开始获取前' + str(pages) + '页文章')
327 |             passage_list = []
328 |             for pages in range(pages):
329 |                 p_data = self.get_next_list(pages)
330 |                 if p_data['m_flag'] == 1:
331 |                     for i in p_data['passage_list']:
332 |                         passage_list.append(i)
333 |                 else:
334 |                     print('请求结束，文章列表获取完毕！')
335 |                     break
336 |                 delay_time = random.uniform(1, 5)  # 延迟时间
337 |                 print('为预防被封禁,开始延时操作，延时时间：' + str(delay_time) + '秒')
338 |                 time.sleep(delay_time)  # 模拟手动操作，随机延时1-5秒，预防被封禁
339 |             all_list = passage_list
340 |         print('********************共获取到 ' + str(len(all_list)) + ' 篇文章，开始保存文章，若为0篇请检查错误！！！')
341 |         if not all_list: print('获取到文章列表为空，请注意检查！！！！')
342 |         if not all_list: return None  # 如果获取为空
343 | 
344 |         '''保存文章列表到文件，保存目录'''
345 |         nickname = ''  # 临时放置公众号名称
346 |         if save_list:
347 |             print('****************************************开始保存文章，若以上为 获取到0篇 请检查错误！！！')
348 |             # 首先获取公众号名称
349 |             # new_url = all_list[0][2] + '&pass_ticket=' + self.pass_ticket + '&uin=' + self.uin + '&key=' + self.key
350 |             new_url = all_list[0][3].replace('amp;', '')
351 |             res = requests.get(new_url, headers=self.headers, verify=False)  # 使用微信客户端的token跳过验证
352 |             nickname = re.search(r'var nickname.*"(.*?)".*', res.text).group(1)  # 公众号名称
353 | 
354 |             # 创建公众号保存目录
355 |             official_path = self.root_path + self.official_names_head + nickname  # 各种公众号存储根路径
356 |             os.makedirs(official_path, exist_ok=True)
357 | 
358 |             # 创建 or 打开表格，检查文件是否存在，判断不存在时创建表格文件
359 |             article_contents_path = official_path + '/' + '文章列表（article_list）_原始链接.xlsx'  # 文章内容文件路径
360 |             if not os.path.exists(article_contents_path): pd.DataFrame().to_excel(article_contents_path, index=False)
361 |             frame_df = pd.read_excel(article_contents_path)  # 读取表格内容，默认打开DataFrame对象包含第一个工作表中的数据
362 | 
363 |             # 将新数据转换为 DataFrame 并添加到现有 DataFrame 的末尾
364 |             columns = ['本地保存时间', '文章发布时间', '文章名称', '文章原始链接（直接访问会提示验证）']  # 列名
365 |             new_data_df = pd.DataFrame(all_list, columns=columns)
366 |             df = pd.concat([frame_df, new_data_df], ignore_index=True)
367 | 
368 |             # 将更新后的数据写入 Excel 文件
369 |             df.to_excel(article_contents_path, index=False)
370 |             local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())  # 本地时间
371 |             print(local_time + ' 已获取公众号文章目录>>>> ' + nickname)
372 |             print(local_time + ' 存储路径>>>> ' + article_contents_path)
373 | 
374 |         """转换 文章原始链接 为可直接访问链接"""
375 |         if transform_list:
376 |             print("开始转换 " + nickname + ' 公众号的文章列表原始链接')
377 |             # 检测公众号的存储目录
378 |             official_path = self.root_path + self.official_names_head + nickname  # 公众号存储根路径
379 |             article_contents_path = official_path + '/' + '文章列表（article_list）_原始链接.xlsx'  # 文章内容文件路径
380 |             article_list_path = official_path + '/' + '文章列表（article_list）_直连链接.xlsx'  # 文章列表文件路径
381 |             if not os.path.exists(article_contents_path):  # 如果文件不存在
382 |                 print('文件不存在，请检查目录文件>>>>  文章列表（article_list）_原始链接.xlsx')
383 |             else:
384 |                 frame_df = pd.read_excel(article_contents_path)  # 读取表格内容，默认打开DataFrame对象包含第一个工作表中的数据
385 |                 new_links = []  # 转换后的新链接存储
386 | 
387 |                 # 修改短链接 方法1：删除元素“amp;”
388 |                 for index, row in frame_df.iterrows():
389 |                     new_url = row.iloc[3].replace('amp;', '')  # 获取原始链接，并对其进行转化
390 |                     new_links.append(new_url)  # 添加转化后的链接到数组中
391 | 
392 |                 # # 修改短链接 方法2：添加pass_ticket、uin、key三个参数实现访问（此为临时链接！！！）
393 |                 # for index, row in frame_df.iterrows():
394 |                 #     new_url = row.iloc[2].replace('amp;', '')  # 获取第 3 列的值
395 |                 #     # res = requests.get(new_url, verify=False)  # 使用微信客户端的token跳过验证
396 |                 #     # print(index)
397 | 
398 |                 # 合并 转换后的链接 到 原数据表，列合并操作
399 |                 frame_df['可直接访问链接'] = new_links  # 把列表作为新列添加到 DataFrame
400 | 
401 |                 # 将更新后的数据写入 Excel 文件
402 |                 frame_df.to_excel(article_list_path, index=False)
403 |                 local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())  # 本地时间
404 |                 print(local_time + ' 已转换公众号文章列表>>>> ' + nickname)
405 |                 print(local_time + ' 存储路径>>>> ' + article_list_path)
406 |         return all_list  # 返回
407 | 
408 |     def get_next_list(self, page):
409 |         # 从0开始计数，第 0 页相当于默认页数据
410 |         pages = int(page) * 10
411 |         print('正在获取第 ' + str(page + 1) + ' 页文章列表')
412 |         url = ('https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=' + self.biz + '&f=json&offset='
413 |                + str(pages) + '&count=10&is_ok=1&scene=124&uin=' + self.uin + '&key=' + self.key + '&pass_ticket='
414 |                + self.pass_ticket + '&wxtoken=&appmsg_token=&x5=0&f=json')
415 |         try:
416 |             res = requests.get(url=url, headers=self.headers, timeout=10, verify=False)
417 |         except:
418 |             print('失败！！！获取第 ' + str(page + 1) + ' 页文章列表失败！！！')
419 |             print('请检查错误类型，详情记录在日志中')
420 |             exc_type, exc_value, exc_traceback = sys.exc_info()  # 获取当前异常的信息
421 |             logging.error(f'发生异常: {exc_type.__name__}: {exc_value}', exc_info=True)
422 |             res = ArticleDetail()  # 保证返回值不会报错
423 |         if 'app_msg_ext_info' in res.text:
424 |             # 解码json数据
425 |             get_page = json.loads(json.loads(res.text)['general_msg_list'])['list']
426 |             ''' get_page[0]为
427 |             {'comm_msg_info': {'id': 1000000107, 'type': 49, 'datetime': 1722467332, 'fakeid': '3910318108', 'status': 2, 'content': ''}, 'app_msg_ext_info': {'title': '国务院7月重要政策', 'digest': '', 'content': '', 'fileid': 100007840, 'content_url': 'http://mp.weixin.qq.com/s?__biz=MzkxMDMxODEwOA==&amp;mid=2247491511&amp;idx=1&amp;sn=a36291fdee52a0f53d145edec8058e04&amp;chksm=c0084d6abbcac962a50153c89fe9c19b6f8b1c5e5ac50b05adcb49bdfad8638522ab426c3f4b&amp;scene=27#wechat_redirect', 'source_url': '', 'cover': 'https://mmbiz.qpic.cn/mmbiz_jpg/JRAjbHqmggrlZibDMibLP4ryNqhYXgolJOdQj2P8t2QQFVicickzAo7Gv1SzazwJY6lDylcanx2ic60HDbMvK8OKQpg/0?wx_fmt=jpeg', 'subtype': 9, 'is_multi': 1, 'multi_app_msg_item_list': [{'title': '8月起，这些新规将影响你我生活！', 'digest': '', 'content': '', 'fileid': 0, 'content_url': 'http://mp.weixin.qq.com/s?__biz=MzkxMDMxODEwOA==&amp;mid=2247491511&amp;idx=2&amp;sn=b3f5b6bcf8727c8c90fce7e588e6e7da&amp;chksm=c0eb20c99ca2f90032a6234002ed2cc9c2c000f87cff34f4d8d763878c0bb5275800db876ca7&amp;scene=27#wechat_redirect', 'source_url': '', 'cover': 'https://mmbiz.qpic.cn/mmbiz_jpg/JRAjbHqmggrc08yJMZ6CQ3VL6VzmEIymSUyATlL6o3xaDJJ0D2CtpQg31Vy7jdCaic86zqkgJ9oAFGyia78ZOq7g/0?wx_fmt=jpeg', 'author': '', 'copyright_stat': 100, 'del_flag': 1, 'item_show_type': 0, 'audio_fileid': 0, 'duration': 0, 'play_url': '', 'malicious_title_reason_id': 0, 'malicious_content_type': 0}, {'title': '8月，你好！', 'digest': '', 'content': '', 'fileid': 100007860, 'content_url': 'http://mp.weixin.qq.com/s?__biz=MzkxMDMxODEwOA==&amp;mid=2247491511&amp;idx=3&amp;sn=cd25de57b74b63b0f3b1a9888b9cd94d&amp;chksm=c0c7f30fdd5fc0ea4a2765f5fd29e1faeb0e352e888ee8556521ab23bc9528d68f42deaa9d15&amp;scene=27#wechat_redirect', 'source_url': '', 'cover': 'https://mmbiz.qpic.cn/mmbiz_jpg/JRAjbHqmggrlZibDMibLP4ryNqhYXgolJO9CnECAnMLDPY39Y9iarcFtM1ibrBvhKcGFyl1wicHysvTrYx4GfLybt8g/0?wx_fmt=jpeg', 'author': '', 'copyright_stat': 100, 'del_flag': 1, 'item_show_type': 0, 'audio_fileid': 0, 'duration': 0, 'play_url': 
'', 'malicious_title_reason_id': 0, 'malicious_content_type': 0}], 'author': '', 'copyright_stat': 100, 'duration': 0, 'del_flag': 1, 'item_show_type': 0, 'audio_fileid': 0, 'play_url': '', 'malicious_title_reason_id': 0, 'malicious_content_type': 0}}
428 |             存储形式为二维数组，[[时间，文章标题，文章链接],[时间，文章标题，文章链接]
429 |             '''
430 |             passage_list = []  # 存放一页内的所有文章
431 |             for i in get_page:
432 |                 # 时间戳转换
433 |                 time_tuple = time.localtime(i['comm_msg_info']['datetime'])
434 |                 create_time = time.strftime("%Y-%m-%d", time_tuple)
435 |                 title = i['app_msg_ext_info']['title']
436 |                 content_url = i['app_msg_ext_info']['content_url'].replace('#wechat_redirect', '')
437 |                 local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())  # 本地时间
438 |                 passage_list.append([local_time, create_time, title, content_url])
439 |                 if i['app_msg_ext_info']['multi_app_msg_item_list']:
440 |                     for j in i['app_msg_ext_info']['multi_app_msg_item_list']:
441 |                         title = j['title']
442 |                         content_url = j['content_url'].replace('#wechat_redirect', '')
443 |                         passage_list.append([local_time, create_time, title, content_url])
444 |             print('该页包含 ' + str(len(passage_list)) + ' 篇文章')
445 |             return {
446 |                 'm_flag': 1,
447 |                 'passage_list': passage_list,
448 |                 'length': len(passage_list)
449 |             }
450 |         elif '"home_page_list":[]' in res.text:
451 |             print('\n出现：操作频繁，请稍后再试\n该号已被封禁，请解封后再来！！！\n')
452 |             return {'m_flag': 0}
453 |         else:
454 |             print('请求结束！未获取到第 ' + str(page + 1) + ' 页文章列表')
455 |             return {'m_flag': 0}
456 | 
457 |     def get_detail_list(self, access_token):
458 |         """ 输入：access_token(从fiddler获取的链接)
459 |             输出：无（获取的文章列表将保存在本地目录下）
460 |             功能：
461 |                 ① 保存微信公众号文章的全部内容
462 |                 ②以excel文件形式存储，文件名设置为对应公众号的名称
463 |         """
464 |         # 获取该公众号名称，取公众号第一页文章列表，取第一篇文章链接
465 |         first_link = self.access_origin_list(access_token, 1, False, False)
466 |         if first_link:  # 获取到内容
467 |             new_url = first_link[0][3].replace('amp;', '')
468 |             res = requests.get(new_url, headers=self.headers, verify=False)
469 |             nickname = re.search(r'var nickname.*"(.*?)".*', res.text).group(1)  # 公众号名称
470 |         else:
471 |             print('获取失败')
472 |             return None
473 | 
474 |         # 遍历文章列表，获取各文章的详情内容
475 |         print('开始获取公众号>>>>    ' + nickname)
476 |         print('开始检测公众号的文章列表是否存在>>>>    ')
477 |         official_path = self.root_path + self.official_names_head + nickname  # 公众号存储根路径
478 |         # article_contents_path = official_path + '/' + '文章列表（article_list）_原始链接.xlsx'  # 文章内容文件路径
479 |         article_list_path = official_path + '/' + '文章列表（article_list）_直连链接.xlsx'  # 文章列表文件路径
480 |         if not os.path.exists(article_list_path):  # 如果文件不存在
481 |             print('文件不存在，请检查目录文件>>>>  ' + article_list_path)
482 |         else:
483 |             frame_df = pd.read_excel(article_list_path)  # 读取表格内容，默认打开DataFrame对象包含第一个工作表中的数据
484 |             error_links = []
485 |             for index, row in frame_df.iterrows():
486 |                 single_article_url = row.iloc[4]  # 获取单文章链接
487 |                 try:
488 |                     new_messages = self.get_detail_new(single_article_url)  # 获取单文章详情信息
489 |                     # 存储获取到的文章详情信息
490 |                     # 创建 or 打开表格，检查文件是否存在，判断不存在时创建表格文件
491 |                     article_detail_path = official_path + '/' + '文章详情（article_detiles）.xlsx'  # 文章详情文件路径
492 |                     if not os.path.exists(article_detail_path): pd.DataFrame().to_excel(article_detail_path, index=False)
493 |                     frame_df = pd.read_excel(article_detail_path)  # 读取表格内容，默认打开DataFrame对象包含第一个工作表中的数据
494 | 
495 |                     # 将新数据转换为 DataFrame 并添加到现有 DataFrame 的末尾
496 |                     columns = ['本地创建时间', '文章发布时间', '文章标题', '文章链接', '文章文本内容',
497 |                                '阅读量', '点赞数', '转发数', '在看数',
498 |                                '评论', '评论点赞']  # 列名
499 |                     new_data_df = pd.DataFrame([new_messages], columns=columns)
500 |                     df = pd.concat([frame_df, new_data_df], ignore_index=True)
501 | 
502 |                     # 将更新后的数据写入 Excel 文件
503 |                     df.to_excel(article_detail_path, index=False)
504 |                     local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())  # 本地时间
505 |                     print(local_time + ' 已保存文章详情>>>> ' + new_messages[2])
506 |                     print(local_time + ' 内容存储路径>>>> ' + article_detail_path)
507 | 
508 |                     delay_time = random.uniform(3, 6)  # 延迟时间
509 |                     print('为预防被封禁,开始延时操作，延时时间：' + str(delay_time) + '秒')
510 |                     time.sleep(delay_time)  # 模拟手动操作，随机延时delay_time秒，预防被封禁
511 |                 except:
512 |                     error_links.append(row.iloc[:])
513 |                     print('有问题的链接，文章标题为>>>>    ' + row.iloc[2])
514 |                     article_error_path = official_path + '/' + '问题链接（error_links）.xlsx'  # 文章详情文件路径
515 |                     if not os.path.exists(article_error_path): pd.DataFrame().to_excel(article_error_path, index=False)
516 |                     columns = ['本地保存时间', '文章发布时间', '文章名称', '文章原始链接（直接访问会提示验证）']  # 列名
517 |                     error_data_df = pd.DataFrame(error_links, columns=columns)
518 |                     error_forme_df = pd.read_excel(article_error_path)  # 读取表格内容，默认打开DataFrame对象包含第一个工作表中的数据
519 |                     dfs = pd.concat([error_forme_df, error_data_df], ignore_index=True)
520 |                     # 将更新后的数据写入 Excel 文件
521 |                     dfs.to_excel(article_error_path, index=False)
522 |                     local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())  # 本地时间
523 |                     print(local_time + ' 已保存问题文章链接>>>> ' + row.iloc[2])
524 |                     print(local_time + ' 内容存储路径>>>> ' + article_error_path)
525 | 
526 |     def get_detail_new(self, link):
527 |         """ 输入：文章链接（无需验证，可直接访问）
528 |             输出：单文章详情信息
529 |         """
530 |         '''获取部分请求参数'''
531 |         contents = self.get_one_article(link, False, False)
532 |         # nickname = re.search(r'var nickname.*"(.*?)".*', contents['content']).group(1)  # 公众号名称
533 |         # article_link = re.search(r'var msg_link = .*"(.*?)".*', contents['content']).group(1)  # 文章短链接
534 |         createTime = re.search(r"var createTime = '(.*?)'.*", contents['content']).group(1)  # 文章发布时间 detail_time
535 |         # author = re.search(r'var author = "(.*?)".*', contents['content']).group(1)  # 文章作者
536 |         article_title = re.search(r"var title = '(.*?)'.*", contents['content']).group(1)  # 文章标题
537 |         # 将文字内容转换为列表形式存储
538 |         soup = BeautifulSoup(contents['content'], 'html.parser')
539 |         original_texts = soup.getText().split('\n')  # 将页面所有的文本内容提取，并转为列表形式
540 |         article_texts = list(filter(lambda x: bool(x.strip()), original_texts))  # 列表形式的文章内容 texts
541 |         r = ''
542 |         for rand in range(0, 16):
543 |             r += str(random.randint(0, 9))
544 |         r = '0.' + r
545 |         appmsg_type = "9"
546 |         mid = str(link).split('mid=')[1].split('&')[0]
547 |         sn = str(link).split('sn=')[1].split('&')[0]
548 |         idx = str(link).split('idx=')[1].split('&')[0]
549 |         ct = ''
550 |         comment_id = re.search("var comment_id = '(.*?)'.*", contents['content'])
551 |         if comment_id:
552 |             comment_id = re.search("var comment_id = '(.*?)'.*", contents['content']).group(1)
553 |         else:
554 |             print('没有匹配到comment_id，文章标题为：' + article_title)
555 |             comment_id = ''
556 | 
557 |         # version = contents['content'].split('_g.clientversion = "')[1].split('"')[0]
558 |         if 'var req_id = ' in contents['content']:
559 |             req_id = contents['content'].split('var req_id = ')[1].split(';')[0].replace("'", "").replace('"', '')
560 |         else:
561 |             print('没有匹配到req_id，文章标题为：' + article_title)
562 |             req_id = ''
563 |         # print(r, appmsg_type, mid, sn, idx, ct, comment_id, version, req_id, createTime, article_texts)
564 | 
565 |         '''获取文章详情信息'''
566 |         detail_url = ('https://mp.weixin.qq.com/mp/getappmsgext?f=json&mock=&fasttmplajax=1&f=json' + '&uin=' + self.uin
567 |                       + '&key=' + self.key + '&pass_ticket=' + self.pass_ticket + '&__biz=' + self.biz)
568 |         data = {
569 |             'r': r,
570 |             'sn': sn,
571 |             'mid': mid,
572 |             'idx': idx,
573 |             'req_id': req_id,
574 |             'title': article_title,
575 |             'comment_id': comment_id,
576 |             'appmsg_type': appmsg_type,
577 |             '__biz': self.biz,
578 |             'pass_ticket': self.pass_ticket,
579 |             'abtest_cookie': '', 'devicetype': 'Windows 7 x64', 'version': '63090b13', 'is_need_ticket': '0',
580 |             'is_need_ad': '0', 'is_need_reward': '0', 'both_ad': '0', 'reward_uin_count': '0', 'send_time': '',
581 |             'msg_daily_idx': '1', 'is_original': '0', 'is_only_read': '1', 'scene': '38', 'is_temp_url': '0',
582 |             'item_show_type': '0', 'tmp_version': '1', 'more_read_type': '0', 'appmsg_like_type': '2',
583 |             'related_video_sn': '', 'related_video_num': '5', 'vid': '', 'is_pay_subscribe': '0',
584 |             'pay_subscribe_uin_count': '0', 'has_red_packet_cover': '0', 'album_id': '1296223588617486300',
585 |             'album_video_num': '5', 'cur_album_id': 'undefined', 'is_public_related_video': 'NaN',
586 |             'encode_info_by_base64': 'undefined', 'exptype': '', 'export_key_extinfo': '', 'business_type': '0',
587 |         }
588 |         res = requests.post(url=detail_url, data=data, headers=self.headers, cookies=self.cookies, verify=False)
589 |         # print(res.text)
590 |         read_num = jsonpath.jsonpath(json.loads(res.text), "$.." + "read_num")
591 |         like_num = jsonpath.jsonpath(json.loads(res.text), "$.." + "old_like_num")
592 |         share_num = jsonpath.jsonpath(json.loads(res.text), "$.." + "share_num")
593 |         show_read = jsonpath.jsonpath(json.loads(res.text), "$.." + "show_read")
594 | 
595 |         # 获取评论以及评论点赞数
596 |         comment_url = ('https://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz=' + self.biz +
597 |                        '&appmsgid=2247491372&idx=1&comment_id=' + comment_id + '&offset=0&limit=100&uin='
598 |                        + self.uin + '&key=' + self.key + '&pass_ticket=' + self.pass_ticket
599 |                        + '&wxtoken=&devicetype=Windows+10&clientversion=62060833&appmsg_token=')
600 |         response = requests.get(comment_url, headers=self.headers, cookies=self.cookies, verify=False)
601 |         json_content = json.loads(response.text)
602 |         comments = jsonpath.jsonpath(json_content, '$..content')                    # 评论
603 |         comments_star_nums = jsonpath.jsonpath(json_content, '$..like_num')         # 评论点赞数
604 | 
605 |         local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())         # 本地时间
606 |         if read_num == [] or read_num == '':
607 |             return '', '', '', ''
608 |         else:
609 |             return (local_time, createTime, article_title, link, article_texts,  # 本地创建时间，文章发布时间，标题，链接，文本，
610 |                     read_num[0], like_num[0], share_num[0], show_read[0],  # 阅读量，点赞数，转发数，在看数，
611 |                     comments, comments_star_nums)  # 评论，评论点赞
612 | 
613 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Attribution-NonCommercial-ShareAlike 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |     wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable. More considerations
 52 |      for the public:
 53 |     wiki.creativecommons.org/Considerations_for_licensees
 54 | 
 55 | =======================================================================
 56 | 
 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
 58 | Public License
 59 | 
 60 | By exercising the Licensed Rights (defined below), You accept and agree
 61 | to be bound by the terms and conditions of this Creative Commons
 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License
 63 | ("Public License"). To the extent this Public License may be
 64 | interpreted as a contract, You are granted the Licensed Rights in
 65 | consideration of Your acceptance of these terms and conditions, and the
 66 | Licensor grants You such rights in consideration of benefits the
 67 | Licensor receives from making the Licensed Material available under
 68 | these terms and conditions.
 69 | 
 70 | 
 71 | Section 1 -- Definitions.
 72 | 
 73 |   a. Adapted Material means material subject to Copyright and Similar
 74 |      Rights that is derived from or based upon the Licensed Material
 75 |      and in which the Licensed Material is translated, altered,
 76 |      arranged, transformed, or otherwise modified in a manner requiring
 77 |      permission under the Copyright and Similar Rights held by the
 78 |      Licensor. For purposes of this Public License, where the Licensed
 79 |      Material is a musical work, performance, or sound recording,
 80 |      Adapted Material is always produced where the Licensed Material is
 81 |      synched in timed relation with a moving image.
 82 | 
 83 |   b. Adapter's License means the license You apply to Your Copyright
 84 |      and Similar Rights in Your contributions to Adapted Material in
 85 |      accordance with the terms and conditions of this Public License.
 86 | 
 87 |   c. BY-NC-SA Compatible License means a license listed at
 88 |      creativecommons.org/compatiblelicenses, approved by Creative
 89 |      Commons as essentially the equivalent of this Public License.
 90 | 
 91 |   d. Copyright and Similar Rights means copyright and/or similar rights
 92 |      closely related to copyright including, without limitation,
 93 |      performance, broadcast, sound recording, and Sui Generis Database
 94 |      Rights, without regard to how the rights are labeled or
 95 |      categorized. For purposes of this Public License, the rights
 96 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 97 |      Rights.
 98 | 
 99 |   e. Effective Technological Measures means those measures that, in the
100 |      absence of proper authority, may not be circumvented under laws
101 |      fulfilling obligations under Article 11 of the WIPO Copyright
102 |      Treaty adopted on December 20, 1996, and/or similar international
103 |      agreements.
104 | 
105 |   f. Exceptions and Limitations means fair use, fair dealing, and/or
106 |      any other exception or limitation to Copyright and Similar Rights
107 |      that applies to Your use of the Licensed Material.
108 | 
109 |   g. License Elements means the license attributes listed in the name
110 |      of a Creative Commons Public License. The License Elements of this
111 |      Public License are Attribution, NonCommercial, and ShareAlike.
112 | 
113 |   h. Licensed Material means the artistic or literary work, database,
114 |      or other material to which the Licensor applied this Public
115 |      License.
116 | 
117 |   i. Licensed Rights means the rights granted to You subject to the
118 |      terms and conditions of this Public License, which are limited to
119 |      all Copyright and Similar Rights that apply to Your use of the
120 |      Licensed Material and that the Licensor has authority to license.
121 | 
122 |   j. Licensor means the individual(s) or entity(ies) granting rights
123 |      under this Public License.
124 | 
125 |   k. NonCommercial means not primarily intended for or directed towards
126 |      commercial advantage or monetary compensation. For purposes of
127 |      this Public License, the exchange of the Licensed Material for
128 |      other material subject to Copyright and Similar Rights by digital
129 |      file-sharing or similar means is NonCommercial provided there is
130 |      no payment of monetary compensation in connection with the
131 |      exchange.
132 | 
133 |   l. Share means to provide material to the public by any means or
134 |      process that requires permission under the Licensed Rights, such
135 |      as reproduction, public display, public performance, distribution,
136 |      dissemination, communication, or importation, and to make material
137 |      available to the public including in ways that members of the
138 |      public may access the material from a place and at a time
139 |      individually chosen by them.
140 | 
141 |   m. Sui Generis Database Rights means rights other than copyright
142 |      resulting from Directive 96/9/EC of the European Parliament and of
143 |      the Council of 11 March 1996 on the legal protection of databases,
144 |      as amended and/or succeeded, as well as other essentially
145 |      equivalent rights anywhere in the world.
146 | 
147 |   n. You means the individual or entity exercising the Licensed Rights
148 |      under this Public License. Your has a corresponding meaning.
149 | 
150 | 
151 | Section 2 -- Scope.
152 | 
153 |   a. License grant.
154 | 
155 |        1. Subject to the terms and conditions of this Public License,
156 |           the Licensor hereby grants You a worldwide, royalty-free,
157 |           non-sublicensable, non-exclusive, irrevocable license to
158 |           exercise the Licensed Rights in the Licensed Material to:
159 | 
160 |             a. reproduce and Share the Licensed Material, in whole or
161 |                in part, for NonCommercial purposes only; and
162 | 
163 |             b. produce, reproduce, and Share Adapted Material for
164 |                NonCommercial purposes only.
165 | 
166 |        2. Exceptions and Limitations. For the avoidance of doubt, where
167 |           Exceptions and Limitations apply to Your use, this Public
168 |           License does not apply, and You do not need to comply with
169 |           its terms and conditions.
170 | 
171 |        3. Term. The term of this Public License is specified in Section
172 |           6(a).
173 | 
174 |        4. Media and formats; technical modifications allowed. The
175 |           Licensor authorizes You to exercise the Licensed Rights in
176 |           all media and formats whether now known or hereafter created,
177 |           and to make technical modifications necessary to do so. The
178 |           Licensor waives and/or agrees not to assert any right or
179 |           authority to forbid You from making technical modifications
180 |           necessary to exercise the Licensed Rights, including
181 |           technical modifications necessary to circumvent Effective
182 |           Technological Measures. For purposes of this Public License,
183 |           simply making modifications authorized by this Section 2(a)
184 |           (4) never produces Adapted Material.
185 | 
186 |        5. Downstream recipients.
187 | 
188 |             a. Offer from the Licensor -- Licensed Material. Every
189 |                recipient of the Licensed Material automatically
190 |                receives an offer from the Licensor to exercise the
191 |                Licensed Rights under the terms and conditions of this
192 |                Public License.
193 | 
194 |             b. Additional offer from the Licensor -- Adapted Material.
195 |                Every recipient of Adapted Material from You
196 |                automatically receives an offer from the Licensor to
197 |                exercise the Licensed Rights in the Adapted Material
198 |                under the conditions of the Adapter's License You apply.
199 | 
200 |             c. No downstream restrictions. You may not offer or impose
201 |                any additional or different terms or conditions on, or
202 |                apply any Effective Technological Measures to, the
203 |                Licensed Material if doing so restricts exercise of the
204 |                Licensed Rights by any recipient of the Licensed
205 |                Material.
206 | 
207 |        6. No endorsement. Nothing in this Public License constitutes or
208 |           may be construed as permission to assert or imply that You
209 |           are, or that Your use of the Licensed Material is, connected
210 |           with, or sponsored, endorsed, or granted official status by,
211 |           the Licensor or others designated to receive attribution as
212 |           provided in Section 3(a)(1)(A)(i).
213 | 
214 |   b. Other rights.
215 | 
216 |        1. Moral rights, such as the right of integrity, are not
217 |           licensed under this Public License, nor are publicity,
218 |           privacy, and/or other similar personality rights; however, to
219 |           the extent possible, the Licensor waives and/or agrees not to
220 |           assert any such rights held by the Licensor to the limited
221 |           extent necessary to allow You to exercise the Licensed
222 |           Rights, but not otherwise.
223 | 
224 |        2. Patent and trademark rights are not licensed under this
225 |           Public License.
226 | 
227 |        3. To the extent possible, the Licensor waives any right to
228 |           collect royalties from You for the exercise of the Licensed
229 |           Rights, whether directly or through a collecting society
230 |           under any voluntary or waivable statutory or compulsory
231 |           licensing scheme. In all other cases the Licensor expressly
232 |           reserves any right to collect such royalties, including when
233 |           the Licensed Material is used other than for NonCommercial
234 |           purposes.
235 | 
236 | 
237 | Section 3 -- License Conditions.
238 | 
239 | Your exercise of the Licensed Rights is expressly made subject to the
240 | following conditions.
241 | 
242 |   a. Attribution.
243 | 
244 |        1. If You Share the Licensed Material (including in modified
245 |           form), You must:
246 | 
247 |             a. retain the following if it is supplied by the Licensor
248 |                with the Licensed Material:
249 | 
250 |                  i. identification of the creator(s) of the Licensed
251 |                     Material and any others designated to receive
252 |                     attribution, in any reasonable manner requested by
253 |                     the Licensor (including by pseudonym if
254 |                     designated);
255 | 
256 |                 ii. a copyright notice;
257 | 
258 |                iii. a notice that refers to this Public License;
259 | 
260 |                 iv. a notice that refers to the disclaimer of
261 |                     warranties;
262 | 
263 |                  v. a URI or hyperlink to the Licensed Material to the
264 |                     extent reasonably practicable;
265 | 
266 |             b. indicate if You modified the Licensed Material and
267 |                retain an indication of any previous modifications; and
268 | 
269 |             c. indicate the Licensed Material is licensed under this
270 |                Public License, and include the text of, or the URI or
271 |                hyperlink to, this Public License.
272 | 
273 |        2. You may satisfy the conditions in Section 3(a)(1) in any
274 |           reasonable manner based on the medium, means, and context in
275 |           which You Share the Licensed Material. For example, it may be
276 |           reasonable to satisfy the conditions by providing a URI or
277 |           hyperlink to a resource that includes the required
278 |           information.
279 |        3. If requested by the Licensor, You must remove any of the
280 |           information required by Section 3(a)(1)(A) to the extent
281 |           reasonably practicable.
282 | 
283 |   b. ShareAlike.
284 | 
285 |      In addition to the conditions in Section 3(a), if You Share
286 |      Adapted Material You produce, the following conditions also apply.
287 | 
288 |        1. The Adapter's License You apply must be a Creative Commons
289 |           license with the same License Elements, this version or
290 |           later, or a BY-NC-SA Compatible License.
291 | 
292 |        2. You must include the text of, or the URI or hyperlink to, the
293 |           Adapter's License You apply. You may satisfy this condition
294 |           in any reasonable manner based on the medium, means, and
295 |           context in which You Share Adapted Material.
296 | 
297 |        3. You may not offer or impose any additional or different terms
298 |           or conditions on, or apply any Effective Technological
299 |           Measures to, Adapted Material that restrict exercise of the
300 |           rights granted under the Adapter's License You apply.
301 | 
302 | 
303 | Section 4 -- Sui Generis Database Rights.
304 | 
305 | Where the Licensed Rights include Sui Generis Database Rights that
306 | apply to Your use of the Licensed Material:
307 | 
308 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
309 |      to extract, reuse, reproduce, and Share all or a substantial
310 |      portion of the contents of the database for NonCommercial purposes
311 |      only;
312 | 
313 |   b. if You include all or a substantial portion of the database
314 |      contents in a database in which You have Sui Generis Database
315 |      Rights, then the database in which You have Sui Generis Database
316 |      Rights (but not its individual contents) is Adapted Material,
317 |      including for purposes of Section 3(b); and
318 | 
319 |   c. You must comply with the conditions in Section 3(a) if You Share
320 |      all or a substantial portion of the contents of the database.
321 | 
322 | For the avoidance of doubt, this Section 4 supplements and does not
323 | replace Your obligations under this Public License where the Licensed
324 | Rights include other Copyright and Similar Rights.
325 | 
326 | 
327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
328 | 
329 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
330 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
331 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
332 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
333 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
334 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
335 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
336 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
337 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
338 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
339 | 
340 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
341 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
342 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
343 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
344 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
345 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
346 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
347 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
348 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
349 | 
350 |   c. The disclaimer of warranties and limitation of liability provided
351 |      above shall be interpreted in a manner that, to the extent
352 |      possible, most closely approximates an absolute disclaimer and
353 |      waiver of all liability.
354 | 
355 | 
356 | Section 6 -- Term and Termination.
357 | 
358 |   a. This Public License applies for the term of the Copyright and
359 |      Similar Rights licensed here. However, if You fail to comply with
360 |      this Public License, then Your rights under this Public License
361 |      terminate automatically.
362 | 
363 |   b. Where Your right to use the Licensed Material has terminated under
364 |      Section 6(a), it reinstates:
365 | 
366 |        1. automatically as of the date the violation is cured, provided
367 |           it is cured within 30 days of Your discovery of the
368 |           violation; or
369 | 
370 |        2. upon express reinstatement by the Licensor.
371 | 
372 |      For the avoidance of doubt, this Section 6(b) does not affect any
373 |      right the Licensor may have to seek remedies for Your violations
374 |      of this Public License.
375 | 
376 |   c. For the avoidance of doubt, the Licensor may also offer the
377 |      Licensed Material under separate terms or conditions or stop
378 |      distributing the Licensed Material at any time; however, doing so
379 |      will not terminate this Public License.
380 | 
381 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
382 |      License.
383 | 
384 | 
385 | Section 7 -- Other Terms and Conditions.
386 | 
387 |   a. The Licensor shall not be bound by any additional or different
388 |      terms or conditions communicated by You unless expressly agreed.
389 | 
390 |   b. Any arrangements, understandings, or agreements regarding the
391 |      Licensed Material not stated herein are separate from and
392 |      independent of the terms and conditions of this Public License.
393 | 
394 | 
395 | Section 8 -- Interpretation.
396 | 
397 |   a. For the avoidance of doubt, this Public License does not, and
398 |      shall not be interpreted to, reduce, limit, restrict, or impose
399 |      conditions on any use of the Licensed Material that could lawfully
400 |      be made without permission under this Public License.
401 | 
402 |   b. To the extent possible, if any provision of this Public License is
403 |      deemed unenforceable, it shall be automatically reformed to the
404 |      minimum extent necessary to make it enforceable. If the provision
405 |      cannot be reformed, it shall be severed from this Public License
406 |      without affecting the enforceability of the remaining terms and
407 |      conditions.
408 | 
409 |   c. No term or condition of this Public License will be waived and no
410 |      failure to comply consented to unless expressly agreed to by the
411 |      Licensor.
412 | 
413 |   d. Nothing in this Public License constitutes or may be interpreted
414 |      as a limitation upon, or waiver of, any privileges and immunities
415 |      that apply to the Licensor or You, including from the legal
416 |      processes of any jurisdiction or authority.
417 | 
418 | =======================================================================
419 | 
420 | Creative Commons is not a party to its public
421 | licenses. Notwithstanding, Creative Commons may elect to apply one of
422 | its public licenses to material it publishes and in those instances
423 | will be considered the “Licensor.” The text of the Creative Commons
424 | public licenses is dedicated to the public domain under the CC0 Public
425 | Domain Dedication. Except for the limited purpose of indicating that
426 | material is shared under a Creative Commons public license or as
427 | otherwise permitted by the Creative Commons policies published at
428 | creativecommons.org/policies, Creative Commons does not authorize the
429 | use of the trademark "Creative Commons" or any other trademark or logo
430 | of Creative Commons without its prior written consent including,
431 | without limitation, in connection with any unauthorized modifications
432 | to any of its public licenses or any other arrangements,
433 | understandings, or agreements concerning use of licensed material. For
434 | the avoidance of doubt, this paragraph does not form part of the
435 | public licenses.
436 | 
437 | Creative Commons may be contacted at creativecommons.org.
438 | 
439 | 
440 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## 微信公众号/文章 获取（Access_wechat_article）
  2 | 
  3 | 更新时间：2025年7月
  4 | 
  5 | 本项目是基于Python语言的爬虫程序，支持对微信公众号文章内容获取
  6 | 
  7 | 目前支持 Windows / Linux 开箱即用，**建议使用虚拟环境运行项目**
  8 | 
  9 | 如果感兴趣，请 **Fork** 项目后自行研究使用
 10 | 
 11 | 使用过程中如遇到错误，欢迎提交 [issues](https://github.com/yeximm/Access_wechat_article/issues) 来讨论
 12 | 
 13 | **注**：请在 [GitHub](https://github.com/) 平台提交 [issues](https://github.com/yeximm/Access_wechat_article/issues)
 14 | 
 15 | ## 一、主要功能
 16 | 
 17 | 1. 获取**公众号主页链接**，通过微信内置浏览器可直接打开
 18 | 2. 获取公众号**已发布**的文章列表（**微信公众号**下的历史文章）
 19 | 3. 批量下载公众号文章的**网页文本数据**
 20 | 4. 获取微信公众号文章的**所有信息**，如阅读量、点赞数、转发数、评论、评论点赞等信息。
 21 | 
 22 | ## 二、项目所需环境及工具
 23 | 
 24 | 1. 系统环境：Windows 10 ×64
 25 | 2. 程序运行环境：python 3.12
 26 | 3. 涉及应用：微信**PC版**，当前项目适配的微信版本为3.9.11.25
 27 | 4. 使用工具：fiddler
 28 | 
 29 | ## 三、程序使用
 30 | 
 31 | ### 3.1下载 / Download
 32 | 
 33 | - 下载地址：[https://github.com/yeximm/Access_wechat_article/releases](https://github.com/yeximm/Access_wechat_article/releases)
 34 |   - 👆👆👆以上为本项目发布页地址，选取所需版本下载即可。
 35 | 
 36 | 
 37 | - 存储库快照：[Github_master](https://github.com/yeximm/Access_wechat_article/archive/refs/heads/master.zip)
 38 |   - 存储库快照等同于 [Releases](https://github.com/yeximm/Access_wechat_article/releases) 中的 [Source Code (zip)](https://github.com/yeximm/Access_wechat_article/archive/refs/heads/master.zip) 等，包含 `README` 等内容
 39 | 
 40 | ### 3.2 Python环境配置
 41 | 
 42 | （1）创建虚拟环境
 43 | 
 44 | ```bash
 45 | python -m venv access_wechat
 46 | ```
 47 | 
 48 | `-m venv` 后面的参数指定存放虚拟环境的目录（此处为 `access_wechat`）；目录名一般习惯使用 `venv`，这是一个不成文的规定。
 49 | 
 50 | （2）**激活**环境
 51 | 
 52 | - Windows
 53 | 
 54 |   ```bash
 55 |   .\access_wechat\Scripts\activate
 56 |   ```
 57 | 
 58 | - Unix/macOS
 59 | 
 60 |   ```bash
 61 |   source access_wechat/bin/activate
 62 |   ```
 63 | 
 64 | （3）退出环境
 65 | 
 66 | ```bash
 67 | deactivate
 68 | ```
 69 | 
 70 | ### 3.3 安装包文件
 71 | 
 72 | `requirements.txt`中包含所需python包文件名称，用来批量安装python包文件
 73 | 
 74 | 安装命令：
 75 | 
 76 | ```bash
 77 | pip install -r requirements.txt
 78 | ```
 79 | 
 80 | ### 3.4 运行参数
 81 | 
 82 | 1. 项目主文件为：`main.py`，其功能调用方式详见于此。
 83 |    项目中**生成文件的存储路径**为：`./all_data`（该目录由程序**自动创建**）
 84 | 2. 运行命令：
 85 |    
 86 |    1. 首先进入**虚拟环境**（详见 3.2 节中的“**激活**环境”）
 87 |    
 88 |    2. 安装python包文件（如已安装则进行下一步）
 89 |    
 90 |    3. 在项目目录运行：
 91 |    
 92 |       - ```bash
 93 |         python main.py
 94 |         ```
 95 |    
 96 |    4. 根据控制台提示输入
 97 |    
 98 |    5. 如需**自定义功能**，参照`main.py`中的函数调用方式自行编写。
 99 | 
100 | ## 四、功能截图
101 | 
102 | ### 4.1 功能1
103 | 
104 | ![function1](./README/function1.png)![function1.1](./README/function1.1.png)
105 | 
106 | ### 4.2 功能2
107 | 
108 | ![function2](./README/function2.png)
109 | 
110 | ![function2.1](./README/function2.1.png)
111 | 
112 | ### 4.3 功能3
113 | 
114 | ![function3](./README/function3.png)
115 | 
116 | ![function3.1](./README/function3.1.png)
117 | 
118 | ### 4.4 功能4
119 | 
120 | ![function4](./README/function4.png)
121 | 
122 | ## 五、程序流程图
123 | 
124 | ![wechat_article_drawio](./README/wechat_article_drawio.png)
125 | 
126 | ## 六、鼓励一下
127 | 
128 | 开源不易，若此项目有帮到你，望你能动用你的发财小手**Star**☆一下。
129 | 
130 | 如有遇到代码方面的问题，欢迎一起讨论，你的鼓励是这个项目继续更新的最大动力！
131 | 
132 | <p align = "center">    
133 | <img  src="https://github.com/yeximm/Access_wechat_article/blob/master/README/qrcode_1749894334903.jpg" width="300" />
134 | </p>
135 | 
136 | 
137 | 另外，十分感谢大家对于本项目的关注。
138 | 
139 | [![Stargazers repo roster for @yeximm/Access_wechat_article](https://reporoster.com/stars/yeximm/Access_wechat_article)](https://github.com/yeximm/Access_wechat_article/stargazers)
140 | [![Forkers repo roster for @yeximm/Access_wechat_article](https://reporoster.com/forks/yeximm/Access_wechat_article)](https://github.com/yeximm/Access_wechat_article/network/members)
141 | 
142 | ## LICENSE
143 | 
144 | 本作品采用许可协议 <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/">Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International</a>，简称 **[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/)**。
145 | 
146 | 所有以任何方式查看本仓库内容的人、或直接或间接使用本仓库内容的使用者都应仔细阅读此声明。本仓库管理者保留随时更改或补充此免责声明的权利。一旦使用、复制、修改了本仓库内容，则视为您已接受此免责声明。
147 | 
148 | 项目内容仅供学习研究，请勿用于商业用途。如对本仓库内容的功能有需求，应自行开发相关功能。所有基于本仓库内容的源代码，进行的任何修改，为其他个人或组织的自发行为，与本仓库内容没有任何直接或间接的关系，所造成的一切后果亦与本仓库内容和本仓库管理者无关。
149 | 
150 | 本仓库内容中涉及的第三方硬件、软件等，与本仓库内容没有任何直接或间接的关系。本仓库内容仅对部署和使用过程进行客观描述，不代表支持使用任何第三方硬件、软件。使用任何第三方硬件、软件，所造成的一切后果由使用的个人或组织承担，与本仓库内容无关。
151 | 
152 | ## Star History
153 | 
154 | [![Star History Chart](https://api.star-history.com/svg?repos=yeximm/Access_wechat_article&type=Date)](https://www.star-history.com/#yeximm/Access_wechat_article&Date)
155 | 
156 | 


--------------------------------------------------------------------------------
/README/20250316180200.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/20250316180200.jpg


--------------------------------------------------------------------------------
/README/function1.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function1.1.png


--------------------------------------------------------------------------------
/README/function1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function1.png


--------------------------------------------------------------------------------
/README/function2.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function2.1.png


--------------------------------------------------------------------------------
/README/function2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function2.png


--------------------------------------------------------------------------------
/README/function3.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function3.1.png


--------------------------------------------------------------------------------
/README/function3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function3.png


--------------------------------------------------------------------------------
/README/function4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function4.png


--------------------------------------------------------------------------------
/README/qrcode_1749894334903.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/qrcode_1749894334903.jpg


--------------------------------------------------------------------------------
/README/wechat_article_drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/wechat_article_drawio.png


--------------------------------------------------------------------------------
/README/程序流程图.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/程序流程图.drawio.png


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | from Access_articles import *
 2 | 
 3 | 
 4 | if __name__=="__main__":
 5 |     app = ArticleDetail()
 6 |     print('默认存储路径为：' + app.root_path)
 7 |     screen_text = '''请输入数字键！
 8 |         数字键1：获取公众号主页链接（输入公众号下任意一篇已发布的文章链接即可）
 9 |         数字键2：获取公众号下文章列表（每页约有文章几十篇）
10 |         数字键3：下载文章内容，自动下载文章列表中所有文章内容
11 |         数字键4：同功能3，下载文章内容，包括单个文章的文本内容 + 阅读量、点赞数等信息
12 |                 （请注意请求间隔，若请求太多太快可能会触发封禁！！）
13 |     输入其他任意字符退出！'''
14 |     print('欢迎使用，' + screen_text)
15 |     while True:
16 |         text = str(input('请输入功能数字：'))
17 | 
18 |         if text == '1':
19 |             random_url = (input('（默认公众号主页链接为“研招网资讯”，按回车键使用）\n请输入公众号下任意一篇已发布的文章链接：') or
20 |                           'https://mp.weixin.qq.com/s/4r_LKJu0mOeUc70ZZXK9LA')
21 |             app.get_article_link(random_url)
22 |             print('\n' + screen_text)
23 | 
24 |         elif text == '2':
25 |             access_token = input('\n以下内容需要用到fiddler工具！！！！！\n（1）在微信客户端打开步骤1获取到的链接，\n'
26 |                   '（2）在fiddler中查看——主机地址为https://mp.weixin.qq.com，URL地址为：/mp/profile_ext?acti\n'
27 |                   '（3）选中此项后按快捷键：Ctrl+U，复制此网址到剪贴板\n（4）将该内容粘贴到此处 (づ￣ 3￣)づ\n请输入复制的链接：')
28 |             pages = input('\n########## 默认获取第 1 页文章（约15篇）。如需公众号下全部文章，请输入：0 ##########\n'
29 |                           '请估算后输入需要下载的最新发布文章的页数(例：1)：') or 1
30 |             app.access_origin_list(access_token, int(pages))
31 |             print('\n' + screen_text)
32 | 
33 |         elif text == '3':   # 该功能不需要token
34 |             text_names3 = input('请输入 已下载文章列表的公众号名称 或 公众号的一篇文章链接(例如：泰山风景名胜区)：')
35 |             save_img = input('是否保存图片？是(输入任意值)，否(默认，直接按回车跳过)') or False
36 |             app.get_list_article(text_names3, save_img)
37 |             print('\n' + screen_text)
38 | 
39 |         elif text == '4':
40 |             access_token = input('\n以下内容需要用到fiddler工具！！！！！\n（1）在微信客户端打开步骤1获取到的链接，\n'
41 |                           '（2）在fiddler中查看——主机地址为https://mp.weixin.qq.com，URL地址为：/mp/profile_ext?acti\n'
42 |                           '（3）选中此项后按快捷键：Ctrl+U，复制此网址到剪贴板\n（4）将该内容粘贴到此处 (づ￣ 3￣)づ\n请输入复制的链接：')
43 |             app.get_detail_list(access_token)
44 |             print('\n未成功获取的链接已保存到本地。' + '\n' + screen_text)
45 | 
46 |         else:
47 |             print('\n已成功退出！')
48 |             break
49 | 
50 | 
51 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/requirements.txt


--------------------------------------------------------------------------------