# Repository dump framing, preserved from the original text:
# ├── Access_articles.py ├── LICENSE ├── README.md ├── README ├── 20250316180200.jpg ├── function1.1.png ├── function1.png ├── function2.1.png ├── function2.png ├── function3.1.png ├── function3.png ├── function4.png ├── qrcode_1749894334903.jpg ├── wechat_article_drawio.png └── 程序流程图.drawio.png ├── main.py └── requirements.txt
# /Access_articles.py:
# --------------------------------------------------------------------------------
"""Download WeChat official-account articles and their statistics.

``AccessPosts`` fetches single articles (text and images) and stores them
under ``./all_data/``. ``ArticleDetail`` builds on it to list all articles
of an account and to collect per-article statistics, using the ``uin`` /
``key`` / ``pass_ticket`` credentials contained in a URL captured from the
WeChat client.
"""
import json  # JSON decoding
import logging
import os
import random
import re  # regular expressions
import sys  # no longer used in this module; kept so the import list is unchanged
import time

import jsonpath
import pandas as pd  # Excel read/write
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent  # random browser User-Agent strings

logging.basicConfig(  # errors are appended to app.log
    filename='app.log',
    level=logging.ERROR,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
requests.packages.urllib3.disable_warnings()  # silence warnings caused by verify=False

# Timeout (seconds) applied to every HTTP request so a stalled connection
# cannot hang a batch run. get_next_list already used this value.
_REQUEST_TIMEOUT = 10
_IMAGE_HOST = 'https://mmbiz.qpic.cn/'
# Order matters: when several extensions occur in a URL the LAST one wins.
_IMAGE_EXTENSIONS = ('gif', 'jpg', 'jpeg', 'png', 'webp')
# Characters that are not allowed in Windows file names.
_WINDOWS_FORBIDDEN_CHARS = re.compile(r'[\\/*?:"<>|]')


def _now():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())


def _first_group(pattern, text, what):
    """Return group 1 of the first match of *pattern* in *text*.

    Raises:
        ValueError: if the pattern does not match; *what* names the missing
            field in the error message.
    """
    match = re.search(pattern, text)
    if match is None:
        raise ValueError(f'could not find {what} in the page content')
    return match.group(1)


def _query_value(url, name):
    """Return the text following the first ``name=`` in *url*, or ``''``.

    The value ends at the next ``&`` or ``#`` (or at the end of the string).
    """
    match = re.search(re.escape(name) + r'=([^&#]*)', url)
    return match.group(1) if match else ''


def _page_text_lines(html):
    """Return the non-blank text lines of an HTML page as a list."""
    soup = BeautifulSoup(html, 'html.parser')
    return [line for line in soup.getText().split('\n') if line.strip()]


def _append_rows_to_excel(path, new_rows):
    """Append the DataFrame *new_rows* to the workbook at *path*.

    The workbook is created when it does not exist yet; only its first
    sheet is read and rewritten.
    """
    existing = pd.read_excel(path) if os.path.exists(path) else pd.DataFrame()
    combined = pd.concat([existing, new_rows], ignore_index=True)
    combined.to_excel(path, index=False)


def _first_or_blank(values):
    """Return ``values[0]``, or ``''`` when *values* is empty or falsy.

    ``jsonpath.jsonpath`` returns ``False`` (not an empty list) when nothing
    matches, so the result cannot be indexed unconditionally.
    """
    return values[0] if values else ''


class AccessPosts:
    """Fetch WeChat articles by URL and store them locally.

    Features:
        1. Request an article URL and read the page text.
        2. Store the article (single article or a whole list).
        3. Detect the human-verification (captcha) page. The automatic
           bypass is not active, see the note at the end of the class.
    """

    def __init__(self):
        self.root_path = r'./all_data/'  # data storage directory
        # Prefix of each account's directory name: "<prefix><account name>".
        self.official_names_head = '公众号----'
        self.headers = {
            'User-Agent': UserAgent().random,  # random browser identifier
        }
        self.cookies = {"poc_sid": ''}  # device id cookie, sent with the requests
        os.makedirs(self.root_path, exist_ok=True)

    def save_one_article(self, article_content, img_save_flag=True, content_save_flag=True):
        """Parse an article page and store its images and/or text.

        Args:
            article_content: HTML text of the article page.
            img_save_flag: download the article's images when true.
            content_save_flag: append the article text to the account's
                ``文章内容(article_contents).xlsx`` workbook when true.

        Raises:
            ValueError: if the nickname, link, creation time or title cannot
                be found in *article_content*.
        """
        nickname = _first_group(r'var nickname.*"(.*?)".*', article_content, 'nickname')
        article_link = _first_group(r'var msg_link = .*"(.*?)".*', article_content, 'msg_link')
        createTime = _first_group(r"var createTime = '(.*?)'.*", article_content, 'createTime')
        article_title = _first_group(r"var title = '(.*?)'.*", article_content, 'title')
        # Title made safe for Windows: forbidden characters become "_" and
        # dots are dropped.
        article_title_win = _WINDOWS_FORBIDDEN_CHARS.sub('_', article_title).replace('.', '')

        # The account directory is created even when both flags are false.
        official_path = self.root_path + self.official_names_head + nickname
        os.makedirs(official_path, exist_ok=True)

        if img_save_flag:
            print('开启保存文章图片选项,准备下载文章图片')
            # ":" is not allowed in Windows directory names, so the time's
            # colons are replaced by the full-width colon U+FF1A (written as
            # an escape so it survives Unicode normalisation of this file).
            img_save_path = (official_path + '/'
                             + createTime.replace(':', '\uff1a') + ' ' + article_title_win)
            os.makedirs(img_save_path, exist_ok=True)
            self._save_article_images(article_content, img_save_path)
            print('已保存文章图片>>>> ' + article_title)

        if content_save_flag:
            article_texts = _page_text_lines(article_content)
            article_contents_path = official_path + '/' + '文章内容(article_contents).xlsx'
            local_time = _now()
            columns = ['本地存储时间', '文章发布时间', '文章名称', '文章链接', '文章文本内容']
            new_data_df = pd.DataFrame(
                [[local_time, createTime, article_title, str(article_link), str(article_texts)]],
                columns=columns)
            _append_rows_to_excel(article_contents_path, new_data_df)
            print(local_time + ' 已保存文章>>>> ' + article_title)
            print(local_time + ' 内容存储路径>>>> ' + article_contents_path)

    def _save_article_images(self, article_content, img_save_path):
        """Download every ``mmbiz.qpic.cn`` image referenced in the page.

        Images are numbered from 1 in order of appearance. A failed download
        is reported and skipped; it does not abort the article.
        """
        fragments = article_content.split(_IMAGE_HOST)[1:]
        for number, fragment in enumerate(fragments, start=1):
            image_url = _IMAGE_HOST + fragment.split('"')[0]
            try:
                response = requests.get(image_url, cookies=self.cookies, verify=False,
                                        timeout=_REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                logging.error('image download failed: %s (%s)', image_url, exc)
                print(f"无法下载图片,网络错误: {exc}")
                continue
            if response.status_code != 200:
                print(f"无法下载图片,状态码: {response.status_code}")
                continue
            # Take the extension from the URL; fall back to jpg when the URL
            # names none.
            matched = [ext for ext in _IMAGE_EXTENSIONS if ext in image_url]
            suffix = matched[-1] if matched else 'jpg'
            file_path = img_save_path + '/' + str(number) + '.' + suffix
            with open(file_path, 'wb') as f:
                f.write(response.content)
            print(f"已成功下载图片: {file_path}")

    def get_one_article(self, url, img_save_flag=True, content_save_flag=True):
        """Request one article and store it.

        Args:
            url: article link (permanent or short link).
            img_save_flag: forwarded to :meth:`save_one_article`.
            content_save_flag: forwarded to :meth:`save_one_article`.

        Returns:
            ``{'content_flag': 1, 'content': <page html>}`` on success,
            ``{'content_flag': 0}`` otherwise (request error, captcha page,
            rate limit, unexpected page, or a failure while saving).
        """
        try:
            res = requests.get(url, headers=self.headers, cookies=self.cookies, verify=False,
                               timeout=_REQUEST_TIMEOUT)
        except requests.RequestException:
            logging.exception('request failed: %s', url)
            print('出现其他问题,请查找原因后再试!!!!')
            return {'content_flag': 0}

        if 'var createTime = ' in res.text:  # a regular article page
            print('正常获取到文章内容,开始保存操作')
            try:
                self.save_one_article(res.text, img_save_flag, content_save_flag)
                return {'content_flag': 1, 'content': res.text}
            except Exception:
                logging.exception('saving the article failed: %s', url)
                title_match = re.search(r"var title = '(.*?)'.*", res.text)
                # The title may be missing too; never concatenate None.
                article_title = title_match.group(1) if title_match else ''
                print('检测到抓取出错,文章名>>>> ' + article_title)
                print('检测到抓取出错,文章链接>>>> ' + url)
                return {'content_flag': 0}
        # The two checks below match on the punctuation-free part of each
        # message so half-width and full-width comma variants are both found.
        if '完成验证后即可继续访问。<' in res.text:
            print('当前环境异常,请检查链接后访问!!!')  # human verification required
            return {'content_flag': 0}
        if '请稍后再试。' in res.text and '操作频繁' in res.text:
            print('操作频繁了,等会再弄或换ip弄!!!')  # rate limited
            return {'content_flag': 0}
        print('出现其他问题,请查找原因后再试!!!!')
        return {'content_flag': 0}

    def get_list_article(self, name_link, img_save_flag=True, content_save_flag=True):
        """Store every article listed in an account's direct-link workbook.

        Args:
            name_link: the account name, or the URL of any article of the
                account (the name is then read from that article).
            img_save_flag: forwarded to :meth:`get_one_article`.
            content_save_flag: forwarded to :meth:`get_one_article`.

        Returns:
            None. Progress is printed to the terminal.
        """
        if 'http' in name_link:
            print('检测到输入为链接,开始获取公众号名称')
            content = self.get_one_article(name_link, False, False)
            if content['content_flag'] != 1:
                print('未获取到公众号名称')
                return None
            nickname = _first_group(r'var nickname.*"(.*?)".*', content['content'], 'nickname')
        else:
            print('检测到输入为公众号名称')
            nickname = name_link

        official_path = self.root_path + self.official_names_head + nickname
        article_list_path = official_path + '/' + '文章列表(article_list)_直连链接.xlsx'
        if not os.path.exists(article_list_path):
            print('文件不存在,请检查目录文件>>>> 文章列表(article_list)_直连链接.xlsx')
            return None

        frame_df = pd.read_excel(article_list_path)  # first sheet only
        for _, row in frame_df.iterrows():
            # The fifth column holds the directly accessible link.
            self.get_one_article(row.iloc[4], img_save_flag, content_save_flag)
        return None

    # NOTE: a commented-out ``verify_user`` prototype (a multi-step captcha
    # verification against t.captcha.qq.com) used to follow here. It was
    # never called and has been removed as dead code.


class ArticleDetail(AccessPosts):
    """List an account's articles and collect per-article statistics."""

    def __init__(self):
        super().__init__()
        # Credentials parsed from the captured URL by access_origin_list.
        self.biz = None
        self.uin = None
        self.key = None
        self.pass_ticket = None
        self.text = 'website'  # reserved attribute

    def get_article_link(self, url):
        """Print (and return) the account home-page link for an article.

        Args:
            url: short link of any published article of the account.

        Returns:
            The home-page URL, or None when the article or its ``biz`` value
            could not be read.
        """
        content = super().get_one_article(url, False, False)
        if content['content_flag'] != 1:
            print('未获取到文章内容,请检查链接是否正确')
            return None

        print('正在生成微信公众号主页链接……\n')
        biz_match = re.search('var biz = "(.*?);', content['content'])
        if biz_match is None:
            print('未获取到文章内容,请检查链接是否正确')
            return None
        # The page assigns biz as `"" || "<value>"`; strip the JS noise.
        self.biz = biz_match.group(1).replace('" || "', '').replace('"', '')
        names = _first_group(r'var nickname.*"(.*?)".*', content['content'], 'nickname')
        main_url = ('https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=' + self.biz +
                    '&scene=124#wechat_redirect')
        print(names + '公众号主页链接为:' + main_url)
        print('将此链接 ( ̄︶ ̄)↗  粘贴发送到 ‘微信PC端-文件传输助手’')
        return main_url

    def _fetch_nickname(self, raw_link):
        """Request an article from the list and return its account name.

        Raises:
            ValueError: if the page does not contain the nickname.
        """
        new_url = raw_link.replace('amp;', '')
        res = requests.get(new_url, headers=self.headers, verify=False,
                           timeout=_REQUEST_TIMEOUT)
        return _first_group(r'var nickname.*"(.*?)".*', res.text, 'nickname')

    def access_origin_list(self, access_token, pages=None, save_list=True, transform_list=True):
        """Fetch the account's article list and store it as Excel files.

        Args:
            access_token: URL captured from the WeChat client (e.g. with
                Fiddler) that contains ``biz``, ``uin``, ``key`` and
                ``pass_ticket``.
            pages: number of list pages to fetch; all pages when falsy.
            save_list: append the list to
                ``文章列表(article_list)_原始链接.xlsx`` when true.
            transform_list: derive directly accessible links from that
                workbook and write ``文章列表(article_list)_直连链接.xlsx``.

        Returns:
            A list of ``[local time, publish date, title, raw link]`` rows,
            or None when a parameter is missing or no article was found.
        """
        # Missing parameters yield '' and are rejected by the check below.
        self.biz = _query_value(access_token, 'biz')
        self.uin = _query_value(access_token, 'uin')
        self.key = _query_value(access_token, 'key')
        self.pass_ticket = _query_value(access_token, 'pass_ticket')
        if self.biz and self.uin and self.pass_ticket and self.key:
            print('参数齐全,开始获取文章信息,默认状态获取全部文章')
        else:
            print('\n※※※ 参数有误,请重新输入')
            return None

        limit = int(pages) if pages else None  # None means "until the last page"
        if limit is None:
            print('开始获取公众号下所有的文章列表')
        else:
            print('输入值为:' + str(pages) + ',开始获取前' + str(pages) + '页文章')

        all_list = []
        page = 0
        while limit is None or page < limit:
            p_data = self.get_next_list(page)
            if p_data['m_flag'] != 1:
                print('请求结束,文章列表获取完毕!')
                break
            all_list.extend(p_data['passage_list'])
            page += 1
            delay_time = random.uniform(1, 5)  # random pause to avoid being blocked
            print('为预防被封禁,开始延时操作,延时时间:' + str(delay_time) + '秒')
            time.sleep(delay_time)

        print('********************共获取到 ' + str(len(all_list)) + ' 篇文章,开始保存文章,若为0篇请检查错误!!!')
        if not all_list:
            print('获取到文章列表为空,请注意检查!!!!')
            return None
        if not (save_list or transform_list):
            return all_list

        # Both steps below need the account name for the storage directory.
        nickname = self._fetch_nickname(all_list[0][3])
        official_path = self.root_path + self.official_names_head + nickname
        article_contents_path = official_path + '/' + '文章列表(article_list)_原始链接.xlsx'

        if save_list:
            print('****************************************开始保存文章,若以上为 获取到0篇 请检查错误!!!')
            os.makedirs(official_path, exist_ok=True)
            columns = ['本地保存时间', '文章发布时间', '文章名称', '文章原始链接(直接访问会提示验证)']
            _append_rows_to_excel(article_contents_path, pd.DataFrame(all_list, columns=columns))
            local_time = _now()
            print(local_time + ' 已获取公众号文章目录>>>> ' + nickname)
            print(local_time + ' 存储路径>>>> ' + article_contents_path)

        if transform_list:
            print("开始转换 " + nickname + ' 公众号的文章列表原始链接')
            article_list_path = official_path + '/' + '文章列表(article_list)_直连链接.xlsx'
            if not os.path.exists(article_contents_path):
                print('文件不存在,请检查目录文件>>>> 文章列表(article_list)_原始链接.xlsx')
            else:
                frame_df = pd.read_excel(article_contents_path)
                # Dropping the HTML-escaped "amp;" gives a directly usable link.
                new_links = [link.replace('amp;', '') for link in frame_df.iloc[:, 3]]
                frame_df['可直接访问链接'] = new_links
                frame_df.to_excel(article_list_path, index=False)
                local_time = _now()
                print(local_time + ' 已转换公众号文章列表>>>> ' + nickname)
                print(local_time + ' 存储路径>>>> ' + article_list_path)
        return all_list

    def get_next_list(self, page):
        """Fetch one page (10 messages) of the account's article list.

        Args:
            page: zero-based page number.

        Returns:
            ``{'m_flag': 1, 'passage_list': rows, 'length': len(rows)}`` on
            success, where each row is ``[local time, publish date, title,
            link]``; ``{'m_flag': 0}`` when the page could not be fetched or
            contains no articles.
        """
        page = int(page)
        offset = page * 10  # page 0 is the default first page
        print('正在获取第 ' + str(page + 1) + ' 页文章列表')
        url = ('https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=' + self.biz + '&f=json&offset='
               + str(offset) + '&count=10&is_ok=1&scene=124&uin=' + self.uin + '&key=' + self.key + '&pass_ticket='
               + self.pass_ticket + '&wxtoken=&appmsg_token=&x5=0&f=json')
        try:
            res = requests.get(url=url, headers=self.headers, timeout=_REQUEST_TIMEOUT, verify=False)
        except requests.RequestException as exc:
            print('失败!!!获取第 ' + str(page + 1) + ' 页文章列表失败!!!')
            print('请检查错误类型,详情记录在日志中')
            logging.error(f'发生异常: {type(exc).__name__}: {exc}', exc_info=True)
            print('请求结束!未获取到第 ' + str(page + 1) + ' 页文章列表')
            return {'m_flag': 0}

        if 'app_msg_ext_info' in res.text:
            # 'general_msg_list' is itself a JSON string whose 'list' holds
            # the messages. Each message has 'comm_msg_info' (with the
            # 'datetime' timestamp) and usually 'app_msg_ext_info' (title,
            # content_url, and the other articles of the same push in
            # 'multi_app_msg_item_list').
            messages = json.loads(json.loads(res.text)['general_msg_list'])['list']
            passage_list = []  # all articles of this page
            for message in messages:
                info = message.get('app_msg_ext_info')
                if not info:
                    continue  # a message without article data
                time_tuple = time.localtime(message['comm_msg_info']['datetime'])
                create_time = time.strftime("%Y-%m-%d", time_tuple)
                local_time = _now()
                content_url = info['content_url'].replace('#wechat_redirect', '')
                passage_list.append([local_time, create_time, info['title'], content_url])
                for extra in info.get('multi_app_msg_item_list') or []:
                    content_url = extra['content_url'].replace('#wechat_redirect', '')
                    passage_list.append([local_time, create_time, extra['title'], content_url])
            print('该页包含 ' + str(len(passage_list)) + ' 篇文章')
            return {
                'm_flag': 1,
                'passage_list': passage_list,
                'length': len(passage_list)
            }
        if '"home_page_list":[]' in res.text:
            print('\n出现:操作频繁,请稍后再试\n该号已被封禁,请解封后再来!!!\n')
            return {'m_flag': 0}
        print('请求结束!未获取到第 ' + str(page + 1) + ' 页文章列表')
        return {'m_flag': 0}

    def get_detail_list(self, access_token):
        """Store the details of every article in the direct-link workbook.

        Details are appended to ``文章详情(article_detiles).xlsx``; articles
        that fail are appended to ``问题链接(error_links).xlsx``.

        Args:
            access_token: URL captured from the WeChat client, see
                :meth:`access_origin_list`.

        Returns:
            None. Progress is printed to the terminal.
        """
        # The first list page is fetched only to learn the account name.
        first_link = self.access_origin_list(access_token, 1, False, False)
        if not first_link:
            print('获取失败')
            return None
        nickname = self._fetch_nickname(first_link[0][3])

        print('开始获取公众号>>>> ' + nickname)
        print('开始检测公众号的文章列表是否存在>>>> ')
        official_path = self.root_path + self.official_names_head + nickname
        article_list_path = official_path + '/' + '文章列表(article_list)_直连链接.xlsx'
        if not os.path.exists(article_list_path):
            print('文件不存在,请检查目录文件>>>> ' + article_list_path)
            return None

        article_detail_path = official_path + '/' + '文章详情(article_detiles).xlsx'
        article_error_path = official_path + '/' + '问题链接(error_links).xlsx'
        detail_columns = ['本地创建时间', '文章发布时间', '文章标题', '文章链接', '文章文本内容',
                          '阅读量', '点赞数', '转发数', '在看数',
                          '评论', '评论点赞']
        error_columns = ['本地保存时间', '文章发布时间', '文章名称', '文章原始链接(直接访问会提示验证)']

        article_list_df = pd.read_excel(article_list_path)  # first sheet only
        for _, row in article_list_df.iterrows():
            single_article_url = row.iloc[4]  # directly accessible link
            try:
                new_messages = self.get_detail_new(single_article_url)
                if len(new_messages) != len(detail_columns):
                    raise ValueError('article statistics are not available')
                _append_rows_to_excel(article_detail_path,
                                      pd.DataFrame([new_messages], columns=detail_columns))
                local_time = _now()
                print(local_time + ' 已保存文章详情>>>> ' + new_messages[2])
                print(local_time + ' 内容存储路径>>>> ' + article_detail_path)

                delay_time = random.uniform(3, 6)  # random pause to avoid being blocked
                print('为预防被封禁,开始延时操作,延时时间:' + str(delay_time) + '秒')
                time.sleep(delay_time)
            except Exception:
                logging.exception('article detail failed: %s', single_article_url)
                title = str(row.iloc[2])
                print('有问题的链接,文章标题为>>>> ' + title)
                # Only the failing row is appended; earlier failures are
                # already in the workbook.
                _append_rows_to_excel(article_error_path,
                                      pd.DataFrame([row], columns=error_columns))
                local_time = _now()
                print(local_time + ' 已保存问题文章链接>>>> ' + title)
                print(local_time + ' 内容存储路径>>>> ' + article_error_path)
        return None

    def get_detail_new(self, link):
        """Collect text, statistics and comments of one article.

        Args:
            link: directly accessible article link containing ``mid``,
                ``sn`` and ``idx`` query parameters.

        Returns:
            An 11-tuple ``(local time, publish time, title, link, text
            lines, read count, like count, share count, "looking" count,
            comments, comment like counts)``. When the statistics response
            contains no read count, the 4-tuple ``('', '', '', '')`` is
            returned instead. ``comments`` and the comment like counts are
            whatever ``jsonpath.jsonpath`` returns (``False`` when nothing
            matches).

        Raises:
            KeyError: if the article page could not be fetched.
            ValueError: if the page lacks the creation time or title.
            IndexError: if *link* lacks ``mid``, ``sn`` or ``idx``.
        """
        contents = self.get_one_article(link, False, False)
        page = contents['content']
        createTime = _first_group(r"var createTime = '(.*?)'.*", page, 'createTime')
        article_title = _first_group(r"var title = '(.*?)'.*", page, 'title')
        article_texts = _page_text_lines(page)

        # Request parameters.
        r = '0.' + ''.join(str(random.randint(0, 9)) for _ in range(16))
        appmsg_type = "9"
        mid = str(link).split('mid=')[1].split('&')[0]
        sn = str(link).split('sn=')[1].split('&')[0]
        idx = str(link).split('idx=')[1].split('&')[0]

        comment_match = re.search("var comment_id = '(.*?)'.*", page)
        if comment_match:
            comment_id = comment_match.group(1)
        else:
            print('没有匹配到comment_id,文章标题为:' + article_title)
            comment_id = ''

        if 'var req_id = ' in page:
            req_id = page.split('var req_id = ')[1].split(';')[0].replace("'", "").replace('"', '')
        else:
            print('没有匹配到req_id,文章标题为:' + article_title)
            req_id = ''

        # Article statistics.
        detail_url = ('https://mp.weixin.qq.com/mp/getappmsgext?f=json&mock=&fasttmplajax=1&f=json' + '&uin=' + self.uin
                      + '&key=' + self.key + '&pass_ticket=' + self.pass_ticket + '&__biz=' + self.biz)
        data = {
            'r': r,
            'sn': sn,
            'mid': mid,
            'idx': idx,
            'req_id': req_id,
            'title': article_title,
            'comment_id': comment_id,
            'appmsg_type': appmsg_type,
            '__biz': self.biz,
            'pass_ticket': self.pass_ticket,
            'abtest_cookie': '', 'devicetype': 'Windows 7 x64', 'version': '63090b13', 'is_need_ticket': '0',
            'is_need_ad': '0', 'is_need_reward': '0', 'both_ad': '0', 'reward_uin_count': '0', 'send_time': '',
            'msg_daily_idx': '1', 'is_original': '0', 'is_only_read': '1', 'scene': '38', 'is_temp_url': '0',
            'item_show_type': '0', 'tmp_version': '1', 'more_read_type': '0', 'appmsg_like_type': '2',
            'related_video_sn': '', 'related_video_num': '5', 'vid': '', 'is_pay_subscribe': '0',
            'pay_subscribe_uin_count': '0', 'has_red_packet_cover': '0', 'album_id': '1296223588617486300',
            'album_video_num': '5', 'cur_album_id': 'undefined', 'is_public_related_video': 'NaN',
            'encode_info_by_base64': 'undefined', 'exptype': '', 'export_key_extinfo': '', 'business_type': '0',
        }
        res = requests.post(url=detail_url, data=data, headers=self.headers, cookies=self.cookies,
                            verify=False, timeout=_REQUEST_TIMEOUT)
        stats = json.loads(res.text)  # parsed once, queried four times
        read_num = jsonpath.jsonpath(stats, "$.." + "read_num")
        like_num = jsonpath.jsonpath(stats, "$.." + "old_like_num")
        share_num = jsonpath.jsonpath(stats, "$.." + "share_num")
        show_read = jsonpath.jsonpath(stats, "$.." + "show_read")

        # Comments and their like counts, requested for THIS article
        # (appmsgid is the article's mid).
        comment_url = ('https://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz=' + self.biz +
                       '&appmsgid=' + mid + '&idx=' + idx + '&comment_id=' + comment_id
                       + '&offset=0&limit=100&uin='
                       + self.uin + '&key=' + self.key + '&pass_ticket=' + self.pass_ticket
                       + '&wxtoken=&devicetype=Windows+10&clientversion=62060833&appmsg_token=')
        response = requests.get(comment_url, headers=self.headers, cookies=self.cookies,
                                verify=False, timeout=_REQUEST_TIMEOUT)
        json_content = json.loads(response.text)
        comments = jsonpath.jsonpath(json_content, '$..content')
        comments_star_nums = jsonpath.jsonpath(json_content, '$..like_num')

        local_time = _now()
        if not read_num:  # covers False (no match), [] and ''
            return '', '', '', ''
        return (local_time, createTime, article_title, link, article_texts,
                read_num[0], _first_or_blank(like_num), _first_or_blank(share_num),
                _first_or_blank(show_read),
                comments, comments_star_nums)


# --------------------------------------------------------------------------------
# /LICENSE:
# --------------------------------------------------------------------------------
# Attribution-NonCommercial-ShareAlike 4.0 International
#
======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. 
More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 58 | Public License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License 63 | ("Public License"). To the extent this Public License may be 64 | interpreted as a contract, You are granted the Licensed Rights in 65 | consideration of Your acceptance of these terms and conditions, and the 66 | Licensor grants You such rights in consideration of benefits the 67 | Licensor receives from making the Licensed Material available under 68 | these terms and conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. 
Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-NC-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. 
License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution, NonCommercial, and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. NonCommercial means not primarily intended for or directed towards 126 | commercial advantage or monetary compensation. For purposes of 127 | this Public License, the exchange of the Licensed Material for 128 | other material subject to Copyright and Similar Rights by digital 129 | file-sharing or similar means is NonCommercial provided there is 130 | no payment of monetary compensation in connection with the 131 | exchange. 132 | 133 | l. Share means to provide material to the public by any means or 134 | process that requires permission under the Licensed Rights, such 135 | as reproduction, public display, public performance, distribution, 136 | dissemination, communication, or importation, and to make material 137 | available to the public including in ways that members of the 138 | public may access the material from a place and at a time 139 | individually chosen by them. 140 | 141 | m. 
Sui Generis Database Rights means rights other than copyright 142 | resulting from Directive 96/9/EC of the European Parliament and of 143 | the Council of 11 March 1996 on the legal protection of databases, 144 | as amended and/or succeeded, as well as other essentially 145 | equivalent rights anywhere in the world. 146 | 147 | n. You means the individual or entity exercising the Licensed Rights 148 | under this Public License. Your has a corresponding meaning. 149 | 150 | 151 | Section 2 -- Scope. 152 | 153 | a. License grant. 154 | 155 | 1. Subject to the terms and conditions of this Public License, 156 | the Licensor hereby grants You a worldwide, royalty-free, 157 | non-sublicensable, non-exclusive, irrevocable license to 158 | exercise the Licensed Rights in the Licensed Material to: 159 | 160 | a. reproduce and Share the Licensed Material, in whole or 161 | in part, for NonCommercial purposes only; and 162 | 163 | b. produce, reproduce, and Share Adapted Material for 164 | NonCommercial purposes only. 165 | 166 | 2. Exceptions and Limitations. For the avoidance of doubt, where 167 | Exceptions and Limitations apply to Your use, this Public 168 | License does not apply, and You do not need to comply with 169 | its terms and conditions. 170 | 171 | 3. Term. The term of this Public License is specified in Section 172 | 6(a). 173 | 174 | 4. Media and formats; technical modifications allowed. The 175 | Licensor authorizes You to exercise the Licensed Rights in 176 | all media and formats whether now known or hereafter created, 177 | and to make technical modifications necessary to do so. The 178 | Licensor waives and/or agrees not to assert any right or 179 | authority to forbid You from making technical modifications 180 | necessary to exercise the Licensed Rights, including 181 | technical modifications necessary to circumvent Effective 182 | Technological Measures. 
For purposes of this Public License, 183 | simply making modifications authorized by this Section 2(a) 184 | (4) never produces Adapted Material. 185 | 186 | 5. Downstream recipients. 187 | 188 | a. Offer from the Licensor -- Licensed Material. Every 189 | recipient of the Licensed Material automatically 190 | receives an offer from the Licensor to exercise the 191 | Licensed Rights under the terms and conditions of this 192 | Public License. 193 | 194 | b. Additional offer from the Licensor -- Adapted Material. 195 | Every recipient of Adapted Material from You 196 | automatically receives an offer from the Licensor to 197 | exercise the Licensed Rights in the Adapted Material 198 | under the conditions of the Adapter's License You apply. 199 | 200 | c. No downstream restrictions. You may not offer or impose 201 | any additional or different terms or conditions on, or 202 | apply any Effective Technological Measures to, the 203 | Licensed Material if doing so restricts exercise of the 204 | Licensed Rights by any recipient of the Licensed 205 | Material. 206 | 207 | 6. No endorsement. Nothing in this Public License constitutes or 208 | may be construed as permission to assert or imply that You 209 | are, or that Your use of the Licensed Material is, connected 210 | with, or sponsored, endorsed, or granted official status by, 211 | the Licensor or others designated to receive attribution as 212 | provided in Section 3(a)(1)(A)(i). 213 | 214 | b. Other rights. 215 | 216 | 1. Moral rights, such as the right of integrity, are not 217 | licensed under this Public License, nor are publicity, 218 | privacy, and/or other similar personality rights; however, to 219 | the extent possible, the Licensor waives and/or agrees not to 220 | assert any such rights held by the Licensor to the limited 221 | extent necessary to allow You to exercise the Licensed 222 | Rights, but not otherwise. 223 | 224 | 2. 
Patent and trademark rights are not licensed under this 225 | Public License. 226 | 227 | 3. To the extent possible, the Licensor waives any right to 228 | collect royalties from You for the exercise of the Licensed 229 | Rights, whether directly or through a collecting society 230 | under any voluntary or waivable statutory or compulsory 231 | licensing scheme. In all other cases the Licensor expressly 232 | reserves any right to collect such royalties, including when 233 | the Licensed Material is used other than for NonCommercial 234 | purposes. 235 | 236 | 237 | Section 3 -- License Conditions. 238 | 239 | Your exercise of the Licensed Rights is expressly made subject to the 240 | following conditions. 241 | 242 | a. Attribution. 243 | 244 | 1. If You Share the Licensed Material (including in modified 245 | form), You must: 246 | 247 | a. retain the following if it is supplied by the Licensor 248 | with the Licensed Material: 249 | 250 | i. identification of the creator(s) of the Licensed 251 | Material and any others designated to receive 252 | attribution, in any reasonable manner requested by 253 | the Licensor (including by pseudonym if 254 | designated); 255 | 256 | ii. a copyright notice; 257 | 258 | iii. a notice that refers to this Public License; 259 | 260 | iv. a notice that refers to the disclaimer of 261 | warranties; 262 | 263 | v. a URI or hyperlink to the Licensed Material to the 264 | extent reasonably practicable; 265 | 266 | b. indicate if You modified the Licensed Material and 267 | retain an indication of any previous modifications; and 268 | 269 | c. indicate the Licensed Material is licensed under this 270 | Public License, and include the text of, or the URI or 271 | hyperlink to, this Public License. 272 | 273 | 2. You may satisfy the conditions in Section 3(a)(1) in any 274 | reasonable manner based on the medium, means, and context in 275 | which You Share the Licensed Material. 
For example, it may be 276 | reasonable to satisfy the conditions by providing a URI or 277 | hyperlink to a resource that includes the required 278 | information. 279 | 3. If requested by the Licensor, You must remove any of the 280 | information required by Section 3(a)(1)(A) to the extent 281 | reasonably practicable. 282 | 283 | b. ShareAlike. 284 | 285 | In addition to the conditions in Section 3(a), if You Share 286 | Adapted Material You produce, the following conditions also apply. 287 | 288 | 1. The Adapter's License You apply must be a Creative Commons 289 | license with the same License Elements, this version or 290 | later, or a BY-NC-SA Compatible License. 291 | 292 | 2. You must include the text of, or the URI or hyperlink to, the 293 | Adapter's License You apply. You may satisfy this condition 294 | in any reasonable manner based on the medium, means, and 295 | context in which You Share Adapted Material. 296 | 297 | 3. You may not offer or impose any additional or different terms 298 | or conditions on, or apply any Effective Technological 299 | Measures to, Adapted Material that restrict exercise of the 300 | rights granted under the Adapter's License You apply. 301 | 302 | 303 | Section 4 -- Sui Generis Database Rights. 304 | 305 | Where the Licensed Rights include Sui Generis Database Rights that 306 | apply to Your use of the Licensed Material: 307 | 308 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 309 | to extract, reuse, reproduce, and Share all or a substantial 310 | portion of the contents of the database for NonCommercial purposes 311 | only; 312 | 313 | b. if You include all or a substantial portion of the database 314 | contents in a database in which You have Sui Generis Database 315 | Rights, then the database in which You have Sui Generis Database 316 | Rights (but not its individual contents) is Adapted Material, 317 | including for purposes of Section 3(b); and 318 | 319 | c. 
You must comply with the conditions in Section 3(a) if You Share 320 | all or a substantial portion of the contents of the database. 321 | 322 | For the avoidance of doubt, this Section 4 supplements and does not 323 | replace Your obligations under this Public License where the Licensed 324 | Rights include other Copyright and Similar Rights. 325 | 326 | 327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 328 | 329 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 330 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 331 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 332 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 333 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 334 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 335 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 336 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 337 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 338 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 339 | 340 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 341 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 342 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 343 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 344 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 345 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 346 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 347 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 348 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 349 | 350 | c. The disclaimer of warranties and limitation of liability provided 351 | above shall be interpreted in a manner that, to the extent 352 | possible, most closely approximates an absolute disclaimer and 353 | waiver of all liability. 
354 | 355 | 356 | Section 6 -- Term and Termination. 357 | 358 | a. This Public License applies for the term of the Copyright and 359 | Similar Rights licensed here. However, if You fail to comply with 360 | this Public License, then Your rights under this Public License 361 | terminate automatically. 362 | 363 | b. Where Your right to use the Licensed Material has terminated under 364 | Section 6(a), it reinstates: 365 | 366 | 1. automatically as of the date the violation is cured, provided 367 | it is cured within 30 days of Your discovery of the 368 | violation; or 369 | 370 | 2. upon express reinstatement by the Licensor. 371 | 372 | For the avoidance of doubt, this Section 6(b) does not affect any 373 | right the Licensor may have to seek remedies for Your violations 374 | of this Public License. 375 | 376 | c. For the avoidance of doubt, the Licensor may also offer the 377 | Licensed Material under separate terms or conditions or stop 378 | distributing the Licensed Material at any time; however, doing so 379 | will not terminate this Public License. 380 | 381 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 382 | License. 383 | 384 | 385 | Section 7 -- Other Terms and Conditions. 386 | 387 | a. The Licensor shall not be bound by any additional or different 388 | terms or conditions communicated by You unless expressly agreed. 389 | 390 | b. Any arrangements, understandings, or agreements regarding the 391 | Licensed Material not stated herein are separate from and 392 | independent of the terms and conditions of this Public License. 393 | 394 | 395 | Section 8 -- Interpretation. 396 | 397 | a. For the avoidance of doubt, this Public License does not, and 398 | shall not be interpreted to, reduce, limit, restrict, or impose 399 | conditions on any use of the Licensed Material that could lawfully 400 | be made without permission under this Public License. 401 | 402 | b. 
To the extent possible, if any provision of this Public License is 403 | deemed unenforceable, it shall be automatically reformed to the 404 | minimum extent necessary to make it enforceable. If the provision 405 | cannot be reformed, it shall be severed from this Public License 406 | without affecting the enforceability of the remaining terms and 407 | conditions. 408 | 409 | c. No term or condition of this Public License will be waived and no 410 | failure to comply consented to unless expressly agreed to by the 411 | Licensor. 412 | 413 | d. Nothing in this Public License constitutes or may be interpreted 414 | as a limitation upon, or waiver of, any privileges and immunities 415 | that apply to the Licensor or You, including from the legal 416 | processes of any jurisdiction or authority. 417 | 418 | ======================================================================= 419 | 420 | Creative Commons is not a party to its public 421 | licenses. Notwithstanding, Creative Commons may elect to apply one of 422 | its public licenses to material it publishes and in those instances 423 | will be considered the “Licensor.” The text of the Creative Commons 424 | public licenses is dedicated to the public domain under the CC0 Public 425 | Domain Dedication. Except for the limited purpose of indicating that 426 | material is shared under a Creative Commons public license or as 427 | otherwise permitted by the Creative Commons policies published at 428 | creativecommons.org/policies, Creative Commons does not authorize the 429 | use of the trademark "Creative Commons" or any other trademark or logo 430 | of Creative Commons without its prior written consent including, 431 | without limitation, in connection with any unauthorized modifications 432 | to any of its public licenses or any other arrangements, 433 | understandings, or agreements concerning use of licensed material. For 434 | the avoidance of doubt, this paragraph does not form part of the 435 | public licenses. 
436 | 437 | Creative Commons may be contacted at creativecommons.org. 438 | 439 | 440 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 微信公众号/文章 获取(Access_wechat_article) 2 | 3 | 更新时间:2025年7月 4 | 5 | 本项目是基于Python语言的爬虫程序,支持对微信公众号文章内容获取 6 | 7 | 目前支持 Windows / Linux 开箱即用,**建议使用虚拟环境运行项目** 8 | 9 | 如果感兴趣,请 **Fork** 项目后自行研究使用 10 | 11 | 使用过程中如遇到错误,欢迎提交 [issues](https://github.com/yeximm/Access_wechat_article/issues) 来讨论 12 | 13 | **注**:请在 [GitHub](https://github.com/) 平台提交 [issues](https://github.com/yeximm/Access_wechat_article/issues) 14 | 15 | ## 一、主要功能 16 | 17 | 1. 获取**公众号主页链接**,通过微信内置浏览器可直接打开 18 | 2. 获取公众号**已发布**的文章列表(**微信公众号**下的历史文章) 19 | 3. 批量下载公众号文章的**网页文本数据** 20 | 4. 获取微信公众号文章的**所有信息**,如阅读量、点赞数、转发数、评论、评论点赞等信息。 21 | 22 | ## 二、项目所需环境及工具 23 | 24 | 1. 系统环境:Windows 10 x64 25 | 2. 程序运行环境:python 3.12 26 | 3. 涉及应用:微信**PC版**,当前项目适配的微信版本为3.9.11.25 27 | 4. 使用工具:fiddler 28 | 29 | ## 三、程序使用 30 | 31 | ### 3.1 下载 / Download 32 | 33 | - 下载地址:[https://github.com/yeximm/Access_wechat_article/releases](https://github.com/yeximm/Access_wechat_article/releases) 34 | - 👆👆👆以上为本项目发布页地址,选取所需版本下载即可。 35 | 36 | 37 | - 存储库快照:[Github_master](https://github.com/yeximm/Access_wechat_article/archive/refs/heads/master.zip) 38 | - 存储库快照等同于 [Releases](https://github.com/yeximm/Access_wechat_article/releases) 中的 [Source Code (zip)](https://github.com/yeximm/Access_wechat_article/archive/refs/heads/master.zip) 等,包含 `README` 等内容 39 | 40 | ### 3.2 Python环境配置 41 | 42 | (1)创建虚拟环境 43 | 44 | ```bash 45 | python -m venv access_wechat 46 | ``` 47 | 48 | 命令末尾的 `access_wechat` 是存放虚拟环境的目录名,可自行指定;社区惯例通常将其命名为 `venv`(这是一个不成文的规定),本文后续命令均以 `access_wechat` 为例。 49 | 50 | (2)**激活**环境 51 | 52 | - Windows 53 | 54 | ```bash 55 | .\access_wechat\Scripts\activate 56 | ``` 57 | 58 | - Unix/macOS 59 | 60 | ```bash 61 | source access_wechat/bin/activate 62 | ``` 63 | 64 | (3)退出环境 65 | 66 | ```bash 67 | deactivate 68 | ``` 69 | 70 | ### 3.3 安装包文件 71 | 72 | 
`requirements.txt`中包含所需python包文件名称,用来批量安装python包文件 73 | 74 | 安装命令: 75 | 76 | ```bash 77 | pip install -r requirements.txt 78 | ``` 79 | 80 | ### 3.4 运行参数 81 | 82 | 1. 项目主文件为:`main.py`,其功能调用方式详见于此。 83 | 项目中**生成文件的存储路径**为:`./all_data`(该目录由程序**自动创建**) 84 | 2. 运行命令: 85 | 86 | 1. 首先进入**虚拟环境**(详见**激活**虚拟环境) 87 | 88 | 2. 安装python包文件(如已安装则进行下一步) 89 | 90 | 3. 在项目目录运行: 91 | 92 | - ```bash 93 | python main.py 94 | ``` 95 | 96 | 4. 根据控制台提示输入 97 | 98 | 5. 如需**自定义功能**,参照`main.py`中的函数调用方式自行编写。 99 | 100 | ## 四、功能截图 101 | 102 | ### 4.1 功能1 103 | 104 | ![function1](./README/function1.png)![function1.1](./README/function1.1.png) 105 | 106 | ### 4.2 功能2 107 | 108 | ![function2](./README/function2.png) 109 | 110 | ![function2.1](./README/function2.1.png) 111 | 112 | ### 4.3 功能3 113 | 114 | ![function3](./README/function3.png) 115 | 116 | ![function3.1](./README/function3.1.png) 117 | 118 | ### 4.4 功能4 119 | 120 | ![function4](./README/function4.png) 121 | 122 | ## 五、程序流程图 123 | 124 | ![wechat_article_drawio](./README/wechat_article_drawio.png) 125 | 126 | ## 六、鼓励一下 127 | 128 | 开源不易,若此项目有帮到你,望你能动用你的发财小手**Star**☆一下。 129 | 130 | 如有遇到代码方面的问题,欢迎一起讨论,你的鼓励是这个项目继续更新的最大动力! 131 | 132 |

133 | 134 |

135 | 136 | 137 | 另外,十分感谢大家对于本项目的关注。 138 | 139 | [![Stargazers repo roster for @yeximm/Access_wechat_article](https://reporoster.com/stars/yeximm/Access_wechat_article)](https://github.com/yeximm/Access_wechat_article/stargazers) 140 | [![Forkers repo roster for @yeximm/Access_wechat_article](https://reporoster.com/forks/yeximm/Access_wechat_article)](https://github.com/yeximm/Access_wechat_article/network/members) 141 | 142 | ## LICENSE 143 | 144 | 本作品采用许可协议 Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International ,简称 **[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/)**。 145 | 146 | 所有以任何方式查看本仓库内容的人、或直接或间接使用本仓库内容的使用者都应仔细阅读此声明。本仓库管理者保留随时更改或补充此免责声明的权利。一旦使用、复制、修改了本仓库内容,则视为您已接受此免责声明。 147 | 148 | 项目内容仅供学习研究,请勿用于商业用途。如对本仓库内容的功能有需求,应自行开发相关功能。所有基于本仓库内容的源代码,进行的任何修改,为其他个人或组织的自发行为,与本仓库内容没有任何直接或间接的关系,所造成的一切后果亦与本仓库内容和本仓库管理者无关。 149 | 150 | 本仓库内容中涉及的第三方硬件、软件等,与本仓库内容没有任何直接或间接的关系。本仓库内容仅对部署和使用过程进行客观描述,不代表支持使用任何第三方硬件、软件。使用任何第三方硬件、软件,所造成的一切后果由使用的个人或组织承担,与本仓库内容无关。 151 | 152 | ## Star History 153 | 154 | [![Star History Chart](https://api.star-history.com/svg?repos=yeximm/Access_wechat_article&type=Date)](https://www.star-history.com/#yeximm/Access_wechat_article&Date) 155 | 156 | -------------------------------------------------------------------------------- /README/20250316180200.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/20250316180200.jpg -------------------------------------------------------------------------------- /README/function1.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function1.1.png -------------------------------------------------------------------------------- /README/function1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function1.png -------------------------------------------------------------------------------- /README/function2.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function2.1.png -------------------------------------------------------------------------------- /README/function2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function2.png -------------------------------------------------------------------------------- /README/function3.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function3.1.png -------------------------------------------------------------------------------- /README/function3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function3.png -------------------------------------------------------------------------------- /README/function4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/function4.png -------------------------------------------------------------------------------- /README/qrcode_1749894334903.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/qrcode_1749894334903.jpg -------------------------------------------------------------------------------- /README/wechat_article_drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/wechat_article_drawio.png -------------------------------------------------------------------------------- /README/程序流程图.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/README/程序流程图.drawio.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from Access_articles import * 2 | 3 | 4 | if __name__=="__main__": 5 | app = ArticleDetail() 6 | print('默认存储路径为:' + app.root_path) 7 | screen_text = '''请输入数字键! 8 | 数字键1:获取公众号主页链接(输入公众号下任意一篇已发布的文章链接即可) 9 | 数字键2:获取公众号下文章列表(每页约有文章几十篇) 10 | 数字键3:下载文章内容,自动下载文章列表中所有文章内容 11 | 数字键4:同功能3,下载文章内容,包括单个文章的文本内容 + 阅读量、点赞数等信息 12 | (请注意请求间隔,若请求太多太快可能会触发封禁!!) 
13 | 输入其他任意字符退出!''' 14 | print('欢迎使用,' + screen_text) 15 | while True: 16 | text = str(input('请输入功能数字:')) 17 | 18 | if text == '1': 19 | random_url = (input('(默认公众号主页链接为“研招网资讯”,按回车键使用)\n请输入公众号下任意一篇已发布的文章链接:') or 20 | 'https://mp.weixin.qq.com/s/4r_LKJu0mOeUc70ZZXK9LA') 21 | app.get_article_link(random_url) 22 | print('\n' + screen_text) 23 | 24 | elif text == '2': 25 | access_token = input('\n以下内容需要用到fiddler工具!!!!!\n(1)在微信客户端打开步骤1获取到的链接,\n' 26 | '(2)在fiddler中查看——主机地址为https://mp.weixin.qq.com,URL地址为:/mp/profile_ext?acti\n' 27 | '(3)选中此项后按快捷键:Ctrl+U,复制此网址到剪贴板\n(4)将该内容粘贴到此处 (づ ̄ 3 ̄)づ\n请输入复制的链接:') 28 | pages = input('\n########## 默认获取第 1 页文章(约15篇)。如需公众号下全部文章,请输入:0 ##########\n' 29 | '请估算后输入需要下载的最新发布文章的页数(例:1):') or 1 30 | app.access_origin_list(access_token, int(pages)) 31 | print('\n' + screen_text) 32 | 33 | elif text == '3': # 该功能不需要token 34 | text_names3 = input('请输入 已下载文章列表的公众号名称 或 公众号的一篇文章链接(例如:泰山风景名胜区):') 35 | save_img = input('是否保存图片?是(输入任意值),否(默认,直接按回车跳过)') or False 36 | app.get_list_article(text_names3, save_img) 37 | print('\n' + screen_text) 38 | 39 | elif text == '4': 40 | access_token = input('\n以下内容需要用到fiddler工具!!!!!\n(1)在微信客户端打开步骤1获取到的链接,\n' 41 | '(2)在fiddler中查看——主机地址为https://mp.weixin.qq.com,URL地址为:/mp/profile_ext?acti\n' 42 | '(3)选中此项后按快捷键:Ctrl+U,复制此网址到剪贴板\n(4)将该内容粘贴到此处 (づ ̄ 3 ̄)づ\n请输入复制的链接:') 43 | app.get_detail_list(access_token) 44 | print('\n未成功获取的链接已保存到本地。' + '\n' + screen_text) 45 | 46 | else: 47 | print('\n已成功退出!') 48 | break 49 | 50 | 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeximm/Access_wechat_article/7bd5253a013e3d1144fd76c34a1248cfc74676c1/requirements.txt --------------------------------------------------------------------------------