├── .gitignore
├── LICENSE
├── README.md
├── data_extractor.py
├── log.py
├── main.py
├── ocr
└── __init__.py
├── print_beautify.py
├── requirements.txt
├── setting.py
├── spider
├── __init__.py
├── article_spider.py
├── comment_spider.py
└── user_spider.py
├── static
├── encrypt.js
└── encrypt_old.js
├── utils.py
└── zhihu_client.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | /static/cookies.pick
107 | .idea/
108 | cookies.pick
109 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 huvvao
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # zhihu-terminal
2 | 命令行版知乎
3 | 灵感来自[duduainankai/zhihu-terminal](https://github.com/duduainankai/zhihu-terminal)
4 |
5 | ## 目前功能
6 |
7 |
8 |
9 | ## GIF演示
10 |
11 |
12 |
13 | ## 项目介绍
14 | 本项目为知乎的终端版实现,基于asyncio实现
15 |
16 | 运行此项目即可通过命令行操作知乎,目前支持浏览,点赞,感谢等功能,未来将实现知乎网页端的绝大部分功能
17 |
18 | ## 运行环境
19 | Python 3.7
20 |
21 | 项目在 Mac OSX 10.14.5 上进行开发,目前尚未适配 Windows 系统
22 |
23 | ## 准备
24 |
25 | 建议拉取时仅拉取最近一次提交:历史提交中存在一个比较大的动图,完整拉取的时间会较长
26 | ```
27 | git clone --depth 1 git@github.com:wf1314/zhihu-terminal.git
28 | ```
29 | 安装Python 3.7的环境后执行
30 | ```
31 | pip install -r requirements.txt
32 | ```
33 |
34 | ## 本地运行
35 |
36 | 找到项目中的[setting.py](/setting.py)修改账号,密码等必填项
37 |
38 | 执行:
39 |
40 | ```
41 | python main.py
42 | ```
43 |
44 | ## 开发中的功能:
45 |
46 | 获取关注内容(TODO)
47 |
48 | 收藏回答(TODO)
49 |
50 | 回复评论(TODO)
51 |
52 | 验证码自动识别(TODO)
53 |
54 | 查看用户主页信息(TODO)
55 |
--------------------------------------------------------------------------------
/data_extractor.py:
--------------------------------------------------------------------------------
1 | """
2 | 处理从知乎获取到的数据,去除不需要的数据
3 | """
4 | import json
5 | from pyquery import PyQuery as pq
6 | from spider.article_spider import ArticleSpider
7 | from spider.comment_spider import CommentSpider
8 | from spider.user_spider import UserSpider
9 |
10 |
class DataExtractor(ArticleSpider, CommentSpider, UserSpider):
    """Reduce raw Zhihu responses to the fields the terminal UI consumes.

    Every coroutine here awaits a spider method through ``super()`` and
    reshapes the returned payload into plain dicts and lists.
    """

    @staticmethod
    def _author_summary(author: dict) -> dict:
        """Return the display fields of an article author.

        ``name`` and ``avatar_url`` are required (``KeyError`` if absent);
        the remaining fields default to ``None``.
        """
        return {
            'name': author['name'],
            'headline': author.get('headline'),
            'head': author['avatar_url'],
            'gender': author.get('gender'),
            'url': author.get('url'),
        }

    @staticmethod
    def _member_summary(member) -> dict:
        """Return the display fields of an optional member dict.

        A missing member (``None`` or empty) and missing keys both yield
        ``None`` values instead of raising.
        """
        member = member or {}
        return {
            'name': member.get('name'),
            'headline': member.get('headline'),
            # NOTE(review): article authors expose the avatar as 'avatar_url';
            # presumably members do too. 'head' is kept as a fallback.
            'head': member.get('avatar_url', member.get('head')),
            'gender': member.get('gender'),
            'url': member.get('url'),
        }

    async def get_self_info(self) -> dict:
        """Return name, headline, avatar, gender, VIP info and URL of the
        logged-in user.

        :return: dict with keys ``name``, ``headline``, ``head``, ``gender``,
            ``vip_info`` and ``url``
        """
        result = await super().get_self_info()
        output = {
            'name': result['name'],
            'headline': result['headline'],
            # Misspelled key emitted by earlier versions; kept so existing
            # readers of it keep working.
            'haealine': result['headline'],
            'head': result['avatar_url'],
            'gender': result['gender'],
            'vip_info': result['vip_info'],
            'url': result['url'],
        }
        self.logger.debug(output)
        return output

    async def get_recommend_article(self) -> list:
        """Return the recommended feed as a list of article summaries.

        :return: one dict per feed entry, each carrying a ``question`` sub-dict
        """
        result = await super().get_recommend_article()
        output = []
        for d in result['data']:
            target = d['target']
            question = target.get('question')
            # 'or {}' also covers keys that are present but null.
            playlist = (target.get('thumbnail_extra_info') or {}).get('playlist')
            article_info = {
                'author': self._author_summary(target['author']),
                'excerpt': target.get('excerpt_new') or target.get('excerpt'),
                'content': target['content'],
                # zvideo entries carry the count under 'vote_count' instead.
                'voteup_count': target.get('voteup_count', target.get('vote_count')),
                'visited_count': target.get('visited_count'),
                'thanks_count': target.get('thanks_count', 0),
                'comment_count': target['comment_count'],
                'id': str(target['id']),
                'type': target['type'],
                'created_time': d['created_time'],
                'updated_time': d['updated_time'],
            }
            if target['type'] == 'zvideo' and playlist:
                # Videos have no readable body: append the HD stream URL and
                # replace the excerpt with a marker.
                hd_url = (playlist.get('hd') or {}).get('url', '')
                article_info['content'] += f'\n{hd_url}'
                article_info['excerpt'] = '**video**'
            if question:
                question = {
                    'author': self._member_summary(question.get('author')),
                    'title': question['title'],
                    'url': question['url'],
                    'id': str(question['id']),
                    'type': 'normal',
                }
            else:
                # Entries without a question are tagged 'market' and get an
                # empty question id.
                question = {
                    'title': target['title'],
                    'url': target.get('url'),
                    'type': 'market',
                    'id': '',
                    'author': target['author'],
                }
            article_info['question'] = question
            output.append(article_info)
        self.logger.debug(output)
        return output

    def extract_comments(self, result: dict) -> tuple:
        """Extract comment summaries and paging info from a comments payload.

        Child comments are flattened in place: their ``author`` and
        ``reply_to_author`` entries are replaced by the nested ``member``
        dict, because they are passed through unchanged in the output.

        :param result: payload with ``data`` (list of comments) and ``paging``
        :return: ``(comments, paging)``
        """
        output = []
        for d in result['data']:
            for child in d['child_comments']:
                # Tolerate already-flattened entries and a missing/null reply
                # target so the method is safe to call twice on one payload.
                child_author = child['author']
                child['author'] = child_author.get('member', child_author)
                reply_to = child.get('reply_to_author') or {}
                child['reply_to_author'] = reply_to.get('member') or reply_to

            reply_to_author = (d.get('reply_to_author') or {}).get('member')
            comment_info = {
                'author': self._member_summary(d['author']['member']),
                'content': d['content'],
                'created_time': d['created_time'],
                'child_comment_count': d['child_comment_count'],
                'id': str(d['id']),
                'vote_count': d['vote_count'],
                'voting': d['voting'],
                'type': d['type'],
                'featured': d.get('featured'),  # whether this is a hot comment
                'reply_to_author': self._member_summary(reply_to_author),
                'child_comments': d['child_comments'],
            }
            output.append(comment_info)
        self.logger.debug(output)
        paging = result['paging']
        return output, paging

    async def get_comments(self, uid: str, typ: str = 'answer') -> tuple:
        """Fetch and extract the comments of one item.

        :param uid: id of the item whose comments are requested
        :param typ: item type forwarded to the spider (default ``'answer'``)
        :return: ``(comments, paging)``
        """
        result = await super().get_comments(uid, typ)
        return self.extract_comments(result)

    async def get_comments_by_url(self, url: str) -> tuple:
        """Fetch and extract the comments page found at *url*.

        :param url: comments URL, e.g. a ``paging['next']`` value
        :return: ``(comments, paging)``
        """
        result = await super().get_comments_by_url(url)
        return self.extract_comments(result)

    async def get_question_details(self, question_id: str, uid: str) -> dict:
        """Return the details of a question parsed from its HTML page.

        The page embeds its state as JSON in the ``#js-initialData`` element;
        the first entry of ``initialState.entities.questions`` is used.

        :param question_id: id of the question
        :param uid: id of an answer under that question
        :return: dict describing the question
        """
        page = await super().get_question_article_first(question_id, uid)
        data = pq(page)('#js-initialData').text()
        state = json.loads(data)
        questions = list(state['initialState']['entities']['questions'].values())[0]
        output = {
            'id': questions['id'],
            'type': questions['type'],
            'title': questions['title'],
            'creTime': questions.get('creTime') or questions.get('created'),
            'excerpt': questions['excerpt'],
            'detail': questions['detail'],
            'author': questions['author'],
            'answerCount': questions['answerCount'],
            'visitCount': questions['visitCount'],
            'comment_count': questions['commentCount'],
            'followerCount': questions['followerCount'],
        }
        return output

    # TODO: add get_first_answer_by_qustion(question_id, uid) that extracts the
    # first entry of initialState.entities.answers from the same page; that
    # answer may not show up in the later paged queries.

    def extract_article_by_question(self, result: dict) -> list:
        """Extract article summaries from a question's answer listing.

        :param result: payload whose ``data`` is a list of answers
        :return: one dict per answer, each carrying a ``question`` sub-dict
        """
        output = []
        for target in result['data']:
            question = target.get('question')
            article_info = {
                'author': self._author_summary(target['author']),
                'excerpt': target.get('excerpt_new') or target.get('excerpt'),
                'content': target['content'],
                'voteup_count': target['voteup_count'],
                'visited_count': target.get('visited_count', 0),
                'thanks_count': target.get('thanks_count', 0),
                'comment_count': target['comment_count'],
                'id': str(target['id']),
                'type': target['type'],
                'created_time': target['created_time'],
                'updated_time': target['updated_time'],
            }
            if question:
                question = {
                    'title': question['title'],
                    'url': question['url'],
                    'id': str(question['id']),
                    'type': 'normal',
                }
            else:
                question = {
                    'title': target['title'],
                    'url': target['url'],
                    'type': 'market',
                    'id': '',
                }
            article_info['question'] = question
            output.append(article_info)
        return output

    async def get_article_by_question(self, question_id, offset: int = 0, limit: int = 3) -> tuple:
        """Fetch one page of answers for a question.

        :param question_id: id of the question
        :param offset: offset forwarded to the spider
        :param limit: page size forwarded to the spider
        :return: ``(articles, paging)``
        """
        result = await super().get_article_by_question(question_id, offset, limit)
        output = self.extract_article_by_question(result)
        paging = result['paging']
        self.logger.debug(output)
        return output, paging

    async def get_article_by_question_url(self, url: str) -> tuple:
        """Fetch the page of answers found at *url*.

        :param url: answers URL, e.g. a ``paging['next']`` value
        :return: ``(articles, paging)``
        """
        result = await super().get_article_by_question_url(url)
        output = self.extract_article_by_question(result)
        paging = result['paging']
        self.logger.debug(output)
        return output, paging
280 |
--------------------------------------------------------------------------------
/log.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | from logging.handlers import RotatingFileHandler
4 | from setting import DEBUG
5 | from setting import LOG_DIR
6 |
7 |
def get_logger():
    """Return this module's logger, configuring it on first use.

    The logger writes to ``log.log`` (rotating, 30 MB x 10 backups) inside
    ``LOG_DIR`` (``/tmp/zhihu/`` when unset) and to the default stream.
    Level is DEBUG when ``DEBUG`` is truthy, otherwise ERROR.

    :return: the configured ``logging.Logger``
    """
    log_dir = LOG_DIR if LOG_DIR else '/tmp/zhihu/'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(log_dir, exist_ok=True)
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG if DEBUG else logging.ERROR)
    # getLogger returns the same object on every call; attach handlers only
    # once, otherwise each extra call duplicates every log line.
    if not log.handlers:
        default_format = logging.Formatter(
            '[%(levelname)1.1s %(asctime)s.%(msecs)03d %(module)s:%(lineno)d]%(message)s ')
        file_handler = RotatingFileHandler(
            os.path.join(log_dir, 'log.log'),
            maxBytes=1024 * 1024 * 30,
            backupCount=10,
            encoding='utf-8',  # messages contain non-ASCII text
        )
        stream_handler = logging.StreamHandler()
        for handler in (file_handler, stream_handler):
            handler.setFormatter(fmt=default_format)
            log.addHandler(handler)
        log.debug('----------初始化日志-----------')
    return log
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import asyncio
4 | from zhihu_client import ZhihuClient
5 | from data_extractor import DataExtractor
6 |
7 | from print_beautify import print_recommend_article
8 | from print_beautify import print_article_content
9 | from print_beautify import print_comments
10 | from print_beautify import print_question
11 | from print_beautify import print_vote_thank
12 | from print_beautify import print_vote_comments
13 | from print_beautify import print_logo
14 | from print_beautify import print_save
15 |
16 | from utils import print_colour
17 | from utils import get_com_func
18 |
19 | from setting import USER
20 | from setting import PASSWORD
21 | from setting import SAVE_DIR
22 |
23 |
def help_main():
    """Return the help banner shown in the top-level menu."""
    border = "**********************************************************"
    rows = (
        "",
        border,
        "** remd: 查看推荐内容",
        "** aten: 查看动态内容",
        "** q: 退出系统",
        border,
        "",
    )
    return "\n".join(rows)
32 |
33 |
def help_recommend():
    """Return the help banner shown in the recommendations menu."""
    border = "**********************************************************\n"
    entries = [
        "** f: 刷新推荐内容",
        "** r: 再次显示(重新显示回答)",
        "** read:article_id 查看回答具体内容(进入下一级菜单)",
        "** question:question_id 查看问题下的其他回答(进入下一级菜单)",
        "** back: 返回上层",
        "** q: 退出系统",
    ]
    body = "".join(entry + "\n" for entry in entries)
    return "\n" + border + body + border
45 |
46 |
def help_article():
    """Return the help banner shown in the article menu."""
    border = "**********************************************************"
    commands = (
        "** back 返回上层",
        "** q 退出系统",
        "** save 保存到本地",
        "** enshrine 收藏回答",
        "** question 查看问题下的其他回答",
        "** up 赞同",
        "** down 反对",
        "** neutral 中立,可以取消对回答的赞同或反对",
        "** thank 感谢",
        "** unthank 取消感谢",
        "** comment 评论相关(查看评论, 回复评论等将进入下一级菜单)",
    )
    pieces = ["", border]
    pieces.extend(commands)
    pieces.append(border)
    pieces.append("")
    return "\n".join(pieces)
63 |
64 |
def help_comments():
    """Return the help banner shown in the comment-list menu."""
    border = "**********************************************************"
    lines = [""]
    lines.append(border)
    for entry in (
        "** back 返回上层",
        "** q 退出系统",
        "** n 显示下一页",
        "** p 显示上一页",
        "** com:comment_id 回复评论,点赞等功能(进入下级菜单)",
    ):
        lines.append(entry)
    lines.append(border)
    return "\n".join(lines) + "\n"
75 |
76 |
def help_comments2():
    """Return the help banner shown in the single-comment menu."""
    border = "**********************************************************"
    menu = "\n".join((
        "** back 返回上层",
        "** q 退出系统",
        "** up 点赞",
        "** neutral 中立,可以取消对点赞",
        "** reply:content 回复评论",
    ))
    return f"\n{border}\n{menu}\n{border}\n"
87 |
88 |
def help_question():
    """Return the help banner shown in the question menu."""
    border = "**********************************************************\n"
    text = "\n"
    text += border
    text += "** back 返回上层\n"
    text += "** q 退出系统\n"
    text += "** qsdl 查看问题详情\n"
    text += "** read:article_id 查看回答具体内容(进入下一级菜单)\n"
    text += "** n 显示下一页\n"
    text += "** p 显示上一页\n"
    text += "** r 再次显示(重新显示回答)\n"
    text += border
    return text
101 |
102 |
def exit(cmd: str):
    """Terminate the program via ``sys.exit()`` when *cmd* is a quit keyword.

    Any other value returns ``None`` without side effects.
    """
    quit_words = ('q', 'quit', 'exit')
    if cmd not in quit_words:
        return
    sys.exit()
106 |
107 |
def clear():
    """Clear the terminal screen.

    Runs ``cls`` when ``os.name`` is ``'nt'`` (Windows) and ``clear``
    everywhere else.
    """
    os.system('cls' if os.name == 'nt' else 'clear')
110 |
111 |
async def deal_comments_by_id(spider, uid):
    """Run the command loop for a single comment.

    :param spider: object providing the ``endorse_comment`` coroutine
    :param uid: id of the comment being acted on
    :return: ``None`` once the user enters ``back``
    """
    while True:
        print_colour('', 'yellow')
        raw = input(help_comments2())
        # Split on the first ':' only and lower-case just the command word,
        # so reply text keeps its case and may itself contain ':'.
        action, sep, _reply_text = raw.partition(':')
        action = action.lower()
        if not action:
            print_colour('输入有误!', 'red')
            continue
        exit(action)
        if action == 'back':
            break
        elif action == 'up':
            result = await spider.endorse_comment(uid, False)
            print_vote_comments(result, 'up')
        elif action == 'neutral':
            result = await spider.endorse_comment(uid, True)
            print_vote_comments(result, 'neutral')
        elif action == 'reply' and sep:
            # TODO: reply to the comment; the intended payload is
            # {'content': _reply_text, 'replyToId': uid}.
            print_colour('功能还在开发中...', 'red')
        else:
            print_colour('输入有误!', 'red')
147 |
148 |
async def deal_comments(spider, result, paging):
    """Run the command loop for a page of comments.

    :param spider: object providing the ``get_comments_by_url`` coroutine
    :param result: comments of the page currently displayed
    :param paging: paging dict of that page (``is_end``, ``is_start``,
        ``next``, ``previous``)
    :return: ``None`` once the user enters ``back``
    """
    while True:
        # Ids selectable on the current page; rebuilt every pass because the
        # n/p commands replace `result`. Ids are stringified so they can be
        # compared with typed input.
        comment_ids = {d['id'] for d in result}
        comment_ids.update(
            str(child['id'])
            for d in result
            for child in d.get('child_comments') or []
        )
        print_colour('', 'yellow')
        comm_cmd = input(help_comments()).lower().split(':')
        command = comm_cmd[0]
        # split() never returns an empty list, so test the command word.
        if not command:
            print_colour('输入有误!', 'red')
            continue
        exit(command)
        if command == 'back':
            break
        elif command == 'n':
            if paging.get('is_end'):
                print_colour('已是最后一页!', 'red')
                continue
            result, paging = await spider.get_comments_by_url(paging['next'])
            print_comments(result)
        elif command == 'p':
            if paging.get('is_start'):
                print_colour('已是第一页!', 'red')
                continue
            result, paging = await spider.get_comments_by_url(paging['previous'])
            print_comments(result)
        elif command == 'com':
            if len(comm_cmd) != 2:
                print_colour('输入有误!', 'red')
                continue
            if comm_cmd[1] not in comment_ids:
                print_colour('输入id有误!', 'red')
                continue
            await deal_comments_by_id(spider, comm_cmd[1])
        else:
            print_colour('输入有误!', 'red')
202 |
203 |
async def deal_article(spider, article):
    """Run the command loop for one displayed article.

    :param spider: object providing the vote/thank and comment coroutines
    :param article: article summary dict (reads ``id``, ``type`` and
        ``question``)
    :return: ``None`` once the user enters ``back``
    """
    while True:
        print_colour('', 'yellow')
        arl_cmd = input(help_article()).lower()
        if not arl_cmd:
            print_colour('输入有误!', 'red')
            continue
        exit(arl_cmd)
        if arl_cmd == 'back':
            break
        elif arl_cmd in ('up', 'down', 'neutral', 'thank', 'unthank'):
            uid = article.get('id')
            # get_com_func maps the command word to a spider method name.
            func = get_com_func(arl_cmd)
            result = await getattr(spider, func)(uid)
            print_vote_thank(result, arl_cmd)
        elif arl_cmd == 'comment':
            typ = article['type']
            uid = article.get('id')
            result, paging = await spider.get_comments(uid, typ)
            print_comments(result)
            await deal_comments(spider, result, paging)
        elif arl_cmd == 'save':
            print_save(article)
        elif arl_cmd == 'enshrine':
            # TODO: add the answer to a collection
            print_colour('功能还在开发中...', 'red')
        elif arl_cmd == 'question':
            question_id = article.get('question').get('id')
            if not question_id:
                # Items without a question carry an empty question id; there
                # is nothing to list for them.
                print_colour('该内容没有关联的问题!', 'red')
                continue
            await deal_question(spider, question_id, article.get('id'))
        else:
            print_colour('输入有误!', 'red')
249 |
250 |
async def deal_question(spider, question_id, uid):
    """Run the command loop for the answers of one question.

    The first page of answers is fetched and printed before the first prompt.

    :param spider: object providing the question/answer coroutines
    :param question_id: id of the question to browse
    :param uid: id of an answer under the question, passed on to
        ``get_question_details``
    :return: ``None`` once the user enters ``back``
    """
    question_articles, paging = await spider.get_article_by_question(question_id)
    print_recommend_article(question_articles)
    while True:
        print_colour('', 'yellow')
        ques_cmd = input(help_question()).lower().split(':')
        command = ques_cmd[0]
        # split() never returns an empty list, so test the command word.
        if not command:
            print_colour('输入有误!', 'red')
            continue
        exit(command)
        if command == 'read':
            if len(ques_cmd) != 2:
                print_colour('输入有误!', 'red')
                continue
            matches = [d for d in question_articles if d.get('id') == ques_cmd[1]]
            if not matches:
                print_colour('输入id有误!', 'red')
                continue
            print_article_content(matches[0])
            await deal_article(spider, matches[0])
        elif command == 'qsdl':
            question_detail = await spider.get_question_details(question_id, uid)
            print_question(question_detail)
        elif command == 'n':
            if paging.get('is_end'):
                print_colour('已是最后一页!', 'red')
                continue
            question_articles, paging = await spider.get_article_by_question_url(paging['next'])
            print_recommend_article(question_articles)
        elif command == 'p':
            if paging.get('is_start'):
                print_colour('已是第一页!', 'red')
                continue
            question_articles, paging = await spider.get_article_by_question_url(paging['previous'])
            print_recommend_article(question_articles)
        elif command == 'r':
            print_recommend_article(question_articles)
        elif command == 'back':
            break
        else:
            print_colour('输入有误!', 'red')
313 |
async def deal_remd(spider):
    """Run the command loop for the recommended feed.

    The feed is fetched and printed on entry and again after each ``f``.

    :param spider: object providing ``get_recommend_article``
    :return: ``None`` once the user enters ``back``
    """
    is_print = True
    while True:
        if is_print:
            recommend_articles = await spider.get_recommend_article()
            print_recommend_article(recommend_articles)
            is_print = False
        print_colour('', 'yellow')
        remd_cmd = input(help_recommend()).lower().split(':')
        command = remd_cmd[0]
        # split() never returns an empty list, so test the command word.
        if not command:
            print_colour('输入有误!', 'red')
            continue
        exit(command)
        if command == 'f':
            is_print = True
        elif command == 'r':
            print_recommend_article(recommend_articles)
        elif command == 'read':
            if len(remd_cmd) != 2:
                print_colour('输入有误!', 'red')
                continue
            matches = [d for d in recommend_articles if d.get('id') == remd_cmd[1]]
            if not matches:
                print_colour('输入id有误!', 'red')
                continue
            print_article_content(matches[0])
            await deal_article(spider, matches[0])
        elif command == 'question':
            if len(remd_cmd) != 2:
                print_colour('输入有误!', 'red')
                continue
            # Map question id -> id of an article answering it (the last one
            # listed wins when several articles share a question).
            id_map = {d.get('question').get('id'): d.get('id') for d in recommend_articles}
            # Entries without a question have an empty question id; never
            # treat an empty id as a valid selection.
            if not remd_cmd[1] or remd_cmd[1] not in id_map:
                print_colour('输入id有误!', 'red')
                continue
            await deal_question(spider, remd_cmd[1], id_map[remd_cmd[1]])
        elif command == 'back':
            break
        else:
            print_colour('输入有误!', 'red')
369 |
370 |
async def run(client):
    """Greet the logged-in user and run the top-level command loop.

    :param client: logged-in client handed to ``DataExtractor``
    """
    spider = DataExtractor(client)
    me = await spider.get_self_info()
    print_colour(f'hello {me["name"]} 欢迎使用terminal-zhihu!', 'ultramarine')
    while True:
        print_colour('', 'yellow')
        cmd = input(help_main()).lower()
        if cmd:
            exit(cmd)
            if cmd == 'remd':
                await deal_remd(spider)
                continue
            if cmd == 'aten':
                # TODO: fetch the feed of followed users
                print_colour('功能还在开发中...', 'red')
                continue
        print_colour('输入有误!', 'red')
392 |
393 |
def check_setting():
    """Make sure the save directory exists.

    Uses ``SAVE_DIR`` or, when it is empty, ``/tmp/zhihu_save``.
    """
    save_dir = SAVE_DIR or '/tmp/zhihu_save'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)
398 |
399 |
async def login(user, password):
    """Create a ``ZhihuClient`` and log it in.

    Cached cookies are used when the client's cookie file exists; otherwise
    both credentials are required and the program exits if either is empty.

    :param user: account name
    :param password: account password
    :return: the logged-in client
    """
    client = ZhihuClient(user, password)
    # Prefer the cookie cache when it exists.
    load_cookies = os.path.exists(client.cookie_file)
    # Validate the credentials actually passed in, not the module globals.
    if not load_cookies and not (user and password):
        print_colour('请正确配置USER, PASSWORD', 'red')
        sys.exit()
    await client.login(load_cookies=load_cookies)
    return client
417 |
418 |
async def main():
    """Program entry point: check settings, log in, run the UI, clean up.

    The farewell message is always printed; the client is closed only if
    login got far enough to create it.
    """
    # Bound before the try block so the finally clause can tell whether
    # login() succeeded; otherwise an early failure (including the
    # SystemExit raised by login) would be masked by an UnboundLocalError.
    client = None
    try:
        check_setting()
        client = await login(USER, PASSWORD)
        print_logo()
        await run(client)
    finally:
        print_colour('欢迎再次使用')
        await asyncio.sleep(0)
        if client is not None:
            await client.close()
431 |
432 |
if __name__ == '__main__':
    # asyncio.run(main()) is deliberately not used here; the loop returned by
    # get_event_loop() drives the program instead.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(main())
436 |
--------------------------------------------------------------------------------
/ocr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wf1314/zhihu-terminal/bd8cbae26b0027eeffa78b8a4888b3b4aa4a84e0/ocr/__init__.py
--------------------------------------------------------------------------------
/print_beautify.py:
--------------------------------------------------------------------------------
1 | import os
2 | import html2text
3 | from utils import print_colour
4 | from setting import SAVE_DIR
5 |
6 |
def print_logo():
    """Clear the terminal and print the ASCII-art logo in 'ultramarine'."""
    os.system("clear")
    logo = '''
;$$;
#############
#############;#####o
## o#########################
##### $###############################
## ###$ ######! ##########################
## ### $### ################### ######
### ### ##o#######################
###### ;### #### #####################
## ### ###### ######&###############
## ### ###### ## ############ #######
o## ######## ## ##################
##o ### #### #######o#######
## ###### ###############
## #### #############!
### #########
#####& ## o####
###### ## ####*
## !## #####
## ##* ####; ##
##### #####o #####
#### ### ### $###o
### ## ####! $###
## #####
## ##
;## ### ;
##$ ##
####### ##
##### # ##
### ### ###
### ### ##
## ;## ##
## ### ##
### ### ##
#### ##
### ##
##; ##
##$ ##&
## ##
##; ##
## ##;
### ### ##$
### ### ##
###################### #####&&&&&&&&&&&##
### $#####$ ############&$o$###############################
# $#######&o
'''
    print_colour(logo, 'ultramarine')
58 |
59 |
def print_recommend_article(output: list):
    """Print a short summary block for every article in *output*.

    :param output: article summary dicts (each with ``id``, ``question``,
        ``author``, ``excerpt`` and the counters)
    :return: None
    """
    for entry in output:
        print_colour('=' * 60, 'white')
        print_colour('article_id:{}'.format(entry['id']), 'purple')
        print_colour('question_id:{}'.format(entry['question']['id']), 'purple')
        print_colour(entry['question']['title'], 'purple', end='')
        print_colour('({})'.format(entry['author']['name']), 'purple')
        print_colour(entry['excerpt'])
        counters = '*赞同数{} 感谢数{} 评论数{} 浏览数{}*'.format(
            entry.get('voteup_count'),
            entry.get('thanks_count', 0),
            entry.get('comment_count'),
            entry.get('visited_count'),
        )
        print_colour(counters, 'purple')
75 |
76 |
def print_article_content(output: dict):
    """Print an article body as text, followed by a footer with its ids,
    title and source link.

    :param output: article summary dict (reads ``content``, ``question``,
        ``id`` and ``type``)
    :return: None
    """
    content = output['content']
    title = output['question']['title']
    question_id = output['question']['id']
    article_id = output["id"]
    typ = output['type']

    # Pick the link shape: video, column article (no question id), or answer.
    url = f'https://www.zhihu.com/question/{question_id}/answer/{article_id}'
    if typ == 'zvideo':
        url = f'https://www.zhihu.com/zvideo/{article_id}'
    elif article_id and not question_id:
        url = f'https://zhuanlan.zhihu.com/p/{article_id}'

    print_colour(html2text.html2text(content))
    rule = '-----------------------------------------------------'
    footer = (
        rule,
        f'|article_id:{article_id}',
        f'|question_id:{question_id}',
        f'|title:{title}',
        f'|原文链接:{url}',
        rule,
    )
    for line in footer:
        print_colour(line, 'purple')
102 |
103 |
def print_question(question: dict):
    """Print a question's title and its detail text between two rulers.

    :param question: question dict (reads ``title`` and ``detail``)
    :return: None
    """
    title = question['title']
    detail_text = html2text.html2text(question['detail'])
    ruler = '*' * 50
    print_colour(ruler, 'purple')
    for line in (f'标题:{title}', '问题详情:', detail_text):
        print_colour(line)
    print_colour(ruler, 'purple')
119 |
120 |
def _print_single_comment(comment: dict, indent: str = ''):
    """Print one comment (id line, optional hot marker, author, text)."""
    # 'or {}' keeps a missing/null author or reply target from crashing.
    author = (comment.get('author') or {}).get('name')
    reply_to_author = (comment.get('reply_to_author') or {}).get('name')
    print_colour(f'{indent}comment_id:{comment.get("id")}', 'purple')
    if comment.get('featured'):
        print_colour(f'{indent}热评🔥', end='')
    if reply_to_author:
        print_colour(f'{indent}{author}->{reply_to_author}', end='')
    else:
        print_colour(f'{indent}{author}', end='')
    print_colour(f'{indent}(赞:{comment.get("vote_count")}):{comment.get("content")}')


def print_comments(output: list):
    """Print every comment in *output* together with its child comments.

    Child comments are prefixed with a space and each is followed by a
    starred separator; every top-level comment ends with a '=' separator.

    :param output: comment dicts as produced by the data extractor
    :return: None
    """
    for d in output:
        _print_single_comment(d)
        for child in d.get('child_comments') or []:
            # The hot marker is decided by the child's own 'featured' flag,
            # not the parent's.
            _print_single_comment(child, indent=' ')
            print_colour(' *********************************************************', 'blue')
        print_colour('==========================================================', 'blue')
159 |
160 |
def print_vote_thank(output: dict, typ: str):
    """
    Print the outcome of a vote or thank action.

    If ``output`` carries a truthy ``error`` entry, that error is printed in
    red and nothing else. Otherwise the message is chosen by ``typ``:
    ``thank`` / ``unthank`` report ``output['thanks_count']``; ``up`` / ``down``
    and any other value (treated as neutral) report ``output['voteup_count']``.

    :param output: response mapping of the action
    :param typ: action name, e.g. ``thank``, ``unthank``, ``up``, ``down``, ``neutral``
    :return: None
    """
    if output.get('error'):
        print_colour(output.get('error'), 'red')
        return

    # (action name, message prefix, key holding the counter to report)
    known_actions = (
        ('thank', '感谢成功!感谢总数', 'thanks_count'),
        ('unthank', '取消感谢!感谢总数', 'thanks_count'),
        ('up', '赞同成功!赞同总数', 'voteup_count'),
        ('down', '反对成功!赞同总数', 'voteup_count'),
    )
    for action, label, counter_key in known_actions:
        if typ == action:
            print_colour(f'{label}{output[counter_key]}')
            return

    # Anything that is not a recognised action is reported as neutral.
    print_colour(f'保持中立!赞同总数{output["voteup_count"]}')
179 |
180 |
def print_vote_comments(output: dict, typ: str):
    """
    Print the outcome of voting on a comment.

    If ``output`` carries a truthy ``error`` entry, that error is printed in
    red. Otherwise ``up`` and ``neutral`` each print their message together
    with ``output['vote_count']``; any other ``typ`` prints nothing.

    :param output: response mapping of the vote action
    :param typ: action name, ``up`` or ``neutral``
    :return: None
    """
    if output.get('error'):
        print_colour(output.get('error'), 'red')
        return

    if typ == 'up':
        label = '点赞评论成功!被赞总数'
    elif typ == 'neutral':
        label = '保持中立!被赞总数'
    else:
        # Unrecognised action: stay silent.
        return
    print_colour(f'{label}{output["vote_count"]}')
193 |
194 |
195 | def print_save(article: dict):
196 | """
197 | 保存文章到本地
198 | :param article:
199 | :return:
200 | """
201 | uid = article.get('id')
202 | title = article.get('question').get('title')
203 | content = article.get('content')
204 | save_dir = SAVE_DIR or '/tmp/zhihu_save'
205 | file = f'{save_dir}/{title}_{uid}.html'
206 | with open(file, 'w') as f:
207 | head = '
Hello world
`); 4 | window = dom.window; 5 | document = window.document; 6 | 7 | function t(e) { 8 | return (t = 'function' == typeof Symbol && 'symbol' == typeof Symbol.A ? function (e) { 9 | return typeof e; 10 | } 11 | : function (e) { 12 | return e && 'function' == typeof Symbol && e.constructor === Symbol && e !== Symbol.prototype ? 'symbol' : typeof e; 13 | } 14 | )(e); 15 | } 16 | 17 | Object.defineProperty(exports, '__esModule', { 18 | value: !0, 19 | }); 20 | var A = '2.0' 21 | , __g = {}; 22 | 23 | function s() { 24 | } 25 | 26 | function i(e) { 27 | this.t = (2048 & e) >> 11, 28 | this.s = (1536 & e) >> 9, 29 | this.i = 511 & e, 30 | this.h = 511 & e; 31 | } 32 | 33 | function h(e) { 34 | this.s = (3072 & e) >> 10, 35 | this.h = 1023 & e; 36 | } 37 | 38 | function a(e) { 39 | this.a = (3072 & e) >> 10, 40 | this.c = (768 & e) >> 8, 41 | this.n = (192 & e) >> 6, 42 | this.t = 63 & e; 43 | } 44 | 45 | function c(e) { 46 | this.s = e >> 10 & 3, 47 | this.i = 1023 & e; 48 | } 49 | 50 | function n() { 51 | } 52 | 53 | function e(e) { 54 | this.a = (3072 & e) >> 10, 55 | this.c = (768 & e) >> 8, 56 | this.n = (192 & e) >> 6, 57 | this.t = 63 & e; 58 | } 59 | 60 | function o(e) { 61 | this.h = (4095 & e) >> 2, 62 | this.t = 3 & e; 63 | } 64 | 65 | function r(e) { 66 | this.s = e >> 10 & 3, 67 | this.i = e >> 2 & 255, 68 | this.t = 3 & e; 69 | } 70 | 71 | s.prototype.e = function (e) { 72 | e.o = !1; 73 | } 74 | , 75 | i.prototype.e = function (e) { 76 | switch (this.t) { 77 | case 0: 78 | e.r[this.s] = this.i; 79 | break; 80 | case 1: 81 | e.r[this.s] = e.k[this.h]; 82 | } 83 | } 84 | , 85 | h.prototype.e = function (e) { 86 | e.k[this.h] = e.r[this.s]; 87 | } 88 | , 89 | a.prototype.e = function (e) { 90 | switch (this.t) { 91 | case 0: 92 | e.r[this.a] = e.r[this.c] + e.r[this.n]; 93 | break; 94 | case 1: 95 | e.r[this.a] = e.r[this.c] - e.r[this.n]; 96 | break; 97 | case 2: 98 | e.r[this.a] = e.r[this.c] * e.r[this.n]; 99 | break; 100 | case 3: 101 | e.r[this.a] = 
e.r[this.c] / e.r[this.n]; 102 | break; 103 | case 4: 104 | e.r[this.a] = e.r[this.c] % e.r[this.n]; 105 | break; 106 | case 5: 107 | e.r[this.a] = e.r[this.c] == e.r[this.n]; 108 | break; 109 | case 6: 110 | e.r[this.a] = e.r[this.c] >= e.r[this.n]; 111 | break; 112 | case 7: 113 | e.r[this.a] = e.r[this.c] || e.r[this.n]; 114 | break; 115 | case 8: 116 | e.r[this.a] = e.r[this.c] && e.r[this.n]; 117 | break; 118 | case 9: 119 | e.r[this.a] = e.r[this.c] !== e.r[this.n]; 120 | break; 121 | case 10: 122 | e.r[this.a] = t(e.r[this.c]); 123 | break; 124 | case 11: 125 | e.r[this.a] = e.r[this.c] in e.r[this.n]; 126 | break; 127 | case 12: 128 | e.r[this.a] = e.r[this.c] > e.r[this.n]; 129 | break; 130 | case 13: 131 | e.r[this.a] = -e.r[this.c]; 132 | break; 133 | case 14: 134 | e.r[this.a] = e.r[this.c] < e.r[this.n]; 135 | break; 136 | case 15: 137 | e.r[this.a] = e.r[this.c] & e.r[this.n]; 138 | break; 139 | case 16: 140 | e.r[this.a] = e.r[this.c] ^ e.r[this.n]; 141 | break; 142 | case 17: 143 | e.r[this.a] = e.r[this.c] << e.r[this.n]; 144 | break; 145 | case 18: 146 | e.r[this.a] = e.r[this.c] >>> e.r[this.n]; 147 | break; 148 | case 19: 149 | e.r[this.a] = e.r[this.c] | e.r[this.n]; 150 | break; 151 | case 20: 152 | e.r[this.a] = !e.r[this.c]; 153 | } 154 | } 155 | , 156 | c.prototype.e = function (e) { 157 | e.Q.push(e.C), 158 | e.B.push(e.k), 159 | e.C = e.r[this.s], 160 | e.k = []; 161 | for (var t = 0; t < this.i; t++) 162 | e.k.unshift(e.f.pop()); 163 | e.g.push(e.f), 164 | e.f = []; 165 | } 166 | , 167 | n.prototype.e = function (e) { 168 | e.C = e.Q.pop(), 169 | e.k = e.B.pop(), 170 | e.f = e.g.pop(); 171 | } 172 | , 173 | e.prototype.e = function (e) { 174 | switch (this.t) { 175 | case 0: 176 | e.u = e.r[this.a] >= e.r[this.c]; 177 | break; 178 | case 1: 179 | e.u = e.r[this.a] <= e.r[this.c]; 180 | break; 181 | case 2: 182 | e.u = e.r[this.a] > e.r[this.c]; 183 | break; 184 | case 3: 185 | e.u = e.r[this.a] < e.r[this.c]; 186 | break; 187 | case 4: 
188 | e.u = e.r[this.a] == e.r[this.c]; 189 | break; 190 | case 5: 191 | e.u = e.r[this.a] != e.r[this.c]; 192 | break; 193 | case 6: 194 | e.u = e.r[this.a]; 195 | break; 196 | case 7: 197 | e.u = !e.r[this.a]; 198 | } 199 | } 200 | , 201 | o.prototype.e = function (e) { 202 | switch (this.t) { 203 | case 0: 204 | e.C = this.h; 205 | break; 206 | case 1: 207 | e.u && (e.C = this.h); 208 | break; 209 | case 2: 210 | e.u || (e.C = this.h); 211 | break; 212 | case 3: 213 | e.C = this.h, 214 | e.w = null; 215 | } 216 | e.u = !1; 217 | } 218 | , 219 | r.prototype.e = function (e) { 220 | switch (this.t) { 221 | case 0: 222 | for (var t = [], n = 0; n < this.i; n++) 223 | t.unshift(e.f.pop()); 224 | e.r[3] = e.r[this.s](t[0], t[1]); 225 | break; 226 | case 1: 227 | for (var r = e.f.pop(), i = [], o = 0; o < this.i; o++) 228 | i.unshift(e.f.pop()); 229 | e.r[3] = e.r[this.s][r](i[0], i[1]); 230 | break; 231 | case 2: 232 | for (var a = [], s = 0; s < this.i; s++) 233 | a.unshift(e.f.pop()); 234 | e.r[3] = new e.r[this.s](a[0], a[1]); 235 | } 236 | } 237 | ; 238 | var k = function (e) { 239 | for (var t = 66, n = [], r = 0; r < e.length; r++) { 240 | var i = 24 ^ e.charCodeAt(r) ^ t; 241 | n.push(String.fromCharCode(i)), 242 | t = i; 243 | } 244 | return n.join(''); 245 | }; 246 | 247 | function Q(e) { 248 | this.t = (4095 & e) >> 10, 249 | this.s = (1023 & e) >> 8, 250 | this.i = 1023 & e, 251 | this.h = 63 & e; 252 | } 253 | 254 | function C(e) { 255 | this.t = (4095 & e) >> 10, 256 | this.a = (1023 & e) >> 8, 257 | this.c = (255 & e) >> 6; 258 | } 259 | 260 | function B(e) { 261 | this.s = (3072 & e) >> 10, 262 | this.h = 1023 & e; 263 | } 264 | 265 | function f(e) { 266 | this.h = 4095 & e; 267 | } 268 | 269 | function g(e) { 270 | this.s = (3072 & e) >> 10; 271 | } 272 | 273 | function u(e) { 274 | this.h = 4095 & e; 275 | } 276 | 277 | function w(e) { 278 | this.t = (3840 & e) >> 8, 279 | this.s = (192 & e) >> 6, 280 | this.i = 63 & e; 281 | } 282 | 283 | function 
G() { 284 | this.r = [0, 0, 0, 0], 285 | this.C = 0, 286 | this.Q = [], 287 | this.k = [], 288 | this.B = [], 289 | this.f = [], 290 | this.g = [], 291 | this.u = !1, 292 | this.G = [], 293 | this.b = [], 294 | this.o = !1, 295 | this.w = null, 296 | this.U = null, 297 | this.F = [], 298 | this.R = 0, 299 | this.J = { 300 | 0: s, 301 | 1: i, 302 | 2: h, 303 | 3: a, 304 | 4: c, 305 | 5: n, 306 | 6: e, 307 | 7: o, 308 | 8: r, 309 | 9: Q, 310 | 10: C, 311 | 11: B, 312 | 12: f, 313 | 13: g, 314 | 14: u, 315 | 15: w, 316 | }; 317 | } 318 | 319 | Q.prototype.e = function (e) { 320 | switch (this.t) { 321 | case 0: 322 | e.f.push(e.r[this.s]); 323 | break; 324 | case 1: 325 | e.f.push(this.i); 326 | break; 327 | case 2: 328 | e.f.push(e.k[this.h]); 329 | break; 330 | case 3: 331 | e.f.push(k(e.b[this.h])); 332 | } 333 | } 334 | , 335 | C.prototype.e = function (A) { 336 | switch (this.t) { 337 | case 0: 338 | var t = A.f.pop(); 339 | A.r[this.a] = A.r[this.c][t]; 340 | break; 341 | case 1: 342 | var s = A.f.pop() 343 | , i = A.f.pop(); 344 | A.r[this.c][s] = i; 345 | break; 346 | case 2: 347 | var h = A.f.pop(); 348 | A.r[this.a] = eval(h); 349 | } 350 | } 351 | , 352 | B.prototype.e = function (e) { 353 | e.r[this.s] = k(e.b[this.h]); 354 | } 355 | , 356 | f.prototype.e = function (e) { 357 | e.w = this.h; 358 | } 359 | , 360 | g.prototype.e = function (e) { 361 | throw e.r[this.s]; 362 | } 363 | , 364 | u.prototype.e = function (e) { 365 | var t = this 366 | , n = [0]; 367 | e.k.forEach(function (e) { 368 | n.push(e); 369 | }); 370 | var r = function (r) { 371 | var i = new G; 372 | return i.k = n, 373 | i.k[0] = r, 374 | i.v(e.G, t.h, e.b, e.F), 375 | i.r[3]; 376 | }; 377 | r.toString = function () { 378 | return '() { [native code] }'; 379 | } 380 | , 381 | e.r[3] = r; 382 | } 383 | , 384 | w.prototype.e = function (e) { 385 | switch (this.t) { 386 | case 0: 387 | for (var t = {}, n = 0; n < this.i; n++) { 388 | var r = e.f.pop(); 389 | t[e.f.pop()] = r; 390 | } 391 | 
e.r[this.s] = t; 392 | break; 393 | case 1: 394 | for (var i = [], o = 0; o < this.i; o++) 395 | i.unshift(e.f.pop()); 396 | e.r[this.s] = i; 397 | } 398 | } 399 | , 400 | G.prototype.D = function (e) { 401 | for (var t = window.atob(e), n = t.charCodeAt(0) << 8 | t.charCodeAt(1), r = [], i = 2; i < n + 2; i += 2) 402 | r.push(t.charCodeAt(i) << 8 | t.charCodeAt(i + 1)); 403 | this.G = r; 404 | for (var o = [], a = n + 2; a < t.length;) { 405 | var s = t.charCodeAt(a) << 8 | t.charCodeAt(a + 1) 406 | , c = t.slice(a + 2, a + 2 + s); 407 | o.push(c), 408 | a += s + 2; 409 | } 410 | this.b = o; 411 | } 412 | , 413 | G.prototype.v = function (e, t, n) { 414 | for (t = t || 0, 415 | n = n || [], 416 | this.C = t, 417 | 'string' == typeof e ? this.D(e) : (this.G = e, 418 | this.b = n), 419 | this.o = !0, 420 | this.R = Date.now(); this.o;) { 421 | var r = this.G[this.C++]; 422 | if ('number' != typeof r) 423 | break; 424 | var i = Date.now(); 425 | if (500 < i - this.R) 426 | return; 427 | this.R = i; 428 | try { 429 | this.e(r); 430 | } catch (e) { 431 | this.U = e, 432 | this.w && (this.C = this.w); 433 | } 434 | } 435 | } 436 | , 437 | G.prototype.e = function (e) { 438 | var t = (61440 & e) >> 12; 439 | new this.J[t](e).e(this); 440 | } 441 | , 442 | (new 
G).v('AxjgB5MAnACoAJwBpAAAABAAIAKcAqgAMAq0AzRJZAZwUpwCqACQACACGAKcBKAAIAOcBagAIAQYAjAUGgKcBqFAuAc5hTSHZAZwqrAIGgA0QJEAJAAYAzAUGgOcCaFANRQ0R2QGcOKwChoANECRACQAsAuQABgDnAmgAJwMgAGcDYwFEAAzBmAGcSqwDhoANECRACQAGAKcD6AAGgKcEKFANEcYApwRoAAxB2AGcXKwEhoANECRACQAGAKcE6AAGgKcFKFANEdkBnGqsBUaADRAkQAkABgCnBagAGAGcdKwFxoANECRACQAGAKcGKAAYAZx+rAZGgA0QJEAJAAYA5waoABgBnIisBsaADRAkQAkABgCnBygABoCnB2hQDRHZAZyWrAeGgA0QJEAJAAYBJwfoAAwFGAGcoawIBoANECRACQAGAOQALAJkAAYBJwfgAlsBnK+sCEaADRAkQAkABgDkACwGpAAGAScH4AJbAZy9rAiGgA0QJEAJACwI5AAGAScH6AAkACcJKgAnCWgAJwmoACcJ4AFnA2MBRAAMw5gBnNasCgaADRAkQAkABgBEio0R5EAJAGwKSAFGACcKqAAEgM0RCQGGAYSATRFZAZzshgAtCs0QCQAGAYSAjRFZAZz1hgAtCw0QCQAEAAgB7AtIAgYAJwqoAASATRBJAkYCRIANEZkBnYqEAgaBxQBOYAoBxQEOYQ0giQKGAmQABgAnC6ABRgBGgo0UhD/MQ8zECALEAgaBxQBOYAoBxQEOYQ0gpEAJAoYARoKNFIQ/zEPkAAgChgLGgkUATmBkgAaAJwuhAUaCjdQFAg5kTSTJAsQCBoHFAE5gCgHFAQ5hDSCkQAkChgBGgo0UhD/MQ+QACAKGAsaCRQCOYGSABoAnC6EBRoKN1AUEDmRNJMkCxgFGgsUPzmPkgAaCJwvhAU0wCQFGAUaCxQGOZISPzZPkQAaCJwvhAU0wCQFGAUaCxQMOZISPzZPkQAaCJwvhAU0wCQFGAUaCxQSOZISPzZPkQAaCJwvhAU0wCQFGAkSAzRBJAlz/B4FUAAAAwUYIAAIBSITFQkTERwABi0GHxITAAAJLwMSGRsXHxMZAAk0Fw8HFh4NAwUABhU1EBceDwAENBcUEAAGNBkTGRcBAAFKAAkvHg4PKz4aEwIAAUsACDIVHB0QEQ4YAAsuAzs7AAoPKToKDgAHMx8SGQUvMQABSAALORoVGCQgERcCAxoACAU3ABEXAgMaAAsFGDcAERcCAxoUCgABSQAGOA8LGBsPAAYYLwsYGw8AAU4ABD8QHAUAAU8ABSkbCQ4BAAFMAAktCh8eDgMHCw8AAU0ADT4TGjQsGQMaFA0FHhkAFz4TGjQsGQMaFA0FHhk1NBkCHgUbGBEPAAFCABg9GgkjIAEmOgUHDQ8eFSU5DggJAwEcAwUAAUMAAUAAAUEADQEtFw0FBwtdWxQTGSAACBwrAxUPBR4ZAAkqGgUDAwMVEQ0ACC4DJD8eAx8RAAQ5GhUYAAFGAAAABjYRExELBAACWhgAAVoAQAg/PTw0NxcQPCQ5C3JZEBs9fkcnDRcUAXZia0Q4EhQgXHojMBY3MWVCNT0uDhMXcGQ7AUFPHigkQUwQFkhaAkEACjkTEQspNBMZPC0ABjkTEQsrLQ=='); 443 | var b = function (e) { 444 | return __g._encrypt(encodeURIComponent(e)); 445 | }; 446 | exports.ENCRYPT_VERSION = A, 447 | exports.default = b; 448 | -------------------------------------------------------------------------------- /static/encrypt_old.js: 
-------------------------------------------------------------------------------- 1 | function s(e) { 2 | return (s = "function" == typeof Symbol && "symbol" == typeof Symbol.t ? function(e) { 3 | return typeof e 4 | } 5 | : function(e) { 6 | return e && "function" == typeof Symbol && e.constructor === Symbol && e !== Symbol.prototype ? "symbol" : typeof e 7 | } 8 | )(e) 9 | } 10 | function i() {} 11 | function h(e) { 12 | this.s = (2048 & e) >> 11, 13 | this.i = (1536 & e) >> 9, 14 | this.h = 511 & e, 15 | this.A = 511 & e 16 | } 17 | function A(e) { 18 | this.i = (3072 & e) >> 10, 19 | this.A = 1023 & e 20 | } 21 | function n(e) { 22 | this.n = (3072 & e) >> 10, 23 | this.e = (768 & e) >> 8, 24 | this.a = (192 & e) >> 6, 25 | this.s = 63 & e 26 | } 27 | function e(e) { 28 | this.i = e >> 10 & 3, 29 | this.h = 1023 & e 30 | } 31 | function a() {} 32 | function c(e) { 33 | this.n = (3072 & e) >> 10, 34 | this.e = (768 & e) >> 8, 35 | this.a = (192 & e) >> 6, 36 | this.s = 63 & e 37 | } 38 | function o(e) { 39 | this.A = (4095 & e) >> 2, 40 | this.s = 3 & e 41 | } 42 | function r(e) { 43 | this.i = e >> 10 & 3, 44 | this.h = e >> 2 & 255, 45 | this.s = 3 & e 46 | } 47 | function k(e) { 48 | this.s = (4095 & e) >> 10, 49 | this.i = (1023 & e) >> 8, 50 | this.h = 1023 & e, 51 | this.A = 63 & e 52 | } 53 | function B(e) { 54 | this.s = (4095 & e) >> 10, 55 | this.n = (1023 & e) >> 8, 56 | this.e = (255 & e) >> 6 57 | } 58 | function f(e) { 59 | this.i = (3072 & e) >> 10, 60 | this.A = 1023 & e 61 | } 62 | function u(e) { 63 | this.A = 4095 & e 64 | } 65 | function C(e) { 66 | this.i = (3072 & e) >> 10 67 | } 68 | function b(e) { 69 | this.A = 4095 & e 70 | } 71 | function g(e) { 72 | this.s = (3840 & e) >> 8, 73 | this.i = (192 & e) >> 6, 74 | this.h = 63 & e 75 | } 76 | function G() { 77 | this.c = [0, 0, 0, 0], 78 | this.o = 0, 79 | this.r = [], 80 | this.k = [], 81 | this.B = [], 82 | this.f = [], 83 | this.u = [], 84 | this.C = !1, 85 | this.b = [], 86 | this.g = 
[], 87 | this.G = !1, 88 | this.Q = null, 89 | this.R = null, 90 | this.w = [], 91 | this.x = 0, 92 | this.D = { 93 | 0: i, 94 | 1: h, 95 | 2: A, 96 | 3: n, 97 | 4: e, 98 | 5: a, 99 | 6: c, 100 | 7: o, 101 | 8: r, 102 | 9: k, 103 | 10: B, 104 | 11: f, 105 | 12: u, 106 | 13: C, 107 | 14: b, 108 | 15: g 109 | } 110 | } 111 | Object.defineProperty(exports, "__esModule", { 112 | value: !0 113 | }); 114 | var t = "1.1" 115 | , __g = {}; 116 | i.prototype.M = function(e) { 117 | e.G = !1 118 | } 119 | , 120 | h.prototype.M = function(e) { 121 | switch (this.s) { 122 | case 0: 123 | e.c[this.i] = this.h; 124 | break; 125 | case 1: 126 | e.c[this.i] = e.k[this.A] 127 | } 128 | } 129 | , 130 | A.prototype.M = function(e) { 131 | e.k[this.A] = e.c[this.i] 132 | } 133 | , 134 | n.prototype.M = function(e) { 135 | switch (this.s) { 136 | case 0: 137 | e.c[this.n] = e.c[this.e] + e.c[this.a]; 138 | break; 139 | case 1: 140 | e.c[this.n] = e.c[this.e] - e.c[this.a]; 141 | break; 142 | case 2: 143 | e.c[this.n] = e.c[this.e] * e.c[this.a]; 144 | break; 145 | case 3: 146 | e.c[this.n] = e.c[this.e] / e.c[this.a]; 147 | break; 148 | case 4: 149 | e.c[this.n] = e.c[this.e] % e.c[this.a]; 150 | break; 151 | case 5: 152 | e.c[this.n] = e.c[this.e] == e.c[this.a]; 153 | break; 154 | case 6: 155 | e.c[this.n] = e.c[this.e] >= e.c[this.a]; 156 | break; 157 | case 7: 158 | e.c[this.n] = e.c[this.e] || e.c[this.a]; 159 | break; 160 | case 8: 161 | e.c[this.n] = e.c[this.e] && e.c[this.a]; 162 | break; 163 | case 9: 164 | e.c[this.n] = e.c[this.e] !== e.c[this.a]; 165 | break; 166 | case 10: 167 | e.c[this.n] = s(e.c[this.e]); 168 | break; 169 | case 11: 170 | e.c[this.n] = e.c[this.e]in e.c[this.a]; 171 | break; 172 | case 12: 173 | e.c[this.n] = e.c[this.e] > e.c[this.a]; 174 | break; 175 | case 13: 176 | e.c[this.n] = -e.c[this.e]; 177 | break; 178 | case 14: 179 | e.c[this.n] = e.c[this.e] < e.c[this.a]; 180 | break; 181 | case 15: 182 | e.c[this.n] = e.c[this.e] & e.c[this.a]; 183 | 
break; 184 | case 16: 185 | e.c[this.n] = e.c[this.e] ^ e.c[this.a]; 186 | break; 187 | case 17: 188 | e.c[this.n] = e.c[this.e] << e.c[this.a]; 189 | break; 190 | case 18: 191 | e.c[this.n] = e.c[this.e] >>> e.c[this.a]; 192 | break; 193 | case 19: 194 | e.c[this.n] = e.c[this.e] | e.c[this.a] 195 | } 196 | } 197 | , 198 | e.prototype.M = function(e) { 199 | e.r.push(e.o), 200 | e.B.push(e.k), 201 | e.o = e.c[this.i], 202 | e.k = []; 203 | for (var t = 0; t < this.h; t++) 204 | e.k.unshift(e.f.pop()); 205 | e.u.push(e.f), 206 | e.f = [] 207 | } 208 | , 209 | a.prototype.M = function(e) { 210 | e.o = e.r.pop(), 211 | e.k = e.B.pop(), 212 | e.f = e.u.pop() 213 | } 214 | , 215 | c.prototype.M = function(e) { 216 | switch (this.s) { 217 | case 0: 218 | e.C = e.c[this.n] >= e.c[this.e]; 219 | break; 220 | case 1: 221 | e.C = e.c[this.n] <= e.c[this.e]; 222 | break; 223 | case 2: 224 | e.C = e.c[this.n] > e.c[this.e]; 225 | break; 226 | case 3: 227 | e.C = e.c[this.n] < e.c[this.e]; 228 | break; 229 | case 4: 230 | e.C = e.c[this.n] == e.c[this.e]; 231 | break; 232 | case 5: 233 | e.C = e.c[this.n] != e.c[this.e]; 234 | break; 235 | case 6: 236 | e.C = e.c[this.n]; 237 | break; 238 | case 7: 239 | e.C = !e.c[this.n] 240 | } 241 | } 242 | , 243 | o.prototype.M = function(e) { 244 | switch (this.s) { 245 | case 0: 246 | e.o = this.A; 247 | break; 248 | case 1: 249 | e.C && (e.o = this.A); 250 | break; 251 | case 2: 252 | e.C || (e.o = this.A); 253 | break; 254 | case 3: 255 | e.o = this.A, 256 | e.Q = null 257 | } 258 | e.C = !1 259 | } 260 | , 261 | r.prototype.M = function(e) { 262 | switch (this.s) { 263 | case 0: 264 | for (var t = [], n = 0; n < this.h; n++) 265 | t.unshift(e.f.pop()); 266 | e.c[3] = e.c[this.i](t[0], t[1]); 267 | break; 268 | case 1: 269 | for (var r = e.f.pop(), o = [], i = 0; i < this.h; i++) 270 | o.unshift(e.f.pop()); 271 | e.c[3] = e.c[this.i][r](o[0], o[1]); 272 | break; 273 | case 2: 274 | for (var a = [], c = 0; c < this.h; c++) 275 | 
a.unshift(e.f.pop()); 276 | e.c[3] = new e.c[this.i](a[0],a[1]) 277 | } 278 | } 279 | , 280 | k.prototype.M = function(e) { 281 | switch (this.s) { 282 | case 0: 283 | e.f.push(e.c[this.i]); 284 | break; 285 | case 1: 286 | e.f.push(this.h); 287 | break; 288 | case 2: 289 | e.f.push(e.k[this.A]); 290 | break; 291 | case 3: 292 | e.f.push(e.g[this.A]) 293 | } 294 | } 295 | , 296 | B.prototype.M = function(t) { 297 | switch (this.s) { 298 | case 0: 299 | var s = t.f.pop(); 300 | t.c[this.n] = t.c[this.e][s]; 301 | break; 302 | case 1: 303 | var i = t.f.pop() 304 | , h = t.f.pop(); 305 | t.c[this.e][i] = h; 306 | break; 307 | case 2: 308 | var A = t.f.pop(); 309 | if(A === 'window') { 310 | A = { 311 | encodeURIComponent: function (url) { 312 | return encodeURIComponent(url) 313 | } 314 | } 315 | } else if (A === 'navigator') { 316 | A = { 317 | 'userAgent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' + 318 | '(KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36' 319 | } 320 | } 321 | t.c[this.n] = eval(A) 322 | } 323 | } 324 | , 325 | f.prototype.M = function(e) { 326 | e.c[this.i] = e.g[this.A] 327 | } 328 | , 329 | u.prototype.M = function(e) { 330 | e.Q = this.A 331 | } 332 | , 333 | C.prototype.M = function(e) { 334 | throw e.c[this.i] 335 | } 336 | , 337 | b.prototype.M = function(e) { 338 | var t = this 339 | , n = [0]; 340 | e.k.forEach(function(e) { 341 | n.push(e) 342 | }); 343 | var r = function(r) { 344 | var o = new G; 345 | return o.k = n, 346 | o.k[0] = r, 347 | o.J(e.b, t.A, e.g, e.w), 348 | o.c[3] 349 | }; 350 | r.toString = function() { 351 | return "() { [native code] }" 352 | } 353 | , 354 | e.c[3] = r 355 | } 356 | , 357 | g.prototype.M = function(e) { 358 | switch (this.s) { 359 | case 0: 360 | for (var t = {}, n = 0; n < this.h; n++) { 361 | var r = e.f.pop(); 362 | t[e.f.pop()] = r 363 | } 364 | e.c[this.i] = t; 365 | break; 366 | case 1: 367 | for (var o = [], i = 0; i < this.h; i++) 368 | o.unshift(e.f.pop()); 369 | 
e.c[this.i] = o 370 | } 371 | } 372 | , 373 | G.prototype.v = function(e) { 374 | for (var t = Buffer.from(e, 'base64').toString('binary'), n = [], r = 0; r < t.length - 1; r += 2) 375 | n.push(t.charCodeAt(r) << 8 | t.charCodeAt(r + 1)); 376 | this.b = n 377 | } 378 | , 379 | G.prototype.y = function(e) { 380 | for (var t = Buffer.from(e, 'base64').toString('binary'), n = 66, r = [], o = 0; o < t.length; o++) { 381 | var i = 24 ^ t.charCodeAt(o) ^ n; 382 | r.push(String.fromCharCode(i)), 383 | n = i 384 | } 385 | return r.join("") 386 | } 387 | , 388 | G.prototype.F = function(e) { 389 | var t = this; 390 | this.g = e.map(function(e) { 391 | return "string" == typeof e ? t.y(e) : e 392 | }) 393 | } 394 | , 395 | G.prototype.J = function(e, t, n) { 396 | for (t = t || 0, 397 | n = n || [], 398 | this.o = t, 399 | "string" == typeof e ? (this.F(n), 400 | this.v(e)) : (this.b = e, 401 | this.g = n), 402 | this.G = !0, 403 | this.x = Date.now(); this.G; ) { 404 | var r = this.b[this.o++]; 405 | if ("number" != typeof r) 406 | break; 407 | var o = Date.now(); 408 | if (500 < o - this.x) 409 | return; 410 | this.x = o; 411 | try { 412 | this.M(r) 413 | } catch (e) { 414 | if (this.R = e, 415 | !this.Q) 416 | throw "execption at " + this.o + ": " + e; 417 | this.o = this.Q 418 | } 419 | } 420 | } 421 | , 422 | G.prototype.M = function(e) { 423 | var t = (61440 & e) >> 12; 424 | new this.D[t](e).M(this) 425 | } 426 | , 427 | (new 
G).J("4AeTAJwAqACcAaQAAAAYAJAAnAKoAJwDgAWTACwAnAKoACACGAESOTRHkQAkAbAEIAMYAJwFoAASAzREJAQYBBIBNEVkBnCiGAC0BjRAJAAYBBICNEVkBnDGGAC0BzRAJACwCJAAnAmoAJwKoACcC4ABnAyMBRAAMwZgBnESsA0aADRAkQAkABgCnA6gABoCnA+hQDRHGAKcEKAAMQdgBnFasBEaADRAkQAkABgCnBKgABoCnBOhQDRHZAZxkrAUGgA0QJEAJAAYApwVoABgBnG6sBYaADRAkQAkABgCnBegAGAGceKwGBoANECRACQAnAmoAJwZoABgBnIOsBoaADRAkQAkABgCnBugABoCnByhQDRHZAZyRrAdGgA0QJEAJAAQACAFsB4gBhgAnAWgABIBNEEkBxgHEgA0RmQGdJoQCBoFFAE5gCgFFAQ5hDSCJAgYB5AAGACcH4AFGAEaCDRSEP8xDzMQIAkQCBoFFAE5gCgFFAQ5hDSCkQAkCBgBGgg0UhD/MQ+QACAIGAkaBxQBOYGSABoAnB+EBRoIN1AUCDmRNJMkCRAIGgUUATmAKAUUBDmENIKRACQIGAEaCDRSEP8xD5AAIAgYCRoHFAI5gZIAGgCcH4QFGgg3UBQQOZE0kyQJGAMaCRQ/OY+SABoGnCCEBTTAJAMYAxoJFAY5khI/Nk+RABoGnCCEBTTAJAMYAxoJFAw5khI/Nk+RABoGnCCEBTTAJAMYAxoJFBI5khI/Nk+RABoGnCCEBTTAJAMYBxIDNEEkB3JsHgNQAA==", 0, ["BRgg", "BSITFQkTERw=", "LQYfEhMA", "PxMVFBMZKB8DEjQaBQcZExMC", "", "NhETEQsE", "Whg=", "Wg==", "MhUcHRARDhg=", "NBcPBxYeDQMF", "Lx4ODys+GhMC", "LgM7OwAKDyk6Cg4=", "Mx8SGQUvMQ==", "SA==", "ORoVGCQgERcCAxo=", "BTcAERcCAxo=", "BRg3ABEXAgMaFAo=", "SQ==", "OA8LGBsP", "GC8LGBsP", "Tg==", "PxAcBQ==", "Tw==", "KRsJDgE=", "TA==", "LQofHg4DBwsP", "TQ==", "PhMaNCwZAxoUDQUeGQ==", "PhMaNCwZAxoUDQUeGTU0GQIeBRsYEQ8=", "Qg==", "BWpUGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZG1MbGR8ZGxkXGRFpGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZGw==", "ORMRCyk0Exk8LQ==", "ORMRCyst"]); 428 | var Q = function(e) { 429 | return __g._encrypt(e) 430 | }; 431 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any 3 | """ 4 | 前景色 背景色 颜色 5 | 30 40 黑色 6 | 31 41 红色 7 | 32 42 绿色 8 | 33 43 黃色 9 | 34 44 蓝色(有问题) 10 | 35 45 紫红色 11 | 36 46 青蓝色 12 | 37 47 白色 13 | 14 | 显示方式 意义 15 | 0 终端默认设置 16 | 1 高亮显示 17 | 4 使用下划线 18 | 5 闪烁 19 | 7 反白显示 20 | 8 不可见 21 | """ 22 | colour_map = { 23 | 'black': '30', 24 | 'red': '31', 25 | 'green': '32', 26 | 'yellow': '33', 27 | 'blue': '34', 28 | 
'purple': '35', 29 | 'ultramarine': '36', 30 | 'white': '37', 31 | } 32 | 33 | cmd_func_map = { 34 | 'up': 'endorse_answer', 35 | 'down': 'endorse_answer', 36 | 'neutral': 'endorse_answer', 37 | 'thank': 'thank_answer', 38 | 'unthank': 'thank_answer', 39 | 'read-cmt': 'get_comments', 40 | } 41 | 42 | 43 | def get_com_func(cmd): 44 | return cmd_func_map[cmd] 45 | 46 | 47 | def print_colour(s: Any, colour: str='green', way: int=0, **kwargs): 48 | """打印颜色""" 49 | print(f'\033[{way};{colour_map[colour]};m{s}', **kwargs) 50 | 51 | 52 | abs_dir = lambda: os.path.dirname(os.path.abspath(__file__)) 53 | 54 | 55 | class SpiderBaseclass(object): 56 | 57 | def __init__(self, client): 58 | self.client = client 59 | self.logger = self.client.logger 60 | 61 | 62 | if __name__ == '__main__': 63 | ... -------------------------------------------------------------------------------- /zhihu_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | 保存有知乎登录cookie的ClientSession 3 | """ 4 | import aiohttp 5 | import asyncio 6 | import base64 7 | import execjs 8 | import hmac 9 | import hashlib 10 | import json 11 | import re 12 | import os 13 | import sys 14 | import time 15 | # import threading 16 | from typing import Union 17 | from PIL import Image 18 | from urllib.parse import urlencode 19 | from utils import print_colour 20 | from log import get_logger 21 | from setting import COOKIE_FILE 22 | 23 | 24 | class ZhihuClient(aiohttp.ClientSession): 25 | """扩展ClientSession""" 26 | 27 | def __init__(self, user='', password='', *args, **kwargs): 28 | super().__init__(*args, **kwargs) 29 | self.user = user 30 | self.password = password 31 | headers = { 32 | 'Host': 'www.zhihu.com', 33 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' 34 | '(KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586', 35 | 'Connection': 'Keep-Alive', 36 | 'Referer': 'https://www.zhihu.com/', 37 | 'accept-encoding': 'gzip, 
deflate', 38 | } 39 | self._default_headers = headers 40 | self.logger = get_logger() 41 | self.cookie_file = COOKIE_FILE or '/tmp/cookies.pick' 42 | 43 | def get(self, url, **kwargs): 44 | """Perform HTTP GET request.""" 45 | return super().get(url, ssl=False, **kwargs) 46 | 47 | def post(self, url, data=None, **kwargs): 48 | """Perform HTTP POST request.""" 49 | return super().post(url, ssl=False, data=data, **kwargs) 50 | 51 | def put(self, url, data=None, **kwargs): 52 | """Perform HTTP PUT request.""" 53 | return super().put(url, ssl=False, data=data, **kwargs) 54 | 55 | async def login(self, load_cookies: bool=False) -> None: 56 | """ 57 | 登录 58 | :param load_cookies: 是否加载cookie 59 | :return: 60 | """ 61 | if load_cookies: 62 | self.cookie_jar.load(self.cookie_file) 63 | self.logger.debug(f'加载cookies从:{self.cookie_file}') 64 | is_succ = await self.check_login() 65 | if is_succ: 66 | print_colour('登录成功!', colour='green') 67 | return 68 | else: 69 | print_colour('通过缓存登录失败尝试重新登录', 'red') 70 | self.cookie_jar.clear() 71 | os.remove(self.cookie_file) 72 | 73 | login_data = { 74 | 'client_id': 'c3cef7c66a1843f8b3a9e6a1e3160e20', 75 | 'grant_type': 'password', 76 | 'source': 'com.zhihu.web', 77 | 'username': self.user, 78 | 'password': self.password, 79 | 'lang': 'en', # en 4位验证码, cn 中文验证码 80 | 'ref_source': 'other_https://www.zhihu.com/signin?next=%2F', 81 | 'utm_source': '' 82 | } 83 | xsrf = await self._get_xsrf() 84 | captcha = await self._get_captcha() 85 | timestamp = int(time.time() * 1000) 86 | login_data.update({ 87 | 'captcha': captcha, 88 | 'timestamp': timestamp, 89 | 'signature': self._get_signature(timestamp, login_data) 90 | }) 91 | headers = { 92 | 'accept-encoding': 'gzip, deflate, br', 93 | 'Host': 'www.zhihu.com', 94 | 'Referer': 'https://www.zhihu.com/', 95 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' 96 | '(KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586', 97 | 'content-type': 
'application/x-www-form-urlencoded', 98 | 'x-zse-83': '3_2.0', 99 | 'x-xsrftoken': xsrf 100 | } 101 | data = self._encrypt(login_data) 102 | url = 'https://www.zhihu.com/api/v3/oauth/sign_in' 103 | async with self.post(url, data=data, headers=headers) as r: 104 | resp = await r.text() 105 | if 'error' in resp: 106 | print_colour(json.loads(resp)['error'], 'red') 107 | self.logger.debug(f"登录失败:{json.loads(resp)['error']}") 108 | sys.exit() 109 | self.logger.debug(resp) 110 | is_succ = await self.check_login() 111 | if is_succ: 112 | print_colour('登录成功!', colour='green') 113 | else: 114 | print_colour('登录失败!', colour='red') 115 | sys.exit() 116 | 117 | async def _get_captcha(self) -> str: 118 | """ 119 | 请求验证码的 API 接口,无论是否需要验证码都需要请求一次 120 | 如果需要验证码会返回图片的 base64 编码 121 | :return: 验证码的 POST 参数 122 | """ 123 | 124 | url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en' 125 | async with self.get(url) as r: 126 | resp = await r.text() 127 | show_captcha = re.search(r'true', resp) 128 | if show_captcha: 129 | async with self.put(url) as r: 130 | resp = await r.text() 131 | json_data = json.loads(resp) 132 | img_base64 = json_data['img_base64'].replace(r'\n', '') 133 | with open(f'./captcha.jpg', 'wb') as f: 134 | f.write(base64.b64decode(img_base64)) 135 | img = Image.open(f'./captcha.jpg') 136 | # if lang == 'cn': 137 | # import matplotlib.pyplot as plt 138 | # plt.imshow(img) 139 | # print('点击所有倒立的汉字,在命令行中按回车提交') 140 | # points = plt.ginput(7) 141 | # capt = json.dumps({'img_size': [200, 44], 142 | # 'input_points': [[i[0] / 2, i[1] / 2] for i in points]}) 143 | # else: 144 | # img_thread = threading.Thread(target=img.show, daemon=True) 145 | # img_thread.start() 146 | # TODO 验证码自动识别实现 147 | loop = asyncio.get_running_loop() 148 | loop.run_in_executor(None, img.show) 149 | capt = input('请输入图片里的验证码:') 150 | # 这里必须先把参数 POST 验证码接口 151 | await self.post(url, data={'input_text': capt}) 152 | return capt 153 | return '' 154 | 155 | async def check_login(self) -> bool: 156 
| """ 157 | 检查登录状态,访问登录页面出现跳转则是已登录, 158 | 如登录成功保存当前 Cookies 159 | :return: bool 160 | """ 161 | url = 'https://www.zhihu.com/' 162 | async with self.get(url, allow_redirects=False) as r: 163 | if r.status == 200: 164 | self.cookie_jar.save(self.cookie_file) 165 | self.logger.debug(f'保存cookies到->{self.cookie_file}') 166 | return True 167 | else: 168 | self.logger.debug(await r.text()) 169 | self.logger.debug(r.headers) 170 | self.logger.debug(r.status) 171 | return False 172 | 173 | async def _get_xsrf(self) -> str: 174 | """ 175 | 从登录页面获取 xsrf 176 | :return: str 177 | """ 178 | async with self.get('https://www.zhihu.com/', allow_redirects=False) as r: 179 | self.logger.debug('尝试获取xsrf token') 180 | if r.cookies.get('_xsrf'): 181 | self.logger.debug(f'获取成功{r.cookies.get("_xsrf").value}') 182 | return r.cookies.get('_xsrf').value 183 | raise AssertionError('获取 xsrf 失败') 184 | 185 | def _get_signature(self, timestamp: Union[int, str], login_data: dict) -> str: 186 | """ 187 | 通过 Hmac 算法计算返回签名 188 | 实际是几个固定字符串加时间戳 189 | :param timestamp: 时间戳 190 | :return: 签名 191 | """ 192 | ha = hmac.new(b'd1b964811afb40118a12068ff74a12f4', digestmod=hashlib.sha1) 193 | grant_type = login_data['grant_type'] 194 | client_id = login_data['client_id'] 195 | source = login_data['source'] 196 | ha.update(bytes((grant_type + client_id + source + str(timestamp)), 'utf-8')) 197 | return ha.hexdigest() 198 | 199 | # @staticmethod 200 | # def _encrypt(form_data: dict) -> str: 201 | # with open(f'./static/encrypt_old.js') as f: 202 | # js = execjs.compile(f.read()) 203 | # return js.call('Q', urlencode(form_data)) 204 | 205 | @staticmethod 206 | def _encrypt(form_data: dict): 207 | with open('./static/encrypt.js') as f: 208 | js = execjs.compile(f.read()) 209 | return js.call('b', urlencode(form_data)) 210 | 211 | 212 | if __name__ == '__main__': 213 | from setting import USER, PASSWORD 214 | 215 | async def test(): 216 | client = ZhihuClient(user=USER, password=PASSWORD) 217 | await 
client.login(load_cookies=False) 218 | await client.close() 219 | 220 | asyncio.run(test()) 221 | --------------------------------------------------------------------------------