├── .gitattributes
├── .gitignore
├── .travis.yml
├── ChangeLog.rst
├── LICENSE
├── MANIFEST.in
├── README.rst
├── docs
    ├── Makefile
    ├── activity.rst
    ├── answer.rst
    ├── author.rst
    ├── classes.rst
    ├── client.rst
    ├── collection.rst
    ├── column.rst
    ├── comment.rst
    ├── conf.py
    ├── examples.rst
    ├── index.rst
    ├── install.rst
    ├── login.rst
    ├── make.bat
    ├── me.rst
    ├── post.rst
    ├── question.rst
    ├── requirements.txt
    └── topic.rst
├── example
    ├── analyze_user.py
    └── test.json
├── setup.cfg
├── setup.py
├── test
    ├── data
    │   ├── answer.html
    │   ├── answer.md
    │   ├── answer_content.html
    │   ├── answer_upvoter.html
    │   ├── collection.html
    │   ├── column.json
    │   ├── column_post.json
    │   ├── post.md
    │   ├── question.html
    │   └── question_more_answer.html
    ├── test.json
    ├── test_activity.py
    ├── test_answer.py
    ├── test_collection.py
    ├── test_column.py
    ├── test_common.py
    ├── test_post.py
    ├── test_question.py
    ├── test_utils.py
    └── zhihu-test.py
└── zhihu
    ├── __init__.py
    ├── activity.py
    ├── acttype.py
    ├── answer.py
    ├── author.py
    ├── base.py
    ├── client.py
    ├── collection.py
    ├── column.py
    ├── comment.py
    ├── common.py
    ├── me.py
    ├── post.py
    ├── question.py
    └── topic.py


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 
19 | test/data/* linguist-vendored=true
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # project
  2 | test/cookies.json
  3 | test/zhihu-mytest.py
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | env/
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | 
 29 | # PyCharm
 30 | .idea/
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .coverage
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | 
 57 | # Sphinx documentation
 58 | /docs/_build/
 59 | 
 60 | # PyBuilder
 61 | target/
 62 | 
 63 | # =========================
 64 | # Operating System Files
 65 | # =========================
 66 | 
 67 | # OSX
 68 | # =========================
 69 | 
 70 | .DS_Store
 71 | .AppleDouble
 72 | .LSOverride
 73 | 
 74 | # Thumbnails
 75 | ._*
 76 | 
 77 | # Files that might appear on external disk
 78 | .Spotlight-V100
 79 | .Trashes
 80 | 
 81 | # Directories potentially created on remote AFP share
 82 | .AppleDB
 83 | .AppleDesktop
 84 | Network Trash Folder
 85 | Temporary Items
 86 | .apdisk
 87 | 
 88 | # Windows
 89 | # =========================
 90 | 
 91 | # Windows image file caches
 92 | Thumbs.db
 93 | ehthumbs.db
 94 | 
 95 | # Folder config file
 96 | Desktop.ini
 97 | 
 98 | # Recycle Bin used on file shares
 99 | $RECYCLE.BIN/
100 | 
101 | # Windows Installer files
102 | *.cab
103 | *.msi
104 | *.msm
105 | *.msp
106 | 
107 | # Windows shortcuts
108 | *.lnk
109 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - "3.4"
 5 |   - "3.5"
 6 | 
 7 | before_install:
 8 |   - "pip install --upgrade pip"
 9 | 
10 | install:
11 |   - "pip install .[lxml]"
12 | 
13 | script: 
14 |   - "cd test"
15 |   - "./zhihu-test.py"
16 | 
17 | notifications:
18 |   email:
19 |     on_success: never
20 |     on_failure: always
21 | 


--------------------------------------------------------------------------------
/ChangeLog.rst:
--------------------------------------------------------------------------------
  1 | 更新日志
  2 | ========
  3 | 
  4 | 0.3.23
  5 | ------
  6 | 
  7 | - [Fix] 修复了由于知乎前端更改， `Activity` 类中获取 「提了一个问题」 类型的动态时会报错的 Bug。
  8 | 
  9 | 0.3.22
 10 | ------
 11 | 
 12 | - [Add] 增加了 `Author.followers_skip()` 和 `Author.followees.skip()` 函数，可以在获取用户关注者时跳过前 n 个用户。
 13 | 
 14 | 0.3.21
 15 | ------
 16 | 
 17 | - [Add] 增加了一个 `BanException` 异常，在尝试获取被反作弊系统限制的用户资料时将会引发此异常，需要用户自行处理。
 18 | 
 19 | 0.3.20
 20 | ------
 21 | 
 22 | - [fix] 修复获取用户粉丝时，因为新的徽章的加入造成的解析 Bug。
 23 | 
 24 | 0.3.19
 25 | ------
 26 | 
 27 | - [Fix] 现在允许通过 `https://www.zhihu.com/org/abcde` 这种 URL 获取机构号对象。
 28 | - [Add] `ZhihuClient.login_in_termianl` 和 `ZhihuClient.create_cookies` 增加了参数 `use_getpass` 来设置是否使用安全模式输入密码，以解决在某些 Windows 上的 IDE 中运行时无法输入密码的问题。。
 29 | 
 30 | 0.3.18
 31 | ------
 32 | 
 33 | - [fix] 修复了许多由知乎前端改变造成的小 bug，暂时又可用了。
 34 | 
 35 | 0.3.17
 36 | ------
 37 | 
 38 | - [fix] 修复答案翻页 pagesize 改变造成的无法获取所有答案的问题
 39 | - [fix] 修复了一大都因为前端 data-votecount 属性被删除造成的问题……
 40 | - [update] 改了一点测试代码……
 41 | 
 42 | 0.3.16
 43 | ------
 44 | 
 45 | - [fix] 修复因知乎登录又需要验证码且验证码逻辑小幅度修改造成的无法登录的问题
 46 | 
 47 | 0.3.15
 48 | ------
 49 | 
 50 | - [change] 现在 question.topics 返回 Topic 对象的迭代器，而不是话题名字的列表
 51 | 
 52 | 0.3.14
 53 | ------
 54 | 
 55 | - [fix] 修复 Author.columns 因为知乎专栏大幅度改版而无法获取到专栏的问题
 56 | - [fix] 修复 Post.column 因为知乎现在允许发布无专栏文章而出错的 bug，对于无专栏的 Post 现在 column 属性返回 None
 57 | - [fix] 修复 Post.slug 因为知乎文章网址变更而无法获取的问题
 58 | - [fix] 修复 Post.column_in_name 因为知乎现在允许发布无专栏文章而出错的 bug，对于无专栏的 Post 现在 column 属性返回 None
 59 | - [fix] 修复因为上述 Bugs 造成的 Author.activities 出错的问题
 60 | - [add] 现在 CollectActType 可以直接从模块顶级 import， 即 from zhihu import CollectActType
 61 | - [fix] 修复 Topic.follower.motto 获取不正确的问题
 62 | 
 63 | 0.3.13
 64 | ------
 65 | 
 66 | - [fix] 修复因知乎将每次点击问题页面「更多」按钮只加载 50 个回答改为 20 个造成的无法获取所有问题的 bug
 67 | - [fix] 修复获取 post 后，直接调用 post.save 会出错的 bug
 68 | - [change] 在终端登录时输入的密码改为不可见
 69 | - [change] 貌似知乎登录不怎么需要验证码了，现在作为一个可选项，login_in_terminal 和 create_cookies 默认均不要求验证码
 70 | - [fix] 修复因知乎专栏改版，API 地址变更 造成的 Post 类无法使用的问题
 71 | - [add] 增加 client.add_proxy_pool 方法设置代理池
 72 | - [add] 增加 client.remove_proxy_pool 方法移除代理池
 73 | 
 74 | 0.3.12
 75 | ------
 76 | 
 77 | - [rollback] 知乎又把所有问题的接口改回来了……妈的智障
 78 | - [add] 增加了获取话题下等待回答的问题的功能：Topic.unanswered_questions
 79 | 
 80 | 0.3.11
 81 | ------
 82 | 
 83 | - [add] Avoid redirection
 84 | - [fix] 知乎改了 Topic 的所有问题功能，变成等待回答的功能了
 85 | 
 86 | 0.3.10-1
 87 | --------
 88 | 
 89 | - [fix] 我真是傻了，上传之前忘记删除debug语句了。。。
 90 | 
 91 | 0.3.10
 92 | ------
 93 | - [add] 添加 answer.latest_comments 属性
 94 | - [fix] 获取头像失败
 95 | - [add] zhihu.ANONYMOUS 表示匿名用户
 96 | - [fix] answer.deleted 属性错误
 97 | - [fix] 解决一些诡异的用户带来的问题
 98 | - [add] post 可保存为 html 格式
 99 | - [fix] 修复 Author 的 location education 等属性无法获取的 bug
100 | 
101 | 0.3.9-1
102 | -------
103 | 
104 | - [fix] 修复了由于 img 的 title 属性修改为 alt 属性造成的 Author.followed_topic 获取前几个话题出错的 bug
105 | 
106 | 0.3.9
107 | -----
108 | 
109 | - [add] Question 和 Answer 添加 deleted 属性
110 | - [fix] 修复了问题没有回答时 Question.answers 出错的问题
111 | - [fix] 修复了回答仅有一页时无法获取按时间排序的答案的问题
112 | - [fix] 修复无法刷新 answer_num 的问题
113 | - [fix] 修复收藏为0时获取收藏数出错的问题
114 | - [fix] 知乎修改了评论的前端代码
115 | - [change] Comment 类现在也提供 datetime.datetime 类型的 creation_time 属性, 去掉 time_string
116 | - [fix] 修复了 topic.question 由于时间戳乘以了 1000 而造成的错误
117 | - [fix] 修复了 topic.top_answer 无法获取到内容的 bug
118 | - [fix] 修复了 topic.hot_answer 无法获取到内容的 bug
119 | 
120 | 0.3.8
121 | -----
122 | 
123 | - [add] Answer 和 Question 增加 refresh() 方法, 刷新问题答案 object 的属性
124 | - [add] Question 初始化的 url 现在支持 ?sort=created
125 | - [add] 使用带 ?sort=created 的 url 初始化问题时, question.answers 按照时间顺序返回答案
126 | - [add] 添加了 Answer.comment_num 属性, 获取评论数量
127 | - [add] 添加了 Collection.id 属性
128 | - [fix] 现在 Activity.type 变成 read-only property 并加入文档了
129 | 
130 | 全都 Thanks `@laike9m <https://github.com/laike9m>`__
131 | 
132 | 0.3.7
133 | -----
134 | 
135 | - [fix] 修复了用户动态中有关注圆桌行为时会崩溃的 Bug（目前暂时跳过这类动态）。
136 | - [fix] 知乎删除了深网话题，正好Topic类是用的那个话题测试，我还以为代码bug了……现在改成测试「程序员」话题。
137 | - [add] 曾加了获取专栏文章点赞者的功能。
138 | 
139 | 
140 | 0.3.6
141 | -----
142 | 
143 | - [fix] 修试图获取登录用户自身 location, business 等属性但自己又未填写时出现的 bug
144 | - [fix] 修复 topic.py 中混合使用 return sth 和 yield sth 导致的旧版本 python 报语法错误的问题
145 | - [add/fix] ActType 中添加了关注收藏夹 (Thanks `@cssmlulu <https://github.com/cssmlulu>`__)
146 | - [fix] 修复了 Author.activities 项 answer 的 author 属性不正确的 bug (Thanks `@cssmlulu <https://github.com/cssmlulu>`__)
147 | 
148 | 0.3.5
149 | -----
150 | 
151 | - [add] 添加 Answer.collect_num 属性, 获取答案的收藏数
152 | - [add] 添加 Answer.collections 接口, 获取收藏了该答案的收藏夹
153 | - [add] 添加 Collections.logs 接口, 获取收藏夹日志
154 | - [add] 添加 Question.author 属性，获取提问者
155 | - [fix] 修复文档代码的一些错误
156 | 
157 | 前四个功能Thanks `@laike9m <https://github.com/laike9m>`__
158 | 
159 | 0.3.4
160 | -----
161 | 
162 | - [f**K] 随便在知乎上发了个小专栏……不小心就进撕逼大战了 QAQ 我好方～
163 | - [add] 增加了 example 文件夹，里面放一些实例
164 | - [add] Add answer creation_time attribute(Thanks @laike9m)
165 | - [add] 添加 Question.creation_time 和 Question.last_edit_time 属性(Thanks @laike9m)
166 | - [fix] 修复了 UPVOTE_ANSWER 型的 Activity 的 act.answer.author 全都是匿名用户的 bug（不知道是不是前端改了）
167 | 
168 | 0.3.3
169 | -----
170 | 
171 | - [fix] 紧急更新，知乎页面上的链接大多数都变成了 https, 暂时只简单的改了一点正则表达式已作为紧急应对，有 bug 请开 issue。
172 | 
173 | 0.3.2
174 | -----
175 | 
176 | - [change] 改变 Author 类获取 Activities 的机制，判断类型更准确(Thanks `@laike9m <https://github.com/laike9m>`__)。
177 | - [change] 为方便以后写测试，类架构修改为均继承 BaseZhihu 类（Thanks `@littlezz <https://github.com/littlezz>`__)。
178 | 
179 | 0.3.1
180 | -----
181 | 
182 | - [fix] 修复因为知乎 Answer 的 css class 更改导致的 Answer 类 content 属性获取不正确的 bug
183 | - [fix] 修复历史遗留代码造成使用 profile card 获取头像时，网址不正确的 bug（Thanks `@bdqy <https://github.com/bdqy>`__）
184 | - [fix] 修复因答案被和谐造成的 bug（Thanks `@littlezz <https://github.com/littlezz>`__）
185 | - [add] 获取用户的一些详细信息，包括微博，所在地，教育情况，所在行业等等(Thanks `@zeroxfio <https://github.com/zeroxfio>`__）
186 | - [add] Answer 类增加了获取答案的评论的功能(Thanks `@zeroxfio <https://github.com/zeroxfio>`__）
187 | - [add] Me 类增加了发送私信和评论的功能(Thanks `@zeroxfio <https://github.com/zeroxfio>`__）
188 | - [add] Me 类增加了给答案点没有帮助的功能(Thanks `@lishubing <https://github.com/lishubing>`__)
189 | - [add] Me 类增加了屏蔽用户，屏蔽话题的功能(Thanks `@lishubing <https://github.com/lishubing>`__)
190 | 
191 | 0.3.0
192 | -----
193 | 
194 | - [fix] 修复 Author 类的 get_followed_columns 接口获取到的 Column 对象调用 followed_num 函数可能获取不到正确数量的 bug
195 | - [fix] 修复 Author 类的 get_followed_columns 接口获取到的 Column 对象处于未登录状态的 bug
196 | - [add] Author 类增加获取用户关注的话题数的接口（followed_topic_num）
197 | - [add] Author 类增加获取用户关注的话题的接口 （followed_topics）
198 | 
199 | 0.2.9
200 | -----
201 | 
202 | - [fix] 修复因问题描述和答案使用相同的 class 造成的答案内容与序号不同的 bug。
203 | - [tucao] 一天修三四个bug好累……我估计得找时间抓一下知乎的移动端 API 了，前端天天变这谁受得了。
204 | 
205 | 0.2.8
206 | -----
207 | 
208 | - [fix] 上次的 bug 修复的不完全，匿名用户的情况没有考虑周全，紧急修复下……（可能还有地方没修复，请关注更新。
209 | 
210 | 0.2.7
211 | -----
212 | 
213 | - [fix] 修复由于把用户 tag 从 h3 改成了 div 造成的一系列 bug (Thanks `@lishubing <https://github.com/lishubing>`__)
214 | 
215 | 0.2.6
216 | -----
217 | 
218 | - [fix] 获取匿名用户的ID出错的问题，暂定为返回空字符串
219 | - [add] 增加获取用户关注专栏数的功能 (Thanks `@cssmlulu <https://github.com/cssmlulu>`__)
220 | - [add] 增加获取用户关注专栏的功能 (Thanks `@cssmlulu <https://github.com/cssmlulu>`__)
221 | 
222 | 0.2.5
223 | -----
224 | 
225 | - [fix] 修复了某些问题无法获取答案的bug
226 | - [fix] 知乎又把头像链接改回去了。。。
227 | 
228 | 0.2.4
229 | -----
230 | 
231 | - [fix] 知乎修改了图片链接的格式，影响了答案图片，头像。
232 | 
233 | 0.2.3
234 | -----
235 | 
236 | - [fix] Topic.hot_question 的顺序 Bug
237 | - [fix] 知乎登录逻辑修改（？）
238 | - [add] Topic 所有答案接口
239 | - [add] Topic 热门答案接口
240 | 
241 | 0.2.2
242 | -----
243 | 
244 | 代码美化，尽量满足 PEP8.
245 | 
246 | 0.2.1
247 | -----
248 | 
249 | 增加 Topic 类的最近动态（热门排序）
250 | 修复 Topic.children 的bug
251 | 
252 | 0.2.0
253 | -----
254 | 
255 | 增加Me类及其相关操作
256 | 
257 | -  [x] 点赞，取消点赞，反对，取消反对某回答
258 | -  [x] 点赞，取消点赞，反对，取消反对某文章
259 | -  [x] 感谢，取消感谢某回答
260 | -  [x] 关注，取消关注某用户
261 | -  [x] 关注，取消关注某问题
262 | -  [x] 关注，取消关注某话题
263 | -  [x] 关注，取消关注收藏夹
264 | 
265 | 增加Topic类相关操作：
266 | 
267 | -  [x] 获取话题名称
268 | -  [x] 获取话题描述
269 | -  [x] 获取话题图标
270 | -  [x] 获取关注者数量
271 | -  [x] 获取关注者
272 | -  [x] 获取父话题
273 | -  [x] 获取子话题
274 | -  [x] 获取优秀答主
275 | -  [ ] 获取最近动态（暂缓）
276 | -  [x] 获取精华回答
277 | -  [x] 获取所有问题
278 | 
279 | 0.1.5
280 | -----
281 | 
282 | - 增加了获取收藏夹关注者的功能
283 | - 增加了获取问题关注者的功能
284 | - Column的一个小Bug修复
285 | 
286 | 0.1.4
287 | -----
288 | 
289 | 知乎登录参数变化，从rememberme变成了remember_me，做了跟进。
290 | 
291 | 2015.07.30
292 | ----------
293 | 
294 | 发布到Pypi.
295 | 
296 | 2015.07.29
297 | ----------
298 | 
299 | -  重构项目结构
300 | -  增加 zhihu.Client 类，改善原先模块需要使用当前目录下 cookies 的弊端，现在的使用方法请看 Readme 中的示例。
301 | -  去掉了 _text2int 方法，因为发现知乎以K结尾的赞同数也有办法获取到准确点赞数。
302 | 
303 | 2015.07.26
304 | ----------
305 | 
306 | 重构项目结构，转变为标准 Python 模块结构。
307 | 
308 | 2015.07.26
309 | ----------
310 | 
311 | 添加 Author.photo_url 接口，用于获取用户头像。
312 | 
313 | 本属性的实现较为分散，在不同的地方使用了不同的方法：
314 | 
315 | -  Author.follower(e)s, Answer.upvoters 等属性返回的 Author 自带 photo_url
316 | 
317 | -  用户自定义的 Author 在访问过主页的情况下通过解析主页得到
318 | 
319 | -  用户自定义的 Author 在未访问主页的情况下为了性能使用了知乎的 CardProfile
320 |    API
321 | 
322 | 因为实现混乱所以容易有Bug，欢迎反馈。
323 | 
324 | 2015.07.25
325 | ----------
326 | 
327 | 增加了获取用户关注者和粉丝的功能
328 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
329 | 
330 | Author.followers, Author.folowees 返回Author迭代器，自带url, name, motto, question\_num, answer\_num, upvote\_num, follower\_num属性。
331 | 
332 | html解析器优选
333 | ~~~~~~~~~~~~~~
334 | 
335 | 在安装了 lxml 的情况下默认使用 lxml 作为解析器，否则使用 html.parser。
336 | 
337 | 增加答案获取点赞用户功能
338 | ~~~~~~~~~~~~~~~~~~~~~~~~
339 | 
340 | Author.upvoters 返回 Author 迭代器，自带url, name, motto, question\_num, answer\_num, upvote\_num, thank\_num属性
341 | 
342 | 增加简易判断是否为「三零用户」功能
343 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
344 | 
345 | Author.is_zero_user() ，判断标准为，赞同，感谢，提问数，回答数均为 0。
346 | 
347 | 2015.07.23
348 | ----------
349 | 
350 | 各个类url属性更改为公开
351 | ~~~~~~~~~~~~~~~~~~~~~~~
352 | 
353 | 暂时这样吧，有点懒了，因为这样会让使用者有机会非法修改 url，可能导致 Bug，以后勤快的话会改成 read-only。
354 | 
355 | 类名变更
356 | ~~~~~~~~
357 | 
358 | 专栏类从 Book 更名为 Cloumn
359 | 
360 | 文章类从 Article 更名为 Post
361 | 
362 | 以上两个更名同时影响了其他类的属性名，如 Author.books 变更为 Author.columns，其他类同理。
363 | 
364 | 接口名变更
365 | ~~~~~~~~~~
366 | 
367 | 1. 统一了一下复数的使用。比如 Author.answers_num 变为 Author.answer_num, Author.collections\_num 变为 Author.collection\_num。
368 | 也就是说某某数量的接口名为 Class.foo_num，foo使用单数形式。
369 | 
370 | 2. 知乎的赞同使用单词 upvote，以前叫 agree 的地方现在都叫 upvote。比如 Author.agree_num 变为 Author.upvote_num，Post.agree_num 变为 Post.upvote_num。
371 | 
372 | 3. Answer 类的 upvote 属性更名为 upvote_num。
373 | 
374 | 提供\ ``Topic``\ 类
375 | ~~~~~~~~~~~~~~~~~~~
376 | 
377 | 目前只有获取话题名的功能。
378 | 
379 | 提供\ ``Author.activities``
380 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
381 | 
382 | 属性获取用户动态，返回 Activity 类生成器。
383 | 
384 | Activity 类提供 type 属性用于判断动态类型，type 为 ActType 类定义的常量，根据 type 的不同提供不同的属性，如下表：
385 | 
386 | +----------------+--------------------+--------------+
387 | | 类型           | 常量               | 提供的成员   |
388 | +================+====================+==============+
389 | | 关注了问题     | FOLLOW\_QUESTION   | question     |
390 | +----------------+--------------------+--------------+
391 | | 赞同了回答     | UPVOTE\_ANSWER     | answer       |
392 | +----------------+--------------------+--------------+
393 | | 关注了专栏     | FOLLOW\_COLUMN     | column       |
394 | +----------------+--------------------+--------------+
395 | | 回答了问题     | ANSWER\_QUESTION   | answer       |
396 | +----------------+--------------------+--------------+
397 | | 赞同了文章     | UPVOTE\_POST       | post         |
398 | +----------------+--------------------+--------------+
399 | | 发布了文章     | PUBLISH\_POST      | post         |
400 | +----------------+--------------------+--------------+
401 | | 关注了话题     | FOLLOW\_TOPIC      | topic        |
402 | +----------------+--------------------+--------------+
403 | | 提了一个问题   | ASK\_QUESTION      | question     |
404 | +----------------+--------------------+--------------+
405 | 
406 | 由于每种类型都只提供了一种属性，所以所有Activity对象都有 content 属性，用于直接获取唯一的属性。
407 | 
408 | 示例代码见 zhihu-test.py 的 test_author 函数。
409 | 
410 | activities 属性可以在未登录（未生成cookies）的情况下使用，但是根据知乎的隐私保护政策，开启了隐私保护的用户的回答和文章，此时作者信息会是匿名用户，所以还是建议登录后使用。
411 | 
412 | 2015.07.22
413 | ----------
414 | 
415 | 尝试修复了最新版bs4导致的问题，虽然我没明白问题在哪QuQ，求测试。
416 | 
417 | -   Windows 已测试 (`@7sDream <https://github.com/7sDream>`__)
418 | -   Linux
419 | 
420 |     -   Ubuntu 已测试(`@7sDream <https://github.com/7sDream>`__)
421 | 
422 | -   Mac 已测试(`@SimplyY <https://github.com/SimplyY>`__)
423 | 
424 | 2015.07.16
425 | ----------
426 | 
427 | 重构 Answer 和 Article 的 url 属性为 public.
428 | 
429 | 2015.07.11:
430 | -----------
431 | 
432 | Hotfix， 知乎更换了登录网址，做了简单的跟进，过了Test，等待Bug汇报中。
433 | 
434 | 2015.06.04：
435 | ------------
436 | 
437 | 由 `@Gracker <https://github.com/Gracker>`__ 补充了在 Ubuntu 14.04
438 | 下的测试结果，并添加了补充说明。
439 | 
440 | 2015.05.29：
441 | ------------
442 | 
443 | 修复了当问题关注人数为0时、问题答案数为0时的崩溃问题。（感谢：`@段晓晨 <http://www.zhihu.com/people/loveQt>`__）
444 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015-2016 7sDream
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.rst LICENSE
2 | include test/test*.py
3 | include test/zhihu-test.py
4 | include docs/*
5 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | zhihu-py3 : 知乎非官方API库 with Python3
  2 | ========================================
  3 | 
  4 | |Author| |Build| |DocumentationStatus| |PypiVersion| |License| |PypiDownloadStatus|
  5 | 
  6 | 通知
  7 | ----
  8 | 
  9 | 由于知乎前端老是改阿改的，每次我都要更新弄的我好烦的说……
 10 | 
 11 | 所以我开发了一个新的项目\ `Zhihu-OAuth <https://github.com/7sDream/zhihu-oauth>`__。
 12 | 
 13 | 这个新项目用了一些黑科技手段，反正应该是更加稳定和快速了！**而且还支持 Python 2 哟！**
 14 | 稳定我倒是没测，但是这里有一个
 15 | `速度对比 <https://github.com/7sDream/zhihu-oauth/blob/master/compare.md>`__。
 16 | 
 17 | 如果你是准备新开一个项目的话，我强烈建议你看看我的新项目~
 18 | 
 19 | 如果你已经用 Zhihu-py3 写了一些代码的话，我最近会写一个从 Zhihu-py3 转到 Zhihu-OAuth
 20 | 的简易指南，你也可以关注一下哟。
 21 | 
 22 | 毕竟嘛，有更好的方案的话，为什么不试试呢？
 23 | 
 24 | 功能
 25 | ----
 26 | 
 27 | 由于知乎没有公开API，加上受到\ `zhihu-python <https://github.com/egrcc/zhihu-python>`__\ 项目的启发，在Python3下重新写了一个知乎的数据解析模块。
 28 | 
 29 | 提供的功能一句话概括为，用户提供知乎的网址构用于建对应类的对象，可以获取到某些需要的数据。
 30 | 
 31 | 简单例子：
 32 | 
 33 | ..  code:: python
 34 | 
 35 |     from zhihu import ZhihuClient
 36 | 
 37 |     Cookies_File = 'cookies.json'
 38 | 
 39 |     client = ZhihuClient(Cookies_File)
 40 | 
 41 |     url = 'http://www.zhihu.com/question/24825703'
 42 |     question = client.question(url)
 43 | 
 44 |     print(question.title)
 45 |     print(question.answer_num)
 46 |     print(question.follower_num)
 47 |     print(question.topics)
 48 | 
 49 |     for answer in question.answers:
 50 |         print(answer.author.name, answer.upvote_num)
 51 | 
 52 | 这段代码的输出为：
 53 | 
 54 | ::
 55 | 
 56 |     关系亲密的人之间要说「谢谢」吗？
 57 |     627
 58 |     4322
 59 |     ['心理学', '恋爱', '社会', '礼仪', '亲密关系']
 60 |     龙晓航 50
 61 |     小不点儿 198
 62 |     芝士就是力量 89
 63 |     欧阳忆希 425
 64 |     ...
 65 | 
 66 | 另外还有\ ``Author（用户）``\ 、\ ``Answer（答案）``\ 、\ ``Collection（收藏夹）``\ 、\ ``Column（专栏）``\ 、\ ``Post（文章）``\ 、\ ``Topic（话题）``\ 等类可以使用，\ ``Answer``,\ ``Post``\ 类提供了\ ``save``\ 方法能将答案或文章保存为HTML或Markdown格式，具体请看文档，或者\ ``zhihu-test.py``\ 。
 67 | 
 68 | 安装
 69 | ----
 70 | 
 71 | ..  class:: bold
 72 | 
 73 |    本项目依赖于\ `requests <https://pypi.python.org/pypi/requests/2.7.0>`__\ 、\ `BeautifulSoup4 <http://www.crummy.com/software/BeautifulSoup>`__\ 、\ `html2text <https://github.com/aaronsw/html2text>`__
 74 | 
 75 | 已将项目发布到pypi，请使用下列命令安装
 76 | 
 77 | ..  code:: bash
 78 | 
 79 |     (sudo) pip(3) install (--upgrade) zhihu-py3
 80 | 
 81 | 希望开启lxml的话请使用：
 82 | 
 83 | ..  code:: bash
 84 | 
 85 |     (sudo) pip(3) install (--upgrade) zhihu-py3[lxml]
 86 | 
 87 | 
 88 | 因为lxml解析html效率高而且容错率强，在知乎使用\ ``<br>``\ 时，自带的html.parser会将其转换成\ ``<br>...</br>``\ ，而lxml则转换为\ ``<br/>``\ ，更为标准且美观，所以推荐使用第二个命令。
 89 | 
 90 | 不安装lxml也能使用本模块，此时会自动使用html.parser作为解析器。
 91 | 
 92 | PS 若在安装lxml时出错，请安装libxml和libxslt后重试：
 93 | 
 94 | ..  code:: bash
 95 | 
 96 |     sudo apt-get install libxml2 libxml2-dev libxslt1.1 libxslt1-dev
 97 | 
 98 | 准备工作
 99 | --------
100 | 
101 | 第一次使用推荐运行以下代码生成 cookies 文件：
102 | 
103 | ..  code:: python
104 | 
105 |     from zhihu import ZhihuClient
106 | 
107 |     ZhihuClient().create_cookies('cookies.json')
108 | 
109 | 运行结果
110 | 
111 | ::
112 | 
113 |     ====== zhihu login =====
114 |     email: <your-email>
115 |     password: <your-password>
116 |     please check captcha.gif for captcha
117 |     captcha: <captcha-code>
118 |     ====== logging.... =====
119 |     login successfully
120 |     cookies file created.
121 | 
122 | 运行成功后会在目录下生成\ ``cookies.json``\ 文件。
123 | 
124 | 以下示例皆以登录成功为前提。
125 | 
126 | 建议在正式使用之前运行\ ``zhihu-test.py``\ 测试一下。
127 | 
128 | 用法实例
129 | --------
130 | 
131 | 为了精简 Readme，本部分移动至文档内。
132 | 
133 | 请看文档的「用法示例」部分。
134 | 
135 | 登录方法综述
136 | ---------------------------------------------
137 | 
138 | 为了精简 Readme，本部分移动至文档内。
139 | 
140 | 请看文档的「登录方法综述」部分。
141 | 
142 | 文档
143 | ----
144 | 
145 | 终于搞定了文档这个磨人的小妖精，可惜 Sphinx 还是不会用 T^T
146 | 先随意弄成这样吧：
147 | 
148 | `Master版文档 <http://zhihu-py3.readthedocs.org/zh_CN/latest>`__
149 | 
150 | `Dev版文档 <http://zhihu-py3.readthedocs.org/zh_CN/dev>`__
151 | 
152 | 其他
153 | ----
154 | 
155 | **有问题请开Issue，几个小时后无回应可加最后面的QQ群询问。**
156 | 
157 | 友链：
158 | 
159 | -  \ `zhihurss <https://github.com/SimplyY/zhihu-rss>`__\ ：一个基于 zhihu-py3 做的跨平台知乎 rss(any user) 的客户端。
160 | 
161 | 
162 | TODO List
163 | ---------
164 | 
165 | - [x] 增加获取用户关注者，用户追随者
166 | - [x] 增加获取答案点赞用户功能
167 | - [x] 获取用户头像地址
168 | - [x] 打包为标准Python模块
169 | - [x] 重构代码，增加\ ``ZhihuClient``\ 类，使类可以自定义cookies文件
170 | - [x] 收藏夹关注者，问题关注者等等
171 | - [x] ``ZhihuClient``\ 增加各种用户操作（比如给某答案点赞）
172 | - [ ] Unittest （因为知乎可能会变，所以这个有点难
173 | - [x] 增加获取用户关注专栏数和关注专栏的功能
174 | - [x] 增加获取用户关注话题数和关注话题的功能
175 | - [x] 评论类也要慢慢提上议程了吧
176 | 
177 | 联系我
178 | ------
179 | 
180 | Github：\ `@7sDream <https://github.com/7sDream>`__
181 | 
182 | 知乎：\ `@7sDream <http://www.zhihu.com/people/7sdream>`__
183 | 
184 | 新浪微博：\ `@Dilover <http://weibo.com/didilover>`__
185 | 
186 | 邮箱：\ `给我发邮件 <mailto:xixihaha.xiha@qq.com>`__
187 | 
188 | 编程交流群：478786205
189 | 
190 | .. |Author| image:: https://img.shields.io/badge/Author-7sDream-blue.svg
191 |    :target: https://github.com/7sDream
192 | .. |DocumentationStatus| image:: https://readthedocs.org/projects/zhihu-py3/badge/?version=latest
193 |    :target: https://readthedocs.org/projects/zhihu-py3/?badge=latest
194 | .. |PypiVersion| image:: https://img.shields.io/pypi/v/zhihu-py3.svg
195 |    :target: https://pypi.python.org/pypi/zhihu-py3
196 | .. |PypiDownloadStatus| image:: https://img.shields.io/pypi/dd/zhihu-py3.svg
197 |    :target: https://pypi.python.org/pypi/zhihu-py3
198 | .. |License| image:: https://img.shields.io/pypi/l/zhihu-py3.svg
199 |    :target: https://github.com/7sDream/zhihu-py3/blob/master/LICENSE
200 | .. |Build| image:: https://travis-ci.org/7sDream/zhihu-py3.svg?branch=dev
201 |    :target: https://travis-ci.org/7sDream/zhihu-py3
202 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 
 49 | clean:
 50 | 	rm -rf $(BUILDDIR)/*
 51 | 
 52 | html:
 53 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 54 | 	@echo
 55 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 56 | 
 57 | dirhtml:
 58 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 61 | 
 62 | singlehtml:
 63 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 66 | 
 67 | pickle:
 68 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 69 | 	@echo
 70 | 	@echo "Build finished; now you can process the pickle files."
 71 | 
 72 | json:
 73 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 74 | 	@echo
 75 | 	@echo "Build finished; now you can process the JSON files."
 76 | 
 77 | htmlhelp:
 78 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 79 | 	@echo
 80 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 81 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 82 | 
 83 | qthelp:
 84 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 85 | 	@echo
 86 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 87 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 88 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/zhihu-py3.qhcp"
 89 | 	@echo "To view the help file:"
 90 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zhihu-py3.qhc"
 91 | 
 92 | devhelp:
 93 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 94 | 	@echo
 95 | 	@echo "Build finished."
 96 | 	@echo "To view the help file:"
 97 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/zhihu-py3"
 98 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/zhihu-py3"
 99 | 	@echo "# devhelp"
100 | 
101 | epub:
102 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
103 | 	@echo
104 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 
106 | latex:
107 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
108 | 	@echo
109 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
110 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
111 | 	      "(use \`make latexpdf' here to do that automatically)."
112 | 
113 | latexpdf:
114 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
115 | 	@echo "Running LaTeX files through pdflatex..."
116 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
117 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
118 | 
119 | latexpdfja:
120 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
121 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
122 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
123 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
124 | 
125 | text:
126 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
127 | 	@echo
128 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
129 | 
130 | man:
131 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
132 | 	@echo
133 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
134 | 
135 | texinfo:
136 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
137 | 	@echo
138 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
139 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
140 | 	      "(use \`make info' here to do that automatically)."
141 | 
142 | info:
143 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
144 | 	@echo "Running Texinfo files through makeinfo..."
145 | 	make -C $(BUILDDIR)/texinfo info
146 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
147 | 
148 | gettext:
149 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
150 | 	@echo
151 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
152 | 
153 | changes:
154 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
155 | 	@echo
156 | 	@echo "The overview file is in $(BUILDDIR)/changes."
157 | 
158 | linkcheck:
159 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
160 | 	@echo
161 | 	@echo "Link check complete; look for any errors in the above output " \
162 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 
164 | doctest:
165 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
166 | 	@echo "Testing of doctests in the sources finished, look at the " \
167 | 	      "results in $(BUILDDIR)/doctest/output.txt."
168 | 
169 | xml:
170 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
171 | 	@echo
172 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
173 | 
174 | pseudoxml:
175 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
176 | 	@echo
177 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
178 | 


--------------------------------------------------------------------------------
/docs/activity.rst:
--------------------------------------------------------------------------------
 1 | Activity and ActType 用户动态类
 2 | ===============================
 3 | 
 4 | ..  autoclass:: zhihu.activity.Activity
 5 |     :members:
 6 |     :special-members: __init__
 7 | 
 8 | ..  autoclass:: zhihu.acttype.ActType
 9 |     :members:
10 |     :special-members: __init__
11 | 


--------------------------------------------------------------------------------
/docs/answer.rst:
--------------------------------------------------------------------------------
1 | Answer 答案类
2 | =============
3 | 
4 | ..  autoclass:: zhihu.answer.Answer
5 |     :members:
6 |     :special-members: __init__
7 | 
8 | 


--------------------------------------------------------------------------------
/docs/author.rst:
--------------------------------------------------------------------------------
 1 | Author 用户类
 2 | =============
 3 | 
 4 | ..  autoclass:: zhihu.author.Author
 5 |     :members:
 6 |     :special-members: __init__
 7 | 
 8 | .. autodata:: zhihu.author.ANONYMOUS
 9 |    :annotation:
10 | 


--------------------------------------------------------------------------------
/docs/classes.rst:
--------------------------------------------------------------------------------
 1 | 知乎相关类文档
 2 | ==============
 3 | 
 4 | ..  toctree::
 5 | 
 6 |     client
 7 |     activity
 8 |     answer
 9 |     author
10 |     collection
11 |     column
12 |     comment
13 |     me
14 |     post
15 |     question
16 |     topic
17 | 


--------------------------------------------------------------------------------
/docs/client.rst:
--------------------------------------------------------------------------------
1 | ZhihuClient 知乎客户端类
2 | ========================
3 | 
4 | ..  autoclass:: zhihu.client.ZhihuClient
5 |     :members:
6 |     :special-members: __init__, __getattr__
7 | 


--------------------------------------------------------------------------------
/docs/collection.rst:
--------------------------------------------------------------------------------
 1 | Collection 收藏夹类
 2 | ===================
 3 | 
 4 | ..  autoclass:: zhihu.collection.Collection
 5 |     :members:
 6 |     :special-members: __init__
 7 | 
 8 | ..  autoclass:: zhihu.collection.CollectActivity
 9 |     :members:
10 |     :special-members: __init__
11 | 
12 | ..  autoclass:: zhihu.acttype.CollectActType
13 |     :members:
14 | 


--------------------------------------------------------------------------------
/docs/column.rst:
--------------------------------------------------------------------------------
1 | Columu 专栏类
2 | =============
3 | 
4 | ..  autoclass:: zhihu.column.Column
5 |     :members:
6 |     :special-members: __init__
7 | 


--------------------------------------------------------------------------------
/docs/comment.rst:
--------------------------------------------------------------------------------
1 | Comment 评论类
2 | ===============
3 | 
4 | ..  autoclass:: zhihu.comment.Comment
5 |     :members:
6 |     :special-members: __init__
7 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # zhihu-py3 documentation build configuration file, created by
  5 | # sphinx-quickstart on Sun Feb 22 23:01:19 2015.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | import sys
 17 | import os
 18 | 
 19 | # If extensions (or modules to document with autodoc) are in another directory,
 20 | # add these directories to sys.path here. If the directory is relative to the
 21 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 22 | sys.path.insert(0, os.path.abspath('..'))
 23 | 
 24 | # -- General configuration ------------------------------------------------
 25 | 
 26 | # If your documentation needs a minimal Sphinx version, state it here.
 27 | #needs_sphinx = '1.0'
 28 | 
 29 | # Add any Sphinx extension module names here, as strings. They can be
 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 31 | # ones.
 32 | extensions = [
 33 |     'sphinx.ext.autodoc',
 34 |     'sphinx.ext.viewcode',
 35 | ]
 36 | 
 37 | # Add any paths that contain templates here, relative to this directory.
 38 | templates_path = ['_templates']
 39 | 
 40 | # The suffix of source filenames.
 41 | source_suffix = '.rst'
 42 | 
 43 | # The encoding of source files.
 44 | #source_encoding = 'utf-8-sig'
 45 | 
 46 | # The master toctree document.
 47 | master_doc = 'index'
 48 | 
 49 | # General information about the project.
 50 | project = 'zhihu-py3'
 51 | copyright = '2015, 7sDream'
 52 | 
 53 | # The version info for the project you're documenting, acts as replacement for
 54 | # |version| and |release|, also used in various other places throughout the
 55 | # built documents.
 56 | #
 57 | # The short X.Y version.
 58 | version = '0.1'
 59 | # The full version, including alpha/beta/rc tags.
 60 | release = '0.1'
 61 | 
 62 | # The language for content autogenerated by Sphinx. Refer to documentation
 63 | # for a list of supported languages.
 64 | #language = None
 65 | 
 66 | # There are two options for replacing |today|: either, you set today to some
 67 | # non-false value, then it is used:
 68 | #today = ''
 69 | # Else, today_fmt is used as the format for a strftime call.
 70 | #today_fmt = '%B %d, %Y'
 71 | 
 72 | # List of patterns, relative to source directory, that match files and
 73 | # directories to ignore when looking for source files.
 74 | exclude_patterns = ['_build']
 75 | 
 76 | # The reST default role (used for this markup: `text`) to use for all
 77 | # documents.
 78 | #default_role = None
 79 | 
 80 | # If true, '()' will be appended to :func: etc. cross-reference text.
 81 | #add_function_parentheses = True
 82 | 
 83 | # If true, the current module name will be prepended to all description
 84 | # unit titles (such as .. function::).
 85 | #add_module_names = True
 86 | 
 87 | # If true, sectionauthor and moduleauthor directives will be shown in the
 88 | # output. They are ignored by default.
 89 | #show_authors = False
 90 | 
 91 | # The name of the Pygments (syntax highlighting) style to use.
 92 | pygments_style = 'sphinx'
 93 | 
 94 | # A list of ignored prefixes for module index sorting.
 95 | #modindex_common_prefix = []
 96 | 
 97 | # If true, keep warnings as "system message" paragraphs in the built documents.
 98 | #keep_warnings = False
 99 | 
100 | 
101 | # -- Options for HTML output ----------------------------------------------
102 | 
103 | # The theme to use for HTML and HTML Help pages.  See the documentation for
104 | # a list of builtin themes.
105 | # on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org
106 | 
107 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
108 | 
109 | if not on_rtd:  # only import and set the theme if we're building docs locally
110 |     import sphinx_rtd_theme
111 |     html_theme = 'sphinx_rtd_theme'
112 |     html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
113 | else:
114 |     html_theme = 'default'
115 | 
116 | # Theme options are theme-specific and customize the look and feel of a theme
117 | # further.  For a list of options available for each theme, see the
118 | # documentation.
119 | #html_theme_options = {}
120 | 
121 | # Add any paths that contain custom themes here, relative to this directory.
122 | #html_theme_path = []
123 | 
124 | # The name for this set of Sphinx documents.  If None, it defaults to
125 | # "<project> v<release> documentation".
126 | #html_title = None
127 | 
128 | # A shorter title for the navigation bar.  Default is the same as html_title.
129 | #html_short_title = None
130 | 
131 | # The name of an image file (relative to this directory) to place at the top
132 | # of the sidebar.
133 | #html_logo = None
134 | 
135 | # The name of an image file (within the static path) to use as favicon of the
136 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
137 | # pixels large.
138 | #html_favicon = None
139 | 
140 | # Add any paths that contain custom static files (such as style sheets) here,
141 | # relative to this directory. They are copied after the builtin static files,
142 | # so a file named "default.css" will overwrite the builtin "default.css".
143 | html_static_path = ['_static']
144 | 
145 | # Add any extra paths that contain custom files (such as robots.txt or
146 | # .htaccess) here, relative to this directory. These files are copied
147 | # directly to the root of the documentation.
148 | #html_extra_path = []
149 | 
150 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
151 | # using the given strftime format.
152 | #html_last_updated_fmt = '%b %d, %Y'
153 | 
154 | # If true, SmartyPants will be used to convert quotes and dashes to
155 | # typographically correct entities.
156 | #html_use_smartypants = True
157 | 
158 | # Custom sidebar templates, maps document names to template names.
159 | #html_sidebars = {}
160 | 
161 | # Additional templates that should be rendered to pages, maps page names to
162 | # template names.
163 | #html_additional_pages = {}
164 | 
165 | # If false, no module index is generated.
166 | #html_domain_indices = True
167 | 
168 | # If false, no index is generated.
169 | #html_use_index = True
170 | 
171 | # If true, the index is split into individual pages for each letter.
172 | #html_split_index = False
173 | 
174 | # If true, links to the reST sources are added to the pages.
175 | #html_show_sourcelink = True
176 | 
177 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
178 | #html_show_sphinx = True
179 | 
180 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
181 | #html_show_copyright = True
182 | 
183 | # If true, an OpenSearch description file will be output, and all pages will
184 | # contain a <link> tag referring to it.  The value of this option must be the
185 | # base URL from which the finished HTML is served.
186 | #html_use_opensearch = ''
187 | 
188 | # This is the file name suffix for HTML files (e.g. ".xhtml").
189 | #html_file_suffix = None
190 | 
191 | # Output file base name for HTML help builder.
192 | htmlhelp_basename = 'zhihu-py3doc'
193 | 
194 | 
195 | # -- Options for LaTeX output ---------------------------------------------
196 | 
197 | latex_elements = {
198 | # The paper size ('letterpaper' or 'a4paper').
199 | #'papersize': 'letterpaper',
200 | 
201 | # The font size ('10pt', '11pt' or '12pt').
202 | #'pointsize': '10pt',
203 | 
204 | # Additional stuff for the LaTeX preamble.
205 | #'preamble': '',
206 | }
207 | 
208 | # Grouping the document tree into LaTeX files. List of tuples
209 | # (source start file, target name, title,
210 | #  author, documentclass [howto, manual, or own class]).
211 | latex_documents = [
212 |   ('index', 'zhihu-py3.tex', 'zhihu-py3 Documentation',
213 |    '7sDream', 'manual'),
214 | ]
215 | 
216 | # The name of an image file (relative to this directory) to place at the top of
217 | # the title page.
218 | #latex_logo = None
219 | 
220 | # For "manual" documents, if this is true, then toplevel headings are parts,
221 | # not chapters.
222 | #latex_use_parts = False
223 | 
224 | # If true, show page references after internal links.
225 | #latex_show_pagerefs = False
226 | 
227 | # If true, show URL addresses after external links.
228 | #latex_show_urls = False
229 | 
230 | # Documents to append as an appendix to all manuals.
231 | #latex_appendices = []
232 | 
233 | # If false, no module index is generated.
234 | #latex_domain_indices = True
235 | 
236 | 
237 | # -- Options for manual page output ---------------------------------------
238 | 
239 | # One entry per manual page. List of tuples
240 | # (source start file, name, description, authors, manual section).
241 | man_pages = [
242 |     ('index', 'zhihu-py3', 'zhihu-py3 Documentation',
243 |      ['7sDream'], 1)
244 | ]
245 | 
246 | # If true, show URL addresses after external links.
247 | #man_show_urls = False
248 | 
249 | 
250 | # -- Options for Texinfo output -------------------------------------------
251 | 
252 | # Grouping the document tree into Texinfo files. List of tuples
253 | # (source start file, target name, title, author,
254 | #  dir menu entry, description, category)
255 | texinfo_documents = [
256 |   ('index', 'zhihu-py3', 'zhihu-py3 Documentation',
257 |    '7sDream', 'zhihu-py3', 'One line description of project.',
258 |    'Miscellaneous'),
259 | ]
260 | 
261 | # Documents to append as an appendix to all manuals.
262 | #texinfo_appendices = []
263 | 
264 | # If false, no module index is generated.
265 | #texinfo_domain_indices = True
266 | 
267 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
268 | #texinfo_show_urls = 'footnote'
269 | 
270 | # If true, do not generate a @detailmenu in the "Top" node's menu.
271 | #texinfo_no_detailmenu = False
272 | 


--------------------------------------------------------------------------------
/docs/examples.rst:
--------------------------------------------------------------------------------
  1 | ========
  2 | 用法示例
  3 | ========
  4 | 
  5 | ..  contents:: 目录
  6 |     :local:
  7 | 
  8 | 
  9 | 获取某用户的基本信息
 10 | ====================
 11 | 
 12 | ..  code-block:: python
 13 |     :linenos:
 14 |    
 15 |     from zhihu import ZhihuClient
 16 | 
 17 |     Cookies_File = 'cookies.json'
 18 | 
 19 |     client = ZhihuClient(Cookies_File)
 20 | 
 21 |     url = 'http://www.zhihu.com/people/excited-vczh'
 22 |     author = client.author(url)
 23 | 
 24 |     print('用户名 %s' % author.name)
 25 |     print('用户简介 %s' % author.motto)
 26 |     print('用户关注人数 %d' % author.followee_num)
 27 |     print('取用户粉丝数 %d' % author.follower_num)
 28 |     print('用户得到赞同数 %d' % author.upvote_num)
 29 |     print('用户得到感谢数 %d' % author.thank_num)
 30 |     print('用户提问数 %d' % author.question_num)
 31 |     print('用户答题数 %d' % author.answer_num)
 32 | 
 33 |     print('用户专栏文章数 %d，名称分别为：' % author.post_num)
 34 |     for column in author.columns:
 35 |       print(column.name)
 36 |     print('用户收藏夹数 %d，名称分别为：' % author.collection_num)
 37 |     for collection in author.collections:
 38 |       print(collection.name)
 39 | 
 40 | ..  code-block:: none
 41 |     :linenos:
 42 | 
 43 |     用户名 vczh
 44 |     用户简介 专业造轮子 https://github.com/vczh-libraries
 45 |     用户关注人数 1339
 46 |     取用户粉丝数 128100
 47 |     用户得到赞同数 320326
 48 |     用户得到感谢数 43045
 49 |     用户提问数 238
 50 |     用户答题数 8392
 51 |     用户专栏文章数 25，名称分别为：
 52 |     vczh的日常
 53 |     深井冰 IT 评论
 54 |     编程语言与高级语言虚拟机杂谈（仮）
 55 |     蓝色小药丸
 56 |     用户收藏夹数 1，名称分别为：
 57 |     李老师牛逼的答案
 58 | 
 59 | 备份某问题所有答案
 60 | ==================
 61 | ..  code-block:: python
 62 |     :linenos:
 63 |    
 64 |     question = client.question('http://www.zhihu.com/question/28092572')
 65 |     for answer in question.answers:
 66 |        answer.save()
 67 |        
 68 | 会在当前目录下新建以问题标题命名的文件夹，并将所有html文件保存到该文件夹。
 69 | 
 70 | ..  code-block:: python
 71 | 
 72 |     answer.save(mode="md")
 73 | 
 74 | 会保存为markdown格式。
 75 | 
 76 | 备份某用户所有答案
 77 | ==================
 78 | 
 79 | ..  code-block:: python
 80 |     :linenos:
 81 | 
 82 |     author = client.author('http://www.zhihu.com/people/7sdream')
 83 |     for answer in author.answers:
 84 |        answer.save(filepath=author.name)
 85 | 
 86 | 备份某收藏夹所有答案，备份专栏文章同理，不再举例。
 87 | 
 88 | 获取某用户点赞的动态
 89 | ====================
 90 | 
 91 | ..  code-block:: python
 92 |     :linenos:
 93 | 
 94 |     author = zhihu.author('http://www.zhihu.com/people/excited-vczh')
 95 |     for act in author.activities:
 96 |        if act.type == zhihu.ActType.UPVOTE_ANSWER:
 97 |            print('%s 在 %s 赞同了问题 %s 中 %s(motto: %s) 的回答, '
 98 |                  '此回答赞同数 %d' %
 99 |                  (author.name, act.time, act.answer.question.title,
100 |                   act.answer.author.name, act.answer.author.motto,
101 |                   act.answer.upvote_num))
102 | 
103 | ..  code-block:: none
104 | 
105 |     vczh 在 2015-07-24 08:35:06 赞同了问题 女生夏天穿超短裙是一种什么样的体验？ 中 Light(motto: 我城故事多。) 的回答, 此回答赞同数 43
106 |     vczh 在 2015-07-24 08:34:30 赞同了问题 女生夏天穿超短裙是一种什么样的体验？ 中 Ms狐狸(motto: 随便写来玩玩) 的回答, 此回答赞同数 57
107 |     ……
108 | 
109 | 获取用户关注的人和关注此用户的人
110 | ================================
111 | 
112 | ..  code-block:: python
113 |     :linenos:
114 | 
115 |     author = client.author('http://www.zhihu.com/people/7sdream')
116 | 
117 |     print('--- Followers ---')
118 |     for follower in author.followers:
119 |        print(follower.name)
120 | 
121 |     print('--- Followees ---')
122 |     for followee in author.followees:
123 |        print(followee.name)
124 | 
125 | ..  code-block:: none
126 | 
127 |     --- Followers ---
128 |     yuwei
129 |     falling
130 |     周非
131 |     ...
132 |     --- Followees ---
133 |     yuwei
134 |     falling
135 |     伍声
136 |     ...
137 | 
138 | 计算某答案点赞中三零用户比例
139 | ============================
140 | 
141 | ..  code-block:: python
142 |     :linenos:
143 |    
144 |     url = 'http://www.zhihu.com/question/30404450/answer/47939822'
145 |     answer = client.answer(url)
146 | 
147 |     three_zero_user_num = 0
148 | 
149 |     for upvoter in answer.upvoters:
150 |        print(upvoter.name, upvoter.upvote_num, upvoter.thank_num,
151 |              upvoter.question_num, upvoter.answer_num)
152 |        if upvoter.is_zero_user():
153 |            three_zero_user_num += 1
154 | 
155 |     print('\n三零用户比例 %.3f%%' % (three_zero_user_num / answer.upvote_num * 100))
156 |    
157 | ..  code-block:: none
158 | 
159 |     ...
160 |     宋飞 0 0 0 0
161 |     唐吃藕 10 0 0 5
162 | 
163 |     三零用户比例 26.852%
164 | 
165 | 爬取某用户关注的人的头像
166 | ========================
167 | 
168 | ..  code-block:: python
169 | 
170 |     import requests
171 |     import os
172 |     import imghdr
173 | 
174 |     author = client.author('http://www.zhihu.com/people/excited-vczh')
175 | 
176 |     os.mkdir('vczh')
177 |     for followee in author.followees:
178 |        try:
179 |            filename = followee.name + ' - ' + followee.id + '.jpeg'
180 |            print(filename)
181 |            with open('vczh/' + filename, 'wb') as f:
182 |                f.write(requests.get(followee.photo_url).content)
183 |        except KeyboardInterrupt:
184 |            break
185 | 
186 |     for root, dirs, files in os.walk('vczh'):
187 |        for filename in files:
188 |            filename = os.path.join(root, filename)
189 |            img_type = imghdr.what(filename)
190 |            if img_type != 'jpeg' and img_type is not None:
191 |                print(filename, '--->', img_type)
192 |                os.rename(filename, filename[:-4] + img_type)
193 | 
194 | 效果见 `这里
195 | <http://pan.baidu.com/s/1i3nLgpB>`_。
196 | 
197 | 
198 | 使用非阻塞的网络请求
199 | ====================
200 | 
201 | 内建的所有请求都是阻塞的, 如果你希望使用其他的网络请求方法, 你可以把请求到的数据传入相关类的 `from_html` 方法中.
202 | `from_html` 方法用于接受数据, 返回相应的类的实例.
203 | 
204 | 这里以使用 aiohttp 为例, 使用的是 python3.5 之后引入的语法. 无需置疑, 你要自己处理 session
205 | 
206 | 比如要获取一个答案.
207 | 
208 | ..  code-block:: python
209 | 
210 |     import aiohttp
211 |     import asyncio
212 |     import zhihu
213 | 
214 | 
215 |     async def get_answer(url, cookies, headers):
216 |         async with aiohttp.get(url, cookies=cookies, headers=headers) as r:
217 |             data = await r.text()
218 | 
219 |         # from_html 是 classmethod
220 |         answer = zhihu.Answer.from_html(data)
221 | 
222 |         print(answer.content)
223 | 
224 |     url = 'answer url'
225 |     cookies = dict(client._session.cookies)
226 |     headers = client._session.headers
227 | 
228 |     loop = asyncio.get_event_loop()
229 |     loop.run_until_complete(get_answer(url, cookies, headers))
230 | 
231 | 
232 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. zhihu-py3 documentation master file, created by
 2 |    sphinx-quickstart on Sun Feb 22 23:01:19 2015.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to zhihu-py3's documentation!
 7 | =====================================
 8 | 
 9 | Contents:
10 | 
11 | ..  toctree::
12 |     :maxdepth: 2
13 | 
14 |     install
15 |     examples
16 |     login
17 |     classes
18 | 
19 | Indices and tables
20 | ==================
21 | 
22 | * :ref:`genindex`
23 | * :ref:`modindex`
24 | * :ref:`search`
25 | 


--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
 1 | ==========
 2 | 安装和测试
 3 | ==========
 4 | 
 5 | 
 6 | pip安装（推荐）
 7 | ===============
 8 | 
 9 | .. code-block:: bash
10 | 
11 |    (sudo) pip(3) install (--upgrade) zhihu-py3
12 | 
13 | 如果想同时安装lxml，获得更快的解析速度、容错率和美观程度，请开启lxml feature：
14 | 
15 | .. code-block:: bash
16 |    
17 |    (sudo) pip(3) install (--upgrade) zhihu-py3[lxml]
18 | 
19 | 
20 | 源码安装
21 | ========
22 | 依赖于beautifulsoup4、requests、html2text，会自动安装。
23 | 
24 | ..  code-block:: bash
25 | 
26 |     git clone https://github.com/7sDream/zhihu-py3.git
27 |     cd zhihu-py3
28 |     python(3) setup.py install
29 | 
30 | 
31 | 测试
32 | ====
33 | 
34 | 若是使用源码安装，则安装完成后可以进行一下测试
35 | 
36 | ..  code-block:: bash
37 | 
38 |     cd test
39 |     python(3) zhihu-test.py
40 | 


--------------------------------------------------------------------------------
/docs/login.rst:
--------------------------------------------------------------------------------
 1 | 登录方法综述：
 2 | ==============
 3 | 
 4 | create\_cookies
 5 | ~~~~~~~~~~~~~~~
 6 | 
 7 | 用于生成 cookies，用法见前面的介绍。
 8 | 
 9 | login\_with\_cookies
10 | ~~~~~~~~~~~~~~~~~~~~
11 | 
12 | 用cookies字符串或文件名登录，\ ``ZhihuClient``\ 的构造函数就是使用这个方法。
13 | 
14 | get\_captcha
15 | ~~~~~~~~~~~~
16 | 
17 | 获取验证码数据（bytes二进制数据），当用于其他项目时方便手动获取验证码图片数据进行处理，比如显示在控件内。
18 | 
19 | login
20 | ~~~~~
21 | 
22 | 手动登陆方法，用于其他项目中方便手动无需 cookies 登陆，参数为：
23 | 
24 | -  email
25 | -  password
26 | -  captcha
27 | 
28 | 返回值有三个
29 | 
30 | -  code：成功为0，失败为1
31 | -  msg：错误消息，字符串格式，成功为空
32 | -  cookies：cookies数据，字符串格式，失败为空
33 | 
34 | login\_in\_terminal
35 | ~~~~~~~~~~~~~~~~~~~
36 | 
37 | 跟着提示在终端里登录知乎，返回cookies字符串，create\_cookies就是帮你做了将这个函数的返回值保存下来的工作而已。
38 | 
39 | 综上
40 | ~~~~
41 | 
42 | 如果你只是写个小脚本测试玩玩，可以使用：
43 | 
44 | ..  code-block:: python
45 | 
46 |     from zhihu import ZhihuClient
47 |     client = ZhihuClient()
48 |     client.login_in_terminal()
49 | 
50 |     # do thing you want with client
51 | 
52 | 如果你的脚本不是大项目，又要多次运行，可以先按照上文方法create\_cookies，再使用：
53 | 
54 | ..  code-block:: python
55 | 
56 |     from zhihu import ZhihuClient
57 |     Cookies_File = 'cookies.json'
58 |     client = ZhihuClient(Cookies_File)
59 | 
60 | 如果项目比较大（以GUI项目为例），可以在判断出是首次使用（没有cookies文件）时，弹出登录对话框，使用get\_captcha获取验证码数据，再调用login函数手动登录并在登录成功后保存cookies文件：
61 | 
62 | ..  code-block:: python
63 | 
64 |     import os
65 |     from zhihu import ZhihuClient
66 | 
67 |     Cookies_File = 'config/cookies.json'
68 | 
69 |     client = ZhihuClient()
70 | 
71 |     def on_window_show()
72 |         login_btn.disable()
73 |         if os.path.isfile(Cookies_File) is False:
74 |             captcha_imgbox.setData(client.get_captcha())
75 |             login_btn.enable()
76 |         else:
77 |             with open(Cookies_File) as f
78 |                 client.login_with_cookies(f.read())
79 |             # turn to main window
80 | 
81 |     def on_login_button_clicked():
82 |         login_btn.disable()
83 |         email = email_edit.get_text()
84 |         password = password_edit.get_text()
85 |         captcha = captcha_edit.get_text()
86 |         code, msg, cookies = client.login(email, password, captcha)
87 |         if code == 0:
88 |             with open(Cookies_File, 'w') as f
89 |                 f.write(cookies)
90 |             # turn to main window
91 |         else:
92 |             msgbox(msg)
93 |             login_btn.enable()
94 | 
95 | 注：以上和GUI有关的代码皆为我乱想出来的，仅作示例之用。
96 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=_build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 31 | 	echo.  text       to make text files
 32 | 	echo.  man        to make manual pages
 33 | 	echo.  texinfo    to make Texinfo files
 34 | 	echo.  gettext    to make PO message catalogs
 35 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 36 | 	echo.  xml        to make Docutils-native XML files
 37 | 	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
 38 | 	echo.  linkcheck  to check all external links for integrity
 39 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 40 | 	goto end
 41 | )
 42 | 
 43 | if "%1" == "clean" (
 44 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 45 | 	del /q /s %BUILDDIR%\*
 46 | 	goto end
 47 | )
 48 | 
 49 | 
 50 | %SPHINXBUILD% 2> nul
 51 | if errorlevel 9009 (
 52 | 	echo.
 53 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 54 | 	echo.installed, then set the SPHINXBUILD environment variable to point
 55 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 56 | 	echo.may add the Sphinx directory to PATH.
 57 | 	echo.
 58 | 	echo.If you don't have Sphinx installed, grab it from
 59 | 	echo.http://sphinx-doc.org/
 60 | 	exit /b 1
 61 | )
 62 | 
 63 | if "%1" == "html" (
 64 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 65 | 	if errorlevel 1 exit /b 1
 66 | 	echo.
 67 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 68 | 	goto end
 69 | )
 70 | 
 71 | if "%1" == "dirhtml" (
 72 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 73 | 	if errorlevel 1 exit /b 1
 74 | 	echo.
 75 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 76 | 	goto end
 77 | )
 78 | 
 79 | if "%1" == "singlehtml" (
 80 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 81 | 	if errorlevel 1 exit /b 1
 82 | 	echo.
 83 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 84 | 	goto end
 85 | )
 86 | 
 87 | if "%1" == "pickle" (
 88 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 89 | 	if errorlevel 1 exit /b 1
 90 | 	echo.
 91 | 	echo.Build finished; now you can process the pickle files.
 92 | 	goto end
 93 | )
 94 | 
 95 | if "%1" == "json" (
 96 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 97 | 	if errorlevel 1 exit /b 1
 98 | 	echo.
 99 | 	echo.Build finished; now you can process the JSON files.
100 | 	goto end
101 | )
102 | 
103 | if "%1" == "htmlhelp" (
104 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
105 | 	if errorlevel 1 exit /b 1
106 | 	echo.
107 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
108 | .hhp project file in %BUILDDIR%/htmlhelp.
109 | 	goto end
110 | )
111 | 
112 | if "%1" == "qthelp" (
113 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
114 | 	if errorlevel 1 exit /b 1
115 | 	echo.
116 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
117 | .qhcp project file in %BUILDDIR%/qthelp, like this:
118 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\zhihu-py3.qhcp
119 | 	echo.To view the help file:
120 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\zhihu-py3.ghc
121 | 	goto end
122 | )
123 | 
124 | if "%1" == "devhelp" (
125 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
126 | 	if errorlevel 1 exit /b 1
127 | 	echo.
128 | 	echo.Build finished.
129 | 	goto end
130 | )
131 | 
132 | if "%1" == "epub" (
133 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
134 | 	if errorlevel 1 exit /b 1
135 | 	echo.
136 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
137 | 	goto end
138 | )
139 | 
140 | if "%1" == "latex" (
141 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
142 | 	if errorlevel 1 exit /b 1
143 | 	echo.
144 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
145 | 	goto end
146 | )
147 | 
148 | if "%1" == "latexpdf" (
149 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
150 | 	cd %BUILDDIR%/latex
151 | 	make all-pdf
152 | 	cd %BUILDDIR%/..
153 | 	echo.
154 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
155 | 	goto end
156 | )
157 | 
158 | if "%1" == "latexpdfja" (
159 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
160 | 	cd %BUILDDIR%/latex
161 | 	make all-pdf-ja
162 | 	cd %BUILDDIR%/..
163 | 	echo.
164 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
165 | 	goto end
166 | )
167 | 
168 | if "%1" == "text" (
169 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
170 | 	if errorlevel 1 exit /b 1
171 | 	echo.
172 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
173 | 	goto end
174 | )
175 | 
176 | if "%1" == "man" (
177 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
178 | 	if errorlevel 1 exit /b 1
179 | 	echo.
180 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
181 | 	goto end
182 | )
183 | 
184 | if "%1" == "texinfo" (
185 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
186 | 	if errorlevel 1 exit /b 1
187 | 	echo.
188 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
189 | 	goto end
190 | )
191 | 
192 | if "%1" == "gettext" (
193 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
194 | 	if errorlevel 1 exit /b 1
195 | 	echo.
196 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
197 | 	goto end
198 | )
199 | 
200 | if "%1" == "changes" (
201 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
202 | 	if errorlevel 1 exit /b 1
203 | 	echo.
204 | 	echo.The overview file is in %BUILDDIR%/changes.
205 | 	goto end
206 | )
207 | 
208 | if "%1" == "linkcheck" (
209 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
210 | 	if errorlevel 1 exit /b 1
211 | 	echo.
212 | 	echo.Link check complete; look for any errors in the above output ^
213 | or in %BUILDDIR%/linkcheck/output.txt.
214 | 	goto end
215 | )
216 | 
217 | if "%1" == "doctest" (
218 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
219 | 	if errorlevel 1 exit /b 1
220 | 	echo.
221 | 	echo.Testing of doctests in the sources finished, look at the ^
222 | results in %BUILDDIR%/doctest/output.txt.
223 | 	goto end
224 | )
225 | 
226 | if "%1" == "xml" (
227 | 	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
228 | 	if errorlevel 1 exit /b 1
229 | 	echo.
230 | 	echo.Build finished. The XML files are in %BUILDDIR%/xml.
231 | 	goto end
232 | )
233 | 
234 | if "%1" == "pseudoxml" (
235 | 	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
236 | 	if errorlevel 1 exit /b 1
237 | 	echo.
238 | 	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
239 | 	goto end
240 | )
241 | 
242 | :end
243 | 


--------------------------------------------------------------------------------
/docs/me.rst:
--------------------------------------------------------------------------------
1 | Me 用户操作类
2 | ===============
3 | 
4 | **敬告：本类提供的点赞，反对功能，请在使用前三思，并且绝对不要用于批量点赞，批量反对等不甚道德的脚本。和谐知乎，你我共建，谢谢理解。**
5 | 
6 | ..  autoclass:: zhihu.me.Me
7 |     :members:
8 |     :special-members: __init__
9 | 


--------------------------------------------------------------------------------
/docs/post.rst:
--------------------------------------------------------------------------------
1 | Post 专栏文章类
2 | ===============
3 | 
4 | ..  autoclass:: zhihu.post.Post
5 |     :members:
6 |     :special-members: __init__
7 | 


--------------------------------------------------------------------------------
/docs/question.rst:
--------------------------------------------------------------------------------
1 | Question 问题类
2 | ===============
3 | 
4 | ..  autoclass:: zhihu.question.Question
5 |     :members:
6 |     :special-members: __init__
7 | 
8 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | beautifulsoup4


--------------------------------------------------------------------------------
/docs/topic.rst:
--------------------------------------------------------------------------------
1 | Topic 话题类
2 | ============
3 | 
4 | ..  autoclass:: zhihu.topic.Topic
5 |     :members:
6 |     :special-members: __init__
7 | 


--------------------------------------------------------------------------------
/example/analyze_user.py:
--------------------------------------------------------------------------------
 1 | """
 2 | What's this:
 3 |     | this is an Example of zhihu-py3 to analyze is user bought some fans.
 4 | 
 5 | Usage:
 6 |     | 1. copy your cookies file to the dir where me located
 7 |     | 2. change USER_URL at line 12 to the user's home page url.
 8 |     | 3. var FOLLOWER_CHECK_MAX_NUM defined how many newest follower will be checked.
 9 |     | 4. var ANSWER_CHECK_MAX_NUM defined how many newest answer of user will be checked.
10 |     | 5. just run me.
11 | 
12 | Info:
13 |     | if FOLLOWER_CHECK_MAX_NUM big than user's follower amount, it  will be auto set to user's follower amount.
14 | 
15 | Author:
16 |     | 7sDream @ 2015.12.19.
17 | """
18 | 
19 | from zhihu import ZhihuClient
20 | import datetime
21 | 
22 | # ==============================
23 | 
24 | USER_URL = "https://www.zhihu.com/people/7sdream"
25 | 
26 | FOLLOWER_CHECK_MAX_NUM = 2000
27 | ANSWER_CHECK_MAX_NUM = 20
28 | 
29 | # ==============================
30 | 
31 | 
32 | def is_zero_user(author):
33 |     return (author.upvote_num + author.question_num + author.answer_num) <= 3
34 | 
35 | 
36 | client = ZhihuClient('test.json')
37 | 
38 | user = client.author(USER_URL)
39 | 
40 | print("检查用户{user.name} at {time}".format(user=user, time=datetime.datetime.now()))
41 | 
42 | if user.follower_num < FOLLOWER_CHECK_MAX_NUM:
43 |     FOLLOWER_CHECK_MAX_NUM = user.follower_num
44 | 
45 | print("正在检查前{FOLLOWER_CHECK_MAX_NUM}个关注者....".format(**locals()))
46 | 
47 | zeros = 0
48 | for _, follower in zip(range(FOLLOWER_CHECK_MAX_NUM), user.followers):
49 |     if is_zero_user(follower):
50 |         zeros += 1
51 | 
52 | rate = zeros / FOLLOWER_CHECK_MAX_NUM
53 | print("{user.name}最近{FOLLOWER_CHECK_MAX_NUM}个关注者中，三无用户{zeros}个，占比{rate:.2%}".format(**locals()))
54 | 
55 | print("正在检查用户答案点赞者...")
56 | 
57 | for _, ans in zip(range(ANSWER_CHECK_MAX_NUM), user.answers):
58 |     zeros = 0
59 |     for upvoter in ans.upvoters:
60 |         if is_zero_user(upvoter):
61 |             zeros += 1
62 |     rate = zeros / ans.upvote_num if ans.upvote_num != 0 else 0
63 |     print("在问题「{ans.question.title}」{user.name}的答案中，共有{ans.upvote_num}个点赞用户，其中三无用户{zeros}个，三无用户比率{rate:.2%}。".format(**locals()))
64 | 


--------------------------------------------------------------------------------
/example/test.json:
--------------------------------------------------------------------------------
1 | {"unlock_ticket": "\"QUJETWRxZ3ZKQWtYQUFBQVlRSlZUUU9qYWxiVC1ENXU2WVhCejJDTlpFQ2FwQXBSdFpsaWxBPT0=|1449827323|4a3dc8a8f1add7d68f446d799ec86f192d9a4e83\"", "z_c0": "\"QUJETWRxZ3ZKQWtYQUFBQVlRSlZUZnNva2xhMEdldWRoVzVXZU1xUkEwd0VFNENxb0dXUHpnPT0=|1449827323|a2ecf0642368c1b43368bf83d5f2cc1ccbf6649a\"", "_xsrf": "1452764141227ef365f2d0c7695c0457", "cap_id": "\"NjY3ZmUxNGVlZDE2NGNmNGFlY2QzOTAzNTU0ZDVlOWI=|1449827304|723c630cd1c47d17fae91e416bbf522f6d2d31b0\"", "q_c1": "d5271065cdea4b029dfae6debf6e7832|1449827304000|1449827304000"}


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [wheel]
2 | python-tag=py3
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import unicode_literals
 5 | 
 6 | import re
 7 | import ast
 8 | 
 9 | try:
10 |     from setuptools import setup
11 | except ImportError:
12 |     from distutils.core import setup
13 | 
14 | 
15 | def extract_version():
16 |     with open('zhihu/__init__.py', 'rb') as f_version:
17 |         ast_tree = re.search(
18 |             r'__version__ = (.*)',
19 |             f_version.read().decode('utf-8')
20 |         ).group(1)
21 |         if ast_tree is None:
22 |             raise RuntimeError('Cannot find version information')
23 |         return str(ast.literal_eval(ast_tree))
24 | 
25 | 
26 | with open('README.rst', 'rb') as f_readme:
27 |     readme = f_readme.read().decode('utf-8')
28 | 
29 | packages = ['zhihu']
30 | 
31 | version = extract_version()
32 | 
33 | setup(
34 |     name='zhihu-py3',
35 |     version=version,
36 |     keywords=['zhihu', 'network', 'spider', 'html'],
37 |     description='Zhihu UNOFFICIAL API library in python3, '
38 |                 'with help of bs4, lxml, requests and html2text.',
39 |     long_description=readme,
40 | 
41 |     author='7sDream',
42 |     author_email='didislover@gmail.com',
43 |     license='MIT',
44 | 
45 |     url='https://github.com/7sDream/zhihu-py3',
46 |     download_url='https://github.com/7sDream/zhihu-py3',
47 | 
48 |     install_requires=[
49 |         'beautifulsoup4',
50 |         'requests',
51 |         'html2text'
52 |     ],
53 |     extras_require={
54 |         'lxml': ['lxml']
55 |     },
56 |     packages=packages,
57 | 
58 |     classifiers=[
59 |         'Development Status :: 3 - Alpha',
60 |         'Environment :: Web Environment',
61 |         'Intended Audience :: Developers',
62 |         'License :: OSI Approved :: MIT License',
63 |         'Operating System :: OS Independent',
64 |         'Programming Language :: Python :: 3',
65 |         'Topic :: Internet :: WWW/HTTP',
66 |         'Topic :: Software Development :: Libraries :: Python Modules'
67 |     ]
68 | )
69 | 


--------------------------------------------------------------------------------
/test/data/answer.md:
--------------------------------------------------------------------------------
 1 | 多谢刘柯的邀请！这也是个有意思的提问。   
 2 |   
 3 | 说说家庭文化吧。   
 4 |   
 5 | 家庭文化是一个家庭世代传承过程中形成和发展起来的 ** 较为稳定的生活方式、生活作风、传统习惯、家庭道德规范以及为人处世之道等等。 **   
 6 |   
 7 | 家庭是一个人生存的最早的文化环境，家庭的价值观是家庭文化的核心，有教育的功能。一个家庭中的父母扮演的就是施教者的角色，父母的价值观和文化素养将会对孩子的成长形成决定性的影响。   
 8 |   
 9 | 家庭文化对孩子的影响体现在意识形态的影响和行为规范方面。中国的传统家庭文化非常注重道德教育，强调每个人对家庭的责任和义务，要敬老爱幼等等。   
10 |   
11 | 象题主的家庭教育就很传统，父母教育题主在接受他人帮助的时候及时地道谢，非常讲究礼数， ** 是很符合中国人“克己复礼”的行事风格的。 **   
12 |   
13 | 东方的文化，是很强调做人的道理的，题主在父母教育下 ** 秉持“以德报人、以诚相待”的处世原则 ** ，怎么可能是“错”的呢？   
14 |   
15 | 题主所提到的室友、朋友、男朋友等， ** 由于他们每个人也都有自己的家庭文化，也有自己的为人处世的信条和习惯，所以不尽和题主相同，所以觉得题主见外，也是有可能的，但并不真正成为一种交际上的冲突，他们只是没有去习惯你的家庭文化给你带来的生活交际风格。 **   
16 |   
17 | 不同的生活习惯，没有错与对，没有好与坏，只有是否理解和接受，是不是？   
18 |   
19 | 一个社会的基本伦理是一个人的言行不影响他人的感受和利益，你所做的事是及时地向帮助自己的人道谢，你为周围的人体现了来自你的家庭文化，这样的文化绝对是讨喜的，而不会让人厌烦。   
20 |   
21 | 相熟的人也许会觉得你说谢谢太过客气，如上所述，也许是他们的家庭文化相对比较粗放，并不太在意一定要你道谢或者回报，这只是家庭文化上人际交往过程中的一个非常小的侧面的体现，应该不会给你造成心理困扰。   
22 |   
23 | 人是人际关系的动物，必须生活在人际关系层面，无论你愿意不愿意，都得遵循文化规则来呈现自己，中国人是很推崇中庸之道的，人际关系和谐，内心秩序井然，人才能在宽松自在的感觉中驾驭生活、平衡自我。   
24 |   
25 | 所以无论怎么说，你在日常生活中呈现自己的文化色彩，都是受人欢迎的， ** 谁会真正的去埋怨一个懂礼貌的懂得与人为善的人呢？ **   
26 |   
27 | 你只需要去区分一件事，分清你的三个社交等级： ** 谁和你是亲密关系，谁和你是朋友关系，谁和你是一般关系。 ** 你根据这三层关系的划分，可以稍微的让自己不那么恪守自己的家庭文化规则，在亲密的人、相熟的人面前不拘小节， ** 偶而允许自己也不那么的象平时的自己 ** ，这样的体验也不错啊!   
28 |   
29 | 再进一步的说，那些说不习惯你常常客气的人，不见得在帮助你之后，真的可以不需要你道谢，亲密的人往往帮的忙都是大忙，都是劳心费神的，你一句“谢谢你”就能让他们感觉为你做事很值，而不是在做苦力。如果费了九牛二虎之力，一句春风拂面的话都听不到，久而久之，还有什么动力为你做事呢？   
30 |   
31 | 人对他人的期待总是潜伏着的，如果要学为人处世，就要学彻底，受人恩惠与帮助，多说谢谢，让他人心理也能得到平衡，这种做法是没错的。   
32 |   
33 | 其实 ** ，更应该对亲密的人说谢谢，感谢他的爱，感谢他的无私奉献，感谢他的一路相伴，这个世界上，亲密的关系都是互爱互惠互助建构起来的。 **   
34 |   
35 |   
36 | 你会坚持与人为善、以诚待人的，对吧！ 
37 | 


--------------------------------------------------------------------------------
/test/data/answer_content.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |  <head>
 3 |   <meta charset="utf-8"/>
 4 |  </head>
 5 |  <body>
 6 |   <div>
 7 |    多谢刘柯的邀请！这也是个有意思的提问。
 8 |    <br/>
 9 |    <br/>
10 |    说说家庭文化吧。
11 |    <br/>
12 |    <br/>
13 |    家庭文化是一个家庭世代传承过程中形成和发展起来的
14 |    <b>
15 |     较为稳定的生活方式、生活作风、传统习惯、家庭道德规范以及为人处世之道等等。
16 |    </b>
17 |    <br/>
18 |    <br/>
19 |    家庭是一个人生存的最早的文化环境，家庭的价值观是家庭文化的核心，有教育的功能。一个家庭中的父母扮演的就是施教者的角色，父母的价值观和文化素养将会对孩子的成长形成决定性的影响。
20 |    <br/>
21 |    <br/>
22 |    家庭文化对孩子的影响体现在意识形态的影响和行为规范方面。中国的传统家庭文化非常注重道德教育，强调每个人对家庭的责任和义务，要敬老爱幼等等。
23 |    <br/>
24 |    <br/>
25 |    象题主的家庭教育就很传统，父母教育题主在接受他人帮助的时候及时地道谢，非常讲究礼数，
26 |    <b>
27 |     是很符合中国人“克己复礼”的行事风格的。
28 |    </b>
29 |    <br/>
30 |    <br/>
31 |    东方的文化，是很强调做人的道理的，题主在父母教育下
32 |    <b>
33 |     秉持“以德报人、以诚相待”的处世原则
34 |    </b>
35 |    ，怎么可能是“错”的呢？
36 |    <br/>
37 |    <br/>
38 |    题主所提到的室友、朋友、男朋友等，
39 |    <b>
40 |     由于他们每个人也都有自己的家庭文化，也有自己的为人处世的信条和习惯，所以不尽和题主相同，所以觉得题主见外，也是有可能的，但并不真正成为一种交际上的冲突，他们只是没有去习惯你的家庭文化给你带来的生活交际风格。
41 |    </b>
42 |    <br/>
43 |    <br/>
44 |    不同的生活习惯，没有错与对，没有好与坏，只有是否理解和接受，是不是？
45 |    <br/>
46 |    <br/>
47 |    一个社会的基本伦理是一个人的言行不影响他人的感受和利益，你所做的事是及时地向帮助自己的人道谢，你为周围的人体现了来自你的家庭文化，这样的文化绝对是讨喜的，而不会让人厌烦。
48 |    <br/>
49 |    <br/>
50 |    相熟的人也许会觉得你说谢谢太过客气，如上所述，也许是他们的家庭文化相对比较粗放，并不太在意一定要你道谢或者回报，这只是家庭文化上人际交往过程中的一个非常小的侧面的体现，应该不会给你造成心理困扰。
51 |    <br/>
52 |    <br/>
53 |    人是人际关系的动物，必须生活在人际关系层面，无论你愿意不愿意，都得遵循文化规则来呈现自己，中国人是很推崇中庸之道的，人际关系和谐，内心秩序井然，人才能在宽松自在的感觉中驾驭生活、平衡自我。
54 |    <br/>
55 |    <br/>
56 |    所以无论怎么说，你在日常生活中呈现自己的文化色彩，都是受人欢迎的，
57 |    <b>
58 |     谁会真正的去埋怨一个懂礼貌的懂得与人为善的人呢？
59 |    </b>
60 |    <br/>
61 |    <br/>
62 |    你只需要去区分一件事，分清你的三个社交等级：
63 |    <b>
64 |     谁和你是亲密关系，谁和你是朋友关系，谁和你是一般关系。
65 |    </b>
66 |    你根据这三层关系的划分，可以稍微的让自己不那么恪守自己的家庭文化规则，在亲密的人、相熟的人面前不拘小节，
67 |    <b>
68 |     偶而允许自己也不那么的象平时的自己
69 |    </b>
70 |    ，这样的体验也不错啊!
71 |    <br/>
72 |    <br/>
73 |    再进一步的说，那些说不习惯你常常客气的人，不见得在帮助你之后，真的可以不需要你道谢，亲密的人往往帮的忙都是大忙，都是劳心费神的，你一句“谢谢你”就能让他们感觉为你做事很值，而不是在做苦力。如果费了九牛二虎之力，一句春风拂面的话都听不到，久而久之，还有什么动力为你做事呢？
74 |    <br/>
75 |    <br/>
76 |    人对他人的期待总是潜伏着的，如果要学为人处世，就要学彻底，受人恩惠与帮助，多说谢谢，让他人心理也能得到平衡，这种做法是没错的。
77 |    <br/>
78 |    <br/>
79 |    其实
80 |    <b>
81 |     ，更应该对亲密的人说谢谢，感谢他的爱，感谢他的无私奉献，感谢他的一路相伴，这个世界上，亲密的关系都是互爱互惠互助建构起来的。
82 |    </b>
83 |    <br/>
84 |    <br/>
85 |    <br/>
86 |    你会坚持与人为善、以诚待人的，对吧！
87 |   </div>
88 |  </body>
89 | </html>


--------------------------------------------------------------------------------
/test/data/answer_upvoter.html:
--------------------------------------------------------------------------------
 1 | <div class="zm-profile-card clearfix no-hovercard">
 2 | <div class="zg-right">
 3 | 
 4 | <button data-follow="m:button" data-id="7411e2c455d475ffdedf542cb40163f1" class="zg-btn zg-btn-follow zm-rich-follow-btn small nth-0">关注她</button>
 5 | 
 6 | </div>
 7 | <a title="Mikuroneko"
 8 | data-tip="p$t$guo-yi-hui-23"
 9 | class="zm-item-link-avatar"
10 | target="_blank"
11 | href="/people/guo-yi-hui-23">
12 | <img src="//pic3.zhimg.com/cded0147b3f57cff0c884b4e607f2c2e_m.jpg" class="zm-item-img-avatar">
13 | </a>
14 | <div class="body">
15 | <div class="author ellipsis">
16 | <a data-tip="p$t$guo-yi-hui-23" href="http://www.zhihu.com/people/guo-yi-hui-23" target="_blank" class="zg-link" title="Mikuroneko">Mikuroneko</a>
17 | <span class="bio hidden-phone">不好意思我属猫(ΦωΦ)ﾌﾌﾌ…</span>
18 | </div>
19 | <ul class="status">
20 | <li><span>0 赞同</span></li>
21 | <li><span>0 感谢</span></li>
22 | 
23 | <li class="hidden-phone"><a href="/people/guo-yi-hui-23/asks" target="_blank">0 提问</a></li>
24 | <li class="hidden-phone"><a href="/people/guo-yi-hui-23/answers" target="_blank">0 回答</a></li>
25 | 
26 | </ul>
27 | </div>
28 | </div>


--------------------------------------------------------------------------------
/test/data/column.json:
--------------------------------------------------------------------------------
1 | {"following": false, "followersCount": 76605, "canPost": false, "avatar": {"id": "993d9179c", "template": "http://pic1.zhimg.com/{id}_{size}.jpg"}, "postsCount": 69, "url": "/xiepanda", "description": "\u6709\u7a7a\u5c31\u8d34\u70b9\u65e7\u6587\u7ae0\uff0c\u5076\u5c14\u60f3\u66f4\u65b0\u4e86\u5c31\u653e\u70b9\u65b0\u6587\u7ae0", "topics": [], "activateAuthorRequested": false, "href": "/api/columns/xiepanda", "commentPermission": "anyone", "acceptSubmission": true, "name": "\u8c22\u718a\u732b\u51fa\u6ca1\u6ce8\u610f", "slug": "xiepanda", "creator": {"hash": "c948a6c96e21986af5d9c720334989f7", "slug": "xiepanda", "bio": "\u642c\u5230\u6df1\u5733\u5566\uff0c\u5e0c\u671b\u8ba4\u8bc6\u4e9b\u5f53\u5730\u670b\u53cb\uff0c\u8bf7\u79c1\u4fe1\u3002", "name": "\u8c22\u718a\u732b\u541b", "avatar": {"id": "e7a5b32f3", "template": "http://pic4.zhimg.com/{id}_{size}.jpg"}, "profileUrl": "http://www.zhihu.com/people/xiepanda", "description": "\u56db\u4f4d\u77e5\u53cb\u5171\u540c\u7ffb\u8bd1\u7684\u300a\u548c\u5b69\u5b50\u8c08\u8c08\u764c\u75c7\u300b\uff0c\u5173\u4e8e\u300c\u786e\u8bca\u809d\u764c\u665a\u671f\uff0c\u600e\u6837\u5c06\u75c5\u60c5\u544a\u77e5\u5b69\u5b50\u5e76\u51cf\u5c11\u4f24\u5bb3\uff1f\u300d\u7684\u8be6\u7ec6\u89e3\u7b54 \u548c\u4e0b\u8f7d\uff1ahttp://zhuanlan.zhihu.com/gongyi/19920822"}}


--------------------------------------------------------------------------------
/test/data/column_post.json:
--------------------------------------------------------------------------------
1 | {"state": "published", "summary": "<img src=\"http://pic4.zhimg.com/0b656fb32a070f9717f9e4a737a148f3_xld.jpg\" data-rawwidth=\"1920\" data-rawheight=\"1088\" class=\"origin_image inline-img zh-lightbox-thumb\" data-original=\"http://pic4.zhimg.com/0b656fb32a070f9717f9e4a737a148f3_r.jpg\"><b>\u8fd9\u662f\u4e2a\u5173\u4e8e\u79d1\u666e\u516c\u76ca\u9879\u76ee\u7684\u786c\u5e7f\uff0c\u800c\u4e14\u662f\u7bc7\u627e\u8bfb\u8005\u4f17\u7b79\u7684\u786c\u5e7f\uff0c\u7ee7\u7eed\u8bfb\u4e0b\u53bb\u4e4b\u524d\u8bf7\u63a5\u53d7\u8fd9\u6837\u7684\u8bbe\u5b9a\uff0c\u4e0d\u7136\u5c31\u4e0d\u8981\u8bfb\u4e86\u3002</b>\u6211\u6709\u4e2a\u53eb\u9879\u680b\u6881\u7684\u8001\u670b\u53cb\uff0c\u4ed6\u662f\u4e2a\u5f88\u6015\u6b7b\u7684\u4eba\u3002 \u90a3\u5929\u548c\u680b\u6881\u4e00\u8d77\u5403\u996d\uff0c\u5927\u5bb6\u4e00\u8d77\u559d\u70b9\u4f50\u9910\u8461\u8404\u9152\uff0c\u680b\u6881\u4e0d\u613f\u610f\u559d\u3002\u56e0\u4e3a\u680b\u6881\u5f88\u6015\u6b7b\uff0c\u4efb\u4f55\u6709\u79d1\u5b66\u7814\u7a76\u8bc1\u2026", "commentsCount": 199, "canComment": false, "publishedTime": "2015-09-06T20:03:31+08:00", "snapshotUrl": "", "url": "/xiepanda/20202275", "author": {"hash": "c948a6c96e21986af5d9c720334989f7", "slug": "xiepanda", "bio": "\u642c\u5230\u6df1\u5733\u5566\uff0c\u5e0c\u671b\u8ba4\u8bc6\u4e9b\u5f53\u5730\u670b\u53cb\uff0c\u8bf7\u79c1\u4fe1\u3002", "name": "\u8c22\u718a\u732b\u541b", "avatar": {"id": "e7a5b32f3", "template": "http://pic4.zhimg.com/{id}_{size}.jpg"}, "profileUrl": "http://www.zhihu.com/people/xiepanda", "description": "\u56db\u4f4d\u77e5\u53cb\u5171\u540c\u7ffb\u8bd1\u7684\u300a\u548c\u5b69\u5b50\u8c08\u8c08\u764c\u75c7\u300b\uff0c\u5173\u4e8e\u300c\u786e\u8bca\u809d\u764c\u665a\u671f\uff0c\u600e\u6837\u5c06\u75c5\u60c5\u544a\u77e5\u5b69\u5b50\u5e76\u51cf\u5c11\u4f24\u5bb3\uff1f\u300d\u7684\u8be6\u7ec6\u89e3\u7b54 \u548c\u4e0b\u8f7d\uff1ahttp://zhuanlan.zhihu.com/gongyi/19920822"}, "topics": [], "rating": "none", "href": "/api/columns/xiepanda/posts/20202275", "column": {"name": "\u8c22\u718a\u732b\u51fa\u6ca1\u6ce8\u610f", "slug": "xiepanda"}, "titleImage": "http://pic2.zhimg.com/3cae80c34fcdd484e04a2c40a3e5ffbd_b.jpg", "likesCount": 963, "content": "<p><b>\u8fd9\u662f\u4e2a\u5173\u4e8e\u79d1\u666e\u516c\u76ca\u9879\u76ee\u7684\u786c\u5e7f\uff0c\u800c\u4e14\u662f\u7bc7\u627e\u8bfb\u8005\u4f17\u7b79\u7684\u786c\u5e7f\uff0c\u7ee7\u7eed\u8bfb\u4e0b\u53bb\u4e4b\u524d\u8bf7\u63a5\u53d7\u8fd9\u6837\u7684\u8bbe\u5b9a\uff0c\u4e0d\u7136\u5c31\u4e0d\u8981\u8bfb\u4e86\u3002</b></p><p>\u6211\u6709\u4e2a\u53eb\u9879\u680b\u6881\u7684\u8001\u670b\u53cb\uff0c\u4ed6\u662f\u4e2a\u5f88\u6015\u6b7b\u7684\u4eba\u3002 </p><p>\u90a3\u5929\u548c\u680b\u6881\u4e00\u8d77\u5403\u996d\uff0c\u5927\u5bb6\u4e00\u8d77\u559d\u70b9\u4f50\u9910\u8461\u8404\u9152\uff0c\u680b\u6881\u4e0d\u613f\u610f\u559d\u3002\u56e0\u4e3a\u680b\u6881\u5f88\u6015\u6b7b\uff0c\u4efb\u4f55\u6709\u79d1\u5b66\u7814\u7a76\u8bc1\u660e\u53ef\u80fd\u5f71\u54cd\u5bff\u547d\u7684\u4e1c\u897f\u4ed6\u90fd\u4e0d\u78b0\uff0c\u6240\u4ee5\u4ed6\u4e0d\u559d\u9152\u3002 </p><p>\u5c31\u662f\u8fd9\u4e2a\u5f88\u6015\u6b7b\u7684\u680b\u6881\uff0c\u4e0d\u4e45\u524d\u8f9e\u53bb\u4e86\u5a92\u4f53\u7684\u5de5\u4f5c\uff0c\u51b3\u5b9a\u8fde\u5403100\u5929\u8f6c\u57fa\u56e0\u98df\u7269\uff0c\u5e76\u4e14\u7528\u7eaa\u5f55\u7247\u7684\u65b9\u5f0f\u5168\u7a0b\u8bb0\u5f55\u3002</p><p>\u680b\u6881\u9080\u8bf7\u6211\u4f5c\u4e3a\u4ed6\u8fd9\u4e2a\u201c\u8fde\u5403100\u5929\u8f6c\u57fa\u56e0\u98df\u7269\u201d\u9879\u76ee\u7684\u7f8e\u98df\u987e\u95ee\uff0c\u4e8e\u662f\u4e0d\u4e45\u524d\u6211\u53bb\u5e7f\u5dde\u5403\u4e86\u4e00\u987f\u201c\u8f6c\u57fa\u56e0\u5927\u9910\u201d\uff0c\u7528\u7684\u662f\u8f6c\u57fa\u56e0\u5927\u7c73\u3001\u7389\u7c73\u3001\u6728\u74dc\u548c\u5927\u8c46\u6cb9\u8fd9\u4e9b\u98df\u6750\u3002\u5927\u6982\u662f\u56e0\u4e3a\u4e00\u672c\u6b63\u7ecf\u5148\u7b7e\u4e86\u300a\u77e5\u60c5\u540c\u610f\u4e66\u300b\u7684\u7f18\u6545\uff0c\u5403\u996d\u65f6\u5bf9\u98df\u6750\u672c\u8eab\u7684\u5728\u610f\u591a\u8fc7\u4e86\u5bf9\u6599\u7406\u6c34\u5e73\u7684\u8bc4\u4ef7\u3002\u53ef\u60dc\u5e76\u6ca1\u6709\u5403\u5230\u8f6c\u57fa\u56e0\u7684\u725b\u8089\u548c\u9e21\u7fc5\u3002</p><p><b>\u4e3a\u4ec0\u4e48\u6211\u613f\u610f\u65e0\u507f\u505a\u8fd9\u4e2a\u9879\u76ee\u7684\u7f8e\u98df\u987e\u95ee\u5462\uff1f\u56e0\u4e3a\u6211\u4e0d\u4f46\u81ea\u5df1\u5e38\u5403\u8f6c\u57fa\u56e0\u98df\u7269\uff0c\u4e5f\u9f13\u52b1\u8eab\u8fb9\u7684\u670b\u53cb\u548c\u5bb6\u4eba\u5403\u8f6c\u57fa\u56e0\u98df\u7269\uff0c\u800c\u4e14\u6211\u4e00\u76f4\u8ba4\u4e3a\u4e00\u4e2a\u771f\u6b63\u7684\u5403\u8d27\u5e94\u8be5\u7528\u884c\u52a8\u652f\u6301\u8f6c\u57fa\u56e0\u3002</b></p><img src=\"http://pic4.zhimg.com/0b656fb32a070f9717f9e4a737a148f3_b.jpg\" data-rawwidth=\"1920\" data-rawheight=\"1088\" class=\"origin_image zh-lightbox-thumb\" width=\"1920\" data-original=\"http://pic4.zhimg.com/0b656fb32a070f9717f9e4a737a148f3_r.jpg\"><p>\u8f6c\u57fa\u56e0\u7684\u98df\u6750\u5403\u8d77\u6765\u6709\u4ec0\u4e48\u7279\u522b\u4e4b\u5904\u5417\uff1f\u5f88\u9057\u61be\uff0c\u6211\u662f\u6ca1\u6709\u5c1d\u51fa\u6765\u3002\u90a3\u4e3a\u4ec0\u4e48\u8bf4\u771f\u6b63\u7684\u5403\u8d27\u5e94\u8be5\u652f\u6301\u8f6c\u57fa\u56e0\u5462\uff1f </p><p><b>\u56e0\u4e3a\u8f6c\u57fa\u56e0\u53ef\u4ee5\u8ba9\u98df\u6750\u53d8\u5f97\u66f4\u597d\u5403\u3002 </b></p><p><b>\u56e0\u4e3a\u8f6c\u57fa\u56e0\u53ef\u4ee5\u8ba9\u98df\u6750\u53d8\u5f97\u66f4\u597d\u5403\u3002 </b></p><p><b>\u56e0\u4e3a\u8f6c\u57fa\u56e0\u53ef\u4ee5\u8ba9\u98df\u6750\u53d8\u5f97\u66f4\u597d\u5403\u3002</b></p><br><p>\u6211\u4ee5\u524d\u5199\u8fc7\u4e00\u7bc7\u4e13\u680f  \u300a\u7eaf\u5929\u7136\u539f\u751f\u6001\u7684\u4e1c\u897f\u5c31\u597d\u5403\uff1f\u8fd9\u53ef\u4e0d\u4e00\u5b9a\u300b\u3002\u8fd9\u6b21\u518d\u6765\u8c08\u8c08\u4e3a\u4ec0\u4e48\u201c\u975e\u5929\u7136\u201d\u7684\u6280\u672f\u53ef\u4ee5\u8ba9\u98df\u7269\u53d8\u5f97\u66f4\u597d\u5403\u3002</p><p>\u901a\u5e38\uff0c\u6211\u4eec\u8981\u60f3\u505a\u70b9\u597d\u5403\u7684\uff0c\u7b2c\u4e00\u6b65\u662f\u53bb\u5e02\u573a\u4e0a\u201c\u6311\u201d\u98df\u6750\u3002\u8fd9\u4e2a\u201c\u6311\u201d\u7684\u610f\u601d\uff0c\u5f53\u7136\u662f\u8bf4\u98df\u6750\u5df2\u7ecf\u5206\u95e8\u522b\u7c7b\u6446\u5728\u90a3\u91cc\uff0c\u7b49\u7740\u6211\u4eec\u51ed\u7ecf\u9a8c\u3001\u53e3\u5473\u548c\u7ecf\u6d4e\u80fd\u529b\u53bb\u9009\u62e9\u3002</p><br><p>\u9047\u5230\u7279\u522b\u6ee1\u610f\u7684\u98df\u6750\uff0c\u5f88\u591a\u4eba\u4f1a\u611f\u6168\u4e00\u53e5\uff1a\u611f\u8c22\u5927\u81ea\u7136\u7684\u9988\u8d60\uff01</p><p>\u4f46\u5b9e\u9645\u4e0a\uff0c\u8fd9\u662f\u4e00\u4e2a\u7279\u522b\u5e38\u89c1\u7684\u8bef\u89e3\u3002</p><p><b>\u4f60\u4eec\u4eba\u7c7b\u73b0\u5728\u7eb3\u5165\u98df\u8c31\u7684\u8fd9\u4e9b\u98df\u6750\uff0c\u7edd\u5927\u90e8\u5206\u90fd\u4e0d\u662f\u81ea\u7136\u8fdb\u5316\u7684\u4ea7\u7269</b>\u3002\u4eba\u7c7b\u4e00\u6b65\u6b65\u722c\u5230\u98df\u7269\u94fe\u7684\u9876\u7aef\uff0c\u8fd9\u4e2a\u8fc7\u7a0b\u4e2d\uff0c\u6211\u4eec\u5bf9\u6574\u6761\u98df\u7269\u94fe\u6240\u505a\u7684\u4e8b\u60c5\u53ef\u8c13\u201c\u60e8\u65e0\u5929\u9053\u201d\u3002</p><br><p>\u9996\u5148\u662f\u7cae\u98df</p><img src=\"http://pic3.zhimg.com/60c901836121ee08722d0ff8ef6a487a_b.jpg\" data-rawwidth=\"500\" data-rawheight=\"325\" class=\"origin_image zh-lightbox-thumb\" width=\"500\" data-original=\"http://pic3.zhimg.com/60c901836121ee08722d0ff8ef6a487a_r.jpg\"><p>\u7136\u540e\u662f\u6c34\u679c</p><img src=\"http://pic2.zhimg.com/0a3c8516cdcacc707f4cff9fc1ec07f1_b.jpg\" data-rawwidth=\"640\" data-rawheight=\"640\" class=\"origin_image zh-lightbox-thumb\" width=\"640\" data-original=\"http://pic2.zhimg.com/0a3c8516cdcacc707f4cff9fc1ec07f1_r.jpg\"><p>\u8fd8\u6709\u8089</p><img src=\"http://pic1.zhimg.com/776ad08c7976697938fd333e06ec667c_b.jpg\" data-rawwidth=\"800\" data-rawheight=\"533\" class=\"origin_image zh-lightbox-thumb\" width=\"800\" data-original=\"http://pic1.zhimg.com/776ad08c7976697938fd333e06ec667c_r.jpg\"><img src=\"http://pic1.zhimg.com/fcb72a34df91474aed05ca96b3235950_b.jpg\" data-rawwidth=\"625\" data-rawheight=\"469\" class=\"origin_image zh-lightbox-thumb\" width=\"625\" data-original=\"http://pic1.zhimg.com/fcb72a34df91474aed05ca96b3235950_r.jpg\"><p>\u690d\u7269\u7684\u6742\u4ea4<img src=\"http://pic2.zhimg.com/2c965a0e54f6383c3552cf4d0f915b99_b.jpg\" data-rawwidth=\"500\" data-rawheight=\"354\" class=\"origin_image zh-lightbox-thumb\" width=\"500\" data-original=\"http://pic2.zhimg.com/2c965a0e54f6383c3552cf4d0f915b99_r.jpg\"></p><p>\u4e0d\u540c\u7269\u79cd\u7684\u5ac1\u63a5\uff0c\u6bd4\u5982\u5ac1\u63a5\u6c34\u679c</p><img src=\"http://pic1.zhimg.com/1b18aa2f5f93ca12d2191d56902e14a4_b.jpg\" data-rawwidth=\"503\" data-rawheight=\"372\" class=\"origin_image zh-lightbox-thumb\" width=\"503\" data-original=\"http://pic1.zhimg.com/1b18aa2f5f93ca12d2191d56902e14a4_r.jpg\"><p><b>\u6211\u4eec\u73b0\u5728\u5403\u7684\u5927\u90e8\u5206\u4e3b\u98df\u3001\u8089\u3001\u6c34\u679c\u3001\u852c\u83dc\uff0c\u57fa\u672c\u4e0a\u90fd\u4e0d\u662f\u5927\u81ea\u7136\u7684\u9988\u8d60\uff0c\u90fd\u662f\u88ab\u6781\u901f\u81a8\u80c0\u7684\u4eba\u53e3\uff0c\u8fd8\u6709\u5403\u8d27\u4eec\u8d8a\u6765\u8d8a\u6311\u5254\u7684\u53e3\u5473\u903c\u7740\u6539\u9020\u51fa\u6765\u7684\u3002 </b></p><p>\u5f97\u76ca\u4e8e\u5bf9\u98df\u6750\u4e0d\u65ad\u7684\u6539\u9020\uff0c\u5730\u7403\u627f\u8f7d\u7684\u4eba\u53e3\u6570\u91cf\u76f8\u5bf9\u4e00\u4e07\u5e74\u524d\u7ffb\u4e86\u7ea61000\u500d\u3002\u4e00\u4e2a\u751f\u6d3b\u5728\u6df1\u5733\u7684\u73b0\u4ee3\u4eba\u8c22\u718a\u732b\u4e00\u751f\u9884\u8ba1\u4f1a\u5403\u638940\u523060\u5428\u98df\u7269\uff0c\u6570\u91cf\u4e0a\u6bd4\u79e6\u671d\u7684\u7956\u5148\u8c22\u718a\u8001\u8001\u8001\u8001\u732b\u591a\u4e0d\u4e86\u5f88\u591a\u3002\u4f46\u662f\u4ed6\u8fd9\u4e00\u751f\u53ef\u4ee5\u54c1\u5c1d\u5230\u7684\u98df\u6750\u79cd\u7c7b\uff0c\u5c31\u6bd4\u8c22\u718a\u8001\u8001\u8001\u8001\u732b\u4e0d\u77e5\u9ad8\u5230\u54ea\u91cc\u53bb\u4e86\u3002</p><p>\u90a3\u4e48\u518d\u8fc750\u5e74\uff0c\u518d\u8fc7300\u5e74\u5462\uff1f\u8c22\u718a\u732b\u7684\u540e\u4ee3\u8c22\u718a\u5c0f\u5c0f\u5c0f\u5c0f\u732b\uff0c\u4f1a\u6709\u66f4\u597d\u7684\u53e3\u798f\uff0c\u53ef\u4ee5\u5403\u5230\u66f4\u591a\u66f4\u7f8e\u5473\u7684\u98df\u6750\u5417\uff1f</p><p>\u7b54\u6848\u662f\u4e0d\u4e00\u5b9a\u3002\u8fd9\u53d6\u51b3\u4e8e\u4eba\u7c7b\u4ee5\u591a\u5927\u7684\u51b3\u5fc3\u53bb\u6539\u9020\u98df\u6750\u548c\u62d3\u5bbd\u98df\u8c31\u3002</p><br><p>\u4eca\u65f6\u4eca\u65e5\uff0c\u4eba\u7c7b\u5bf9\u5730\u7403\u4e0a\u53ef\u98df\u7528\u751f\u7269\u7684\u5f00\u53d1\u5229\u7528\u5df2\u7ecf\u8fbe\u5230\u4e86\u4e00\u4e2a\u660e\u663e\u7684\u74f6\u9888\u671f\u3002\u5929\u4e0a\u98de\u7684\uff0c\u5730\u4e0a\u8dd1\u7684\uff0c\u6c34\u91cc\u6e38\u7684\uff0c\u51e1\u662f\u6709\u4e00\u4e01\u70b9\u7eb3\u5165\u98df\u8c31\u53ef\u80fd\u6027\u7684\uff0c\u57fa\u672c\u4e0a\u90fd\u5df2\u7ecf\u88ab\u4eba\u7c7b\u627e\u5230\u4e86\u3002</p><img src=\"http://pic1.zhimg.com/91e9f06427e82fffdf5d2d26829377f8_b.jpg\" data-rawwidth=\"500\" data-rawheight=\"374\" class=\"origin_image zh-lightbox-thumb\" width=\"500\" data-original=\"http://pic1.zhimg.com/91e9f06427e82fffdf5d2d26829377f8_r.jpg\"><p>\u6211\u4eec\u73b0\u5728\u80fd\u505a\u7684\uff0c\u53ea\u662f\u901a\u8fc7\u5168\u7403\u8d38\u6613\u548c\u73b0\u4ee3\u8fd0\u8f93\u624b\u6bb5\uff0c\u8ba9\u4e0d\u540c\u5730\u7406\u533a\u57df\u7684\u98df\u6750\u80fd\u4e92\u901a\u6709\u65e0\u3002\u901a\u8fc7\u51b7\u94fe\u8fd0\u8f93\uff0c\u4e4c\u9c81\u6728\u9f50\u7684\u5e02\u6c11\u53ef\u4ee5\u5403\u5230\u6765\u81ea\u632a\u5a01\u7684\u4e09\u6587\u9c7c\u3002 </p><p>\u4e00\u4e9b\u517b\u6b96\u624b\u6bb5\u4e5f\u6709\u5e2e\u52a9\uff0c\u6bd4\u5982\u901a\u8fc7\u54c1\u79cd\u9009\u80b2\u548c\u9972\u6599\u63a7\u5236\u6765\u8c03\u8282\u725b\u8089\u4e2d\u8102\u80aa\u5206\u5e03\uff0c\u96ea\u82b1\u80a5\u725b\u6b63\u9010\u6e10\u53d8\u5f97\u7a00\u677e\u5e73\u5e38\u3002</p><img src=\"http://pic2.zhimg.com/590abedf42af1ed736a3990becf2227d_b.jpg\" data-rawwidth=\"470\" data-rawheight=\"284\" class=\"origin_image zh-lightbox-thumb\" width=\"470\" data-original=\"http://pic2.zhimg.com/590abedf42af1ed736a3990becf2227d_r.jpg\"><p>\u4f46\u662f\uff0c\u5f53\u6211\u4eec\u60f3\u8981\u518d\u663e\u8457\u5730\u6539\u5584\u67d0\u79cd\u98df\u6750\u7684\u53e3\u5473\uff0c\u63d0\u9ad8\u98df\u6750\u7684\u989c\u503c\uff0c\u4f18\u5316\u98df\u6750\u7684\u8425\u517b\uff0c\u964d\u4f4e\u98df\u6750\u4ef7\u683c\uff0c\u91c7\u7528\u4ee5\u524d\u7684\u6539\u9020\u65b9\u6cd5\u6b63\u53d8\u5f97\u8d8a\u6765\u8d8a\u6162\uff0c\u4e5f\u8d8a\u6765\u8d8a\u56f0\u96be\u3002\u6b63\u5982\u4e00\u8f86\u5df2\u7ecf\u51fa\u5382\u7684\u6574\u8f66\uff0c\u8981\u901a\u8fc7\u6539\u88c5\u6765\u63d0\u5347\u6027\u80fd\uff0c\u5f53\u7136\u4e5f\u53ef\u4ee5\uff0c\u4f46\u6539\u88c5\u5e45\u5ea6\u662f\u5f88\u6709\u9650\u7684\u3002\u8981\u60f3\u5f97\u5230\u5927\u5e45\u5ea6\u5347\u7ea7\u7684\u8f66\u578b\uff0c\u53ea\u80fd\u5728\u8bbe\u8ba1\u3001\u751f\u4ea7\u73af\u8282\u5c31\u7528\u4e0a\u66f4\u597d\u7684\u6784\u4ef6\u3002 </p><p>\u5bf9\u4e8e\u98df\u6750\u7684\u671f\u5f85\uff0c\u518d\u60f3\u8981\u8de8\u4e00\u5927\u6b65\uff0c\u5c31\u9700\u8981\u5728\u9057\u4f20\u5206\u5b50\u5c42\u9762\u5bf9\u98df\u6750\u8fdb\u884c\u5b9a\u5411\u7684\u6539\u9020\u4e86\u3002 </p><p>\u4e3a\u4ec0\u4e48\u8bf4\u8f6c\u57fa\u56e0\u6280\u672f\u662f\u6700\u6709\u5e0c\u671b\u53bb\u7a81\u7834\u74f6\u9888\u7684\u65b9\u5f0f\u5462\uff1f</p><p>\u56e0\u4e3a\u51b3\u5b9a\u98df\u6750\u53e3\u5473\u3001\u989c\u503c\u3001\u8425\u517b\u548c\u4ef7\u683c\u7684\u90a3\u4e9b\u56e0\u7d20\uff0c\u6bd4\u5982\u7cd6\u5ea6\u3001\u6e38\u79bb\u6c28\u57fa\u9178\u542b\u91cf\u3001\u82b1\u9752\u7d20\u542b\u91cf\u3001\u8102\u80aa\u6bd4\u4f8b\u3001\u4ea7\u91cf\u548c\u8d27\u67b6\u671f\uff0c\u90fd\u662f\u53ef\u4ee5\u91cf\u5316\uff0c\u4e5f\u662f\u53ef\u4ee5\u901a\u8fc7\u64cd\u63a7\u7279\u5b9a\u57fa\u56e0\u6765\u8fdb\u884c\u8c03\u8282\u7684\u3002</p><p><b>\u90a3\u4e3a\u4ec0\u4e48\u4e0d\u73b0\u5728\u5c31\u5f00\u59cb\u505a\u5462\uff1f<br></b></p><p><b>\u56e0\u4e3a\u8fd8\u6709\u975e\u5e38\u591a\u7684\u4eba\u89c9\u5f97\uff0c\u8f6c\u57fa\u56e0\u7684\u98df\u54c1\u5fc5\u987b\u8bd5\u5403300\u5e74\u6ca1\u95ee\u9898\u624d\u53ef\u4ee5\u6279\u51c6\u4e0a\u5e02\u554a\uff01\u4f60\u89c1\u8fc7\u54ea\u4e2a\u4ea7\u54c1\u7684\u7814\u53d1\u5468\u671f\u662f300\u5e74\u4e48\uff1f</b></p><p>\u4e3a\u4e86\u634d\u536b\u4e00\u540d\u5403\u8d27\u83b7\u5f97\u66f4\u591a\u7f8e\u5473\u98df\u6750\u7684\u5929\u8d4b\u6743\u5229\uff0c\u4e5f\u4e3a\u4e86\u8ba9\u4eb2\u670b\u597d\u53cb\u4e0d\u518d\u65e0\u7aef\u6050\u60e7\u8f6c\u57fa\u56e0\u6280\u672f\uff0c\u8bf7\u6211\u5403\u8f6c\u57fa\u56e0\u5927\u9910\u7684\u680b\u6881\u53d1\u8d77\u4e86\u8fd9\u4e2a\u79d1\u666e\u516c\u76ca\u9879\u76ee\uff0c\u51c6\u5907\u9080\u8bf7\u66f4\u591a\u5403\u8d27\u4e00\u8d77\u516c\u5f00\u54c1\u5c1d\u8f6c\u57fa\u56e0\u5927\u9910\uff0c\u5e76\u7528\u8f6c\u57fa\u56e0\u98df\u6750\u521b\u9020100\u9053\u8f6c\u57fa\u56e0\u6599\u7406\u3002</p><p><b>\u611f\u5174\u8da3\u7684\u670b\u53cb\u53ef\u4ee5\u652f\u6301\u4e00\u4e0b\u4ed6\u4eec\u7684\u4f17\u7b79\uff0c\u4e5f\u6b22\u8fce\u5927\u5bb6\u53bb\u5e7f\u5dde\u627e\u680b\u6881\u8e6d\u4e00\u987f\u8f6c\u57fa\u56e0\u7684\u996d\uff0c\u8e6d\u996d\u8bf7\u641c\u7d22\u5fae\u4fe1\u516c\u4f17\u53f7\uff1a\n\u8f6c\u57fa\u56e0\u80fd\u597d\u600e</b></p><p>\u4f17\u7b79\u7684\u4e8c\u7ef4\u7801</p><img src=\"http://pic4.zhimg.com/a776bb513418a56d0fcfcf069b3f3b27_b.jpg\" data-rawwidth=\"280\" data-rawheight=\"280\" class=\"content_image\" width=\"280\"><p>\u5229\u76ca\u76f8\u5173\uff1a\u6211\u53cb\u60c5\u62c5\u4efb\u4e86\u8fd9\u4e2a\u79d1\u666e\u516c\u76ca\u9879\u76ee\u7684\u7f8e\u98df\u987e\u95ee\uff0c\u4ece\u5934\u5230\u5c3e\u4e0d\u4f1a\u6709\u4efb\u4f55\u62a5\u916c\uff0c\u8fde\u6765\u56de\u5e7f\u5dde\u7684\u9ad8\u94c1\u94b1\u90fd\u662f\u6211\u81ea\u5df1\u51fa\u7684\u3002</p><p>\u56fe\u7247\u6765\u81ea\u7f51\u7edc\uff0c\u4fb5\u5220\u3002</p>", "commentPermission": "anyone", "title": "\u4e3a\u4e86\u505a\u4e00\u4e2a\u79f0\u804c\u7684\u5403\u8d27\uff0c\u4ed6\u51b3\u5b9a\u8fde\u7740\u5403\u4e00\u767e\u5929\u8f6c\u57fa\u56e0\u98df\u7269", "links": {"comments": "http://zhuanlan.zhihu.com/api/columns/xiepanda/posts/20202275/comments"}, "sourceUrl": "", "slug": 20202275, "meta": {"next": null, "previous": null}}


--------------------------------------------------------------------------------
/test/data/post.md:
--------------------------------------------------------------------------------
  1 | **这是个关于科普公益项目的硬广，而且是篇找读者众筹的硬广，继续读下去之前请接受这样的设定，不然就不要读了。**
  2 | 
  3 | 我有个叫项栋梁的老朋友，他是个很怕死的人。 
  4 | 
  5 | 那天和栋梁一起吃饭，大家一起喝点佐餐葡萄酒，栋梁不愿意喝。因为栋梁很怕死，任何有科学研究证明可能影响寿命的东西他都不碰，所以他不喝酒。 
  6 | 
  7 | 就是这个很怕死的栋梁，不久前辞去了媒体的工作，决定连吃100天转基因食物，并且用纪录片的方式全程记录。
  8 | 
  9 | 栋梁邀请我作为他这个“连吃100天转基因食物”项目的美食顾问，于是不久前我去广州吃了一顿“转基因大餐”，用的是转基因大米、玉米、木瓜和大豆油这些食材。大概是因为一本正经先签了《知情同意书》的缘故，吃饭时对食材本身的在意多过了对料理水平的评价。可惜并没有吃到转基因的牛肉和鸡翅。
 10 | 
 11 | **为什么我愿意无偿做这个项目的美食顾问呢？因为我不但自己常吃转基因食物，也鼓励身边的朋友和家人吃转基因食物，而且我一直认为一个真正的吃货应该用行动支持转基因。**
 12 | 
 13 | ![](http://pic4.zhimg.com/0b656fb32a070f9717f9e4a737a148f3_b.jpg)
 14 | 
 15 | 转基因的食材吃起来有什么特别之处吗？很遗憾，我是没有尝出来。那为什么说真正的吃货应该支持转基因呢？ 
 16 | 
 17 | **因为转基因可以让食材变得更好吃。 **
 18 | 
 19 | **因为转基因可以让食材变得更好吃。 **
 20 | 
 21 | **因为转基因可以让食材变得更好吃。**
 22 | 
 23 |   
 24 | 
 25 | 
 26 | 我以前写过一篇专栏 《纯天然原生态的东西就好吃？这可不一定》。这次再来谈谈为什么“非天然”的技术可以让食物变得更好吃。
 27 | 
 28 | 通常，我们要想做点好吃的，第一步是去市场上“挑”食材。这个“挑”的意思，当然是说食材已经分门别类摆在那里，等着我们凭经验、口味和经济能力去选择。
 29 | 
 30 |   
 31 | 
 32 | 
 33 | 遇到特别满意的食材，很多人会感慨一句：感谢大自然的馈赠！
 34 | 
 35 | 但实际上，这是一个特别常见的误解。
 36 | 
 37 | **你们人类现在纳入食谱的这些食材，绝大部分都不是自然进化的产物**。人类一步步爬到食物链的顶端，这个过程中，我们对整条食物链所做的事情可谓“惨无天道”。
 38 | 
 39 |   
 40 | 
 41 | 
 42 | 首先是粮食
 43 | 
 44 | ![](http://pic3.zhimg.com/60c901836121ee08722d0ff8ef6a487a_b.jpg)
 45 | 
 46 | 然后是水果
 47 | 
 48 | ![](http://pic2.zhimg.com/0a3c8516cdcacc707f4cff9fc1ec07f1_b.jpg)
 49 | 
 50 | 还有肉
 51 | 
 52 | ![](http://pic1.zhimg.com/776ad08c7976697938fd333e06ec667c_b.jpg)![](http://pic1.zhimg.com/fcb72a34df91474aed05ca96b3235950_b.jpg)
 53 | 
 54 | 植物的杂交![](http://pic2.zhimg.com/2c965a0e54f6383c3552cf4d0f915b99_b.jpg)
 55 | 
 56 | 不同物种的嫁接，比如嫁接水果
 57 | 
 58 | ![](http://pic1.zhimg.com/1b18aa2f5f93ca12d2191d56902e14a4_b.jpg)
 59 | 
 60 | **我们现在吃的大部分主食、肉、水果、蔬菜，基本上都不是大自然的馈赠，都是被极速膨胀的人口，还有吃货们越来越挑剔的口味逼着改造出来的。 **
 61 | 
 62 | 得益于对食材不断的改造，地球承载的人口数量相对一万年前翻了约1000倍。一个生活在深圳的现代人谢熊猫一生预计会吃掉40到60吨食物，数量上比秦朝的祖先谢熊老老老老猫多不了很多。但是他这一生可以品尝到的食材种类，就比谢熊老老老老猫不知高到哪里去了。
 63 | 
 64 | 那么再过50年，再过300年呢？谢熊猫的后代谢熊小小小小猫，会有更好的口福，可以吃到更多更美味的食材吗？
 65 | 
 66 | 答案是不一定。这取决于人类以多大的决心去改造食材和拓宽食谱。
 67 | 
 68 |   
 69 | 
 70 | 
 71 | 今时今日，人类对地球上可食用生物的开发利用已经达到了一个明显的瓶颈期。天上飞的，地上跑的，水里游的，凡是有一丁点纳入食谱可能性的，基本上都已经被人类找到了。
 72 | 
 73 | ![](http://pic1.zhimg.com/91e9f06427e82fffdf5d2d26829377f8_b.jpg)
 74 | 
 75 | 我们现在能做的，只是通过全球贸易和现代运输手段，让不同地理区域的食材能互通有无。通过冷链运输，乌鲁木齐的市民可以吃到来自挪威的三文鱼。 
 76 | 
 77 | 一些养殖手段也有帮助，比如通过品种选育和饲料控制来调节牛肉中脂肪分布，雪花肥牛正逐渐变得稀松平常。
 78 | 
 79 | ![](http://pic2.zhimg.com/590abedf42af1ed736a3990becf2227d_b.jpg)
 80 | 
 81 | 但是，当我们想要再显著地改善某种食材的口味，提高食材的颜值，优化食材的营养，降低食材价格，采用以前的改造方法正变得越来越慢，也越来越困难。正如一辆已经出厂的整车，要通过改装来提升性能，当然也可以，但改装幅度是很有限的。要想得到大幅度升级的车型，只能在设计、生产环节就用上更好的构件。 
 82 | 
 83 | 对于食材的期待，再想要跨一大步，就需要在遗传分子层面对食材进行定向的改造了。 
 84 | 
 85 | 为什么说转基因技术是最有希望去突破瓶颈的方式呢？
 86 | 
 87 | 因为决定食材口味、颜值、营养和价格的那些因素，比如糖度、游离氨基酸含量、花青素含量、脂肪比例、产量和货架期，都是可以量化，也是可以通过操控特定基因来进行调节的。
 88 | 
 89 | **那为什么不现在就开始做呢？  
 90 | **
 91 | 
 92 | **因为还有非常多的人觉得，转基因的食品必须试吃300年没问题才可以批准上市啊！你见过哪个产品的研发周期是300年么？**
 93 | 
 94 | 为了捍卫一名吃货获得更多美味食材的天赋权利，也为了让亲朋好友不再无端恐惧转基因技术，请我吃转基因大餐的栋梁发起了这个科普公益项目，准备邀请更多吃货一起公开品尝转基因大餐，并用转基因食材创造100道转基因料理。
 95 | 
 96 | **感兴趣的朋友可以支持一下他们的众筹，也欢迎大家去广州找栋梁蹭一顿转基因的饭，蹭饭请搜索微信公众号： 转基因能好怎**
 97 | 
 98 | 众筹的二维码
 99 | 
100 | ![](http://pic4.zhimg.com/a776bb513418a56d0fcfcf069b3f3b27_b.jpg)
101 | 
102 | 利益相关：我友情担任了这个科普公益项目的美食顾问，从头到尾不会有任何报酬，连来回广州的高铁钱都是我自己出的。
103 | 
104 | 图片来自网络，侵删。
105 | 


--------------------------------------------------------------------------------
/test/data/question_more_answer.html:
--------------------------------------------------------------------------------
  1 | <div tabindex="-1" class="zm-item-answer "
  2 | 
  3 | itemprop="topAnswer"
  4 | 
  5 | itemscope itemtype="http://schema.org/Answer"
  6 | data-aid="11375476"
  7 | data-atoken="39958704"
  8 | data-collapsed="0"
  9 | data-created="1424146127"
 10 | data-deleted="0"
 11 | data-helpful="1"
 12 | data-isowner="0"
 13 | data-copyable="1"
 14 | 
 15 | 
 16 | >
 17 | <a class="zg-anchor-hidden" name="answer-11375476"></a>
 18 | 
 19 | 
 20 | <div class="zm-votebar">
 21 | <button class="up ">
 22 | <i class="icon vote-arrow"></i>
 23 | <span class="label">赞同</span>
 24 | <span class="count">1</span>
 25 | </button>
 26 | <button class="down ">
 27 | <i class="icon vote-arrow"></i>
 28 | <span class="label">反对</span>
 29 | </button>
 30 | </div>
 31 | 
 32 | 
 33 | <div class="answer-head">
 34 | <div class="zm-item-answer-author-info">
 35 | <h3 class="zm-item-answer-author-wrap">
 36 | 
 37 | 
 38 | <a data-tip="p$t$liu-qing-ting-23"
 39 | class="zm-item-link-avatar"
 40 | href="/people/liu-qing-ting-23">
 41 | <img src="//pic3.zhimg.com/50d6d358a_s.jpg"
 42 | class="zm-list-avatar"
 43 | data-source="//pic3.zhimg.com/50d6d358a_s.jpg" />
 44 | </a>
 45 | 
 46 | 
 47 | 
 48 | <a data-tip="p$t$liu-qing-ting-23" href="/people/liu-qing-ting-23">柳蜻蜓</a>，<strong title="爱猫咪" class="zu-question-my-bio">爱猫咪</strong>
 49 | 
 50 | </h3>
 51 | <a href="javascript:;" name="collapse" class="collapse meta-item zg-right"><i class="z-icon-fold"></i>收起</a>
 52 | </div>
 53 | <div class="zm-item-vote-info " data-votecount="1">
 54 | 
 55 | <span class="voters">
 56 | <span class="user-block"><a data-tip="p$t$feng-hui-38-59" href="http://www.zhihu.com/people/feng-hui-38-59" class="zg-link" title="冯慧">冯慧</a></span>
 57 | </span>
 58 | 
 59 | 
 60 | <span>赞同</span>
 61 | 
 62 | 
 63 | </div>
 64 | </div>
 65 | <div class="zm-item-rich-text" data-resourceid="2112271" data-action="/answer/content">
 66 | 
 67 | <div class="fixed-summary zm-editable-content clearfix">我一般都告诉他们，这叫相敬如宾o(╯□╰)o
 68 | 
 69 | 
 70 | </div>
 71 | 
 72 | </div>
 73 | <a class="zg-anchor-hidden ac" name="11375476-comment"></a>
 74 | <div class="zm-item-meta zm-item-comment-el answer-actions clearfix">
 75 | <div class="zm-meta-panel">
 76 | 
 77 | <span class="answer-date-link-wrap">
 78 | <a class="answer-date-link meta-item" target="_blank" href="/question/24825703/answer/39958704">发布于 2015-02-17</a>
 79 | </span>
 80 | 
 81 | <a href="#" name="addcomment" class=" meta-item toggle-comment">
 82 | <i class="z-icon-comment"></i>添加评论</a>
 83 | 
 84 | 
 85 | <a href="#" class="meta-item zu-autohide" name="thanks" data-thanked="false"><i class="z-icon-thank"></i>感谢</a>
 86 | 
 87 | 
 88 | 
 89 | <a href="#" class="meta-item zu-autohide" name="share">
 90 | <i class="z-icon-share"></i>分享</a>
 91 | <a href="#" class="meta-item zu-autohide" name="favo">
 92 | <i class="z-icon-collect"></i>收藏</a>
 93 | 
 94 | 
 95 | 
 96 | 
 97 | <span class="zg-bull zu-autohide">&bull;</span>
 98 | 
 99 | <a href="#" name="nohelp" class="meta-item zu-autohide">没有帮助</a>
100 | 
101 | <span class="zg-bull zu-autohide">&bull;</span>
102 | <a href="#" name="report" class="meta-item zu-autohide">举报</a>
103 | 
104 | 
105 | 
106 | <span class="zg-bull">&bull;</span>
107 | 
108 | <a href="/terms#sec-licence-1" target="_blank" class="meta-item copyright"> 作者保留权利 </a>
109 | 
110 | 
111 | 
112 | </div>
113 | </div>
114 | </div>
115 | 


--------------------------------------------------------------------------------
/test/test.json:
--------------------------------------------------------------------------------
1 | {"cap_id": "\"OWJjODVkYWQ3MDJjNGM5M2EyNmY1NDAyOGU5MzQwNDA=|1472645376|6e09f5d2b2bb30835a2a9b19b5fa4986637637bd\"", "a_t": "\"ABDMdqgvJAkXAAAAAFbuVwAQzHaoLyQJFwAAAGECVU0AVu5XcAN6dNpEkjB21U5fvKPeu47f8Ug437yVUWuvtYnOb2ctSWwp3f4PAw==\"", "z_c0": "\"QUJETWRxZ3ZKQWtYQUFBQVlRSlZUUUJXN2xkd0EzcDAya1NTTUhiVlRsLThvOTY3anRfeFNBPT0=|1472645376|021fc93e0c8c88bb13669dc31e12ec1900f7cbb9\"", "q_c1": "b924d120986f4133acf1c2390644ca98|1472645376000|1472645376000", "l_cap_id": "\"MTNhNWZmNDllZmYwNDdhZTk2ZDI0ODk5YzM5YjlhMTI=|1472645376|bff7a553af1fd8f4244d26d4fb2bc17fc7063202\"", "login": "\"YjNiNmE2YmFlNTRjNDZiYmIxMGI0MzEzOTMyMDJiM2Q=|1472645376|7bb6c20ba86434b35bdeb2d40d3f816e36354652\"", "n_c": "1"}


--------------------------------------------------------------------------------
/test/test_activity.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import print_function, division, unicode_literals
 5 | import unittest
 6 | import os
 7 | import datetime
 8 | 
 9 | from zhihu import Question, Activity, ActType
10 | from zhihu.common import BeautifulSoup
11 | 
12 | from test_utils import TEST_DATA_PATH
13 | 
14 | 
15 | class ActivityTest(unittest.TestCase):
16 |     @classmethod
17 |     def setUpClass(cls):
18 |         url = 'http://www.zhihu.com/question/24825703'
19 |         file_path = os.path.join(TEST_DATA_PATH, 'question.html')
20 |         with open(file_path, 'rb') as f:
21 |             html = f.read()
22 |         soup = BeautifulSoup(html)
23 | 
24 |         cls.question = Question(url)
25 |         cls.question._session = None
26 |         cls.question.soup = soup
27 | 
28 |         act_time = datetime.datetime.fromtimestamp(1439395600)
29 |         act_type = ActType.FOLLOW_QUESTION
30 |         cls.activity = Activity(act_type, act_time, question=cls.question)
31 | 
32 |     def test_content(self):
33 |         self.assertIs(self.question, self.activity.content)
34 | 
35 |     def test_init_errors(self):
36 |         act_time = datetime.datetime.fromtimestamp(1439395600)
37 |         act_type = ActType.FOLLOW_QUESTION
38 | 
39 |         with self.assertRaises(ValueError):
40 |             Activity(100, act_time)
41 |         with self.assertRaises(ValueError):
42 |             Activity(act_type, act_time)
43 | 


--------------------------------------------------------------------------------
/test/test_answer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import print_function, division, unicode_literals
 5 | import unittest
 6 | import os
 7 | 
 8 | from zhihu import Answer
 9 | from zhihu.common import BeautifulSoup
10 | from test_utils import TEST_DATA_PATH
11 | 
12 | 
13 | class AnswerTest(unittest.TestCase):
14 |     @classmethod
15 |     def setUpClass(cls):
16 |         url = 'http://www.zhihu.com/question/24825703/answer/30975949'
17 |         file_path = os.path.join(TEST_DATA_PATH, 'answer.html')
18 |         with open(file_path, 'rb') as f:
19 |             html = f.read()
20 |         soup = BeautifulSoup(html)
21 | 
22 |         answer_saved_path = os.path.join(TEST_DATA_PATH, 'answer.md')
23 |         with open(answer_saved_path, 'rb') as f:
24 |             cls.answer_saved = f.read()
25 | 
26 |         cls.answer = Answer(url)
27 |         cls.answer._session = None
28 |         cls.answer.soup = soup
29 |         cls.expected = {'id': 30975949, 'aid': 7775236,
30 |                         'xsrf': 'cfd489623d34ca03adfdc125368c6426',
31 |                         'html': soup.prettify(), 'author_id': 'tian-ge-xia',
32 |                         'author_name': '甜阁下', 'question_id': 24825703,
33 |                         'question_title': '关系亲密的人之间要说「谢谢」吗？',
34 |                         'upvote_num': 1164, 'upvoter_name': 'Mikuroneko',
35 |                         'upvoter_id': 'guo-yi-hui-23'}
36 | 
37 |     def test_id(self):
38 |         self.assertEqual(self.expected['id'], self.answer.id)
39 | 
40 |     def test_aid(self):
41 |         self.assertEqual(self.expected['aid'], self.answer.aid)
42 | 
43 |     def test_xsrf(self):
44 |         self.assertEqual(self.expected['xsrf'], self.answer.xsrf)
45 | 
46 |     def test_html(self):
47 |         self.assertEqual(self.expected['html'], self.answer.html)
48 | 
49 |     def test_upvote_num(self):
50 |         self.assertEqual(self.expected['upvote_num'], self.answer.upvote_num)
51 | 
52 |     def test_author(self):
53 |         self.assertEqual(self.expected['author_id'], self.answer.author.id)
54 |         self.assertEqual(self.expected['author_name'], self.answer.author.name)
55 | 
56 |     def test_question(self):
57 |         self.assertEqual(self.expected['question_id'], self.answer.question.id)
58 |         self.assertEqual(self.expected['question_title'],
59 |                          self.answer.question.title)
60 | 
61 |     def test_content(self):
62 |         path = os.path.join(TEST_DATA_PATH, 'answer_content.html')
63 |         with open(path, 'rb') as f:
64 |             content = f.read()
65 |         self.assertEqual(content.decode('utf-8'), self.answer.content)
66 | 
67 |     def test_save(self):
68 |         save_name = 'answer_save'
69 |         self.answer.save(filepath=TEST_DATA_PATH, filename=save_name,
70 |                          mode='md')
71 |         answer_saved_path = os.path.join(TEST_DATA_PATH, save_name + '.md')
72 |         with open(answer_saved_path, 'rb') as f:
73 |             answer_saved = f.read()
74 |         os.remove(answer_saved_path)
75 |         self.assertEqual(self.answer_saved, answer_saved)
76 | 
77 |     def test_parse_author_soup(self):
78 |         fpath = os.path.join(TEST_DATA_PATH, 'answer_upvoter.html')
79 |         with open(fpath, 'rb') as f:
80 |             html = f.read().decode('utf-8')
81 | 
82 |         soup = BeautifulSoup(html)
83 |         upvoter = self.answer._parse_author_soup(soup)
84 | 
85 |         self.assertEqual(self.expected['upvoter_name'], upvoter.name)
86 |         self.assertEqual(self.expected['upvoter_id'], upvoter.id)
87 | 
88 |     def test_save_error(self):
89 |         with self.assertRaises(ValueError):
90 |             self.answer.save(filepath=TEST_DATA_PATH, filename='invalid',
91 |                              mode='invalid')
92 | 


--------------------------------------------------------------------------------
/test/test_collection.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import print_function, division, unicode_literals
 5 | import unittest
 6 | import os
 7 | 
 8 | from test_utils import TEST_DATA_PATH
 9 | from zhihu import Collection
10 | from zhihu.common import BeautifulSoup
11 | 
12 | 
13 | class CollectionTest(unittest.TestCase):
14 |     @classmethod
15 |     def setUpClass(cls):
16 |         url = 'http://www.zhihu.com/collection/28698204'
17 |         file_path = os.path.join(TEST_DATA_PATH, 'collection.html')
18 |         with open(file_path, 'rb') as f:
19 |             html = f.read()
20 |         soup = BeautifulSoup(html)
21 | 
22 |         cls.collection = Collection(url)
23 |         cls.collection._session = None
24 |         cls.collection.soup = soup
25 |         cls.expected = {'cid': 3725428, 'name': '可以用来背的答案',
26 |                         'xsrf': 'cfd489623d34ca03adfdc125368c6426',
27 |                         'owner_id': 'buhuilengyoumo', 'owner_name': '树叶',
28 |                         'follower_num': 6328, 'top_ques_id': 26092705,
29 |                         'top_ques_title': ('一直追求（吸引）不到喜欢的异性，'
30 |                                            '感觉累了怎么办？'),
31 |                         'top_ans_id': 32989919, 'top_ans_author_name': '朱炫',
32 |                         'top_ans_upvote_num': 16595,
33 |                         'top_ans_author_id': 'zhu-xuan-86'
34 |                         }
35 | 
36 |     def test_cid(self):
37 |         self.assertEqual(self.expected['cid'], self.collection.cid)
38 | 
39 |     def test_name(self):
40 |         self.assertEqual(self.expected['name'], self.collection.name)
41 | 
42 |     def test_xsrf(self):
43 |         self.assertEqual(self.expected['xsrf'], self.collection.xsrf)
44 | 
45 |     def test_owner(self):
46 |         owner = self.collection.owner
47 |         self.assertEqual(self.expected['owner_id'], owner.id)
48 |         self.assertEqual(self.expected['owner_name'], owner.name)
49 | 
50 |     def test_follower_num(self):
51 |         self.assertEqual(self.expected['follower_num'],
52 |                          self.collection.follower_num)
53 | 
54 |     def test_page_get_questions(self):
55 |         questions = [q for q in
56 |                      self.collection._page_get_questions(self.collection.soup)]
57 |         ques = questions[0]
58 |         self.assertEqual(self.expected['top_ques_id'], ques.id)
59 |         self.assertEqual(self.expected['top_ques_title'], ques.title)
60 | 
61 |     def test_page_get_answers(self):
62 |         answers = [a for a in
63 |                    self.collection._page_get_answers(self.collection.soup)]
64 |         ans = answers[0]
65 |         self.assertEqual(self.expected['top_ans_id'], ans.id)
66 |         self.assertEqual(self.expected['top_ans_upvote_num'], ans.upvote_num)
67 |         self.assertEqual(self.expected['top_ans_author_name'], ans.author.name)
68 |         self.assertEqual(self.expected['top_ans_author_id'], ans.author.id)
69 | 
70 |     def test_questions(self):
71 |         qs = self.collection.questions
72 |         ques = next(qs)
73 |         self.assertEqual(self.expected['top_ques_id'], ques.id)
74 |         self.assertEqual(self.expected['top_ques_title'], ques.title)
75 | 
76 |     def test_answers(self):
77 |         anses = self.collection.answers
78 |         ans = next(anses)
79 |         self.assertEqual(self.expected['top_ans_id'], ans.id)
80 |         self.assertEqual(self.expected['top_ans_upvote_num'], ans.upvote_num)
81 |         self.assertEqual(self.expected['top_ans_author_name'], ans.author.name)
82 |         self.assertEqual(self.expected['top_ans_author_id'], ans.author.id)
83 | 


--------------------------------------------------------------------------------
/test/test_column.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import print_function, division, unicode_literals
 5 | import unittest
 6 | import os
 7 | import json
 8 | 
 9 | from test_utils import TEST_DATA_PATH
10 | from zhihu import Column, Post
11 | 
12 | 
13 | class ColumnTest(unittest.TestCase):
14 |     @classmethod
15 |     def setUpClass(cls):
16 |         url = 'http://zhuanlan.zhihu.com/xiepanda'
17 |         file_path = os.path.join(TEST_DATA_PATH, 'column.json')
18 |         with open(file_path, 'r') as f:
19 |             soup = json.load(f)
20 | 
21 |         post_path = os.path.join(TEST_DATA_PATH, 'column_post.json')
22 |         with open(post_path, 'r') as f:
23 |             cls.post_json = json.load(f)
24 | 
25 |         cls.column = Column(url)
26 |         cls.column.soup = soup
27 |         cls.expected = {'name': '谢熊猫出没注意', 'follower_num': 76605,
28 |                         'post_num': 69, 'post_author_id': 'xiepanda',
29 |                         'post_title': ("为了做一个称职的吃货，他决定连着吃"
30 |                                        "一百天转基因食物"),
31 |                         'post_upvote_num': 963, 'post_comment_num': 199}
32 | 
33 |     def test_name(self):
34 |         self.assertEqual(self.expected['name'], self.column.name)
35 | 
36 |     def test_folower_num(self):
37 |         self.assertEqual(self.expected['follower_num'],
38 |                          self.column.follower_num)
39 | 
40 |     def test_post_num(self):
41 |         self.assertEqual(self.expected['post_num'], self.column.post_num)
42 | 
43 |     def test_parse_post_data(self):
44 |         post = self.column._parse_post_data(self.post_json)
45 |         self.assertEqual(self.expected['post_author_id'], post.author.id)
46 |         self.assertEqual(self.expected['post_title'], post.title)
47 |         self.assertEqual(self.expected['post_upvote_num'], post.upvote_num)
48 |         self.assertEqual(self.expected['post_comment_num'], post.comment_num)
49 | 
50 |     def test_posts(self):
51 |         ps = self.column.posts
52 |         post = next(ps)
53 |         self.assertTrue(isinstance(post, Post))
54 | 


--------------------------------------------------------------------------------
/test/test_common.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import re
 3 | 
 4 | from zhihu.common import re_question_url
 5 | 
 6 | 
 7 | class CommonTest(unittest.TestCase):
 8 | 
 9 |     def test_question_url(self):
10 |         url = 'https://www.zhihu.com/question/26901243?sort=created'
11 |         obj = re.match(re_question_url, url)
12 |         assert obj.group() == url
13 | 
14 |         url = 'https://www.zhihu.com/question/26901243'
15 |         obj = re.match(re_question_url, url)
16 |         assert obj.group() == url
17 | 
18 |         url = 'https://www.zhihu.com/question/26901243/'
19 |         obj = re.match(re_question_url, url)
20 |         assert obj.group() == url
21 | 
22 |         url = 'https://www.zhihu.com/question/26901243?sort=createdx'
23 |         obj = re.match(re_question_url, url)
24 |         assert obj is None
25 | 
26 |         url = 'https://www.zhihu.com/question/26901243sort=created'
27 |         obj = re.match(re_question_url, url)
28 |         assert obj is None
29 | 
30 |         url = 'https://www.zhihu.com/question/26901243/?sort=created'
31 |         obj = re.match(re_question_url, url)
32 |         assert obj is None
33 | 
34 |         url = 'https://www.zhihu.com/question/26901243?/sort=created'
35 |         obj = re.match(re_question_url, url)
36 |         assert obj is None
37 | 
38 | 


--------------------------------------------------------------------------------
/test/test_post.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import print_function, division, unicode_literals
 5 | import unittest
 6 | import os
 7 | import json
 8 | 
 9 | from zhihu import Post
10 | from test_utils import TEST_DATA_PATH
11 | 
12 | 
13 | class ColumnTest(unittest.TestCase):
14 |     @classmethod
15 |     def setUpClass(cls):
16 |         url = 'http://zhuanlan.zhihu.com/xiepanda/20202275'
17 | 
18 |         post_path = os.path.join(TEST_DATA_PATH, 'column_post.json')
19 |         with open(post_path, 'r') as f:
20 |             post_json = json.load(f)
21 | 
22 |         post_saved_path = os.path.join(TEST_DATA_PATH, 'post.md')
23 |         with open(post_saved_path, 'rb') as f:
24 |             cls.post_saved = f.read()
25 | 
26 |         cls.post = Post(url)
27 |         cls.post.soup = post_json
28 |         cls.expected = {'column_in_name': 'xiepanda', 'slug': 20202275,
29 |                         'column_name': '谢熊猫出没注意',
30 |                         'author_name': '谢熊猫君', 'author_id': 'xiepanda',
31 |                         'title': '为了做一个称职的吃货，他决定连着吃一百天转基因食物',
32 |                         'upvote_num': 963, 'comment_num': 199}
33 | 
34 |     def test_column_in_name(self):
35 |         self.assertEqual(self.expected['column_in_name'],
36 |                          self.post.column_in_name)
37 | 
38 |     def test_slug(self):
39 |         self.assertEqual(self.expected['slug'], self.post.slug)
40 | 
41 |     def test_author(self):
42 |         self.assertEqual(self.expected['author_name'], self.post.author.name)
43 |         self.assertEqual(self.expected['author_id'], self.post.author.id)
44 | 
45 |     def test_title(self):
46 |         self.assertEqual(self.expected['title'], self.post.title)
47 | 
48 |     def test_upvote_num(self):
49 |         self.assertEqual(self.expected['upvote_num'], self.post.upvote_num)
50 | 
51 |     def test_comment_num(self):
52 |         self.assertEqual(self.expected['comment_num'], self.post.comment_num)
53 | 
54 |     def test_save(self):
55 |         save_name = 'post_save'
56 |         self.post.save(filepath=TEST_DATA_PATH, filename=save_name)
57 |         post_saved_path = os.path.join(TEST_DATA_PATH, save_name + '.md')
58 |         with open(post_saved_path, 'rb') as f:
59 |             post_saved = f.read()
60 |         os.remove(post_saved_path)
61 |         self.assertEqual(self.post_saved, post_saved)
62 | 


--------------------------------------------------------------------------------
/test/test_question.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from __future__ import print_function, division, unicode_literals
  5 | import unittest
  6 | import os
  7 | 
  8 | from test_utils import TEST_DATA_PATH
  9 | from zhihu import Question, Author, Answer
 10 | from zhihu.common import BeautifulSoup
 11 | 
 12 | 
 13 | description = ("从小父母和大家庭里，长辈都教我们得到别人帮助时要说“谢谢”。"
 14 |                "比方说家庭聚餐，亲人们帮忙夹了菜要感谢。无论多么亲密,父母还是"
 15 |                "兄妹，都要说声谢谢。   后来上了高中，大学。也习惯性的及时表达"
 16 |                "对他人帮助的感谢。  但室友们，还有男朋友，都不喜欢我这样。他们"
 17 |                "说，这样说的话会感觉双方很有距离感很生疏。尤其是男朋友，不喜"
 18 |                "欢我这样，他说这样很不亲密，情侣之间就不分得那么清，不需要谢"
 19 |                "谢的。但我从小习惯了，别人帮了我我不说的话，会很不自在。  "
 20 |                '怎么办？我还要继续这样吗？什么时候不该说"谢谢”？？')
 21 | 
 22 | 
 23 | class QuestionTest(unittest.TestCase):
 24 |     @classmethod
 25 |     def setUpClass(cls):
 26 |         url = 'http://www.zhihu.com/question/24825703'
 27 |         file_path = os.path.join(TEST_DATA_PATH, 'question.html')
 28 |         with open(file_path, 'rb') as f:
 29 |             html = f.read()
 30 |         soup = BeautifulSoup(html)
 31 | 
 32 |         cls.question = Question(url)
 33 |         cls.question._session = None
 34 |         cls.question.soup = soup
 35 |         cls.expected = {'id': 24825703, 'qid': 2112271,
 36 |                         'xsrf': 'cfd489623d34ca03adfdc125368c6426',
 37 |                         'html': soup.prettify(),
 38 |                         'title': '关系亲密的人之间要说「谢谢」吗？',
 39 |                         'details': description, 'answer_num': 621,
 40 |                         'follower_num': 4427, 'top_answer_id': 39753456,
 41 |                         'top_answer_author_name': '芝士就是力量',
 42 |                         'top_answer_upvote_num': 97, 'top_50_ans_id': 31003847,
 43 |                         'top_50_ans_author_name': '圭多达莱佐',
 44 |                         'top_50_ans_upvote_num': 31, 'more_ans_id': 39958704,
 45 |                         'more_ans_author_name': '柳蜻蜓',
 46 |                         'more_ans_upvote_num': 1,
 47 |                         'topics': ['心理学', '恋爱', '社会', '礼仪',
 48 |                                    '亲密关系'],
 49 |                         }
 50 | 
 51 |         more_ans_file_path = os.path.join(TEST_DATA_PATH,
 52 |                                           'question_more_answer.html')
 53 |         with open(more_ans_file_path, 'rb') as f:
 54 |             cls.more_ans_html = f.read()
 55 | 
 56 |     def test_id(self):
 57 |         self.assertEqual(self.expected['id'], self.question.id)
 58 | 
 59 |     def test_qid(self):
 60 |         self.assertEqual(self.expected['qid'], self.question.qid)
 61 | 
 62 |     def test_xsrf(self):
 63 |         self.assertEqual(self.expected['xsrf'], self.question.xsrf)
 64 | 
 65 |     def test_html(self):
 66 |         self.assertEqual(self.expected['html'], self.question.html)
 67 | 
 68 |     def test_title(self):
 69 |         self.assertEqual(self.expected['title'], self.question.title)
 70 | 
 71 |     def test_details(self):
 72 |         self.assertEqual(self.expected['details'], self.question.details)
 73 | 
 74 |     def test_answer_num(self):
 75 |         self.assertEqual(self.expected['answer_num'], self.question.answer_num)
 76 | 
 77 |     def test_follower_num(self):
 78 |         self.assertEqual(self.expected['follower_num'],
 79 |                          self.question.follower_num)
 80 | 
 81 |     def test_topics(self):
 82 |         self.assertEqual(self.expected['topics'], self.question.topics)
 83 | 
 84 |     def test_top_answer(self):
 85 |         answer = self.question.top_answer
 86 |         self.assertEqual(self.expected['top_answer_id'], answer.id)
 87 |         self.assertEqual(self.expected['top_answer_author_name'],
 88 |                          answer.author.name)
 89 |         self.assertEqual(self.expected['top_answer_upvote_num'],
 90 |                          answer.upvote_num)
 91 | 
 92 |     def test_top_i_answer(self):
 93 |         answer = self.question.top_i_answer(50)
 94 |         self.assertEqual(self.expected['top_50_ans_id'], answer.id)
 95 |         self.assertEqual(self.expected['top_50_ans_author_name'],
 96 |                          answer.author.name)
 97 |         self.assertEqual(self.expected['top_50_ans_upvote_num'],
 98 |                          answer.upvote_num)
 99 | 
100 |     def test_parse_answer_html(self):
101 |         answer = self.question._parse_answer_html(self.more_ans_html)
102 |         self.assertEqual(self.expected['more_ans_id'], answer.id)
103 |         self.assertEqual(self.expected['more_ans_author_name'],
104 |                          answer.author.name)
105 |         self.assertEqual(self.expected['more_ans_upvote_num'],
106 |                          answer.upvote_num)
107 | 
108 |     def test_top_i_answers(self):
109 |         answers = [a for a in self.question.top_i_answers(1)]
110 |         answer = answers[0]
111 |         self.assertEqual(self.expected['top_answer_id'], answer.id)
112 |         self.assertEqual(self.expected['top_answer_author_name'],
113 |                          answer.author.name)
114 |         self.assertEqual(self.expected['top_answer_upvote_num'],
115 |                          answer.upvote_num)
116 | 


--------------------------------------------------------------------------------
/test/test_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import inspect
 3 | 
 4 | 
 5 | def module_path(local_function):
 6 |     ''' returns the module path without the use of __file__.
 7 |     Requires a function defined locally in the module.
 8 |     from "http://stackoverflow.com/questions/729583/
 9 |     getting-file-path-of-imported-module"'''
10 | 
11 |     return os.path.abspath(inspect.getsourcefile(local_function))
12 | 
13 | 
14 | TEST_DATA_PATH = os.path.join(
15 |     os.path.split(module_path(module_path))[0], 'data')
16 | 


--------------------------------------------------------------------------------
/zhihu/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from .client import ZhihuClient
 5 | from .question import Question
 6 | from .author import Author, ANONYMOUS, BanException
 7 | from .activity import Activity
 8 | from .acttype import ActType, CollectActType
 9 | from .answer import Answer
10 | from .collection import Collection
11 | from .column import Column
12 | from .post import Post
13 | from .topic import Topic
14 | 
15 | __all__ = ['ZhihuClient', 'Question', 'Author', 'ActType', 'Activity',
16 |            'Answer', 'Collection', 'CollectActType', 'Column', 'Post', 'Topic',
17 |            'ANONYMOUS', 'BanException']
18 | 
19 | __version__ = '0.3.23'
20 | 


--------------------------------------------------------------------------------
/zhihu/activity.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from datetime import datetime
  5 | 
  6 | from .acttype import ActType
  7 | from .answer import Answer
  8 | from .author import Author, ANONYMOUS
  9 | from .collection import Collection
 10 | from .column import Column
 11 | from .common import *
 12 | from .post import Post
 13 | from .question import Question
 14 | from .topic import Topic
 15 | 
 16 | 
 17 | class Activity:
 18 |     """用户动态类，请使用Author.activities获取."""
 19 | 
 20 |     def __init__(self, act, session, author):
 21 |         """创建用户动态类实例.
 22 | 
 23 |         :param bs4.element.Tag act: 表示用户动态的页面元素
 24 |         :param Session session: 使用的网络会话
 25 |         :param Author author: Activity 所属的用户对象
 26 |         :return: 用户动态对象
 27 |         :rtype: Activity
 28 | 
 29 |         :说明:
 30 |             根据Activity.type不同可以获取不同属性，具体请看 :class:`.ActType`
 31 | 
 32 |         """
 33 |         self._session = session
 34 |         self._author = author
 35 |         self._type = ActType.from_str(act.attrs['data-type-detail'])
 36 | 
 37 |         useless_tag = act.div.find('a', class_='zg-link')
 38 |         if useless_tag is not None:
 39 |             useless_tag.extract()
 40 | 
 41 |         attribute = self._get_assemble_method(self.type)(act)
 42 |         self._attr = attribute.__class__.__name__.lower()
 43 |         setattr(self, self._attr, attribute)
 44 |         self._time = datetime.fromtimestamp(int(act['data-time']))
 45 | 
 46 |     @property
 47 |     def type(self):
 48 |         """
 49 |         :return: 用户动态类型, 具体参见 :class:`.ActType`
 50 |         :rtype: class:`.ActType`
 51 |         """
 52 |         return self._type
 53 | 
 54 |     @property
 55 |     def content(self):
 56 |         """获取此对象中能提供的那个属性，对应表请查看 :class:`.ActType` 类.
 57 | 
 58 |         :return: 对象提供的对象
 59 |         :rtype: Author or Question or Answer or Topic or Column or Post
 60 |         """
 61 |         return getattr(self, self._attr)
 62 | 
 63 |     @property
 64 |     def time(self):
 65 |         """
 66 |         :return: 返回用户执行 Activity 操作的时间
 67 |         :rtype: datetime.datetime
 68 |         """
 69 |         return self._time
 70 | 
 71 |     def __find_post(self, act):
 72 |         try:
 73 |             column_url = act.find('a', class_='column_link')['href']
 74 |             column_name = act.find('a', class_='column_link').text
 75 |             column = Column(column_url, column_name, session=self._session)
 76 |         except TypeError:
 77 |             column = None
 78 |         try:
 79 |             author_tag = act.find('div', class_='author-info')
 80 |             author_url = Zhihu_URL + author_tag.a['href']
 81 |             author_name = author_tag.a.text
 82 |             author_motto = author_tag.span.text if author_tag.span else ''
 83 |             author = Author(author_url, author_name, author_motto,
 84 |                             session=self._session)
 85 |         except TypeError:
 86 |             author = ANONYMOUS
 87 |         post_url = act.find('a', class_='post-link')['href']
 88 |         post_title = act.find('a', class_='post-link').text
 89 |         post_comment_num, post_upvote_num = self._parse_un_cn(act)
 90 |         return Post(post_url, column, author, post_title,
 91 |                     post_upvote_num, post_comment_num,
 92 |                     session=self._session)
 93 | 
 94 |     def _assemble_create_post(self, act):
 95 |         return self.__find_post(act)
 96 | 
 97 |     def _assemble_voteup_post(self, act):
 98 |         return self.__find_post(act)
 99 | 
100 |     def _assemble_follow_column(self, act):
101 |         return Column(act.div.a['href'], act.div.a.text, session=self._session)
102 | 
103 |     def _assemble_follow_topic(self, act):
104 |         topic_url = Zhihu_URL + act.div.a['href']
105 |         topic_name = act.div.a['title']
106 |         return Topic(topic_url, topic_name, session=self._session)
107 | 
108 |     def _assemble_answer_question(self, act):
109 |         question_url = Zhihu_URL + re_a2q.match(
110 |             act.div.find_all('a')[-1]['href']).group(1)
111 |         question_title = act.div.find_all('a')[-1].text.strip()
112 |         question = Question(question_url, question_title, session=self._session)
113 |         answer_url = Zhihu_URL + act.div.find_all('a')[-1]['href']
114 |         answer_comment_num, answer_upvote_num = self._parse_un_cn(act)
115 |         return Answer(answer_url, question, self._author, answer_upvote_num,
116 |                       session=self._session)
117 | 
118 |     def _assemble_voteup_answer(self, act):
119 |         question_url = Zhihu_URL + re_a2q.match(act.div.a['href']).group(1)
120 |         question_title = act.div.a.text.strip()
121 |         question = Question(question_url, question_title, session=self._session)
122 |         try_find_author = act.find_all('a', class_='author-link',
123 |                                        href=re.compile('^/people/[^/]*$'))
124 | 
125 |         if len(try_find_author) == 0:
126 |             author_url = None
127 |             author_name = '匿名用户'
128 |             author_motto = ''
129 |         else:
130 |             try_find_author = try_find_author[-1]
131 |             author_url = Zhihu_URL + try_find_author['href']
132 |             author_name = try_find_author.text
133 |             try_find_motto = act.find('span', class_='bio')
134 |             if try_find_motto is None:
135 |                 author_motto = ''
136 |             else:
137 |                 author_motto = try_find_motto['title']
138 | 
139 |         author = Author(author_url, author_name, author_motto,
140 |                         session=self._session)
141 |         answer_url = Zhihu_URL + act.div.a['href']
142 |         answer_comment_num, answer_upvote_num = self._parse_un_cn(act)
143 |         return Answer(answer_url, question, author, answer_upvote_num,
144 |                       session=self._session)
145 | 
146 |     def _assemble_ask_question(self, act):
147 |         a = act.find("a", class_="question_link")
148 |         url = Zhihu_URL + a['href']
149 |         title = a.text.strip(' \n')
150 |         return Question(url, title, session=self._session)
151 | 
152 |     def _assemble_follow_question(self, act):
153 |         return Question(Zhihu_URL + act.div.a['href'], act.div.a.text.strip(),
154 |                         session=self._session)
155 | 
156 |     def _assemble_follow_collection(self, act):
157 |         url = act.div.a['href']
158 |         if not url.startswith('http'):
159 |             url = Zhihu_URL + url
160 |         return Collection(url, session=self._session)
161 | 
162 |     def _get_assemble_method(self, act_type):
163 |         assemble_methods = {
164 |             ActType.UPVOTE_POST: self._assemble_voteup_post,
165 |             ActType.FOLLOW_COLUMN: self._assemble_follow_column,
166 |             ActType.UPVOTE_ANSWER: self._assemble_voteup_answer,
167 |             ActType.ANSWER_QUESTION: self._assemble_answer_question,
168 |             ActType.ASK_QUESTION: self._assemble_ask_question,
169 |             ActType.FOLLOW_QUESTION: self._assemble_follow_question,
170 |             ActType.FOLLOW_TOPIC: self._assemble_follow_topic,
171 |             ActType.PUBLISH_POST: self._assemble_create_post,
172 |             ActType.FOLLOW_COLLECTION: self._assemble_follow_collection
173 |         }
174 | 
175 |         if act_type in assemble_methods:
176 |             return assemble_methods[act_type]
177 |         else:
178 |             raise ValueError('invalid activity type')
179 | 
180 |     @staticmethod
181 |     def _parse_un_cn(act):
182 |         upvote_num = act.find('a', class_='zm-item-vote-count').text
183 |         if upvote_num.isdigit():
184 |             upvote_num = int(upvote_num)
185 |         else:
186 |             upvote_num = None
187 |         comment = act.find('a', class_='toggle-comment')
188 |         comment_text = next(comment.stripped_strings)
189 |         comment_num_match = re_get_number.match(comment_text)
190 |         comment_num = int(
191 |             comment_num_match.group(1)) if comment_num_match is not None else 0
192 |         return comment_num, upvote_num
193 | 


--------------------------------------------------------------------------------
/zhihu/acttype.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import enum
 5 | 
 6 | match = {
 7 |     'ANSWER_QUESTION': 'member_answer_question',
 8 |     'UPVOTE_ANSWER': 'member_voteup_answer',
 9 |     'ASK_QUESTION': 'member_ask_question',
10 |     'FOLLOW_QUESTION': 'member_follow_question',
11 |     'UPVOTE_POST': 'member_voteup_article',
12 |     'FOLLOW_COLUMN': 'member_follow_column',
13 |     'FOLLOW_TOPIC': 'member_follow_topic',
14 |     'PUBLISH_POST': 'member_create_article',
15 |     'FOLLOW_COLLECTION': 'member_follow_favlist'
16 | }
17 | 
18 | reverse_match = {v: k for k, v in match.items()}
19 | 
20 | 
21 | class ActType(enum.Enum):
22 | 
23 |     """用于表示用户动态的类型.
24 | 
25 |     :常量说明:
26 |         ================= ================ ============ =====================
27 |         常量名              说明              提供属性      属性类型
28 |         ================= ================ ============ =====================
29 |         ANSWER_QUESTION   回答了一个问题    answer       :class:`.Answer`
30 |         UPVOTE_ANSWER     赞同了一个回答    answer       :class:`.Answer`
31 |         ASK_QUESTION      提出了一个问题    question     :class:`.Question`
32 |         FOLLOW_QUESTION   关注了一个问题    question     :class:`.Question`
33 |         UPVOTE_POST       赞同了一篇文章    post         :class:`.Post`
34 |         FOLLOW_COLUMN     关注了一个专栏    column       :class:`.Column`
35 |         FOLLOW_TOPIC      关注了一个话题    topic        :class:`.Topic`
36 |         PUBLISH_POST      发表了一篇文章    post         :class:`.Post`
37 |         FOLLOW_COLLECTION 关注了一个收藏夹  collection   :class:`.Collection`
38 |         ================= ================ ============ =====================
39 | 
40 |     """
41 | 
42 |     ANSWER_QUESTION = 1
43 |     UPVOTE_ANSWER = 2
44 |     ASK_QUESTION = 4
45 |     FOLLOW_QUESTION = 8
46 |     UPVOTE_POST = 16
47 |     FOLLOW_COLUMN = 32
48 |     FOLLOW_TOPIC = 64
49 |     PUBLISH_POST = 128
50 |     FOLLOW_COLLECTION = 256
51 | 
52 |     @classmethod
53 |     def from_str(cls, div_class):
54 |         return cls.__getattr__(reverse_match[div_class])
55 | 
56 |     def __str__(self):
57 |         return match[self.name]
58 | 
59 | 
60 | class CollectActType(enum.Enum):
61 |     """用于表示收藏夹操作的类型.
62 | 
63 |     :常量说明:
64 |         ================= ==============
65 |         常量名            说明
66 |         ================= ==============
67 |         INSERT_ANSWER     在收藏夹中增加一个回答
68 |         DELETE_ANSWER     在收藏夹中删除一个回答
69 |         CREATE_COLLECTION 创建收藏夹
70 |         ================= ==============
71 |     """
72 |     INSERT_ANSWER = 1
73 |     DELETE_ANSWER = 2
74 |     CREATE_COLLECTION = 3
75 | 


--------------------------------------------------------------------------------
/zhihu/answer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import json
  5 | from datetime import datetime
  6 | 
  7 | from .common import *
  8 | from .base import BaseZhihu
  9 | from .collection import Collection
 10 | from .author import Author, ANONYMOUS
 11 | 
 12 | 
 13 | class Answer(BaseZhihu):
 14 |     """答案类，请使用``ZhihuClient.answer``方法构造对象."""
 15 | 
 16 |     @class_common_init(re_ans_url)
 17 |     def __init__(self, url, question=None, author=None,
 18 |                  upvote_num=None, content=None, session=None):
 19 |         """创建答案类实例.
 20 | 
 21 |         :param str url: 答案url
 22 |         :param Question question: 答案所在的问题对象，可选
 23 |         :param Author author: 答案回答者对象，可选
 24 |         :param int upvote_num: 答案赞同数量，可选
 25 |         :param str content: 答案内容，可选
 26 |         :param Session session: 使用的网络会话，为空则使用新会话
 27 |         :return: 答案对象
 28 |         :rtype: Answer
 29 |         """
 30 |         self.url = url
 31 |         self._session = session
 32 |         self._question = question
 33 |         self._author = author
 34 |         self._upvote_num = upvote_num
 35 |         self._content = content
 36 |         self._deleted = None
 37 | 
 38 |     @property
 39 |     def id(self):
 40 |         """答案的id
 41 | 
 42 |         :return: 答案id
 43 |         :rtype: int
 44 |         """
 45 |         return int(re.match(r'.*/(\d+)/$', self.url).group(1))
 46 | 
 47 |     @property
 48 |     @check_soup('_xsrf')
 49 |     def xsrf(self):
 50 |         """获取知乎的反xsrf参数（用不到就忽视吧~）
 51 | 
 52 |         :return: xsrf参数
 53 |         :rtype: str
 54 |         """
 55 |         return self.soup.find('input', attrs={'name': '_xsrf'})['value']
 56 | 
 57 |     @property
 58 |     @check_soup('_aid')
 59 |     def aid(self):
 60 |         """获取答案的内部id，某些POST操作需要此参数
 61 | 
 62 |         :return: 答案内部id
 63 |         :rtype: str
 64 |         """
 65 |         return int(self.soup.find('div', class_='zm-item-answer')['data-aid'])
 66 | 
 67 |     @property
 68 |     @check_soup('_html')
 69 |     def html(self):
 70 |         """获取网页源码
 71 | 
 72 |         :return: 网页源码
 73 |         :rtype: str
 74 |         """
 75 |         return self.soup.prettify()
 76 | 
 77 |     @property
 78 |     @check_soup('_author')
 79 |     def author(self):
 80 |         """获取答案作者.
 81 | 
 82 |         :return: 答案作者
 83 |         :rtype: Author
 84 |         """
 85 |         from .author import Author
 86 | 
 87 |         author = self.soup.find('div', class_='zm-item-answer-author-info')
 88 |         url, name, motto, photo = parser_author_from_tag(author)
 89 |         if name == '匿名用户':
 90 |             return ANONYMOUS
 91 |         else:
 92 |             return Author(url, name, motto, photo_url=photo,
 93 |                           session=self._session)
 94 | 
 95 |     @property
 96 |     @check_soup('_question')
 97 |     def question(self):
 98 |         """获取答案所在问题.
 99 | 
100 |         :return: 答案所在问题
101 |         :rtype: Question
102 |         """
103 |         from .question import Question
104 | 
105 |         question_link = self.soup.find(
106 |             "h2", class_="zm-item-title").a
107 |         url = Zhihu_URL + question_link["href"]
108 |         title = question_link.text.strip()
109 |         followers_num = int(self.soup.find(
110 |             'div', class_='zh-question-followers-sidebar').div.a.strong.text)
111 |         answers_num = int(re_get_number.match(self.soup.find(
112 |             'div', class_='zh-answers-title').h3.a.text).group(1))
113 |         return Question(url, title, followers_num, answers_num,
114 |                         session=self._session)
115 | 
116 |     @property
117 |     @check_soup('_upvote_num')
118 |     def upvote_num(self):
119 |         """获取答案赞同数量.
120 | 
121 |         :return: 答案赞同数量
122 |         :rtype: int
123 |         """
124 |         return int(self.soup.find(
125 |             'div', class_='zm-item-vote-info')['data-votecount'])
126 | 
127 |     @property
128 |     def upvoters(self):
129 |         """获取答案点赞用户，返回生成器.
130 | 
131 |         :return: 点赞用户
132 |         :rtype: Author.Iterable
133 |         """
134 |         self._make_soup()
135 |         next_req = '/answer/' + str(self.aid) + '/voters_profile'
136 |         while next_req != '':
137 |             data = self._session.get(Zhihu_URL + next_req).json()
138 |             next_req = data['paging']['next']
139 |             for html in data['payload']:
140 |                 soup = BeautifulSoup(html)
141 |                 yield self._parse_author_soup(soup)
142 | 
143 |     @property
144 |     @check_soup('_content')
145 |     def content(self):
146 |         """以处理过的Html代码形式返回答案内容.
147 | 
148 |         :return: 答案内容
149 |         :rtype: str
150 |         """
151 |         answer_wrap = self.soup.find('div', id='zh-question-answer-wrap')
152 |         content = answer_wrap.find('div', class_='zm-editable-content')
153 |         content = answer_content_process(content)
154 |         return content
155 | 
156 |     @property
157 |     @check_soup('_creation_time')
158 |     def creation_time(self):
159 |         """获取答案创建时间
160 | 
161 |         :return: 答案创建时间
162 |         :rtype: datetime.datetime
163 |         """
164 |         return datetime.fromtimestamp(int(self.soup.find(
165 |                 'div', class_='zm-item-answer')['data-created']))
166 | 
167 |     @property
168 |     @check_soup('_collect_num')
169 |     def collect_num(self):
170 |         """获取答案收藏数
171 | 
172 |         :return:  答案收藏数量
173 |         :rtype: int
174 |         """
175 |         element = self.soup.find("a", {
176 |             "data-za-a": "click_answer_collected_count"
177 |         })
178 |         if element is None:
179 |             return 0
180 |         else:
181 |             return int(element.get_text())
182 | 
183 |     @property
184 |     def collections(self):
185 |         """获取包含该答案的收藏夹
186 | 
187 |         :return: 包含该答案的收藏夹
188 |         :rtype: Collection.Iterable
189 | 
190 |         collect_num 未必等于 len(collections)，比如:
191 |         https://www.zhihu.com/question/20064699/answer/13855720
192 |         显示被收藏 38 次，但只有 30 个收藏夹
193 |         """
194 |         import time
195 |         gotten_feed_num = 20
196 |         offset = 0
197 |         data = {
198 |             'method':'next',
199 |             '_xsrf': self.xsrf
200 |         }
201 |         while gotten_feed_num >= 10:
202 |             data['params'] = "{\"answer_url\": %d,\"offset\": %d}" % (self.id, offset)
203 |             res = self._session.post(url=Get_Collection_Url, data=data)
204 |             gotten_feed_num = len(res.json()['msg'])
205 |             offset += gotten_feed_num
206 |             soup = BeautifulSoup(''.join(res.json()['msg']))
207 |             for zm_item in soup.find_all('div', class_='zm-item'):
208 |                 url = Zhihu_URL + zm_item.h2.a['href']
209 |                 name = zm_item.h2.a.text
210 |                 links = zm_item.div.find_all('a')
211 |                 owner = Author(links[0]['href'], session=self._session)
212 |                 follower_num = int(links[1].text.split()[0])
213 |                 yield Collection(url, owner=owner, name=name,
214 |                                  follower_num=follower_num,
215 |                                  session=self._session)
216 | 
217 |             time.sleep(0.2)  # prevent from posting too quickly
218 | 
219 |     def save(self, filepath=None, filename=None, mode="html"):
220 |         """保存答案为Html文档或markdown文档.
221 | 
222 |         :param str filepath: 要保存的文件所在的目录，
223 |             不填为当前目录下以问题标题命名的目录, 设为"."则为当前目录。
224 |         :param str filename: 要保存的文件名，
225 |             不填则默认为 所在问题标题 - 答主名.html/md。
226 |             如果文件已存在，自动在后面加上数字区分。
227 |             **自定义文件名时请不要输入后缀 .html 或 .md。**
228 |         :param str mode: 保存类型，可选 `html` 、 `markdown` 、 `md` 。
229 |         :return: 无
230 |         :rtype: None
231 |         """
232 |         if mode not in ["html", "md", "markdown"]:
233 |             raise ValueError("`mode` must be 'html', 'markdown' or 'md',"
234 |                              " got {0}".format(mode))
235 |         file = get_path(filepath, filename, mode, self.question.title,
236 |                         self.question.title + '-' + self.author.name)
237 |         with open(file, 'wb') as f:
238 |             if mode == "html":
239 |                 f.write(self.content.encode('utf-8'))
240 |             else:
241 |                 import html2text
242 |                 h2t = html2text.HTML2Text()
243 |                 h2t.body_width = 0
244 |                 f.write(h2t.handle(self.content).encode('utf-8'))
245 | 
246 |     def _parse_author_soup(self, soup):
247 |         from .author import Author, ANONYMOUS
248 | 
249 |         author_tag = soup.find('div', class_='body')
250 |         if author_tag.string is None:
251 |             author_name = author_tag.div.a['title']
252 |             author_url = author_tag.div.a['href']
253 |             author_motto = author_tag.div.span.text
254 |             photo_url = PROTOCOL + soup.a.img['src'].replace('_m', '_r')
255 |             numbers_tag = soup.find_all('li')
256 |             numbers = [int(re_get_number.match(x.get_text()).group(1))
257 |                        for x in numbers_tag]
258 |             # noinspection PyTypeChecker
259 |             return Author(author_url, author_name, author_motto, None,
260 |                           numbers[2], numbers[3], numbers[0], numbers[1],
261 |                           photo_url, session=self._session)
262 |         else:
263 |             return ANONYMOUS
264 | 
265 |     @property
266 |     @check_soup('_comment_num')
267 |     def comment_num(self):
268 |         """
269 |         :return: 答案下评论的数量
270 |         :rtype: int
271 |         """
272 |         comment = self.soup.select_one("div.answer-actions a.toggle-comment")
273 |         comment_num_string = comment.text
274 |         number = comment_num_string.split()[0]
275 |         return int(number) if number.isdigit() else 0
276 | 
277 |     @property
278 |     def comments(self):
279 |         """获取答案下的所有评论.
280 | 
281 |         :return: 答案下的所有评论，返回生成器
282 |         :rtype: Comments.Iterable
283 |         """
284 |         import math
285 |         from .author import Author, ANONYMOUS
286 |         from .comment import Comment
287 | 
288 |         api_url = Get_Answer_Comment_URL.format(self.aid)
289 |         page = pages = 1
290 |         while page <= pages:
291 |             res = self._session.get(api_url + '?page=' + str(page))
292 |             if page == 1:
293 |                 total = int(res.json()['paging']['totalCount'])
294 |                 if total == 0:
295 |                     return
296 |                 pages = math.ceil(total / 30) 
297 |             page += 1
298 | 
299 |             comment_items = res.json()['data']
300 |             for comment_item in comment_items:
301 |                 comment_id = comment_item['id']
302 |                 content = comment_item['content']
303 |                 upvote_num = comment_item['likesCount']
304 |                 time_string = comment_item['createdTime'][:19]
305 |                 time = datetime.strptime(time_string, "%Y-%m-%dT%H:%M:%S")
306 | 
307 |                 if comment_item['author'].get('url') is not None:
308 |                     a_url = comment_item['author']['url']
309 |                     a_name = comment_item['author']['name']
310 |                     photo_url_tmp = comment_item['author']['avatar']['template']
311 |                     photo_url_id = comment_item['author']['avatar']['id']
312 |                     a_photo_url = photo_url_tmp.replace(
313 |                             '{id}', photo_url_id).replace('_{size}', '')
314 |                     author_obj = Author(a_url, a_name, photo_url=a_photo_url,
315 |                                         session=self._session)
316 |                 else:
317 |                     author_obj = ANONYMOUS
318 | 
319 |                 yield Comment(comment_id, self, author_obj, upvote_num, content, time)
320 | 
321 |     @property
322 |     def latest_comments(self):
323 |         """获取答案下的所有评论。较新的评论先返回。
324 |         使用该方法比 ``reversed(list(answer.comments))`` 效率高  
325 |         因为现在靠后的热门评论会被挪到前面，所以返回的评论未必严格满足时间先后关系
326 | 
327 |         :return: 答案下的所有评论，返回生成器
328 |         :rtype: Comments.Iterable
329 |         """
330 |         import math
331 |         from .author import Author, ANONYMOUS
332 |         from .comment import Comment
333 |         
334 |         if self.comment_num == 0:
335 |             return
336 |         pages = math.ceil(self.comment_num / 30)
337 |         api_url = Get_Answer_Comment_URL.format(self.aid)
338 |         for page in range(pages, 0, -1):
339 |             res = self._session.get(api_url + '?page=' + str(page))
340 |             comment_items = res.json()['data']
341 |             for comment_item in reversed(comment_items):
342 |                 comment_id = comment_item['id']
343 |                 content = comment_item['content']
344 |                 upvote_num = comment_item['likesCount']
345 |                 time_string = comment_item['createdTime'][:19]
346 |                 time = datetime.strptime(time_string, "%Y-%m-%dT%H:%M:%S")
347 | 
348 |                 if comment_item['author'].get('url') != None:
349 |                     a_url = comment_item['author']['url']
350 |                     a_name = comment_item['author']['name']
351 |                     photo_url_tmp = comment_item['author']['avatar']['template']
352 |                     photo_url_id = comment_item['author']['avatar']['id']
353 |                     a_photo_url = photo_url_tmp.replace(
354 |                             '{id}', photo_url_id).replace('_{size}', '')
355 |                     author_obj = Author(a_url, a_name, photo_url=a_photo_url,
356 |                                         session=self._session)
357 |                 else:
358 |                     author_obj = ANONYMOUS
359 | 
360 |                 yield Comment(comment_id, self, author_obj, upvote_num, content, time)
361 | 
362 |     def refresh(self):
363 |         """刷新 Answer object 的属性. 
364 |         例如赞同数增加了, 先调用 ``refresh()`` 
365 |         再访问 upvote_num属性, 可获得更新后的赞同数.
366 |         
367 |         :return: None
368 |         """
369 |         super().refresh()
370 |         self._html = None
371 |         self._upvote_num = None
372 |         self._content = None
373 |         self._collect_num = None
374 |         self._comment_num = None
375 | 
376 |     @property
377 |     @check_soup('_deleted')
378 |     def deleted(self):
379 |         """答案是否被删除, 被删除了返回 True, 为被删除返回 False
380 |         :return: True or False
381 |         """
382 |         return self._deleted
383 | 


--------------------------------------------------------------------------------
/zhihu/base.py:
--------------------------------------------------------------------------------
 1 | from .common import BeautifulSoup
 2 | from requests import Response
 3 | import json
 4 | 
 5 | 
 6 | class BaseZhihu:
 7 |     def _gen_soup(self, content):
 8 |         self.soup = BeautifulSoup(content)
 9 | 
10 |     def _get_content(self):
11 |         # use _url for question
12 |         url = self._url if hasattr(self, '_url') else self.url
13 |         if url.endswith('/'):
14 |             resp = self._session.get(url[:-1])
15 |         else:
16 |             resp = self._session.get(url)
17 | 
18 |         class_name = self.__class__.__name__
19 |         if class_name == 'Answer':
20 |             if 'answer' in resp.url:
21 |                 self._deleted = False
22 |             else:
23 |                 self._deleted = True
24 |         elif class_name == 'Question':
25 |             self._deleted = resp.status_code == 404
26 | 
27 |         return resp.content
28 | 
29 |     def _make_soup(self):
30 |         if self.url and not self.soup:
31 |             self._gen_soup(self._get_content())
32 | 
33 |     def refresh(self):
34 |         # refresh self.soup's content
35 |         self._gen_soup(self._get_content())
36 | 
37 |     @classmethod
38 |     def from_html(cls, content):
39 |         obj = cls(url=None)
40 |         obj._gen_soup(content)
41 |         return obj
42 | 
43 | 
44 | class JsonAsSoupMixin:
45 |     def _gen_soup(self, content):
46 |         # 为了让`from_html`对外提供统一的接口, 判断一下输入, 如果是bytes 或者 str 则用json处理,
47 |         # 否则认为是由_get_content返回的dict
48 | 
49 |         if isinstance(content, bytes):
50 |             r = Response()
51 |             r._content = content
52 |             soup = r.json()
53 |             self.soup = soup
54 |         elif isinstance(content, str):
55 |             self.soup = json.loads(content)
56 |         else:
57 |             self.soup = content
58 | 


--------------------------------------------------------------------------------
/zhihu/client.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import getpass
  5 | import importlib
  6 | import json
  7 | import time
  8 | from urllib.parse import urlencode
  9 | 
 10 | import requests
 11 | 
 12 | from .common import *
 13 | 
 14 | 
 15 | class ZhihuClient:
 16 | 
 17 |     """知乎客户端类，内部维护了自己专用的网络会话，可用cookies或账号密码登录."""
 18 | 
 19 |     def __init__(self, cookies=None):
 20 |         """创建客户端类实例.
 21 | 
 22 |         :param str cookies: 见 :meth:`.login_with_cookies` 中 ``cookies`` 参数
 23 |         :return: 知乎客户端对象
 24 |         :rtype: ZhihuClient
 25 |         """
 26 |         self._session = requests.Session()
 27 |         self._session.headers.update(Default_Header)
 28 |         self.proxies = None
 29 |         if cookies is not None:
 30 |             assert isinstance(cookies, str)
 31 |             self.login_with_cookies(cookies)
 32 | 
 33 |     # ===== login staff =====
 34 | 
 35 |     @staticmethod
 36 |     def _get_captcha_url():
 37 |         params = {
 38 |             'r': str(int(time.time() * 1000)),
 39 |             'type': 'login',
 40 |         }
 41 |         return Captcha_URL + '?' + urlencode(params)
 42 | 
 43 |     def get_captcha(self):
 44 |         """获取验证码数据。
 45 | 
 46 |         :return: 验证码图片数据。
 47 |         :rtype: bytes
 48 |         """
 49 |         self._session.get(Zhihu_URL)
 50 |         r = self._session.get(self._get_captcha_url())
 51 |         return r.content
 52 | 
 53 |     def login(self, email, password, captcha=None):
 54 |         """登陆知乎.
 55 | 
 56 |         :param str email: 邮箱
 57 |         :param str password: 密码
 58 |         :param str captcha: 验证码, 默认为None，表示不提交验证码
 59 |         :return:
 60 |             ======== ======== ============== ====================
 61 |             元素序号 元素类型 意义           说明
 62 |             ======== ======== ============== ====================
 63 |             0        int      是否成功       0为成功，1为失败
 64 |             1        str      失败原因       登录成功则为空字符串
 65 |             2        str       cookies字符串 登录失败则为空字符串
 66 |             ======== ======== ============== ====================
 67 | 
 68 |         :rtype: (int, str, str)
 69 |         """
 70 |         data = {'email': email, 'password': password,
 71 |                 'remember_me': 'true'}
 72 |         if captcha is not None:
 73 |             data['captcha'] = captcha
 74 |         r = self._session.post(Login_URL, data=data)
 75 |         j = r.json()
 76 |         code = int(j['r'])
 77 |         message = j['msg']
 78 |         cookies_str = json.dumps(self._session.cookies.get_dict()) \
 79 |             if code == 0 else ''
 80 |         return code, message, cookies_str
 81 | 
 82 |     def login_with_cookies(self, cookies):
 83 |         """使用cookies文件或字符串登录知乎
 84 | 
 85 |         :param str cookies:
 86 |             ============== ===========================
 87 |             参数形式       作用
 88 |             ============== ===========================
 89 |             文件名         将文件内容作为cookies字符串
 90 |             cookies 字符串  直接提供cookies字符串
 91 |             ============== ===========================
 92 |         :return: 无
 93 |         :rtype: None
 94 |         """
 95 |         if os.path.isfile(cookies):
 96 |             with open(cookies) as f:
 97 |                 cookies = f.read()
 98 |         cookies_dict = json.loads(cookies)
 99 |         self._session.cookies.update(cookies_dict)
100 | 
101 |     def login_in_terminal(self, need_captcha=False, use_getpass=True):
102 |         """不使用cookies，在终端中根据提示登陆知乎
103 | 
104 |         :param bool need_captcha: 是否要求输入验证码，如果登录失败请设为 True
105 |         :param bool use_getpass: 是否使用安全模式输入密码，默认为 True，
106 |             如果在某些 Windows IDE 中无法正常输入密码，请把此参数设置为 False 试试
107 |         :return: 如果成功返回cookies字符串
108 |         :rtype: str
109 |         """
110 |         print('====== zhihu login =====')
111 | 
112 |         email = input('email: ')
113 |         if use_getpass:
114 |             password = getpass.getpass('password: ')
115 |         else:
116 |             password = input("password: ")
117 | 
118 |         if need_captcha:
119 |             captcha_data = self.get_captcha()
120 |             with open('captcha.gif', 'wb') as f:
121 |                 f.write(captcha_data)
122 | 
123 |             print('please check captcha.gif for captcha')
124 |             captcha = input('captcha: ')
125 |             os.remove('captcha.gif')
126 |         else:
127 |             captcha = None
128 | 
129 |         print('====== logging.... =====')
130 | 
131 |         code, msg, cookies = self.login(email, password, captcha)
132 | 
133 |         if code == 0:
134 |             print('login successfully')
135 |         else:
136 |             print('login failed, reason: {0}'.format(msg))
137 | 
138 |         return cookies
139 | 
140 |     def create_cookies(self, file, need_captcha=False, use_getpass=True):
141 |         """在终端中执行登录流程，将 cookies 存放在文件中以便后续使用
142 | 
143 |         :param str file: 文件名
144 |         :param bool need_captcha: 登录过程中是否使用验证码， 默认为 False
145 |         :param bool use_getpass: 是否使用安全模式输入密码，默认为 True，
146 |             如果在某些 Windows IDE 中无法正常输入密码，请把此参数设置为 False 试试
147 |         :return:
148 |         """
149 |         cookies_str = self.login_in_terminal(need_captcha, use_getpass)
150 |         if cookies_str:
151 |             with open(file, 'w') as f:
152 |                 f.write(cookies_str)
153 |             print('cookies file created.')
154 |         else:
155 |             print('can\'t create cookies.')
156 | 
157 |     # ===== network staff =====
158 | 
159 |     def set_proxy(self, proxy):
160 |         """设置代理
161 | 
162 |         :param str proxy: 使用 "http://example.com:port" 的形式
163 |         :return: 无
164 |         :rtype: None
165 | 
166 |         :说明:
167 |             由于一个 :class:`.ZhihuClient` 对象和它创建出来的其他知乎对象共用
168 |             一个Session，所以调用这个方法也会将所有生成出的知乎类设置上代理。
169 |         """
170 |         self._session.proxies.update({'http': proxy})
171 | 
172 |     def set_proxy_pool(self, proxies, auth=None, https=True):
173 |         """设置代理池
174 | 
175 |         :param proxies: proxy列表, 形如 ``["ip1:port1", "ip2:port2"]``
176 |         :param auth: 如果代理需要验证身份, 通过这个参数提供, 比如
177 |         :param https: 默认为 True, 传入 False 则不设置 https 代理
178 |         .. code-block:: python
179 | 
180 |               from requests.auth import HTTPProxyAuth
181 |               auth = HTTPProxyAuth('laike9m', '123')
182 |         :说明:
183 |              每次 GET/POST 请求会随机选择列表中的代理
184 |         """
185 |         from random import choice
186 | 
187 |         if https:
188 |             self.proxies = [{'http': p, 'https': p} for p in proxies]
189 |         else:
190 |             self.proxies = [{'http': p} for p in proxies]
191 | 
192 |         def get_with_random_proxy(url, **kwargs):
193 |             proxy = choice(self.proxies)
194 |             kwargs['proxies'] = proxy
195 |             if auth:
196 |                 kwargs['auth'] = auth
197 |             return self._session.original_get(url, **kwargs)
198 | 
199 |         def post_with_random_proxy(url, *args, **kwargs):
200 |             proxy = choice(self.proxies)
201 |             kwargs['proxies'] = proxy
202 |             if auth:
203 |                 kwargs['auth'] = auth
204 |             return self._session.original_post(url, *args, **kwargs)
205 | 
206 |         self._session.original_get = self._session.get
207 |         self._session.get = get_with_random_proxy
208 |         self._session.original_post = self._session.post
209 |         self._session.post = post_with_random_proxy
210 | 
211 |     def remove_proxy_pool(self):
212 |         """
213 |         移除代理池
214 |         """
215 |         self.proxies = None
216 |         self._session.get = self._session.original_get
217 |         self._session.post = self._session.original_post
218 |         del self._session.original_get
219 |         del self._session.original_post
220 | 
221 |     # ===== getter staff ======
222 | 
223 |     def me(self):
224 |         """获取使用特定 cookies 的 Me 实例
225 | 
226 |         :return: cookies对应的Me对象
227 |         :rtype: Me
228 |         """
229 |         from .me import Me
230 |         headers = dict(Default_Header)
231 |         headers['Host'] = 'zhuanlan.zhihu.com'
232 |         res = self._session.get(Get_Me_Info_Url, headers=headers)
233 |         json_data = res.json()
234 |         url = json_data['profileUrl']
235 |         name = json_data['name']
236 |         motto = json_data['bio']
237 |         photo = json_data['avatar']['template'].format(
238 |             id=json_data['avatar']['id'], size='r')
239 |         return Me(url, name, motto, photo, session=self._session)
240 | 
241 |     def __getattr__(self, item: str):
242 |         """本函数用于获取各种类，如 `Answer` `Question` 等.
243 | 
244 |         :支持的形式有:
245 |             1. client.answer()
246 |             2. client.author()
247 |             3. client.collection()
248 |             4. client.column()
249 |             5. client.post()
250 |             6. client.question()
251 |             7. client.topic()
252 | 
253 |             参数均为对应页面的url，返回对应的类的实例。
254 |         """
255 |         def getter(url):
256 |             return getattr(module, item.capitalize())(url,
257 |                                                       session=self._session)
258 |         attr_list = ['answer', 'author', 'collection',
259 |                      'column', 'post', 'question', 'topic']
260 |         if item.lower() in attr_list:
261 |             module = importlib.import_module('.'+item.lower(), 'zhihu')
262 |             return getter
263 | 


--------------------------------------------------------------------------------
/zhihu/collection.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from .common import *
  5 | from .base import BaseZhihu
  6 | 
  7 | 
  8 | class Collection(BaseZhihu):
  9 | 
 10 |     """收藏夹，请使用``ZhihuClient.collection``方法构造对象."""
 11 | 
 12 |     @class_common_init(re_collection_url)
 13 |     def __init__(self, url, owner=None, name=None, follower_num=None,
 14 |                  session=None):
 15 |         """创建收藏夹类实例.
 16 | 
 17 |         :param str url: 收藏夹主页url，必须
 18 |         :param Author owner: 收藏夹拥有者，可选
 19 |         :param str name: 收藏夹标题，可选
 20 |         :param int follower_num: 收藏夹关注人数，可选
 21 |         :param Session session: 使用的网络会话，为空则使用新会话。
 22 |         :return: 收藏夹对象
 23 |         :rtype: Collection
 24 |         """
 25 |         self.url = url
 26 |         self._session = session
 27 |         self.soup = None
 28 |         self._name = name
 29 |         self._owner = owner
 30 |         self._follower_num = follower_num
 31 |         self._id = int(re.match(r'.*/(\d+)', self.url).group(1))
 32 | 
 33 |     @property
 34 |     def id(self):
 35 |         """获取收藏夹id（网址最后的部分）.
 36 | 
 37 |         :return: 收藏夹id
 38 |         :rtype: int
 39 |         """
 40 |         return self._id
 41 | 
 42 |     @property
 43 |     @check_soup('_cid')
 44 |     def cid(self):
 45 |         """获取收藏夹内部Id（用不到忽视就好）
 46 | 
 47 |         :return: 内部Id
 48 |         :rtype: int
 49 |         """
 50 |         return int(re_get_number.match(
 51 |             self.soup.find('a', attrs={'name': 'focus'})['id']).group(1))
 52 | 
 53 |     @property
 54 |     @check_soup('_xsrf')
 55 |     def xsrf(self):
 56 |         """获取知乎的反xsrf参数（用不到就忽视吧~）
 57 | 
 58 |         :return: xsrf参数
 59 |         :rtype: str
 60 |         """
 61 |         return self.soup.find(
 62 |             'input', attrs={'name': '_xsrf'})['value']
 63 | 
 64 |     @property
 65 |     @check_soup('_name')
 66 |     def name(self):
 67 |         """获取收藏夹名字.
 68 | 
 69 |         :return: 收藏夹名字
 70 |         :rtype: str
 71 |         """
 72 |         return re_del_empty_line.match(
 73 |             self.soup.find('h2', id='zh-fav-head-title').text).group(1)
 74 | 
 75 |     @property
 76 |     @check_soup('_owner')
 77 |     def owner(self):
 78 |         """获取收藏夹拥有者，返回Author对象.
 79 | 
 80 |         :return: 收藏夹拥有者
 81 |         :rtype: Author
 82 |         """
 83 |         from .author import Author
 84 | 
 85 |         a = self.soup.find('h2', class_='zm-list-content-title').a
 86 |         name = a.text
 87 |         url = Zhihu_URL + a['href']
 88 |         motto = self.soup.find(
 89 |             'div', id='zh-single-answer-author-info').div.text
 90 |         photo_url = PROTOCOL + self.soup.find(
 91 |             'img', class_='zm-list-avatar-medium')['src'].replace('_m', '_r')
 92 |         return Author(url, name, motto, photo_url=photo_url,
 93 |                       session=self._session)
 94 | 
 95 |     @property
 96 |     @check_soup('_follower_num')
 97 |     def follower_num(self):
 98 |         """获取关注此收藏夹的人数.
 99 | 
100 |         :return: 关注此收藏夹的人数
101 |         :rtype: int
102 |         """
103 |         href = re_collection_url_split.match(self.url).group(1)
104 |         return int(self.soup.find('a', href=href + 'followers').text)
105 | 
106 |     @property
107 |     def followers(self):
108 |         """获取关注此收藏夹的用户
109 | 
110 |         :return: 关注此收藏夹的用户
111 |         :rtype: Author.Iterable
112 |         """
113 |         self._make_soup()
114 |         followers_url = self.url + 'followers'
115 |         for x in common_follower(followers_url, self.xsrf, self._session):
116 |             yield x
117 | 
118 |     @property
119 |     def questions(self):
120 |         """获取收藏夹内所有问题对象.
121 | 
122 |         :return: 收藏夹内所有问题，返回生成器
123 |         :rtype: Question.Iterable
124 |         """
125 |         self._make_soup()
126 |         # noinspection PyTypeChecker
127 |         for question in self._page_get_questions(self.soup):
128 |             yield question
129 |         i = 2
130 |         while True:
131 |             soup = BeautifulSoup(self._session.get(
132 |                 self.url[:-1] + '?page=' + str(i)).text)
133 |             for question in self._page_get_questions(soup):
134 |                 if question == 0:
135 |                     return
136 |                 yield question
137 |             i += 1
138 | 
139 |     @property
140 |     def answers(self):
141 |         """获取收藏夹内所有答案对象.
142 | 
143 |         :return: 收藏夹内所有答案，返回生成器
144 |         :rtype: Answer.Iterable
145 |         """
146 |         self._make_soup()
147 |         # noinspection PyTypeChecker
148 |         for answer in self._page_get_answers(self.soup):
149 |             yield answer
150 |         i = 2
151 |         while True:
152 |             soup = BeautifulSoup(self._session.get(
153 |                 self.url[:-1] + '?page=' + str(i)).text)
154 |             for answer in self._page_get_answers(soup):
155 |                 if answer == 0:
156 |                     return
157 |                 yield answer
158 |             i += 1
159 | 
160 |     @property
161 |     def logs(self):
162 |         """获取收藏夹日志
163 | 
164 |         :return: 收藏夹日志中的操作，返回生成器
165 |         :rtype: CollectActivity.Iterable
166 |         """
167 |         import time
168 |         from datetime import datetime
169 |         from .answer import Answer
170 |         from .question import Question
171 |         from .acttype import CollectActType
172 | 
173 |         self._make_soup()
174 |         gotten_feed_num = 20
175 |         offset = 0
176 |         data = {
177 |             'start': 0,
178 |             '_xsrf': self.xsrf
179 |         }
180 |         api_url = self.url + 'log'
181 |         while gotten_feed_num == 20:
182 |             data['offset'] = offset
183 |             res = self._session.post(url=api_url, data=data)
184 |             gotten_feed_num = res.json()['msg'][0]
185 |             soup = BeautifulSoup(res.json()['msg'][1])
186 |             offset += gotten_feed_num
187 |             zm_items = soup.find_all('div', class_='zm-item')
188 | 
189 |             for zm_item in zm_items:
190 |                 act_time = datetime.strptime(zm_item.find('time').text, "%Y-%m-%d %H:%M:%S")
191 |                 if zm_item.find('ins'):
192 |                     link = zm_item.find('ins').a
193 |                     act_type = CollectActType.INSERT_ANSWER
194 |                 elif zm_item.find('del'):
195 |                     link = zm_item.find('del').a
196 |                     act_type = CollectActType.DELETE_ANSWER
197 |                 else:
198 |                     continue
199 |                 try:
200 |                     answer_url = Zhihu_URL + link['href']
201 |                     question_url = re_a2q.match(answer_url).group(1)
202 |                     question = Question(question_url, link.text)
203 |                     answer = Answer(
204 |                         answer_url, question, session=self._session)
205 |                     yield CollectActivity(
206 |                         act_type, act_time, self.owner, self, answer)
207 |                 except AttributeError:
208 |                     act_type = CollectActType.CREATE_COLLECTION
209 |                     yield CollectActivity(
210 |                         act_type, act_time, self.owner, self)
211 |             data['start'] = zm_items[-1]['id'][8:]
212 |             time.sleep(0.5)
213 | 
214 |     def _page_get_questions(self, soup):
215 |         from .question import Question
216 | 
217 |         question_tags = soup.find_all("div", class_="zm-item")
218 |         if len(question_tags) == 0:
219 |             yield 0
220 |             return
221 |         else:
222 |             for question_tag in question_tags:
223 |                 if question_tag.h2 is not None:
224 |                     question_title = question_tag.h2.a.text
225 |                     question_url = Zhihu_URL + question_tag.h2.a['href']
226 |                     yield Question(question_url, question_title,
227 |                                    session=self._session)
228 | 
229 |     def _page_get_answers(self, soup):
230 |         from .question import Question
231 |         from .author import Author, ANONYMOUS
232 |         from .answer import Answer
233 | 
234 |         answer_tags = soup.find_all("div", class_="zm-item")
235 |         if len(answer_tags) == 0:
236 |             yield 0
237 |             return
238 |         else:
239 |             question = None
240 |             for tag in answer_tags:
241 |                 # 判断是否是'建议修改的回答'等情况
242 |                 url_tag = tag.find('a', class_='answer-date-link')
243 |                 if url_tag is None:
244 |                     reason = tag.find('div', id='answer-status').p.text
245 |                     print("pass a answer, reason %s ." % reason)
246 |                     continue
247 |                 if tag.h2 is not None:
248 |                     question_title = tag.h2.a.text
249 |                     question_url = Zhihu_URL + tag.h2.a['href']
250 |                     question = Question(question_url, question_title,
251 |                                         session=self._session)
252 |                 answer_url = Zhihu_URL + url_tag['href']
253 |                 div = tag.find('div', class_='zm-item-answer-author-info')
254 |                 author_link = div.find('a', class_='author-link')
255 |                 if author_link is not None:
256 |                     author_url = Zhihu_URL + author_link['href']
257 |                     author_name = author_link.text
258 |                     motto_span = div.find('span', class_='bio')
259 |                     author_motto = motto_span['title'] if motto_span else ''
260 |                     author = Author(author_url, author_name, author_motto,
261 |                                     session=self._session)
262 |                 else:
263 |                     author = ANONYMOUS
264 |                 upvote_num = tag.find('a', class_='zm-item-vote-count').text
265 |                 if upvote_num.isdigit():
266 |                     upvote_num = int(upvote_num)
267 |                 else:
268 |                     upvote_num = None
269 |                 answer = Answer(answer_url, question, author,
270 |                                 upvote_num, session=self._session)
271 |                 yield answer
272 | 
273 | 
274 | class CollectActivity:
275 |     """收藏夹操作, 请使用``Collection.logs``构造对象."""
276 | 
277 |     def __init__(self, type, time, owner, collection, answer=None):
278 |         """创建收藏夹操作类实例
279 | 
280 |         :param acttype.CollectActType type: 操作类型
281 |         :param datetime.datetime time: 进行操作的时间
282 |         :param Author owner: 收藏夹的拥有者
283 |         :param Collection collection: 所属收藏夹
284 |         :param Answer answer: 收藏的答案，可选
285 |         :return: CollectActivity
286 |         """
287 |         self._type = type
288 |         self._time = time
289 |         self._owner = owner
290 |         self._collection = collection
291 |         self._answer = answer
292 | 
293 |     @property
294 |     def type(self):
295 |         """
296 |         :return: 收藏夹操作类型, 具体参见 :class:`.CollectActType`
297 |         :rtype: :class:`.CollectActType`
298 |         """
299 |         return self._type
300 | 
301 |     @property
302 |     def answer(self):
303 |         """
304 |         :return: 添加或删除收藏的答案, 若是创建收藏夹操作返回 None
305 |         :rtype: Answer or None
306 |         """
307 |         return self._answer
308 | 
309 |     @property
310 |     def time(self):
311 |         """
312 |         :return: 进行操作的时间
313 |         :rtype: datetime.datetime
314 |         """
315 |         return self._time
316 | 
317 |     @property
318 |     def owner(self):
319 |         """
320 |         :return: 收藏夹的拥有者
321 |         :rtype: Author
322 |         """
323 |         return self._owner
324 | 
325 |     @property
326 |     def collection(self):
327 |         """
328 |         :return: 所属收藏夹
329 |         :rtype: Collection
330 |         """
331 |         return self._collection
332 | 


--------------------------------------------------------------------------------
/zhihu/column.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from .common import *
  5 | from .base import BaseZhihu, JsonAsSoupMixin
  6 | 
  7 | 
  8 | class Column(JsonAsSoupMixin, BaseZhihu):
  9 | 
 10 |     """专栏类，请使用``ZhihuClient.column``方法构造对象."""
 11 | 
 12 |     @class_common_init(re_column_url)
 13 |     def __init__(self, url, name=None, follower_num=None,
 14 |                  post_num=None, session=None):
 15 |         """创建专栏类实例.
 16 | 
 17 |         :param str url: 专栏url
 18 |         :param str name: 专栏名，可选
 19 |         :param int follower_num: 关注者数量，可选
 20 |         :param int post_num: 文章数量，可选
 21 |         :param Session session: 使用的网络会话，为空则使用新会话。
 22 |         :return: 专栏对象
 23 |         :rtype: Column
 24 |         """
 25 |         self._in_name = re_column_url.match(url).group(1)
 26 |         self.url = url
 27 |         self._session = session
 28 |         self._name = name
 29 |         self._follower_num = follower_num
 30 |         self._post_num = post_num
 31 | 
 32 |     def _make_soup(self):
 33 |         if self.soup is None:
 34 |             json = self._get_content()
 35 |             self._gen_soup(json)
 36 | 
 37 |     def _get_content(self):
 38 |         origin_host = self._session.headers.get('Host')
 39 |         self._session.headers.update(Host='zhuanlan.zhihu.com')
 40 |         res = self._session.get(Column_Data.format(self._in_name))
 41 |         self._session.headers.update(Host=origin_host)
 42 |         return res.json()
 43 | 
 44 |     @property
 45 |     @check_soup('_name')
 46 |     def name(self):
 47 |         """获取专栏名称.
 48 | 
 49 |         :return: 专栏名称
 50 |         :rtype: str
 51 |         """
 52 |         return self.soup['name']
 53 | 
 54 |     @property
 55 |     @check_soup('_follower_num')
 56 |     def follower_num(self):
 57 |         """获取关注人数.
 58 | 
 59 |         :return: 关注人数
 60 |         :rtype: int
 61 |         """
 62 |         return int(self.soup['followersCount'])
 63 | 
 64 |     @property
 65 |     @check_soup('_post_num')
 66 |     def post_num(self):
 67 |         """获取专栏文章数.
 68 | 
 69 |         :return: 专栏文章数
 70 |         :rtype: int
 71 |         """
 72 |         return int(self.soup['postsCount'])
 73 | 
 74 |     @property
 75 |     def posts(self):
 76 |         """获取专栏的所有文章.
 77 | 
 78 |         :return: 专栏所有文章，返回生成器
 79 |         :rtype: Post.Iterable
 80 |         """
 81 |         origin_host = self._session.headers.get('Host')
 82 |         for offset in range(0, (self.post_num - 1) // 10 + 1):
 83 |             self._session.headers.update(Host='zhuanlan.zhihu.com')
 84 |             res = self._session.get(
 85 |                 Column_Posts_Data.format(self._in_name, offset * 10))
 86 |             soup = res.json()
 87 |             self._session.headers.update(Host=origin_host)
 88 |             for post in soup:
 89 |                 yield self._parse_post_data(post)
 90 | 
 91 |     def _parse_post_data(self, post):
 92 |         from .author import Author
 93 |         from .post import Post
 94 | 
 95 |         url = Column_Url + post['url']
 96 |         template = post['author']['avatar']['template']
 97 |         photo_id = post['author']['avatar']['id']
 98 |         photo_url = template.format(id=photo_id, size='r')
 99 |         author = Author(post['author']['profileUrl'],
100 |                         post['author']['name'], post['author']['bio'],
101 |                         photo_url=photo_url, session=self._session)
102 |         title = post['title']
103 |         upvote_num = post['likesCount']
104 |         comment_num = post['commentsCount']
105 |         print(url)
106 |         return Post(url, self, author, title, upvote_num, comment_num,
107 |                     session=self._session)
108 | 


--------------------------------------------------------------------------------
/zhihu/comment.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | 
 5 | class Comment:
 6 | 
 7 |     """评论类，一般不直接使用，而是作为``Answer.comments``迭代器的返回类型."""
 8 | 
 9 |     def __init__(self, cid, answer, author,
10 |                  upvote_num, content, time, group_id=None):
11 |         """创建评论类实例.
12 | 
13 |         :param int cid: 评论ID
14 |         :param int group_id: 评论所在的组ID
15 |         :param Answer answer: 评论所在的答案对象
16 |         :param Author author: 评论的作者对象
17 |         :param int upvote_num: 评论赞同数量
18 |         :param str content: 评论内容
19 |         :param datetime.datetime creation_time: 评论发表时间
20 |         :return: 评论对象
21 |         :rtype: Comment
22 |         """
23 | 
24 |         self.cid = cid
25 |         self.answer = answer
26 |         self.author = author
27 |         self.upvote_num = upvote_num
28 |         self.content = content
29 |         self.creation_time = time
30 |         self._group_id = group_id
31 | 


--------------------------------------------------------------------------------
/zhihu/common.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import functools
  5 | import re
  6 | import os
  7 | 
  8 | from requests import Session
  9 | from bs4 import BeautifulSoup as _Bs
 10 | from bs4 import Tag, NavigableString
 11 | from requests.packages.urllib3.util import Retry
 12 | 
 13 | try:
 14 |     __import__('lxml')
 15 |     BeautifulSoup = lambda makeup: _Bs(makeup, 'lxml')
 16 | except ImportError:
 17 |     BeautifulSoup = lambda makeup: _Bs(makeup, 'html.parser')
 18 | 
 19 | Default_Header = {'X-Requested-With': 'XMLHttpRequest',
 20 |                   'Referer': 'http://www.zhihu.com',
 21 |                   'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; '
 22 |                                 'rv:39.0) Gecko/20100101 Firefox/39.0',
 23 |                   'Host': 'www.zhihu.com'}
 24 | 
 25 | Zhihu_URL = 'https://www.zhihu.com'
 26 | Login_URL = Zhihu_URL + '/login/email'
 27 | Captcha_URL = Zhihu_URL + '/captcha.gif'
 28 | Get_Profile_Card_URL = Zhihu_URL + '/node/MemberProfileCardV2'
 29 | Question_Get_More_Answer_URL = Zhihu_URL + '/node/QuestionAnswerListV2'
 30 | Answer_Add_Comment_URL = Zhihu_URL + '/node/AnswerCommentAddV2'
 31 | Answer_Comment_Box_URL = Zhihu_URL + '/node/AnswerCommentBoxV2'
 32 | Get_Answer_Comment_URL = Zhihu_URL + '/r/answers/{0}/comments'
 33 | Author_Get_More_Followers_URL = Zhihu_URL + '/node/ProfileFollowersListV2'
 34 | Author_Get_More_Followees_URL = Zhihu_URL + '/node/ProfileFolloweesListV2'
 35 | Author_Get_More_Follow_Column_URL = Zhihu_URL + \
 36 |                                     '/node/ProfileFollowedColumnsListV2'
 37 | Author_Get_More_Follow_Topic_URL = Zhihu_URL + \
 38 |                                    '/people/{0}/topics'
 39 | 
 40 | PROTOCOL = ''
 41 | 
 42 | Column_Url = 'http://zhuanlan.zhihu.com'
 43 | Column_API = Column_Url + '/api/columns'
 44 | Column_Data = Column_API + '/{0}'
 45 | Column_Posts_Data = Column_API + '/{0}/posts?limit=10&offset={1}'
 46 | Column_Post_Data = Column_Url + '/api/posts/{0}'
 47 | Post_Get_Upvoter = Column_Post_Data + '/likers'
 48 | 
 49 | Topic_Url = Zhihu_URL + '/topic'
 50 | Topic_Get_Children_Url = Topic_Url + '/{0}/organize/entire'
 51 | Topic_Get_More_Follower_Url = Topic_Url + '/{0}/followers'
 52 | Topic_Questions_Url = Topic_Url + '/{0}/questions'
 53 | Topic_Unanswered_Question_Url = Topic_Url + '/{0}/unanswered'
 54 | Topic_Top_Answers_Url = Topic_Url + '/{0}/top-answers'
 55 | Topic_Hot_Questions_Url = Topic_Url + '/{0}/hot'
 56 | Topic_Newest_Url = Topic_Url + '/{0}/newest'
 57 | 
 58 | Get_Me_Info_Url = Column_Url + '/api/me'
 59 | Upvote_Answer_Url = Zhihu_URL + '/node/AnswerVoteBarV2'
 60 | Upvote_Article_Url = Column_API + '/{0}/posts/{1}/rating'
 61 | Follow_Author_Url = Zhihu_URL + '/node/MemberFollowBaseV2'
 62 | Follow_Question_Url = Zhihu_URL + '/node/QuestionFollowBaseV2'
 63 | Follow_Topic_Url = Zhihu_URL + '/node/TopicFollowBaseV2'
 64 | Follow_Collection_Url = Zhihu_URL + '/collection/follow'
 65 | Unfollow_Collection_Url = Zhihu_URL + '/collection/unfollow'
 66 | Thanks_Url = Zhihu_URL + '/answer/thanks'
 67 | Cancel_Thanks_Url = Zhihu_URL + '/answer/cancel_thanks'
 68 | Send_Message_Url = Zhihu_URL + '/inbox/post'
 69 | Unhelpful_Url = Zhihu_URL + '/answer/not_helpful'
 70 | Cancel_Unhelpful_Url = Zhihu_URL + '/answer/helpful'
 71 | Get_Collection_Url = Zhihu_URL + '/node/AnswerFavlists'
 72 | 
 73 | re_question_url = re.compile(
 74 |     r'^https?://www\.zhihu\.com/question/\d+(\?sort=created|/?)$')
 75 | re_question_url_std = re.compile(r'^https?://www\.zhihu\.com/question/\d+/?')
 76 | re_ans_url = re.compile(
 77 |     r'^https?://www\.zhihu\.com/question/\d+/answer/\d+/?$')
 78 | re_author_url = re.compile(r'^https?://www\.zhihu\.com/(?:people|org)/[^/]+/?$')
 79 | re_collection_url = re.compile(r'^https?://www\.zhihu\.com/collection/\d+/?$')
 80 | re_column_url = re.compile(r'^http://zhuanlan\.zhihu\.com/([^/]+)/?$')
 81 | re_post_url = re.compile(r'^http://zhuanlan\.zhihu\.com/p/(\d+)/?$')
 82 | re_topic_url = re.compile(r'^https?://www\.zhihu\.com/topic/(\d+)/?$')
 83 | re_a2q = re.compile(r'(.*)/answer/.*')
 84 | re_collection_url_split = re.compile(r'.*(/c.*)')
 85 | re_get_number = re.compile(r'[^\d]*(\d+).*')
 86 | re_del_empty_line = re.compile(r'\n*(.*)\n*')
 87 | 
 88 | 
 89 | def check_soup(attr, soup_type='_make_soup'):
 90 |     def real(func):
 91 |         @functools.wraps(func)
 92 |         def wrapper(self):
 93 |             # noinspection PyTypeChecker
 94 |             value = getattr(self, attr, None)
 95 |             if value is None:
 96 |                 if soup_type == '_make_soup':
 97 |                     getattr(self, soup_type)()
 98 |                 elif self.soup is None:
 99 |                     getattr(self, soup_type)()
100 |                 value = func(self)
101 |                 setattr(self, attr, value)
102 |             return value
103 | 
104 |         return wrapper
105 | 
106 |     return real
107 | 
108 | 
109 | def class_common_init(url_re, allowed_none=True, trailing_slash=True):
110 |     def real(func):
111 |         @functools.wraps(func)
112 |         def wrapper(self, url, *args, **kwargs):
113 |             if url is None and not allowed_none:
114 |                 raise ValueError('Invalid Url: ' + url)
115 |             if url is not None:
116 |                 if url_re.match(url) is None:
117 |                     raise ValueError('Invalid URL: ' + url)
118 |                 if not url.endswith('/') and trailing_slash:
119 |                     url += '/'
120 |             if 'session' not in kwargs.keys() or kwargs['session'] is None:
121 |                 kwargs['session'] = Session()
122 |                 kwargs['session'].mount('https://', Retry(5))
123 |                 kwargs['session'].mount('http://', Retry(5))
124 |             self.soup = None
125 |             return func(self, url, *args, **kwargs)
126 | 
127 |         return wrapper
128 | 
129 |     return real
130 | 
131 | 
132 | def remove_invalid_char(text):
133 |     """去除字符串中的无效字符，一般用于保存文件时保证文件名的有效性.
134 | 
135 |     :param str text: 待处理的字符串
136 |     :return: 处理后的字符串
137 |     :rtype: str
138 |     """
139 |     invalid_char_list = ['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\n']
140 |     res = ''
141 |     for char in text:
142 |         if char not in invalid_char_list:
143 |             res += char
144 |     return res
145 | 
146 | 
147 | def parser_author_from_tag(author):
148 |     author_link = author.find('a', class_='author-link')
149 |     if author_link is None:
150 |         return None, '匿名用户', '', ''
151 |     else:
152 |         author_name = author_link.text
153 |         motto_span = author.find('span', class_='bio')
154 |         author_motto = motto_span['title'] \
155 |             if motto_span is not None else ''
156 |         author_url = Zhihu_URL + author_link['href']
157 |         avatar_link = author.find('a', class_='avatar-link')
158 |         photo_url = PROTOCOL + avatar_link.img['src'].replace('_s', '_r')
159 |         return author_url, author_name, author_motto, photo_url
160 | 
161 | 
162 | def parser_author_from_comment(author):
163 |     author_avatar = author.find('a', class_='zm-item-link-avatar')
164 |     if author_avatar is None:
165 |         return None, '匿名用户', ''
166 |     else:
167 |         author_link = author.find('a', class_='zg-link')
168 |         author_name = author_link.text
169 |         author_url = author_link['href']
170 |         avatar_link = author.find('img', class_='zm-item-img-avatar')
171 |         photo_url = PROTOCOL + avatar_link['src'].replace('_s', '_r')
172 |         return author_url, author_name,  photo_url
173 | 
174 | 
175 | def answer_content_process(content):
176 |     content = clone_bs4_elem(content)
177 |     del content['class']
178 |     soup = BeautifulSoup(
179 |         '<html><head><meta charset="utf-8"></head><body></body></html>')
180 |     soup.body.append(content)
181 |     no_script_list = soup.find_all("noscript")
182 |     for no_script in no_script_list:
183 |         no_script.extract()
184 |     img_list = soup.find_all(
185 |         "img", class_=["origin_image", "content_image"])
186 |     for img in img_list:
187 |         if "content_image" in img['class']:
188 |             img['data-original'] = img['data-actualsrc']
189 |         new_img = soup.new_tag('img', src=PROTOCOL + img['data-original'])
190 |         img.replace_with(new_img)
191 |         if img.next_sibling is None:
192 |             new_img.insert_after(soup.new_tag('br'))
193 |     useless_list = soup.find_all("i", class_="icon-external")
194 |     for useless in useless_list:
195 |         useless.extract()
196 |     return soup.prettify()
197 | 
198 | 
199 | def get_path(path, filename, mode, default_path, default_name):
200 |     if path is None:
201 |         path = os.path.join(
202 |             os.getcwd(), remove_invalid_char(default_path))
203 |     if filename is None:
204 |         filename = remove_invalid_char(default_name)
205 |     if os.path.isdir(path) is False:
206 |         os.makedirs(path)
207 |     temp = filename
208 |     i = 0
209 |     while os.path.isfile(os.path.join(path, temp) + '.' + mode):
210 |         i += 1
211 |         temp = filename + str(i)
212 |     return os.path.join(path, temp) + '.' + mode
213 | 
214 | 
215 | def common_follower(url, xsrf, session):
216 |     from .author import Author, ANONYMOUS
217 |     headers = dict(Default_Header)
218 |     headers['Referer'] = url
219 |     data = {'offset': 0, '_xsrf': xsrf}
220 |     gotten_data_num = 20
221 |     offset = 0
222 |     while gotten_data_num == 20:
223 |         data['offset'] = offset
224 |         res = session.post(url, data=data, headers=headers)
225 |         json_data = res.json()['msg']
226 |         gotten_data_num = json_data[0]
227 |         offset += gotten_data_num
228 |         soup = BeautifulSoup(json_data[1])
229 |         follower_divs = soup.find_all('div', class_='zm-profile-card')
230 |         for div in follower_divs:
231 |             if div.a is not None:
232 |                 author_name = div.a['title']
233 |                 author_url = Zhihu_URL + div.a['href']
234 |                 author_motto = div.find('span', class_='bio').text
235 |                 author_photo = PROTOCOL + div.img['src'].replace('_m', '_r')
236 |                 numbers = [re_get_number.match(a.text).group(1)
237 |                            for a in div.find_all('a', target='_blank')]
238 |                 try:
239 |                     yield Author(author_url, author_name, author_motto,
240 |                                  *numbers, photo_url=author_photo,
241 |                                  session=session)
242 |                 except ValueError:  # invalid url
243 |                     yield ANONYMOUS
244 |             else:
245 |                 yield ANONYMOUS
246 | 
247 | 
248 | def clone_bs4_elem(el):
249 |     """Clone a bs4 tag before modifying it.
250 | 
251 |     Code from `http://stackoverflow.com/questions/23057631/clone-element-with
252 |     -beautifulsoup`
253 |     """
254 |     if isinstance(el, NavigableString):
255 |         return type(el)(el)
256 | 
257 |     copy = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
258 |     # work around bug where there is no builder set
259 |     # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
260 |     copy.attrs = dict(el.attrs)
261 |     for attr in ('can_be_empty_element', 'hidden'):
262 |         setattr(copy, attr, getattr(el, attr))
263 |     for child in el.contents:
264 |         copy.append(clone_bs4_elem(child))
265 |     return copy
266 | 


--------------------------------------------------------------------------------
/zhihu/me.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import json
  5 | 
  6 | from .common import *
  7 | from .author import Author
  8 | 
  9 | 
 10 | class Me(Author):
 11 |     """封装了相关操作（如点赞，关注问题）的类。
 12 |     请使用 :meth:`.ZhihuClient.me` 方法获取实例。
 13 |     """
 14 | 
 15 |     def __init__(self, url, name, motto, photo_url, session):
 16 |         super(Me, self).__init__(url, name, motto,
 17 |                                  photo_url=photo_url, session=session)
 18 | 
 19 |     def vote(self, something, vote='up'):
 20 |         """给答案或文章点赞或取消点赞
 21 | 
 22 |         :param Answer/Post something: 需要点赞的答案或文章对象
 23 |         :param str vote:
 24 |             ===== ================ ======
 25 |             取值        说明       默认值
 26 |             ===== ================ ======
 27 |             up    赞同              √
 28 |             down  反对              X
 29 |             clear 既不赞同也不反对  X
 30 |             ===== ================ ======
 31 | 
 32 |         :return: 成功返回True，失败返回False
 33 |         :rtype: bool
 34 |         """
 35 |         from .answer import Answer
 36 |         from zhihu import Post
 37 |         if isinstance(something, Answer):
 38 |             mapping = {
 39 |                 'up': 'vote_up',
 40 |                 'clear': 'vote_neutral',
 41 |                 'down': 'vote_down'
 42 |             }
 43 |             if vote not in mapping.keys():
 44 |                 raise ValueError('Invalid vote value: {0}'.format(vote))
 45 |             if something.author.url == self.url:
 46 |                 return False
 47 |             params = {'answer_id': str(something.aid)}
 48 |             data = {
 49 |                 '_xsrf': something.xsrf,
 50 |                 'method': mapping[vote],
 51 |                 'params': json.dumps(params)
 52 |             }
 53 |             headers = dict(Default_Header)
 54 |             headers['Referer'] = something.question.url[:-1]
 55 |             res = self._session.post(Upvote_Answer_Url,
 56 |                                      headers=headers, data=data)
 57 |             return res.json()['r'] == 0
 58 |         elif isinstance(something, Post):
 59 |             mapping = {
 60 |                 'up': 'like',
 61 |                 'clear': 'none',
 62 |                 'down': 'dislike'
 63 |             }
 64 |             if vote not in mapping.keys():
 65 |                 raise ValueError('Invalid vote value: {0}'.format(vote))
 66 |             if something.author.url == self.url:
 67 |                 return False
 68 |             put_url = Upvote_Article_Url.format(
 69 |                 something.column_in_name, something.slug)
 70 |             data = {'value': mapping[vote]}
 71 |             headers = {
 72 |                 'Content-Type': 'application/json;charset=utf-8',
 73 |                 'Host': 'zhuanlan.zhihu.com',
 74 |                 'Referer': something.url[:-1],
 75 |                 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; '
 76 |                               'rv:39.0) Gecko/20100101 Firefox/39.0',
 77 |                 'X-XSRF-TOKEN': self._session.cookies.get('XSRF-TOKEN')
 78 |             }
 79 |             res = self._session.put(put_url, json.dumps(data), headers=headers)
 80 |             return res.status_code == 204
 81 |         else:
 82 |             raise ValueError('argument something need to be '
 83 |                              'zhihu.Answer or zhihu.Post object.')
 84 | 
 85 |     def thanks(self, answer, thanks=True):
 86 |         """感谢或取消感谢回答
 87 | 
 88 |         :param Answer answer: 要感谢或取消感谢的回答
 89 |         :param thanks: True-->感谢，False-->取消感谢
 90 |         :return: 成功返回True，失败返回False
 91 |         :rtype: bool
 92 |         """
 93 |         from .answer import Answer
 94 |         if isinstance(answer, Answer) is False:
 95 |             raise ValueError('argument answer need to be Zhihu.Answer object.')
 96 |         if answer.author.url == self.url:
 97 |                 return False
 98 |         data = {
 99 |             '_xsrf': answer.xsrf,
100 |             'aid': answer.aid
101 |         }
102 |         res = self._session.post(Thanks_Url if thanks else Cancel_Thanks_Url,
103 |                                  data=data)
104 |         return res.json()['r'] == 0
105 | 
106 |     def follow(self, something, follow=True):
107 |         """关注用户、问题、话题或收藏夹
108 | 
109 |         :param Author/Question/Topic something: 需要关注的对象
110 |         :param bool follow: True-->关注，False-->取消关注
111 |         :return: 成功返回True，失败返回False
112 |         :rtype: bool
113 |         """
114 |         from .question import Question
115 |         from .topic import Topic
116 |         from .collection import Collection
117 |         if isinstance(something, Author):
118 |             if something.url == self.url:
119 |                 return False
120 |             data = {
121 |                 '_xsrf': something.xsrf,
122 |                 'method': '	follow_member' if follow else 'unfollow_member',
123 |                 'params': json.dumps({'hash_id': something.hash_id})
124 |             }
125 |             res = self._session.post(Follow_Author_Url, data=data)
126 |             return res.json()['r'] == 0
127 |         elif isinstance(something, Question):
128 |             data = {
129 |                 '_xsrf': something.xsrf,
130 |                 'method': 'follow_question' if follow else 'unfollow_question',
131 |                 'params': json.dumps({'question_id': str(something.qid)})
132 |             }
133 |             res = self._session.post(Follow_Question_Url, data=data)
134 |             return res.json()['r'] == 0
135 |         elif isinstance(something, Topic):
136 |             data = {
137 |                 '_xsrf': something.xsrf,
138 |                 'method': 'follow_topic' if follow else 'unfollow_topic',
139 |                 'params': json.dumps({'topic_id': something.tid})
140 |             }
141 |             res = self._session.post(Follow_Topic_Url, data=data)
142 |             return res.json()['r'] == 0
143 |         elif isinstance(something, Collection):
144 |             data = {
145 |                 '_xsrf': something.xsrf,
146 |                 'favlist_id': something.cid
147 |             }
148 |             res = self._session.post(
149 |                 Follow_Collection_Url if follow else Unfollow_Collection_Url,
150 |                 data=data)
151 |             return res.json()['r'] == 0
152 |         else:
153 |             raise ValueError('argument something need to be '
154 |                              'zhihu.Author, zhihu.Question'
155 |                              ', Zhihu.Topic or Zhihu.Collection object.')
156 | 
157 |     def add_comment(self, answer, content):
158 |         """给指定答案添加评论
159 | 
160 |         :param Answer answer: 答案对象
161 |         :param string content: 评论内容
162 |         :return: 成功返回 True，失败返回 False
163 |         :rtype: bool
164 |         """
165 | 
166 |         from .answer import Answer
167 |         if isinstance(answer, Answer) is False:
168 |             raise ValueError('argument answer need to be Zhihu.Answer object.')
169 |         if not content:
170 |             raise ValueError('answer content cannot be empty')
171 |         data = {
172 |             'method': 'add_comment',
173 |             'params': json.dumps({'answer_id': answer.aid, 'content': content}),
174 |             '_xsrf': answer.xsrf
175 |             }
176 |         res = self._session.post(Answer_Add_Comment_URL,
177 |                                  data=data)
178 |         return res.json()['r'] == 0
179 | 
180 |     def send_message(self, author, content):
181 |         """发送私信给一个用户
182 | 
183 |         :param Author author: 接收私信用户对象
184 |         :param string content: 发送给用户的私信内容
185 |         :return: 成功返回 True，失败返回 False
186 |         :rtype: bool
187 |         """
188 |         if isinstance(author, Author) is False:
189 |             raise ValueError('argument answer need to be Zhihu.Author object.')
190 |         if not content:
191 |             raise ValueError('answer content cannot be empty')
192 |         if author.url == self.url:
193 |                 return False
194 |         data = {
195 |             'member_id': author.hash_id,
196 |             'content': content,
197 |             'token': '',
198 |             '_xsrf': author.xsrf
199 |         }
200 |         res = self._session.post(Send_Message_Url,
201 |                                  data=data)
202 |         return res.json()['r'] == 0
203 | 
204 |     def block(self, something, block=True):
205 |         """屏蔽某个用户、话题
206 | 
207 |         :param Author/Topic something:
208 |         :param block: True-->屏蔽，False-->取消屏蔽
209 |         :return: 成功返回 True，失败返回 False
210 |         :rtype: bool
211 |         """
212 |         from .topic import Topic
213 | 
214 |         if isinstance(something, Author):
215 | 
216 |             if something.url == self.url:
217 |                 return False
218 |             data = {
219 |                 '_xsrf': something.xsrf,
220 |                 'action': 'add' if block else 'cancel',
221 |             }
222 |             block_author_url = something.url + 'block'
223 |             res = self._session.post(block_author_url, data=data)
224 |             return res.json()['r'] == 0
225 |         elif isinstance(something, Topic):
226 |             tid = something.tid
227 |             data = {
228 |                 '_xsrf': something.xsrf,
229 |                 'method': 'add' if block else 'del',
230 |                 'tid': tid,
231 |             }
232 |             block_topic_url = 'http://www.zhihu.com/topic/ignore'
233 |             res = self._session.post(block_topic_url, data=data)
234 |             return res.status_code == 200
235 |         else:
236 |             raise ValueError('argument something need to be '
237 |                              'Zhihu.Author or Zhihu.Topic object.')
238 | 
239 |     def unhelpful(self, answer, unhelpful=True):
240 |         """没有帮助或取消没有帮助回答
241 | 
242 |         :param Answer answer: 要没有帮助或取消没有帮助回答
243 |         :param unhelpful: True-->没有帮助，False-->取消没有帮助
244 |         :return: 成功返回 True，失败返回 False
245 |         :rtype: bool
246 |         """
247 |         from .answer import Answer
248 |         if isinstance(answer, Answer) is False:
249 |             raise ValueError('argument answer need to be Zhihu.Answer object.')
250 |         if answer.author.url == self.url:
251 |                 return False
252 |         data = {
253 |             '_xsrf': answer.xsrf,
254 |             'aid': answer.aid
255 |         }
256 |         res = self._session.post(Unhelpful_Url if unhelpful else Cancel_Unhelpful_Url,
257 |                                  data=data)
258 |         return res.json()['r'] == 0
259 | 


--------------------------------------------------------------------------------
/zhihu/post.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from .common import *
  5 | from .base import BaseZhihu, JsonAsSoupMixin
  6 | 
  7 | 
  8 | class Post(JsonAsSoupMixin, BaseZhihu):
  9 | 
 10 |     """专栏文章类，请使用``ZhihuClient.post``方法构造对象."""
 11 | 
 12 |     @class_common_init(re_post_url)
 13 |     def __init__(self, url, column=None, author=None, title=None,
 14 |                  upvote_num=None, comment_num=None, session=None):
 15 |         """创建专栏文章类实例.
 16 | 
 17 |         :param str url: 文章url
 18 |         :param Column column: 文章所属专栏，可选
 19 |         :param Author author: 文章作者，可选
 20 |         :param str title: 文章标题，可选
 21 |         :param int upvote_num: 文章赞同数，可选
 22 |         :param int comment_num: 文章评论数，可选
 23 |         :param Session session: 使用的网络会话，为空则使用新会话
 24 |         :return: 专栏文章对象
 25 |         :rtype: Post
 26 |         """
 27 |         match = re_post_url.match(url)
 28 |         self.url = url
 29 |         self._session = session
 30 |         self._column = column
 31 |         self._author = author
 32 |         self._title = title
 33 |         self._upvote_num = upvote_num
 34 |         self._comment_num = comment_num
 35 |         self._slug = int(match.group(1))  # 文章编号
 36 | 
 37 |     def _make_soup(self):
 38 |         if self.soup is None:
 39 |             json = self._get_content()
 40 |             self._gen_soup(json)
 41 | 
 42 |     def _get_content(self):
 43 |         origin_host = self._session.headers.get('Host')
 44 |         self._session.headers.update(Host='zhuanlan.zhihu.com')
 45 |         json = self._session.get(Column_Post_Data.format(self.slug)).json()
 46 |         self._session.headers.update(Host=origin_host)
 47 |         return json
 48 | 
 49 |     @property
 50 |     def column_in_name(self):
 51 |         """获取文章所在专栏的内部名称（用不到就忽视吧~）
 52 | 
 53 |         :return: 专栏的内部名称
 54 |         :rtype: str
 55 |         """
 56 |         self._make_soup()
 57 |         if 'column' in self.soup:
 58 |             return self.soup['column']['slug']
 59 |         else:
 60 |             return None
 61 | 
 62 |     @property
 63 |     def slug(self):
 64 |         """获取文章的编号（用不到就忽视吧~）
 65 | 
 66 |         :return: 文章编号
 67 |         :rtype: int
 68 |         """
 69 |         return self._slug
 70 | 
 71 |     @property
 72 |     @check_soup('_column')
 73 |     def column(self):
 74 |         """获取文章所在专栏.
 75 | 
 76 |         :return: 文章所在专栏
 77 |         :rtype: Column
 78 |         """
 79 |         from .column import Column
 80 | 
 81 |         if 'column' in self.soup:
 82 |             url = Column_Url + '/' + self.soup['column']['slug']
 83 |             name = self.soup['column']['name']
 84 |             return Column(url, name, session=self._session)
 85 |         else:
 86 |             return None
 87 | 
 88 |     @property
 89 |     @check_soup('_author')
 90 |     def author(self):
 91 |         """获取文章作者.
 92 | 
 93 |         :return: 文章作者
 94 |         :rtype: Author
 95 |         """
 96 |         from .author import Author
 97 | 
 98 |         url = self.soup['author']['profileUrl']
 99 |         name = self.soup['author']['name']
100 |         motto = self.soup['author']['bio']
101 |         template = self.soup['author']['avatar']['template']
102 |         photo_id = self.soup['author']['avatar']['id']
103 |         photo_url = template.format(id=photo_id, size='r')
104 |         return Author(url, name, motto, photo_url=photo_url,
105 |                       session=self._session)
106 | 
107 |     @property
108 |     @check_soup('_title')
109 |     def title(self):
110 |         """获取文章标题.
111 | 
112 |         :return: 文章标题
113 |         :rtype: str
114 |         """
115 |         return self.soup['title']
116 | 
117 |     @property
118 |     @check_soup('_upvote_num')
119 |     def upvote_num(self):
120 |         """获取文章赞同数量.
121 | 
122 |         :return: 文章赞同数
123 |         :rtype: int
124 |         """
125 |         return int(self.soup['likesCount'])
126 | 
127 |     @property
128 |     @check_soup('_comment_num')
129 |     def comment_num(self):
130 |         """获取评论数量.
131 | 
132 |         :return: 评论数量
133 |         :rtype: int
134 |         """
135 |         return self.soup['commentsCount']
136 | 
137 |     def save(self, filepath=None, filename=None, mode="md"):
138 |         """保存答案为 Html 文档或 markdown 文档.
139 | 
140 |         :param str filepath: 要保存的文件所在的目录，
141 |             不填为当前目录下以专栏标题命名的目录, 设为"."则为当前目录。
142 |         :param str filename: 要保存的文件名，
143 |             不填则默认为 所在文章标题 - 作者名.html/md。
144 |             如果文件已存在，自动在后面加上数字区分。
145 |             **自定义文件名时请不要输入后缀 .html 或 .md。**
146 |         :param str mode: 保存类型，可选 `html` 、 `markdown` 、 `md` 。
147 |         :return: 无
148 |         :rtype: None
149 |         """
150 |         if mode not in ["html", "md", "markdown"]:
151 |             raise ValueError("`mode` must be 'html', 'markdown' or 'md',"
152 |                              " got {0}".format(mode))
153 |         self._make_soup()
154 |         file = get_path(filepath, filename, mode, self.column.name,
155 |                         self.title + '-' + self.author.name)
156 |         with open(file, 'wb') as f:
157 |             if mode == "html":
158 |                 f.write(self.soup['content'].encode('utf-8'))
159 |             else:
160 |                 import html2text
161 |                 h2t = html2text.HTML2Text()
162 |                 h2t.body_width = 0
163 |                 f.write(h2t.handle(self.soup['content']).encode('utf-8'))
164 | 
165 |     @property
166 |     def upvoters(self):
167 |         """获取文章的点赞用户
168 | 
169 |         :return: 文章的点赞用户，返回生成器。
170 |         """
171 |         from .author import Author, ANONYMOUS
172 |         self._make_soup()
173 |         headers = dict(Default_Header)
174 |         headers['Host'] = 'zhuanlan.zhihu.com'
175 |         json = self._session.get(
176 |             Post_Get_Upvoter.format(self.slug),
177 |             headers=headers
178 |         ).json()
179 |         for au in json:
180 |             try:
181 |                 yield Author(
182 |                         au['profileUrl'],
183 |                         au['name'],
184 |                         au['bio'],
185 |                         photo_url=au['avatar']['template'].format(
186 |                                 id=au['avatar']['id'], size='r'),
187 |                         session=self._session
188 |                 )
189 |             except ValueError:  # invalid url
190 |                 yield ANONYMOUS
191 | 


--------------------------------------------------------------------------------
/zhihu/question.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import json
  5 | import time
  6 | from datetime import datetime
  7 | 
  8 | from .common import *
  9 | from .base import BaseZhihu
 10 | 
 11 | 
 12 | class Question(BaseZhihu):
 13 |     """问题类，请使用``ZhihuClient.question``方法构造对象."""
 14 | 
 15 |     @class_common_init(re_question_url, trailing_slash=False)
 16 |     def __init__(self, url, title=None, followers_num=None,
 17 |                  answer_num=None, creation_time=None, author=None,
 18 |                  session=None):
 19 |         """创建问题类实例.
 20 | 
 21 |         :param str url: 问题url. 现在支持两种 url
 22 | 
 23 |             1. https://www.zhihu.com/question/qid
 24 |             2. https://www.zhihu.com/question/qid?sort=created
 25 | 
 26 |             区别在于,使用第一种,调用 ``question.answers`` 的时候会按投票排序返回答案;
 27 |             使用第二种, 会按时间排序返回答案, 后提交的答案先返回
 28 |         
 29 |         :param str title: 问题标题，可选,
 30 |         :param int followers_num: 问题关注人数，可选
 31 |         :param int answer_num: 问题答案数，可选
 32 |         :param datetime.datetime creation_time: 问题创建时间，可选
 33 |         :param Author author: 提问者，可选
 34 |         :return: 问题对象
 35 |         :rtype: Question
 36 |         """
 37 |         self._session = session
 38 |         self._url = url
 39 |         self._title = title
 40 |         self._answer_num = answer_num
 41 |         self._followers_num = followers_num
 42 |         self._id = int(re.match(r'.*/(\d+)', self.url).group(1))
 43 |         self._author = author
 44 |         self._creation_time = creation_time
 45 |         self._logs = None
 46 |         self._deleted = None
 47 | 
 48 |     @property
 49 |     def url(self):
 50 |         # always return url like https://www.zhihu.com/question/1234/
 51 |         url = re.match(re_question_url_std, self._url).group()
 52 |         return url if url.endswith('/') else url + '/'
 53 | 
 54 |     @property
 55 |     def id(self):
 56 |         """获取问题id（网址最后的部分）.
 57 | 
 58 |         :return: 问题id
 59 |         :rtype: int
 60 |         """
 61 |         return self._id
 62 | 
 63 |     @property
 64 |     @check_soup('_qid')
 65 |     def qid(self):
 66 |         """获取问题内部id（用不到就忽视吧）
 67 | 
 68 |         :return: 问题内部id
 69 |         :rtype: int
 70 |         """
 71 |         return int(self.soup.find(
 72 |             'div', id='zh-question-detail')['data-resourceid'])
 73 | 
 74 |     @property
 75 |     @check_soup('_xsrf')
 76 |     def xsrf(self):
 77 |         """获取知乎的反xsrf参数（用不到就忽视吧~）
 78 | 
 79 |         :return: xsrf参数
 80 |         :rtype: str
 81 |         """
 82 |         return self.soup.find('input', attrs={'name': '_xsrf'})['value']
 83 | 
 84 |     @property
 85 |     @check_soup('_html')
 86 |     def html(self):
 87 |         """获取页面源码.
 88 | 
 89 |         :return: 页面源码
 90 |         :rtype: str
 91 |         """
 92 |         return self.soup.prettify()
 93 | 
 94 |     @property
 95 |     @check_soup('_title')
 96 |     def title(self):
 97 |         """获取问题标题.
 98 | 
 99 |         :return: 问题标题
100 |         :rtype: str
101 |         """
102 |         return self.soup.find('h2', class_='zm-item-title') \
103 |             .text.replace('\n', '')
104 | 
105 |     @property
106 |     @check_soup('_details')
107 |     def details(self):
108 |         """获取问题详细描述，目前实现方法只是直接获取文本，效果不满意……等更新.
109 | 
110 |         :return: 问题详细描述
111 |         :rtype: str
112 |         """
113 |         return self.soup.find("div", id="zh-question-detail").div.text
114 | 
115 |     @property
116 |     @check_soup('_answer_num')
117 |     def answer_num(self):
118 |         """获取问题答案数量.
119 | 
120 |         :return: 问题答案数量
121 |         :rtype: int
122 |         """
123 |         answer_num_block = self.soup.find('h3', id='zh-question-answer-num')
124 |         # 当0人回答或1回答时，都会找不到 answer_num_block，
125 |         # 通过找答案的赞同数block来判断到底有没有答案。
126 |         # （感谢知乎用户 段晓晨 提出此问题）
127 |         if answer_num_block is None:
128 |             if self.soup.find('span', class_='count') is not None:
129 |                 return 1
130 |             else:
131 |                 return 0
132 |         return int(answer_num_block['data-num'])
133 | 
134 |     @property
135 |     @check_soup('_follower_num')
136 |     def follower_num(self):
137 |         """获取问题关注人数.
138 | 
139 |         :return: 问题关注人数
140 |         :rtype: int
141 |         """
142 |         follower_num_block = self.soup.find('div', class_='zg-gray-normal')
143 |         # 无人关注时 找不到对应block，直接返回0 （感谢知乎用户 段晓晨 提出此问题）
144 |         if follower_num_block is None or follower_num_block.strong is None:
145 |             return 0
146 |         return int(follower_num_block.strong.text)
147 | 
148 |     @property
149 |     @check_soup('_topics')
150 |     def topics(self):
151 |         """获取问题所属话题.
152 | 
153 |         :return: 问题所属话题
154 |         :rtype: Topic.Iterable
155 |         """
156 |         from .topic import Topic
157 | 
158 |         for topic in self.soup.find_all('a', class_='zm-item-tag'):
159 |             yield Topic(Zhihu_URL + topic['href'], topic.text.replace('\n', ''),
160 |                         session=self._session)
161 | 
162 |     @property
163 |     def followers(self):
164 |         """获取关注此问题的用户
165 | 
166 |         :return: 关注此问题的用户
167 |         :rtype: Author.Iterable
168 |         :问题: 要注意若执行过程中另外有人关注，可能造成重复获取到某些用户
169 |         """
170 |         self._make_soup()
171 |         followers_url = self.url + 'followers'
172 |         for x in common_follower(followers_url, self.xsrf, self._session):
173 |             yield x
174 | 
175 |     @property
176 |     def answers(self):
177 |         """获取问题的所有答案.
178 | 
179 |         :return: 问题的所有答案，返回生成器
180 |         :rtype: Answer.Iterable
181 |         """
182 |         from .author import Author
183 |         from .answer import Answer
184 | 
185 |         self._make_soup()
186 | 
187 |         # TODO: 统一逻辑. 完全可以都用 _parse_answer_html 的逻辑替换
188 |         if self._url.endswith('sort=created'):
189 |             pager = self.soup.find('div', class_='zm-invite-pager')
190 |             if pager is None:
191 |                 max_page = 1
192 |             else:
193 |                 max_page = int(pager.find_all('span')[-2].a.text)
194 | 
195 |             for page in range(1, max_page + 1):
196 |                 if page == 1:
197 |                     soup = self.soup
198 |                 else:
199 |                     url = self._url + '&page=%d' % page
200 |                     soup = BeautifulSoup(self._session.get(url).content)
201 |                 error_answers = soup.find_all('div', id='answer-status')
202 |                 for each in error_answers:
203 |                     each['class'] = 'zm-editable-content'
204 |                 answers_wrap = soup.find('div', id='zh-question-answer-wrap')
205 |                 # 正式处理
206 |                 authors = answers_wrap.find_all(
207 |                     'div', class_='zm-item-answer-author-info')
208 |                 urls = answers_wrap.find_all('a', class_='answer-date-link')
209 |                 up_num = answers_wrap.find_all('div',
210 |                                                class_='zm-item-vote-info')
211 |                 contents = answers_wrap.find_all(
212 |                     'div', class_='zm-editable-content')
213 |                 assert len(authors) == len(urls) == len(up_num) == len(
214 |                     contents)
215 |                 for author, url, up_num, content in \
216 |                         zip(authors, urls, up_num, contents):
217 |                     a_url, name, motto, photo = parser_author_from_tag(author)
218 |                     author_obj = Author(a_url, name, motto, photo_url=photo,
219 |                                         session=self._session)
220 |                     url = Zhihu_URL + url['href']
221 |                     up_num = int(up_num['data-votecount'])
222 |                     content = answer_content_process(content)
223 |                     yield Answer(url, self, author_obj, up_num, content,
224 |                                  session=self._session)
225 |         else:
226 |             pagesize = 10
227 |             new_header = dict(Default_Header)
228 |             new_header['Referer'] = self.url
229 |             params = {"url_token": self.id,
230 |                       'pagesize': pagesize,
231 |                       'offset': 0}
232 |             data = {'_xsrf': self.xsrf,
233 |                     'method': 'next',
234 |                     'params': ''}
235 |             for i in range(0, (self.answer_num - 1) // pagesize + 1):
236 |                 if i == 0:
237 |                     # 修正各种建议修改的回答……
238 |                     error_answers = self.soup.find_all('div',
239 |                                                        id='answer-status')
240 |                     for each in error_answers:
241 |                         each['class'] = 'zm-editable-content'
242 |                     answers_wrap = self.soup.find('div',
243 |                                                   id='zh-question-answer-wrap')
244 |                     # 正式处理
245 |                     authors = answers_wrap.find_all(
246 |                         'div', class_='zm-item-answer-author-info')
247 |                     urls = answers_wrap.find_all('a', class_='answer-date-link')
248 |                     up_num = answers_wrap.find_all('div',
249 |                                                    class_='zm-item-vote-info')
250 |                     contents = answers_wrap.find_all(
251 |                         'div', class_='zm-editable-content')
252 |                     assert len(authors) == len(urls) == len(up_num) == len(
253 |                         contents)
254 |                     for author, url, up_num, content in \
255 |                             zip(authors, urls, up_num, contents):
256 |                         a_url, name, motto, photo = parser_author_from_tag(
257 |                             author)
258 |                         author_obj = Author(a_url, name, motto, photo_url=photo,
259 |                                             session=self._session)
260 |                         url = Zhihu_URL + url['href']
261 |                         up_num = int(up_num['data-votecount'])
262 |                         content = answer_content_process(content)
263 |                         yield Answer(url, self, author_obj, up_num, content,
264 |                                      session=self._session)
265 |                 else:
266 |                     params['offset'] = i * pagesize
267 |                     data['params'] = json.dumps(params)
268 |                     r = self._session.post(Question_Get_More_Answer_URL,
269 |                                            data=data,
270 |                                            headers=new_header)
271 |                     answer_list = r.json()['msg']
272 |                     for answer_html in answer_list:
273 |                         yield self._parse_answer_html(answer_html)
274 | 
275 |     @property
276 |     def top_answer(self):
277 |         """获取排名第一的答案.
278 | 
279 |         :return: 排名第一的答案
280 |         :rtype: Answer
281 |         """
282 |         for a in self.answers:
283 |             return a
284 | 
285 |     def top_i_answer(self, i):
286 |         """获取排名某一位的答案.
287 | 
288 |         :param int i: 要获取的答案的排名
289 |         :return: 答案对象，能直接获取的属性参见answers方法
290 |         :rtype: Answer
291 |         """
292 |         for j, a in enumerate(self.answers):
293 |             if j == i - 1:
294 |                 return a
295 | 
296 |     def top_i_answers(self, i):
297 |         """获取排名在前几位的答案.
298 | 
299 |         :param int i: 获取前几个
300 |         :return: 答案对象，返回生成器
301 |         :rtype: Answer.Iterable
302 |         """
303 |         for j, a in enumerate(self.answers):
304 |             if j <= i - 1:
305 |                 yield a
306 |             else:
307 |                 return
308 | 
309 |     @property
310 |     @check_soup('_author')
311 |     def author(self):
312 |         """获取问题的提问者.
313 |         
314 |         :return: 提问者
315 |         :rtype: Author or zhihu.ANONYMOUS
316 |         """
317 |         from .author import Author, ANONYMOUS
318 | 
319 |         logs = self._query_logs()
320 |         author_a = logs[-1].find_all('div')[0].a
321 |         if author_a.text == '匿名用户':
322 |             return ANONYMOUS
323 |         else:
324 |             url = Zhihu_URL + author_a['href']
325 |             return Author(url, name=author_a.text, session=self._session)
326 | 
327 |     @property
328 |     @check_soup('_creation_time')
329 |     def creation_time(self):
330 |         """
331 |         :return: 问题创建时间
332 |         :rtype: datetime.datetime
333 |         """
334 |         logs = self._query_logs()
335 |         time_string = logs[-1].find('div', class_='zm-item-meta').time[
336 |             'datetime']
337 |         return datetime.strptime(time_string, "%Y-%m-%d %H:%M:%S")
338 | 
339 |     @property
340 |     @check_soup('_last_edit_time')
341 |     def last_edit_time(self):
342 |         """
343 |         :return: 问题最后编辑时间
344 |         :rtype: datetime.datetime
345 |         """
346 |         data = {'_xsrf': self.xsrf, 'offset': '1'}
347 |         res = self._session.post(self.url + 'log', data=data)
348 |         _, content = res.json()['msg']
349 |         soup = BeautifulSoup(content)
350 |         time_string = soup.find_all('time')[0]['datetime']
351 |         return datetime.strptime(time_string, "%Y-%m-%d %H:%M:%S")
352 | 
353 |     def _query_logs(self):
354 |         if self._logs is None:
355 |             gotten_feed_num = 20
356 |             start = '0'
357 |             offset = 0
358 |             api_url = self.url + 'log'
359 |             logs = None
360 |             while gotten_feed_num == 20:
361 |                 data = {'_xsrf': self.xsrf, 'offset': offset, 'start': start}
362 |                 res = self._session.post(api_url, data=data)
363 |                 gotten_feed_num, content = res.json()['msg']
364 |                 offset += gotten_feed_num
365 |                 soup = BeautifulSoup(content)
366 |                 logs = soup.find_all('div', class_='zm-item')
367 |                 start = logs[-1]['id'][8:] if len(logs) > 0 else '0'
368 |                 time.sleep(0.2)  # prevent from posting too quickly
369 | 
370 |             self._logs = logs
371 | 
372 |         return self._logs
373 | 
374 |     # noinspection PyAttributeOutsideInit
375 |     def refresh(self):
376 |         """刷新 Question object 的属性. 
377 |         例如回答数增加了, 先调用 ``refresh()`` 
378 |         再访问 answer_num 属性, 可获得更新后的答案数量.
379 |         
380 |         :return: None
381 |         """
382 |         super().refresh()
383 |         self._html = None
384 |         self._title = None
385 |         self._details = None
386 |         self._answer_num = None
387 |         self._follower_num = None
388 |         self._topics = None
389 |         self._last_edit_time = None
390 |         self._logs = None
391 | 
392 |     @property
393 |     @check_soup('_deleted')
394 |     def deleted(self):
395 |         """问题是否被删除, 被删除了返回 True, 未被删除返回 False
396 |         :return: True or False
397 |         """
398 |         return self._deleted
399 | 
400 |     def _parse_answer_html(self, answer_html):
401 |         from .author import Author
402 |         from .answer import Answer
403 |         soup = BeautifulSoup(answer_html)
404 |         # 修正各种建议修改的回答……
405 |         error_answers = soup.find_all('div', id='answer-status')
406 | 
407 |         for each in error_answers:
408 |             each['class'] = 'zm-editable-content'
409 | 
410 |         answer_url = self.url + 'answer/' + soup.div['data-atoken']
411 |         author = soup.find('div', class_='zm-item-answer-author-info')
412 |         upvote_num = int(soup.find(
413 |             'div', class_='zm-item-vote-info')['data-votecount'])
414 |         content = soup.find('div', class_='zm-editable-content')
415 |         content = answer_content_process(content)
416 |         a_url, name, motto, photo = parser_author_from_tag(author)
417 |         author = Author(a_url, name, motto, photo_url=photo,
418 |                         session=self._session)
419 |         return Answer(answer_url, self, author, upvote_num, content,
420 |                       session=self._session)
421 | 


--------------------------------------------------------------------------------
/zhihu/topic.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import time
  5 | from datetime import datetime
  6 | 
  7 | from .common import *
  8 | from .base import BaseZhihu
  9 | 
 10 | 
 11 | class Topic(BaseZhihu):
 12 | 
 13 |     """答案类，请使用``ZhihuClient.topic``方法构造对象."""
 14 | 
 15 |     @class_common_init(re_topic_url)
 16 |     def __init__(self, url, name=None, session=None):
 17 |         """创建话题类实例.
 18 | 
 19 |         :param url: 话题url
 20 |         :param name: 话题名称，可选
 21 |         :return: Topic
 22 |         """
 23 |         self.url = url
 24 |         self._session = session
 25 |         self._name = name
 26 |         self._id = int(re_topic_url.match(self.url).group(1))
 27 | 
 28 |     @property
 29 |     def id(self):
 30 |         """获取话题Id（网址最后那串数字）
 31 | 
 32 |         :return: 话题Id
 33 |         :rtype: int
 34 |         """
 35 |         return self._id
 36 | 
 37 |     @property
 38 |     @check_soup('_xsrf')
 39 |     def xsrf(self):
 40 |         """获取知乎的反xsrf参数（用不到就忽视吧~）
 41 | 
 42 |         :return: xsrf参数
 43 |         :rtype: str
 44 |         """
 45 |         return self.soup.find('input', attrs={'name': '_xsrf'})['value']
 46 | 
 47 |     @property
 48 |     @check_soup('_tid')
 49 |     def tid(self):
 50 |         """话题内部Id，有时候要用到
 51 | 
 52 |         :return: 话题内部Id
 53 |         :rtype: int
 54 |         """
 55 |         return int(self.soup.find(
 56 |             'div', id='zh-topic-desc')['data-resourceid'])
 57 | 
 58 |     @property
 59 |     @check_soup('_name')
 60 |     def name(self):
 61 |         """获取话题名称.
 62 | 
 63 |         :return: 话题名称
 64 |         :rtype: str
 65 |         """
 66 |         return self.soup.find('h1').text
 67 | 
 68 |     @property
 69 |     def parents(self):
 70 |         """获取此话题的父话题。
 71 |         注意：由于没找到有很多父话题的话题来测试，
 72 |         所以本方法可能再某些时候出现问题，请不吝反馈。
 73 | 
 74 |         :return: 此话题的父话题，返回生成器
 75 |         :rtype: Topic.Iterable
 76 |         """
 77 |         self._make_soup()
 78 |         parent_topic_tag = self.soup.find('div', class_='parent-topic')
 79 |         if parent_topic_tag is None:
 80 |             yield []
 81 |         else:
 82 |             for topic_tag in parent_topic_tag.find_all('a'):
 83 |                 yield Topic(Zhihu_URL + topic_tag['href'],
 84 |                             topic_tag.text.strip(),
 85 |                             session=self._session)
 86 | 
 87 |     @property
 88 |     def children(self):
 89 |         """获取此话题的子话题
 90 | 
 91 |         :return: 此话题的子话题， 返回生成器
 92 |         :rtype: Topic.Iterable
 93 |         """
 94 |         self._make_soup()
 95 |         child_topic_tag = self.soup.find('div', class_='child-topic')
 96 |         if child_topic_tag is None:
 97 |             return []
 98 |         elif '共有' not in child_topic_tag.contents[-2].text:
 99 |             for topic_tag in child_topic_tag.div.find_all('a'):
100 |                 yield Topic(Zhihu_URL + topic_tag['href'],
101 |                             topic_tag.text.strip(),
102 |                             session=self._session)
103 |         else:
104 |             flag = 'load'
105 |             child = ''
106 |             data = {'_xsrf': self.xsrf}
107 |             params = {
108 |                 'parent': self.id
109 |             }
110 |             while flag == 'load':
111 |                 params['child'] = child
112 |                 res = self._session.post(Topic_Get_Children_Url,
113 |                                          params=params, data=data)
114 |                 j = map(lambda x: x[0], res.json()['msg'][1])
115 |                 *topics, last = j
116 |                 for topic in topics:
117 |                     yield Topic(Zhihu_URL + '/topic/' + topic[2], topic[1],
118 |                                 session=self._session)
119 |                 flag = last[0]
120 |                 child = last[2]
121 |                 if flag == 'topic':
122 |                     yield Topic(Zhihu_URL + '/topic/' + last[2], last[1],
123 |                                 session=self._session)
124 | 
125 |     @property
126 |     @check_soup('_follower_num')
127 |     def follower_num(self):
128 |         """获取话题关注人数.
129 | 
130 |         :return: 关注人数
131 |         :rtype: int
132 |         """
133 |         follower_num_block = self.soup.find(
134 |             'div', class_='zm-topic-side-followers-info')
135 |         # 无人关注时 找不到对应block，直接返回0 （感谢知乎用户 段晓晨 提出此问题）
136 |         if follower_num_block.strong is None:
137 |             return 0
138 |         return int(follower_num_block.strong.text)
139 | 
140 |     @property
141 |     def followers(self):
142 |         """获取话题关注者
143 | 
144 |         :return: 话题关注者，返回生成器
145 |         :rtype: Author.Iterable
146 |         """
147 |         from .author import Author, ANONYMOUS
148 |         self._make_soup()
149 |         gotten_data_num = 20
150 |         data = {
151 |             '_xsrf': self.xsrf,
152 |             'start': '',
153 |             'offset': 0
154 |         }
155 |         while gotten_data_num == 20:
156 |             res = self._session.post(
157 |                 Topic_Get_More_Follower_Url.format(self.id), data=data)
158 |             j = res.json()['msg']
159 |             gotten_data_num = j[0]
160 |             data['offset'] += gotten_data_num
161 |             soup = BeautifulSoup(j[1])
162 |             divs = soup.find_all('div', class_='zm-person-item')
163 |             for div in divs:
164 |                 h2 = div.h2
165 |                 url = Zhihu_URL + h2.a['href']
166 |                 name = h2.a.text
167 |                 motto = h2.parent.div.text.strip()
168 |                 try:
169 |                     yield Author(url, name, motto, session=self._session)
170 |                 except ValueError:  # invalid url
171 |                     yield ANONYMOUS
172 |             data['start'] = int(re_get_number.match(divs[-1]['id']).group(1))
173 | 
174 |     @property
175 |     @check_soup('_photo_url')
176 |     def photo_url(self):
177 |         """获取话题头像图片地址.
178 | 
179 |         :return: 话题头像url
180 |         :rtype: str
181 |         """
182 |         img = self.soup.find('a', id='zh-avartar-edit-form').img['src']
183 |         return img.replace('_m', '_r')
184 | 
185 |     @property
186 |     @check_soup('_description')
187 |     def description(self):
188 |         """获取话题描述信息.
189 | 
190 |         :return: 话题描述信息
191 |         :rtype: str
192 |         """
193 |         desc = self.soup.find('div', class_='zm-editable-content').text
194 |         return desc
195 | 
196 |     @property
197 |     def top_authors(self):
198 |         """获取最佳回答者
199 | 
200 |         :return: 此话题下最佳回答者，一般来说是5个，要不就没有，返回生成器
201 |         :rtype: Author.Iterable
202 |         """
203 |         from .author import Author, ANONYMOUS
204 |         self._make_soup()
205 |         t = self.soup.find('div', id='zh-topic-top-answerer')
206 |         if t is None:
207 |             return
208 |         for d in t.find_all('div', class_='zm-topic-side-person-item-content'):
209 |             url = Zhihu_URL + d.a['href']
210 |             name = d.a.text
211 |             motto = d.find('span', class_='bio')['title']
212 |             try:
213 |                 yield Author(url, name, motto, session=self._session)
214 |             except ValueError:  # invalid url
215 |                 yield ANONYMOUS
216 | 
217 |     @property
218 |     def top_answers(self):
219 |         """获取话题下的精华答案.
220 | 
221 |         :return: 话题下的精华答案，返回生成器.
222 |         :rtype: Answer.Iterable
223 |         """
224 |         from .question import Question
225 |         from .answer import Answer
226 |         from .author import Author, ANONYMOUS
227 | 
228 |         top_answers_url = Topic_Top_Answers_Url.format(self.id)
229 |         params = {'page': 1}
230 |         while True:
231 |             # 超出50页直接返回
232 |             if params['page'] > 50:
233 |                 return
234 |             res = self._session.get(top_answers_url, params=params)
235 |             params['page'] += 1
236 |             soup = BeautifulSoup(res.content)
237 |             # 不够50页，来到错误页面 返回
238 |             if soup.find('div', class_='error') is not None:
239 |                 return
240 |             questions = soup.find_all('a', class_='question_link')
241 |             answers = soup.find_all('a', class_='answer-date-link')
242 |             authors = soup.find_all('div', class_='zm-item-answer-author-info')
243 |             upvotes = soup.find_all('a', class_='zm-item-vote-count')
244 |             for ans, up, q, au in zip(answers, upvotes, questions, authors):
245 |                 answer_url = Zhihu_URL + ans['href']
246 |                 question_url = Zhihu_URL + q['href']
247 |                 question_title = q.text.strip()
248 |                 upvote = up.text
249 |                 if upvote.isdigit():
250 |                     upvote = int(upvote)
251 |                 else:
252 |                     upvote = None
253 |                 question = Question(question_url, question_title,
254 |                                     session=self._session)
255 |                 if au.a is None:
256 |                     author = ANONYMOUS
257 |                 else:
258 |                     author_url = Zhihu_URL + au.a['href']
259 |                     author_name = au.a.text
260 |                     author_motto = au.strong['title'] if au.strong else ''
261 |                     author = Author(author_url, author_name, author_motto,
262 |                                     session=self._session)
263 |                 yield Answer(answer_url, question, author, upvote,
264 |                              session=self._session)
265 | 
266 |     @property
267 |     def questions(self):
268 |         """获取话题下的所有问题（按时间降序排列）
269 | 
270 |         :return: 话题下所有问题，返回生成器
271 |         :rtype: Question.Iterable
272 |         """
273 |         from .question import Question
274 |         question_url = Topic_Questions_Url.format(self.id)
275 |         params = {'page': 1}
276 |         older_time_stamp = int(time.time()) * 1000
277 |         while True:
278 |             res = self._session.get(question_url, params=params)
279 |             soup = BeautifulSoup(res.content)
280 |             if soup.find('div', class_='error') is not None:
281 |                 return
282 |             questions = soup.find_all('div', class_='question-item')
283 |             questions = list(filter(
284 |                 lambda x: int(x.h2.span['data-timestamp']) < older_time_stamp,
285 |                 questions))
286 |             for qu_div in questions:
287 |                 url = Zhihu_URL + qu_div.h2.a['href']
288 |                 title = qu_div.h2.a.text.strip()
289 |                 creation_time = datetime.fromtimestamp(
290 |                         int(qu_div.h2.span['data-timestamp']) // 1000)
291 |                 yield Question(url, title, creation_time=creation_time,
292 |                                session=self._session)
293 |             older_time_stamp = int(questions[-1].h2.span['data-timestamp'])
294 |             params['page'] += 1
295 | 
296 |     @property
297 |     def unanswered_questions(self):
298 |         """获取话题下的等待回答的问题
299 | 
300 |         什么是「等待回答」的问题：https://www.zhihu.com/question/40470324
301 | 
302 |         :return: 话题下等待回答的问题，返回生成器
303 |         :rtype: Question.Iterable
304 |         """
305 |         from .question import Question
306 |         question_url = Topic_Unanswered_Question_Url.format(self.id)
307 |         params = {'page': 1}
308 |         while True:
309 |             res = self._session.get(question_url, params=params)
310 |             soup = BeautifulSoup(res.content)
311 |             if soup.find('div', class_='error') is not None:
312 |                 return
313 |             questions = soup.find_all('div', class_='question-item')
314 |             for qu_div in questions:
315 |                 url = Zhihu_URL + qu_div.h2.a['href']
316 |                 title = qu_div.h2.a.text.strip()
317 |                 yield Question(url, title, session=self._session)
318 |             params['page'] += 1
319 | 
320 |     @property
321 |     def answers(self):
322 |         """获取话题下所有答案（按时间降序排列）
323 | 
324 |         :return: 话题下所有答案，返回生成器
325 |         :rtype: Answer.Iterable
326 |         """
327 |         from .question import Question
328 |         from .answer import Answer
329 |         from .author import Author, ANONYMOUS
330 | 
331 |         newest_url = Topic_Newest_Url.format(self.id)
332 |         params = {'start': 0, '_xsrf': self.xsrf}
333 |         res = self._session.get(newest_url)
334 |         soup = BeautifulSoup(res.content)
335 |         while True:
336 |             divs = soup.find_all('div', class_='folding')
337 |             # 如果话题下无答案，则直接返回
338 |             if len(divs) == 0:
339 |                 return
340 |             last_score = divs[-1]['data-score']
341 |             for div in divs:
342 |                 q = div.find('a', class_="question_link")
343 |                 question_url = Zhihu_URL + q['href']
344 |                 question_title = q.text.strip()
345 |                 question = Question(question_url, question_title,
346 |                                     session=self._session)
347 | 
348 |                 ans = div.find('a', class_='answer-date-link')
349 |                 answer_url = Zhihu_URL + ans['href']
350 | 
351 |                 upvote = div.find('a', class_='zm-item-vote-count').text
352 |                 if upvote.isdigit():
353 |                     upvote = int(upvote)
354 |                 else:
355 |                     upvote = None
356 | 
357 |                 au = div.find('div', class_='zm-item-answer-author-info')
358 |                 if au.a is None:
359 |                     author = ANONYMOUS
360 |                 else:
361 |                     author_url = Zhihu_URL + au.a['href']
362 |                     author_name = au.a.text
363 |                     author_motto = au.strong['title'] if au.strong else ''
364 |                     author = Author(author_url, author_name, author_motto,
365 |                                     session=self._session)
366 |                 yield Answer(answer_url, question, author, upvote,
367 |                              session=self._session)
368 | 
369 |             params['offset'] = last_score
370 |             res = self._session.post(newest_url, data=params)
371 |             gotten_feed_num = res.json()['msg'][0]
372 |             # 如果得到内容数量为0则返回
373 |             if gotten_feed_num == 0:
374 |                 return
375 |             soup = BeautifulSoup(res.json()['msg'][1])
376 | 
377 |     @property
378 |     def hot_questions(self):
379 |         """获取话题下热门的问题
380 | 
381 |         :return: 话题下的热门动态中的问题，按热门度顺序返回生成器
382 |         :rtype: Question.Iterable
383 |         """
384 |         from .question import Question
385 |         hot_questions_url = Topic_Hot_Questions_Url.format(self.id)
386 |         params = {'start': 0, '_xsrf': self.xsrf}
387 |         res = self._session.get(hot_questions_url)
388 |         soup = BeautifulSoup(res.content)
389 |         while True:
390 |             questions_duplicate = soup.find_all('a', class_='question_link')
391 |             # 如果话题下无问题，则直接返回
392 |             if len(questions_duplicate) == 0:
393 |                 return
394 |                 # 去除重复的问题
395 |             questions = list(set(questions_duplicate))
396 |             questions.sort(key=self._get_score, reverse=True)
397 |             last_score = soup.find_all(
398 |                 'div', class_='feed-item')[-1]['data-score']
399 |             for q in questions:
400 |                 question_url = Zhihu_URL + q['href']
401 |                 question_title = q.text.strip()
402 |                 question = Question(question_url, question_title,
403 |                                     session=self._session)
404 |                 yield question
405 |             params['offset'] = last_score
406 |             res = self._session.post(hot_questions_url, data=params)
407 |             gotten_feed_num = res.json()['msg'][0]
408 |             # 如果得到问题数量为0则返回
409 |             if gotten_feed_num == 0:
410 |                 return
411 |             soup = BeautifulSoup(res.json()['msg'][1])
412 | 
413 |     @property
414 |     def hot_answers(self):
415 |         """获取话题下热门的回答
416 | 
417 |         :return: 话题下的热门动态中的回答，按热门度顺序返回生成器
418 |         :rtype: Question.Iterable
419 |         """
420 |         from .question import Question
421 |         from .author import Author
422 |         from .answer import Answer
423 |         hot_questions_url = Topic_Hot_Questions_Url.format(self.id)
424 |         params = {'start': 0, '_xsrf': self.xsrf}
425 |         res = self._session.get(hot_questions_url)
426 |         soup = BeautifulSoup(res.content)
427 |         while True:
428 |             answers_div = soup.find_all('div', class_='feed-item')
429 |             last_score = answers_div[-1]['data-score']
430 |             for div in answers_div:
431 |                 # 没有 text area 的情况是：答案被和谐。
432 |                 if not div.textarea:
433 |                     continue
434 |                 question_url = Zhihu_URL + div.h2.a['href']
435 |                 question_title = div.h2.a.text.strip()
436 |                 question = Question(question_url, question_title,
437 |                                     session=self._session)
438 |                 author_link = div.find('a', class_='author-link')
439 |                 if not author_link:
440 |                     author_url = None
441 |                     author_name = '匿名用户'
442 |                     author_motto = ''
443 |                 else:
444 |                     author_url = Zhihu_URL + author_link['href']
445 |                     author_name = author_link.text
446 |                     author_motto_span = div.find('span', class_='bio')
447 |                     author_motto = author_motto_span['title'] \
448 |                         if author_motto_span else ''
449 |                 author = Author(author_url, author_name, author_motto,
450 |                                 session=self._session)
451 | 
452 |                 body = div.find('div', class_='zm-item-rich-text')
453 |                 answer_url = Zhihu_URL + body['data-entry-url']
454 |                 upvote_num = int(div.find(
455 |                     'div', class_='zm-item-vote-info')['data-votecount'])
456 | 
457 |                 yield Answer(answer_url, question, author, upvote_num,
458 |                              session=self._session)
459 | 
460 |             params['offset'] = last_score
461 |             res = self._session.post(hot_questions_url, data=params)
462 |             gotten_feed_num = res.json()['msg'][0]
463 |             # 如果得到问题数量为0则返回
464 |             if gotten_feed_num == 0:
465 |                 return
466 |             soup = BeautifulSoup(res.json()['msg'][1])
467 | 
468 |     @staticmethod
469 |     def _get_score(tag):
470 |         h2 = tag.parent
471 |         div = h2.parent
472 |         try:
473 |             _ = h2['class']
474 |             return div['data-score']
475 |         except KeyError:
476 |             return div.parent.parent['data-score']
477 | 


--------------------------------------------------------------------------------