├── .gitattributes ├── .gitignore ├── .travis.yml ├── ChangeLog.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── activity.rst ├── answer.rst ├── author.rst ├── classes.rst ├── client.rst ├── collection.rst ├── column.rst ├── comment.rst ├── conf.py ├── examples.rst ├── index.rst ├── install.rst ├── login.rst ├── make.bat ├── me.rst ├── post.rst ├── question.rst ├── requirements.txt └── topic.rst ├── example ├── analyze_user.py └── test.json ├── setup.cfg ├── setup.py ├── test ├── data │ ├── answer.html │ ├── answer.md │ ├── answer_content.html │ ├── answer_upvoter.html │ ├── collection.html │ ├── column.json │ ├── column_post.json │ ├── post.md │ ├── question.html │ └── question_more_answer.html ├── test.json ├── test_activity.py ├── test_answer.py ├── test_collection.py ├── test_column.py ├── test_common.py ├── test_post.py ├── test_question.py ├── test_utils.py └── zhihu-test.py └── zhihu ├── __init__.py ├── activity.py ├── acttype.py ├── answer.py ├── author.py ├── base.py ├── client.py ├── collection.py ├── column.py ├── comment.py ├── common.py ├── me.py ├── post.py ├── question.py └── topic.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | 19 | test/data/* linguist-vendored=true 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # project 2 | test/cookies.json 3 | test/zhihu-mytest.py 4 | 5 | # Byte-compiled / 
optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyCharm 30 | .idea/ 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | /docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | # ========================= 64 | # Operating System Files 65 | # ========================= 66 | 67 | # OSX 68 | # ========================= 69 | 70 | .DS_Store 71 | .AppleDouble 72 | .LSOverride 73 | 74 | # Thumbnails 75 | ._* 76 | 77 | # Files that might appear on external disk 78 | .Spotlight-V100 79 | .Trashes 80 | 81 | # Directories potentially created on remote AFP share 82 | .AppleDB 83 | .AppleDesktop 84 | Network Trash Folder 85 | Temporary Items 86 | .apdisk 87 | 88 | # Windows 89 | # ========================= 90 | 91 | # Windows image file caches 92 | Thumbs.db 93 | ehthumbs.db 94 | 95 | # Folder config file 96 | Desktop.ini 97 | 98 | # Recycle Bin used on file shares 99 | $RECYCLE.BIN/ 100 | 101 | # Windows Installer files 102 | *.cab 103 | *.msi 104 | *.msm 105 | *.msp 106 | 107 | # Windows shortcuts 108 | *.lnk 109 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | 
python: 4 | - "3.4" 5 | - "3.5" 6 | 7 | before_install: 8 | - "pip install --upgrade pip" 9 | 10 | install: 11 | - "pip install .[lxml]" 12 | 13 | script: 14 | - "cd test" 15 | - "./zhihu-test.py" 16 | 17 | notifications: 18 | email: 19 | on_success: never 20 | on_failure: always 21 | -------------------------------------------------------------------------------- /ChangeLog.rst: -------------------------------------------------------------------------------- 1 | 更新日志 2 | ======== 3 | 4 | 0.3.23 5 | ------ 6 | 7 | - [Fix] 修复了由于知乎前端更改, `Activity` 类中获取 「提了一个问题」 类型的动态时会报错的 Bug。 8 | 9 | 0.3.22 10 | ------ 11 | 12 | - [Add] 增加了 `Author.followers_skip()` 和 `Author.followees.skip()` 函数,可以在获取用户关注者时跳过前 n 个用户。 13 | 14 | 0.3.21 15 | ------ 16 | 17 | - [Add] 增加了一个 `BanException` 异常,在尝试获取被反作弊系统限制的用户资料时将会引发此异常,需要用户自行处理。 18 | 19 | 0.3.20 20 | ------ 21 | 22 | - [fix] 修复获取用户粉丝时,因为新的徽章的加入造成的解析 Bug。 23 | 24 | 0.3.19 25 | ------ 26 | 27 | - [Fix] 现在允许通过 `https://www.zhihu.com/org/abcde` 这种 URL 获取机构号对象。 28 | - [Add] `ZhihuClient.login_in_termianl` 和 `ZhihuClient.create_cookies` 增加了参数 `use_getpass` 来设置是否使用安全模式输入密码,以解决在某些 Windows 上的 IDE 中运行时无法输入密码的问题。。 29 | 30 | 0.3.18 31 | ------ 32 | 33 | - [fix] 修复了许多由知乎前端改变造成的小 bug,暂时又可用了。 34 | 35 | 0.3.17 36 | ------ 37 | 38 | - [fix] 修复答案翻页 pagesize 改变造成的无法获取所有答案的问题 39 | - [fix] 修复了一大都因为前端 data-votecount 属性被删除造成的问题…… 40 | - [update] 改了一点测试代码…… 41 | 42 | 0.3.16 43 | ------ 44 | 45 | - [fix] 修复因知乎登录又需要验证码且验证码逻辑小幅度修改造成的无法登录的问题 46 | 47 | 0.3.15 48 | ------ 49 | 50 | - [change] 现在 question.topics 返回 Topic 对象的迭代器,而不是话题名字的列表 51 | 52 | 0.3.14 53 | ------ 54 | 55 | - [fix] 修复 Author.columns 因为知乎专栏大幅度改版而无法获取到专栏的问题 56 | - [fix] 修复 Post.column 因为知乎现在允许发布无专栏文章而出错的 bug,对于无专栏的 Post 现在 column 属性返回 None 57 | - [fix] 修复 Post.slug 因为知乎文章网址变更而无法获取的问题 58 | - [fix] 修复 Post.column_in_name 因为知乎现在允许发布无专栏文章而出错的 bug,对于无专栏的 Post 现在 column 属性返回 None 59 | - [fix] 修复因为上述 Bugs 造成的 Author.activities 出错的问题 60 | - [add] 现在 CollectActType 可以直接从模块顶级 import, 即 from zhihu import 
CollectActType 61 | - [fix] 修复 Topic.follower.motto 获取不正确的问题 62 | 63 | 0.3.13 64 | ------ 65 | 66 | - [fix] 修复因知乎将每次点击问题页面「更多」按钮只加载 50 个回答改为 20 个造成的无法获取所有问题的 bug 67 | - [fix] 修复获取 post 后,直接调用 post.save 会出错的 bug 68 | - [change] 在终端登录时输入的密码改为不可见 69 | - [change] 貌似知乎登录不怎么需要验证码了,现在作为一个可选项,login_in_terminal 和 create_cookies 默认均不要求验证码 70 | - [fix] 修复因知乎专栏改版,API 地址变更 造成的 Post 类无法使用的问题 71 | - [add] 增加 client.add_proxy_pool 方法设置代理池 72 | - [add] 增加 client.remove_proxy_pool 方法移除代理池 73 | 74 | 0.3.12 75 | ------ 76 | 77 | - [rollback] 知乎又把所有问题的接口改回来了……妈的智障 78 | - [add] 增加了获取话题下等待回答的问题的功能:Topic.unanswered_questions 79 | 80 | 0.3.11 81 | ------ 82 | 83 | - [add] Avoid redirection 84 | - [fix] 知乎改了 Topic 的所有问题功能,变成等待回答的功能了 85 | 86 | 0.3.10-1 87 | -------- 88 | 89 | - [fix] 我真是傻了,上传之前忘记删除debug语句了。。。 90 | 91 | 0.3.10 92 | ------ 93 | - [add] 添加 answer.latest_comments 属性 94 | - [fix] 获取头像失败 95 | - [add] zhihu.ANONYMOUS 表示匿名用户 96 | - [fix] answer.deleted 属性错误 97 | - [fix] 解决一些诡异的用户带来的问题 98 | - [add] post 可保存为 html 格式 99 | - [fix] 修复 Author 的 location education 等属性无法获取的 bug 100 | 101 | 0.3.9-1 102 | ------- 103 | 104 | - [fix] 修复了由于 img 的 title 属性修改为 alt 属性造成的 Author.followed_topic 获取前几个话题出错的 bug 105 | 106 | 0.3.9 107 | ----- 108 | 109 | - [add] Question 和 Answer 添加 deleted 属性 110 | - [fix] 修复了问题没有回答时 Question.answers 出错的问题 111 | - [fix] 修复了回答仅有一页时无法获取按时间排序的答案的问题 112 | - [fix] 修复无法刷新 answer_num 的问题 113 | - [fix] 修复收藏为0时获取收藏数出错的问题 114 | - [fix] 知乎修改了评论的前端代码 115 | - [change] Comment 类现在也提供 datetime.datetime 类型的 creation_time 属性, 去掉 time_string 116 | - [fix] 修复了 topic.question 由于时间戳乘以了 1000 而造成的错误 117 | - [fix] 修复了 topic.top_answer 无法获取到内容的 bug 118 | - [fix] 修复了 topic.hot_answer 无法获取到内容的 bug 119 | 120 | 0.3.8 121 | ----- 122 | 123 | - [add] Answer 和 Question 增加 refresh() 方法, 刷新问题答案 object 的属性 124 | - [add] Question 初始化的 url 现在支持 ?sort=created 125 | - [add] 使用带 ?sort=created 的 url 初始化问题时, question.answers 按照时间顺序返回答案 126 | - [add] 添加了 Answer.comment_num 属性, 获取评论数量 127 | - [add] 添加了 
Collection.id 属性 128 | - [fix] 现在 Activity.type 变成 read-only property 并加入文档了 129 | 130 | 全都 Thanks `@laike9m `__ 131 | 132 | 0.3.7 133 | ----- 134 | 135 | - [fix] 修复了用户动态中有关注圆桌行为时会崩溃的 Bug(目前暂时跳过这类动态)。 136 | - [fix] 知乎删除了深网话题,正好Topic类是用的那个话题测试,我还以为代码bug了……现在改成测试「程序员」话题。 137 | - [add] 曾加了获取专栏文章点赞者的功能。 138 | 139 | 140 | 0.3.6 141 | ----- 142 | 143 | - [fix] 修试图获取登录用户自身 location, business 等属性但自己又未填写时出现的 bug 144 | - [fix] 修复 topic.py 中混合使用 return sth 和 yield sth 导致的旧版本 python 报语法错误的问题 145 | - [add/fix] ActType 中添加了关注收藏夹 (Thanks `@cssmlulu `__) 146 | - [fix] 修复了 Author.activities 项 answer 的 author 属性不正确的 bug (Thanks `@cssmlulu `__) 147 | 148 | 0.3.5 149 | ----- 150 | 151 | - [add] 添加 Answer.collect_num 属性, 获取答案的收藏数 152 | - [add] 添加 Answer.collections 接口, 获取收藏了该答案的收藏夹 153 | - [add] 添加 Collections.logs 接口, 获取收藏夹日志 154 | - [add] 添加 Question.author 属性,获取提问者 155 | - [fix] 修复文档代码的一些错误 156 | 157 | 前四个功能Thanks `@laike9m `__ 158 | 159 | 0.3.4 160 | ----- 161 | 162 | - [f**K] 随便在知乎上发了个小专栏……不小心就进撕逼大战了 QAQ 我好方~ 163 | - [add] 增加了 example 文件夹,里面放一些实例 164 | - [add] Add answer creation_time attribute(Thanks @laike9m) 165 | - [add] 添加 Question.creation_time 和 Question.last_edit_time 属性(Thanks @laike9m) 166 | - [fix] 修复了 UPVOTE_ANSWER 型的 Activity 的 act.answer.author 全都是匿名用户的 bug(不知道是不是前端改了) 167 | 168 | 0.3.3 169 | ----- 170 | 171 | - [fix] 紧急更新,知乎页面上的链接大多数都变成了 https, 暂时只简单的改了一点正则表达式已作为紧急应对,有 bug 请开 issue。 172 | 173 | 0.3.2 174 | ----- 175 | 176 | - [change] 改变 Author 类获取 Activities 的机制,判断类型更准确(Thanks `@laike9m `__)。 177 | - [change] 为方便以后写测试,类架构修改为均继承 BaseZhihu 类(Thanks `@littlezz `__)。 178 | 179 | 0.3.1 180 | ----- 181 | 182 | - [fix] 修复因为知乎 Answer 的 css class 更改导致的 Answer 类 content 属性获取不正确的 bug 183 | - [fix] 修复历史遗留代码造成使用 profile card 获取头像时,网址不正确的 bug(Thanks `@bdqy `__) 184 | - [fix] 修复因答案被和谐造成的 bug(Thanks `@littlezz `__) 185 | - [add] 获取用户的一些详细信息,包括微博,所在地,教育情况,所在行业等等(Thanks `@zeroxfio `__) 186 | - [add] Answer 类增加了获取答案的评论的功能(Thanks `@zeroxfio `__) 187 | - [add] Me 类增加了发送私信和评论的功能(Thanks 
`@zeroxfio `__) 188 | - [add] Me 类增加了给答案点没有帮助的功能(Thanks `@lishubing `__) 189 | - [add] Me 类增加了屏蔽用户,屏蔽话题的功能(Thanks `@lishubing `__) 190 | 191 | 0.3.0 192 | ----- 193 | 194 | - [fix] 修复 Author 类的 get_followed_columns 接口获取到的 Column 对象调用 followed_num 函数可能获取不到正确数量的 bug 195 | - [fix] 修复 Author 类的 get_followed_columns 接口获取到的 Column 对象处于未登录状态的 bug 196 | - [add] Author 类增加获取用户关注的话题数的接口(followed_topic_num) 197 | - [add] Author 类增加获取用户关注的话题的接口 (followed_topics) 198 | 199 | 0.2.9 200 | ----- 201 | 202 | - [fix] 修复因问题描述和答案使用相同的 class 造成的答案内容与序号不同的 bug。 203 | - [tucao] 一天修三四个bug好累……我估计得找时间抓一下知乎的移动端 API 了,前端天天变这谁受得了。 204 | 205 | 0.2.8 206 | ----- 207 | 208 | - [fix] 上次的 bug 修复的不完全,匿名用户的情况没有考虑周全,紧急修复下……(可能还有地方没修复,请关注更新。 209 | 210 | 0.2.7 211 | ----- 212 | 213 | - [fix] 修复由于把用户 tag 从 h3 改成了 div 造成的一系列 bug (Thanks `@lishubing `__) 214 | 215 | 0.2.6 216 | ----- 217 | 218 | - [fix] 获取匿名用户的ID出错的问题,暂定为返回空字符串 219 | - [add] 增加获取用户关注专栏数的功能 (Thanks `@cssmlulu `__) 220 | - [add] 增加获取用户关注专栏的功能 (Thanks `@cssmlulu `__) 221 | 222 | 0.2.5 223 | ----- 224 | 225 | - [fix] 修复了某些问题无法获取答案的bug 226 | - [fix] 知乎又把头像链接改回去了。。。 227 | 228 | 0.2.4 229 | ----- 230 | 231 | - [fix] 知乎修改了图片链接的格式,影响了答案图片,头像。 232 | 233 | 0.2.3 234 | ----- 235 | 236 | - [fix] Topic.hot_question 的顺序 Bug 237 | - [fix] 知乎登录逻辑修改(?) 238 | - [add] Topic 所有答案接口 239 | - [add] Topic 热门答案接口 240 | 241 | 0.2.2 242 | ----- 243 | 244 | 代码美化,尽量满足 PEP8. 
245 | 246 | 0.2.1 247 | ----- 248 | 249 | 增加 Topic 类的最近动态(热门排序) 250 | 修复 Topic.children 的bug 251 | 252 | 0.2.0 253 | ----- 254 | 255 | 增加Me类及其相关操作 256 | 257 | - [x] 点赞,取消点赞,反对,取消反对某回答 258 | - [x] 点赞,取消点赞,反对,取消反对某文章 259 | - [x] 感谢,取消感谢某回答 260 | - [x] 关注,取消关注某用户 261 | - [x] 关注,取消关注某问题 262 | - [x] 关注,取消关注某话题 263 | - [x] 关注,取消关注收藏夹 264 | 265 | 增加Topic类相关操作: 266 | 267 | - [x] 获取话题名称 268 | - [x] 获取话题描述 269 | - [x] 获取话题图标 270 | - [x] 获取关注者数量 271 | - [x] 获取关注者 272 | - [x] 获取父话题 273 | - [x] 获取子话题 274 | - [x] 获取优秀答主 275 | - [ ] 获取最近动态(暂缓) 276 | - [x] 获取精华回答 277 | - [x] 获取所有问题 278 | 279 | 0.1.5 280 | ----- 281 | 282 | - 增加了获取收藏夹关注者的功能 283 | - 增加了获取问题关注者的功能 284 | - Column的一个小Bug修复 285 | 286 | 0.1.4 287 | ----- 288 | 289 | 知乎登录参数变化,从rememberme变成了remember_me,做了跟进。 290 | 291 | 2015.07.30 292 | ---------- 293 | 294 | 发布到Pypi. 295 | 296 | 2015.07.29 297 | ---------- 298 | 299 | - 重构项目结构 300 | - 增加 zhihu.Client 类,改善原先模块需要使用当前目录下 cookies 的弊端,现在的使用方法请看 Readme 中的示例。 301 | - 去掉了 _text2int 方法,因为发现知乎以K结尾的赞同数也有办法获取到准确点赞数。 302 | 303 | 2015.07.26 304 | ---------- 305 | 306 | 重构项目结构,转变为标准 Python 模块结构。 307 | 308 | 2015.07.26 309 | ---------- 310 | 311 | 添加 Author.photo_url 接口,用于获取用户头像。 312 | 313 | 本属性的实现较为分散,在不同的地方使用了不同的方法: 314 | 315 | - Author.follower(e)s, Answer.upvoters 等属性返回的 Author 自带 photo_url 316 | 317 | - 用户自定义的 Author 在访问过主页的情况下通过解析主页得到 318 | 319 | - 用户自定义的 Author 在未访问主页的情况下为了性能使用了知乎的 CardProfile 320 | API 321 | 322 | 因为实现混乱所以容易有Bug,欢迎反馈。 323 | 324 | 2015.07.25 325 | ---------- 326 | 327 | 增加了获取用户关注者和粉丝的功能 328 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 329 | 330 | Author.followers, Author.folowees 返回Author迭代器,自带url, name, motto, question\_num, answer\_num, upvote\_num, follower\_num属性。 331 | 332 | html解析器优选 333 | ~~~~~~~~~~~~~~ 334 | 335 | 在安装了 lxml 的情况下默认使用 lxml 作为解析器,否则使用 html.parser。 336 | 337 | 增加答案获取点赞用户功能 338 | ~~~~~~~~~~~~~~~~~~~~~~~~ 339 | 340 | Author.upvoters 返回 Author 迭代器,自带url, name, motto, question\_num, answer\_num, upvote\_num, thank\_num属性 341 | 342 | 增加简易判断是否为「三零用户」功能 343 | 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 344 | 345 | Author.is_zero_user() ,判断标准为,赞同,感谢,提问数,回答数均为 0。 346 | 347 | 2015.07.23 348 | ---------- 349 | 350 | 各个类url属性更改为公开 351 | ~~~~~~~~~~~~~~~~~~~~~~~ 352 | 353 | 暂时这样吧,有点懒了,因为这样会让使用者有机会非法修改 url,可能导致 Bug,以后勤快的话会改成 read-only。 354 | 355 | 类名变更 356 | ~~~~~~~~ 357 | 358 | 专栏类从 Book 更名为 Cloumn 359 | 360 | 文章类从 Article 更名为 Post 361 | 362 | 以上两个更名同时影响了其他类的属性名,如 Author.books 变更为 Author.columns,其他类同理。 363 | 364 | 接口名变更 365 | ~~~~~~~~~~ 366 | 367 | 1. 统一了一下复数的使用。比如 Author.answers_num 变为 Author.answer_num, Author.collections\_num 变为 Author.collection\_num。 368 | 也就是说某某数量的接口名为 Class.foo_num,foo使用单数形式。 369 | 370 | 2. 知乎的赞同使用单词 upvote,以前叫 agree 的地方现在都叫 upvote。比如 Author.agree_num 变为 Author.upvote_num,Post.agree_num 变为 Post.upvote_num。 371 | 372 | 3. Answer 类的 upvote 属性更名为 upvote_num。 373 | 374 | 提供\ ``Topic``\ 类 375 | ~~~~~~~~~~~~~~~~~~~ 376 | 377 | 目前只有获取话题名的功能。 378 | 379 | 提供\ ``Author.activities`` 380 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 381 | 382 | 属性获取用户动态,返回 Activity 类生成器。 383 | 384 | Activity 类提供 type 属性用于判断动态类型,type 为 ActType 类定义的常量,根据 type 的不同提供不同的属性,如下表: 385 | 386 | +----------------+--------------------+--------------+ 387 | | 类型 | 常量 | 提供的成员 | 388 | +================+====================+==============+ 389 | | 关注了问题 | FOLLOW\_QUESTION | question | 390 | +----------------+--------------------+--------------+ 391 | | 赞同了回答 | UPVOTE\_ANSWER | answer | 392 | +----------------+--------------------+--------------+ 393 | | 关注了专栏 | FOLLOW\_COLUMN | column | 394 | +----------------+--------------------+--------------+ 395 | | 回答了问题 | ANSWER\_QUESTION | answer | 396 | +----------------+--------------------+--------------+ 397 | | 赞同了文章 | UPVOTE\_POST | post | 398 | +----------------+--------------------+--------------+ 399 | | 发布了文章 | PUBLISH\_POST | post | 400 | +----------------+--------------------+--------------+ 401 | | 关注了话题 | FOLLOW\_TOPIC | topic | 402 | +----------------+--------------------+--------------+ 403 | | 提了一个问题 | 
ASK\_QUESTION | question | 404 | +----------------+--------------------+--------------+ 405 | 406 | 由于每种类型都只提供了一种属性,所以所有Activity对象都有 content 属性,用于直接获取唯一的属性。 407 | 408 | 示例代码见 zhihu-test.py 的 test_author 函数。 409 | 410 | activities 属性可以在未登录(未生成cookies)的情况下使用,但是根据知乎的隐私保护政策,开启了隐私保护的用户的回答和文章,此时作者信息会是匿名用户,所以还是建议登录后使用。 411 | 412 | 2015.07.22 413 | ---------- 414 | 415 | 尝试修复了最新版bs4导致的问题,虽然我没明白问题在哪QuQ,求测试。 416 | 417 | - Windows 已测试 (`@7sDream `__) 418 | - Linux 419 | 420 | - Ubuntu 已测试(`@7sDream `__) 421 | 422 | - Mac 已测试(`@SimplyY `__) 423 | 424 | 2015.07.16 425 | ---------- 426 | 427 | 重构 Answer 和 Article 的 url 属性为 public. 428 | 429 | 2015.07.11: 430 | ----------- 431 | 432 | Hotfix, 知乎更换了登录网址,做了简单的跟进,过了Test,等待Bug汇报中。 433 | 434 | 2015.06.04: 435 | ------------ 436 | 437 | 由 `@Gracker `__ 补充了在 Ubuntu 14.04 438 | 下的测试结果,并添加了补充说明。 439 | 440 | 2015.05.29: 441 | ------------ 442 | 443 | 修复了当问题关注人数为0时、问题答案数为0时的崩溃问题。(感谢:`@段晓晨 `__) 444 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2016 7sDream 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst LICENSE 2 | include test/test*.py 3 | include test/zhihu-test.py 4 | include docs/* 5 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | zhihu-py3 : 知乎非官方API库 with Python3 2 | ======================================== 3 | 4 | |Author| |Build| |DocumentationStatus| |PypiVersion| |License| |PypiDownloadStatus| 5 | 6 | 通知 7 | ---- 8 | 9 | 由于知乎前端老是改阿改的,每次我都要更新弄的我好烦的说…… 10 | 11 | 所以我开发了一个新的项目\ `Zhihu-OAuth `__。 12 | 13 | 这个新项目用了一些黑科技手段,反正应该是更加稳定和快速了!**而且还支持 Python 2 哟!** 14 | 稳定我倒是没测,但是这里有一个 15 | `速度对比 `__。 16 | 17 | 如果你是准备新开一个项目的话,我强烈建议你看看我的新项目~ 18 | 19 | 如果你已经用 Zhihu-py3 写了一些代码的话,我最近会写一个从 Zhihu-py3 转到 Zhihu-OAuth 20 | 的简易指南,你也可以关注一下哟。 21 | 22 | 毕竟嘛,有更好的方案的话,为什么不试试呢? 23 | 24 | 功能 25 | ---- 26 | 27 | 由于知乎没有公开API,加上受到\ `zhihu-python `__\ 项目的启发,在Python3下重新写了一个知乎的数据解析模块。 28 | 29 | 提供的功能一句话概括为,用户提供知乎的网址构用于建对应类的对象,可以获取到某些需要的数据。 30 | 31 | 简单例子: 32 | 33 | .. 
code:: python 34 | 35 | from zhihu import ZhihuClient 36 | 37 | Cookies_File = 'cookies.json' 38 | 39 | client = ZhihuClient(Cookies_File) 40 | 41 | url = 'http://www.zhihu.com/question/24825703' 42 | question = client.question(url) 43 | 44 | print(question.title) 45 | print(question.answer_num) 46 | print(question.follower_num) 47 | print(question.topics) 48 | 49 | for answer in question.answers: 50 | print(answer.author.name, answer.upvote_num) 51 | 52 | 这段代码的输出为: 53 | 54 | :: 55 | 56 | 关系亲密的人之间要说「谢谢」吗? 57 | 627 58 | 4322 59 | ['心理学', '恋爱', '社会', '礼仪', '亲密关系'] 60 | 龙晓航 50 61 | 小不点儿 198 62 | 芝士就是力量 89 63 | 欧阳忆希 425 64 | ... 65 | 66 | 另外还有\ ``Author(用户)``\ 、\ ``Answer(答案)``\ 、\ ``Collection(收藏夹)``\ 、\ ``Column(专栏)``\ 、\ ``Post(文章)``\ 、\ ``Topic(话题)``\ 等类可以使用,\ ``Answer``,\ ``Post``\ 类提供了\ ``save``\ 方法能将答案或文章保存为HTML或Markdown格式,具体请看文档,或者\ ``zhihu-test.py``\ 。 67 | 68 | 安装 69 | ---- 70 | 71 | .. class:: bold 72 | 73 | 本项目依赖于\ `requests `__\ 、\ `BeautifulSoup4 `__\ 、\ `html2text `__ 74 | 75 | 已将项目发布到pypi,请使用下列命令安装 76 | 77 | .. code:: bash 78 | 79 | (sudo) pip(3) install (--upgrade) zhihu-py3 80 | 81 | 希望开启lxml的话请使用: 82 | 83 | .. code:: bash 84 | 85 | (sudo) pip(3) install (--upgrade) zhihu-py3[lxml] 86 | 87 | 88 | 因为lxml解析html效率高而且容错率强,在知乎使用\ ``
``\ 时,自带的html.parser会将其转换成\ ``
...
``\ ,而lxml则转换为\ ``
``\ ,更为标准且美观,所以推荐使用第二个命令。 89 | 90 | 不安装lxml也能使用本模块,此时会自动使用html.parser作为解析器。 91 | 92 | PS 若在安装lxml时出错,请安装libxml和libxslt后重试: 93 | 94 | .. code:: bash 95 | 96 | sudo apt-get install libxml2 libxml2-dev libxslt1.1 libxslt1-dev 97 | 98 | 准备工作 99 | -------- 100 | 101 | 第一次使用推荐运行以下代码生成 cookies 文件: 102 | 103 | .. code:: python 104 | 105 | from zhihu import ZhihuClient 106 | 107 | ZhihuClient().create_cookies('cookies.json') 108 | 109 | 运行结果 110 | 111 | :: 112 | 113 | ====== zhihu login ===== 114 | email: 115 | password: 116 | please check captcha.gif for captcha 117 | captcha: 118 | ====== logging.... ===== 119 | login successfully 120 | cookies file created. 121 | 122 | 运行成功后会在目录下生成\ ``cookies.json``\ 文件。 123 | 124 | 以下示例皆以登录成功为前提。 125 | 126 | 建议在正式使用之前运行\ ``zhihu-test.py``\ 测试一下。 127 | 128 | 用法实例 129 | -------- 130 | 131 | 为了精简 Readme,本部分移动至文档内。 132 | 133 | 请看文档的「用法示例」部分。 134 | 135 | 登录方法综述 136 | --------------------------------------------- 137 | 138 | 为了精简 Readme,本部分移动至文档内。 139 | 140 | 请看文档的「登录方法综述」部分。 141 | 142 | 文档 143 | ---- 144 | 145 | 终于搞定了文档这个磨人的小妖精,可惜 Sphinx 还是不会用 T^T 146 | 先随意弄成这样吧: 147 | 148 | `Master版文档 `__ 149 | 150 | `Dev版文档 `__ 151 | 152 | 其他 153 | ---- 154 | 155 | **有问题请开Issue,几个小时后无回应可加最后面的QQ群询问。** 156 | 157 | 友链: 158 | 159 | - \ `zhihurss `__\ :一个基于 zhihu-py3 做的跨平台知乎 rss(any user) 的客户端。 160 | 161 | 162 | TODO List 163 | --------- 164 | 165 | - [x] 增加获取用户关注者,用户追随者 166 | - [x] 增加获取答案点赞用户功能 167 | - [x] 获取用户头像地址 168 | - [x] 打包为标准Python模块 169 | - [x] 重构代码,增加\ ``ZhihuClient``\ 类,使类可以自定义cookies文件 170 | - [x] 收藏夹关注者,问题关注者等等 171 | - [x] ``ZhihuClient``\ 增加各种用户操作(比如给某答案点赞) 172 | - [ ] Unittest (因为知乎可能会变,所以这个有点难 173 | - [x] 增加获取用户关注专栏数和关注专栏的功能 174 | - [x] 增加获取用户关注话题数和关注话题的功能 175 | - [x] 评论类也要慢慢提上议程了吧 176 | 177 | 联系我 178 | ------ 179 | 180 | Github:\ `@7sDream `__ 181 | 182 | 知乎:\ `@7sDream `__ 183 | 184 | 新浪微博:\ `@Dilover `__ 185 | 186 | 邮箱:\ `给我发邮件 `__ 187 | 188 | 编程交流群:478786205 189 | 190 | .. 
|Author| image:: https://img.shields.io/badge/Author-7sDream-blue.svg 191 | :target: https://github.com/7sDream 192 | .. |DocumentationStatus| image:: https://readthedocs.org/projects/zhihu-py3/badge/?version=latest 193 | :target: https://readthedocs.org/projects/zhihu-py3/?badge=latest 194 | .. |PypiVersion| image:: https://img.shields.io/pypi/v/zhihu-py3.svg 195 | :target: https://pypi.python.org/pypi/zhihu-py3 196 | .. |PypiDownloadStatus| image:: https://img.shields.io/pypi/dd/zhihu-py3.svg 197 | :target: https://pypi.python.org/pypi/zhihu-py3 198 | .. |License| image:: https://img.shields.io/pypi/l/zhihu-py3.svg 199 | :target: https://github.com/7sDream/zhihu-py3/blob/master/LICENSE 200 | .. |Build| image:: https://travis-ci.org/7sDream/zhihu-py3.svg?branch=dev 201 | :target: https://travis-ci.org/7sDream/zhihu-py3 202 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/zhihu-py3.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zhihu-py3.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/zhihu-py3" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/zhihu-py3" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 
157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/activity.rst: -------------------------------------------------------------------------------- 1 | Activity and ActType 用户动态类 2 | =============================== 3 | 4 | .. autoclass:: zhihu.activity.Activity 5 | :members: 6 | :special-members: __init__ 7 | 8 | .. autoclass:: zhihu.acttype.ActType 9 | :members: 10 | :special-members: __init__ 11 | -------------------------------------------------------------------------------- /docs/answer.rst: -------------------------------------------------------------------------------- 1 | Answer 答案类 2 | ============= 3 | 4 | .. autoclass:: zhihu.answer.Answer 5 | :members: 6 | :special-members: __init__ 7 | 8 | -------------------------------------------------------------------------------- /docs/author.rst: -------------------------------------------------------------------------------- 1 | Author 用户类 2 | ============= 3 | 4 | .. autoclass:: zhihu.author.Author 5 | :members: 6 | :special-members: __init__ 7 | 8 | .. 
autodata:: zhihu.author.ANONYMOUS 9 | :annotation: 10 | -------------------------------------------------------------------------------- /docs/classes.rst: -------------------------------------------------------------------------------- 1 | 知乎相关类文档 2 | ============== 3 | 4 | .. toctree:: 5 | 6 | client 7 | activity 8 | answer 9 | author 10 | collection 11 | column 12 | comment 13 | me 14 | post 15 | question 16 | topic 17 | -------------------------------------------------------------------------------- /docs/client.rst: -------------------------------------------------------------------------------- 1 | ZhihuClient 知乎客户端类 2 | ======================== 3 | 4 | .. autoclass:: zhihu.client.ZhihuClient 5 | :members: 6 | :special-members: __init__, __getattr__ 7 | -------------------------------------------------------------------------------- /docs/collection.rst: -------------------------------------------------------------------------------- 1 | Collection 收藏夹类 2 | =================== 3 | 4 | .. autoclass:: zhihu.collection.Collection 5 | :members: 6 | :special-members: __init__ 7 | 8 | .. autoclass:: zhihu.collection.CollectActivity 9 | :members: 10 | :special-members: __init__ 11 | 12 | .. autoclass:: zhihu.acttype.CollectActType 13 | :members: 14 | -------------------------------------------------------------------------------- /docs/column.rst: -------------------------------------------------------------------------------- 1 | Column 专栏类 2 | ============= 3 | 4 | .. autoclass:: zhihu.column.Column 5 | :members: 6 | :special-members: __init__ 7 | -------------------------------------------------------------------------------- /docs/comment.rst: -------------------------------------------------------------------------------- 1 | Comment 评论类 2 | =============== 3 | 4 | .. 
autoclass:: zhihu.comment.Comment 5 | :members: 6 | :special-members: __init__ 7 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # zhihu-py3 documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Feb 22 23:01:19 2015. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath('..')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.viewcode', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix of source filenames. 41 | source_suffix = '.rst' 42 | 43 | # The encoding of source files. 44 | #source_encoding = 'utf-8-sig' 45 | 46 | # The master toctree document. 47 | master_doc = 'index' 48 | 49 | # General information about the project. 
50 | project = 'zhihu-py3' 51 | copyright = '2015, 7sDream' 52 | 53 | # The version info for the project you're documenting, acts as replacement for 54 | # |version| and |release|, also used in various other places throughout the 55 | # built documents. 56 | # 57 | # The short X.Y version. 58 | version = '0.1' 59 | # The full version, including alpha/beta/rc tags. 60 | release = '0.1' 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 64 | #language = None 65 | 66 | # There are two options for replacing |today|: either, you set today to some 67 | # non-false value, then it is used: 68 | #today = '' 69 | # Else, today_fmt is used as the format for a strftime call. 70 | #today_fmt = '%B %d, %Y' 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | exclude_patterns = ['_build'] 75 | 76 | # The reST default role (used for this markup: `text`) to use for all 77 | # documents. 78 | #default_role = None 79 | 80 | # If true, '()' will be appended to :func: etc. cross-reference text. 81 | #add_function_parentheses = True 82 | 83 | # If true, the current module name will be prepended to all description 84 | # unit titles (such as .. function::). 85 | #add_module_names = True 86 | 87 | # If true, sectionauthor and moduleauthor directives will be shown in the 88 | # output. They are ignored by default. 89 | #show_authors = False 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 92 | pygments_style = 'sphinx' 93 | 94 | # A list of ignored prefixes for module index sorting. 95 | #modindex_common_prefix = [] 96 | 97 | # If true, keep warnings as "system message" paragraphs in the built documents. 98 | #keep_warnings = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. 
See the documentation for 104 | # a list of builtin themes. 105 | # on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org 106 | 107 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 108 | 109 | if not on_rtd: # only import and set the theme if we're building docs locally 110 | import sphinx_rtd_theme 111 | html_theme = 'sphinx_rtd_theme' 112 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 113 | else: 114 | html_theme = 'default' 115 | 116 | # Theme options are theme-specific and customize the look and feel of a theme 117 | # further. For a list of options available for each theme, see the 118 | # documentation. 119 | #html_theme_options = {} 120 | 121 | # Add any paths that contain custom themes here, relative to this directory. 122 | #html_theme_path = [] 123 | 124 | # The name for this set of Sphinx documents. If None, it defaults to 125 | # " v documentation". 126 | #html_title = None 127 | 128 | # A shorter title for the navigation bar. Default is the same as html_title. 129 | #html_short_title = None 130 | 131 | # The name of an image file (relative to this directory) to place at the top 132 | # of the sidebar. 133 | #html_logo = None 134 | 135 | # The name of an image file (within the static path) to use as favicon of the 136 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 137 | # pixels large. 138 | #html_favicon = None 139 | 140 | # Add any paths that contain custom static files (such as style sheets) here, 141 | # relative to this directory. They are copied after the builtin static files, 142 | # so a file named "default.css" will overwrite the builtin "default.css". 143 | html_static_path = ['_static'] 144 | 145 | # Add any extra paths that contain custom files (such as robots.txt or 146 | # .htaccess) here, relative to this directory. These files are copied 147 | # directly to the root of the documentation. 
148 | #html_extra_path = [] 149 | 150 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 151 | # using the given strftime format. 152 | #html_last_updated_fmt = '%b %d, %Y' 153 | 154 | # If true, SmartyPants will be used to convert quotes and dashes to 155 | # typographically correct entities. 156 | #html_use_smartypants = True 157 | 158 | # Custom sidebar templates, maps document names to template names. 159 | #html_sidebars = {} 160 | 161 | # Additional templates that should be rendered to pages, maps page names to 162 | # template names. 163 | #html_additional_pages = {} 164 | 165 | # If false, no module index is generated. 166 | #html_domain_indices = True 167 | 168 | # If false, no index is generated. 169 | #html_use_index = True 170 | 171 | # If true, the index is split into individual pages for each letter. 172 | #html_split_index = False 173 | 174 | # If true, links to the reST sources are added to the pages. 175 | #html_show_sourcelink = True 176 | 177 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 178 | #html_show_sphinx = True 179 | 180 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 181 | #html_show_copyright = True 182 | 183 | # If true, an OpenSearch description file will be output, and all pages will 184 | # contain a tag referring to it. The value of this option must be the 185 | # base URL from which the finished HTML is served. 186 | #html_use_opensearch = '' 187 | 188 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 189 | #html_file_suffix = None 190 | 191 | # Output file base name for HTML help builder. 192 | htmlhelp_basename = 'zhihu-py3doc' 193 | 194 | 195 | # -- Options for LaTeX output --------------------------------------------- 196 | 197 | latex_elements = { 198 | # The paper size ('letterpaper' or 'a4paper'). 199 | #'papersize': 'letterpaper', 200 | 201 | # The font size ('10pt', '11pt' or '12pt'). 
202 | #'pointsize': '10pt', 203 | 204 | # Additional stuff for the LaTeX preamble. 205 | #'preamble': '', 206 | } 207 | 208 | # Grouping the document tree into LaTeX files. List of tuples 209 | # (source start file, target name, title, 210 | # author, documentclass [howto, manual, or own class]). 211 | latex_documents = [ 212 | ('index', 'zhihu-py3.tex', 'zhihu-py3 Documentation', 213 | '7sDream', 'manual'), 214 | ] 215 | 216 | # The name of an image file (relative to this directory) to place at the top of 217 | # the title page. 218 | #latex_logo = None 219 | 220 | # For "manual" documents, if this is true, then toplevel headings are parts, 221 | # not chapters. 222 | #latex_use_parts = False 223 | 224 | # If true, show page references after internal links. 225 | #latex_show_pagerefs = False 226 | 227 | # If true, show URL addresses after external links. 228 | #latex_show_urls = False 229 | 230 | # Documents to append as an appendix to all manuals. 231 | #latex_appendices = [] 232 | 233 | # If false, no module index is generated. 234 | #latex_domain_indices = True 235 | 236 | 237 | # -- Options for manual page output --------------------------------------- 238 | 239 | # One entry per manual page. List of tuples 240 | # (source start file, name, description, authors, manual section). 241 | man_pages = [ 242 | ('index', 'zhihu-py3', 'zhihu-py3 Documentation', 243 | ['7sDream'], 1) 244 | ] 245 | 246 | # If true, show URL addresses after external links. 247 | #man_show_urls = False 248 | 249 | 250 | # -- Options for Texinfo output ------------------------------------------- 251 | 252 | # Grouping the document tree into Texinfo files. 
List of tuples 253 | # (source start file, target name, title, author, 254 | # dir menu entry, description, category) 255 | texinfo_documents = [ 256 | ('index', 'zhihu-py3', 'zhihu-py3 Documentation', 257 | '7sDream', 'zhihu-py3', 'One line description of project.', 258 | 'Miscellaneous'), 259 | ] 260 | 261 | # Documents to append as an appendix to all manuals. 262 | #texinfo_appendices = [] 263 | 264 | # If false, no module index is generated. 265 | #texinfo_domain_indices = True 266 | 267 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 268 | #texinfo_show_urls = 'footnote' 269 | 270 | # If true, do not generate a @detailmenu in the "Top" node's menu. 271 | #texinfo_no_detailmenu = False 272 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | 用法示例 3 | ======== 4 | 5 | .. contents:: 目录 6 | :local: 7 | 8 | 9 | 获取某用户的基本信息 10 | ==================== 11 | 12 | .. code-block:: python 13 | :linenos: 14 | 15 | from zhihu import ZhihuClient 16 | 17 | Cookies_File = 'cookies.json' 18 | 19 | client = ZhihuClient(Cookies_File) 20 | 21 | url = 'http://www.zhihu.com/people/excited-vczh' 22 | author = client.author(url) 23 | 24 | print('用户名 %s' % author.name) 25 | print('用户简介 %s' % author.motto) 26 | print('用户关注人数 %d' % author.followee_num) 27 | print('取用户粉丝数 %d' % author.follower_num) 28 | print('用户得到赞同数 %d' % author.upvote_num) 29 | print('用户得到感谢数 %d' % author.thank_num) 30 | print('用户提问数 %d' % author.question_num) 31 | print('用户答题数 %d' % author.answer_num) 32 | 33 | print('用户专栏文章数 %d,名称分别为:' % author.post_num) 34 | for column in author.columns: 35 | print(column.name) 36 | print('用户收藏夹数 %d,名称分别为:' % author.collection_num) 37 | for collection in author.collections: 38 | print(collection.name) 39 | 40 | .. 
code-block:: none 41 | :linenos: 42 | 43 | 用户名 vczh 44 | 用户简介 专业造轮子 https://github.com/vczh-libraries 45 | 用户关注人数 1339 46 | 取用户粉丝数 128100 47 | 用户得到赞同数 320326 48 | 用户得到感谢数 43045 49 | 用户提问数 238 50 | 用户答题数 8392 51 | 用户专栏文章数 25,名称分别为: 52 | vczh的日常 53 | 深井冰 IT 评论 54 | 编程语言与高级语言虚拟机杂谈(仮) 55 | 蓝色小药丸 56 | 用户收藏夹数 1,名称分别为: 57 | 李老师牛逼的答案 58 | 59 | 备份某问题所有答案 60 | ================== 61 | .. code-block:: python 62 | :linenos: 63 | 64 | question = client.question('http://www.zhihu.com/question/28092572') 65 | for answer in question.answers: 66 | answer.save() 67 | 68 | 会在当前目录下新建以问题标题命名的文件夹,并将所有html文件保存到该文件夹。 69 | 70 | .. code-block:: python 71 | 72 | answer.save(mode="md") 73 | 74 | 会保存为markdown格式。 75 | 76 | 备份某用户所有答案 77 | ================== 78 | 79 | .. code-block:: python 80 | :linenos: 81 | 82 | author = client.author('http://www.zhihu.com/people/7sdream') 83 | for answer in author.answers: 84 | answer.save(filepath=author.name) 85 | 86 | 备份某收藏夹所有答案,备份专栏文章同理,不再举例。 87 | 88 | 获取某用户点赞的动态 89 | ==================== 90 | 91 | .. code-block:: python 92 | :linenos: 93 | 94 | author = zhihu.author('http://www.zhihu.com/people/excited-vczh') 95 | for act in author.activities: 96 | if act.type == zhihu.ActType.UPVOTE_ANSWER: 97 | print('%s 在 %s 赞同了问题 %s 中 %s(motto: %s) 的回答, ' 98 | '此回答赞同数 %d' % 99 | (author.name, act.time, act.answer.question.title, 100 | act.answer.author.name, act.answer.author.motto, 101 | act.answer.upvote_num)) 102 | 103 | .. code-block:: none 104 | 105 | vczh 在 2015-07-24 08:35:06 赞同了问题 女生夏天穿超短裙是一种什么样的体验? 中 Light(motto: 我城故事多。) 的回答, 此回答赞同数 43 106 | vczh 在 2015-07-24 08:34:30 赞同了问题 女生夏天穿超短裙是一种什么样的体验? 中 Ms狐狸(motto: 随便写来玩玩) 的回答, 此回答赞同数 57 107 | …… 108 | 109 | 获取用户关注的人和关注此用户的人 110 | ================================ 111 | 112 | .. 
code-block:: python 113 | :linenos: 114 | 115 | author = client.author('http://www.zhihu.com/people/7sdream') 116 | 117 | print('--- Followers ---') 118 | for follower in author.followers: 119 | print(follower.name) 120 | 121 | print('--- Followees ---') 122 | for followee in author.followees: 123 | print(followee.name) 124 | 125 | .. code-block:: none 126 | 127 | --- Followers --- 128 | yuwei 129 | falling 130 | 周非 131 | ... 132 | --- Followees --- 133 | yuwei 134 | falling 135 | 伍声 136 | ... 137 | 138 | 计算某答案点赞中三零用户比例 139 | ============================ 140 | 141 | .. code-block:: python 142 | :linenos: 143 | 144 | url = 'http://www.zhihu.com/question/30404450/answer/47939822' 145 | answer = client.answer(url) 146 | 147 | three_zero_user_num = 0 148 | 149 | for upvoter in answer.upvoters: 150 | print(upvoter.name, upvoter.upvote_num, upvoter.thank_num, 151 | upvoter.question_num, upvoter.answer_num) 152 | if upvoter.is_zero_user(): 153 | three_zero_user_num += 1 154 | 155 | print('\n三零用户比例 %.3f%%' % (three_zero_user_num / answer.upvote_num * 100)) 156 | 157 | .. code-block:: none 158 | 159 | ... 160 | 宋飞 0 0 0 0 161 | 唐吃藕 10 0 0 5 162 | 163 | 三零用户比例 26.852% 164 | 165 | 爬取某用户关注的人的头像 166 | ======================== 167 | 168 | .. 
code-block:: python 169 | 170 | import requests 171 | import os 172 | import imghdr 173 | 174 | author = client.author('http://www.zhihu.com/people/excited-vczh') 175 | 176 | os.mkdir('vczh') 177 | for followee in author.followees: 178 | try: 179 | filename = followee.name + ' - ' + followee.id + '.jpeg' 180 | print(filename) 181 | with open('vczh/' + filename, 'wb') as f: 182 | f.write(requests.get(followee.photo_url).content) 183 | except KeyboardInterrupt: 184 | break 185 | 186 | for root, dirs, files in os.walk('vczh'): 187 | for filename in files: 188 | filename = os.path.join(root, filename) 189 | img_type = imghdr.what(filename) 190 | if img_type != 'jpeg' and img_type is not None: 191 | print(filename, '--->', img_type) 192 | os.rename(filename, filename[:-4] + img_type) 193 | 194 | 效果见 `这里 195 | `_。 196 | 197 | 198 | 使用非阻塞的网络请求 199 | ==================== 200 | 201 | 内建的所有请求都是阻塞的, 如果你希望使用其他的网络请求方法, 你可以把请求到的数据传入相关类的 `from_html` 方法中. 202 | `from_html` 方法用于接受数据, 返回相应的类的实例. 203 | 204 | 这里以使用 aiohttp 为例, 使用的是 python3.5 之后引入的语法. 无需置疑, 你要自己处理 session 205 | 206 | 比如要获取一个答案. 207 | 208 | .. code-block:: python 209 | 210 | import aiohttp 211 | import asyncio 212 | import zhihu 213 | 214 | 215 | async def get_answer(url, cookies, headers): 216 | async with aiohttp.get(url, cookies=cookies, headers=headers) as r: 217 | data = await r.text() 218 | 219 | # from_html 是 classmethod 220 | answer = zhihu.Answer.from_html(data) 221 | 222 | print(answer.content) 223 | 224 | url = 'answer url' 225 | cookies = dict(client._session.cookies) 226 | headers = client._session.headers 227 | 228 | loop = asyncio.get_event_loop() 229 | loop.run_until_complete(get_answer(url, cookies, headers)) 230 | 231 | 232 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
zhihu-py3 documentation master file, created by 2 | sphinx-quickstart on Sun Feb 22 23:01:19 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to zhihu-py3's documentation! 7 | ===================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | install 15 | examples 16 | login 17 | classes 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | 安装和测试 3 | ========== 4 | 5 | 6 | pip安装(推荐) 7 | =============== 8 | 9 | .. code-block:: bash 10 | 11 | (sudo) pip(3) install (--upgrade) zhihu-py3 12 | 13 | 如果想同时安装lxml,获得更快的解析速度、容错率和美观程度,请开启lxml feature: 14 | 15 | .. code-block:: bash 16 | 17 | (sudo) pip(3) install (--upgrade) zhihu-py3[lxml] 18 | 19 | 20 | 源码安装 21 | ======== 22 | 依赖于beautifulsoup4、requests、html2text,会自动安装。 23 | 24 | .. code-block:: bash 25 | 26 | git clone https://github.com/7sDream/zhihu-py3.git 27 | cd zhihu-py3 28 | python(3) setup.py install 29 | 30 | 31 | 测试 32 | ==== 33 | 34 | 若是使用源码安装,则安装完成后可以进行一下测试 35 | 36 | .. 
code-block:: bash 37 | 38 | cd test 39 | python(3) zhihu-test.py 40 | -------------------------------------------------------------------------------- /docs/login.rst: -------------------------------------------------------------------------------- 1 | 登录方法综述: 2 | ============== 3 | 4 | create\_cookies 5 | ~~~~~~~~~~~~~~~ 6 | 7 | 用于生成 cookies,用法见前面的介绍。 8 | 9 | login\_with\_cookies 10 | ~~~~~~~~~~~~~~~~~~~~ 11 | 12 | 用cookies字符串或文件名登录,\ ``ZhihuClient``\ 的构造函数就是使用这个方法。 13 | 14 | get\_captcha 15 | ~~~~~~~~~~~~ 16 | 17 | 获取验证码数据(bytes二进制数据),当用于其他项目时方便手动获取验证码图片数据进行处理,比如显示在控件内。 18 | 19 | login 20 | ~~~~~ 21 | 22 | 手动登陆方法,用于其他项目中方便手动无需 cookies 登陆,参数为: 23 | 24 | - email 25 | - password 26 | - captcha 27 | 28 | 返回值有三个 29 | 30 | - code:成功为0,失败为1 31 | - msg:错误消息,字符串格式,成功为空 32 | - cookies:cookies数据,字符串格式,失败为空 33 | 34 | login\_in\_terminal 35 | ~~~~~~~~~~~~~~~~~~~ 36 | 37 | 跟着提示在终端里登录知乎,返回cookies字符串,create\_cookies就是帮你做了将这个函数的返回值保存下来的工作而已。 38 | 39 | 综上 40 | ~~~~ 41 | 42 | 如果你只是写个小脚本测试玩玩,可以使用: 43 | 44 | .. code-block:: python 45 | 46 | from zhihu import ZhihuClient 47 | client = ZhihuClient() 48 | client.login_in_terminal() 49 | 50 | # do thing you want with client 51 | 52 | 如果你的脚本不是大项目,又要多次运行,可以先按照上文方法create\_cookies,再使用: 53 | 54 | .. code-block:: python 55 | 56 | from zhihu import ZhihuClient 57 | Cookies_File = 'cookies.json' 58 | client = ZhihuClient(Cookies_File) 59 | 60 | 如果项目比较大(以GUI项目为例),可以在判断出是首次使用(没有cookies文件)时,弹出登录对话框,使用get\_captcha获取验证码数据,再调用login函数手动登录并在登录成功后保存cookies文件: 61 | 62 | .. 
code-block:: python 63 | 64 | import os 65 | from zhihu import ZhihuClient 66 | 67 | Cookies_File = 'config/cookies.json' 68 | 69 | client = ZhihuClient() 70 | 71 | def on_window_show() 72 | login_btn.disable() 73 | if os.path.isfile(Cookies_File) is False: 74 | captcha_imgbox.setData(client.get_captcha()) 75 | login_btn.enable() 76 | else: 77 | with open(Cookies_File) as f 78 | client.login_with_cookies(f.read()) 79 | # turn to main window 80 | 81 | def on_login_button_clicked(): 82 | login_btn.disable() 83 | email = email_edit.get_text() 84 | password = password_edit.get_text() 85 | captcha = captcha_edit.get_text() 86 | code, msg, cookies = client.login(email, password, captcha) 87 | if code == 0: 88 | with open(Cookies_File, 'w') as f 89 | f.write(cookies) 90 | # turn to main window 91 | else: 92 | msgbox(msg) 93 | login_btn.enable() 94 | 95 | 注:以上和GUI有关的代码皆为我乱想出来的,仅作示例之用。 96 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. 
qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. 
The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\zhihu-py3.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\zhihu-py3.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 
214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/me.rst: -------------------------------------------------------------------------------- 1 | Me 用户操作类 2 | =============== 3 | 4 | **敬告:本类提供的点赞,反对功能,请在使用前三思,并且绝对不要用于批量点赞,批量反对等不甚道德的脚本。和谐知乎,你我共建,谢谢理解。** 5 | 6 | .. autoclass:: zhihu.me.Me 7 | :members: 8 | :special-members: __init__ 9 | -------------------------------------------------------------------------------- /docs/post.rst: -------------------------------------------------------------------------------- 1 | Post 专栏文章类 2 | =============== 3 | 4 | .. autoclass:: zhihu.post.Post 5 | :members: 6 | :special-members: __init__ 7 | -------------------------------------------------------------------------------- /docs/question.rst: -------------------------------------------------------------------------------- 1 | Question 问题类 2 | =============== 3 | 4 | .. 
autoclass:: zhihu.question.Question 5 | :members: 6 | :special-members: __init__ 7 | 8 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | beautifulsoup4 -------------------------------------------------------------------------------- /docs/topic.rst: -------------------------------------------------------------------------------- 1 | Topic 话题类 2 | ============ 3 | 4 | .. autoclass:: zhihu.topic.Topic 5 | :members: 6 | :special-members: __init__ 7 | -------------------------------------------------------------------------------- /example/analyze_user.py: -------------------------------------------------------------------------------- 1 | """ 2 | What's this: 3 | | this is an Example of zhihu-py3 to analyze is user bought some fans. 4 | 5 | Usage: 6 | | 1. copy your cookies file to the dir where me located 7 | | 2. change USER_URL at line 12 to the user's home page url. 8 | | 3. var FOLLOWER_CHECK_MAX_NUM defined how many newest follower will be checked. 9 | | 4. var ANSWER_CHECK_MAX_NUM defined how many newest answer of user will be checked. 10 | | 5. just run me. 11 | 12 | Info: 13 | | if FOLLOWER_CHECK_MAX_NUM big than user's follower amount, it will be auto set to user's follower amount. 14 | 15 | Author: 16 | | 7sDream @ 2015.12.19. 
17 | """ 18 | 19 | from zhihu import ZhihuClient 20 | import datetime 21 | 22 | # ============================== 23 | 24 | USER_URL = "https://www.zhihu.com/people/7sdream" 25 | 26 | FOLLOWER_CHECK_MAX_NUM = 2000 27 | ANSWER_CHECK_MAX_NUM = 20 28 | 29 | # ============================== 30 | 31 | 32 | def is_zero_user(author): 33 | return (author.upvote_num + author.question_num + author.answer_num) <= 3 34 | 35 | 36 | client = ZhihuClient('test.json') 37 | 38 | user = client.author(USER_URL) 39 | 40 | print("检查用户{user.name} at {time}".format(user=user, time=datetime.datetime.now())) 41 | 42 | if user.follower_num < FOLLOWER_CHECK_MAX_NUM: 43 | FOLLOWER_CHECK_MAX_NUM = user.follower_num 44 | 45 | print("正在检查前{FOLLOWER_CHECK_MAX_NUM}个关注者....".format(**locals())) 46 | 47 | zeros = 0 48 | for _, follower in zip(range(FOLLOWER_CHECK_MAX_NUM), user.followers): 49 | if is_zero_user(follower): 50 | zeros += 1 51 | 52 | rate = zeros / FOLLOWER_CHECK_MAX_NUM 53 | print("{user.name}最近{FOLLOWER_CHECK_MAX_NUM}个关注者中,三无用户{zeros}个,占比{rate:.2%}".format(**locals())) 54 | 55 | print("正在检查用户答案点赞者...") 56 | 57 | for _, ans in zip(range(ANSWER_CHECK_MAX_NUM), user.answers): 58 | zeros = 0 59 | for upvoter in ans.upvoters: 60 | if is_zero_user(upvoter): 61 | zeros += 1 62 | rate = zeros / ans.upvote_num if ans.upvote_num != 0 else 0 63 | print("在问题「{ans.question.title}」{user.name}的答案中,共有{ans.upvote_num}个点赞用户,其中三无用户{zeros}个,三无用户比率{rate:.2%}。".format(**locals())) 64 | -------------------------------------------------------------------------------- /example/test.json: -------------------------------------------------------------------------------- 1 | {"unlock_ticket": "\"QUJETWRxZ3ZKQWtYQUFBQVlRSlZUUU9qYWxiVC1ENXU2WVhCejJDTlpFQ2FwQXBSdFpsaWxBPT0=|1449827323|4a3dc8a8f1add7d68f446d799ec86f192d9a4e83\"", "z_c0": "\"QUJETWRxZ3ZKQWtYQUFBQVlRSlZUZnNva2xhMEdldWRoVzVXZU1xUkEwd0VFNENxb0dXUHpnPT0=|1449827323|a2ecf0642368c1b43368bf83d5f2cc1ccbf6649a\"", "_xsrf": "1452764141227ef365f2d0c7695c0457", 
"cap_id": "\"NjY3ZmUxNGVlZDE2NGNmNGFlY2QzOTAzNTU0ZDVlOWI=|1449827304|723c630cd1c47d17fae91e416bbf522f6d2d31b0\"", "q_c1": "d5271065cdea4b029dfae6debf6e7832|1449827304000|1449827304000"} -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | python-tag=py3 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import unicode_literals 5 | 6 | import re 7 | import ast 8 | 9 | try: 10 | from setuptools import setup 11 | except ImportError: 12 | from distutils.core import setup 13 | 14 | 15 | def extract_version(): 16 | with open('zhihu/__init__.py', 'rb') as f_version: 17 | ast_tree = re.search( 18 | r'__version__ = (.*)', 19 | f_version.read().decode('utf-8') 20 | ).group(1) 21 | if ast_tree is None: 22 | raise RuntimeError('Cannot find version information') 23 | return str(ast.literal_eval(ast_tree)) 24 | 25 | 26 | with open('README.rst', 'rb') as f_readme: 27 | readme = f_readme.read().decode('utf-8') 28 | 29 | packages = ['zhihu'] 30 | 31 | version = extract_version() 32 | 33 | setup( 34 | name='zhihu-py3', 35 | version=version, 36 | keywords=['zhihu', 'network', 'spider', 'html'], 37 | description='Zhihu UNOFFICIAL API library in python3, ' 38 | 'with help of bs4, lxml, requests and html2text.', 39 | long_description=readme, 40 | 41 | author='7sDream', 42 | author_email='didislover@gmail.com', 43 | license='MIT', 44 | 45 | url='https://github.com/7sDream/zhihu-py3', 46 | download_url='https://github.com/7sDream/zhihu-py3', 47 | 48 | install_requires=[ 49 | 'beautifulsoup4', 50 | 'requests', 51 | 'html2text' 52 | ], 53 | extras_require={ 54 | 'lxml': ['lxml'] 55 | }, 56 | packages=packages, 57 | 58 | classifiers=[ 59 | 'Development 
Status :: 3 - Alpha', 60 | 'Environment :: Web Environment', 61 | 'Intended Audience :: Developers', 62 | 'License :: OSI Approved :: MIT License', 63 | 'Operating System :: OS Independent', 64 | 'Programming Language :: Python :: 3', 65 | 'Topic :: Internet :: WWW/HTTP', 66 | 'Topic :: Software Development :: Libraries :: Python Modules' 67 | ] 68 | ) 69 | -------------------------------------------------------------------------------- /test/data/answer.md: -------------------------------------------------------------------------------- 1 | 多谢刘柯的邀请!这也是个有意思的提问。 2 | 3 | 说说家庭文化吧。 4 | 5 | 家庭文化是一个家庭世代传承过程中形成和发展起来的 ** 较为稳定的生活方式、生活作风、传统习惯、家庭道德规范以及为人处世之道等等。 ** 6 | 7 | 家庭是一个人生存的最早的文化环境,家庭的价值观是家庭文化的核心,有教育的功能。一个家庭中的父母扮演的就是施教者的角色,父母的价值观和文化素养将会对孩子的成长形成决定性的影响。 8 | 9 | 家庭文化对孩子的影响体现在意识形态的影响和行为规范方面。中国的传统家庭文化非常注重道德教育,强调每个人对家庭的责任和义务,要敬老爱幼等等。 10 | 11 | 象题主的家庭教育就很传统,父母教育题主在接受他人帮助的时候及时地道谢,非常讲究礼数, ** 是很符合中国人“克己复礼”的行事风格的。 ** 12 | 13 | 东方的文化,是很强调做人的道理的,题主在父母教育下 ** 秉持“以德报人、以诚相待”的处世原则 ** ,怎么可能是“错”的呢? 14 | 15 | 题主所提到的室友、朋友、男朋友等, ** 由于他们每个人也都有自己的家庭文化,也有自己的为人处世的信条和习惯,所以不尽和题主相同,所以觉得题主见外,也是有可能的,但并不真正成为一种交际上的冲突,他们只是没有去习惯你的家庭文化给你带来的生活交际风格。 ** 16 | 17 | 不同的生活习惯,没有错与对,没有好与坏,只有是否理解和接受,是不是? 18 | 19 | 一个社会的基本伦理是一个人的言行不影响他人的感受和利益,你所做的事是及时地向帮助自己的人道谢,你为周围的人体现了来自你的家庭文化,这样的文化绝对是讨喜的,而不会让人厌烦。 20 | 21 | 相熟的人也许会觉得你说谢谢太过客气,如上所述,也许是他们的家庭文化相对比较粗放,并不太在意一定要你道谢或者回报,这只是家庭文化上人际交往过程中的一个非常小的侧面的体现,应该不会给你造成心理困扰。 22 | 23 | 人是人际关系的动物,必须生活在人际关系层面,无论你愿意不愿意,都得遵循文化规则来呈现自己,中国人是很推崇中庸之道的,人际关系和谐,内心秩序井然,人才能在宽松自在的感觉中驾驭生活、平衡自我。 24 | 25 | 所以无论怎么说,你在日常生活中呈现自己的文化色彩,都是受人欢迎的, ** 谁会真正的去埋怨一个懂礼貌的懂得与人为善的人呢? ** 26 | 27 | 你只需要去区分一件事,分清你的三个社交等级: ** 谁和你是亲密关系,谁和你是朋友关系,谁和你是一般关系。 ** 你根据这三层关系的划分,可以稍微的让自己不那么恪守自己的家庭文化规则,在亲密的人、相熟的人面前不拘小节, ** 偶而允许自己也不那么的象平时的自己 ** ,这样的体验也不错啊! 28 | 29 | 再进一步的说,那些说不习惯你常常客气的人,不见得在帮助你之后,真的可以不需要你道谢,亲密的人往往帮的忙都是大忙,都是劳心费神的,你一句“谢谢你”就能让他们感觉为你做事很值,而不是在做苦力。如果费了九牛二虎之力,一句春风拂面的话都听不到,久而久之,还有什么动力为你做事呢? 
30 | 31 | 人对他人的期待总是潜伏着的,如果要学为人处世,就要学彻底,受人恩惠与帮助,多说谢谢,让他人心理也能得到平衡,这种做法是没错的。 32 | 33 | 其实 ** ,更应该对亲密的人说谢谢,感谢他的爱,感谢他的无私奉献,感谢他的一路相伴,这个世界上,亲密的关系都是互爱互惠互助建构起来的。 ** 34 | 35 | 36 | 你会坚持与人为善、以诚待人的,对吧! 37 | -------------------------------------------------------------------------------- /test/data/answer_content.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |
7 | 多谢刘柯的邀请!这也是个有意思的提问。 8 |
9 |
10 | 说说家庭文化吧。 11 |
12 |
13 | 家庭文化是一个家庭世代传承过程中形成和发展起来的 14 | 15 | 较为稳定的生活方式、生活作风、传统习惯、家庭道德规范以及为人处世之道等等。 16 | 17 |
18 |
19 | 家庭是一个人生存的最早的文化环境,家庭的价值观是家庭文化的核心,有教育的功能。一个家庭中的父母扮演的就是施教者的角色,父母的价值观和文化素养将会对孩子的成长形成决定性的影响。 20 |
21 |
22 | 家庭文化对孩子的影响体现在意识形态的影响和行为规范方面。中国的传统家庭文化非常注重道德教育,强调每个人对家庭的责任和义务,要敬老爱幼等等。 23 |
24 |
25 | 象题主的家庭教育就很传统,父母教育题主在接受他人帮助的时候及时地道谢,非常讲究礼数, 26 | 27 | 是很符合中国人“克己复礼”的行事风格的。 28 | 29 |
30 |
31 | 东方的文化,是很强调做人的道理的,题主在父母教育下 32 | 33 | 秉持“以德报人、以诚相待”的处世原则 34 | 35 | ,怎么可能是“错”的呢? 36 |
37 |
38 | 题主所提到的室友、朋友、男朋友等, 39 | 40 | 由于他们每个人也都有自己的家庭文化,也有自己的为人处世的信条和习惯,所以不尽和题主相同,所以觉得题主见外,也是有可能的,但并不真正成为一种交际上的冲突,他们只是没有去习惯你的家庭文化给你带来的生活交际风格。 41 | 42 |
43 |
44 | 不同的生活习惯,没有错与对,没有好与坏,只有是否理解和接受,是不是? 45 |
46 |
47 | 一个社会的基本伦理是一个人的言行不影响他人的感受和利益,你所做的事是及时地向帮助自己的人道谢,你为周围的人体现了来自你的家庭文化,这样的文化绝对是讨喜的,而不会让人厌烦。 48 |
49 |
50 | 相熟的人也许会觉得你说谢谢太过客气,如上所述,也许是他们的家庭文化相对比较粗放,并不太在意一定要你道谢或者回报,这只是家庭文化上人际交往过程中的一个非常小的侧面的体现,应该不会给你造成心理困扰。 51 |
52 |
53 | 人是人际关系的动物,必须生活在人际关系层面,无论你愿意不愿意,都得遵循文化规则来呈现自己,中国人是很推崇中庸之道的,人际关系和谐,内心秩序井然,人才能在宽松自在的感觉中驾驭生活、平衡自我。 54 |
55 |
56 | 所以无论怎么说,你在日常生活中呈现自己的文化色彩,都是受人欢迎的, 57 | 58 | 谁会真正的去埋怨一个懂礼貌的懂得与人为善的人呢? 59 | 60 |
61 |
62 | 你只需要去区分一件事,分清你的三个社交等级: 63 | 64 | 谁和你是亲密关系,谁和你是朋友关系,谁和你是一般关系。 65 | 66 | 你根据这三层关系的划分,可以稍微的让自己不那么恪守自己的家庭文化规则,在亲密的人、相熟的人面前不拘小节, 67 | 68 | 偶而允许自己也不那么的象平时的自己 69 | 70 | ,这样的体验也不错啊! 71 |
72 |
73 | 再进一步的说,那些说不习惯你常常客气的人,不见得在帮助你之后,真的可以不需要你道谢,亲密的人往往帮的忙都是大忙,都是劳心费神的,你一句“谢谢你”就能让他们感觉为你做事很值,而不是在做苦力。如果费了九牛二虎之力,一句春风拂面的话都听不到,久而久之,还有什么动力为你做事呢? 74 |
75 |
76 | 人对他人的期待总是潜伏着的,如果要学为人处世,就要学彻底,受人恩惠与帮助,多说谢谢,让他人心理也能得到平衡,这种做法是没错的。 77 |
78 |
79 | 其实 80 | 81 | ,更应该对亲密的人说谢谢,感谢他的爱,感谢他的无私奉献,感谢他的一路相伴,这个世界上,亲密的关系都是互爱互惠互助建构起来的。 82 | 83 |
84 |
85 |
86 | 你会坚持与人为善、以诚待人的,对吧! 87 |
88 | 89 | -------------------------------------------------------------------------------- /test/data/answer_upvoter.html: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | 5 | 6 |
7 | 12 | 13 | 14 |
15 |
16 | Mikuroneko 17 | 不好意思我属猫(ΦωΦ)フフフ… 18 |
19 | 27 |
28 |
-------------------------------------------------------------------------------- /test/data/column.json: -------------------------------------------------------------------------------- 1 | {"following": false, "followersCount": 76605, "canPost": false, "avatar": {"id": "993d9179c", "template": "http://pic1.zhimg.com/{id}_{size}.jpg"}, "postsCount": 69, "url": "/xiepanda", "description": "\u6709\u7a7a\u5c31\u8d34\u70b9\u65e7\u6587\u7ae0\uff0c\u5076\u5c14\u60f3\u66f4\u65b0\u4e86\u5c31\u653e\u70b9\u65b0\u6587\u7ae0", "topics": [], "activateAuthorRequested": false, "href": "/api/columns/xiepanda", "commentPermission": "anyone", "acceptSubmission": true, "name": "\u8c22\u718a\u732b\u51fa\u6ca1\u6ce8\u610f", "slug": "xiepanda", "creator": {"hash": "c948a6c96e21986af5d9c720334989f7", "slug": "xiepanda", "bio": "\u642c\u5230\u6df1\u5733\u5566\uff0c\u5e0c\u671b\u8ba4\u8bc6\u4e9b\u5f53\u5730\u670b\u53cb\uff0c\u8bf7\u79c1\u4fe1\u3002", "name": "\u8c22\u718a\u732b\u541b", "avatar": {"id": "e7a5b32f3", "template": "http://pic4.zhimg.com/{id}_{size}.jpg"}, "profileUrl": "http://www.zhihu.com/people/xiepanda", "description": "\u56db\u4f4d\u77e5\u53cb\u5171\u540c\u7ffb\u8bd1\u7684\u300a\u548c\u5b69\u5b50\u8c08\u8c08\u764c\u75c7\u300b\uff0c\u5173\u4e8e\u300c\u786e\u8bca\u809d\u764c\u665a\u671f\uff0c\u600e\u6837\u5c06\u75c5\u60c5\u544a\u77e5\u5b69\u5b50\u5e76\u51cf\u5c11\u4f24\u5bb3\uff1f\u300d\u7684\u8be6\u7ec6\u89e3\u7b54 \u548c\u4e0b\u8f7d\uff1ahttp://zhuanlan.zhihu.com/gongyi/19920822"}} -------------------------------------------------------------------------------- /test/data/column_post.json: -------------------------------------------------------------------------------- 1 | {"state": "published", "summary": 
"\u8fd9\u662f\u4e2a\u5173\u4e8e\u79d1\u666e\u516c\u76ca\u9879\u76ee\u7684\u786c\u5e7f\uff0c\u800c\u4e14\u662f\u7bc7\u627e\u8bfb\u8005\u4f17\u7b79\u7684\u786c\u5e7f\uff0c\u7ee7\u7eed\u8bfb\u4e0b\u53bb\u4e4b\u524d\u8bf7\u63a5\u53d7\u8fd9\u6837\u7684\u8bbe\u5b9a\uff0c\u4e0d\u7136\u5c31\u4e0d\u8981\u8bfb\u4e86\u3002\u6211\u6709\u4e2a\u53eb\u9879\u680b\u6881\u7684\u8001\u670b\u53cb\uff0c\u4ed6\u662f\u4e2a\u5f88\u6015\u6b7b\u7684\u4eba\u3002 \u90a3\u5929\u548c\u680b\u6881\u4e00\u8d77\u5403\u996d\uff0c\u5927\u5bb6\u4e00\u8d77\u559d\u70b9\u4f50\u9910\u8461\u8404\u9152\uff0c\u680b\u6881\u4e0d\u613f\u610f\u559d\u3002\u56e0\u4e3a\u680b\u6881\u5f88\u6015\u6b7b\uff0c\u4efb\u4f55\u6709\u79d1\u5b66\u7814\u7a76\u8bc1\u2026", "commentsCount": 199, "canComment": false, "publishedTime": "2015-09-06T20:03:31+08:00", "snapshotUrl": "", "url": "/xiepanda/20202275", "author": {"hash": "c948a6c96e21986af5d9c720334989f7", "slug": "xiepanda", "bio": "\u642c\u5230\u6df1\u5733\u5566\uff0c\u5e0c\u671b\u8ba4\u8bc6\u4e9b\u5f53\u5730\u670b\u53cb\uff0c\u8bf7\u79c1\u4fe1\u3002", "name": "\u8c22\u718a\u732b\u541b", "avatar": {"id": "e7a5b32f3", "template": "http://pic4.zhimg.com/{id}_{size}.jpg"}, "profileUrl": "http://www.zhihu.com/people/xiepanda", "description": "\u56db\u4f4d\u77e5\u53cb\u5171\u540c\u7ffb\u8bd1\u7684\u300a\u548c\u5b69\u5b50\u8c08\u8c08\u764c\u75c7\u300b\uff0c\u5173\u4e8e\u300c\u786e\u8bca\u809d\u764c\u665a\u671f\uff0c\u600e\u6837\u5c06\u75c5\u60c5\u544a\u77e5\u5b69\u5b50\u5e76\u51cf\u5c11\u4f24\u5bb3\uff1f\u300d\u7684\u8be6\u7ec6\u89e3\u7b54 \u548c\u4e0b\u8f7d\uff1ahttp://zhuanlan.zhihu.com/gongyi/19920822"}, "topics": [], "rating": "none", "href": "/api/columns/xiepanda/posts/20202275", "column": {"name": "\u8c22\u718a\u732b\u51fa\u6ca1\u6ce8\u610f", "slug": "xiepanda"}, "titleImage": "http://pic2.zhimg.com/3cae80c34fcdd484e04a2c40a3e5ffbd_b.jpg", "likesCount": 963, "content": "

\u8fd9\u662f\u4e2a\u5173\u4e8e\u79d1\u666e\u516c\u76ca\u9879\u76ee\u7684\u786c\u5e7f\uff0c\u800c\u4e14\u662f\u7bc7\u627e\u8bfb\u8005\u4f17\u7b79\u7684\u786c\u5e7f\uff0c\u7ee7\u7eed\u8bfb\u4e0b\u53bb\u4e4b\u524d\u8bf7\u63a5\u53d7\u8fd9\u6837\u7684\u8bbe\u5b9a\uff0c\u4e0d\u7136\u5c31\u4e0d\u8981\u8bfb\u4e86\u3002

\u6211\u6709\u4e2a\u53eb\u9879\u680b\u6881\u7684\u8001\u670b\u53cb\uff0c\u4ed6\u662f\u4e2a\u5f88\u6015\u6b7b\u7684\u4eba\u3002

\u90a3\u5929\u548c\u680b\u6881\u4e00\u8d77\u5403\u996d\uff0c\u5927\u5bb6\u4e00\u8d77\u559d\u70b9\u4f50\u9910\u8461\u8404\u9152\uff0c\u680b\u6881\u4e0d\u613f\u610f\u559d\u3002\u56e0\u4e3a\u680b\u6881\u5f88\u6015\u6b7b\uff0c\u4efb\u4f55\u6709\u79d1\u5b66\u7814\u7a76\u8bc1\u660e\u53ef\u80fd\u5f71\u54cd\u5bff\u547d\u7684\u4e1c\u897f\u4ed6\u90fd\u4e0d\u78b0\uff0c\u6240\u4ee5\u4ed6\u4e0d\u559d\u9152\u3002

\u5c31\u662f\u8fd9\u4e2a\u5f88\u6015\u6b7b\u7684\u680b\u6881\uff0c\u4e0d\u4e45\u524d\u8f9e\u53bb\u4e86\u5a92\u4f53\u7684\u5de5\u4f5c\uff0c\u51b3\u5b9a\u8fde\u5403100\u5929\u8f6c\u57fa\u56e0\u98df\u7269\uff0c\u5e76\u4e14\u7528\u7eaa\u5f55\u7247\u7684\u65b9\u5f0f\u5168\u7a0b\u8bb0\u5f55\u3002

\u680b\u6881\u9080\u8bf7\u6211\u4f5c\u4e3a\u4ed6\u8fd9\u4e2a\u201c\u8fde\u5403100\u5929\u8f6c\u57fa\u56e0\u98df\u7269\u201d\u9879\u76ee\u7684\u7f8e\u98df\u987e\u95ee\uff0c\u4e8e\u662f\u4e0d\u4e45\u524d\u6211\u53bb\u5e7f\u5dde\u5403\u4e86\u4e00\u987f\u201c\u8f6c\u57fa\u56e0\u5927\u9910\u201d\uff0c\u7528\u7684\u662f\u8f6c\u57fa\u56e0\u5927\u7c73\u3001\u7389\u7c73\u3001\u6728\u74dc\u548c\u5927\u8c46\u6cb9\u8fd9\u4e9b\u98df\u6750\u3002\u5927\u6982\u662f\u56e0\u4e3a\u4e00\u672c\u6b63\u7ecf\u5148\u7b7e\u4e86\u300a\u77e5\u60c5\u540c\u610f\u4e66\u300b\u7684\u7f18\u6545\uff0c\u5403\u996d\u65f6\u5bf9\u98df\u6750\u672c\u8eab\u7684\u5728\u610f\u591a\u8fc7\u4e86\u5bf9\u6599\u7406\u6c34\u5e73\u7684\u8bc4\u4ef7\u3002\u53ef\u60dc\u5e76\u6ca1\u6709\u5403\u5230\u8f6c\u57fa\u56e0\u7684\u725b\u8089\u548c\u9e21\u7fc5\u3002

\u4e3a\u4ec0\u4e48\u6211\u613f\u610f\u65e0\u507f\u505a\u8fd9\u4e2a\u9879\u76ee\u7684\u7f8e\u98df\u987e\u95ee\u5462\uff1f\u56e0\u4e3a\u6211\u4e0d\u4f46\u81ea\u5df1\u5e38\u5403\u8f6c\u57fa\u56e0\u98df\u7269\uff0c\u4e5f\u9f13\u52b1\u8eab\u8fb9\u7684\u670b\u53cb\u548c\u5bb6\u4eba\u5403\u8f6c\u57fa\u56e0\u98df\u7269\uff0c\u800c\u4e14\u6211\u4e00\u76f4\u8ba4\u4e3a\u4e00\u4e2a\u771f\u6b63\u7684\u5403\u8d27\u5e94\u8be5\u7528\u884c\u52a8\u652f\u6301\u8f6c\u57fa\u56e0\u3002

\u8f6c\u57fa\u56e0\u7684\u98df\u6750\u5403\u8d77\u6765\u6709\u4ec0\u4e48\u7279\u522b\u4e4b\u5904\u5417\uff1f\u5f88\u9057\u61be\uff0c\u6211\u662f\u6ca1\u6709\u5c1d\u51fa\u6765\u3002\u90a3\u4e3a\u4ec0\u4e48\u8bf4\u771f\u6b63\u7684\u5403\u8d27\u5e94\u8be5\u652f\u6301\u8f6c\u57fa\u56e0\u5462\uff1f

\u56e0\u4e3a\u8f6c\u57fa\u56e0\u53ef\u4ee5\u8ba9\u98df\u6750\u53d8\u5f97\u66f4\u597d\u5403\u3002

\u56e0\u4e3a\u8f6c\u57fa\u56e0\u53ef\u4ee5\u8ba9\u98df\u6750\u53d8\u5f97\u66f4\u597d\u5403\u3002

\u56e0\u4e3a\u8f6c\u57fa\u56e0\u53ef\u4ee5\u8ba9\u98df\u6750\u53d8\u5f97\u66f4\u597d\u5403\u3002


\u6211\u4ee5\u524d\u5199\u8fc7\u4e00\u7bc7\u4e13\u680f \u300a\u7eaf\u5929\u7136\u539f\u751f\u6001\u7684\u4e1c\u897f\u5c31\u597d\u5403\uff1f\u8fd9\u53ef\u4e0d\u4e00\u5b9a\u300b\u3002\u8fd9\u6b21\u518d\u6765\u8c08\u8c08\u4e3a\u4ec0\u4e48\u201c\u975e\u5929\u7136\u201d\u7684\u6280\u672f\u53ef\u4ee5\u8ba9\u98df\u7269\u53d8\u5f97\u66f4\u597d\u5403\u3002

\u901a\u5e38\uff0c\u6211\u4eec\u8981\u60f3\u505a\u70b9\u597d\u5403\u7684\uff0c\u7b2c\u4e00\u6b65\u662f\u53bb\u5e02\u573a\u4e0a\u201c\u6311\u201d\u98df\u6750\u3002\u8fd9\u4e2a\u201c\u6311\u201d\u7684\u610f\u601d\uff0c\u5f53\u7136\u662f\u8bf4\u98df\u6750\u5df2\u7ecf\u5206\u95e8\u522b\u7c7b\u6446\u5728\u90a3\u91cc\uff0c\u7b49\u7740\u6211\u4eec\u51ed\u7ecf\u9a8c\u3001\u53e3\u5473\u548c\u7ecf\u6d4e\u80fd\u529b\u53bb\u9009\u62e9\u3002


\u9047\u5230\u7279\u522b\u6ee1\u610f\u7684\u98df\u6750\uff0c\u5f88\u591a\u4eba\u4f1a\u611f\u6168\u4e00\u53e5\uff1a\u611f\u8c22\u5927\u81ea\u7136\u7684\u9988\u8d60\uff01

\u4f46\u5b9e\u9645\u4e0a\uff0c\u8fd9\u662f\u4e00\u4e2a\u7279\u522b\u5e38\u89c1\u7684\u8bef\u89e3\u3002

\u4f60\u4eec\u4eba\u7c7b\u73b0\u5728\u7eb3\u5165\u98df\u8c31\u7684\u8fd9\u4e9b\u98df\u6750\uff0c\u7edd\u5927\u90e8\u5206\u90fd\u4e0d\u662f\u81ea\u7136\u8fdb\u5316\u7684\u4ea7\u7269\u3002\u4eba\u7c7b\u4e00\u6b65\u6b65\u722c\u5230\u98df\u7269\u94fe\u7684\u9876\u7aef\uff0c\u8fd9\u4e2a\u8fc7\u7a0b\u4e2d\uff0c\u6211\u4eec\u5bf9\u6574\u6761\u98df\u7269\u94fe\u6240\u505a\u7684\u4e8b\u60c5\u53ef\u8c13\u201c\u60e8\u65e0\u5929\u9053\u201d\u3002


\u9996\u5148\u662f\u7cae\u98df

\u7136\u540e\u662f\u6c34\u679c

\u8fd8\u6709\u8089

\u690d\u7269\u7684\u6742\u4ea4

\u4e0d\u540c\u7269\u79cd\u7684\u5ac1\u63a5\uff0c\u6bd4\u5982\u5ac1\u63a5\u6c34\u679c

\u6211\u4eec\u73b0\u5728\u5403\u7684\u5927\u90e8\u5206\u4e3b\u98df\u3001\u8089\u3001\u6c34\u679c\u3001\u852c\u83dc\uff0c\u57fa\u672c\u4e0a\u90fd\u4e0d\u662f\u5927\u81ea\u7136\u7684\u9988\u8d60\uff0c\u90fd\u662f\u88ab\u6781\u901f\u81a8\u80c0\u7684\u4eba\u53e3\uff0c\u8fd8\u6709\u5403\u8d27\u4eec\u8d8a\u6765\u8d8a\u6311\u5254\u7684\u53e3\u5473\u903c\u7740\u6539\u9020\u51fa\u6765\u7684\u3002

\u5f97\u76ca\u4e8e\u5bf9\u98df\u6750\u4e0d\u65ad\u7684\u6539\u9020\uff0c\u5730\u7403\u627f\u8f7d\u7684\u4eba\u53e3\u6570\u91cf\u76f8\u5bf9\u4e00\u4e07\u5e74\u524d\u7ffb\u4e86\u7ea61000\u500d\u3002\u4e00\u4e2a\u751f\u6d3b\u5728\u6df1\u5733\u7684\u73b0\u4ee3\u4eba\u8c22\u718a\u732b\u4e00\u751f\u9884\u8ba1\u4f1a\u5403\u638940\u523060\u5428\u98df\u7269\uff0c\u6570\u91cf\u4e0a\u6bd4\u79e6\u671d\u7684\u7956\u5148\u8c22\u718a\u8001\u8001\u8001\u8001\u732b\u591a\u4e0d\u4e86\u5f88\u591a\u3002\u4f46\u662f\u4ed6\u8fd9\u4e00\u751f\u53ef\u4ee5\u54c1\u5c1d\u5230\u7684\u98df\u6750\u79cd\u7c7b\uff0c\u5c31\u6bd4\u8c22\u718a\u8001\u8001\u8001\u8001\u732b\u4e0d\u77e5\u9ad8\u5230\u54ea\u91cc\u53bb\u4e86\u3002

\u90a3\u4e48\u518d\u8fc750\u5e74\uff0c\u518d\u8fc7300\u5e74\u5462\uff1f\u8c22\u718a\u732b\u7684\u540e\u4ee3\u8c22\u718a\u5c0f\u5c0f\u5c0f\u5c0f\u732b\uff0c\u4f1a\u6709\u66f4\u597d\u7684\u53e3\u798f\uff0c\u53ef\u4ee5\u5403\u5230\u66f4\u591a\u66f4\u7f8e\u5473\u7684\u98df\u6750\u5417\uff1f

\u7b54\u6848\u662f\u4e0d\u4e00\u5b9a\u3002\u8fd9\u53d6\u51b3\u4e8e\u4eba\u7c7b\u4ee5\u591a\u5927\u7684\u51b3\u5fc3\u53bb\u6539\u9020\u98df\u6750\u548c\u62d3\u5bbd\u98df\u8c31\u3002


\u4eca\u65f6\u4eca\u65e5\uff0c\u4eba\u7c7b\u5bf9\u5730\u7403\u4e0a\u53ef\u98df\u7528\u751f\u7269\u7684\u5f00\u53d1\u5229\u7528\u5df2\u7ecf\u8fbe\u5230\u4e86\u4e00\u4e2a\u660e\u663e\u7684\u74f6\u9888\u671f\u3002\u5929\u4e0a\u98de\u7684\uff0c\u5730\u4e0a\u8dd1\u7684\uff0c\u6c34\u91cc\u6e38\u7684\uff0c\u51e1\u662f\u6709\u4e00\u4e01\u70b9\u7eb3\u5165\u98df\u8c31\u53ef\u80fd\u6027\u7684\uff0c\u57fa\u672c\u4e0a\u90fd\u5df2\u7ecf\u88ab\u4eba\u7c7b\u627e\u5230\u4e86\u3002

\u6211\u4eec\u73b0\u5728\u80fd\u505a\u7684\uff0c\u53ea\u662f\u901a\u8fc7\u5168\u7403\u8d38\u6613\u548c\u73b0\u4ee3\u8fd0\u8f93\u624b\u6bb5\uff0c\u8ba9\u4e0d\u540c\u5730\u7406\u533a\u57df\u7684\u98df\u6750\u80fd\u4e92\u901a\u6709\u65e0\u3002\u901a\u8fc7\u51b7\u94fe\u8fd0\u8f93\uff0c\u4e4c\u9c81\u6728\u9f50\u7684\u5e02\u6c11\u53ef\u4ee5\u5403\u5230\u6765\u81ea\u632a\u5a01\u7684\u4e09\u6587\u9c7c\u3002

\u4e00\u4e9b\u517b\u6b96\u624b\u6bb5\u4e5f\u6709\u5e2e\u52a9\uff0c\u6bd4\u5982\u901a\u8fc7\u54c1\u79cd\u9009\u80b2\u548c\u9972\u6599\u63a7\u5236\u6765\u8c03\u8282\u725b\u8089\u4e2d\u8102\u80aa\u5206\u5e03\uff0c\u96ea\u82b1\u80a5\u725b\u6b63\u9010\u6e10\u53d8\u5f97\u7a00\u677e\u5e73\u5e38\u3002

\u4f46\u662f\uff0c\u5f53\u6211\u4eec\u60f3\u8981\u518d\u663e\u8457\u5730\u6539\u5584\u67d0\u79cd\u98df\u6750\u7684\u53e3\u5473\uff0c\u63d0\u9ad8\u98df\u6750\u7684\u989c\u503c\uff0c\u4f18\u5316\u98df\u6750\u7684\u8425\u517b\uff0c\u964d\u4f4e\u98df\u6750\u4ef7\u683c\uff0c\u91c7\u7528\u4ee5\u524d\u7684\u6539\u9020\u65b9\u6cd5\u6b63\u53d8\u5f97\u8d8a\u6765\u8d8a\u6162\uff0c\u4e5f\u8d8a\u6765\u8d8a\u56f0\u96be\u3002\u6b63\u5982\u4e00\u8f86\u5df2\u7ecf\u51fa\u5382\u7684\u6574\u8f66\uff0c\u8981\u901a\u8fc7\u6539\u88c5\u6765\u63d0\u5347\u6027\u80fd\uff0c\u5f53\u7136\u4e5f\u53ef\u4ee5\uff0c\u4f46\u6539\u88c5\u5e45\u5ea6\u662f\u5f88\u6709\u9650\u7684\u3002\u8981\u60f3\u5f97\u5230\u5927\u5e45\u5ea6\u5347\u7ea7\u7684\u8f66\u578b\uff0c\u53ea\u80fd\u5728\u8bbe\u8ba1\u3001\u751f\u4ea7\u73af\u8282\u5c31\u7528\u4e0a\u66f4\u597d\u7684\u6784\u4ef6\u3002

\u5bf9\u4e8e\u98df\u6750\u7684\u671f\u5f85\uff0c\u518d\u60f3\u8981\u8de8\u4e00\u5927\u6b65\uff0c\u5c31\u9700\u8981\u5728\u9057\u4f20\u5206\u5b50\u5c42\u9762\u5bf9\u98df\u6750\u8fdb\u884c\u5b9a\u5411\u7684\u6539\u9020\u4e86\u3002

\u4e3a\u4ec0\u4e48\u8bf4\u8f6c\u57fa\u56e0\u6280\u672f\u662f\u6700\u6709\u5e0c\u671b\u53bb\u7a81\u7834\u74f6\u9888\u7684\u65b9\u5f0f\u5462\uff1f

\u56e0\u4e3a\u51b3\u5b9a\u98df\u6750\u53e3\u5473\u3001\u989c\u503c\u3001\u8425\u517b\u548c\u4ef7\u683c\u7684\u90a3\u4e9b\u56e0\u7d20\uff0c\u6bd4\u5982\u7cd6\u5ea6\u3001\u6e38\u79bb\u6c28\u57fa\u9178\u542b\u91cf\u3001\u82b1\u9752\u7d20\u542b\u91cf\u3001\u8102\u80aa\u6bd4\u4f8b\u3001\u4ea7\u91cf\u548c\u8d27\u67b6\u671f\uff0c\u90fd\u662f\u53ef\u4ee5\u91cf\u5316\uff0c\u4e5f\u662f\u53ef\u4ee5\u901a\u8fc7\u64cd\u63a7\u7279\u5b9a\u57fa\u56e0\u6765\u8fdb\u884c\u8c03\u8282\u7684\u3002

\u90a3\u4e3a\u4ec0\u4e48\u4e0d\u73b0\u5728\u5c31\u5f00\u59cb\u505a\u5462\uff1f

\u56e0\u4e3a\u8fd8\u6709\u975e\u5e38\u591a\u7684\u4eba\u89c9\u5f97\uff0c\u8f6c\u57fa\u56e0\u7684\u98df\u54c1\u5fc5\u987b\u8bd5\u5403300\u5e74\u6ca1\u95ee\u9898\u624d\u53ef\u4ee5\u6279\u51c6\u4e0a\u5e02\u554a\uff01\u4f60\u89c1\u8fc7\u54ea\u4e2a\u4ea7\u54c1\u7684\u7814\u53d1\u5468\u671f\u662f300\u5e74\u4e48\uff1f

\u4e3a\u4e86\u634d\u536b\u4e00\u540d\u5403\u8d27\u83b7\u5f97\u66f4\u591a\u7f8e\u5473\u98df\u6750\u7684\u5929\u8d4b\u6743\u5229\uff0c\u4e5f\u4e3a\u4e86\u8ba9\u4eb2\u670b\u597d\u53cb\u4e0d\u518d\u65e0\u7aef\u6050\u60e7\u8f6c\u57fa\u56e0\u6280\u672f\uff0c\u8bf7\u6211\u5403\u8f6c\u57fa\u56e0\u5927\u9910\u7684\u680b\u6881\u53d1\u8d77\u4e86\u8fd9\u4e2a\u79d1\u666e\u516c\u76ca\u9879\u76ee\uff0c\u51c6\u5907\u9080\u8bf7\u66f4\u591a\u5403\u8d27\u4e00\u8d77\u516c\u5f00\u54c1\u5c1d\u8f6c\u57fa\u56e0\u5927\u9910\uff0c\u5e76\u7528\u8f6c\u57fa\u56e0\u98df\u6750\u521b\u9020100\u9053\u8f6c\u57fa\u56e0\u6599\u7406\u3002

\u611f\u5174\u8da3\u7684\u670b\u53cb\u53ef\u4ee5\u652f\u6301\u4e00\u4e0b\u4ed6\u4eec\u7684\u4f17\u7b79\uff0c\u4e5f\u6b22\u8fce\u5927\u5bb6\u53bb\u5e7f\u5dde\u627e\u680b\u6881\u8e6d\u4e00\u987f\u8f6c\u57fa\u56e0\u7684\u996d\uff0c\u8e6d\u996d\u8bf7\u641c\u7d22\u5fae\u4fe1\u516c\u4f17\u53f7\uff1a\n\u8f6c\u57fa\u56e0\u80fd\u597d\u600e

\u4f17\u7b79\u7684\u4e8c\u7ef4\u7801

\u5229\u76ca\u76f8\u5173\uff1a\u6211\u53cb\u60c5\u62c5\u4efb\u4e86\u8fd9\u4e2a\u79d1\u666e\u516c\u76ca\u9879\u76ee\u7684\u7f8e\u98df\u987e\u95ee\uff0c\u4ece\u5934\u5230\u5c3e\u4e0d\u4f1a\u6709\u4efb\u4f55\u62a5\u916c\uff0c\u8fde\u6765\u56de\u5e7f\u5dde\u7684\u9ad8\u94c1\u94b1\u90fd\u662f\u6211\u81ea\u5df1\u51fa\u7684\u3002

\u56fe\u7247\u6765\u81ea\u7f51\u7edc\uff0c\u4fb5\u5220\u3002

", "commentPermission": "anyone", "title": "\u4e3a\u4e86\u505a\u4e00\u4e2a\u79f0\u804c\u7684\u5403\u8d27\uff0c\u4ed6\u51b3\u5b9a\u8fde\u7740\u5403\u4e00\u767e\u5929\u8f6c\u57fa\u56e0\u98df\u7269", "links": {"comments": "http://zhuanlan.zhihu.com/api/columns/xiepanda/posts/20202275/comments"}, "sourceUrl": "", "slug": 20202275, "meta": {"next": null, "previous": null}} -------------------------------------------------------------------------------- /test/data/post.md: -------------------------------------------------------------------------------- 1 | **这是个关于科普公益项目的硬广,而且是篇找读者众筹的硬广,继续读下去之前请接受这样的设定,不然就不要读了。** 2 | 3 | 我有个叫项栋梁的老朋友,他是个很怕死的人。 4 | 5 | 那天和栋梁一起吃饭,大家一起喝点佐餐葡萄酒,栋梁不愿意喝。因为栋梁很怕死,任何有科学研究证明可能影响寿命的东西他都不碰,所以他不喝酒。 6 | 7 | 就是这个很怕死的栋梁,不久前辞去了媒体的工作,决定连吃100天转基因食物,并且用纪录片的方式全程记录。 8 | 9 | 栋梁邀请我作为他这个“连吃100天转基因食物”项目的美食顾问,于是不久前我去广州吃了一顿“转基因大餐”,用的是转基因大米、玉米、木瓜和大豆油这些食材。大概是因为一本正经先签了《知情同意书》的缘故,吃饭时对食材本身的在意多过了对料理水平的评价。可惜并没有吃到转基因的牛肉和鸡翅。 10 | 11 | **为什么我愿意无偿做这个项目的美食顾问呢?因为我不但自己常吃转基因食物,也鼓励身边的朋友和家人吃转基因食物,而且我一直认为一个真正的吃货应该用行动支持转基因。** 12 | 13 | ![](http://pic4.zhimg.com/0b656fb32a070f9717f9e4a737a148f3_b.jpg) 14 | 15 | 转基因的食材吃起来有什么特别之处吗?很遗憾,我是没有尝出来。那为什么说真正的吃货应该支持转基因呢? 16 | 17 | **因为转基因可以让食材变得更好吃。 ** 18 | 19 | **因为转基因可以让食材变得更好吃。 ** 20 | 21 | **因为转基因可以让食材变得更好吃。** 22 | 23 | 24 | 25 | 26 | 我以前写过一篇专栏 《纯天然原生态的东西就好吃?这可不一定》。这次再来谈谈为什么“非天然”的技术可以让食物变得更好吃。 27 | 28 | 通常,我们要想做点好吃的,第一步是去市场上“挑”食材。这个“挑”的意思,当然是说食材已经分门别类摆在那里,等着我们凭经验、口味和经济能力去选择。 29 | 30 | 31 | 32 | 33 | 遇到特别满意的食材,很多人会感慨一句:感谢大自然的馈赠! 
34 | 35 | 但实际上,这是一个特别常见的误解。 36 | 37 | **你们人类现在纳入食谱的这些食材,绝大部分都不是自然进化的产物**。人类一步步爬到食物链的顶端,这个过程中,我们对整条食物链所做的事情可谓“惨无天道”。 38 | 39 | 40 | 41 | 42 | 首先是粮食 43 | 44 | ![](http://pic3.zhimg.com/60c901836121ee08722d0ff8ef6a487a_b.jpg) 45 | 46 | 然后是水果 47 | 48 | ![](http://pic2.zhimg.com/0a3c8516cdcacc707f4cff9fc1ec07f1_b.jpg) 49 | 50 | 还有肉 51 | 52 | ![](http://pic1.zhimg.com/776ad08c7976697938fd333e06ec667c_b.jpg)![](http://pic1.zhimg.com/fcb72a34df91474aed05ca96b3235950_b.jpg) 53 | 54 | 植物的杂交![](http://pic2.zhimg.com/2c965a0e54f6383c3552cf4d0f915b99_b.jpg) 55 | 56 | 不同物种的嫁接,比如嫁接水果 57 | 58 | ![](http://pic1.zhimg.com/1b18aa2f5f93ca12d2191d56902e14a4_b.jpg) 59 | 60 | **我们现在吃的大部分主食、肉、水果、蔬菜,基本上都不是大自然的馈赠,都是被极速膨胀的人口,还有吃货们越来越挑剔的口味逼着改造出来的。 ** 61 | 62 | 得益于对食材不断的改造,地球承载的人口数量相对一万年前翻了约1000倍。一个生活在深圳的现代人谢熊猫一生预计会吃掉40到60吨食物,数量上比秦朝的祖先谢熊老老老老猫多不了很多。但是他这一生可以品尝到的食材种类,就比谢熊老老老老猫不知高到哪里去了。 63 | 64 | 那么再过50年,再过300年呢?谢熊猫的后代谢熊小小小小猫,会有更好的口福,可以吃到更多更美味的食材吗? 65 | 66 | 答案是不一定。这取决于人类以多大的决心去改造食材和拓宽食谱。 67 | 68 | 69 | 70 | 71 | 今时今日,人类对地球上可食用生物的开发利用已经达到了一个明显的瓶颈期。天上飞的,地上跑的,水里游的,凡是有一丁点纳入食谱可能性的,基本上都已经被人类找到了。 72 | 73 | ![](http://pic1.zhimg.com/91e9f06427e82fffdf5d2d26829377f8_b.jpg) 74 | 75 | 我们现在能做的,只是通过全球贸易和现代运输手段,让不同地理区域的食材能互通有无。通过冷链运输,乌鲁木齐的市民可以吃到来自挪威的三文鱼。 76 | 77 | 一些养殖手段也有帮助,比如通过品种选育和饲料控制来调节牛肉中脂肪分布,雪花肥牛正逐渐变得稀松平常。 78 | 79 | ![](http://pic2.zhimg.com/590abedf42af1ed736a3990becf2227d_b.jpg) 80 | 81 | 但是,当我们想要再显著地改善某种食材的口味,提高食材的颜值,优化食材的营养,降低食材价格,采用以前的改造方法正变得越来越慢,也越来越困难。正如一辆已经出厂的整车,要通过改装来提升性能,当然也可以,但改装幅度是很有限的。要想得到大幅度升级的车型,只能在设计、生产环节就用上更好的构件。 82 | 83 | 对于食材的期待,再想要跨一大步,就需要在遗传分子层面对食材进行定向的改造了。 84 | 85 | 为什么说转基因技术是最有希望去突破瓶颈的方式呢? 86 | 87 | 因为决定食材口味、颜值、营养和价格的那些因素,比如糖度、游离氨基酸含量、花青素含量、脂肪比例、产量和货架期,都是可以量化,也是可以通过操控特定基因来进行调节的。 88 | 89 | **那为什么不现在就开始做呢? 
90 | ** 91 | 92 | **因为还有非常多的人觉得,转基因的食品必须试吃300年没问题才可以批准上市啊!你见过哪个产品的研发周期是300年么?** 93 | 94 | 为了捍卫一名吃货获得更多美味食材的天赋权利,也为了让亲朋好友不再无端恐惧转基因技术,请我吃转基因大餐的栋梁发起了这个科普公益项目,准备邀请更多吃货一起公开品尝转基因大餐,并用转基因食材创造100道转基因料理。 95 | 96 | **感兴趣的朋友可以支持一下他们的众筹,也欢迎大家去广州找栋梁蹭一顿转基因的饭,蹭饭请搜索微信公众号: 转基因能好怎** 97 | 98 | 众筹的二维码 99 | 100 | ![](http://pic4.zhimg.com/a776bb513418a56d0fcfcf069b3f3b27_b.jpg) 101 | 102 | 利益相关:我友情担任了这个科普公益项目的美食顾问,从头到尾不会有任何报酬,连来回广州的高铁钱都是我自己出的。 103 | 104 | 图片来自网络,侵删。 105 | -------------------------------------------------------------------------------- /test/data/question_more_answer.html: -------------------------------------------------------------------------------- 1 |
17 | 18 | 19 | 20 |
21 | 26 | 30 |
31 | 32 | 33 |
34 |
35 |

36 | 37 | 38 | 41 | 44 | 45 | 46 | 47 | 48 | 柳蜻蜓爱猫咪 49 | 50 |

51 | 收起 52 |
53 |
54 | 55 | 56 | 冯慧 57 | 58 | 59 | 60 | 赞同 61 | 62 | 63 |
64 |
65 |
66 | 67 |
我一般都告诉他们,这叫相敬如宾o(╯□╰)o 68 | 69 | 70 |
71 | 72 |
73 | 74 |
75 |
76 | 77 | 78 | 发布于 2015-02-17 79 | 80 | 81 | 82 | 添加评论 83 | 84 | 85 | 感谢 86 | 87 | 88 | 89 | 90 | 分享 91 | 92 | 收藏 93 | 94 | 95 | 96 | 97 | 98 | 99 | 没有帮助 100 | 101 | 102 | 举报 103 | 104 | 105 | 106 | 107 | 108 | 作者保留权利 109 | 110 | 111 | 112 |
113 |
114 |
115 | -------------------------------------------------------------------------------- /test/test.json: -------------------------------------------------------------------------------- 1 | {"cap_id": "\"OWJjODVkYWQ3MDJjNGM5M2EyNmY1NDAyOGU5MzQwNDA=|1472645376|6e09f5d2b2bb30835a2a9b19b5fa4986637637bd\"", "a_t": "\"ABDMdqgvJAkXAAAAAFbuVwAQzHaoLyQJFwAAAGECVU0AVu5XcAN6dNpEkjB21U5fvKPeu47f8Ug437yVUWuvtYnOb2ctSWwp3f4PAw==\"", "z_c0": "\"QUJETWRxZ3ZKQWtYQUFBQVlRSlZUUUJXN2xkd0EzcDAya1NTTUhiVlRsLThvOTY3anRfeFNBPT0=|1472645376|021fc93e0c8c88bb13669dc31e12ec1900f7cbb9\"", "q_c1": "b924d120986f4133acf1c2390644ca98|1472645376000|1472645376000", "l_cap_id": "\"MTNhNWZmNDllZmYwNDdhZTk2ZDI0ODk5YzM5YjlhMTI=|1472645376|bff7a553af1fd8f4244d26d4fb2bc17fc7063202\"", "login": "\"YjNiNmE2YmFlNTRjNDZiYmIxMGI0MzEzOTMyMDJiM2Q=|1472645376|7bb6c20ba86434b35bdeb2d40d3f816e36354652\"", "n_c": "1"} -------------------------------------------------------------------------------- /test/test_activity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, division, unicode_literals 5 | import unittest 6 | import os 7 | import datetime 8 | 9 | from zhihu import Question, Activity, ActType 10 | from zhihu.common import BeautifulSoup 11 | 12 | from test_utils import TEST_DATA_PATH 13 | 14 | 15 | class ActivityTest(unittest.TestCase): 16 | @classmethod 17 | def setUpClass(cls): 18 | url = 'http://www.zhihu.com/question/24825703' 19 | file_path = os.path.join(TEST_DATA_PATH, 'question.html') 20 | with open(file_path, 'rb') as f: 21 | html = f.read() 22 | soup = BeautifulSoup(html) 23 | 24 | cls.question = Question(url) 25 | cls.question._session = None 26 | cls.question.soup = soup 27 | 28 | act_time = datetime.datetime.fromtimestamp(1439395600) 29 | act_type = ActType.FOLLOW_QUESTION 30 | cls.activity = Activity(act_type, act_time, question=cls.question) 31 | 32 | def 
test_content(self): 33 | self.assertIs(self.question, self.activity.content) 34 | 35 | def test_init_errors(self): 36 | act_time = datetime.datetime.fromtimestamp(1439395600) 37 | act_type = ActType.FOLLOW_QUESTION 38 | 39 | with self.assertRaises(ValueError): 40 | Activity(100, act_time) 41 | with self.assertRaises(ValueError): 42 | Activity(act_type, act_time) 43 | -------------------------------------------------------------------------------- /test/test_answer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, division, unicode_literals 5 | import unittest 6 | import os 7 | 8 | from zhihu import Answer 9 | from zhihu.common import BeautifulSoup 10 | from test_utils import TEST_DATA_PATH 11 | 12 | 13 | class AnswerTest(unittest.TestCase): 14 | @classmethod 15 | def setUpClass(cls): 16 | url = 'http://www.zhihu.com/question/24825703/answer/30975949' 17 | file_path = os.path.join(TEST_DATA_PATH, 'answer.html') 18 | with open(file_path, 'rb') as f: 19 | html = f.read() 20 | soup = BeautifulSoup(html) 21 | 22 | answer_saved_path = os.path.join(TEST_DATA_PATH, 'answer.md') 23 | with open(answer_saved_path, 'rb') as f: 24 | cls.answer_saved = f.read() 25 | 26 | cls.answer = Answer(url) 27 | cls.answer._session = None 28 | cls.answer.soup = soup 29 | cls.expected = {'id': 30975949, 'aid': 7775236, 30 | 'xsrf': 'cfd489623d34ca03adfdc125368c6426', 31 | 'html': soup.prettify(), 'author_id': 'tian-ge-xia', 32 | 'author_name': '甜阁下', 'question_id': 24825703, 33 | 'question_title': '关系亲密的人之间要说「谢谢」吗?', 34 | 'upvote_num': 1164, 'upvoter_name': 'Mikuroneko', 35 | 'upvoter_id': 'guo-yi-hui-23'} 36 | 37 | def test_id(self): 38 | self.assertEqual(self.expected['id'], self.answer.id) 39 | 40 | def test_aid(self): 41 | self.assertEqual(self.expected['aid'], self.answer.aid) 42 | 43 | def test_xsrf(self): 44 | self.assertEqual(self.expected['xsrf'], 
self.answer.xsrf) 45 | 46 | def test_html(self): 47 | self.assertEqual(self.expected['html'], self.answer.html) 48 | 49 | def test_upvote_num(self): 50 | self.assertEqual(self.expected['upvote_num'], self.answer.upvote_num) 51 | 52 | def test_author(self): 53 | self.assertEqual(self.expected['author_id'], self.answer.author.id) 54 | self.assertEqual(self.expected['author_name'], self.answer.author.name) 55 | 56 | def test_question(self): 57 | self.assertEqual(self.expected['question_id'], self.answer.question.id) 58 | self.assertEqual(self.expected['question_title'], 59 | self.answer.question.title) 60 | 61 | def test_content(self): 62 | path = os.path.join(TEST_DATA_PATH, 'answer_content.html') 63 | with open(path, 'rb') as f: 64 | content = f.read() 65 | self.assertEqual(content.decode('utf-8'), self.answer.content) 66 | 67 | def test_save(self): 68 | save_name = 'answer_save' 69 | self.answer.save(filepath=TEST_DATA_PATH, filename=save_name, 70 | mode='md') 71 | answer_saved_path = os.path.join(TEST_DATA_PATH, save_name + '.md') 72 | with open(answer_saved_path, 'rb') as f: 73 | answer_saved = f.read() 74 | os.remove(answer_saved_path) 75 | self.assertEqual(self.answer_saved, answer_saved) 76 | 77 | def test_parse_author_soup(self): 78 | fpath = os.path.join(TEST_DATA_PATH, 'answer_upvoter.html') 79 | with open(fpath, 'rb') as f: 80 | html = f.read().decode('utf-8') 81 | 82 | soup = BeautifulSoup(html) 83 | upvoter = self.answer._parse_author_soup(soup) 84 | 85 | self.assertEqual(self.expected['upvoter_name'], upvoter.name) 86 | self.assertEqual(self.expected['upvoter_id'], upvoter.id) 87 | 88 | def test_save_error(self): 89 | with self.assertRaises(ValueError): 90 | self.answer.save(filepath=TEST_DATA_PATH, filename='invalid', 91 | mode='invalid') 92 | -------------------------------------------------------------------------------- /test/test_collection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python3
# -*- coding: utf-8 -*-

from __future__ import print_function, division, unicode_literals
import unittest
import os

from test_utils import TEST_DATA_PATH
from zhihu import Collection
from zhihu.common import BeautifulSoup


class CollectionTest(unittest.TestCase):
    # Offline tests for zhihu.Collection: all data comes from a saved HTML
    # fixture, so no network access is needed.
    @classmethod
    def setUpClass(cls):
        url = 'http://www.zhihu.com/collection/28698204'
        file_path = os.path.join(TEST_DATA_PATH, 'collection.html')
        with open(file_path, 'rb') as f:
            html = f.read()
        soup = BeautifulSoup(html)

        cls.collection = Collection(url)
        # _session is nulled so any accidental network request fails fast.
        cls.collection._session = None
        cls.collection.soup = soup
        cls.expected = {'cid': 3725428, 'name': '可以用来背的答案',
                        'xsrf': 'cfd489623d34ca03adfdc125368c6426',
                        'owner_id': 'buhuilengyoumo', 'owner_name': '树叶',
                        'follower_num': 6328, 'top_ques_id': 26092705,
                        'top_ques_title': ('一直追求(吸引)不到喜欢的异性,'
                                           '感觉累了怎么办?'),
                        'top_ans_id': 32989919, 'top_ans_author_name': '朱炫',
                        'top_ans_upvote_num': 16595,
                        'top_ans_author_id': 'zhu-xuan-86'
                        }

    def test_cid(self):
        self.assertEqual(self.expected['cid'], self.collection.cid)

    def test_name(self):
        self.assertEqual(self.expected['name'], self.collection.name)

    def test_xsrf(self):
        self.assertEqual(self.expected['xsrf'], self.collection.xsrf)

    def test_owner(self):
        owner = self.collection.owner
        self.assertEqual(self.expected['owner_id'], owner.id)
        self.assertEqual(self.expected['owner_name'], owner.name)

    def test_follower_num(self):
        self.assertEqual(self.expected['follower_num'],
                         self.collection.follower_num)

    def test_page_get_questions(self):
        questions = [q for q in
                     self.collection._page_get_questions(self.collection.soup)]
        ques = questions[0]
        self.assertEqual(self.expected['top_ques_id'], ques.id)
        self.assertEqual(self.expected['top_ques_title'], ques.title)

    def test_page_get_answers(self):
        answers = [a for a in
                   self.collection._page_get_answers(self.collection.soup)]
        ans = answers[0]
        self.assertEqual(self.expected['top_ans_id'], ans.id)
        self.assertEqual(self.expected['top_ans_upvote_num'], ans.upvote_num)
        self.assertEqual(self.expected['top_ans_author_name'], ans.author.name)
        self.assertEqual(self.expected['top_ans_author_id'], ans.author.id)

    def test_questions(self):
        qs = self.collection.questions
        ques = next(qs)
        self.assertEqual(self.expected['top_ques_id'], ques.id)
        self.assertEqual(self.expected['top_ques_title'], ques.title)

    def test_answers(self):
        anses = self.collection.answers
        ans = next(anses)
        self.assertEqual(self.expected['top_ans_id'], ans.id)
        self.assertEqual(self.expected['top_ans_upvote_num'], ans.upvote_num)
        self.assertEqual(self.expected['top_ans_author_name'], ans.author.name)
        self.assertEqual(self.expected['top_ans_author_id'], ans.author.id)
# -------------------------------------------------------------------------
# /test/test_column.py
# -------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from __future__ import print_function, division, unicode_literals
import unittest
import os
import json

from test_utils import TEST_DATA_PATH
from zhihu import Column, Post


class ColumnTest(unittest.TestCase):
    # Offline tests for zhihu.Column built from JSON fixtures.
    @classmethod
    def setUpClass(cls):
        url = 'http://zhuanlan.zhihu.com/xiepanda'
        file_path = os.path.join(TEST_DATA_PATH, 'column.json')
        with open(file_path, 'r') as f:
            soup = json.load(f)

        post_path = os.path.join(TEST_DATA_PATH, 'column_post.json')
        with open(post_path, 'r') as f:
            cls.post_json = json.load(f)

        cls.column = Column(url)
        cls.column.soup = soup
        cls.expected = {'name': '谢熊猫出没注意',
'follower_num': 76605, 28 | 'post_num': 69, 'post_author_id': 'xiepanda', 29 | 'post_title': ("为了做一个称职的吃货,他决定连着吃" 30 | "一百天转基因食物"), 31 | 'post_upvote_num': 963, 'post_comment_num': 199} 32 | 33 | def test_name(self): 34 | self.assertEqual(self.expected['name'], self.column.name) 35 | 36 | def test_folower_num(self): 37 | self.assertEqual(self.expected['follower_num'], 38 | self.column.follower_num) 39 | 40 | def test_post_num(self): 41 | self.assertEqual(self.expected['post_num'], self.column.post_num) 42 | 43 | def test_parse_post_data(self): 44 | post = self.column._parse_post_data(self.post_json) 45 | self.assertEqual(self.expected['post_author_id'], post.author.id) 46 | self.assertEqual(self.expected['post_title'], post.title) 47 | self.assertEqual(self.expected['post_upvote_num'], post.upvote_num) 48 | self.assertEqual(self.expected['post_comment_num'], post.comment_num) 49 | 50 | def test_posts(self): 51 | ps = self.column.posts 52 | post = next(ps) 53 | self.assertTrue(isinstance(post, Post)) 54 | -------------------------------------------------------------------------------- /test/test_common.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import re 3 | 4 | from zhihu.common import re_question_url 5 | 6 | 7 | class CommonTest(unittest.TestCase): 8 | 9 | def test_question_url(self): 10 | url = 'https://www.zhihu.com/question/26901243?sort=created' 11 | obj = re.match(re_question_url, url) 12 | assert obj.group() == url 13 | 14 | url = 'https://www.zhihu.com/question/26901243' 15 | obj = re.match(re_question_url, url) 16 | assert obj.group() == url 17 | 18 | url = 'https://www.zhihu.com/question/26901243/' 19 | obj = re.match(re_question_url, url) 20 | assert obj.group() == url 21 | 22 | url = 'https://www.zhihu.com/question/26901243?sort=createdx' 23 | obj = re.match(re_question_url, url) 24 | assert obj is None 25 | 26 | url = 'https://www.zhihu.com/question/26901243sort=created' 27 | obj = 
re.match(re_question_url, url) 28 | assert obj is None 29 | 30 | url = 'https://www.zhihu.com/question/26901243/?sort=created' 31 | obj = re.match(re_question_url, url) 32 | assert obj is None 33 | 34 | url = 'https://www.zhihu.com/question/26901243?/sort=created' 35 | obj = re.match(re_question_url, url) 36 | assert obj is None 37 | 38 | -------------------------------------------------------------------------------- /test/test_post.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, division, unicode_literals 5 | import unittest 6 | import os 7 | import json 8 | 9 | from zhihu import Post 10 | from test_utils import TEST_DATA_PATH 11 | 12 | 13 | class ColumnTest(unittest.TestCase): 14 | @classmethod 15 | def setUpClass(cls): 16 | url = 'http://zhuanlan.zhihu.com/xiepanda/20202275' 17 | 18 | post_path = os.path.join(TEST_DATA_PATH, 'column_post.json') 19 | with open(post_path, 'r') as f: 20 | post_json = json.load(f) 21 | 22 | post_saved_path = os.path.join(TEST_DATA_PATH, 'post.md') 23 | with open(post_saved_path, 'rb') as f: 24 | cls.post_saved = f.read() 25 | 26 | cls.post = Post(url) 27 | cls.post.soup = post_json 28 | cls.expected = {'column_in_name': 'xiepanda', 'slug': 20202275, 29 | 'column_name': '谢熊猫出没注意', 30 | 'author_name': '谢熊猫君', 'author_id': 'xiepanda', 31 | 'title': '为了做一个称职的吃货,他决定连着吃一百天转基因食物', 32 | 'upvote_num': 963, 'comment_num': 199} 33 | 34 | def test_column_in_name(self): 35 | self.assertEqual(self.expected['column_in_name'], 36 | self.post.column_in_name) 37 | 38 | def test_slug(self): 39 | self.assertEqual(self.expected['slug'], self.post.slug) 40 | 41 | def test_author(self): 42 | self.assertEqual(self.expected['author_name'], self.post.author.name) 43 | self.assertEqual(self.expected['author_id'], self.post.author.id) 44 | 45 | def test_title(self): 46 | self.assertEqual(self.expected['title'], 
self.post.title) 47 | 48 | def test_upvote_num(self): 49 | self.assertEqual(self.expected['upvote_num'], self.post.upvote_num) 50 | 51 | def test_comment_num(self): 52 | self.assertEqual(self.expected['comment_num'], self.post.comment_num) 53 | 54 | def test_save(self): 55 | save_name = 'post_save' 56 | self.post.save(filepath=TEST_DATA_PATH, filename=save_name) 57 | post_saved_path = os.path.join(TEST_DATA_PATH, save_name + '.md') 58 | with open(post_saved_path, 'rb') as f: 59 | post_saved = f.read() 60 | os.remove(post_saved_path) 61 | self.assertEqual(self.post_saved, post_saved) 62 | -------------------------------------------------------------------------------- /test/test_question.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, division, unicode_literals 5 | import unittest 6 | import os 7 | 8 | from test_utils import TEST_DATA_PATH 9 | from zhihu import Question, Author, Answer 10 | from zhihu.common import BeautifulSoup 11 | 12 | 13 | description = ("从小父母和大家庭里,长辈都教我们得到别人帮助时要说“谢谢”。" 14 | "比方说家庭聚餐,亲人们帮忙夹了菜要感谢。无论多么亲密,父母还是" 15 | "兄妹,都要说声谢谢。 后来上了高中,大学。也习惯性的及时表达" 16 | "对他人帮助的感谢。 但室友们,还有男朋友,都不喜欢我这样。他们" 17 | "说,这样说的话会感觉双方很有距离感很生疏。尤其是男朋友,不喜" 18 | "欢我这样,他说这样很不亲密,情侣之间就不分得那么清,不需要谢" 19 | "谢的。但我从小习惯了,别人帮了我我不说的话,会很不自在。 " 20 | '怎么办?我还要继续这样吗?什么时候不该说"谢谢”??') 21 | 22 | 23 | class QuestionTest(unittest.TestCase): 24 | @classmethod 25 | def setUpClass(cls): 26 | url = 'http://www.zhihu.com/question/24825703' 27 | file_path = os.path.join(TEST_DATA_PATH, 'question.html') 28 | with open(file_path, 'rb') as f: 29 | html = f.read() 30 | soup = BeautifulSoup(html) 31 | 32 | cls.question = Question(url) 33 | cls.question._session = None 34 | cls.question.soup = soup 35 | cls.expected = {'id': 24825703, 'qid': 2112271, 36 | 'xsrf': 'cfd489623d34ca03adfdc125368c6426', 37 | 'html': soup.prettify(), 38 | 'title': '关系亲密的人之间要说「谢谢」吗?', 39 | 'details': 
description, 'answer_num': 621, 40 | 'follower_num': 4427, 'top_answer_id': 39753456, 41 | 'top_answer_author_name': '芝士就是力量', 42 | 'top_answer_upvote_num': 97, 'top_50_ans_id': 31003847, 43 | 'top_50_ans_author_name': '圭多达莱佐', 44 | 'top_50_ans_upvote_num': 31, 'more_ans_id': 39958704, 45 | 'more_ans_author_name': '柳蜻蜓', 46 | 'more_ans_upvote_num': 1, 47 | 'topics': ['心理学', '恋爱', '社会', '礼仪', 48 | '亲密关系'], 49 | } 50 | 51 | more_ans_file_path = os.path.join(TEST_DATA_PATH, 52 | 'question_more_answer.html') 53 | with open(more_ans_file_path, 'rb') as f: 54 | cls.more_ans_html = f.read() 55 | 56 | def test_id(self): 57 | self.assertEqual(self.expected['id'], self.question.id) 58 | 59 | def test_qid(self): 60 | self.assertEqual(self.expected['qid'], self.question.qid) 61 | 62 | def test_xsrf(self): 63 | self.assertEqual(self.expected['xsrf'], self.question.xsrf) 64 | 65 | def test_html(self): 66 | self.assertEqual(self.expected['html'], self.question.html) 67 | 68 | def test_title(self): 69 | self.assertEqual(self.expected['title'], self.question.title) 70 | 71 | def test_details(self): 72 | self.assertEqual(self.expected['details'], self.question.details) 73 | 74 | def test_answer_num(self): 75 | self.assertEqual(self.expected['answer_num'], self.question.answer_num) 76 | 77 | def test_follower_num(self): 78 | self.assertEqual(self.expected['follower_num'], 79 | self.question.follower_num) 80 | 81 | def test_topics(self): 82 | self.assertEqual(self.expected['topics'], self.question.topics) 83 | 84 | def test_top_answer(self): 85 | answer = self.question.top_answer 86 | self.assertEqual(self.expected['top_answer_id'], answer.id) 87 | self.assertEqual(self.expected['top_answer_author_name'], 88 | answer.author.name) 89 | self.assertEqual(self.expected['top_answer_upvote_num'], 90 | answer.upvote_num) 91 | 92 | def test_top_i_answer(self): 93 | answer = self.question.top_i_answer(50) 94 | self.assertEqual(self.expected['top_50_ans_id'], answer.id) 95 | 
self.assertEqual(self.expected['top_50_ans_author_name'], 96 | answer.author.name) 97 | self.assertEqual(self.expected['top_50_ans_upvote_num'], 98 | answer.upvote_num) 99 | 100 | def test_parse_answer_html(self): 101 | answer = self.question._parse_answer_html(self.more_ans_html) 102 | self.assertEqual(self.expected['more_ans_id'], answer.id) 103 | self.assertEqual(self.expected['more_ans_author_name'], 104 | answer.author.name) 105 | self.assertEqual(self.expected['more_ans_upvote_num'], 106 | answer.upvote_num) 107 | 108 | def test_top_i_answers(self): 109 | answers = [a for a in self.question.top_i_answers(1)] 110 | answer = answers[0] 111 | self.assertEqual(self.expected['top_answer_id'], answer.id) 112 | self.assertEqual(self.expected['top_answer_author_name'], 113 | answer.author.name) 114 | self.assertEqual(self.expected['top_answer_upvote_num'], 115 | answer.upvote_num) 116 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import inspect 3 | 4 | 5 | def module_path(local_function): 6 | ''' returns the module path without the use of __file__. 7 | Requires a function defined locally in the module. 
8 | from "http://stackoverflow.com/questions/729583/ 9 | getting-file-path-of-imported-module"''' 10 | 11 | return os.path.abspath(inspect.getsourcefile(local_function)) 12 | 13 | 14 | TEST_DATA_PATH = os.path.join( 15 | os.path.split(module_path(module_path))[0], 'data') 16 | -------------------------------------------------------------------------------- /zhihu/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from .client import ZhihuClient 5 | from .question import Question 6 | from .author import Author, ANONYMOUS, BanException 7 | from .activity import Activity 8 | from .acttype import ActType, CollectActType 9 | from .answer import Answer 10 | from .collection import Collection 11 | from .column import Column 12 | from .post import Post 13 | from .topic import Topic 14 | 15 | __all__ = ['ZhihuClient', 'Question', 'Author', 'ActType', 'Activity', 16 | 'Answer', 'Collection', 'CollectActType', 'Column', 'Post', 'Topic', 17 | 'ANONYMOUS', 'BanException'] 18 | 19 | __version__ = '0.3.23' 20 | -------------------------------------------------------------------------------- /zhihu/activity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from datetime import datetime 5 | 6 | from .acttype import ActType 7 | from .answer import Answer 8 | from .author import Author, ANONYMOUS 9 | from .collection import Collection 10 | from .column import Column 11 | from .common import * 12 | from .post import Post 13 | from .question import Question 14 | from .topic import Topic 15 | 16 | 17 | class Activity: 18 | """用户动态类,请使用Author.activities获取.""" 19 | 20 | def __init__(self, act, session, author): 21 | """创建用户动态类实例. 
22 | 23 | :param bs4.element.Tag act: 表示用户动态的页面元素 24 | :param Session session: 使用的网络会话 25 | :param Author author: Activity 所属的用户对象 26 | :return: 用户动态对象 27 | :rtype: Activity 28 | 29 | :说明: 30 | 根据Activity.type不同可以获取不同属性,具体请看 :class:`.ActType` 31 | 32 | """ 33 | self._session = session 34 | self._author = author 35 | self._type = ActType.from_str(act.attrs['data-type-detail']) 36 | 37 | useless_tag = act.div.find('a', class_='zg-link') 38 | if useless_tag is not None: 39 | useless_tag.extract() 40 | 41 | attribute = self._get_assemble_method(self.type)(act) 42 | self._attr = attribute.__class__.__name__.lower() 43 | setattr(self, self._attr, attribute) 44 | self._time = datetime.fromtimestamp(int(act['data-time'])) 45 | 46 | @property 47 | def type(self): 48 | """ 49 | :return: 用户动态类型, 具体参见 :class:`.ActType` 50 | :rtype: class:`.ActType` 51 | """ 52 | return self._type 53 | 54 | @property 55 | def content(self): 56 | """获取此对象中能提供的那个属性,对应表请查看 :class:`.ActType` 类. 57 | 58 | :return: 对象提供的对象 59 | :rtype: Author or Question or Answer or Topic or Column or Post 60 | """ 61 | return getattr(self, self._attr) 62 | 63 | @property 64 | def time(self): 65 | """ 66 | :return: 返回用户执行 Activity 操作的时间 67 | :rtype: datetime.datetime 68 | """ 69 | return self._time 70 | 71 | def __find_post(self, act): 72 | try: 73 | column_url = act.find('a', class_='column_link')['href'] 74 | column_name = act.find('a', class_='column_link').text 75 | column = Column(column_url, column_name, session=self._session) 76 | except TypeError: 77 | column = None 78 | try: 79 | author_tag = act.find('div', class_='author-info') 80 | author_url = Zhihu_URL + author_tag.a['href'] 81 | author_name = author_tag.a.text 82 | author_motto = author_tag.span.text if author_tag.span else '' 83 | author = Author(author_url, author_name, author_motto, 84 | session=self._session) 85 | except TypeError: 86 | author = ANONYMOUS 87 | post_url = act.find('a', class_='post-link')['href'] 88 | post_title = act.find('a', 
class_='post-link').text 89 | post_comment_num, post_upvote_num = self._parse_un_cn(act) 90 | return Post(post_url, column, author, post_title, 91 | post_upvote_num, post_comment_num, 92 | session=self._session) 93 | 94 | def _assemble_create_post(self, act): 95 | return self.__find_post(act) 96 | 97 | def _assemble_voteup_post(self, act): 98 | return self.__find_post(act) 99 | 100 | def _assemble_follow_column(self, act): 101 | return Column(act.div.a['href'], act.div.a.text, session=self._session) 102 | 103 | def _assemble_follow_topic(self, act): 104 | topic_url = Zhihu_URL + act.div.a['href'] 105 | topic_name = act.div.a['title'] 106 | return Topic(topic_url, topic_name, session=self._session) 107 | 108 | def _assemble_answer_question(self, act): 109 | question_url = Zhihu_URL + re_a2q.match( 110 | act.div.find_all('a')[-1]['href']).group(1) 111 | question_title = act.div.find_all('a')[-1].text.strip() 112 | question = Question(question_url, question_title, session=self._session) 113 | answer_url = Zhihu_URL + act.div.find_all('a')[-1]['href'] 114 | answer_comment_num, answer_upvote_num = self._parse_un_cn(act) 115 | return Answer(answer_url, question, self._author, answer_upvote_num, 116 | session=self._session) 117 | 118 | def _assemble_voteup_answer(self, act): 119 | question_url = Zhihu_URL + re_a2q.match(act.div.a['href']).group(1) 120 | question_title = act.div.a.text.strip() 121 | question = Question(question_url, question_title, session=self._session) 122 | try_find_author = act.find_all('a', class_='author-link', 123 | href=re.compile('^/people/[^/]*$')) 124 | 125 | if len(try_find_author) == 0: 126 | author_url = None 127 | author_name = '匿名用户' 128 | author_motto = '' 129 | else: 130 | try_find_author = try_find_author[-1] 131 | author_url = Zhihu_URL + try_find_author['href'] 132 | author_name = try_find_author.text 133 | try_find_motto = act.find('span', class_='bio') 134 | if try_find_motto is None: 135 | author_motto = '' 136 | else: 137 | 
author_motto = try_find_motto['title'] 138 | 139 | author = Author(author_url, author_name, author_motto, 140 | session=self._session) 141 | answer_url = Zhihu_URL + act.div.a['href'] 142 | answer_comment_num, answer_upvote_num = self._parse_un_cn(act) 143 | return Answer(answer_url, question, author, answer_upvote_num, 144 | session=self._session) 145 | 146 | def _assemble_ask_question(self, act): 147 | a = act.find("a", class_="question_link") 148 | url = Zhihu_URL + a['href'] 149 | title = a.text.strip(' \n') 150 | return Question(url, title, session=self._session) 151 | 152 | def _assemble_follow_question(self, act): 153 | return Question(Zhihu_URL + act.div.a['href'], act.div.a.text.strip(), 154 | session=self._session) 155 | 156 | def _assemble_follow_collection(self, act): 157 | url = act.div.a['href'] 158 | if not url.startswith('http'): 159 | url = Zhihu_URL + url 160 | return Collection(url, session=self._session) 161 | 162 | def _get_assemble_method(self, act_type): 163 | assemble_methods = { 164 | ActType.UPVOTE_POST: self._assemble_voteup_post, 165 | ActType.FOLLOW_COLUMN: self._assemble_follow_column, 166 | ActType.UPVOTE_ANSWER: self._assemble_voteup_answer, 167 | ActType.ANSWER_QUESTION: self._assemble_answer_question, 168 | ActType.ASK_QUESTION: self._assemble_ask_question, 169 | ActType.FOLLOW_QUESTION: self._assemble_follow_question, 170 | ActType.FOLLOW_TOPIC: self._assemble_follow_topic, 171 | ActType.PUBLISH_POST: self._assemble_create_post, 172 | ActType.FOLLOW_COLLECTION: self._assemble_follow_collection 173 | } 174 | 175 | if act_type in assemble_methods: 176 | return assemble_methods[act_type] 177 | else: 178 | raise ValueError('invalid activity type') 179 | 180 | @staticmethod 181 | def _parse_un_cn(act): 182 | upvote_num = act.find('a', class_='zm-item-vote-count').text 183 | if upvote_num.isdigit(): 184 | upvote_num = int(upvote_num) 185 | else: 186 | upvote_num = None 187 | comment = act.find('a', class_='toggle-comment') 188 | 
comment_text = next(comment.stripped_strings) 189 | comment_num_match = re_get_number.match(comment_text) 190 | comment_num = int( 191 | comment_num_match.group(1)) if comment_num_match is not None else 0 192 | return comment_num, upvote_num 193 | -------------------------------------------------------------------------------- /zhihu/acttype.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import enum 5 | 6 | match = { 7 | 'ANSWER_QUESTION': 'member_answer_question', 8 | 'UPVOTE_ANSWER': 'member_voteup_answer', 9 | 'ASK_QUESTION': 'member_ask_question', 10 | 'FOLLOW_QUESTION': 'member_follow_question', 11 | 'UPVOTE_POST': 'member_voteup_article', 12 | 'FOLLOW_COLUMN': 'member_follow_column', 13 | 'FOLLOW_TOPIC': 'member_follow_topic', 14 | 'PUBLISH_POST': 'member_create_article', 15 | 'FOLLOW_COLLECTION': 'member_follow_favlist' 16 | } 17 | 18 | reverse_match = {v: k for k, v in match.items()} 19 | 20 | 21 | class ActType(enum.Enum): 22 | 23 | """用于表示用户动态的类型. 
24 | 25 | :常量说明: 26 | ================= ================ ============ ===================== 27 | 常量名 说明 提供属性 属性类型 28 | ================= ================ ============ ===================== 29 | ANSWER_QUESTION 回答了一个问题 answer :class:`.Answer` 30 | UPVOTE_ANSWER 赞同了一个回答 answer :class:`.Answer` 31 | ASK_QUESTION 提出了一个问题 question :class:`.Question` 32 | FOLLOW_QUESTION 关注了一个问题 question :class:`.Question` 33 | UPVOTE_POST 赞同了一篇文章 post :class:`.Post` 34 | FOLLOW_COLUMN 关注了一个专栏 column :class:`.Column` 35 | FOLLOW_TOPIC 关注了一个话题 topic :class:`.Topic` 36 | PUBLISH_POST 发表了一篇文章 post :class:`.Post` 37 | FOLLOW_COLLECTION 关注了一个收藏夹 collection :class:`.Collection` 38 | ================= ================ ============ ===================== 39 | 40 | """ 41 | 42 | ANSWER_QUESTION = 1 43 | UPVOTE_ANSWER = 2 44 | ASK_QUESTION = 4 45 | FOLLOW_QUESTION = 8 46 | UPVOTE_POST = 16 47 | FOLLOW_COLUMN = 32 48 | FOLLOW_TOPIC = 64 49 | PUBLISH_POST = 128 50 | FOLLOW_COLLECTION = 256 51 | 52 | @classmethod 53 | def from_str(cls, div_class): 54 | return cls.__getattr__(reverse_match[div_class]) 55 | 56 | def __str__(self): 57 | return match[self.name] 58 | 59 | 60 | class CollectActType(enum.Enum): 61 | """用于表示收藏夹操作的类型. 
62 | 63 | :常量说明: 64 | ================= ============== 65 | 常量名 说明 66 | ================= ============== 67 | INSERT_ANSWER 在收藏夹中增加一个回答 68 | DELETE_ANSWER 在收藏夹中删除一个回答 69 | CREATE_COLLECTION 创建收藏夹 70 | ================= ============== 71 | """ 72 | INSERT_ANSWER = 1 73 | DELETE_ANSWER = 2 74 | CREATE_COLLECTION = 3 75 | -------------------------------------------------------------------------------- /zhihu/answer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | from datetime import datetime 6 | 7 | from .common import * 8 | from .base import BaseZhihu 9 | from .collection import Collection 10 | from .author import Author, ANONYMOUS 11 | 12 | 13 | class Answer(BaseZhihu): 14 | """答案类,请使用``ZhihuClient.answer``方法构造对象.""" 15 | 16 | @class_common_init(re_ans_url) 17 | def __init__(self, url, question=None, author=None, 18 | upvote_num=None, content=None, session=None): 19 | """创建答案类实例. 
20 | 21 | :param str url: 答案url 22 | :param Question question: 答案所在的问题对象,可选 23 | :param Author author: 答案回答者对象,可选 24 | :param int upvote_num: 答案赞同数量,可选 25 | :param str content: 答案内容,可选 26 | :param Session session: 使用的网络会话,为空则使用新会话 27 | :return: 答案对象 28 | :rtype: Answer 29 | """ 30 | self.url = url 31 | self._session = session 32 | self._question = question 33 | self._author = author 34 | self._upvote_num = upvote_num 35 | self._content = content 36 | self._deleted = None 37 | 38 | @property 39 | def id(self): 40 | """答案的id 41 | 42 | :return: 答案id 43 | :rtype: int 44 | """ 45 | return int(re.match(r'.*/(\d+)/$', self.url).group(1)) 46 | 47 | @property 48 | @check_soup('_xsrf') 49 | def xsrf(self): 50 | """获取知乎的反xsrf参数(用不到就忽视吧~) 51 | 52 | :return: xsrf参数 53 | :rtype: str 54 | """ 55 | return self.soup.find('input', attrs={'name': '_xsrf'})['value'] 56 | 57 | @property 58 | @check_soup('_aid') 59 | def aid(self): 60 | """获取答案的内部id,某些POST操作需要此参数 61 | 62 | :return: 答案内部id 63 | :rtype: str 64 | """ 65 | return int(self.soup.find('div', class_='zm-item-answer')['data-aid']) 66 | 67 | @property 68 | @check_soup('_html') 69 | def html(self): 70 | """获取网页源码 71 | 72 | :return: 网页源码 73 | :rtype: str 74 | """ 75 | return self.soup.prettify() 76 | 77 | @property 78 | @check_soup('_author') 79 | def author(self): 80 | """获取答案作者. 81 | 82 | :return: 答案作者 83 | :rtype: Author 84 | """ 85 | from .author import Author 86 | 87 | author = self.soup.find('div', class_='zm-item-answer-author-info') 88 | url, name, motto, photo = parser_author_from_tag(author) 89 | if name == '匿名用户': 90 | return ANONYMOUS 91 | else: 92 | return Author(url, name, motto, photo_url=photo, 93 | session=self._session) 94 | 95 | @property 96 | @check_soup('_question') 97 | def question(self): 98 | """获取答案所在问题. 
99 | 100 | :return: 答案所在问题 101 | :rtype: Question 102 | """ 103 | from .question import Question 104 | 105 | question_link = self.soup.find( 106 | "h2", class_="zm-item-title").a 107 | url = Zhihu_URL + question_link["href"] 108 | title = question_link.text.strip() 109 | followers_num = int(self.soup.find( 110 | 'div', class_='zh-question-followers-sidebar').div.a.strong.text) 111 | answers_num = int(re_get_number.match(self.soup.find( 112 | 'div', class_='zh-answers-title').h3.a.text).group(1)) 113 | return Question(url, title, followers_num, answers_num, 114 | session=self._session) 115 | 116 | @property 117 | @check_soup('_upvote_num') 118 | def upvote_num(self): 119 | """获取答案赞同数量. 120 | 121 | :return: 答案赞同数量 122 | :rtype: int 123 | """ 124 | return int(self.soup.find( 125 | 'div', class_='zm-item-vote-info')['data-votecount']) 126 | 127 | @property 128 | def upvoters(self): 129 | """获取答案点赞用户,返回生成器. 130 | 131 | :return: 点赞用户 132 | :rtype: Author.Iterable 133 | """ 134 | self._make_soup() 135 | next_req = '/answer/' + str(self.aid) + '/voters_profile' 136 | while next_req != '': 137 | data = self._session.get(Zhihu_URL + next_req).json() 138 | next_req = data['paging']['next'] 139 | for html in data['payload']: 140 | soup = BeautifulSoup(html) 141 | yield self._parse_author_soup(soup) 142 | 143 | @property 144 | @check_soup('_content') 145 | def content(self): 146 | """以处理过的Html代码形式返回答案内容. 
147 | 148 | :return: 答案内容 149 | :rtype: str 150 | """ 151 | answer_wrap = self.soup.find('div', id='zh-question-answer-wrap') 152 | content = answer_wrap.find('div', class_='zm-editable-content') 153 | content = answer_content_process(content) 154 | return content 155 | 156 | @property 157 | @check_soup('_creation_time') 158 | def creation_time(self): 159 | """获取答案创建时间 160 | 161 | :return: 答案创建时间 162 | :rtype: datetime.datetime 163 | """ 164 | return datetime.fromtimestamp(int(self.soup.find( 165 | 'div', class_='zm-item-answer')['data-created'])) 166 | 167 | @property 168 | @check_soup('_collect_num') 169 | def collect_num(self): 170 | """获取答案收藏数 171 | 172 | :return: 答案收藏数量 173 | :rtype: int 174 | """ 175 | element = self.soup.find("a", { 176 | "data-za-a": "click_answer_collected_count" 177 | }) 178 | if element is None: 179 | return 0 180 | else: 181 | return int(element.get_text()) 182 | 183 | @property 184 | def collections(self): 185 | """获取包含该答案的收藏夹 186 | 187 | :return: 包含该答案的收藏夹 188 | :rtype: Collection.Iterable 189 | 190 | collect_num 未必等于 len(collections),比如: 191 | https://www.zhihu.com/question/20064699/answer/13855720 192 | 显示被收藏 38 次,但只有 30 个收藏夹 193 | """ 194 | import time 195 | gotten_feed_num = 20 196 | offset = 0 197 | data = { 198 | 'method':'next', 199 | '_xsrf': self.xsrf 200 | } 201 | while gotten_feed_num >= 10: 202 | data['params'] = "{\"answer_url\": %d,\"offset\": %d}" % (self.id, offset) 203 | res = self._session.post(url=Get_Collection_Url, data=data) 204 | gotten_feed_num = len(res.json()['msg']) 205 | offset += gotten_feed_num 206 | soup = BeautifulSoup(''.join(res.json()['msg'])) 207 | for zm_item in soup.find_all('div', class_='zm-item'): 208 | url = Zhihu_URL + zm_item.h2.a['href'] 209 | name = zm_item.h2.a.text 210 | links = zm_item.div.find_all('a') 211 | owner = Author(links[0]['href'], session=self._session) 212 | follower_num = int(links[1].text.split()[0]) 213 | yield Collection(url, owner=owner, name=name, 214 | 
follower_num=follower_num, 215 | session=self._session) 216 | 217 | time.sleep(0.2) # prevent from posting too quickly 218 | 219 | def save(self, filepath=None, filename=None, mode="html"): 220 | """保存答案为Html文档或markdown文档. 221 | 222 | :param str filepath: 要保存的文件所在的目录, 223 | 不填为当前目录下以问题标题命名的目录, 设为"."则为当前目录。 224 | :param str filename: 要保存的文件名, 225 | 不填则默认为 所在问题标题 - 答主名.html/md。 226 | 如果文件已存在,自动在后面加上数字区分。 227 | **自定义文件名时请不要输入后缀 .html 或 .md。** 228 | :param str mode: 保存类型,可选 `html` 、 `markdown` 、 `md` 。 229 | :return: 无 230 | :rtype: None 231 | """ 232 | if mode not in ["html", "md", "markdown"]: 233 | raise ValueError("`mode` must be 'html', 'markdown' or 'md'," 234 | " got {0}".format(mode)) 235 | file = get_path(filepath, filename, mode, self.question.title, 236 | self.question.title + '-' + self.author.name) 237 | with open(file, 'wb') as f: 238 | if mode == "html": 239 | f.write(self.content.encode('utf-8')) 240 | else: 241 | import html2text 242 | h2t = html2text.HTML2Text() 243 | h2t.body_width = 0 244 | f.write(h2t.handle(self.content).encode('utf-8')) 245 | 246 | def _parse_author_soup(self, soup): 247 | from .author import Author, ANONYMOUS 248 | 249 | author_tag = soup.find('div', class_='body') 250 | if author_tag.string is None: 251 | author_name = author_tag.div.a['title'] 252 | author_url = author_tag.div.a['href'] 253 | author_motto = author_tag.div.span.text 254 | photo_url = PROTOCOL + soup.a.img['src'].replace('_m', '_r') 255 | numbers_tag = soup.find_all('li') 256 | numbers = [int(re_get_number.match(x.get_text()).group(1)) 257 | for x in numbers_tag] 258 | # noinspection PyTypeChecker 259 | return Author(author_url, author_name, author_motto, None, 260 | numbers[2], numbers[3], numbers[0], numbers[1], 261 | photo_url, session=self._session) 262 | else: 263 | return ANONYMOUS 264 | 265 | @property 266 | @check_soup('_comment_num') 267 | def comment_num(self): 268 | """ 269 | :return: 答案下评论的数量 270 | :rtype: int 271 | """ 272 | comment = 
self.soup.select_one("div.answer-actions a.toggle-comment") 273 | comment_num_string = comment.text 274 | number = comment_num_string.split()[0] 275 | return int(number) if number.isdigit() else 0 276 | 277 | @property 278 | def comments(self): 279 | """获取答案下的所有评论. 280 | 281 | :return: 答案下的所有评论,返回生成器 282 | :rtype: Comments.Iterable 283 | """ 284 | import math 285 | from .author import Author, ANONYMOUS 286 | from .comment import Comment 287 | 288 | api_url = Get_Answer_Comment_URL.format(self.aid) 289 | page = pages = 1 290 | while page <= pages: 291 | res = self._session.get(api_url + '?page=' + str(page)) 292 | if page == 1: 293 | total = int(res.json()['paging']['totalCount']) 294 | if total == 0: 295 | return 296 | pages = math.ceil(total / 30) 297 | page += 1 298 | 299 | comment_items = res.json()['data'] 300 | for comment_item in comment_items: 301 | comment_id = comment_item['id'] 302 | content = comment_item['content'] 303 | upvote_num = comment_item['likesCount'] 304 | time_string = comment_item['createdTime'][:19] 305 | time = datetime.strptime(time_string, "%Y-%m-%dT%H:%M:%S") 306 | 307 | if comment_item['author'].get('url') is not None: 308 | a_url = comment_item['author']['url'] 309 | a_name = comment_item['author']['name'] 310 | photo_url_tmp = comment_item['author']['avatar']['template'] 311 | photo_url_id = comment_item['author']['avatar']['id'] 312 | a_photo_url = photo_url_tmp.replace( 313 | '{id}', photo_url_id).replace('_{size}', '') 314 | author_obj = Author(a_url, a_name, photo_url=a_photo_url, 315 | session=self._session) 316 | else: 317 | author_obj = ANONYMOUS 318 | 319 | yield Comment(comment_id, self, author_obj, upvote_num, content, time) 320 | 321 | @property 322 | def latest_comments(self): 323 | """获取答案下的所有评论。较新的评论先返回。 324 | 使用该方法比 ``reversed(list(answer.comments))`` 效率高 325 | 因为现在靠后的热门评论会被挪到前面,所以返回的评论未必严格满足时间先后关系 326 | 327 | :return: 答案下的所有评论,返回生成器 328 | :rtype: Comments.Iterable 329 | """ 330 | import math 331 | from .author import 
Author, ANONYMOUS 332 | from .comment import Comment 333 | 334 | if self.comment_num == 0: 335 | return 336 | pages = math.ceil(self.comment_num / 30) 337 | api_url = Get_Answer_Comment_URL.format(self.aid) 338 | for page in range(pages, 0, -1): 339 | res = self._session.get(api_url + '?page=' + str(page)) 340 | comment_items = res.json()['data'] 341 | for comment_item in reversed(comment_items): 342 | comment_id = comment_item['id'] 343 | content = comment_item['content'] 344 | upvote_num = comment_item['likesCount'] 345 | time_string = comment_item['createdTime'][:19] 346 | time = datetime.strptime(time_string, "%Y-%m-%dT%H:%M:%S") 347 | 348 | if comment_item['author'].get('url') != None: 349 | a_url = comment_item['author']['url'] 350 | a_name = comment_item['author']['name'] 351 | photo_url_tmp = comment_item['author']['avatar']['template'] 352 | photo_url_id = comment_item['author']['avatar']['id'] 353 | a_photo_url = photo_url_tmp.replace( 354 | '{id}', photo_url_id).replace('_{size}', '') 355 | author_obj = Author(a_url, a_name, photo_url=a_photo_url, 356 | session=self._session) 357 | else: 358 | author_obj = ANONYMOUS 359 | 360 | yield Comment(comment_id, self, author_obj, upvote_num, content, time) 361 | 362 | def refresh(self): 363 | """刷新 Answer object 的属性. 364 | 例如赞同数增加了, 先调用 ``refresh()`` 365 | 再访问 upvote_num属性, 可获得更新后的赞同数. 
class BaseZhihu:
    """Shared scaffolding for scraped objects: fetch the object's page with
    the bound session and parse it into ``self.soup``."""

    def _gen_soup(self, content):
        self.soup = BeautifulSoup(content)

    def _get_content(self):
        """GET the object's page and return the raw body.

        Side effect: sets ``self._deleted`` for Answer/Question objects
        based on the response (Zhihu redirects deleted answers off the
        /answer/ path; deleted questions answer with 404).
        """
        # Question objects keep their canonical address in _url
        url = self._url if hasattr(self, '_url') else self.url
        if url.endswith('/'):
            resp = self._session.get(url[:-1])
        else:
            resp = self._session.get(url)

        class_name = self.__class__.__name__
        if class_name == 'Answer':
            self._deleted = 'answer' not in resp.url
        elif class_name == 'Question':
            self._deleted = resp.status_code == 404

        return resp.content

    def _make_soup(self):
        # lazy: only fetch when we have a url and no parsed page yet
        if self.url and not self.soup:
            self._gen_soup(self._get_content())

    def refresh(self):
        """Re-fetch the page and rebuild ``self.soup``."""
        self._gen_soup(self._get_content())

    @classmethod
    def from_html(cls, content):
        """Alternate constructor: build the object from raw page content
        instead of fetching it over the network."""
        obj = cls(url=None)
        obj._gen_soup(content)
        return obj


class JsonAsSoupMixin:
    """For objects whose 'page' is a JSON API response: store the parsed
    JSON in ``self.soup`` so ``from_html`` keeps a uniform interface."""

    def _gen_soup(self, content):
        # bytes/str come from `from_html`; a dict comes from _get_content.
        # json.loads accepts bytes directly (with encoding auto-detection)
        # since Python 3.6 — no need to round-trip through a fake
        # requests.Response object.
        if isinstance(content, (bytes, str)):
            self.soup = json.loads(content)
        else:
            self.soup = content
55 | 56 | :param str email: 邮箱 57 | :param str password: 密码 58 | :param str captcha: 验证码, 默认为None,表示不提交验证码 59 | :return: 60 | ======== ======== ============== ==================== 61 | 元素序号 元素类型 意义 说明 62 | ======== ======== ============== ==================== 63 | 0 int 是否成功 0为成功,1为失败 64 | 1 str 失败原因 登录成功则为空字符串 65 | 2 str cookies字符串 登录失败则为空字符串 66 | ======== ======== ============== ==================== 67 | 68 | :rtype: (int, str, str) 69 | """ 70 | data = {'email': email, 'password': password, 71 | 'remember_me': 'true'} 72 | if captcha is not None: 73 | data['captcha'] = captcha 74 | r = self._session.post(Login_URL, data=data) 75 | j = r.json() 76 | code = int(j['r']) 77 | message = j['msg'] 78 | cookies_str = json.dumps(self._session.cookies.get_dict()) \ 79 | if code == 0 else '' 80 | return code, message, cookies_str 81 | 82 | def login_with_cookies(self, cookies): 83 | """使用cookies文件或字符串登录知乎 84 | 85 | :param str cookies: 86 | ============== =========================== 87 | 参数形式 作用 88 | ============== =========================== 89 | 文件名 将文件内容作为cookies字符串 90 | cookies 字符串 直接提供cookies字符串 91 | ============== =========================== 92 | :return: 无 93 | :rtype: None 94 | """ 95 | if os.path.isfile(cookies): 96 | with open(cookies) as f: 97 | cookies = f.read() 98 | cookies_dict = json.loads(cookies) 99 | self._session.cookies.update(cookies_dict) 100 | 101 | def login_in_terminal(self, need_captcha=False, use_getpass=True): 102 | """不使用cookies,在终端中根据提示登陆知乎 103 | 104 | :param bool need_captcha: 是否要求输入验证码,如果登录失败请设为 True 105 | :param bool use_getpass: 是否使用安全模式输入密码,默认为 True, 106 | 如果在某些 Windows IDE 中无法正常输入密码,请把此参数设置为 False 试试 107 | :return: 如果成功返回cookies字符串 108 | :rtype: str 109 | """ 110 | print('====== zhihu login =====') 111 | 112 | email = input('email: ') 113 | if use_getpass: 114 | password = getpass.getpass('password: ') 115 | else: 116 | password = input("password: ") 117 | 118 | if need_captcha: 119 | captcha_data = self.get_captcha() 120 | with 
open('captcha.gif', 'wb') as f: 121 | f.write(captcha_data) 122 | 123 | print('please check captcha.gif for captcha') 124 | captcha = input('captcha: ') 125 | os.remove('captcha.gif') 126 | else: 127 | captcha = None 128 | 129 | print('====== logging.... =====') 130 | 131 | code, msg, cookies = self.login(email, password, captcha) 132 | 133 | if code == 0: 134 | print('login successfully') 135 | else: 136 | print('login failed, reason: {0}'.format(msg)) 137 | 138 | return cookies 139 | 140 | def create_cookies(self, file, need_captcha=False, use_getpass=True): 141 | """在终端中执行登录流程,将 cookies 存放在文件中以便后续使用 142 | 143 | :param str file: 文件名 144 | :param bool need_captcha: 登录过程中是否使用验证码, 默认为 False 145 | :param bool use_getpass: 是否使用安全模式输入密码,默认为 True, 146 | 如果在某些 Windows IDE 中无法正常输入密码,请把此参数设置为 False 试试 147 | :return: 148 | """ 149 | cookies_str = self.login_in_terminal(need_captcha, use_getpass) 150 | if cookies_str: 151 | with open(file, 'w') as f: 152 | f.write(cookies_str) 153 | print('cookies file created.') 154 | else: 155 | print('can\'t create cookies.') 156 | 157 | # ===== network staff ===== 158 | 159 | def set_proxy(self, proxy): 160 | """设置代理 161 | 162 | :param str proxy: 使用 "http://example.com:port" 的形式 163 | :return: 无 164 | :rtype: None 165 | 166 | :说明: 167 | 由于一个 :class:`.ZhihuClient` 对象和它创建出来的其他知乎对象共用 168 | 一个Session,所以调用这个方法也会将所有生成出的知乎类设置上代理。 169 | """ 170 | self._session.proxies.update({'http': proxy}) 171 | 172 | def set_proxy_pool(self, proxies, auth=None, https=True): 173 | """设置代理池 174 | 175 | :param proxies: proxy列表, 形如 ``["ip1:port1", "ip2:port2"]`` 176 | :param auth: 如果代理需要验证身份, 通过这个参数提供, 比如 177 | :param https: 默认为 True, 传入 False 则不设置 https 代理 178 | .. 
code-block:: python 179 | 180 | from requests.auth import HTTPProxyAuth 181 | auth = HTTPProxyAuth('laike9m', '123') 182 | :说明: 183 | 每次 GET/POST 请求会随机选择列表中的代理 184 | """ 185 | from random import choice 186 | 187 | if https: 188 | self.proxies = [{'http': p, 'https': p} for p in proxies] 189 | else: 190 | self.proxies = [{'http': p} for p in proxies] 191 | 192 | def get_with_random_proxy(url, **kwargs): 193 | proxy = choice(self.proxies) 194 | kwargs['proxies'] = proxy 195 | if auth: 196 | kwargs['auth'] = auth 197 | return self._session.original_get(url, **kwargs) 198 | 199 | def post_with_random_proxy(url, *args, **kwargs): 200 | proxy = choice(self.proxies) 201 | kwargs['proxies'] = proxy 202 | if auth: 203 | kwargs['auth'] = auth 204 | return self._session.original_post(url, *args, **kwargs) 205 | 206 | self._session.original_get = self._session.get 207 | self._session.get = get_with_random_proxy 208 | self._session.original_post = self._session.post 209 | self._session.post = post_with_random_proxy 210 | 211 | def remove_proxy_pool(self): 212 | """ 213 | 移除代理池 214 | """ 215 | self.proxies = None 216 | self._session.get = self._session.original_get 217 | self._session.post = self._session.original_post 218 | del self._session.original_get 219 | del self._session.original_post 220 | 221 | # ===== getter staff ====== 222 | 223 | def me(self): 224 | """获取使用特定 cookies 的 Me 实例 225 | 226 | :return: cookies对应的Me对象 227 | :rtype: Me 228 | """ 229 | from .me import Me 230 | headers = dict(Default_Header) 231 | headers['Host'] = 'zhuanlan.zhihu.com' 232 | res = self._session.get(Get_Me_Info_Url, headers=headers) 233 | json_data = res.json() 234 | url = json_data['profileUrl'] 235 | name = json_data['name'] 236 | motto = json_data['bio'] 237 | photo = json_data['avatar']['template'].format( 238 | id=json_data['avatar']['id'], size='r') 239 | return Me(url, name, motto, photo, session=self._session) 240 | 241 | def __getattr__(self, item: str): 242 | """本函数用于获取各种类,如 `Answer` 
`Question` 等. 243 | 244 | :支持的形式有: 245 | 1. client.answer() 246 | 2. client.author() 247 | 3. client.collection() 248 | 4. client.column() 249 | 5. client.post() 250 | 6. client.question() 251 | 7. client.topic() 252 | 253 | 参数均为对应页面的url,返回对应的类的实例。 254 | """ 255 | def getter(url): 256 | return getattr(module, item.capitalize())(url, 257 | session=self._session) 258 | attr_list = ['answer', 'author', 'collection', 259 | 'column', 'post', 'question', 'topic'] 260 | if item.lower() in attr_list: 261 | module = importlib.import_module('.'+item.lower(), 'zhihu') 262 | return getter 263 | -------------------------------------------------------------------------------- /zhihu/collection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from .common import * 5 | from .base import BaseZhihu 6 | 7 | 8 | class Collection(BaseZhihu): 9 | 10 | """收藏夹,请使用``ZhihuClient.collection``方法构造对象.""" 11 | 12 | @class_common_init(re_collection_url) 13 | def __init__(self, url, owner=None, name=None, follower_num=None, 14 | session=None): 15 | """创建收藏夹类实例. 16 | 17 | :param str url: 收藏夹主页url,必须 18 | :param Author owner: 收藏夹拥有者,可选 19 | :param str name: 收藏夹标题,可选 20 | :param int follower_num: 收藏夹关注人数,可选 21 | :param Session session: 使用的网络会话,为空则使用新会话。 22 | :return: 收藏夹对象 23 | :rtype: Collection 24 | """ 25 | self.url = url 26 | self._session = session 27 | self.soup = None 28 | self._name = name 29 | self._owner = owner 30 | self._follower_num = follower_num 31 | self._id = int(re.match(r'.*/(\d+)', self.url).group(1)) 32 | 33 | @property 34 | def id(self): 35 | """获取收藏夹id(网址最后的部分). 
    @property
    @check_soup('_cid')
    def cid(self):
        """Internal collection id (rarely needed).

        :return: internal id
        :rtype: int
        """
        return int(re_get_number.match(
            self.soup.find('a', attrs={'name': 'focus'})['id']).group(1))

    @property
    @check_soup('_xsrf')
    def xsrf(self):
        """Zhihu's anti-xsrf token scraped from the page (rarely needed).

        :return: xsrf token
        :rtype: str
        """
        return self.soup.find(
            'input', attrs={'name': '_xsrf'})['value']

    @property
    @check_soup('_name')
    def name(self):
        """Title of the collection.

        :return: collection name
        :rtype: str
        """
        return re_del_empty_line.match(
            self.soup.find('h2', id='zh-fav-head-title').text).group(1)

    @property
    @check_soup('_owner')
    def owner(self):
        """Owner of the collection.

        :return: the owner as an Author object
        :rtype: Author
        """
        from .author import Author

        a = self.soup.find('h2', class_='zm-list-content-title').a
        name = a.text
        url = Zhihu_URL + a['href']
        motto = self.soup.find(
            'div', id='zh-single-answer-author-info').div.text
        photo_url = PROTOCOL + self.soup.find(
            'img', class_='zm-list-avatar-medium')['src'].replace('_m', '_r')
        return Author(url, name, motto, photo_url=photo_url,
                      session=self._session)

    @property
    @check_soup('_follower_num')
    def follower_num(self):
        """Number of users following this collection.

        :return: follower count
        :rtype: int
        """
        href = re_collection_url_split.match(self.url).group(1)
        return int(self.soup.find('a', href=href + 'followers').text)

    @property
    def followers(self):
        """Users following this collection.

        :return: followers, as a generator
        :rtype: Author.Iterable
        """
        self._make_soup()
        followers_url = self.url + 'followers'
        for x in common_follower(followers_url, self.xsrf, self._session):
            yield x

    @property
    def questions(self):
        """All questions whose answers are in this collection.

        :return: questions, as a generator
        :rtype: Question.Iterable
        """
        self._make_soup()
        # noinspection PyTypeChecker
        for question in self._page_get_questions(self.soup):
            yield question
        i = 2
        while True:
            soup = BeautifulSoup(self._session.get(
                self.url[:-1] + '?page=' + str(i)).text)
            for question in self._page_get_questions(soup):
                # 0 is the page parser's sentinel for "empty page" -> done
                if question == 0:
                    return
                yield question
            i += 1

    @property
    def answers(self):
        """All answers collected in this collection.

        :return: answers, as a generator
        :rtype: Answer.Iterable
        """
        self._make_soup()
        # noinspection PyTypeChecker
        for answer in self._page_get_answers(self.soup):
            yield answer
        i = 2
        while True:
            soup = BeautifulSoup(self._session.get(
                self.url[:-1] + '?page=' + str(i)).text)
            for answer in self._page_get_answers(soup):
                # 0 is the page parser's sentinel for "empty page" -> done
                if answer == 0:
                    return
                yield answer
            i += 1

    @property
    def logs(self):
        """The collection's activity log.

        :return: log entries, as a generator
        :rtype: CollectActivity.Iterable
        """
        import time
        from datetime import datetime
        from .answer import Answer
        from .question import Question
        from .acttype import CollectActType

        self._make_soup()
        # the log endpoint returns 20 items per batch; a short batch
        # means we reached the end
        gotten_feed_num = 20
        offset = 0
        data = {
            'start': 0,
            '_xsrf': self.xsrf
        }
        api_url = self.url + 'log'
        while gotten_feed_num == 20:
            data['offset'] = offset
            res = self._session.post(url=api_url, data=data)
            gotten_feed_num = res.json()['msg'][0]
            soup = BeautifulSoup(res.json()['msg'][1])
            offset += gotten_feed_num
            zm_items = soup.find_all('div', class_='zm-item')

            for zm_item in zm_items:
                act_time = datetime.strptime(zm_item.find('time').text, "%Y-%m-%d %H:%M:%S")
                # <ins> marks an added answer, <del> a removed one
                if zm_item.find('ins'):
                    link = zm_item.find('ins').a
                    act_type = CollectActType.INSERT_ANSWER
                elif zm_item.find('del'):
                    link = zm_item.find('del').a
                    act_type = CollectActType.DELETE_ANSWER
                else:
                    continue
                try:
                    answer_url = Zhihu_URL + link['href']
                    question_url = re_a2q.match(answer_url).group(1)
                    question = Question(question_url, link.text)
                    answer = Answer(
                        answer_url, question, session=self._session)
                    yield CollectActivity(
                        act_type, act_time, self.owner, self, answer)
                except AttributeError:
                    # entry is not an answer link -> collection creation
                    act_type = CollectActType.CREATE_COLLECTION
                    yield CollectActivity(
                        act_type, act_time, self.owner, self)
            # paging cursor: numeric suffix of the last item's element id
            data['start'] = zm_items[-1]['id'][8:]
            time.sleep(0.5)

    def _page_get_questions(self, soup):
        """Parse one listing page into Question objects.

        Yields the sentinel 0 (and stops) when the page has no items, so
        callers can detect the end of pagination.
        """
        from .question import Question

        question_tags = soup.find_all("div", class_="zm-item")
        if len(question_tags) == 0:
            yield 0
            return
        else:
            for question_tag in question_tags:
                if question_tag.h2 is not None:
                    question_title = question_tag.h2.a.text
                    question_url = Zhihu_URL + question_tag.h2.a['href']
                    yield Question(question_url, question_title,
                                   session=self._session)

    def _page_get_answers(self, soup):
        """Parse one listing page into Answer objects.

        Yields the sentinel 0 (and stops) when the page has no items, so
        callers can detect the end of pagination.
        """
        from .question import Question
        from .author import Author, ANONYMOUS
        from .answer import Answer

        answer_tags = soup.find_all("div", class_="zm-item")
        if len(answer_tags) == 0:
            yield 0
            return
        else:
            question = None
            for tag in answer_tags:
                # skip entries such as "answer pending revision"
                url_tag = tag.find('a', class_='answer-date-link')
                if url_tag is None:
                    reason = tag.find('div', id='answer-status').p.text
                    print("pass a answer, reason %s ." % reason)
                    continue
                # an <h2> starts a new question group; following answers
                # without one belong to the same question
                if tag.h2 is not None:
                    question_title = tag.h2.a.text
                    question_url = Zhihu_URL + tag.h2.a['href']
                    question = Question(question_url, question_title,
                                        session=self._session)
                answer_url = Zhihu_URL + url_tag['href']
                div = tag.find('div', class_='zm-item-answer-author-info')
                author_link = div.find('a', class_='author-link')
                if author_link is not None:
                    author_url = Zhihu_URL + author_link['href']
                    author_name = author_link.text
                    motto_span = div.find('span', class_='bio')
                    author_motto = motto_span['title'] if motto_span else ''
                    author = Author(author_url, author_name, author_motto,
                                    session=self._session)
                else:
                    author = ANONYMOUS
                upvote_num = tag.find('a', class_='zm-item-vote-count').text
                if upvote_num.isdigit():
                    upvote_num = int(upvote_num)
                else:
                    upvote_num = None
                answer = Answer(answer_url, question, author,
                                upvote_num, session=self._session)
                yield answer
class CollectActivity:
    """One entry of a collection's log. Constructed by ``Collection.logs``."""

    def __init__(self, type, time, owner, collection, answer=None):
        """Create a log-entry instance.

        :param acttype.CollectActType type: kind of activity
        :param datetime.datetime time: when the activity happened
        :param Author owner: owner of the collection
        :param Collection collection: the collection acted on
        :param Answer answer: the answer involved; None for a
            collection-creation entry
        :return: CollectActivity
        """
        self._type = type
        self._time = time
        self._owner = owner
        self._collection = collection
        self._answer = answer

    @property
    def type(self):
        """
        :return: kind of activity, see :class:`.CollectActType`
        :rtype: :class:`.CollectActType`
        """
        return self._type

    @property
    def answer(self):
        """
        :return: the added/removed answer, or None for a
            collection-creation entry
        :rtype: Answer or None
        """
        return self._answer

    @property
    def time(self):
        """
        :return: when the activity happened
        :rtype: datetime.datetime
        """
        return self._time

    @property
    def owner(self):
        """
        :return: owner of the collection
        :rtype: Author
        """
        return self._owner

    @property
    def collection(self):
        """
        :return: the collection acted on
        :rtype: Collection
        """
        return self._collection


class Column(JsonAsSoupMixin, BaseZhihu):

    """A Zhihu column (zhuanlan); construct via ``ZhihuClient.column``."""

    @class_common_init(re_column_url)
    def __init__(self, url, name=None, follower_num=None,
                 post_num=None, session=None):
        """Create a column instance.

        :param str url: column url
        :param str name: column name, optional
        :param int follower_num: follower count, optional
        :param int post_num: number of posts, optional
        :param Session session: network session; a new one is created if None
        :return: column object
        :rtype: Column
        """
        self._in_name = re_column_url.match(url).group(1)
        self.url = url
        self._session = session
        self._name = name
        self._follower_num = follower_num
        self._post_num = post_num

    def _make_soup(self):
        if self.soup is None:
            json = self._get_content()
            self._gen_soup(json)

    def _get_content(self):
        # the column API lives on zhuanlan.zhihu.com; swap the Host
        # header for this request only, then restore it
        origin_host = self._session.headers.get('Host')
        self._session.headers.update(Host='zhuanlan.zhihu.com')
        res = self._session.get(Column_Data.format(self._in_name))
        self._session.headers.update(Host=origin_host)
        return res.json()

    @property
    @check_soup('_name')
    def name(self):
        """Column name.

        :return: column name
        :rtype: str
        """
        return self.soup['name']

    @property
    @check_soup('_follower_num')
    def follower_num(self):
        """Number of followers.

        :return: follower count
        :rtype: int
        """
        return int(self.soup['followersCount'])

    @property
    @check_soup('_post_num')
    def post_num(self):
        """Number of posts in the column.

        :return: post count
        :rtype: int
        """
        return int(self.soup['postsCount'])

    @property
    def posts(self):
        """All posts of this column.

        :return: posts, as a generator
        :rtype: Post.Iterable
        """
        origin_host = self._session.headers.get('Host')
        # the API pages by 10
        for offset in range(0, (self.post_num - 1) // 10 + 1):
            self._session.headers.update(Host='zhuanlan.zhihu.com')
            res = self._session.get(
                Column_Posts_Data.format(self._in_name, offset * 10))
            soup = res.json()
            self._session.headers.update(Host=origin_host)
            for post in soup:
                yield self._parse_post_data(post)

    def _parse_post_data(self, post):
        """Build a Post object from one item of the posts API response."""
        from .author import Author
        from .post import Post

        url = Column_Url + post['url']
        template = post['author']['avatar']['template']
        photo_id = post['author']['avatar']['id']
        photo_url = template.format(id=photo_id, size='r')
        author = Author(post['author']['profileUrl'],
                        post['author']['name'], post['author']['bio'],
                        photo_url=photo_url, session=self._session)
        title = post['title']
        upvote_num = post['likesCount']
        comment_num = post['commentsCount']
        # (removed a stray debug `print(url)` left over from development)
        return Post(url, self, author, title, upvote_num, comment_num,
                    session=self._session)
class Comment:

    """A comment on an answer; yielded by the ``Answer.comments`` iterator,
    not usually constructed directly."""

    def __init__(self, cid, answer, author,
                 upvote_num, content, time, group_id=None):
        """Create a comment instance.

        :param int cid: comment id
        :param Answer answer: the answer this comment belongs to
        :param Author author: author of the comment
        :param int upvote_num: number of upvotes
        :param str content: comment text
        :param datetime.datetime time: creation time (exposed as the
            ``creation_time`` attribute)
        :param int group_id: id of the comment group, optional
        :return: comment object
        :rtype: Comment
        """

        self.cid = cid
        self.answer = answer
        self.author = author
        self.upvote_num = upvote_num
        self.content = content
        self.creation_time = time
        self._group_id = group_id
Author_Get_More_Followers_URL = Zhihu_URL + '/node/ProfileFollowersListV2' 34 | Author_Get_More_Followees_URL = Zhihu_URL + '/node/ProfileFolloweesListV2' 35 | Author_Get_More_Follow_Column_URL = Zhihu_URL + \ 36 | '/node/ProfileFollowedColumnsListV2' 37 | Author_Get_More_Follow_Topic_URL = Zhihu_URL + \ 38 | '/people/{0}/topics' 39 | 40 | PROTOCOL = '' 41 | 42 | Column_Url = 'http://zhuanlan.zhihu.com' 43 | Column_API = Column_Url + '/api/columns' 44 | Column_Data = Column_API + '/{0}' 45 | Column_Posts_Data = Column_API + '/{0}/posts?limit=10&offset={1}' 46 | Column_Post_Data = Column_Url + '/api/posts/{0}' 47 | Post_Get_Upvoter = Column_Post_Data + '/likers' 48 | 49 | Topic_Url = Zhihu_URL + '/topic' 50 | Topic_Get_Children_Url = Topic_Url + '/{0}/organize/entire' 51 | Topic_Get_More_Follower_Url = Topic_Url + '/{0}/followers' 52 | Topic_Questions_Url = Topic_Url + '/{0}/questions' 53 | Topic_Unanswered_Question_Url = Topic_Url + '/{0}/unanswered' 54 | Topic_Top_Answers_Url = Topic_Url + '/{0}/top-answers' 55 | Topic_Hot_Questions_Url = Topic_Url + '/{0}/hot' 56 | Topic_Newest_Url = Topic_Url + '/{0}/newest' 57 | 58 | Get_Me_Info_Url = Column_Url + '/api/me' 59 | Upvote_Answer_Url = Zhihu_URL + '/node/AnswerVoteBarV2' 60 | Upvote_Article_Url = Column_API + '/{0}/posts/{1}/rating' 61 | Follow_Author_Url = Zhihu_URL + '/node/MemberFollowBaseV2' 62 | Follow_Question_Url = Zhihu_URL + '/node/QuestionFollowBaseV2' 63 | Follow_Topic_Url = Zhihu_URL + '/node/TopicFollowBaseV2' 64 | Follow_Collection_Url = Zhihu_URL + '/collection/follow' 65 | Unfollow_Collection_Url = Zhihu_URL + '/collection/unfollow' 66 | Thanks_Url = Zhihu_URL + '/answer/thanks' 67 | Cancel_Thanks_Url = Zhihu_URL + '/answer/cancel_thanks' 68 | Send_Message_Url = Zhihu_URL + '/inbox/post' 69 | Unhelpful_Url = Zhihu_URL + '/answer/not_helpful' 70 | Cancel_Unhelpful_Url = Zhihu_URL + '/answer/helpful' 71 | Get_Collection_Url = Zhihu_URL + '/node/AnswerFavlists' 72 | 73 | re_question_url = re.compile( 
def check_soup(attr, soup_type='_make_soup'):
    """Decorator factory for cached scraped properties.

    Returns the cached value of ``self.<attr>`` if present; otherwise
    triggers ``self.<soup_type>()`` to fetch/parse the page, computes the
    value, caches it on the instance and returns it.
    """
    def real(func):
        @functools.wraps(func)
        def wrapper(self):
            # noinspection PyTypeChecker
            value = getattr(self, attr, None)
            if value is None:
                if soup_type == '_make_soup':
                    getattr(self, soup_type)()
                elif self.soup is None:
                    getattr(self, soup_type)()
                value = func(self)
                setattr(self, attr, value)
            return value

        return wrapper

    return real


def class_common_init(url_re, allowed_none=True, trailing_slash=True):
    """Decorator factory for the scraped classes' ``__init__``.

    Validates the url against ``url_re``, normalizes the trailing slash,
    supplies a default retrying Session when none is given and resets
    ``self.soup``.
    """
    def real(func):
        @functools.wraps(func)
        def wrapper(self, url, *args, **kwargs):
            if url is None and not allowed_none:
                # str(url) — concatenating None itself raised TypeError
                raise ValueError('Invalid Url: ' + str(url))
            if url is not None:
                if url_re.match(url) is None:
                    raise ValueError('Invalid URL: ' + url)
                if not url.endswith('/') and trailing_slash:
                    url += '/'
            if 'session' not in kwargs.keys() or kwargs['session'] is None:
                kwargs['session'] = Session()
                kwargs['session'].mount('https://', Retry(5))
                kwargs['session'].mount('http://', Retry(5))
            self.soup = None
            return func(self, url, *args, **kwargs)

        return wrapper

    return real


def remove_invalid_char(text):
    """Strip characters that are invalid in file names, typically used to
    sanitize a title before saving to disk.

    :param str text: string to sanitize
    :return: sanitized string
    :rtype: str
    """
    invalid_chars = frozenset('/\\:*?"<>|\n')
    # single pass with join instead of quadratic `res += char`
    return ''.join(char for char in text if char not in invalid_chars)


def parser_author_from_tag(author):
    """Extract (url, name, motto, photo_url) from an answer's author tag;
    anonymous authors yield (None, '匿名用户', '', '')."""
    author_link = author.find('a', class_='author-link')
    if author_link is None:
        return None, '匿名用户', '', ''
    else:
        author_name = author_link.text
        motto_span = author.find('span', class_='bio')
        author_motto = motto_span['title'] \
            if motto_span is not None else ''
        author_url = Zhihu_URL + author_link['href']
        avatar_link = author.find('a', class_='avatar-link')
        photo_url = PROTOCOL + avatar_link.img['src'].replace('_s', '_r')
        return author_url, author_name, author_motto, photo_url


def parser_author_from_comment(author):
    """Extract (url, name, photo_url) from a comment's author tag;
    anonymous authors yield (None, '匿名用户', '')."""
    author_avatar = author.find('a', class_='zm-item-link-avatar')
    if author_avatar is None:
        return None, '匿名用户', ''
    else:
        author_link = author.find('a', class_='zg-link')
        author_name = author_link.text
        author_url = author_link['href']
        avatar_link = author.find('img', class_='zm-item-img-avatar')
        photo_url = PROTOCOL + avatar_link['src'].replace('_s', '_r')
        return author_url, author_name, photo_url
def get_path(path, filename, mode, default_path, default_name):
    """Resolve a non-clashing save path ``<path>/<filename>.<mode>``.

    Falls back to ``cwd/<default_path>`` when ``path`` is None and to
    ``default_name`` when ``filename`` is None (both sanitized), creates
    the directory if missing, and appends a numeric suffix (name1, name2,
    ...) until an unused file name is found.

    :param str path: target directory, or None
    :param str filename: base file name without extension, or None
    :param str mode: file extension, e.g. 'md'
    :param str default_path: directory name used when path is None
    :param str default_name: file name used when filename is None
    :return: full path including extension
    :rtype: str
    """
    if path is None:
        path = os.path.join(
            os.getcwd(), remove_invalid_char(default_path))
    if filename is None:
        filename = remove_invalid_char(default_name)
    if not os.path.isdir(path):
        os.makedirs(path)
    candidate = filename
    i = 0
    while os.path.isfile(os.path.join(path, candidate) + '.' + mode):
        i += 1
        candidate = filename + str(i)
    return os.path.join(path, candidate) + '.' + mode
def clone_bs4_elem(el):
    """Clone a bs4 tag before modifying it.

    Code from `http://stackoverflow.com/questions/23057631/clone-element-with
    -beautifulsoup`

    :param el: a bs4 ``Tag`` or ``NavigableString`` to copy
    :return: a deep copy sharing no nodes with the original tree
    """
    # NavigableStrings are immutable leaf nodes: constructing a fresh
    # instance of the same type is a complete copy.
    if isinstance(el, NavigableString):
        return type(el)(el)

    copy = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
    # work around bug where there is no builder set
    # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
    copy.attrs = dict(el.attrs)
    for attr in ('can_be_empty_element', 'hidden'):
        setattr(copy, attr, getattr(el, attr))
    # recurse so every descendant is cloned as well
    for child in el.contents:
        copy.append(clone_bs4_elem(child))
    return copy
'vote_neutral', 41 | 'down': 'vote_down' 42 | } 43 | if vote not in mapping.keys(): 44 | raise ValueError('Invalid vote value: {0}'.format(vote)) 45 | if something.author.url == self.url: 46 | return False 47 | params = {'answer_id': str(something.aid)} 48 | data = { 49 | '_xsrf': something.xsrf, 50 | 'method': mapping[vote], 51 | 'params': json.dumps(params) 52 | } 53 | headers = dict(Default_Header) 54 | headers['Referer'] = something.question.url[:-1] 55 | res = self._session.post(Upvote_Answer_Url, 56 | headers=headers, data=data) 57 | return res.json()['r'] == 0 58 | elif isinstance(something, Post): 59 | mapping = { 60 | 'up': 'like', 61 | 'clear': 'none', 62 | 'down': 'dislike' 63 | } 64 | if vote not in mapping.keys(): 65 | raise ValueError('Invalid vote value: {0}'.format(vote)) 66 | if something.author.url == self.url: 67 | return False 68 | put_url = Upvote_Article_Url.format( 69 | something.column_in_name, something.slug) 70 | data = {'value': mapping[vote]} 71 | headers = { 72 | 'Content-Type': 'application/json;charset=utf-8', 73 | 'Host': 'zhuanlan.zhihu.com', 74 | 'Referer': something.url[:-1], 75 | 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; ' 76 | 'rv:39.0) Gecko/20100101 Firefox/39.0', 77 | 'X-XSRF-TOKEN': self._session.cookies.get('XSRF-TOKEN') 78 | } 79 | res = self._session.put(put_url, json.dumps(data), headers=headers) 80 | return res.status_code == 204 81 | else: 82 | raise ValueError('argument something need to be ' 83 | 'zhihu.Answer or zhihu.Post object.') 84 | 85 | def thanks(self, answer, thanks=True): 86 | """感谢或取消感谢回答 87 | 88 | :param Answer answer: 要感谢或取消感谢的回答 89 | :param thanks: True-->感谢,False-->取消感谢 90 | :return: 成功返回True,失败返回False 91 | :rtype: bool 92 | """ 93 | from .answer import Answer 94 | if isinstance(answer, Answer) is False: 95 | raise ValueError('argument answer need to be Zhihu.Answer object.') 96 | if answer.author.url == self.url: 97 | return False 98 | data = { 99 | '_xsrf': answer.xsrf, 100 | 'aid': 
answer.aid 101 | } 102 | res = self._session.post(Thanks_Url if thanks else Cancel_Thanks_Url, 103 | data=data) 104 | return res.json()['r'] == 0 105 | 106 | def follow(self, something, follow=True): 107 | """关注用户、问题、话题或收藏夹 108 | 109 | :param Author/Question/Topic something: 需要关注的对象 110 | :param bool follow: True-->关注,False-->取消关注 111 | :return: 成功返回True,失败返回False 112 | :rtype: bool 113 | """ 114 | from .question import Question 115 | from .topic import Topic 116 | from .collection import Collection 117 | if isinstance(something, Author): 118 | if something.url == self.url: 119 | return False 120 | data = { 121 | '_xsrf': something.xsrf, 122 | 'method': ' follow_member' if follow else 'unfollow_member', 123 | 'params': json.dumps({'hash_id': something.hash_id}) 124 | } 125 | res = self._session.post(Follow_Author_Url, data=data) 126 | return res.json()['r'] == 0 127 | elif isinstance(something, Question): 128 | data = { 129 | '_xsrf': something.xsrf, 130 | 'method': 'follow_question' if follow else 'unfollow_question', 131 | 'params': json.dumps({'question_id': str(something.qid)}) 132 | } 133 | res = self._session.post(Follow_Question_Url, data=data) 134 | return res.json()['r'] == 0 135 | elif isinstance(something, Topic): 136 | data = { 137 | '_xsrf': something.xsrf, 138 | 'method': 'follow_topic' if follow else 'unfollow_topic', 139 | 'params': json.dumps({'topic_id': something.tid}) 140 | } 141 | res = self._session.post(Follow_Topic_Url, data=data) 142 | return res.json()['r'] == 0 143 | elif isinstance(something, Collection): 144 | data = { 145 | '_xsrf': something.xsrf, 146 | 'favlist_id': something.cid 147 | } 148 | res = self._session.post( 149 | Follow_Collection_Url if follow else Unfollow_Collection_Url, 150 | data=data) 151 | return res.json()['r'] == 0 152 | else: 153 | raise ValueError('argument something need to be ' 154 | 'zhihu.Author, zhihu.Question' 155 | ', Zhihu.Topic or Zhihu.Collection object.') 156 | 157 | def add_comment(self, answer, 
content): 158 | """给指定答案添加评论 159 | 160 | :param Answer answer: 答案对象 161 | :param string content: 评论内容 162 | :return: 成功返回 True,失败返回 False 163 | :rtype: bool 164 | """ 165 | 166 | from .answer import Answer 167 | if isinstance(answer, Answer) is False: 168 | raise ValueError('argument answer need to be Zhihu.Answer object.') 169 | if not content: 170 | raise ValueError('answer content cannot be empty') 171 | data = { 172 | 'method': 'add_comment', 173 | 'params': json.dumps({'answer_id': answer.aid, 'content': content}), 174 | '_xsrf': answer.xsrf 175 | } 176 | res = self._session.post(Answer_Add_Comment_URL, 177 | data=data) 178 | return res.json()['r'] == 0 179 | 180 | def send_message(self, author, content): 181 | """发送私信给一个用户 182 | 183 | :param Author author: 接收私信用户对象 184 | :param string content: 发送给用户的私信内容 185 | :return: 成功返回 True,失败返回 False 186 | :rtype: bool 187 | """ 188 | if isinstance(author, Author) is False: 189 | raise ValueError('argument answer need to be Zhihu.Author object.') 190 | if not content: 191 | raise ValueError('answer content cannot be empty') 192 | if author.url == self.url: 193 | return False 194 | data = { 195 | 'member_id': author.hash_id, 196 | 'content': content, 197 | 'token': '', 198 | '_xsrf': author.xsrf 199 | } 200 | res = self._session.post(Send_Message_Url, 201 | data=data) 202 | return res.json()['r'] == 0 203 | 204 | def block(self, something, block=True): 205 | """屏蔽某个用户、话题 206 | 207 | :param Author/Topic something: 208 | :param block: True-->屏蔽,False-->取消屏蔽 209 | :return: 成功返回 True,失败返回 False 210 | :rtype: bool 211 | """ 212 | from .topic import Topic 213 | 214 | if isinstance(something, Author): 215 | 216 | if something.url == self.url: 217 | return False 218 | data = { 219 | '_xsrf': something.xsrf, 220 | 'action': 'add' if block else 'cancel', 221 | } 222 | block_author_url = something.url + 'block' 223 | res = self._session.post(block_author_url, data=data) 224 | return res.json()['r'] == 0 225 | elif 
isinstance(something, Topic): 226 | tid = something.tid 227 | data = { 228 | '_xsrf': something.xsrf, 229 | 'method': 'add' if block else 'del', 230 | 'tid': tid, 231 | } 232 | block_topic_url = 'http://www.zhihu.com/topic/ignore' 233 | res = self._session.post(block_topic_url, data=data) 234 | return res.status_code == 200 235 | else: 236 | raise ValueError('argument something need to be ' 237 | 'Zhihu.Author or Zhihu.Topic object.') 238 | 239 | def unhelpful(self, answer, unhelpful=True): 240 | """没有帮助或取消没有帮助回答 241 | 242 | :param Answer answer: 要没有帮助或取消没有帮助回答 243 | :param unhelpful: True-->没有帮助,False-->取消没有帮助 244 | :return: 成功返回 True,失败返回 False 245 | :rtype: bool 246 | """ 247 | from .answer import Answer 248 | if isinstance(answer, Answer) is False: 249 | raise ValueError('argument answer need to be Zhihu.Answer object.') 250 | if answer.author.url == self.url: 251 | return False 252 | data = { 253 | '_xsrf': answer.xsrf, 254 | 'aid': answer.aid 255 | } 256 | res = self._session.post(Unhelpful_Url if unhelpful else Cancel_Unhelpful_Url, 257 | data=data) 258 | return res.json()['r'] == 0 259 | -------------------------------------------------------------------------------- /zhihu/post.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from .common import * 5 | from .base import BaseZhihu, JsonAsSoupMixin 6 | 7 | 8 | class Post(JsonAsSoupMixin, BaseZhihu): 9 | 10 | """专栏文章类,请使用``ZhihuClient.post``方法构造对象.""" 11 | 12 | @class_common_init(re_post_url) 13 | def __init__(self, url, column=None, author=None, title=None, 14 | upvote_num=None, comment_num=None, session=None): 15 | """创建专栏文章类实例. 
16 | 17 | :param str url: 文章url 18 | :param Column column: 文章所属专栏,可选 19 | :param Author author: 文章作者,可选 20 | :param str title: 文章标题,可选 21 | :param int upvote_num: 文章赞同数,可选 22 | :param int comment_num: 文章评论数,可选 23 | :param Session session: 使用的网络会话,为空则使用新会话 24 | :return: 专栏文章对象 25 | :rtype: Post 26 | """ 27 | match = re_post_url.match(url) 28 | self.url = url 29 | self._session = session 30 | self._column = column 31 | self._author = author 32 | self._title = title 33 | self._upvote_num = upvote_num 34 | self._comment_num = comment_num 35 | self._slug = int(match.group(1)) # 文章编号 36 | 37 | def _make_soup(self): 38 | if self.soup is None: 39 | json = self._get_content() 40 | self._gen_soup(json) 41 | 42 | def _get_content(self): 43 | origin_host = self._session.headers.get('Host') 44 | self._session.headers.update(Host='zhuanlan.zhihu.com') 45 | json = self._session.get(Column_Post_Data.format(self.slug)).json() 46 | self._session.headers.update(Host=origin_host) 47 | return json 48 | 49 | @property 50 | def column_in_name(self): 51 | """获取文章所在专栏的内部名称(用不到就忽视吧~) 52 | 53 | :return: 专栏的内部名称 54 | :rtype: str 55 | """ 56 | self._make_soup() 57 | if 'column' in self.soup: 58 | return self.soup['column']['slug'] 59 | else: 60 | return None 61 | 62 | @property 63 | def slug(self): 64 | """获取文章的编号(用不到就忽视吧~) 65 | 66 | :return: 文章编号 67 | :rtype: int 68 | """ 69 | return self._slug 70 | 71 | @property 72 | @check_soup('_column') 73 | def column(self): 74 | """获取文章所在专栏. 75 | 76 | :return: 文章所在专栏 77 | :rtype: Column 78 | """ 79 | from .column import Column 80 | 81 | if 'column' in self.soup: 82 | url = Column_Url + '/' + self.soup['column']['slug'] 83 | name = self.soup['column']['name'] 84 | return Column(url, name, session=self._session) 85 | else: 86 | return None 87 | 88 | @property 89 | @check_soup('_author') 90 | def author(self): 91 | """获取文章作者. 
92 | 93 | :return: 文章作者 94 | :rtype: Author 95 | """ 96 | from .author import Author 97 | 98 | url = self.soup['author']['profileUrl'] 99 | name = self.soup['author']['name'] 100 | motto = self.soup['author']['bio'] 101 | template = self.soup['author']['avatar']['template'] 102 | photo_id = self.soup['author']['avatar']['id'] 103 | photo_url = template.format(id=photo_id, size='r') 104 | return Author(url, name, motto, photo_url=photo_url, 105 | session=self._session) 106 | 107 | @property 108 | @check_soup('_title') 109 | def title(self): 110 | """获取文章标题. 111 | 112 | :return: 文章标题 113 | :rtype: str 114 | """ 115 | return self.soup['title'] 116 | 117 | @property 118 | @check_soup('_upvote_num') 119 | def upvote_num(self): 120 | """获取文章赞同数量. 121 | 122 | :return: 文章赞同数 123 | :rtype: int 124 | """ 125 | return int(self.soup['likesCount']) 126 | 127 | @property 128 | @check_soup('_comment_num') 129 | def comment_num(self): 130 | """获取评论数量. 131 | 132 | :return: 评论数量 133 | :rtype: int 134 | """ 135 | return self.soup['commentsCount'] 136 | 137 | def save(self, filepath=None, filename=None, mode="md"): 138 | """保存答案为 Html 文档或 markdown 文档. 
139 | 140 | :param str filepath: 要保存的文件所在的目录, 141 | 不填为当前目录下以专栏标题命名的目录, 设为"."则为当前目录。 142 | :param str filename: 要保存的文件名, 143 | 不填则默认为 所在文章标题 - 作者名.html/md。 144 | 如果文件已存在,自动在后面加上数字区分。 145 | **自定义文件名时请不要输入后缀 .html 或 .md。** 146 | :param str mode: 保存类型,可选 `html` 、 `markdown` 、 `md` 。 147 | :return: 无 148 | :rtype: None 149 | """ 150 | if mode not in ["html", "md", "markdown"]: 151 | raise ValueError("`mode` must be 'html', 'markdown' or 'md'," 152 | " got {0}".format(mode)) 153 | self._make_soup() 154 | file = get_path(filepath, filename, mode, self.column.name, 155 | self.title + '-' + self.author.name) 156 | with open(file, 'wb') as f: 157 | if mode == "html": 158 | f.write(self.soup['content'].encode('utf-8')) 159 | else: 160 | import html2text 161 | h2t = html2text.HTML2Text() 162 | h2t.body_width = 0 163 | f.write(h2t.handle(self.soup['content']).encode('utf-8')) 164 | 165 | @property 166 | def upvoters(self): 167 | """获取文章的点赞用户 168 | 169 | :return: 文章的点赞用户,返回生成器。 170 | """ 171 | from .author import Author, ANONYMOUS 172 | self._make_soup() 173 | headers = dict(Default_Header) 174 | headers['Host'] = 'zhuanlan.zhihu.com' 175 | json = self._session.get( 176 | Post_Get_Upvoter.format(self.slug), 177 | headers=headers 178 | ).json() 179 | for au in json: 180 | try: 181 | yield Author( 182 | au['profileUrl'], 183 | au['name'], 184 | au['bio'], 185 | photo_url=au['avatar']['template'].format( 186 | id=au['avatar']['id'], size='r'), 187 | session=self._session 188 | ) 189 | except ValueError: # invalid url 190 | yield ANONYMOUS 191 | -------------------------------------------------------------------------------- /zhihu/question.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import time 6 | from datetime import datetime 7 | 8 | from .common import * 9 | from .base import BaseZhihu 10 | 11 | 12 | class Question(BaseZhihu): 13 | 
"""问题类,请使用``ZhihuClient.question``方法构造对象.""" 14 | 15 | @class_common_init(re_question_url, trailing_slash=False) 16 | def __init__(self, url, title=None, followers_num=None, 17 | answer_num=None, creation_time=None, author=None, 18 | session=None): 19 | """创建问题类实例. 20 | 21 | :param str url: 问题url. 现在支持两种 url 22 | 23 | 1. https://www.zhihu.com/question/qid 24 | 2. https://www.zhihu.com/question/qid?sort=created 25 | 26 | 区别在于,使用第一种,调用 ``question.answers`` 的时候会按投票排序返回答案; 27 | 使用第二种, 会按时间排序返回答案, 后提交的答案先返回 28 | 29 | :param str title: 问题标题,可选, 30 | :param int followers_num: 问题关注人数,可选 31 | :param int answer_num: 问题答案数,可选 32 | :param datetime.datetime creation_time: 问题创建时间,可选 33 | :param Author author: 提问者,可选 34 | :return: 问题对象 35 | :rtype: Question 36 | """ 37 | self._session = session 38 | self._url = url 39 | self._title = title 40 | self._answer_num = answer_num 41 | self._followers_num = followers_num 42 | self._id = int(re.match(r'.*/(\d+)', self.url).group(1)) 43 | self._author = author 44 | self._creation_time = creation_time 45 | self._logs = None 46 | self._deleted = None 47 | 48 | @property 49 | def url(self): 50 | # always return url like https://www.zhihu.com/question/1234/ 51 | url = re.match(re_question_url_std, self._url).group() 52 | return url if url.endswith('/') else url + '/' 53 | 54 | @property 55 | def id(self): 56 | """获取问题id(网址最后的部分). 57 | 58 | :return: 问题id 59 | :rtype: int 60 | """ 61 | return self._id 62 | 63 | @property 64 | @check_soup('_qid') 65 | def qid(self): 66 | """获取问题内部id(用不到就忽视吧) 67 | 68 | :return: 问题内部id 69 | :rtype: int 70 | """ 71 | return int(self.soup.find( 72 | 'div', id='zh-question-detail')['data-resourceid']) 73 | 74 | @property 75 | @check_soup('_xsrf') 76 | def xsrf(self): 77 | """获取知乎的反xsrf参数(用不到就忽视吧~) 78 | 79 | :return: xsrf参数 80 | :rtype: str 81 | """ 82 | return self.soup.find('input', attrs={'name': '_xsrf'})['value'] 83 | 84 | @property 85 | @check_soup('_html') 86 | def html(self): 87 | """获取页面源码. 
88 | 89 | :return: 页面源码 90 | :rtype: str 91 | """ 92 | return self.soup.prettify() 93 | 94 | @property 95 | @check_soup('_title') 96 | def title(self): 97 | """获取问题标题. 98 | 99 | :return: 问题标题 100 | :rtype: str 101 | """ 102 | return self.soup.find('h2', class_='zm-item-title') \ 103 | .text.replace('\n', '') 104 | 105 | @property 106 | @check_soup('_details') 107 | def details(self): 108 | """获取问题详细描述,目前实现方法只是直接获取文本,效果不满意……等更新. 109 | 110 | :return: 问题详细描述 111 | :rtype: str 112 | """ 113 | return self.soup.find("div", id="zh-question-detail").div.text 114 | 115 | @property 116 | @check_soup('_answer_num') 117 | def answer_num(self): 118 | """获取问题答案数量. 119 | 120 | :return: 问题答案数量 121 | :rtype: int 122 | """ 123 | answer_num_block = self.soup.find('h3', id='zh-question-answer-num') 124 | # 当0人回答或1回答时,都会找不到 answer_num_block, 125 | # 通过找答案的赞同数block来判断到底有没有答案。 126 | # (感谢知乎用户 段晓晨 提出此问题) 127 | if answer_num_block is None: 128 | if self.soup.find('span', class_='count') is not None: 129 | return 1 130 | else: 131 | return 0 132 | return int(answer_num_block['data-num']) 133 | 134 | @property 135 | @check_soup('_follower_num') 136 | def follower_num(self): 137 | """获取问题关注人数. 138 | 139 | :return: 问题关注人数 140 | :rtype: int 141 | """ 142 | follower_num_block = self.soup.find('div', class_='zg-gray-normal') 143 | # 无人关注时 找不到对应block,直接返回0 (感谢知乎用户 段晓晨 提出此问题) 144 | if follower_num_block is None or follower_num_block.strong is None: 145 | return 0 146 | return int(follower_num_block.strong.text) 147 | 148 | @property 149 | @check_soup('_topics') 150 | def topics(self): 151 | """获取问题所属话题. 
    @property
    def answers(self):
        """Get all answers to this question.

        :return: all answers of the question, as a generator
        :rtype: Answer.Iterable
        """
        from .author import Author
        from .answer import Answer

        self._make_soup()

        # TODO: unify the two branches; both could reuse the
        # _parse_answer_html logic
        if self._url.endswith('sort=created'):
            # time-ordered variant: scrape the paginated HTML pages
            pager = self.soup.find('div', class_='zm-invite-pager')
            if pager is None:
                max_page = 1
            else:
                # second-to-last span of the pager holds the last page number
                max_page = int(pager.find_all('span')[-2].a.text)

            for page in range(1, max_page + 1):
                if page == 1:
                    soup = self.soup
                else:
                    url = self._url + '&page=%d' % page
                    soup = BeautifulSoup(self._session.get(url).content)
                # relabel answers flagged "suggest edit" so the normal
                # content lookup below still finds their body
                error_answers = soup.find_all('div', id='answer-status')
                for each in error_answers:
                    each['class'] = 'zm-editable-content'
                answers_wrap = soup.find('div', id='zh-question-answer-wrap')
                # main extraction: the four lists are parallel, one entry
                # per answer
                authors = answers_wrap.find_all(
                    'div', class_='zm-item-answer-author-info')
                urls = answers_wrap.find_all('a', class_='answer-date-link')
                up_num = answers_wrap.find_all('div',
                                               class_='zm-item-vote-info')
                contents = answers_wrap.find_all(
                    'div', class_='zm-editable-content')
                assert len(authors) == len(urls) == len(up_num) == len(
                    contents)
                for author, url, up_num, content in \
                        zip(authors, urls, up_num, contents):
                    a_url, name, motto, photo = parser_author_from_tag(author)
                    author_obj = Author(a_url, name, motto, photo_url=photo,
                                        session=self._session)
                    url = Zhihu_URL + url['href']
                    up_num = int(up_num['data-votecount'])
                    content = answer_content_process(content)
                    yield Answer(url, self, author_obj, up_num, content,
                                 session=self._session)
        else:
            # vote-ordered variant: first page comes from the question's
            # own soup, further pages from the "more answers" JSON API
            pagesize = 10
            new_header = dict(Default_Header)
            new_header['Referer'] = self.url
            params = {"url_token": self.id,
                      'pagesize': pagesize,
                      'offset': 0}
            data = {'_xsrf': self.xsrf,
                    'method': 'next',
                    'params': ''}
            for i in range(0, (self.answer_num - 1) // pagesize + 1):
                if i == 0:
                    # relabel answers flagged "suggest edit" so their
                    # content is found below
                    error_answers = self.soup.find_all('div',
                                                       id='answer-status')
                    for each in error_answers:
                        each['class'] = 'zm-editable-content'
                    answers_wrap = self.soup.find('div',
                                                  id='zh-question-answer-wrap')
                    # main extraction (parallel lists, see above)
                    authors = answers_wrap.find_all(
                        'div', class_='zm-item-answer-author-info')
                    urls = answers_wrap.find_all('a', class_='answer-date-link')
                    up_num = answers_wrap.find_all('div',
                                                   class_='zm-item-vote-info')
                    contents = answers_wrap.find_all(
                        'div', class_='zm-editable-content')
                    assert len(authors) == len(urls) == len(up_num) == len(
                        contents)
                    for author, url, up_num, content in \
                            zip(authors, urls, up_num, contents):
                        a_url, name, motto, photo = parser_author_from_tag(
                            author)
                        author_obj = Author(a_url, name, motto, photo_url=photo,
                                            session=self._session)
                        url = Zhihu_URL + url['href']
                        up_num = int(up_num['data-votecount'])
                        content = answer_content_process(content)
                        yield Answer(url, self, author_obj, up_num, content,
                                     session=self._session)
                else:
                    params['offset'] = i * pagesize
                    data['params'] = json.dumps(params)
                    r = self._session.post(Question_Get_More_Answer_URL,
                                           data=data,
                                           headers=new_header)
                    answer_list = r.json()['msg']
                    for answer_html in answer_list:
                        yield self._parse_answer_html(answer_html)
    def _query_logs(self):
        """Fetch and cache the question's log (edit-history) pages.

        Pages are requested 20 entries at a time until a short page
        signals the end. Note that only the FINAL page is retained:
        callers such as ``author`` and ``creation_time`` read
        ``logs[-1]``, the oldest entry, which describes the question's
        creation.

        :return: the ``div.zm-item`` tags of the last log page
        """
        if self._logs is None:
            gotten_feed_num = 20
            start = '0'
            offset = 0
            api_url = self.url + 'log'
            logs = None
            # a full page holds 20 entries; fewer means we hit the end
            while gotten_feed_num == 20:
                data = {'_xsrf': self.xsrf, 'offset': offset, 'start': start}
                res = self._session.post(api_url, data=data)
                gotten_feed_num, content = res.json()['msg']
                offset += gotten_feed_num
                soup = BeautifulSoup(content)
                logs = soup.find_all('div', class_='zm-item')
                # next request resumes after the last entry's id
                # (the first 8 chars of the id are a fixed prefix)
                start = logs[-1]['id'][8:] if len(logs) > 0 else '0'
                time.sleep(0.2)  # prevent from posting too quickly

            self._logs = logs

        return self._logs
379 | 380 | :return: None 381 | """ 382 | super().refresh() 383 | self._html = None 384 | self._title = None 385 | self._details = None 386 | self._answer_num = None 387 | self._follower_num = None 388 | self._topics = None 389 | self._last_edit_time = None 390 | self._logs = None 391 | 392 | @property 393 | @check_soup('_deleted') 394 | def deleted(self): 395 | """问题是否被删除, 被删除了返回 True, 未被删除返回 False 396 | :return: True or False 397 | """ 398 | return self._deleted 399 | 400 | def _parse_answer_html(self, answer_html): 401 | from .author import Author 402 | from .answer import Answer 403 | soup = BeautifulSoup(answer_html) 404 | # 修正各种建议修改的回答…… 405 | error_answers = soup.find_all('div', id='answer-status') 406 | 407 | for each in error_answers: 408 | each['class'] = 'zm-editable-content' 409 | 410 | answer_url = self.url + 'answer/' + soup.div['data-atoken'] 411 | author = soup.find('div', class_='zm-item-answer-author-info') 412 | upvote_num = int(soup.find( 413 | 'div', class_='zm-item-vote-info')['data-votecount']) 414 | content = soup.find('div', class_='zm-editable-content') 415 | content = answer_content_process(content) 416 | a_url, name, motto, photo = parser_author_from_tag(author) 417 | author = Author(a_url, name, motto, photo_url=photo, 418 | session=self._session) 419 | return Answer(answer_url, self, author, upvote_num, content, 420 | session=self._session) 421 | -------------------------------------------------------------------------------- /zhihu/topic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import time 5 | from datetime import datetime 6 | 7 | from .common import * 8 | from .base import BaseZhihu 9 | 10 | 11 | class Topic(BaseZhihu): 12 | 13 | """答案类,请使用``ZhihuClient.topic``方法构造对象.""" 14 | 15 | @class_common_init(re_topic_url) 16 | def __init__(self, url, name=None, session=None): 17 | """创建话题类实例. 
18 | 19 | :param url: 话题url 20 | :param name: 话题名称,可选 21 | :return: Topic 22 | """ 23 | self.url = url 24 | self._session = session 25 | self._name = name 26 | self._id = int(re_topic_url.match(self.url).group(1)) 27 | 28 | @property 29 | def id(self): 30 | """获取话题Id(网址最后那串数字) 31 | 32 | :return: 话题Id 33 | :rtype: int 34 | """ 35 | return self._id 36 | 37 | @property 38 | @check_soup('_xsrf') 39 | def xsrf(self): 40 | """获取知乎的反xsrf参数(用不到就忽视吧~) 41 | 42 | :return: xsrf参数 43 | :rtype: str 44 | """ 45 | return self.soup.find('input', attrs={'name': '_xsrf'})['value'] 46 | 47 | @property 48 | @check_soup('_tid') 49 | def tid(self): 50 | """话题内部Id,有时候要用到 51 | 52 | :return: 话题内部Id 53 | :rtype: int 54 | """ 55 | return int(self.soup.find( 56 | 'div', id='zh-topic-desc')['data-resourceid']) 57 | 58 | @property 59 | @check_soup('_name') 60 | def name(self): 61 | """获取话题名称. 62 | 63 | :return: 话题名称 64 | :rtype: str 65 | """ 66 | return self.soup.find('h1').text 67 | 68 | @property 69 | def parents(self): 70 | """获取此话题的父话题。 71 | 注意:由于没找到有很多父话题的话题来测试, 72 | 所以本方法可能再某些时候出现问题,请不吝反馈。 73 | 74 | :return: 此话题的父话题,返回生成器 75 | :rtype: Topic.Iterable 76 | """ 77 | self._make_soup() 78 | parent_topic_tag = self.soup.find('div', class_='parent-topic') 79 | if parent_topic_tag is None: 80 | yield [] 81 | else: 82 | for topic_tag in parent_topic_tag.find_all('a'): 83 | yield Topic(Zhihu_URL + topic_tag['href'], 84 | topic_tag.text.strip(), 85 | session=self._session) 86 | 87 | @property 88 | def children(self): 89 | """获取此话题的子话题 90 | 91 | :return: 此话题的子话题, 返回生成器 92 | :rtype: Topic.Iterable 93 | """ 94 | self._make_soup() 95 | child_topic_tag = self.soup.find('div', class_='child-topic') 96 | if child_topic_tag is None: 97 | return [] 98 | elif '共有' not in child_topic_tag.contents[-2].text: 99 | for topic_tag in child_topic_tag.div.find_all('a'): 100 | yield Topic(Zhihu_URL + topic_tag['href'], 101 | topic_tag.text.strip(), 102 | session=self._session) 103 | else: 104 | flag = 'load' 105 | child 
= '' 106 | data = {'_xsrf': self.xsrf} 107 | params = { 108 | 'parent': self.id 109 | } 110 | while flag == 'load': 111 | params['child'] = child 112 | res = self._session.post(Topic_Get_Children_Url, 113 | params=params, data=data) 114 | j = map(lambda x: x[0], res.json()['msg'][1]) 115 | *topics, last = j 116 | for topic in topics: 117 | yield Topic(Zhihu_URL + '/topic/' + topic[2], topic[1], 118 | session=self._session) 119 | flag = last[0] 120 | child = last[2] 121 | if flag == 'topic': 122 | yield Topic(Zhihu_URL + '/topic/' + last[2], last[1], 123 | session=self._session) 124 | 125 | @property 126 | @check_soup('_follower_num') 127 | def follower_num(self): 128 | """获取话题关注人数. 129 | 130 | :return: 关注人数 131 | :rtype: int 132 | """ 133 | follower_num_block = self.soup.find( 134 | 'div', class_='zm-topic-side-followers-info') 135 | # 无人关注时 找不到对应block,直接返回0 (感谢知乎用户 段晓晨 提出此问题) 136 | if follower_num_block.strong is None: 137 | return 0 138 | return int(follower_num_block.strong.text) 139 | 140 | @property 141 | def followers(self): 142 | """获取话题关注者 143 | 144 | :return: 话题关注者,返回生成器 145 | :rtype: Author.Iterable 146 | """ 147 | from .author import Author, ANONYMOUS 148 | self._make_soup() 149 | gotten_data_num = 20 150 | data = { 151 | '_xsrf': self.xsrf, 152 | 'start': '', 153 | 'offset': 0 154 | } 155 | while gotten_data_num == 20: 156 | res = self._session.post( 157 | Topic_Get_More_Follower_Url.format(self.id), data=data) 158 | j = res.json()['msg'] 159 | gotten_data_num = j[0] 160 | data['offset'] += gotten_data_num 161 | soup = BeautifulSoup(j[1]) 162 | divs = soup.find_all('div', class_='zm-person-item') 163 | for div in divs: 164 | h2 = div.h2 165 | url = Zhihu_URL + h2.a['href'] 166 | name = h2.a.text 167 | motto = h2.parent.div.text.strip() 168 | try: 169 | yield Author(url, name, motto, session=self._session) 170 | except ValueError: # invalid url 171 | yield ANONYMOUS 172 | data['start'] = int(re_get_number.match(divs[-1]['id']).group(1)) 173 | 174 | 
    @property
    @check_soup('_photo_url')
    def photo_url(self):
        """Url of the topic's avatar image.

        :return: topic avatar url (large `_r` variant)
        :rtype: str
        """
        # '_m' (medium) is swapped for '_r' (raw/large) in the image name.
        img = self.soup.find('a', id='zh-avartar-edit-form').img['src']
        return img.replace('_m', '_r')

    @property
    @check_soup('_description')
    def description(self):
        """Description text of the topic.

        :return: topic description
        :rtype: str
        """
        desc = self.soup.find('div', class_='zm-editable-content').text
        return desc

    @property
    def top_authors(self):
        """Best answerers of this topic.

        :return: top answerers under this topic — usually 5, possibly none;
            returned as a generator
        :rtype: Author.Iterable
        """
        from .author import Author, ANONYMOUS
        self._make_soup()
        t = self.soup.find('div', id='zh-topic-top-answerer')
        if t is None:
            return
        for d in t.find_all('div', class_='zm-topic-side-person-item-content'):
            url = Zhihu_URL + d.a['href']
            name = d.a.text
            motto = d.find('span', class_='bio')['title']
            try:
                yield Author(url, name, motto, session=self._session)
            except ValueError:  # invalid url
                yield ANONYMOUS

    @property
    def top_answers(self):
        """Highlighted ("top") answers under this topic.

        :return: top answers of the topic, as a generator
        :rtype: Answer.Iterable
        """
        from .question import Question
        from .answer import Answer
        from .author import Author, ANONYMOUS

        top_answers_url = Topic_Top_Answers_Url.format(self.id)
        params = {'page': 1}
        while True:
            # Zhihu serves at most 50 pages — stop past that.
            if params['page'] > 50:
                return
            res = self._session.get(top_answers_url, params=params)
            params['page'] += 1
            soup = BeautifulSoup(res.content)
            # Fewer than 50 pages exist: we landed on an error page — stop.
            if soup.find('div', class_='error') is not None:
                return
            # The four lists below are parallel: one entry per answer item.
            questions = soup.find_all('a', class_='question_link')
            answers = soup.find_all('a', class_='answer-date-link')
            authors = soup.find_all('div', class_='zm-item-answer-author-info')
            upvotes = soup.find_all('a', class_='zm-item-vote-count')
            for ans, up, q, au in zip(answers, upvotes, questions, authors):
                answer_url = Zhihu_URL + ans['href']
                question_url = Zhihu_URL + q['href']
                question_title = q.text.strip()
                # Non-numeric vote text (e.g. abbreviated counts) → None.
                upvote = up.text
                if upvote.isdigit():
                    upvote = int(upvote)
                else:
                    upvote = None
                question = Question(question_url, question_title,
                                    session=self._session)
                # No <a> in the author block means an anonymous author.
                if au.a is None:
                    author = ANONYMOUS
                else:
                    author_url = Zhihu_URL + au.a['href']
                    author_name = au.a.text
                    author_motto = au.strong['title'] if au.strong else ''
                    author = Author(author_url, author_name, author_motto,
                                    session=self._session)
                yield Answer(answer_url, question, author, upvote,
                             session=self._session)

    @property
    def questions(self):
        """All questions under this topic, newest first.

        :return: all questions of the topic, as a generator
        :rtype: Question.Iterable
        """
        from .question import Question
        question_url = Topic_Questions_Url.format(self.id)
        params = {'page': 1}
        # Millisecond timestamp cursor; items newer than the previous page's
        # oldest entry are dropped to de-duplicate across page boundaries.
        older_time_stamp = int(time.time()) * 1000
        while True:
            res = self._session.get(question_url, params=params)
            soup = BeautifulSoup(res.content)
            if soup.find('div', class_='error') is not None:
                return
            questions = soup.find_all('div', class_='question-item')
            questions = list(filter(
                lambda x: int(x.h2.span['data-timestamp']) < older_time_stamp,
                questions))
            for qu_div in questions:
                url = Zhihu_URL + qu_div.h2.a['href']
                title = qu_div.h2.a.text.strip()
                # data-timestamp is in milliseconds — convert to seconds.
                creation_time = datetime.fromtimestamp(
                    int(qu_div.h2.span['data-timestamp']) // 1000)
                yield Question(url, title, creation_time=creation_time,
                               session=self._session)
            older_time_stamp = int(questions[-1].h2.span['data-timestamp'])
            params['page'] += 1

    @property
    def unanswered_questions(self):
        """Questions under this topic that are awaiting an answer.

        What "awaiting an answer" means:
        https://www.zhihu.com/question/40470324

        :return: unanswered questions of the topic, as a generator
        :rtype: Question.Iterable
        """
        from .question import Question
        question_url = Topic_Unanswered_Question_Url.format(self.id)
        params = {'page': 1}
        while True:
            res = self._session.get(question_url, params=params)
            soup = BeautifulSoup(res.content)
            # Past the last page Zhihu serves an error page — stop.
            if soup.find('div', class_='error') is not None:
                return
            questions = soup.find_all('div', class_='question-item')
            for qu_div in questions:
                url = Zhihu_URL + qu_div.h2.a['href']
                title = qu_div.h2.a.text.strip()
                yield Question(url, title, session=self._session)
            params['page'] += 1

    @property
    def answers(self):
        """All answers under this topic, newest first.

        :return: all answers of the topic, as a generator
        :rtype: Answer.Iterable
        """
        from .question import Question
        from .answer import Answer
        from .author import Author, ANONYMOUS

        newest_url = Topic_Newest_Url.format(self.id)
        params = {'start': 0, '_xsrf': self.xsrf}
        res = self._session.get(newest_url)
        soup = BeautifulSoup(res.content)
        while True:
            divs = soup.find_all('div', class_='folding')
            # No answers under the topic: stop immediately.
            if len(divs) == 0:
                return
            # data-score of the last item is the cursor for the next POST.
            last_score = divs[-1]['data-score']
            for div in divs:
                q = div.find('a', class_="question_link")
                question_url = Zhihu_URL + q['href']
                question_title = q.text.strip()
                question = Question(question_url, question_title,
                                    session=self._session)

                ans = div.find('a', class_='answer-date-link')
                answer_url = Zhihu_URL + ans['href']

                # Non-numeric vote text → None.
                upvote = div.find('a', class_='zm-item-vote-count').text
                if upvote.isdigit():
                    upvote = int(upvote)
                else:
                    upvote = None

                au = div.find('div', class_='zm-item-answer-author-info')
                if au.a is None:
                    author = ANONYMOUS
                else:
                    author_url = Zhihu_URL + au.a['href']
                    author_name = au.a.text
                    author_motto = au.strong['title'] if au.strong else ''
                    author = Author(author_url, author_name, author_motto,
                                    session=self._session)
                yield Answer(answer_url, question, author, upvote,
                             session=self._session)

            params['offset'] = last_score
            res = self._session.post(newest_url, data=params)
            gotten_feed_num = res.json()['msg'][0]
            # Zero items returned: no more pages.
            if gotten_feed_num == 0:
                return
            soup = BeautifulSoup(res.json()['msg'][1])

    @property
    def hot_questions(self):
        """Hot questions under this topic.

        :return: questions from the topic's hot feed, generated in
            hotness order
        :rtype: Question.Iterable
        """
        from .question import Question
        hot_questions_url = Topic_Hot_Questions_Url.format(self.id)
        params = {'start': 0, '_xsrf': self.xsrf}
        res = self._session.get(hot_questions_url)
        soup = BeautifulSoup(res.content)
        while True:
            questions_duplicate = soup.find_all('a', class_='question_link')
            # No questions under the topic: stop immediately.
            if len(questions_duplicate) == 0:
                return
            # De-duplicate questions repeated within the feed page.
            questions = list(set(questions_duplicate))
            # Restore hotness order (data-score of the enclosing feed-item).
            questions.sort(key=self._get_score, reverse=True)
            last_score = soup.find_all(
                'div', class_='feed-item')[-1]['data-score']
            for q in questions:
                question_url = Zhihu_URL + q['href']
                question_title = q.text.strip()
                question = Question(question_url, question_title,
                                    session=self._session)
                yield question
            params['offset'] = last_score
            res = self._session.post(hot_questions_url, data=params)
            gotten_feed_num = res.json()['msg'][0]
            # Zero items returned: no more pages.
            if gotten_feed_num == 0:
                return
            soup = BeautifulSoup(res.json()['msg'][1])

    @property
    def hot_answers(self):
        """Hot answers under this topic.

        :return: answers from the topic's hot feed, generated in
            hotness order
        :rtype: Question.Iterable
        """
        from .question import Question
        from .author import Author
        from .answer import Answer
        hot_questions_url = Topic_Hot_Questions_Url.format(self.id)
        params = {'start': 0, '_xsrf': self.xsrf}
        res = self._session.get(hot_questions_url)
        soup = BeautifulSoup(res.content)
        while True:
            answers_div = soup.find_all('div', class_='feed-item')
            last_score = answers_div[-1]['data-score']
            for div in answers_div:
                # A missing textarea means the answer has been removed
                # (censored) — skip it.
                if not div.textarea:
                    continue
                question_url = Zhihu_URL + div.h2.a['href']
                question_title = div.h2.a.text.strip()
                question = Question(question_url, question_title,
                                    session=self._session)
                # No author link → anonymous user placeholder values.
                author_link = div.find('a', class_='author-link')
                if not author_link:
                    author_url = None
                    author_name = '匿名用户'
                    author_motto = ''
                else:
                    author_url = Zhihu_URL + author_link['href']
                    author_name = author_link.text
                    author_motto_span = div.find('span', class_='bio')
                    author_motto = author_motto_span['title'] \
                        if author_motto_span else ''
                author = Author(author_url, author_name, author_motto,
                                session=self._session)

                body = div.find('div', class_='zm-item-rich-text')
                answer_url = Zhihu_URL + body['data-entry-url']
                upvote_num = int(div.find(
                    'div', class_='zm-item-vote-info')['data-votecount'])

                yield Answer(answer_url, question, author, upvote_num,
                             session=self._session)

            params['offset'] = last_score
            res = self._session.post(hot_questions_url, data=params)
            gotten_feed_num = res.json()['msg'][0]
            # Zero items returned: no more pages.
            if gotten_feed_num == 0:
                return
            soup = BeautifulSoup(res.json()['msg'][1])

    @staticmethod
    def _get_score(tag):
        """Return the data-score of the feed item containing *tag*.

        *tag* is a ``question_link`` <a>; the score attribute lives on an
        ancestor div, either two or four levels up depending on layout.
        """
        h2 = tag.parent
        div = h2.parent
        try:
            # If the parent carries a class attribute, the grandparent div
            # is the feed item itself; otherwise climb two more levels.
            _ = h2['class']
            return div['data-score']
        except KeyError:
            return div.parent.parent['data-score']