├── .gitattributes ├── .gitignore ├── ReadList_for_test.txt ├── db └── zhihuhelp.sql ├── readme.md ├── src ├── __init__.py ├── command_parser.py ├── container │ ├── __init__.py │ ├── book.py │ ├── data │ │ ├── __init__.py │ │ ├── answer.py │ │ ├── article.py │ │ ├── author.py │ │ ├── collection.py │ │ ├── column.py │ │ ├── question.py │ │ └── topic.py │ ├── image_container.py │ ├── task.py │ └── task_result.py ├── lib │ ├── __init__.py │ ├── epub │ │ ├── __init__.py │ │ ├── directory.py │ │ ├── epub.py │ │ ├── inf.py │ │ ├── mime_type.py │ │ ├── opf.py │ │ ├── template │ │ │ ├── META-INF │ │ │ │ ├── container │ │ │ │ │ └── container.xml │ │ │ │ └── duokan_container │ │ │ │ │ └── duokan-extension.xml │ │ │ ├── OEBPS │ │ │ │ ├── opf │ │ │ │ │ ├── content.xml │ │ │ │ │ ├── guide │ │ │ │ │ │ └── item.xml │ │ │ │ │ ├── manifest │ │ │ │ │ │ └── item.xml │ │ │ │ │ ├── metadata │ │ │ │ │ │ ├── book_id.xml │ │ │ │ │ │ ├── cover.xml │ │ │ │ │ │ ├── creator.xml │ │ │ │ │ │ ├── language.xml │ │ │ │ │ │ └── title.xml │ │ │ │ │ └── spine │ │ │ │ │ │ ├── item.xml │ │ │ │ │ │ └── item_nolinear.xml │ │ │ │ └── toc │ │ │ │ │ ├── content.xml │ │ │ │ │ ├── docTitle │ │ │ │ │ └── title.xml │ │ │ │ │ ├── head │ │ │ │ │ ├── depth.xml │ │ │ │ │ └── uid.xml │ │ │ │ │ └── navMap │ │ │ │ │ └── item.xml │ │ │ └── directory │ │ │ │ ├── chapter.html │ │ │ │ ├── content.html │ │ │ │ ├── finish_chapter.html │ │ │ │ ├── item_leaf.html │ │ │ │ └── item_root.html │ │ ├── toc.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── epub_config.py │ │ │ └── epub_path.py │ │ └── zhihuhelp_tools │ │ │ ├── __init__.py │ │ │ ├── debug.py │ │ │ └── path.py │ ├── oauth │ │ ├── __init__.py │ │ └── zhihu_oauth │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ ├── exception.py │ │ │ ├── helpers.py │ │ │ ├── oauth │ │ │ ├── __init__.py │ │ │ ├── before_login_auth.py │ │ │ ├── im_android.py │ │ │ ├── setting.py │ │ │ ├── token.py │ │ │ ├── utils.py │ │ │ └── zhihu_oauth.py │ │ │ ├── setting.py │ │ │ ├── 
utils.py │ │ │ └── zhcls │ │ │ ├── __init__.py │ │ │ ├── activity.py │ │ │ ├── answer.py │ │ │ ├── article.py │ │ │ ├── base.py │ │ │ ├── collection.py │ │ │ ├── column.py │ │ │ ├── comment.py │ │ │ ├── generator.py │ │ │ ├── live.py │ │ │ ├── me.py │ │ │ ├── message.py │ │ │ ├── normal.py │ │ │ ├── other.py │ │ │ ├── people.py │ │ │ ├── question.py │ │ │ ├── streaming.py │ │ │ ├── topic.py │ │ │ ├── urls.py │ │ │ ├── utils.py │ │ │ └── whisper.py │ └── requests │ │ ├── __init__.py │ │ ├── _internal_utils.py │ │ ├── adapters.py │ │ ├── api.py │ │ ├── auth.py │ │ ├── cacert.pem │ │ ├── certs.py │ │ ├── compat.py │ │ ├── cookies.py │ │ ├── exceptions.py │ │ ├── hooks.py │ │ ├── models.py │ │ ├── packages │ │ ├── README.rst │ │ ├── __init__.py │ │ ├── chardet │ │ │ ├── __init__.py │ │ │ ├── big5freq.py │ │ │ ├── big5prober.py │ │ │ ├── chardetect.py │ │ │ ├── chardistribution.py │ │ │ ├── charsetgroupprober.py │ │ │ ├── charsetprober.py │ │ │ ├── codingstatemachine.py │ │ │ ├── compat.py │ │ │ ├── constants.py │ │ │ ├── cp949prober.py │ │ │ ├── escprober.py │ │ │ ├── escsm.py │ │ │ ├── eucjpprober.py │ │ │ ├── euckrfreq.py │ │ │ ├── euckrprober.py │ │ │ ├── euctwfreq.py │ │ │ ├── euctwprober.py │ │ │ ├── gb2312freq.py │ │ │ ├── gb2312prober.py │ │ │ ├── hebrewprober.py │ │ │ ├── jisfreq.py │ │ │ ├── jpcntx.py │ │ │ ├── langbulgarianmodel.py │ │ │ ├── langcyrillicmodel.py │ │ │ ├── langgreekmodel.py │ │ │ ├── langhebrewmodel.py │ │ │ ├── langhungarianmodel.py │ │ │ ├── langthaimodel.py │ │ │ ├── latin1prober.py │ │ │ ├── mbcharsetprober.py │ │ │ ├── mbcsgroupprober.py │ │ │ ├── mbcssm.py │ │ │ ├── sbcharsetprober.py │ │ │ ├── sbcsgroupprober.py │ │ │ ├── sjisprober.py │ │ │ ├── universaldetector.py │ │ │ └── utf8prober.py │ │ └── urllib3 │ │ │ ├── __init__.py │ │ │ ├── _collections.py │ │ │ ├── connection.py │ │ │ ├── connectionpool.py │ │ │ ├── contrib │ │ │ ├── __init__.py │ │ │ ├── appengine.py │ │ │ ├── ntlmpool.py │ │ │ ├── pyopenssl.py │ │ │ └── socks.py │ │ │ 
├── exceptions.py │ │ │ ├── fields.py │ │ │ ├── filepost.py │ │ │ ├── packages │ │ │ ├── __init__.py │ │ │ ├── backports │ │ │ │ ├── __init__.py │ │ │ │ └── makefile.py │ │ │ ├── ordered_dict.py │ │ │ ├── six.py │ │ │ └── ssl_match_hostname │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ └── _implementation.py │ │ │ ├── poolmanager.py │ │ │ ├── request.py │ │ │ ├── response.py │ │ │ └── util │ │ │ ├── __init__.py │ │ │ ├── connection.py │ │ │ ├── request.py │ │ │ ├── response.py │ │ │ ├── retry.py │ │ │ ├── ssl_.py │ │ │ ├── timeout.py │ │ │ └── url.py │ │ ├── sessions.py │ │ ├── status_codes.py │ │ ├── structures.py │ │ └── utils.py ├── login.py ├── main.py ├── tools │ ├── __init__.py │ ├── config.py │ ├── controler.py │ ├── db.py │ ├── debug.py │ ├── extra_tools.py │ ├── http.py │ ├── match.py │ ├── path.py │ ├── template.py │ ├── template_config.py │ └── type.py └── worker.py ├── unit ├── BS4 │ ├── content.html │ └── parser.py ├── __init__.py ├── addressFile │ ├── address_All │ ├── answer │ ├── article │ ├── collection │ ├── column │ ├── people │ ├── question │ ├── table │ └── topic ├── demo │ ├── __init__.json │ ├── activity.html │ ├── answer.html │ ├── article.html │ ├── collection.html │ ├── columns.html │ ├── people.html │ ├── question.html │ ├── question_answer.html │ ├── readme.md │ └── topic.html ├── oauth_test.py ├── parser_unit.py └── unit_html │ ├── author.html │ ├── author_info.html │ ├── collection.html │ ├── private_collection.html │ ├── single_answer.html │ ├── single_question.html │ ├── topic.html │ └── topic_info.html ├── update.md ├── www ├── __init__.py ├── css │ ├── bootstrap.css │ ├── customer.css │ ├── markdown.css │ └── normalize.css ├── image │ ├── cover.jpg │ └── kanshan.png └── template │ ├── __init__.py │ ├── base.html │ ├── content │ └── question │ │ ├── answer.html │ │ └── question.html │ ├── front_page │ ├── base.html │ └── info │ │ ├── answer.html │ │ ├── article.html │ │ ├── author.html │ │ ├── collection.html │ │ ├── 
column.html │ │ ├── question.html │ │ └── topic.html │ └── info_page │ ├── article.html │ ├── author.html │ ├── book.html │ ├── collection.html │ ├── column.html │ ├── question.html │ └── topic.html ├── zhihuHelp.py └── 知乎助手实现思路.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /config.json 2 | /.idea/dataSources.local.xml 3 | /.idea/workspace.xml 4 | /.idea/ZhihuHelp__Python.iml 5 | ################# 6 | ## Eclipse 7 | ################# 8 | 9 | *.pydevproject 10 | .project 11 | .metadata 12 | bin/ 13 | tmp/ 14 | *.tmp 15 | *.bak 16 | *.swp 17 | *~.nib 18 | local.properties 19 | .classpath 20 | .settings/ 21 | .loadpath 22 | 23 | # External tool builders 24 | .externalToolBuilders/ 25 | 26 | # Locally stored "Eclipse launch configurations" 27 | *.launch 28 | 29 | # CDT-specific 30 | .cproject 31 | 32 | # PDT-specific 33 | .buildpath 34 | 35 | 36 | ################# 37 | ## Visual Studio 38 | ################# 39 | 40 | ## Ignore Visual Studio temporary files, build results, and 41 | ## files generated by popular Visual Studio add-ons. 
42 | 43 | # User-specific files 44 | *.suo 45 | *.user 46 | *.sln.docstates 47 | 48 | # Build results 49 | 50 | [Dd]ebug/ 51 | [Rr]elease/ 52 | x64/ 53 | build/ 54 | [Bb]in/ 55 | [Oo]bj/ 56 | 57 | # MSTest test Results 58 | [Tt]est[Rr]esult*/ 59 | [Bb]uild[Ll]og.* 60 | 61 | *_i.c 62 | *_p.c 63 | *.ilk 64 | *.meta 65 | *.obj 66 | *.pch 67 | *.pdb 68 | *.pgc 69 | *.pgd 70 | *.rsp 71 | *.sbr 72 | *.tlb 73 | *.tli 74 | *.tlh 75 | *.tmp_proj 76 | *.log 77 | *.vspscc 78 | *.vssscc 79 | .builds 80 | *.pidb 81 | *.scc 82 | 83 | # Visual C++ cache files 84 | ipch/ 85 | *.aps 86 | *.ncb 87 | *.opensdf 88 | *.sdf 89 | *.cachefile 90 | 91 | # Visual Studio profiler 92 | *.psess 93 | *.vsp 94 | *.vspx 95 | 96 | # Guidance Automation Toolkit 97 | *.gpState 98 | 99 | # ReSharper is a .NET coding add-in 100 | _ReSharper*/ 101 | *.[Rr]e[Ss]harper 102 | 103 | # TeamCity is a build add-in 104 | _TeamCity* 105 | 106 | # DotCover is a Code Coverage Tool 107 | *.dotCover 108 | 109 | # NCrunch 110 | *.ncrunch* 111 | .*crunch*.local.xml 112 | 113 | # Installshield output folder 114 | [Ee]xpress/ 115 | 116 | # DocProject is a documentation generator add-in 117 | DocProject/buildhelp/ 118 | DocProject/Help/*.HxT 119 | DocProject/Help/*.HxC 120 | DocProject/Help/*.hhc 121 | DocProject/Help/*.hhk 122 | DocProject/Help/*.hhp 123 | DocProject/Help/Html2 124 | DocProject/Help/html 125 | 126 | # Click-Once directory 127 | publish/ 128 | 129 | # Publish Web Output 130 | *.Publish.xml 131 | *.pubxml 132 | 133 | # NuGet Packages Directory 134 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 135 | #packages/ 136 | 137 | # Windows Azure Build Output 138 | csx 139 | *.build.csdef 140 | 141 | # Windows Store app package directory 142 | AppPackages/ 143 | 144 | # Others 145 | sql/ 146 | *.Cache 147 | ClientBin/ 148 | [Ss]tyle[Cc]op.* 149 | ~$* 150 | *~ 151 | *.dbmdl 152 | *.[Pp]ublish.xml 153 | *.pfx 154 | *.publishsettings 155 | 156 | # RIA/Silverlight projects 157 | 
Generated_Code/ 158 | 159 | # Backup & report files from converting an old project file to a newer 160 | # Visual Studio version. Backup files are not needed, because we have git ;-) 161 | _UpgradeReport_Files/ 162 | Backup*/ 163 | UpgradeLog*.XML 164 | UpgradeLog*.htm 165 | 166 | # SQL Server files 167 | App_Data/*.mdf 168 | App_Data/*.ldf 169 | 170 | ############# 171 | ## Windows detritus 172 | ############# 173 | 174 | # Windows image file caches 175 | Thumbs.db 176 | ehthumbs.db 177 | 178 | # Folder config file 179 | Desktop.ini 180 | 181 | # Recycle Bin used on file shares 182 | $RECYCLE.BIN/ 183 | 184 | # Mac crap 185 | .DS_Store 186 | 187 | 188 | ############# 189 | ## Python 190 | ############# 191 | 192 | *.py[co] 193 | 194 | # Packages 195 | *.egg 196 | *.egg-info 197 | dist/ 198 | eggs/ 199 | parts/ 200 | var/ 201 | sdist/ 202 | develop-eggs/ 203 | .idea/ 204 | 知乎电子书临时资源库/ 205 | 知乎助手生成的电子书/ 206 | .installed.cfg 207 | 208 | # Installer logs 209 | pip-log.txt 210 | 211 | # Unit test / coverage reports 212 | .coverage 213 | .tox 214 | 215 | #Translations 216 | *.mo 217 | 218 | #Mr Developer 219 | .mr.developer.cfg 220 | 221 | #Mine 222 | ReadList.txt 223 | misc.xml 224 | *.db 225 | *.sqlite 226 | *.ini 227 | zhihuhelp1.7.0/我是登陆知乎时的验证码.gif 228 | .idea/workspace.xml 229 | 我是登陆知乎时的验证码.gif 230 | # 自动生成的token 231 | token.pkl 232 | *.token 233 | 知乎登录密钥_token_file.token 234 | 235 | *.jpg 236 | 237 | *.xml 238 | 239 | *.xhtml 240 | 241 | *.epub 242 | 243 | *.opf 244 | 245 | *.ncx 246 | 247 | *.html 248 | 249 | *.css 250 | -------------------------------------------------------------------------------- /ReadList_for_test.txt: -------------------------------------------------------------------------------- 1 | https://www.zhihu.com/people/404-Page-Not-found$https://www.zhihu.com/people/meng-qing-xue-94$https://www.zhihu.com/people/ying-ye-78 #用户答案收集测试-姚泽源-孟晴雪-影夜 2 | https://www.zhihu.com/topic/19560104 #话题-青岛 3 | https://www.zhihu.com/collection/19726342 #收藏夹-工作 4 | 
class CommandParser(object):
    u"""
    Build task containers from the address lines supplied by the user.
    """

    @staticmethod
    def get_task_list(command):
        u"""
        Convert one raw command line into a list of task containers.

        Spaces, tabs and line breaks are removed, everything after the first
        ``#`` is discarded, and the remainder is split on ``$`` so that each
        piece is parsed on its own. Pieces that produce no task are skipped.

        :param command: raw text of one command line
        :return: list of task objects, possibly empty
        """
        for blank in (' ', '\r', '\n', '\t'):
            command = command.replace(blank, '')
        addresses = command.split('#')[0].split('$')

        parsed = (CommandParser.parse_command(address) for address in addresses)
        return [task for task in parsed if task]

    @staticmethod
    def detect(command):
        u"""
        Return the first ``Type`` name whose ``Match`` checker accepts *command*.

        :param command: a single address with whitespace already removed
        :return: the matching type name, or ``Type.unknown`` when none matches
        """
        candidates = (
            Type.answer, Type.question,
            Type.author, Type.collection, Type.topic,
            # Article has to be tried before column; per the original note,
            # every article address would otherwise be detected as a column.
            Type.article, Type.column,
        )
        return next(
            (kind for kind in candidates if getattr(Match, kind)(command)),
            Type.unknown,
        )

    @staticmethod
    def parse_command(raw_command=''):
        u"""
        Parse a single address and return the task it describes.

        :param raw_command: one address taken from a command line
        :return: whatever the handler for the detected type returns
        """
        handlers = {
            Type.author: CommandParser.parse_author,
            Type.answer: CommandParser.parse_answer,
            Type.question: CommandParser.parse_question,
            Type.collection: CommandParser.parse_collection,
            Type.topic: CommandParser.parse_topic,
            Type.article: CommandParser.parse_article,
            Type.column: CommandParser.parse_column,
            Type.unknown: CommandParser.parse_error,
        }
        handler = handlers[CommandParser.detect(raw_command)]
        return handler(raw_command)

    @staticmethod
    def parse_question(command):
        u"""Build a QuestionTask from the ``question_id`` group of the match."""
        matched = Match.question(command)
        return QuestionTask(matched.group(u'question_id'))

    @staticmethod
    def parse_answer(command):
        u"""Build an AnswerTask from the ``question_id`` and ``answer_id`` groups."""
        matched = Match.answer(command)
        return AnswerTask(
            matched.group(u'question_id'),
            matched.group(u'answer_id'),
        )

    @staticmethod
    def parse_author(command):
        u"""Build an AuthorTask from the ``author_page_id`` group of the match."""
        matched = Match.author(command)
        return AuthorTask(matched.group(u'author_page_id'))

    @staticmethod
    def parse_collection(command):
        u"""Build a CollectionTask from the ``collection_id`` group of the match."""
        matched = Match.collection(command)
        return CollectionTask(matched.group(u'collection_id'))

    @staticmethod
    def parse_topic(command):
        u"""Build a TopicTask from the ``topic_id`` group of the match."""
        matched = Match.topic(command)
        return TopicTask(matched.group(u'topic_id'))

    @staticmethod
    def parse_article(command):
        u"""Build an ArticleTask from the ``column_id`` and ``article_id`` groups."""
        matched = Match.article(command)
        return ArticleTask(
            matched.group(u'column_id'),
            matched.group(u'article_id'),
        )

    @staticmethod
    def parse_column(command):
        u"""Build a ColumnTask from the ``column_id`` group of the match."""
        matched = Match.column(command)
        return ColumnTask(matched.group(u'column_id'))

    @staticmethod
    def parse_error(command):
        u"""
        Handler for addresses of unknown type; always returns None.

        A non-empty address is logged so the user can correct it, an empty
        one is ignored silently.
        """
        if not command:
            return
        Debug.logger.info(u"""无法解析记录:{}所属网址类型,请检查后重试。""".format(command))
        return
class Article(object):
    u"""
    Container for a single column article.
    """

    def __init__(self, data):
        u"""
        Copy the article fields out of *data*.

        :param data: mapping that holds every key listed below; a missing
            key raises KeyError
        """
        for field in (
            'article_id',
            'title',
            'updated_time',
            'voteup_count',
            'image_url',
            'column_id',
            'content',
            'comment_count',
            'author_id',
            'author_name',
            'author_headline',
            'author_avatar_url',
            'author_gender',
        ):
            setattr(self, field, data[field])

        # Accumulated by download_img from the size helper's per-file values.
        self.total_img_size_kb = 0
        self.img_filename_list = []

    def download_img(self):
        u"""
        Download every image the article uses and point the article at the
        local copies.

        Covers the images found in ``content``, the cover image and the
        author's avatar. After the download, the size of every file is added
        to ``total_img_size_kb``.
        """
        from src.container.image_container import ImageContainer
        pool = ImageContainer()
        tag_to_src = Match.match_img_with_src_dict(self.content)
        self.img_filename_list = []

        def register(url):
            # Queue the url in the pool and remember the filename it was given.
            local_name = pool.add(url)
            self.img_filename_list.append(local_name)
            return local_name

        # Images embedded in the article body.
        for tag in tag_to_src:
            local_name = register(tag_to_src[tag])
            local_tag = Match.create_img_element_with_file_name(local_name)
            self.content = self.content.replace(tag, local_tag)

        # Cover image of the article.
        self.image_url = Match.create_local_img_src(register(self.image_url))

        # Avatar of the author.
        self.author_avatar_url = Match.create_local_img_src(register(self.author_avatar_url))

        pool.start_download()

        # Sizes are only known once the files exist on disk.
        for local_name in self.img_filename_list:
            self.total_img_size_kb += Path.get_img_size_by_filename_kb(local_name)
        return
class Collection(object):
    u"""
    Container for the fields of one collection.
    """

    def __init__(self, data):
        u"""
        Copy the collection fields out of *data*.

        :param data: mapping that holds every key listed below; a missing
            key raises KeyError
        """
        for field in (
            'collection_id',
            'answer_count',
            'comment_count',
            'created_time',
            'follower_count',
            'description',
            'title',
            'updated_time',
            'creator_id',
            'creator_name',
            'creator_headline',
            'creator_avatar_url',
            'collected_answer_id_list',
        ):
            setattr(self, field, data[field])
class ImageContainer(object):
    u"""
    Registry of images to download, keyed by the address they were added under.

    Each entry is a dict with the keys ``filename`` (local file name) and
    ``href`` (address actually fetched).
    """

    def __init__(self, save_path=''):
        u"""
        :param save_path: directory the images are written to; an empty value
            or None selects ``Path.image_pool_path``
        """
        # Truthiness test instead of len(): None no longer raises TypeError.
        if not save_path:
            save_path = Path.image_pool_path
        self.save_path = save_path
        self.container = {}
        # Not read by any method of this class; kept because it is a public
        # attribute.
        self.md5 = hashlib.md5()
        return

    def set_save_path(self, save_path):
        u"""Replace the directory the images are written to."""
        self.save_path = save_path
        return

    def add(self, href):
        u"""
        Register an image address.

        :param href: address of the image
        :return: local file name assigned to the image
        """
        self.container[href] = self.create_image(href)
        return self.get_filename(href)

    def delete(self, href):
        u"""Remove a registered address; raises KeyError when it is unknown."""
        del self.container[href]
        return

    def get_filename(self, href):
        u"""Return the local file name for *href*, or ``''`` when it is not registered."""
        image = self.container.get(href)
        if image:
            return image['filename']
        return ''

    def get_filename_list(self):
        u"""
        Return the registered entries as a list.

        Despite the name, each item is the full ``{'filename', 'href'}``
        dict. Wrapping in ``list`` keeps the return type a real list on
        Python 3, where ``dict.values()`` is a view.
        """
        return list(self.container.values())

    def download(self, index):
        u"""
        Fetch one registered image and write it into ``save_path``.

        Nothing happens when the target file already exists. When the address
        is empty or the fetch returns nothing, an empty file is written.

        :param index: key of the entry in ``self.container``
        """
        entry = self.container[index]
        filename = entry['filename']
        # https is swapped for http to speed the download up.
        href = entry['href'].replace('https://', 'http://')
        target = self.save_path + '/' + filename

        if os.path.isfile(target):
            return
        Debug.print_in_single_line(u'开始下载图片{}'.format(href))

        # The file is opened in binary mode, so the fallback has to be bytes;
        # a text '' raises TypeError on Python 3.
        content = b''
        if href:
            content = Http.get_content(url=href, timeout=Config.timeout_download_picture)
            if content:
                Debug.print_in_single_line(u'图片{}下载完成'.format(href))
            else:
                Debug.logger.debug(u'图片『{}』下载失败'.format(href))
                content = b''

        # A separate name keeps the file handle from shadowing the entry.
        with open(target, 'wb') as image_file:
            image_file.write(content)
        return

    def start_download(self):
        u"""
        Hand ``download`` and the registry to ``Control.control_center``.

        NOTE(review): presumably control_center calls ``download`` once per
        key of the registry - confirm against src/tools/controler.py.
        """
        argv = {'func': self.download,
                'iterable': self.container, }
        Control.control_center(argv, self.container)
        return

    def create_image(self, href):
        u"""Build the registry entry for *href* after applying the quality setting."""
        href = self.transfer_img_href_by_config_quality(href)
        image = {'filename': self.create_filename(href), 'href': href}
        return image

    def transfer_img_href_by_config_quality(self, raw_href):
        u"""
        Rewrite *raw_href* for ``Config.picture_quality``.

        Falls back to *raw_href* when ``Match.generate_img_src`` returns None.
        """
        href = Match.generate_img_src(raw_href, Config.picture_quality)
        if href is None:
            href = raw_href
        return href

    def create_filename(self, href):
        u"""Return the local file name: ``ExtraTools.md5`` of *href* plus ``.jpg``."""
        filename = ExtraTools.md5(href) + '.jpg'
        return filename
class Directory(Base):
    u"""
    Builds the directory (table of contents) page of the book from templates.
    """

    def __init__(self):
        Base.__init__(self)
        # Number of chapters opened with create_chapter and not yet finished.
        self.chapter_deep = 0
        return

    def add_html(self, src, title):
        u"""
        Append a leaf entry that links to the page at *src*.

        :param src: path of the page; only its file name is used as the link
        :param title: text shown for the entry
        """
        leaf_template = self.get_template('directory', 'item_leaf')
        link = Path.get_filename(src)
        self.content += leaf_template.format(href=link, title=title)
        return

    def create_chapter(self, src, title):
        u"""
        Append a chapter entry and go one level deeper.

        An entry created at depth 0 is additionally wrapped in the
        ``chapter`` template.

        :param src: path of the chapter page; only its file name is used
        :param title: text shown for the entry
        """
        root_template = self.get_template('directory', 'item_root')
        entry = root_template.format(href=Path.get_filename(src), title=title)
        at_top_level = self.chapter_deep == 0
        if at_top_level:
            chapter_template = self.get_template('directory', 'chapter')
            entry = chapter_template.format(item=entry, title=u'目录')
        self.content += entry

        self.chapter_deep += 1
        return

    def finish_chapter(self):
        u"""
        Go one level up; leaving depth 1 appends the ``finish_chapter``
        template.
        """
        leaving_outermost = self.chapter_deep == 1
        if leaving_outermost:
            closing = self.get_template('directory', 'finish_chapter')
            self.content += closing

        self.chapter_deep -= 1
        return

    def get_content(self):
        u"""Return the accumulated entries rendered into the ``content`` template."""
        page_template = self.get_template('directory', 'content')
        rendered = page_template.format(content=self.content)
        return rendered
class MimeType(object):
    u"""
    Writes the ``mimetype`` file of the book.
    """

    def __init__(self):
        # Text written to the mimetype file by create().
        self.content = u'application/epub+zip'

    def create(self):
        u"""Write ``content`` to ``<EpubPath.work_path>/mimetype``."""
        target = EpubPath.work_path + '/mimetype'
        with open(target, 'w') as output:
            output.write(self.content)
-------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/opf/manifest/item.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/opf/metadata/book_id.xml: -------------------------------------------------------------------------------- 1 | {uid} 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/opf/metadata/cover.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/opf/metadata/creator.xml: -------------------------------------------------------------------------------- 1 | {creator} 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/opf/metadata/language.xml: -------------------------------------------------------------------------------- 1 | {language} 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/opf/metadata/title.xml: -------------------------------------------------------------------------------- 1 | {title} 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/opf/spine/item.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/opf/spine/item_nolinear.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/toc/content.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | {head} 7 | 8 | 9 | 10 | 11 | {doc_title} 12 | 13 | 14 | {nav_point} 15 | 16 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/toc/docTitle/title.xml: -------------------------------------------------------------------------------- 1 | 2 | {title} 3 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/toc/head/depth.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/toc/head/uid.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/lib/epub/template/OEBPS/toc/navMap/item.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | {title} 4 | 5 | 6 | 7 | 8 | {extend_nav_point} 9 | 10 | -------------------------------------------------------------------------------- /src/lib/epub/template/directory/chapter.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
{item}
4 | 5 |
6 | 7 | -------------------------------------------------------------------------------- /src/lib/epub/template/directory/content.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 目录 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | {content} 14 |
15 | 16 | 17 | -------------------------------------------------------------------------------- /src/lib/epub/template/directory/finish_chapter.html: -------------------------------------------------------------------------------- 1 |
2 |
class Base(object):
    """
    Common base of the epub part builders.

    Holds the text accumulated so far in ``self.content`` and loads
    template files whose locations are attributes of ``EpubConfig``.
    """

    def __init__(self):
        self.content = ''

    def get_template(self, template_kind, template_name):
        """
        Read and return the text of a template file.

        The file location is taken from the ``EpubConfig`` attribute named
        ``<template_kind>_<template_name>_uri``.
        """
        attribute_name = '{}_{}_uri'.format(template_kind, template_name)
        template_location = getattr(EpubConfig, attribute_name)
        with open(template_location) as template_file:
            return template_file.read()

    def get_content(self):
        """Return the text accumulated so far."""
        return self.content
u'/OEBPS/opf/content.xml' 19 | 20 | ### guide 21 | guide_item_uri = base_path + u'/OEBPS/opf/guide/item.xml' 22 | 23 | ### metadata 24 | metadata_cover_uri = base_path + u'/OEBPS/opf/metadata/cover.xml' 25 | metadata_creator_uri = base_path + u'/OEBPS/opf/metadata/creator.xml' 26 | metadata_book_id_uri = base_path + u'/OEBPS/opf/metadata/book_id.xml' 27 | metadata_title_uri = base_path + u'/OEBPS/opf/metadata/title.xml' 28 | metadata_language_uri = base_path + u'/OEBPS/opf/metadata/language.xml' 29 | 30 | ### manifest 31 | manifest_item_uri = base_path + u'/OEBPS/opf/manifest/item.xml' 32 | 33 | ### spine 34 | spine_item_uri = base_path + u'/OEBPS/opf/spine/item.xml' 35 | spine_item_nolinear_uri = base_path + u'/OEBPS/opf/spine/item_nolinear.xml' 36 | 37 | 38 | ## TOC 39 | toc_content_uri = base_path + u'/OEBPS/toc/content.xml' 40 | ###head 41 | head_uid_uri = base_path + u'/OEBPS/toc/head/uid.xml' 42 | head_depth_uri = base_path + u'/OEBPS/toc/head/depth.xml' 43 | 44 | # doc_title 45 | doc_title_title_uri = base_path + u'/OEBPS/toc/docTitle/title.xml' 46 | 47 | ### ncx 48 | ncx_item_uri = base_path + u'/OEBPS/toc/navMap/item.xml' 49 | 50 | # Directory 51 | directory_item_root_uri = base_path + u'/directory/item_root.html' 52 | directory_item_leaf_uri = base_path + u'/directory/item_leaf.html' 53 | directory_chapter_uri = base_path + u'/directory/chapter.html' 54 | directory_finish_chapter_uri = base_path + u'/directory/finish_chapter.html' 55 | directory_content_uri = base_path + u'/directory/content.html' 56 | 57 | # Default 58 | book_id = u'create_by_yaozeyuan' 59 | book_title = u'no_title' 60 | creator = u'zhihuhelp' 61 | uid = u'urn:uuid:create-by-yao-ze-yuan-Tsingtao' 62 | identifier = u'' 63 | language = u'zh' 64 | -------------------------------------------------------------------------------- /src/lib/epub/tools/epub_path.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | 
class EpubPath(object):
    """
    Registry of the directories used while an epub is being assembled.

    All values are class attributes, so the configuration is shared
    process-wide. ``set_work_path`` re-derives every directory that lives
    below the work directory.
    """
    file_path = os.path.realpath(__file__)
    # Location of the library: the parent of the directory holding this file.
    base_path = os.path.dirname(os.path.dirname(file_path))
    if isinstance(base_path, bytes):
        # Python 2 returns a byte string here. Decode it with the file system
        # encoding: ``sys.stdout.encoding`` is None whenever stdout is not a
        # terminal (piped or redirected), which made this module fail on import.
        base_path = base_path.decode(
            sys.getfilesystemencoding() or sys.getdefaultencoding())

    work_path = base_path  # the library location is the initial work directory
    output_path = os.path.dirname(work_path)  # output goes one level above the work directory
    meta_inf_path = work_path + u'/META-INF'
    oebps_path = work_path + u'/OEBPS'
    # Kept below OEBPS so the default agrees with set_work_path().
    image_path = oebps_path + u'/images'
    html_path = oebps_path + u'/html'
    style_path = oebps_path + u'/style'

    @staticmethod
    def set_work_path(work_path):
        """Set the work directory and recompute every path derived from it."""
        EpubPath.work_path = work_path
        EpubPath.meta_inf_path = EpubPath.work_path + u'/META-INF'
        EpubPath.oebps_path = EpubPath.work_path + u'/OEBPS'
        EpubPath.image_path = EpubPath.oebps_path + u'/images'
        EpubPath.html_path = EpubPath.oebps_path + u'/html'
        EpubPath.style_path = EpubPath.oebps_path + u'/style'
        return

    @staticmethod
    def set_output_path(output_path):
        """Set the directory recorded as the output location."""
        EpubPath.output_path = output_path
        return

    @staticmethod
    def init_epub_path(work_path):
        """
        Set the work directory and create the epub directory skeleton in it.

        Creates META-INF and OEBPS, changes the current directory to OEBPS,
        then creates the html, images and style directories.
        """
        EpubPath.set_work_path(work_path)
        Path.mkdir(EpubPath.meta_inf_path)
        Path.mkdir(EpubPath.oebps_path)
        Path.chdir(EpubPath.oebps_path)
        Path.mkdir(EpubPath.html_path)
        Path.mkdir(EpubPath.image_path)
        Path.mkdir(EpubPath.style_path)
        return

    @staticmethod
    def reset_path():
        """Change the current directory back to the work directory."""
        Path.chdir(EpubPath.work_path)
        return
def _working_directory():
    """Return the current working directory as text, without a trailing separator."""
    current = os.path.abspath('.')
    if isinstance(current, bytes):
        # Python 2 returns a byte string; decode it with the preferred encoding.
        current = current.decode(locale.getpreferredencoding())
    return current


class Path(object):
    """
    File system helpers plus the well-known locations of the application.

    The locations are class attributes derived from ``base_path``, the
    working directory at import time; ``init_base_path`` recomputes them
    from the working directory at call time.
    """
    base_path = _working_directory()  # initial location, no trailing separator

    config_path = base_path + u'/config.json'
    # Also defined here so the attribute exists before init_base_path() runs.
    sql_path = base_path + u'/db/zhihuhelp.sql'

    www_css = base_path + u'/www/css'
    www_image = base_path + u'/www/images'

    html_pool_path = base_path + u'/知乎电子书临时资源库/知乎网页池'
    image_pool_path = base_path + u'/知乎电子书临时资源库/知乎图片池'
    # Joined with '/' rather than './', which glued a dot onto the last
    # component of base_path and pointed at a directory that is never created.
    result_path = base_path + u'/知乎助手生成的电子书'

    @staticmethod
    def reset_path():
        """Change the current directory back to ``base_path``."""
        Path.chdir(Path.base_path)
        return

    @staticmethod
    def pwd():
        """Print the resolved current directory."""
        print(os.path.realpath('.'))
        return

    @staticmethod
    def get_pwd():
        """Return the current directory as text."""
        return _working_directory()

    @staticmethod
    def mkdir(path):
        """Create *path*; an ``OSError`` (e.g. it already exists) is ignored."""
        try:
            os.mkdir(path)
        except OSError:
            # Best effort on purpose: callers create directories repeatedly.
            pass
        return

    @staticmethod
    def chdir(path):
        """Change into *path*, creating the directory first if the change fails."""
        try:
            os.chdir(path)
        except OSError:
            Path.mkdir(path)
            os.chdir(path)
        return

    @staticmethod
    def rmdir(path):
        """Remove the tree at *path*, ignoring errors; an empty *path* is a no-op."""
        if path:
            shutil.rmtree(path, ignore_errors=True)
        return

    @staticmethod
    def copy(src, dst):
        """Copy a file or a directory tree to *dst*; a missing *src* is skipped."""
        if not os.path.exists(src):
            return
        if os.path.isdir(src):
            shutil.copytree(src, dst)
        else:
            shutil.copy(src=src, dst=dst)
        return

    @staticmethod
    def get_filename(src):
        """Return the last component of *src*."""
        return os.path.basename(src)

    @staticmethod
    def init_base_path():
        """Recompute ``base_path`` and every location derived from it."""
        Path.base_path = _working_directory()
        Path.config_path = Path.base_path + u'/config.json'
        Path.sql_path = Path.base_path + u'/db/zhihuhelp.sql'

        Path.www_css = Path.base_path + u'/www/css'
        Path.www_image = Path.base_path + u'/www/images'

        Path.html_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎网页池'
        Path.image_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎图片池'
        Path.result_path = Path.base_path + u'/知乎助手生成的电子书'

        return

    @staticmethod
    def init_work_directory():
        """Create the result directory and the resource pools below ``base_path``."""
        Path.reset_path()
        Path.mkdir(u'./知乎助手生成的电子书')
        Path.mkdir(u'./知乎电子书临时资源库')
        Path.chdir(u'./知乎电子书临时资源库')
        Path.mkdir(u'./知乎网页池')
        Path.mkdir(u'./知乎图片池')
        Path.reset_path()
        return

    @staticmethod
    def is_file(path):
        """Return True when *path* is an existing regular file."""
        return os.path.isfile(path)
def shield(inner, durability=3, start_at=0, action=SHIELD_ACTION.EXCEPTION):
    """
    Wrap a Zhihu generator (such as :any:`People.followers` or
    :any:`Question.answers`) so that errors raised while fetching paged
    data are retried instead of ending the iteration.

    ..  warning:: The activity generator is rejected; it cannot be shielded.

    Usage, collecting the topics followed by each follower of a column:

    ..  code-block:: python

        column = client.column('zijingnotes')
        result = []
        for user in shield(column.followers, action=SHIELD_ACTION.PASS):
            L = []
            print("Start proc user", user.name)
            if user.over:
                print(user.over_reason)
                continue
            for topic in shield(user.following_topics, action=SHIELD_ACTION.PASS):
                print("Add topic", topic.name)
                L.append(topic.name)
            result.append(L)

        # output result

    :param inner: the generator to protect
    :param int durability: how many consecutive failures on the same item
        are tolerated; a value of 0 or less applies ``action`` on the
        first failure
    :param int start_at: index of the first item to fetch
    :param action: what to do once the durability is used up, see
        :any:`SHIELD_ACTION`; the default is to raise the exception
    :return: a new generator yielding the items of ``inner``
    :raise ValueError: on first iteration, when ``inner`` is not a Zhihu
        generator or is the activity generator
    """
    if not isinstance(inner, BaseGenerator):
        raise ValueError('First argument must be Zhihu Generator Classes')
    if isinstance(inner, ActivityGenerator):
        raise ValueError(' Activity Generator is the only one can\'t be shield')
    offset = start_at
    hp = durability
    while True:
        # Index of the last item delivered in this attempt; -1 means none.
        i = -1
        try:
            for i, x in enumerate(inner.jump(offset)):
                yield x
                # A delivered item restores the full durability.
                hp = durability
            break
        except (ZhihuException, urllib3.exceptions.MaxRetryError) as e:
            # Resume right after the last item that was delivered.
            offset += i + 1
            hp -= 1
            warnings.warn(
                "[{type}: {e}] be shield when get NO.{offset} data".format(
                    type=e.__class__.__name__,
                    e=e,
                    offset=offset
                ),
                ZhihuWarning
            )
            # ``<=`` rather than ``==``: with durability <= 0 the counter
            # starts below 1 and would otherwise never hit 0, retrying forever.
            if hp <= 0:
                if action is SHIELD_ACTION.PASS:
                    # Skip the item that keeps failing and start afresh.
                    offset += 1
                    hp = durability
                elif action is SHIELD_ACTION.STOP:
                    break
                else:
                    # SHIELD_ACTION.EXCEPTION and unknown actions: re-raise,
                    # keeping the original traceback.
                    raise
class ImZhihuAndroidClient(AuthBase):
    def __init__(self, api_version=None, app_version=None,
                 app_build=None, app_za=None, uuid=None, ua=None):
        """
        ..  inheritance-diagram:: ImZhihuAndroidClient

        Auth class that mimics the special parameters sent by the official
        Zhihu Android client. Every argument that is omitted (or falsy)
        falls back to the matching constant imported from the settings module.

        :param str|unicode api_version: API version in use
        :param str|unicode app_version: client (APK) version
        :param str|unicode app_build: APP build type
        :param str|unicode app_za: miscellaneous APP data, a urlencoded
            params dict
        :param str|unicode uuid: an identifier of unknown purpose
        :param str|unicode ua: User-Agent
        """
        self._api_version = api_version if api_version else API_VERSION
        self._app_version = app_version if app_version else APP_VERSION
        self._app_build = app_build if app_build else APP_BUILD
        self._app_za = app_za if app_za else APP_ZA
        self._uuid = uuid if uuid else UUID
        self._ua = ua if ua else DEFAULT_UA

    def __call__(self, r):
        """
        ..  note::
            requests calls this method automatically.

        Adds the headers that describe the Android client to the prepared
        request ``r`` and returns it.

        ..  seealso::
            The values come from :meth:`__init__`.
        """
        client_headers = (
            ('x-api-version', self._api_version),
            ('x-app-version', self._app_version),
            ('x-app-build', self._app_build),
            ('x-app-za', self._app_za),
            ('x-uuid', self._uuid),
            ('User-Agent', self._ua),
        )
        for header_name, header_value in client_headers:
            r.headers[header_name] = header_value
        return r
api_version,则使用这个值。 36 | """ 37 | 38 | APP_VERSION = '4.12.0' 39 | """ 40 | 模拟 Android 官方客户端使用的参数,表示使用的 APP 版本。 41 | 如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_version,则使用这个值。 42 | """ 43 | 44 | APP_BUILD = 'release' 45 | """ 46 | 模拟 Android 官方客户端使用的参数,表示使用的 APP 的 Build 类型。 47 | 如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_build,则使用这个值。 48 | """ 49 | 50 | UUID = 'AHBCVBVCDAtLBfZCo1SYbPj8SgivYjqcGCs=' 51 | """ 52 | 新加的一个东西,暂时不知道是啥的 ID 53 | """ 54 | 55 | DEFAULT_UA = 'Futureve/4.12.0 Mozilla/5.0 ' \ 56 | '(Linux; Android 6.0; Google Nexus 5 - 6.0.0 - ' \ 57 | 'API 23 - 1080x1920 Build/MRA58K; wv) ' \ 58 | 'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0' 59 | """ 60 | 新版本的 API 开始检查 UA了。 61 | """ 62 | 63 | APP_ZA = urlencode({ 64 | 'OS': 'Android', 65 | 'Release': '6.0', 66 | 'Model': 'Google Nexus 5 - 6.0.0 - API 23 - 1080x1920', 67 | 'VersionName': APP_VERSION, 68 | 'VersionCode': '450', 69 | 'Width': '1080', 70 | 'Height': '1920', 71 | 'Installer': 'Google Play', 72 | }) 73 | """ 74 | 模拟 Android 官方客户端使用的参数,表示使用的 APP 的 杂项数据。 75 | 如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_za,则使用这个值。 76 | 77 | .. 
class ZhihuToken:
    def __init__(self, user_id, uid, access_token, expires_in, token_type,
                 refresh_token, cookie, lock_in=None, unlock_ticket=None):
        """
        Zhihu token.

        Prefer :meth:`ZhihuToken.from_str`, :meth:`ZhihuToken.from_dict` or
        :meth:`ZhihuToken.from_file` over calling this constructor directly.

        ..  note::

            Used only inside :class:`.ZhihuClient`; ordinary users do not
            need to know about it.

        :param str|unicode user_id: user ID
        :param int uid: a numeric user ID
        :param str|unicode access_token: the access token
        :param int expires_in: lifetime of the token; it is added to the
            creation timestamp to obtain the expiry time
        :param str|unicode token_type: token type
        :param str|unicode refresh_token: refresh token
        :param str|unicode cookie: cookies to send after a successful login
        :param int lock_in: purpose unknown
        :param str|unicode unlock_ticket: purpose unknown
        """
        created = time.time()

        self._access_token = access_token
        self._token_type = token_type
        self._refresh_token = refresh_token
        self._cookie = cookie

        # NOTE(review): the two identifiers are stored crosswise, so the
        # ``user_id`` property returns the ``uid`` argument. Kept exactly as
        # found; presumably deliberate - confirm before changing.
        self._user_id = uid
        self._uid = user_id

        self._create_at = created
        self._expires_in = expires_in
        self._expires_at = created + expires_in

        # Purpose of the two attributes below is unknown.
        self._lock_in = lock_in
        self._unlock_ticket = unlock_ticket

    @staticmethod
    def from_str(json_str):
        """
        Build a token from a JSON string.

        :param str|unicode json_str: JSON text describing a Zhihu token
        :rtype: :class:`ZhihuToken`
        :raise ValueError: when the argument is not valid
        """
        try:
            token = ZhihuToken.from_dict(json.loads(json_str))
        except (MyJSONDecodeError, ValueError):
            message = '"{json_str}" is NOT a valid zhihu token json string.'
            raise ValueError(message.format(json_str=json_str))
        return token

    @staticmethod
    def from_dict(json_dict):
        """
        Build a token from a dict.

        :param dict json_dict: dict whose keys match the constructor arguments
        :rtype: :class:`ZhihuToken`
        :raise ValueError: when the dict does not fit the constructor
        """
        try:
            token = ZhihuToken(**json_dict)
        except TypeError:
            message = '"{json_dict}" is NOT a valid zhihu token json.'
            raise ValueError(message.format(json_dict=json_dict))
        return token

    @staticmethod
    def from_file(filename):
        """
        Load a token from a file written by :meth:`save`.

        The file is unpickled, so only load files from a trusted source.

        :param str|unicode filename: file name
        :rtype: :class:`ZhihuToken`
        """
        with open(filename, 'rb') as token_file:
            loaded = pickle.load(token_file)
        return loaded

    def save(self, filename):
        """
        Pickle this token into a file.

        :param str|unicode filename: file name
        :return: nothing
        """
        with open(filename, 'wb') as token_file:
            pickle.dump(self, token_file)

    @property
    def user_id(self):
        """
        :return: the user ID
        :rtype: str
        """
        return self._user_id

    @property
    def type(self):
        """
        :return: the token type
        :rtype: str
        """
        return self._token_type

    @property
    def token(self):
        """
        :return: the access token
        :rtype: str
        """
        return self._access_token
app_build=None, app_za=None): 14 | """ 15 | .. inheritance-diagram:: ZhihuOAuth 16 | 17 | 这个 Auth 在 :class:`.ImZhihuAndroidClient` 18 | 的基础上加上了发送 token 的功能。 19 | 20 | :param ZhihuToken token: 成功登录后得到的 Token 21 | 22 | .. seealso:: 23 | 以下参数的文档参见 :meth:`.ImZhihuAndroidClient.__init__` 24 | 25 | :param api_version: 26 | :param app_version: 27 | :param app_build: 28 | :param app_za: 29 | """ 30 | assert isinstance(token, ZhihuToken) 31 | super(ZhihuOAuth, self).__init__( 32 | api_version, app_version, app_build, app_za) 33 | self._token = token 34 | 35 | def __call__(self, r): 36 | """ 37 | .. note:: 38 | requests 会自动调用这个方法 39 | 40 | 此函数在 PreparedRequest 的 HTTP header 41 | 里加上了 HTTP Authorization 头,值为登录成功后 Zhihu 发的 access_token。 42 | 43 | 由于是 :class:`.ImZhihuAndroidClient` 的子类,也会自动加上描述 APP 信息的头。 44 | 45 | .. seealso:: 46 | :meth:`.ImZhihuAndroidClient.__call__` 47 | """ 48 | r = super(ZhihuOAuth, self).__call__(r) 49 | r.headers['Authorization'] = '{type} {token}'.format( 50 | type=str(self._token.type.capitalize()), 51 | token=str(self._token.token) 52 | ) 53 | return r 54 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/setting.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | import re 6 | import requests.adapters 7 | 8 | ADAPTER_WITH_RETRY = requests.adapters.HTTPAdapter( 9 | max_retries=requests.adapters.Retry( 10 | total=10, 11 | status_forcelist=[403, 408, 500, 502] 12 | ) 13 | ) 14 | 15 | CAPTCHA_FILE = 'captcha.gif' 16 | """ 17 | 请求验证码后储存文件名的默认值,现在的值是当前目录下的 captcha.gif。 18 | 19 | 仅在 :meth:`.ZhihuClient.login_in_terminal` 中被使用。 20 | """ 21 | 22 | re_answer_url = re.compile( 23 | r'^(?:https?://)?www.zhihu.com/question/\d+/answer/(\d+)/?$') 24 | """ 25 | 答案 URL 的正则,用于 :any:`ZhihuClient.from_url` 方法。 26 | """ 27 | 28 | re_article_url = re.compile(r'^(?:https?://)?zhuanlan.zhihu.com/p/(\d+)/?$') 
29 | """ 30 | 文章 URL 的正则,用于 :any:`ZhihuClient.from_url` 方法。 31 | """ 32 | 33 | re_collection_url = re.compile( 34 | r'^(?:https?://)?www.zhihu.com/collection/(\d+)/?$') 35 | """ 36 | 收藏夹 URL 的正则,用于 :any:`ZhihuClient.from_url` 方法。 37 | """ 38 | 39 | # TODO: 详细了解专栏 slug 的构成,更新正则 40 | re_column_url = re.compile(r'^(?:https?://)?zhuanlan.zhihu.com/([^/ ]+)/?$') 41 | """ 42 | 专栏 URL 的正则,用于 :any:`ZhihuClient.from_url` 方法。 43 | """ 44 | 45 | re_live_url = re.compile(r'^(?:https?://)?www.zhihu.com/lives/(\d+)/?$') 46 | """ 47 | Live URL 的正则,用于 :any:`ZhihuClient.from_url` 方法。 48 | """ 49 | 50 | re_people_url = re.compile(r'^(?:https?://)?www.zhihu.com/people/([^/ ]+)/?$') 51 | """ 52 | 用户 URL 的正则,用于 :any:`ZhihuClient.from_url` 方法。 53 | """ 54 | 55 | re_question_url = re.compile(r'^(?:https?://)?www.zhihu.com/question/(\d+)/?$') 56 | """ 57 | 问题 URL 的正则,用于 :any:`ZhihuClient.from_url` 方法。 58 | """ 59 | 60 | re_topic_url = re.compile(r'^(?:https?://)?www.zhihu.com/topic/(\d+)/?$') 61 | """ 62 | 话题 URL 的正则,用于 :any:`ZhihuClient.from_url` 方法。 63 | """ 64 | 65 | 66 | RE_FUNC_MAP = { 67 | # RE func int id 68 | re_answer_url: ('answer', True), 69 | re_article_url: ('article', True), 70 | re_collection_url: ('collection', True), 71 | re_column_url: ('column', False), 72 | re_live_url: ('live', True), 73 | re_people_url: ('people', False), 74 | re_question_url: ('question', True), 75 | re_topic_url: ('topic', True), 76 | } 77 | """ 78 | 正则表达式与 :any:`ZhihuClient` 的方法的对应关系。 79 | 80 | 键是正则,值是二元组,两个值分别是方法名和是否需要将 ``id`` 转化为整数。 81 | """ 82 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | import functools 6 | 7 | from .exception import NeedLoginException, IdMustBeIntException 8 | 9 | __all__ = ['need_login', 'int_id'] 10 | 11 | 12 | def need_login(func): 13 | """ 14 | 
装饰器。作用于 :class:`.ZhihuClient` 中的某些方法, 15 | 强制它们必须在登录状态下才能被使用。 16 | """ 17 | @functools.wraps(func) 18 | def wrapper(self, *args, **kwargs): 19 | if self.is_login(): 20 | return func(self, *args, **kwargs) 21 | else: 22 | raise NeedLoginException(func.__name__) 23 | 24 | return wrapper 25 | 26 | 27 | def int_id(func): 28 | """ 29 | 装饰器。作用于 :class:`.ZhihuClient` 中需要整型 ID 来构建对应知乎类的方法。 30 | 作用就是个强制类型检查。 31 | 32 | :raise: :class:`.IdMustBeIntException` 当传过来的 ID 不是整型的时候 33 | """ 34 | @functools.wraps(func) 35 | def wrapper(self, *args, **kwargs): 36 | try: 37 | some_id = args[0] 38 | except IndexError: 39 | some_id = None 40 | if not isinstance(some_id, int): 41 | raise IdMustBeIntException(func) 42 | return func(self, *args, **kwargs) 43 | 44 | return wrapper 45 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from .activity import Activity, ActType 4 | from .answer import Answer 5 | from .article import Article 6 | from .collection import Collection 7 | from .column import Column 8 | from .comment import Comment 9 | from .live import Live, LiveBadge, LiveTag, LiveTicket 10 | from .me import Me 11 | from .message import Message 12 | from .people import People, ANONYMOUS 13 | from .question import Question 14 | from .topic import Topic 15 | from .whisper import Whisper 16 | 17 | __all__ = ['Activity', 'ActType', 'Answer', 'Article', 'Collection', 'Column', 18 | 'Comment', 'Live', 'LiveBadge', 'LiveTag', 'LiveTicket', 'Me', 19 | 'Message', 'People', 'ANONYMOUS', 'Question', 'Topic', 'Whisper'] 20 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/article.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | from .base 
import Base 6 | from .generator import generator_of 7 | from .other import other_obj 8 | from .normal import normal_attr 9 | from .streaming import streaming 10 | from .utils import common_save 11 | from .urls import ( 12 | ARTICLE_DETAIL_URL, 13 | ARTICLE_COMMENTS_URL, 14 | ) 15 | 16 | __all__ = ['Article'] 17 | 18 | 19 | class Article(Base): 20 | def __init__(self, aid, cache, session): 21 | super(Article, self).__init__(aid, cache, session) 22 | 23 | def _build_url(self): 24 | return ARTICLE_DETAIL_URL.format(self.id) 25 | 26 | # ----- simple info ----- 27 | 28 | @property 29 | @other_obj('people') 30 | def author(self): 31 | return None 32 | 33 | @property 34 | @streaming() 35 | def can_comment(self): 36 | """ 37 | .. seealso:: :any:`Answer.can_comment` 38 | """ 39 | return None 40 | 41 | @property 42 | @other_obj() 43 | def column(self): 44 | """ 45 | 文章所属专栏。 46 | 47 | .. warning:: 当文章不属于任何专栏时值为 None,使用其属性前应先做检查。 48 | """ 49 | return None 50 | 51 | @property 52 | @normal_attr() 53 | def comment_count(self): 54 | return None 55 | 56 | @property 57 | @normal_attr() 58 | def comment_permission(self): 59 | """ 60 | .. seealso:: :any:`Answer.comment_permission` 61 | """ 62 | return None 63 | 64 | @property 65 | @normal_attr() 66 | def content(self): 67 | return None 68 | 69 | @property 70 | @normal_attr() 71 | def excerpt(self): 72 | return None 73 | 74 | @property 75 | @normal_attr() 76 | def id(self): 77 | return self._id 78 | 79 | @property 80 | @normal_attr() 81 | def image_url(self): 82 | return None 83 | 84 | @property 85 | @streaming(use_cache=False) 86 | def suggest_edit(self): 87 | """ 88 | .. 
seealso:: :any:`Answer.suggest_edit` 89 | """ 90 | return None 91 | 92 | @property 93 | @normal_attr() 94 | def title(self): 95 | return None 96 | 97 | @property 98 | @normal_attr('updated') 99 | def updated_time(self): 100 | return None 101 | 102 | @property 103 | @normal_attr() 104 | def voteup_count(self): 105 | return None 106 | 107 | # ----- generators ----- 108 | 109 | @property 110 | @generator_of(ARTICLE_COMMENTS_URL) 111 | def comments(self): 112 | return None 113 | 114 | # TODO: article.voters, API 接口未知 115 | 116 | # ----- other operate ----- 117 | 118 | def save(self, path='.', filename=None, invalid_chars=None): 119 | """ 120 | 除了默认文件名是文章标题外,和 :any:`Answer.save` 完全一致。 121 | 122 | .. seealso:: :any:`Answer.save` 123 | 124 | .. note:: TIPS 125 | 126 | 建议的使用方法: 127 | 128 | .. code-block:: python 129 | 130 | for article in column.articles: 131 | print(article.title) 132 | article.save(column.title) 133 | 134 | """ 135 | if self._cache is None: 136 | self._get_data() 137 | common_save(path, filename, self.content, self.title, invalid_chars) 138 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | import abc 6 | 7 | from ..exception import MyJSONDecodeError, GetDataErrorException 8 | 9 | __all__ = ['Base'] 10 | 11 | 12 | class Base(object): 13 | def __init__(self, zhihu_obj_id, cache, session): 14 | """ 15 | 16 | .. 
note:: Cache 与 Data 17 | 18 | :any:`Base` 类的 ``cache`` 参数表示已知的属性值。一般由另一个对象的 19 | JSON 数据中的一个属性充当。 20 | 21 | 比如 :any:`Answer.author` 方法,由于在请求 :any:`Answer` 的数据时, 22 | 原始 JSON 数据中就有关于作者的一些简单信息。比如 name,id,headline。 23 | 在使用此方法时就会将这些不完整的数据传递到 ``author`` 对象 (类型为 24 | :any:`People`)的 ``cache`` 中。这样一来,在执行 25 | ``answer.author.name`` 时,取出名字的操作可以省去一次网络请求。 26 | 27 | :any:`normal_attr`,:any:`other_obj` 和 :any:`streaming` 装饰器都会 28 | 优先使用 ``cache`` 中的数据,当获取失败时才会调用 29 | :any:`_get_data` 方法请求数据。 30 | 31 | :param zhihu_obj_id: 构建知乎对象所用的 ID 32 | :param dict cache: 缓存数据,就是已知的这个对象的属性集 33 | :param session: 网络请求 Session 34 | """ 35 | self._id = zhihu_obj_id 36 | self._cache = cache 37 | self._session = session 38 | self._data = None 39 | 40 | def _get_data(self): 41 | """ 42 | 调用知乎 API 接口获取数据的主要方法。 43 | 44 | url 从 :any:`_build_url` 中获取。 45 | 46 | method 从 :any:`_method` 中获取。 47 | 48 | params 从 :any:`_build_params` 中获取。 49 | 50 | data 从 :any:`_build_data` 中获取。 51 | 52 | :raise: 当返回的数据无法被解析成 JSON 53 | 或 JSON 中含有 'error' 字段时,会抛出 :any:`GetDataErrorException` 54 | """ 55 | if self._data is None: 56 | url = self._build_url() 57 | res = self._session.request( 58 | self._method(), 59 | url=url, 60 | params=self._build_params(), 61 | data=self._build_data(), 62 | ) 63 | e = GetDataErrorException( 64 | url, 65 | res, 66 | 'a valid Zhihu {0} JSON data'.format(self.__class__.__name__), 67 | ) 68 | try: 69 | json_dict = res.json() 70 | if 'error' in json_dict: 71 | raise e 72 | self._data = json_dict 73 | except MyJSONDecodeError: 74 | raise e 75 | 76 | @abc.abstractmethod 77 | def _build_url(self): 78 | """ 79 | 子类 **必须** 重载这一函数,提供获取数据的 API URL。 80 | 81 | 一般格式为 ZHIHU_XXX_URL.format(self.id) 82 | """ 83 | return '' 84 | 85 | # noinspection PyMethodMayBeStatic 86 | def _build_params(self): 87 | """ 88 | 子类可以重载这一函数,提供请求 API 时要传递的参数。默认值为 None。 89 | """ 90 | return None 91 | 92 | # noinspection PyMethodMayBeStatic 93 | def _build_data(self): 94 | """ 95 | 子类可以重载这一函数,提供请求 API 时要传递的数据。默认值为 None。 96 | 
""" 97 | return None 98 | 99 | # noinspection PyMethodMayBeStatic 100 | def _method(self): 101 | """ 102 | 子类可以重载这一函数,提供 HTTP 请求的类型,默认值为 GET。 103 | """ 104 | return 'GET' 105 | 106 | def refresh(self): 107 | """ 108 | 删除自身的 cache 和 data,下一次获取属性会重新向知乎发送请求,获取最新数据。 109 | """ 110 | self._data = self._cache = None 111 | 112 | @property 113 | def pure_data(self): 114 | """ 115 | 调试用。返回现在对象内的 JSON 数据。 116 | 117 | 如果对象没有 cache 也没有 data,会自动发送数据请求 data。 118 | """ 119 | if not self._cache: 120 | self._get_data() 121 | return { 122 | 'cache': self._cache, 123 | 'data': self._data, 124 | } 125 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/column.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | from .base import Base 6 | from .generator import generator_of 7 | from .other import other_obj 8 | from .normal import normal_attr 9 | from .urls import ( 10 | COLUMN_DETAIL_URL, 11 | COLUMN_ARTICLES_URL, 12 | COLUMN_FOLLOWERS_URL, 13 | ) 14 | 15 | __all__ = ['Column'] 16 | 17 | 18 | class Column(Base): 19 | def __init__(self, cid, cache, session): 20 | super(Column, self).__init__(cid, cache, session) 21 | 22 | def _build_url(self): 23 | return COLUMN_DETAIL_URL.format(self.id) 24 | 25 | # ---- simple info ----- 26 | 27 | @property 28 | @normal_attr('articles_count') 29 | def article_count(self): 30 | return None 31 | 32 | @property 33 | def articles_count(self): 34 | return self.article_count 35 | 36 | @property 37 | @other_obj('people') 38 | def author(self): 39 | return None 40 | 41 | @property 42 | @normal_attr() 43 | def comment_permission(self): 44 | return None 45 | 46 | @property 47 | @normal_attr() 48 | def description(self): 49 | return None 50 | 51 | @property 52 | @normal_attr('followers') 53 | def follower_count(self): 54 | return None 55 | 56 | @property 57 | @normal_attr() 58 | def id(self): 59 
| return self._id 60 | 61 | @property 62 | @normal_attr() 63 | def image_url(self): 64 | return None 65 | 66 | @property 67 | @normal_attr() 68 | def title(self): 69 | return None 70 | 71 | @property 72 | @normal_attr('updated') 73 | def updated_time(self): 74 | return None 75 | 76 | @property 77 | def updated(self): 78 | return self.updated_time 79 | 80 | # ----- generators ----- 81 | 82 | @property 83 | @generator_of(COLUMN_ARTICLES_URL) 84 | def articles(self): 85 | return None 86 | 87 | @property 88 | @generator_of(COLUMN_FOLLOWERS_URL, 'people') 89 | def followers(self): 90 | return None 91 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/message.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | from .base import Base 6 | from .other import other_obj 7 | from .normal import normal_attr 8 | 9 | __all__ = ['Message'] 10 | 11 | 12 | class Message(Base): 13 | def __init__(self, mid, cache, session): 14 | super(Message, self).__init__(mid, cache, session) 15 | 16 | def _build_url(self): 17 | return '' 18 | 19 | # ----- simple info ----- 20 | 21 | @property 22 | @normal_attr() 23 | def created_time(self): 24 | return None 25 | 26 | @property 27 | @normal_attr() 28 | def content(self): 29 | return None 30 | 31 | @property 32 | @other_obj('people') 33 | def sender(self): 34 | return None 35 | 36 | @property 37 | @other_obj('people') 38 | def receiver(self): 39 | return None 40 | 41 | def format(self, template='[{time}] {sender} --> {receiver}: {content}'): 42 | """ 43 | 格式化输出消息 44 | 45 | ``{time}`` 时间戳;``{sender}`` 发送者用户名;``{receiver}`` 接收者用户名; 46 | ``{content}`` 消息内容 47 | 48 | 用法参见示例。 49 | 50 | :param str template: 格式化模板 51 | :return: 格式化后的字符串 52 | :rtype: str 53 | """ 54 | return template.format( 55 | time=self.created_time, 56 | sender=self.sender.name, 57 | receiver=self.receiver.name, 58 | 
content=self.content, 59 | ) 60 | 61 | def __str__(self): 62 | return self.format() 63 | 64 | __repr__ = __str__ 65 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/normal.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals, print_function 4 | 5 | import functools 6 | 7 | from .utils import can_get_from 8 | 9 | __all__ = ['normal_attr'] 10 | 11 | 12 | def normal_attr(name_in_json=None): 13 | """ 14 | 15 | 本装饰器的作用为: 16 | 17 | 1. 标识这个属性为常规属性。 18 | 2. 自动从对象的数据中取对应属性返回,会自行判断需不需要请求网络。 19 | 20 | 取数据流程如下: 21 | 22 | 1. 如果 ``data`` 存在,转 2,否则转 3。 23 | 2. 尝试从 ``data`` 中取数据,成功则返回数据,否则返回被装饰函数的执行结果。 24 | 3. 尝试从 ``cache`` 中取需要的属性,成功则返回。 25 | 4. 判断属性名是不是 ``id``。不是转 5,是则返回被装饰函数的执行结果。(因为 26 | ``id`` 属性一般在 :any:`_build_url` 方法中需要引用, 27 | 如果这时向知乎请求数据会造成死循环。) 28 | 5. 则使用 API 请求数据。然后转 2。 29 | 30 | .. seealso:: 关于 cache 和 data 31 | 32 | 请看 :any:`Base` 类中的\ :any:`说明 `。 33 | 34 | :param str|unicode name_in_json: 需要取的属性在 JSON 中的名字。可空,默认值为 35 | 使用此装饰器的方法名。 36 | """ 37 | def wrappers_wrapper(func): 38 | 39 | @functools.wraps(func) 40 | def wrapper(self, *args, **kwargs): 41 | 42 | def use_data_or_func(the_name, data): 43 | if can_get_from(the_name, data): 44 | return data[the_name] 45 | else: 46 | return func(self, *args, **kwargs) 47 | 48 | name = name_in_json if name_in_json else func.__name__ 49 | if self._data: 50 | return use_data_or_func(name, self._data) 51 | elif self._cache and can_get_from(name, self._cache): 52 | return self._cache[name] 53 | else: 54 | # id is important, when there is no data, _build_url need it, 55 | # so, just return the function result 56 | if name == 'id': 57 | return func(self, *args, **kwargs) 58 | 59 | self._get_data() 60 | # noinspection PyTypeChecker 61 | if self._data: 62 | return use_data_or_func(name, self._data) 63 | return wrapper 64 | 65 | return wrappers_wrapper 66 | 
-------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/other.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | import functools 6 | import importlib 7 | 8 | __all__ = ['other_obj'] 9 | 10 | 11 | def other_obj(class_name=None, name_in_json=None, module_filename=None): 12 | """ 13 | 14 | 本装饰器的作用为: 15 | 16 | 1. 标识这个属性为另一个知乎对象。 17 | 2. 自动从当前对象的数据中取出对应属性,构建成所需要的对象。 18 | 19 | 生成对象流程如下: 20 | 21 | 1. 尝试导入类名表示的类,如果获取失败则设为 :any:`Base` 类。 22 | 2. 将对象数据设置为被装饰函数的返回值,如果不为 None 则转 6 23 | 3. 尝试从 ``cache`` 中获取用来建立对象的数据。成功转 6。 24 | 4. 如果当前对象没有 ``data`` 则调用知乎 API 获取。 25 | 5. 尝试从 ``data`` 中获取数据,如果这个也没有就返回 None 26 | 6. 将获取到的数据作为 ``cache`` 构建第一步中的导入的知乎类对象。 27 | 28 | .. seealso:: 关于 cache 和 data 29 | 30 | 请看 :any:`Base` 类中的\ :any:`说明 `。 31 | 32 | :param class_name: 要生成的对象类名 33 | :param name_in_json: 属性在 JSON 里的键名。 34 | :param module_filename: 所在的模块的文件名 35 | """ 36 | def wrappers_wrapper(func): 37 | @functools.wraps(func) 38 | def wrapper(self, *args, **kwargs): 39 | cls_name = class_name or func.__name__ 40 | if cls_name.islower(): 41 | cls_name = cls_name.capitalize() 42 | name_in_j = name_in_json or func.__name__ 43 | file_name = module_filename or cls_name.lower() 44 | 45 | try: 46 | module = importlib.import_module( 47 | '.' 
+ file_name, 48 | 'zhihu_oauth.zhcls' 49 | ) 50 | cls = getattr(module, cls_name) 51 | except (ImportError, AttributeError): 52 | from .base import Base 53 | cls = Base 54 | 55 | cache = func(self, *args, **kwargs) 56 | 57 | if cache is None: 58 | if self._cache and name_in_j in self._cache: 59 | cache = self._cache[name_in_j] 60 | else: 61 | self._get_data() 62 | if self._data and name_in_j in self._data: 63 | cache = self._data[name_in_j] 64 | 65 | if cache is not None and 'id' in cache: 66 | return cls(cache['id'], cache, self._session) 67 | else: 68 | return None 69 | 70 | return wrapper 71 | 72 | return wrappers_wrapper 73 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/question.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | from .base import Base 6 | from .generator import generator_of 7 | from .normal import normal_attr 8 | from .streaming import streaming 9 | from zhihu_oauth.zhcls.urls import ( 10 | QUESTION_DETAIL_URL, 11 | QUESTION_ANSWERS_URL, 12 | QUESTION_COMMENTS_URL, 13 | QUESTION_FOLLOWERS_URL, 14 | QUESTION_TOPICS_URL, 15 | ) 16 | 17 | __all__ = ['Question'] 18 | 19 | 20 | class Question(Base): 21 | def __init__(self, qid, cache, session): 22 | super(Question, self).__init__(qid, cache, session) 23 | 24 | def _build_url(self): 25 | return QUESTION_DETAIL_URL.format(self._id) 26 | 27 | # ----- simple info ----- 28 | 29 | @property 30 | @normal_attr() 31 | def allow_delete(self): 32 | return None 33 | 34 | @property 35 | @normal_attr() 36 | def answer_count(self): 37 | return None 38 | 39 | @property 40 | @normal_attr() 41 | def comment_count(self): 42 | return None 43 | 44 | @property 45 | @normal_attr("created") 46 | def created_time(self): 47 | return None 48 | 49 | @property 50 | @normal_attr('except') 51 | def excerpt(self): 52 | """ 53 | 知乎返回的 json 里这一项叫做 except.... 
也是醉了 54 | """ 55 | return None 56 | 57 | @property 58 | @normal_attr() 59 | def follower_count(self): 60 | return None 61 | 62 | @property 63 | @normal_attr() 64 | def id(self): 65 | return self._id 66 | 67 | @property 68 | @normal_attr() 69 | def detail(self): 70 | return None 71 | 72 | @property 73 | @streaming() 74 | def redirection(self): 75 | """ 76 | 常见返回值: 77 | 78 | .. code-block:: python 79 | 80 | { 81 | 'to': 82 | { 83 | 'url': 'https://api.zhihu.com/questions/19570036', 84 | 'id': 19570036, 85 | 'type': 'question', 86 | 'title': '什么是「问题重定向」?如何正确使用该功能解决重复问题?' 87 | }, 88 | 'from': 89 | [ 90 | { 91 | 'url': 'https://api.zhihu.com/questions/19772082', 92 | 'id': 19772082, 93 | 'type': 'question', 94 | 'title': '知乎上有重复的问题吗?' 95 | }, 96 | { 97 | 'url': 'https://api.zhihu.com/questions/20830682', 98 | 'id': 20830682, 99 | 'type': 'question', 100 | 'title': '各位知友以为同一问题重复出现,知乎应如何应对?' 101 | } 102 | ] 103 | } 104 | 105 | 在使用 from 属性时遇到语法错误?请看 :ref:`说明 ` 106 | 107 | """ 108 | return None 109 | 110 | @property 111 | @streaming() 112 | def status(self): 113 | return None 114 | 115 | @property 116 | @streaming(use_cache=False) 117 | def suggest_edit(self): 118 | """ 119 | 常见返回值: 120 | 121 | .. 
code-block:: python 122 | 123 | {'status': False, 'reason': ''} 124 | 125 | {'status': True, 'reason': '问题表意不明'} 126 | """ 127 | return None 128 | 129 | @property 130 | @normal_attr() 131 | def title(self): 132 | return None 133 | 134 | @property 135 | @normal_attr() 136 | def updated_time(self): 137 | return None 138 | 139 | # ----- generators ----- 140 | 141 | @property 142 | @generator_of(QUESTION_ANSWERS_URL) 143 | def answers(self): 144 | return None 145 | 146 | @property 147 | @generator_of(QUESTION_COMMENTS_URL) 148 | def comments(self): 149 | return None 150 | 151 | @property 152 | @generator_of(QUESTION_FOLLOWERS_URL, 'people') 153 | def followers(self): 154 | return None 155 | 156 | @property 157 | @generator_of(QUESTION_TOPICS_URL) 158 | def topics(self): 159 | return None 160 | -------------------------------------------------------------------------------- /src/lib/oauth/zhihu_oauth/zhcls/topic.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | from .base import Base 6 | from .generator import generator_of 7 | from .normal import normal_attr 8 | from zhihu_oauth.zhcls.urls import ( 9 | TOPIC_DETAIL_URL, 10 | TOPIC_BEST_ANSWERS_URL, 11 | TOPIC_BEST_ANSWERERS_URL, 12 | TOPIC_CHILDREN_URL, 13 | TOPIC_FOLLOWERS_URL, 14 | TOPIC_PARENTS_URL, 15 | TOPIC_UNANSWERED_QUESTION, 16 | ) 17 | 18 | __all__ = ['Topic'] 19 | 20 | 21 | class Topic(Base): 22 | def __init__(self, tid, cache, session): 23 | super(Topic, self).__init__(tid, cache, session) 24 | 25 | def _build_url(self): 26 | return TOPIC_DETAIL_URL.format(self.id) 27 | 28 | # ---- simple info ----- 29 | 30 | @property 31 | @normal_attr() 32 | def avatar_url(self): 33 | return None 34 | 35 | @property 36 | @normal_attr('best_answers_count') 37 | def best_answer_count(self): 38 | return None 39 | 40 | @property 41 | def best_answers_count(self): 42 | return self.best_answer_count 43 | 44 | @property 
45 | @normal_attr() 46 | def id(self): 47 | return self._id 48 | 49 | @property 50 | @normal_attr() 51 | def introduction(self): 52 | return None 53 | 54 | @property 55 | @normal_attr() 56 | def excerpt(self): 57 | return None 58 | 59 | @property 60 | def father_count(self): 61 | return self.parent_count 62 | 63 | @property 64 | @normal_attr('followers_count') 65 | def follower_count(self): 66 | return None 67 | 68 | @property 69 | def followers_count(self): 70 | return self.follower_count 71 | 72 | @property 73 | @normal_attr() 74 | def name(self): 75 | return None 76 | 77 | @property 78 | @normal_attr('father_count') 79 | def parent_count(self): 80 | return None 81 | 82 | @property 83 | @normal_attr('questions_count') 84 | def question_count(self): 85 | return None 86 | 87 | @property 88 | def questions_count(self): 89 | return self.question_count 90 | 91 | @property 92 | @normal_attr() 93 | def unanswered_count(self): 94 | return None 95 | 96 | # ----- generators ----- 97 | 98 | @property 99 | @generator_of(TOPIC_BEST_ANSWERS_URL, 'answer') 100 | def best_answers(self): 101 | """ 102 | 精华回答 103 | """ 104 | return None 105 | 106 | @property 107 | @generator_of(TOPIC_BEST_ANSWERERS_URL, 'people') 108 | def best_answerers(self): 109 | """ 110 | 好像叫,最佳回答者吧…… 111 | 112 | best_answerers……知乎真会起名字…… 113 | """ 114 | return None 115 | 116 | @property 117 | @generator_of(TOPIC_CHILDREN_URL, 'topic') 118 | def children(self): 119 | """ 120 | 子话题 121 | """ 122 | return None 123 | 124 | @property 125 | @generator_of(TOPIC_FOLLOWERS_URL, 'people') 126 | def followers(self): 127 | return None 128 | 129 | @property 130 | @generator_of(TOPIC_PARENTS_URL, 'topic') 131 | def parents(self): 132 | """ 133 | 父话题 134 | """ 135 | return None 136 | 137 | @property 138 | @generator_of(TOPIC_UNANSWERED_QUESTION, 'question') 139 | def unanswered_questions(self): 140 | return None 141 | -------------------------------------------------------------------------------- 
/src/lib/oauth/zhihu_oauth/zhcls/whisper.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import unicode_literals 4 | 5 | from .base import Base 6 | from .other import other_obj 7 | from .generator import generator_of 8 | from .normal import normal_attr 9 | from .urls import MESSAGES_URL 10 | 11 | __all__ = ['Whisper'] 12 | 13 | 14 | class Whisper(Base): 15 | """ 16 | 唔,其实就是「和某人的所有消息」。 17 | 18 | 为这个东西命名我想了半天……最后群里的一个小姐姐说叫 Whisper 吧,我觉得很可以诶~ 19 | 20 | 后来发现知乎接口里把这个叫做 Thread,嗯,不想改,我就是这么任性…… 21 | """ 22 | def _build_url(self): 23 | return '' 24 | 25 | def _get_data(self): 26 | pass 27 | 28 | def __init__(self, wid, cache, session): 29 | super(Whisper, self).__init__(wid, cache, session) 30 | 31 | # ----- simple info ----- 32 | 33 | @property 34 | @normal_attr() 35 | def allow_reply(self): 36 | return None 37 | 38 | @property 39 | def id(self): 40 | return self._id 41 | 42 | @property 43 | @normal_attr() 44 | def snippet(self): 45 | """ 46 | 最后一次私信的摘要 47 | """ 48 | return None 49 | 50 | @property 51 | @normal_attr() 52 | def updated_time(self): 53 | return None 54 | 55 | @property 56 | @normal_attr() 57 | def unread_count(self): 58 | return None 59 | 60 | @property 61 | @other_obj('people', 'participant') 62 | def who(self): 63 | """ 64 | 参与此私信会话的另一个知乎用户 65 | """ 66 | return None 67 | 68 | @property 69 | @generator_of(MESSAGES_URL) 70 | def messages(self): 71 | return None 72 | -------------------------------------------------------------------------------- /src/lib/requests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # __ 4 | # /__) _ _ _ _ _/ _ 5 | # / ( (- (/ (/ (- _) / _) 6 | # / 7 | 8 | """ 9 | Requests HTTP library 10 | ~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Requests is an HTTP library, written in Python, for human beings. 
Basic GET 13 | usage: 14 | 15 | >>> import requests 16 | >>> r = requests.get('https://www.python.org') 17 | >>> r.status_code 18 | 200 19 | >>> 'Python is a programming language' in r.content 20 | True 21 | 22 | ... or POST: 23 | 24 | >>> payload = dict(key1='value1', key2='value2') 25 | >>> r = requests.post('http://httpbin.org/post', data=payload) 26 | >>> print(r.text) 27 | { 28 | ... 29 | "form": { 30 | "key2": "value2", 31 | "key1": "value1" 32 | }, 33 | ... 34 | } 35 | 36 | The other HTTP methods are supported - see `requests.api`. Full documentation 37 | is at <http://python-requests.org>. 38 | 39 | :copyright: (c) 2016 by Kenneth Reitz. 40 | :license: Apache 2.0, see LICENSE for more details. 41 | """ 42 | 43 | __title__ = 'requests' 44 | __version__ = '2.11.1' 45 | __build__ = 0x021101 46 | __author__ = 'Kenneth Reitz' 47 | __license__ = 'Apache 2.0' 48 | __copyright__ = 'Copyright 2016 Kenneth Reitz' 49 | 50 | # Attempt to enable urllib3's SNI support, if possible 51 | try: 52 | from .packages.urllib3.contrib import pyopenssl 53 | pyopenssl.inject_into_urllib3() 54 | except ImportError: 55 | pass 56 | 57 | import warnings 58 | 59 | # urllib3's DependencyWarnings should be silenced. 60 | from .packages.urllib3.exceptions import DependencyWarning 61 | warnings.simplefilter('ignore', DependencyWarning) 62 | 63 | from . import utils 64 | from .models import Request, Response, PreparedRequest 65 | from .api import request, get, head, post, patch, put, delete, options 66 | from .sessions import session, Session 67 | from .status_codes import codes 68 | from .exceptions import ( 69 | RequestException, Timeout, URLRequired, 70 | TooManyRedirects, HTTPError, ConnectionError, 71 | FileModeWarning, ConnectTimeout, ReadTimeout 72 | ) 73 | 74 | # Set default logging handler to avoid "No handler found" warnings. 
75 | import logging 76 | try: # Python 2.7+ 77 | from logging import NullHandler 78 | except ImportError: 79 | class NullHandler(logging.Handler): 80 | def emit(self, record): 81 | pass 82 | 83 | logging.getLogger(__name__).addHandler(NullHandler()) 84 | 85 | # FileModeWarnings go off per the default. 86 | warnings.simplefilter('default', FileModeWarning, append=True) 87 | -------------------------------------------------------------------------------- /src/lib/requests/_internal_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests._internal_utils 5 | ~~~~~~~~~~~~~~ 6 | 7 | Provides utility functions that are consumed internally by Requests 8 | which depend on extremely few external helpers (such as compat) 9 | """ 10 | 11 | from .compat import is_py2, builtin_str 12 | 13 | 14 | def to_native_string(string, encoding='ascii'): 15 | """Given a string object, regardless of type, returns a representation of 16 | that string in the native string type, encoding and decoding where 17 | necessary. This assumes ASCII unless told otherwise. 18 | """ 19 | if isinstance(string, builtin_str): 20 | out = string 21 | else: 22 | if is_py2: 23 | out = string.encode(encoding) 24 | else: 25 | out = string.decode(encoding) 26 | 27 | return out 28 | -------------------------------------------------------------------------------- /src/lib/requests/certs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | requests.certs 6 | ~~~~~~~~~~~~~~ 7 | 8 | This module returns the preferred default CA certificate bundle. 9 | 10 | If you are packaging Requests, e.g., for a Linux distribution or a managed 11 | environment, you can change the definition of where() to return a separately 12 | packaged CA bundle. 
13 | """ 14 | import os.path 15 | 16 | try: 17 | from certifi import where 18 | except ImportError: 19 | def where(): 20 | """Return the preferred certificate bundle.""" 21 | # vendored bundle inside Requests 22 | return os.path.join(os.path.dirname(__file__), 'cacert.pem') 23 | 24 | if __name__ == '__main__': 25 | print(where()) 26 | -------------------------------------------------------------------------------- /src/lib/requests/compat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.compat 5 | ~~~~~~~~~~~~~~~ 6 | 7 | This module handles import compatibility issues between Python 2 and 8 | Python 3. 9 | """ 10 | 11 | from .packages import chardet 12 | 13 | import sys 14 | 15 | # ------- 16 | # Pythons 17 | # ------- 18 | 19 | # Syntax sugar. 20 | _ver = sys.version_info 21 | 22 | #: Python 2.x? 23 | is_py2 = (_ver[0] == 2) 24 | 25 | #: Python 3.x? 26 | is_py3 = (_ver[0] == 3) 27 | 28 | try: 29 | import simplejson as json 30 | except (ImportError, SyntaxError): 31 | # simplejson does not support Python 3.2, it throws a SyntaxError 32 | # because of u'...' Unicode literals. 
class RequestException(IOError):
    """Base class for the errors raised while handling a request.

    Instances carry the ``request`` and ``response`` objects (either may be
    ``None``) that were being processed when the error occurred.
    """

    def __init__(self, *args, **kwargs):
        """Store the optional `request` / `response` keyword arguments.

        Both keywords are removed from ``kwargs`` before the remaining
        arguments are forwarded to ``IOError``. When no request is given
        but the response exposes a ``request`` attribute, that one is used.
        """
        self.response = kwargs.pop('response', None)
        self.request = kwargs.pop('request', None)

        # Fall back to the request attached to the response, if any.
        borrow_request = (self.response is not None and
                          not self.request and
                          hasattr(self.response, 'request'))
        if borrow_request:
            self.request = self.response.request

        super(RequestException, self).__init__(*args, **kwargs)
class RequestsWarning(Warning):
    """Root of the warning hierarchy emitted by Requests."""


class FileModeWarning(RequestsWarning, DeprecationWarning):
    """A file was opened in text mode, but Requests determined its binary length."""
# Names of the events a hook can be registered for.
# ``response`` is currently the only one.
HOOKS = ['response']


def default_hooks():
    """Return a fresh mapping of every known event to an empty hook list."""
    registry = {}
    for event in HOOKS:
        registry[event] = []
    return registry


def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Run the hooks registered under `key` against `hook_data`.

    :param key: name of the event to dispatch.
    :param hooks: mapping of event name to a callable or an iterable of
        callables; a falsy value means "no hooks".
    :param hook_data: the object handed to each hook.
    :param kwargs: forwarded unchanged to every hook.
    :return: `hook_data`, replaced by the return value of each hook that
        returned something other than ``None``.
    """
    registered = (hooks or dict()).get(key)
    if not registered:
        return hook_data

    # A single callable is treated as a one-element list of hooks.
    if hasattr(registered, '__call__'):
        registered = [registered]

    for callback in registered:
        outcome = callback(hook_data, **kwargs)
        if outcome is not None:
            hook_data = outcome
    return hook_data
6 | 7 | In reaction to these problems, the distro maintainers replaced 8 | ``requests.packages`` with a magical "stub module" that imports the correct 9 | modules. The implementations were varying in quality and all had severe 10 | problems. For example, a symlink (or hardlink) that links the correct modules 11 | into place introduces problems regarding object identity, since you now have 12 | two modules in `sys.modules` with the same API, but different identities:: 13 | 14 | requests.packages.urllib3 is not urllib3 15 | 16 | With version ``2.5.2``, requests started to maintain its own stub, so that 17 | distro-specific breakage would be reduced to a minimum, even though the whole 18 | issue is not requests' fault in the first place. See 19 | https://github.com/kennethreitz/requests/pull/2375 for the corresponding pull 20 | request. 21 | ''' 22 | 23 | from __future__ import absolute_import 24 | import sys 25 | 26 | try: 27 | from . import urllib3 28 | except ImportError: 29 | import urllib3 30 | sys.modules['%s.urllib3' % __name__] = urllib3 31 | 32 | try: 33 | from . import chardet 34 | except ImportError: 35 | import chardet 36 | sys.modules['%s.chardet' % __name__] = chardet 37 | -------------------------------------------------------------------------------- /src/lib/requests/packages/chardet/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
def detect(aBuf):
    """Detect the encoding of a byte string.

    :param aBuf: the data to analyse; must be a byte string (``str`` on
        Python 2, ``bytes`` on Python 3).
    :return: the ``result`` attribute of a ``UniversalDetector`` that has
        been fed `aBuf`.
    :raises ValueError: if `aBuf` is a unicode object on Python 2 or is not
        a ``bytes`` instance on Python 3.
    """
    if version_info < (3, 0):
        wrong_type = isinstance(aBuf, unicode)
    else:
        wrong_type = not isinstance(aBuf, bytes)
    if wrong_type:
        raise ValueError('Expected a bytes object, not a unicode object')

    # Imported lazily, only once the input has been validated.
    from . import universaldetector
    detector = universaldetector.UniversalDetector()
    detector.reset()
    detector.feed(aBuf)
    detector.close()
    return detector.result
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober wired up with the Big5 state machine model and
    the Big5 character distribution analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        state_machine = CodingStateMachine(Big5SMModel)
        distribution = Big5DistributionAnalysis()
        self._mCodingSM = state_machine
        self._mDistributionAnalyzer = distribution
        # reset() is called last, once both helpers are in place.
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober detects."""
        return "Big5"
def main(argv=None):
    '''
    Handles command line arguments and gets things started.

    Each input is described with ``description_of`` and the resulting line
    is printed to stdout. When no path is given, standard input is read.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # ``description_of`` expects an iterable of bytes. On Python 3
    # ``sys.stdin`` is a text stream, so use its underlying binary buffer
    # when there is one; Python 2's ``sys.stdin`` has no ``buffer``
    # attribute and already yields byte strings.
    binary_stdin = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[binary_stdin])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            # Interactive use: explain how to terminate the input.
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))
3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . 
class CharSetGroupProber(CharSetProber):
    """Prober that delegates to a list of child probers.

    Subclasses are expected to fill ``self._mProbers``. Input is fed to
    every active child and the child with the highest confidence is kept
    as the best guess.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mActiveNum = 0
        self._mProbers = []
        self._mBestGuessProber = None

    def reset(self):
        """Reset this prober and re-activate every child prober."""
        CharSetProber.reset(self)
        self._mActiveNum = 0
        for child in self._mProbers:
            if not child:
                continue
            child.reset()
            child.active = True
            self._mActiveNum += 1
        self._mBestGuessProber = None

    def get_charset_name(self):
        """Return the charset name of the best guess, or None if there is none."""
        if not self._mBestGuessProber:
            # get_confidence() (re)computes the best guess as a side effect.
            self.get_confidence()
        if not self._mBestGuessProber:
            return None
        return self._mBestGuessProber.get_charset_name()

    def feed(self, aBuf):
        """Feed `aBuf` to every active child and return this prober's state."""
        for child in self._mProbers:
            if not child or not child.active:
                continue
            status = child.feed(aBuf)
            if not status:
                continue
            if status == constants.eFoundIt:
                # NOTE(review): only the best guess is recorded here; the
                # group's own state is not switched to eFoundIt. Confirm
                # this is intended.
                self._mBestGuessProber = child
                break
            if status == constants.eNotMe:
                child.active = False
                self._mActiveNum -= 1
                if self._mActiveNum <= 0:
                    # Every child has ruled itself out.
                    self._mState = constants.eNotMe
                    break
        return self.get_state()

    def get_confidence(self):
        """Return the highest confidence among the active children.

        Returns 0.99 / 0.01 directly when this prober's state is already
        eFoundIt / eNotMe. Otherwise the best guess is recomputed from
        scratch as a side effect.
        """
        state = self.get_state()
        if state == constants.eFoundIt:
            return 0.99
        if state == constants.eNotMe:
            return 0.01

        highest = 0.0
        self._mBestGuessProber = None
        for child in self._mProbers:
            if not child:
                continue
            if not child.active:
                if constants._debug:
                    sys.stderr.write(child.get_charset_name()
                                     + ' not active\n')
                continue
            confidence = child.get_confidence()
            if constants._debug:
                sys.stderr.write('%s confidence = %s\n' %
                                 (child.get_charset_name(), confidence))
            if confidence > highest:
                highest = confidence
                self._mBestGuessProber = child
        return highest if self._mBestGuessProber else 0.0
class CharSetProber:
    """Base class of the charset probers.

    Provides the default (neutral) implementation of the prober interface
    plus a few byte-string filtering helpers.
    """

    def __init__(self):
        pass

    def reset(self):
        """Put the prober back into the ``eDetecting`` state."""
        self._mState = constants.eDetecting

    def get_charset_name(self):
        """Return the detected charset name; the base class has none."""
        return None

    def feed(self, aBuf):
        """Consume `aBuf`; the base class ignores it."""
        pass

    def get_state(self):
        """Return the current state (set by :meth:`reset`)."""
        return self._mState

    def get_confidence(self):
        """Return the confidence of the guess; always 0.0 here."""
        return 0.0

    def filter_high_bit_only(self, aBuf):
        """Replace every run of bytes in 0x00-0x7F with a single space."""
        return re.sub(b'([\x00-\x7F])+', b' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        """Replace every run of ASCII letters with a single space."""
        return re.sub(b'([A-Za-z])+', b' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        """Return `aBuf` unchanged (filtering is not implemented yet)."""
        # TODO
        return aBuf
class CodingStateMachine:
    """Table-driven state machine that validates a byte sequence.

    The model `sm` is a mapping providing ``classTable``, ``charLenTable``,
    ``classFactor``, ``stateTable`` and ``name``.
    """

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentBytePos = 0
        self._mCurrentCharLen = 0
        self.reset()

    def reset(self):
        """Return to the start state."""
        self._mCurrentState = eStart

    def next_state(self, c):
        """Advance the machine by one byte and return the new state.

        PY3K: the buffer is a byte stream, so `c` is an int, not a byte;
        ``wrap_ord`` normalises both cases.
        """
        model = self._mModel
        # Every byte is first mapped to its class.
        byteCls = model['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            # First byte of a character: its class also gives the length.
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = model['charLenTable'][byteCls]
        # The (state, class) pair indexes the flattened transition table.
        transition = self._mCurrentState * model['classFactor'] + byteCls
        self._mCurrentState = model['stateTable'][transition]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        """Return the byte length recorded for the current character."""
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        """Return the ``name`` entry of the model."""
        return self._mModel['name']
# String types of the running interpreter.
if sys.version_info < (3, 0):
    base_str = (str, unicode)
else:
    base_str = (bytes, str)


def wrap_ord(a):
    """Return ``ord(a)`` for a string on Python 2, otherwise `a` unchanged.

    On Python 3 the argument is always returned as is.
    """
    needs_ord = sys.version_info < (3, 0) and isinstance(a, base_str)
    return ord(a) if needs_ord else a
# When truthy, probers write diagnostic messages to stderr.
_debug = 0

# Prober states, as returned by the probers' get_state().
eDetecting, eFoundIt, eNotMe = 0, 1, 2

# Coding state machine states, as returned by next_state().
eStart, eError, eItsMe = 0, 1, 2

# Confidence above which a prober may declare a match early.
SHORTCUT_THRESHOLD = 0.95
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober wired up with the CP949 state machine model."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        state_machine = CodingStateMachine(CP949SMModel)
        # NOTE: CP949 is a superset of EUC-KR, so the EUC-KR distribution
        #       analysis is reused here.
        distribution = EUCKRDistributionAnalysis()
        self._mCodingSM = state_machine
        self._mDistributionAnalyzer = distribution
        # reset() is called last, once both helpers are in place.
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober detects."""
        return "CP949"
class EscCharSetProber(CharSetProber):
    """Prober that runs the HZ and ISO-2022-CN/JP/KR state machines.

    All state machines are run side by side; the first one that reports
    ``eItsMe`` decides the detected charset.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        models = (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
                  ISO2022KRSMModel)
        self._mCodingSM = [CodingStateMachine(model) for model in models]
        self.reset()

    def reset(self):
        """Re-activate every state machine and forget any detection."""
        CharSetProber.reset(self)
        for machine in self._mCodingSM:
            if machine:
                machine.active = True
                machine.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        """Return the detected charset name, or None if nothing matched yet."""
        return self._mDetectedCharset

    def get_confidence(self):
        """Return 0.99 once a charset has been detected, 0.00 otherwise."""
        return 0.99 if self._mDetectedCharset else 0.00

    def feed(self, aBuf):
        """Run every active state machine over `aBuf`; return the state."""
        for c in aBuf:
            # PY3K: aBuf is a byte array, so c is an int, not a byte
            byte = wrap_ord(c)
            for machine in self._mCodingSM:
                if not machine or not machine.active:
                    continue
                outcome = machine.next_state(byte)
                if outcome == constants.eError:
                    # This machine is out; give up once none is left.
                    machine.active = False
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()
                elif outcome == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = machine.get_coding_state_machine()  # nopep8
                    return self.get_state()

        return self.get_state()
class EUCJPProber(MultiByteCharSetProber):
    """Prober for EUC-JP.

    Combines a coding state machine with a character-distribution analyzer
    and a context analyzer; the reported confidence is the larger of the
    two analyzers' confidences.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return the fixed charset name ``"EUC-JP"``."""
        return "EUC-JP"

    def feed(self, aBuf):
        """Consume ``aBuf`` and return the resulting prober state.

        An empty buffer is accepted and leaves the carried-over last byte
        untouched (previously it raised ``IndexError``).
        """
        aLen = len(aBuf)
        for i in range(0, aLen):
            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # NOTE(review): eStart presumably marks a character boundary;
                # confirm against codingstatemachine.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # Pair the first byte of this chunk with the byte saved
                    # from the end of the previous chunk.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call; skip when the buffer is
        # empty, because indexing it would raise IndexError.
        if aLen:
            self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the higher of the context and distribution confidences."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
class EUCKRProber(MultiByteCharSetProber):
    """Multi-byte prober wired up with the EUC-KR state machine model and
    the EUC-KR distribution analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        state_machine = CodingStateMachine(EUCKRSMModel)
        analyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = state_machine
        self._mDistributionAnalyzer = analyzer
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name ``"EUC-KR"``."""
        return "EUC-KR"
class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte prober wired up with the EUC-TW state machine model and
    the EUC-TW distribution analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        state_machine = CodingStateMachine(EUCTWSMModel)
        analyzer = EUCTWDistributionAnalysis()
        self._mCodingSM = state_machine
        self._mDistributionAnalyzer = analyzer
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name ``"EUC-TW"``."""
        return "EUC-TW"
class GB2312Prober(MultiByteCharSetProber):
    """Multi-byte prober wired up with the GB2312 state machine model and
    the GB2312 distribution analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        state_machine = CodingStateMachine(GB2312SMModel)
        analyzer = GB2312DistributionAnalysis()
        self._mCodingSM = state_machine
        self._mDistributionAnalyzer = analyzer
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name ``"GB2312"``."""
        return "GB2312"
class MultiByteCharSetProber(CharSetProber):
    """Shared base for the multi-byte charset probers.

    ``_mCodingSM`` and ``_mDistributionAnalyzer`` start out as ``None``;
    the concrete probers in this package assign them before calling
    ``reset()``.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # Two-slot carry buffer: slot 0 keeps the final byte of the previous
        # chunk, slot 1 receives the first byte of the current chunk.
        self._mLastChar = [0, 0]

    def reset(self):
        """Reset the base state, the state machine and analyzer (when set),
        and clear the carry buffer."""
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        """Placeholder that returns None; concrete probers override it."""
        pass

    def feed(self, aBuf):
        """Consume ``aBuf`` and return the resulting prober state.

        An empty buffer is accepted and leaves the carried-over last byte
        untouched (previously it raised ``IndexError``).
        """
        aLen = len(aBuf)
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # NOTE(review): eStart presumably marks a character boundary;
                # confirm against codingstatemachine.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # Pair the first byte of this chunk with the byte saved
                    # from the end of the previous chunk.
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call; skip when the buffer is
        # empty, because indexing it would raise IndexError.
        if aLen:
            self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mDistributionAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the distribution analyzer's confidence."""
        return self._mDistributionAnalyzer.get_confidence()
class MBCSGroupProber(CharSetGroupProber):
    """Group prober that holds one instance of each multi-byte prober."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # Instantiated in this exact order.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self._mProbers = [prober_cls() for prober_cls in prober_classes]
        self.reset()
class SBCSGroupProber(CharSetGroupProber):
    """Group prober that holds the single-byte probers.

    One ``SingleByteCharSetProber`` is created per language model, followed
    by the Hebrew prober and its two Win1255 model probers.
    """

    def __init__(self):
        CharSetGroupProber.__init__(self)
        plain_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            Latin2HungarianModel,
            Win1250HungarianModel,
            TIS620ThaiModel,
        )
        self._mProbers = [SingleByteCharSetProber(model)
                          for model in plain_models]

        # The Hebrew prober is handed both Win1255 probers, which are built
        # with the second argument False and True respectively, and all three
        # join the group.
        hebrew = HebrewProber()
        logical = SingleByteCharSetProber(Win1255HebrewModel, False, hebrew)
        visual = SingleByteCharSetProber(Win1255HebrewModel, True, hebrew)
        hebrew.set_model_probers(logical, visual)
        self._mProbers += [hebrew, logical, visual]

        self.reset()
class SJISProber(MultiByteCharSetProber):
    """Prober for Shift_JIS-family input.

    Combines a coding state machine with a character-distribution analyzer
    and a context analyzer; the charset name is taken from the context
    analyzer and the confidence is the larger of the two analyzers'
    confidences.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(SJISSMModel)
        self._mDistributionAnalyzer = SJISDistributionAnalysis()
        self._mContextAnalyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return whatever name the context analyzer reports."""
        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        """Consume ``aBuf`` and return the resulting prober state.

        An empty buffer is accepted and leaves the carried-over last byte
        untouched (previously it raised ``IndexError``).
        """
        aLen = len(aBuf)
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # NOTE(review): eStart presumably marks a character boundary;
                # confirm against codingstatemachine.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # Pair the first byte of this chunk with the byte saved
                    # from the end of the previous chunk.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
                                                charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
                                                     - charLen], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call; skip when the buffer is
        # empty, because indexing it would raise IndexError.
        if aLen:
            self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the higher of the context and distribution confidences."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
class UTF8Prober(CharSetProber):
    """Prober for UTF-8 driven by a single coding state machine.

    Confidence grows with the number of multi-byte characters seen so far.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(UTF8SMModel)
        self.reset()

    def reset(self):
        """Reset the base state, the state machine and the character count."""
        CharSetProber.reset(self)
        self._mCodingSM.reset()
        self._mNumOfMBChar = 0

    def get_charset_name(self):
        """Return the fixed charset name ``"utf-8"``."""
        return "utf-8"

    def feed(self, aBuf):
        """Consume ``aBuf`` and return the resulting prober state."""
        for byte in aBuf:
            outcome = self._mCodingSM.next_state(byte)
            if outcome == constants.eError:
                self._mState = constants.eNotMe
                break
            if outcome == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if (outcome == constants.eStart
                    and self._mCodingSM.get_current_charlen() >= 2):
                # Count characters that took two or more bytes.
                self._mNumOfMBChar += 1

        still_detecting = self.get_state() == constants.eDetecting
        if still_detecting and self.get_confidence() > constants.SHORTCUT_THRESHOLD:
            self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return 0.99 from six multi-byte characters on; below that,
        ``1.0 - 0.99 * ONE_CHAR_PROB ** count``."""
        unlike = 0.99
        if self._mNumOfMBChar >= 6:
            return unlike
        for _ in range(self._mNumOfMBChar):
            unlike *= ONE_CHAR_PROB
        return 1.0 - unlike
def add_stderr_logger(level=logging.DEBUG):
    """
    Attach a fresh ``StreamHandler`` to this package's logger and set the
    logger to ``level``. Handy while debugging.

    Returns the handler that was added.
    """
    # This function has to live in this __init__.py so that __name__ is
    # correct even when urllib3 is vendored inside another package.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))

    package_logger = logging.getLogger(__name__)
    package_logger.addHandler(stream_handler)
    package_logger.setLevel(level)
    package_logger.debug('Added a stderr logging handler to logger: %s',
                         __name__)
    return stream_handler
def choose_boundary():
    """
    Return a random multipart boundary: the hex form of a fresh UUID4.

    Our embarrassingly-simple replacement for mimetools.choose_boundary.
    """
    token = uuid4()
    return token.hex


def iter_field_objects(fields):
    """
    Iterate over fields, yielding :class:`~urllib3.fields.RequestField`
    objects.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`.

    """
    entries = six.iteritems(fields) if isinstance(fields, dict) else iter(fields)

    for entry in entries:
        if not isinstance(entry, RequestField):
            # Anything else is unpacked as arguments to from_tuples.
            entry = RequestField.from_tuples(*entry)
        yield entry


def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Iterate over fields.

    The addition of :class:`~urllib3.fields.RequestField` makes this function
    obsolete. Instead, use :func:`iter_field_objects`, which returns
    :class:`~urllib3.fields.RequestField` objects.

    Supports list of (k, v) tuples and dicts.
    """
    pairs = six.iteritems(fields) if isinstance(fields, dict) else fields
    return ((k, v) for k, v in pairs)


def encode_multipart_formdata(fields, boundary=None):
    """
    Encode a dictionary of ``fields`` using the multipart/form-data MIME format.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        If not specified, then a random boundary will be generated using
        :func:`choose_boundary`.

    Returns a ``(body, content_type)`` tuple.
    """
    body = BytesIO()
    if boundary is None:
        boundary = choose_boundary()

    for field in iter_field_objects(fields):
        body.write(b('--%s\r\n' % (boundary)))
        writer(body).write(field.render_headers())

        payload = field.data
        if isinstance(payload, int):
            payload = str(payload)  # Backwards compatibility

        # Text goes through the UTF-8 stream writer; anything else is
        # written to the buffer as-is.
        if isinstance(payload, six.text_type):
            writer(body).write(payload)
        else:
            body.write(payload)

        body.write(b'\r\n')

    body.write(b('--%s--\r\n' % (boundary)))

    content_type = str('multipart/form-data; boundary=%s' % boundary)
    return body.getvalue(), content_type
def backport_makefile(self, mode="r", buffering=None, encoding=None,
                      errors=None, newline=None):
    """
    Backport of ``socket.makefile`` from Python 3.5.

    Wraps ``self`` (a socket-like object) in a ``SocketIO`` and, depending on
    ``mode`` and ``buffering``, in a buffered and/or text layer.

    :param mode: Any combination of ``"r"``, ``"w"`` and ``"b"``. A mode
        without ``"w"`` is treated as readable.
    :param buffering: ``None`` or a negative value selects
        ``io.DEFAULT_BUFFER_SIZE``; ``0`` returns the raw ``SocketIO`` and is
        only allowed in binary mode.
    :param encoding: Passed to ``io.TextIOWrapper`` in text mode.
    :param errors: Passed to ``io.TextIOWrapper`` in text mode.
    :param newline: Passed to ``io.TextIOWrapper`` in text mode.
    :returns: The raw ``SocketIO``, a buffered binary stream, or a
        ``TextIOWrapper`` whose ``mode`` attribute is set to ``mode``.
    :raises ValueError: On an unknown mode character, or when an unbuffered
        text stream is requested.
    """
    if set(mode) - set(["r", "w", "b"]):
        raise ValueError(
            "invalid mode %r (only r, w, b allowed)" % (mode,)
        )

    writing = "w" in mode
    reading = "r" in mode or not writing
    binary = "b" in mode

    rawmode = ("r" if reading else "") + ("w" if writing else "")
    raw = SocketIO(self, rawmode)
    # The reference is counted as soon as the raw wrapper exists, i.e. before
    # the buffering argument is validated below.
    self._makefile_refs += 1

    if buffering is None or buffering < 0:
        buffering = io.DEFAULT_BUFFER_SIZE

    if buffering == 0:
        if binary:
            return raw
        raise ValueError("unbuffered streams must be binary")

    if reading and writing:
        buffered = io.BufferedRWPair(raw, raw, buffering)
    elif reading:
        buffered = io.BufferedReader(raw, buffering)
    else:
        buffered = io.BufferedWriter(raw, buffering)

    if binary:
        return buffered

    text = io.TextIOWrapper(buffered, encoding, errors, newline)
    text.mode = mode
    return text
13 | __all__ = ('CertificateError', 'match_hostname') 14 | -------------------------------------------------------------------------------- /src/lib/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py: -------------------------------------------------------------------------------- 1 | """The match_hostname() function from Python 3.3.3, essential when using SSL.""" 2 | 3 | # Note: This file is under the PSF license as the code comes from the python 4 | # stdlib. http://docs.python.org/3/license.html 5 | 6 | import re 7 | 8 | __version__ = '3.4.0.2' 9 | 10 | class CertificateError(ValueError): 11 | pass 12 | 13 | 14 | def _dnsname_match(dn, hostname, max_wildcards=1): 15 | """Matching according to RFC 6125, section 6.4.3 16 | 17 | http://tools.ietf.org/html/rfc6125#section-6.4.3 18 | """ 19 | pats = [] 20 | if not dn: 21 | return False 22 | 23 | # Ported from python3-syntax: 24 | # leftmost, *remainder = dn.split(r'.') 25 | parts = dn.split(r'.') 26 | leftmost = parts[0] 27 | remainder = parts[1:] 28 | 29 | wildcards = leftmost.count('*') 30 | if wildcards > max_wildcards: 31 | # Issue #17980: avoid denials of service by refusing more 32 | # than one wildcard per fragment. A survey of established 33 | # policy among SSL implementations showed it to be a 34 | # reasonable choice. 35 | raise CertificateError( 36 | "too many wildcards in certificate DNS name: " + repr(dn)) 37 | 38 | # speed up common case w/o wildcards 39 | if not wildcards: 40 | return dn.lower() == hostname.lower() 41 | 42 | # RFC 6125, section 6.4.3, subitem 1. 43 | # The client SHOULD NOT attempt to match a presented identifier in which 44 | # the wildcard character comprises a label other than the left-most label. 45 | if leftmost == '*': 46 | # When '*' is a fragment by itself, it matches a non-empty dotless 47 | # fragment. 48 | pats.append('[^.]+') 49 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'): 50 | # RFC 6125, section 6.4.3, subitem 3. 
51 | # The client SHOULD NOT attempt to match a presented identifier 52 | # where the wildcard character is embedded within an A-label or 53 | # U-label of an internationalized domain name. 54 | pats.append(re.escape(leftmost)) 55 | else: 56 | # Otherwise, '*' matches any dotless string, e.g. www* 57 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) 58 | 59 | # add the remaining fragments, ignore any wildcards 60 | for frag in remainder: 61 | pats.append(re.escape(frag)) 62 | 63 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) 64 | return pat.match(hostname) 65 | 66 | 67 | def match_hostname(cert, hostname): 68 | """Verify that *cert* (in decoded format as returned by 69 | SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 70 | rules are followed, but IP addresses are not accepted for *hostname*. 71 | 72 | CertificateError is raised on failure. On success, the function 73 | returns nothing. 74 | """ 75 | if not cert: 76 | raise ValueError("empty or no certificate") 77 | dnsnames = [] 78 | san = cert.get('subjectAltName', ()) 79 | for key, value in san: 80 | if key == 'DNS': 81 | if _dnsname_match(value, hostname): 82 | return 83 | dnsnames.append(value) 84 | if not dnsnames: 85 | # The subject is only checked when there is no dNSName entry 86 | # in subjectAltName 87 | for sub in cert.get('subject', ()): 88 | for key, value in sub: 89 | # XXX according to RFC 2818, the most specific Common Name 90 | # must be used. 
91 | if key == 'commonName': 92 | if _dnsname_match(value, hostname): 93 | return 94 | dnsnames.append(value) 95 | if len(dnsnames) > 1: 96 | raise CertificateError("hostname %r " 97 | "doesn't match either of %s" 98 | % (hostname, ', '.join(map(repr, dnsnames)))) 99 | elif len(dnsnames) == 1: 100 | raise CertificateError("hostname %r " 101 | "doesn't match %r" 102 | % (hostname, dnsnames[0])) 103 | else: 104 | raise CertificateError("no appropriate commonName or " 105 | "subjectAltName fields were found") 106 | -------------------------------------------------------------------------------- /src/lib/requests/packages/urllib3/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # For backwards compatibility, provide imports that used to be here. 3 | from .connection import is_connection_dropped 4 | from .request import make_headers 5 | from .response import is_fp_closed 6 | from .ssl_ import ( 7 | SSLContext, 8 | HAS_SNI, 9 | IS_PYOPENSSL, 10 | assert_fingerprint, 11 | resolve_cert_reqs, 12 | resolve_ssl_version, 13 | ssl_wrap_socket, 14 | ) 15 | from .timeout import ( 16 | current_time, 17 | Timeout, 18 | ) 19 | 20 | from .retry import Retry 21 | from .url import ( 22 | get_host, 23 | parse_url, 24 | split_first, 25 | Url, 26 | ) 27 | 28 | __all__ = ( 29 | 'HAS_SNI', 30 | 'IS_PYOPENSSL', 31 | 'SSLContext', 32 | 'Retry', 33 | 'Timeout', 34 | 'Url', 35 | 'assert_fingerprint', 36 | 'current_time', 37 | 'is_connection_dropped', 38 | 'is_fp_closed', 39 | 'get_host', 40 | 'parse_url', 41 | 'make_headers', 42 | 'resolve_cert_reqs', 43 | 'resolve_ssl_version', 44 | 'split_first', 45 | 'ssl_wrap_socket', 46 | ) 47 | -------------------------------------------------------------------------------- /src/lib/requests/packages/urllib3/util/request.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from 
# Value used for 'accept-encoding' when the caller passes a truthy value that
# is neither a string nor a list.
ACCEPT_ENCODING = 'gzip,deflate'


def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None, proxy_basic_auth=None, disable_cache=None):
    """
    Build a dict of request headers from a handful of shortcuts.

    Every argument is optional; a falsy value adds no header.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for 'proxy-authorization: basic ...'
        auth header.

    :param disable_cache:
        If ``True``, adds 'cache-control: no-cache' header.

    :returns:
        A new dict keyed by lower-case header names.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    def _basic(credentials):
        # "Basic " followed by the base64 form of the credentials string.
        return 'Basic ' + \
            b64encode(b(credentials)).decode('utf-8')

    headers = {}

    if accept_encoding:
        if isinstance(accept_encoding, list):
            accept_encoding = ','.join(accept_encoding)
        elif not isinstance(accept_encoding, str):
            accept_encoding = ACCEPT_ENCODING
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    if basic_auth:
        headers['authorization'] = _basic(basic_auth)

    if proxy_basic_auth:
        headers['proxy-authorization'] = _basic(proxy_basic_auth)

    if disable_cache:
        headers['cache-control'] = 'no-cache'

    return headers
def assert_header_parsing(headers):
    """
    Raise if ``headers`` carries parsing defects or left-over unparsed data.

    Only works on Python 3.

    :param headers: Headers to verify.
    :type headers: `httplib.HTTPMessage`.

    :raises TypeError:
        If ``headers`` is not an ``httplib.HTTPMessage``.
    :raises urllib3.exceptions.HeaderParsingError:
        If parsing errors are found.
    """
    # A wrong argument type would otherwise pass silently, so check it
    # explicitly to make debugging easier.
    if not isinstance(headers, httplib.HTTPMessage):
        raise TypeError('expected httplib.Message, got {0}.'.format(
            type(headers)))

    defects = getattr(headers, 'defects', None)
    get_payload = getattr(headers, 'get_payload', None)

    # Platform-specific: ``get_payload`` is only present on Python 3.
    unparsed_data = get_payload() if get_payload else None

    if not (defects or unparsed_data):
        return
    raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)


def is_response_to_head(response):
    """
    Check whether ``response`` answers a HEAD request.
    Handles the quirks of AppEngine, where the method is stored as an int.

    :param response: Response whose private ``_method`` attribute is read.
    :type response: :class:`httplib.HTTPResponse`
    """
    # FIXME: Can we do this somehow without accessing private httplib _method?
    method = response._method
    if not isinstance(method, int):
        return method.upper() == 'HEAD'
    # Platform-specific: Appengine
    return method == 3
8 | 100: ('continue',), 9 | 101: ('switching_protocols',), 10 | 102: ('processing',), 11 | 103: ('checkpoint',), 12 | 122: ('uri_too_long', 'request_uri_too_long'), 13 | 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'), 14 | 201: ('created',), 15 | 202: ('accepted',), 16 | 203: ('non_authoritative_info', 'non_authoritative_information'), 17 | 204: ('no_content',), 18 | 205: ('reset_content', 'reset'), 19 | 206: ('partial_content', 'partial'), 20 | 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), 21 | 208: ('already_reported',), 22 | 226: ('im_used',), 23 | 24 | # Redirection. 25 | 300: ('multiple_choices',), 26 | 301: ('moved_permanently', 'moved', '\\o-'), 27 | 302: ('found',), 28 | 303: ('see_other', 'other'), 29 | 304: ('not_modified',), 30 | 305: ('use_proxy',), 31 | 306: ('switch_proxy',), 32 | 307: ('temporary_redirect', 'temporary_moved', 'temporary'), 33 | 308: ('permanent_redirect', 34 | 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 35 | 36 | # Client Error. 
37 | 400: ('bad_request', 'bad'), 38 | 401: ('unauthorized',), 39 | 402: ('payment_required', 'payment'), 40 | 403: ('forbidden',), 41 | 404: ('not_found', '-o-'), 42 | 405: ('method_not_allowed', 'not_allowed'), 43 | 406: ('not_acceptable',), 44 | 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), 45 | 408: ('request_timeout', 'timeout'), 46 | 409: ('conflict',), 47 | 410: ('gone',), 48 | 411: ('length_required',), 49 | 412: ('precondition_failed', 'precondition'), 50 | 413: ('request_entity_too_large',), 51 | 414: ('request_uri_too_large',), 52 | 415: ('unsupported_media_type', 'unsupported_media', 'media_type'), 53 | 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), 54 | 417: ('expectation_failed',), 55 | 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), 56 | 421: ('misdirected_request',), 57 | 422: ('unprocessable_entity', 'unprocessable'), 58 | 423: ('locked',), 59 | 424: ('failed_dependency', 'dependency'), 60 | 425: ('unordered_collection', 'unordered'), 61 | 426: ('upgrade_required', 'upgrade'), 62 | 428: ('precondition_required', 'precondition'), 63 | 429: ('too_many_requests', 'too_many'), 64 | 431: ('header_fields_too_large', 'fields_too_large'), 65 | 444: ('no_response', 'none'), 66 | 449: ('retry_with', 'retry'), 67 | 450: ('blocked_by_windows_parental_controls', 'parental_controls'), 68 | 451: ('unavailable_for_legal_reasons', 'legal_reasons'), 69 | 499: ('client_closed_request',), 70 | 71 | # Server Error. 
# The ABC aliases in ``collections`` were removed in Python 3.10; prefer
# ``collections.abc`` and fall back to the old location on Python 2.
try:
    from collections.abc import Mapping, MutableMapping
except ImportError:
    from collections import Mapping, MutableMapping


class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of ``MutableMapping`` as well as
    dict's ``copy``. Also provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the
    value of a ``'Content-Encoding'`` response header, regardless
    of how the header name was originally stored.

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.
    """

    def __init__(self, data=None, **kwargs):
        """Create the mapping, optionally filled from ``data`` and ``kwargs``."""
        # Maps lower-cased key -> (key as last set, value).
        self._store = OrderedDict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        """Compare against any mapping, ignoring the case of the keys."""
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        """Return a new CaseInsensitiveDict built from the stored pairs."""
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))


class LookupDict(dict):
    """Dictionary lookup object.

    Entries are stored as instance attributes (e.g. via ``setattr``);
    ``__getitem__`` and ``get`` read them back from ``__dict__``, so a
    missing key yields ``None`` (or the supplied default) instead of raising.
    """

    def __init__(self, name=None):
        """Remember ``name``, which is only used in the repr."""
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        # The format string was empty, which made every repr() call raise
        # TypeError ("not all arguments converted").
        return '<lookup \'%s\'>' % (self.name,)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None

        return self.__dict__.get(key, None)

    def get(self, key, default=None):
        """Return the attribute named ``key``, or ``default`` when absent."""
        return self.__dict__.get(key, default)
class Control(object):
    u"""
    Runs batches of work on a shared thread pool, repeating each batch for up
    to ``Config.max_try`` attempts.
    """
    # Pool shared by all callers; created once, when this class body executes.
    thread_pool = ThreadPool(Config.max_thread)

    @staticmethod
    def control_center(argv, test_flag):
        u"""
        Run the batch described by ``argv`` once per attempt, for at most
        ``Config.max_try`` attempts.

        :param argv: dict of keyword arguments for ``thread_pool.map``;
            ``debug_control`` reads its ``'func'`` and ``'iterable'`` keys.
        :param test_flag: checked for truthiness before every attempt; an
            attempt does nothing when it is falsy. Presumably a collection of
            outstanding work that the workers drain -- TODO confirm against
            callers.
        """
        max_try = Config.max_try
        for time in range(max_try):
            if test_flag:
                if Config.debug_for_thread:
                    Control.debug_control(argv)
                else:
                    Control.release_control(argv)
                    # NOTE(review): this repeats the map() call that
                    # release_control() just made, but without its exception
                    # guard. It looks like a leftover; the nesting level of
                    # this line should also be confirmed against the
                    # original file.
                    Control.thread_pool.map(**argv)
        return

    @staticmethod
    def debug_control(argv):
        u"""
        Single-threaded variant: call ``argv['func']`` on each item of
        ``argv['iterable']`` in order. Exceptions propagate to the caller.
        """
        for item in argv['iterable']:
            argv['func'](item)
        return

    @staticmethod
    def release_control(argv):
        u"""
        Run the batch on the shared thread pool and swallow any ``Exception``
        it raises.
        """
        try:
            Control.thread_pool.map(**argv)
        except Exception:
            # By convention every error is swallowed here;
            # switch on debug mode to investigate once a user reports a problem.
            pass
        return
class ExtraTools(object):
    """Small stateless helpers for timestamps, dates and hashing."""

    @staticmethod
    def format_date(date_format, timestamp):
        """
        Format a timestamp as local time.

        :param date_format: format string understood by ``time.strftime``
        :param timestamp: seconds since the epoch, as accepted by ``time.localtime``
        :return: the formatted local time
        """
        return time.strftime(date_format, time.localtime(timestamp))

    @staticmethod
    def get_time():
        """Return the current ``time.time()`` value, truncated to whole seconds, as a string."""
        return str(int(time.time()))

    @staticmethod
    def get_friendly_time():
        """Return the current local date and time in ISO format without the fractional seconds."""
        # NOTE(review): as written, the final replace() swaps ':' for the same
        # character and therefore changes nothing; presumably a different
        # (file-name friendly) separator was intended - confirm before editing.
        return datetime.datetime.today().isoformat().split('.')[0].replace(':', ':')

    @staticmethod
    def get_today():
        """Return today's date in ISO format (YYYY-MM-DD)."""
        return datetime.date.today().isoformat()

    @staticmethod
    def get_yesterday():
        """Return yesterday's date in ISO format (YYYY-MM-DD)."""
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        return yesterday.isoformat()

    @staticmethod
    def md5(content):
        """
        Return the hexadecimal MD5 digest of *content*.

        Byte strings are hashed as they are, text is encoded as UTF-8 first,
        and any other object is converted with ``str()`` before hashing.

        :param content: bytes, text, or any object with a string form
        :return: the 32-character hexadecimal digest
        """
        if isinstance(content, (bytes, type(u''))):
            data = content
        else:
            data = str(content)
        if not isinstance(data, bytes):
            # hashlib only accepts bytes; the previous str(content) call failed
            # for non-ASCII text on Python 2 and for every text value on Python 3
            data = data.encode('utf-8')
        encrypt = hashlib.md5()
        encrypt.update(data)
        return encrypt.hexdigest()
def _read_template(path):
    """Return the whole content of the template file at *path*, closing the file afterwards."""
    with open(path, 'r') as template_file:
        return template_file.read()


def _wrap_in_base(base, body_path):
    """Put the template at *body_path* into *base* as its body, keeping ``{title}`` as a placeholder."""
    return base.format(
        **{
            'title': '{title}',
            'body': _read_template(body_path)
        }
    )


class Template(object):
    """
    File templates.

    All templates are read when the class is defined, from paths relative to
    the current working directory (``./www/template/...``).
    """
    # type : str
    # interface : title, body
    base = _read_template('./www/template/base.html')

    # type : str
    book_info = _wrap_in_base(base, './www/template/info_page/book.html')

    # type : str
    question_info = _wrap_in_base(base, './www/template/info_page/question.html')

    # type : str
    author_info = _wrap_in_base(base, './www/template/info_page/author.html')

    # type : str
    topic_info = _wrap_in_base(base, './www/template/info_page/topic.html')

    # type : str
    collection_info = _wrap_in_base(base, './www/template/info_page/collection.html')

    # type : str
    column_info = _wrap_in_base(base, './www/template/info_page/column.html')

    # type : str
    article_info = _wrap_in_base(base, './www/template/info_page/article.html')

    # type : str
    question = _wrap_in_base(base, './www/template/content/question/question.html')

    # type : str
    answer = _read_template('./www/template/content/question/answer.html')
class Type(object):
    """String identifiers for the kinds of content handled."""
    # unknown kind
    unknown = 'unknown'

    answer = 'answer'
    question = 'question'
    topic = 'topic'
    collection = 'collection'
    author = 'author'
    column = 'column'
    article = 'article'


class ImgQuality(object):
    """Image quality levels, plus a helper that builds image download URLs."""
    none = 0  # no images
    big = 1  # standard
    raw = 2  # original image

    @staticmethod
    def add_random_download_address_header_for_img_filename(file_uri):
        """
        Prefix an image file name with an image host to form its download address.

        NOTE(review): despite the name, the host is not picked at random -
        the first entry of the host list is always used.

        :param file_uri: image file name to append after the host
        :return: the download address
        """
        hosts = (
            'https://pic1.zhimg.com/',
            'https://pic2.zhimg.com/',
            'https://pic3.zhimg.com/',
            'https://pic4.zhimg.com/',
        )
        return hosts[0] + file_uri
http://www.zhihu.com/people/tian-yu-bai/followees 10 | http://www.zhihu.com/people/ying-ye-78 11 | #table 12 | http://www.zhihu.com/roundtable/superhero 13 | http://www.zhihu.com/roundtable/superhero/questions 14 | #topic 15 | http://www.zhihu.com/topic/19554151 16 | http://www.zhihu.com/topic/19551147 17 | http://www.zhihu.com/topic/19551147/top-answers 18 | http://www.zhihu.com/topic/19551147/questions 19 | http://www.zhihu.com/topic/19551147/organize 20 | http://www.zhihu.com/topic/19551147/manage 21 | http://www.zhihu.com/topic/19551147/log 22 | #article 23 | http://zhuanlan.zhihu.com/8hpencil/19929476 24 | #column 25 | http://zhuanlan.zhihu.com/8hpencil 26 | #collection 27 | http://www.zhihu.com/collection/32271511 28 | http://www.zhihu.com/collection/32271511/log 29 | -------------------------------------------------------------------------------- /unit/addressFile/answer: -------------------------------------------------------------------------------- 1 | #answer 2 | http://www.zhihu.com/question/25420679/answer/30790550?utm_source=weibo&utm_medium=weibo_share&utm_content=share_answer&utm_campaign=share_button 3 | -------------------------------------------------------------------------------- /unit/addressFile/article: -------------------------------------------------------------------------------- 1 | #article 2 | http://zhuanlan.zhihu.com/8hpencil/19929476 3 | -------------------------------------------------------------------------------- /unit/addressFile/collection: -------------------------------------------------------------------------------- 1 | #collection 2 | http://www.zhihu.com/collection/32271511 3 | http://www.zhihu.com/collection/32271511/log 4 | -------------------------------------------------------------------------------- /unit/addressFile/column: -------------------------------------------------------------------------------- 1 | #column 2 | http://zhuanlan.zhihu.com/8hpencil 3 | 
-------------------------------------------------------------------------------- /unit/addressFile/people: -------------------------------------------------------------------------------- 1 | #people 2 | http://www.zhihu.com/people/zhong-wen-71 3 | http://www.zhihu.com/people/Selerare 4 | http://www.zhihu.com/people/_Zen 5 | http://www.zhihu.com/people/mu-mu-55-53 6 | http://www.zhihu.com/people/tian-yu-bai 7 | http://www.zhihu.com/people/tian-yu-bai/followees 8 | http://www.zhihu.com/people/ying-ye-78 9 | -------------------------------------------------------------------------------- /unit/addressFile/question: -------------------------------------------------------------------------------- 1 | #question 2 | http://www.zhihu.com/question/27580793 3 | http://www.zhihu.com/question/27580793?sort=created 4 | http://www.zhihu.com/question/22921426?sort=created#521个回答 5 | http://www.zhihu.com/question/19568396#847个回答 6 | -------------------------------------------------------------------------------- /unit/addressFile/table: -------------------------------------------------------------------------------- 1 | #table 2 | http://www.zhihu.com/roundtable/superhero 3 | http://www.zhihu.com/roundtable/superhero/questions 4 | -------------------------------------------------------------------------------- /unit/addressFile/topic: -------------------------------------------------------------------------------- 1 | #topic 2 | http://www.zhihu.com/topic/19554151 3 | http://www.zhihu.com/topic/19551147 4 | http://www.zhihu.com/topic/19551147/top-answers 5 | http://www.zhihu.com/topic/19551147/questions 6 | http://www.zhihu.com/topic/19551147/organize 7 | http://www.zhihu.com/topic/19551147/manage 8 | http://www.zhihu.com/topic/19551147/log 9 | -------------------------------------------------------------------------------- /unit/demo/__init__.json: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 
-------------------------------------------------------------------------------- /unit/demo/readme.md: -------------------------------------------------------------------------------- 1 | 将json转为正常的html后的结果 -------------------------------------------------------------------------------- /unit/oauth_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 使用该文件测试oauth的使用方法 3 | # 放置于首位 4 | import sys # 修改默认编码 5 | import os # 添加系统路径 6 | import json 7 | 8 | base_path = unicode(os.path.abspath('.').decode(sys.stdout.encoding)) 9 | sys.path.append(base_path + u'/src/lib') 10 | sys.path.append(base_path + u'/src/lib/oauth') 11 | 12 | reload(sys) 13 | sys.setdefaultencoding('utf-8') # 强制使用utf-8编码 14 | 15 | from zhihu_oauth import ZhihuClient 16 | 17 | from zhihu_oauth.exception import NeedCaptchaException 18 | 19 | client = ZhihuClient() 20 | 21 | test_email = 'mengqingxue2014@qq.com' 22 | test_password = '131724qingxue' 23 | token_file = './token.pkl' 24 | 25 | if os.path.lexists(token_file): 26 | client.load_token(token_file) 27 | print 'load token success' 28 | else: 29 | try: 30 | login_result = client.login(test_email, test_password) 31 | except NeedCaptchaException: 32 | # 保存验证码并提示输入,重新登录 33 | print u'登录失败,需要输入验证码' 34 | with open('a.gif', 'wb') as f: 35 | f.write(client.get_captcha()) 36 | captcha = raw_input(u'please input captcha:') 37 | login_result = client.login(test_email, test_password, captcha) 38 | print 'login result => ' 39 | print login_result 40 | client.save_token(token_file) 41 | print 'save token success' 42 | 43 | # question 44 | response_file_uri = './question_response.html' # 将json输出到网页中,chrome下按F12选preview能看见浏览器渲染出的json数据结构 45 | question_id = 35005800 46 | question = client.question(question_id) 47 | data = question.pure_data 48 | response_json = json.dumps(data) 49 | response_file = open(response_file_uri, 'w+') 50 | response_file.write(response_json) 51 | print u"数据保存完成" 52 | 53 
| response_file_uri = './people_response.html' # 将json输出到网页中,chrome下按F12选preview能看见浏览器渲染出的json数据结构 54 | people_id = '404-Page-Not-found' 55 | people = client.people(people_id) 56 | for i in people.answers: 57 | data = i.pure_data 58 | response_json = json.dumps(data) 59 | response_file = open(response_file_uri, 'w+') 60 | response_file.write(response_json) 61 | print u"数据保存完成" -------------------------------------------------------------------------------- /unit/parser_unit.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | 4 | # 添加库路径 5 | currentPath = sys.path[0].replace('unit', '') 6 | sys.path.append(currentPath) 7 | sys.path.append(currentPath + r'src') 8 | sys.path.append(currentPath + r'src\tools') 9 | sys.path.append(currentPath + r'src\parser') 10 | sys.path.append(currentPath + r'src\lib') # 扩展库地址 11 | 12 | from src.lib.zhihu_parser.author import AuthorParser 13 | from src.lib.zhihu_parser.collection import CollectionParser 14 | from src.lib.zhihu_parser.question import QuestionParser 15 | from src.lib.zhihu_parser.topic import TopicParser 16 | from src.tools.debug import Debug 17 | 18 | reload(sys) 19 | sys.setdefaultencoding('utf8') 20 | 21 | 22 | 23 | sys.setrecursionlimit(1000000) # 为了适应知乎上的长答案,需要专门设下递归深度限制。。。 24 | 25 | is_info = 0 26 | kind = 'author' # 直接在这里替换类别即可完成测试。可供测试的类别见字典键值 27 | unit ={ 28 | 'answer':{ 29 | 'src_answer':'./unit_html/single_answer.html', 30 | 'src_info':'./unit_html/single_answer.html', 31 | 'parser':QuestionParser, 32 | }, 33 | 'question':{ 34 | 'src_answer':'./unit_html/single_question.html', 35 | 'src_info':'./unit_html/single_question.html', 36 | 'parser':QuestionParser, 37 | }, 38 | 'author':{ 39 | 'src_answer':'./unit_html/author.html', 40 | 'src_info':'./unit_html/author_info.html', 41 | 'parser':AuthorParser, 42 | }, 43 | 'topic':{ 44 | 'src_answer':'./unit_html/topic.html', 45 | 'src_info':'./unit_html/topic.html', 46 | 
'parser':TopicParser, 47 | }, 48 | 'collection':{ 49 | 'src_answer': './unit_html/collection.html', 50 | 'src_info': './unit_html/collection.html', 51 | 'parser': CollectionParser, 52 | }, 53 | 'private_collection': { 54 | 'src_answer':'./unit_html/private_collection.html', 55 | 'src_info':'./unit_html/private_collection.html', 56 | 'parser':CollectionParser, 57 | }, 58 | } 59 | if is_info: 60 | src = unit[kind]['src_info'] 61 | else: 62 | src = unit[kind]['src_answer'] 63 | 64 | content = open(src, 'r').read() 65 | parser = unit[kind]['parser'](content) 66 | 67 | 68 | if is_info: 69 | Debug.print_dict(parser.get_extra_info()) 70 | print '----------------------' 71 | print '==========================' 72 | else: 73 | for answer in parser.get_answer_list(): 74 | Debug.print_dict(answer) 75 | print '----------------------' 76 | print '==========================' 77 | 78 | for question in parser.get_question_info_list(): 79 | Debug.print_dict(question) 80 | print '----------------------' 81 | -------------------------------------------------------------------------------- /unit/unit_html/author.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/author.html -------------------------------------------------------------------------------- /unit/unit_html/author_info.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/author_info.html -------------------------------------------------------------------------------- /unit/unit_html/collection.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/collection.html 
-------------------------------------------------------------------------------- /unit/unit_html/private_collection.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/private_collection.html -------------------------------------------------------------------------------- /unit/unit_html/single_answer.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/single_answer.html -------------------------------------------------------------------------------- /unit/unit_html/single_question.html: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /unit/unit_html/topic.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/topic.html -------------------------------------------------------------------------------- /unit/unit_html/topic_info.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/topic_info.html -------------------------------------------------------------------------------- /www/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /www/css/customer.css: -------------------------------------------------------------------------------- 1 | /* 设置背景色,清除缩进 */ 2 | body { 3 | text-indent: 0 !important; 4 
| duokan-text-indent: 0 !important; 5 | word-wrap: break-word; /* 自动折行 */ 6 | } 7 | 8 | /* color */ 9 | .bg-zhihu-blue-light { 10 | background-color: #428ECE; 11 | } 12 | 13 | .bg-zhihu-blue-deep { 14 | background-color: #3982C6; 15 | } 16 | 17 | .bg-duokan-yellow { 18 | /*多看阅读默认背景色*/ 19 | background-color: #F7EFE7; 20 | } 21 | 22 | /* 清除浮动 */ 23 | div.clear-float { 24 | clear: both; 25 | } 26 | 27 | /* 隐藏空图片 */ 28 | img[src=''], img[src$='./images/'] { 29 | display: none; 30 | } 31 | 32 | .text-center { 33 | text-align: center; 34 | } 35 | 36 | .v-center { 37 | vertical-align: middle; 38 | } 39 | 40 | .margin-center { 41 | margin: 0 auto; 42 | } 43 | 44 | /* 目录页面 */ 45 | /* 隐藏多余的『目录』两字 */ 46 | div.index-content > li { 47 | display: none; 48 | } 49 | 50 | div.index-content a { 51 | font-size: 1em; 52 | } 53 | 54 | div.index-content ul > li > a { 55 | font-size: 1em; 56 | font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体; 57 | color: #3d3e45; 58 | } 59 | 60 | /* 首页描述信息 */ 61 | div.front-page.description { 62 | margin: 2em 0; 63 | } 64 | 65 | /* 评论信息 */ 66 | div.extra-info p { 67 | float: left; 68 | } 69 | 70 | div.extra-info p.update-date { 71 | float: right; 72 | } 73 | 74 | div.extra-info { 75 | margin: 1em 0; 76 | } 77 | 78 | /* 用户信息 */ 79 | div.author-base { 80 | margin: 1em 0; 81 | font-size: 16px; 82 | vertical-align: middle; 83 | } 84 | 85 | span.author-sign { 86 | margin-left: 1em; 87 | font-family: 'DK-KAITI', '楷体'; 88 | } 89 | 90 | span.author-name a { 91 | font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体; 92 | color: #3d3e45; 93 | font-weight: bold; 94 | } 95 | 96 | div.author-logo { 97 | float: right; 98 | } 99 | 100 | div.author-logo img { 101 | vertical-align: middle; 102 | margin-left: 0.2em; 103 | margin-bottom: 0.2em; 104 | } 105 | 106 | /* 问题详情 */ 107 | 108 | /* 实现知乎周刊的全屏效果*/ 109 | @media handheld { 110 | div.question { 111 | margin: -10em -4em 0 -4em; /*上、右、下、左,顺时针*/ 112 | padding: 10em 
2em 0 4em; 113 | } 114 | } 115 | 116 | div.question-title { 117 | width: 100%; 118 | overflow: hidden; 119 | } 120 | 121 | div.question-title h1 { 122 | font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 宋体; 123 | color: #FFFFFF; 124 | vertical-align: middle; 125 | text-align: left; 126 | padding: 1em; 127 | width: 70%; 128 | margin: 3em 0 3em 0; 129 | float: right; 130 | font-size: 1.5em; 131 | } 132 | 133 | @media handheld { 134 | div.question-title h1 { 135 | margin: 0 0 3em 0; 136 | padding-right: 3em; 137 | } 138 | } 139 | 140 | div.question-info { 141 | font-family: 'DK-SONGTI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 宋体; 142 | color: #FFF !important; 143 | font-size: 1em; 144 | margin: 0 1em; 145 | padding-bottom: 1em; 146 | } 147 | 148 | @media handheld { 149 | div.question-info { 150 | margin: 0 -4em 0 -4em; 151 | padding: 0 5em 2em 5em; 152 | } 153 | } 154 | 155 | div.question-info a { 156 | color: #CCC; 157 | } 158 | 159 | /* 答案内容 */ 160 | div.content { 161 | font-family: 'DK-SONGTI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体; 162 | color: #000; 163 | font-size: 16px; 164 | } 165 | 166 | div.content img { 167 | max-width: 100%; 168 | margin-bottom: 2em; 169 | } 170 | 171 | /* article */ 172 | div.title-image { 173 | text-align: center; 174 | } 175 | 176 | div.title-image img { 177 | width: 100%; 178 | } 179 | 180 | /* 禁止信息页中的详情表格分页 */ 181 | div.front-page.detail-info table.margin-center { 182 | page-break-inside: avoid; 183 | } 184 | 185 | /* 禁止目录panel分页 */ 186 | div.index-content.panel { 187 | page-break-inside: avoid; 188 | } 189 | -------------------------------------------------------------------------------- /www/image/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/image/cover.jpg 
-------------------------------------------------------------------------------- /www/image/kanshan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/image/kanshan.png -------------------------------------------------------------------------------- /www/template/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /www/template/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {title} 5 | 6 | 7 | 8 | 9 | 10 | {body} 11 | 12 | -------------------------------------------------------------------------------- /www/template/content/question/answer.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 |
6 | 9 | 10 | 11 | {author_name} 12 | 13 | 14 | {author_headline} 15 |
16 | 17 |
18 |
19 |
20 | 21 |
22 | {content} 23 |
24 | 25 |
26 |
27 |

评论数:{comment_count}

28 | 29 |

赞同数:{voteup_count}

30 | 31 |

更新时间:{updated_time}

32 |
33 |
34 |
35 | 36 |
37 | -------------------------------------------------------------------------------- /www/template/content/question/question.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
5 |
6 |

{title}

7 |
8 |
9 |
10 |
11 | {description} 12 |
13 |
14 |
15 |
16 | {answer} 17 |
-------------------------------------------------------------------------------- /www/template/front_page/base.html: -------------------------------------------------------------------------------- 1 |
2 | 4 | 5 |

{title}

6 |
7 | {description} 8 |
9 |
10 | {detail_info} 11 |
12 |

版权信息:知乎协议

13 |
-------------------------------------------------------------------------------- /www/template/front_page/info/answer.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/answer.html -------------------------------------------------------------------------------- /www/template/front_page/info/article.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/article.html -------------------------------------------------------------------------------- /www/template/front_page/info/author.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 |
5 | 用户信息 6 |
用户名 13 | {name} 14 |
关注人数{follower}
提问{asks}
回答{answers}
专栏文章{posts}
公共编辑次数{logs}
被赞同{agree}
被收藏{collected}
被感谢{thanks}
被分享{shared}
54 | -------------------------------------------------------------------------------- /www/template/front_page/info/collection.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
5 | 收藏夹信息 6 |
评论数{comment}
关注人数{follower}
20 | -------------------------------------------------------------------------------- /www/template/front_page/info/column.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
5 | 专栏信息 6 |
创建者{creator_name}
文章总数{article}
关注人数{follower}
24 | -------------------------------------------------------------------------------- /www/template/front_page/info/question.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/question.html -------------------------------------------------------------------------------- /www/template/front_page/info/topic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
5 | 话题信息 6 |
关注人数{follower}
16 | -------------------------------------------------------------------------------- /www/template/info_page/article.html: -------------------------------------------------------------------------------- 1 |
2 |
文章来自专栏-{name}
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
专栏文章数{postsCount}
11 |
12 | -------------------------------------------------------------------------------- /www/template/info_page/author.html: -------------------------------------------------------------------------------- 1 |
2 |
{name}的知乎回答集锦
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
回答数{answer_count}
被关注数{follower_count}
累计收获赞同{voteup_count}
19 |
20 | -------------------------------------------------------------------------------- /www/template/info_page/book.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | {title} 4 |
5 | -------------------------------------------------------------------------------- /www/template/info_page/collection.html: -------------------------------------------------------------------------------- 1 |
2 |
收藏夹:{title}
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
答案数{answer_count}
关注人数{follower_count}
15 |
16 | -------------------------------------------------------------------------------- /www/template/info_page/column.html: -------------------------------------------------------------------------------- 1 |
2 |
专栏-{name}
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
文章数{postsCount}
11 |
12 | -------------------------------------------------------------------------------- /www/template/info_page/question.html: -------------------------------------------------------------------------------- 1 |
2 | 知乎问题-{title} 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
回答数{answer_count}
关注人数{follower_count}
评论数{comment_count}
19 |
20 | -------------------------------------------------------------------------------- /www/template/info_page/topic.html: -------------------------------------------------------------------------------- 1 |
2 |
话题:{name}
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
问题数{questions_count}
11 |
12 | -------------------------------------------------------------------------------- /zhihuHelp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 放置于首位 3 | import sys # 修改默认编码 4 | import os # 添加系统路径 5 | import traceback 6 | 7 | base_path = unicode(os.path.abspath('.').decode(sys.stdout.encoding)) 8 | sys.path.insert(0, base_path + u'/src/lib') # 添加基础库路径 使用insert方式,确保优先启用项目自带源码包 9 | sys.path.insert(0, base_path + u'/src/lib/oauth') # zhihu oauth 类需要作为默认类导入,否则无法运行 - - 10 | 11 | reload(sys) 12 | sys.setdefaultencoding('utf-8') 13 | 14 | # 执行主程序 15 | from src.main import ZhihuHelp 16 | 17 | try: 18 | helper = ZhihuHelp() 19 | helper.start() 20 | except Exception: 21 | traceback.print_exc() 22 | print u"助手发生异常,点击任意键退出" 23 | raw_input() 24 | pass 25 | --------------------------------------------------------------------------------