├── .gitattributes
├── .gitignore
├── ReadList_for_test.txt
├── db
└── zhihuhelp.sql
├── readme.md
├── src
├── __init__.py
├── command_parser.py
├── container
│ ├── __init__.py
│ ├── book.py
│ ├── data
│ │ ├── __init__.py
│ │ ├── answer.py
│ │ ├── article.py
│ │ ├── author.py
│ │ ├── collection.py
│ │ ├── column.py
│ │ ├── question.py
│ │ └── topic.py
│ ├── image_container.py
│ ├── task.py
│ └── task_result.py
├── lib
│ ├── __init__.py
│ ├── epub
│ │ ├── __init__.py
│ │ ├── directory.py
│ │ ├── epub.py
│ │ ├── inf.py
│ │ ├── mime_type.py
│ │ ├── opf.py
│ │ ├── template
│ │ │ ├── META-INF
│ │ │ │ ├── container
│ │ │ │ │ └── container.xml
│ │ │ │ └── duokan_container
│ │ │ │ │ └── duokan-extension.xml
│ │ │ ├── OEBPS
│ │ │ │ ├── opf
│ │ │ │ │ ├── content.xml
│ │ │ │ │ ├── guide
│ │ │ │ │ │ └── item.xml
│ │ │ │ │ ├── manifest
│ │ │ │ │ │ └── item.xml
│ │ │ │ │ ├── metadata
│ │ │ │ │ │ ├── book_id.xml
│ │ │ │ │ │ ├── cover.xml
│ │ │ │ │ │ ├── creator.xml
│ │ │ │ │ │ ├── language.xml
│ │ │ │ │ │ └── title.xml
│ │ │ │ │ └── spine
│ │ │ │ │ │ ├── item.xml
│ │ │ │ │ │ └── item_nolinear.xml
│ │ │ │ └── toc
│ │ │ │ │ ├── content.xml
│ │ │ │ │ ├── docTitle
│ │ │ │ │ └── title.xml
│ │ │ │ │ ├── head
│ │ │ │ │ ├── depth.xml
│ │ │ │ │ └── uid.xml
│ │ │ │ │ └── navMap
│ │ │ │ │ └── item.xml
│ │ │ └── directory
│ │ │ │ ├── chapter.html
│ │ │ │ ├── content.html
│ │ │ │ ├── finish_chapter.html
│ │ │ │ ├── item_leaf.html
│ │ │ │ └── item_root.html
│ │ ├── toc.py
│ │ ├── tools
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── epub_config.py
│ │ │ └── epub_path.py
│ │ └── zhihuhelp_tools
│ │ │ ├── __init__.py
│ │ │ ├── debug.py
│ │ │ └── path.py
│ ├── oauth
│ │ ├── __init__.py
│ │ └── zhihu_oauth
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ ├── exception.py
│ │ │ ├── helpers.py
│ │ │ ├── oauth
│ │ │ ├── __init__.py
│ │ │ ├── before_login_auth.py
│ │ │ ├── im_android.py
│ │ │ ├── setting.py
│ │ │ ├── token.py
│ │ │ ├── utils.py
│ │ │ └── zhihu_oauth.py
│ │ │ ├── setting.py
│ │ │ ├── utils.py
│ │ │ └── zhcls
│ │ │ ├── __init__.py
│ │ │ ├── activity.py
│ │ │ ├── answer.py
│ │ │ ├── article.py
│ │ │ ├── base.py
│ │ │ ├── collection.py
│ │ │ ├── column.py
│ │ │ ├── comment.py
│ │ │ ├── generator.py
│ │ │ ├── live.py
│ │ │ ├── me.py
│ │ │ ├── message.py
│ │ │ ├── normal.py
│ │ │ ├── other.py
│ │ │ ├── people.py
│ │ │ ├── question.py
│ │ │ ├── streaming.py
│ │ │ ├── topic.py
│ │ │ ├── urls.py
│ │ │ ├── utils.py
│ │ │ └── whisper.py
│ └── requests
│ │ ├── __init__.py
│ │ ├── _internal_utils.py
│ │ ├── adapters.py
│ │ ├── api.py
│ │ ├── auth.py
│ │ ├── cacert.pem
│ │ ├── certs.py
│ │ ├── compat.py
│ │ ├── cookies.py
│ │ ├── exceptions.py
│ │ ├── hooks.py
│ │ ├── models.py
│ │ ├── packages
│ │ ├── README.rst
│ │ ├── __init__.py
│ │ ├── chardet
│ │ │ ├── __init__.py
│ │ │ ├── big5freq.py
│ │ │ ├── big5prober.py
│ │ │ ├── chardetect.py
│ │ │ ├── chardistribution.py
│ │ │ ├── charsetgroupprober.py
│ │ │ ├── charsetprober.py
│ │ │ ├── codingstatemachine.py
│ │ │ ├── compat.py
│ │ │ ├── constants.py
│ │ │ ├── cp949prober.py
│ │ │ ├── escprober.py
│ │ │ ├── escsm.py
│ │ │ ├── eucjpprober.py
│ │ │ ├── euckrfreq.py
│ │ │ ├── euckrprober.py
│ │ │ ├── euctwfreq.py
│ │ │ ├── euctwprober.py
│ │ │ ├── gb2312freq.py
│ │ │ ├── gb2312prober.py
│ │ │ ├── hebrewprober.py
│ │ │ ├── jisfreq.py
│ │ │ ├── jpcntx.py
│ │ │ ├── langbulgarianmodel.py
│ │ │ ├── langcyrillicmodel.py
│ │ │ ├── langgreekmodel.py
│ │ │ ├── langhebrewmodel.py
│ │ │ ├── langhungarianmodel.py
│ │ │ ├── langthaimodel.py
│ │ │ ├── latin1prober.py
│ │ │ ├── mbcharsetprober.py
│ │ │ ├── mbcsgroupprober.py
│ │ │ ├── mbcssm.py
│ │ │ ├── sbcharsetprober.py
│ │ │ ├── sbcsgroupprober.py
│ │ │ ├── sjisprober.py
│ │ │ ├── universaldetector.py
│ │ │ └── utf8prober.py
│ │ └── urllib3
│ │ │ ├── __init__.py
│ │ │ ├── _collections.py
│ │ │ ├── connection.py
│ │ │ ├── connectionpool.py
│ │ │ ├── contrib
│ │ │ ├── __init__.py
│ │ │ ├── appengine.py
│ │ │ ├── ntlmpool.py
│ │ │ ├── pyopenssl.py
│ │ │ └── socks.py
│ │ │ ├── exceptions.py
│ │ │ ├── fields.py
│ │ │ ├── filepost.py
│ │ │ ├── packages
│ │ │ ├── __init__.py
│ │ │ ├── backports
│ │ │ │ ├── __init__.py
│ │ │ │ └── makefile.py
│ │ │ ├── ordered_dict.py
│ │ │ ├── six.py
│ │ │ └── ssl_match_hostname
│ │ │ │ ├── .gitignore
│ │ │ │ ├── __init__.py
│ │ │ │ └── _implementation.py
│ │ │ ├── poolmanager.py
│ │ │ ├── request.py
│ │ │ ├── response.py
│ │ │ └── util
│ │ │ ├── __init__.py
│ │ │ ├── connection.py
│ │ │ ├── request.py
│ │ │ ├── response.py
│ │ │ ├── retry.py
│ │ │ ├── ssl_.py
│ │ │ ├── timeout.py
│ │ │ └── url.py
│ │ ├── sessions.py
│ │ ├── status_codes.py
│ │ ├── structures.py
│ │ └── utils.py
├── login.py
├── main.py
├── tools
│ ├── __init__.py
│ ├── config.py
│ ├── controler.py
│ ├── db.py
│ ├── debug.py
│ ├── extra_tools.py
│ ├── http.py
│ ├── match.py
│ ├── path.py
│ ├── template.py
│ ├── template_config.py
│ └── type.py
└── worker.py
├── unit
├── BS4
│ ├── content.html
│ └── parser.py
├── __init__.py
├── addressFile
│ ├── address_All
│ ├── answer
│ ├── article
│ ├── collection
│ ├── column
│ ├── people
│ ├── question
│ ├── table
│ └── topic
├── demo
│ ├── __init__.json
│ ├── activity.html
│ ├── answer.html
│ ├── article.html
│ ├── collection.html
│ ├── columns.html
│ ├── people.html
│ ├── question.html
│ ├── question_answer.html
│ ├── readme.md
│ └── topic.html
├── oauth_test.py
├── parser_unit.py
└── unit_html
│ ├── author.html
│ ├── author_info.html
│ ├── collection.html
│ ├── private_collection.html
│ ├── single_answer.html
│ ├── single_question.html
│ ├── topic.html
│ └── topic_info.html
├── update.md
├── www
├── __init__.py
├── css
│ ├── bootstrap.css
│ ├── customer.css
│ ├── markdown.css
│ └── normalize.css
├── image
│ ├── cover.jpg
│ └── kanshan.png
└── template
│ ├── __init__.py
│ ├── base.html
│ ├── content
│ └── question
│ │ ├── answer.html
│ │ └── question.html
│ ├── front_page
│ ├── base.html
│ └── info
│ │ ├── answer.html
│ │ ├── article.html
│ │ ├── author.html
│ │ ├── collection.html
│ │ ├── column.html
│ │ ├── question.html
│ │ └── topic.html
│ └── info_page
│ ├── article.html
│ ├── author.html
│ ├── book.html
│ ├── collection.html
│ ├── column.html
│ ├── question.html
│ └── topic.html
├── zhihuHelp.py
└── 知乎助手实现思路.md
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 | *.sln merge=union
7 | *.csproj merge=union
8 | *.vbproj merge=union
9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 |
12 | # Standard to msysgit
13 | *.doc diff=astextplain
14 | *.DOC diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot diff=astextplain
18 | *.DOT diff=astextplain
19 | *.pdf diff=astextplain
20 | *.PDF diff=astextplain
21 | *.rtf diff=astextplain
22 | *.RTF diff=astextplain
23 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /config.json
2 | /.idea/dataSources.local.xml
3 | /.idea/workspace.xml
4 | /.idea/ZhihuHelp__Python.iml
5 | #################
6 | ## Eclipse
7 | #################
8 |
9 | *.pydevproject
10 | .project
11 | .metadata
12 | bin/
13 | tmp/
14 | *.tmp
15 | *.bak
16 | *.swp
17 | *~.nib
18 | local.properties
19 | .classpath
20 | .settings/
21 | .loadpath
22 |
23 | # External tool builders
24 | .externalToolBuilders/
25 |
26 | # Locally stored "Eclipse launch configurations"
27 | *.launch
28 |
29 | # CDT-specific
30 | .cproject
31 |
32 | # PDT-specific
33 | .buildpath
34 |
35 |
36 | #################
37 | ## Visual Studio
38 | #################
39 |
40 | ## Ignore Visual Studio temporary files, build results, and
41 | ## files generated by popular Visual Studio add-ons.
42 |
43 | # User-specific files
44 | *.suo
45 | *.user
46 | *.sln.docstates
47 |
48 | # Build results
49 |
50 | [Dd]ebug/
51 | [Rr]elease/
52 | x64/
53 | build/
54 | [Bb]in/
55 | [Oo]bj/
56 |
57 | # MSTest test Results
58 | [Tt]est[Rr]esult*/
59 | [Bb]uild[Ll]og.*
60 |
61 | *_i.c
62 | *_p.c
63 | *.ilk
64 | *.meta
65 | *.obj
66 | *.pch
67 | *.pdb
68 | *.pgc
69 | *.pgd
70 | *.rsp
71 | *.sbr
72 | *.tlb
73 | *.tli
74 | *.tlh
75 | *.tmp_proj
76 | *.log
77 | *.vspscc
78 | *.vssscc
79 | .builds
80 | *.pidb
81 | *.scc
82 |
83 | # Visual C++ cache files
84 | ipch/
85 | *.aps
86 | *.ncb
87 | *.opensdf
88 | *.sdf
89 | *.cachefile
90 |
91 | # Visual Studio profiler
92 | *.psess
93 | *.vsp
94 | *.vspx
95 |
96 | # Guidance Automation Toolkit
97 | *.gpState
98 |
99 | # ReSharper is a .NET coding add-in
100 | _ReSharper*/
101 | *.[Rr]e[Ss]harper
102 |
103 | # TeamCity is a build add-in
104 | _TeamCity*
105 |
106 | # DotCover is a Code Coverage Tool
107 | *.dotCover
108 |
109 | # NCrunch
110 | *.ncrunch*
111 | .*crunch*.local.xml
112 |
113 | # Installshield output folder
114 | [Ee]xpress/
115 |
116 | # DocProject is a documentation generator add-in
117 | DocProject/buildhelp/
118 | DocProject/Help/*.HxT
119 | DocProject/Help/*.HxC
120 | DocProject/Help/*.hhc
121 | DocProject/Help/*.hhk
122 | DocProject/Help/*.hhp
123 | DocProject/Help/Html2
124 | DocProject/Help/html
125 |
126 | # Click-Once directory
127 | publish/
128 |
129 | # Publish Web Output
130 | *.Publish.xml
131 | *.pubxml
132 |
133 | # NuGet Packages Directory
134 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line
135 | #packages/
136 |
137 | # Windows Azure Build Output
138 | csx
139 | *.build.csdef
140 |
141 | # Windows Store app package directory
142 | AppPackages/
143 |
144 | # Others
145 | sql/
146 | *.Cache
147 | ClientBin/
148 | [Ss]tyle[Cc]op.*
149 | ~$*
150 | *~
151 | *.dbmdl
152 | *.[Pp]ublish.xml
153 | *.pfx
154 | *.publishsettings
155 |
156 | # RIA/Silverlight projects
157 | Generated_Code/
158 |
159 | # Backup & report files from converting an old project file to a newer
160 | # Visual Studio version. Backup files are not needed, because we have git ;-)
161 | _UpgradeReport_Files/
162 | Backup*/
163 | UpgradeLog*.XML
164 | UpgradeLog*.htm
165 |
166 | # SQL Server files
167 | App_Data/*.mdf
168 | App_Data/*.ldf
169 |
170 | #############
171 | ## Windows detritus
172 | #############
173 |
174 | # Windows image file caches
175 | Thumbs.db
176 | ehthumbs.db
177 |
178 | # Folder config file
179 | Desktop.ini
180 |
181 | # Recycle Bin used on file shares
182 | $RECYCLE.BIN/
183 |
184 | # Mac crap
185 | .DS_Store
186 |
187 |
188 | #############
189 | ## Python
190 | #############
191 |
192 | *.py[co]
193 |
194 | # Packages
195 | *.egg
196 | *.egg-info
197 | dist/
198 | eggs/
199 | parts/
200 | var/
201 | sdist/
202 | develop-eggs/
203 | .idea/
204 | 知乎电子书临时资源库/
205 | 知乎助手生成的电子书/
206 | .installed.cfg
207 |
208 | # Installer logs
209 | pip-log.txt
210 |
211 | # Unit test / coverage reports
212 | .coverage
213 | .tox
214 |
215 | #Translations
216 | *.mo
217 |
218 | #Mr Developer
219 | .mr.developer.cfg
220 |
221 | #Mine
222 | ReadList.txt
223 | misc.xml
224 | *.db
225 | *.sqlite
226 | *.ini
227 | zhihuhelp1.7.0/我是登陆知乎时的验证码.gif
228 | .idea/workspace.xml
229 | 我是登陆知乎时的验证码.gif
230 | # 自动生成的token
231 | token.pkl
232 | *.token
233 | 知乎登录密钥_token_file.token
234 |
235 | *.jpg
236 |
237 | *.xml
238 |
239 | *.xhtml
240 |
241 | *.epub
242 |
243 | *.opf
244 |
245 | *.ncx
246 |
247 | *.html
248 |
249 | *.css
250 |
--------------------------------------------------------------------------------
/ReadList_for_test.txt:
--------------------------------------------------------------------------------
1 | https://www.zhihu.com/people/404-Page-Not-found$https://www.zhihu.com/people/meng-qing-xue-94$https://www.zhihu.com/people/ying-ye-78 #用户答案收集测试-姚泽源-孟晴雪-影夜
2 | https://www.zhihu.com/topic/19560104 #话题-青岛
3 | https://www.zhihu.com/collection/19726342 #收藏夹-工作
4 | https://www.zhihu.com/collection/34451960 #孟晴雪-私人收藏夹
5 | http://zhuanlan.zhihu.com/patisserie #专栏-甘党万歳
6 | http://zhuanlan.zhihu.com/jiageng/20084431 #单篇专栏测试 - 一只吐泡泡的扇贝
7 | https://www.zhihu.com/question/19611675/answer/29365393?from=profile_answer_card # 单个答案测试-青岛旅游攻略
8 | https://www.zhihu.com/question/22719537 # 单个问题测试-如何保存某位知乎用户的所有答案?
9 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/src/__init__.py
--------------------------------------------------------------------------------
/src/command_parser.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from src.container.task import QuestionTask, AnswerTask, AuthorTask, CollectionTask, TopicTask, \
3 | ArticleTask, ColumnTask
4 | from src.tools.debug import Debug
5 | from src.tools.match import Match
6 | from src.tools.type import Type
7 |
8 |
class CommandParser(object):
    u"""
    Turns raw command text into a list of task containers.
    """

    @staticmethod
    def get_task_list(command):
        u"""
        Parse one line of command text and return the tasks it describes.

        Spaces, carriage returns, line feeds and tabs are removed, everything
        from the first '#' onwards is discarded, and the remainder is split on
        '$' into individual commands. Commands that yield no task are skipped.
        """
        for blank in (' ', '\r', '\n', '\t'):
            command = command.replace(blank, '')
        command = command.split('#')[0]

        task_list = []
        for piece in command.split('$'):
            task = CommandParser.parse_command(piece)
            if task:
                task_list.append(task)
        return task_list

    @staticmethod
    def detect(command):
        u"""
        Return the first command type whose Match method accepts the command,
        or Type.unknown when none of them does.
        """
        # Article has to be probed before column, otherwise every article
        # would be classified as a column.
        candidates = [
            Type.answer, Type.question,
            Type.author, Type.collection, Type.topic,
            Type.article, Type.column,
        ]
        for command_type in candidates:
            matcher = getattr(Match, command_type)
            if matcher(command):
                return command_type
        return Type.unknown

    @staticmethod
    def parse_command(raw_command=''):
        u"""
        Analyse a single command and return the task to be carried out
        (None when the command cannot be recognised).
        """
        handlers = {
            Type.author: CommandParser.parse_author,
            Type.answer: CommandParser.parse_answer,
            Type.question: CommandParser.parse_question,
            Type.collection: CommandParser.parse_collection,
            Type.topic: CommandParser.parse_topic,
            Type.article: CommandParser.parse_article,
            Type.column: CommandParser.parse_column,
            Type.unknown: CommandParser.parse_error,
        }
        handler = handlers[CommandParser.detect(raw_command)]
        return handler(raw_command)

    @staticmethod
    def parse_question(command):
        u"""Build a QuestionTask from the question id matched in command."""
        matched = Match.question(command)
        return QuestionTask(matched.group(u'question_id'))

    @staticmethod
    def parse_answer(command):
        u"""Build an AnswerTask from the question and answer ids matched in command."""
        matched = Match.answer(command)
        return AnswerTask(matched.group(u'question_id'), matched.group(u'answer_id'))

    @staticmethod
    def parse_author(command):
        u"""Build an AuthorTask from the author page id matched in command."""
        matched = Match.author(command)
        return AuthorTask(matched.group(u'author_page_id'))

    @staticmethod
    def parse_collection(command):
        u"""Build a CollectionTask from the collection id matched in command."""
        matched = Match.collection(command)
        return CollectionTask(matched.group(u'collection_id'))

    @staticmethod
    def parse_topic(command):
        u"""Build a TopicTask from the topic id matched in command."""
        matched = Match.topic(command)
        return TopicTask(matched.group(u'topic_id'))

    @staticmethod
    def parse_article(command):
        u"""Build an ArticleTask from the column and article ids matched in command."""
        matched = Match.article(command)
        return ArticleTask(matched.group(u'column_id'), matched.group(u'article_id'))

    @staticmethod
    def parse_column(command):
        u"""Build a ColumnTask from the column id matched in command."""
        matched = Match.column(command)
        return ColumnTask(matched.group(u'column_id'))

    @staticmethod
    def parse_error(command):
        u"""
        Fallback for unrecognised commands: log non-empty input, return None.
        """
        if not command:
            return
        Debug.logger.info(u"""无法解析记录:{}所属网址类型,请检查后重试。""".format(command))
        return
121 |
--------------------------------------------------------------------------------
/src/container/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/src/container/data/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # 用于储存数据信息
--------------------------------------------------------------------------------
/src/container/data/answer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 |
4 | from src.tools.config import Config
5 | from src.tools.match import Match
6 | from src.tools.path import Path
7 |
8 |
class Answer(object):
    """
    Container for one answer record plus bookkeeping for its images.

    :param data: mapping that provides every key listed in ``__init__``;
        a missing key raises KeyError.
    """

    def __init__(self, data):
        for field in (
            'answer_id',
            'question_id',
            'author_id',
            'author_name',
            'author_headline',
            'author_avatar_url',
            'author_gender',
            'comment_count',
            'content',
            'created_time',
            'updated_time',
            'is_copyable',
            'thanks_count',
            'voteup_count',
        ):
            setattr(self, field, data[field])

        # Size in kb of the images only (images inside the answer plus the
        # author's avatar).
        self.total_img_size_kb = 0
        self.img_filename_list = []

    def download_img(self):
        """
        Download every image referenced by the answer and the author avatar.

        Each matched image element in ``content`` is replaced by an element
        that points at the local file, ``author_avatar_url`` is replaced by a
        local source, and ``total_img_size_kb`` is set to the combined size
        of the files listed in ``img_filename_list``.
        """
        # Imported here rather than at module level, as in the original code.
        from src.container.image_container import ImageContainer
        img_container = ImageContainer()
        img_src_dict = Match.match_img_with_src_dict(self.content)
        self.img_filename_list = []
        for img in img_src_dict:
            filename = img_container.add(img_src_dict[img])
            self.img_filename_list.append(filename)
            self.content = self.content.replace(img, Match.create_img_element_with_file_name(filename))

        # The avatar of the answer's author has to be downloaded as well.
        filename = img_container.add(self.author_avatar_url)
        self.img_filename_list.append(filename)
        self.author_avatar_url = Match.create_local_img_src(filename)

        img_container.start_download()

        # The file list is rebuilt on every call, so the total is recomputed
        # from it instead of being accumulated; accumulating would count the
        # images twice if this method ran more than once.
        self.total_img_size_kb = sum(
            Path.get_img_size_by_filename_kb(filename)
            for filename in self.img_filename_list
        )
        return
--------------------------------------------------------------------------------
/src/container/data/article.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from src.container.data.author import Author
3 | from src.tools.db import DB
4 | from src.tools.match import Match
5 | from src.tools.path import Path
6 |
7 |
class Article(object):
    u"""
    Container for one column article plus bookkeeping for its images.

    :param data: mapping that provides every key listed in ``__init__``;
        a missing key raises KeyError.
    """

    def __init__(self, data):
        for field in (
            'article_id',
            'title',
            'updated_time',
            'voteup_count',
            'image_url',
            'column_id',
            'content',
            'comment_count',
            'author_id',
            'author_name',
            'author_headline',
            'author_avatar_url',
            'author_gender',
        ):
            setattr(self, field, data[field])

        self.total_img_size_kb = 0
        self.img_filename_list = []

    def download_img(self):
        u"""
        Download the images in the article body, its cover and the author avatar.

        Each matched image element in ``content`` is replaced by an element
        that points at the local file, ``image_url`` and ``author_avatar_url``
        are replaced by local sources, and ``total_img_size_kb`` is set to the
        combined size of the files listed in ``img_filename_list``.
        """
        # Imported here rather than at module level, as in the original code.
        from src.container.image_container import ImageContainer
        img_container = ImageContainer()
        img_src_dict = Match.match_img_with_src_dict(self.content)
        self.img_filename_list = []
        for img in img_src_dict:
            filename = img_container.add(img_src_dict[img])
            self.img_filename_list.append(filename)
            self.content = self.content.replace(img, Match.create_img_element_with_file_name(filename))

        # Cover image of the article.
        filename = img_container.add(self.image_url)
        self.img_filename_list.append(filename)
        self.image_url = Match.create_local_img_src(filename)

        # Avatar of the author.
        filename = img_container.add(self.author_avatar_url)
        self.img_filename_list.append(filename)
        self.author_avatar_url = Match.create_local_img_src(filename)

        img_container.start_download()

        # The file list is rebuilt on every call, so the total is recomputed
        # from it instead of being accumulated; accumulating would count the
        # images twice if this method ran more than once.
        self.total_img_size_kb = sum(
            Path.get_img_size_by_filename_kb(filename)
            for filename in self.img_filename_list
        )
        return
--------------------------------------------------------------------------------
/src/container/data/author.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
class Author(object):
    """
    Plain container for an author record.

    Every key listed in ``__init__`` is copied from ``data`` onto an
    attribute of the same name; a missing key raises KeyError.
    """

    def __init__(self, data):
        for field in (
            'author_id',
            'author_page_id',
            'answer_count',
            'articles_count',
            'avatar_url',
            'columns_count',
            'description',
            'favorite_count',
            'favorited_count',
            'follower_count',
            'following_columns_count',
            'following_count',
            'following_question_count',
            'following_topic_count',
            'gender',
            'headline',
            'name',
            'question_count',
            'shared_count',
            'is_bind_sina',
            'thanked_count',
            'sina_weibo_name',
            'sina_weibo_url',
            'voteup_count',
        ):
            setattr(self, field, data[field])
--------------------------------------------------------------------------------
/src/container/data/collection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
class Collection(object):
    """
    Plain container for a collection record.

    Every key listed in ``__init__`` is copied from ``data`` onto an
    attribute of the same name; a missing key raises KeyError.
    """

    def __init__(self, data):
        for field in (
            'collection_id',
            'answer_count',
            'comment_count',
            'created_time',
            'follower_count',
            'description',
            'title',
            'updated_time',
            'creator_id',
            'creator_name',
            'creator_headline',
            'creator_avatar_url',
            'collected_answer_id_list',
        ):
            setattr(self, field, data[field])
--------------------------------------------------------------------------------
/src/container/data/column.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
class Column(object):
    """
    Plain container for a column record.

    Every key listed in ``__init__`` is copied from ``data`` onto an
    attribute of the same name; a missing key raises KeyError.
    """

    def __init__(self, data):
        for field in (
            'column_id',
            'title',
            'article_count',
            'follower_count',
            'description',
            'image_url',
        ):
            setattr(self, field, data[field])
13 |
--------------------------------------------------------------------------------
/src/container/data/question.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
class Question(object):
    """
    Plain container for a question record.

    Every key listed in ``__init__`` is copied from ``data`` onto an
    attribute of the same name; a missing key raises KeyError.
    """

    def __init__(self, data):
        for field in (
            'question_id',
            'answer_count',
            'comment_count',
            'follower_count',
            'title',
            'detail',
            'updated_time',
        ):
            setattr(self, field, data[field])
--------------------------------------------------------------------------------
/src/container/data/topic.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
class Topic(object):
    """
    Plain container for a topic record.

    Every key listed in ``__init__`` is copied from ``data`` onto an
    attribute of the same name; a missing key raises KeyError.
    """

    def __init__(self, data):
        for field in (
            'topic_id',
            'avatar_url',
            'best_answerers_count',
            'best_answers_count',
            'excerpt',
            'followers_count',
            'introduction',
            'name',
            'questions_count',
            'unanswered_count',
            'best_answer_id_list',
        ):
            setattr(self, field, data[field])
--------------------------------------------------------------------------------
/src/container/image_container.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import hashlib
3 | import os.path
4 |
5 | from src.tools.config import Config
6 | from src.tools.controler import Control
7 | from src.tools.debug import Debug
8 | from src.tools.extra_tools import ExtraTools
9 | from src.tools.http import Http
10 | from src.tools.match import Match
11 | from src.tools.path import Path
12 |
13 |
class ImageContainer(object):
    """
    Registers image addresses, assigns each a local file name and downloads them.

    ``container`` maps the address passed to :meth:`add` to a dict with the
    keys ``'filename'`` and ``'href'`` (the address after quality conversion).
    """

    def __init__(self, save_path=''):
        """
        :param save_path: directory the images are written to; when empty
            (or None) ``Path.image_pool_path`` is used.
        """
        if not save_path:
            save_path = Path.image_pool_path
        self.save_path = save_path
        self.container = {}
        # Not used by any method of this class; kept so existing attribute
        # access keeps working.
        self.md5 = hashlib.md5()
        return

    def set_save_path(self, save_path):
        """Change the directory downloaded images are written to."""
        self.save_path = save_path
        return

    def add(self, href):
        """
        Register an image and return the local file name assigned to it.

        :param href: address of the image
        :return: file name the image will be stored under
        """
        self.container[href] = self.create_image(href)
        return self.get_filename(href)

    def delete(self, href):
        """Forget a registered image; raises KeyError if it was never added."""
        del self.container[href]
        return

    def get_filename(self, href):
        """Return the file name registered for ``href``, or '' if unknown."""
        image = self.container.get(href)
        if image:
            return image['filename']
        return ''

    def get_filename_list(self):
        """
        Return the values stored in ``container``.

        NOTE(review): despite the name these are the image dicts
        (``'filename'`` and ``'href'``), not bare file names; left unchanged
        because callers are not visible here.
        """
        return self.container.values()

    def download(self, index):
        """
        Download one registered image into ``save_path``.

        Nothing happens when the target file already exists. When the address
        is empty or the download yields no content, an empty file is written.

        :param index: key of the image in ``container``
        """
        image = self.container[index]
        filename = image['filename']
        # https is swapped for http while downloading to speed the download up.
        href = (image['href'] or '').replace('https://', 'http://')

        target = self.save_path + '/' + filename
        if os.path.isfile(target):
            return

        # Bytes, not text: the file is opened in binary mode, and writing a
        # text '' there raises TypeError on Python 3.
        content = b''
        if href:
            # Only announce a download that is actually attempted; with an
            # empty address there is nothing to fetch.
            Debug.print_in_single_line(u'开始下载图片{}'.format(href))
            content = Http.get_content(url=href, timeout=Config.timeout_download_picture)
            if content:
                Debug.print_in_single_line(u'图片{}下载完成'.format(href))
            else:
                Debug.logger.debug(u'图片『{}』下载失败'.format(href))
                content = b''
        with open(target, 'wb') as image_file:
            image_file.write(content)
        return

    def start_download(self):
        """Hand every registered image to ``Control.control_center`` for download."""
        argv = {'func': self.download,
                'iterable': self.container, }
        Control.control_center(argv, self.container)
        return

    def create_image(self, href):
        """Build the record stored in ``container`` for ``href``."""
        # The address is converted here according to the configured picture
        # quality, so the file name is derived from the converted address.
        href = self.transfer_img_href_by_config_quality(href)
        image = {'filename': self.create_filename(href), 'href': href}
        return image

    def transfer_img_href_by_config_quality(self, raw_href):
        """
        Convert ``raw_href`` with ``Match.generate_img_src`` for the configured
        quality; fall back to ``raw_href`` when that returns None.
        """
        href = Match.generate_img_src(raw_href, Config.picture_quality)
        if href is None:
            href = raw_href
        return href

    def create_filename(self, href):
        """Return ``ExtraTools.md5(href)`` with a '.jpg' suffix."""
        filename = ExtraTools.md5(href) + '.jpg'
        return filename
93 |
--------------------------------------------------------------------------------
/src/container/task.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from src.tools.type import Type
3 |
4 |
class Task(object):
    """Base class of all tasks; it only records the kind of task."""

    def __init__(self, task_type):
        # Kind of task, as supplied by the subclass constructor.
        self.task_type = task_type

    def get_task_type(self):
        """Return the task type given at construction."""
        kind = self.task_type
        return kind
12 |
13 |
class AuthorTask(Task):
    """Task of type ``Type.author``, identified by the author's page id."""

    def __init__(self, author_page_id):
        super(AuthorTask, self).__init__(Type.author)
        # Stored as given, without conversion.
        self.author_page_id = author_page_id
19 |
20 |
class TopicTask(Task):
    """Task of type ``Type.topic``; the topic id is converted with ``int``."""

    def __init__(self, topic_id):
        super(TopicTask, self).__init__(Type.topic)
        self.topic_id = int(topic_id)
26 |
27 |
class CollectionTask(Task):
    """Task of type ``Type.collection``; the collection id is converted with ``int``."""

    def __init__(self, collection_id):
        super(CollectionTask, self).__init__(Type.collection)
        self.collection_id = int(collection_id)
33 |
34 |
class QuestionTask(Task):
    """Task of type ``Type.question``; the question id is converted with ``int``."""

    def __init__(self, question_id):
        super(QuestionTask, self).__init__(Type.question)
        self.question_id = int(question_id)
40 |
41 |
class AnswerTask(Task):
    """Task of type ``Type.answer``; both ids are converted with ``int``."""

    def __init__(self, question_id, answer_id):
        super(AnswerTask, self).__init__(Type.answer)
        self.question_id = int(question_id)
        self.answer_id = int(answer_id)
48 |
49 |
class ColumnTask(Task):
    """Task of type ``Type.column``, identified by the column id."""

    def __init__(self, column_id):
        super(ColumnTask, self).__init__(Type.column)
        # Stored as given, without conversion.
        self.column_id = column_id
55 |
56 |
class ArticleTask(Task):
    """
    Task of type ``Type.article``.

    The column id is stored as given; the article id is converted with ``int``.
    """

    def __init__(self, column_id, article_id):
        super(ArticleTask, self).__init__(Type.article)
        self.column_id = column_id
        self.article_id = int(article_id)
63 |
--------------------------------------------------------------------------------
/src/lib/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/src/lib/epub/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/src/lib/epub/directory.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from .zhihuhelp_tools.path import Path
3 | from .tools.base import Base
4 |
5 |
class Directory(Base):
    u"""
    Builds the directory (table of contents) page from 'directory' templates.

    Markup is accumulated in ``self.content``.
    NOTE(review): ``content`` and ``get_template`` are presumably provided by
    ``Base`` -- confirm there.
    """

    def __init__(self):
        Base.__init__(self)
        # Current chapter nesting depth; 0 means no chapter is open.
        self.chapter_deep = 0

    def add_html(self, src, title):
        u"""Append a leaf entry linking to the file name of ``src``."""
        leaf_template = self.get_template('directory', 'item_leaf')
        self.content += leaf_template.format(href=Path.get_filename(src), title=title)

    def create_chapter(self, src, title):
        u"""
        Append a root entry for a chapter and go one level deeper.

        When no chapter is open yet, the entry is wrapped in the 'chapter'
        template first.
        """
        root_template = self.get_template('directory', 'item_root')
        entry = root_template.format(href=Path.get_filename(src), title=title)
        if self.chapter_deep == 0:
            chapter_template = self.get_template('directory', 'chapter')
            entry = chapter_template.format(item=entry, title=u'目录')
        self.content += entry
        self.chapter_deep += 1

    def finish_chapter(self):
        u"""
        Leave one chapter level; the 'finish_chapter' template is appended
        only when the outermost chapter is being closed.
        """
        if self.chapter_deep == 1:
            closing = self.get_template('directory', 'finish_chapter')
            self.content += closing
        self.chapter_deep -= 1

    def get_content(self):
        u"""Return the accumulated entries rendered into the 'content' template."""
        page_template = self.get_template('directory', 'content')
        return page_template.format(content=self.content)
39 |
--------------------------------------------------------------------------------
/src/lib/epub/inf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from .tools.epub_config import EpubConfig
3 | from .tools.epub_path import EpubPath
4 |
5 | from .zhihuhelp_tools.path import Path
6 |
7 |
class INF(object):
    """Copies the META-INF template files into the epub's META-INF path."""

    def __init__(self):
        pass

    @staticmethod
    def add_container():
        """Copy the file at ``EpubConfig.container_uri`` to ``EpubPath.meta_inf_path``."""
        source = EpubConfig.container_uri
        Path.copy(source, EpubPath.meta_inf_path)

    @staticmethod
    def add_duokan_ext():
        """Copy the file at ``EpubConfig.duokan_container_uri`` to ``EpubPath.meta_inf_path``."""
        source = EpubConfig.duokan_container_uri
        Path.copy(source, EpubPath.meta_inf_path)
21 |
--------------------------------------------------------------------------------
/src/lib/epub/mime_type.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from .tools.epub_path import EpubPath
3 |
4 |
class MimeType(object):
    """Writes the ``mimetype`` file into the e-book work directory."""

    def __init__(self):
        # Text written to the file by create().
        self.content = u'application/epub+zip'

    def create(self):
        """Write ``self.content`` to ``<EpubPath.work_path>/mimetype``."""
        target = EpubPath.work_path + '/mimetype'
        with open(target, 'w') as handle:
            handle.write(self.content)
14 |
--------------------------------------------------------------------------------
/src/lib/epub/template/META-INF/container/container.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/src/lib/epub/template/META-INF/duokan_container/duokan-extension.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/content.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | {metadata}
7 |
8 |
9 |
10 | {manifest}
11 |
12 |
13 | {spine}
14 |
15 |
16 | {guide}
17 |
18 |
19 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/guide/item.xml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/manifest/item.xml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/book_id.xml:
--------------------------------------------------------------------------------
1 | {uid}
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/cover.xml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/creator.xml:
--------------------------------------------------------------------------------
1 | {creator}
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/language.xml:
--------------------------------------------------------------------------------
1 | {language}
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/title.xml:
--------------------------------------------------------------------------------
1 | {title}
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/spine/item.xml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/spine/item_nolinear.xml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/content.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 | {head}
7 |
8 |
9 |
10 |
11 | {doc_title}
12 |
13 |
14 | {nav_point}
15 |
16 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/docTitle/title.xml:
--------------------------------------------------------------------------------
1 |
2 | {title}
3 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/head/depth.xml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/head/uid.xml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/navMap/item.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | {title}
4 |
5 |
6 |
7 |
8 | {extend_nav_point}
9 |
10 |
--------------------------------------------------------------------------------
/src/lib/epub/template/directory/chapter.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
{item}
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/src/lib/epub/template/directory/content.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
目录
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | {content}
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/src/lib/epub/template/directory/finish_chapter.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/src/lib/epub/template/directory/item_leaf.html:
--------------------------------------------------------------------------------
1 | {title}
2 |
--------------------------------------------------------------------------------
/src/lib/epub/template/directory/item_root.html:
--------------------------------------------------------------------------------
1 | {title}
2 |
--------------------------------------------------------------------------------
/src/lib/epub/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/src/lib/epub/tools/base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from .epub_config import EpubConfig
3 |
4 |
class Base(object):
    """Common base for builders that accumulate text in ``self.content``."""

    def __init__(self):
        self.content = ''

    def get_template(self, template_kind, template_name):
        """Return the text of a template file.

        The path is looked up on ``EpubConfig`` under the attribute
        ``<template_kind>_<template_name>_uri``; the file is read on every
        call.
        """
        attribute = '{0}_{1}_uri'.format(template_kind, template_name)
        location = getattr(EpubConfig, attribute)
        with open(location) as handle:
            return handle.read()

    def get_content(self):
        """Return everything accumulated so far."""
        return self.content
18 |
--------------------------------------------------------------------------------
/src/lib/epub/tools/epub_config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from .epub_path import EpubPath
3 |
4 |
class EpubConfig(object):
    u"""
    Constants needed while an EPUB is being created,
    for example the real paths of the commonly used templates.
    """
    # Folder that holds every template file (below EpubPath.base_path).
    base_path = EpubPath.base_path + u'/template'

    # ---- META-INF templates ----
    container_uri = base_path + u'/META-INF/container/container.xml'
    duokan_container_uri = base_path + u'/META-INF/duokan_container/duokan-extension.xml'

    # ---- OEBPS templates ----

    # OPF: overall document skeleton
    opf_content_uri = base_path + u'/OEBPS/opf/content.xml'

    # OPF: one <guide> entry
    guide_item_uri = base_path + u'/OEBPS/opf/guide/item.xml'

    # OPF: <metadata> entries
    metadata_cover_uri = base_path + u'/OEBPS/opf/metadata/cover.xml'
    metadata_creator_uri = base_path + u'/OEBPS/opf/metadata/creator.xml'
    metadata_book_id_uri = base_path + u'/OEBPS/opf/metadata/book_id.xml'
    metadata_title_uri = base_path + u'/OEBPS/opf/metadata/title.xml'
    metadata_language_uri = base_path + u'/OEBPS/opf/metadata/language.xml'

    # OPF: one <manifest> entry
    manifest_item_uri = base_path + u'/OEBPS/opf/manifest/item.xml'

    # OPF: <spine> entries (regular and non-linear)
    spine_item_uri = base_path + u'/OEBPS/opf/spine/item.xml'
    spine_item_nolinear_uri = base_path + u'/OEBPS/opf/spine/item_nolinear.xml'


    # TOC: overall document skeleton
    toc_content_uri = base_path + u'/OEBPS/toc/content.xml'
    # TOC: <head> entries
    head_uid_uri = base_path + u'/OEBPS/toc/head/uid.xml'
    head_depth_uri = base_path + u'/OEBPS/toc/head/depth.xml'

    # TOC: document title
    doc_title_title_uri = base_path + u'/OEBPS/toc/docTitle/title.xml'

    # TOC: one navMap (ncx) entry
    ncx_item_uri = base_path + u'/OEBPS/toc/navMap/item.xml'

    # ---- Directory (table-of-contents page) templates ----
    # These names are resolved by Base.get_template('directory', <name>).
    directory_item_root_uri = base_path + u'/directory/item_root.html'
    directory_item_leaf_uri = base_path + u'/directory/item_leaf.html'
    directory_chapter_uri = base_path + u'/directory/chapter.html'
    directory_finish_chapter_uri = base_path + u'/directory/finish_chapter.html'
    directory_content_uri = base_path + u'/directory/content.html'

    # ---- Default book properties ----
    book_id = u'create_by_yaozeyuan'
    book_title = u'no_title'
    creator = u'zhihuhelp'
    uid = u'urn:uuid:create-by-yao-ze-yuan-Tsingtao'
    identifier = u''
    language = u'zh'
64 |
--------------------------------------------------------------------------------
/src/lib/epub/tools/epub_path.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sys
4 | from ..zhihuhelp_tools.path import Path
5 |
class EpubPath(object):
    """
    Directory layout used while an EPUB is assembled.

    All attributes are class-level state.  ``base_path`` is the folder of the
    epub library itself (the parent of the folder containing this module);
    the remaining paths are derived from ``work_path`` and are recomputed by
    :meth:`set_work_path`.
    """
    file_path = os.path.realpath(__file__)
    base_path = os.path.dirname(os.path.dirname(file_path))  # library folder
    if isinstance(base_path, bytes):
        # Byte paths are decoded with the filesystem encoding.  The previous
        # code used sys.stdout.encoding, which is None when stdout is not a
        # terminal (e.g. piped), making the import fail.
        base_path = base_path.decode(sys.getfilesystemencoding() or 'utf-8')

    work_path = base_path  # the library folder is the initial work directory
    output_path = os.path.dirname(work_path)  # output defaults to its parent
    meta_inf_path = work_path + u'/META-INF'
    oebps_path = work_path + u'/OEBPS'
    # Images live below OEBPS, matching what set_work_path computes.
    image_path = oebps_path + u'/images'
    html_path = oebps_path + u'/html'
    style_path = oebps_path + u'/style'

    @staticmethod
    def set_work_path(work_path):
        """Set the work directory and recompute every path derived from it."""
        EpubPath.work_path = work_path
        EpubPath.meta_inf_path = EpubPath.work_path + u'/META-INF'
        EpubPath.oebps_path = EpubPath.work_path + u'/OEBPS'
        EpubPath.image_path = EpubPath.oebps_path + u'/images'
        EpubPath.html_path = EpubPath.oebps_path + u'/html'
        EpubPath.style_path = EpubPath.oebps_path + u'/style'
        return

    @staticmethod
    def set_output_path(output_path):
        """Set the directory recorded as the output location."""
        EpubPath.output_path = output_path
        return

    @staticmethod
    def init_epub_path(work_path):
        """
        Set the work directory and create the folder skeleton below it.

        Creates META-INF and OEBPS, changes the current directory to OEBPS,
        then creates the html, images and style folders.
        """
        EpubPath.set_work_path(work_path)
        Path.mkdir(EpubPath.meta_inf_path)
        Path.mkdir(EpubPath.oebps_path)
        Path.chdir(EpubPath.oebps_path)
        Path.mkdir(EpubPath.html_path)
        Path.mkdir(EpubPath.image_path)
        Path.mkdir(EpubPath.style_path)
        return

    @staticmethod
    def reset_path():
        """Change the current directory back to the work directory."""
        Path.chdir(EpubPath.work_path)
        return
52 |
--------------------------------------------------------------------------------
/src/lib/epub/zhihuhelp_tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
# Version marker of this bundled helper package (a date-style string).
__version__ = '2015-12-19'
3 |
--------------------------------------------------------------------------------
/src/lib/epub/zhihuhelp_tools/debug.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | import logging.handlers
4 | import sys
5 |
6 |
class Debug(object):
    u"""
    Logging and console-printing helpers.

    Creating the class configures the logger named ``main`` with a stream
    handler at INFO level.
    """
    handler = logging.StreamHandler()  # handler instance
    fmt = '%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(message)s'

    formatter = logging.Formatter(fmt)  # formatter instance
    handler.setFormatter(formatter)  # attach the formatter to the handler

    logger = logging.getLogger('main')  # the logger named "main"
    logger.addHandler(handler)  # attach the handler to the logger
    logger.setLevel(logging.INFO)  # original note: log output is turned off for releases

    # helper functions
    @staticmethod
    def print_in_single_line(text=''):
        """Overwrite the current console line with ``text`` (best effort)."""
        try:
            # Blank out the current line, then return to its start.
            sys.stdout.write("\r" + " " * 60 + '\r')
            sys.stdout.flush()
            sys.stdout.write(text)
            sys.stdout.flush()
        except Exception:
            # Console output is cosmetic: ignore failures, but no longer
            # swallow KeyboardInterrupt / SystemExit as a bare except did.
            pass
        return

    @staticmethod
    def print_dict(data={}, key='', prefix=''):
        """
        Print ``data`` as ``<prefix><key> => <value>`` lines.

        Dictionaries are walked recursively, with the prefix growing by one
        space per level; any other value is printed on one line.

        :param data: value to print; the shared default dict is never mutated
        :param key: label printed in front of a non-dict value
        :param prefix: indentation placed in front of the label
        """
        try:
            if isinstance(data, dict):
                # A separate loop name keeps the ``key`` parameter intact.
                for child_key in data:
                    Debug.print_dict(data[child_key], child_key, prefix + ' ')
            elif isinstance(data, basestring):
                print(prefix + unicode(key) + ' => ' + data)
            else:
                print(prefix + unicode(key) + ' => ' + unicode(data))
        except UnicodeEncodeError:
            # The console cannot encode the text: log instead of raising.
            Debug.logger.info(u'编码异常')
            Debug.logger.info(u'系统默认编码为:' + sys.getdefaultencoding())
        return
49 |
--------------------------------------------------------------------------------
/src/lib/epub/zhihuhelp_tools/path.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import shutil
4 | import locale
5 |
6 |
class Path(object):
    """
    Filesystem helpers plus the well-known locations of the application.

    Every location is derived from ``base_path``, the directory that was
    current when the class was created (refreshed by :meth:`init_base_path`).
    """
    # Start directory, without a trailing separator.  A unicode argument makes
    # os.path.abspath return text directly, so no manual decoding with the
    # locale encoding is needed.
    base_path = os.path.abspath(u'.')

    config_path = base_path + u'/config.json'
    # Declared here as well so it exists before init_base_path() is called.
    sql_path = base_path + u'/db/zhihuhelp.sql'

    www_css = base_path + u'/www/css'
    www_image = base_path + u'/www/images'

    html_pool_path = base_path + u'/知乎电子书临时资源库/知乎网页池'
    image_pool_path = base_path + u'/知乎电子书临时资源库/知乎图片池'
    # Was ``base_path + u'./…'``, which produced "<base>./…" - a different
    # (non-existent) directory on POSIX systems.
    result_path = base_path + u'/知乎助手生成的电子书'

    @staticmethod
    def reset_path():
        """Change the current directory back to ``base_path``."""
        Path.chdir(Path.base_path)
        return

    @staticmethod
    def pwd():
        """Print the real path of the current directory."""
        print(os.path.realpath('.'))
        return

    @staticmethod
    def get_pwd():
        """Return the absolute path of the current directory as text."""
        return os.path.abspath(u'.')

    @staticmethod
    def mkdir(path):
        """Create ``path``; any OSError (e.g. it already exists) is ignored."""
        try:
            os.mkdir(path)
        except OSError:
            # Deliberately best effort: callers rely on this not raising.
            pass
        return

    @staticmethod
    def chdir(path):
        """Change into ``path``, creating the directory first if that fails."""
        try:
            os.chdir(path)
        except OSError:
            Path.mkdir(path)
            os.chdir(path)
        return

    @staticmethod
    def rmdir(path):
        """Remove the tree at ``path``, ignoring errors; empty paths are skipped."""
        if path:
            shutil.rmtree(path, ignore_errors=True)
        return

    @staticmethod
    def copy(src, dst):
        """Copy a file or a directory tree; a missing ``src`` is skipped silently."""
        if not os.path.exists(src):
            return
        if os.path.isdir(src):
            shutil.copytree(src, dst)
        else:
            shutil.copy(src=src, dst=dst)
        return

    @staticmethod
    def get_filename(src):
        """Return the last component of ``src``."""
        return os.path.basename(src)

    @staticmethod
    def init_base_path():
        """Re-read the current directory and recompute every derived location."""
        Path.base_path = os.path.abspath(u'.')
        Path.config_path = Path.base_path + u'/config.json'
        Path.sql_path = Path.base_path + u'/db/zhihuhelp.sql'

        Path.www_css = Path.base_path + u'/www/css'
        Path.www_image = Path.base_path + u'/www/images'

        Path.html_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎网页池'
        Path.image_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎图片池'
        Path.result_path = Path.base_path + u'/知乎助手生成的电子书'

        return

    @staticmethod
    def init_work_directory():
        """Create the result folder and the temporary resource pools under ``base_path``."""
        Path.reset_path()
        Path.mkdir(u'./知乎助手生成的电子书')
        Path.mkdir(u'./知乎电子书临时资源库')
        Path.chdir(u'./知乎电子书临时资源库')
        Path.mkdir(u'./知乎网页池')
        Path.mkdir(u'./知乎图片池')
        Path.reset_path()
        return

    @staticmethod
    def is_file(path):
        """Return True when ``path`` is an existing regular file."""
        return os.path.isfile(path)
103 |
--------------------------------------------------------------------------------
/src/lib/oauth/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from .client import ZhihuClient
4 | from .exception import (
5 | ZhihuWarning, IgnoreErrorDataWarning, CantGetTicketsWarning,
6 | ZhihuException, UnexpectedResponseException, GetDataErrorException,
7 | NeedCaptchaException, NeedLoginException, IdMustBeIntException,
8 | UnimplementedException,
9 | )
10 | from .helpers import shield, SHIELD_ACTION
11 | from .zhcls import (
12 | Activity, ActType, Answer, Article, Comment, Collection, Column, Comment,
13 | Live, LiveBadge, LiveTag, LiveTicket,
14 | Me, Message, People, Question, Topic, Whisper, ANONYMOUS
15 | )
16 |
# Names exported by ``from zhihu_oauth import *``.
# NOTE(review): IgnoreErrorDataWarning, CantGetTicketsWarning,
# NeedLoginException, IdMustBeIntException and UnimplementedException are
# imported above but not listed here - confirm whether that is intentional.
__all__ = ['ZhihuClient', 'ANONYMOUS', 'Activity', 'Answer', 'ActType',
           'Article', 'Collection', 'Column', 'Comment',
           'Live', 'LiveBadge', 'LiveTag', 'LiveTicket',
           'Me', 'Message',
           'People', 'Question', 'Topic', 'Whisper',
           'ZhihuException', 'ZhihuWarning',
           'NeedCaptchaException', 'UnexpectedResponseException',
           'GetDataErrorException',
           'SHIELD_ACTION', 'shield']

# Version string of the bundled zhihu_oauth package.
__version__ = '0.0.30.post1'
28 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/helpers.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | import warnings
6 |
7 | import requests.packages.urllib3 as urllib3
8 |
9 | from .zhcls.utils import SimpleEnum
10 | from .zhcls.generator import BaseGenerator, ActivityGenerator
11 | from .exception import ZhihuException, ZhihuWarning
12 |
13 |
14 | __all__ = ['SHIELD_ACTION', 'shield']
15 |
16 |
SHIELD_ACTION = SimpleEnum(
    ['EXCEPTION', 'PASS', 'STOP']
)
"""
SHIELD_ACTION is the enumeration that tells :any:`shield` what to do once
its durability for one item has been used up:

================= ==========================================
Constant          Meaning
================= ==========================================
EXCEPTION         raise the exception
PASS              skip the item and fetch the next one
STOP              stop processing
================= ==========================================
"""
31 |
32 |
def shield(inner, durability=3, start_at=0, action=SHIELD_ACTION.EXCEPTION):
    """
    Wrap a Zhihu generator (such as :any:`People.followers` or
    :any:`Question.answers`) so that errors raised while fetching paged data
    are retried automatically.

    .. warning:: The activity generator cannot be protected by shield.

    Usage, e.g. collecting the topics followed by every follower of a column:

    ..  code-block:: python

        column = client.column('zijingnotes')
        result = []
        for user in shield(column.followers, action=SHIELD_ACTION.PASS):
            L = []
            print("Start proc user", user.name)
            if user.over:
                print(user.over_reason)
                continue
            for topic in shield(user.following_topics, action=SHIELD_ACTION.PASS):
                print("Add topic", topic.name)
                L.append(topic.name)
            result.append(L)

        # output result

    :param inner: the generator to protect
    :param int durability: how many consecutive failures are tolerated for
        the same item; a value of zero or less means ``action`` is applied on
        the first failure
    :param int start_at: index of the first item to fetch
    :param action: what to do once the durability is used up, see
        :any:`SHIELD_ACTION`; the default is to raise the exception
    :return: a new generator
    :raise ValueError: if ``inner`` is not a Zhihu generator or is the
        activity generator (raised when iteration starts, because this
        function is itself a generator)
    """
    if not isinstance(inner, BaseGenerator):
        raise ValueError('First argument must be Zhihu Generator Classes')
    if isinstance(inner, ActivityGenerator):
        raise ValueError(' Activity Generator is the only one can\'t be shield')
    offset = start_at
    hp = durability
    while True:
        # Index of the last item yielded in this pass; -1 means none yet.
        i = -1
        try:
            for i, x in enumerate(inner.jump(offset)):
                yield x
                # An item arrived, so the failure budget is restored.
                hp = durability
            break
        except (ZhihuException, urllib3.exceptions.MaxRetryError) as e:
            # Resume right after the items already delivered in this pass.
            offset += i + 1
            hp -= 1
            warnings.warn(
                "[{type}: {e}] be shield when get NO.{offset} data".format(
                    type=e.__class__.__name__,
                    e=e,
                    offset=offset
                ),
                ZhihuWarning
            )
            # ``<=`` instead of ``==``: with durability <= 0 the counter would
            # otherwise step past zero and a persistent error retried forever.
            if hp <= 0:
                if action is SHIELD_ACTION.EXCEPTION:
                    raise e
                elif action is SHIELD_ACTION.PASS:
                    # Give up on the failing item and move to the next one.
                    offset += 1
                    hp = durability
                elif action is SHIELD_ACTION.STOP:
                    break
                else:
                    raise e
101 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from .im_android import ImZhihuAndroidClient
4 | from .before_login_auth import BeforeLoginAuth
5 | from .zhihu_oauth import ZhihuOAuth
6 | from .token import ZhihuToken
7 | from .utils import login_signature
8 |
# Names exported by ``from <this package> import *``; they are exactly the
# names imported above.
__all__ = ['ImZhihuAndroidClient', 'BeforeLoginAuth', 'ZhihuOAuth',
           'ZhihuToken', 'login_signature']
11 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/before_login_auth.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | # from __future__ import unicode_literals
4 |
5 | from .im_android import ImZhihuAndroidClient
6 |
7 | __all__ = ['BeforeLoginAuth']
8 |
9 |
class BeforeLoginAuth(ImZhihuAndroidClient):
    def __init__(self, client_id, api_version=None, app_version=None,
                 app_build=None, app_za=None, uuid=None, ua=None):
        """
        ..  inheritance-diagram:: BeforeLoginAuth
            :parts: 1

        Auth used before login: on top of :class:`.ImZhihuAndroidClient` it
        also sends the ``client_id``.

        :param str|unicode client_id: client ID

        ..  seealso::
            The remaining parameters are documented in
            :meth:`.ImZhihuAndroidClient.__init__`

        :param str|unicode api_version:
        :param str|unicode app_version:
        :param str|unicode app_build:
        :param str|unicode app_za:
        :param str|unicode uuid:
        :param str|unicode ua:
        """
        parent = super(BeforeLoginAuth, self)
        parent.__init__(api_version, app_version, app_build, app_za, uuid, ua)
        self._client_id = client_id

    def __call__(self, r):
        """
        ..  note::
            requests calls this method automatically.

        Adds the parent's app-description headers, then sets the HTTP
        ``Authorization`` header to ``oauth <client_id>``.

        ..  seealso::
            :meth:`.ImZhihuAndroidClient.__call__`
        """
        request = super(BeforeLoginAuth, self).__call__(r)
        request.headers['Authorization'] = 'oauth ' + str(self._client_id)
        return request
52 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/im_android.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | # from __future__ import unicode_literals
4 |
5 | from requests.auth import AuthBase
6 |
7 | from .setting import (
8 | API_VERSION, APP_VERSION, APP_BUILD, APP_ZA, UUID, DEFAULT_UA
9 | )
10 |
11 | __all__ = ['ImZhihuAndroidClient']
12 |
13 |
class ImZhihuAndroidClient(AuthBase):
    def __init__(self, api_version=None, app_version=None,
                 app_build=None, app_za=None, uuid=None, ua=None):
        """
        ..  inheritance-diagram:: ImZhihuAndroidClient

        Auth class that imitates the special parameters sent by the official
        Zhihu Android client.  Every argument that is falsy falls back to the
        matching default from the settings module.

        :param str|unicode api_version: API version in use
        :param str|unicode app_version: client (APK) version
        :param str|unicode app_build: app build type
        :param str|unicode app_za: misc app info, a urlencoded params dict
        :param str|unicode uuid: meaning unknown to the original author
        :param str|unicode ua: User-Agent (the original note says the newer
            API checks it)
        """
        self._api_version = api_version if api_version else API_VERSION
        self._app_version = app_version if app_version else APP_VERSION
        self._app_build = app_build if app_build else APP_BUILD
        self._app_za = app_za if app_za else APP_ZA
        self._uuid = uuid if uuid else UUID
        self._ua = ua if ua else DEFAULT_UA

    def __call__(self, r):
        """
        ..  note::
            requests calls this method automatically.

        Writes the Android-client headers into the prepared request.

        ..  seealso::
            The values written are described in :meth:`__init__`
        """
        # A tuple keeps the assignment order of the headers fixed.
        extra_headers = (
            ('x-api-version', self._api_version),
            ('x-app-version', self._app_version),
            ('x-app-build', self._app_build),
            ('x-app-za', self._app_za),
            ('x-uuid', self._uuid),
            ('User-Agent', self._ua),
        )
        for header_name, header_value in extra_headers:
            r.headers[header_name] = header_value
        return r
54 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/setting.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | # from __future__ import unicode_literals
4 |
# ``urlencode`` lives in different modules on Python 2 and Python 3.
try:
    # python2
    from urllib import urlencode
except ImportError:
    # python3
    # noinspection PyUnresolvedReferences,PyCompatibility
    from urllib.parse import urlencode

# Root of the Zhihu API; the endpoint URLs below are built from it.
ZHIHU_API_ROOT = 'https://api.zhihu.com'
"""知乎 API 的根目录"""

# ------- Zhihu OAuth Keys -------

# Default client id, used when ZhihuClient is built without one
# (per the docstring below).
CLIENT_ID = '8d5227e0aaaa4797a763ac64e0c3b8'
"""
默认的 CLIENT ID。
如果 :class:`.ZhihuClient` 构造时没有提供 CLIENT ID,则使用这个值。
"""

# Default app secret, used when ZhihuClient is built without one
# (per the docstring below).
APP_SECRET = 'ecbefbf6b17e47ecb9035107866380'
"""
默认的 SECRET。
如果 :class:`.ZhihuClient` 构造时没有提供 SECRET,则使用这个值。
"""

# ------- Zhihu Client Info -------

# Default for ImZhihuAndroidClient's ``api_version``.
API_VERSION = '3.0.41'
"""
模拟 Android 官方客户端使用的参数,表示使用的 API 版本。
如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 api_version,则使用这个值。
"""

# Default for ImZhihuAndroidClient's ``app_version``; also reused as
# ``VersionName`` inside APP_ZA below.
APP_VERSION = '4.12.0'
"""
模拟 Android 官方客户端使用的参数,表示使用的 APP 版本。
如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_version,则使用这个值。
"""

# Default for ImZhihuAndroidClient's ``app_build``.
APP_BUILD = 'release'
"""
模拟 Android 官方客户端使用的参数,表示使用的 APP 的 Build 类型。
如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_build,则使用这个值。
"""

# Default for ImZhihuAndroidClient's ``uuid``; the original author notes its
# meaning is unknown.
UUID = 'AHBCVBVCDAtLBfZCo1SYbPj8SgivYjqcGCs='
"""
新加的一个东西,暂时不知道是啥的 ID
"""

# Default User-Agent; the original note says the newer API checks the UA.
DEFAULT_UA = 'Futureve/4.12.0 Mozilla/5.0 ' \
             '(Linux; Android 6.0; Google Nexus 5 - 6.0.0 - ' \
             'API 23 - 1080x1920 Build/MRA58K; wv) ' \
             'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0'
"""
新版本的 API 开始检查 UA了。
"""

# Default for ImZhihuAndroidClient's ``app_za``: a dict of device/app
# properties, urlencoded into a single string.
APP_ZA = urlencode({
    'OS': 'Android',
    'Release': '6.0',
    'Model': 'Google Nexus 5 - 6.0.0 - API 23 - 1080x1920',
    'VersionName': APP_VERSION,
    'VersionCode': '450',
    'Width': '1080',
    'Height': '1920',
    'Installer': 'Google Play',
})
"""
模拟 Android 官方客户端使用的参数,表示使用的 APP 的 杂项数据。
如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_za,则使用这个值。

..  note::
    它是一个 url encode 后的 dict

    参见 :meth:`.ImZhihuAndroidClient.__init__`
"""

# ------- Zhihu API URL for Login -------

# Captcha endpoint; per the docstring below: GET asks whether a captcha is
# needed, PUT fetches one, POST submits the answer.
CAPTCHA_URL = ZHIHU_API_ROOT + '/captcha'
"""
验证码相关

:GET: 是否需要验证码
:PUT: 获取验证码
:POST: 提交验证码
"""

# sign_in - POST - user login

# OAuth login endpoint.
LOGIN_URL = ZHIHU_API_ROOT + '/sign_in'
"""
OAuth 登录地址
"""

# Template of the login form.  Per the docstring below only username and
# password are filled in by hand; client_id, timestamp and signature are
# filled in by ZhihuClient / login_signature.
LOGIN_DATA = {
    'grant_type': 'password',
    'source': 'com.zhihu.android',
    'client_id': '',
    'signature': '',
    'timestamp': '',
    'username': '',
    'password': '',
}
"""
登录数据格式。需要填充的只有用户名和密码。

`client_id` 会由 :class:`.ZhihuClient` 填写。

`timestamp` 和 `signature` 会由 :class:`.ZhihuClient` 内部调用的
:func:`.login_signature` 自动填写。
"""
118 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/token.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | import json
6 | import pickle
7 | import time
8 |
9 | from ..exception import MyJSONDecodeError
10 |
11 | __all__ = ['ZhihuToken']
12 |
13 |
class ZhihuToken:
    def __init__(self, user_id, uid, access_token, expires_in, token_type,
                 refresh_token, cookie, lock_in=None, unlock_ticket=None):
        """
        Zhihu token.

        Prefer :meth:`ZhihuToken.from_str`, :meth:`ZhihuToken.from_dict` or
        :meth:`ZhihuToken.from_file` over calling this constructor directly.

        ..  note::

            Only used inside :class:`.ZhihuClient`; ordinary users do not
            need to know about it.

        :param str|unicode user_id: user ID
        :param int uid: some numeric user ID
        :param str|unicode access_token: the access token
        :param int expires_in: lifetime, added to the creation time
        :param str|unicode token_type: token type
        :param str|unicode refresh_token: refresh token
        :param str|unicode cookie: cookies to add after a successful login
        :param int lock_in: purpose unknown
        :param str|unicode unlock_ticket: purpose unknown
        """
        created = time.time()
        self._create_at = created
        # NOTE(review): the two ids are stored crosswise (``uid`` lands in
        # ``_user_id``, which the ``user_id`` property returns).  Kept exactly
        # as found - confirm whether this mirrors the server's field naming.
        self._user_id, self._uid = uid, user_id
        self._access_token = access_token
        self._expires_in = expires_in
        self._expires_at = created + expires_in
        self._token_type = token_type
        self._refresh_token = refresh_token
        self._cookie = cookie

        # Purpose of the two attributes below is unknown for now.
        self._lock_in = lock_in
        self._unlock_ticket = unlock_ticket

    @staticmethod
    def from_str(json_str):
        """
        Build a token from a JSON string.

        :param str|unicode json_str: JSON text describing a Zhihu token
        :rtype: :class:`ZhihuToken`
        :raise ValueError: if the text is not valid token JSON
        """
        try:
            parsed = json.loads(json_str)
            token = ZhihuToken.from_dict(parsed)
        except (MyJSONDecodeError, ValueError):
            message = '"{json_str}" is NOT a valid zhihu token json string.'
            raise ValueError(message.format(json_str=json_str))
        return token

    @staticmethod
    def from_dict(json_dict):
        """
        Build a token from a dictionary of constructor arguments.

        :param dict json_dict: dictionary describing a Zhihu token
        :rtype: :class:`ZhihuToken`
        :raise ValueError: if the keys do not fit the constructor
        """
        try:
            token = ZhihuToken(**json_dict)
        except TypeError:
            message = '"{json_dict}" is NOT a valid zhihu token json.'
            raise ValueError(message.format(json_dict=json_dict))
        return token

    @staticmethod
    def from_file(filename):
        """
        Load a token that was written by :meth:`save`.

        :param str|unicode filename: file name
        :rtype: :class:`ZhihuToken`
        """
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files that are trusted.
        with open(filename, 'rb') as stream:
            loaded = pickle.load(stream)
        return loaded

    def save(self, filename):
        """
        Pickle this token into a file.

        :param str|unicode filename: file name
        :return: nothing
        """
        with open(filename, 'wb') as stream:
            pickle.dump(self, stream)

    @property
    def user_id(self):
        """
        :return: the stored user ID
        :rtype: str
        """
        return self._user_id

    @property
    def type(self):
        """
        :return: the token type
        :rtype: str
        """
        return self._token_type

    @property
    def token(self):
        """
        :return: the access token
        :rtype: str
        """
        return self._access_token
130 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | import hashlib
6 | import hmac
7 | import time
8 |
9 | __all__ = ['login_signature']
10 |
11 |
def login_signature(data, secret):
    """
    Sign the login request data.

    ``data`` is modified in place: ``timestamp`` is set to the current Unix
    time (as a string) and ``signature`` to the hex HMAC-SHA1, keyed with
    ``secret``, of ``grant_type + client_id + source + timestamp``.

    :param dict data: POST data; must contain ``grant_type``, ``client_id``
        and ``source``
    :param str|unicode secret: APP SECRET
    :return: the same dict, now carrying ``timestamp`` and ``signature``
    """
    data['timestamp'] = str(int(time.time()))

    params = ''.join([
        data['grant_type'],
        data['client_id'],
        data['source'],
        data['timestamp'],
    ])

    data['signature'] = hmac.new(
        secret.encode('utf-8'),
        params.encode('utf-8'),
        hashlib.sha1
    ).hexdigest()

    # The docstring always promised the signed dict, but nothing was returned.
    return data
34 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/zhihu_oauth.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | # from __future__ import unicode_literals
4 |
5 | from .im_android import ImZhihuAndroidClient
6 | from .token import ZhihuToken
7 |
8 | __all__ = ['ZhihuOAuth']
9 |
10 |
class ZhihuOAuth(ImZhihuAndroidClient):
    def __init__(self, token, api_version=None, app_version=None,
                 app_build=None, app_za=None, uuid=None, ua=None):
        """
        ..  inheritance-diagram:: ZhihuOAuth

        Auth used after login: on top of :class:`.ImZhihuAndroidClient` it
        also sends the token.

        :param ZhihuToken token: token obtained from a successful login

        ..  seealso::
            The remaining parameters are documented in
            :meth:`.ImZhihuAndroidClient.__init__`

        :param api_version:
        :param app_version:
        :param app_build:
        :param app_za:
        :param uuid: optional; forwarded like in :class:`.BeforeLoginAuth`
        :param ua: optional; forwarded like in :class:`.BeforeLoginAuth`
        """
        assert isinstance(token, ZhihuToken)
        # uuid / ua are forwarded so a caller-supplied device id or
        # User-Agent is used after login as well as before it.
        super(ZhihuOAuth, self).__init__(
            api_version, app_version, app_build, app_za, uuid, ua)
        self._token = token

    def __call__(self, r):
        """
        ..  note::
            requests calls this method automatically.

        Adds the parent's app-description headers, then sets the HTTP
        ``Authorization`` header to ``<Type> <access token>``, where the
        token type is capitalized.

        ..  seealso::
            :meth:`.ImZhihuAndroidClient.__call__`
        """
        r = super(ZhihuOAuth, self).__call__(r)
        r.headers['Authorization'] = '{type} {token}'.format(
            type=str(self._token.type.capitalize()),
            token=str(self._token.token)
        )
        return r
54 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/setting.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | import re
6 | import requests.adapters
7 |
# Retry policy: up to 10 attempts in total, and responses with one of the
# listed HTTP status codes are treated as retryable.
_RETRY_POLICY = requests.adapters.Retry(
    total=10,
    status_forcelist=[403, 408, 500, 502]
)

# Transport adapter that applies the retry policy above.
ADAPTER_WITH_RETRY = requests.adapters.HTTPAdapter(max_retries=_RETRY_POLICY)
14 |
CAPTCHA_FILE = 'captcha.gif'
"""
Default file name used to store a requested captcha image; the current value
is ``captcha.gif`` in the current directory.

Only used by :meth:`.ZhihuClient.login_in_terminal`.
"""

# NOTE: every dot in a host name below is escaped (``\.``). An unescaped ``.``
# is a regex wildcard and would also accept look-alike hosts such as
# ``wwwXzhihu.com``.

re_answer_url = re.compile(
    r'^(?:https?://)?www\.zhihu\.com/question/\d+/answer/(\d+)/?$')
"""
Regex for answer URLs, used by :any:`ZhihuClient.from_url`.
"""

re_article_url = re.compile(r'^(?:https?://)?zhuanlan\.zhihu\.com/p/(\d+)/?$')
"""
Regex for article URLs, used by :any:`ZhihuClient.from_url`.
"""

re_collection_url = re.compile(
    r'^(?:https?://)?www\.zhihu\.com/collection/(\d+)/?$')
"""
Regex for collection URLs, used by :any:`ZhihuClient.from_url`.
"""

# TODO: learn exactly how column slugs are formed and tighten this regex
re_column_url = re.compile(
    r'^(?:https?://)?zhuanlan\.zhihu\.com/([^/ ]+)/?$')
"""
Regex for column URLs, used by :any:`ZhihuClient.from_url`.
"""

re_live_url = re.compile(r'^(?:https?://)?www\.zhihu\.com/lives/(\d+)/?$')
"""
Regex for Live URLs, used by :any:`ZhihuClient.from_url`.
"""

re_people_url = re.compile(
    r'^(?:https?://)?www\.zhihu\.com/people/([^/ ]+)/?$')
"""
Regex for user URLs, used by :any:`ZhihuClient.from_url`.
"""

re_question_url = re.compile(
    r'^(?:https?://)?www\.zhihu\.com/question/(\d+)/?$')
"""
Regex for question URLs, used by :any:`ZhihuClient.from_url`.
"""

re_topic_url = re.compile(r'^(?:https?://)?www\.zhihu\.com/topic/(\d+)/?$')
"""
Regex for topic URLs, used by :any:`ZhihuClient.from_url`.
"""


RE_FUNC_MAP = {
    # RE                func            int id
    re_answer_url:      ('answer',      True),
    re_article_url:     ('article',     True),
    re_collection_url:  ('collection',  True),
    re_column_url:      ('column',      False),
    re_live_url:        ('live',        True),
    re_people_url:      ('people',      False),
    re_question_url:    ('question',    True),
    re_topic_url:       ('topic',       True),
}
"""
Mapping from URL regexes to :any:`ZhihuClient` method names.

Keys are compiled patterns; each value is a 2-tuple holding the method name
and whether the captured ``id`` has to be converted to an integer.
"""
82 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | import functools
6 |
7 | from .exception import NeedLoginException, IdMustBeIntException
8 |
9 | __all__ = ['need_login', 'int_id']
10 |
11 |
def need_login(func):
    """
    Decorator for :class:`.ZhihuClient` methods that may only be used while
    logged in.

    The wrapper calls ``self.is_login()`` first; when that is falsy it raises
    :class:`.NeedLoginException` built from the wrapped function's name,
    otherwise it forwards all arguments to the wrapped function.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Guard clause: refuse to run the wrapped method when logged out.
        if not self.is_login():
            raise NeedLoginException(func.__name__)
        return func(self, *args, **kwargs)

    return wrapper
25 |
26 |
def int_id(func):
    """
    Decorator for :class:`.ZhihuClient` methods that need an integer ID to
    build the corresponding Zhihu object. It is simply an enforced type
    check on the first positional argument.

    Any ``numbers.Integral`` value is accepted. On Python 2 this includes
    ``long``, so IDs larger than ``sys.maxint`` are no longer rejected.
    Everything the previous ``isinstance(x, int)`` check accepted is still
    accepted.

    :raise: :class:`.IdMustBeIntException` when the first positional
        argument is missing or is not an integer
    """
    # Function-scope import so the block does not rely on a new
    # module-level import.
    import numbers

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Only the first positional argument is inspected; an ID passed by
        # keyword is treated as missing, as before.
        some_id = args[0] if args else None
        if not isinstance(some_id, numbers.Integral):
            raise IdMustBeIntException(func)
        return func(self, *args, **kwargs)

    return wrapper
45 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from .activity import Activity, ActType
4 | from .answer import Answer
5 | from .article import Article
6 | from .collection import Collection
7 | from .column import Column
8 | from .comment import Comment
9 | from .live import Live, LiveBadge, LiveTag, LiveTicket
10 | from .me import Me
11 | from .message import Message
12 | from .people import People, ANONYMOUS
13 | from .question import Question
14 | from .topic import Topic
15 | from .whisper import Whisper
16 |
17 | __all__ = ['Activity', 'ActType', 'Answer', 'Article', 'Collection', 'Column',
18 | 'Comment', 'Live', 'LiveBadge', 'LiveTag', 'LiveTicket', 'Me',
19 | 'Message', 'People', 'ANONYMOUS', 'Question', 'Topic', 'Whisper']
20 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/article.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | from .base import Base
6 | from .generator import generator_of
7 | from .other import other_obj
8 | from .normal import normal_attr
9 | from .streaming import streaming
10 | from .utils import common_save
11 | from .urls import (
12 | ARTICLE_DETAIL_URL,
13 | ARTICLE_COMMENTS_URL,
14 | )
15 |
16 | __all__ = ['Article']
17 |
18 |
class Article(Base):
    """Zhihu article object, built from an article ID plus optional cache."""

    def __init__(self, aid, cache, session):
        super(Article, self).__init__(aid, cache, session)

    def _build_url(self):
        """Return the detail API URL for this article's ID."""
        return ARTICLE_DETAIL_URL.format(self.id)

    # ----- simple info -----

    @property
    @other_obj('people')
    def author(self):
        """Object built from the ``author`` field, using class ``People``."""
        return None

    @property
    @streaming()
    def can_comment(self):
        """
        .. seealso:: :any:`Answer.can_comment`
        """
        return None

    @property
    @other_obj()
    def column(self):
        """
        The column this article belongs to.

        .. warning:: The value is None when the article does not belong to
            any column; check it before using its attributes.
        """
        return None

    @property
    @normal_attr()
    def comment_count(self):
        """The ``comment_count`` field of the article data."""
        return None

    @property
    @normal_attr()
    def comment_permission(self):
        """
        .. seealso:: :any:`Answer.comment_permission`
        """
        return None

    @property
    @normal_attr()
    def content(self):
        """The ``content`` field of the article data."""
        return None

    @property
    @normal_attr()
    def excerpt(self):
        """The ``excerpt`` field of the article data."""
        return None

    @property
    @normal_attr()
    def id(self):
        """The ``id`` field, falling back to the ID given at construction."""
        return self._id

    @property
    @normal_attr()
    def image_url(self):
        """The ``image_url`` field of the article data."""
        return None

    @property
    @streaming(use_cache=False)
    def suggest_edit(self):
        """
        .. seealso:: :any:`Answer.suggest_edit`
        """
        return None

    @property
    @normal_attr()
    def title(self):
        """The ``title`` field of the article data."""
        return None

    @property
    @normal_attr('updated')
    def updated_time(self):
        """The ``updated`` field of the article data."""
        return None

    @property
    @normal_attr()
    def voteup_count(self):
        """The ``voteup_count`` field of the article data."""
        return None

    # ----- generators -----

    @property
    @generator_of(ARTICLE_COMMENTS_URL)
    def comments(self):
        """Generator built by :any:`generator_of` from the comments URL."""
        return None

    # TODO: article.voters -- the API endpoint is unknown

    # ----- other operate -----

    def save(self, path='.', filename=None, invalid_chars=None):
        """
        Identical to :any:`Answer.save`, except that the default file name
        is the article title.

        .. seealso:: :any:`Answer.save`

        .. note:: TIPS

            Suggested usage:

            .. code-block:: python

                for article in column.articles:
                    print(article.title)
                    article.save(column.title)

        """
        no_cache = self._cache is None
        if no_cache:
            self._get_data()
        # Read content first, then title, before handing both to the helper.
        body = self.content
        default_name = self.title
        common_save(path, filename, body, default_name, invalid_chars)
138 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/base.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | import abc
6 |
7 | from ..exception import MyJSONDecodeError, GetDataErrorException
8 |
9 | __all__ = ['Base']
10 |
11 |
class Base(object):
    def __init__(self, zhihu_obj_id, cache, session):
        """

        .. note:: Cache and Data

            The ``cache`` argument of :any:`Base` holds attribute values that
            are already known. It is usually one attribute taken from another
            object's JSON data.

            Take :any:`Answer.author` as an example: the raw JSON fetched for
            an :any:`Answer` already carries some basic author information
            (name, id, headline). That partial data is passed as ``cache`` to
            the author object (of type :any:`People`), so that evaluating
            ``answer.author.name`` does not need another network request.

            The :any:`normal_attr`, :any:`other_obj` and :any:`streaming`
            decorators prefer the data in ``cache`` and only call
            :any:`_get_data` when that lookup fails.

        :param zhihu_obj_id: ID used to build the Zhihu object
        :param dict cache: cached data, i.e. the already-known attributes
        :param session: network request session
        """
        self._id = zhihu_obj_id
        self._cache = cache
        self._session = session
        self._data = None

    def _get_data(self):
        """
        Main method that calls the Zhihu API to fetch this object's data.
        Does nothing when data has already been fetched.

        url comes from :any:`_build_url`.

        method comes from :any:`_method`.

        params come from :any:`_build_params`.

        data comes from :any:`_build_data`.

        :raise: :any:`GetDataErrorException` when the response cannot be
            parsed as JSON, or when the parsed JSON contains an ``'error'``
            key
        """
        if self._data is not None:
            return

        url = self._build_url()
        res = self._session.request(
            self._method(),
            url=url,
            params=self._build_params(),
            data=self._build_data(),
        )
        expectation = 'a valid Zhihu {0} JSON data'.format(
            self.__class__.__name__)
        # Built up front so both failure paths raise the same exception.
        e = GetDataErrorException(url, res, expectation)

        try:
            json_dict = res.json()
        except MyJSONDecodeError:
            raise e

        if 'error' in json_dict:
            raise e
        self._data = json_dict

    @abc.abstractmethod
    def _build_url(self):
        """
        Subclasses **must** override this to supply the API URL used to
        fetch data.

        The usual form is ZHIHU_XXX_URL.format(self.id)
        """
        return ''

    # noinspection PyMethodMayBeStatic
    def _build_params(self):
        """
        Subclasses may override this to supply the query parameters sent to
        the API. Defaults to None.
        """
        return None

    # noinspection PyMethodMayBeStatic
    def _build_data(self):
        """
        Subclasses may override this to supply the body data sent to the
        API. Defaults to None.
        """
        return None

    # noinspection PyMethodMayBeStatic
    def _method(self):
        """
        Subclasses may override this to supply the HTTP method. Defaults to
        GET.
        """
        return 'GET'

    def refresh(self):
        """
        Drop this object's cache and data, so that the next attribute access
        sends a new request to Zhihu for fresh data.
        """
        self._cache = None
        self._data = None

    @property
    def pure_data(self):
        """
        For debugging. Returns the JSON data currently held by the object as
        a dict with the keys ``'cache'`` and ``'data'``.

        When the cache is empty, :any:`_get_data` is called first.
        """
        if not self._cache:
            self._get_data()
        snapshot = {
            'cache': self._cache,
            'data': self._data,
        }
        return snapshot
125 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/column.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | from .base import Base
6 | from .generator import generator_of
7 | from .other import other_obj
8 | from .normal import normal_attr
9 | from .urls import (
10 | COLUMN_DETAIL_URL,
11 | COLUMN_ARTICLES_URL,
12 | COLUMN_FOLLOWERS_URL,
13 | )
14 |
15 | __all__ = ['Column']
16 |
17 |
class Column(Base):
    """Zhihu column object, built from a column ID plus optional cache."""

    def __init__(self, cid, cache, session):
        super(Column, self).__init__(cid, cache, session)

    def _build_url(self):
        """Return the detail API URL for this column's ID."""
        return COLUMN_DETAIL_URL.format(self.id)

    # ---- simple info -----

    @property
    @normal_attr('articles_count')
    def article_count(self):
        """The ``articles_count`` field of the column data."""
        return None

    @property
    def articles_count(self):
        """Alias of :any:`article_count`."""
        return self.article_count

    @property
    @other_obj('people')
    def author(self):
        """Object built from the ``author`` field, using class ``People``."""
        return None

    @property
    @normal_attr()
    def comment_permission(self):
        """The ``comment_permission`` field of the column data."""
        return None

    @property
    @normal_attr()
    def description(self):
        """The ``description`` field of the column data."""
        return None

    @property
    @normal_attr('followers')
    def follower_count(self):
        """The ``followers`` field of the column data."""
        return None

    @property
    @normal_attr()
    def id(self):
        """The ``id`` field, falling back to the ID given at construction."""
        return self._id

    @property
    @normal_attr()
    def image_url(self):
        """The ``image_url`` field of the column data."""
        return None

    @property
    @normal_attr()
    def title(self):
        """The ``title`` field of the column data."""
        return None

    @property
    @normal_attr('updated')
    def updated_time(self):
        """The ``updated`` field of the column data."""
        return None

    @property
    def updated(self):
        """Alias of :any:`updated_time`."""
        return self.updated_time

    # ----- generators -----

    @property
    @generator_of(COLUMN_ARTICLES_URL)
    def articles(self):
        """Generator built by :any:`generator_of` from the articles URL."""
        return None

    @property
    @generator_of(COLUMN_FOLLOWERS_URL, 'people')
    def followers(self):
        """Generator built by :any:`generator_of` from the followers URL."""
        return None
91 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/message.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | from .base import Base
6 | from .other import other_obj
7 | from .normal import normal_attr
8 |
9 | __all__ = ['Message']
10 |
11 |
class Message(Base):
    """A single message; it has no detail URL of its own."""

    def __init__(self, mid, cache, session):
        super(Message, self).__init__(mid, cache, session)

    def _build_url(self):
        """Return an empty URL."""
        return ''

    # ----- simple info -----

    @property
    @normal_attr()
    def created_time(self):
        """The ``created_time`` field of the message data."""
        return None

    @property
    @normal_attr()
    def content(self):
        """The ``content`` field of the message data."""
        return None

    @property
    @other_obj('people')
    def sender(self):
        """Object built from the ``sender`` field, using class ``People``."""
        return None

    @property
    @other_obj('people')
    def receiver(self):
        """Object built from the ``receiver`` field, using class ``People``."""
        return None

    def format(self, template='[{time}] {sender} --> {receiver}: {content}'):
        """
        Render the message through a format template.

        ``{time}`` is the timestamp; ``{sender}`` is the sender's user name;
        ``{receiver}`` is the receiver's user name; ``{content}`` is the
        message content.

        See the examples for usage.

        :param str template: the format template
        :return: the formatted string
        :rtype: str
        """
        # Resolve the four placeholders in the order they are documented.
        when = self.created_time
        sender_name = self.sender.name
        receiver_name = self.receiver.name
        text = self.content
        return template.format(
            time=when,
            sender=sender_name,
            receiver=receiver_name,
            content=text,
        )

    def __str__(self):
        return self.format()

    __repr__ = __str__
65 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/normal.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals, print_function
4 |
5 | import functools
6 |
7 | from .utils import can_get_from
8 |
9 | __all__ = ['normal_attr']
10 |
11 |
def normal_attr(name_in_json=None):
    """

    This decorator:

    1. Marks the attribute as a regular attribute.
    2. Returns the matching attribute from the object's data, deciding by
       itself whether a network request is needed.

    Lookup procedure:

    1. If ``data`` exists go to 2, otherwise go to 3.
    2. Try to read the value from ``data``; return it on success, otherwise
       return the result of the decorated function.
    3. Try to read the attribute from ``cache``; return it on success.
    4. Check whether the attribute name is ``id``. If it is, return the
       result of the decorated function; otherwise go to 5. (``id`` is
       normally needed by :any:`_build_url`, so requesting data from Zhihu
       at this point would loop forever.)
    5. Request the data through the API, then go to 2. If ``data`` is still
       empty after the request, None is returned.

    .. seealso:: About cache and data

        See the explanation in the :any:`Base` class.

    :param str|unicode name_in_json: name of the attribute in the JSON.
        Optional; defaults to the name of the decorated method.
    """
    def wrappers_wrapper(func):

        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            key = name_in_json or func.__name__

            def from_data_or_fallback():
                # Prefer the fetched data; otherwise use the decorated
                # function's own return value.
                payload = self._data
                if can_get_from(key, payload):
                    return payload[key]
                return func(self, *args, **kwargs)

            if self._data:
                return from_data_or_fallback()

            if self._cache and can_get_from(key, self._cache):
                return self._cache[key]

            # id is important: when there is no data, _build_url needs it,
            # so just return the function result instead of fetching.
            if key == 'id':
                return func(self, *args, **kwargs)

            self._get_data()
            # noinspection PyTypeChecker
            if self._data:
                return from_data_or_fallback()
            return None

        return wrapper

    return wrappers_wrapper
66 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/other.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | import functools
6 | import importlib
7 |
8 | __all__ = ['other_obj']
9 |
10 |
def other_obj(class_name=None, name_in_json=None, module_filename=None):
    """

    This decorator:

    1. Marks the attribute as another Zhihu object.
    2. Takes the matching attribute from the current object's data and
       builds the required object from it.

    Object construction procedure:

    1. Try to import the class named by the class name; fall back to
       :any:`Base` when that fails.
    2. Use the decorated function's return value as the object data; if it
       is not None go to 6.
    3. Try to take the data from ``cache``. On success go to 6.
    4. Otherwise call :any:`_get_data` to fetch ``data`` from the Zhihu API.
    5. Try to take the data from ``data``; if it is missing there as well,
       return None.
    6. Build an object of the class from step 1, passing the data found as
       its ``cache``. None is returned when that data has no ``id`` key.

    .. seealso:: About cache and data

        See the explanation in the :any:`Base` class.

    :param class_name: name of the class to instantiate; defaults to the
        decorated function's name (an all-lowercase name is capitalized)
    :param name_in_json: key of the attribute in the JSON; defaults to the
        decorated function's name
    :param module_filename: file name of the module holding the class;
        defaults to the lower-cased class name
    """
    def wrappers_wrapper(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            target_name = class_name or func.__name__
            if target_name.islower():
                target_name = target_name.capitalize()
            json_key = name_in_json or func.__name__
            module_name = module_filename or target_name.lower()

            try:
                module = importlib.import_module(
                    '.' + module_name,
                    'zhihu_oauth.zhcls'
                )
                target_cls = getattr(module, target_name)
            except (ImportError, AttributeError):
                # Unknown module or class: fall back to the generic Base.
                from .base import Base
                target_cls = Base

            seed = func(self, *args, **kwargs)

            if seed is None:
                if self._cache and json_key in self._cache:
                    seed = self._cache[json_key]
                else:
                    self._get_data()
                    if self._data and json_key in self._data:
                        seed = self._data[json_key]

            # Without data, or without an id in it, no object can be built.
            if seed is None or 'id' not in seed:
                return None
            return target_cls(seed['id'], seed, self._session)

        return wrapper

    return wrappers_wrapper
73 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/question.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | from .base import Base
6 | from .generator import generator_of
7 | from .normal import normal_attr
8 | from .streaming import streaming
9 | from zhihu_oauth.zhcls.urls import (
10 | QUESTION_DETAIL_URL,
11 | QUESTION_ANSWERS_URL,
12 | QUESTION_COMMENTS_URL,
13 | QUESTION_FOLLOWERS_URL,
14 | QUESTION_TOPICS_URL,
15 | )
16 |
17 | __all__ = ['Question']
18 |
19 |
class Question(Base):
    """Zhihu question object, built from a question ID plus optional cache."""

    def __init__(self, qid, cache, session):
        super(Question, self).__init__(qid, cache, session)

    def _build_url(self):
        """Return the detail API URL for this question's ID."""
        return QUESTION_DETAIL_URL.format(self._id)

    # ----- simple info -----

    @property
    @normal_attr()
    def allow_delete(self):
        """The ``allow_delete`` field of the question data."""
        return None

    @property
    @normal_attr()
    def answer_count(self):
        """The ``answer_count`` field of the question data."""
        return None

    @property
    @normal_attr()
    def comment_count(self):
        """The ``comment_count`` field of the question data."""
        return None

    @property
    @normal_attr("created")
    def created_time(self):
        """The ``created`` field of the question data."""
        return None

    @property
    @normal_attr('except')
    def excerpt(self):
        """
        In the JSON returned by Zhihu this field is called ``except`` (sic).
        """
        return None

    @property
    @normal_attr()
    def follower_count(self):
        """The ``follower_count`` field of the question data."""
        return None

    @property
    @normal_attr()
    def id(self):
        """The ``id`` field, falling back to the ID given at construction."""
        return self._id

    @property
    @normal_attr()
    def detail(self):
        """The ``detail`` field of the question data."""
        return None

    @property
    @streaming()
    def redirection(self):
        """
        Typical return value:

        .. code-block:: python

            {
                'to':
                {
                    'url': 'https://api.zhihu.com/questions/19570036',
                    'id': 19570036,
                    'type': 'question',
                    'title': '什么是「问题重定向」?如何正确使用该功能解决重复问题?'
                },
                'from':
                [
                    {
                        'url': 'https://api.zhihu.com/questions/19772082',
                        'id': 19772082,
                        'type': 'question',
                        'title': '知乎上有重复的问题吗?'
                    },
                    {
                        'url': 'https://api.zhihu.com/questions/20830682',
                        'id': 20830682,
                        'type': 'question',
                        'title': '各位知友以为同一问题重复出现,知乎应如何应对?'
                    }
                ]
            }

        Getting a syntax error when using the ``from`` attribute? See the
        explanation in the project documentation.

        """
        return None

    @property
    @streaming()
    def status(self):
        """The ``status`` value, wrapped by :any:`streaming`."""
        return None

    @property
    @streaming(use_cache=False)
    def suggest_edit(self):
        """
        Typical return values:

        .. code-block:: python

            {'status': False, 'reason': ''}

            {'status': True, 'reason': '问题表意不明'}
        """
        return None

    @property
    @normal_attr()
    def title(self):
        """The ``title`` field of the question data."""
        return None

    @property
    @normal_attr()
    def updated_time(self):
        """The ``updated_time`` field of the question data."""
        return None

    # ----- generators -----

    @property
    @generator_of(QUESTION_ANSWERS_URL)
    def answers(self):
        """Generator built by :any:`generator_of` from the answers URL."""
        return None

    @property
    @generator_of(QUESTION_COMMENTS_URL)
    def comments(self):
        """Generator built by :any:`generator_of` from the comments URL."""
        return None

    @property
    @generator_of(QUESTION_FOLLOWERS_URL, 'people')
    def followers(self):
        """Generator built by :any:`generator_of` from the followers URL."""
        return None

    @property
    @generator_of(QUESTION_TOPICS_URL)
    def topics(self):
        """Generator built by :any:`generator_of` from the topics URL."""
        return None
160 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/topic.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | from .base import Base
6 | from .generator import generator_of
7 | from .normal import normal_attr
8 | from zhihu_oauth.zhcls.urls import (
9 | TOPIC_DETAIL_URL,
10 | TOPIC_BEST_ANSWERS_URL,
11 | TOPIC_BEST_ANSWERERS_URL,
12 | TOPIC_CHILDREN_URL,
13 | TOPIC_FOLLOWERS_URL,
14 | TOPIC_PARENTS_URL,
15 | TOPIC_UNANSWERED_QUESTION,
16 | )
17 |
18 | __all__ = ['Topic']
19 |
20 |
class Topic(Base):
    """Zhihu topic object, built from a topic ID plus optional cache."""

    def __init__(self, tid, cache, session):
        super(Topic, self).__init__(tid, cache, session)

    def _build_url(self):
        """Return the detail API URL for this topic's ID."""
        return TOPIC_DETAIL_URL.format(self.id)

    # ---- simple info -----

    @property
    @normal_attr()
    def avatar_url(self):
        """The ``avatar_url`` field of the topic data."""
        return None

    @property
    @normal_attr('best_answers_count')
    def best_answer_count(self):
        """The ``best_answers_count`` field of the topic data."""
        return None

    @property
    def best_answers_count(self):
        """Alias of :any:`best_answer_count`."""
        return self.best_answer_count

    @property
    @normal_attr()
    def id(self):
        """The ``id`` field, falling back to the ID given at construction."""
        return self._id

    @property
    @normal_attr()
    def introduction(self):
        """The ``introduction`` field of the topic data."""
        return None

    @property
    @normal_attr()
    def excerpt(self):
        """The ``excerpt`` field of the topic data."""
        return None

    @property
    def father_count(self):
        """Alias of :any:`parent_count`."""
        return self.parent_count

    @property
    @normal_attr('followers_count')
    def follower_count(self):
        """The ``followers_count`` field of the topic data."""
        return None

    @property
    def followers_count(self):
        """Alias of :any:`follower_count`."""
        return self.follower_count

    @property
    @normal_attr()
    def name(self):
        """The ``name`` field of the topic data."""
        return None

    @property
    @normal_attr('father_count')
    def parent_count(self):
        """The ``father_count`` field of the topic data."""
        return None

    @property
    @normal_attr('questions_count')
    def question_count(self):
        """The ``questions_count`` field of the topic data."""
        return None

    @property
    def questions_count(self):
        """Alias of :any:`question_count`."""
        return self.question_count

    @property
    @normal_attr()
    def unanswered_count(self):
        """The ``unanswered_count`` field of the topic data."""
        return None

    # ----- generators -----

    @property
    @generator_of(TOPIC_BEST_ANSWERS_URL, 'answer')
    def best_answers(self):
        """
        Best ("essence") answers.
        """
        return None

    @property
    @generator_of(TOPIC_BEST_ANSWERERS_URL, 'people')
    def best_answerers(self):
        """
        Presumably the "best answerers" of the topic.

        ``best_answerers`` is the name chosen by Zhihu.
        """
        return None

    @property
    @generator_of(TOPIC_CHILDREN_URL, 'topic')
    def children(self):
        """
        Child topics.
        """
        return None

    @property
    @generator_of(TOPIC_FOLLOWERS_URL, 'people')
    def followers(self):
        """Generator built by :any:`generator_of` from the followers URL."""
        return None

    @property
    @generator_of(TOPIC_PARENTS_URL, 'topic')
    def parents(self):
        """
        Parent topics.
        """
        return None

    @property
    @generator_of(TOPIC_UNANSWERED_QUESTION, 'question')
    def unanswered_questions(self):
        """Generator built by :any:`generator_of` from the unanswered URL."""
        return None
141 |
--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/whisper.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | from .base import Base
6 | from .other import other_obj
7 | from .generator import generator_of
8 | from .normal import normal_attr
9 | from .urls import MESSAGES_URL
10 |
11 | __all__ = ['Whisper']
12 |
13 |
class Whisper(Base):
    """
    In short: "all the messages exchanged with one person".

    The name Whisper was picked after some thought, on a suggestion from a
    chat group.

    The Zhihu API turned out to call this a Thread; the name is kept as
    Whisper on purpose.
    """
    def _build_url(self):
        """Return an empty URL."""
        return ''

    def _get_data(self):
        # Deliberate no-op: this override performs no request, so ``_data``
        # is never filled by it.
        pass

    def __init__(self, wid, cache, session):
        super(Whisper, self).__init__(wid, cache, session)

    # ----- simple info -----

    @property
    @normal_attr()
    def allow_reply(self):
        """The ``allow_reply`` field of the whisper data."""
        return None

    @property
    def id(self):
        """The ID given at construction."""
        return self._id

    @property
    @normal_attr()
    def snippet(self):
        """
        Summary of the most recent private message.
        """
        return None

    @property
    @normal_attr()
    def updated_time(self):
        """The ``updated_time`` field of the whisper data."""
        return None

    @property
    @normal_attr()
    def unread_count(self):
        """The ``unread_count`` field of the whisper data."""
        return None

    @property
    @other_obj('people', 'participant')
    def who(self):
        """
        The other Zhihu user taking part in this private conversation.
        """
        return None

    @property
    @generator_of(MESSAGES_URL)
    def messages(self):
        """Generator built by :any:`generator_of` from the messages URL."""
        return None
72 |
--------------------------------------------------------------------------------
/src/lib/requests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # __
4 | # /__) _ _ _ _ _/ _
5 | # / ( (- (/ (/ (- _) / _)
6 | # /
7 |
8 | """
9 | Requests HTTP library
10 | ~~~~~~~~~~~~~~~~~~~~~
11 |
12 | Requests is an HTTP library, written in Python, for human beings. Basic GET
13 | usage:
14 |
15 | >>> import requests
16 | >>> r = requests.get('https://www.python.org')
17 | >>> r.status_code
18 | 200
19 | >>> 'Python is a programming language' in r.content
20 | True
21 |
22 | ... or POST:
23 |
24 | >>> payload = dict(key1='value1', key2='value2')
25 | >>> r = requests.post('http://httpbin.org/post', data=payload)
26 | >>> print(r.text)
27 | {
28 | ...
29 | "form": {
30 | "key2": "value2",
31 | "key1": "value1"
32 | },
33 | ...
34 | }
35 |
36 | The other HTTP methods are supported - see `requests.api`. Full documentation
37 | is at <http://python-requests.org>.
38 |
39 | :copyright: (c) 2016 by Kenneth Reitz.
40 | :license: Apache 2.0, see LICENSE for more details.
41 | """
42 |
# Package metadata.
__title__ = 'requests'
__version__ = '2.11.1'
__build__ = 0x021101
__author__ = 'Kenneth Reitz'
__license__ = 'Apache 2.0'
__copyright__ = 'Copyright 2016 Kenneth Reitz'

# Attempt to enable urllib3's SNI support, if possible
try:
    from .packages.urllib3.contrib import pyopenssl
    pyopenssl.inject_into_urllib3()
except ImportError:
    # Best effort: continue without the injection when the import fails.
    pass

import warnings

# urllib3's DependencyWarnings should be silenced.
from .packages.urllib3.exceptions import DependencyWarning
warnings.simplefilter('ignore', DependencyWarning)

# Names re-exported at package level.
from . import utils
from .models import Request, Response, PreparedRequest
from .api import request, get, head, post, patch, put, delete, options
from .sessions import session, Session
from .status_codes import codes
from .exceptions import (
    RequestException, Timeout, URLRequired,
    TooManyRedirects, HTTPError, ConnectionError,
    FileModeWarning, ConnectTimeout, ReadTimeout
)

# Set default logging handler to avoid "No handler found" warnings.
import logging
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    # Fallback for interpreters whose logging module has no NullHandler:
    # a handler that discards every record.
    class NullHandler(logging.Handler):
        def emit(self, record):
            pass

logging.getLogger(__name__).addHandler(NullHandler())

# FileModeWarnings go off per the default.
warnings.simplefilter('default', FileModeWarning, append=True)
87 |
--------------------------------------------------------------------------------
/src/lib/requests/_internal_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests._internal_utils
5 | ~~~~~~~~~~~~~~
6 |
7 | Provides utility functions that are consumed internally by Requests
8 | which depend on extremely few external helpers (such as compat)
9 | """
10 |
11 | from .compat import is_py2, builtin_str
12 |
13 |
def to_native_string(string, encoding='ascii'):
    """Return ``string`` as the interpreter's native string type.

    A value that already is a ``builtin_str`` is returned unchanged.
    Otherwise it is encoded on Python 2 and decoded on Python 3, using
    ``encoding`` (ASCII unless told otherwise).
    """
    if isinstance(string, builtin_str):
        return string

    # Pick the conversion direction that matches the running interpreter.
    convert = string.encode if is_py2 else string.decode
    return convert(encoding)
28 |
--------------------------------------------------------------------------------
/src/lib/requests/certs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | requests.certs
6 | ~~~~~~~~~~~~~~
7 |
8 | This module returns the preferred default CA certificate bundle.
9 |
10 | If you are packaging Requests, e.g., for a Linux distribution or a managed
11 | environment, you can change the definition of where() to return a separately
12 | packaged CA bundle.
13 | """
14 | import os.path
15 |
try:
    # Prefer certifi's bundle locator when that package can be imported.
    from certifi import where
except ImportError:
    def where():
        """Return the preferred certificate bundle."""
        # vendored bundle inside Requests
        package_dir = os.path.dirname(__file__)
        return os.path.join(package_dir, 'cacert.pem')

if __name__ == '__main__':
    # Running the module as a script prints the bundle location.
    print(where())
26 |
--------------------------------------------------------------------------------
/src/lib/requests/compat.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.compat
5 | ~~~~~~~~~~~~~~~
6 |
7 | This module handles import compatibility issues between Python 2 and
8 | Python 3.
9 | """
10 |
11 | from .packages import chardet
12 |
13 | import sys
14 |
15 | # -------
16 | # Pythons
17 | # -------
18 |
19 | # Syntax sugar.
_ver = sys.version_info  # interpreter version; index 0 is the major version

#: Python 2.x?
is_py2 = (_ver[0] == 2)

#: Python 3.x?
is_py3 = (_ver[0] == 3)

# Expose a ``json`` name: simplejson when it can be imported, otherwise the
# standard-library json module.
try:
    import simplejson as json
except (ImportError, SyntaxError):
    # simplejson does not support Python 3.2, it throws a SyntaxError
    # because of u'...' Unicode literals.
    import json
34 |
35 | # ---------
36 | # Specifics
37 | # ---------
38 |
# Both branches publish the same set of names so the rest of the package can
# import them from here without caring which interpreter is running.
if is_py2:
    from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass
    from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
    from urllib2 import parse_http_list
    import cookielib
    from Cookie import Morsel
    from StringIO import StringIO
    from .packages.urllib3.packages.ordered_dict import OrderedDict

    # The module-level ``str``/``bytes`` names are deliberately rebound here.
    # ``builtin_str`` keeps the interpreter's own ``str`` type.
    builtin_str = str
    bytes = str
    str = unicode
    basestring = basestring
    numeric_types = (int, long, float)

elif is_py3:
    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
    from urllib.request import parse_http_list, getproxies, proxy_bypass
    from http import cookiejar as cookielib
    from http.cookies import Morsel
    from io import StringIO
    from collections import OrderedDict

    # Python 3 has no ``basestring``; it is defined here as a tuple of
    # ``str`` and ``bytes``.
    builtin_str = str
    str = str
    bytes = bytes
    basestring = (str, bytes)
    numeric_types = (int, float)
67 |
--------------------------------------------------------------------------------
/src/lib/requests/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.exceptions
5 | ~~~~~~~~~~~~~~~~~~~
6 |
7 | This module contains the set of Requests' exceptions.
8 | """
9 | from .packages.urllib3.exceptions import HTTPError as BaseHTTPError
10 |
11 |
class RequestException(IOError):
    """Base class for the exceptions defined in this module.

    Raised when an ambiguous error occurred while handling a request.
    """

    def __init__(self, *args, **kwargs):
        """Store the optional ``request`` and ``response`` keyword arguments.

        Both keywords are removed from *kwargs* before the remaining
        arguments are handed to :class:`IOError`. When no (truthy) request
        is supplied but the response carries a ``request`` attribute, that
        request is used instead.
        """
        resp = kwargs.pop('response', None)
        req = kwargs.pop('request', None)

        # Fall back to the request attached to the response, if any.
        if resp is not None and not req and hasattr(resp, 'request'):
            req = resp.request

        self.response = resp
        self.request = req
        super(RequestException, self).__init__(*args, **kwargs)
26 |
27 |
class HTTPError(RequestException):
    """Raised when an HTTP error occurs."""
30 |
31 |
class ConnectionError(RequestException):
    """Raised when a connection error occurs."""
34 |
35 |
class ProxyError(ConnectionError):
    """Raised when a proxy error occurs; a kind of :exc:`ConnectionError`."""
38 |
39 |
class SSLError(ConnectionError):
    """Raised when an SSL error occurs; a kind of :exc:`ConnectionError`."""
42 |
43 |
class Timeout(RequestException):
    """Raised when the request timed out.

    This is the common base of
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout`, so catching it handles both.
    """
51 |
52 |
class ConnectTimeout(ConnectionError, Timeout):
    """Raised when the attempt to connect to the remote server timed out.

    It is both a :exc:`ConnectionError` and a :exc:`Timeout`.
    Requests that produced this error are safe to retry.
    """
58 |
59 |
class ReadTimeout(Timeout):
    """Raised when the server sent no data within the allotted time."""
62 |
63 |
class URLRequired(RequestException):
    """Raised when a request cannot be made because a valid URL is missing."""
66 |
67 |
class TooManyRedirects(RequestException):
    """Raised when there were too many redirects."""
70 |
71 |
class MissingSchema(RequestException, ValueError):
    """Raised when the URL lacks a schema (e.g. http or https).

    Also a :exc:`ValueError`.
    """
74 |
75 |
class InvalidSchema(RequestException, ValueError):
    """Raised for an invalid URL schema; see defaults.py for valid schemas.

    Also a :exc:`ValueError`.
    """
78 |
79 |
class InvalidURL(RequestException, ValueError):
    """Raised when the supplied URL is somehow invalid.

    Also a :exc:`ValueError`.
    """
82 |
83 |
class InvalidHeader(RequestException, ValueError):
    """Raised when a supplied header value is somehow invalid.

    Also a :exc:`ValueError`.
    """
86 |
87 |
class ChunkedEncodingError(RequestException):
    """Raised when the server declared chunked encoding but sent an invalid
    chunk.
    """
90 |
91 |
class ContentDecodingError(RequestException, BaseHTTPError):
    """Raised when the response content could not be decoded.

    Also derives from urllib3's ``HTTPError`` (imported here as
    ``BaseHTTPError``).
    """
94 |
95 |
class StreamConsumedError(RequestException, TypeError):
    """Raised when the content of a response has already been consumed.

    Also a :exc:`TypeError`.
    """
98 |
99 |
class RetryError(RequestException):
    """Raised when custom retries logic failed."""
102 |
103 |
104 | # Warnings
105 |
106 |
class RequestsWarning(Warning):
    """Root of the warning categories defined by Requests."""
110 |
111 |
class FileModeWarning(RequestsWarning, DeprecationWarning):
    """Warns that a file was opened in text mode while Requests determined
    its binary length.

    Also a :exc:`DeprecationWarning`.
    """
115 |
--------------------------------------------------------------------------------
/src/lib/requests/hooks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.hooks
5 | ~~~~~~~~~~~~~~
6 |
7 | This module provides the capabilities for the Requests hooks system.
8 |
9 | Available hooks:
10 |
11 | ``response``:
12 | The response generated from a Request.
13 | """
# Names of the events for which hooks can be registered.
HOOKS = ['response']


def default_hooks():
    """Return a new mapping of every event in ``HOOKS`` to an empty list."""
    registry = {}
    for event in HOOKS:
        registry[event] = []
    return registry
19 |
20 | # TODO: response is the only one
21 |
22 |
def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Run the hooks registered under *key* against *hook_data*.

    :param key: event name to look up in *hooks*.
    :param hooks: mapping of event name to a callable or an iterable of
        callables; a falsy value means "no hooks".
    :param hook_data: the value passed to each hook in turn.
    :param kwargs: extra keyword arguments forwarded to every hook.
    :return: *hook_data*, replaced by each non-``None`` hook result.
    """
    registered = (hooks or dict()).get(key)
    if not registered:
        return hook_data

    # A single callable is treated as a one-element list of hooks.
    if hasattr(registered, '__call__'):
        registered = [registered]

    for callback in registered:
        replacement = callback(hook_data, **kwargs)
        # A hook that returns None leaves the current data in place.
        if replacement is not None:
            hook_data = replacement
    return hook_data
35 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/README.rst:
--------------------------------------------------------------------------------
1 | If you are planning to submit a pull request to requests with any changes in
2 | this library, do not go any further. These are independent libraries which we
3 | vendor into requests. Any changes necessary to these libraries must be made in
4 | them and submitted as separate pull requests to those libraries.
5 |
6 | urllib3 pull requests go here: https://github.com/shazow/urllib3
7 |
8 | chardet pull requests go here: https://github.com/chardet/chardet
9 |
10 | See https://github.com/kennethreitz/requests/pull/1812#issuecomment-30854316
11 | for the reasoning behind this.
12 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Debian and other distributions "unbundle" requests' vendored dependencies, and
3 | rewrite all imports to use the global versions of ``urllib3`` and ``chardet``.
4 | The problem with this is that not only requests itself imports those
5 | dependencies, but third-party code outside of the distros' control too.
6 |
7 | In reaction to these problems, the distro maintainers replaced
8 | ``requests.packages`` with a magical "stub module" that imports the correct
9 | modules. The implementations were varying in quality and all had severe
10 | problems. For example, a symlink (or hardlink) that links the correct modules
11 | into place introduces problems regarding object identity, since you now have
12 | two modules in `sys.modules` with the same API, but different identities::
13 |
14 | requests.packages.urllib3 is not urllib3
15 |
16 | With version ``2.5.2``, requests started to maintain its own stub, so that
17 | distro-specific breakage would be reduced to a minimum, even though the whole
18 | issue is not requests' fault in the first place. See
19 | https://github.com/kennethreitz/requests/pull/2375 for the corresponding pull
20 | request.
21 | '''
22 |
23 | from __future__ import absolute_import
24 | import sys
25 |
# Prefer the copy of urllib3 vendored inside this package. If it cannot be
# imported, fall back to the globally importable urllib3 and register that
# module in ``sys.modules`` under this package's dotted name.
try:
    from . import urllib3
except ImportError:
    import urllib3
    sys.modules['%s.urllib3' % __name__] = urllib3

# Same fallback scheme for chardet.
try:
    from . import chardet
except ImportError:
    import chardet
    sys.modules['%s.chardet' % __name__] = chardet
37 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/__init__.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # This library is free software; you can redistribute it and/or
3 | # modify it under the terms of the GNU Lesser General Public
4 | # License as published by the Free Software Foundation; either
5 | # version 2.1 of the License, or (at your option) any later version.
6 | #
7 | # This library is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 | # Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public
13 | # License along with this library; if not, write to the Free Software
14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15 | # 02110-1301 USA
16 | ######################### END LICENSE BLOCK #########################
17 |
18 | __version__ = "2.3.0"
19 | from sys import version_info
20 |
21 |
def detect(aBuf):
    """Run a ``UniversalDetector`` over *aBuf* and return its result.

    :param aBuf: the byte string to analyse.
    :return: the detector's ``result`` attribute after feeding and closing.
    :raises ValueError: if *aBuf* is a ``unicode`` object on Python 2, or
        anything other than ``bytes`` on Python 3.
    """
    if version_info >= (3, 0):
        wrong_type = not isinstance(aBuf, bytes)
    else:
        wrong_type = isinstance(aBuf, unicode)
    if wrong_type:
        raise ValueError('Expected a bytes object, not a unicode object')

    # Imported at call time, as in the original implementation.
    from . import universaldetector
    detector = universaldetector.UniversalDetector()
    detector.reset()
    detector.feed(aBuf)
    detector.close()
    return detector.result
33 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/big5prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import Big5DistributionAnalysis
31 | from .mbcssm import Big5SMModel
32 |
33 |
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober configured with the Big5 state-machine model and
    the Big5 distribution analysis.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # Both collaborators must exist before reset() is called.
        self._mDistributionAnalyzer = Big5DistributionAnalysis()
        self._mCodingSM = CodingStateMachine(Big5SMModel)
        self.reset()

    def get_charset_name(self):
        """Return the charset name reported by this prober."""
        return "Big5"
43 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/chardetect.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Script which takes one or more file paths and reports on their detected
4 | encodings
5 |
6 | Example::
7 |
8 | % chardetect somefile someotherfile
9 | somefile: windows-1252 with confidence 0.5
10 | someotherfile: ascii with confidence 1.0
11 |
12 | If no paths are provided, it takes its input from stdin.
13 |
14 | """
15 |
16 | from __future__ import absolute_import, print_function, unicode_literals
17 |
18 | import argparse
19 | import sys
20 | from io import open
21 |
22 | from chardet import __version__
23 | from chardet.universaldetector import UniversalDetector
24 |
25 |
def description_of(lines, name='stdin'):
    """
    Describe the probable encoding of a file or list of strings.

    Every item of *lines* is fed to a ``UniversalDetector``; the returned
    text names the detected encoding and its confidence, or says that there
    was no result.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(chunk)
    detector.close()

    outcome = detector.result
    encoding = outcome['encoding']
    if not encoding:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, encoding,
                                                 outcome['confidence'])
46 |
47 |
def main(argv=None):
    '''
    Handles command line arguments and gets things started.

    Prints one description line per input (see ``description_of``). When an
    input is a terminal, a usage hint is written to stderr first.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # The detector must be fed bytes. On Python 3 ``sys.stdin`` is a text
    # stream, so use its underlying binary buffer when there is one; on
    # Python 2 (no ``buffer`` attribute) ``sys.stdin`` itself yields bytes.
    stdin_bytes = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[stdin_bytes])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))


if __name__ == '__main__':
    main()
81 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | import sys
30 | from .charsetprober import CharSetProber
31 |
32 |
class CharSetGroupProber(CharSetProber):
    """Prober that delegates to the probers listed in ``_mProbers`` and
    remembers the one with the best result so far.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mActiveNum = 0
        self._mProbers = []
        self._mBestGuessProber = None

    def reset(self):
        """Reset every member prober, mark it active and recount them."""
        CharSetProber.reset(self)
        self._mActiveNum = 0
        for member in self._mProbers:
            if not member:
                continue
            member.reset()
            member.active = True
            self._mActiveNum += 1
        self._mBestGuessProber = None

    def get_charset_name(self):
        """Return the best-guess prober's charset name, or None if there is
        no best guess even after recomputing the confidence.
        """
        if not self._mBestGuessProber:
            # get_confidence() may select a best-guess prober as a side effect.
            self.get_confidence()
            if not self._mBestGuessProber:
                return None
        return self._mBestGuessProber.get_charset_name()

    def feed(self, aBuf):
        """Feed *aBuf* to each active member prober and return this
        prober's state.

        Stops early when a member reports ``eFoundIt``, or when the last
        active member reports ``eNotMe``.
        """
        for member in self._mProbers:
            if not member or not member.active:
                continue
            verdict = member.feed(aBuf)
            if not verdict:
                continue
            if verdict == constants.eFoundIt:
                self._mBestGuessProber = member
                return self.get_state()
            if verdict == constants.eNotMe:
                member.active = False
                self._mActiveNum -= 1
                if self._mActiveNum <= 0:
                    self._mState = constants.eNotMe
                    return self.get_state()
        return self.get_state()

    def get_confidence(self):
        """Return the confidence of this group.

        0.99 when the state is ``eFoundIt``, 0.01 when it is ``eNotMe``;
        otherwise the highest confidence among the active member probers
        (recording that member as the best guess), or 0.0 if none qualifies.
        """
        state = self.get_state()
        if state == constants.eFoundIt:
            return 0.99
        if state == constants.eNotMe:
            return 0.01

        top_confidence = 0.0
        self._mBestGuessProber = None
        for member in self._mProbers:
            if not member:
                continue
            if not member.active:
                if constants._debug:
                    sys.stderr.write(member.get_charset_name()
                                     + ' not active\n')
                continue
            confidence = member.get_confidence()
            if constants._debug:
                sys.stderr.write('%s confidence = %s\n' %
                                 (member.get_charset_name(), confidence))
            if top_confidence < confidence:
                top_confidence = confidence
                self._mBestGuessProber = member

        if not self._mBestGuessProber:
            return 0.0
        return top_confidence
107 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/charsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from . import constants
30 | import re
31 |
32 |
class CharSetProber:
    """Base prober: holds a detection state and provides default
    implementations plus byte-filtering helpers.
    """

    def __init__(self):
        pass

    def reset(self):
        """Put the prober back into the ``eDetecting`` state."""
        self._mState = constants.eDetecting

    def get_charset_name(self):
        """Return None; the base prober names no charset."""
        return None

    def feed(self, aBuf):
        """Accept *aBuf* and do nothing with it."""
        pass

    def get_state(self):
        """Return the current detection state."""
        return self._mState

    def get_confidence(self):
        """Return 0.0; the base prober has no confidence."""
        return 0.0

    def filter_high_bit_only(self, aBuf):
        """Replace every run of bytes in 0x00-0x7F with a single space."""
        return re.sub(b'([\x00-\x7F])+', b' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        """Replace every run of ASCII letters with a single space."""
        return re.sub(b'([A-Za-z])+', b' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        """Return *aBuf* unchanged."""
        # TODO
        return aBuf
63 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .constants import eStart
29 | from .compat import wrap_ord
30 |
31 |
class CodingStateMachine:
    """Table-driven state machine stepped one byte at a time.

    The model *sm* is a mapping that supplies ``classTable``,
    ``charLenTable``, ``classFactor``, ``stateTable`` and ``name``.
    """

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentBytePos = 0
        self._mCurrentCharLen = 0
        self.reset()

    def reset(self):
        """Return to the ``eStart`` state."""
        self._mCurrentState = eStart

    def next_state(self, c):
        """Consume the byte *c* and return the resulting state."""
        model = self._mModel
        # Look up the byte's class. When starting a new character, also
        # look up that character's byte length.
        # PY3K: aBuf is a byte stream, so c is an int, not a byte
        byte_class = model['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = model['charLenTable'][byte_class]
        # The next state is indexed by the current state and the byte class.
        table_index = (self._mCurrentState * model['classFactor']
                       + byte_class)
        self._mCurrentState = model['stateTable'][table_index]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        """Return the byte length recorded for the current character."""
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        """Return the ``name`` entry of the model."""
        return self._mModel['name']
62 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/compat.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # Contributor(s):
3 | # Ian Cordasco - port to Python
4 | #
5 | # This library is free software; you can redistribute it and/or
6 | # modify it under the terms of the GNU Lesser General Public
7 | # License as published by the Free Software Foundation; either
8 | # version 2.1 of the License, or (at your option) any later version.
9 | #
10 | # This library is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | # Lesser General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU Lesser General Public
16 | # License along with this library; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 | # 02110-1301 USA
19 | ######################### END LICENSE BLOCK #########################
20 |
21 | import sys
22 |
23 |
# String types of the running interpreter.
if sys.version_info >= (3, 0):
    base_str = (bytes, str)
else:
    base_str = (str, unicode)


def wrap_ord(a):
    """Return ``ord(a)`` for a string on Python 2; otherwise return *a*
    unchanged.
    """
    if sys.version_info < (3, 0) and isinstance(a, base_str):
        return ord(a)
    return a
35 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/constants.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
# When truthy, CharSetGroupProber.get_confidence writes per-prober
# diagnostics to stderr.
_debug = 0

# Prober detection states (CharSetProber.reset starts in eDetecting).
eDetecting = 0
eFoundIt = 1
eNotMe = 2

# Coding state machine states (CodingStateMachine.reset starts in eStart).
eStart = 0
eError = 1
eItsMe = 2

# NOTE(review): not referenced in the code visible here; presumably a
# confidence cutoff used by the probers -- confirm against its users.
SHORTCUT_THRESHOLD = 0.95
40 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/cp949prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import CP949SMModel
32 |
33 |
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober configured with the CP949 state-machine model and
    the EUC-KR distribution analysis.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # NOTE: CP949 is a superset of EUC-KR, so the distribution should be
        #       not different.
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(CP949SMModel)
        self.reset()

    def get_charset_name(self):
        """Return the charset name reported by this prober."""
        return "CP949"
45 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/escprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
30 | ISO2022KRSMModel)
31 | from .charsetprober import CharSetProber
32 | from .codingstatemachine import CodingStateMachine
33 | from .compat import wrap_ord
34 |
35 |
class EscCharSetProber(CharSetProber):
    """Prober that feeds every byte to the HZ, ISO2022CN, ISO2022JP and
    ISO2022KR coding state machines until one of them matches.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        models = (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
                  ISO2022KRSMModel)
        self._mCodingSM = [CodingStateMachine(model) for model in models]
        self.reset()

    def reset(self):
        """Reactivate and reset every state machine; forget any detection."""
        CharSetProber.reset(self)
        for machine in self._mCodingSM:
            if machine:
                machine.active = True
                machine.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        """Return the detected charset name, or None if nothing matched."""
        return self._mDetectedCharset

    def get_confidence(self):
        """Return 0.99 once a charset has been detected, else 0.00."""
        return 0.99 if self._mDetectedCharset else 0.00

    def feed(self, aBuf):
        """Step each active state machine through *aBuf* and return the
        prober state.

        A machine that reports ``eError`` is deactivated; when none remain
        the state becomes ``eNotMe``. A machine that reports ``eItsMe``
        sets the state to ``eFoundIt`` and supplies the detected charset.
        """
        for byte in aBuf:
            # PY3K: aBuf is a byte array, so byte is an int, not a byte
            for machine in self._mCodingSM:
                if not machine or not machine.active:
                    continue
                outcome = machine.next_state(wrap_ord(byte))
                if outcome == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = machine.get_coding_state_machine()  # nopep8
                    return self.get_state()
                if outcome == constants.eError:
                    machine.active = False
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()

        return self.get_state()
87 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | import sys
29 | from . import constants
30 | from .mbcharsetprober import MultiByteCharSetProber
31 | from .codingstatemachine import CodingStateMachine
32 | from .chardistribution import EUCJPDistributionAnalysis
33 | from .jpcntx import EUCJPContextAnalysis
34 | from .mbcssm import EUCJPSMModel
35 |
36 |
class EUCJPProber(MultiByteCharSetProber):
    """Prober for EUC-JP.

    Drives a coding state machine built from ``EUCJPSMModel`` and feeds both
    a distribution analyzer and a context analyzer; the reported confidence
    is the larger of the two analyzers' confidences.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return the fixed charset name ``"EUC-JP"``."""
        return "EUC-JP"

    def feed(self, aBuf):
        """Feed a chunk of input and return the resulting prober state.

        An empty ``aBuf`` is accepted: it leaves the remembered last byte
        untouched instead of raising IndexError.
        """
        aLen = len(aBuf)
        for i in range(0, aLen):
            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The two-byte window straddles the previous buffer:
                    # slot 0 holds its last byte, slot 1 the current byte.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call; an empty buffer has no
        # final byte, and indexing it would raise IndexError.
        if aLen:
            self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
91 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/euckrprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import EUCKRSMModel
32 |
33 |
class EUCKRProber(MultiByteCharSetProber):
    """Multi-byte prober wired with the EUC-KR state machine model and
    the EUC-KR distribution analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name ``"EUC-KR"``."""
        return "EUC-KR"
43 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/euctwprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCTWDistributionAnalysis
31 | from .mbcssm import EUCTWSMModel
32 |
class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte prober wired with the EUC-TW state machine model and
    the EUC-TW distribution analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name ``"EUC-TW"``."""
        return "EUC-TW"
42 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/gb2312prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import GB2312DistributionAnalysis
31 | from .mbcssm import GB2312SMModel
32 |
class GB2312Prober(MultiByteCharSetProber):
    """Multi-byte prober wired with the GB2312 state machine model and
    the GB2312 distribution analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
        self._mCodingSM = CodingStateMachine(GB2312SMModel)
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name ``"GB2312"``."""
        return "GB2312"
42 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/mbcharsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | # Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301 USA
28 | ######################### END LICENSE BLOCK #########################
29 |
30 | import sys
31 | from . import constants
32 | from .charsetprober import CharSetProber
33 |
34 |
class MultiByteCharSetProber(CharSetProber):
    """Base class for the multi-byte charset probers.

    Subclasses are expected to assign ``_mCodingSM`` (a coding state machine)
    and ``_mDistributionAnalyzer`` in their constructor; :meth:`feed` drives
    both of them.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # Two-slot window: slot 0 keeps the last byte of the previous buffer,
        # slot 1 receives the first byte of the current buffer.
        self._mLastChar = [0, 0]

    def reset(self):
        """Reset the prober, its state machine and its analyzer (if set)."""
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        """Return the charset name; this base implementation returns None."""
        pass

    def feed(self, aBuf):
        """Feed a chunk of input and return the resulting prober state.

        An empty ``aBuf`` is accepted: it leaves the remembered last byte
        untouched instead of raising IndexError.
        """
        aLen = len(aBuf)
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The two-byte window straddles the previous buffer.
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call; an empty buffer has no
        # final byte, and indexing it would raise IndexError.
        if aLen:
            self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mDistributionAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the distribution analyzer's confidence."""
        return self._mDistributionAnalyzer.get_confidence()
87 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/mbcsgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | # Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301 USA
28 | ######################### END LICENSE BLOCK #########################
29 |
30 | from .charsetgroupprober import CharSetGroupProber
31 | from .utf8prober import UTF8Prober
32 | from .sjisprober import SJISProber
33 | from .eucjpprober import EUCJPProber
34 | from .gb2312prober import GB2312Prober
35 | from .euckrprober import EUCKRProber
36 | from .cp949prober import CP949Prober
37 | from .big5prober import Big5Prober
38 | from .euctwprober import EUCTWProber
39 |
40 |
class MBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles one instance of each multi-byte prober."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # Instantiated in this exact order.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self._mProbers = [prober_class() for prober_class in prober_classes]
        self.reset()
55 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from .charsetgroupprober import CharSetGroupProber
30 | from .sbcharsetprober import SingleByteCharSetProber
31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 | Latin5CyrillicModel, MacCyrillicModel,
33 | Ibm866Model, Ibm855Model)
34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 | from .langthaimodel import TIS620ThaiModel
38 | from .langhebrewmodel import Win1255HebrewModel
39 | from .hebrewprober import HebrewProber
40 |
41 |
class SBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles the single-byte charset probers.

    One ``SingleByteCharSetProber`` is created per language model, followed
    by a ``HebrewProber`` together with the two Win1255 model probers it is
    linked to through ``set_model_probers``.
    """

    def __init__(self):
        CharSetGroupProber.__init__(self)
        plain_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            Latin2HungarianModel,
            Win1250HungarianModel,
            TIS620ThaiModel,
        )
        self._mProbers = [SingleByteCharSetProber(model)
                          for model in plain_models]

        # The Hebrew prober is shared by two Win1255 probers that differ only
        # in their second constructor argument (False vs. True).
        hebrew = HebrewProber()
        logical = SingleByteCharSetProber(Win1255HebrewModel, False, hebrew)
        visual = SingleByteCharSetProber(Win1255HebrewModel, True, hebrew)
        hebrew.set_model_probers(logical, visual)
        self._mProbers.extend([hebrew, logical, visual])

        self.reset()
70 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/sjisprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | import sys
29 | from .mbcharsetprober import MultiByteCharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .chardistribution import SJISDistributionAnalysis
32 | from .jpcntx import SJISContextAnalysis
33 | from .mbcssm import SJISSMModel
34 | from . import constants
35 |
36 |
class SJISProber(MultiByteCharSetProber):
    """Prober driven by the SJIS state machine model.

    Feeds both a distribution analyzer and a context analyzer; the charset
    name is whatever the context analyzer reports, and the confidence is the
    larger of the two analyzers' confidences.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(SJISSMModel)
        self._mDistributionAnalyzer = SJISDistributionAnalysis()
        self._mContextAnalyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return the charset name reported by the context analyzer."""
        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        """Feed a chunk of input and return the resulting prober state.

        An empty ``aBuf`` is accepted: it leaves the remembered last byte
        untouched instead of raising IndexError.
        """
        aLen = len(aBuf)
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The two-byte window straddles the previous buffer; the
                    # context analyzer only gets its last ``charLen`` slots.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
                                                charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    # Context window: two bytes starting at i + 1 - charLen.
                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
                                                     - charLen], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call; an empty buffer has no
        # final byte, and indexing it would raise IndexError.
        if aLen:
            self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
92 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/utf8prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | from .charsetprober import CharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .mbcssm import UTF8SMModel
32 |
33 | ONE_CHAR_PROB = 0.5
34 |
35 |
class UTF8Prober(CharSetProber):
    """Prober driven by the UTF-8 state machine model.

    Counts the characters whose length, as reported by the state machine, is
    two bytes or more; the confidence grows with that count.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(UTF8SMModel)
        self.reset()

    def reset(self):
        """Reset the prober, the state machine and the multi-byte counter."""
        CharSetProber.reset(self)
        self._mCodingSM.reset()
        self._mNumOfMBChar = 0

    def get_charset_name(self):
        """Return the fixed charset name ``"utf-8"``."""
        return "utf-8"

    def feed(self, aBuf):
        """Feed a chunk of input and return the resulting prober state."""
        machine = self._mCodingSM
        for byte in aBuf:
            outcome = machine.next_state(byte)
            if outcome == constants.eError:
                self._mState = constants.eNotMe
                break
            if outcome == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if outcome == constants.eStart:
                if machine.get_current_charlen() >= 2:
                    self._mNumOfMBChar += 1

        # Shortcut: stop detecting once the confidence is high enough.
        if self.get_state() == constants.eDetecting:
            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return 0.99 from six multi-byte characters on; below that,
        ``1.0 - 0.99 * ONE_CHAR_PROB ** count``."""
        if self._mNumOfMBChar >= 6:
            return 0.99
        doubt = 0.99
        for _ in range(0, self._mNumOfMBChar):
            doubt = doubt * ONE_CHAR_PROB
        return 1.0 - doubt
77 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | urllib3 - Thread-safe connection pooling and re-using.
3 | """
4 |
5 | from __future__ import absolute_import
6 | import warnings
7 |
8 | from .connectionpool import (
9 | HTTPConnectionPool,
10 | HTTPSConnectionPool,
11 | connection_from_url
12 | )
13 |
14 | from . import exceptions
15 | from .filepost import encode_multipart_formdata
16 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url
17 | from .response import HTTPResponse
18 | from .util.request import make_headers
19 | from .util.url import get_host
20 | from .util.timeout import Timeout
21 | from .util.retry import Retry
22 |
23 |
24 | # Set default logging handler to avoid "No handler found" warnings.
25 | import logging
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    # ``logging`` has no NullHandler here: define a stand-in that discards
    # every record it is given.
    class NullHandler(logging.Handler):
        def emit(self, record):
            pass
32 |
33 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
34 | __license__ = 'MIT'
35 | __version__ = '1.16'
36 |
37 | __all__ = (
38 | 'HTTPConnectionPool',
39 | 'HTTPSConnectionPool',
40 | 'PoolManager',
41 | 'ProxyManager',
42 | 'HTTPResponse',
43 | 'Retry',
44 | 'Timeout',
45 | 'add_stderr_logger',
46 | 'connection_from_url',
47 | 'disable_warnings',
48 | 'encode_multipart_formdata',
49 | 'get_host',
50 | 'make_headers',
51 | 'proxy_from_url',
52 | )
53 |
54 | logging.getLogger(__name__).addHandler(NullHandler())
55 |
56 |
def add_stderr_logger(level=logging.DEBUG):
    """
    Attach a new StreamHandler to this package's logger and set the logger
    to ``level``. Handy for quick debugging.

    Returns the handler that was added.
    """
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))

    # This function has to live in this __init__.py so that __name__ is
    # correct even when urllib3 is vendored inside another package.
    package_logger = logging.getLogger(__name__)
    package_logger.addHandler(stream_handler)
    package_logger.setLevel(level)
    package_logger.debug('Added a stderr logging handler to logger: %s',
                         __name__)
    return stream_handler
73 |
74 | # ... Clean up.
75 | del NullHandler
76 |
77 |
# All warning filters *must* be appended unless you're really certain that they
# shouldn't be: otherwise, it's very hard for users to use most Python
# mechanisms to silence them.
# SecurityWarnings always go off by default.
warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
# SubjectAltNameWarnings should go off once per host.
warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True)
# InsecurePlatformWarnings don't vary between requests, so we keep the default.
warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
                      append=True)
# SNIMissingWarnings should go off only once.
warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True)
90 |
91 |
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Install an 'ignore' warnings filter for ``category``; with the default
    category this quickly silences all urllib3 warnings.
    """
    warnings.simplefilter(action='ignore', category=category)
97 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/src/lib/requests/packages/urllib3/contrib/__init__.py
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/filepost.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import codecs
3 |
4 | from uuid import uuid4
5 | from io import BytesIO
6 |
7 | from .packages import six
8 | from .packages.six import b
9 | from .fields import RequestField
10 |
# Element 3 of the UTF-8 codec lookup result; used further down in this
# module as ``writer(stream).write(text)``.
writer = codecs.lookup('utf-8')[3]
12 |
13 |
def choose_boundary():
    """
    Our embarrassingly simple replacement for mimetools.choose_boundary:
    the hex form of a freshly generated uuid4.
    """
    token = uuid4()
    return token.hex
19 |
20 |
def iter_field_objects(fields):
    """
    Yield one :class:`~urllib3.fields.RequestField` per entry of ``fields``.

    ``fields`` may be a dict, a list of (k, v) tuples, or a list of
    :class:`~urllib3.fields.RequestField` objects. Entries that already are
    RequestField instances are yielded as-is; any other entry is unpacked
    into ``RequestField.from_tuples``.
    """
    if isinstance(fields, dict):
        entries = six.iteritems(fields)
    else:
        entries = iter(fields)

    for entry in entries:
        if not isinstance(entry, RequestField):
            entry = RequestField.from_tuples(*entry)
        yield entry
39 |
40 |
def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Return a generator of (k, v) pairs taken from ``fields``.

    :class:`~urllib3.fields.RequestField` made this function obsolete; use
    :func:`iter_field_objects` instead, which yields
    :class:`~urllib3.fields.RequestField` objects.

    Accepts a dict or a list of (k, v) tuples.
    """
    pairs = six.iteritems(fields) if isinstance(fields, dict) else fields
    return ((key, value) for key, value in pairs)
57 |
58 |
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode ``fields`` as a multipart/form-data body.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        Boundary string to use. When it is None, one is generated with
        this module's :func:`choose_boundary`.

    Returns a ``(body, content_type)`` tuple: the encoded body bytes and
    the matching ``multipart/form-data; boundary=...`` header value.
    """
    if boundary is None:
        boundary = choose_boundary()

    body = BytesIO()
    text_out = writer(body)  # writes text into ``body`` through the codec

    for field in iter_field_objects(fields):
        body.write(b('--%s\r\n' % boundary))
        text_out.write(field.render_headers())

        payload = field.data
        if isinstance(payload, int):
            payload = str(payload)  # Backwards compatibility

        if isinstance(payload, six.text_type):
            text_out.write(payload)
        else:
            body.write(payload)

        body.write(b'\r\n')

    # Closing delimiter.
    body.write(b('--%s--\r\n' % boundary))

    return body.getvalue(), str('multipart/form-data; boundary=%s' % boundary)
95 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from . import ssl_match_hostname
4 |
5 | __all__ = ('ssl_match_hostname', )
6 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/backports/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/src/lib/requests/packages/urllib3/packages/backports/__init__.py
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/backports/makefile.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | backports.makefile
4 | ~~~~~~~~~~~~~~~~~~
5 |
6 | Backports the Python 3 ``socket.makefile`` method for use with anything that
7 | wants to create a "fake" socket object.
8 | """
9 | import io
10 |
11 | from socket import SocketIO
12 |
13 |
def backport_makefile(self, mode="r", buffering=None, encoding=None,
                      errors=None, newline=None):
    """
    Backport of ``socket.makefile`` from Python 3.5.

    Wraps ``self`` in a ``SocketIO`` object and, depending on ``mode`` and
    ``buffering``, in a buffered and/or text layer from :mod:`io`.

    :raises ValueError: if ``mode`` contains characters other than
        ``r``, ``w`` and ``b``, or if an unbuffered text stream is requested.
    """
    if set(mode) - set(["r", "w", "b"]):
        raise ValueError(
            "invalid mode %r (only r, w, b allowed)" % (mode,)
        )

    writing = "w" in mode
    reading = "r" in mode or not writing
    assert reading or writing
    binary = "b" in mode

    rawmode = ("r" if reading else "") + ("w" if writing else "")
    raw = SocketIO(self, rawmode)
    self._makefile_refs += 1

    # ``None`` and negative values both select the default buffer size.
    if buffering is None or buffering < 0:
        buffering = io.DEFAULT_BUFFER_SIZE

    if buffering == 0:
        if not binary:
            raise ValueError("unbuffered streams must be binary")
        return raw

    if reading and writing:
        buffered = io.BufferedRWPair(raw, raw, buffering)
    elif reading:
        buffered = io.BufferedReader(raw, buffering)
    else:
        assert writing
        buffered = io.BufferedWriter(raw, buffering)

    if binary:
        return buffered

    wrapper = io.TextIOWrapper(buffered, encoding, errors, newline)
    wrapper.mode = mode
    return wrapper
54 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/ssl_match_hostname/.gitignore:
--------------------------------------------------------------------------------
1 | env
2 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | # Python 3.2+
3 | from ssl import CertificateError, match_hostname
4 | except ImportError:
5 | try:
6 | # Backport of the function from a pypi module
7 | from backports.ssl_match_hostname import CertificateError, match_hostname
8 | except ImportError:
9 | # Our vendored copy
10 | from ._implementation import CertificateError, match_hostname
11 |
12 | # Not needed, but documenting what we provide.
13 | __all__ = ('CertificateError', 'match_hostname')
14 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py:
--------------------------------------------------------------------------------
1 | """The match_hostname() function from Python 3.3.3, essential when using SSL."""
2 |
3 | # Note: This file is under the PSF license as the code comes from the python
4 | # stdlib. http://docs.python.org/3/license.html
5 |
6 | import re
7 |
8 | __version__ = '3.4.0.2'
9 |
class CertificateError(ValueError):
    """Raised by this module's hostname-matching helpers when a certificate is rejected."""
    pass
12 |
13 |
14 | def _dnsname_match(dn, hostname, max_wildcards=1):
15 | """Matching according to RFC 6125, section 6.4.3
16 |
17 | http://tools.ietf.org/html/rfc6125#section-6.4.3
18 | """
19 | pats = []
20 | if not dn:
21 | return False
22 |
23 | # Ported from python3-syntax:
24 | # leftmost, *remainder = dn.split(r'.')
25 | parts = dn.split(r'.')
26 | leftmost = parts[0]
27 | remainder = parts[1:]
28 |
29 | wildcards = leftmost.count('*')
30 | if wildcards > max_wildcards:
31 | # Issue #17980: avoid denials of service by refusing more
32 | # than one wildcard per fragment. A survey of established
33 | # policy among SSL implementations showed it to be a
34 | # reasonable choice.
35 | raise CertificateError(
36 | "too many wildcards in certificate DNS name: " + repr(dn))
37 |
38 | # speed up common case w/o wildcards
39 | if not wildcards:
40 | return dn.lower() == hostname.lower()
41 |
42 | # RFC 6125, section 6.4.3, subitem 1.
43 | # The client SHOULD NOT attempt to match a presented identifier in which
44 | # the wildcard character comprises a label other than the left-most label.
45 | if leftmost == '*':
46 | # When '*' is a fragment by itself, it matches a non-empty dotless
47 | # fragment.
48 | pats.append('[^.]+')
49 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
50 | # RFC 6125, section 6.4.3, subitem 3.
51 | # The client SHOULD NOT attempt to match a presented identifier
52 | # where the wildcard character is embedded within an A-label or
53 | # U-label of an internationalized domain name.
54 | pats.append(re.escape(leftmost))
55 | else:
56 | # Otherwise, '*' matches any dotless string, e.g. www*
57 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
58 |
59 | # add the remaining fragments, ignore any wildcards
60 | for frag in remainder:
61 | pats.append(re.escape(frag))
62 |
63 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
64 | return pat.match(hostname)
65 |
66 |
def match_hostname(cert, hostname):
    """Check that *cert* (decoded, as returned by SSLSocket.getpeercert())
    is valid for *hostname*, following RFC 2818 and RFC 6125. IP addresses
    are not accepted for *hostname*.

    Returns nothing on success. Raises ValueError for an empty certificate
    and CertificateError when no name in the certificate matches.
    """
    if not cert:
        raise ValueError("empty or no certificate")

    dnsnames = []
    for key, value in cert.get('subjectAltName', ()):
        if key != 'DNS':
            continue
        if _dnsname_match(value, hostname):
            return
        dnsnames.append(value)

    if not dnsnames:
        # The subject is only consulted when subjectAltName carries no
        # dNSName entry.
        for sub in cert.get('subject', ()):
            for key, value in sub:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if key != 'commonName':
                    continue
                if _dnsname_match(value, hostname):
                    return
                dnsnames.append(value)

    if not dnsnames:
        raise CertificateError("no appropriate commonName or "
                               "subjectAltName fields were found")
    if len(dnsnames) == 1:
        raise CertificateError("hostname %r doesn't match %r"
                               % (hostname, dnsnames[0]))
    raise CertificateError("hostname %r doesn't match either of %s"
                           % (hostname, ', '.join(map(repr, dnsnames))))
106 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/util/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | # For backwards compatibility, provide imports that used to be here.
3 | from .connection import is_connection_dropped
4 | from .request import make_headers
5 | from .response import is_fp_closed
6 | from .ssl_ import (
7 | SSLContext,
8 | HAS_SNI,
9 | IS_PYOPENSSL,
10 | assert_fingerprint,
11 | resolve_cert_reqs,
12 | resolve_ssl_version,
13 | ssl_wrap_socket,
14 | )
15 | from .timeout import (
16 | current_time,
17 | Timeout,
18 | )
19 |
20 | from .retry import Retry
21 | from .url import (
22 | get_host,
23 | parse_url,
24 | split_first,
25 | Url,
26 | )
27 |
28 | __all__ = (
29 | 'HAS_SNI',
30 | 'IS_PYOPENSSL',
31 | 'SSLContext',
32 | 'Retry',
33 | 'Timeout',
34 | 'Url',
35 | 'assert_fingerprint',
36 | 'current_time',
37 | 'is_connection_dropped',
38 | 'is_fp_closed',
39 | 'get_host',
40 | 'parse_url',
41 | 'make_headers',
42 | 'resolve_cert_reqs',
43 | 'resolve_ssl_version',
44 | 'split_first',
45 | 'ssl_wrap_socket',
46 | )
47 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/util/request.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from base64 import b64encode
3 |
4 | from ..packages.six import b
5 |
6 | ACCEPT_ENCODING = 'gzip,deflate'
7 |
8 |
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None, proxy_basic_auth=None, disable_cache=None):
    """
    Build a dict of common request headers from shortcut arguments.

    :param keep_alive:
        If truthy, adds 'connection: keep-alive'.

    :param accept_encoding:
        A string is used as given, a list is joined with commas, and any
        other truthy value (e.g. ``True``) selects the module default
        ``ACCEPT_ENCODING``.

    :param user_agent:
        Value for the 'user-agent' header, such as "python-urllib3/0.6".

    :param basic_auth:
        Colon-separated username:password string, base64-encoded into an
        'authorization: Basic ...' header.

    :param proxy_basic_auth:
        Colon-separated username:password string, base64-encoded into a
        'proxy-authorization: Basic ...' header.

    :param disable_cache:
        If truthy, adds 'cache-control: no-cache'.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}

    if accept_encoding:
        if isinstance(accept_encoding, list):
            accept_encoding = ','.join(accept_encoding)
        elif not isinstance(accept_encoding, str):
            accept_encoding = ACCEPT_ENCODING
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    # Both credential headers share the same "Basic <base64>" encoding.
    credential_headers = (
        ('authorization', basic_auth),
        ('proxy-authorization', proxy_basic_auth),
    )
    for header_name, credentials in credential_headers:
        if credentials:
            encoded = b64encode(b(credentials)).decode('utf-8')
            headers[header_name] = 'Basic ' + encoded

    if disable_cache:
        headers['cache-control'] = 'no-cache'

    return headers
73 |
--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/util/response.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from ..packages.six.moves import http_client as httplib
3 |
4 | from ..exceptions import HeaderParsingError
5 |
6 |
def is_fp_closed(obj):
    """
    Report whether a file-like object is closed.

    :param obj:
        The file-like object to check.

    :raises ValueError:
        If ``obj`` has neither a ``closed`` nor an ``fp`` attribute.
    """
    missing = object()

    # Preferred: the standard file-object ``closed`` attribute.
    closed = getattr(obj, 'closed', missing)
    if closed is not missing:
        return closed

    # Fallback: a container that releases its inner file object on
    # exhaustion (e.g. HTTPResponse) counts as closed once ``fp`` is None.
    inner = getattr(obj, 'fp', missing)
    if inner is not missing:
        return inner is None

    raise ValueError("Unable to determine whether fp is closed.")
29 |
30 |
def assert_header_parsing(headers):
    """
    Verify that *headers* was parsed without problems.

    Collects the ``defects`` attribute and, when available, the result of
    ``get_payload()`` from the message; either being truthy is treated as a
    parsing failure.

    Only works on Python 3.

    :param headers: Headers to verify.
    :type headers: `httplib.HTTPMessage`.

    :raises TypeError:
        If *headers* is not an ``httplib.HTTPMessage``.
    :raises urllib3.exceptions.HeaderParsingError:
        If parsing errors are found.
    """
    # A wrong argument type would otherwise fail silently, so check it
    # explicitly to make debugging easier.
    if not isinstance(headers, httplib.HTTPMessage):
        raise TypeError('expected httplib.Message, got {0}.'.format(
            type(headers)))

    defects = getattr(headers, 'defects', None)
    get_payload = getattr(headers, 'get_payload', None)

    # Platform-specific: get_payload is only expected on Python 3.
    unparsed_data = get_payload() if get_payload else None

    if not (defects or unparsed_data):
        return
    raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
60 |
61 |
def is_response_to_head(response):
    """
    Tell whether *response* answers a HEAD request.
    Handles the quirks of AppEngine, where the method is stored as an int.

    :param response:
    :type response: :class:`httplib.HTTPResponse`
    """
    # FIXME: Can we do this somehow without accessing private httplib _method?
    method = response._method
    if not isinstance(method, int):
        return method.upper() == 'HEAD'
    # Platform-specific: Appengine stores the method as an integer code.
    return method == 3
75 |
--------------------------------------------------------------------------------
/src/lib/requests/status_codes.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .structures import LookupDict
4 |
5 | _codes = {
6 |
7 | # Informational.
8 | 100: ('continue',),
9 | 101: ('switching_protocols',),
10 | 102: ('processing',),
11 | 103: ('checkpoint',),
12 | 122: ('uri_too_long', 'request_uri_too_long'),
13 | 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
14 | 201: ('created',),
15 | 202: ('accepted',),
16 | 203: ('non_authoritative_info', 'non_authoritative_information'),
17 | 204: ('no_content',),
18 | 205: ('reset_content', 'reset'),
19 | 206: ('partial_content', 'partial'),
20 | 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
21 | 208: ('already_reported',),
22 | 226: ('im_used',),
23 |
24 | # Redirection.
25 | 300: ('multiple_choices',),
26 | 301: ('moved_permanently', 'moved', '\\o-'),
27 | 302: ('found',),
28 | 303: ('see_other', 'other'),
29 | 304: ('not_modified',),
30 | 305: ('use_proxy',),
31 | 306: ('switch_proxy',),
32 | 307: ('temporary_redirect', 'temporary_moved', 'temporary'),
33 | 308: ('permanent_redirect',
34 | 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0
35 |
36 | # Client Error.
37 | 400: ('bad_request', 'bad'),
38 | 401: ('unauthorized',),
39 | 402: ('payment_required', 'payment'),
40 | 403: ('forbidden',),
41 | 404: ('not_found', '-o-'),
42 | 405: ('method_not_allowed', 'not_allowed'),
43 | 406: ('not_acceptable',),
44 | 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
45 | 408: ('request_timeout', 'timeout'),
46 | 409: ('conflict',),
47 | 410: ('gone',),
48 | 411: ('length_required',),
49 | 412: ('precondition_failed', 'precondition'),
50 | 413: ('request_entity_too_large',),
51 | 414: ('request_uri_too_large',),
52 | 415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
53 | 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
54 | 417: ('expectation_failed',),
55 | 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
56 | 421: ('misdirected_request',),
57 | 422: ('unprocessable_entity', 'unprocessable'),
58 | 423: ('locked',),
59 | 424: ('failed_dependency', 'dependency'),
60 | 425: ('unordered_collection', 'unordered'),
61 | 426: ('upgrade_required', 'upgrade'),
62 | 428: ('precondition_required', 'precondition'),
63 | 429: ('too_many_requests', 'too_many'),
64 | 431: ('header_fields_too_large', 'fields_too_large'),
65 | 444: ('no_response', 'none'),
66 | 449: ('retry_with', 'retry'),
67 | 450: ('blocked_by_windows_parental_controls', 'parental_controls'),
68 | 451: ('unavailable_for_legal_reasons', 'legal_reasons'),
69 | 499: ('client_closed_request',),
70 |
71 | # Server Error.
72 | 500: ('internal_server_error', 'server_error', '/o\\', '✗'),
73 | 501: ('not_implemented',),
74 | 502: ('bad_gateway',),
75 | 503: ('service_unavailable', 'unavailable'),
76 | 504: ('gateway_timeout',),
77 | 505: ('http_version_not_supported', 'http_version'),
78 | 506: ('variant_also_negotiates',),
79 | 507: ('insufficient_storage',),
80 | 509: ('bandwidth_limit_exceeded', 'bandwidth'),
81 | 510: ('not_extended',),
82 | 511: ('network_authentication_required', 'network_auth', 'network_authentication'),
83 | }
84 |
# Expose every alias in ``_codes`` as an attribute of ``codes``; aliases that
# do not begin with a backslash are additionally exposed in upper case.
codes = LookupDict(name='status_codes')

for code, titles in _codes.items():
    for title in titles:
        setattr(codes, title, code)
        if title.startswith('\\'):
            continue
        setattr(codes, title.upper(), code)
92 |
--------------------------------------------------------------------------------
/src/lib/requests/structures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.structures
5 | ~~~~~~~~~~~~~~~~~~~
6 |
7 | Data structures that power Requests.
8 | """
9 |
import collections

try:  # Python 3.3+
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2
    from collections import Mapping, MutableMapping

from .compat import OrderedDict
13 |
14 |
class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of ``MutableMapping`` as well as
    dict's ``copy``. Also provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the
    value of a ``'Content-Encoding'`` response header, regardless
    of how the header name was originally stored.

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    The base classes come from ``collections.abc`` where available: the
    aliases on the ``collections`` module itself were removed in Python 3.10.
    """

    def __init__(self, data=None, **kwargs):
        # Maps lowercased key -> (original key, value), in insertion order.
        self._store = OrderedDict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        other = CaseInsensitiveDict(other)
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        """Return a new CaseInsensitiveDict with the same keys and values."""
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))
87 |
88 |
class LookupDict(dict):
    """Dictionary lookup object.

    Values are stored as instance attributes; item access and ``get`` read
    from the instance ``__dict__`` rather than from the underlying dict.
    """

    def __init__(self, name=None):
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        # The format string must contain a placeholder; formatting an empty
        # string with an argument raises TypeError.
        return '<lookup \'%s\'>' % (self.name,)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None

        return self.__dict__.get(key, None)

    def get(self, key, default=None):
        """Return the attribute named *key*, or *default* when it is not set."""
        return self.__dict__.get(key, default)
106 |
--------------------------------------------------------------------------------
/src/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/src/tools/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import json
3 | import os
4 |
5 | from src.tools.path import Path
6 |
7 |
class Config(object):
    """
    Holds settings and global values, and loads/saves them as JSON at
    ``Path.config_path``.
    """
    # Global values
    update_time = '2017-01-24'  # release date

    debug = False
    debug_for_create_book = False  # testing e-book generation: skip the page-fetching stage
    debug_for_thread = False  # testing threading: run single-threaded instead

    # NOTE(review): default credentials are hard-coded here (updated 2017).
    account = 'mengqingxue@yaozeyuan.online'
    password = '912714398d'
    remember_account = False  # whether to reuse the stored account/password
    max_thread = 10  # maximum thread count; 5 would do, but image downloads benefit from more
    picture_quality = 1  # image quality (0/1/2: none / standard / original)
    max_try = 5  # maximum attempts when downloading an image
    max_book_size_mb = 100  # maximum size of one file in MB; larger books are split into volumes
    timeout_download_picture = 10  # give the image server some extra time
    timeout_download_html = 5

    article_order_by = ' order by article_id asc '  # article ordering, default: ascending id
    answer_order_by = ' order by voteup_count desc '  # answer ordering, default: most upvoted first
    topic_or_collection_answer_order_by = ' '  # answers in a topic/collection keep their listed order

    @staticmethod
    def init_config():
        """Initialise the configuration by loading the saved file, if any."""
        Config.load()
        return

    @staticmethod
    def save():
        """Write every JSON-serialisable, non-dunder class attribute to the config file."""
        data = {}
        with open(Path.config_path, 'w') as f:
            for key, value in Config.__dict__.items():
                if key[:2] == '__':
                    # Built-in attributes are skipped outright.
                    continue
                try:
                    # Brute-force probe for serialisability.
                    json.dumps(value)
                except TypeError:
                    continue
                data[key] = value
            json.dump(data, f, indent=4)
        return

    @staticmethod
    def load():
        """Apply the saved config file to the class, if it exists and allows it."""
        if not os.path.isfile(Path.config_path):
            return
        with open(Path.config_path) as f:
            config = json.load(f)
        if not config.get('remember_account'):
            # "Do not remember the account": ignore the file and keep the
            # defaults. A user forcing an empty account in the file is not
            # considered.
            return
        for key, value in config.items():
            setattr(Config, key, value)
        return
71 |
--------------------------------------------------------------------------------
/src/tools/controler.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from multiprocessing.dummy import Pool as ThreadPool # 多线程并行库
3 |
4 | from src.tools.config import Config
5 |
6 |
7 | class Control(object):
8 | thread_pool = ThreadPool(Config.max_thread)
9 |
10 | @staticmethod
11 | def control_center(argv, test_flag):
12 | max_try = Config.max_try
13 | for time in range(max_try):
14 | if test_flag:
15 | if Config.debug_for_thread:
16 | Control.debug_control(argv)
17 | else:
18 | Control.release_control(argv)
19 | Control.thread_pool.map(**argv)
20 | return
21 |
    @staticmethod
    def debug_control(argv):
        """Call ``argv['func']`` on each item of ``argv['iterable']``, one at a time in the caller's thread."""
        for item in argv['iterable']:
            argv['func'](item)
        return
27 |
    @staticmethod
    def release_control(argv):
        """Run the job on the shared thread pool, ignoring any failure.

        *argv* is expanded as keyword arguments to ``Control.thread_pool.map``.
        """
        try:
            Control.thread_pool.map(**argv)
        except Exception:
            # By convention every error is swallowed here;
            # debugging is switched on only after a user reports a problem.
            pass
        return
37 |
--------------------------------------------------------------------------------
/src/tools/debug.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | import logging.handlers
4 | import sys
5 |
6 | from src.tools.config import Config
7 |
8 |
class Debug(object):
    """
    Logging and console-printing helpers.
    """
    logger = logging.getLogger('main')  # the logger named 'main'
    if Config.debug:
        logger.setLevel(logging.DEBUG)  # debug mode
    else:
        logger.setLevel(logging.INFO)  # release: debug output is suppressed

    # Helper functions
    @staticmethod
    def print_in_single_line(text=''):
        """Blank the current console line, then write *text* on it.

        Output is best-effort: ordinary errors while writing are ignored.
        """
        try:
            sys.stdout.write("\r" + " " * 60 + '\r')
            sys.stdout.flush()
            sys.stdout.write(text)
            sys.stdout.flush()
        except Exception:
            # Only ordinary errors are ignored, so KeyboardInterrupt and
            # SystemExit still propagate.
            pass
        return

    @staticmethod
    def print_dict(data={}, key='', prefix=''):
        """Recursively print *data* as ``key => value`` lines.

        Nested dicts are walked with *prefix* extended by one space per
        level. Encoding failures are logged instead of raised.
        The default ``data`` dict is never mutated here.
        """
        try:
            if isinstance(data, dict):
                for key in data:
                    Debug.print_dict(data[key], key, prefix + ' ')
            else:
                if isinstance(data, basestring):
                    print(prefix + unicode(key) + ' => ' + data)
                else:
                    print(prefix + unicode(key) + ' => ' + unicode(data))
        except UnicodeEncodeError:
            Debug.logger.info(u'编码异常')
            Debug.logger.info(u'系统默认编码为:' + sys.getdefaultencoding())
        return

    @staticmethod
    def print_config():
        """Print every attribute of ``Config``."""
        Debug.print_dict(Config.__dict__)
        return
52 |
--------------------------------------------------------------------------------
/src/tools/extra_tools.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import hashlib
3 | import time
4 | import datetime
5 |
6 |
class ExtraTools(object):
    """Small stateless helpers for timestamps, dates and hashing."""

    @staticmethod
    def format_date(date_format, timestamp):
        """Format *timestamp* (seconds since the epoch) in local time with *date_format*."""
        return time.strftime(date_format, time.localtime(timestamp))

    @staticmethod
    def get_time():
        """Return the current Unix time as a string of whole seconds."""
        return str(time.time()).split('.')[0]

    @staticmethod
    def get_friendly_time():
        """Return the current local time in ISO format without fractional seconds."""
        # NOTE(review): as written, replace() swaps ':' for an identical ':'
        # and has no effect; kept unchanged -- confirm the intended character.
        return datetime.datetime.today().isoformat().split('.')[0].replace(':', ':')

    @staticmethod
    def get_today():
        """Return today's date as ``YYYY-MM-DD``."""
        return datetime.date.today().isoformat()

    @staticmethod
    def get_yesterday():
        """Return yesterday's date as ``YYYY-MM-DD``."""
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        return yesterday.isoformat()

    @staticmethod
    def md5(content):
        """Return the hexadecimal MD5 digest of *content*.

        Byte strings are hashed as-is, text is encoded as UTF-8 first, and
        any other object is hashed via its ``str()`` form. hashlib only
        accepts bytes, so text must be encoded explicitly; relying on
        ``str(content)`` fails for non-ASCII text on Python 2 and for all
        input on Python 3.
        """
        if isinstance(content, type(u'')):
            data = content.encode('utf-8')
        elif isinstance(content, bytes):
            data = content
        else:
            data = str(content)
            if not isinstance(data, bytes):
                data = data.encode('utf-8')
        encrypt = hashlib.md5()
        encrypt.update(data)
        return encrypt.hexdigest()
37 |
--------------------------------------------------------------------------------
/src/tools/path.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import shutil
4 | import locale
5 |
6 |
class Path(object):
    """Filesystem locations used by the application, plus small path helpers."""
    # Starting directory, without a trailing separator.
    # sys.stdout.encoding has already been changed to utf-8 by this point, so
    # the default encoding is taken from locale.getpreferredencoding() instead.
    base_path = unicode(os.path.abspath('.').decode(locale.getpreferredencoding()))

    config_path = base_path + u'/config.json'
    db_path = base_path + u'/zhihuDB_18.sqlite'
    sql_path = base_path + u'/db/zhihuhelp.sql'

    www_css = base_path + u'/www/css'
    www_image = base_path + u'/www/images'

    html_pool_path = base_path + u'/知乎电子书临时资源库/知乎网页池'
    image_pool_path = base_path + u'/知乎电子书临时资源库/知乎图片池'
    book_pool_path = base_path + u'/知乎电子书临时资源库/知乎电子书临时文件池'
    result_path = base_path + u'/知乎助手生成的电子书'

    @staticmethod
    def reset_path():
        """Change the working directory back to ``Path.base_path``."""
        Path.chdir(Path.base_path)
        return

    @staticmethod
    def pwd():
        """Print the real path of the current working directory."""
        print(os.path.realpath('.'))
        return

    @staticmethod
    def get_pwd():
        """Return the absolute current directory, decoded with the locale's preferred encoding."""
        return unicode(os.path.abspath('.').decode(locale.getpreferredencoding()))

    @staticmethod
    def mkdir(path):
        """Create *path*; an OSError (e.g. the directory already exists) is ignored."""
        try:
            os.mkdir(path)
        except OSError:
            pass
        return

    @staticmethod
    def chdir(path):
        """Change into *path*; on OSError, try to create it and change into it again."""
        try:
            os.chdir(path)
        except OSError:
            Path.mkdir(path)
            os.chdir(path)
        return

    @staticmethod
    def rmdir(path):
        """Recursively delete *path*, ignoring errors; a falsy *path* is a no-op."""
        if not path:
            return
        shutil.rmtree(path, ignore_errors=True)
        return

    @staticmethod
    def copy(src, dst):
        """Copy the file or directory tree *src* to *dst*; a missing *src* is skipped."""
        if not os.path.exists(src):
            return
        if not os.path.isdir(src):
            shutil.copy(src=src, dst=dst)
            return
        shutil.copytree(src, dst)
        return

    @staticmethod
    def get_filename(src):
        """Return the final path component of *src*."""
        return os.path.basename(src)

    @staticmethod
    def init_base_path():
        """Recompute every path attribute from the current working directory."""
        root = Path.get_pwd()
        Path.base_path = root

        Path.config_path = root + u'/config.json'
        Path.db_path = root + u'/zhihuDB_18.sqlite'
        Path.sql_path = root + u'/db/zhihuhelp.sql'

        Path.www_css = root + u'/www/css'
        Path.www_image = root + u'/www/images'

        Path.html_pool_path = root + u'/知乎电子书临时资源库/知乎网页池'
        Path.image_pool_path = root + u'/知乎电子书临时资源库/知乎图片池'
        Path.book_pool_path = root + u'/知乎电子书临时资源库/知乎电子书临时文件池'
        Path.result_path = root + u'/知乎助手生成的电子书'

        return

    @staticmethod
    def init_work_directory():
        """Create the output and temporary-resource directories under the base path."""
        Path.reset_path()
        for top_level in (u'./知乎助手生成的电子书', u'./知乎电子书临时资源库'):
            Path.mkdir(top_level)
        Path.chdir(u'./知乎电子书临时资源库')
        for pool in (u'./知乎网页池', u'./知乎图片池', u'./知乎电子书临时文件池'):
            Path.mkdir(pool)
        Path.reset_path()
        return

    @staticmethod
    def is_file(path):
        """Return whether *path* is an existing regular file."""
        return os.path.isfile(path)

    @staticmethod
    def get_img_size_by_filename_kb(filename):
        """Return the size of *filename* in the image pool divided by 1024, or 0 if it is not a file."""
        path = Path.image_pool_path + '/' + filename
        if not Path.is_file(path):
            return 0
        return os.path.getsize(path) / 1024
--------------------------------------------------------------------------------
/src/tools/template.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
def _read_template(path):
    """Return the whole content of the template file at *path*, closing it afterwards."""
    with open(path, 'r') as template_file:
        return template_file.read()


def _wrap_in_base(base, body_path):
    """Fill *base* with the body read from *body_path*, leaving ``{title}`` as a placeholder."""
    return base.format(
        **{
            'title': '{title}',
            'body': _read_template(body_path),
        }
    )


class Template(object):
    """
    File templates, read from ``./www/template`` when the class is defined.

    Every attribute is a ``str``. Files are read through a helper that
    closes each handle, instead of leaving open files to the garbage
    collector.
    """
    # interface : title, body
    base = _read_template('./www/template/base.html')

    # Info pages: the base layout with the page body filled in and the
    # ``{title}`` placeholder kept for later formatting.
    book_info = _wrap_in_base(base, './www/template/info_page/book.html')
    question_info = _wrap_in_base(base, './www/template/info_page/question.html')
    author_info = _wrap_in_base(base, './www/template/info_page/author.html')
    topic_info = _wrap_in_base(base, './www/template/info_page/topic.html')
    collection_info = _wrap_in_base(base, './www/template/info_page/collection.html')
    column_info = _wrap_in_base(base, './www/template/info_page/column.html')
    article_info = _wrap_in_base(base, './www/template/info_page/article.html')

    # Content pages.
    question = _wrap_in_base(base, './www/template/content/question/question.html')

    # The answer fragment is used as read, without the base layout.
    answer = _read_template('./www/template/content/question/answer.html')
--------------------------------------------------------------------------------
/src/tools/template_config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from src.tools.path import Path
3 |
4 |
class TemplateConfig(object):
    # Locations of the HTML template files, all derived from Path.base_path.

    # Template directories.
    template_path = Path.base_path + u'/www/template'
    content_template_path = template_path + u'/content'
    content_info_template_path = content_template_path + u'/info'
    content_question_template_path = content_template_path + u'/question'
    front_page_template_path = template_path + u'/front_page'
    front_page_info_template_path = front_page_template_path + u'/info'

    # Base layout for content pages.
    content_base_uri = template_path + u'/base.html'

    # Content templates
    # -- info
    info_author_uri = content_info_template_path + u'/author.html'
    info_comment_uri = content_info_template_path + u'/comment.html'
    info_title_uri = content_info_template_path + u'/title.html'
    # -- question
    question_answer_uri = content_question_template_path + u'/answer.html'
    question_question_uri = content_question_template_path + u'/question.html'

    # Front-page templates
    front_page_author_uri = front_page_info_template_path + u'/author.html'
    front_page_collection_uri = front_page_info_template_path + u'/collection.html'
    front_page_column_uri = front_page_info_template_path + u'/column.html'
    front_page_topic_uri = front_page_info_template_path + u'/topic.html'
    front_page_question_uri = front_page_info_template_path + u'/question.html'
    front_page_answer_uri = front_page_info_template_path + u'/answer.html'
    front_page_article_uri = front_page_info_template_path + u'/article.html'

    # Base layout for front pages.
    front_page_base_uri = front_page_template_path + u'/base.html'
34 |
--------------------------------------------------------------------------------
/src/tools/type.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from random import random
3 |
4 |
5 | class Type(object):  # String constants naming content kinds
6 | # Unknown type
7 | unknown = 'unknown'
8 | # Known kinds; each constant's value equals its attribute name
9 | answer = 'answer'
10 | question = 'question'
11 | topic = 'topic'
12 | collection = 'collection'
13 | author = 'author'
14 | column = 'column'
15 | article = 'article'
16 |
17 | pass
18 |
19 |
20 | class ImgQuality(object):  # Image quality levels plus a helper that builds zhimg download URLs
21 | raw = 2 # original image
22 | big = 1 # normal image
23 | none = 0 # no image
24 |
25 | @staticmethod
26 | def add_random_download_address_header_for_img_filename(file_uri):
27 | """
28 | Prefix file_uri with a zhimg host to form an image download URL. NOTE(review): despite the name, the first host in img_site_list is always used; no random choice is made.
29 | :param file_uri: string appended to the host prefix
30 | :return: host prefix + file_uri
31 | """
32 | img_site_list = [
33 | 'https://pic1.zhimg.com/',
34 | 'https://pic2.zhimg.com/',
35 | 'https://pic3.zhimg.com/',
36 | 'https://pic4.zhimg.com/',
37 | ]
38 | url = img_site_list[0] + file_uri
39 | return url
40 |
--------------------------------------------------------------------------------
/unit/BS4/content.html:
--------------------------------------------------------------------------------
1 | 用于测试BS4解析结果
--------------------------------------------------------------------------------
/unit/BS4/parser.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 |
4 | from bs4 import BeautifulSoup
5 | from src.lib.zhihu_parser.tools.parser_tools import ParserTools
6 |
7 | reload(sys)
8 | sys.setdefaultencoding('utf8')
9 |
10 | # sys.setrecursionlimit(1000000) # 为了适应知乎上的长答案,需要专门设下递归深度限制。。。
11 | # 添加库路径
12 | currentPath = sys.path[0].replace('unit', '')
13 | sys.path.append(currentPath)
14 | sys.path.append(currentPath + r'src')
15 | sys.path.append(currentPath + r'src\tools')
16 | sys.path.append(currentPath + r'src\parser')
17 |
18 | content = open(u'./content.html').read()
19 |
20 | parser = BeautifulSoup(content, 'html.parser')
21 | tag_content = ParserTools.get_tag_content(parser)
22 | parser
23 |
--------------------------------------------------------------------------------
/unit/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
--------------------------------------------------------------------------------
/unit/addressFile/address_All:
--------------------------------------------------------------------------------
1 |
2 |
3 | #people
4 | http://www.zhihu.com/people/zhong-wen-71
5 | http://www.zhihu.com/people/Selerare
6 | http://www.zhihu.com/people/_Zen
7 | http://www.zhihu.com/people/mu-mu-55-53
8 | http://www.zhihu.com/people/tian-yu-bai
9 | http://www.zhihu.com/people/tian-yu-bai/followees
10 | http://www.zhihu.com/people/ying-ye-78
11 | #table
12 | http://www.zhihu.com/roundtable/superhero
13 | http://www.zhihu.com/roundtable/superhero/questions
14 | #topic
15 | http://www.zhihu.com/topic/19554151
16 | http://www.zhihu.com/topic/19551147
17 | http://www.zhihu.com/topic/19551147/top-answers
18 | http://www.zhihu.com/topic/19551147/questions
19 | http://www.zhihu.com/topic/19551147/organize
20 | http://www.zhihu.com/topic/19551147/manage
21 | http://www.zhihu.com/topic/19551147/log
22 | #article
23 | http://zhuanlan.zhihu.com/8hpencil/19929476
24 | #column
25 | http://zhuanlan.zhihu.com/8hpencil
26 | #collection
27 | http://www.zhihu.com/collection/32271511
28 | http://www.zhihu.com/collection/32271511/log
29 |
--------------------------------------------------------------------------------
/unit/addressFile/answer:
--------------------------------------------------------------------------------
1 | #answer
2 | http://www.zhihu.com/question/25420679/answer/30790550?utm_source=weibo&utm_medium=weibo_share&utm_content=share_answer&utm_campaign=share_button
3 |
--------------------------------------------------------------------------------
/unit/addressFile/article:
--------------------------------------------------------------------------------
1 | #article
2 | http://zhuanlan.zhihu.com/8hpencil/19929476
3 |
--------------------------------------------------------------------------------
/unit/addressFile/collection:
--------------------------------------------------------------------------------
1 | #collection
2 | http://www.zhihu.com/collection/32271511
3 | http://www.zhihu.com/collection/32271511/log
4 |
--------------------------------------------------------------------------------
/unit/addressFile/column:
--------------------------------------------------------------------------------
1 | #column
2 | http://zhuanlan.zhihu.com/8hpencil
3 |
--------------------------------------------------------------------------------
/unit/addressFile/people:
--------------------------------------------------------------------------------
1 | #people
2 | http://www.zhihu.com/people/zhong-wen-71
3 | http://www.zhihu.com/people/Selerare
4 | http://www.zhihu.com/people/_Zen
5 | http://www.zhihu.com/people/mu-mu-55-53
6 | http://www.zhihu.com/people/tian-yu-bai
7 | http://www.zhihu.com/people/tian-yu-bai/followees
8 | http://www.zhihu.com/people/ying-ye-78
9 |
--------------------------------------------------------------------------------
/unit/addressFile/question:
--------------------------------------------------------------------------------
1 | #question
2 | http://www.zhihu.com/question/27580793
3 | http://www.zhihu.com/question/27580793?sort=created
4 | http://www.zhihu.com/question/22921426?sort=created#521个回答
5 | http://www.zhihu.com/question/19568396#847个回答
6 |
--------------------------------------------------------------------------------
/unit/addressFile/table:
--------------------------------------------------------------------------------
1 | #table
2 | http://www.zhihu.com/roundtable/superhero
3 | http://www.zhihu.com/roundtable/superhero/questions
4 |
--------------------------------------------------------------------------------
/unit/addressFile/topic:
--------------------------------------------------------------------------------
1 | #topic
2 | http://www.zhihu.com/topic/19554151
3 | http://www.zhihu.com/topic/19551147
4 | http://www.zhihu.com/topic/19551147/top-answers
5 | http://www.zhihu.com/topic/19551147/questions
6 | http://www.zhihu.com/topic/19551147/organize
7 | http://www.zhihu.com/topic/19551147/manage
8 | http://www.zhihu.com/topic/19551147/log
9 |
--------------------------------------------------------------------------------
/unit/demo/__init__.json:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/unit/demo/readme.md:
--------------------------------------------------------------------------------
1 | 将json转为正常的html后的结果
--------------------------------------------------------------------------------
/unit/oauth_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # 使用该文件测试oauth的使用方法
3 | # 放置于首位
4 | import sys # 修改默认编码
5 | import os # 添加系统路径
6 | import json
7 |
8 | base_path = unicode(os.path.abspath('.').decode(sys.stdout.encoding))
9 | sys.path.append(base_path + u'/src/lib')
10 | sys.path.append(base_path + u'/src/lib/oauth')
11 |
12 | reload(sys)
13 | sys.setdefaultencoding('utf-8') # 强制使用utf-8编码
14 |
15 | from zhihu_oauth import ZhihuClient
16 |
17 | from zhihu_oauth.exception import NeedCaptchaException
18 |
19 | client = ZhihuClient()
20 |
21 | test_email = 'mengqingxue2014@qq.com'
22 | test_password = '131724qingxue'
23 | token_file = './token.pkl'
24 |
25 | if os.path.lexists(token_file):
26 | client.load_token(token_file)
27 | print 'load token success'
28 | else:
29 | try:
30 | login_result = client.login(test_email, test_password)
31 | except NeedCaptchaException:
32 | # 保存验证码并提示输入,重新登录
33 | print u'登录失败,需要输入验证码'
34 | with open('a.gif', 'wb') as f:
35 | f.write(client.get_captcha())
36 | captcha = raw_input(u'please input captcha:')
37 | login_result = client.login(test_email, test_password, captcha)
38 | print 'login result => '
39 | print login_result
40 | client.save_token(token_file)
41 | print 'save token success'
42 |
43 | # question
44 | response_file_uri = './question_response.html' # 将json输出到网页中,chrome下按F12选preview能看见浏览器渲染出的json数据结构
45 | question_id = 35005800
46 | question = client.question(question_id)
47 | data = question.pure_data
48 | response_json = json.dumps(data)
49 | response_file = open(response_file_uri, 'w+')
50 | response_file.write(response_json)
51 | print u"数据保存完成"
52 |
53 | response_file_uri = './people_response.html' # 将json输出到网页中,chrome下按F12选preview能看见浏览器渲染出的json数据结构
54 | people_id = '404-Page-Not-found'
55 | people = client.people(people_id)
56 | for i in people.answers:
57 | data = i.pure_data
58 | response_json = json.dumps(data)
59 | response_file = open(response_file_uri, 'w+')
60 | response_file.write(response_json)
61 | print u"数据保存完成"
--------------------------------------------------------------------------------
/unit/parser_unit.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 |
4 | # Add library paths (NOTE(review): the backslash separators below only form valid paths on Windows)
5 | currentPath = sys.path[0].replace('unit', '')
6 | sys.path.append(currentPath)
7 | sys.path.append(currentPath + r'src')
8 | sys.path.append(currentPath + r'src\tools')
9 | sys.path.append(currentPath + r'src\parser')
10 | sys.path.append(currentPath + r'src\lib') # extension library location
11 |
12 | from src.lib.zhihu_parser.author import AuthorParser
13 | from src.lib.zhihu_parser.collection import CollectionParser
14 | from src.lib.zhihu_parser.question import QuestionParser
15 | from src.lib.zhihu_parser.topic import TopicParser
16 | from src.tools.debug import Debug
17 |
18 | reload(sys)
19 | sys.setdefaultencoding('utf8')
20 |
21 |
22 |
23 | sys.setrecursionlimit(1000000) # Raise the recursion limit to cope with long Zhihu answers
24 |
25 | is_info = 0 # truthy: read the 'src_info' sample and print parser.get_extra_info(); falsy: read 'src_answer' and print the answer list
26 | kind = 'author' # Change the kind here to test another one; the available kinds are the keys of the dict below
27 | unit ={
28 | 'answer':{
29 | 'src_answer':'./unit_html/single_answer.html',
30 | 'src_info':'./unit_html/single_answer.html',
31 | 'parser':QuestionParser,
32 | },
33 | 'question':{
34 | 'src_answer':'./unit_html/single_question.html',
35 | 'src_info':'./unit_html/single_question.html',
36 | 'parser':QuestionParser,
37 | },
38 | 'author':{
39 | 'src_answer':'./unit_html/author.html',
40 | 'src_info':'./unit_html/author_info.html',
41 | 'parser':AuthorParser,
42 | },
43 | 'topic':{
44 | 'src_answer':'./unit_html/topic.html',
45 | 'src_info':'./unit_html/topic.html',
46 | 'parser':TopicParser,
47 | },
48 | 'collection':{
49 | 'src_answer': './unit_html/collection.html',
50 | 'src_info': './unit_html/collection.html',
51 | 'parser': CollectionParser,
52 | },
53 | 'private_collection': {
54 | 'src_answer':'./unit_html/private_collection.html',
55 | 'src_info':'./unit_html/private_collection.html',
56 | 'parser':CollectionParser,
57 | },
58 | }
59 | if is_info:
60 | src = unit[kind]['src_info']
61 | else:
62 | src = unit[kind]['src_answer']
63 |
64 | content = open(src, 'r').read()
65 | parser = unit[kind]['parser'](content)
66 |
67 | # Print the parsed result with Debug.print_dict, using dashed/equals lines as separators
68 | if is_info:
69 | Debug.print_dict(parser.get_extra_info())
70 | print '----------------------'
71 | print '=========================='
72 | else:
73 | for answer in parser.get_answer_list():
74 | Debug.print_dict(answer)
75 | print '----------------------'
76 | print '=========================='
77 |
78 | for question in parser.get_question_info_list():
79 | Debug.print_dict(question)
80 | print '----------------------'
81 |
--------------------------------------------------------------------------------
/unit/unit_html/author.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/author.html
--------------------------------------------------------------------------------
/unit/unit_html/author_info.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/author_info.html
--------------------------------------------------------------------------------
/unit/unit_html/collection.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/collection.html
--------------------------------------------------------------------------------
/unit/unit_html/private_collection.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/private_collection.html
--------------------------------------------------------------------------------
/unit/unit_html/single_answer.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/single_answer.html
--------------------------------------------------------------------------------
/unit/unit_html/single_question.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/unit/unit_html/topic.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/topic.html
--------------------------------------------------------------------------------
/unit/unit_html/topic_info.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/topic_info.html
--------------------------------------------------------------------------------
/www/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/www/css/customer.css:
--------------------------------------------------------------------------------
1 | /* Page defaults: clear text indentation (no background colour is set in this rule) */
2 | body {
3 | text-indent: 0 !important;
4 | duokan-text-indent: 0 !important;
5 | word-wrap: break-word; /* wrap long words automatically */
6 | }
7 |
8 | /* color */
9 | .bg-zhihu-blue-light {
10 | background-color: #428ECE;
11 | }
12 |
13 | .bg-zhihu-blue-deep {
14 | background-color: #3982C6;
15 | }
16 |
17 | .bg-duokan-yellow {
18 | /* default background colour of the Duokan reader */
19 | background-color: #F7EFE7;
20 | }
21 |
22 | /* Clear floats */
23 | div.clear-float {
24 | clear: both;
25 | }
26 |
27 | /* Hide empty images */
28 | img[src=''], img[src$='./images/'] {
29 | display: none;
30 | }
31 |
32 | .text-center {
33 | text-align: center;
34 | }
35 |
36 | .v-center {
37 | vertical-align: middle;
38 | }
39 |
40 | .margin-center {
41 | margin: 0 auto;
42 | }
43 |
44 | /* Table-of-contents page */
45 | /* Hide the redundant "目录" (contents) label */
46 | div.index-content > li {
47 | display: none;
48 | }
49 |
50 | div.index-content a {
51 | font-size: 1em;
52 | }
53 |
54 | div.index-content ul > li > a {
55 | font-size: 1em;
56 | font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体;
57 | color: #3d3e45;
58 | }
59 |
60 | /* Front-page description */
61 | div.front-page.description {
62 | margin: 2em 0;
63 | }
64 |
65 | /* Comment info */
66 | div.extra-info p {
67 | float: left;
68 | }
69 |
70 | div.extra-info p.update-date {
71 | float: right;
72 | }
73 |
74 | div.extra-info {
75 | margin: 1em 0;
76 | }
77 |
78 | /* Author (user) info */
79 | div.author-base {
80 | margin: 1em 0;
81 | font-size: 16px;
82 | vertical-align: middle;
83 | }
84 |
85 | span.author-sign {
86 | margin-left: 1em;
87 | font-family: 'DK-KAITI', '楷体';
88 | }
89 |
90 | span.author-name a {
91 | font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体;
92 | color: #3d3e45;
93 | font-weight: bold;
94 | }
95 |
96 | div.author-logo {
97 | float: right;
98 | }
99 |
100 | div.author-logo img {
101 | vertical-align: middle;
102 | margin-left: 0.2em;
103 | margin-bottom: 0.2em;
104 | }
105 |
106 | /* Question details */
107 |
108 | /* Reproduce the full-screen look of Zhihu Weekly */
109 | @media handheld {
110 | div.question {
111 | margin: -10em -4em 0 -4em; /* top, right, bottom, left (clockwise) */
112 | padding: 10em 2em 0 4em;
113 | }
114 | }
115 |
116 | div.question-title {
117 | width: 100%;
118 | overflow: hidden;
119 | }
120 |
121 | div.question-title h1 {
122 | font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 宋体;
123 | color: #FFFFFF;
124 | vertical-align: middle;
125 | text-align: left;
126 | padding: 1em;
127 | width: 70%;
128 | margin: 3em 0 3em 0;
129 | float: right;
130 | font-size: 1.5em;
131 | }
132 |
133 | @media handheld {
134 | div.question-title h1 {
135 | margin: 0 0 3em 0;
136 | padding-right: 3em;
137 | }
138 | }
139 |
140 | div.question-info {
141 | font-family: 'DK-SONGTI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 宋体;
142 | color: #FFF !important;
143 | font-size: 1em;
144 | margin: 0 1em;
145 | padding-bottom: 1em;
146 | }
147 |
148 | @media handheld {
149 | div.question-info {
150 | margin: 0 -4em 0 -4em;
151 | padding: 0 5em 2em 5em;
152 | }
153 | }
154 |
155 | div.question-info a {
156 | color: #CCC;
157 | }
158 |
159 | /* Answer content */
160 | div.content {
161 | font-family: 'DK-SONGTI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体;
162 | color: #000;
163 | font-size: 16px;
164 | }
165 |
166 | div.content img {
167 | max-width: 100%;
168 | margin-bottom: 2em;
169 | }
170 |
171 | /* article */
172 | div.title-image {
173 | text-align: center;
174 | }
175 |
176 | div.title-image img {
177 | width: 100%;
178 | }
179 |
180 | /* Keep the detail table on info pages from being split across pages */
181 | div.front-page.detail-info table.margin-center {
182 | page-break-inside: avoid;
183 | }
184 |
185 | /* Keep the table-of-contents panel from being split across pages */
186 | div.index-content.panel {
187 | page-break-inside: avoid;
188 | }
189 |
--------------------------------------------------------------------------------
/www/image/cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/image/cover.jpg
--------------------------------------------------------------------------------
/www/image/kanshan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/image/kanshan.png
--------------------------------------------------------------------------------
/www/template/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/www/template/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {title}
5 |
6 |
7 |
8 |
9 |
10 | {body}
11 |
12 |
--------------------------------------------------------------------------------
/www/template/content/question/answer.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |

8 |
9 |
10 |
11 | {author_name}
12 |
13 |
14 |
{author_headline}
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | {content}
23 |
24 |
25 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/www/template/content/question/question.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
{title}
7 |
8 |
9 |
10 |
11 | {description}
12 |
13 |
14 |
15 |
16 | {answer}
17 |
--------------------------------------------------------------------------------
/www/template/front_page/base.html:
--------------------------------------------------------------------------------
1 |
2 |

4 |
5 |
{title}
6 |
7 | {description}
8 |
9 |
10 | {detail_info}
11 |
12 |
版权信息:知乎协议
13 |
--------------------------------------------------------------------------------
/www/template/front_page/info/answer.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/answer.html
--------------------------------------------------------------------------------
/www/template/front_page/info/article.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/article.html
--------------------------------------------------------------------------------
/www/template/front_page/info/author.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 用户信息
6 | |
7 |
8 |
9 |
10 |
11 | 用户名 |
12 |
13 | {name}
14 | |
15 |
16 |
17 | 关注人数 |
18 | {follower} |
19 |
20 |
21 | 提问 |
22 | {asks} |
23 |
24 |
25 | 回答 |
26 | {answers} |
27 |
28 |
29 | 专栏文章 |
30 | {posts} |
31 |
32 |
33 | 公共编辑次数 |
34 | {logs} |
35 |
36 |
37 | 被赞同 |
38 | {agree} |
39 |
40 |
41 | 被收藏 |
42 | {collected} |
43 |
44 |
45 | 被感谢 |
46 | {thanks} |
47 |
48 |
49 | 被分享 |
50 | {shared} |
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/www/template/front_page/info/collection.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 收藏夹信息
6 | |
7 |
8 |
9 |
10 |
11 | 评论数 |
12 | {comment} |
13 |
14 |
15 | 关注人数 |
16 | {follower} |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/www/template/front_page/info/column.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 专栏信息
6 | |
7 |
8 |
9 |
10 |
11 | 创建者 |
12 | {creator_name} |
13 |
14 |
15 | 文章总数 |
16 | {article} |
17 |
18 |
19 | 关注人数 |
20 | {follower} |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/www/template/front_page/info/question.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/question.html
--------------------------------------------------------------------------------
/www/template/front_page/info/topic.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 话题信息
6 | |
7 |
8 |
9 |
10 |
11 | 关注人数 |
12 | {follower} |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/www/template/info_page/article.html:
--------------------------------------------------------------------------------
1 |
2 |
文章来自专栏-{name}
3 |
4 |
5 |
6 | 专栏文章数 |
7 | {postsCount} |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/www/template/info_page/author.html:
--------------------------------------------------------------------------------
1 |
2 |
{name}的知乎回答集锦
3 |
4 |
5 |
6 | 回答数 |
7 | {answer_count} |
8 |
9 |
10 | 被关注数 |
11 | {follower_count} |
12 |
13 |
14 | 累计收获赞同 |
15 | {voteup_count} |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/www/template/info_page/book.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | {title}
4 |
5 |
--------------------------------------------------------------------------------
/www/template/info_page/collection.html:
--------------------------------------------------------------------------------
1 |
2 |
收藏夹:{title}
3 |
4 |
5 |
6 | 答案数 |
7 | {answer_count} |
8 |
9 |
10 | 关注人数 |
11 | {follower_count} |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/www/template/info_page/column.html:
--------------------------------------------------------------------------------
1 |
2 |
专栏-{name}
3 |
4 |
5 |
6 | 文章数 |
7 | {postsCount} |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/www/template/info_page/question.html:
--------------------------------------------------------------------------------
1 |
2 | 知乎问题-{title}
3 |
4 |
5 |
6 | 回答数 |
7 | {answer_count} |
8 |
9 |
10 | 关注人数 |
11 | {follower_count} |
12 |
13 |
14 | 评论数 |
15 | {comment_count} |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/www/template/info_page/topic.html:
--------------------------------------------------------------------------------
1 |
2 |
话题:{name}
3 |
4 |
5 |
6 | 问题数 |
7 | {questions_count} |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/zhihuHelp.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Keep these imports at the very top
3 | import sys # change the default encoding
4 | import os # extend the module search path
5 | import traceback
6 |
7 | base_path = unicode(os.path.abspath('.').decode(sys.stdout.encoding))
8 | sys.path.insert(0, base_path + u'/src/lib') # Add the base library path; insert(0, ...) puts it ahead of the existing sys.path entries so the project's bundled packages are found first
9 | sys.path.insert(0, base_path + u'/src/lib/oauth') # zhihu oauth must be importable as a top-level package, otherwise it cannot run
10 |
11 | reload(sys)
12 | sys.setdefaultencoding('utf-8')
13 |
14 | # Run the main program (NOTE(review): base_path above relies on sys.stdout.encoding being set -- confirm behaviour when stdout is redirected)
15 | from src.main import ZhihuHelp
16 |
17 | try:
18 | helper = ZhihuHelp()
19 | helper.start()
20 | except Exception:
21 | traceback.print_exc()
22 | print u"助手发生异常,点击任意键退出"
23 | raw_input()
24 | pass
25 |
--------------------------------------------------------------------------------
评论数:{comment_count}
28 | 29 |赞同数:{voteup_count}
30 | 31 |更新时间:{updated_time}
32 |