├── .gitattributes
├── .gitignore
├── ReadList_for_test.txt
├── db
    └── zhihuhelp.sql
├── readme.md
├── src
    ├── __init__.py
    ├── command_parser.py
    ├── container
    │   ├── __init__.py
    │   ├── book.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── answer.py
    │   │   ├── article.py
    │   │   ├── author.py
    │   │   ├── collection.py
    │   │   ├── column.py
    │   │   ├── question.py
    │   │   └── topic.py
    │   ├── image_container.py
    │   ├── task.py
    │   └── task_result.py
    ├── lib
    │   ├── __init__.py
    │   ├── epub
    │   │   ├── __init__.py
    │   │   ├── directory.py
    │   │   ├── epub.py
    │   │   ├── inf.py
    │   │   ├── mime_type.py
    │   │   ├── opf.py
    │   │   ├── template
    │   │   │   ├── META-INF
    │   │   │   │   ├── container
    │   │   │   │   │   └── container.xml
    │   │   │   │   └── duokan_container
    │   │   │   │   │   └── duokan-extension.xml
    │   │   │   ├── OEBPS
    │   │   │   │   ├── opf
    │   │   │   │   │   ├── content.xml
    │   │   │   │   │   ├── guide
    │   │   │   │   │   │   └── item.xml
    │   │   │   │   │   ├── manifest
    │   │   │   │   │   │   └── item.xml
    │   │   │   │   │   ├── metadata
    │   │   │   │   │   │   ├── book_id.xml
    │   │   │   │   │   │   ├── cover.xml
    │   │   │   │   │   │   ├── creator.xml
    │   │   │   │   │   │   ├── language.xml
    │   │   │   │   │   │   └── title.xml
    │   │   │   │   │   └── spine
    │   │   │   │   │   │   ├── item.xml
    │   │   │   │   │   │   └── item_nolinear.xml
    │   │   │   │   └── toc
    │   │   │   │   │   ├── content.xml
    │   │   │   │   │   ├── docTitle
    │   │   │   │   │       └── title.xml
    │   │   │   │   │   ├── head
    │   │   │   │   │       ├── depth.xml
    │   │   │   │   │       └── uid.xml
    │   │   │   │   │   └── navMap
    │   │   │   │   │       └── item.xml
    │   │   │   └── directory
    │   │   │   │   ├── chapter.html
    │   │   │   │   ├── content.html
    │   │   │   │   ├── finish_chapter.html
    │   │   │   │   ├── item_leaf.html
    │   │   │   │   └── item_root.html
    │   │   ├── toc.py
    │   │   ├── tools
    │   │   │   ├── __init__.py
    │   │   │   ├── base.py
    │   │   │   ├── epub_config.py
    │   │   │   └── epub_path.py
    │   │   └── zhihuhelp_tools
    │   │   │   ├── __init__.py
    │   │   │   ├── debug.py
    │   │   │   └── path.py
    │   ├── oauth
    │   │   ├── __init__.py
    │   │   └── zhihu_oauth
    │   │   │   ├── __init__.py
    │   │   │   ├── client.py
    │   │   │   ├── exception.py
    │   │   │   ├── helpers.py
    │   │   │   ├── oauth
    │   │   │       ├── __init__.py
    │   │   │       ├── before_login_auth.py
    │   │   │       ├── im_android.py
    │   │   │       ├── setting.py
    │   │   │       ├── token.py
    │   │   │       ├── utils.py
    │   │   │       └── zhihu_oauth.py
    │   │   │   ├── setting.py
    │   │   │   ├── utils.py
    │   │   │   └── zhcls
    │   │   │       ├── __init__.py
    │   │   │       ├── activity.py
    │   │   │       ├── answer.py
    │   │   │       ├── article.py
    │   │   │       ├── base.py
    │   │   │       ├── collection.py
    │   │   │       ├── column.py
    │   │   │       ├── comment.py
    │   │   │       ├── generator.py
    │   │   │       ├── live.py
    │   │   │       ├── me.py
    │   │   │       ├── message.py
    │   │   │       ├── normal.py
    │   │   │       ├── other.py
    │   │   │       ├── people.py
    │   │   │       ├── question.py
    │   │   │       ├── streaming.py
    │   │   │       ├── topic.py
    │   │   │       ├── urls.py
    │   │   │       ├── utils.py
    │   │   │       └── whisper.py
    │   └── requests
    │   │   ├── __init__.py
    │   │   ├── _internal_utils.py
    │   │   ├── adapters.py
    │   │   ├── api.py
    │   │   ├── auth.py
    │   │   ├── cacert.pem
    │   │   ├── certs.py
    │   │   ├── compat.py
    │   │   ├── cookies.py
    │   │   ├── exceptions.py
    │   │   ├── hooks.py
    │   │   ├── models.py
    │   │   ├── packages
    │   │       ├── README.rst
    │   │       ├── __init__.py
    │   │       ├── chardet
    │   │       │   ├── __init__.py
    │   │       │   ├── big5freq.py
    │   │       │   ├── big5prober.py
    │   │       │   ├── chardetect.py
    │   │       │   ├── chardistribution.py
    │   │       │   ├── charsetgroupprober.py
    │   │       │   ├── charsetprober.py
    │   │       │   ├── codingstatemachine.py
    │   │       │   ├── compat.py
    │   │       │   ├── constants.py
    │   │       │   ├── cp949prober.py
    │   │       │   ├── escprober.py
    │   │       │   ├── escsm.py
    │   │       │   ├── eucjpprober.py
    │   │       │   ├── euckrfreq.py
    │   │       │   ├── euckrprober.py
    │   │       │   ├── euctwfreq.py
    │   │       │   ├── euctwprober.py
    │   │       │   ├── gb2312freq.py
    │   │       │   ├── gb2312prober.py
    │   │       │   ├── hebrewprober.py
    │   │       │   ├── jisfreq.py
    │   │       │   ├── jpcntx.py
    │   │       │   ├── langbulgarianmodel.py
    │   │       │   ├── langcyrillicmodel.py
    │   │       │   ├── langgreekmodel.py
    │   │       │   ├── langhebrewmodel.py
    │   │       │   ├── langhungarianmodel.py
    │   │       │   ├── langthaimodel.py
    │   │       │   ├── latin1prober.py
    │   │       │   ├── mbcharsetprober.py
    │   │       │   ├── mbcsgroupprober.py
    │   │       │   ├── mbcssm.py
    │   │       │   ├── sbcharsetprober.py
    │   │       │   ├── sbcsgroupprober.py
    │   │       │   ├── sjisprober.py
    │   │       │   ├── universaldetector.py
    │   │       │   └── utf8prober.py
    │   │       └── urllib3
    │   │       │   ├── __init__.py
    │   │       │   ├── _collections.py
    │   │       │   ├── connection.py
    │   │       │   ├── connectionpool.py
    │   │       │   ├── contrib
    │   │       │       ├── __init__.py
    │   │       │       ├── appengine.py
    │   │       │       ├── ntlmpool.py
    │   │       │       ├── pyopenssl.py
    │   │       │       └── socks.py
    │   │       │   ├── exceptions.py
    │   │       │   ├── fields.py
    │   │       │   ├── filepost.py
    │   │       │   ├── packages
    │   │       │       ├── __init__.py
    │   │       │       ├── backports
    │   │       │       │   ├── __init__.py
    │   │       │       │   └── makefile.py
    │   │       │       ├── ordered_dict.py
    │   │       │       ├── six.py
    │   │       │       └── ssl_match_hostname
    │   │       │       │   ├── .gitignore
    │   │       │       │   ├── __init__.py
    │   │       │       │   └── _implementation.py
    │   │       │   ├── poolmanager.py
    │   │       │   ├── request.py
    │   │       │   ├── response.py
    │   │       │   └── util
    │   │       │       ├── __init__.py
    │   │       │       ├── connection.py
    │   │       │       ├── request.py
    │   │       │       ├── response.py
    │   │       │       ├── retry.py
    │   │       │       ├── ssl_.py
    │   │       │       ├── timeout.py
    │   │       │       └── url.py
    │   │   ├── sessions.py
    │   │   ├── status_codes.py
    │   │   ├── structures.py
    │   │   └── utils.py
    ├── login.py
    ├── main.py
    ├── tools
    │   ├── __init__.py
    │   ├── config.py
    │   ├── controler.py
    │   ├── db.py
    │   ├── debug.py
    │   ├── extra_tools.py
    │   ├── http.py
    │   ├── match.py
    │   ├── path.py
    │   ├── template.py
    │   ├── template_config.py
    │   └── type.py
    └── worker.py
├── unit
    ├── BS4
    │   ├── content.html
    │   └── parser.py
    ├── __init__.py
    ├── addressFile
    │   ├── address_All
    │   ├── answer
    │   ├── article
    │   ├── collection
    │   ├── column
    │   ├── people
    │   ├── question
    │   ├── table
    │   └── topic
    ├── demo
    │   ├── __init__.json
    │   ├── activity.html
    │   ├── answer.html
    │   ├── article.html
    │   ├── collection.html
    │   ├── columns.html
    │   ├── people.html
    │   ├── question.html
    │   ├── question_answer.html
    │   ├── readme.md
    │   └── topic.html
    ├── oauth_test.py
    ├── parser_unit.py
    └── unit_html
    │   ├── author.html
    │   ├── author_info.html
    │   ├── collection.html
    │   ├── private_collection.html
    │   ├── single_answer.html
    │   ├── single_question.html
    │   ├── topic.html
    │   └── topic_info.html
├── update.md
├── www
    ├── __init__.py
    ├── css
    │   ├── bootstrap.css
    │   ├── customer.css
    │   ├── markdown.css
    │   └── normalize.css
    ├── image
    │   ├── cover.jpg
    │   └── kanshan.png
    └── template
    │   ├── __init__.py
    │   ├── base.html
    │   ├── content
    │       └── question
    │       │   ├── answer.html
    │       │   └── question.html
    │   ├── front_page
    │       ├── base.html
    │       └── info
    │       │   ├── answer.html
    │       │   ├── article.html
    │       │   ├── author.html
    │       │   ├── collection.html
    │       │   ├── column.html
    │       │   ├── question.html
    │       │   └── topic.html
    │   └── info_page
    │       ├── article.html
    │       ├── author.html
    │       ├── book.html
    │       ├── collection.html
    │       ├── column.html
    │       ├── question.html
    │       └── topic.html
├── zhihuHelp.py
└── 知乎助手实现思路.md


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | *.sln    merge=union
 7 | *.csproj merge=union
 8 | *.vbproj merge=union
 9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 | 
12 | # Standard to msysgit
13 | *.doc	 diff=astextplain
14 | *.DOC	 diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot  diff=astextplain
18 | *.DOT  diff=astextplain
19 | *.pdf  diff=astextplain
20 | *.PDF	 diff=astextplain
21 | *.rtf	 diff=astextplain
22 | *.RTF	 diff=astextplain
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | /config.json
  2 | /.idea/dataSources.local.xml
  3 | /.idea/workspace.xml
  4 | /.idea/ZhihuHelp__Python.iml
  5 | #################
  6 | ## Eclipse
  7 | #################
  8 | 
  9 | *.pydevproject
 10 | .project
 11 | .metadata
 12 | bin/
 13 | tmp/
 14 | *.tmp
 15 | *.bak
 16 | *.swp
 17 | *~.nib
 18 | local.properties
 19 | .classpath
 20 | .settings/
 21 | .loadpath
 22 | 
 23 | # External tool builders
 24 | .externalToolBuilders/
 25 | 
 26 | # Locally stored "Eclipse launch configurations"
 27 | *.launch
 28 | 
 29 | # CDT-specific
 30 | .cproject
 31 | 
 32 | # PDT-specific
 33 | .buildpath
 34 | 
 35 | 
 36 | #################
 37 | ## Visual Studio
 38 | #################
 39 | 
 40 | ## Ignore Visual Studio temporary files, build results, and
 41 | ## files generated by popular Visual Studio add-ons.
 42 | 
 43 | # User-specific files
 44 | *.suo
 45 | *.user
 46 | *.sln.docstates
 47 | 
 48 | # Build results
 49 | 
 50 | [Dd]ebug/
 51 | [Rr]elease/
 52 | x64/
 53 | build/
 54 | [Bb]in/
 55 | [Oo]bj/
 56 | 
 57 | # MSTest test Results
 58 | [Tt]est[Rr]esult*/
 59 | [Bb]uild[Ll]og.*
 60 | 
 61 | *_i.c
 62 | *_p.c
 63 | *.ilk
 64 | *.meta
 65 | *.obj
 66 | *.pch
 67 | *.pdb
 68 | *.pgc
 69 | *.pgd
 70 | *.rsp
 71 | *.sbr
 72 | *.tlb
 73 | *.tli
 74 | *.tlh
 75 | *.tmp_proj
 76 | *.log
 77 | *.vspscc
 78 | *.vssscc
 79 | .builds
 80 | *.pidb
 81 | *.scc
 82 | 
 83 | # Visual C++ cache files
 84 | ipch/
 85 | *.aps
 86 | *.ncb
 87 | *.opensdf
 88 | *.sdf
 89 | *.cachefile
 90 | 
 91 | # Visual Studio profiler
 92 | *.psess
 93 | *.vsp
 94 | *.vspx
 95 | 
 96 | # Guidance Automation Toolkit
 97 | *.gpState
 98 | 
 99 | # ReSharper is a .NET coding add-in
100 | _ReSharper*/
101 | *.[Rr]e[Ss]harper
102 | 
103 | # TeamCity is a build add-in
104 | _TeamCity*
105 | 
106 | # DotCover is a Code Coverage Tool
107 | *.dotCover
108 | 
109 | # NCrunch
110 | *.ncrunch*
111 | .*crunch*.local.xml
112 | 
113 | # Installshield output folder
114 | [Ee]xpress/
115 | 
116 | # DocProject is a documentation generator add-in
117 | DocProject/buildhelp/
118 | DocProject/Help/*.HxT
119 | DocProject/Help/*.HxC
120 | DocProject/Help/*.hhc
121 | DocProject/Help/*.hhk
122 | DocProject/Help/*.hhp
123 | DocProject/Help/Html2
124 | DocProject/Help/html
125 | 
126 | # Click-Once directory
127 | publish/
128 | 
129 | # Publish Web Output
130 | *.Publish.xml
131 | *.pubxml
132 | 
133 | # NuGet Packages Directory
134 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line
135 | #packages/
136 | 
137 | # Windows Azure Build Output
138 | csx
139 | *.build.csdef
140 | 
141 | # Windows Store app package directory
142 | AppPackages/
143 | 
144 | # Others
145 | sql/
146 | *.Cache
147 | ClientBin/
148 | [Ss]tyle[Cc]op.*
149 | ~$*
150 | *~
151 | *.dbmdl
152 | *.[Pp]ublish.xml
153 | *.pfx
154 | *.publishsettings
155 | 
156 | # RIA/Silverlight projects
157 | Generated_Code/
158 | 
159 | # Backup & report files from converting an old project file to a newer
160 | # Visual Studio version. Backup files are not needed, because we have git ;-)
161 | _UpgradeReport_Files/
162 | Backup*/
163 | UpgradeLog*.XML
164 | UpgradeLog*.htm
165 | 
166 | # SQL Server files
167 | App_Data/*.mdf
168 | App_Data/*.ldf
169 | 
170 | #############
171 | ## Windows detritus
172 | #############
173 | 
174 | # Windows image file caches
175 | Thumbs.db
176 | ehthumbs.db
177 | 
178 | # Folder config file
179 | Desktop.ini
180 | 
181 | # Recycle Bin used on file shares
182 | $RECYCLE.BIN/
183 | 
184 | # Mac crap
185 | .DS_Store
186 | 
187 | 
188 | #############
189 | ## Python
190 | #############
191 | 
192 | *.py[co]
193 | 
194 | # Packages
195 | *.egg
196 | *.egg-info
197 | dist/
198 | eggs/
199 | parts/
200 | var/
201 | sdist/
202 | develop-eggs/
203 | .idea/
204 | 知乎电子书临时资源库/
205 | 知乎助手生成的电子书/
206 | .installed.cfg
207 | 
208 | # Installer logs
209 | pip-log.txt
210 | 
211 | # Unit test / coverage reports
212 | .coverage
213 | .tox
214 | 
215 | #Translations
216 | *.mo
217 | 
218 | #Mr Developer
219 | .mr.developer.cfg
220 | 
221 | #Mine
222 | ReadList.txt
223 | misc.xml
224 | *.db
225 | *.sqlite
226 | *.ini
227 | zhihuhelp1.7.0/我是登陆知乎时的验证码.gif
228 | .idea/workspace.xml
229 | 我是登陆知乎时的验证码.gif
230 | # 自动生成的token
231 | token.pkl
232 | *.token
233 | 知乎登录密钥_token_file.token
234 | 
235 | *.jpg
236 | 
237 | *.xml
238 | 
239 | *.xhtml
240 | 
241 | *.epub
242 | 
243 | *.opf
244 | 
245 | *.ncx
246 | 
247 | *.html
248 | 
249 | *.css
250 | 


--------------------------------------------------------------------------------
/ReadList_for_test.txt:
--------------------------------------------------------------------------------
1 | ﻿https://www.zhihu.com/people/404-Page-Not-found$https://www.zhihu.com/people/meng-qing-xue-94$https://www.zhihu.com/people/ying-ye-78 #用户答案收集测试-姚泽源-孟晴雪-影夜
2 | https://www.zhihu.com/topic/19560104 #话题-青岛
3 | https://www.zhihu.com/collection/19726342 #收藏夹-工作
4 | https://www.zhihu.com/collection/34451960 #孟晴雪-私人收藏夹
5 | http://zhuanlan.zhihu.com/patisserie #专栏-甘党万歳
6 | http://zhuanlan.zhihu.com/jiageng/20084431 #单篇专栏测试 - 一只吐泡泡的扇贝
7 | https://www.zhihu.com/question/19611675/answer/29365393?from=profile_answer_card # 单个答案测试-青岛旅游攻略
8 | https://www.zhihu.com/question/22719537 # 单个问题测试-如何保存某位知乎用户的所有答案？
9 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/src/__init__.py


--------------------------------------------------------------------------------
/src/command_parser.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from src.container.task import QuestionTask, AnswerTask, AuthorTask, CollectionTask, TopicTask, \
  3 |     ArticleTask, ColumnTask
  4 | from src.tools.debug import Debug
  5 | from src.tools.match import Match
  6 | from src.tools.type import Type
  7 | 
  8 | 
  9 | class CommandParser(object):
 10 |     u"""
 11 |     通过Parser类，生成任务列表,以task容器列表的形式返回回去
 12 |     """
 13 | 
 14 |     @staticmethod
 15 |     def get_task_list(command):
 16 |         u"""
 17 |         解析指令类型
 18 |         """
 19 |         command = command \
 20 |             .replace(' ', '') \
 21 |             .replace('\r', '') \
 22 |             .replace('\n', '') \
 23 |             .replace('\t', '') \
 24 |             .split('#')[0]
 25 |         command_list = command.split('$')
 26 | 
 27 |         task_list = []
 28 |         for command in command_list:
 29 |             task = CommandParser.parse_command(command)
 30 |             if not task:
 31 |                 continue
 32 |             task_list.append(task)
 33 |         return task_list
 34 | 
 35 |     @staticmethod
 36 |     def detect(command):
 37 |         for command_type in [
 38 |             Type.answer, Type.question,
 39 |             Type.author, Type.collection, Type.topic,
 40 |             Type.article, Type.column,  # 文章必须放在专栏之前（否则检测类别的时候就一律检测为专栏了）
 41 |         ]:
 42 |             result = getattr(Match, command_type)(command)
 43 |             if result:
 44 |                 return command_type
 45 |         return Type.unknown
 46 | 
 47 |     @staticmethod
 48 |     def parse_command(raw_command=''):
 49 |         u"""
 50 |         分析单条命令并返回待完成的task
 51 |         """
 52 |         parser = {
 53 |             Type.author: CommandParser.parse_author,
 54 |             Type.answer: CommandParser.parse_answer,
 55 |             Type.question: CommandParser.parse_question,
 56 |             Type.collection: CommandParser.parse_collection,
 57 |             Type.topic: CommandParser.parse_topic,
 58 |             Type.article: CommandParser.parse_article,
 59 |             Type.column: CommandParser.parse_column,
 60 |             Type.unknown: CommandParser.parse_error,
 61 |         }
 62 |         kind = CommandParser.detect(raw_command)
 63 |         return parser[kind](raw_command)
 64 | 
 65 |     @staticmethod
 66 |     def parse_question(command):
 67 |         result = Match.question(command)
 68 |         question_id = result.group(u'question_id')
 69 |         task = QuestionTask(question_id)
 70 |         return task
 71 | 
 72 |     @staticmethod
 73 |     def parse_answer(command):
 74 |         result = Match.answer(command)
 75 |         question_id = result.group(u'question_id')
 76 |         answer_id = result.group(u'answer_id')
 77 |         task = AnswerTask(question_id, answer_id)
 78 |         return task
 79 | 
 80 |     @staticmethod
 81 |     def parse_author(command):
 82 |         result = Match.author(command)
 83 |         author_page_id = result.group(u'author_page_id')
 84 |         task = AuthorTask(author_page_id)
 85 |         return task
 86 | 
 87 |     @staticmethod
 88 |     def parse_collection(command):
 89 |         result = Match.collection(command)
 90 |         collection_id = result.group(u'collection_id')
 91 |         task = CollectionTask(collection_id)
 92 |         return task
 93 | 
 94 |     @staticmethod
 95 |     def parse_topic(command):
 96 |         result = Match.topic(command)
 97 |         topic_id = result.group(u'topic_id')
 98 |         task = TopicTask(topic_id)
 99 |         return task
100 | 
101 |     @staticmethod
102 |     def parse_article(command):
103 |         result = Match.article(command)
104 |         column_id = result.group(u'column_id')
105 |         article_id = result.group(u'article_id')
106 |         task = ArticleTask(column_id, article_id)
107 |         return task
108 | 
109 |     @staticmethod
110 |     def parse_column(command):
111 |         result = Match.column(command)
112 |         column_id = result.group(u'column_id')
113 |         task = ColumnTask(column_id)
114 |         return task
115 | 
116 |     @staticmethod
117 |     def parse_error(command):
118 |         if command:
119 |             Debug.logger.info(u"""无法解析记录:{}所属网址类型,请检查后重试。""".format(command))
120 |         return
121 | 


--------------------------------------------------------------------------------
/src/container/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/src/container/data/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # 用于储存数据信息


--------------------------------------------------------------------------------
/src/container/data/answer.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | 
 4 | from src.tools.config import Config
 5 | from src.tools.match import Match
 6 | from src.tools.path import Path
 7 | 
 8 | 
 9 | class Answer(object):
10 |     def __init__(self, data):
11 |         self.answer_id = data['answer_id']
12 |         self.question_id = data['question_id']
13 |         self.author_id = data['author_id']
14 |         self.author_name = data['author_name']
15 |         self.author_headline = data['author_headline']
16 |         self.author_avatar_url = data['author_avatar_url']
17 |         self.author_gender = data['author_gender']
18 |         self.comment_count = data['comment_count']
19 |         self.content = data['content']
20 |         self.created_time = data['created_time']
21 |         self.updated_time = data['updated_time']
22 |         self.is_copyable = data['is_copyable']
23 |         self.thanks_count = data['thanks_count']
24 |         self.voteup_count = data['voteup_count']
25 | 
26 |         self.total_img_size_kb = 0 # 文件大小(只统计图片大小，包括答案内图片和答主头像，单位kb)
27 |         self.img_filename_list = []
28 |         return
29 | 
30 |     def download_img(self):
31 |         from src.container.image_container import ImageContainer
32 |         img_container = ImageContainer()
33 |         img_src_dict = Match.match_img_with_src_dict(self.content)
34 |         self.img_filename_list = []
35 |         for img in img_src_dict:
36 |             src = img_src_dict[img]
37 |             filename = img_container.add(src)
38 |             self.img_filename_list.append(filename)
39 |             self.content = self.content.replace(img, Match.create_img_element_with_file_name(filename))
40 | 
41 |         #   答案作者的头像也要下载
42 |         filename = img_container.add(self.author_avatar_url)
43 |         self.img_filename_list.append(filename)
44 |         self.author_avatar_url = Match.create_local_img_src(filename)
45 | 
46 |         img_container.start_download()
47 | 
48 |         #   下载完成后，更新图片大小
49 |         for filename in self.img_filename_list:
50 |             self.total_img_size_kb += Path.get_img_size_by_filename_kb(filename)
51 |         return


--------------------------------------------------------------------------------
/src/container/data/article.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from src.container.data.author import Author
 3 | from src.tools.db import DB
 4 | from src.tools.match import Match
 5 | from src.tools.path import Path
 6 | 
 7 | 
 8 | class Article(object):
 9 |     u"""
10 |     文章容器
11 |     """
12 |     def __init__(self, data):
13 |         self.article_id = data['article_id']
14 |         self.title = data['title']
15 |         self.updated_time = data['updated_time']
16 |         self.voteup_count = data['voteup_count']
17 |         self.image_url = data['image_url']
18 |         self.column_id = data['column_id']
19 |         self.content = data['content']
20 |         self.comment_count = data['comment_count']
21 |         self.author_id = data['author_id']
22 |         self.author_name = data['author_name']
23 |         self.author_headline = data['author_headline']
24 |         self.author_avatar_url = data['author_avatar_url']
25 |         self.author_gender = data['author_gender']
26 | 
27 |         self.total_img_size_kb = 0
28 |         self.img_filename_list = []
29 |         return
30 | 
31 |     def download_img(self):
32 |         from src.container.image_container import ImageContainer
33 |         img_container = ImageContainer()
34 |         img_src_dict = Match.match_img_with_src_dict(self.content)
35 |         self.img_filename_list = []
36 |         for img in img_src_dict:
37 |             src = img_src_dict[img]
38 |             filename = img_container.add(src)
39 |             self.img_filename_list.append(filename)
40 |             self.content = self.content.replace(img, Match.create_img_element_with_file_name(filename))
41 | 
42 |         #   下载文章封面图像
43 |         filename = img_container.add(self.image_url)
44 |         self.img_filename_list.append(filename)
45 |         self.image_url = Match.create_local_img_src(filename)
46 | 
47 |         #   下载用户头像
48 |         filename = img_container.add(self.author_avatar_url)
49 |         self.img_filename_list.append(filename)
50 |         self.author_avatar_url = Match.create_local_img_src(filename)
51 | 
52 |         img_container.start_download()
53 | 
54 |         #   下载完成后，更新图片大小
55 |         for filename in self.img_filename_list:
56 |             self.total_img_size_kb += Path.get_img_size_by_filename_kb(filename)
57 |         return


--------------------------------------------------------------------------------
/src/container/data/author.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | class Author(object):
 5 |     def __init__(self, data):
 6 |         self.author_id = data['author_id']
 7 |         self.author_page_id = data['author_page_id']
 8 |         self.answer_count = data['answer_count']
 9 |         self.articles_count = data['articles_count']
10 |         self.avatar_url = data['avatar_url']
11 |         self.columns_count = data['columns_count']
12 |         self.description = data['description']
13 |         self.favorite_count = data['favorite_count']
14 |         self.favorited_count = data['favorited_count']
15 |         self.follower_count = data['follower_count']
16 |         self.following_columns_count = data['following_columns_count']
17 |         self.following_count = data['following_count']
18 |         self.following_question_count = data['following_question_count']
19 |         self.following_topic_count = data['following_topic_count']
20 |         self.gender = data['gender']
21 |         self.headline = data['headline']
22 |         self.name = data['name']
23 |         self.question_count = data['question_count']
24 |         self.shared_count = data['shared_count']
25 |         self.is_bind_sina = data['is_bind_sina']
26 |         self.thanked_count = data['thanked_count']
27 |         self.sina_weibo_name = data['sina_weibo_name']
28 |         self.sina_weibo_url = data['sina_weibo_url']
29 |         self.voteup_count = data['voteup_count']
30 |         return


--------------------------------------------------------------------------------
/src/container/data/collection.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | class Collection(object):
 5 |     def __init__(self, data):
 6 |         self.collection_id = data['collection_id']
 7 |         self.answer_count = data['answer_count']
 8 |         self.comment_count = data['comment_count']
 9 |         self.created_time = data['created_time']
10 |         self.follower_count = data['follower_count']
11 |         self.description = data['description']
12 |         self.title = data['title']
13 |         self.updated_time = data['updated_time']
14 |         self.creator_id = data['creator_id']
15 |         self.creator_name = data['creator_name']
16 |         self.creator_headline = data['creator_headline']
17 |         self.creator_avatar_url = data['creator_avatar_url']
18 |         self.collected_answer_id_list = data['collected_answer_id_list']
19 |         return


--------------------------------------------------------------------------------
/src/container/data/column.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | class Column(object):
 5 |     def __init__(self, data):
 6 |         self.column_id = data['column_id']
 7 |         self.title = data['title']
 8 |         self.article_count = data['article_count']
 9 |         self.follower_count = data['follower_count']
10 |         self.description = data['description']
11 |         self.image_url = data['image_url']
12 |         return
13 | 


--------------------------------------------------------------------------------
/src/container/data/question.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | class Question(object):
 5 |     def __init__(self, data):
 6 |         self.question_id = data['question_id']
 7 |         self.answer_count = data['answer_count']
 8 |         self.comment_count = data['comment_count']
 9 |         self.follower_count = data['follower_count']
10 |         self.title = data['title']
11 |         self.detail = data['detail']
12 |         self.updated_time = data['updated_time']
13 |         return


--------------------------------------------------------------------------------
/src/container/data/topic.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | class Topic(object):
 5 |     def __init__(self, data):
 6 |         self.topic_id = data['topic_id']
 7 |         self.avatar_url = data['avatar_url']
 8 |         self.best_answerers_count = data['best_answerers_count']
 9 |         self.best_answers_count = data['best_answers_count']
10 |         self.excerpt = data['excerpt']
11 |         self.followers_count = data['followers_count']
12 |         self.introduction = data['introduction']
13 |         self.name = data['name']
14 |         self.questions_count = data['questions_count']
15 |         self.unanswered_count = data['unanswered_count']
16 |         self.best_answer_id_list = data['best_answer_id_list']
17 |         return


--------------------------------------------------------------------------------
/src/container/image_container.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import hashlib
 3 | import os.path
 4 | 
 5 | from src.tools.config import Config
 6 | from src.tools.controler import Control
 7 | from src.tools.debug import Debug
 8 | from src.tools.extra_tools import ExtraTools
 9 | from src.tools.http import Http
10 | from src.tools.match import Match
11 | from src.tools.path import Path
12 | 
13 | 
class ImageContainer(object):
    """
    Registry of the images referenced by a book, plus their downloader.

    ``container`` maps the href given to :meth:`add` to a record of the form
    ``{'filename': ..., 'href': ...}``, where ``href`` is the address after
    the configured picture quality has been applied.
    """

    def __init__(self, save_path=''):
        """
        :param save_path: directory images are written to; an empty value
            (``''`` or ``None``) selects ``Path.image_pool_path``
        """
        # Truthiness instead of len() so that None also selects the default.
        if not save_path:
            save_path = Path.image_pool_path
        self.save_path = save_path
        self.container = {}
        self.md5 = hashlib.md5()
        return

    def set_save_path(self, save_path):
        """Replace the directory used by later downloads."""
        self.save_path = save_path
        return

    def add(self, href):
        """
        Register an image.

        :param href: image address
        :return: file name the image will be stored under
        """
        self.container[href] = self.create_image(href)
        return self.get_filename(href)

    def delete(self, href):
        """
        Forget a registered image.

        :raises KeyError: when ``href`` was never registered
        """
        del self.container[href]
        return

    def get_filename(self, href):
        """Return the file name registered for ``href``, or ``''`` if unknown."""
        image = self.container.get(href)
        if image:
            return image['filename']
        return ''

    def get_filename_list(self):
        """
        Return the values of ``container``.

        Despite the name these are the full image records (dicts holding
        ``filename`` and ``href``), not bare file names.
        """
        return self.container.values()

    def download(self, index):
        """
        Fetch one registered image into ``save_path``.

        Nothing happens when the target file already exists.  When the href
        is empty or the download yields no content, an empty file is written
        instead; because of the existence check, such a file also prevents
        another attempt on a later call.

        :param index: key of the image in ``container``
        """
        image = self.container[index]
        target = self.save_path + '/' + image['filename']
        # https is swapped for http on purpose, to speed the download up.
        href = image['href'].replace('https://', 'http://')

        if os.path.isfile(target):
            return

        # Bytes literal: the file is opened in binary mode below.
        content = b''
        if href:
            Debug.print_in_single_line(u'开始下载图片{}'.format(href))
            content = Http.get_content(url=href, timeout=Config.timeout_download_picture)
            if content:
                Debug.print_in_single_line(u'图片{}下载完成'.format(href))
            else:
                Debug.logger.debug(u'图片『{}』下载失败'.format(href))
                content = b''
        # An empty href needs no request at all; the placeholder is still written.
        with open(target, 'wb') as image_file:
            image_file.write(content)
        return

    def start_download(self):
        """
        Hand :meth:`download` and the registered keys to
        ``Control.control_center``.
        """
        # NOTE(review): presumably control_center calls ``func`` once per item
        # of ``iterable`` - confirm against Control.
        argv = {'func': self.download,
                'iterable': self.container, }
        Control.control_center(argv, self.container)
        return

    def create_image(self, href):
        """Build the record stored in ``container`` for ``href``."""
        # The picture-quality setting is applied here, in one place, so the
        # file name is derived from the address that is actually downloaded.
        href = self.transfer_img_href_by_config_quality(href)
        image = {'filename': self.create_filename(href), 'href': href}
        return image

    def transfer_img_href_by_config_quality(self, raw_href):
        """
        Rewrite ``raw_href`` through ``Match.generate_img_src`` using
        ``Config.picture_quality``; the raw address is kept when that
        returns ``None``.
        """
        href = Match.generate_img_src(raw_href, Config.picture_quality)
        if href is None:
            href = raw_href
        return href

    def create_filename(self, href):
        """Return ``ExtraTools.md5(href)`` with a ``.jpg`` suffix."""
        filename = ExtraTools.md5(href) + '.jpg'
        return filename
93 | 


--------------------------------------------------------------------------------
/src/container/task.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from src.tools.type import Type
 3 | 
 4 | 
 5 | class Task(object):
 6 |     def __init__(self, task_type):
 7 |         self.task_type = task_type
 8 |         return
 9 | 
10 |     def get_task_type(self):
11 |         return self.task_type
12 | 
13 | 
class AuthorTask(Task):
    """Task of type ``Type.author``; the page id is stored as given."""

    def __init__(self, author_page_id):
        super(AuthorTask, self).__init__(Type.author)
        self.author_page_id = author_page_id
19 | 
20 | 
class TopicTask(Task):
    """Task of type ``Type.topic``; the topic id is coerced with ``int``."""

    def __init__(self, topic_id):
        super(TopicTask, self).__init__(Type.topic)
        self.topic_id = int(topic_id)
26 | 
27 | 
class CollectionTask(Task):
    """Task of type ``Type.collection``; the collection id is coerced with ``int``."""

    def __init__(self, collection_id):
        super(CollectionTask, self).__init__(Type.collection)
        self.collection_id = int(collection_id)
33 | 
34 | 
class QuestionTask(Task):
    """Task of type ``Type.question``; the question id is coerced with ``int``."""

    def __init__(self, question_id):
        super(QuestionTask, self).__init__(Type.question)
        self.question_id = int(question_id)
40 | 
41 | 
class AnswerTask(Task):
    """Task of type ``Type.answer``; both ids are coerced with ``int``."""

    def __init__(self, question_id, answer_id):
        super(AnswerTask, self).__init__(Type.answer)
        # Converted in the same order as the parameters, so a bad
        # question_id is reported before a bad answer_id.
        self.question_id = int(question_id)
        self.answer_id = int(answer_id)
48 | 
49 | 
class ColumnTask(Task):
    """Task of type ``Type.column``; the column id is stored as given."""

    def __init__(self, column_id):
        super(ColumnTask, self).__init__(Type.column)
        self.column_id = column_id
55 | 
56 | 
class ArticleTask(Task):
    """
    Task of type ``Type.article``; the column id is stored as given and the
    article id is coerced with ``int``.
    """

    def __init__(self, column_id, article_id):
        super(ArticleTask, self).__init__(Type.article)
        self.column_id = column_id
        self.article_id = int(article_id)
63 | 


--------------------------------------------------------------------------------
/src/lib/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/directory.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from .zhihuhelp_tools.path import Path
 3 | from .tools.base import Base
 4 | 
 5 | 
 6 | class Directory(Base):
 7 |     def __init__(self):
 8 |         Base.__init__(self)
 9 |         self.chapter_deep = 0
10 |         return
11 | 
12 |     def add_html(self, src, title):
13 |         template = self.get_template('directory', 'item_leaf')
14 |         self.content += template.format(href=Path.get_filename(src), title=title)
15 |         return
16 | 
17 |     def create_chapter(self, src, title):
18 |         template = self.get_template('directory', 'item_root')
19 |         item = template.format(href=Path.get_filename(src), title=title)
20 |         if self.chapter_deep == 0:
21 |             template = self.get_template('directory', 'chapter')
22 |             item = template.format(item=item, title=u'目录')
23 |         self.content += item
24 | 
25 |         self.chapter_deep += 1
26 |         return
27 | 
28 |     def finish_chapter(self):
29 |         if self.chapter_deep == 1:
30 |             template = self.get_template('directory', 'finish_chapter')
31 |             self.content += template
32 | 
33 |         self.chapter_deep -= 1
34 |         return
35 | 
36 |     def get_content(self):
37 |         template = self.get_template('directory', 'content')
38 |         return template.format(content=self.content)
39 | 


--------------------------------------------------------------------------------
/src/lib/epub/inf.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from .tools.epub_config import EpubConfig
 3 | from .tools.epub_path import EpubPath
 4 | 
 5 | from .zhihuhelp_tools.path import Path
 6 | 
 7 | 
 8 | class INF(object):
 9 |     def __init__(self):
10 |         return
11 | 
12 |     @staticmethod
13 |     def add_container():
14 |         Path.copy(EpubConfig.container_uri, EpubPath.meta_inf_path)
15 |         return
16 | 
17 |     @staticmethod
18 |     def add_duokan_ext():
19 |         Path.copy(EpubConfig.duokan_container_uri, EpubPath.meta_inf_path)
20 |         return
21 | 


--------------------------------------------------------------------------------
/src/lib/epub/mime_type.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from .tools.epub_path import EpubPath
 3 | 
 4 | 
 5 | class MimeType(object):
 6 |     def __init__(self):
 7 |         self.content = u'application/epub+zip'
 8 |         return
 9 | 
10 |     def create(self):
11 |         with open(EpubPath.work_path + '/mimetype', 'w') as mimetype:
12 |             mimetype.write(self.content)
13 |         return
14 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/META-INF/container/container.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
3 |     <rootfiles>
4 |         <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
5 |     </rootfiles>
6 | </container>
7 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/META-INF/duokan_container/duokan-extension.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?>
2 | <duokan-extension version="2.0.0">
3 | </duokan-extension>
4 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/content.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <package xmlns="http://www.idpf.org/2007/opf"
 3 |          xmlns:dc="http://purl.org/dc/elements/1.1/"
 4 |          unique-identifier="create_by_yaozeyuan" version="2.0">
 5 |     <metadata>
 6 | {metadata}
 7 |     </metadata>
 8 |     <manifest>
 9 |         <item id="ncx" href="toc.ncx" media-type="text/xml"/>
10 | {manifest}
11 |     </manifest>
12 |     <spine toc="ncx">
13 | {spine}
14 |     </spine>
15 |     <guide>
16 | {guide}
17 |     </guide>
18 | </package>
19 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/guide/item.xml:
--------------------------------------------------------------------------------
1 |         <reference href="{href}" title="{title}" type="{item_type}"/>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/manifest/item.xml:
--------------------------------------------------------------------------------
1 |         <item id="{resource_id}" href="{href}" media-type="{media_type}"/>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/book_id.xml:
--------------------------------------------------------------------------------
1 |         <dc:identifier id="{book_id}">{uid}</dc:identifier>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/cover.xml:
--------------------------------------------------------------------------------
1 |     <meta name="cover" content="{image_id}"/>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/creator.xml:
--------------------------------------------------------------------------------
1 |         <dc:creator>{creator}</dc:creator>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/language.xml:
--------------------------------------------------------------------------------
1 |         <dc:language>{language}</dc:language>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/metadata/title.xml:
--------------------------------------------------------------------------------
1 |         <dc:title>{title}</dc:title>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/spine/item.xml:
--------------------------------------------------------------------------------
1 |         <itemref idref="{resource_id}"/>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/opf/spine/item_nolinear.xml:
--------------------------------------------------------------------------------
1 |         <itemref idref="{resource_id}" linear="no"/>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/content.xml:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding='utf-8'?>
 2 | <!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
 3 |         "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
 4 | <ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
 5 |     <head>
 6 | {head}
 7 |         <meta name="dtb:totalPageCount" content="0"/>
 8 |         <meta name="dtb:maxPageNumber" content="0"/>
 9 |     </head>
10 |     <docTitle>
11 | {doc_title}
12 |     </docTitle>
13 |     <navMap>
14 | {nav_point}
15 |     </navMap>
16 | </ncx>


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/docTitle/title.xml:
--------------------------------------------------------------------------------
1 |         <text>
2 |             {title}
3 |         </text>


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/head/depth.xml:
--------------------------------------------------------------------------------
1 |         <meta name="dtb:depth" content="{depth}"/>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/head/uid.xml:
--------------------------------------------------------------------------------
1 |         <meta name="dtb:uid" content="{uid}"/>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/OEBPS/toc/navMap/item.xml:
--------------------------------------------------------------------------------
 1 |         <navPoint id="{resource_id}" playOrder="{resource_id}">
 2 |             <navLabel>
 3 |                 <text>{title}</text>
 4 |             </navLabel>
 5 | 
 6 |             <content src="{href}"/>
 7 | 
 8 | {extend_nav_point}
 9 |         </navPoint>
10 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/directory/chapter.html:
--------------------------------------------------------------------------------
1 | <div class="panel panel-success center-block">
2 |     <!-- Default panel contents -->
3 |     <div class="panel-heading">{item}</div>
4 |     <!-- List group -->
5 |     <div class="list-group">
6 | 
7 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/directory/content.html:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding='utf-8'?>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="zh-cn">
 3 | <head>
 4 |     <meta charset="UTF-8"/>
 5 |     <title>目录</title>
 6 |     <link rel="stylesheet" type="text/css" href="../style/normalize.css"/>
 7 |     <link rel="stylesheet" type="text/css" href="../style/markdown.css"/>
 8 |     <link rel="stylesheet" type="text/css" href="../style/customer.css"/>
 9 |     <link rel="stylesheet" type="text/css" href="../style/bootstrap.css"/>
10 | </head>
11 | <body>
12 | <div class="index-content">
13 |     {content}
14 | </div>
15 | </body>
16 | </html>
17 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/directory/finish_chapter.html:
--------------------------------------------------------------------------------
1 | </div>
2 | </div>
3 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/directory/item_leaf.html:
--------------------------------------------------------------------------------
1 | <a class="list-group-item" href="{href}">{title}</a>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/template/directory/item_root.html:
--------------------------------------------------------------------------------
1 | <a href="{href}">{title}</a>
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/src/lib/epub/tools/base.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from .epub_config import EpubConfig
 3 | 
 4 | 
 5 | class Base(object):
 6 |     def __init__(self):
 7 |         self.content = ''
 8 |         return
 9 | 
10 |     def get_template(self, template_kind, template_name):
11 |         template_uri = '{}_{}_uri'.format(template_kind, template_name)
12 |         with open(getattr(EpubConfig, template_uri)) as template:
13 |             content = template.read()
14 |         return content
15 | 
16 |     def get_content(self):
17 |         return self.content
18 | 


--------------------------------------------------------------------------------
/src/lib/epub/tools/epub_config.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from .epub_path import EpubPath
 3 | 
 4 | 
 5 | class EpubConfig(object):
 6 |     u"""
 7 |     用于记录epub创建过程中所需用到的常量
 8 |     比如，常见模板的实际路径
 9 |     """
10 |     base_path = EpubPath.base_path + u'/template'
11 |     # META-INF
12 |     container_uri = base_path + u'/META-INF/container/container.xml'
13 |     duokan_container_uri = base_path + u'/META-INF/duokan_container/duokan-extension.xml'
14 | 
15 |     # OEBPS
16 | 
17 |     ## OPF
18 |     opf_content_uri = base_path + u'/OEBPS/opf/content.xml'
19 | 
20 |     ### guide
21 |     guide_item_uri = base_path + u'/OEBPS/opf/guide/item.xml'
22 | 
23 |     ### metadata
24 |     metadata_cover_uri = base_path + u'/OEBPS/opf/metadata/cover.xml'
25 |     metadata_creator_uri = base_path + u'/OEBPS/opf/metadata/creator.xml'
26 |     metadata_book_id_uri = base_path + u'/OEBPS/opf/metadata/book_id.xml'
27 |     metadata_title_uri = base_path + u'/OEBPS/opf/metadata/title.xml'
28 |     metadata_language_uri = base_path + u'/OEBPS/opf/metadata/language.xml'
29 | 
30 |     ### manifest
31 |     manifest_item_uri = base_path + u'/OEBPS/opf/manifest/item.xml'
32 | 
33 |     ### spine
34 |     spine_item_uri = base_path + u'/OEBPS/opf/spine/item.xml'
35 |     spine_item_nolinear_uri = base_path + u'/OEBPS/opf/spine/item_nolinear.xml'
36 | 
37 | 
38 |     ## TOC
39 |     toc_content_uri = base_path + u'/OEBPS/toc/content.xml'
40 |     ###head
41 |     head_uid_uri = base_path + u'/OEBPS/toc/head/uid.xml'
42 |     head_depth_uri = base_path + u'/OEBPS/toc/head/depth.xml'
43 | 
44 |     # doc_title
45 |     doc_title_title_uri = base_path + u'/OEBPS/toc/docTitle/title.xml'
46 | 
47 |     ### ncx
48 |     ncx_item_uri = base_path + u'/OEBPS/toc/navMap/item.xml'
49 | 
50 |     # Directory
51 |     directory_item_root_uri = base_path + u'/directory/item_root.html'
52 |     directory_item_leaf_uri = base_path + u'/directory/item_leaf.html'
53 |     directory_chapter_uri = base_path + u'/directory/chapter.html'
54 |     directory_finish_chapter_uri = base_path + u'/directory/finish_chapter.html'
55 |     directory_content_uri = base_path + u'/directory/content.html'
56 | 
57 |     # Default
58 |     book_id = u'create_by_yaozeyuan'
59 |     book_title = u'no_title'
60 |     creator = u'zhihuhelp'
61 |     uid = u'urn:uuid:create-by-yao-ze-yuan-Tsingtao'
62 |     identifier = u''
63 |     language = u'zh'
64 | 


--------------------------------------------------------------------------------
/src/lib/epub/tools/epub_path.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | import sys
 4 | from ..zhihuhelp_tools.path import Path
 5 | 
 6 | class EpubPath(object):
 7 |     file_path = os.path.realpath(__file__)
 8 |     base_path = os.path.dirname(file_path)
 9 |     base_path = unicode(os.path.dirname(base_path).decode(sys.stdout.encoding))  # 库文件位置
10 | 
11 |     work_path = base_path  # 默认以库位置作为初始工作地址
12 |     output_path = os.path.dirname(work_path)  # 默认以工作目录的上一级为输出目录
13 |     meta_inf_path = work_path + u'/META-INF'
14 |     oebps_path = work_path + u'/OEBPS'
15 |     image_path = work_path + u'/images'
16 |     html_path = oebps_path + u'/html'
17 |     style_path = oebps_path + u'/style'
18 | 
19 |     @staticmethod
20 |     def set_work_path(work_path):
21 |         EpubPath.work_path = work_path
22 |         EpubPath.meta_inf_path = EpubPath.work_path + u'/META-INF'
23 |         EpubPath.oebps_path = EpubPath.work_path + u'/OEBPS'
24 |         EpubPath.image_path = EpubPath.oebps_path + u'/images'
25 |         EpubPath.html_path = EpubPath.oebps_path + u'/html'
26 |         EpubPath.style_path = EpubPath.oebps_path + u'/style'
27 |         return
28 | 
29 |     @staticmethod
30 |     def set_output_path(output_path):
31 |         EpubPath.output_path = output_path
32 |         return
33 | 
34 |     @staticmethod
35 |     def init_epub_path(work_path):
36 |         """
37 |         设置工作地址，根据该路径进行创建文件夹，生成epub，压缩等操作
38 |         """
39 |         EpubPath.set_work_path(work_path)
40 |         Path.mkdir(EpubPath.meta_inf_path)
41 |         Path.mkdir(EpubPath.oebps_path)
42 |         Path.chdir(EpubPath.oebps_path)
43 |         Path.mkdir(EpubPath.html_path)
44 |         Path.mkdir(EpubPath.image_path)
45 |         Path.mkdir(EpubPath.style_path)
46 |         return
47 | 
48 |     @staticmethod
49 |     def reset_path():
50 |         Path.chdir(EpubPath.work_path)
51 |         return
52 | 


--------------------------------------------------------------------------------
/src/lib/epub/zhihuhelp_tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __version__ = '2015-12-19'
3 | 


--------------------------------------------------------------------------------
/src/lib/epub/zhihuhelp_tools/debug.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import logging
 3 | import logging.handlers
 4 | import sys
 5 | 
 6 | 
 7 | class Debug(object):
 8 |     u"""
 9 |     打印日志
10 |     """
11 |     handler = logging.StreamHandler()  # 实例化handler
12 |     fmt = '%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(message)s'
13 | 
14 |     formatter = logging.Formatter(fmt)  # 实例化formatter
15 |     handler.setFormatter(formatter)  # 为handler添加formatter
16 | 
17 |     logger = logging.getLogger('main')  # 获取名为main的logger
18 |     logger.addHandler(handler)  # 为logger添加handler
19 |     logger.setLevel(logging.INFO)  # 发布时关闭log输出
20 | 
21 |     # 辅助函数
22 |     @staticmethod
23 |     def print_in_single_line(text=''):
24 |         try:
25 |             sys.stdout.write("\r" + " " * 60 + '\r')
26 |             sys.stdout.flush()
27 |             sys.stdout.write(text)
28 |             sys.stdout.flush()
29 |         except:
30 |             pass
31 |         return
32 | 
33 |     @staticmethod
34 |     def print_dict(data={}, key='', prefix=''):
35 |         try:
36 |             if isinstance(data, dict):
37 |                 for key in data:
38 |                     Debug.print_dict(data[key], key, prefix + '   ')
39 |             else:
40 |                 if isinstance(data, basestring):
41 |                     print prefix + unicode(key) + ' => ' + data
42 |                 else:
43 |                     print prefix + unicode(key) + ' => ' + unicode(data)
44 |         except UnicodeEncodeError as error:
45 |             Debug.logger.info(u'编码异常')
46 |             Debug.logger.info(u'系统默认编码为：' + sys.getdefaultencoding())
47 |             # raise error
48 |         return
49 | 


--------------------------------------------------------------------------------
/src/lib/epub/zhihuhelp_tools/path.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | import shutil
  4 | import locale
  5 | 
  6 | 
  7 | class Path(object):
  8 |     base_path = unicode(os.path.abspath('.').decode(locale.getpreferredencoding()))  # 初始地址,不含分隔符
  9 | 
 10 |     config_path = base_path + u'/config.json'
 11 | 
 12 |     www_css = base_path + u'/www/css'
 13 |     www_image = base_path + u'/www/images'
 14 | 
 15 |     html_pool_path = base_path + u'/知乎电子书临时资源库/知乎网页池'
 16 |     image_pool_path = base_path + u'/知乎电子书临时资源库/知乎图片池'
 17 |     result_path = base_path + u'./知乎助手生成的电子书'
 18 | 
 19 |     @staticmethod
 20 |     def reset_path():
 21 |         Path.chdir(Path.base_path)
 22 |         return
 23 | 
 24 |     @staticmethod
 25 |     def pwd():
 26 |         print os.path.realpath('.')
 27 |         return
 28 | 
 29 |     @staticmethod
 30 |     def get_pwd():
 31 |         path = unicode(os.path.abspath('.').decode(locale.getpreferredencoding()))
 32 |         return path
 33 | 
 34 |     @staticmethod
 35 |     def mkdir(path):
 36 |         try:
 37 |             os.mkdir(path)
 38 |         except OSError:
 39 |             # Debug.logger.debug(u'指定目录已存在')
 40 |             pass
 41 |         return
 42 | 
 43 |     @staticmethod
 44 |     def chdir(path):
 45 |         try:
 46 |             os.chdir(path)
 47 |         except OSError:
 48 |             # Debug.logger.debug(u'指定目录不存在，自动创建之')
 49 |             Path.mkdir(path)
 50 |             os.chdir(path)
 51 |         return
 52 | 
 53 |     @staticmethod
 54 |     def rmdir(path):
 55 |         if path:
 56 |             shutil.rmtree(path, ignore_errors=True)
 57 |         return
 58 | 
 59 |     @staticmethod
 60 |     def copy(src, dst):
 61 |         if not os.path.exists(src):
 62 |             # Debug.logger.info('{}不存在，自动跳过'.format(src))
 63 |             return
 64 |         if os.path.isdir(src):
 65 |             shutil.copytree(src, dst)
 66 |         else:
 67 |             shutil.copy(src=src, dst=dst)
 68 |         return
 69 | 
 70 |     @staticmethod
 71 |     def get_filename(src):
 72 |         return os.path.basename(src)
 73 | 
 74 |     @staticmethod
 75 |     def init_base_path():
 76 |         Path.base_path = unicode(os.path.abspath('.').decode(locale.getpreferredencoding()))
 77 |         Path.config_path = Path.base_path + u'/config.json'
 78 |         Path.sql_path = Path.base_path + u'/db/zhihuhelp.sql'
 79 | 
 80 |         Path.www_css = Path.base_path + u'/www/css'
 81 |         Path.www_image = Path.base_path + u'/www/images'
 82 | 
 83 |         Path.html_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎网页池'
 84 |         Path.image_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎图片池'
 85 |         Path.result_path = Path.base_path + u'./知乎助手生成的电子书'
 86 | 
 87 |         return
 88 | 
 89 |     @staticmethod
 90 |     def init_work_directory():
 91 |         Path.reset_path()
 92 |         Path.mkdir(u'./知乎助手生成的电子书')
 93 |         Path.mkdir(u'./知乎电子书临时资源库')
 94 |         Path.chdir(u'./知乎电子书临时资源库')
 95 |         Path.mkdir(u'./知乎网页池')
 96 |         Path.mkdir(u'./知乎图片池')
 97 |         Path.reset_path()
 98 |         return
 99 | 
100 |     @staticmethod
101 |     def is_file(path):
102 |         return os.path.isfile(path)
103 | 


--------------------------------------------------------------------------------
/src/lib/oauth/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from .client import ZhihuClient
 4 | from .exception import (
 5 |     ZhihuWarning, IgnoreErrorDataWarning, CantGetTicketsWarning,
 6 |     ZhihuException, UnexpectedResponseException, GetDataErrorException,
 7 |     NeedCaptchaException, NeedLoginException, IdMustBeIntException,
 8 |     UnimplementedException,
 9 | )
10 | from .helpers import shield, SHIELD_ACTION
11 | from .zhcls import (
12 |     Activity, ActType, Answer, Article, Comment, Collection, Column, Comment,
13 |     Live, LiveBadge, LiveTag, LiveTicket,
14 |     Me, Message, People, Question, Topic, Whisper, ANONYMOUS
15 | )
16 | 
# Names exported by ``from zhihu_oauth import *``.
# NOTE(review): several names imported above (e.g. IgnoreErrorDataWarning,
# CantGetTicketsWarning, NeedLoginException, IdMustBeIntException,
# UnimplementedException) are not listed here - confirm that is intended.
__all__ = ['ZhihuClient', 'ANONYMOUS', 'Activity', 'Answer', 'ActType',
           'Article', 'Collection', 'Column', 'Comment',
           'Live', 'LiveBadge', 'LiveTag', 'LiveTicket',
           'Me', 'Message',
           'People', 'Question', 'Topic', 'Whisper',
           'ZhihuException', 'ZhihuWarning',
           'NeedCaptchaException', 'UnexpectedResponseException',
           'GetDataErrorException',
           'SHIELD_ACTION', 'shield']
26 | 
27 | __version__ = '0.0.30.post1'
28 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/helpers.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | from __future__ import unicode_literals
  4 | 
  5 | import warnings
  6 | 
  7 | import requests.packages.urllib3 as urllib3
  8 | 
  9 | from .zhcls.utils import SimpleEnum
 10 | from .zhcls.generator import BaseGenerator, ActivityGenerator
 11 | from .exception import ZhihuException, ZhihuWarning
 12 | 
 13 | 
# Public names of this module.
__all__ = ['SHIELD_ACTION', 'shield']


# Actions shield() can take once its durability is used up:
# EXCEPTION, PASS or STOP.
SHIELD_ACTION = SimpleEnum(
    ['EXCEPTION', 'PASS', 'STOP']
)
 20 | """
SHIELD_ACTION 是用于表示 shield 抵挡 Exception 达到最大次数后的动作的枚举类，取值如下：
 22 | 
 23 | ================= ====================
 24 | 常量名              说明
 25 | ================= ====================
 26 | EXCEPTION          抛出异常
 27 | PASS               跳过，获取下一个数据
 28 | STOP               结束处理
 29 | ================= ====================
 30 | """
 31 | 
 32 | 
 33 | def shield(inner, durability=3, start_at=0, action=SHIELD_ACTION.EXCEPTION):
 34 |     """
 35 |     shield 函数用于自动处理知乎的各种生成器
 36 |     （如 :any:`People.followers`, :any:`Question.answers`） 在获取分页数据时出错的情况。
 37 | 
 38 |     ..  warning:: 用户动态的生成器因为获取方式比较特殊，无法被 shield 保护
 39 | 
 40 |     用法：
 41 | 
 42 |     比如我们想获取关注了某个专栏的用户分别关注了哪些话题……
 43 | 
 44 |     ..  code-block:: python
 45 | 
 46 |         column = client.column('zijingnotes')
 47 |         result = []
 48 |         for user in shield(column.followers, action=SHIELD_ACTION.PASS):
 49 |             L = []
 50 |             print("Start proc user", user.name)
 51 |             if user.over:
 52 |                 print(user.over_reason)
 53 |                 continue
 54 |             for topic in shield(user.following_topics, action=SHIELD_ACTION.PASS):
 55 |                 print("Add topic", topic.name)
 56 |                 L.append(topic.name)
 57 |             result.append(L)
 58 | 
 59 |         # output result
 60 | 
 61 |     :param inner: 需要被保护的生成器
 62 |     :param int durability: 耐久度，表示获取同一数据最多连续出错几次
 63 |     :param int start_at: 从第几个数据开始获取
 64 |     :param action: 当耐久度消耗完后的动作，参见 :any:`SHIELD_ACTION`，默认动作是抛出异常
 65 |     :return: 新的生成器……
 66 |     """
 67 |     if not isinstance(inner, BaseGenerator):
 68 |         raise ValueError('First argument must be Zhihu Generator Classes')
 69 |     if isinstance(inner, ActivityGenerator):
 70 |         raise ValueError(' Activity Generator is the only one can\'t be shield')
 71 |     offset = start_at
 72 |     hp = durability
 73 |     while True:
 74 |         i = -1
 75 |         try:
 76 |             for i, x in enumerate(inner.jump(offset)):
 77 |                 yield x
 78 |                 hp = durability
 79 |             break
 80 |         except (ZhihuException, urllib3.exceptions.MaxRetryError) as e:
 81 |             offset += i + 1
 82 |             hp -= 1
 83 |             warnings.warn(
 84 |                 "[{type}: {e}] be shield when get NO.{offset} data".format(
 85 |                     type=e.__class__.__name__,
 86 |                     e=e,
 87 |                     offset=offset
 88 |                 ),
 89 |                 ZhihuWarning
 90 |             )
 91 |             if hp == 0:
 92 |                 if action is SHIELD_ACTION.EXCEPTION:
 93 |                     raise e
 94 |                 elif action is SHIELD_ACTION.PASS:
 95 |                     offset += 1
 96 |                     hp = durability
 97 |                 elif action is SHIELD_ACTION.STOP:
 98 |                     break
 99 |                 else:
100 |                     raise e
101 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from .im_android import ImZhihuAndroidClient
 4 | from .before_login_auth import BeforeLoginAuth
 5 | from .zhihu_oauth import ZhihuOAuth
 6 | from .token import ZhihuToken
 7 | from .utils import login_signature
 8 | 
 9 | __all__ = ['ImZhihuAndroidClient', 'BeforeLoginAuth', 'ZhihuOAuth',
10 |            'ZhihuToken', 'login_signature']
11 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/before_login_auth.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | # from __future__ import unicode_literals
 4 | 
 5 | from .im_android import ImZhihuAndroidClient
 6 | 
 7 | __all__ = ['BeforeLoginAuth']
 8 | 
 9 | 
class BeforeLoginAuth(ImZhihuAndroidClient):
    def __init__(self, client_id, api_version=None, app_version=None,
                 app_build=None, app_za=None, uuid=None, ua=None):
        """
        ..  inheritance-diagram:: BeforeLoginAuth
            :parts: 1

        Auth used for the basic verification that happens before login.
        On top of :class:`.ImZhihuAndroidClient` it also sends the
        ``client_id``.

        :param str|unicode client_id: client ID

        ..  seealso::
            The parameters below are documented in
            :meth:`.ImZhihuAndroidClient.__init__`

        :param str|unicode api_version:
        :param str|unicode app_version:
        :param str|unicode app_build:
        :param str|unicode app_za:
        :param str|unicode uuid:
        :param str|unicode ua:
        """
        self._client_id = client_id
        super(BeforeLoginAuth, self).__init__(
            api_version=api_version,
            app_version=app_version,
            app_build=app_build,
            app_za=app_za,
            uuid=uuid,
            ua=ua,
        )

    def __call__(self, r):
        """
        ..  note::
            requests calls this method automatically.

        Adds an HTTP ``Authorization`` header carrying the client ID to the
        PreparedRequest.

        The parent class is invoked first, so the headers describing the
        APP are added as well.

        ..  seealso::
            :meth:`.ImZhihuAndroidClient.__call__`
        """
        prepared = super(BeforeLoginAuth, self).__call__(r)
        authorization = 'oauth {0}'.format(str(self._client_id))
        prepared.headers['Authorization'] = authorization
        return prepared
52 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/im_android.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | # from __future__ import unicode_literals
 4 | 
 5 | from requests.auth import AuthBase
 6 | 
 7 | from .setting import (
 8 |     API_VERSION, APP_VERSION, APP_BUILD, APP_ZA, UUID, DEFAULT_UA
 9 | )
10 | 
11 | __all__ = ['ImZhihuAndroidClient']
12 | 
13 | 
class ImZhihuAndroidClient(AuthBase):
    def __init__(self, api_version=None, app_version=None,
                 app_build=None, app_za=None, uuid=None, ua=None):
        """
        ..  inheritance-diagram:: ImZhihuAndroidClient

        Auth class that imitates the special parameters sent by the
        official Zhihu Android client.

        Every argument that is falsy (``None``, empty string, ...) is
        replaced by the matching default from the settings module.

        :param str|unicode api_version: API version in use
        :param str|unicode app_version: client (APK) version
        :param str|unicode app_build: APP build type (?)
        :param str|unicode app_za: APP misc data, a urlencoded params dict
        :param str|unicode uuid: purpose currently unknown
        :param str|unicode ua: User-Agent; the newer API validates it
        """
        self._api_version = api_version if api_version else API_VERSION
        self._app_version = app_version if app_version else APP_VERSION
        self._app_build = app_build if app_build else APP_BUILD
        self._app_za = app_za if app_za else APP_ZA
        self._uuid = uuid if uuid else UUID
        self._ua = ua if ua else DEFAULT_UA

    def __call__(self, r):
        """
        ..  note::
            requests calls this method automatically.

        Adds the extra headers needed to imitate the Android client to the
        PreparedRequest and returns that same request.

        ..  seealso::
            The values written here are described in :meth:`__init__`
        """
        # Kept as an ordered sequence so headers are written in a fixed order.
        app_headers = (
            ('x-api-version', self._api_version),
            ('x-app-version', self._app_version),
            ('x-app-build', self._app_build),
            ('x-app-za', self._app_za),
            ('x-uuid', self._uuid),
            ('User-Agent', self._ua),
        )
        for header_name, header_value in app_headers:
            r.headers[header_name] = header_value
        return r
54 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/setting.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | # from __future__ import unicode_literals
  4 | 
  5 | try:
  6 |     # python2
  7 |     from urllib import urlencode
  8 | except ImportError:
  9 |     # python3
 10 |     # noinspection PyUnresolvedReferences,PyCompatibility
 11 |     from urllib.parse import urlencode
 12 | 
# Root URL of the Zhihu API; the login URLs below are built from it.
ZHIHU_API_ROOT = 'https://api.zhihu.com'
"""知乎 API 的根目录"""

# ------- Zhihu OAuth Keys -------

# Default CLIENT ID, used when ZhihuClient is constructed without one.
CLIENT_ID = '8d5227e0aaaa4797a763ac64e0c3b8'
"""
默认的 CLIENT ID。
如果 :class:`.ZhihuClient` 构造时没有提供 CLIENT ID，则使用这个值。
"""

# Default SECRET, used when ZhihuClient is constructed without one.
APP_SECRET = 'ecbefbf6b17e47ecb9035107866380'
"""
默认的 SECRET。
如果 :class:`.ZhihuClient` 构造时没有提供 SECRET，则使用这个值。
"""

# ------- Zhihu Client Info -------

# API version reported when imitating the official Android client.
# Fallback for ImZhihuAndroidClient when no api_version is given.
API_VERSION = '3.0.41'
"""
模拟 Android 官方客户端使用的参数，表示使用的 API 版本。
如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 api_version，则使用这个值。
"""

# APP version reported when imitating the official Android client.
# Fallback for ImZhihuAndroidClient when no app_version is given.
APP_VERSION = '4.12.0'
"""
模拟 Android 官方客户端使用的参数，表示使用的 APP 版本。
如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_version，则使用这个值。
"""

# APP build type reported when imitating the official Android client.
# Fallback for ImZhihuAndroidClient when no app_build is given.
APP_BUILD = 'release'
"""
模拟 Android 官方客户端使用的参数，表示使用的 APP 的 Build 类型。
如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_build，则使用这个值。
"""

# A newly added ID whose meaning is not yet known (per the original author).
UUID = 'AHBCVBVCDAtLBfZCo1SYbPj8SgivYjqcGCs='
"""
新加的一个东西，暂时不知道是啥的 ID
"""

# Default User-Agent; newer API versions check the UA.
# NOTE(review): the version in this string duplicates APP_VERSION; keep the
# two in sync when bumping.
DEFAULT_UA = 'Futureve/4.12.0 Mozilla/5.0 ' \
             '(Linux; Android 6.0; Google Nexus 5 - 6.0.0 - ' \
             'API 23 - 1080x1920 Build/MRA58K; wv) ' \
             'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0'
"""
新版本的 API 开始检查 UA了。
"""

# Misc APP data reported when imitating the official Android client, stored
# as a url-encoded dict. Fallback for ImZhihuAndroidClient when no app_za is
# given.
APP_ZA = urlencode({
    'OS': 'Android',
    'Release': '6.0',
    'Model': 'Google Nexus 5 - 6.0.0 - API 23 - 1080x1920',
    'VersionName': APP_VERSION,
    'VersionCode': '450',
    'Width': '1080',
    'Height': '1920',
    'Installer': 'Google Play',
})
"""
模拟 Android 官方客户端使用的参数，表示使用的 APP 的 杂项数据。
如果 :class:`.ImZhihuAndroidClient` 构造时没有提供 app_za，则使用这个值。

..  note::
    它是一个 url encode 后的 dict

    参见 :meth:`.ImZhihuAndroidClient.__init__`
"""

# ------- Zhihu API URL for Login -------

# Captcha endpoint. GET: is a captcha required; PUT: fetch the captcha;
# POST: submit the captcha.
CAPTCHA_URL = ZHIHU_API_ROOT + '/captcha'
"""
验证码相关

:GET: 是否需要验证码
:PUT: 获取验证码
:POST: 提交验证码
"""

# sign_in - POST - user login

# OAuth login URL.
LOGIN_URL = ZHIHU_API_ROOT + '/sign_in'
"""
OAuth 登录地址
"""

# Template of the login POST data. Only username and password need to be
# filled in by hand: client_id is filled in by ZhihuClient, and timestamp and
# signature are filled in by login_signature, which ZhihuClient calls.
LOGIN_DATA = {
    'grant_type': 'password',
    'source': 'com.zhihu.android',
    'client_id': '',
    'signature': '',
    'timestamp': '',
    'username': '',
    'password': '',
}
"""
登录数据格式。需要填充的只有用户名和密码。

`client_id` 会由 :class:`.ZhihuClient` 填写。

`timestamp` 和 `signature` 会由 :class:`.ZhihuClient` 内部调用的
:func:`.login_signature` 自动填写。
"""
118 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/token.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | from __future__ import unicode_literals
  4 | 
  5 | import json
  6 | import pickle
  7 | import time
  8 | 
  9 | from ..exception import MyJSONDecodeError
 10 | 
 11 | __all__ = ['ZhihuToken']
 12 | 
 13 | 
 14 | class ZhihuToken:
 15 |     def __init__(self, user_id, uid, access_token, expires_in, token_type,
 16 |                  refresh_token, cookie, lock_in=None, unlock_ticket=None):
 17 |         """
 18 |         知乎令牌。
 19 | 
 20 |         尽量不要直接使用这个类，而是用 :meth:`ZhihuToken.from_str` 或
 21 |         :meth:`ZhihuToken.form_dict` 或
 22 |         :meth:`ZhihuToken.from_file` 方法来构造。
 23 | 
 24 |         ..  note::
 25 | 
 26 |             本类仅在 :class:`.ZhihuClient` 类内使用，一般用户不需要了解。
 27 | 
 28 |         :param str|unicode user_id: 用户 ID
 29 |         :param int uid: 某个数字型用户 ID，貌似没啥用
 30 |         :param str|unicode access_token: 最重要的访问令牌
 31 |         :param int expires_in: 过期时间
 32 |         :param str|unicode token_type: 令牌类型
 33 |         :param str|unicode refresh_token: 刷新令牌
 34 |         :param str|unicode cookie: 登录成功后需要加上这段 Cookies
 35 |         :param int lock_in: 不知道用处
 36 |         :param str|unicode unlock_ticket: 不知道用处
 37 |         """
 38 |         self._create_at = time.time()
 39 |         self._user_id = uid
 40 |         self._uid = user_id
 41 |         self._access_token = access_token
 42 |         self._expires_in = expires_in
 43 |         self._expires_at = self._create_at + self._expires_in
 44 |         self._token_type = token_type
 45 |         self._refresh_token = refresh_token
 46 |         self._cookie = cookie
 47 | 
 48 |         # 以下两个属性暂时不知道用处
 49 |         self._lock_in = lock_in
 50 |         self._unlock_ticket = unlock_ticket
 51 | 
 52 |     @staticmethod
 53 |     def from_str(json_str):
 54 |         """
 55 |         从字符串读取 token。
 56 | 
 57 |         :param str|unicode json_str: 一个合法的代表知乎 Token 的 JSON 字符串
 58 |         :rtype: :class:`ZhihuToken`
 59 |         :raise ValueError: 提供的参数不合法时
 60 |         """
 61 |         try:
 62 |             return ZhihuToken.from_dict(json.loads(json_str))
 63 |         except (MyJSONDecodeError, ValueError):
 64 |             raise ValueError(
 65 |                 '"{json_str}" is NOT a valid zhihu token json string.'.format(
 66 |                     json_str=json_str
 67 |                 ))
 68 | 
 69 |     @staticmethod
 70 |     def from_dict(json_dict):
 71 |         """
 72 |         从字典读取 token。
 73 | 
 74 |         :param dict json_dict: 一个代表知乎 Token 的字典
 75 |         :rtype: :class:`ZhihuToken`
 76 |         :raise ValueError: 提供的参数不合法时
 77 |         """
 78 |         try:
 79 |             return ZhihuToken(**json_dict)
 80 |         except TypeError:
 81 |             raise ValueError(
 82 |                 '"{json_dict}" is NOT a valid zhihu token json.'.format(
 83 |                     json_dict=json_dict
 84 |                 ))
 85 | 
 86 |     @staticmethod
 87 |     def from_file(filename):
 88 |         """
 89 |         从文件读取 token。
 90 | 
 91 |         :param str|unicode filename: 文件名
 92 |         :rtype: :class:`ZhihuToken`
 93 |         """
 94 |         with open(filename, 'rb') as f:
 95 |             return pickle.load(f)
 96 | 
 97 |     def save(self, filename):
 98 |         """
 99 |         将 token 保存成文件。
100 | 
101 |         :param str|unicode filename: 文件名
102 |         :return: 无返回值
103 |         """
104 |         with open(filename, 'wb') as f:
105 |             pickle.dump(self, f)
106 | 
107 |     @property
108 |     def user_id(self):
109 |         """
110 |         :return: 获取用户 ID
111 |         :rtype: str
112 |         """
113 |         return self._user_id
114 | 
115 |     @property
116 |     def type(self):
117 |         """
118 |         :return: 获取验证类型
119 |         :rtype: str
120 |         """
121 |         return self._token_type
122 | 
123 |     @property
124 |     def token(self):
125 |         """
126 |         :return: 获取访问令牌
127 |         :rtype: str
128 |         """
129 |         return self._access_token
130 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from __future__ import unicode_literals
 4 | 
 5 | import hashlib
 6 | import hmac
 7 | import time
 8 | 
 9 | __all__ = ['login_signature']
10 | 
11 | 
def login_signature(data, secret):
    """
    Sign the login request data.

    ``data`` is modified in place: a ``timestamp`` entry (current Unix time
    as a decimal string) and a ``signature`` entry are written into it. The
    signature is the hex HMAC-SHA1, keyed with ``secret``, of the
    concatenation ``grant_type + client_id + source + timestamp``.

    :param dict data: POST data; must already contain ``grant_type``,
      ``client_id`` and ``source``
    :param str|unicode secret: APP SECRET
    :return: the same ``data`` dict, now containing ``timestamp`` and
      ``signature``
    :raise KeyError: when one of the required keys is missing
    """
    data['timestamp'] = str(int(time.time()))

    # The field order is part of the signature and must not change.
    signed_fields = ('grant_type', 'client_id', 'source', 'timestamp')
    params = ''.join(data[field] for field in signed_fields)

    data['signature'] = hmac.new(
        secret.encode('utf-8'),
        params.encode('utf-8'),
        hashlib.sha1
    ).hexdigest()

    # Returned so the documented contract holds; existing callers that rely
    # on the in-place mutation and ignore the result are unaffected.
    return data
34 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/oauth/zhihu_oauth.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | # from __future__ import unicode_literals
 4 | 
 5 | from .im_android import ImZhihuAndroidClient
 6 | from .token import ZhihuToken
 7 | 
 8 | __all__ = ['ZhihuOAuth']
 9 | 
10 | 
class ZhihuOAuth(ImZhihuAndroidClient):
    def __init__(self, token, api_version=None, app_version=None,
                 app_build=None, app_za=None, uuid=None, ua=None):
        """
        ..  inheritance-diagram:: ZhihuOAuth

        On top of :class:`.ImZhihuAndroidClient` this Auth also sends the
        login token.

        :param ZhihuToken token: token obtained after a successful login

        ..  seealso::
            The parameters below are documented in
            :meth:`.ImZhihuAndroidClient.__init__`

        :param api_version:
        :param app_version:
        :param app_build:
        :param app_za:
        :param uuid: forwarded to the parent class; the parent default is
          used when omitted
        :param ua: forwarded to the parent class; the parent default is
          used when omitted
        """
        assert isinstance(token, ZhihuToken)
        # uuid / ua are forwarded so this class accepts the same client
        # parameters as its parent; leaving them out keeps the old behaviour.
        super(ZhihuOAuth, self).__init__(
            api_version, app_version, app_build, app_za, uuid, ua)
        self._token = token

    def __call__(self, r):
        """
        ..  note::
            requests calls this method automatically.

        Adds an HTTP ``Authorization`` header to the PreparedRequest, built
        from the capitalized token type followed by the access token.

        The parent class is invoked first, so the headers describing the
        APP are added as well.

        ..  seealso::
            :meth:`.ImZhihuAndroidClient.__call__`
        """
        r = super(ZhihuOAuth, self).__call__(r)
        r.headers['Authorization'] = '{type} {token}'.format(
            type=str(self._token.type.capitalize()),
            token=str(self._token.token)
        )
        return r
54 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/setting.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from __future__ import unicode_literals
 4 | 
 5 | import re
 6 | import requests.adapters
 7 | 
# Shared HTTP adapter with a retry policy: at most 10 retries in total, and
# responses with status 403, 408, 500 or 502 are listed as retryable.
ADAPTER_WITH_RETRY = requests.adapters.HTTPAdapter(
    max_retries=requests.adapters.Retry(
        total=10,
        status_forcelist=[403, 408, 500, 502]
    )
)
14 | 
CAPTCHA_FILE = 'captcha.gif'
"""
Default file name used to store a requested captcha; currently
``captcha.gif`` in the current directory.

Only used by :meth:`.ZhihuClient.login_in_terminal`.
"""

# NOTE: the dots in the host names below are escaped on purpose. An unescaped
# ``.`` matches any character, which let look-alike hosts such as
# ``wwwXzhihu.com`` pass as Zhihu URLs.

re_answer_url = re.compile(
    r'^(?:https?://)?www\.zhihu\.com/question/\d+/answer/(\d+)/?$')
"""
Regex for answer URLs, used by :any:`ZhihuClient.from_url`.
"""

re_article_url = re.compile(
    r'^(?:https?://)?zhuanlan\.zhihu\.com/p/(\d+)/?$')
"""
Regex for article URLs, used by :any:`ZhihuClient.from_url`.
"""

re_collection_url = re.compile(
    r'^(?:https?://)?www\.zhihu\.com/collection/(\d+)/?$')
"""
Regex for collection URLs, used by :any:`ZhihuClient.from_url`.
"""

# TODO: learn how column slugs are formed in detail and update this regex
re_column_url = re.compile(
    r'^(?:https?://)?zhuanlan\.zhihu\.com/([^/ ]+)/?$')
"""
Regex for column URLs, used by :any:`ZhihuClient.from_url`.
"""

re_live_url = re.compile(r'^(?:https?://)?www\.zhihu\.com/lives/(\d+)/?$')
"""
Regex for Live URLs, used by :any:`ZhihuClient.from_url`.
"""

re_people_url = re.compile(
    r'^(?:https?://)?www\.zhihu\.com/people/([^/ ]+)/?$')
"""
Regex for user URLs, used by :any:`ZhihuClient.from_url`.
"""

re_question_url = re.compile(
    r'^(?:https?://)?www\.zhihu\.com/question/(\d+)/?$')
"""
Regex for question URLs, used by :any:`ZhihuClient.from_url`.
"""

re_topic_url = re.compile(r'^(?:https?://)?www\.zhihu\.com/topic/(\d+)/?$')
"""
Regex for topic URLs, used by :any:`ZhihuClient.from_url`.
"""


RE_FUNC_MAP = {
    # RE             func      int id
    re_answer_url: ('answer', True),
    re_article_url: ('article', True),
    re_collection_url: ('collection', True),
    re_column_url: ('column', False),
    re_live_url: ('live', True),
    re_people_url: ('people', False),
    re_question_url: ('question', True),
    re_topic_url: ('topic', True),
}
"""
Mapping from the regexes above to :any:`ZhihuClient` methods.

Keys are compiled regexes; each value is a 2-tuple of the method name and
whether the captured ``id`` has to be converted to an integer.
"""
82 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from __future__ import unicode_literals
 4 | 
 5 | import functools
 6 | 
 7 | from .exception import NeedLoginException, IdMustBeIntException
 8 | 
 9 | __all__ = ['need_login', 'int_id']
10 | 
11 | 
def need_login(func):
    """
    Decorator for :class:`.ZhihuClient` methods that may only be used while
    logged in.

    The wrapped method runs only when ``self.is_login()`` is truthy;
    otherwise :class:`.NeedLoginException` is raised with the method name.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Guard clause: refuse to run without a logged-in client.
        if not self.is_login():
            raise NeedLoginException(func.__name__)
        return func(self, *args, **kwargs)

    return wrapper
25 | 
26 | 
def int_id(func):
    """
    Decorator for :class:`.ZhihuClient` methods that need an integer ID to
    build the corresponding Zhihu object. It is simply an enforced type
    check on the first positional argument.

    :raise: :class:`.IdMustBeIntException` when the first positional
      argument is missing or is not an ``int``
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Only positional arguments are inspected; a missing one counts as
        # ``None`` and therefore fails the check.
        first_arg = args[0] if args else None
        if isinstance(first_arg, int):
            return func(self, *args, **kwargs)
        raise IdMustBeIntException(func)

    return wrapper
45 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from .activity import Activity, ActType
 4 | from .answer import Answer
 5 | from .article import Article
 6 | from .collection import Collection
 7 | from .column import Column
 8 | from .comment import Comment
 9 | from .live import Live, LiveBadge, LiveTag, LiveTicket
10 | from .me import Me
11 | from .message import Message
12 | from .people import People, ANONYMOUS
13 | from .question import Question
14 | from .topic import Topic
15 | from .whisper import Whisper
16 | 
17 | __all__ = ['Activity', 'ActType', 'Answer', 'Article', 'Collection', 'Column',
18 |            'Comment', 'Live', 'LiveBadge', 'LiveTag', 'LiveTicket', 'Me',
19 |            'Message', 'People', 'ANONYMOUS', 'Question', 'Topic', 'Whisper']
20 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/article.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | from __future__ import unicode_literals
  4 | 
  5 | from .base import Base
  6 | from .generator import generator_of
  7 | from .other import other_obj
  8 | from .normal import normal_attr
  9 | from .streaming import streaming
 10 | from .utils import common_save
 11 | from .urls import (
 12 |     ARTICLE_DETAIL_URL,
 13 |     ARTICLE_COMMENTS_URL,
 14 | )
 15 | 
 16 | __all__ = ['Article']
 17 | 
 18 | 
class Article(Base):
    """
    A Zhihu column article.

    Most attributes are declared as stub properties: the decorator supplies
    the value and the ``return None`` body is only the fallback result.
    """

    def __init__(self, aid, cache, session):
        """
        :param aid: article ID
        :param dict cache: already known attributes of this article
        :param session: network session used for requests
        """
        super(Article, self).__init__(aid, cache, session)

    def _build_url(self):
        """Return the detail API URL of this article."""
        return ARTICLE_DETAIL_URL.format(self.id)

    # ----- simple info -----

    @property
    @other_obj('people')
    def author(self):
        """Author of the article (presumably a People object, confirm
        against ``other_obj``)."""
        return None

    @property
    @streaming()
    def can_comment(self):
        """
        ..  seealso:: :any:`Answer.can_comment`
        """
        return None

    @property
    @other_obj()
    def column(self):
        """
        The column this article belongs to.

        .. warning:: The value is None when the article does not belong to
          any column; check it before using its attributes.
        """
        return None

    @property
    @normal_attr()
    def comment_count(self):
        """Value of the ``comment_count`` field."""
        return None

    @property
    @normal_attr()
    def comment_permission(self):
        """
        ..  seealso:: :any:`Answer.comment_permission`
        """
        return None

    @property
    @normal_attr()
    def content(self):
        """Value of the ``content`` field."""
        return None

    @property
    @normal_attr()
    def excerpt(self):
        """Value of the ``excerpt`` field."""
        return None

    @property
    @normal_attr()
    def id(self):
        """Article ID; falls back to the ID this object was built with."""
        return self._id

    @property
    @normal_attr()
    def image_url(self):
        """Value of the ``image_url`` field."""
        return None

    @property
    @streaming(use_cache=False)
    def suggest_edit(self):
        """
        ..  seealso:: :any:`Answer.suggest_edit`
        """
        return None

    @property
    @normal_attr()
    def title(self):
        """Value of the ``title`` field."""
        return None

    @property
    @normal_attr('updated')
    def updated_time(self):
        """Value of the ``updated`` JSON field."""
        return None

    @property
    @normal_attr()
    def voteup_count(self):
        """Value of the ``voteup_count`` field."""
        return None

    # ----- generators -----

    @property
    @generator_of(ARTICLE_COMMENTS_URL)
    def comments(self):
        """Generator over the article's comments, fed from
        ``ARTICLE_COMMENTS_URL``."""
        return None

    # TODO: article.voters, API endpoint unknown

    # ----- other operate -----

    def save(self, path='.', filename=None, invalid_chars=None):
        """
        Identical to :any:`Answer.save`, except that the default file name
        is the article title.

        ..  seealso:: :any:`Answer.save`

        ..  note:: TIPS

            Suggested usage:

            ..  code-block:: python

                for article in column.articles:
                    print(article.title)
                    article.save(column.title)

        """
        # Without any cached attributes, fetch the full data first.
        if self._cache is None:
            self._get_data()
        common_save(path, filename, self.content, self.title, invalid_chars)
138 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/base.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | from __future__ import unicode_literals
  4 | 
  5 | import abc
  6 | 
  7 | from ..exception import MyJSONDecodeError, GetDataErrorException
  8 | 
  9 | __all__ = ['Base']
 10 | 
 11 | 
 12 | class Base(object):
 13 |     def __init__(self, zhihu_obj_id, cache, session):
 14 |         """
 15 | 
 16 |         ..  note:: Cache 与 Data
 17 | 
 18 |             :any:`Base` 类的 ``cache`` 参数表示已知的属性值。一般由另一个对象的
 19 |             JSON 数据中的一个属性充当。
 20 | 
 21 |             比如 :any:`Answer.author` 方法，由于在请求 :any:`Answer` 的数据时，
 22 |             原始 JSON 数据中就有关于作者的一些简单信息。比如 name，id，headline。
 23 |             在使用此方法时就会将这些不完整的数据传递到 ``answer`` 对象 （类型为
 24 |             :any:`People`）的 ``cache`` 中。这样一来，在执行
 25 |             ``answer.author.name`` 时，取出名字的操作可以省去一次网络请求。
 26 | 
 27 |             :any:`normal_attr`，:any:`other_obj` 和 :any:`streaming` 装饰器都会
 28 |             优先使用 ``cache`` 中的数据，当获取失败时才会调用
 29 |             :any:`_get_data` 方法请求数据。
 30 | 
 31 |         :param zhihu_obj_id: 构建知乎对象所用的 ID
 32 |         :param dict cache: 缓存数据，就是已知的这个对象的属性集
 33 |         :param session: 网络请求 Session
 34 |         """
 35 |         self._id = zhihu_obj_id
 36 |         self._cache = cache
 37 |         self._session = session
 38 |         self._data = None
 39 | 
 40 |     def _get_data(self):
 41 |         """
 42 |         调用知乎 API 接口获取数据的主要方法。
 43 | 
 44 |         url 从 :any:`_build_url` 中获取。
 45 | 
 46 |         method 从 :any:`_method` 中获取。
 47 | 
 48 |         params 从 :any:`_build_params` 中获取。
 49 | 
 50 |         data 从 :any:`_build_data` 中获取。
 51 | 
 52 |         :raise: 当返回的数据无法被解析成 JSON
 53 |           或 JSON 中含有 'message' 字段时，会抛出 :any:`GetDataErrorException`
 54 |         """
 55 |         if self._data is None:
 56 |             url = self._build_url()
 57 |             res = self._session.request(
 58 |                 self._method(),
 59 |                 url=url,
 60 |                 params=self._build_params(),
 61 |                 data=self._build_data(),
 62 |             )
 63 |             e = GetDataErrorException(
 64 |                 url,
 65 |                 res,
 66 |                 'a valid Zhihu {0} JSON data'.format(self.__class__.__name__),
 67 |             )
 68 |             try:
 69 |                 json_dict = res.json()
 70 |                 if 'error' in json_dict:
 71 |                     raise e
 72 |                 self._data = json_dict
 73 |             except MyJSONDecodeError:
 74 |                 raise e
 75 | 
 76 |     @abc.abstractmethod
 77 |     def _build_url(self):
 78 |         """
 79 |         子类 **必须** 重载这一函数，提供获取数据的 API URL。
 80 | 
 81 |         一般格式为 ZHIHU_XXX_URL.format(self.id)
 82 |         """
 83 |         return ''
 84 | 
 85 |     # noinspection PyMethodMayBeStatic
 86 |     def _build_params(self):
 87 |         """
 88 |         子类可以重载这一函数，提供请求 API 时要传递的参数。默认值为 None。
 89 |         """
 90 |         return None
 91 | 
 92 |     # noinspection PyMethodMayBeStatic
 93 |     def _build_data(self):
 94 |         """
 95 |         子类可以重载这一函数，提供请求 API 时要传递的数据。默认值为 None。
 96 |         """
 97 |         return None
 98 | 
 99 |     # noinspection PyMethodMayBeStatic
100 |     def _method(self):
101 |         """
102 |         子类可以重载这一函数，提供 HTTP 请求的类型，默认值为 GET。
103 |         """
104 |         return 'GET'
105 | 
106 |     def refresh(self):
107 |         """
108 |         删除自身的 cache 和 data，下一次获取属性会重新向知乎发送请求，获取最新数据。
109 |         """
110 |         self._data = self._cache = None
111 | 
112 |     @property
113 |     def pure_data(self):
114 |         """
115 |         调试用。返回现在对象内的 JSON 数据。
116 | 
117 |         如果对象没有 cache 也没有 data，会自动发送数据请求 data。
118 |         """
119 |         if not self._cache:
120 |             self._get_data()
121 |         return {
122 |             'cache': self._cache,
123 |             'data': self._data,
124 |         }
125 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/column.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from __future__ import unicode_literals
 4 | 
 5 | from .base import Base
 6 | from .generator import generator_of
 7 | from .other import other_obj
 8 | from .normal import normal_attr
 9 | from .urls import (
10 |     COLUMN_DETAIL_URL,
11 |     COLUMN_ARTICLES_URL,
12 |     COLUMN_FOLLOWERS_URL,
13 | )
14 | 
15 | __all__ = ['Column']
16 | 
17 | 
class Column(Base):
    """
    A Zhihu column.

    Most attributes are declared as stub properties: the decorator supplies
    the value and the ``return None`` body is only the fallback result.
    """

    def __init__(self, cid, cache, session):
        """
        :param cid: column ID
        :param dict cache: already known attributes of this column
        :param session: network session used for requests
        """
        super(Column, self).__init__(cid, cache, session)

    def _build_url(self):
        """Return the detail API URL of this column."""
        return COLUMN_DETAIL_URL.format(self.id)

    # ---- simple info -----

    @property
    @normal_attr('articles_count')
    def article_count(self):
        """Value of the ``articles_count`` JSON field."""
        return None

    @property
    def articles_count(self):
        """Alias of :attr:`article_count`."""
        return self.article_count

    @property
    @other_obj('people')
    def author(self):
        """Author of the column (presumably a People object, confirm
        against ``other_obj``)."""
        return None

    @property
    @normal_attr()
    def comment_permission(self):
        """Value of the ``comment_permission`` field."""
        return None

    @property
    @normal_attr()
    def description(self):
        """Value of the ``description`` field."""
        return None

    @property
    @normal_attr('followers')
    def follower_count(self):
        """Value of the ``followers`` JSON field."""
        return None

    @property
    @normal_attr()
    def id(self):
        """Column ID; falls back to the ID this object was built with."""
        return self._id

    @property
    @normal_attr()
    def image_url(self):
        """Value of the ``image_url`` field."""
        return None

    @property
    @normal_attr()
    def title(self):
        """Value of the ``title`` field."""
        return None

    @property
    @normal_attr('updated')
    def updated_time(self):
        """Value of the ``updated`` JSON field."""
        return None

    @property
    def updated(self):
        """Alias of :attr:`updated_time`."""
        return self.updated_time

    # ----- generators -----

    @property
    @generator_of(COLUMN_ARTICLES_URL)
    def articles(self):
        """Generator over the column's articles, fed from
        ``COLUMN_ARTICLES_URL``."""
        return None

    @property
    @generator_of(COLUMN_FOLLOWERS_URL, 'people')
    def followers(self):
        """Generator over the column's followers, fed from
        ``COLUMN_FOLLOWERS_URL``."""
        return None
91 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/message.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from __future__ import unicode_literals
 4 | 
 5 | from .base import Base
 6 | from .other import other_obj
 7 | from .normal import normal_attr
 8 | 
 9 | __all__ = ['Message']
10 | 
11 | 
class Message(Base):
    """
    A single message between two users.

    Attributes are declared as stub properties: the decorator supplies the
    value and the ``return None`` body is only the fallback result.
    """

    def __init__(self, mid, cache, session):
        """
        :param mid: message ID
        :param dict cache: already known attributes of this message
        :param session: network session used for requests
        """
        super(Message, self).__init__(mid, cache, session)

    def _build_url(self):
        """Return an empty URL; no detail API URL is defined for messages
        here."""
        return ''

    # ----- simple info -----

    @property
    @normal_attr()
    def created_time(self):
        """Value of the ``created_time`` field."""
        return None

    @property
    @normal_attr()
    def content(self):
        """Value of the ``content`` field."""
        return None

    @property
    @other_obj('people')
    def sender(self):
        """Sender of the message (presumably a People object, confirm
        against ``other_obj``)."""
        return None

    @property
    @other_obj('people')
    def receiver(self):
        """Receiver of the message (presumably a People object, confirm
        against ``other_obj``)."""
        return None

    def format(self, template='[{time}] {sender} --> {receiver}: {content}'):
        """
        Format the message for output.

        ``{time}`` timestamp; ``{sender}`` name of the sender;
        ``{receiver}`` name of the receiver; ``{content}`` message content.

        See the examples for usage.

        :param str template: format template
        :return: the formatted string
        :rtype: str
        """
        return template.format(
            time=self.created_time,
            sender=self.sender.name,
            receiver=self.receiver.name,
            content=self.content,
        )

    def __str__(self):
        """Return the message formatted with the default template."""
        return self.format()

    # repr() shows the same formatted text as str().
    __repr__ = __str__
65 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/normal.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from __future__ import unicode_literals, print_function
 4 | 
 5 | import functools
 6 | 
 7 | from .utils import can_get_from
 8 | 
 9 | __all__ = ['normal_attr']
10 | 
11 | 
def normal_attr(name_in_json=None):
    """
    Decorator that marks a method as a plain attribute and resolves its
    value from the object's data, fetching it only when necessary.

    Lookup flow:

    1. If ``self._data`` is set, go to 2, otherwise go to 3.
    2. Try to read the attribute from ``self._data``; on success return it,
       otherwise return the result of the decorated function.
    3. Try to read the attribute from ``self._cache``; on success return it.
    4. If the attribute is named ``id``, return the result of the decorated
       function without requesting anything (``id`` is usually needed by
       :any:`_build_url`, so requesting data here would loop forever).
    5. Call ``self._get_data()`` and go to 2. If ``self._data`` is still
       empty afterwards, the result of the decorated function is returned
       as well, so a default provided by the function is never lost.

    ..  seealso:: About cache and data

        See the :any:`notes <Base.__init__>` of the :any:`Base` class.

    :param str|unicode name_in_json: name of the attribute in the JSON data.
      Optional; defaults to the name of the decorated method.
    """
    def wrappers_wrapper(func):

        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            name = name_in_json if name_in_json else func.__name__

            def fallback():
                # The decorated function supplies the default value.
                return func(self, *args, **kwargs)

            def from_data():
                if can_get_from(name, self._data):
                    return self._data[name]
                return fallback()

            if self._data:
                return from_data()
            if self._cache and can_get_from(name, self._cache):
                return self._cache[name]

            # id is important: when there is no data, _build_url needs it,
            # so just return the function result instead of fetching.
            if name == 'id':
                return fallback()

            self._get_data()
            if self._data:
                return from_data()
            # Still no data after the request (e.g. _get_data is a no-op):
            # fall back explicitly instead of returning an implicit None.
            return fallback()
        return wrapper

    return wrappers_wrapper
66 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/other.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from __future__ import unicode_literals
 4 | 
 5 | import functools
 6 | import importlib
 7 | 
 8 | __all__ = ['other_obj']
 9 | 
10 | 
def other_obj(class_name=None, name_in_json=None, module_filename=None):
    """
    Decorator that marks a method as "another Zhihu object" and builds that
    object from the current object's data.

    Construction flow:

    1. Try to import the class named by ``class_name``; if that fails, use
       the :any:`Base` class instead.
    2. Use the return value of the decorated function as the object data;
       if it is not None go to 6.
    3. Try to take the data from ``cache``. On success go to 6.
    4. Otherwise call ``self._get_data()`` to obtain ``data``.
    5. Try to take the data from ``data``; if it is missing there as well,
       return None.
    6. Build an instance of the class from step 1, passing the data found
       as its ``cache``. Data without an ``id`` entry yields None.

    ..  seealso:: About cache and data

        See the :any:`notes <Base.__init__>` of the :any:`Base` class.

    :param class_name: name of the class to instantiate; defaults to the
      decorated method's name (capitalized when entirely lower-case)
    :param name_in_json: key of the attribute in the JSON data; defaults to
      the decorated method's name
    :param module_filename: file name of the module holding the class;
      defaults to the lower-cased class name
    """
    def wrappers_wrapper(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            target_name = class_name or func.__name__
            if target_name.islower():
                target_name = target_name.capitalize()
            json_key = name_in_json or func.__name__
            module_name = module_filename or target_name.lower()

            try:
                target_cls = getattr(
                    importlib.import_module(
                        '.' + module_name,
                        'zhihu_oauth.zhcls'
                    ),
                    target_name,
                )
            except (ImportError, AttributeError):
                from .base import Base
                target_cls = Base

            payload = func(self, *args, **kwargs)

            if payload is None:
                if self._cache and json_key in self._cache:
                    payload = self._cache[json_key]
                else:
                    self._get_data()
                    if self._data and json_key in self._data:
                        payload = self._data[json_key]

            # Without usable data (or without an id) no object can be built.
            if payload is None or 'id' not in payload:
                return None
            return target_cls(payload['id'], payload, self._session)

        return wrapper

    return wrappers_wrapper
73 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/question.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | from __future__ import unicode_literals
  4 | 
  5 | from .base import Base
  6 | from .generator import generator_of
  7 | from .normal import normal_attr
  8 | from .streaming import streaming
  9 | from zhihu_oauth.zhcls.urls import (
 10 |     QUESTION_DETAIL_URL,
 11 |     QUESTION_ANSWERS_URL,
 12 |     QUESTION_COMMENTS_URL,
 13 |     QUESTION_FOLLOWERS_URL,
 14 |     QUESTION_TOPICS_URL,
 15 | )
 16 | 
 17 | __all__ = ['Question']
 18 | 
 19 | 
class Question(Base):
    """
    Wrapper for a question; its detail URL is built from
    ``QUESTION_DETAIL_URL`` and the question id.

    The property bodies below are placeholders: values are resolved by the
    ``normal_attr`` / ``streaming`` / ``generator_of`` decorators.
    """

    def __init__(self, qid, cache, session):
        super(Question, self).__init__(qid, cache, session)

    def _build_url(self):
        return QUESTION_DETAIL_URL.format(self._id)

    # ----- simple info -----

    @property
    @normal_attr()
    def allow_delete(self):
        return None

    @property
    @normal_attr()
    def answer_count(self):
        return None

    @property
    @normal_attr()
    def comment_count(self):
        return None

    @property
    @normal_attr("created")
    def created_time(self):
        """Value stored under the ``created`` key of the JSON data."""
        return None

    @property
    @normal_attr('except')
    def excerpt(self):
        """
        In the JSON returned by Zhihu this item is called ``except``
        (sic), hence the explicit key name passed to the decorator.
        """
        return None

    @property
    @normal_attr()
    def follower_count(self):
        return None

    @property
    @normal_attr()
    def id(self):
        # Fallback used by normal_attr when no data is available.
        return self._id

    @property
    @normal_attr()
    def detail(self):
        return None

    @property
    @streaming()
    def redirection(self):
        """
        Typical return value (titles shown translated here):

        ..  code-block:: python

            {
                'to':
                {
                    'url': 'https://api.zhihu.com/questions/19570036',
                    'id': 19570036,
                    'type': 'question',
                    'title': 'What is "question redirection"? How should it be used to resolve duplicate questions?'
                },
                'from':
                [
                    {
                        'url': 'https://api.zhihu.com/questions/19772082',
                        'id': 19772082,
                        'type': 'question',
                        'title': 'Are there duplicate questions on Zhihu?'
                    },
                    {
                        'url': 'https://api.zhihu.com/questions/20830682',
                        'id': 20830682,
                        'type': 'question',
                        'title': 'How should Zhihu deal with the same question appearing repeatedly?'
                    }
                ]
            }

        Getting a syntax error when using the ``from`` attribute? See the
        :ref:`notes <tips-for-conflict-with-keyword>`.

        """
        return None

    @property
    @streaming()
    def status(self):
        return None

    @property
    @streaming(use_cache=False)
    def suggest_edit(self):
        """
        Typical return values:

        ..  code-block:: python

            {'status': False, 'reason': ''}

            {'status': True, 'reason': 'The question is unclear'}
        """
        return None

    @property
    @normal_attr()
    def title(self):
        return None

    @property
    @normal_attr()
    def updated_time(self):
        return None

    # ----- generators -----

    @property
    @generator_of(QUESTION_ANSWERS_URL)
    def answers(self):
        return None

    @property
    @generator_of(QUESTION_COMMENTS_URL)
    def comments(self):
        return None

    @property
    @generator_of(QUESTION_FOLLOWERS_URL, 'people')
    def followers(self):
        return None

    @property
    @generator_of(QUESTION_TOPICS_URL)
    def topics(self):
        return None
160 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/topic.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | from __future__ import unicode_literals
  4 | 
  5 | from .base import Base
  6 | from .generator import generator_of
  7 | from .normal import normal_attr
  8 | from zhihu_oauth.zhcls.urls import (
  9 |     TOPIC_DETAIL_URL,
 10 |     TOPIC_BEST_ANSWERS_URL,
 11 |     TOPIC_BEST_ANSWERERS_URL,
 12 |     TOPIC_CHILDREN_URL,
 13 |     TOPIC_FOLLOWERS_URL,
 14 |     TOPIC_PARENTS_URL,
 15 |     TOPIC_UNANSWERED_QUESTION,
 16 | )
 17 | 
 18 | __all__ = ['Topic']
 19 | 
 20 | 
class Topic(Base):
    """
    Wrapper for a topic; its detail URL is built from ``TOPIC_DETAIL_URL``
    and the topic id.

    The property bodies below are placeholders: values are resolved by the
    ``normal_attr`` / ``generator_of`` decorators.
    """

    def __init__(self, tid, cache, session):
        super(Topic, self).__init__(tid, cache, session)

    def _build_url(self):
        return TOPIC_DETAIL_URL.format(self.id)

    # ---- simple info -----

    @property
    @normal_attr()
    def avatar_url(self):
        return None

    @property
    @normal_attr('best_answers_count')
    def best_answer_count(self):
        """Value stored under the ``best_answers_count`` JSON key."""
        return None

    @property
    def best_answers_count(self):
        """Alias of :attr:`best_answer_count`."""
        return self.best_answer_count

    @property
    @normal_attr()
    def id(self):
        # Fallback used by normal_attr when no data is available.
        return self._id

    @property
    @normal_attr()
    def introduction(self):
        return None

    @property
    @normal_attr()
    def excerpt(self):
        return None

    @property
    def father_count(self):
        """Alias of :attr:`parent_count`."""
        return self.parent_count

    @property
    @normal_attr('followers_count')
    def follower_count(self):
        """Value stored under the ``followers_count`` JSON key."""
        return None

    @property
    def followers_count(self):
        """Alias of :attr:`follower_count`."""
        return self.follower_count

    @property
    @normal_attr()
    def name(self):
        return None

    @property
    @normal_attr('father_count')
    def parent_count(self):
        """Value stored under the ``father_count`` JSON key."""
        return None

    @property
    @normal_attr('questions_count')
    def question_count(self):
        """Value stored under the ``questions_count`` JSON key."""
        return None

    @property
    def questions_count(self):
        """Alias of :attr:`question_count`."""
        return self.question_count

    @property
    @normal_attr()
    def unanswered_count(self):
        return None

    # ----- generators -----

    @property
    @generator_of(TOPIC_BEST_ANSWERS_URL, 'answer')
    def best_answers(self):
        """
        Best (featured) answers.
        """
        return None

    @property
    @generator_of(TOPIC_BEST_ANSWERERS_URL, 'people')
    def best_answerers(self):
        """
        Presumably "the best answerers"...

        best_answerers... Zhihu really has a way with names...
        """
        return None

    @property
    @generator_of(TOPIC_CHILDREN_URL, 'topic')
    def children(self):
        """
        Child topics.
        """
        return None

    @property
    @generator_of(TOPIC_FOLLOWERS_URL, 'people')
    def followers(self):
        return None

    @property
    @generator_of(TOPIC_PARENTS_URL, 'topic')
    def parents(self):
        """
        Parent topics.
        """
        return None

    @property
    @generator_of(TOPIC_UNANSWERED_QUESTION, 'question')
    def unanswered_questions(self):
        return None
141 | 


--------------------------------------------------------------------------------
/src/lib/oauth/zhihu_oauth/zhcls/whisper.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | from __future__ import unicode_literals
 4 | 
 5 | from .base import Base
 6 | from .other import other_obj
 7 | from .generator import generator_of
 8 | from .normal import normal_attr
 9 | from .urls import MESSAGES_URL
10 | 
11 | __all__ = ['Whisper']
12 | 
13 | 
class Whisper(Base):
    """
    Well, this is really just "all the messages exchanged with one person".

    I spent ages trying to name this thing... in the end a girl in the chat
    group suggested calling it Whisper, and I thought that was great~

    Later I found out that the Zhihu API calls this a Thread. Well, I do
    not want to change it, I am just that wilful...
    """
    def _build_url(self):
        # No URL is built for a whisper: an empty string is returned.
        return ''

    def _get_data(self):
        # Deliberate no-op: the normal_attr / other_obj decorators call
        # _get_data() when a value is missing, and here nothing is fetched.
        pass

    def __init__(self, wid, cache, session):
        super(Whisper, self).__init__(wid, cache, session)

    # ----- simple info -----

    @property
    @normal_attr()
    def allow_reply(self):
        return None

    @property
    def id(self):
        """The id stored on this object as ``self._id``."""
        return self._id

    @property
    @normal_attr()
    def snippet(self):
        """
        Summary of the most recent private message.
        """
        return None

    @property
    @normal_attr()
    def updated_time(self):
        return None

    @property
    @normal_attr()
    def unread_count(self):
        return None

    @property
    @other_obj('people', 'participant')
    def who(self):
        """
        The other Zhihu user taking part in this private conversation
        (read from the ``participant`` JSON key).
        """
        return None

    @property
    @generator_of(MESSAGES_URL)
    def messages(self):
        return None
72 | 


--------------------------------------------------------------------------------
/src/lib/requests/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #   __
 4 | #  /__)  _  _     _   _ _/   _
 5 | # / (   (- (/ (/ (- _)  /  _)
 6 | #          /
 7 | 
 8 | """
 9 | Requests HTTP library
10 | ~~~~~~~~~~~~~~~~~~~~~
11 | 
12 | Requests is an HTTP library, written in Python, for human beings. Basic GET
13 | usage:
14 | 
15 |    >>> import requests
16 |    >>> r = requests.get('https://www.python.org')
17 |    >>> r.status_code
18 |    200
19 |    >>> 'Python is a programming language' in r.content
20 |    True
21 | 
22 | ... or POST:
23 | 
24 |    >>> payload = dict(key1='value1', key2='value2')
25 |    >>> r = requests.post('http://httpbin.org/post', data=payload)
26 |    >>> print(r.text)
27 |    {
28 |      ...
29 |      "form": {
30 |        "key2": "value2",
31 |        "key1": "value1"
32 |      },
33 |      ...
34 |    }
35 | 
36 | The other HTTP methods are supported - see `requests.api`. Full documentation
37 | is at <http://python-requests.org>.
38 | 
39 | :copyright: (c) 2016 by Kenneth Reitz.
40 | :license: Apache 2.0, see LICENSE for more details.
41 | """
42 | 
43 | __title__ = 'requests'
44 | __version__ = '2.11.1'
45 | __build__ = 0x021101
46 | __author__ = 'Kenneth Reitz'
47 | __license__ = 'Apache 2.0'
48 | __copyright__ = 'Copyright 2016 Kenneth Reitz'
49 | 
50 | # Attempt to enable urllib3's SNI support, if possible
51 | try:
52 |     from .packages.urllib3.contrib import pyopenssl
53 |     pyopenssl.inject_into_urllib3()
54 | except ImportError:
55 |     pass
56 | 
57 | import warnings
58 | 
59 | # urllib3's DependencyWarnings should be silenced.
60 | from .packages.urllib3.exceptions import DependencyWarning
61 | warnings.simplefilter('ignore', DependencyWarning)
62 | 
63 | from . import utils
64 | from .models import Request, Response, PreparedRequest
65 | from .api import request, get, head, post, patch, put, delete, options
66 | from .sessions import session, Session
67 | from .status_codes import codes
68 | from .exceptions import (
69 |     RequestException, Timeout, URLRequired,
70 |     TooManyRedirects, HTTPError, ConnectionError,
71 |     FileModeWarning, ConnectTimeout, ReadTimeout
72 | )
73 | 
74 | # Set default logging handler to avoid "No handler found" warnings.
75 | import logging
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    # Fallback used when the logging module does not provide NullHandler.
    class NullHandler(logging.Handler):
        """Logging handler whose ``emit`` does nothing."""
        def emit(self, record):
            pass
82 | 
83 | logging.getLogger(__name__).addHandler(NullHandler())
84 | 
85 | # FileModeWarnings go off per the default.
86 | warnings.simplefilter('default', FileModeWarning, append=True)
87 | 


--------------------------------------------------------------------------------
/src/lib/requests/_internal_utils.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | requests._internal_utils
 5 | ~~~~~~~~~~~~~~
 6 | 
 7 | Provides utility functions that are consumed internally by Requests
 8 | which depend on extremely few external helpers (such as compat)
 9 | """
10 | 
11 | from .compat import is_py2, builtin_str
12 | 
13 | 
def to_native_string(string, encoding='ascii'):
    """Return ``string`` as the interpreter's native string type.

    A value that already is a native string is returned untouched;
    otherwise it is encoded (Python 2) or decoded (Python 3) with
    ``encoding``, which is ASCII unless told otherwise.
    """
    if isinstance(string, builtin_str):
        return string
    if is_py2:
        return string.encode(encoding)
    return string.decode(encoding)
28 | 


--------------------------------------------------------------------------------
/src/lib/requests/certs.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | requests.certs
 6 | ~~~~~~~~~~~~~~
 7 | 
 8 | This module returns the preferred default CA certificate bundle.
 9 | 
10 | If you are packaging Requests, e.g., for a Linux distribution or a managed
11 | environment, you can change the definition of where() to return a separately
12 | packaged CA bundle.
13 | """
14 | import os.path
15 | 
try:
    from certifi import where
except ImportError:
    def where():
        """Return the path of the preferred certificate bundle."""
        # certifi is unavailable: use the bundle vendored next to this module
        package_dir = os.path.dirname(__file__)
        return os.path.join(package_dir, 'cacert.pem')
23 | 
24 | if __name__ == '__main__':
25 |     print(where())
26 | 


--------------------------------------------------------------------------------
/src/lib/requests/compat.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | requests.compat
 5 | ~~~~~~~~~~~~~~~
 6 | 
 7 | This module handles import compatibility issues between Python 2 and
 8 | Python 3.
 9 | """
10 | 
11 | from .packages import chardet
12 | 
13 | import sys
14 | 
15 | # -------
16 | # Pythons
17 | # -------
18 | 
19 | # Syntax sugar.
20 | _ver = sys.version_info
21 | 
22 | #: Python 2.x?
23 | is_py2 = (_ver[0] == 2)
24 | 
25 | #: Python 3.x?
26 | is_py3 = (_ver[0] == 3)
27 | 
28 | try:
29 |     import simplejson as json
30 | except (ImportError, SyntaxError):
31 |     # simplejson does not support Python 3.2, it throws a SyntaxError
32 |     # because of u'...' Unicode literals.
33 |     import json
34 | 
35 | # ---------
36 | # Specifics
37 | # ---------
38 | 
# Both branches export the same set of names so that the rest of the package
# can import them from here without checking the interpreter version.
if is_py2:
    from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass
    from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
    from urllib2 import parse_http_list
    import cookielib
    from Cookie import Morsel
    from StringIO import StringIO
    from .packages.urllib3.packages.ordered_dict import OrderedDict

    # builtin_str must be captured before ``str`` is rebound to ``unicode``.
    builtin_str = str
    bytes = str
    str = unicode
    basestring = basestring
    numeric_types = (int, long, float)

elif is_py3:
    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
    from urllib.request import parse_http_list, getproxies, proxy_bypass
    from http import cookiejar as cookielib
    from http.cookies import Morsel
    from io import StringIO
    from collections import OrderedDict

    # On Python 3 the built-in types are re-exported unchanged; basestring
    # becomes a tuple so it still works as an isinstance() target.
    builtin_str = str
    str = str
    bytes = bytes
    basestring = (str, bytes)
    numeric_types = (int, float)
67 | 


--------------------------------------------------------------------------------
/src/lib/requests/exceptions.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | requests.exceptions
  5 | ~~~~~~~~~~~~~~~~~~~
  6 | 
  7 | This module contains the set of Requests' exceptions.
  8 | """
  9 | from .packages.urllib3.exceptions import HTTPError as BaseHTTPError
 10 | 
 11 | 
 12 | class RequestException(IOError):
 13 |     """There was an ambiguous exception that occurred while handling your
 14 |     request.
 15 |     """
 16 | 
 17 |     def __init__(self, *args, **kwargs):
 18 |         """Initialize RequestException with `request` and `response` objects."""
 19 |         response = kwargs.pop('response', None)
 20 |         self.response = response
 21 |         self.request = kwargs.pop('request', None)
 22 |         if (response is not None and not self.request and
 23 |                 hasattr(response, 'request')):
 24 |             self.request = self.response.request
 25 |         super(RequestException, self).__init__(*args, **kwargs)
 26 | 
 27 | 
class HTTPError(RequestException):
    """An HTTP error occurred."""


class ConnectionError(RequestException):
    """A Connection error occurred."""


class ProxyError(ConnectionError):
    """A proxy error occurred."""


class SSLError(ConnectionError):
    """An SSL error occurred."""


class Timeout(RequestException):
    """The request timed out.

    Catching this error will catch both
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout` errors.
    """


class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.

    Derives from both ``ConnectionError`` and ``Timeout``, so handlers for
    either of them catch it.
    """


class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time."""


class URLRequired(RequestException):
    """A valid URL is required to make a request."""


class TooManyRedirects(RequestException):
    """Too many redirects."""


class MissingSchema(RequestException, ValueError):
    """The URL schema (e.g. http or https) is missing.

    Also a ``ValueError``.
    """


class InvalidSchema(RequestException, ValueError):
    """See defaults.py for valid schemas.

    Also a ``ValueError``.
    """


class InvalidURL(RequestException, ValueError):
    """The URL provided was somehow invalid.

    Also a ``ValueError``.
    """


class InvalidHeader(RequestException, ValueError):
    """The header value provided was somehow invalid.

    Also a ``ValueError``.
    """


class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk."""


class ContentDecodingError(RequestException, BaseHTTPError):
    """Failed to decode response content.

    Also derives from urllib3's ``HTTPError`` (imported as ``BaseHTTPError``).
    """


class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed.

    Also a ``TypeError``.
    """


class RetryError(RequestException):
    """Custom retries logic failed."""


# Warnings


class RequestsWarning(Warning):
    """Base warning for Requests."""
    pass


class FileModeWarning(RequestsWarning, DeprecationWarning):
    """A file was opened in text mode, but Requests determined its binary length.

    Also a ``DeprecationWarning``.
    """
    pass
115 | 


--------------------------------------------------------------------------------
/src/lib/requests/hooks.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | requests.hooks
 5 | ~~~~~~~~~~~~~~
 6 | 
 7 | This module provides the capabilities for the Requests hooks system.
 8 | 
 9 | Available hooks:
10 | 
11 | ``response``:
12 |     The response generated from a Request.
13 | """
HOOKS = ['response']


def default_hooks():
    """Return a new mapping with an empty hook list for every event in HOOKS."""
    registry = {}
    for event in HOOKS:
        # a fresh list per event, so callers never share hook lists
        registry[event] = []
    return registry
19 | 
20 | # TODO: response is the only one
21 | 
22 | 
def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Run the hooks registered under ``key`` on ``hook_data``.

    ``hooks`` may be None or a mapping whose values are a single callable or
    an iterable of callables. Each hook receives the current data plus
    ``kwargs``; a return value other than None replaces the data passed to
    the next hook. The final data is returned.
    """
    registered = (hooks or dict()).get(key)
    if not registered:
        return hook_data

    # A lone callable is treated as a one-element list of hooks.
    if hasattr(registered, '__call__'):
        registered = [registered]

    for callback in registered:
        outcome = callback(hook_data, **kwargs)
        if outcome is not None:
            hook_data = outcome
    return hook_data
35 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/README.rst:
--------------------------------------------------------------------------------
 1 | If you are planning to submit a pull request to requests with any changes in 
 2 | this library do not go any further. These are independent libraries which we
 3 | vendor into requests. Any changes necessary to these libraries must be made in
 4 | them and submitted as separate pull requests to those libraries.
 5 | 
 6 | urllib3 pull requests go here: https://github.com/shazow/urllib3
 7 | 
 8 | chardet pull requests go here: https://github.com/chardet/chardet
 9 | 
10 | See https://github.com/kennethreitz/requests/pull/1812#issuecomment-30854316
11 | for the reasoning behind this.
12 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/__init__.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Debian and other distributions "unbundle" requests' vendored dependencies, and
 3 | rewrite all imports to use the global versions of ``urllib3`` and ``chardet``.
 4 | The problem with this is that not only requests itself imports those
 5 | dependencies, but third-party code outside of the distros' control too.
 6 | 
 7 | In reaction to these problems, the distro maintainers replaced
 8 | ``requests.packages`` with a magical "stub module" that imports the correct
 9 | modules. The implementations were varying in quality and all had severe
10 | problems. For example, a symlink (or hardlink) that links the correct modules
11 | into place introduces problems regarding object identity, since you now have
12 | two modules in `sys.modules` with the same API, but different identities::
13 | 
14 |     requests.packages.urllib3 is not urllib3
15 | 
16 | With version ``2.5.2``, requests started to maintain its own stub, so that
17 | distro-specific breakage would be reduced to a minimum, even though the whole
18 | issue is not requests' fault in the first place. See
19 | https://github.com/kennethreitz/requests/pull/2375 for the corresponding pull
20 | request.
21 | '''
22 | 
23 | from __future__ import absolute_import
24 | import sys
25 | 
26 | try:
27 |     from . import urllib3
28 | except ImportError:
29 |     import urllib3
30 |     sys.modules['%s.urllib3' % __name__] = urllib3
31 | 
32 | try:
33 |     from . import chardet
34 | except ImportError:
35 |     import chardet
36 |     sys.modules['%s.chardet' % __name__] = chardet
37 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/__init__.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # This library is free software; you can redistribute it and/or
 3 | # modify it under the terms of the GNU Lesser General Public
 4 | # License as published by the Free Software Foundation; either
 5 | # version 2.1 of the License, or (at your option) any later version.
 6 | #
 7 | # This library is distributed in the hope that it will be useful,
 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
10 | # Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public
13 | # License along with this library; if not, write to the Free Software
14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15 | # 02110-1301  USA
16 | ######################### END LICENSE BLOCK #########################
17 | 
18 | __version__ = "2.3.0"
19 | from sys import version_info
20 | 
21 | 
def detect(aBuf):
    """Feed ``aBuf`` to a ``UniversalDetector`` and return its ``result``.

    Raises ``ValueError`` when ``aBuf`` is a unicode object (Python 2) or
    is not a ``bytes`` object (Python 3).
    """
    if version_info < (3, 0):
        is_text = isinstance(aBuf, unicode)
    else:
        is_text = not isinstance(aBuf, bytes)
    if is_text:
        raise ValueError('Expected a bytes object, not a unicode object')

    from . import universaldetector
    detector = universaldetector.UniversalDetector()
    detector.reset()
    detector.feed(aBuf)
    detector.close()
    return detector.result
33 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/big5prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Communicator client code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import Big5DistributionAnalysis
31 | from .mbcssm import Big5SMModel
32 | 
33 | 
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober configured with the Big5 state-machine model and
    the Big5 character-distribution analyser."""

    def get_charset_name(self):
        """Return the charset name reported by this prober."""
        return "Big5"

    def __init__(self):
        """Install the Big5 state machine and analyser, then reset."""
        MultiByteCharSetProber.__init__(self)
        # The attributes must be in place before reset() is called.
        self._mCodingSM = CodingStateMachine(Big5SMModel)
        self._mDistributionAnalyzer = Big5DistributionAnalysis()
        self.reset()
43 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/chardetect.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Script which takes one or more file paths and reports on their detected
 4 | encodings
 5 | 
 6 | Example::
 7 | 
 8 |     % chardetect somefile someotherfile
 9 |     somefile: windows-1252 with confidence 0.5
10 |     someotherfile: ascii with confidence 1.0
11 | 
12 | If no paths are provided, it takes its input from stdin.
13 | 
14 | """
15 | 
16 | from __future__ import absolute_import, print_function, unicode_literals
17 | 
18 | import argparse
19 | import sys
20 | from io import open
21 | 
22 | from chardet import __version__
23 | from chardet.universaldetector import UniversalDetector
24 | 
25 | 
def description_of(lines, name='stdin'):
    """
    Build a one-line, human-readable summary of the encoding detected for
    a file or a list of byte strings.

    Every item of ``lines`` is fed to a single ``UniversalDetector``; the
    summary is formatted from the detector's ``result`` after ``close()``.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``"<name>: <encoding> with confidence <confidence>"`` when an
              encoding was found, ``"<name>: no result"`` otherwise.
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(chunk)
    detector.close()

    outcome = detector.result
    encoding = outcome['encoding']
    if not encoding:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, encoding,
                                                 outcome['confidence'])
46 | 
47 | 
def main(argv=None):
    '''
    Handles command line arguments and gets things started.

    Each input file is opened in binary mode and its detected encoding is
    printed to stdout, one line per input.  When no path is given the data
    is read from standard input.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # The detector has to be fed bytes.  On Python 3 ``sys.stdin`` is a
    # text stream that yields ``str``, so fall back to its underlying binary
    # buffer.  Python 2's ``sys.stdin`` has no ``buffer`` attribute and is
    # used unchanged.
    stdin_bytes = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[stdin_bytes])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            # Interactive use: tell the user how to finish the input.
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))
77 | 
78 | 
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
81 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Communicator client code.
  3 | # 
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 1998
  7 | # the Initial Developer. All Rights Reserved.
  8 | # 
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #
 12 | # This library is free software; you can redistribute it and/or
 13 | # modify it under the terms of the GNU Lesser General Public
 14 | # License as published by the Free Software Foundation; either
 15 | # version 2.1 of the License, or (at your option) any later version.
 16 | # 
 17 | # This library is distributed in the hope that it will be useful,
 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 20 | # Lesser General Public License for more details.
 21 | # 
 22 | # You should have received a copy of the GNU Lesser General Public
 23 | # License along with this library; if not, write to the Free Software
 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 25 | # 02110-1301  USA
 26 | ######################### END LICENSE BLOCK #########################
 27 | 
 28 | from . import constants
 29 | import sys
 30 | from .charsetprober import CharSetProber
 31 | 
 32 | 
 33 | class CharSetGroupProber(CharSetProber):
 34 |     def __init__(self):
 35 |         CharSetProber.__init__(self)
 36 |         self._mActiveNum = 0
 37 |         self._mProbers = []
 38 |         self._mBestGuessProber = None
 39 | 
 40 |     def reset(self):
 41 |         CharSetProber.reset(self)
 42 |         self._mActiveNum = 0
 43 |         for prober in self._mProbers:
 44 |             if prober:
 45 |                 prober.reset()
 46 |                 prober.active = True
 47 |                 self._mActiveNum += 1
 48 |         self._mBestGuessProber = None
 49 | 
 50 |     def get_charset_name(self):
 51 |         if not self._mBestGuessProber:
 52 |             self.get_confidence()
 53 |             if not self._mBestGuessProber:
 54 |                 return None
 55 | #                self._mBestGuessProber = self._mProbers[0]
 56 |         return self._mBestGuessProber.get_charset_name()
 57 | 
 58 |     def feed(self, aBuf):
 59 |         for prober in self._mProbers:
 60 |             if not prober:
 61 |                 continue
 62 |             if not prober.active:
 63 |                 continue
 64 |             st = prober.feed(aBuf)
 65 |             if not st:
 66 |                 continue
 67 |             if st == constants.eFoundIt:
 68 |                 self._mBestGuessProber = prober
 69 |                 return self.get_state()
 70 |             elif st == constants.eNotMe:
 71 |                 prober.active = False
 72 |                 self._mActiveNum -= 1
 73 |                 if self._mActiveNum <= 0:
 74 |                     self._mState = constants.eNotMe
 75 |                     return self.get_state()
 76 |         return self.get_state()
 77 | 
 78 |     def get_confidence(self):
 79 |         st = self.get_state()
 80 |         if st == constants.eFoundIt:
 81 |             return 0.99
 82 |         elif st == constants.eNotMe:
 83 |             return 0.01
 84 |         bestConf = 0.0
 85 |         self._mBestGuessProber = None
 86 |         for prober in self._mProbers:
 87 |             if not prober:
 88 |                 continue
 89 |             if not prober.active:
 90 |                 if constants._debug:
 91 |                     sys.stderr.write(prober.get_charset_name()
 92 |                                      + ' not active\n')
 93 |                 continue
 94 |             cf = prober.get_confidence()
 95 |             if constants._debug:
 96 |                 sys.stderr.write('%s confidence = %s\n' %
 97 |                                  (prober.get_charset_name(), cf))
 98 |             if bestConf < cf:
 99 |                 bestConf = cf
100 |                 self._mBestGuessProber = prober
101 |         if not self._mBestGuessProber:
102 |             return 0.0
103 |         return bestConf
104 | #        else:
105 | #            self._mBestGuessProber = self._mProbers[0]
106 | #            return self._mBestGuessProber.get_confidence()
107 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/charsetprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301  USA
27 | ######################### END LICENSE BLOCK #########################
28 | 
29 | from . import constants
30 | import re
31 | 
32 | 
class CharSetProber:
    """Base class of the charset probers.

    It holds the detection state and provides neutral default answers
    (``None`` charset name, confidence 0.0, no-op ``feed``) together with a
    few byte-buffer filtering helpers.
    """

    def __init__(self):
        # Give the state attribute a defined value from construction on, so
        # get_state() cannot raise AttributeError when it is called before
        # the first reset().  reset() switches it to constants.eDetecting.
        self._mState = None

    def reset(self):
        """Put the prober back into the ``eDetecting`` state."""
        self._mState = constants.eDetecting

    def get_charset_name(self):
        """Return the detected charset name; the base class has none."""
        return None

    def feed(self, aBuf):
        """Consume a chunk of input.  The base class ignores it."""
        pass

    def get_state(self):
        """Return the current detection state (``None`` before the first
        ``reset()``)."""
        return self._mState

    def get_confidence(self):
        """Return the confidence of the guess; always 0.0 here."""
        return 0.0

    def filter_high_bit_only(self, aBuf):
        """Return ``aBuf`` with every run of bytes in the range 0x00-0x7F
        collapsed into a single space."""
        aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf)
        return aBuf

    def filter_without_english_letters(self, aBuf):
        """Return ``aBuf`` with every run of ASCII letters (A-Z, a-z)
        collapsed into a single space."""
        aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf)
        return aBuf

    def filter_with_english_letters(self, aBuf):
        """Return ``aBuf`` unchanged; the filter is not implemented yet."""
        # TODO
        return aBuf
63 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .constants import eStart
29 | from .compat import wrap_ord
30 | 
31 | 
class CodingStateMachine:
    """Table-driven state machine that walks a byte stream according to one
    encoding model.

    The model ``sm`` is a mapping providing the keys ``classTable``,
    ``charLenTable``, ``classFactor``, ``stateTable`` and ``name``.
    """

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentCharLen = 0
        self._mCurrentBytePos = 0
        self.reset()

    def reset(self):
        """Return the machine to the ``eStart`` state."""
        self._mCurrentState = eStart

    def next_state(self, c):
        """Advance the machine by one byte and return the new state.

        PY3K: the input is a byte stream, so ``c`` is an int, not a byte;
        ``wrap_ord`` takes care of the difference.
        """
        model = self._mModel
        # Every byte is first mapped to its class.
        byte_class = model['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            # First byte of a character: its class also gives the length.
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = model['charLenTable'][byte_class]
        # The state table is flat: row = current state, column = byte class.
        row_offset = self._mCurrentState * model['classFactor']
        self._mCurrentState = model['stateTable'][row_offset + byte_class]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        """Return the length recorded for the current character."""
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        """Return the ``name`` entry of the model driving this machine."""
        return self._mModel['name']
62 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/compat.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # Contributor(s):
 3 | #   Ian Cordasco - port to Python
 4 | #
 5 | # This library is free software; you can redistribute it and/or
 6 | # modify it under the terms of the GNU Lesser General Public
 7 | # License as published by the Free Software Foundation; either
 8 | # version 2.1 of the License, or (at your option) any later version.
 9 | #
10 | # This library is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 | # Lesser General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU Lesser General Public
16 | # License along with this library; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 | # 02110-1301  USA
19 | ######################### END LICENSE BLOCK #########################
20 | 
21 | import sys
22 | 
23 | 
# String types of the running interpreter: (str, unicode) on Python 2 and
# (bytes, str) on Python 3.  ``unicode`` is only evaluated on Python 2.
base_str = (str, unicode) if sys.version_info < (3, 0) else (bytes, str)
28 | 
29 | 
def wrap_ord(a):
    """Return the integer value of a byte.

    On Python 2 a string item is converted with ``ord``.  In every other
    case (Python 3, or a value that is not a string) ``a`` is returned
    unchanged.
    """
    needs_ord = sys.version_info < (3, 0) and isinstance(a, base_str)
    return ord(a) if needs_ord else a
35 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/constants.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | # 
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 | # Lesser General Public License for more details.
22 | # 
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301  USA
27 | ######################### END LICENSE BLOCK #########################
28 | 
# A truthy value makes the probers write diagnostics to stderr.
_debug = 0

# Detection states kept by the probers (see get_state() / feed()).
eDetecting, eFoundIt, eNotMe = 0, 1, 2

# States returned by the coding state machines (see next_state()).
eStart, eError, eItsMe = 0, 1, 2

# Confidence a prober has to exceed before it short-cuts to eFoundIt.
SHORTCUT_THRESHOLD = 0.95
40 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/cp949prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import CP949SMModel
32 | 
33 | 
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober configured with the CP949 state-machine model."""

    def get_charset_name(self):
        """Return the charset name reported by this prober."""
        return "CP949"

    def __init__(self):
        """Install the CP949 state machine and analyser, then reset."""
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(CP949SMModel)
        # NOTE: CP949 is a superset of EUC-KR, so the character distribution
        #       should not differ and the EUC-KR analyser is reused.
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self.reset()
45 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/escprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from . import constants
29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
30 |                     ISO2022KRSMModel)
31 | from .charsetprober import CharSetProber
32 | from .codingstatemachine import CodingStateMachine
33 | from .compat import wrap_ord
34 | 
35 | 
class EscCharSetProber(CharSetProber):
    """Prober that runs several coding state machines side by side.

    One machine is created per model (``HZSMModel``, ``ISO2022CNSMModel``,
    ``ISO2022JPSMModel``, ``ISO2022KRSMModel``).  The first machine that
    reaches ``eItsMe`` decides the detected charset.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        models = (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
                  ISO2022KRSMModel)
        self._mCodingSM = [CodingStateMachine(model) for model in models]
        self.reset()

    def reset(self):
        """Re-activate every state machine and forget any detection."""
        CharSetProber.reset(self)
        for machine in self._mCodingSM:
            if machine:
                machine.active = True
                machine.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        """Return the detected charset name, or ``None`` if none yet."""
        return self._mDetectedCharset

    def get_confidence(self):
        """Return 0.99 once a charset has been detected, else 0.00."""
        return 0.99 if self._mDetectedCharset else 0.00

    def feed(self, aBuf):
        """Run every byte of ``aBuf`` through the active state machines and
        return the resulting prober state.

        A machine that reports ``eError`` is deactivated; when none is left
        the state becomes ``eNotMe``.  A machine that reports ``eItsMe``
        sets the state to ``eFoundIt`` and supplies the charset name.
        """
        for byte in aBuf:
            # PY3K: aBuf is a byte array, so byte is an int, not a byte
            for machine in self._mCodingSM:
                if not machine or not machine.active:
                    continue
                outcome = machine.next_state(wrap_ord(byte))
                if outcome == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = machine.get_coding_state_machine()  # nopep8
                    return self.get_state()
                if outcome == constants.eError:
                    machine.active = False
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()

        return self.get_state()
87 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | import sys
29 | from . import constants
30 | from .mbcharsetprober import MultiByteCharSetProber
31 | from .codingstatemachine import CodingStateMachine
32 | from .chardistribution import EUCJPDistributionAnalysis
33 | from .jpcntx import EUCJPContextAnalysis
34 | from .mbcssm import EUCJPSMModel
35 | 
36 | 
class EUCJPProber(MultiByteCharSetProber):
    """Multi-byte prober for EUC-JP.

    In addition to the state machine and the character-distribution
    analyser it uses an ``EUCJPContextAnalysis`` instance.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def get_charset_name(self):
        """Return the charset name reported by this prober."""
        return "EUC-JP"

    def reset(self):
        """Reset the base prober and the context analyser."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_confidence(self):
        """Return the larger of the context-analysis and the
        distribution-analysis confidence."""
        return max(self._mContextAnalyzer.get_confidence(),
                   self._mDistributionAnalyzer.get_confidence())

    def feed(self, aBuf):
        """Push ``aBuf`` through the state machine and both analysers and
        return the resulting prober state."""
        aLen = len(aBuf)
        for pos in range(aLen):
            # PY3K: aBuf is a byte array, so aBuf[pos] is an int, not a byte
            outcome = self._mCodingSM.next_state(aBuf[pos])
            if outcome == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(pos)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            if outcome == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if outcome != constants.eStart:
                continue

            # Back in the start state: hand the analysers the two bytes that
            # end at the current position.
            charLen = self._mCodingSM.get_current_charlen()
            if pos == 0:
                # The preceding byte is the one saved in _mLastChar[0] at the
                # end of the previous call.
                self._mLastChar[1] = aBuf[0]
                self._mContextAnalyzer.feed(self._mLastChar, charLen)
                self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
            else:
                pair = aBuf[pos - 1:pos + 1]
                self._mContextAnalyzer.feed(pair, charLen)
                self._mDistributionAnalyzer.feed(pair, charLen)

        # Keep the last byte of this chunk for the next call.
        self._mLastChar[0] = aBuf[aLen - 1]

        if (self.get_state() == constants.eDetecting
                and self._mContextAnalyzer.got_enough_data()
                and self.get_confidence() > constants.SHORTCUT_THRESHOLD):
            self._mState = constants.eFoundIt

        return self.get_state()
91 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/euckrprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import EUCKRSMModel
32 | 
33 | 
class EUCKRProber(MultiByteCharSetProber):
    """Multi-byte prober configured with the EUC-KR state-machine model and
    the EUC-KR character-distribution analyser."""

    def get_charset_name(self):
        """Return the charset name reported by this prober."""
        return "EUC-KR"

    def __init__(self):
        """Install the EUC-KR state machine and analyser, then reset."""
        MultiByteCharSetProber.__init__(self)
        # The attributes must be in place before reset() is called.
        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self.reset()
43 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/euctwprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | # 
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | # 
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCTWDistributionAnalysis
31 | from .mbcssm import EUCTWSMModel
32 | 
class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte prober configured for the EUC-TW charset.

    Plugs a coding state machine built from ``EUCTWSMModel`` and an
    ``EUCTWDistributionAnalysis`` analyzer into the generic
    ``MultiByteCharSetProber`` machinery.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        state_machine = CodingStateMachine(EUCTWSMModel)
        analyzer = EUCTWDistributionAnalysis()
        self._mCodingSM = state_machine
        self._mDistributionAnalyzer = analyzer
        # Reset only after both collaborators exist so they get reset too.
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober detects."""
        return "EUC-TW"
42 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/gb2312prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | # 
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | # 
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import GB2312DistributionAnalysis
31 | from .mbcssm import GB2312SMModel
32 | 
class GB2312Prober(MultiByteCharSetProber):
    """Multi-byte prober configured for the GB2312 charset.

    Plugs a coding state machine built from ``GB2312SMModel`` and a
    ``GB2312DistributionAnalysis`` analyzer into the generic
    ``MultiByteCharSetProber`` machinery.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        state_machine = CodingStateMachine(GB2312SMModel)
        analyzer = GB2312DistributionAnalysis()
        self._mCodingSM = state_machine
        self._mDistributionAnalyzer = analyzer
        # Reset only after both collaborators exist so they get reset too.
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober detects."""
        return "GB2312"
42 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/mbcharsetprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #   Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301  USA
28 | ######################### END LICENSE BLOCK #########################
29 | 
30 | import sys
31 | from . import constants
32 | from .charsetprober import CharSetProber
33 | 
34 | 
class MultiByteCharSetProber(CharSetProber):
    """Shared feed/reset logic for probers of multi-byte charsets.

    Subclasses are expected to assign ``_mCodingSM`` (a coding state
    machine) and ``_mDistributionAnalyzer`` (a character distribution
    analyzer) in their own ``__init__`` and to override
    ``get_charset_name``.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # Two-byte scratch buffer: slot 0 carries the last byte of the
        # previous feed() call, slot 1 receives the first byte of the current
        # one, so a character split across two buffers can still be analyzed.
        self._mLastChar = [0, 0]

    def reset(self):
        """Reset the prober and, when present, its state machine/analyzer."""
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        """Placeholder that returns None; concrete probers override it."""
        pass

    def feed(self, aBuf):
        """Run ``aBuf`` through the state machine and distribution analyzer.

        :param aBuf: indexable, sliceable sequence of bytes to examine.
        :returns: the prober state (``self.get_state()``) after processing.

        An empty ``aBuf`` is accepted: no bytes are processed and the
        carried-over last byte is left untouched (previously this raised
        ``IndexError``).
        """
        aLen = len(aBuf)
        for i in range(aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # The state machine is back at its start state: hand the
                # last two bytes to the distribution analyzer.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # Pair the carried-over byte with the first byte of
                    # this buffer.
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call; skip when there is none
        # so that an empty buffer does not raise IndexError.
        if aLen:
            self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mDistributionAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the confidence reported by the distribution analyzer."""
        return self._mDistributionAnalyzer.get_confidence()
87 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/mbcsgroupprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #   Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301  USA
28 | ######################### END LICENSE BLOCK #########################
29 | 
30 | from .charsetgroupprober import CharSetGroupProber
31 | from .utf8prober import UTF8Prober
32 | from .sjisprober import SJISProber
33 | from .eucjpprober import EUCJPProber
34 | from .gb2312prober import GB2312Prober
35 | from .euckrprober import EUCKRProber
36 | from .cp949prober import CP949Prober
37 | from .big5prober import Big5Prober
38 | from .euctwprober import EUCTWProber
39 | 
40 | 
class MBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles one instance of each multi-byte prober."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # Instantiated in exactly this order.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self._mProbers = [make_prober() for make_prober in prober_classes]
        self.reset()
55 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301  USA
27 | ######################### END LICENSE BLOCK #########################
28 | 
29 | from .charsetgroupprober import CharSetGroupProber
30 | from .sbcharsetprober import SingleByteCharSetProber
31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 |                                 Latin5CyrillicModel, MacCyrillicModel,
33 |                                 Ibm866Model, Ibm855Model)
34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 | from .langthaimodel import TIS620ThaiModel
38 | from .langhebrewmodel import Win1255HebrewModel
39 | from .hebrewprober import HebrewProber
40 | 
41 | 
class SBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles the single-byte charset probers.

    One ``SingleByteCharSetProber`` is created per language model, followed
    by a ``HebrewProber`` and the two Win1255 Hebrew model probers that are
    registered with it.
    """

    def __init__(self):
        CharSetGroupProber.__init__(self)

        # Models that need nothing beyond a plain single-byte prober.
        standalone_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            Latin2HungarianModel,
            Win1250HungarianModel,
            TIS620ThaiModel,
        )
        probers = [SingleByteCharSetProber(model)
                   for model in standalone_models]

        # Hebrew: two model probers share one HebrewProber, which in turn is
        # told about both of them through set_model_probers().
        hebrew = HebrewProber()
        logical = SingleByteCharSetProber(Win1255HebrewModel, False, hebrew)
        visual = SingleByteCharSetProber(Win1255HebrewModel, True, hebrew)
        hebrew.set_model_probers(logical, visual)
        probers.extend([hebrew, logical, visual])

        self._mProbers = probers
        self.reset()
70 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/sjisprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | import sys
29 | from .mbcharsetprober import MultiByteCharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .chardistribution import SJISDistributionAnalysis
32 | from .jpcntx import SJISContextAnalysis
33 | from .mbcssm import SJISSMModel
34 | from . import constants
35 | 
36 | 
class SJISProber(MultiByteCharSetProber):
    """Multi-byte prober for Shift_JIS style input.

    In addition to the state machine and distribution analyzer used by
    ``MultiByteCharSetProber``, it feeds a ``SJISContextAnalysis`` object
    and reports the higher of the two confidences.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(SJISSMModel)
        self._mDistributionAnalyzer = SJISDistributionAnalysis()
        self._mContextAnalyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return the charset name reported by the context analyzer."""
        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        """Run ``aBuf`` through the state machine and both analyzers.

        :param aBuf: indexable, sliceable sequence of bytes to examine.
        :returns: the prober state (``self.get_state()``) after processing.

        An empty ``aBuf`` is accepted: no bytes are processed and the
        carried-over last byte is left untouched (previously this raised
        ``IndexError``).
        """
        aLen = len(aBuf)
        for i in range(aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # The state machine is back at its start state: hand the
                # relevant bytes to both analyzers.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # Pair the byte carried over from the previous call with
                    # the first byte of this buffer.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
                                                charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
                                                     - charLen], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call; skip when there is none
        # so that an empty buffer does not raise IndexError.
        if aLen:
            self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
92 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/chardet/utf8prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from . import constants
29 | from .charsetprober import CharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .mbcssm import UTF8SMModel
32 | 
# Factor applied once per multi-byte character seen when
# UTF8Prober.get_confidence() shrinks its "unlike" estimate.
ONE_CHAR_PROB = 0.5
34 | 
35 | 
class UTF8Prober(CharSetProber):
    """Prober that recognizes UTF-8 by validating byte sequences.

    Bytes are run through a coding state machine built from ``UTF8SMModel``;
    confidence grows with the number of complete multi-byte characters seen.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(UTF8SMModel)
        self.reset()

    def reset(self):
        """Reset the prober, its state machine and the multi-byte counter."""
        CharSetProber.reset(self)
        self._mCodingSM.reset()
        self._mNumOfMBChar = 0

    def get_charset_name(self):
        """Return the name of the charset this prober detects."""
        return "utf-8"

    def feed(self, aBuf):
        """Process ``aBuf`` byte by byte and return the resulting state."""
        state_machine = self._mCodingSM
        for byte in aBuf:
            outcome = state_machine.next_state(byte)
            if outcome == constants.eError:
                self._mState = constants.eNotMe
                break
            if outcome == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if outcome == constants.eStart:
                # A character just completed; count it if it took >= 2 bytes.
                if state_machine.get_current_charlen() >= 2:
                    self._mNumOfMBChar += 1

        still_detecting = self.get_state() == constants.eDetecting
        if (still_detecting and
                self.get_confidence() > constants.SHORTCUT_THRESHOLD):
            self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return a confidence derived from the multi-byte character count.

        With six or more multi-byte characters the result is 0.99; below
        that it is ``1.0 - 0.99 * ONE_CHAR_PROB ** count``, computed by
        repeated multiplication.
        """
        seen = self._mNumOfMBChar
        if not seen < 6:
            return 0.99

        unlike = 0.99
        for _ in range(0, seen):
            unlike = unlike * ONE_CHAR_PROB
        return 1.0 - unlike
77 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | urllib3 - Thread-safe connection pooling and re-using.
 3 | """
 4 | 
 5 | from __future__ import absolute_import
 6 | import warnings
 7 | 
 8 | from .connectionpool import (
 9 |     HTTPConnectionPool,
10 |     HTTPSConnectionPool,
11 |     connection_from_url
12 | )
13 | 
14 | from . import exceptions
15 | from .filepost import encode_multipart_formdata
16 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url
17 | from .response import HTTPResponse
18 | from .util.request import make_headers
19 | from .util.url import get_host
20 | from .util.timeout import Timeout
21 | from .util.retry import Retry
22 | 
23 | 
24 | # Set default logging handler to avoid "No handler found" warnings.
25 | import logging
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    # Fallback used when ``logging`` does not provide NullHandler: a handler
    # whose emit() discards every record.
    class NullHandler(logging.Handler):
        def emit(self, record):
            pass
32 | 
33 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
34 | __license__ = 'MIT'
35 | __version__ = '1.16'
36 | 
37 | __all__ = (
38 |     'HTTPConnectionPool',
39 |     'HTTPSConnectionPool',
40 |     'PoolManager',
41 |     'ProxyManager',
42 |     'HTTPResponse',
43 |     'Retry',
44 |     'Timeout',
45 |     'add_stderr_logger',
46 |     'connection_from_url',
47 |     'disable_warnings',
48 |     'encode_multipart_formdata',
49 |     'get_host',
50 |     'make_headers',
51 |     'proxy_from_url',
52 | )
53 | 
54 | logging.getLogger(__name__).addHandler(NullHandler())
55 | 
56 | 
def add_stderr_logger(level=logging.DEBUG):
    """
    Attach a ``logging.StreamHandler`` to this package's logger.

    Intended as a quick debugging aid: the logger's level is set to
    ``level`` and a debug record announcing the new handler is emitted.

    :param level: logging level to set on the package logger.
    :returns: the handler that was added.
    """
    # This function has to live in this __init__.py so that __name__ is
    # correct even when urllib3 is vendored inside another package.
    stderr_handler = logging.StreamHandler()
    stderr_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))

    package_logger = logging.getLogger(__name__)
    package_logger.addHandler(stderr_handler)
    package_logger.setLevel(level)
    package_logger.debug(
        'Added a stderr logging handler to logger: %s', __name__)
    return stderr_handler
73 | 
74 | # ... Clean up.
75 | del NullHandler
76 | 
77 | 
78 | # All warning filters *must* be appended unless you're really certain that they
79 | # shouldn't be: otherwise, it's very hard for users to use most Python
80 | # mechanisms to silence them.
81 | # SecurityWarning's always go off by default.
82 | warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
83 | # SubjectAltNameWarning's should go off once per host
84 | warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True)
85 | # InsecurePlatformWarning's don't vary between requests, so we keep it default.
86 | warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
87 |                       append=True)
88 | # SNIMissingWarnings should go off only once.
89 | warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True)
90 | 
91 | 
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Silence urllib3 warnings by installing an 'ignore' filter.

    :param category: warning class to ignore; defaults to
        ``exceptions.HTTPWarning``.
    """
    warnings.simplefilter(action='ignore', category=category)
97 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/src/lib/requests/packages/urllib3/contrib/__init__.py


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/filepost.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import codecs
 3 | 
 4 | from uuid import uuid4
 5 | from io import BytesIO
 6 | 
 7 | from .packages import six
 8 | from .packages.six import b
 9 | from .fields import RequestField
10 | 
# Item 3 of the codecs.lookup() result for UTF-8; the code below calls it as
# writer(body).write(text) to write text into the bytes body.
# NOTE(review): per the codecs docs this should be the StreamWriter class.
writer = codecs.lookup('utf-8')[3]
12 | 
13 | 
def choose_boundary():
    """
    Return a fresh multipart boundary: the hex form of a random UUID4.

    A deliberately simple stand-in for mimetools.choose_boundary.
    """
    boundary_id = uuid4()
    return boundary_id.hex
19 | 
20 | 
def iter_field_objects(fields):
    """
    Yield a :class:`~urllib3.fields.RequestField` for every entry in fields.

    ``fields`` may be a dict, an iterable of (k, v) tuples, or an iterable
    of :class:`~urllib3.fields.RequestField` objects. Entries that are not
    already RequestField instances are converted with
    ``RequestField.from_tuples``.

    """
    entries = six.iteritems(fields) if isinstance(fields, dict) else iter(fields)

    for entry in entries:
        if not isinstance(entry, RequestField):
            entry = RequestField.from_tuples(*entry)
        yield entry
39 | 
40 | 
def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Return a generator of (key, value) pairs taken from fields.

    Made obsolete by :class:`~urllib3.fields.RequestField`; prefer
    :func:`iter_field_objects`, which yields
    :class:`~urllib3.fields.RequestField` objects.

    Accepts a dict or an iterable of (k, v) pairs.
    """
    pairs = six.iteritems(fields) if isinstance(fields, dict) else fields
    return ((key, value) for key, value in pairs)
57 | 
58 | 
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode ``fields`` as a multipart/form-data body.

    :param fields:
        Anything accepted by :func:`iter_field_objects`: a dictionary of
        fields, or an iterable of (key, value) tuples or
        :class:`~urllib3.fields.RequestField` objects.

    :param boundary:
        Boundary string to use. If not specified, a random one is generated
        with :func:`choose_boundary`.

    :returns:
        A ``(body, content_type)`` tuple: the encoded body bytes and the
        matching ``multipart/form-data`` content type string.
    """
    if boundary is None:
        boundary = choose_boundary()

    body = BytesIO()

    for field in iter_field_objects(fields):
        body.write(b('--%s\r\n' % boundary))

        # Text goes through the UTF-8 writer; raw bytes are written directly.
        text_out = writer(body)
        text_out.write(field.render_headers())

        payload = field.data
        if isinstance(payload, int):
            payload = str(payload)  # Backwards compatibility

        if isinstance(payload, six.text_type):
            text_out.write(payload)
        else:
            body.write(payload)

        body.write(b'\r\n')

    # Closing delimiter.
    body.write(b('--%s--\r\n' % boundary))

    content_type = str('multipart/form-data; boundary=%s' % boundary)
    return body.getvalue(), content_type
95 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | from . import ssl_match_hostname
4 | 
5 | __all__ = ('ssl_match_hostname', )
6 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/backports/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/src/lib/requests/packages/urllib3/packages/backports/__init__.py


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/backports/makefile.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | backports.makefile
 4 | ~~~~~~~~~~~~~~~~~~
 5 | 
 6 | Backports the Python 3 ``socket.makefile`` method for use with anything that
 7 | wants to create a "fake" socket object.
 8 | """
 9 | import io
10 | 
11 | from socket import SocketIO
12 | 
13 | 
def backport_makefile(self, mode="r", buffering=None, encoding=None,
                      errors=None, newline=None):
    """
    Backport of ``socket.makefile`` from Python 3.5.

    Wraps ``self`` in a ``SocketIO`` and returns, depending on ``mode`` and
    ``buffering``, the raw object, a buffered binary stream, or a
    ``TextIOWrapper`` around the buffered stream. ``self._makefile_refs``
    is incremented once per call that gets past mode validation.

    Raises ``ValueError`` for a mode containing characters other than
    r, w, b, and for an unbuffered (``buffering == 0``) text stream.
    """
    if not set(mode).issubset("rwb"):
        raise ValueError(
            "invalid mode %r (only r, w, b allowed)" % (mode,)
        )

    writing = "w" in mode
    # Reading is the default when no write flag is given.
    reading = "r" in mode or not writing
    assert reading or writing
    binary = "b" in mode

    rawmode = ("r" if reading else "") + ("w" if writing else "")
    raw = SocketIO(self, rawmode)
    self._makefile_refs += 1

    # None and negative values both select the default buffer size.
    if buffering is None or buffering < 0:
        buffering = io.DEFAULT_BUFFER_SIZE

    if buffering == 0:
        if binary:
            return raw
        raise ValueError("unbuffered streams must be binary")

    if reading and writing:
        buffered = io.BufferedRWPair(raw, raw, buffering)
    elif reading:
        buffered = io.BufferedReader(raw, buffering)
    else:
        assert writing
        buffered = io.BufferedWriter(raw, buffering)

    if binary:
        return buffered

    text = io.TextIOWrapper(buffered, encoding, errors, newline)
    text.mode = mode
    return text
54 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/ssl_match_hostname/.gitignore:
--------------------------------------------------------------------------------
1 | env
2 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     # Python 3.2+
 3 |     from ssl import CertificateError, match_hostname
 4 | except ImportError:
 5 |     try:
 6 |         # Backport of the function from a pypi module
 7 |         from backports.ssl_match_hostname import CertificateError, match_hostname
 8 |     except ImportError:
 9 |         # Our vendored copy
10 |         from ._implementation import CertificateError, match_hostname
11 | 
12 | # Not needed, but documenting what we provide.
13 | __all__ = ('CertificateError', 'match_hostname')
14 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py:
--------------------------------------------------------------------------------
  1 | """The match_hostname() function from Python 3.3.3, essential when using SSL."""
  2 | 
  3 | # Note: This file is under the PSF license as the code comes from the python
  4 | # stdlib.   http://docs.python.org/3/license.html
  5 | 
  6 | import re
  7 | 
  8 | __version__ = '3.4.0.2'
  9 | 
 10 | class CertificateError(ValueError):
 11 |     pass
 12 | 
 13 | 
 14 | def _dnsname_match(dn, hostname, max_wildcards=1):
 15 |     """Matching according to RFC 6125, section 6.4.3
 16 | 
 17 |     http://tools.ietf.org/html/rfc6125#section-6.4.3
 18 |     """
 19 |     pats = []
 20 |     if not dn:
 21 |         return False
 22 | 
 23 |     # Ported from python3-syntax:
 24 |     # leftmost, *remainder = dn.split(r'.')
 25 |     parts = dn.split(r'.')
 26 |     leftmost = parts[0]
 27 |     remainder = parts[1:]
 28 | 
 29 |     wildcards = leftmost.count('*')
 30 |     if wildcards > max_wildcards:
 31 |         # Issue #17980: avoid denials of service by refusing more
 32 |         # than one wildcard per fragment.  A survey of established
 33 |         # policy among SSL implementations showed it to be a
 34 |         # reasonable choice.
 35 |         raise CertificateError(
 36 |             "too many wildcards in certificate DNS name: " + repr(dn))
 37 | 
 38 |     # speed up common case w/o wildcards
 39 |     if not wildcards:
 40 |         return dn.lower() == hostname.lower()
 41 | 
 42 |     # RFC 6125, section 6.4.3, subitem 1.
 43 |     # The client SHOULD NOT attempt to match a presented identifier in which
 44 |     # the wildcard character comprises a label other than the left-most label.
 45 |     if leftmost == '*':
 46 |         # When '*' is a fragment by itself, it matches a non-empty dotless
 47 |         # fragment.
 48 |         pats.append('[^.]+')
 49 |     elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
 50 |         # RFC 6125, section 6.4.3, subitem 3.
 51 |         # The client SHOULD NOT attempt to match a presented identifier
 52 |         # where the wildcard character is embedded within an A-label or
 53 |         # U-label of an internationalized domain name.
 54 |         pats.append(re.escape(leftmost))
 55 |     else:
 56 |         # Otherwise, '*' matches any dotless string, e.g. www*
 57 |         pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
 58 | 
 59 |     # add the remaining fragments, ignore any wildcards
 60 |     for frag in remainder:
 61 |         pats.append(re.escape(frag))
 62 | 
 63 |     pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
 64 |     return pat.match(hostname)
 65 | 
 66 | 
 67 | def match_hostname(cert, hostname):
 68 |     """Verify that *cert* (in decoded format as returned by
 69 |     SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
 70 |     rules are followed, but IP addresses are not accepted for *hostname*.
 71 | 
 72 |     CertificateError is raised on failure. On success, the function
 73 |     returns nothing.
 74 |     """
 75 |     if not cert:
 76 |         raise ValueError("empty or no certificate")
 77 |     dnsnames = []
 78 |     san = cert.get('subjectAltName', ())
 79 |     for key, value in san:
 80 |         if key == 'DNS':
 81 |             if _dnsname_match(value, hostname):
 82 |                 return
 83 |             dnsnames.append(value)
 84 |     if not dnsnames:
 85 |         # The subject is only checked when there is no dNSName entry
 86 |         # in subjectAltName
 87 |         for sub in cert.get('subject', ()):
 88 |             for key, value in sub:
 89 |                 # XXX according to RFC 2818, the most specific Common Name
 90 |                 # must be used.
 91 |                 if key == 'commonName':
 92 |                     if _dnsname_match(value, hostname):
 93 |                         return
 94 |                     dnsnames.append(value)
 95 |     if len(dnsnames) > 1:
 96 |         raise CertificateError("hostname %r "
 97 |             "doesn't match either of %s"
 98 |             % (hostname, ', '.join(map(repr, dnsnames))))
 99 |     elif len(dnsnames) == 1:
100 |         raise CertificateError("hostname %r "
101 |             "doesn't match %r"
102 |             % (hostname, dnsnames[0]))
103 |     else:
104 |         raise CertificateError("no appropriate commonName or "
105 |             "subjectAltName fields were found")
106 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/util/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | # For backwards compatibility, provide imports that used to be here.
 3 | from .connection import is_connection_dropped
 4 | from .request import make_headers
 5 | from .response import is_fp_closed
 6 | from .ssl_ import (
 7 |     SSLContext,
 8 |     HAS_SNI,
 9 |     IS_PYOPENSSL,
10 |     assert_fingerprint,
11 |     resolve_cert_reqs,
12 |     resolve_ssl_version,
13 |     ssl_wrap_socket,
14 | )
15 | from .timeout import (
16 |     current_time,
17 |     Timeout,
18 | )
19 | 
20 | from .retry import Retry
21 | from .url import (
22 |     get_host,
23 |     parse_url,
24 |     split_first,
25 |     Url,
26 | )
27 | 
28 | __all__ = (
29 |     'HAS_SNI',
30 |     'IS_PYOPENSSL',
31 |     'SSLContext',
32 |     'Retry',
33 |     'Timeout',
34 |     'Url',
35 |     'assert_fingerprint',
36 |     'current_time',
37 |     'is_connection_dropped',
38 |     'is_fp_closed',
39 |     'get_host',
40 |     'parse_url',
41 |     'make_headers',
42 |     'resolve_cert_reqs',
43 |     'resolve_ssl_version',
44 |     'split_first',
45 |     'ssl_wrap_socket',
46 | )
47 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/util/request.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from base64 import b64encode
 3 | 
 4 | from ..packages.six import b
 5 | 
 6 | ACCEPT_ENCODING = 'gzip,deflate'
 7 | 
 8 | 
 9 | def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
10 |                  basic_auth=None, proxy_basic_auth=None, disable_cache=None):
11 |     """
12 |     Shortcuts for generating request headers.
13 | 
14 |     :param keep_alive:
15 |         If ``True``, adds 'connection: keep-alive' header.
16 | 
17 |     :param accept_encoding:
18 |         Can be a boolean, list, or string.
19 |         ``True`` translates to 'gzip,deflate'.
20 |         List will get joined by comma.
21 |         String will be used as provided.
22 | 
23 |     :param user_agent:
24 |         String representing the user-agent you want, such as
25 |         "python-urllib3/0.6"
26 | 
27 |     :param basic_auth:
28 |         Colon-separated username:password string for 'authorization: basic ...'
29 |         auth header.
30 | 
31 |     :param proxy_basic_auth:
32 |         Colon-separated username:password string for 'proxy-authorization: basic ...'
33 |         auth header.
34 | 
35 |     :param disable_cache:
36 |         If ``True``, adds 'cache-control: no-cache' header.
37 | 
38 |     Example::
39 | 
40 |         >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
41 |         {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
42 |         >>> make_headers(accept_encoding=True)
43 |         {'accept-encoding': 'gzip,deflate'}
44 |     """
45 |     headers = {}
46 |     if accept_encoding:
47 |         if isinstance(accept_encoding, str):
48 |             pass
49 |         elif isinstance(accept_encoding, list):
50 |             accept_encoding = ','.join(accept_encoding)
51 |         else:
52 |             accept_encoding = ACCEPT_ENCODING
53 |         headers['accept-encoding'] = accept_encoding
54 | 
55 |     if user_agent:
56 |         headers['user-agent'] = user_agent
57 | 
58 |     if keep_alive:
59 |         headers['connection'] = 'keep-alive'
60 | 
61 |     if basic_auth:
62 |         headers['authorization'] = 'Basic ' + \
63 |             b64encode(b(basic_auth)).decode('utf-8')
64 | 
65 |     if proxy_basic_auth:
66 |         headers['proxy-authorization'] = 'Basic ' + \
67 |             b64encode(b(proxy_basic_auth)).decode('utf-8')
68 | 
69 |     if disable_cache:
70 |         headers['cache-control'] = 'no-cache'
71 | 
72 |     return headers
73 | 


--------------------------------------------------------------------------------
/src/lib/requests/packages/urllib3/util/response.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from ..packages.six.moves import http_client as httplib
 3 | 
 4 | from ..exceptions import HeaderParsingError
 5 | 
 6 | 
 7 | def is_fp_closed(obj):
 8 |     """
 9 |     Checks whether a given file-like object is closed.
10 | 
11 |     :param obj:
12 |         The file-like object to check.
13 |     """
14 | 
15 |     try:
16 |         # Check via the official file-like-object way.
17 |         return obj.closed
18 |     except AttributeError:
19 |         pass
20 | 
21 |     try:
22 |         # Check if the object is a container for another file-like object that
23 |         # gets released on exhaustion (e.g. HTTPResponse).
24 |         return obj.fp is None
25 |     except AttributeError:
26 |         pass
27 | 
28 |     raise ValueError("Unable to determine whether fp is closed.")
29 | 
30 | 
31 | def assert_header_parsing(headers):
32 |     """
33 |     Asserts whether all headers have been successfully parsed.
34 |     Extracts encountered errors from the result of parsing headers.
35 | 
36 |     Only works on Python 3.
37 | 
38 |     :param headers: Headers to verify.
39 |     :type headers: `httplib.HTTPMessage`.
40 | 
41 |     :raises urllib3.exceptions.HeaderParsingError:
42 |         If parsing errors are found.
43 |     """
44 | 
45 |     # This will fail silently if we pass in the wrong kind of parameter.
46 |     # To make debugging easier add an explicit check.
47 |     if not isinstance(headers, httplib.HTTPMessage):
48 |         raise TypeError('expected httplib.Message, got {0}.'.format(
49 |             type(headers)))
50 | 
51 |     defects = getattr(headers, 'defects', None)
52 |     get_payload = getattr(headers, 'get_payload', None)
53 | 
54 |     unparsed_data = None
55 |     if get_payload:  # Platform-specific: Python 3.
56 |         unparsed_data = get_payload()
57 | 
58 |     if defects or unparsed_data:
59 |         raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
60 | 
61 | 
62 | def is_response_to_head(response):
63 |     """
64 |     Checks whether the request of a response has been a HEAD-request.
65 |     Handles the quirks of AppEngine.
66 | 
67 |     :param conn:
68 |     :type conn: :class:`httplib.HTTPResponse`
69 |     """
70 |     # FIXME: Can we do this somehow without accessing private httplib _method?
71 |     method = response._method
72 |     if isinstance(method, int):  # Platform-specific: Appengine
73 |         return method == 3
74 |     return method.upper() == 'HEAD'
75 | 


--------------------------------------------------------------------------------
/src/lib/requests/status_codes.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .structures import LookupDict
 4 | 
 5 | _codes = {
 6 | 
 7 |     # Informational.
 8 |     100: ('continue',),
 9 |     101: ('switching_protocols',),
10 |     102: ('processing',),
11 |     103: ('checkpoint',),
12 |     122: ('uri_too_long', 'request_uri_too_long'),
13 |     200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
14 |     201: ('created',),
15 |     202: ('accepted',),
16 |     203: ('non_authoritative_info', 'non_authoritative_information'),
17 |     204: ('no_content',),
18 |     205: ('reset_content', 'reset'),
19 |     206: ('partial_content', 'partial'),
20 |     207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
21 |     208: ('already_reported',),
22 |     226: ('im_used',),
23 | 
24 |     # Redirection.
25 |     300: ('multiple_choices',),
26 |     301: ('moved_permanently', 'moved', '\\o-'),
27 |     302: ('found',),
28 |     303: ('see_other', 'other'),
29 |     304: ('not_modified',),
30 |     305: ('use_proxy',),
31 |     306: ('switch_proxy',),
32 |     307: ('temporary_redirect', 'temporary_moved', 'temporary'),
33 |     308: ('permanent_redirect',
34 |           'resume_incomplete', 'resume',),  # These 2 to be removed in 3.0
35 | 
36 |     # Client Error.
37 |     400: ('bad_request', 'bad'),
38 |     401: ('unauthorized',),
39 |     402: ('payment_required', 'payment'),
40 |     403: ('forbidden',),
41 |     404: ('not_found', '-o-'),
42 |     405: ('method_not_allowed', 'not_allowed'),
43 |     406: ('not_acceptable',),
44 |     407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
45 |     408: ('request_timeout', 'timeout'),
46 |     409: ('conflict',),
47 |     410: ('gone',),
48 |     411: ('length_required',),
49 |     412: ('precondition_failed', 'precondition'),
50 |     413: ('request_entity_too_large',),
51 |     414: ('request_uri_too_large',),
52 |     415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
53 |     416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
54 |     417: ('expectation_failed',),
55 |     418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
56 |     421: ('misdirected_request',),
57 |     422: ('unprocessable_entity', 'unprocessable'),
58 |     423: ('locked',),
59 |     424: ('failed_dependency', 'dependency'),
60 |     425: ('unordered_collection', 'unordered'),
61 |     426: ('upgrade_required', 'upgrade'),
62 |     428: ('precondition_required', 'precondition'),
63 |     429: ('too_many_requests', 'too_many'),
64 |     431: ('header_fields_too_large', 'fields_too_large'),
65 |     444: ('no_response', 'none'),
66 |     449: ('retry_with', 'retry'),
67 |     450: ('blocked_by_windows_parental_controls', 'parental_controls'),
68 |     451: ('unavailable_for_legal_reasons', 'legal_reasons'),
69 |     499: ('client_closed_request',),
70 | 
71 |     # Server Error.
72 |     500: ('internal_server_error', 'server_error', '/o\\', '✗'),
73 |     501: ('not_implemented',),
74 |     502: ('bad_gateway',),
75 |     503: ('service_unavailable', 'unavailable'),
76 |     504: ('gateway_timeout',),
77 |     505: ('http_version_not_supported', 'http_version'),
78 |     506: ('variant_also_negotiates',),
79 |     507: ('insufficient_storage',),
80 |     509: ('bandwidth_limit_exceeded', 'bandwidth'),
81 |     510: ('not_extended',),
82 |     511: ('network_authentication_required', 'network_auth', 'network_authentication'),
83 | }
84 | 
85 | codes = LookupDict(name='status_codes')
86 | # Expose every alias, plus its upper-case form, as an attribute of `codes`.
87 | for code, titles in _codes.items():
88 |     for title in titles:
89 |         setattr(codes, title, code)
90 |         if not title.startswith('\\'):  # backslash aliases get no upper-case twin
91 |             setattr(codes, title.upper(), code)
92 | 


--------------------------------------------------------------------------------
/src/lib/requests/structures.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | requests.structures
  5 | ~~~~~~~~~~~~~~~~~~~
  6 | 
  7 | Data structures that power Requests.
  8 | """
  9 | 
 10 | import collections
 11 | 
 12 | from .compat import OrderedDict
 13 | 
 14 | 
 15 | class CaseInsensitiveDict(collections.MutableMapping):
 16 |     """A case-insensitive ``dict``-like object.
 17 | 
 18 |     Implements all methods and operations of
 19 |     ``collections.MutableMapping`` as well as dict's ``copy``. Also
 20 |     provides ``lower_items``.
 21 | 
 22 |     All keys are expected to be strings. The structure remembers the
 23 |     case of the last key to be set, and ``iter(instance)``,
 24 |     ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
 25 |     will contain case-sensitive keys. However, querying and contains
 26 |     testing is case insensitive::
 27 | 
 28 |         cid = CaseInsensitiveDict()
 29 |         cid['Accept'] = 'application/json'
 30 |         cid['aCCEPT'] == 'application/json'  # True
 31 |         list(cid) == ['Accept']  # True
 32 | 
 33 |     For example, ``headers['content-encoding']`` will return the
 34 |     value of a ``'Content-Encoding'`` response header, regardless
 35 |     of how the header name was originally stored.
 36 | 
 37 |     If the constructor, ``.update``, or equality comparison
 38 |     operations are given keys that have equal ``.lower()``s, the
 39 |     behavior is undefined.
 40 |     """
 41 | 
 42 |     def __init__(self, data=None, **kwargs):
 43 |         self._store = OrderedDict()
 44 |         if data is None:
 45 |             data = {}
 46 |         self.update(data, **kwargs)
 47 | 
 48 |     def __setitem__(self, key, value):
 49 |         # Use the lowercased key for lookups, but store the actual
 50 |         # key alongside the value.
 51 |         self._store[key.lower()] = (key, value)
 52 | 
 53 |     def __getitem__(self, key):
 54 |         return self._store[key.lower()][1]
 55 | 
 56 |     def __delitem__(self, key):
 57 |         del self._store[key.lower()]
 58 | 
 59 |     def __iter__(self):
 60 |         return (casedkey for casedkey, mappedvalue in self._store.values())
 61 | 
 62 |     def __len__(self):
 63 |         return len(self._store)
 64 | 
 65 |     def lower_items(self):
 66 |         """Like iteritems(), but with all lowercase keys."""
 67 |         return (
 68 |             (lowerkey, keyval[1])
 69 |             for (lowerkey, keyval)
 70 |             in self._store.items()
 71 |         )
 72 | 
 73 |     def __eq__(self, other):
 74 |         if isinstance(other, collections.Mapping):
 75 |             other = CaseInsensitiveDict(other)
 76 |         else:
 77 |             return NotImplemented
 78 |         # Compare insensitively
 79 |         return dict(self.lower_items()) == dict(other.lower_items())
 80 | 
 81 |     # Copy is required
 82 |     def copy(self):
 83 |         return CaseInsensitiveDict(self._store.values())
 84 | 
 85 |     def __repr__(self):
 86 |         return str(dict(self.items()))
 87 | 
 88 | 
 89 | class LookupDict(dict):
 90 |     """Dictionary lookup object."""
 91 | 
 92 |     def __init__(self, name=None):
 93 |         self.name = name
 94 |         super(LookupDict, self).__init__()
 95 | 
 96 |     def __repr__(self):
 97 |         return '<lookup \'%s\'>' % (self.name)
 98 | 
 99 |     def __getitem__(self, key):
100 |         # We allow fall-through here, so values default to None
101 | 
102 |         return self.__dict__.get(key, None)
103 | 
104 |     def get(self, key, default=None):
105 |         return self.__dict__.get(key, default)
106 | 


--------------------------------------------------------------------------------
/src/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/src/tools/config.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import json
 3 | import os
 4 | 
 5 | from src.tools.path import Path
 6 | 
 7 | 
 8 | class Config(object):
 9 |     u"""
10 |     用于储存、获取设置值、全局变量值
11 |     """
12 |     # 全局变量
13 |     update_time = '2017-01-24'  # 更新日期
14 | 
15 |     debug = False
16 |     debug_for_create_book = False # 是否在测试电子书生成功能，在测试的话跳过网页抓取部分
17 |     debug_for_thread = False # 是否在测试多线程功能，在测试的话改为单线程执行
18 | 
19 |     account = 'mengqingxue@yaozeyuan.online'  # 默认账号密码, 2017年更新
20 |     password = '912714398d'  #
21 |     remember_account = False  # 是否使用已有密码
22 |     max_thread = 10  # 最大线程数，其实设成5就行了，但下图片的时候还是得多开几个线程，所以还是设成10好了（反正冬天，CPU满了有利于室内保温 - -）
23 |     picture_quality = 1  # 图片质量（0/1/2，无图/标清/原图）
24 |     max_try = 5  # 下载图片时的最大尝试次数
25 |     max_book_size_mb = 100  # 单个文件的最大大小(MB, 兆)，超过这个数会自动分卷
26 |     timeout_download_picture = 10  # 多给知乎服务器点时间，批量生成tex太痛苦了- -
27 |     timeout_download_html = 5
28 | 
29 |     article_order_by = ' order by article_id asc '  # 文章排序顺序，默认：时间顺序正序
30 |     answer_order_by = ' order by voteup_count desc '  # 答案排序顺序，默认：赞同数降序
31 |     topic_or_collection_answer_order_by = ' '  # 话题/收藏夹中答案排序顺序，默认：按在话题/收藏夹中的顺序排列
32 | 
33 | 
34 |     @staticmethod
35 |     def init_config():
36 |         Config.load()
37 |         return
38 | 
39 |     @staticmethod
40 |     def save():
41 |         data = {}
42 |         with open(Path.config_path, 'w') as f:
43 |             for key in Config.__dict__:
44 |                 value = Config.__dict__[key]
45 |                 if '__' in key[:2]:
46 |                     #   内置属性直接跳过
47 |                     continue
48 |                 try:
49 |                     json.dumps(value)
50 |                 except TypeError:
51 |                     #   暴力判断是否可被序列化←_←
52 |                     pass
53 |                 else:
54 |                     data[key] = value
55 |             json.dump(data, f, indent=4)
56 |         return
57 | 
58 |     @staticmethod
59 |     def load():
60 |         if not os.path.isfile(Path.config_path):
61 |             return
62 |         with open(Path.config_path) as f:
63 |             config = json.load(f)
64 |             if not config.get('remember_account'):
65 |                 # 当选择不记住密码时，跳过读取，使用默认设置
66 |                 # 不考虑用户强行在配置文件中把account改成空的情况
67 |                 return
68 |         for (key, value) in config.items():
69 |             setattr(Config, key, value)
70 |         return
71 | 


--------------------------------------------------------------------------------
/src/tools/controler.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from multiprocessing.dummy import Pool as ThreadPool  # 多线程并行库
 3 | 
 4 | from src.tools.config import Config
 5 | 
 6 | 
 7 | class Control(object):
 8 |     thread_pool = ThreadPool(Config.max_thread)
 9 | 
10 |     @staticmethod
11 |     def control_center(argv, test_flag):
12 |         max_try = Config.max_try
13 |         for time in range(max_try):
14 |             if test_flag:
15 |                 if Config.debug_for_thread:
16 |                     Control.debug_control(argv)
17 |                 else:
18 |                     Control.release_control(argv)
19 |                 Control.thread_pool.map(**argv)
20 |         return
21 | 
22 |     @staticmethod
23 |     def debug_control(argv):
24 |         for item in argv['iterable']:
25 |             argv['func'](item)
26 |         return
27 | 
28 |     @staticmethod
29 |     def release_control(argv):
30 |         try:
31 |             Control.thread_pool.map(**argv)
32 |         except Exception:
33 |             # 按照惯例，报错全部pass掉
34 |             # 等用户反馈了再开debug查吧
35 |             pass
36 |         return
37 | 


--------------------------------------------------------------------------------
/src/tools/debug.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import logging
 3 | import logging.handlers
 4 | import sys
 5 | 
 6 | from src.tools.config import Config
 7 | 
 8 | 
 9 | class Debug(object):
10 |     u"""
11 |     打印日志
12 |     """
13 |     logger = logging.getLogger('main')  # 获取名为main的logger
14 |     if Config.debug:
15 |         logger.setLevel(logging.DEBUG)  # debug模式
16 |     else:
17 |         logger.setLevel(logging.INFO)  # 发布时关闭log输出
18 | 
19 |     # 辅助函数
20 |     @staticmethod
21 |     def print_in_single_line(text=''):
22 |         try:
23 |             sys.stdout.write("\r" + " " * 60 + '\r')
24 |             sys.stdout.flush()
25 |             sys.stdout.write(text)
26 |             sys.stdout.flush()
27 |         except:
28 |             pass
29 |         return
30 | 
31 |     @staticmethod
32 |     def print_dict(data={}, key='', prefix=''):
33 |         try:
34 |             if isinstance(data, dict):
35 |                 for key in data:
36 |                     Debug.print_dict(data[key], key, prefix + '   ')
37 |             else:
38 |                 if isinstance(data, basestring):
39 |                     print prefix + unicode(key) + ' => ' + data
40 |                 else:
41 |                     print prefix + unicode(key) + ' => ' + unicode(data)
42 |         except UnicodeEncodeError as error:
43 |             Debug.logger.info(u'编码异常')
44 |             Debug.logger.info(u'系统默认编码为：' + sys.getdefaultencoding())
45 |             # raise error
46 |         return
47 | 
48 |     @staticmethod
49 |     def print_config():
50 |         Debug.print_dict(Config.__dict__)
51 |         return
52 | 


--------------------------------------------------------------------------------
/src/tools/extra_tools.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import hashlib
 3 | import time
 4 | import datetime
 5 | 
 6 | 
 7 | class ExtraTools(object):
 8 |     @staticmethod
 9 |     def format_date(date_format, timestamp):
10 |         ltime = time.localtime(timestamp)
11 |         return time.strftime(date_format, ltime)
12 | 
13 |     @staticmethod
14 |     def get_time():
15 |         return str(time.time()).split('.')[0]
16 | 
17 |     @staticmethod
18 |     def get_friendly_time():
19 |         return datetime.datetime.today().isoformat().split('.')[0].replace(':', '：')
20 | 
21 |     @staticmethod
22 |     def get_today():
23 |         return datetime.date.today().isoformat()
24 | 
25 |     @staticmethod
26 |     def get_yesterday():
27 |         today = datetime.date.today()
28 |         one = datetime.timedelta(days=1)
29 |         yesterday = today - one
30 |         return yesterday.isoformat()
31 | 
32 |     @staticmethod
33 |     def md5(content):
34 |         encrypt = hashlib.md5()
35 |         encrypt.update(str(content))
36 |         return encrypt.hexdigest()
37 | 


--------------------------------------------------------------------------------
/src/tools/path.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | import shutil
  4 | import locale
  5 | 
  6 | 
  7 | class Path(object):
  8 |     """Filesystem locations used by the program, plus small directory/file helpers."""
  9 |     # Starting directory, without a trailing separator.
 10 |     # sys.stdout.encoding has already been changed to utf-8 by this point, so
 11 |     # locale.getpreferredencoding() is used to obtain the default encoding instead.
 12 |     base_path = unicode(os.path.abspath('.').decode(locale.getpreferredencoding()))
 13 | 
 14 |     config_path = base_path + u'/config.json'
 15 |     db_path = base_path + u'/zhihuDB_18.sqlite'
 16 |     sql_path = base_path + u'/db/zhihuhelp.sql'
 17 | 
 18 |     www_css = base_path + u'/www/css'
 19 |     www_image = base_path + u'/www/images'
 20 | 
 21 |     html_pool_path = base_path + u'/知乎电子书临时资源库/知乎网页池'
 22 |     image_pool_path = base_path + u'/知乎电子书临时资源库/知乎图片池'
 23 |     book_pool_path = base_path + u'/知乎电子书临时资源库/知乎电子书临时文件池'
 24 |     result_path = base_path + u'/知乎助手生成的电子书'
 25 | 
 26 |     @staticmethod
 27 |     def reset_path():
 28 |         """Change the working directory back to ``Path.base_path``."""
 29 |         Path.chdir(Path.base_path)
 30 |         return
 31 | 
 32 |     @staticmethod
 33 |     def pwd():
 34 |         """Print the real path of the current working directory."""
 35 |         print os.path.realpath('.')
 36 |         return
 37 | 
 38 |     @staticmethod
 39 |     def get_pwd():
 40 |         """Return the absolute current working directory as unicode, decoded
 41 |         with the locale's preferred encoding."""
 42 |         path = unicode(os.path.abspath('.').decode(locale.getpreferredencoding()))
 43 |         return path
 44 | 
 45 |     @staticmethod
 46 |     def mkdir(path):
 47 |         """Create *path* with ``os.mkdir``, silently ignoring any ``OSError``."""
 48 |         try:
 49 |             os.mkdir(path)
 50 |         except OSError:
 51 |             # e.g. the directory already exists
 52 |             pass
 53 |         return
 54 | 
 55 |     @staticmethod
 56 |     def chdir(path):
 57 |         """Change into *path*; on ``OSError`` try to create it, then change into it."""
 58 |         try:
 59 |             os.chdir(path)
 60 |         except OSError:
 61 |             # the directory does not exist: create it automatically
 62 |             Path.mkdir(path)
 63 |             os.chdir(path)
 64 |         return
 65 | 
 66 |     @staticmethod
 67 |     def rmdir(path):
 68 |         """Recursively delete *path*, ignoring errors; a falsy *path* is a no-op."""
 69 |         if path:
 70 |             shutil.rmtree(path, ignore_errors=True)
 71 |         return
 72 | 
 73 |     @staticmethod
 74 |     def copy(src, dst):
 75 |         """Copy *src* to *dst*: directories via ``shutil.copytree``, anything
 76 |         else via ``shutil.copy``.  A missing *src* is skipped silently."""
 77 |         if not os.path.exists(src):
 78 |             # src does not exist: skip it
 79 |             return
 80 |         if os.path.isdir(src):
 81 |             shutil.copytree(src, dst)
 82 |         else:
 83 |             shutil.copy(src=src, dst=dst)
 84 |         return
 85 | 
 86 |     @staticmethod
 87 |     def get_filename(src):
 88 |         """Return the final path component of *src*."""
 89 |         return os.path.basename(src)
 90 | 
 91 |     @staticmethod
 92 |     def init_base_path():
 93 |         """Re-derive ``base_path`` from the current working directory and
 94 |         rebuild every class-level path that is built from it."""
 95 |         Path.base_path = Path.get_pwd()
 96 | 
 97 |         Path.config_path = Path.base_path + u'/config.json'
 98 |         Path.db_path = Path.base_path + u'/zhihuDB_18.sqlite'
 99 |         Path.sql_path = Path.base_path + u'/db/zhihuhelp.sql'
100 | 
101 |         Path.www_css = Path.base_path + u'/www/css'
102 |         Path.www_image = Path.base_path + u'/www/images'
103 | 
104 |         Path.html_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎网页池'
105 |         Path.image_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎图片池'
106 |         Path.book_pool_path = Path.base_path + u'/知乎电子书临时资源库/知乎电子书临时文件池'
107 |         Path.result_path = Path.base_path + u'/知乎助手生成的电子书'
108 | 
109 |         return
110 | 
111 |     @staticmethod
112 |     def init_work_directory():
113 |         """Create the output directory and the temporary-resource directories
114 |         under ``base_path``, then return to ``base_path``."""
115 |         Path.reset_path()
116 |         Path.mkdir(u'./知乎助手生成的电子书')
117 |         Path.mkdir(u'./知乎电子书临时资源库')
118 |         # the three pool directories are created inside the temporary-resource directory
119 |         Path.chdir(u'./知乎电子书临时资源库')
120 |         Path.mkdir(u'./知乎网页池')
121 |         Path.mkdir(u'./知乎图片池')
122 |         Path.mkdir(u'./知乎电子书临时文件池')
123 |         Path.reset_path()
124 |         return
125 | 
126 |     @staticmethod
127 |     def is_file(path):
128 |         """Return True when *path* is an existing regular file."""
129 |         return os.path.isfile(path)
130 | 
131 |     @staticmethod
132 |     def get_img_size_by_filename_kb(filename):
133 |         """Return the size in KB of *filename* inside the image pool, or 0
134 |         when no such file exists."""
135 |         path = Path.image_pool_path + '/' + filename
136 |         if Path.is_file(path) :
137 |             # NOTE(review): integer division under Python 2, which this
138 |             # module's print statement and unicode() calls imply; confirm.
139 |             return os.path.getsize(path) / 1024
140 |         return 0


--------------------------------------------------------------------------------
/src/tools/template.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | class Template(object):
 4 |     """
 5 |     文件模版
 6 |     """
 7 |     #   type : str
 8 |     #   interface : title, body
 9 |     base = open('./www/template/base.html', 'r').read()
10 | 
11 |     #   type : str
12 |     book_info = base.format(
13 |         **{
14 |             'title': '{title}',
15 |             'body': open('./www/template/info_page/book.html', 'r').read()
16 |         }
17 |     )
18 |     #   type : str
19 |     question_info = base.format(
20 |         **{
21 |             'title': '{title}',
22 |             'body': open('./www/template/info_page/question.html', 'r').read()
23 |         }
24 |     )
25 | 
26 |     #   type : str
27 |     author_info = base.format(
28 |         **{
29 |             'title': '{title}',
30 |             'body': open('./www/template/info_page/author.html', 'r').read()
31 |         }
32 |     )
33 | 
34 |     #   type : str
35 |     topic_info = base.format(
36 |         **{
37 |             'title': '{title}',
38 |             'body': open('./www/template/info_page/topic.html', 'r').read()
39 |         }
40 |     )
41 | 
42 |     #   type : str
43 |     collection_info = base.format(
44 |         **{
45 |             'title': '{title}',
46 |             'body': open('./www/template/info_page/collection.html', 'r').read()
47 |         }
48 |     )
49 | 
50 |     #   type : str
51 |     column_info = base.format(
52 |         **{
53 |             'title': '{title}',
54 |             'body': open('./www/template/info_page/column.html', 'r').read()
55 |         }
56 |     )
57 | 
58 |     #   type : str
59 |     article_info = base.format(
60 |         **{
61 |             'title': '{title}',
62 |             'body': open('./www/template/info_page/article.html', 'r').read()
63 |         }
64 |     )
65 | 
66 |     #   type : str
67 |     question = base.format(
68 |         **{
69 |             'title': '{title}',
70 |             'body': open('./www/template/content/question/question.html', 'r').read()
71 |         }
72 |     )
73 | 
74 |     #   type : str
75 |     answer = open('./www/template/content/question/answer.html', 'r').read()


--------------------------------------------------------------------------------
/src/tools/template_config.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from src.tools.path import Path
 3 | 
 4 | 
 5 | class TemplateConfig(object):
 6 |     template_path = Path.base_path + u'/www/template'
 7 |     content_template_path = template_path + u'/content'
 8 |     content_info_template_path = content_template_path + u'/info'
 9 |     content_question_template_path = content_template_path + u'/question'
10 |     front_page_template_path = template_path + u'/front_page'
11 |     front_page_info_template_path = front_page_template_path + u'/info'
12 | 
13 |     content_base_uri = template_path + u'/base.html'
14 | 
15 |     # content
16 |     ##info
17 |     info_author_uri = content_info_template_path + u'/author.html'
18 |     info_comment_uri = content_info_template_path + u'/comment.html'
19 |     info_title_uri = content_info_template_path + u'/title.html'
20 |     ##question
21 |     question_answer_uri = content_question_template_path + u'/answer.html'
22 |     question_question_uri = content_question_template_path + u'/question.html'
23 | 
24 |     # front_page
25 |     front_page_author_uri = front_page_info_template_path + u'/author.html'
26 |     front_page_collection_uri = front_page_info_template_path + u'/collection.html'
27 |     front_page_column_uri = front_page_info_template_path + u'/column.html'
28 |     front_page_topic_uri = front_page_info_template_path + u'/topic.html'
29 |     front_page_question_uri = front_page_info_template_path + u'/question.html'
30 |     front_page_answer_uri = front_page_info_template_path + u'/answer.html'
31 |     front_page_article_uri = front_page_info_template_path + u'/article.html'
32 | 
33 |     front_page_base_uri = front_page_template_path + u'/base.html'
34 | 


--------------------------------------------------------------------------------
/src/tools/type.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from random import random
 3 | 
 4 | 
 5 | class Type(object):
 6 |     #   未知类型
 7 |     unknown = 'unknown'
 8 | 
 9 |     answer = 'answer'
10 |     question = 'question'
11 |     topic = 'topic'
12 |     collection = 'collection'
13 |     author = 'author'
14 |     column = 'column'
15 |     article = 'article'
16 | 
17 |     pass
18 | 
19 | 
class ImgQuality(object):
    """
    Image quality levels, plus a helper that turns an image file name into
    a download address.
    """
    none = 0  # no images
    big = 1  # normal size
    raw = 2  # original picture

    @staticmethod
    def add_random_download_address_header_for_img_filename(file_uri):
        """
        Prepend an image host to ``file_uri`` to build its download address.

        Despite the name, the host is not chosen at random: the first entry
        of the candidate list is always used, so the result is deterministic.

        :param file_uri: image file name that is appended after the host
        :return: ``https://pic1.zhimg.com/`` followed by ``file_uri``
        """
        candidate_hosts = (
            'https://pic1.zhimg.com/',
            'https://pic2.zhimg.com/',
            'https://pic3.zhimg.com/',
            'https://pic4.zhimg.com/',
        )
        return candidate_hosts[0] + file_uri
40 | 


--------------------------------------------------------------------------------
/unit/BS4/content.html:
--------------------------------------------------------------------------------
1 | 用于测试BS4解析结果


--------------------------------------------------------------------------------
/unit/BS4/parser.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import sys
 3 | 
 4 | from bs4 import BeautifulSoup
 5 | from src.lib.zhihu_parser.tools.parser_tools import ParserTools
 6 | 
# Scratch script: parses ./content.html with BeautifulSoup and passes the
# resulting tree to ParserTools.get_tag_content.

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding available again
# so the default string encoding can be switched to utf8.
reload(sys)
sys.setdefaultencoding('utf8')

# sys.setrecursionlimit(1000000)  # long answers on Zhihu need a higher recursion depth limit
# Add library paths
# NOTE(review): the `src.lib...` import at the top of this file runs before
# these entries are appended, so they cannot help that import - confirm the
# script is started from a directory where `src` is already importable.
# NOTE(review): the backslash separators look Windows specific - confirm.
currentPath = sys.path[0].replace('unit', '')
sys.path.append(currentPath)
sys.path.append(currentPath + r'src')
sys.path.append(currentPath + r'src\tools')
sys.path.append(currentPath + r'src\parser')

# Path is relative: the script has to be run from the directory holding content.html.
content = open(u'./content.html').read()

parser = BeautifulSoup(content, 'html.parser')
tag_content = ParserTools.get_tag_content(parser)
# Bare expression: no effect when run as a script; presumably kept for
# inspecting the tree in an interactive session or debugger.
parser
23 | 


--------------------------------------------------------------------------------
/unit/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-


--------------------------------------------------------------------------------
/unit/addressFile/address_All:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #people
 4 | http://www.zhihu.com/people/zhong-wen-71
 5 | http://www.zhihu.com/people/Selerare
 6 | http://www.zhihu.com/people/_Zen
 7 | http://www.zhihu.com/people/mu-mu-55-53
 8 | http://www.zhihu.com/people/tian-yu-bai
 9 | http://www.zhihu.com/people/tian-yu-bai/followees
10 | http://www.zhihu.com/people/ying-ye-78
11 | #table
12 | http://www.zhihu.com/roundtable/superhero
13 | http://www.zhihu.com/roundtable/superhero/questions
14 | #topic
15 | http://www.zhihu.com/topic/19554151
16 | http://www.zhihu.com/topic/19551147
17 | http://www.zhihu.com/topic/19551147/top-answers
18 | http://www.zhihu.com/topic/19551147/questions
19 | http://www.zhihu.com/topic/19551147/organize
20 | http://www.zhihu.com/topic/19551147/manage
21 | http://www.zhihu.com/topic/19551147/log
22 | #article
23 | http://zhuanlan.zhihu.com/8hpencil/19929476
24 | #column
25 | http://zhuanlan.zhihu.com/8hpencil
26 | #collection
27 | http://www.zhihu.com/collection/32271511
28 | http://www.zhihu.com/collection/32271511/log
29 | 


--------------------------------------------------------------------------------
/unit/addressFile/answer:
--------------------------------------------------------------------------------
1 | #answer
2 | http://www.zhihu.com/question/25420679/answer/30790550?utm_source=weibo&utm_medium=weibo_share&utm_content=share_answer&utm_campaign=share_button
3 | 


--------------------------------------------------------------------------------
/unit/addressFile/article:
--------------------------------------------------------------------------------
1 | #article
2 | http://zhuanlan.zhihu.com/8hpencil/19929476
3 | 


--------------------------------------------------------------------------------
/unit/addressFile/collection:
--------------------------------------------------------------------------------
1 | #collection
2 | http://www.zhihu.com/collection/32271511
3 | http://www.zhihu.com/collection/32271511/log
4 | 


--------------------------------------------------------------------------------
/unit/addressFile/column:
--------------------------------------------------------------------------------
1 | #column
2 | http://zhuanlan.zhihu.com/8hpencil
3 | 


--------------------------------------------------------------------------------
/unit/addressFile/people:
--------------------------------------------------------------------------------
1 | #people
2 | http://www.zhihu.com/people/zhong-wen-71
3 | http://www.zhihu.com/people/Selerare
4 | http://www.zhihu.com/people/_Zen
5 | http://www.zhihu.com/people/mu-mu-55-53
6 | http://www.zhihu.com/people/tian-yu-bai
7 | http://www.zhihu.com/people/tian-yu-bai/followees
8 | http://www.zhihu.com/people/ying-ye-78
9 | 


--------------------------------------------------------------------------------
/unit/addressFile/question:
--------------------------------------------------------------------------------
1 | #question
2 | http://www.zhihu.com/question/27580793
3 | http://www.zhihu.com/question/27580793?sort=created
4 | http://www.zhihu.com/question/22921426?sort=created#521个回答
5 | http://www.zhihu.com/question/19568396#847个回答
6 | 


--------------------------------------------------------------------------------
/unit/addressFile/table:
--------------------------------------------------------------------------------
1 | #table
2 | http://www.zhihu.com/roundtable/superhero
3 | http://www.zhihu.com/roundtable/superhero/questions
4 | 


--------------------------------------------------------------------------------
/unit/addressFile/topic:
--------------------------------------------------------------------------------
1 | #topic
2 | http://www.zhihu.com/topic/19554151
3 | http://www.zhihu.com/topic/19551147
4 | http://www.zhihu.com/topic/19551147/top-answers
5 | http://www.zhihu.com/topic/19551147/questions
6 | http://www.zhihu.com/topic/19551147/organize
7 | http://www.zhihu.com/topic/19551147/manage
8 | http://www.zhihu.com/topic/19551147/log
9 | 


--------------------------------------------------------------------------------
/unit/demo/__init__.json:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/unit/demo/readme.md:
--------------------------------------------------------------------------------
1 | 将json转为正常的html后的结果


--------------------------------------------------------------------------------
/unit/oauth_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #   使用该文件测试oauth的使用方法
 3 | # 放置于首位
 4 | import sys  # 修改默认编码
 5 | import os  # 添加系统路径
 6 | import json
 7 | 
 8 | base_path = unicode(os.path.abspath('.').decode(sys.stdout.encoding))
 9 | sys.path.append(base_path + u'/src/lib')
10 | sys.path.append(base_path + u'/src/lib/oauth')
11 | 
12 | reload(sys)
13 | sys.setdefaultencoding('utf-8') # 强制使用utf-8编码
14 | 
15 | from zhihu_oauth  import  ZhihuClient
16 | 
17 | from zhihu_oauth.exception import NeedCaptchaException
18 | 
19 | client = ZhihuClient()
20 | 
21 | test_email = 'mengqingxue2014@qq.com'
22 | test_password = '131724qingxue'
23 | token_file = './token.pkl'
24 | 
25 | if os.path.lexists(token_file):
26 |     client.load_token(token_file)
27 |     print 'load token success'
28 | else:
29 |     try:
30 |         login_result = client.login(test_email, test_password)
31 |     except NeedCaptchaException:
32 |         # 保存验证码并提示输入，重新登录
33 |         print u'登录失败，需要输入验证码'
34 |         with open('a.gif', 'wb') as f:
35 |             f.write(client.get_captcha())
36 |         captcha = raw_input(u'please input captcha:')
37 |         login_result = client.login(test_email, test_password, captcha)
38 |     print 'login result => '
39 |     print login_result
40 |     client.save_token(token_file)
41 |     print 'save token success'
42 | 
43 | # question
44 | response_file_uri = './question_response.html' # 将json输出到网页中，chrome下按F12选preview能看见浏览器渲染出的json数据结构
45 | question_id = 35005800
46 | question = client.question(question_id)
47 | data = question.pure_data
48 | response_json = json.dumps(data)
49 | response_file = open(response_file_uri, 'w+')
50 | response_file.write(response_json)
51 | print u"数据保存完成"
52 | 
53 | response_file_uri = './people_response.html' # 将json输出到网页中，chrome下按F12选preview能看见浏览器渲染出的json数据结构
54 | people_id = '404-Page-Not-found'
55 | people = client.people(people_id)
56 | for i in people.answers:
57 |     data = i.pure_data
58 |     response_json = json.dumps(data)
59 | response_file = open(response_file_uri, 'w+')
60 | response_file.write(response_json)
61 | print u"数据保存完成"


--------------------------------------------------------------------------------
/unit/parser_unit.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Manual check for the zhihu parsers: loads one saved HTML page from
# ./unit_html, feeds it to the parser class selected through `kind` and
# prints what the parser extracted.
import sys

# Add library paths
# NOTE(review): the backslash separators look Windows specific - confirm.
currentPath = sys.path[0].replace('unit', '')
sys.path.append(currentPath)
sys.path.append(currentPath + r'src')
sys.path.append(currentPath + r'src\tools')
sys.path.append(currentPath + r'src\parser')
sys.path.append(currentPath + r'src\lib')  # location of the extension libraries

from src.lib.zhihu_parser.author import AuthorParser
from src.lib.zhihu_parser.collection import CollectionParser
from src.lib.zhihu_parser.question import QuestionParser
from src.lib.zhihu_parser.topic import TopicParser
from src.tools.debug import Debug

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding available again.
reload(sys)
sys.setdefaultencoding('utf8')



sys.setrecursionlimit(1000000)  # long answers on Zhihu need a higher recursion depth limit

# Falsy: print the answer and question lists parsed from 'src_answer'.
# Truthy: print parser.get_extra_info() for the page named by 'src_info'.
is_info = 0
kind = 'author'  # change the kind here to test another parser; the valid kinds are the keys of the dict below
# For every kind: the saved page used in each mode and the parser class to run on it.
unit ={
    'answer':{
        'src_answer':'./unit_html/single_answer.html',
        'src_info':'./unit_html/single_answer.html',
        'parser':QuestionParser,
    },
    'question':{
        'src_answer':'./unit_html/single_question.html',
        'src_info':'./unit_html/single_question.html',
        'parser':QuestionParser,
    },
    'author':{
        'src_answer':'./unit_html/author.html',
        'src_info':'./unit_html/author_info.html',
        'parser':AuthorParser,
    },
    'topic':{
        'src_answer':'./unit_html/topic.html',
        'src_info':'./unit_html/topic.html',
        'parser':TopicParser,
    },
    'collection':{
        'src_answer': './unit_html/collection.html',
        'src_info': './unit_html/collection.html',
        'parser': CollectionParser,
    },
    'private_collection': {
        'src_answer':'./unit_html/private_collection.html',
        'src_info':'./unit_html/private_collection.html',
        'parser':CollectionParser,
    },
}
# Pick the input page that matches the selected mode.
if is_info:
    src = unit[kind]['src_info']
else:
    src = unit[kind]['src_answer']

# Paths are relative: the script has to be run from the `unit` directory.
content = open(src, 'r').read()
parser = unit[kind]['parser'](content)


if is_info:
    Debug.print_dict(parser.get_extra_info())
    print '----------------------'
    print '=========================='
else:
    # One dict per answer, followed by one dict per question.
    for answer in parser.get_answer_list():
        Debug.print_dict(answer)
        print '----------------------'
    print '=========================='

    for question in parser.get_question_info_list():
        Debug.print_dict(question)
        print '----------------------'
81 | 


--------------------------------------------------------------------------------
/unit/unit_html/author.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/author.html


--------------------------------------------------------------------------------
/unit/unit_html/author_info.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/author_info.html


--------------------------------------------------------------------------------
/unit/unit_html/collection.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/collection.html


--------------------------------------------------------------------------------
/unit/unit_html/private_collection.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/private_collection.html


--------------------------------------------------------------------------------
/unit/unit_html/single_answer.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/single_answer.html


--------------------------------------------------------------------------------
/unit/unit_html/single_question.html:
--------------------------------------------------------------------------------
1 | ﻿


--------------------------------------------------------------------------------
/unit/unit_html/topic.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/topic.html


--------------------------------------------------------------------------------
/unit/unit_html/topic_info.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/unit/unit_html/topic_info.html


--------------------------------------------------------------------------------
/www/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/www/css/customer.css:
--------------------------------------------------------------------------------
  1 | /* 设置背景色，清除缩进 */
  2 | body {
  3 |     text-indent: 0 !important;
  4 |     duokan-text-indent: 0 !important;
  5 |     word-wrap: break-word; /* 自动折行 */
  6 | }
  7 | 
  8 | /* color */
  9 | .bg-zhihu-blue-light {
 10 |     background-color: #428ECE;
 11 | }
 12 | 
 13 | .bg-zhihu-blue-deep {
 14 |     background-color: #3982C6;
 15 | }
 16 | 
 17 | .bg-duokan-yellow {
 18 |     /*多看阅读默认背景色*/
 19 |     background-color: #F7EFE7;
 20 | }
 21 | 
 22 | /* 清除浮动 */
 23 | div.clear-float {
 24 |     clear: both;
 25 | }
 26 | 
 27 | /* 隐藏空图片 */
 28 | img[src=''], img[src$='./images/'] {
 29 |     display: none;
 30 | }
 31 | 
 32 | .text-center {
 33 |     text-align: center;
 34 | }
 35 | 
 36 | .v-center {
 37 |     vertical-align: middle;
 38 | }
 39 | 
 40 | .margin-center {
 41 |     margin: 0 auto;
 42 | }
 43 | 
 44 | /* 目录页面 */
 45 | /* 隐藏多余的『目录』两字 */
 46 | div.index-content > li {
 47 |     display: none;
 48 | }
 49 | 
 50 | div.index-content a {
 51 |     font-size: 1em;
 52 | }
 53 | 
 54 | div.index-content ul > li > a {
 55 |     font-size: 1em;
 56 |     font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体;
 57 |     color: #3d3e45;
 58 | }
 59 | 
 60 | /* 首页描述信息 */
 61 | div.front-page.description {
 62 |     margin: 2em 0;
 63 | }
 64 | 
 65 | /* 评论信息 */
 66 | div.extra-info p {
 67 |     float: left;
 68 | }
 69 | 
 70 | div.extra-info p.update-date {
 71 |     float: right;
 72 | }
 73 | 
 74 | div.extra-info {
 75 |     margin: 1em 0;
 76 | }
 77 | 
 78 | /* 用户信息 */
 79 | div.author-base {
 80 |     margin: 1em 0;
 81 |     font-size: 16px;
 82 |     vertical-align: middle;
 83 | }
 84 | 
 85 | span.author-sign {
 86 |     margin-left: 1em;
 87 |     font-family: 'DK-KAITI', '楷体';
 88 | }
 89 | 
 90 | span.author-name a {
 91 |     font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体;
 92 |     color: #3d3e45;
 93 |     font-weight: bold;
 94 | }
 95 | 
 96 | div.author-logo {
 97 |     float: right;
 98 | }
 99 | 
100 | div.author-logo img {
101 |     vertical-align: middle;
102 |     margin-left: 0.2em;
103 |     margin-bottom: 0.2em;
104 | }
105 | 
106 | /* 问题详情 */
107 | 
108 | /* 实现知乎周刊的全屏效果*/
109 | @media handheld {
110 |     div.question {
111 |         margin: -10em -4em 0 -4em; /*上、右、下、左，顺时针*/
112 |         padding: 10em 2em 0 4em;
113 |     }
114 | }
115 | 
116 | div.question-title {
117 |     width: 100%;
118 |     overflow: hidden;
119 | }
120 | 
121 | div.question-title h1 {
122 |     font-family: 'DK-HEITI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 宋体;
123 |     color: #FFFFFF;
124 |     vertical-align: middle;
125 |     text-align: left;
126 |     padding: 1em;
127 |     width: 70%;
128 |     margin: 3em 0 3em 0;
129 |     float: right;
130 |     font-size: 1.5em;
131 | }
132 | 
133 | @media handheld {
134 |     div.question-title h1 {
135 |         margin: 0 0 3em 0;
136 |         padding-right: 3em;
137 |     }
138 | }
139 | 
140 | div.question-info {
141 |     font-family: 'DK-SONGTI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 宋体;
142 |     color: #FFF !important;
143 |     font-size: 1em;
144 |     margin: 0 1em;
145 |     padding-bottom: 1em;
146 | }
147 | 
148 | @media handheld {
149 |     div.question-info {
150 |         margin: 0 -4em 0 -4em;
151 |         padding: 0 5em 2em 5em;
152 |     }
153 | }
154 | 
155 | div.question-info a {
156 |     color: #CCC;
157 | }
158 | 
159 | /* 答案内容 */
160 | div.content {
161 |     font-family: 'DK-SONGTI', 'Microsoft Yahei', 微软雅黑, STHeiti, Hei, 'Heiti SC', 黑体;
162 |     color: #000;
163 |     font-size: 16px;
164 | }
165 | 
166 | div.content img {
167 |     max-width: 100%;
168 |     margin-bottom: 2em;
169 | }
170 | 
171 | /* article */
172 | div.title-image {
173 |     text-align: center;
174 | }
175 | 
176 | div.title-image img {
177 |     width: 100%;
178 | }
179 | 
180 | /* 禁止信息页中的详情表格分页 */
181 | div.front-page.detail-info table.margin-center {
182 |     page-break-inside: avoid;
183 | }
184 | 
185 | /* 禁止目录panel分页 */
186 | div.index-content.panel {
187 |     page-break-inside: avoid;
188 | }
189 | 


--------------------------------------------------------------------------------
/www/image/cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/image/cover.jpg


--------------------------------------------------------------------------------
/www/image/kanshan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/image/kanshan.png


--------------------------------------------------------------------------------
/www/template/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/www/template/base.html:
--------------------------------------------------------------------------------
 1 | <html xmlns="http://www.w3.org/1999/xhtml">
 2 |     <head>
 3 |         <meta charset="utf-8"/>
 4 |         <title>{title}</title>
 5 |         <link rel="stylesheet" type="text/css" href="../style/normalize.css"/>
 6 |         <link rel="stylesheet" type="text/css" href="../style/markdown.css"/>
 7 |         <link rel="stylesheet" type="text/css" href="../style/customer.css"/>
 8 |     </head>
 9 |     <body>
10 |  {body}
11 |     </body>
12 | </html>


--------------------------------------------------------------------------------
/www/template/content/question/answer.html:
--------------------------------------------------------------------------------
 1 | <div class="answer">
 2 | 
 3 |     <div class="author">
 4 |         <div class="author-info">
 5 |             <div class="author-base">
 6 |                 <div class="author-logo">
 7 |                     <img src="{author_avatar_url}" width="25" height="25"></img>
 8 |                 </div>
 9 | 
10 |                 <span class="author-name">
11 |             <a href="http://www.zhihu.com/people/{author_id}">{author_name}</a>
12 |         </span>
13 | 
14 |                 <span class="author-sign">{author_headline}</span>
15 |             </div>
16 | 
17 |             <div class="clear-float"></div>
18 |         </div>
19 |     </div>
20 | 
21 |     <div class="content">
22 |         {content}
23 |     </div>
24 | 
25 |     <div class="comment">
26 |         <div class="extra-info">
27 |             <p class="comment">评论数:{comment_count}</p>
28 | 
29 |             <p class="agree">赞同数:{voteup_count}</p>
30 | 
31 |             <p class="update-date">更新时间:{updated_time}</p>
32 |         </div>
33 |     </div>
34 | </div>
35 | 
36 | <hr/>
37 | 


--------------------------------------------------------------------------------
/www/template/content/question/question.html:
--------------------------------------------------------------------------------
 1 | <div class="bg-zhihu-blue-light">
 2 |     <div class="title-image">
 3 |     </div>
 4 |     <div class="question bg-zhihu-blue-light">
 5 |         <div class="question-title">
 6 |             <h1 class="bg-zhihu-blue-deep">{title}</h1>
 7 |         </div>
 8 |         <div class="clear-float"></div>
 9 |     </div>
10 |     <div class="question-info bg-zhihu-blue-light">
11 |         {description}
12 |     </div>
13 |     <div class="clear-float"></div>
14 | </div>
15 | <div class="answer">
16 |     {answer}
17 | </div>


--------------------------------------------------------------------------------
/www/template/front_page/base.html:
--------------------------------------------------------------------------------
 1 | <div class="container text-center">
 2 |     <img src="http://liukanshan.zhihu.com/images/downloads/avatars/classic/06-11c98f04.png" class="text-center v-center"
 3 |          width="320" height="320">
 4 | 
 5 |     <h1>{title}</h1>
 6 |     <div class="front-page description">
 7 |         {description}
 8 |     </div>
 9 |     <div class="front-page detail-info">
10 |         {detail_info}
11 |     </div>
12 |     <p>版权信息:<a href="http://www.zhihu.com/terms">知乎协议</a></p>
13 | </div>


--------------------------------------------------------------------------------
/www/template/front_page/info/answer.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/answer.html


--------------------------------------------------------------------------------
/www/template/front_page/info/article.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/article.html


--------------------------------------------------------------------------------
/www/template/front_page/info/author.html:
--------------------------------------------------------------------------------
 1 | <table class="margin-center">
 2 |     <thead>
 3 |     <tr>
 4 |         <td colspan="2">
 5 |             用户信息
 6 |         </td>
 7 |     </tr>
 8 |     </thead>
 9 |     <tbody>
10 |     <tr>
11 |         <th>用户名</th>
12 |         <td>
13 |             <a href="http://www.zhihu.com/people/{author_id}">{name}</a>
14 |         </td>
15 |     </tr>
16 |     <tr>
17 |         <th>关注人数</th>
18 |         <td>{follower}</td>
19 |     </tr>
20 |     <tr>
21 |         <th>提问</th>
22 |         <td>{asks}</td>
23 |     </tr>
24 |     <tr>
25 |         <th>回答</th>
26 |         <td>{answers}</td>
27 |     </tr>
28 |     <tr>
29 |         <th>专栏文章</th>
30 |         <td>{posts}</td>
31 |     </tr>
32 |     <tr>
33 |         <th>公共编辑次数</th>
34 |         <td>{logs}</td>
35 |     </tr>
36 |     <tr>
37 |         <th>被赞同</th>
38 |         <td>{agree}</td>
39 |     </tr>
40 |     <tr>
41 |         <th>被收藏</th>
42 |         <td>{collected}</td>
43 |     </tr>
44 |     <tr>
45 |         <th>被感谢</th>
46 |         <td>{thanks}</td>
47 |     </tr>
48 |     <tr>
49 |         <th>被分享</th>
50 |         <td>{shared}</td>
51 |     </tr>
52 |     </tbody>
53 | </table>
54 | 


--------------------------------------------------------------------------------
/www/template/front_page/info/collection.html:
--------------------------------------------------------------------------------
 1 | <table class="margin-center">
 2 |     <thead>
 3 |     <tr>
 4 |         <td colspan="2">
 5 |             收藏夹信息
 6 |         </td>
 7 |     </tr>
 8 |     </thead>
 9 |     <tbody>
10 |     <tr>
11 |         <th>评论数</th>
12 |         <td>{comment}</td>
13 |     </tr>
14 |     <tr>
15 |         <th>关注人数</th>
16 |         <td>{follower}</td>
17 |     </tr>
18 |     </tbody>
19 | </table>
20 | 


--------------------------------------------------------------------------------
/www/template/front_page/info/column.html:
--------------------------------------------------------------------------------
 1 | <table class="margin-center">
 2 |     <thead>
 3 |     <tr>
 4 |         <td colspan="2">
 5 |             专栏信息
 6 |         </td>
 7 |     </tr>
 8 |     </thead>
 9 |     <tbody>
10 |     <tr>
11 |         <th>创建者</th>
12 |         <td><a href="http://www.zhihu.com/people/{creator_hash}">{creator_name}</a></td>
13 |     </tr>
14 |     <tr>
15 |         <th>文章总数</th>
16 |         <td>{article}</td>
17 |     </tr>
18 |     <tr>
19 |         <th>关注人数</th>
20 |         <td>{follower}</td>
21 |     </tr>
22 |     </tbody>
23 | </table>
24 | 


--------------------------------------------------------------------------------
/www/template/front_page/info/question.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoZeyuan/ZhihuHelp_archived/a0e4a7acd4512452022ce088fff2adc6f8d30195/www/template/front_page/info/question.html


--------------------------------------------------------------------------------
/www/template/front_page/info/topic.html:
--------------------------------------------------------------------------------
 1 | <table class="margin-center">
 2 |     <thead>
 3 |     <tr>
 4 |         <td colspan="2">
 5 |             话题信息
 6 |         </td>
 7 |     </tr>
 8 |     </thead>
 9 |     <tbody>
10 |     <tr>
11 |         <th>关注人数</th>
12 |         <td>{follower}</td>
13 |     </tr>
14 |     </tbody>
15 | </table>
16 | 


--------------------------------------------------------------------------------
/www/template/info_page/article.html:
--------------------------------------------------------------------------------
 1 | <div>
 2 |     <div>文章来自专栏-{name}</div>
 3 |     <table class="margin-center">
 4 |         <tbody>
 5 |         <tr>
 6 |             <th>专栏文章数</th>
 7 |             <td>{postsCount}</td>
 8 |         </tr>
 9 |         </tbody>
10 |     </table>
11 | </div>
12 | 


--------------------------------------------------------------------------------
/www/template/info_page/author.html:
--------------------------------------------------------------------------------
 1 | <div>
 2 |     <div>{name}的知乎回答集锦</div>
 3 |     <table class="margin-center">
 4 |         <tbody>
 5 |         <tr>
 6 |             <th>回答数</th>
 7 |             <td>{answer_count}</td>
 8 |         </tr>
 9 |         <tr>
10 |             <th>被关注数</th>
11 |             <td>{follower_count}</td>
12 |         </tr>
13 |         <tr>
14 |             <th>累计收获赞同</th>
15 |             <td>{voteup_count}</td>
16 |         </tr>
17 |         </tbody>
18 |     </table>
19 | </div>
20 | 


--------------------------------------------------------------------------------
/www/template/info_page/book.html:
--------------------------------------------------------------------------------
1 | 
2 | <div>
3 |     {title}
4 | </div>
5 | 


--------------------------------------------------------------------------------
/www/template/info_page/collection.html:
--------------------------------------------------------------------------------
 1 | <div>
 2 |     <div>收藏夹：{title}</div>
 3 |     <table class="margin-center">
 4 |         <tbody>
 5 |         <tr>
 6 |             <th>答案数</th>
 7 |             <td>{answer_count}</td>
 8 |         </tr>
 9 |         <tr>
10 |             <th>关注人数</th>
11 |             <td>{follower_count}</td>
12 |         </tr>
13 |         </tbody>
14 |     </table>
15 | </div>
16 | 


--------------------------------------------------------------------------------
/www/template/info_page/column.html:
--------------------------------------------------------------------------------
 1 | <!-- Info page for a column: shows the column name and its article count. Placeholders (written in curly braces below): name, postsCount. They are presumably filled in by the Python side; the substitution code is not in this file. -->
 2 | <div>
 3 |     <div>专栏-{name}</div>
 4 |     <table class="margin-center">
 5 |         <tbody>
 6 |         <tr>
 7 |             <th>文章数</th>
 8 |             <td>{postsCount}</td>
 9 |         </tr>
10 |         </tbody>
11 |     </table>
12 | </div>
13 | 


--------------------------------------------------------------------------------
/www/template/info_page/question.html:
--------------------------------------------------------------------------------
 1 | <!-- Info page for a Zhihu question: shows the question title, answer count, follower count and comment count. Placeholders (written in curly braces below): title, answer_count, follower_count, comment_count. The heading sits in its own div, like the headings of the other table-based info pages. -->
 2 | <div>
 3 |     <div>知乎问题-{title}</div>
 4 |     <table class="margin-center">
 5 |         <tbody>
 6 |         <tr>
 7 |             <th>回答数</th>
 8 |             <td>{answer_count}</td>
 9 |         </tr>
10 |         <tr>
11 |             <th>关注人数</th>
12 |             <td>{follower_count}</td>
13 |         </tr>
14 |         <tr>
15 |             <th>评论数</th>
16 |             <td>{comment_count}</td>
17 |         </tr>
18 |         </tbody>
19 |     </table>
20 | </div>
21 | 


--------------------------------------------------------------------------------
/www/template/info_page/topic.html:
--------------------------------------------------------------------------------
 1 | <!-- Info page for a topic: shows the topic name and its question count. Placeholders (written in curly braces below): name, questions_count. They are presumably filled in by the Python side; the substitution code is not in this file. -->
 2 | <div>
 3 |     <div>话题:{name}</div>
 4 |     <table class="margin-center">
 5 |         <tbody>
 6 |         <tr>
 7 |             <th>问题数</th>
 8 |             <td>{questions_count}</td>
 9 |         </tr>
10 |         </tbody>
11 |     </table>
12 | </div>
13 | 


--------------------------------------------------------------------------------
/zhihuHelp.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # 放置于首位
 3 | import sys  # 修改默认编码
 4 | import os  # 添加系统路径
 5 | import traceback
 6 | 
 7 | base_path = unicode(os.path.abspath('.').decode(sys.stdout.encoding))
 8 | sys.path.insert(0, base_path + u'/src/lib')  # 添加基础库路径 使用insert方式，确保优先启用项目自带源码包
 9 | sys.path.insert(0, base_path + u'/src/lib/oauth')  # zhihu oauth 类需要作为默认类导入，否则无法运行 - -
10 | 
11 | reload(sys)
12 | sys.setdefaultencoding('utf-8')
13 | 
14 | #  执行主程序
15 | from src.main import ZhihuHelp
16 | 
17 | try:
18 |     helper = ZhihuHelp()
19 |     helper.start()
20 | except Exception:
21 |     traceback.print_exc()
22 |     print u"助手发生异常，点击任意键退出"
23 |     raw_input()
24 | pass
25 | 


--------------------------------------------------------------------------------