├── LICENSE
├── README.md
├── bin
└── build_4_win.7z
├── build
└── CMakeLists.txt
├── config
└── portals_list.json
├── pic
├── hardseed.gif
├── running.gif
└── seeds_and_pics.gif
└── src
├── .ycm_extra_conf.py
├── .ycm_extra_conf.pyc
├── lib
├── 3rd
│ └── json11
│ │ ├── LICENSE.txt
│ │ ├── README.md
│ │ ├── json11.cpp
│ │ ├── json11.hpp
│ │ └── test.cpp
├── helper
│ ├── CmdlineOption.cpp
│ ├── CmdlineOption.h
│ ├── Misc.cpp
│ ├── Misc.h
│ ├── RichTxt.h
│ ├── Time.cpp
│ ├── Time.h
│ ├── Webpage.cpp
│ └── Webpage.h
└── self
│ ├── Aicheng.cpp
│ ├── Aicheng.h
│ ├── AichengTopicWebpage.cpp
│ ├── AichengTopicWebpage.h
│ ├── AichengTopicsListWebpage.cpp
│ ├── AichengTopicsListWebpage.h
│ ├── Caoliu.cpp
│ ├── Caoliu.h
│ ├── CaoliuTopicWebpage.cpp
│ ├── CaoliuTopicWebpage.h
│ ├── CaoliuTopicsListWebpage.cpp
│ ├── CaoliuTopicsListWebpage.h
│ ├── JandownSeedWebpage.cpp
│ ├── JandownSeedWebpage.h
│ ├── RmdownSeedWebpage.cpp
│ ├── RmdownSeedWebpage.h
│ ├── SeedWebpage.cpp
│ ├── SeedWebpage.h
│ ├── TopicWebpage.cpp
│ ├── TopicWebpage.h
│ ├── TopicsListWebpage.cpp
│ └── TopicsListWebpage.h
└── main.cpp
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
给不了你梦中情人,至少还有硬盘女神:hardseed
2 | yangyangwithgnu@yeah.net
3 | http://yangyangwithgnu.github.io/
4 | 2016-02-04 14:53:51
5 |
6 |
7 | ## 谢谢
8 |
9 | **捐赠:支付宝 yangyangwithgnu@yeah.net ,支付宝二维码(左),微信二维码(右)**
10 |
11 |
12 |
13 |
14 |
15 | **二手书**:书,我提高开发技能的重要手段之一,随着职业生涯的发展,书籍也在不断增多,对我而言,一本书最多读三遍,再往后,几乎没有什么营养吸收,这部分书对我已基本无用,但对其他人可能仍有价值,所以,为合理利用资源,我决定低价出售这些书,希望达到两个目的:0)用售出的钱购买更多新书(没当过雷锋的朋友 (๑´ڡ`๑));1)你低价购得需要的书(虽然二手)。到 https://github.com/yangyangwithgnu/used_books 看看有无你钟意的。
16 |
17 |
18 | ## 公告
19 | ----------------
20 |
21 | **讨论**:任何意见建议移步 https://www.v2ex.com/t/123175
22 |
23 | **声明**:我本人绝对尊重各大爱的论坛,提供的资源不仅优质而且免费,我只是懒、足够的懒。请大家支持这些论坛,多用页面访问、多点击广告、多解囊捐赠。*我..在..干..嘛 @_@#*
24 |
25 | **注意**
26 | + 代理是一切的先决条件。你可以使用自己的代理工具,用 hardseed 的命令行选项 --proxy 指定本地中转地址及端口,也可以用我为你预配置的 goagent 代理工具,位于 https://github.com/yangyangwithgnu/goagent_out_of_box_yang
27 |
28 |
29 | ## 版本
30 | ----------------
31 |
32 | **[v0.2.14-1,修正,2016-02-04]**:0)行了、行了,我抱歉,不知道有这么多 win 用户需要 hardseed,之前是我狭隘了,只考虑到 unix-like 用户。编译好的 win 版本程序送你,位于 bin\build_4_win.7z;1)另外,osX 下的构建方面进行了细化。新年快乐!
33 | **[v0.2.14,修正,2016-01-31]**:0)忽略解析 aicheng 站务相关帖子。
34 | **[v0.2.13,修正,2016-01-17]**:0)修正 caoliu 翻页的错误。
35 | **[v0.2.12,优化,2015-05-26]**:0)先前 hardseed 中硬编码 aicheng 和 caoliu 论坛入口地址,地址一旦变更,每次需要重新调整代码,很是麻烦,现在我在本项目主页中放了一份配置文件 config/portals_list.json,hardseed 自动从该文件中获取最新论坛入口地址(安啦,我会及时更新的);1)调整部分公共库代码。
36 | **[v0.2.11,修正,2015-03-22]**:修正 aicheng 种子和图片解析错误的问题(别发邮件了哈,亲,邮箱都他妈快撑爆了) 。
37 | **[v0.2.10,修正,2014-12-07]**:caoliu 地址变更,shit :-P
38 | **[v0.2.09,修正,2014-11-30]**:caoliu 地址变更。
39 | **[v0.2.08,修正,2014-10-21]**:0)仅解析主贴的图片而不再解析回帖,以避免下载无关图片;1)aicheng 论坛地址变更;2)部分用户有自己的代理工具,为缩短下载时长,将预配置的 goagent 独立成一个 github 项目。
40 | **[v0.2.07,修正,2014-09-25]**:windows 禁止文件名中含有 /:\*?\\<>"| 等字符,否则将导致非法路径错误,修正 hardseed 生成的文件名中可能含有如上字符的问题。
41 | **[v0.2.06,优化,2014-09-09]**:caoliu 原地址无法访问,更新地址;取消 caoliu 自拍套图最多只能下载 256 张的限制。
42 | **[v0.2.05,优化,2014-08-17]**:程序功能无任何更新,仅更新代理工具 goagent 配置文件 proxy.ini:一是设置 obfuscate = 1 开启流量混淆以正确解析出可用 GGC IP,一是设置 pagespeed = 1 以提升 GAE 的下行速度。
43 | **[v0.2.05,修正,2014-08-13]**:0)修正帖子部分图片 URL 未解析的问题;1)修正图片序号错误的问题;2)优化图片下载等待时长算法,不再以 --timeout-download-picture 作为绝对等待时长,而是将其作为指导值,一旦图片下载失败 hardseed 将自动计算下次重新下载所需的等待时长,同时,与“速度过低视为下载失败”的机制结合,提升图片下载等待耗时;3)升级 goagent 至 3.1.21,采用 goagent 默认 proxy.ini,而不再使用自定义 iplist (很多朋友反应采用先前我自定义 iplist 版本的 goagent 速度不理想,这是由于 GGC IP 与不同网络环境有关,我用 checkgoogleip 跑出来 GGC IP 最适合我的网络环境,不见得适合你,所以,权衡之下,还是用 goagent 自带的 GGC IP,至少这合适于大多数人)。
44 | **[v0.2.04,修正,2014-08-10]**:0)由于对 % 进行 URL 转义使得部分图片的 URL 生成错误,导致图片下载失败,本版本已修正;1)剔除长年显示异常的图床网站 iceimg.com;2)引入均速过低视为下载失败的机制,持续(8s)低速(4KB/s)终止当次下载,重新向服务端发起新请求,开启新一次的下载,以缩短下载错误 URL 图片等待时长;3)修正 aicheng 帖子列表页面中帖子名解析错误的问题;4)取消单个代理服务器并行下载上限数 8 的限制。
45 | **[v0.2.03,修正,2014-08-08]**:0)修正部分图片缺失扩展名的问题;1)默认下载帖子数量从 128 调整为 64;2)更换新的 GGC IP 进代理工具 goagent 的 proxy.ini 中以提升代理速度。
46 | **[v0.2.02,优化,2014-08-06]**:程序无任何功能变更,仅是优化代码,合并部分通用代码至公共库、增加用于验证代理出口 IP 和伪装浏览器的 user-agent 的接口。
47 | **[v0.2.01,修正,2014-07-28]**:修正临时文件未删除的错误。
48 | **[v0.2.00,新增,2014-07-23]**:应 @sigmadog 需求,增加抓取 caoliu 上自拍套图(江湖人称“達蓋爾的旗幟”)的功能。
49 | **[v0.1.00,修正,2014-07-21]**:caoliu 论坛增加了反机器人机制,若翻页过快则视为机器人行为,下载页面为空白页。此版本可应对它的反机器人机制。
50 |
51 |
52 | ## 演示
53 | ----------------
54 | *hardseed*
55 | 
56 | *running*
57 | 
58 | *more seeds and pictures*
59 | 
60 |
61 | http://v.youku.com/v_show/id_XNzQxOTk0NTE2.html
62 |
63 |
64 | ## man
65 | ----------------
66 |
67 | **hardseed** is a batch seed and picture download utility for the CaoLiu and AiCheng forums. It's easy and simple to use. Usually, you could issue it as follows:
68 | ```
69 | $ hardseed
70 | ```
71 | or
72 | ```
73 | $ hardseed --saveas-path ~/downloads --topics-range 8 64 --av-class aicheng_west --timeout-download-picture 32 --hate X-Art --proxy http://127.0.0.1:8087
74 | ```
75 |
76 | --help
77 | Show this help information that you are seeing.
78 |
79 | --version
80 | Show current version.
81 |
82 | --av-class
83 | There are 13 av classes:
84 | - caoliu_west_reposted
85 | - caoliu_cartoon_reposted
86 | - caoliu_asia_mosaicked_reposted
87 | - caoliu_asia_non_mosaicked_reposted
88 | - caoliu_west_original
89 | - caoliu_cartoon_original
90 | - caoliu_asia_mosaicked_original
91 | - caoliu_asia_non_mosaicked_original
92 | - caoliu_selfie
93 | - aicheng_west
94 | - aicheng_cartoon
95 | - aicheng_asia_mosaicked
96 | - aicheng_asia_non_mosaicked
97 |
98 | As the name implies, "caoliu" stands for the CaoLiu forum and "aicheng" for the AiCheng forum; "reposted" and "original" are self-explanatory. You know which one is your best lover (yes, only one).
99 | The default is aicheng_asia_mosaicked.
100 |
101 | --concurrent-tasks
102 | You can set more than one proxy, and each proxy can run more than one concurrent task. This option sets the number of concurrent tasks for each proxy.
103 | The max and default number is 8.
104 |
105 | --timeout-download-picture
106 | Some pictures are too big to download in a few seconds, so you should set the picture download timeout in seconds.
107 | The default timeout is 16 seconds.
108 |
109 | --topics-range
110 | Set the range of topics to download. E.G.:
111 | - topics-range 2 16
112 | - topics-range 8 (I.E., --topics-range 1 8)
113 | - topics-range -1 (I.E., all topics of this av class)
114 |
115 | The default topics range is 64.
116 |
117 | --saveas-path
118 | Set the path to save seeds and pictures. The rule of dir: [avclass][range]@hhmmss. E.G., [aicheng_west][2~32]@124908/.
119 | The default directory is home directory (or windows is C:\\).
120 |
121 | --hate
122 | If you hate some subject topics, you can ignore them by setting this option with keywords from the topic title, separated by the space character ' ' and case sensitive. E.G., --hate 孕妇 重口味. When the --hate keyword list conflicts with --like, --hate takes precedence.
123 |
124 | --like
125 | If you like some subject topics, you can grab them by setting this option with keywords from the topic title, separated by the space character ' ' and case sensitive. E.G., --like 苍井空 小泽玛利亚. When the --like keyword list conflicts with --hate, --hate takes precedence.
126 |
127 | --proxy
128 | As you know, the government likes blocking adult websites, so I do suggest you set the --proxy option. Hardseed supports several kinds of proxies:
129 | - GoAgent (STRONGLY recommended), --proxy http://127.0.0.1:8087
130 | - shadowsocks, --proxy socks5://127.0.0.1:1080, or socks5h://127.0.0.1:1080
131 | - SSH, --proxy socks4://127.0.0.1:7070
132 | - VPN (PPTP and openVPN), --proxy ""
133 |
134 | It is important to know that you can set several proxies at the same time, separated by the space character ' '. As the --concurrent-tasks option says, each proxy can run more than one concurrent task; now, what about several proxies? Yes, yes, the speed of downloading seeds and pictures is very, very fast. E.G., --concurrent-tasks 8 --proxy http://127.0.0.1:8087 socks5://127.0.0.1:1080 socks4://127.0.0.1:7070, the number of concurrent tasks is 8\*3.
135 | If you want to know how to install and configure various kinds of proxies, please visit my homepage "3.2 搭梯翻墙" https://github.com/yangyangwithgnu/the_new_world_linux#3.2
136 | If you are not good with computers, I have prepared an up-to-date, out-of-the-box goagent for folks like you. See https://github.com/yangyangwithgnu/goagent_out_of_box_yang
137 |
138 | The default is http://127.0.0.1:8087.
139 |
140 | That's all. Any suggestions let me know by yangyangwithgnu@yeah.net or http://yangyangwithgnu.github.io/, big thanks to you. Kiddo, take care of your body. :-)
141 |
142 |
143 | ## 中文
144 | --------
145 |
146 | hardseed 希望带给你(硬盘)女神!女神的种子和图片。
147 |
148 | ### 【翻墙】
149 | 你知道,这一切的一切都在墙外,所以你得具备翻墙环境,hardseed 才能帮你拉女神。hardseed 支持 goagent、shadowsocks、SSH、VPN (PPTP 和 openVPN)等各类代理模式,甚至你可以同时使用多种代理以极速下载。从普及度、稳定性、高效性来看,goagent 最优。“我一小白,平时工作压力本来就大,就想看看女神轻松下,你还让我折腾代理!没人性!”,嘚,亲,咱是做服务的。我帮你配置了一份开箱即用的 goagent,位于 https://github.com/yangyangwithgnu/goagent_out_of_box_yang ,下载后,linux 用户,命令行中运行
150 | ```
151 | $ python proxy.py
152 | ```
153 | windows 亲,双击运行 goagent.exe (**管理员权限**)。
154 |
155 | ### 【下载】
156 |
157 | #### 『windows』
158 | 亲,往右上看,找到“download ZIP”,点击下载。
159 |
160 | #### 『linux』
161 | ```
162 | $ git clone https://github.com/yangyangwithgnu/hardseed.git
163 | ```
164 |
165 | ### 【源码安装】
166 |
167 | #### 『windows』
168 | 这基本没 windows 用户什么事儿,除非你有 cygwin,否则你没法编译源码,没事,帮你弄好了,我的定位是牙医界的服务人员,服务很重要,二进制执行程序位于 hardseed\bin\build_4_win.7z。
169 |
170 | #### 『linux』
171 | 0)唯一依赖 libcurl,请自行安装;
172 | 1)代码采用 C++11 编写,gcc 版本不低于 4.7.1。
173 | 2)命令行下运行:
174 | ```
175 | $ cd hardseed/build/
176 | $ cmake .
177 | $ make && make install
178 | ```
179 |
180 | #### 『osX』
181 | 首先,将 build/CMakeLists.txt 中的
182 | ```
183 | TARGET_LINK_LIBRARIES(hardseed curl pthread)
184 | ```
185 | 替换成
186 | ```
187 | TARGET_LINK_LIBRARIES(hardseed curl pthread iconv)
188 | ```
189 |
190 | 然后,将 build/CMakeLists.txt 中
191 | ```
192 | ## osX
193 | ##>>>>>>>>>>>>>>>>>>>>>>
194 |
195 | #SET(CMAKE_CXX_COMPILER "g++")
196 | #SET(CMAKE_CXX_FLAGS "-std=c++11 -O3")
197 | #SET(CMAKE_BUILD_TYPE release)
198 | #ADD_EXECUTABLE(hardseed ${SRC_LIST})
199 | #TARGET_LINK_LIBRARIES(hardseed curl pthread iconv)
200 | #INSTALL(PROGRAMS hardseed DESTINATION /usr/bin/)
201 | ```
202 | 第一列的 # 删除;
203 |
204 | 接着,将 build/CMakeLists.txt 中
205 | ```
206 | # release
207 | SET(CMAKE_CXX_COMPILER "g++")
208 | SET(CMAKE_CXX_FLAGS "-std=c++11 -O3")
209 | SET(CMAKE_BUILD_TYPE release)
210 | ADD_EXECUTABLE(hardseed ${SRC_LIST})
211 | TARGET_LINK_LIBRARIES(hardseed curl pthread)
212 | INSTALL(PROGRAMS hardseed DESTINATION /usr/bin/)
213 | ```
214 | 删掉;
215 |
216 | 最后,剩下步骤同 linux 构建方法。
217 |
218 |
219 |
220 | ### 【使用】
221 | **亲,听好了,运行 hardseed 前务必确保代理程序已正常运行,否则,别说女神,蚊子都碰不到。**
222 |
223 | #### 『windows』
224 | 先进入 hardseed\bin\,解压 build_4_win.7z,选中 hardseed.exe,右键设置**以管理员权限运行该程序**,接着键入 alt-d 将光标定位到文件管理器的地址栏中,键入 CMD 启动命令行窗口,在 CMD 中键入
225 | ```
226 | X:\hardseed\bin\windows> hardseed.exe
227 | ```
228 | 这时,hardseed 开始玩命儿地为你下载女神图片和种子,经过 2 分 8 秒,找到类似 C:\\[aicheng_asia_mosaicked][1~128]@20140822\ 的目录,女神们那儿等你!
229 |
230 | #### 『linux』
231 | 同 windows 下运行一样,全用默认命令行参数运行
232 | ```
233 | $ hardseed
234 | ```
235 | 执行完成后,你会看到 ~/[aicheng_asia_mosaicked][1~128]@014822/,你要的都在那儿。或者,玩点高级的
236 | ```
237 | $ hardseed --saveas-path ~/downloads --topics-range 256 --av-class aicheng_west
238 | ```
239 | 其中,--saveas-path 指定存放路径为 ~/downloads/;--topics-range 指定解析的帖子范围从第 1 张到第 256 张帖子;--av-class 指定女神类型为欧美。完整命令行选项请 --help 查看。
240 |
241 | ### 【FAQ】
242 |
243 | **Q1**:为何 windows 版的可执行文件目录 build_4_win\ 下有一堆 cyg\*.dll 文件?
244 | **A1**:hardseed 是用 C++ 编写的遵循 SUS(单一 unix 规范)的原生 linux 程序,理论上,在任何 unix-like(linux、BSD、osX) 系统上均可正常源码编译,唯独不支持 windows,为让 hardseed 具备跨平台能力,须借由某种工具(或环境)将 hardseed 转换成 windows 下的执行程序。cygwin 就是这种环境,我把 hardseed 源码纳入 cygwin 环境中重新编译,即可生成 windows 下的可执行程序 hardseed.exe,在这个过程中,cygwin 会加入些自己的代码和中转库到 hardseed.exe 中,cyg\*.dll 就是各类中转库。
245 |
246 | **Q2**:为何运行 windows 版的执行程序总有如下警告
247 | ```
248 | Preferred POSIX equivalent is: /cygdrive/c/xxxx, CYGWIN environment variable option "nodosfilewarning" turns off this warning. Consult the user's guide for more details about POSIX paths ...
249 | ```
250 | 影响正常运行么?
251 | **A2**:linux 与 windows 有很多基础设施的差异,路径表示方式就算其一,如,前者是 /this/is/linux/path/,后者 C:\this\is\windows\path\,A1 中提过 hardseed 是 linux 下的原生程序,代码中全采用的 linux 路径规则,运行 hardseed.exe 时, cygwin 自动进行路径规则转换,所以出现本问题中的警告信息以告知用户路径可能有变化。这完全不影响 hardseed.exe 正常运行。如果厌恶这些提示,可以在环境变量中增加 CYGWIN=nodosfilewarning (win7 用户:computer - properties - advanced system settings - advanced - environment variables - new,variable name 填入 CYGWIN,variable value 中填入 nodosfilewarning,保存即可)。
252 |
253 | **Q3**:运行 hardseed 后啥都没下载呢?还提示 There is no topic which you like?
254 | **A3**:有几种可能:
255 | * 未成功翻墙。请自行参阅你的翻墙工具帮助文档,修正即可。windows 用户注意检查是否以**管理员权限运行翻墙工具**;
256 | * 网页翻墙已成功但仍无法下载。请检查你的代理工具是否成功接收 hardseed 的代理请求(如,goagent 窗口中可查看),windows 用户注意检查是否以**管理员权限运行 hardseed.exe**;
257 | * hardseed 翻墙已成功但仍无法下载。你指定了 --like xxxx 命令行选项,hardseed 将查找标题中是否含有关键字 xxxx,若没有则忽略相关帖子。更换其他关键字。
258 |
259 | **Q4**:我已经在墙外,为何仍下载失败?
260 | **A4**:hardseed 默认采用 goagent 作为代理工具,即,默认本地代理中转地址为 http://127.0.0.1:8087 。如果你已在墙外无须代理即可访问 caoliu 和 aicheng 论坛,那么需要告知 hardseed 不再走本地代理中转而应直接访问,即:
261 | ```
262 | --proxy ""
263 | ```
264 |
265 | **Q5**:如何加快下载速度?
266 | **A5**:最直接会想到多线程下载,一条线程负责下载一个页面,逻辑上,线程数越多、下载速度越快,实际上,存在代理服务器和被访服务器两方面的限制:
267 | * 代理服务器方面的限制,代理服务器为不同用户提供代理服务,为避免相互影响,通常它会限制单个用户的流量和请求频率,所以,hardseed 在指定代理服务器上的线程数一定是有个上限;
268 | * 被访服务器方面的限制,你访问的论坛不会低能到不控制请求频率,举个例,正常情况你 4 秒钟可以打开 4 张 caoliu 论坛的帖子,一旦 caoliu 服务器发现你 1 秒钟打开了 32 张帖子那一定将此视为机器人行为,从而拒绝响应。
269 |
270 | 正由于存在代理服务器和被访服务器两方面的限制,线程数不能无限大,从我多次测试的经验来看,**单个代理服务器**访问被访服务器的并行线程数设定为 8 条最为稳定,否则容易引起代理服务器和被访服务器停服。同个时刻有大量用户在访问 caoliu 论坛,肯定远超 1 秒钟打开了 32 张帖子的频率,为何 caoliu 没对所有用户拒绝请求?显然,这些请求来自不同 IP 的电脑终端,按这个思路,如果 hardseed 若能通过多个不同 IP 访问 caoliu,对于代理服务器和被访服务器来说请求数量都变少了,那完全可以绕开 caoliu 对单个 IP 请求频率过快的限制。由于我们采用代理访问,发起访问请求的 IP 就是代理服务器的 IP,显然,只要 hardseed 支持同时使用多个代理服务器,那么一切问题就简单了。所以,我**赋予了 hardseed 多路代理的能力**。hardseed 支持 4 种代理模式:
271 | * goagent (STRONGLY recommended), --proxy http://127.0.0.1:8087
272 | * shadowsocks, --proxy socks5://127.0.0.1:1080, or socks5h://127.0.0.1:1080
273 | * SSH, --proxy socks4://127.0.0.1:7070
274 | * VPN (PPTP and openVPN), --proxy ""
275 |
276 | 其中,除 VPN 外(这是种全局代理模式),其他三种代理模式可混用,也就是说,你可以同时指定 goagent、shadowsocks、SSH 等三种代理模式
277 | ```
278 | --proxy http://127.0.0.1:8087 socks5://127.0.0.1:1080 socks4://127.0.0.1:7070
279 | ```
280 | 这样,hardseed 就能用 8 * 3 条线程并行下载。另外,goagent 都是通过 GAE 集群发起的网络请求,所以不存在同个机器上配置多个 goagent 的做法;SSH(获取免费帐号 http://www.fastssh.com/ ) 和 shadowsocks(获取免费帐号 https://shadowsocks.net/get ) 代理,你可以获取多个不同的代理服务器(不同的 SSH 或者 shadowsocks 代理的本地端口必须自行设置成不同的),因此可以实现多个不同 IP 发起网络请求。换言之,你可以同时拥有 1 个 goagent、n 个 SSH、m 个 shadowsocks 代理出口 IP,每个 IP 本来允许使用 8 条线程,那么共计就有 (1 + n + m) * 8 条线程并行下载,速度自然上去了。
281 | 我个人偏爱 shadowsocks,以此举例来说:先在 https://shadowsocks.net/get 获取了 4 个 shadowsocks 帐号,本地端口分别配置成 1080、1081、1082、1083,运行此 4 个 shadowsocks 代理程序;同时,运行 goagent 代理程序;然后,在 hardseed 的命令行参数设定
282 | ```
283 | --proxy http://127.0.0.1:8087 socks5://127.0.0.1:1080 socks5://127.0.0.1:1081 socks5://127.0.0.1:1082 socks5://127.0.0.1:1083
284 | ```
285 | 这时,如果你的 --concurrent-tasks 设定为 8(默认值),那么,hardseed 将启用 (4 + 1) * 8 条线程并行下载。那速度,飞快、快 ... *(注,有些 shadowsocks 代理服务器禁止下载,若有异常,将其从 --proxy 代理列表中剔除之。若求稳定,只用 goagent)*
286 |
287 | **Q6**:如何搜索喜欢的视频?
288 | **A6**:--like 选项可以指定多个关键字(空格隔开)参数,帖子标题中出现相关关键字之一便纳入下载范围,否则不下载。通常来说,帖子标题中文字有简体、繁体、日文等三种可能,所以你应该都指定,比如,喜欢“护士”和“情侣”系列,先简译繁 http://www.aies.cn/ ,简译日 http://fanyi.baidu.com/#zh/jp/ ,再由 --topics-range 指定搜索的帖子数量,由 --like 指定搜索关键字:
289 | ```
290 | --topics-range 1024 --like 护士 護士 看護婦 情侣 情侶 カップル
291 | ```
292 |
293 | **Q7**:如何下载高清?
294 | **A7**:hardseed 并不直接支持高清类型下载,只能间接实现,由 --topics-range 指定搜索的帖子数量,由 --like 指定“高清”相关关键字进行下载,比如:
295 | ```
296 | --topics-range 1024 --like 1080P 720P HD 高清 ハイビジョン
297 | ```
298 |
299 | **Q8**:为何有些种子和图片名是无意义字符,类似 (rename)bltouujdrbwcrrcg.torrent?
300 | **A8**:OS 对文件名长度是有限制的,hardseed 是以帖子名作为种子和图片的文件名,一旦帖子名超长将导致文件名超长。由于 hardseed 是采用 ASCII 而非 UNICODE 作为字符存储方式,一个文字可能占一个字节(如,字母“a”)也可能占两个字节(如,汉字“好”),假如文件名最后一个文字是“好”,且刚好文件名超长了一个字节,如果 hardseed 简单地截断“好”的第二个字节,那将导致整个文件名变成乱码。所以,hardseed 用了另外种变通方式,取 16 个 a-z 间的随机字母以及前缀“(rename)”作为文件基础名。
301 |
302 | **Q9**:为何相同的图片要下载两次?
303 | **A9**:有些发帖者担心单一图床挂掉,一般将同个图片上传到两个不同图床上,在帖子中同时发布两个图床的不同地址,hardseed 无法判断图片是否相同(其实非要弄也是可以实现的,只请求 HTTP 头,判断下两个图片的大小及最后更新时间,我觉得没这个必要),所以都下载。
304 |
305 | **Q10**:为何常有类似下面的图片下载报错
306 | ```
307 | failure (download error from http://cl.man.lv/htm_data/2/1407/1174338.html. pictures error: http://p1.imageab.com/2014/07/24/902135bff7a83cd71836764b795c0879.jpg, http://p1.imageab.com/2014/07/24/6cea50f80bba80536ba6cd9da7ba17df.jpg )
308 | ```
309 | **A10**:几张图片下载失败无伤大雅。具体原因很多,常见如下:
310 | * 图床挂了,hardseed 无能为力;
311 | * 发帖者发布的图片 URL 有误,hardseed 无能为力;
312 | * 图片太大、网速太慢,hardseed 在 --timeout-download-picture 指定时间内(默认 16 秒)未下载完整,这时,你可以将 --timeout-download-picture 指定为更大的下载等待时长(如,64),但这会增加整个下载时长;
313 | * 代理服务器限制下载,禁用其他代理只用 goagent。
314 |
315 | **Q11**:我没指定任何忽略关键字,为什么 hardseed 强制取消下载“连发, 連发, 连發, 連發, 连弹, ★㊣, 合辑, 合集, 合輯, nike, 最新の美女骑兵㊣, 精選, 精选”这类合集帖子?
316 | **A11**:两方面原因。一方面,合集均是把以往的单个帖子合并一起再发布,完全重复;一方面,虽然帖子中有多部不同片子的图片,但实际上帖子中的种子只是其中一部片子的,没有意义。
317 |
318 | **Q12**:很多片子迅雷报违规资源,下载速度奇慢,如何破?
319 | **A12**:**第一**,尽可能下新片,道理很简单,越新的片子被举报违规的可能性越小,具体而言,你应该用 hardseed 抓取最新帖子的种子,并且尽可能及时下载;**第二**,借助第三方工具一定程度绕开迅雷对违规资源的限制,ThunderSuperSpeedHacker(《论逆向工程的重要性》,唉,当年多么痴迷 (°Д°)),前提你必须是迅雷会员,否则任何方法均无效。迅雷通过离线空间和高速通道两种途径为会员提速,一旦发现违规资源则关闭离线空间和高速通道两个途径,离线空间是否开启是在服务端控制,客户端的任何外力作用均无效,但是,高速通道是否开启则是在客户端控制,这就为第三方工具强制开启高速通道提供了环境,ThunderSuperSpeedHacker 可以做到。用法很简单,先退出迅雷相关进程(thunder.exe、thunderplatform.exe),再运行 ThunderSuperSpeedHacker 点击“破解”即可。那么,有了 ThunderSuperSpeedHacker 是否一定就能享受高速通道了么?不一定,ThunderSuperSpeedHacker 对迅雷版本敏感。对于迅雷 v7.9.37.4952 及后续版本,一旦 ThunderSuperSpeedHacker 介入将导致迅雷僵死。解决办法:
320 | 0)首先,下载老版本迅雷。有很多网站提供迅雷历史版本下载,不过,安全原则之一,尽可能从官网下载,所以,我只信任迅雷官网上的历史版本。在迅雷首页(http://www.kankan.com/ )右上角有最新版迅雷下载地址,也就是说,要找到迅雷历史版本下载地址,只要找到迅雷官网首页某个历史快照即可,用时光机器(http://web.archive.org/web/ 墙外)很容易做到,比如,4 月 1 号的首页快照(http://web.archive.org/web/20150401032902/http://www.kankan.com/ )对应版本 v7.9.34.4908,下载地址为 http://down.sandai.net/thunder7/Thunder_kk_7.9.34.4908Preview.exe ;
321 | 1)接着,防止自动升级。一旦运行迅雷,它将在后台自动强制升级至最新版,所以,你得暴力阻止其升级,删除升级相关程序(xlliveud.exe、liveudinstaller.exe、thunderliveupdate.xar)即可;
322 | 2)最后,使用 ThunderSuperSpeedHacker 破解违规资源高速通道限制即可。
323 |
324 | **Q13**:hardseed 在 windows 环境下载的文件部分无法删除?
325 | **A13**:hardseed 正在写文件时被 ctrl-c 强制退出,文件锁未被 cyg\*.dll 释放,而 cyg\*.dll 已加载至 CMD 进程空间,所以,请先关闭所有 CMD 窗口,尝试删除相关文件,若不行,请再开新 CMD 窗口后执行
326 | ```
327 | X:\> rd /S C:\[aicheng_west][1~128]@010825\
328 | ```
329 |
330 | **Q14**:为何出现类似如下报错?
331 | ```
332 | "" - failure (download error from http://cl.man.lv/htm_data/4/1408/1189943.html. seed error: )
333 | ```
334 | **A14**:代理工具的问题。你知道,hardseed 默认使用 goagent 作为代理工具,一方面它算是目前使用门槛最低的代理工具,但同时,另一方面它也存在并发请求数过低的限制,一旦并发数过高,goagent 代理返回的都是空白文件,这直接导致 hardseed 抛出如上错误信息。所以,我给你两方面的建议:
335 | * 弃用 goagent,换用 shadowsocks。shadowsocks 轻量代理,速度非常优雅,我曾对它有过简单介绍,https://github.com/yangyangwithgnu/the_new_world_linux#3.2.4 。考虑到 goagent 的并发限制,--concurrent-tasks 默认设置为 8,现在改用 shadowsocks,你完全可以将 --concurrent-tasks 设置成 32 或者更大的数字,你会发现,下载 128 张帖子也就半分钟的事儿;
336 | * 如果你仍坚持使用 goagent,请 --concurrent-tasks 减小至 4 或者更小的数字。
337 |
338 |
339 | ##忠告
340 | -------------
341 |
342 | 你,党之栋梁、国之人才,注意身体,千万!
343 |
344 |
--------------------------------------------------------------------------------
/bin/build_4_win.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/bin/build_4_win.7z
--------------------------------------------------------------------------------
/build/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build script for the hardseed executable.
#
# Exactly one platform section below (linux / cygwin / osX) is meant to be
# active; the others stay commented out. The linux release section is the
# active one.
PROJECT(main)


# Every translation unit that is compiled into the executable.
SET(SRC_LIST ../src/main.cpp
             ../src/lib/self/TopicsListWebpage.cpp ../src/lib/self/AichengTopicsListWebpage.cpp ../src/lib/self/CaoliuTopicsListWebpage.cpp
             ../src/lib/self/TopicWebpage.cpp ../src/lib/self/AichengTopicWebpage.cpp ../src/lib/self/CaoliuTopicWebpage.cpp
             ../src/lib/self/SeedWebpage.cpp ../src/lib/self/JandownSeedWebpage.cpp ../src/lib/self/RmdownSeedWebpage.cpp
             ../src/lib/self/Aicheng.cpp ../src/lib/self/Caoliu.cpp
             ../src/lib/helper/Webpage.cpp
             ../src/lib/helper/Time.cpp ../src/lib/helper/CmdlineOption.cpp ../src/lib/helper/Misc.cpp
             ../src/lib/3rd/json11/json11.cpp)

# linux
#>>>>>>>>>>>>>>>>>>>>>>

## debug
#SET(CMAKE_CXX_COMPILER "clang++")
#SET(CMAKE_CXX_FLAGS "-std=c++11 -Werror -Weverything -Wno-documentation -Wno-disabled-macro-expansion -Wno-float-equal -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-global-constructors -Wno-exit-time-destructors -Wno-missing-prototypes -Wno-padded -Wno-old-style-cast -Wno-weak-vtables")
#SET(CMAKE_BUILD_TYPE debug)
#ADD_EXECUTABLE(main ${SRC_LIST})
#TARGET_LINK_LIBRARIES(main curl pthread)

# release
SET(CMAKE_CXX_COMPILER "g++")
SET(CMAKE_CXX_FLAGS "-std=c++11 -O3")
SET(CMAKE_BUILD_TYPE release)
ADD_EXECUTABLE(hardseed ${SRC_LIST})
TARGET_LINK_LIBRARIES(hardseed curl pthread)
# Install the built target itself. INSTALL(PROGRAMS hardseed ...) looks the file
# up relative to the source directory, which only works for in-source builds.
INSTALL(TARGETS hardseed RUNTIME DESTINATION /usr/local/bin)

#<<<<<<<<<<<<<<<<<<<<<<


## cygwin
##>>>>>>>>>>>>>>>>>>>>>>

#SET(CMAKE_CXX_COMPILER "g++")
#SET(CMAKE_CXX_FLAGS "-std=c++11 -O3 -s -DCYGWIN")
#SET(CMAKE_BUILD_TYPE release)
#ADD_EXECUTABLE(hardseed ${SRC_LIST})
#target_link_libraries(hardseed /bin/cygcurl-4.dll)
#target_link_libraries(hardseed /lib/libiconv.a)

##<<<<<<<<<<<<<<<<<<<<<<


## osX
##>>>>>>>>>>>>>>>>>>>>>>

#SET(CMAKE_CXX_COMPILER "g++")
#SET(CMAKE_CXX_FLAGS "-std=c++11 -O3")
#SET(CMAKE_BUILD_TYPE release)
#ADD_EXECUTABLE(hardseed ${SRC_LIST})
#TARGET_LINK_LIBRARIES(hardseed curl pthread iconv)
#INSTALL(TARGETS hardseed RUNTIME DESTINATION /usr/local/bin)

##<<<<<<<<<<<<<<<<<<<<<<
--------------------------------------------------------------------------------
/config/portals_list.json:
--------------------------------------------------------------------------------
1 | {
2 | "caoliu":"http://cl.bearhk.info/",
3 | "aicheng":"http://www.ac168.info/bt/"
4 | }
5 |
--------------------------------------------------------------------------------
/pic/hardseed.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/pic/hardseed.gif
--------------------------------------------------------------------------------
/pic/running.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/pic/running.gif
--------------------------------------------------------------------------------
/pic/seeds_and_pics.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/pic/seeds_and_pics.gif
--------------------------------------------------------------------------------
/src/.ycm_extra_conf.py:
--------------------------------------------------------------------------------
1 | # This file is NOT licensed under the GPLv3, which is the license for the rest
2 | # of YouCompleteMe.
3 | #
4 | # Here's the license text for this file:
5 | #
6 | # This is free and unencumbered software released into the public domain.
7 | #
8 | # Anyone is free to copy, modify, publish, use, compile, sell, or
9 | # distribute this software, either in source code form or as a compiled
10 | # binary, for any purpose, commercial or non-commercial, and by any
11 | # means.
12 | #
13 | # In jurisdictions that recognize copyright laws, the author or authors
14 | # of this software dedicate any and all copyright interest in the
15 | # software to the public domain. We make this dedication for the benefit
16 | # of the public at large and to the detriment of our heirs and
17 | # successors. We intend this dedication to be an overt act of
18 | # relinquishment in perpetuity of all present and future rights to this
19 | # software under copyright law.
20 | #
21 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
25 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
26 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27 | # OTHER DEALINGS IN THE SOFTWARE.
28 | #
29 | # For more information, please refer to
30 |
31 | import os
32 | import ycm_core
33 |
# Fallback compilation flags, used when no compilation database is configured
# (compilation_database_folder is empty by default).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
flags = [
'-std=c++11',
'-Werror',
'-Weverything',
'-Wno-documentation',
'-Wno-deprecated-declarations',
'-Wno-disabled-macro-expansion',
'-Wno-float-equal',
'-Wno-c++98-compat',
'-Wno-c++98-compat-pedantic',
'-Wno-global-constructors',
'-Wno-exit-time-destructors',
'-Wno-missing-prototypes',
'-Wno-padded',
'-Wno-old-style-cast',
# Force every file to be treated as C++.
'-x',
'c++',
'-I',
'.',
# The option needs its leading dash; a bare 'isystem' is taken as an input
# file name and '/usr/include/' is never added as a system include path.
'-isystem',
'/usr/include/',
]
59 |
60 |
# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags. Notice that YCM itself uses that approach.
compilation_database_folder = ''

# Only open a compilation database when a folder was configured; with the
# default empty string the static 'flags' list is used instead.
if compilation_database_folder:
  database = ycm_core.CompilationDatabase( compilation_database_folder )
else:
  database = None

# Source-file extensions tried when looking for the implementation file that
# belongs to a header.
SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
75 |
def DirectoryOfThisScript():
  """Return the absolute path of the directory that contains this script."""
  return os.path.dirname( os.path.abspath( __file__ ) )
78 |
79 |
def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
  """Return a copy of `flags` with relative include paths made absolute.

  Paths following (or glued to) '-isystem', '-I', '-iquote' and '--sysroot='
  are joined onto `working_directory`. When `working_directory` is empty the
  flags are returned unchanged, as a new list.
  """
  if not working_directory:
    return list( flags )
  new_flags = []
  make_next_absolute = False
  path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
  for flag in flags:
    new_flag = flag

    # The previous flag was a bare path option, so this flag is its path.
    if make_next_absolute:
      make_next_absolute = False
      if not flag.startswith( '/' ):
        new_flag = os.path.join( working_directory, flag )

    for path_flag in path_flags:
      # Bare option: the path arrives as the next list element.
      if flag == path_flag:
        make_next_absolute = True
        break

      # Option and path glued together, e.g. '-Iinclude'.
      if flag.startswith( path_flag ):
        path = flag[ len( path_flag ): ]
        new_flag = path_flag + os.path.join( working_directory, path )
        break

    # Empty flags are dropped.
    if new_flag:
      new_flags.append( new_flag )
  return new_flags
107 |
108 |
def IsHeaderFile( filename ):
  """Return True when `filename` ends in a C/C++ header extension."""
  extension = os.path.splitext( filename )[ 1 ]
  return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
112 |
113 |
def GetCompilationInfoForFile( filename ):
  """Look up the compilation info for `filename` in the compilation database.

  Returns None for a header when no sibling source file with non-empty flags
  is found. Reads the module-level `database`, so it must only be called when
  a database is configured.
  """
  # The compilation_commands.json file generated by CMake does not have entries
  # for header files. So we do our best by asking the db for flags for a
  # corresponding source file, if any. If one exists, the flags for that file
  # should be good enough.
  if IsHeaderFile( filename ):
    basename = os.path.splitext( filename )[ 0 ]
    for extension in SOURCE_EXTENSIONS:
      replacement_file = basename + extension
      if os.path.exists( replacement_file ):
        compilation_info = database.GetCompilationInfoForFile(
          replacement_file )
        if compilation_info.compiler_flags_:
          return compilation_info
    return None
  return database.GetCompilationInfoForFile( filename )
130 |
131 |
def FlagsForFile( filename, **kwargs ):
  """Return the compilation flags for `filename`.

  Uses the compilation database when one is configured, otherwise the static
  `flags` list resolved against this script's directory. Returns None when
  the database has no usable entry for the file; otherwise a dict with the
  keys 'flags' and 'do_cache'.
  """
  if database:
    # Bear in mind that compilation_info.compiler_flags_ does NOT return a
    # python list, but a "list-like" StringVec object
    compilation_info = GetCompilationInfoForFile( filename )
    if not compilation_info:
      return None

    final_flags = MakeRelativePathsInFlagsAbsolute(
      compilation_info.compiler_flags_,
      compilation_info.compiler_working_dir_ )
  else:
    relative_to = DirectoryOfThisScript()
    final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )

  return {
    'flags': final_flags,
    'do_cache': True
  }
151 |
--------------------------------------------------------------------------------
/src/.ycm_extra_conf.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/src/.ycm_extra_conf.pyc
--------------------------------------------------------------------------------
/src/lib/3rd/json11/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013 Dropbox, Inc.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/src/lib/3rd/json11/README.md:
--------------------------------------------------------------------------------
1 | json11
2 | ------
3 |
4 | json11 is a tiny JSON library for C++11, providing JSON parsing and serialization.
5 |
6 | The core object provided by the library is json11::Json. A Json object represents any JSON
7 | value: null, bool, number (int or double), string (std::string), array (std::vector), or
8 | object (std::map).
9 |
10 | Json objects act like values. They can be assigned, copied, moved, compared for equality or
11 | order, and so on. There are also helper methods Json::dump, to serialize a Json to a string, and
12 | Json::parse (static) to parse a std::string as a Json object.
13 |
14 | It's easy to make a JSON object with C++11's new initializer syntax:
15 |
16 | Json my_json = Json::object {
17 | { "key1", "value1" },
18 | { "key2", false },
19 | { "key3", Json::array { 1, 2, 3 } },
20 | };
21 | std::string json_str = my_json.dump();
22 |
23 | There are also implicit constructors that allow standard and user-defined types to be
24 | automatically converted to JSON. For example:
25 |
26 | class Point {
27 | public:
28 | int x;
29 | int y;
30 | Point (int x, int y) : x(x), y(y) {}
31 | Json to_json() const { return Json::array { x, y }; }
32 | };
33 |
34 | std::vector points = { { 1, 2 }, { 10, 20 }, { 100, 200 } };
35 | std::string points_json = Json(points).dump();
36 |
37 | JSON values can have their values queried and inspected:
38 |
39 | Json json = Json::array { Json::object { { "k", "v" } } };
40 | std::string str = json[0]["k"].string_value();
41 |
42 | More documentation is still to come. For now, see json11.hpp.
43 |
--------------------------------------------------------------------------------
/src/lib/3rd/json11/json11.cpp:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2013 Dropbox, Inc.
2 | *
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy
4 | * of this software and associated documentation files (the "Software"), to deal
5 | * in the Software without restriction, including without limitation the rights
6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | * copies of the Software, and to permit persons to whom the Software is
8 | * furnished to do so, subject to the following conditions:
9 | *
10 | * The above copyright notice and this permission notice shall be included in
11 | * all copies or substantial portions of the Software.
12 | *
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | * THE SOFTWARE.
20 | */
21 |
#include "json11.hpp"
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <limits>
27 |
28 | namespace json11 {
29 |
30 | static const int max_depth = 200;
31 |
32 | using std::string;
33 | using std::vector;
34 | using std::map;
35 | using std::make_shared;
36 | using std::initializer_list;
37 | using std::move;
38 |
39 | /* * * * * * * * * * * * * * * * * * * *
40 | * Serialization
41 | */
42 |
// Serializes the JSON null value: appends the literal `null` to `out`.
static void dump(std::nullptr_t, string &out) {
    out.append("null");
}
46 |
// Serializes a double by appending its "%.17g" rendering to `out`.
// NaN and +/-Infinity have no JSON representation (printf would produce
// "nan"/"inf", which no JSON parser accepts), so they are written as `null`.
static void dump(double value, string &out) {
    if (!std::isfinite(value)) {
        out += "null";
        return;
    }
    char buf[32];
    snprintf(buf, sizeof buf, "%.17g", value);
    out += buf;
}
52 |
// Serializes an int by appending its decimal ("%d") rendering to `out`.
static void dump(int value, string &out) {
    char digits[32];
    snprintf(digits, sizeof digits, "%d", value);
    out.append(digits);
}
58 |
// Serializes a boolean: appends the literal `true` or `false` to `out`.
static void dump(bool value, string &out) {
    if (value) {
        out += "true";
    } else {
        out += "false";
    }
}
62 |
// Serializes a string as a double-quoted JSON string literal appended to `out`.
//
// Backslash, double quote and \b \f \n \r \t get their two-character escapes,
// any other byte <= 0x1f becomes \u00XX, and the UTF-8 encodings of U+2028 and
// U+2029 are written as \u2028 / \u2029. Every other byte is copied verbatim.
static void dump(const string &value, string &out) {
    out += '"';
    for (size_t i = 0; i < value.length(); i++) {
        const char ch = value[i];
        if (ch == '\\') {
            out += "\\\\";
        } else if (ch == '"') {
            out += "\\\"";
        } else if (ch == '\b') {
            out += "\\b";
        } else if (ch == '\f') {
            out += "\\f";
        } else if (ch == '\n') {
            out += "\\n";
        } else if (ch == '\r') {
            out += "\\r";
        } else if (ch == '\t') {
            out += "\\t";
        } else if (static_cast<uint8_t>(ch) <= 0x1f) {
            char buf[8];
            snprintf(buf, sizeof buf, "\\u%04x", ch);
            out += buf;
        // The look-ahead reads below stay in bounds: on a const string,
        // value[length()] yields '\0', and && stops before reading further.
        } else if (static_cast<uint8_t>(ch) == 0xe2 && static_cast<uint8_t>(value[i+1]) == 0x80
                   && static_cast<uint8_t>(value[i+2]) == 0xa8) {
            out += "\\u2028";
            i += 2;   // skip the two continuation bytes just consumed
        } else if (static_cast<uint8_t>(ch) == 0xe2 && static_cast<uint8_t>(value[i+1]) == 0x80
                   && static_cast<uint8_t>(value[i+2]) == 0xa9) {
            out += "\\u2029";
            i += 2;
        } else {
            out += ch;
        }
    }
    out += '"';
}
99 |
100 | static void dump(const Json::array &values, string &out) {
101 | bool first = true;
102 | out += "[";
103 | for (const auto &value : values) {
104 | if (!first)
105 | out += ", ";
106 | value.dump(out);
107 | first = false;
108 | }
109 | out += "]";
110 | }
111 |
112 | static void dump(const Json::object &values, string &out) {
113 | bool first = true;
114 | out += "{";
115 | for (const auto &kv : values) {
116 | if (!first)
117 | out += ", ";
118 | dump(kv.first, out);
119 | out += ": ";
120 | kv.second.dump(out);
121 | first = false;
122 | }
123 | out += "}";
124 | }
125 |
126 | void Json::dump(string &out) const {
127 | m_ptr->dump(out);
128 | }
129 |
130 | /* * * * * * * * * * * * * * * * * * * *
131 | * Value wrappers
132 | */
133 |
134 | template
135 | class Value : public JsonValue {
136 | protected:
137 |
138 | // Constructors
139 | explicit Value(const T &value) : m_value(value) {}
140 | explicit Value(T &&value) : m_value(move(value)) {}
141 |
142 | // Get type tag
143 | Json::Type type() const override {
144 | return tag;
145 | }
146 |
147 | // Comparisons
148 | bool equals(const JsonValue * other) const override {
149 | return m_value == static_cast *>(other)->m_value;
150 | }
151 | bool less(const JsonValue * other) const override {
152 | return m_value < static_cast *>(other)->m_value;
153 | }
154 |
155 | const T m_value;
156 | void dump(string &out) const override { json11::dump(m_value, out); }
157 | };
158 |
159 | class JsonDouble final : public Value {
160 | double number_value() const override { return m_value; }
161 | int int_value() const override { return static_cast(m_value); }
162 | bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
163 | bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
164 | public:
165 | explicit JsonDouble(double value) : Value(value) {}
166 | };
167 |
168 | class JsonInt final : public Value {
169 | double number_value() const override { return m_value; }
170 | int int_value() const override { return m_value; }
171 | bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
172 | bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
173 | public:
174 | explicit JsonInt(int value) : Value(value) {}
175 | };
176 |
177 | class JsonBoolean final : public Value {
178 | bool bool_value() const override { return m_value; }
179 | public:
180 | explicit JsonBoolean(bool value) : Value(value) {}
181 | };
182 |
183 | class JsonString final : public Value {
184 | const string &string_value() const override { return m_value; }
185 | public:
186 | explicit JsonString(const string &value) : Value(value) {}
187 | explicit JsonString(string &&value) : Value(move(value)) {}
188 | };
189 |
190 | class JsonArray final : public Value {
191 | const Json::array &array_items() const override { return m_value; }
192 | const Json & operator[](size_t i) const override;
193 | public:
194 | explicit JsonArray(const Json::array &value) : Value(value) {}
195 | explicit JsonArray(Json::array &&value) : Value(move(value)) {}
196 | };
197 |
198 | class JsonObject final : public Value {
199 | const Json::object &object_items() const override { return m_value; }
200 | const Json & operator[](const string &key) const override;
201 | public:
202 | explicit JsonObject(const Json::object &value) : Value(value) {}
203 | explicit JsonObject(Json::object &&value) : Value(move(value)) {}
204 | };
205 |
206 | class JsonNull final : public Value {
207 | public:
208 | JsonNull() : Value(nullptr) {}
209 | };
210 |
211 | /* * * * * * * * * * * * * * * * * * * *
212 | * Static globals - static-init-safe
213 | */
214 | struct Statics {
215 | const std::shared_ptr null = make_shared();
216 | const std::shared_ptr t = make_shared(true);
217 | const std::shared_ptr f = make_shared(false);
218 | const string empty_string;
219 | const vector empty_vector;
220 | const map empty_map;
221 | Statics() {}
222 | };
223 |
224 | const Statics & statics() {
225 | static const Statics s {};
226 | return s;
227 | }
228 |
229 | const Json & static_null() {
230 | // This has to be separate, not in Statics, because Json() accesses statics().null.
231 | static const Json json_null;
232 | return json_null;
233 | }
234 |
235 | /* * * * * * * * * * * * * * * * * * * *
236 | * Constructors
237 | */
238 |
239 | Json::Json() noexcept : m_ptr(statics().null) {}
240 | Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {}
241 | Json::Json(double value) : m_ptr(make_shared(value)) {}
242 | Json::Json(int value) : m_ptr(make_shared(value)) {}
243 | Json::Json(bool value) : m_ptr(value ? statics().t : statics().f) {}
244 | Json::Json(const string &value) : m_ptr(make_shared(value)) {}
245 | Json::Json(string &&value) : m_ptr(make_shared(move(value))) {}
246 | Json::Json(const char * value) : m_ptr(make_shared(value)) {}
247 | Json::Json(const Json::array &values) : m_ptr(make_shared(values)) {}
248 | Json::Json(Json::array &&values) : m_ptr(make_shared(move(values))) {}
249 | Json::Json(const Json::object &values) : m_ptr(make_shared(values)) {}
250 | Json::Json(Json::object &&values) : m_ptr(make_shared(move(values))) {}
251 |
252 | /* * * * * * * * * * * * * * * * * * * *
253 | * Accessors
254 | */
255 |
256 | Json::Type Json::type() const { return m_ptr->type(); }
257 | double Json::number_value() const { return m_ptr->number_value(); }
258 | int Json::int_value() const { return m_ptr->int_value(); }
259 | bool Json::bool_value() const { return m_ptr->bool_value(); }
260 | const string & Json::string_value() const { return m_ptr->string_value(); }
261 | const vector & Json::array_items() const { return m_ptr->array_items(); }
262 | const map & Json::object_items() const { return m_ptr->object_items(); }
263 | const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; }
264 | const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; }
265 |
266 | double JsonValue::number_value() const { return 0; }
267 | int JsonValue::int_value() const { return 0; }
268 | bool JsonValue::bool_value() const { return false; }
269 | const string & JsonValue::string_value() const { return statics().empty_string; }
270 | const vector & JsonValue::array_items() const { return statics().empty_vector; }
271 | const map & JsonValue::object_items() const { return statics().empty_map; }
272 | const Json & JsonValue::operator[] (size_t) const { return static_null(); }
273 | const Json & JsonValue::operator[] (const string &) const { return static_null(); }
274 |
275 | const Json & JsonObject::operator[] (const string &key) const {
276 | auto iter = m_value.find(key);
277 | return (iter == m_value.end()) ? static_null() : iter->second;
278 | }
279 | const Json & JsonArray::operator[] (size_t i) const {
280 | if (i >= m_value.size()) return static_null();
281 | else return m_value[i];
282 | }
283 |
284 | /* * * * * * * * * * * * * * * * * * * *
285 | * Comparison
286 | */
287 |
288 | bool Json::operator== (const Json &other) const {
289 | if (m_ptr->type() != other.m_ptr->type())
290 | return false;
291 |
292 | return m_ptr->equals(other.m_ptr.get());
293 | }
294 |
295 | bool Json::operator< (const Json &other) const {
296 | if (m_ptr->type() != other.m_ptr->type())
297 | return m_ptr->type() < other.m_ptr->type();
298 |
299 | return m_ptr->less(other.m_ptr.get());
300 | }
301 |
302 | /* * * * * * * * * * * * * * * * * * * *
303 | * Parsing
304 | */
305 |
/* esc(c)
 *
 * Format char c suitable for printing in an error message: bytes in the range
 * 0x20..0x7f are shown as 'c' (code), every other byte as (code) only.
 */
static inline string esc(char c) {
    char buf[12];
    // Compare as unsigned so bytes >= 0x80 are not mistaken for small values
    // when char is signed.
    if (static_cast<uint8_t>(c) >= 0x20 && static_cast<uint8_t>(c) <= 0x7f) {
        snprintf(buf, sizeof buf, "'%c' (%d)", c, c);
    } else {
        snprintf(buf, sizeof buf, "(%d)", c);
    }
    return string(buf);
}
319 |
// True when x lies in the closed interval [lower, upper].
static inline bool in_range(long x, long lower, long upper) {
    return !(x < lower || x > upper);
}
323 |
324 | /* JsonParser
325 | *
326 | * Object that tracks all state of an in-progress parse: the input text, the read position and the error status.
327 | */
328 | struct JsonParser {
329 |
330 | /* State
331 | */
332 | const string &str; // input text being parsed
333 | size_t i; // index into str of the next character to read
334 | string &err; // receives the message of the first failure
335 | bool failed; // true once fail() has been called
336 |
337 | /* fail(msg, err_ret = Json())
338 | *
339 | * Mark this parse as failed and return err_ret. Only the message of the first failure is stored in err.
340 | */
341 | Json fail(string &&msg) {
342 | return fail(move(msg), Json());
343 | }
344 |
345 | template
346 | T fail(string &&msg, const T err_ret) {
347 | if (!failed)
348 | err = std::move(msg);
349 | failed = true;
350 | return err_ret;
351 | }
352 |
353 | /* consume_whitespace()
354 | *
355 | * Advance until the current character is non-whitespace, i.e. anything other than space, CR, LF or tab.
356 | */
357 | void consume_whitespace() {
358 | while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t')
359 | i++;
360 | }
361 |
362 | /* get_next_token()
363 | *
364 | * Return the next non-whitespace character and step past it. If the end of the input is reached,
365 | * flag an error and return 0.
366 | */
367 | char get_next_token() {
368 | consume_whitespace();
369 | if (i == str.size())
370 | return (char)(fail("unexpected end of input", 0));
371 |
372 | return str[i++];
373 | }
374 |
375 | /* encode_utf8(pt, out)
376 | *
377 | * Encode pt as UTF-8 (one to four bytes, chosen by magnitude) and add it to out. A negative pt appends nothing.
378 | */
379 | void encode_utf8(long pt, string & out) {
380 | if (pt < 0)
381 | return;
382 |
383 | if (pt < 0x80) {
384 | out += static_cast(pt);
385 | } else if (pt < 0x800) {
386 | out += static_cast((pt >> 6) | 0xC0);
387 | out += static_cast((pt & 0x3F) | 0x80);
388 | } else if (pt < 0x10000) {
389 | out += static_cast((pt >> 12) | 0xE0);
390 | out += static_cast(((pt >> 6) & 0x3F) | 0x80);
391 | out += static_cast((pt & 0x3F) | 0x80);
392 | } else {
393 | out += static_cast((pt >> 18) | 0xF0);
394 | out += static_cast(((pt >> 12) & 0x3F) | 0x80);
395 | out += static_cast(((pt >> 6) & 0x3F) | 0x80);
396 | out += static_cast((pt & 0x3F) | 0x80);
397 | }
398 | }
399 |
400 | /* parse_string()
401 | *
402 | * Parse a string, starting at the current position (just past the opening quote). Returns the decoded text, or an empty string after calling fail().
403 | */
404 | string parse_string() {
405 | string out;
406 | long last_escaped_codepoint = -1; // pending \u codepoint not yet written to out; -1 means none
407 | while (true) {
408 | if (i == str.size())
409 | return fail("unexpected end of input in string", "");
410 |
411 | char ch = str[i++];
412 |
413 | if (ch == '"') {
414 | encode_utf8(last_escaped_codepoint, out);
415 | return out;
416 | }
417 |
418 | if (in_range(ch, 0, 0x1f))
419 | return fail("unescaped " + esc(ch) + " in string", "");
420 |
421 | // The usual case: non-escaped characters (any pending codepoint is flushed first)
422 | if (ch != '\\') {
423 | encode_utf8(last_escaped_codepoint, out);
424 | last_escaped_codepoint = -1;
425 | out += ch;
426 | continue;
427 | }
428 |
429 | // Handle escapes: ch becomes the character that follows the backslash
430 | if (i == str.size())
431 | return fail("unexpected end of input in string", "");
432 |
433 | ch = str[i++];
434 |
435 | if (ch == 'u') {
436 | // Extract 4-byte escape sequence
437 | string esc = str.substr(i, 4);
438 | // Explicitly check length of the substring. The following loop
439 | // relies on std::string returning the terminating NUL when
440 | // accessing str[length]. Checking here reduces brittleness.
441 | if (esc.length() < 4) {
442 | return fail("bad \\u escape: " + esc, "");
443 | }
444 | for (int j = 0; j < 4; j++) {
445 | if (!in_range(esc[(unsigned int)j], 'a', 'f') && !in_range(esc[(unsigned int)j], 'A', 'F')
446 | && !in_range(esc[(unsigned int)j], '0', '9'))
447 | return fail("bad \\u escape: " + esc, "");
448 | }
449 |
450 | long codepoint = strtol(esc.data(), nullptr, 16);
451 |
452 | // JSON specifies that characters outside the BMP shall be encoded as a pair
453 | // of 4-hex-digit \u escapes encoding their surrogate pair components. Check
454 | // whether we're in the middle of such a beast: the previous codepoint was an
455 | // escaped lead (high) surrogate, and this is a trail (low) surrogate.
456 | if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
457 | && in_range(codepoint, 0xDC00, 0xDFFF)) {
458 | // Reassemble the two surrogate pairs into one astral-plane character, per
459 | // the UTF-16 algorithm.
460 | encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
461 | | (codepoint - 0xDC00)) + 0x10000, out);
462 | last_escaped_codepoint = -1;
463 | } else {
464 | encode_utf8(last_escaped_codepoint, out);
465 | last_escaped_codepoint = codepoint;
466 | }
467 |
468 | i += 4;
469 | continue;
470 | }
471 |
472 | encode_utf8(last_escaped_codepoint, out);
473 | last_escaped_codepoint = -1;
474 |
475 | if (ch == 'b') {
476 | out += '\b';
477 | } else if (ch == 'f') {
478 | out += '\f';
479 | } else if (ch == 'n') {
480 | out += '\n';
481 | } else if (ch == 'r') {
482 | out += '\r';
483 | } else if (ch == 't') {
484 | out += '\t';
485 | } else if (ch == '"' || ch == '\\' || ch == '/') {
486 | out += ch;
487 | } else {
488 | return fail("invalid escape character " + esc(ch), "");
489 | }
490 | }
491 | }
492 |
493 | /* parse_number()
494 | *
495 | * Parse a number starting at str[i]. A literal with no fraction or exponent that passes the digits10 length check is converted with atoi; everything else with strtod.
496 | */
497 | Json parse_number() {
498 | size_t start_pos = i;
499 |
500 | if (str[i] == '-')
501 | i++;
502 |
503 | // Integer part
504 | if (str[i] == '0') {
505 | i++;
506 | if (in_range(str[i], '0', '9'))
507 | return fail("leading 0s not permitted in numbers");
508 | } else if (in_range(str[i], '1', '9')) {
509 | i++;
510 | while (in_range(str[i], '0', '9'))
511 | i++;
512 | } else {
513 | return fail("invalid " + esc(str[i]) + " in number");
514 | }
515 |
516 | if (str[i] != '.' && str[i] != 'e' && str[i] != 'E'
517 | && (i - start_pos) <= static_cast(std::numeric_limits::digits10)) {
518 | return std::atoi(str.c_str() + start_pos);
519 | }
520 |
521 | // Decimal part
522 | if (str[i] == '.') {
523 | i++;
524 | if (!in_range(str[i], '0', '9'))
525 | return fail("at least one digit required in fractional part");
526 |
527 | while (in_range(str[i], '0', '9'))
528 | i++;
529 | }
530 |
531 | // Exponent part
532 | if (str[i] == 'e' || str[i] == 'E') {
533 | i++;
534 |
535 | if (str[i] == '+' || str[i] == '-')
536 | i++;
537 |
538 | if (!in_range(str[i], '0', '9'))
539 | return fail("at least one digit required in exponent");
540 |
541 | while (in_range(str[i], '0', '9'))
542 | i++;
543 | }
544 |
545 | return std::strtod(str.c_str() + start_pos, nullptr);
546 | }
547 |
548 | /* expect(str, res)
549 | *
550 | * Expect that 'str' starts at the character that was just read. If it does, advance
551 | * the input and return res. If not, flag an error. The position is first stepped back by one so the comparison includes that character.
552 | */
553 | Json expect(const string &expected, Json res) {
554 | assert(i != 0);
555 | i--;
556 | if (str.compare(i, expected.length(), expected) == 0) {
557 | i += expected.length();
558 | return res;
559 | } else {
560 | return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length()));
561 | }
562 | }
563 |
564 | /* parse_json()
565 | *
566 | * Parse one JSON value of any kind (number, true, false, null, string, object or array). depth is the current nesting level and is checked against max_depth.
567 | */
568 | Json parse_json(int depth) {
569 | if (depth > max_depth) {
570 | return fail("exceeded maximum nesting depth");
571 | }
572 |
573 | char ch = get_next_token();
574 | if (failed)
575 | return Json();
576 |
577 | if (ch == '-' || (ch >= '0' && ch <= '9')) {
578 | i--;
579 | return parse_number();
580 | }
581 |
582 | if (ch == 't')
583 | return expect("true", true);
584 |
585 | if (ch == 'f')
586 | return expect("false", false);
587 |
588 | if (ch == 'n')
589 | return expect("null", Json());
590 |
591 | if (ch == '"')
592 | return parse_string();
593 |
594 | if (ch == '{') {
595 | map data;
596 | ch = get_next_token();
597 | if (ch == '}')
598 | return data;
599 |
600 | while (1) {
601 | if (ch != '"')
602 | return fail("expected '\"' in object, got " + esc(ch));
603 |
604 | string key = parse_string();
605 | if (failed)
606 | return Json();
607 |
608 | ch = get_next_token();
609 | if (ch != ':')
610 | return fail("expected ':' in object, got " + esc(ch));
611 |
612 | data[std::move(key)] = parse_json(depth + 1);
613 | if (failed)
614 | return Json();
615 |
616 | ch = get_next_token();
617 | if (ch == '}')
618 | break;
619 | if (ch != ',')
620 | return fail("expected ',' in object, got " + esc(ch));
621 |
622 | ch = get_next_token();
623 | }
624 | return data;
625 | }
626 |
627 | if (ch == '[') {
628 | vector data;
629 | ch = get_next_token();
630 | if (ch == ']')
631 | return data;
632 |
633 | while (1) {
634 | i--;
635 | data.push_back(parse_json(depth + 1));
636 | if (failed)
637 | return Json();
638 |
639 | ch = get_next_token();
640 | if (ch == ']')
641 | break;
642 | if (ch != ',')
643 | return fail("expected ',' in list, got " + esc(ch));
644 |
645 | ch = get_next_token();
646 | (void)ch;
647 | }
648 | return data;
649 | }
650 |
651 | return fail("expected value, got " + esc(ch));
652 | }
653 | };
654 |
655 | Json Json::parse(const string &in, string &err) {
656 | JsonParser parser { in, 0, err, false };
657 | Json result = parser.parse_json(0);
658 |
659 | // Check for any trailing garbage
660 | parser.consume_whitespace();
661 | if (parser.i != in.size())
662 | return parser.fail("unexpected trailing " + esc(in[parser.i]));
663 |
664 | return result;
665 | }
666 |
667 | // Documented in json11.hpp. Parses values one after another until the input is exhausted or a parse fails.
668 | vector Json::parse_multi(const string &in, string &err) {
669 | JsonParser parser { in, 0, err, false };
670 |
671 | vector json_vec;
672 | while (parser.i != in.size() && !parser.failed) {
673 | json_vec.push_back(parser.parse_json(0)); // the value returned by a failed parse is appended too; the loop condition then stops
674 | // Check for another object
675 | parser.consume_whitespace();
676 | }
677 | return json_vec;
678 | }
679 |
680 | /* * * * * * * * * * * * * * * * * * * *
681 | * Shape-checking
682 | */
683 |
684 | bool Json::has_shape(const shape & types, string & err) const { // true when this is an object whose listed fields all have the listed types
685 | if (!is_object()) {
686 | err = "expected JSON object, got " + dump();
687 | return false;
688 | }
689 |
690 | for (auto & item : types) { // item.first is the field name, item.second the expected type
691 | if ((*this)[item.first].type() != item.second) {
692 | err = "bad type for " + item.first + " in " + dump(); // reports the first mismatching field only
693 | return false;
694 | }
695 | }
696 |
697 | return true; // err is not modified on success
698 | }
699 |
700 | } // namespace json11
701 |
--------------------------------------------------------------------------------
/src/lib/3rd/json11/json11.hpp:
--------------------------------------------------------------------------------
1 | /* json11
2 | *
3 | * json11 is a tiny JSON library for C++11, providing JSON parsing and serialization.
4 | *
5 | * The core object provided by the library is json11::Json. A Json object represents any JSON
6 | * value: null, bool, number (int or double), string (std::string), array (std::vector), or
7 | * object (std::map).
8 | *
9 | * Json objects act like values: they can be assigned, copied, moved, compared for equality or
10 | * order, etc. There are also helper methods Json::dump, to serialize a Json to a string, and
11 | * Json::parse (static) to parse a std::string as a Json object.
12 | *
13 | * Internally, the various types of Json object are represented by the JsonValue class
14 | * hierarchy.
15 | *
16 | * A note on numbers - JSON specifies the syntax of number formatting but not its semantics,
17 | * so some JSON implementations distinguish between integers and floating-point numbers, while
18 | * some don't. In json11, we choose the latter. Because some JSON implementations (namely
19 | * Javascript itself) treat all numbers as the same type, distinguishing the two leads
20 | * to JSON that will be *silently* changed by a round-trip through those implementations.
21 | * Dangerous! To avoid that risk, json11 stores all numbers as double internally, but also
22 | * provides integer helpers.
23 | *
24 | * Fortunately, double-precision IEEE754 ('double') can precisely store any integer in the
25 | * range +/-2^53, which includes every 'int' on most systems. (Timestamps often use int64
26 | * or long long to avoid the Y2038 problem; a double storing microseconds since some epoch
27 | * will be exact for +/- 275 years.)
28 | */
29 |
30 | /* Copyright (c) 2013 Dropbox, Inc.
31 | *
32 | * Permission is hereby granted, free of charge, to any person obtaining a copy
33 | * of this software and associated documentation files (the "Software"), to deal
34 | * in the Software without restriction, including without limitation the rights
35 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
36 | * copies of the Software, and to permit persons to whom the Software is
37 | * furnished to do so, subject to the following conditions:
38 | *
39 | * The above copyright notice and this permission notice shall be included in
40 | * all copies or substantial portions of the Software.
41 | *
42 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
43 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
44 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
45 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
46 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
47 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
48 | * THE SOFTWARE.
49 | */
50 |
51 | #pragma once
52 |
53 | #include
54 | #include
55 | #include
56 | #include
57 | #include
58 |
59 | namespace json11 {
60 |
61 | class JsonValue;
62 |
63 | class Json final {
64 | public:
65 | // Types of JSON value. operator< orders values of different types by this enumerator order.
66 | enum Type {
67 | NUL, NUMBER, BOOL, STRING, ARRAY, OBJECT
68 | };
69 |
70 | // Array and object typedefs
71 | typedef std::vector array;
72 | typedef std::map object;
73 |
74 | // Constructors for the various types of JSON value.
75 | Json() noexcept; // NUL
76 | Json(std::nullptr_t) noexcept; // NUL
77 | Json(double value); // NUMBER
78 | Json(int value); // NUMBER
79 | Json(bool value); // BOOL
80 | Json(const std::string &value); // STRING
81 | Json(std::string &&value); // STRING
82 | Json(const char * value); // STRING
83 | Json(const array &values); // ARRAY
84 | Json(array &&values); // ARRAY
85 | Json(const object &values); // OBJECT
86 | Json(object &&values); // OBJECT
87 |
88 | // Implicit constructor: anything with a to_json() function. The result of t.to_json() is used to build the value.
89 | template
90 | Json(const T & t) : Json(t.to_json()) {}
91 |
92 | // Implicit constructor: map-like objects (std::map, std::unordered_map, etc). The entries are copied into an object.
93 | template ::value
95 | && std::is_constructible::value,
96 | int>::type = 0>
97 | Json(const M & m) : Json(object(m.begin(), m.end())) {}
98 |
99 | // Implicit constructor: vector-like objects (std::list, std::vector, std::set, etc). The elements are copied into an array.
100 | template ::value,
102 | int>::type = 0>
103 | Json(const V & v) : Json(array(v.begin(), v.end())) {}
104 |
105 | // This prevents Json(some_pointer) from accidentally producing a bool. Use
106 | // Json(bool(some_pointer)) if that behavior is desired.
107 | Json(void *) = delete;
108 |
109 | // Accessors
110 | Type type() const;
111 |
112 | bool is_null() const { return type() == NUL; }
113 | bool is_number() const { return type() == NUMBER; }
114 | bool is_bool() const { return type() == BOOL; }
115 | bool is_string() const { return type() == STRING; }
116 | bool is_array() const { return type() == ARRAY; }
117 | bool is_object() const { return type() == OBJECT; }
118 |
119 | // Return the enclosed value if this is a number, 0 otherwise. Note that json11 does not
120 | // distinguish between integer and non-integer numbers - number_value() and int_value()
121 | // can both be applied to a NUMBER-typed object.
122 | double number_value() const;
123 | int int_value() const;
124 |
125 | // Return the enclosed value if this is a boolean, false otherwise.
126 | bool bool_value() const;
127 | // Return the enclosed string if this is a string, "" otherwise.
128 | const std::string &string_value() const;
129 | // Return the enclosed std::vector if this is an array, or an empty vector otherwise.
130 | const array &array_items() const;
131 | // Return the enclosed std::map if this is an object, or an empty map otherwise.
132 | const object &object_items() const;
133 |
134 | // Return a reference to arr[i] if this is an array, Json() otherwise.
135 | const Json & operator[](size_t i) const;
136 | // Return a reference to obj[key] if this is an object, Json() otherwise.
137 | const Json & operator[](const std::string &key) const;
138 |
139 | // Serialize. The first overload writes into out; the second returns a new string.
140 | void dump(std::string &out) const;
141 | std::string dump() const {
142 | std::string out;
143 | dump(out);
144 | return out;
145 | }
146 |
147 | // Parse. If parse fails, return Json() and assign an error message to err.
148 | static Json parse(const std::string & in, std::string & err);
149 | static Json parse(const char * in, std::string & err) { // a null pointer is reported through err instead of being dereferenced
150 | if (in) {
151 | return parse(std::string(in), err);
152 | } else {
153 | err = "null input";
154 | return nullptr;
155 | }
156 | }
157 | // Parse multiple objects, concatenated or separated by whitespace
158 | static std::vector parse_multi(const std::string & in, std::string & err);
159 |
160 | bool operator== (const Json &rhs) const;
161 | bool operator< (const Json &rhs) const;
162 | bool operator!= (const Json &rhs) const { return !(*this == rhs); } // the remaining comparisons are derived from == and <
163 | bool operator<= (const Json &rhs) const { return !(rhs < *this); }
164 | bool operator> (const Json &rhs) const { return (rhs < *this); }
165 | bool operator>= (const Json &rhs) const { return !(*this < rhs); }
166 |
167 | /* has_shape(types, err)
168 | *
169 | * Return true if this is a JSON object and, for each item in types, has a field of
170 | * the given type. If not, return false and set err to a descriptive message.
171 | */
172 | typedef std::initializer_list> shape;
173 | bool has_shape(const shape & types, std::string & err) const;
174 |
175 | private:
176 | std::shared_ptr m_ptr; // the held value object; operator== and operator< forward to it
177 | };
178 |
179 | // Internal class hierarchy - JsonValue objects are not exposed to users of this API. Every member is protected; Json, JsonInt and JsonDouble reach them as friends.
180 | class JsonValue {
181 | protected:
182 | friend class Json;
183 | friend class JsonInt;
184 | friend class JsonDouble;
185 | virtual Json::Type type() const = 0; // pure virtual: each concrete value type must provide these four
186 | virtual bool equals(const JsonValue * other) const = 0;
187 | virtual bool less(const JsonValue * other) const = 0;
188 | virtual void dump(std::string &out) const = 0;
189 | virtual double number_value() const; // the accessors below are virtual but not pure
190 | virtual int int_value() const;
191 | virtual bool bool_value() const;
192 | virtual const std::string &string_value() const;
193 | virtual const Json::array &array_items() const;
194 | virtual const Json &operator[](size_t i) const;
195 | virtual const Json::object &object_items() const;
196 | virtual const Json &operator[](const std::string &key) const;
197 | virtual ~JsonValue() {}
198 | };
199 |
200 | } // namespace json11
201 |
--------------------------------------------------------------------------------
/src/lib/3rd/json11/test.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "json11.hpp"
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | using namespace json11;
12 | using std::string;
13 |
14 | // Check at compile time that Json has the properties we want. CHECK_TRAIT(x) expands to a static_assert on the trait x, with the trait name as the failure message.
15 | #include
16 | #define CHECK_TRAIT(x) static_assert(std::x::value, #x)
17 | CHECK_TRAIT(is_nothrow_constructible);
18 | CHECK_TRAIT(is_nothrow_default_constructible);
19 | CHECK_TRAIT(is_copy_constructible);
20 | CHECK_TRAIT(is_nothrow_move_constructible);
21 | CHECK_TRAIT(is_copy_assignable);
22 | CHECK_TRAIT(is_nothrow_move_assignable);
23 | CHECK_TRAIT(is_nothrow_destructible);
24 |
25 | void parse_from_stdin() {
26 | string buf;
27 | while (!std::cin.eof()) buf += std::cin.get();
28 |
29 | string err;
30 | auto json = Json::parse(buf, err);
31 | if (!err.empty()) {
32 | printf("Failed: %s\n", err.c_str());
33 | } else {
34 | printf("Result: %s\n", json.dump().c_str());
35 | }
36 | }
37 |
38 | int main(int argc, char **argv) {
39 | if (argc == 2 && argv[1] == string("--stdin")) { // --stdin mode: parse standard input, print the outcome and exit
40 | parse_from_stdin();
41 | return 0;
42 | }
43 |
44 | const string simple_test =
45 | R"({"k1":"v1", "k2":42, "k3":["a",123,true,false,null]})";
46 |
47 | string err;
48 | auto json = Json::parse(simple_test, err);
49 |
50 | std::cout << "k1: " << json["k1"].string_value() << "\n";
51 | std::cout << "k3: " << json["k3"].dump() << "\n";
52 |
53 | for (auto &k : json["k3"].array_items()) {
54 | std::cout << " - " << k.dump() << "\n";
55 | }
56 |
57 | std::list l1 { 1, 2, 3 };
58 | std::vector l2 { 1, 2, 3 };
59 | std::set l3 { 1, 2, 3 };
60 | assert(Json(l1) == Json(l2)); // different sequence containers with the same elements must give equal Json values
61 | assert(Json(l2) == Json(l3));
62 |
63 | std::map m1 { { "k1", "v1" }, { "k2", "v2" } };
64 | std::unordered_map m2 { { "k1", "v1" }, { "k2", "v2" } };
65 | assert(Json(m1) == Json(m2)); // likewise for map-like containers
66 |
67 | // Json literals: build in code the same document that simple_test describes (compared further down)
68 | Json obj = Json::object({
69 | { "k1", "v1" },
70 | { "k2", 42.0 },
71 | { "k3", Json::array({ "a", 123.0, true, false, nullptr }) },
72 | });
73 |
74 | std::cout << "obj: " << obj.dump() << "\n";
75 |
76 | assert(Json("a").number_value() == 0); // asking a string for a number gives 0
77 | assert(Json("a").string_value() == "a");
78 | assert(Json().number_value() == 0);
79 |
80 | assert(obj == json);
81 | assert(Json(42) == Json(42.0)); // int and double constructors compare by numeric value
82 | assert(Json(42) != Json(42.1));
83 |
84 | const string unicode_escape_test =
85 | R"([ "blah\ud83d\udca9blah\ud83dblah\udca9blah\u0000blah\u1234" ])";
86 |
87 | const char utf8[] = "blah" "\xf0\x9f\x92\xa9" "blah" "\xed\xa0\xbd" "blah"
88 | "\xed\xb2\xa9" "blah" "\0" "blah" "\xe1\x88\xb4";
89 |
90 | Json uni = Json::parse(unicode_escape_test, err);
91 | assert(uni[0].string_value().size() == (sizeof utf8) - 1); // sizeof utf8 counts the terminating NUL, hence the - 1
92 | assert(memcmp(uni[0].string_value().data(), utf8, sizeof utf8) == 0);
93 |
94 | Json my_json = Json::object {
95 | { "key1", "value1" },
96 | { "key2", false },
97 | { "key3", Json::array { 1, 2, 3 } },
98 | };
99 | std::string json_str = my_json.dump();
100 | printf("%s\n", json_str.c_str());
101 |
102 | class Point { // a user type made convertible to Json through its to_json() member
103 | public:
104 | int x;
105 | int y;
106 | Point (int x, int y) : x(x), y(y) {}
107 | Json to_json() const { return Json::array { x, y }; }
108 | };
109 |
110 | std::vector points = { { 1, 2 }, { 10, 20 }, { 100, 200 } };
111 | std::string points_json = Json(points).dump();
112 | printf("%s\n", points_json.c_str());
113 | }
114 |
--------------------------------------------------------------------------------
/src/lib/helper/CmdlineOption.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "CmdlineOption.h"
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | using namespace std;
11 |
12 | static bool
13 | isOption (const string& str) // true when str starts with exactly two dashes followed by a non-dash character
14 | {
15 | return( str.size() >= 3 && // the shortest option "--x"
16 | '-' == str[0] &&
17 | '-' == str[1] &&
18 | '-' != str[2] ); // a third dash disqualifies the word
19 | }
20 |
21 | // cmdname --foo aa, for example, --foo is an option, aa is an argument.
22 | // convention about command line options:
23 | // 0) an option begins with exactly two dashes followed by a non-dash character (so the shortest option, --x, has three characters); every other word is treated as an argument;
24 | // 1) an argument belongs to the nearest option before it; arguments that appear before the first option are ignored;
25 | // 2) an option may have no argument at all, E.G., an option that acts as a true/false switch;
26 | // 3) an option may have several arguments, E.G., --bar a b c d.
27 | CmdlineOption::CmdlineOption (unsigned argc, char* argv[])
28 | {
29 | if (argc < 2) { // nothing besides the program name
30 | return;
31 | }
32 |
33 | vector raw_options_list(argv + 1, argv + argc); // every word after the program name
34 |
35 | string last_option; // the option that following arguments are attached to
36 | for (const auto& e : raw_options_list) {
37 | if (isOption(e)) {
38 | options_and_arguments_list_[e]; // creates the entry if absent, so an option without arguments is still recorded
39 | last_option = e;
40 | } else {
41 | if (!last_option.empty()) {
42 | options_and_arguments_list_[last_option].push_back(e);
43 | }
44 | }
45 | }
46 |
47 | //// DEBUG. show the result of parsing command options
48 | //for (const auto& e : options_and_arguments_list_) {
49 | //const vector& arguments_list = e.second;
50 | //cout << e.first << "(" << arguments_list.size() << "): ";
51 | //copy(e.second.cbegin(), e.second.cend(), ostream_iterator(cout, ","));
52 | //cout << endl;
53 | //}
54 | }
55 |
56 | CmdlineOption::~CmdlineOption ()
57 | {
58 | ; // empty statement: the destructor body does no work of its own
59 | }
60 |
61 | bool
62 | CmdlineOption::hasOption (const string& option) const // true when option (spelled with its leading dashes) was recorded by the constructor
63 | {
64 | return(options_and_arguments_list_.cend() != options_and_arguments_list_.find(option));
65 | }
66 |
67 | const vector&
68 | CmdlineOption::getArgumentsList (const string& option) // arguments recorded for option, or an empty list when the option is unknown
69 | {
70 | static const vector empty_arguments_list; // shared empty result, so a reference can be returned for unknown options
71 | return(hasOption(option) ? options_and_arguments_list_[option] : empty_arguments_list); // operator[] is reached only for existing keys, so nothing is inserted
72 | }
73 |
74 |
--------------------------------------------------------------------------------
/src/lib/helper/CmdlineOption.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 | #include
5 | #include
6 | #include
7 |
8 | using std::string;
9 | using std::pair;
10 | using std::vector;
11 | using std::unordered_map;
12 |
13 | class CmdlineOption // holds the options and arguments given to the constructor as argc/argv
14 | {
15 | public:
16 | CmdlineOption (unsigned argc, char* argv[]);
17 | virtual ~CmdlineOption ();
18 | bool hasOption (const string& option) const;
19 | const vector& getArgumentsList (const string& option);
20 |
21 | private:
22 | unordered_map> options_and_arguments_list_; // keyed by option text; see getArgumentsList() for the mapped value
23 | };
24 |
25 |
--------------------------------------------------------------------------------
/src/lib/helper/Misc.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "Misc.h"
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | using std::bitset;
16 | using std::cout;
17 | using std::cerr;
18 | using std::endl;
19 | using std::make_pair;
20 | using std::ostream_iterator;
21 |
22 | // split str into sub-strings at every character that appears in tokens_list; sub-strings are appended to
23 | // splited_substr_list and the token characters found, in order, to appeared_tokens_list. note:
24 | // 0) case sensitive; a token char at the very end of str does not produce a trailing empty sub-str;
25 | // 1) if there are two consecutive token-chars in str, splitStr() puts an empty sub-str into splited_substr_list;
26 | // 2) if the first element of splited_substr_list ends up empty (E.G., str begins with a token-char) it is removed.
27 | void
28 | splitStr ( const string& str,
29 | const string& tokens_list,
30 | vector& splited_substr_list,
31 | vector& appeared_tokens_list )
32 | {
33 | size_t begin_pos = 0, end_pos;
34 | while (begin_pos < str.size()) {
35 | const auto iter_token = find_first_of( str.cbegin() + (int)begin_pos, str.cend(),
36 | tokens_list.cbegin(), tokens_list.cend() );
37 | if (str.cend() == iter_token) { // no further token: the rest of str is the last sub-str
38 | splited_substr_list.push_back(str.substr(begin_pos));
39 | break;
40 | }
41 |
42 | appeared_tokens_list.push_back(*iter_token);
43 | end_pos = (unsigned)(iter_token - str.cbegin());
44 | splited_substr_list.push_back(str.substr(begin_pos, end_pos - begin_pos));
45 |
46 | begin_pos = end_pos + 1; // continue just past the token
47 | }
48 | // the list is still empty when str is empty and nothing was passed in; element 0 must not be read in that case
49 | if (!splited_substr_list.empty() && splited_substr_list[0].empty()) {
50 | splited_substr_list.erase(splited_substr_list.begin());
51 | }
52 | }
53 |
54 | // searches txt from from_pos for keyword_begin, then for keyword_end after it. first return is the string between keyword_begin and keyword_end;
55 | // second return is end_pos + keyword_end.size(), i.e. the position just past keyword_end. if either keyword is not found, returns an empty string and 0.
56 | pair
57 | fetchStringBetweenKeywords ( const string& txt,
58 | const string& keyword_begin,
59 | const string& keyword_end,
60 | size_t from_pos )
61 | {
62 | const auto begin_pos = txt.find(keyword_begin, from_pos);
63 | if (string::npos == begin_pos) {
64 | //cerr << "WARNING! fetchStringBetweenKeywords() CANNOT find the keyword \"" << kyeword_begin << "\"" << endl;
65 | return(make_pair("", 0));
66 | }
67 | const auto end_pos = txt.find(keyword_end, begin_pos + keyword_begin.size()); // keyword_end is searched only after the end of keyword_begin
68 | if (string::npos == end_pos) {
69 | //cerr << "WARNING! fetchStringBetweenKeywords() CANNOT find the keyword \"" << kyeword_end << "\"" << endl;
70 | return(make_pair("", 0));
71 | }
72 |
73 |
74 | return(make_pair( txt.substr(begin_pos + keyword_begin.size(), end_pos - begin_pos - keyword_begin.size()),
75 | end_pos + keyword_end.size() ));
76 | }
77 |
78 | // get file size by FILE*: the offset ftell() reports after seeking to the end. the stream offset is put back afterwards.
79 | // return -1 if failure. NOTE(review): the returned value is whatever the second ftell() gives; the fseek() results and the first ftell() are not checked.
80 | long
81 | getFileSize (FILE* fs)
82 | {
83 | // backup current offset
84 | long offset_bak = ftell(fs);
85 |
86 | // get the filesize
87 | fseek(fs, 0, SEEK_END);
88 | long file_size = ftell(fs);
89 |
90 | // restore last offset
91 | fseek(fs, offset_bak, SEEK_SET);
92 |
93 |
94 | return(file_size);
95 | }
96 |
97 | // returns a path whose file name is process_name + process_id + thread_id + rand, joined by underscores; only the name is built, no file is created here
98 | extern char *__progname;
99 | string
100 | makeRandomFilename (void)
101 | {
102 | static bool b_first = true; // NOTE(review): plain function-local flag with no locking; confirm callers cannot race on the first call
103 | if (b_first) { // seed rand() once, on the first call
104 | srand((unsigned)time(NULL));
105 | b_first = false;
106 | }
107 |
108 | const string& filename = string(__progname) + "_" +
109 | convNumToStr(getpid()) + "_"
110 | + convNumToStr(pthread_self()) + "_"
111 | + convNumToStr(rand());
112 |
113 | #ifdef CYGWIN
114 | return("c:\\" + filename);
115 | #else
116 | return("/tmp/" + filename);
117 | #endif
118 | }
119 |
120 | // Conversion rules between unicode and UTF8:
121 | // =================================================================================
122 | // | unicode code point range | UTF8 encoding
123 | // n | (hexadecimal) | (binary)
124 | // --+-----------------------+------------------------------------------------------
125 | // 1 | 0000 0000 - 0000 007F | 0xxxxxxx
126 | // 2 | 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
127 | // 3 | 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
128 | // 4 | 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
129 | // 5 | 0020 0000 - 03FF FFFF | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
130 | // 6 | 0400 0000 - 7FFF FFFF | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
131 | // =================================================================================
132 | // The x positions of the UTF8 pattern are filled with the bits of the unicode value, from right to
133 | // left; x positions left over are filled with 0. For example, the unicode value of "严" is 4E25
134 | // (100111000100101). By the table above, 4E25 lies in the range of row 3 (0000 0800-0000 FFFF), so
135 | // the UTF8 encoding of "严" takes three bytes, in the pattern "1110xxxx 10xxxxxx 10xxxxxx". Starting
136 | // from the last bit of "严", the x positions are filled from back to front and the rest padded with 0.
137 | // This gives the UTF8 encoding "11100100 10111000 10100101", which is E4B8A5 in hexadecimal.
138 | //
139 | // Return value: UTF8 is a variable-length encoding, so both the number of significant bytes of the
140 | // encoding and the encoded value itself are returned (a count of 0 means the input was rejected).
141 | //
142 | // Notes:
143 | // 0) little-endian storage is assumed;
144 | // 1) a unicode value needs at most 4 bytes and its UTF8 encoding at most 6 bytes, which is why the
145 | // parameter type must be unsigned int and the returned value type unsigned long long;
146 | //
147 | // For more details see: http://www.ruanyifeng.com/blog/2007/10/ascii_unicode_and_utf-8.html
148 | pair
149 | convertUnicodeToUtf8 (unsigned int unicode)
150 | {
151 | if (unicode <= 0x0000007F) { // row 1: the value is its own encoding
152 | return(make_pair(1, unicode));
153 | } else if (0x00000080 <= unicode && unicode <= 0x000007FF) { // row 2; every branch below works on the reversed bit string, inserts the zero gaps for the marker bits, then ORs in the marker pattern
154 | bitset<16> unicode_bits(unicode);
155 | const string unicode_bits_str = unicode_bits.to_string();
156 | string unicode_bits_str_reverse(unicode_bits_str.crbegin(), unicode_bits_str.crend());
157 | unicode_bits_str_reverse.insert(6, "00");
158 | unicode_bits_str_reverse.insert(6 + 2 + 5, "000");
159 | unicode_bits_str_reverse.resize(16);
160 | const bitset<16> masker(string(unicode_bits_str_reverse.crbegin(), unicode_bits_str_reverse.crend()));
161 |
162 | bitset<16> utf8_lower("1100000010000000");
163 |
164 | bitset<16> utf8_bits = utf8_lower | masker;
165 |
166 | return(make_pair(2, utf8_bits.to_ullong()));
167 | } else if (0x00000800 <= unicode && unicode <= 0x0000FFFF) { // row 3
168 | bitset<16> unicode_bits(unicode);
169 | const string unicode_bits_str = unicode_bits.to_string();
170 | string unicode_bits_str_reverse(unicode_bits_str.crbegin(), unicode_bits_str.crend());
171 | unicode_bits_str_reverse.insert(6, "00");
172 | unicode_bits_str_reverse.insert(6 + 2 + 6, "00");
173 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 4, "0000");
174 | unicode_bits_str_reverse.resize(24);
175 | const bitset<24> masker(string(unicode_bits_str_reverse.crbegin(), unicode_bits_str_reverse.crend()));
176 |
177 | bitset<24> utf8_lower("111000001000000010000000");
178 |
179 | bitset<24> utf8_bits = utf8_lower | masker;
180 |
181 | return(make_pair(3, utf8_bits.to_ullong()));
182 | } else if (0x00010000 <= unicode && unicode <= 0x0010FFFF) { // row 4
183 | bitset<32> unicode_bits(unicode);
184 | const string unicode_bits_str = unicode_bits.to_string();
185 | string unicode_bits_str_reverse(unicode_bits_str.crbegin(), unicode_bits_str.crend());
186 | unicode_bits_str_reverse.insert(6, "00");
187 | unicode_bits_str_reverse.insert(6 + 2 + 6, "00");
188 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6, "00");
189 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6 + 2 + 3, "000");
190 | unicode_bits_str_reverse.resize(32);
191 | const bitset<32> masker(string(unicode_bits_str_reverse.crbegin(), unicode_bits_str_reverse.crend()));
192 |
193 | bitset<32> utf8_lower("11110000100000001000000010000000");
194 |
195 | bitset<32> utf8_bits = utf8_lower | masker;
196 |
197 | return(make_pair(4, utf8_bits.to_ullong()));
198 | } else if (0x00200000 <= unicode && unicode <= 0x03FFFFFF) { // row 5
199 | bitset<32> unicode_bits(unicode);
200 | const string unicode_bits_str = unicode_bits.to_string();
201 | string unicode_bits_str_reverse(unicode_bits_str.crbegin(), unicode_bits_str.crend());
202 | unicode_bits_str_reverse.insert(6, "00");
203 | unicode_bits_str_reverse.insert(6 + 2 + 6, "00");
204 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6, "00");
205 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6 + 2 + 6, "00");
206 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6 + 2 + 6 + 2 + 2, "00");
207 | unicode_bits_str_reverse.resize(40);
208 | const bitset<40> masker(string(unicode_bits_str_reverse.crbegin(), unicode_bits_str_reverse.crend()));
209 |
210 | bitset<40> utf8_lower("1111100010000000100000001000000010000000");
211 |
212 | bitset<40> utf8_bits = utf8_lower | masker;
213 |
214 | return(make_pair(5, utf8_bits.to_ullong()));
215 | } else if (0x04000000 <= unicode && unicode <= 0x7FFFFFFF) { // row 6
216 | bitset<64> unicode_bits(unicode);
217 | const string unicode_bits_str = unicode_bits.to_string();
218 | string unicode_bits_str_reverse(unicode_bits_str.crbegin(), unicode_bits_str.crend());
219 | unicode_bits_str_reverse.insert(6, "00");
220 | unicode_bits_str_reverse.insert(6 + 2 + 6, "00");
221 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6, "00");
222 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6 + 2 + 6, "00");
223 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6 + 2 + 6 + 2 + 6, "00");
224 | unicode_bits_str_reverse.insert(6 + 2 + 6 + 2 + 6 + 2 + 6 + 2 + 6 + 2 + 1, "0");
225 | unicode_bits_str_reverse.resize(48);
226 | const bitset<48> masker(string(unicode_bits_str_reverse.crbegin(), unicode_bits_str_reverse.crend()));
227 |
228 | bitset<48> utf8_lower("111111001000000010000000100000001000000010000000");
229 |
230 | bitset<48> utf8_bits = utf8_lower | masker;
231 |
232 | return(make_pair(6, utf8_bits.to_ullong()));
233 | } else { // reached for values above 0x7FFFFFFF and also for 0x00110000 - 0x001FFFFF, which no branch above covers
234 | cerr << "WARNING! " << unicode << "is not a vaild unicode. " << endl;
235 | return(make_pair(0, 0));
236 | }
237 | }
238 |
// Run a command in a child process and block until it has terminated.
// (fork() alone returns immediately; this helper waits for the child.)
//   cmd        - program to run; resolved by execvp(), so PATH is searched
//   argv       - complete argument vector for the program, argv[0] included
//   p_exitCode - when not null, receives the child's exit status if the child
//                terminated through a normal exit
//   b_echo     - when true, the argument vector is printed to stdout first
// Returns true only when the child exited normally with EXIT_SUCCESS.
// A child that cannot exec the command exits with status 127.
bool
wait_cmd ( const string& cmd,
           const vector<string>& argv,
           int* p_exitCode,
           bool b_echo )
{
    // echo the command line
    if (b_echo) {
        for (const auto& arg : argv) {
            cout << arg << " ";
        }
        cout << endl;
    }

    // convert the arguments to the NULL-terminated char*[] execvp() expects;
    // the vector owns the array, so neither parent nor child has to free it
    vector<char*> argv_tmp;
    argv_tmp.reserve(argv.size() + 1);
    for (const auto& arg : argv) {
        argv_tmp.push_back(const_cast<char*>(arg.c_str()));
    }
    argv_tmp.push_back(nullptr);

    const pid_t pid = fork();
    if (pid < 0) { // fork() failed, nothing was started
        return(false);
    }

    if (0 == pid) { // child process
        execvp(cmd.c_str(), argv_tmp.data());
        // execvp() only returns on failure. Leave the child right here:
        // returning would let it continue running the caller's code as a
        // second copy of the program. _exit() skips atexit handlers and
        // stdio flushing, which belong to the parent.
        _exit(127);
    }

    // parent process
    int status = 0;
    if (pid != waitpid(pid, &status, 0)) {
        return(false);
    }

    // only a normal exit() counts; a child ended by a signal is a failure
    if (!WIFEXITED(status)) {
        return(false);
    }

    const int exit_code = WEXITSTATUS(status);
    if (nullptr != p_exitCode) {
        *p_exitCode = exit_code;
    }

    return(EXIT_SUCCESS == exit_code);
}
287 |
288 |
--------------------------------------------------------------------------------
/src/lib/helper/Misc.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | using std::string;
10 | using std::ostringstream;
11 | using std::vector;
12 | using std::pair;
13 |
14 |
// Number-to-text conversion through a string stream.
// std::to_string() is deliberately avoided: this code is also built for
// win32 with cygwin, where to_string() could not be found, so the
// conversion is done by hand.
template <typename T>
string
convNumToStr (T num)
{
    ostringstream text_stream;
    text_stream << num;

    return text_stream.str();
}
28 |
29 | // Split str into substrings, using the characters of tokens_list as separators.
30 | void
31 | splitStr ( const string& str,
32 | const string& tokens_list,
33 | vector& splited_substr_list, // out: presumably the resulting substrings -- confirm in Misc.cpp
34 | vector& appeared_tokens_list ); // out: presumably the separators that were encountered -- confirm in Misc.cpp
35 |
36 | // Fetch the text located between keyword_begin and keyword_end in txt, searching from from_pos.
37 | // The match is case sensitive. Callers use .first as the fetched text and .second as the position to continue from.
38 | pair
39 | fetchStringBetweenKeywords ( const string& txt,
40 | const string& keyword_begin,
41 | const string& keyword_end,
42 | size_t from_pos = 0 );
43 |
44 | // Get the size of the file behind an open FILE*.
45 | long
46 | getFileSize (FILE* fs);
47 |
48 | // Build a random file name, path included.
49 | string
50 | makeRandomFilename (void);
51 |
52 | // Convert a Unicode code point to UTF-8. The result is (number of bytes, the bytes packed into one integer); (0, 0) for an invalid code point.
53 | pair
54 | convertUnicodeToUtf8 (unsigned int unicode);
55 |
56 | // fork() returns right after starting the new process; this function instead waits until the new process has finished before it returns.
57 | bool
58 | wait_cmd ( const string& cmd,
59 | const vector& argv,
60 | int* p_exitCode = nullptr,
61 | bool b_echo = false );
62 |
--------------------------------------------------------------------------------
/src/lib/helper/RichTxt.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 | #include
5 |
6 | using std::string;
7 |
// ANSI SGR ("select graphic rendition") escape sequences for styling
// terminal output. Each attribute has an *_on / *_off pair; reset_all clears
// every attribute at once.
namespace RichTxt
{
    // bold
    // The off code is SGR 22 ("normal intensity"). SGR 21 is defined as
    // "doubly underlined", so it does not reliably switch bold off.
    static const string bold_on("\x1b[1m");
    static const string bold_off("\x1b[22m");

    // italic
    static const string italic_on("\x1b[3m");
    static const string italic_off("\x1b[23m");

    // underline
    static const string underline_on("\x1b[4m");
    static const string underline_off("\x1b[24m");

    // hide (concealed text)
    static const string hide_on("\x1b[8m");
    static const string hide_off("\x1b[28m");

    // deletline (crossed-out text)
    static const string deletline_on("\x1b[9m");
    static const string deletline_off("\x1b[29m");

    // foreground colors
    static const string foreground_black("\x1b[30m");
    static const string foreground_red("\x1b[31m");
    static const string foreground_green("\x1b[32m");
    static const string foreground_yellow("\x1b[33m");
    static const string foreground_blue("\x1b[34m");
    static const string foreground_magenta("\x1b[35m");
    static const string foreground_cyan("\x1b[36m");
    static const string foreground_white("\x1b[37m");

    // background colors
    static const string background_black("\x1b[40m");
    static const string background_red("\x1b[41m");
    static const string background_green("\x1b[42m");
    static const string background_yellow("\x1b[43m");
    static const string background_blue("\x1b[44m");
    static const string background_magenta("\x1b[45m");
    static const string background_cyan("\x1b[46m");
    static const string background_white("\x1b[47m");

    // reset all attributes
    static const string reset_all("\x1b[0m");
}
53 |
54 | // normal usage:
55 | // 0) cout << "email: " << RichTxt::bold_on << "yangyang.gnu@gmail.com" << RichTxt::bold_off << endl;
56 | // 1) string name("yangyang.gnu"); string name_italic = RichTxt::italic_on + RichTxt::background_green + name + RichTxt::italic_off;
57 |
58 |
--------------------------------------------------------------------------------
/src/lib/helper/Time.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "Time.h"
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | using namespace std;
10 |
11 |
// Decimal text of an unsigned number, produced through a string stream.
// std::to_string() is not used because the cygwin build (win32 port of this
// linux code) cannot find it.
static string
convUnsignedToStr (unsigned num)
{
    ostringstream text_stream;
    text_stream << num;

    return text_stream.str();
}
23 |
// Resize a string from its tail instead of its head.
// string::resize() keeps the leading characters and pads at the end; this
// helper keeps the trailing characters and pads at the front:
//   shorter than digits -> ch is prepended until the length is digits
//   longer than digits  -> only the last digits characters are kept
static string
resizeStringByEndian (const string& str, unsigned digits, char ch = '0')
{
    if (str.size() >= digits) {
        return str.substr(str.size() - digits);
    }

    return string(digits - str.size(), ch) + str;
}
35 |
36 | Time::Time ()
37 | {
38 | time_t raw_time = time(nullptr);
39 | const struct tm* p_st = localtime(&raw_time);
40 |
41 | year_ = (unsigned)p_st->tm_year + 1900;
42 | month_ = (unsigned)p_st->tm_mon + 1;
43 | day_in_month_ = (unsigned)p_st->tm_mday;
44 | day_in_year_ = (unsigned)p_st->tm_yday + 1;
45 | day_in_week_ = (unsigned)p_st->tm_wday;
46 | hour_ = (unsigned)p_st->tm_hour;
47 | minute_ = (unsigned)p_st->tm_min;
48 | second_ = (unsigned)p_st->tm_sec;
49 | }
50 |
51 | Time::~Time ()
52 | {
53 | ;
54 | }
55 |
56 | unsigned
57 | Time::getYear (void) const
58 | {
59 | return(year_);
60 | }
61 |
62 | string
63 | Time::getYear (unsigned digits) const
64 | {
65 | return( 0 == digits ?
66 | convUnsignedToStr(getYear()) : resizeStringByEndian(convUnsignedToStr(getYear()), digits) );
67 | }
68 |
69 | unsigned
70 | Time::getMonth (void) const
71 | {
72 | return(month_);
73 | }
74 |
75 | string
76 | Time::getMonth (unsigned digits) const
77 | {
78 | return( 0 == digits ?
79 | convUnsignedToStr(getMonth()) : resizeStringByEndian(convUnsignedToStr(getMonth()), digits) );
80 | }
81 |
82 | unsigned
83 | Time::getDayInWeek (void) const
84 | {
85 | return(day_in_week_);
86 | }
87 |
88 | string
89 | Time::getDayInWeek (bool b_abbr) const
90 | {
91 | switch (getDayInWeek()) {
92 | case 1:
93 | return(b_abbr ? "mon" : "monday");
94 | case 2:
95 | return(b_abbr ? "tues" : "tuesday");
96 | case 3:
97 | return(b_abbr ? "wed" : "wednesday");
98 | case 4:
99 | return(b_abbr ? "thurs" : "thursday");
100 | case 5:
101 | return(b_abbr ? "fri" : "friday");
102 | case 6:
103 | return(b_abbr ? "sat" : "saturday");
104 | case 0:
105 | return(b_abbr ? "sun" : "sunday");
106 | default:
107 | return("");
108 | }
109 | }
110 |
111 | unsigned
112 | Time::getDayInMonth (void) const
113 | {
114 | return(day_in_month_);
115 | }
116 |
117 | string
118 | Time::getDayInMonth (unsigned digits) const
119 | {
120 | return( 0 == digits ?
121 | convUnsignedToStr(getDayInMonth()) : resizeStringByEndian(convUnsignedToStr(getDayInMonth()), digits) );
122 | }
123 |
124 | unsigned
125 | Time::getDayInYear (void) const
126 | {
127 | return(day_in_year_);
128 | }
129 |
130 | string
131 | Time::getDayInYear (unsigned digits) const
132 | {
133 | return( 0 == digits ?
134 | convUnsignedToStr(getDayInYear()) : resizeStringByEndian(convUnsignedToStr(getDayInYear()), digits) );
135 | }
136 | unsigned
137 | Time::getHour (void) const
138 | {
139 | return(hour_);
140 | }
141 |
142 | string
143 | Time::getHour (unsigned digits) const
144 | {
145 | return( 0 == digits ?
146 | convUnsignedToStr(getHour()) : resizeStringByEndian(convUnsignedToStr(getHour()), digits) );
147 | }
148 |
149 | unsigned
150 | Time::getMinute (void) const
151 | {
152 | return(minute_);
153 | }
154 |
155 | string
156 | Time::getMinute (unsigned digits) const
157 | {
158 | return( 0 == digits ?
159 | convUnsignedToStr(getMinute()) : resizeStringByEndian(convUnsignedToStr(getMinute()), digits) );
160 | }
161 |
162 | unsigned
163 | Time::getSecond (void) const
164 | {
165 | return(second_);
166 | }
167 |
168 | string
169 | Time::getSecond (unsigned digits) const
170 | {
171 | return( 0 == digits ?
172 | convUnsignedToStr(getSecond()) : resizeStringByEndian(convUnsignedToStr(getSecond()), digits) );
173 | }
174 |
175 |
--------------------------------------------------------------------------------
/src/lib/helper/Time.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 | #include
5 |
6 | using std::string;
7 |
// Snapshot of the local time taken when the object is constructed.
// Every field has two getters: one returning the number, and one returning
// text (padded / cut to a number of digits, or a weekday name).
class Time
{
    public:
        Time ();
        virtual ~Time ();

        // calendar date
        unsigned getYear () const;
        string getYear (unsigned digits) const;
        unsigned getMonth () const;
        string getMonth (unsigned digits) const;

        // day within week / month / year
        unsigned getDayInWeek () const;
        string getDayInWeek (bool b_abbr) const;
        unsigned getDayInMonth () const;
        string getDayInMonth (unsigned digits) const;
        unsigned getDayInYear () const;
        string getDayInYear (unsigned digits) const;

        // time of day
        unsigned getHour () const;
        string getHour (unsigned digits) const;
        unsigned getMinute () const;
        string getMinute (unsigned digits) const;
        unsigned getSecond () const;
        string getSecond (unsigned digits) const;

    private:
        unsigned year_;
        unsigned month_;
        unsigned day_in_month_;
        unsigned day_in_week_;
        unsigned day_in_year_;
        unsigned hour_;
        unsigned minute_;
        unsigned second_;
};
46 |
47 |
--------------------------------------------------------------------------------
/src/lib/helper/Webpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | using std::string;
10 | using std::vector;
11 | using std::pair;
12 |
13 | class Webpage // wraps one libcurl handle (p_curl_) together with the text, title and HTTP status of a page
14 | {
15 | public:
16 | explicit Webpage ( const string& url,
17 | const string& filename = "", // NOTE(review): presumably an optional file to store the page in -- confirm in Webpage.cpp
18 | const string& proxy_addr = "", // empty by default
19 | const unsigned timeout_second = 16,
20 | const unsigned retry_times = 2,
21 | const unsigned retry_sleep_second = 4,
22 | const string& user_agent = "Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0",
23 | const string& post_cookies = "",
24 | const vector>& post_sections_list = vector>(),
25 | bool b_redirct = true ); // NOTE(review): presumably "follow redirects" -- confirm in Webpage.cpp
26 | virtual ~Webpage ();
27 |
28 | string getProxyAddr (void) const;
29 | string checkProxyOutIpByThirdparty (void) const;
30 | string checkProxyOutRegionByThirdparty (void) const;
31 |
32 | string getUserAgent (void) const;
33 | string checkUserAgentByThirdparty (void) const;
34 |
35 | const string& getTxt (void) const; // returned by reference
36 | const string& getTitle (void) const; // returned by reference
37 |
38 | long getLatestHttpStatusCode (void) const;
39 | bool isValidLatestHttpStatusCode (void) const;
40 |
41 | string getHttpHeader (const string& url) const; // the getRemote*() calls below take the same url argument
42 | string getRemoteFiletype (const string& url) const;
43 | string getRemoteFilecharset (const string& url) const;
44 | string getRemoteFilesize (const string& url) const;
45 | string getRemoteFilename (const string& url) const;
46 | string getRemoteFiletime (const string& url) const;
47 |
48 | double getAvarSpeedDownload (void) const;
49 |
50 | bool isLoaded (void) const; // callers check this right after construction
51 | size_t convertCharset (const string& src_charset, const string& dest_charset);
52 | bool saveasFile (const string& filename) const;
53 |
54 | bool downloadFile ( const string& url,
55 | const string& filename,
56 | const string& referer = "",
57 | const unsigned timeout_second = 0, // NOTE(review): 0 presumably means "no timeout" -- confirm in Webpage.cpp
58 | const unsigned retry_times = 4,
59 | const unsigned retry_sleep_second = 4 );
60 |
61 | bool setMultiPostSectionsList (const vector>& post_sections_list);
62 | bool submitMultiPost ( const string& url,
63 | const string& filename,
64 | const vector>& post_sections_list,
65 | const unsigned timeout_second = 32,
66 | const unsigned retry_times = 4,
67 | const unsigned retry_sleep_second = 4 );
68 |
69 | string escapeUrl (const string& raw_url) const;
70 |
71 | const vector& getCookies (void) const;
72 |
73 | private:
74 | bool download_ ( const string& raw_url, // same parameter list as downloadFile(), without default values
75 | const string& filename,
76 | const string& referer,
77 | const unsigned timeout_second,
78 | const unsigned retry_times,
79 | const unsigned retry_sleep_second );
80 | long parseLatestHttpStatusCode_ (void);
81 |
82 | private:
83 | enum HttpHeader_ {header, type, charset, length, name, modified}; // selects which header item requestHttpHeader_() returns
84 | string requestHttpHeader_ ( const string& raw_url,
85 | HttpHeader_ header_item,
86 | const unsigned timeout_second = 4,
87 | const unsigned retry_times = 2,
88 | const unsigned retry_sleep_second = 2 ) const;
89 |
90 | private:
91 | CURL* p_curl_; // libcurl handle
92 | string url_;
93 | char libcurl_err_info_buff_[CURL_ERROR_SIZE]; // sized by libcurl's CURL_ERROR_SIZE
94 | string proxy_addr_;
95 | string txt_;
96 | string title_;
97 | bool b_loaded_ok_;
98 | long latest_http_status_code_;
99 | double aver_speed_download_;
100 | const string user_agent_;
101 | vector cookie_items_list_;
102 | };
103 |
104 |
105 | string convertUnicodeTxtToUtf8 (const string& unicode_txt);
106 | string unescapeHtml (const string& raw_txt);
107 |
108 |
--------------------------------------------------------------------------------
/src/lib/self/Aicheng.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "Aicheng.h"
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include "AichengTopicsListWebpage.h"
13 | #include "AichengTopicWebpage.h"
14 | #include "JandownSeedWebpage.h"
15 | #include "../helper/RichTxt.h"
16 |
17 |
18 | using namespace std;
19 |
20 |
21 | static mutex g_mtx;
22 |
23 | const string&
24 | Aicheng::getPortalWebpageUrl (void) const
25 | {
26 | return(portal_url_);
27 | }
28 |
29 | static const string&
30 | getTopicsListWebpagePartUrl (Aicheng::AvClass av_class)
31 | {
32 | static const string west_part_url("thread.php?fid=5");
33 | static const string cartoon_part_url("thread.php?fid=6");
34 | static const string asia_mosaicked_part_url("thread.php?fid=4");
35 | static const string asia_non_mosaicked_part_url("thread.php?fid=16");
36 |
37 | switch (av_class) {
38 | case Aicheng::west:
39 | return(west_part_url);
40 | case Aicheng::cartoon:
41 | return(cartoon_part_url);
42 | case Aicheng::asia_mosaicked:
43 | return(asia_mosaicked_part_url);
44 | case Aicheng::asia_non_mosaicked:
45 | return(asia_non_mosaicked_part_url);
46 | }
47 | }
48 |
49 | static const string
50 | getTopicsListWebpageUrl (const string& portal_url, Aicheng::AvClass av_class)
51 | {
52 | return(portal_url + getTopicsListWebpagePartUrl(av_class));
53 | }
54 |
// Check whether the title contains any keyword of the list.
// Empty keywords never match. On a hit, which_keyword receives the first
// matching keyword (list order) and true is returned; otherwise
// which_keyword is left untouched and false is returned.
static bool
isThereInList ( const string& webpage_title,
                const vector<string>& ignore_keywords_list,
                string& which_keyword )
{
    const auto hit = find_if( ignore_keywords_list.cbegin(),
                              ignore_keywords_list.cend(),
                              [&webpage_title] (const string& keyword) {
                                  return( !keyword.empty() &&
                                          string::npos != webpage_title.find(keyword) );
                              } );
    if (ignore_keywords_list.cend() == hit) {
        return false;
    }

    which_keyword = *hit;
    return true;
}
69 |
70 | static bool
71 | parseValidTopicsUrls ( Aicheng::AvClass av_class,
72 | const string& portal_url,
73 | const string& proxy_addr,
74 | unsigned range_begin, unsigned range_end,
75 | const vector& hate_keywords_list,
76 | const vector& like_keywords_list,
77 | vector& valid_topics_urls_list,
78 | bool b_progress )
79 | {
80 | valid_topics_urls_list.clear();
81 |
82 | string current_url = getTopicsListWebpageUrl(portal_url, av_class);
83 | bool b_stop = false;
84 | unsigned topics_cnt = 0;
85 | while (!current_url.empty() && !b_stop) {
86 | AichengTopicsListWebpage aicheng_topicslist_webpage(portal_url, current_url, proxy_addr);
87 | if (!aicheng_topicslist_webpage.isLoaded()) {
88 | return(false);
89 | }
90 |
91 | const vector>& topics_title_and_url = aicheng_topicslist_webpage.getTitlesAndUrlsList();
92 | for (const auto& e : topics_title_and_url) {
93 | if (++topics_cnt > range_end) {
94 | b_stop = true;
95 | break;
96 | }
97 |
98 | const string& topic_title = e.first;
99 | const string& topic_url = e.second;
100 | static const string o_flag(RichTxt::bold_on + "O" + RichTxt::bold_off);
101 | static const string x_flag("x");
102 |
103 | // ignore the topics which do not in range
104 | if (topics_cnt < range_begin) {
105 | if (b_progress) {
106 | cout << x_flag << " " << flush;
107 | }
108 | continue;
109 | }
110 | // ignore the topics which contain hate keyword by user set
111 | string which_keyword;
112 | if (isThereInList(topic_title, hate_keywords_list, which_keyword)) {
113 | if (b_progress) {
114 | cout << x_flag << " " << flush;
115 | }
116 | continue;
117 | }
118 | // ignore the topics which do not contain like keyword by user set
119 | if ( !like_keywords_list.empty() &&
120 | !isThereInList(topic_title, like_keywords_list, which_keyword) ) {
121 | if (b_progress) {
122 | cout << x_flag << " " << flush;
123 | }
124 | continue;
125 | }
126 |
127 | valid_topics_urls_list.push_back(topic_url);
128 |
129 | if (b_progress) {
130 | cout << o_flag << " " << flush;
131 | }
132 | }
133 |
134 | current_url = aicheng_topicslist_webpage.getNextpageUrl();
135 | }
136 |
137 |
138 | return(true);
139 | }
140 |
141 | static void
142 | downloadTopicPicsAndSeed ( const string& topic_url,
143 | const string& proxy_addr,
144 | const string& path,
145 | unsigned timeout_download_pic,
146 | bool b_show_info )
147 | {
148 | AichengTopicWebpage aicheng_topics_webpage(topic_url, proxy_addr);
149 |
150 | // ready for the basename of pictures and seed.
151 | // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
152 | string base_name; // from topic title
153 |
154 | // 0) delete the web logo info;
155 | // 1) clear the "/" in topictitle string, if the "/" present in filename,
156 | // linux will treat it as directory, again, clear the "\" for windows;
157 | static const vector keyword_logos_list = { " 亚洲无码区 bt下载 - powered by phpwind.net",
158 | " 亚洲有码区 bt下载 - powered by phpwind.net",
159 | " 欧美区 bt下载 - powered by phpwind.net",
160 | " 动漫区 bt下载 - powered by phpwind.net",
161 | "|亚洲无码区 - bt下载 爱城 bt下载 ",
162 | "亚洲无码区 - bt下载 爱城 bt下载 ",
163 | "|亚洲有码区 - bt下载 爱城 bt下载 ",
164 | "亚洲有码区 - bt下载 爱城 bt下载 ",
165 | "|动漫区 - bt下载 爱城 bt下载 ",
166 | "动漫区 - bt下载 爱城 bt下载 ",
167 | "|欧美区 - bt下载 爱城 bt下载 ",
168 | "欧美区 - bt下载 爱城 bt下载 " };
169 | const string& topic_webpage_title = aicheng_topics_webpage.getTitle();
170 | auto keyword_logo_pos = string::npos;
171 | for (const auto& f : keyword_logos_list) {
172 | keyword_logo_pos = topic_webpage_title.find(f);
173 | if (string::npos != keyword_logo_pos) {
174 | break;
175 | }
176 | }
177 | remove_copy_if( topic_webpage_title.cbegin(),
178 | (string::npos == keyword_logo_pos) ? topic_webpage_title.cend() : topic_webpage_title.cbegin() + (int)keyword_logo_pos,
179 | back_inserter(base_name),
180 | [] (char ch) {return( '|' == ch || // invalid chars in windows-style filename
181 | '/' == ch ||
182 | '<' == ch ||
183 | '>' == ch ||
184 | '?' == ch ||
185 | '*' == ch ||
186 | ':' == ch ||
187 | '\\' == ch );} );
188 |
189 | // 2) the path + filename max length must less than pathconf(, _PC_NAME_MAX)
190 | const unsigned filename_max_length_without_postfix = (unsigned)pathconf(path.c_str(), _PC_NAME_MAX)
191 | - string("99").size() // picture number
192 | - string(".torrent").size();
193 | if (base_name.size() >= filename_max_length_without_postfix) {
194 | // the filename too long to create file. the way as following doesn't work, case filename encoding error:
195 | // base_name.resize(filename_max_length_without_postfix - 1), because this is string on char not wstring on wchar.
196 | // there is another stupid way, random name from 'a' to 'z'
197 | base_name.resize(16);
198 | generate( base_name.begin(), base_name.end(),
199 | [] () {return('a' + rand() % ('z' - 'a'));} );
200 | base_name = "(rename)" + base_name;
201 | }
202 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
203 |
204 | // download all pictures
205 | vector fail_download_pics_urls_list;
206 | bool b_download_pics_success = aicheng_topics_webpage.downloadAllPictures( path,
207 | base_name,
208 | timeout_download_pic,
209 | fail_download_pics_urls_list,
210 | 32 );
211 |
212 | // download seed
213 | bool b_downloaded_seed_success = false;
214 | if (!aicheng_topics_webpage.getSeedUrl().empty()) {
215 | JandownSeedWebpage jan_seed_webpage(aicheng_topics_webpage.getSeedUrl(), proxy_addr);
216 | b_downloaded_seed_success = jan_seed_webpage.downloadSeed(path, base_name);
217 | }
218 |
219 | // show result info
220 | if (!b_show_info) {
221 | return;
222 | }
223 | static const string success_info("success");
224 | static const string fail_info = RichTxt::foreground_red + "failure" + RichTxt::reset_all;
225 | g_mtx.lock();
226 | cout << " \"" << base_name << "\" - ";
227 | if (b_download_pics_success && b_downloaded_seed_success) {
228 | cout << success_info;
229 | } else {
230 | cout << fail_info << " (download error from " << topic_url << ". ";
231 | if (!b_download_pics_success) {
232 | cout << "pictures error: ";
233 | copy(fail_download_pics_urls_list.cbegin(), fail_download_pics_urls_list.cend(), ostream_iterator(cout, ", "));
234 | cout << "\b\b";
235 | }
236 | if (!b_downloaded_seed_success) {
237 | if (!b_download_pics_success) {
238 | cout << "; ";
239 | }
240 | cout << "seed error: " << aicheng_topics_webpage.getSeedUrl();
241 | }
242 | cout << ")";
243 | }
244 | cout << endl;
245 | g_mtx.unlock();
246 | }
247 |
// Hand out the proxy addresses of the list in turn (round robin).
// The position is kept in a function-local static, so it is shared by all
// callers and survives between calls. An empty list yields an empty string
// and leaves the position alone.
static const string&
getNextProxyAddr (const vector<string>& proxy_addrs_list)
{
    static const string no_proxy("");
    static unsigned next_pos;

    if (proxy_addrs_list.empty()) {
        return no_proxy;
    }

    // wrap around when the stored position is past the end of this list
    const unsigned pos = (next_pos < proxy_addrs_list.size()) ? next_pos : 0;
    next_pos = pos + 1;

    return proxy_addrs_list[pos];
}
262 |
263 | Aicheng::Aicheng ( const string& portal_url,
264 | AvClass av_class,
265 | const vector& proxy_addrs_list,
266 | unsigned range_begin, unsigned range_end,
267 | const vector& hate_keywords_list,
268 | const vector& like_keywords_list,
269 | unsigned threads_total,
270 | unsigned timeout_download_pic,
271 | const string& path )
272 | : portal_url_(portal_url)
273 | {
274 | // parse the URLs of valid topics by: range, hate keywords, like keywords
275 | cout << "Parse the URLs of topics from " << range_begin << " to " << range_end << ": " << flush;
276 | vector valid_topics_urls_list;
277 | parseValidTopicsUrls( av_class,
278 | portal_url,
279 | getNextProxyAddr(proxy_addrs_list),
280 | range_begin, range_end,
281 | hate_keywords_list,
282 | like_keywords_list,
283 | valid_topics_urls_list,
284 | true );
285 | if (valid_topics_urls_list.empty()) {
286 | cout << "(There is no topic which you like) " << endl;
287 | return;
288 | }
289 | cout << endl << endl;
290 |
291 | // download all pictures and seeds of topics
292 | cout << "Download the pictures and seeds of topics: " << endl;
293 | unsigned parsed_topics_cnt = 0;
294 | for (unsigned i = 0; i < (valid_topics_urls_list.size() / threads_total); ++i) {
295 | vector threads_list;
296 | for (unsigned j = 0; j < threads_total; ++j) {
297 | ++parsed_topics_cnt;
298 | threads_list.push_back(thread( &downloadTopicPicsAndSeed,
299 | ref(valid_topics_urls_list[i * threads_total + j]),
300 | ref(getNextProxyAddr(proxy_addrs_list)),
301 | ref(path),
302 | timeout_download_pic,
303 | true ));
304 | }
305 | for (auto& e : threads_list) {
306 | if (e.joinable()) {
307 | e.join();
308 | }
309 | }
310 |
311 | if (!threads_list.empty()) {
312 | cout << setprecision(1) << setiosflags(ios::fixed);
313 | cout << " " << RichTxt::bold_on << RichTxt::underline_on << "<---- "
314 | << 100.0 * parsed_topics_cnt / valid_topics_urls_list.size()
315 | << "% ---->" << RichTxt::underline_off << RichTxt::bold_off << endl;
316 | cout << resetiosflags(ios::fixed);
317 | }
318 | }
319 |
320 | vector threads_list;
321 | for ( unsigned i = (valid_topics_urls_list.size() / threads_total) * threads_total;
322 | i < valid_topics_urls_list.size();
323 | ++i ) {
324 | ++parsed_topics_cnt;
325 | threads_list.push_back(thread( &downloadTopicPicsAndSeed,
326 | ref(valid_topics_urls_list[i]),
327 | ref(getNextProxyAddr(proxy_addrs_list)),
328 | ref(path),
329 | timeout_download_pic,
330 | true ));
331 | }
332 | for (auto& e : threads_list) {
333 | if (e.joinable()) {
334 | e.join();
335 | }
336 | }
337 | if (!threads_list.empty()) {
338 | cout << setprecision(1) << setiosflags(ios::fixed);
339 | cout << " " << RichTxt::bold_on << RichTxt::underline_on << "<---- "
340 | << 100.0 * parsed_topics_cnt / valid_topics_urls_list.size()
341 | << "% ---->" << RichTxt::underline_off << RichTxt::bold_off << endl;
342 | cout << resetiosflags(ios::fixed);
343 | }
344 |
345 | cout << endl;
346 | cout << "Hey kiddo, your hot babes " << path << ", enjoy it! " << endl;
347 | }
348 |
349 | Aicheng::~Aicheng ()
350 | {
351 | ;
352 | }
353 |
354 |
--------------------------------------------------------------------------------
/src/lib/self/Aicheng.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 |
8 | using std::string;
9 | using std::vector;
10 |
11 |
12 | class Aicheng // one download run against the portal; the whole run is performed by the constructor
13 | {
14 | public:
15 | enum AvClass {west, cartoon, asia_mosaicked, asia_non_mosaicked}; // forum section whose topics list is parsed
16 |
17 | public:
18 | Aicheng ( const string& portal_url,
19 | AvClass av_class,
20 | const vector& proxy_addrs_list, // handed out in turn; may be empty
21 | unsigned range_begin, unsigned range_end, // topic numbers to keep, counted from 1
22 | const vector& hate_keywords_list, // a topic whose title contains one of these is dropped
23 | const vector& like_keywords_list, // when not empty, a title must contain one of these
24 | unsigned threads_total, // number of topics downloaded in parallel
25 | unsigned timeout_download_pic,
26 | const string& path ); // directory the pictures and seeds are saved to
27 | virtual ~Aicheng ();
28 |
29 | const string& getPortalWebpageUrl (void) const; // the portal_url given to the constructor
30 |
31 |
32 | private:
33 | const string portal_url_;
34 | };
35 |
36 |
--------------------------------------------------------------------------------
/src/lib/self/AichengTopicWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "AichengTopicWebpage.h"
4 | #include
5 | #include
6 | #include
7 | #include "../helper/Misc.h"
8 |
9 |
10 | using namespace std;
11 |
12 | static bool
13 | parsePicturesUrlsHelper ( const string& webpage_txt,
14 | vector& pictures_urls_list,
15 | const string& keyword_begin,
16 | const string& keyword_end )
17 | {
18 | bool b_ok = false;
19 |
20 | size_t keyword_pic_begin_pos = 0;
21 | while (true) {
22 | // parse picture URL
23 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt,
24 | keyword_begin,
25 | keyword_end,
26 | keyword_pic_begin_pos );
27 | string pic_url = pair_tmp.first;
28 | if (pic_url.empty()) {
29 | break;
30 | }
31 | keyword_pic_begin_pos = pair_tmp.second;
32 | b_ok = true;
33 |
34 | // there are some bad picture-webspaces and logo pci, ignore them
35 | bool b_ignore_url = false;
36 | static const vector ignore_urls_keywords_list = {
37 | "iceimg.com",
38 | };
39 | for (const auto& e : ignore_urls_keywords_list) {
40 | if (string::npos != pic_url.find(e)) {
41 | b_ignore_url = true;
42 | break;
43 | }
44 | }
45 | if (b_ignore_url) {
46 | continue;
47 | }
48 |
49 | // convert https to http
50 | static const string keyword_https("https://");
51 | const auto https_pos = pic_url.find(keyword_https);
52 | if (string::npos != https_pos) {
53 | static const string keyword_http("http://");
54 | pic_url.replace(https_pos, keyword_https.size(), keyword_http);
55 | }
56 |
57 | // save the picture URL
58 | pictures_urls_list.push_back(pic_url);
59 | }
60 |
61 | return(b_ok);
62 | }
63 |
64 | static bool
65 | parsePicturesUrls (const string& webpage_txt, vector& pictures_urls_list)
66 | {
67 | pictures_urls_list.clear();
68 |
69 | // just parse the toptip
70 | static const string keyword_toptip_begin("本页主题: ");
71 | static const string keyword_toptip_end(">[楼 主]");
72 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt,
73 | keyword_toptip_begin,
74 | keyword_toptip_end );
75 | string toptip = pair_tmp.first;
76 | if (toptip.empty()) {
77 | cerr << "ERROR! there is no toptip. " << endl;
78 | return(false);
79 | }
80 |
81 | // the list may be on the webpage at the same time
82 | static const vector> begin_and_end_keywords_list = { make_pair(" keywords_seed_begin_list = { "http://www.jandown.com",
101 | "http://jandown.com",
102 | "http://www6.mimima.com",
103 | "http://mimima.com" };
104 |
105 | const auto body_pos = webpage_txt.find("");
106 | if (string::npos == body_pos) {
107 | //cerr << "warning! parseseedurl() cannot find the keyword \"\"" << endl;
108 | return(false);
109 | }
110 | const string& body = webpage_txt.substr(body_pos);
111 |
112 | for (const auto& e : keywords_seed_begin_list) {
113 | const string& keyword_seed_begin = e;
114 | static const string keyword_seed_end("\"");
115 |
116 | const pair& pair_tmp = fetchStringBetweenKeywords( body,
117 | keyword_seed_begin,
118 | keyword_seed_end );
119 | if (!pair_tmp.first.empty()) {
120 | seed_url = keyword_seed_begin + pair_tmp.first;
121 | return(true);
122 | }
123 | }
124 |
125 |
126 | return(false);
127 | }
128 |
129 | AichengTopicWebpage::AichengTopicWebpage (const string& url, const string& proxy_addr)
130 | : TopicWebpage(url, parsePicturesUrls, parseSeedUrl, proxy_addr, "gbk", "UTF-8")
131 | {
132 | ;
133 | }
134 |
135 | AichengTopicWebpage::~AichengTopicWebpage ()
136 | {
137 | ;
138 | }
139 |
140 |
--------------------------------------------------------------------------------
/src/lib/self/AichengTopicWebpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include "TopicWebpage.h"
8 |
9 | using std::string;
10 | using std::vector;
11 |
12 |
13 | class AichengTopicWebpage : public TopicWebpage
14 | {
15 | public:
16 | AichengTopicWebpage (const string& url, const string& proxy_addr);
17 | virtual ~AichengTopicWebpage ();
18 | };
19 |
20 |
--------------------------------------------------------------------------------
/src/lib/self/AichengTopicsListWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "AichengTopicsListWebpage.h"
4 | #include
5 | #include
6 | #include "../helper/Misc.h"
7 |
8 |
9 | using namespace std;
10 |
11 |
12 |
13 | static bool
14 | parseTitlesAndUrls ( const string& webpage_txt,
15 | const string& portal_url,
16 | vector>& titles_and_urls_list )
17 | {
18 | const unsigned size_back = titles_and_urls_list.size();
19 |
20 | const auto topics_list_txt_pos = webpage_txt.find(R"(style="border-top:0">普通主题 )");
21 | size_t keyword_topic_url_begin_pos = ((string::npos == topics_list_txt_pos) ? 0 : topics_list_txt_pos);
22 | size_t keyword_topic_url_end_pos = 0;
23 |
24 | while (true) {
25 | // parse topic URL
26 | static const string keyword_topic_url_begin("");
42 | const pair& pair_title = fetchStringBetweenKeywords( webpage_txt,
43 | keyword_topic_title_begin,
44 | keyword_topic_title_end,
45 | //keyword_topic_url_end_pos - keyword_topic_title_begin.size() );
46 | keyword_topic_url_end_pos );
47 | const string& topic_title = pair_title.first;
48 | keyword_topic_url_begin_pos = pair_title.second;
49 |
50 | // save url and title of the topic
51 | titles_and_urls_list.push_back(make_pair(topic_title, topic_url));
52 | }
53 |
54 | return(titles_and_urls_list.size() > size_back);
55 | }
56 |
57 | static bool
58 | parseNextpageUrl (const string& webpage_txt, const string& portal_url, string& nextpage_url)
59 | {
60 | nextpage_url.empty();
61 |
62 | static const string keyword_nextpage_begin("");
64 | const string& nextpage_url_part = fetchStringBetweenKeywords( webpage_txt,
65 | keyword_nextpage_begin,
66 | keyword_nextpage_end ).first;
67 | if (nextpage_url_part.empty()) {
68 | return(false);
69 | }
70 |
71 | // portal_url 中多了 "/bt"
72 | nextpage_url = string(portal_url.cbegin(), portal_url.cend() - (const int)string("/bt").length()) + nextpage_url_part;
73 |
74 | return(true);
75 | }
76 |
77 | AichengTopicsListWebpage::AichengTopicsListWebpage (const string& portal_url, const string& url, const string& proxy_addr)
78 | : TopicsListWebpage(portal_url, url, parseTitlesAndUrls, parseNextpageUrl, proxy_addr, "gbk", "UTF-8")
79 | {
80 | ;
81 | }
82 |
83 | AichengTopicsListWebpage::~AichengTopicsListWebpage ()
84 | {
85 | ;
86 | }
87 |
88 |
--------------------------------------------------------------------------------
/src/lib/self/AichengTopicsListWebpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include "TopicsListWebpage.h"
8 |
9 | using std::string;
10 | using std::vector;
11 |
12 |
13 | class AichengTopicsListWebpage : public TopicsListWebpage
14 | {
15 | public:
16 | AichengTopicsListWebpage (const string& portal_url, const string& url, const string& proxy_addr);
17 | virtual ~AichengTopicsListWebpage ();
18 | };
19 |
--------------------------------------------------------------------------------
/src/lib/self/Caoliu.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "Caoliu.h"
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include "CaoliuTopicsListWebpage.h"
13 | #include "CaoliuTopicWebpage.h"
14 | #include "RmdownSeedWebpage.h"
15 | #include "../helper/RichTxt.h"
16 |
17 |
18 | using namespace std;
19 |
20 | static mutex g_mtx;
21 |
22 | const string&
23 | Caoliu::getPortalWebpageUrl (void) const
24 | {
25 | return(portal_url_);
26 | }
27 |
28 | static const string&
29 | getTopicsListWebpagePartUrl (Caoliu::AvClass av_class)
30 | {
31 | // reposted
32 | static const string west_reposted_part_url("thread0806.php?fid=19");
33 | static const string cartoon_reposted_part_url("thread0806.php?fid=24");
34 | static const string asia_mosaicked_reposted_part_url("thread0806.php?fid=18");
35 | static const string asia_non_mosaicked_reposted_part_url("thread0806.php?fid=17");
36 |
37 | // original
38 | static const string west_original_part_url("thread0806.php?fid=4");
39 | static const string cartoon_original_part_url("thread0806.php?fid=5");
40 | static const string asia_mosaicked_original_part_url("thread0806.php?fid=15");
41 | static const string asia_non_mosaicked_original_part_url("thread0806.php?fid=2");
42 |
43 | // selfie
44 | static const string selfie_part_url("thread0806.php?fid=16");
45 |
46 | switch (av_class) {
47 | case Caoliu::west_reposted:
48 | return(west_reposted_part_url);
49 | case Caoliu::cartoon_reposted:
50 | return(cartoon_reposted_part_url);
51 | case Caoliu::asia_mosaicked_reposted:
52 | return(asia_mosaicked_reposted_part_url);
53 | case Caoliu::asia_non_mosaicked_reposted:
54 | return(asia_non_mosaicked_reposted_part_url);
55 | case Caoliu::west_original:
56 | return(west_original_part_url);
57 | case Caoliu::cartoon_original:
58 | return(cartoon_original_part_url);
59 | case Caoliu::asia_mosaicked_original:
60 | return(asia_mosaicked_original_part_url);
61 | case Caoliu::asia_non_mosaicked_original:
62 | return(asia_non_mosaicked_original_part_url);
63 | case Caoliu::selfie:
64 | return(selfie_part_url);
65 | }
66 | }
67 |
68 | static const string
69 | getTopicsListWebpageUrl (const string& portal_url, Caoliu::AvClass av_class)
70 | {
71 | return(portal_url + getTopicsListWebpagePartUrl(av_class));
72 | }
73 |
// Check whether `webpage_title` contains any keyword from `ignore_keywords_list`.
//
// Empty keywords are skipped (an empty string would match every title).
// On the first hit the matching keyword is copied into `which_keyword` and
// true is returned; when nothing matches, `which_keyword` is left untouched
// and false is returned.
static bool
isThereInList ( const std::string& webpage_title,
                const std::vector<std::string>& ignore_keywords_list,
                std::string& which_keyword )
{
    for (const std::string& keyword : ignore_keywords_list) {
        if (keyword.empty()) {
            continue;
        }
        if (std::string::npos == webpage_title.find(keyword)) {
            continue;
        }

        which_keyword = keyword;
        return true;
    }

    return false;
}
88 |
89 | static bool
90 | parseValidTopicsUrls ( Caoliu::AvClass av_class,
91 | const string& portal_url,
92 | const string& proxy_addr,
93 | unsigned range_begin, unsigned range_end,
94 | const vector& hate_keywords_list,
95 | const vector& like_keywords_list,
96 | vector& valid_topics_urls_list,
97 | bool b_progress )
98 | {
99 | valid_topics_urls_list.clear();
100 |
101 | string current_url = getTopicsListWebpageUrl(portal_url, av_class);
102 | bool b_stop = false;
103 | unsigned topics_cnt = 0;
104 | while (!current_url.empty() && !b_stop) {
105 | CaoliuTopicsListWebpage caoliu_topicslist_webpage(portal_url, current_url, proxy_addr);
106 | if (!caoliu_topicslist_webpage.isLoaded()) {
107 | return(false);
108 | }
109 |
110 | const vector>& topics_title_and_url = caoliu_topicslist_webpage.getTitlesAndUrlsList();
111 | for (const auto& e : topics_title_and_url) {
112 | if (++topics_cnt > range_end) {
113 | b_stop = true;
114 | break;
115 | }
116 |
117 | const string& topic_title = e.first;
118 | const string& topic_url = e.second;
119 | static const string o_flag(RichTxt::bold_on + "O" + RichTxt::bold_off);
120 | static const string x_flag("x");
121 |
122 | // ignore the topics which do not in range
123 | if (topics_cnt < range_begin) {
124 | if (b_progress) {
125 | cout << x_flag << " " << flush;
126 | }
127 | continue;
128 | }
129 | // ignore the topics which contain hate keyword by user set
130 | string which_keyword;
131 | if (isThereInList(topic_title, hate_keywords_list, which_keyword)) {
132 | if (b_progress) {
133 | cout << x_flag << " " << flush;
134 | }
135 | continue;
136 | }
137 | // ignore the topics which do not contain like keyword by user set
138 | if ( !like_keywords_list.empty() &&
139 | !isThereInList(topic_title, like_keywords_list, which_keyword) ) {
140 | if (b_progress) {
141 | cout << x_flag << " " << flush;
142 | }
143 | continue;
144 | }
145 |
146 | valid_topics_urls_list.push_back(topic_url);
147 |
148 | if (b_progress) {
149 | cout << o_flag << " " << flush;
150 | }
151 | }
152 |
153 | current_url = caoliu_topicslist_webpage.getNextpageUrl();
154 | }
155 |
156 |
157 | return(true);
158 | }
159 |
160 | static void
161 | downloadTopicPicsAndSeed ( const string& topic_url,
162 | const string& proxy_addr,
163 | const string& path,
164 | unsigned timeout_download_pic,
165 | unsigned pictures_total,
166 | bool b_download_seed,
167 | bool b_show_info )
168 | {
169 | CaoliuTopicWebpage caoliu_topics_webpage(topic_url, proxy_addr);
170 |
171 | // ready for the basename of pictures and seed.
172 | // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
173 | string base_name; // from topic title
174 |
175 | // 0) delete the web logo info;
176 | // 1) clear the "/" in topictitle string, if the "/" present in filename,
177 | // linux will treat it as directory, again, clear the "\" for windows;
178 | static const vector keyword_logos_list = {" 草榴社區 - powered by phpwind.net"};
179 | const string& topic_webpage_title = caoliu_topics_webpage.getTitle();
180 | auto keyword_logo_pos = string::npos;
181 | for (const auto& f : keyword_logos_list) {
182 | keyword_logo_pos = topic_webpage_title.find(f);
183 | if (string::npos != keyword_logo_pos) {
184 | break;
185 | }
186 | }
187 | remove_copy_if( topic_webpage_title.cbegin(),
188 | (string::npos == keyword_logo_pos) ? topic_webpage_title.cend() : topic_webpage_title.cbegin() + (int)keyword_logo_pos,
189 | back_inserter(base_name),
190 | [] (char ch) {return( '|' == ch || // invalid chars in windows-sytle filename
191 | '/' == ch ||
192 | '<' == ch ||
193 | '>' == ch ||
194 | '?' == ch ||
195 | '*' == ch ||
196 | ':' == ch ||
197 | '\\' == ch );} );
198 |
199 | // 2) the path + filename max length must less than pathconf(, _PC_NAME_MAX)
200 | const unsigned filename_max_length_without_postfix = (unsigned)pathconf(path.c_str(), _PC_NAME_MAX)
201 | - string("99").size() // picture number
202 | - string(".torrent").size();
203 | if (base_name.size() >= filename_max_length_without_postfix) {
204 | // the filename too long to create file. the way as following doesn't work, case filename encoding error:
205 | // base_name.resize(filename_max_length_without_postfix - 1), because this is string on char not wstring on wchar.
206 | // there is another stupid way, random name from 'a' to 'z'
207 | base_name.resize(16);
208 | generate( base_name.begin(), base_name.end(),
209 | [] () {return('a' + rand() % ('z' - 'a'));} );
210 | base_name = "(rename)" + base_name;
211 | }
212 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
213 |
214 | // download all pictures
215 | //const vector& urls = caoliu_topics_webpage.getPicturesUrlsList();
216 | //for (const auto& e : urls) {
217 | ////cout << "------------" << endl;
218 | //cout << e << "\n\t";
219 | //cout << caoliu_topics_webpage.getRemoteFiletype(e) << endl;
220 | ////cout << "------------" << endl;
221 | //}
222 | vector fail_download_pics_urls_list;
223 | bool b_download_pics_success = caoliu_topics_webpage.downloadAllPictures( path,
224 | base_name,
225 | timeout_download_pic,
226 | fail_download_pics_urls_list,
227 | pictures_total );
228 |
229 | // download seed
230 | bool b_downloaded_seed_success = true;
231 | if (b_download_seed) {
232 | b_downloaded_seed_success = false;
233 | if (!caoliu_topics_webpage.getSeedUrl().empty()) {
234 | RmdownSeedWebpage rm_seed_webpage(caoliu_topics_webpage.getSeedUrl(), proxy_addr);
235 | b_downloaded_seed_success = rm_seed_webpage.downloadSeed(path, base_name);
236 | }
237 | }
238 |
239 | // show result info
240 | if (!b_show_info) {
241 | return;
242 | }
243 | static const string success_info("success");
244 | static const string fail_info = RichTxt::foreground_red + "failure" + RichTxt::reset_all;
245 | g_mtx.lock();
246 | cout << " \"" << base_name << "\" - ";
247 | if (b_download_pics_success && b_downloaded_seed_success) {
248 | cout << success_info;
249 | } else {
250 | cout << fail_info << " (download error from " << topic_url << ". ";
251 | if (!b_download_pics_success) {
252 | cout << "pictures error: ";
253 | copy(fail_download_pics_urls_list.cbegin(), fail_download_pics_urls_list.cend(), ostream_iterator(cout, ", "));
254 | cout << "\b\b";
255 | }
256 | if (b_download_seed && !b_downloaded_seed_success) {
257 | if (!b_download_pics_success) {
258 | cout << "; ";
259 | }
260 | cout << "seed error: " << caoliu_topics_webpage.getSeedUrl();
261 | }
262 | cout << ")";
263 | }
264 | cout << endl;
265 | g_mtx.unlock();
266 | }
267 |
// Hand out the proxy addresses of `proxy_addrs_list` one after another,
// wrapping around to the first entry after the last one.
//
// Returns a reference to an empty static string when the list is empty,
// otherwise a reference to an element of `proxy_addrs_list`, which is only
// valid while that vector lives.
//
// The cursor is a plain function-local static shared by all calls (and by all
// lists passed in); it is not protected by any lock.
static const std::string&
getNextProxyAddr (const std::vector<std::string>& proxy_addrs_list)
{
    static const std::string no_proxy;
    static unsigned next_index = 0;

    if (proxy_addrs_list.empty()) {
        return no_proxy;
    }

    if (next_index >= proxy_addrs_list.size()) {
        next_index = 0;
    }
    const std::string& chosen = proxy_addrs_list[next_index];
    ++next_index;
    return chosen;
}
282 |
283 | Caoliu::Caoliu ( const string& portal_url,
284 | AvClass av_class,
285 | const vector& proxy_addrs_list,
286 | unsigned range_begin, unsigned range_end,
287 | const vector& hate_keywords_list,
288 | const vector& like_keywords_list,
289 | unsigned threads_total,
290 | unsigned timeout_download_pic,
291 | const string& path )
292 | : portal_url_(portal_url)
293 | {
294 | // parse the URLs of valid topics by: range, hate keywords, like keywords
295 | cout << "Parse the URLs of topics from " << range_begin << " to " << range_end << ": " << flush;
296 | vector valid_topics_urls_list;
297 | parseValidTopicsUrls( av_class,
298 | portal_url,
299 | getNextProxyAddr(proxy_addrs_list),
300 | range_begin, range_end,
301 | hate_keywords_list,
302 | like_keywords_list,
303 | valid_topics_urls_list,
304 | true );
305 | cout << endl;
306 | if (valid_topics_urls_list.empty()) {
307 | cout << "There is no topic which you like. " << endl;
308 | return;
309 | }
310 | cout << endl;
311 |
312 | // check just download picutures for dagaier?
313 | unsigned pictures_total = 2;
314 | bool b_download_seed = true;
315 | if (Caoliu::selfie == av_class) {
316 | pictures_total = 1024; // the max total
317 | b_download_seed = false;
318 | }
319 |
320 | // download all pictures and seeds of topics
321 | cout << "Download the pictures and seeds of topics: " << endl;
322 | unsigned parsed_topics_cnt = 0;
323 | for (unsigned i = 0; i < (valid_topics_urls_list.size() / threads_total); ++i) {
324 | vector threads_list;
325 | for (unsigned j = 0; j < threads_total; ++j) {
326 | ++parsed_topics_cnt;
327 | threads_list.push_back(thread( &downloadTopicPicsAndSeed,
328 | ref(valid_topics_urls_list[i * threads_total + j]),
329 | ref(getNextProxyAddr(proxy_addrs_list)),
330 | ref(path),
331 | timeout_download_pic,
332 | pictures_total,
333 | b_download_seed,
334 | true ));
335 | }
336 | for (auto& e : threads_list) {
337 | if (e.joinable()) {
338 | e.join();
339 | }
340 | }
341 |
342 | if (!threads_list.empty()) {
343 | cout << setprecision(1) << setiosflags(ios::fixed);
344 | cout << " " << RichTxt::bold_on << RichTxt::underline_on << "<---- "
345 | << 100.0 * parsed_topics_cnt / valid_topics_urls_list.size()
346 | << "% ---->" << RichTxt::underline_off << RichTxt::bold_off << endl;
347 | cout << resetiosflags(ios::fixed);
348 | }
349 | }
350 |
351 | vector threads_list;
352 | for (unsigned i = (valid_topics_urls_list.size() / threads_total) * threads_total; i < valid_topics_urls_list.size(); ++i) {
353 | ++parsed_topics_cnt;
354 | threads_list.push_back(thread( &downloadTopicPicsAndSeed,
355 | ref(valid_topics_urls_list[i]),
356 | ref(getNextProxyAddr(proxy_addrs_list)),
357 | ref(path),
358 | timeout_download_pic,
359 | pictures_total,
360 | b_download_seed,
361 | true ));
362 | }
363 | for (auto& e : threads_list) {
364 | if (e.joinable()) {
365 | e.join();
366 | }
367 | }
368 | if (!threads_list.empty()) {
369 | cout << setprecision(1) << setiosflags(ios::fixed);
370 | cout << " " << RichTxt::bold_on << RichTxt::underline_on << "<---- "
371 | << 100.0 * parsed_topics_cnt / valid_topics_urls_list.size()
372 | << "% ---->" << RichTxt::underline_off << RichTxt::bold_off << endl;
373 | cout << resetiosflags(ios::fixed);
374 | }
375 |
376 | cout << endl;
377 | cout << "Hey kiddo, your hot babes " << path << ", enjoy it! " << endl;
378 | }
379 |
380 | Caoliu::~Caoliu ()
381 | {
382 | ;
383 | }
384 |
385 |
--------------------------------------------------------------------------------
/src/lib/self/Caoliu.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 |
8 | using std::string;
9 | using std::vector;
10 |
11 |
// Crawler for one board of the Caoliu site. All the work (parsing the topics
// list and downloading pictures/seeds) is done by the constructor.
class Caoliu
{
    public:
        // The boards that can be crawled.
        enum AvClass { west_reposted, cartoon_reposted, asia_mosaicked_reposted, asia_non_mosaicked_reposted,
                       west_original, cartoon_original, asia_mosaicked_original, asia_non_mosaicked_original,
                       selfie };

    public:
        // portal_url           - base URL of the site
        // av_class             - which board to crawl
        // proxy_addrs_list     - proxy addresses to use
        // range_begin/end      - range of topics to consider
        // hate_keywords_list   - titles containing one of these are skipped
        // like_keywords_list   - when non-empty, only titles containing one are kept
        // threads_total        - number of parallel downloads
        // timeout_download_pic - timeout for one picture download
        // path                 - destination directory
        Caoliu ( const std::string& portal_url,
                 AvClass av_class,
                 const std::vector<std::string>& proxy_addrs_list,
                 unsigned range_begin, unsigned range_end,
                 const std::vector<std::string>& hate_keywords_list,
                 const std::vector<std::string>& like_keywords_list,
                 unsigned threads_total,
                 unsigned timeout_download_pic,
                 const std::string& path );
        virtual ~Caoliu ();

        // The portal URL passed to the constructor.
        const std::string& getPortalWebpageUrl (void) const;


    private:
        const std::string portal_url_;
};
37 |
38 |
--------------------------------------------------------------------------------
/src/lib/self/CaoliuTopicWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "CaoliuTopicWebpage.h"
4 | #include
5 | #include
6 | #include
7 | #include "../helper/Misc.h"
8 |
9 |
10 | using namespace std;
11 |
12 |
13 | static bool
14 | parsePicturesUrlsHelper ( const string& webpage_txt,
15 | vector& pictures_urls_list,
16 | const string& keyword_begin,
17 | const string& keyword_end )
18 | {
19 | bool b_ok = false;
20 |
21 | size_t start_pos = 0;
22 | while (true) {
23 | // parse picture URL
24 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt,
25 | keyword_begin,
26 | keyword_end,
27 | start_pos );
28 | string pic_url = pair_tmp.first;
29 | if (pic_url.empty()) {
30 | break;
31 | }
32 | start_pos = pair_tmp.second;
33 | b_ok = true;
34 |
35 | // there are some bad picture-webspaces and logo pci, ignore them
36 | bool b_ignore_url = false;
37 | static const vector ignore_urls_keywords_list = {
38 | "iceimg.com",
39 | "picuphost.com",
40 | // caoliu froum selfie member's logo.
41 | // http://ww4.sinaimg.cn/mw690/005uMz33gw1egsm41zq6qj30f80b4gm9.jpg
42 | // >>>>
43 | "005uMz33gw1eh3a1r6ak0j30d005zt98.jpg",
44 | "005uMz33gw1egsm41zq6qj30f80b4gm9.jpg",
45 | // <<<<
46 | };
47 | for (const auto& e : ignore_urls_keywords_list) {
48 | if (string::npos != pic_url.find(e)) {
49 | b_ignore_url = true;
50 | break;
51 | }
52 | }
53 | if (b_ignore_url) {
54 | continue;
55 | }
56 |
57 | // save the picture URL
58 | pictures_urls_list.push_back(pic_url);
59 | }
60 |
61 | return(b_ok);
62 | }
63 |
64 | static bool
65 | parsePicturesUrls (const string& webpage_txt, vector& pictures_urls_list)
66 | {
67 | pictures_urls_list.clear();
68 |
69 | // just parse the toptip
70 | static const string keyword_toptip_begin("本頁主題: ");
71 | static const string keyword_toptip_end("[樓主] ");
72 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt,
73 | keyword_toptip_begin,
74 | keyword_toptip_end );
75 | string toptip = pair_tmp.first;
76 | if (toptip.empty()) {
77 | cerr << "ERROR! there is no toptip. " << endl;
78 | return(false);
79 | }
80 |
81 | // the list may be on the webpage at the same time
82 | static const vector> begin_and_end_keywords_list = { make_pair(" keywords_seed_begin_list = { "http://www.rmdown.com/link.php?hash=",
101 | "http://rmdown.com/link.php?hash=",
102 | "http://www.xunfs.com/link.php?hash=",
103 | "http://xunfs.com/link.php?hash=" };
104 | for (const auto& e : keywords_seed_begin_list) {
105 | const string& keyword_seed_begin = e;
106 | static const string keyword_seed_end("");
107 |
108 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt,
109 | keyword_seed_begin,
110 | keyword_seed_end );
111 | if (!pair_tmp.first.empty()) {
112 | seed_url = keyword_seed_begin + pair_tmp.first;
113 | return(true);
114 | }
115 | }
116 |
117 |
118 | return(false);
119 | }
120 |
121 | CaoliuTopicWebpage::CaoliuTopicWebpage (const string& url, const string& proxy_addr)
122 | : TopicWebpage(url, parsePicturesUrls, parseSeedUrl, proxy_addr, "gbk", "UTF-8")
123 | {
124 | ;
125 | }
126 |
127 | CaoliuTopicWebpage::~CaoliuTopicWebpage ()
128 | {
129 | ;
130 | }
131 |
132 |
--------------------------------------------------------------------------------
/src/lib/self/CaoliuTopicWebpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include "TopicWebpage.h"
8 |
9 | using std::string;
10 | using std::vector;
11 |
12 |
13 | class CaoliuTopicWebpage : public TopicWebpage
14 | {
15 | public:
16 | CaoliuTopicWebpage (const string& url, const string& proxy_addr);
17 | virtual ~CaoliuTopicWebpage ();
18 | };
19 |
20 |
--------------------------------------------------------------------------------
/src/lib/self/CaoliuTopicsListWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "CaoliuTopicsListWebpage.h"
4 | #include
5 | #include
6 | #include "../helper/Misc.h"
7 |
8 | using namespace std;
9 |
10 |
11 |
12 | static bool
13 | parseTitlesAndUrls ( const string& webpage_txt,
14 | const string& portal_url,
15 | vector>& titles_and_urls_list )
16 | {
17 | const unsigned size_back = titles_and_urls_list.size();
18 |
19 | size_t keyword_topic_url_begin_pos = 0, keyword_topic_url_end_pos = 0;
20 | keyword_topic_url_begin_pos = webpage_txt.find("普通主題");
21 | if (string::npos == keyword_topic_url_begin_pos) {
22 | keyword_topic_url_begin_pos = 0;
23 | }
24 |
25 | while (true) {
26 | // parse topic URL
27 | static const string keyword_topic_url_begin("");
44 | const pair& pair_title = fetchStringBetweenKeywords( webpage_txt,
45 | keyword_topic_title_begin,
46 | keyword_topic_title_end,
47 | keyword_topic_url_end_pos );
48 | const string& topic_title = pair_title.first;
49 | keyword_topic_url_begin_pos = pair_title.second;
50 |
51 | // save url and title of the topic
52 | titles_and_urls_list.push_back(make_pair(topic_title, topic_url));
53 | }
54 |
55 | return(titles_and_urls_list.size() > size_back);
56 | }
57 |
58 | static bool
59 | parseNextpageUrl (const string& webpage_txt, const string& portal_url, string& nextpage_url)
60 | {
61 | nextpage_url.empty();
62 |
63 | static const string keyword_nextpage("下一頁");
64 | const auto keyword_nextpage_pos = webpage_txt.find(keyword_nextpage);
65 | if (string::npos == keyword_nextpage_pos) {
66 | return(false);
67 | }
68 |
69 | static const string keyword_href("
6 | #include
7 | #include "TopicsListWebpage.h"
8 |
9 | using std::string;
10 | using std::vector;
11 |
12 |
13 | class CaoliuTopicsListWebpage : public TopicsListWebpage
14 | {
15 | public:
16 | CaoliuTopicsListWebpage (const string& portal_url, const string& url, const string& proxy_addr);
17 | virtual ~CaoliuTopicsListWebpage ();
18 | };
19 |
--------------------------------------------------------------------------------
/src/lib/self/JandownSeedWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "JandownSeedWebpage.h"
4 | #include
5 | #include
6 | #include
7 | #include "../helper/Misc.h"
8 |
9 |
10 | using namespace std;
11 |
12 | static bool
13 | parsePostMultiSections ( const string& webpage_txt,
14 | vector>& post_sections_list )
15 | {
16 | // parse the code section
17 | static const string keyword_code_section_begin(" ");
19 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt,
20 | keyword_code_section_begin,
21 | keyword_code_section_end );
22 | const string& ref_content = pair_tmp.first;
23 | if (ref_content.empty()) {
24 | cerr << "WARNING! parsePostMultiSections() CANNOT find the keyword "
25 | << "\"" << keyword_code_section_begin << "\"" << " and "
26 | << "\"" << keyword_code_section_end << "\"" << endl;
27 | return(false);
28 | }
29 |
30 | post_sections_list.push_back(make_pair("code", ref_content));
31 | return(true);
32 | }
33 |
34 | // seed fetch URL. http://www.jandown.com/ and http://www6.mimima.com/ are
35 | // the same one website, on the other word, from http://www.jandown.com/abcd
36 | // download the seed file same as from http://www6.mimima.com/abcd, so, I need
37 | // just ONE fetch URL
38 | JandownSeedWebpage::JandownSeedWebpage (const string& url, const string& proxy_addr)
39 | : SeedWebpage(url, proxy_addr, "http://www.jandown.com/fetch.php", parsePostMultiSections)
40 | {
41 | ;
42 | }
43 |
44 | JandownSeedWebpage::~JandownSeedWebpage ()
45 | {
46 | ;
47 | }
48 |
49 |
--------------------------------------------------------------------------------
/src/lib/self/JandownSeedWebpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include "SeedWebpage.h"
7 |
8 | using std::string;
9 |
10 |
11 | class JandownSeedWebpage : public SeedWebpage
12 | {
13 | public:
14 | JandownSeedWebpage (const string& url, const string& proxy_addr);
15 | virtual ~JandownSeedWebpage ();
16 | };
17 |
--------------------------------------------------------------------------------
/src/lib/self/RmdownSeedWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "RmdownSeedWebpage.h"
4 | #include
5 | #include
6 | #include
7 | #include "../helper/Misc.h"
8 |
9 |
10 | using namespace std;
11 |
12 | static bool
13 | parsePostMultiSections ( const string& webpage_txt,
14 | vector>& post_sections_list )
15 | {
16 | // parse the ref section
17 | static const string& keyword_ref_section_begin(" & pair_tmp = fetchStringBetweenKeywords( webpage_txt,
20 | keyword_ref_section_begin,
21 | keyword_ref_section_end );
22 | const string& ref_content = pair_tmp.first;
23 | if (ref_content.empty()) {
24 | cerr << "WARNING! parsePostMultiSections() CANNOT find the keyword "
25 | << "\"" << keyword_ref_section_begin << "\"" << " and "
26 | << "\"" << keyword_ref_section_end << "\"" << endl;
27 | return(false);
28 | }
29 | post_sections_list.push_back(make_pair("ref", ref_content));
30 | const auto keyword_ref_section_end_pos = pair_tmp.second;
31 |
32 | // parse the reff section
33 | static const string& keyword_reff_section_begin("value=\"");
34 | static const string& keyword_reff_section_end("\"");
35 | const pair& pair_tmp2 = fetchStringBetweenKeywords( webpage_txt,
36 | keyword_reff_section_begin,
37 | keyword_reff_section_end,
38 | keyword_ref_section_end_pos );
39 | const string& reff_content = pair_tmp2.first;
40 | if (reff_content.empty()) {
41 | cerr << "WARNING! parsePostMultiSections() CANNOT find the keyword "
42 | << "\"" << keyword_reff_section_begin << "\"" << " and "
43 | << "\"" << keyword_reff_section_end << "\"" << endl;
44 | return(false);
45 | }
46 | post_sections_list.push_back(make_pair("reff", reff_content));
47 |
48 |
49 | return(true);
50 | }
51 |
52 | // seed fetch URL. http://www.rmdown.com/ and http://www.xunfs.com/ are
53 | // the same one website, on the other word, from http://www.rmdown.com/abcd
54 | // download the seed file same as from http://www.xunfs.com/abcd, so, I need
55 | // just ONE fetch URL
56 | RmdownSeedWebpage::RmdownSeedWebpage (const string& url, const string& proxy_addr)
57 | : SeedWebpage(url, proxy_addr, "http://www.rmdown.com/download.php", parsePostMultiSections)
58 | {
59 | ;
60 | }
61 |
62 | RmdownSeedWebpage::~RmdownSeedWebpage ()
63 | {
64 | ;
65 | }
66 |
67 |
--------------------------------------------------------------------------------
/src/lib/self/RmdownSeedWebpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include "SeedWebpage.h"
7 |
8 | using std::string;
9 |
10 |
11 | class RmdownSeedWebpage : public SeedWebpage
12 | {
13 | public:
14 | RmdownSeedWebpage (const string& url, const string& proxy_addr);
15 | virtual ~RmdownSeedWebpage ();
16 | };
17 |
--------------------------------------------------------------------------------
/src/lib/self/SeedWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "SeedWebpage.h"
4 | #include
5 | #include
6 | #include
7 |
8 | using namespace std;
9 |
10 | SeedWebpage::SeedWebpage ( const string& url,
11 | const string& proxy_addr,
12 | const string& post_url,
13 | ParsePostMultiSections parsePostMultiSections )
14 | : Webpage(url, "", proxy_addr), post_url_(post_url)
15 | {
16 | if (!isLoaded()) {
17 | cerr << "WARNING! SeedWebpage::SeedWebpage() CANNOT load webpage \""
18 | << url << "\"" << endl;
19 | return;
20 | }
21 |
22 | // parse the post method multi sections
23 | parsePostMultiSections(getTxt(), post_sections_list_);
24 | }
25 |
26 | SeedWebpage::~SeedWebpage ()
27 | {
28 | ;
29 | }
30 |
31 | // this is a multipart/formdata style HTTP post method
32 | bool
33 | SeedWebpage::downloadSeed (const string& path, const string& base_name)
34 | {
35 | if (post_sections_list_.empty()) {
36 | return(false);
37 | }
38 |
39 | // make seed name
40 | static const string seed_postfix(".torrent");
41 | string seed_filename = path + "/" + base_name + seed_postfix;
42 |
43 |
44 | return(submitMultiPost(post_url_, seed_filename, post_sections_list_));
45 | }
46 |
47 |
--------------------------------------------------------------------------------
/src/lib/self/SeedWebpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include "../helper/Webpage.h"
8 |
9 | using std::string;
10 | using std::vector;
11 | using std::pair;
12 |
13 |
14 | class SeedWebpage : public Webpage
15 | {
16 | public:
17 | // callback function for parse the multi sections of post
18 | typedef bool (*ParsePostMultiSections) ( const string& webpage_txt,
19 | vector>& post_sections_list );
20 |
21 | public:
22 | SeedWebpage ( const string& url,
23 | const string& proxy_addr,
24 | const string& post_url,
25 | ParsePostMultiSections parsePostMultiSections );
26 | virtual ~SeedWebpage ();
27 | bool downloadSeed (const string& path, const string& base_name);
28 |
29 | private:
30 | const string post_url_;
31 | vector> post_sections_list_;
32 | };
33 |
--------------------------------------------------------------------------------
/src/lib/self/TopicWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "TopicWebpage.h"
4 | #include
5 | #include
6 | #include "../helper/Misc.h"
7 |
8 | #include
9 |
10 | using namespace std;
11 |
12 |
13 | TopicWebpage::TopicWebpage ( const string& url, // passed to the Webpage base; afterwards the picture URLs and the seed URL are parsed
14 | ParsePicturesUrls parsePicturesUrls, // callback that fills pictures_urls_list_
15 | ParseSeedUrl parseSeedUrl, // callback that fills seed_url_
16 | const string& proxy_addr, // passed to the Webpage base
17 | const string& src_charset,
18 | const string& dest_charset )
19 | : Webpage(url, "", proxy_addr)
20 | {
21 | if (!isLoaded()) { // page not loaded: no charset conversion and no parsing is done
22 | return;
23 | }
24 |
25 | // convert the charset only when both the source and the destination charset are given
26 | if (!src_charset.empty() && !dest_charset.empty()) {
27 | convertCharset(src_charset, dest_charset);
28 | }
29 |
30 | // parse the picture URLs into pictures_urls_list_
31 | if (!parsePicturesUrls(getTxt(), pictures_urls_list_)) {
32 | //cerr << "WARNING! parsePicturesUrls() failure from " << url << endl;
33 | ; // a parse failure is ignored here: the warning above is commented out
34 | }
35 |
36 | // parse the seed URL into seed_url_
37 | if (!parseSeedUrl(getTxt(), seed_url_)) {
38 | //cerr << "WARNING! parseSeedUrl() failure from " << url << endl;
39 | ; // a parse failure is ignored here: the warning above is commented out
40 | }
41 | }
42 |
43 | TopicWebpage::~TopicWebpage () // destructor; the body is just an empty statement
44 | {
45 | ;
46 | }
47 |
48 | const vector& // NOTE(review): the element type appears to have been stripped from this listing -- confirm against the real source
49 | TopicWebpage::getPicturesUrlsList (void) const // read-only access to the picture URLs filled in by the constructor's parsePicturesUrls callback
50 | {
51 | return(pictures_urls_list_);
52 | }
53 |
54 | const string&
55 | TopicWebpage::getSeedUrl (void) const // read-only access to the seed URL filled in by the constructor's parseSeedUrl callback
56 | {
57 | return(seed_url_);
58 | }
59 |
60 | // Download this topic's pictures into `path`. Files are named <base_name>-<n>.<ext>, where n counts successful downloads from 0.
61 | bool
62 | TopicWebpage::downloadAllPictures ( const string& path,
63 | const string& base_name,
64 | unsigned timeout_download_pic, // forwarded to downloadFile() for every picture
65 | vector& fail_download_pics_urls_list, // output: cleared first, then receives every URL whose download failed
66 | unsigned pictures_max_num ) // the loop stops once this many pictures have been downloaded successfully
67 | {
68 | fail_download_pics_urls_list.clear();
69 | // walk the parsed URLs until they run out or pictures_max_num downloads have succeeded
70 | for ( unsigned i = 0, sucess_cnt = 0;
71 | i < pictures_urls_list_.size() && sucess_cnt < pictures_max_num;
72 | ++i ) {
73 | const string& picture_url = pictures_urls_list_[i];
74 |
75 | // derive the file extension (postfix) from the remote file type
76 | //string postfix_name("jpeg"); // the remote filetype lookup sometimes fails, so a default postfix used to be set here
77 | string postfix_name("");
78 | static const unsigned get_remote_filetype_retry_times = 2; // total number of attempts made by the loop below
79 | static const unsigned get_remote_filetype_sleep_second = 2; // seconds slept after each failed attempt, including the last one
80 | for (unsigned j = 0; j < get_remote_filetype_retry_times; ++j) {
81 | const string& tmp = getRemoteFiletype(picture_url);
82 | static const string keyword("image/");
83 | const auto pos = tmp.find(keyword);
84 | if (string::npos != pos) {
85 | postfix_name = tmp.substr(pos + keyword.size()); // everything after "image/" up to the end of the string
86 | break; // NOTE(review): any trailing text in the type string would become part of the extension -- confirm what getRemoteFiletype() returns
87 | }
88 | sleep(get_remote_filetype_sleep_second);
89 | }
90 | // skip gifs (they are almost always ads) and pictures whose file type could not be determined; skipped URLs are not added to the fail list
91 | if ("gif" == postfix_name || "" == postfix_name) {
92 | continue;
93 | }
94 |
95 | // download the picture
96 | const string& pic_filename = path + "/" + base_name + "-" + convNumToStr(sucess_cnt) + "." + postfix_name; // numbered by sucess_cnt, so a failed download does not consume an index
97 | if (downloadFile(picture_url, pic_filename, "", timeout_download_pic)) {
98 | ++sucess_cnt;
99 | continue;
100 | }
101 | // download failed: record the URL and remove the file at pic_filename
102 | //cerr << "WARNING! CANNOT download " << pictures_urls_list_[i] << endl;
103 | fail_download_pics_urls_list.push_back(pictures_urls_list_[i]);
104 | remove(pic_filename.c_str());
105 | }
106 |
107 | // true only when no download failed (skipped pictures do not count as failures)
108 | return(fail_download_pics_urls_list.empty());
109 | }
110 |
111 |
--------------------------------------------------------------------------------
/src/lib/self/TopicWebpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include "../helper/Webpage.h"
8 |
9 | using std::string;
10 | using std::vector;
11 |
12 |
13 | class TopicWebpage : public Webpage // a Webpage holding the picture URLs and the seed URL parsed from one topic page
14 | {
15 | public:
16 | // callback that parses the picture URLs out of the webpage text into pictures_urls_list
17 | typedef bool (*ParsePicturesUrls) (const string& webpage_txt, vector& pictures_urls_list); // NOTE(review): the vector's element type looks stripped in this listing -- confirm
18 | // callback that parses the seed URL out of the webpage text into seed_url
19 | typedef bool (*ParseSeedUrl) (const string& webpage_txt, string& seed_url);
20 |
21 | public:
22 | TopicWebpage ( const string& url,
23 | ParsePicturesUrls parsePicturesUrls,
24 | ParseSeedUrl parseSeedUrl,
25 | const string& proxy_addr,
26 | const string& src_charset, // charset conversion is done only when both charsets are non-empty
27 | const string& dest_charset );
28 | virtual ~TopicWebpage ();
29 | const string& getSeedUrl (void) const;
30 | const vector& getPicturesUrlsList (void) const;
31 | bool downloadAllPictures ( const string& path, // returns true only when no picture download failed
32 | const string& base_name,
33 | unsigned timeout_download_pic,
34 | vector& fail_download_pics_urls_list, // output: URLs whose download failed
35 | unsigned pictures_max_num = 32 ); // upper bound on successfully downloaded pictures
36 |
37 | private:
38 | string seed_url_; // filled by the ParseSeedUrl callback in the constructor
39 | vector pictures_urls_list_; // filled by the ParsePicturesUrls callback in the constructor
40 | };
41 |
42 |
--------------------------------------------------------------------------------
/src/lib/self/TopicsListWebpage.cpp:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #include "TopicsListWebpage.h"
4 | #include
5 |
6 |
7 | using namespace std;
8 |
9 |
10 | TopicsListWebpage::TopicsListWebpage ( const string& portal_url, // stored in portal_url_ and handed to both parse callbacks
11 | const string& url, // passed to the Webpage base
12 | TopicsListWebpage::ParseTitlesAndUrls parseTitlesAndUrls, // fills titles_and_urls_list_
13 | TopicsListWebpage::ParseNextpageUrl parseNextpageUrl, // fills nextpage_url_
14 | const string& proxy_addr,
15 | const string& src_charset,
16 | const string& dest_charset )
17 | : Webpage(url, "", proxy_addr, 16, 4, 4), // NOTE(review): the meaning of "", 16, 4, 4 is defined by Webpage's constructor, which is not visible here
18 | portal_url_(portal_url)
19 | {
20 | if (!isLoaded()) { // page not loaded: no charset conversion and no parsing is done
21 | return;
22 | }
23 |
24 | // convert the charset only when both the source and the destination charset are given
25 | if (!src_charset.empty() && !dest_charset.empty()) {
26 | convertCharset(src_charset, dest_charset);
27 | }
28 |
29 | // parse the titles and URLs of all topics on this topics-list page (the callback's bool result is not checked)
30 | const string& webpage_txt = getTxt();
31 | parseTitlesAndUrls(webpage_txt, portal_url_, titles_and_urls_list_);
32 |
33 | // unescape HTML in every title (the first member of each entry is the title)
34 | for (auto& e : titles_and_urls_list_) {
35 | string& title = e.first;
36 | title = unescapeHtml(title);
37 | }
38 |
39 | // parse the URL of the next topics-list page (the callback's bool result is not checked)
40 | parseNextpageUrl(webpage_txt, portal_url_, nextpage_url_);
41 | }
42 |
43 | TopicsListWebpage::~TopicsListWebpage () // destructor; the body is just an empty statement
44 | {
45 | ;
46 | }
47 |
48 | // URL of the next topics-list page; an empty string means there is no further page
49 | const string&
50 | TopicsListWebpage::getNextpageUrl (void) const
51 | {
52 | return(nextpage_url_);
53 | }
54 |
55 | // topics parsed from this page: in each entry, first is the title (HTML-unescaped by the constructor), second is the URL
56 | const vector>&
57 | TopicsListWebpage::getTitlesAndUrlsList (void) const
58 | {
59 | return(titles_and_urls_list_);
60 | }
61 |
62 | const string&
63 | TopicsListWebpage::getPortalWebpageUrl (void) const // read-only access to the portal URL given to the constructor
64 | {
65 | return(portal_url_);
66 | }
67 |
68 |
--------------------------------------------------------------------------------
/src/lib/self/TopicsListWebpage.h:
--------------------------------------------------------------------------------
1 | // last modified
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include "../helper/Webpage.h"
8 |
9 | using std::string;
10 | using std::vector;
11 | using std::pair;
12 |
13 |
14 | class TopicsListWebpage : public Webpage // a Webpage holding the topics (title + URL) and the next-page URL parsed from one topics-list page
15 | {
16 | public:
17 | // callback that parses the titles and URLs of the topics out of the webpage text
18 | typedef bool (*ParseTitlesAndUrls) ( const string& webpage_txt,
19 | const string& portal_url,
20 | vector>& titles_and_urls_list ); // NOTE(review): template arguments look stripped in this listing (`vector>`) -- confirm against the real header
21 | // callback that parses the URL of the next topics-list page out of the webpage text
22 | typedef bool (*ParseNextpageUrl) ( const string& webpage_txt,
23 | const string& portal_url,
24 | string& nextpage_url );
25 |
26 | public:
27 | TopicsListWebpage ( const string& portal_url,
28 | const string& url,
29 | ParseTitlesAndUrls parseTitlesAndUrls,
30 | ParseNextpageUrl parseNextpageUrl,
31 | const string& proxy_addr = "",
32 | const string& src_charset = "", // charset conversion is done only when both charsets are non-empty
33 | const string& dest_charset = "" );
34 | virtual ~TopicsListWebpage ();
35 | const vector>& getTitlesAndUrlsList (void) const; // in each entry, first is the title and second is the URL
36 | const string& getNextpageUrl (void) const; // empty string when there is no further topics-list page
37 | const string& getPortalWebpageUrl (void) const;
38 |
39 |
40 | private:
41 | vector> titles_and_urls_list_; // filled by the ParseTitlesAndUrls callback; titles are HTML-unescaped afterwards
42 | string nextpage_url_; // filled by the ParseNextpageUrl callback
43 | const string portal_url_; // copy of the portal_url constructor argument
44 | };
45 |
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include