├── LICENSE ├── README.md ├── bin └── build_4_win.7z ├── build └── CMakeLists.txt ├── config └── portals_list.json ├── pic ├── hardseed.gif ├── running.gif └── seeds_and_pics.gif └── src ├── .ycm_extra_conf.py ├── .ycm_extra_conf.pyc ├── lib ├── 3rd │ └── json11 │ │ ├── LICENSE.txt │ │ ├── README.md │ │ ├── json11.cpp │ │ ├── json11.hpp │ │ └── test.cpp ├── helper │ ├── CmdlineOption.cpp │ ├── CmdlineOption.h │ ├── Misc.cpp │ ├── Misc.h │ ├── RichTxt.h │ ├── Time.cpp │ ├── Time.h │ ├── Webpage.cpp │ └── Webpage.h └── self │ ├── Aicheng.cpp │ ├── Aicheng.h │ ├── AichengTopicWebpage.cpp │ ├── AichengTopicWebpage.h │ ├── AichengTopicsListWebpage.cpp │ ├── AichengTopicsListWebpage.h │ ├── Caoliu.cpp │ ├── Caoliu.h │ ├── CaoliuTopicWebpage.cpp │ ├── CaoliuTopicWebpage.h │ ├── CaoliuTopicsListWebpage.cpp │ ├── CaoliuTopicsListWebpage.h │ ├── JandownSeedWebpage.cpp │ ├── JandownSeedWebpage.h │ ├── RmdownSeedWebpage.cpp │ ├── RmdownSeedWebpage.h │ ├── SeedWebpage.cpp │ ├── SeedWebpage.h │ ├── TopicWebpage.cpp │ ├── TopicWebpage.h │ ├── TopicsListWebpage.cpp │ └── TopicsListWebpage.h └── main.cpp /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. 
(Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. 
We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

给不了你梦中情人,至少还有硬盘女神:hardseed

2 | yangyangwithgnu@yeah.net 3 | http://yangyangwithgnu.github.io/ 4 | 2016-02-04 14:53:51 5 | 6 | 7 | ##谢谢 8 | 9 | **捐赠:支付宝 yangyangwithgnu@yeah.net ,支付宝二维码(左),微信二维码(右)** 10 |
11 | 12 |
13 |
14 | 15 | **二手书**:书,我提高开发技能的重要手段之一,随着职业生涯的发展,书籍也在不断增多,对我而言,一本书最多读三遍,再往后,几乎没有什么营养吸收,这部分书对我已基本无用,但对其他人可能仍有价值,所以,为合理利用资源,我决定低价出售这些书,希望达到两个目的:0)用售出的钱购买更多新书(没当过雷锋的朋友 (๑´ڡ`๑));1)你低价购得需要的书(虽然二手)。到 https://github.com/yangyangwithgnu/used_books 看看有无你钟意的。 16 | 17 | 18 | ##公告 19 | ---------------- 20 | 21 | **讨论**:任何意见建议移步 https://www.v2ex.com/t/123175 22 | 23 | **声明**:我本人绝对尊重各大爱的论坛,提供的资源不仅优质而且免费,我只是懒、足够的懒。请大家支持这些论坛,多用页面访问、多点击广告、多解囊捐赠。*我..在..干..嘛 @_@#* 24 | 25 | **注意** 26 | + 代理是一切的先决条件。你可以使用自己的代理工具,用 hardseed 的命令行选项 --proxy 指定本地中转地址及端口,也可以用我为你预配置的 goagent 代理工具,位于 https://github.com/yangyangwithgnu/goagent_out_of_box_yang 27 | 28 | 29 | ##版本 30 | ---------------- 31 | 32 | **[v0.2.14-1,修正,2016-02-04]**:0)行了、行了,我抱歉,不知道有这么多 win 用户需要 hardseed,之前是我狭隘了,只考虑到 unix-like 用户。编译好的 win 版本程序送你,位于 bin\build_4_win.7z;1)另外,osX 下的构建方面进行了细化。新年快乐! 33 | **[v0.2.14,修正,2016-01-31]**:0)忽略解析 aicheng 站务相关帖子。 34 | **[v0.2.13,修正,2016-01-17]**:0)修正 caoliu 翻页的错误。 35 | **[v0.2.12,优化,2015-05-26]**:0)先前 hardseed 中硬编码 aicheng 和 caoliu 论坛入口地址,地址一旦变更,每次需要重新调整代码,很是麻烦,现在我在本项目主页中放了一份配置文件 config/portals_list.json,hardseed 自动从该文件中获取最新论坛入口地址(安啦,我会及时更新的);1)调整部分公共库代码。 36 | **[v0.2.11,修正,2015-03-22]**:修正 aicheng 种子和图片解析错误的问题(别发邮件了哈,亲,邮箱都他妈快撑爆了) 。 37 | **[v0.2.10,修正,2014-12-07]**:caoliu 地址变更,shit :-P 38 | **[v0.2.09,修正,2014-11-30]**:caoliu 地址变更。 39 | **[v0.2.08,修正,2014-10-21]**:0)仅解析主贴的图片而不再解析回帖,以避免下载无关图片;1)aicheng 论坛地址变更;2)部分用户有自己的代理工具,为缩短下载时长,将预配置的 goagent 独立成一个 github 项目。 40 | **[v0.2.07,修正,2014-09-25]**:windows 禁止文件名中含有 /:\*?\\<>"| 等字符,否则将导致非法路径错误,修正 hardseed 生成的文件名中可能含有如上字符的问题。 41 | **[v0.2.06,优化,2014-09-09]**:caoliu 原地址无法访问,更新地址;取消 caoliu 自拍套图最多只能下载 256 张的限制。 42 | **[v0.2.05,优化,2014-08-17]**:程序功能无任何更新,仅更新代理工具 goagent 配置文件 proxy.ini:一是设置 obfuscate = 1 开启流量混淆以正确解析出可用 GGC IP,一是设置 pagespeed = 1 以提升 GAE 的下行速度。 43 | **[v0.2.05,修正,2014-08-13]**:0)修正帖子部分图片 URL 未解析的问题;1)修正图片序号错误的问题;2)优化图片下载等待时长算法,不再以 --timeout-download-picture 作为绝对等待时长,而是将其作为指导值,一旦图片下载失败 hardseed 将自动计算下次重新下载所需的等待时长,同时,与“速度过低视为下载失败”的机制结合,提升图片下载等待耗时;3)升级 
goagent 至 3.1.21,采用 goagent 默认 proxy.ini,而不再使用自定义 iplist (很多朋友反映采用先前我自定义 iplist 版本的 goagent 速度不理想,这是由于 GGC IP 与不同网络环境有关,我用 checkgoogleip 跑出来 GGC IP 最适合我的网络环境,不见得适合你,所以,权衡之下,还是用 goagent 自带的 GGC IP,至少这合适于大多数人)。 44 | **[v0.2.04,修正,2014-08-10]**:0)由于对 % 进行 URL 转义使得部分图片的 URL 生成错误,导致图片下载失败,本版本已修正;1)剔除长年显示异常的图床网站 iceimg.com;2)引入均速过低视为下载失败的机制,持续(8s)低速(4KB/s)终止当次下载,重新向服务端发起新请求,开启新一次的下载,以缩短下载错误 URL 图片等待时长;3)修正 aicheng 帖子列表页面中帖子名解析错误的问题;4)取消单个代理服务器并行下载上限数 8 的限制。 45 | **[v0.2.03,修正,2014-08-08]**:0)修正部分图片缺失扩展名的问题;1)默认下载帖子数量从 128 调整为 64;2)更换新的 GGC IP 进代理工具 goagent 的 proxy.ini 中以提升代理速度。 46 | **[v0.2.02,优化,2014-08-06]**:程序无任何功能变更,仅是优化代码,合并部分通用代码至公共库、增加用于验证代理出口 IP 和伪装浏览器的 user-agent 的接口。 47 | **[v0.2.01,修正,2014-07-28]**:修正临时文件未删除的错误。 48 | **[v0.2.00,新增,2014-07-23]**:应 @sigmadog 需求,增加抓取 caoliu 上自拍套图(江湖人称“達蓋爾的旗幟”)的功能。 49 | **[v0.1.00,修正,2014-07-21]**:caoliu 论坛增加了反机器人机制,若翻页过快则视为机器人行为,下载页面为空白页。此版本可应对它的反机器人机制。 50 | 51 | 52 | ##演示 53 | ---------------- 54 | *hardseed* 55 | ![hardseed gif demo](https://raw.githubusercontent.com/yangyangwithgnu/hardseed/master/pic/hardseed.gif) 56 | *running* 57 | ![hardseed gif demo](https://raw.githubusercontent.com/yangyangwithgnu/hardseed/master/pic/running.gif) 58 | *more seeds and pictures* 59 | ![hardseed gif demo](https://raw.githubusercontent.com/yangyangwithgnu/hardseed/master/pic/seeds_and_pics.gif) 60 | 61 | http://v.youku.com/v_show/id_XNzQxOTk0NTE2.html 62 | 63 | 64 | ##man 65 | ---------------- 66 | 67 | **hardseed** is a batch seed and picture download utility for the CaoLiu and AiCheng forums. It's easy and simple to use. Usually, you can run it as follows: 68 | ``` 69 | $ hardseed 70 | ``` 71 | or 72 | ``` 73 | $ hardseed --saveas-path ~/downloads --topics-range 8 64 --av-class aicheng_west --timeout-download-picture 32 --hate X-Art --proxy http://127.0.0.1:8087 74 | ``` 75 | 76 | --help 77 | Show the help information you are reading now. 78 | 79 | --version 80 | Show current version. 
81 | 82 | --av-class 83 | There are 13 av classes: 84 | - caoliu_west_reposted 85 | - caoliu_cartoon_reposted 86 | - caoliu_asia_mosaicked_reposted 87 | - caoliu_asia_non_mosaicked_reposted 88 | - caoliu_west_original 89 | - caoliu_cartoon_original 90 | - caoliu_asia_mosaicked_original 91 | - caoliu_asia_non_mosaicked_original 92 | - caoliu_selfie 93 | - aicheng_west 94 | - aicheng_cartoon 95 | - aicheng_asia_mosaicked 96 | - aicheng_asia_non_mosaicked 97 | 98 | As the name implies, "caoliu" stands for CaoLiu forum, "aicheng" for AiCheng forum, "reposted" and "original" are self-explanatory; you know which one is your best lover (yes, only one). 99 | The default is aicheng_asia_mosaicked. 100 | 101 | --concurrent-tasks 102 | You can set more than one proxy, and each proxy can run more than one concurrent task. This option sets the number of concurrent tasks for each proxy. 103 | The max and default number is 8. 104 | 105 | --timeout-download-picture 106 | Some pictures are too big to download in a few seconds, so you should set the picture download timeout in seconds. 107 | The default timeout is 16 seconds. 108 | 109 | --topics-range 110 | Set the range of topics to download. E.G.: 111 | - topics-range 2 16 112 | - topics-range 8 (I.E., --topics-range 1 8) 113 | - topics-range -1 (I.E., all topics of this av class) 114 | 115 | The default topics range is 64. 116 | 117 | --saveas-path 118 | Set the path to save seeds and pictures. The directory is named [avclass][range]@hhmmss. E.G., [aicheng_west][2~32]@124908/. 119 | The default directory is the home directory (C:\\ on Windows). 120 | 121 | --hate 122 | If you dislike certain topics, you can skip them by setting this option to keywords from the topic titles, separated by a space character ' '; matching is case-sensitive. E.G., --hate 孕妇 重口味. When the --hate keyword list conflicts with --like, --hate takes precedence. 
123 | 124 | --like 125 | If you like certain topics, you can grab them by setting this option to keywords from the topic titles, separated by a space character ' '; matching is case-sensitive. E.G., --like 苍井空 小泽玛利亚. When the --like keyword list conflicts with --hate, --hate takes precedence. 126 | 127 | --proxy 128 | As you know, the government likes blocking adult websites, so I strongly suggest you set the --proxy option. Hardseed supports several kinds of proxies: 129 | - GoAgent (STRONGLY recommended), --proxy http://127.0.0.1:8087 130 | - shadowsocks, --proxy socks5://127.0.0.1:1080, or socks5h://127.0.0.1:1080 131 | - SSH, --proxy socks4://127.0.0.1:7070 132 | - VPN (PPTP and openVPN), --proxy "" 133 | 134 | It is important to know that you can set several proxies at the same time, separated by a space character ' '. As the --concurrent-tasks option says, each proxy can run more than one concurrent task; now, what about several proxies? Yes, yes, the speed of downloading seeds and pictures becomes very, very fast. E.G., --concurrent-tasks 8 --proxy http://127.0.0.1:8087 socks5://127.0.0.1:1080 socks4://127.0.0.1:7070, the number of concurrent tasks is 8\*3. 135 | If you want to know how to install and configure various kinds of proxies, please see my homepage "3.2 搭梯翻墙" https://github.com/yangyangwithgnu/the_new_world_linux#3.2 136 | If you are not good with computers, I have prepared an up-to-date, out-of-the-box goagent for folks like you. See https://github.com/yangyangwithgnu/goagent_out_of_box_yang 137 | 138 | The default is http://127.0.0.1:8087. 139 | 140 | That's all. If you have any suggestions, let me know at yangyangwithgnu@yeah.net or http://yangyangwithgnu.github.io/, big thanks to you. Kiddo, take care of your body. 
:-) 141 | 142 | 143 | ##中文 144 | -------- 145 | 146 | hardseed 希望带给你(硬盘)女神!女神的种子和图片。 147 | 148 | ###【翻墙】 149 | 你知道,这一切的一切都在墙外,所以你得具备翻墙环境,hardseed 才能帮你拉女神。hardseed 支持 goagent、shadowsocks、SSH、VPN (PPTP 和 openVPN)等各类代理模式,甚至你可以同时使用多种代理以极速下载。从普及度、稳定性、高效性来看,goagent 最优。“我一小白,平时工作压力本来就大,就想看看女神轻松下,你还让我折腾代理!没人性!”,嘚,亲,咱是做服务的。我帮你配置了一份开箱即用的 goagent,位于 https://github.com/yangyangwithgnu/goagent_out_of_box_yang ,下载后,linux 用户,命令行中运行 150 | ``` 151 | $ python proxy.py 152 | ``` 153 | windows 亲,双击运行 goagent.exe (**管理员权限**)。 154 | 155 | ###【下载】 156 | 157 | ####『windows』 158 | 亲,往右上看,找到“download ZIP”,点击下载。 159 | 160 | ####『linux』 161 | ``` 162 | $ git clone https://github.com/yangyangwithgnu/hardseed.git 163 | ``` 164 | 165 | ###【源码安装】 166 | 167 | ####『windows』 168 | 这基本没 windows 用户什么事儿,除非你有 cygwin,否则你没法编译源码,没事,帮你弄好了,我的定位是牙医界的服务人员,服务很重要,二进制执行程序位于 hardseed\bin\build_4_win.7z。 169 | 170 | ####『linux』 171 | 0)唯一依赖 libcurl,请自行安装; 172 | 1)代码采用 C++11 编写,gcc 版本不低于 4.7.1。 173 | 2)命令行下运行: 174 | ``` 175 | $ cd hardseed/build/ 176 | $ cmake . 
177 | $ make && make install 178 | ``` 179 | 180 | ####『osX』 181 | 首先,将 build/CMakeLists.txt 中的 182 | ``` 183 | TARGET_LINK_LIBRARIES(hardseed curl pthread) 184 | ``` 185 | 替换成 186 | ``` 187 | TARGET_LINK_LIBRARIES(hardseed curl pthread iconv) 188 | ``` 189 | 190 | 然后,将 build/CMakeLists.txt 中 191 | ``` 192 | ## osX 193 | ##>>>>>>>>>>>>>>>>>>>>>> 194 | 195 | #SET(CMAKE_CXX_COMPILER "g++") 196 | #SET(CMAKE_CXX_FLAGS "-std=c++11 -O3") 197 | #SET(CMAKE_BUILD_TYPE release) 198 | #ADD_EXECUTABLE(hardseed ${SRC_LIST}) 199 | #TARGET_LINK_LIBRARIES(hardseed curl pthread iconv) 200 | #INSTALL(PROGRAMS hardseed DESTINATION /usr/bin/) 201 | ``` 202 | 第一列的 # 删除; 203 | 204 | 接着,将 build/CMakeLists.txt 中 205 | ``` 206 | # release 207 | SET(CMAKE_CXX_COMPILER "g++") 208 | SET(CMAKE_CXX_FLAGS "-std=c++11 -O3") 209 | SET(CMAKE_BUILD_TYPE release) 210 | ADD_EXECUTABLE(hardseed ${SRC_LIST}) 211 | TARGET_LINK_LIBRARIES(hardseed curl pthread) 212 | INSTALL(PROGRAMS hardseed DESTINATION /usr/bin/) 213 | ``` 214 | 删掉; 215 | 216 | 最后,剩下步骤同 linux 构建方法。 217 | 218 | 219 | 220 | ###【使用】 221 | **亲,听好了,运行 hardseed 前务必确保代理程序已正常运行,否则,别说女神,蚊子都碰不到。** 222 | 223 | ####『windows』 224 | 先进入 hardseed\bin\,解压 build_4_win.7z,选中 hardseed.exe,右键设置**以管理员权限运行该程序**,接着键入 alt-d 将光标定位到文件管理器的地址栏中,键入 CMD 启动命令行窗口,在 CMD 中键入 225 | ``` 226 | X:\hardseed\bin\windows> hardseed.exe 227 | ``` 228 | 这时,hardseed 开始玩命儿地为你下载女神图片和种子,经过 2 分 8 秒,找到类似 C:\\[aicheng_asia_mosaicked][1~128]@20140822\ 的目录,女神们那儿等你! 229 | 230 | ####『linux』 231 | 同 windows 下运行一样,全用默认命令行参数运行 232 | ``` 233 | $ hardseed 234 | ``` 235 | 执行完成后,你会看到 ~/[aicheng_asia_mosaicked][1~128]@014822/,你要的都在那儿。或者,玩点高级的 236 | ``` 237 | $ hardseed --saveas-path ~/downloads --topics-range 256 --av-class aicheng_west 238 | ``` 239 | 其中,--saveas-path 指定存放路径为 ~/downloads/;--topics-range 指定解析的帖子范围从第 1 张到第 256 张帖子;--av-class 指定女神类型为欧美。完整命令行选项请 --help 查看。 240 | 241 | ###【FAQ】 242 | 243 | **Q1**:为何 windows 版的可执行文件目录 build_4_win\ 下有一堆 cyg\*.dll 文件? 
244 | **A1**:hardseed 是用 C++ 编写的遵循 SUS(单一 unix 规范)的原生 linux 程序,理论上,在任何 unix-like(linux、BSD、osX) 系统上均可正常源码编译,唯独不支持 windows,为让 hardseed 具备跨平台能力,须借由某种工具(或环境)将 hardseed 转换成 windows 下的执行程序。cygwin 就是这种环境,我把 hardseed 源码纳入 cygwin 环境中重新编译,即可生成 windows 下的可执行程序 hardseed.exe,在这个过程中,cygwin 会加入些自己的代码和中转库到 hardseed.exe 中,cyg\*.dll 就是各类中转库。 245 | 246 | **Q2**:为何运行 windows 版的执行程序总有如下警告 247 | ``` 248 | Preferred POSIX equivalent is: /cygdrive/c/xxxx, CYGWIN environment variable option "nodosfilewarning" turns off this warning. Consult the user's guide for more details about POSIX paths ... 249 | ``` 250 | 影响正常运行么? 251 | **A2**:linux 与 windows 有很多基础设施的差异,路径表示方式就算其一,如,前者是 /this/is/linux/path/,后者 C:\this\is\windows\path\,A1 中提过 hardseed 是 linux 下的原生程序,代码中全采用的 linux 路径规则,运行 hardseed.exe 时, cygwin 自动进行路径规则转换,所以出现本问题中的警告信息以告知用户路径可能有变化。这完全不影响 hardseed.exe 正常运行。如果厌恶这些提示,可以在环境变量中增加 CYGWIN=nodosfilewarning (win7 用户:computer - properties - advanced system settings - advanced - environment variables - new,variable name 填入 CYGWIN,variable value 中填入 nodosfilewarning,保存即可)。 252 | 253 | **Q3**:运行 hardseed 后啥都没下载呢?还提示 There is no topic which you like? 254 | **A3**:有几种可能: 255 | * 未成功翻墙。请自行参阅你的翻墙工具帮助文档,修正即可。windows 用户注意检查是否以**管理员权限运行翻墙工具**; 256 | * 网页翻墙已成功但仍无法下载。请检查你的代理工具是否成功接收 hardseed 的代理请求(如,goagent 窗口中可查看),windows 用户注意检查是否以**管理员权限运行 hardseed.exe**; 257 | * hardseed 翻墙已成功但仍无法下载。你指定了 --like xxxx 命令行选项,hardseed 将查找标题中是否含有关键字 xxxx,若没有则忽略相关帖子。更换其他关键字。 258 | 259 | **Q4**:我已经在墙外,为何仍下载失败? 260 | **A4**:hardseed 默认采用 goagent 作为代理工具,即,默认本地代理中转地址为 http://127.0.0.1:8087 。如果你已在墙外无须代理即可访问 caoliu 和 aicheng 论坛,那么需要告知 hardseed 不再走本地代理中转而应直接访问,即: 261 | ``` 262 | --proxy "" 263 | ``` 264 | 265 | **Q5**:如何加快下载速度? 
266 | **A5**:最直接会想到多线程下载,一条线程负责下载一个页面,逻辑上,线程数越多、下载速度越快,实际上,存在代理服务器和被访服务器两方面的限制: 267 | * 代理服务器方面的限制,代理服务器为不同用户提供代理服务,为避免相互影响,通常它会限制单个用户的流量和请求频率,所以,hardseed 在指定代理服务器上的线程数一定是有个上限; 268 | * 被访服务器方面的限制,你访问的论坛不会低能到不控制请求频率,举个例,正常情况你 4 秒钟可以打开 4 张 caoliu 论坛的帖子,一旦 caoliu 服务器发现你 1 秒钟打开了 32 张帖子那一定将此视为机器人行为,从而拒绝响应。 269 | 270 | 正由于存在代理服务器和被访服务器两方面的限制,线程数不能无限大,从我多次测试的经验来看,**单个代理服务器**访问被访服务器的并行线程数设定为 8 条最为稳定,否则容易引起代理服务器和被访服务器停服。同个时刻有大量用户在访问 caoliu 论坛,肯定远超 1 秒钟打开了 32 张帖子的频率,为何 caoliu 没对所有用户拒绝请求?显然,这些请求来自不同 IP 的电脑终端,按这个思路,如果 hardseed 能通过多个不同 IP 访问 caoliu,对于代理服务器和被访服务器来说请求数量都变少了,那完全可以绕开 caoliu 对单个 IP 请求频率过快的限制。由于我们采用代理访问,发起访问请求的 IP 就是代理服务器的 IP,显然,只要 hardseed 支持同时使用多个代理服务器,那么一切问题就简单了。所以,我**赋予了 hardseed 多路代理的能力**。hardseed 支持 4 种代理模式: 271 | * goagent (STRONGLY recommended), --proxy http://127.0.0.1:8087 272 | * shadowsocks, --proxy socks5://127.0.0.1:1080, or socks5h://127.0.0.1:1080 273 | * SSH, --proxy socks4://127.0.0.1:7070 274 | * VPN (PPTP and openVPN), --proxy "" 275 | 276 | 其中,除 VPN 外(这是种全局代理模式),其他三种代理模式可混用,也就是说,你可以同时指定 goagent、shadowsocks、SSH 等三种代理模式 277 | ``` 278 | --proxy http://127.0.0.1:8087 socks5://127.0.0.1:1080 socks4://127.0.0.1:7070 279 | ``` 280 | 这样,hardseed 就能用 8 * 3 条线程并行下载。另外,goagent 都是通过 GAE 集群发起的网络请求,所以不存在同个机器上配置多个 goagent 的做法;SSH(获取免费帐号 http://www.fastssh.com/ ) 和 shadowsocks(获取免费帐号 https://shadowsocks.net/get ) 代理,你可以获取多个不同的代理服务器(不同的 SSH 或者 shadowsocks 代理的本地端口必须自行设置成不同的),因此可以实现多个不同 IP 发起网络请求。换言之,你可以同时拥有 1 个 goagent、n 个 SSH、m 个 shadowsocks 代理出口 IP,每个 IP 本来允许使用 8 条线程,那么共计就有 (1 + n + m) * 8 条线程并行下载,速度自然上去了。 281 | 我个人偏爱 shadowsocks,以此举例来说:先在 https://shadowsocks.net/get 获取了 4 个 shadowsocks 帐号,本地端口分别配置成 1080、1081、1082、1083,运行此 4 个 shadowsocks 代理程序;同时,运行 goagent 代理程序;然后,在 hardseed 的命令行参数设定 282 | ``` 283 | --proxy http://127.0.0.1:8087 socks5://127.0.0.1:1080 socks5://127.0.0.1:1081 socks5://127.0.0.1:1082 socks5://127.0.0.1:1083 284 | ``` 285 | 这时,如果你的 --concurrent-tasks 设定为 8(默认值),那么,hardseed 将启用 (4 + 1) * 8 条线程并行下载。那速度,飞快、快 ... 
*(注,有些 shadowsocks 代理服务器禁止下载,若有异常,将其从 --proxy 代理列表中剔除之。若求稳定,只用 goagent)* 286 | 287 | **Q6**:如何搜索喜欢的视频? 288 | **A6**:--like 选项可以指定多个关键字(空格隔开)参数,帖子标题中出现相关关键字之一便纳入下载范围,否则不下载。通常来说,帖子标题中文字有简体、繁体、日文等三种可能,所以你应该都指定,比如,喜欢“护士”和“情侣”系列,先简译繁 http://www.aies.cn/ ,简译日 http://fanyi.baidu.com/#zh/jp/ ,再由 --topics-range 指定搜索的帖子数量,由 --like 指定搜索关键字: 289 | ``` 290 | --topics-range 1024 --like 护士 護士 看護婦 情侣 情侶 カップル 291 | ``` 292 | 293 | **Q7**:如何下载高清? 294 | **A7**:hardseed 并不直接支持高清类型下载,只能间接实现,由 --topics-range 指定搜索的帖子数量,由 --like 指定“高清”相关关键字进行下载,比如: 295 | ``` 296 | --topics-range 1024 --like 1080P 720P HD 高清 ハイビジョン 297 | ``` 298 | 299 | **Q8**:为何有些种子和图片名是无意义字符,类似 (rename)bltouujdrbwcrrcg.torrent? 300 | **A8**:OS 对文件名长度是有限制的,hardseed 是以帖子名作为种子和图片的文件名,一旦帖子名超长将导致文件名超长。由于 hardseed 是采用 ASCII 而非 UNICODE 作为字符存储方式,一个文字可能占一个字节(如,字母“a”)也可能占两个字节(如,汉字“好”),假如文件名最后一个文字是“好”,且刚好文件名超长了一个字节,如果 hardseed 简单地截断“好”的第二个字节,那将导致整个文件名变成乱码。所以,hardseed 用了另外种变通方式,取 16 个 a-z 间的随机字母以及前缀“(rename)”作为文件基础名。 301 | 302 | **Q9**:为何相同的图片要下载两次? 303 | **A9**:有些发帖者担心单一图床挂掉,一般将同个图片上传到两个不同图床上,在帖子中同时发布两个图床的不同地址,hardseed 无法判断图片是否相同(其实非要弄也是可以实现的,只请求 HTTP 头,判断下两个图片的大小及最后更新时间,我觉得没这个必要),所以都下载。 304 | 305 | **Q10**:为何常有类似下面的图片下载报错 306 | ``` 307 | failure (download error from http://cl.man.lv/htm_data/2/1407/1174338.html. pictures error: http://p1.imageab.com/2014/07/24/902135bff7a83cd71836764b795c0879.jpg, http://p1.imageab.com/2014/07/24/6cea50f80bba80536ba6cd9da7ba17df.jpg ) 308 | ``` 309 | **A10**:几张图片下载失败无伤大雅。具体原因很多,常见如下: 310 | * 图床挂了,hardseed 无能为力; 311 | * 发帖者发布的图片 URL 有误,hardseed 无能为力; 312 | * 图片太大、网速太慢,hardseed 在 --timeout-download-picture 指定时间内(默认 16 秒)未下载完整,这时,你可以将 --timeout-download-picture 指定为更大的下载等待时长(如,64),但这会增加整个下载时长; 313 | * 代理服务器限制下载,禁用其他代理只用 goagent。 314 | 315 | **Q11**:我没指定任何忽略关键字,为什么 hardseed 强制取消下载“连发, 連发, 连發, 連發, 连弹, ★㊣, 合辑, 合集, 合輯, nike, 最新の美女骑兵㊣, 精選, 精选”这类合集帖子? 316 | **A11**:两方面原因。一方面,合集均是把以往的单个帖子合并一起再发布,完全重复;一方面,虽然帖子中有多部不同片子的图片,但实际上帖子中的种子只是其中一部片子的,没有意义。 317 | 318 | **Q12**:很多片子迅雷报违规资源,下载速度奇慢,如何破? 
319 | **A12**:**第一**,尽可能下新片,道理很简单,越新的片子被举报违规的可能性越小,具体而言,你应该用 hardseed 抓取最新帖子的种子,并且尽可能及时下载;**第二**,借助第三方工具一定程度绕开迅雷对违规资源的限制,ThunderSuperSpeedHacker(《论逆向工程的重要性》,唉,当年多么痴迷 (°Д°)),前提你必须是迅雷会员,否则任何方法均无效。迅雷通过离线空间和高速通道两种途径为会员提速,一旦发现违规资源则关闭离线空间和高速通道两个途径,离线空间是否开启是在服务端控制,客户端的任何外力作用均无效,但是,高速通道是否开启则是在客户端控制,这就为第三方工具强制开启高速通道提供了环境,ThunderSuperSpeedHacker 可以做到。用法很简单,先退出迅雷相关进程(thunder.exe、thunderplatform.exe),再运行 ThunderSuperSpeedHacker 点击“破解”即可。那么,有了 ThunderSuperSpeedHacker 是否一定就能享受高速通道了么?不一定,ThunderSuperSpeedHacker 对迅雷版本敏感。对于迅雷 v7.9.37.4952 及后续版本,一旦 ThunderSuperSpeedHacker 介入将导致迅雷僵死。解决办法: 320 | 0)首先,下载老版本迅雷。有很多网站提供迅雷历史版本下载,不过,安全原则之一,尽可能从官网下载,所以,我只信任迅雷官网上的历史版本。在迅雷首页(http://www.kankan.com/ )右上角有最新版迅雷下载地址,也就是说,要找到迅雷历史版本下载地址,只要找到迅雷官网首页某个历史快照即可,用时光机器(http://web.archive.org/web/ 墙外)很容易做到,比如,4 月 1 号的首页快照(http://web.archive.org/web/20150401032902/http://www.kankan.com/ )对应版本 v7.9.34.4908,下载地址为 http://down.sandai.net/thunder7/Thunder_kk_7.9.34.4908Preview.exe ; 321 | 1)接着,防止自动升级。一旦运行迅雷,它将在后台自动强制升级至最新版,所以,你得暴力阻止其升级,删除升级相关程序(xlliveud.exe、liveudinstaller.exe、thunderliveupdate.xar)即可; 322 | 2)最后,使用 ThunderSuperSpeedHacker 破解违规资源高速通道限制即可。 323 | 324 | **Q13**:hardseed 在 windows 环境下载的文件部分无法删除? 325 | **A13**:hardseed 正在写文件时被 ctrl-c 强制退出,文件锁未被 cyg\*.dll 释放,而 cyg\*.dll 已加载至 CMD 进程空间,所以,请先关闭所有 CMD 窗口,尝试删除相关文件,若不行,请再开新 CMD 窗口后执行 326 | ``` 327 | X:\> rd /S C:\[aicheng_west][1~128]@010825\ 328 | ``` 329 | 330 | **Q14**:为何出现类似如下报错? 331 | ``` 332 | "" - failure (download error from http://cl.man.lv/htm_data/4/1408/1189943.html. 
seed error: ) 333 | ``` 334 | **A14**:代理工具的问题。你知道,hardseed 默认采用使用 goagent 作为代理工具,一方面它算是目前使用门槛最低的代理工具,但同时,另一方面它也存在并发请求数过低的限制,一旦并发数过高,goagent 代理返回的都是空白文件,这直接导致 hardseed 抛出如上错误信息。所以,我给你两方面的建议: 335 | * 弃用 goagent,换用 shadowsocks。shadowsocks 轻量代理,速度非常优雅,我曾对它有过简单介绍,https://github.com/yangyangwithgnu/the_new_world_linux#3.2.4 。考虑到 goagent 的并发限制,--concurrent-tasks 默认设置为 8,现在改用 shadowsocks,你完全可以将 --concurrent-tasks 设置成 32 或者更大的数字,你会发现,下载 128 张帖子也就半分钟的事儿; 336 | * 如果你仍坚持使用 goagent,请 --concurrent-tasks 减小至 4 或者更小的数字。 337 | 338 | 339 | ##忠告 340 | ------------- 341 | 342 | 你,党之栋梁、国之人才,注意身体,千万! 343 | 344 | -------------------------------------------------------------------------------- /bin/build_4_win.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/bin/build_4_win.7z -------------------------------------------------------------------------------- /build/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | PROJECT(main) 2 | 3 | 4 | SET(SRC_LIST ../src/main.cpp 5 | ../src/lib/self/TopicsListWebpage.cpp ../src/lib/self/AichengTopicsListWebpage.cpp ../src/lib/self/CaoliuTopicsListWebpage.cpp 6 | ../src/lib/self/TopicWebpage.cpp ../src/lib/self/AichengTopicWebpage.cpp ../src/lib/self/CaoliuTopicWebpage.cpp 7 | ../src/lib/self/SeedWebpage.cpp ../src/lib/self/JandownSeedWebpage.cpp ../src/lib/self/RmdownSeedWebpage.cpp 8 | ../src/lib/self/Aicheng.cpp ../src/lib/self/Caoliu.cpp 9 | ../src/lib/helper/Webpage.cpp 10 | ../src/lib/helper/Time.cpp ../src/lib/helper/CmdlineOption.cpp ../src/lib/helper/Misc.cpp 11 | ../src/lib/3rd/json11/json11.cpp) 12 | 13 | # linux 14 | #>>>>>>>>>>>>>>>>>>>>>> 15 | 16 | ## debug 17 | #SET(CMAKE_CXX_COMPILER "clang++") 18 | #SET(CMAKE_CXX_FLAGS "-std=c++11 -Werror -Weverything -Wno-documentation -Wno-disabled-macro-expansion -Wno-float-equal -Wno-c++98-compat 
-Wno-c++98-compat-pedantic -Wno-global-constructors -Wno-exit-time-destructors -Wno-missing-prototypes -Wno-padded -Wno-old-style-cast -Wno-weak-vtables") 19 | #SET(CMAKE_BUILD_TYPE debug) 20 | #ADD_EXECUTABLE(main ${SRC_LIST}) 21 | #TARGET_LINK_LIBRARIES(main curl pthread) 22 | 23 | # release 24 | SET(CMAKE_CXX_COMPILER "g++") 25 | SET(CMAKE_CXX_FLAGS "-std=c++11 -O3") 26 | SET(CMAKE_BUILD_TYPE release) 27 | ADD_EXECUTABLE(hardseed ${SRC_LIST}) 28 | TARGET_LINK_LIBRARIES(hardseed curl pthread) 29 | INSTALL(PROGRAMS hardseed DESTINATION /usr/local/bin) 30 | 31 | #<<<<<<<<<<<<<<<<<<<<<< 32 | 33 | 34 | ## cygwin 35 | ##>>>>>>>>>>>>>>>>>>>>>> 36 | 37 | #SET(CMAKE_CXX_COMPILER "g++") 38 | #SET(CMAKE_CXX_FLAGS "-std=c++11 -O3 -s -DCYGWIN") 39 | #SET(CMAKE_BUILD_TYPE release) 40 | #ADD_EXECUTABLE(hardseed ${SRC_LIST}) 41 | #target_link_libraries(hardseed /bin/cygcurl-4.dll) 42 | #target_link_libraries(hardseed /lib/libiconv.a) 43 | 44 | ##<<<<<<<<<<<<<<<<<<<<<< 45 | 46 | 47 | ## osX 48 | ##>>>>>>>>>>>>>>>>>>>>>> 49 | 50 | #SET(CMAKE_CXX_COMPILER "g++") 51 | #SET(CMAKE_CXX_FLAGS "-std=c++11 -O3") 52 | #SET(CMAKE_BUILD_TYPE release) 53 | #ADD_EXECUTABLE(hardseed ${SRC_LIST}) 54 | #TARGET_LINK_LIBRARIES(hardseed curl pthread iconv) 55 | #INSTALL(PROGRAMS hardseed DESTINATION /usr/local/bin) 56 | 57 | ##<<<<<<<<<<<<<<<<<<<<<< 58 | -------------------------------------------------------------------------------- /config/portals_list.json: -------------------------------------------------------------------------------- 1 | { 2 | "caoliu":"http://cl.bearhk.info/", 3 | "aicheng":"http://www.ac168.info/bt/" 4 | } 5 | -------------------------------------------------------------------------------- /pic/hardseed.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/pic/hardseed.gif 
-------------------------------------------------------------------------------- /pic/running.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/pic/running.gif -------------------------------------------------------------------------------- /pic/seeds_and_pics.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/pic/seeds_and_pics.gif -------------------------------------------------------------------------------- /src/.ycm_extra_conf.py: -------------------------------------------------------------------------------- 1 | # This file is NOT licensed under the GPLv3, which is the license for the rest 2 | # of YouCompleteMe. 3 | # 4 | # Here's the license text for this file: 5 | # 6 | # This is free and unencumbered software released into the public domain. 7 | # 8 | # Anyone is free to copy, modify, publish, use, compile, sell, or 9 | # distribute this software, either in source code form or as a compiled 10 | # binary, for any purpose, commercial or non-commercial, and by any 11 | # means. 12 | # 13 | # In jurisdictions that recognize copyright laws, the author or authors 14 | # of this software dedicate any and all copyright interest in the 15 | # software to the public domain. We make this dedication for the benefit 16 | # of the public at large and to the detriment of our heirs and 17 | # successors. We intend this dedication to be an overt act of 18 | # relinquishment in perpetuity of all present and future rights to this 19 | # software under copyright law. 20 | # 21 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 22 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 23 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
24 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 25 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 26 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 27 | # OTHER DEALINGS IN THE SOFTWARE. 28 | # 29 | # For more information, please refer to 30 | 31 | import os 32 | import ycm_core 33 | 34 | # These are the compilation flags that will be used in case there's no 35 | # compilation database set (by default, one is not set). 36 | # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. 37 | flags = [ 38 | '-std=c++11', 39 | '-Werror', 40 | '-Weverything', 41 | '-Wno-documentation', 42 | '-Wno-deprecated-declarations', 43 | '-Wno-disabled-macro-expansion', 44 | '-Wno-float-equal', 45 | '-Wno-c++98-compat', 46 | '-Wno-c++98-compat-pedantic', 47 | '-Wno-global-constructors', 48 | '-Wno-exit-time-destructors', 49 | '-Wno-missing-prototypes', 50 | '-Wno-padded', 51 | '-Wno-old-style-cast', 52 | '-x', 53 | 'c++', 54 | '-I', 55 | '.', 56 | 'isystem', 57 | '/usr/include/', 58 | ] 59 | 60 | 61 | # Set this to the absolute path to the folder (NOT the file!) containing the 62 | # compile_commands.json file to use that instead of 'flags'. See here for 63 | # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html 64 | # 65 | # Most projects will NOT need to set this to anything; you can just change the 66 | # 'flags' list of compilation flags. Notice that YCM itself uses that approach. 
67 | compilation_database_folder = '' 68 | 69 | if compilation_database_folder: 70 | database = ycm_core.CompilationDatabase( compilation_database_folder ) 71 | else: 72 | database = None 73 | 74 | SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ] 75 | 76 | def DirectoryOfThisScript(): 77 | return os.path.dirname( os.path.abspath( __file__ ) ) 78 | 79 | 80 | def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): 81 | if not working_directory: 82 | return list( flags ) 83 | new_flags = [] 84 | make_next_absolute = False 85 | path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] 86 | for flag in flags: 87 | new_flag = flag 88 | 89 | if make_next_absolute: 90 | make_next_absolute = False 91 | if not flag.startswith( '/' ): 92 | new_flag = os.path.join( working_directory, flag ) 93 | 94 | for path_flag in path_flags: 95 | if flag == path_flag: 96 | make_next_absolute = True 97 | break 98 | 99 | if flag.startswith( path_flag ): 100 | path = flag[ len( path_flag ): ] 101 | new_flag = path_flag + os.path.join( working_directory, path ) 102 | break 103 | 104 | if new_flag: 105 | new_flags.append( new_flag ) 106 | return new_flags 107 | 108 | 109 | def IsHeaderFile( filename ): 110 | extension = os.path.splitext( filename )[ 1 ] 111 | return extension in [ '.h', '.hxx', '.hpp', '.hh' ] 112 | 113 | 114 | def GetCompilationInfoForFile( filename ): 115 | # The compilation_commands.json file generated by CMake does not have entries 116 | # for header files. So we do our best by asking the db for flags for a 117 | # corresponding source file, if any. If one exists, the flags for that file 118 | # should be good enough. 
119 | if IsHeaderFile( filename ): 120 | basename = os.path.splitext( filename )[ 0 ] 121 | for extension in SOURCE_EXTENSIONS: 122 | replacement_file = basename + extension 123 | if os.path.exists( replacement_file ): 124 | compilation_info = database.GetCompilationInfoForFile( 125 | replacement_file ) 126 | if compilation_info.compiler_flags_: 127 | return compilation_info 128 | return None 129 | return database.GetCompilationInfoForFile( filename ) 130 | 131 | 132 | def FlagsForFile( filename, **kwargs ): 133 | if database: 134 | # Bear in mind that compilation_info.compiler_flags_ does NOT return a 135 | # python list, but a "list-like" StringVec object 136 | compilation_info = GetCompilationInfoForFile( filename ) 137 | if not compilation_info: 138 | return None 139 | 140 | final_flags = MakeRelativePathsInFlagsAbsolute( 141 | compilation_info.compiler_flags_, 142 | compilation_info.compiler_working_dir_ ) 143 | else: 144 | relative_to = DirectoryOfThisScript() 145 | final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) 146 | 147 | return { 148 | 'flags': final_flags, 149 | 'do_cache': True 150 | } 151 | -------------------------------------------------------------------------------- /src/.ycm_extra_conf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyangwithgnu/hardseed/a1cf1be1d71fac52318e7c3cd396f95739a17920/src/.ycm_extra_conf.pyc -------------------------------------------------------------------------------- /src/lib/3rd/json11/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Dropbox, Inc. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /src/lib/3rd/json11/README.md: -------------------------------------------------------------------------------- 1 | json11 2 | ------ 3 | 4 | json11 is a tiny JSON library for C++11, providing JSON parsing and serialization. 5 | 6 | The core object provided by the library is json11::Json. A Json object represents any JSON 7 | value: null, bool, number (int or double), string (std::string), array (std::vector), or 8 | object (std::map). 9 | 10 | Json objects act like values. They can be assigned, copied, moved, compared for equality or 11 | order, and so on. There are also helper methods Json::dump, to serialize a Json to a string, and 12 | Json::parse (static) to parse a std::string as a Json object. 
13 | 14 | It's easy to make a JSON object with C++11's new initializer syntax: 15 | 16 | Json my_json = Json::object { 17 | { "key1", "value1" }, 18 | { "key2", false }, 19 | { "key3", Json::array { 1, 2, 3 } }, 20 | }; 21 | std::string json_str = my_json.dump(); 22 | 23 | There are also implicit constructors that allow standard and user-defined types to be 24 | automatically converted to JSON. For example: 25 | 26 | class Point { 27 | public: 28 | int x; 29 | int y; 30 | Point (int x, int y) : x(x), y(y) {} 31 | Json to_json() const { return Json::array { x, y }; } 32 | }; 33 | 34 | std::vector points = { { 1, 2 }, { 10, 20 }, { 100, 200 } }; 35 | std::string points_json = Json(points).dump(); 36 | 37 | JSON values can have their values queried and inspected: 38 | 39 | Json json = Json::array { Json::object { { "k", "v" } } }; 40 | std::string str = json[0]["k"].string_value(); 41 | 42 | More documentation is still to come. For now, see json11.hpp. 43 | -------------------------------------------------------------------------------- /src/lib/3rd/json11/json11.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2013 Dropbox, Inc. 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to deal 5 | * in the Software without restriction, including without limitation the rights 6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | * copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 
12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | * THE SOFTWARE. 20 | */ 21 | 22 | #include "json11.hpp" 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | namespace json11 { 29 | 30 | static const int max_depth = 200; 31 | 32 | using std::string; 33 | using std::vector; 34 | using std::map; 35 | using std::make_shared; 36 | using std::initializer_list; 37 | using std::move; 38 | 39 | /* * * * * * * * * * * * * * * * * * * * 40 | * Serialization 41 | */ 42 | 43 | static void dump(std::nullptr_t, string &out) { 44 | out += "null"; 45 | } 46 | 47 | static void dump(double value, string &out) { 48 | char buf[32]; 49 | snprintf(buf, sizeof buf, "%.17g", value); 50 | out += buf; 51 | } 52 | 53 | static void dump(int value, string &out) { 54 | char buf[32]; 55 | snprintf(buf, sizeof buf, "%d", value); 56 | out += buf; 57 | } 58 | 59 | static void dump(bool value, string &out) { 60 | out += value ? 
"true" : "false"; 61 | } 62 | 63 | static void dump(const string &value, string &out) { 64 | out += '"'; 65 | for (size_t i = 0; i < value.length(); i++) { 66 | const char ch = value[i]; 67 | if (ch == '\\') { 68 | out += "\\\\"; 69 | } else if (ch == '"') { 70 | out += "\\\""; 71 | } else if (ch == '\b') { 72 | out += "\\b"; 73 | } else if (ch == '\f') { 74 | out += "\\f"; 75 | } else if (ch == '\n') { 76 | out += "\\n"; 77 | } else if (ch == '\r') { 78 | out += "\\r"; 79 | } else if (ch == '\t') { 80 | out += "\\t"; 81 | } else if (static_cast(ch) <= 0x1f) { 82 | char buf[8]; 83 | snprintf(buf, sizeof buf, "\\u%04x", ch); 84 | out += buf; 85 | } else if (static_cast(ch) == 0xe2 && static_cast(value[i+1]) == 0x80 86 | && static_cast(value[i+2]) == 0xa8) { 87 | out += "\\u2028"; 88 | i += 2; 89 | } else if (static_cast(ch) == 0xe2 && static_cast(value[i+1]) == 0x80 90 | && static_cast(value[i+2]) == 0xa9) { 91 | out += "\\u2029"; 92 | i += 2; 93 | } else { 94 | out += ch; 95 | } 96 | } 97 | out += '"'; 98 | } 99 | 100 | static void dump(const Json::array &values, string &out) { 101 | bool first = true; 102 | out += "["; 103 | for (const auto &value : values) { 104 | if (!first) 105 | out += ", "; 106 | value.dump(out); 107 | first = false; 108 | } 109 | out += "]"; 110 | } 111 | 112 | static void dump(const Json::object &values, string &out) { 113 | bool first = true; 114 | out += "{"; 115 | for (const auto &kv : values) { 116 | if (!first) 117 | out += ", "; 118 | dump(kv.first, out); 119 | out += ": "; 120 | kv.second.dump(out); 121 | first = false; 122 | } 123 | out += "}"; 124 | } 125 | 126 | void Json::dump(string &out) const { 127 | m_ptr->dump(out); 128 | } 129 | 130 | /* * * * * * * * * * * * * * * * * * * * 131 | * Value wrappers 132 | */ 133 | 134 | template 135 | class Value : public JsonValue { 136 | protected: 137 | 138 | // Constructors 139 | explicit Value(const T &value) : m_value(value) {} 140 | explicit Value(T &&value) : m_value(move(value)) {} 
141 | 142 | // Get type tag 143 | Json::Type type() const override { 144 | return tag; 145 | } 146 | 147 | // Comparisons 148 | bool equals(const JsonValue * other) const override { 149 | return m_value == static_cast *>(other)->m_value; 150 | } 151 | bool less(const JsonValue * other) const override { 152 | return m_value < static_cast *>(other)->m_value; 153 | } 154 | 155 | const T m_value; 156 | void dump(string &out) const override { json11::dump(m_value, out); } 157 | }; 158 | 159 | class JsonDouble final : public Value { 160 | double number_value() const override { return m_value; } 161 | int int_value() const override { return static_cast(m_value); } 162 | bool equals(const JsonValue * other) const override { return m_value == other->number_value(); } 163 | bool less(const JsonValue * other) const override { return m_value < other->number_value(); } 164 | public: 165 | explicit JsonDouble(double value) : Value(value) {} 166 | }; 167 | 168 | class JsonInt final : public Value { 169 | double number_value() const override { return m_value; } 170 | int int_value() const override { return m_value; } 171 | bool equals(const JsonValue * other) const override { return m_value == other->number_value(); } 172 | bool less(const JsonValue * other) const override { return m_value < other->number_value(); } 173 | public: 174 | explicit JsonInt(int value) : Value(value) {} 175 | }; 176 | 177 | class JsonBoolean final : public Value { 178 | bool bool_value() const override { return m_value; } 179 | public: 180 | explicit JsonBoolean(bool value) : Value(value) {} 181 | }; 182 | 183 | class JsonString final : public Value { 184 | const string &string_value() const override { return m_value; } 185 | public: 186 | explicit JsonString(const string &value) : Value(value) {} 187 | explicit JsonString(string &&value) : Value(move(value)) {} 188 | }; 189 | 190 | class JsonArray final : public Value { 191 | const Json::array &array_items() const override { return m_value; } 192 | 
const Json & operator[](size_t i) const override; 193 | public: 194 | explicit JsonArray(const Json::array &value) : Value(value) {} 195 | explicit JsonArray(Json::array &&value) : Value(move(value)) {} 196 | }; 197 | 198 | class JsonObject final : public Value { 199 | const Json::object &object_items() const override { return m_value; } 200 | const Json & operator[](const string &key) const override; 201 | public: 202 | explicit JsonObject(const Json::object &value) : Value(value) {} 203 | explicit JsonObject(Json::object &&value) : Value(move(value)) {} 204 | }; 205 | 206 | class JsonNull final : public Value { 207 | public: 208 | JsonNull() : Value(nullptr) {} 209 | }; 210 | 211 | /* * * * * * * * * * * * * * * * * * * * 212 | * Static globals - static-init-safe 213 | */ 214 | struct Statics { 215 | const std::shared_ptr null = make_shared(); 216 | const std::shared_ptr t = make_shared(true); 217 | const std::shared_ptr f = make_shared(false); 218 | const string empty_string; 219 | const vector empty_vector; 220 | const map empty_map; 221 | Statics() {} 222 | }; 223 | 224 | const Statics & statics() { 225 | static const Statics s {}; 226 | return s; 227 | } 228 | 229 | const Json & static_null() { 230 | // This has to be separate, not in Statics, because Json() accesses statics().null. 231 | static const Json json_null; 232 | return json_null; 233 | } 234 | 235 | /* * * * * * * * * * * * * * * * * * * * 236 | * Constructors 237 | */ 238 | 239 | Json::Json() noexcept : m_ptr(statics().null) {} 240 | Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {} 241 | Json::Json(double value) : m_ptr(make_shared(value)) {} 242 | Json::Json(int value) : m_ptr(make_shared(value)) {} 243 | Json::Json(bool value) : m_ptr(value ? 
statics().t : statics().f) {} 244 | Json::Json(const string &value) : m_ptr(make_shared(value)) {} 245 | Json::Json(string &&value) : m_ptr(make_shared(move(value))) {} 246 | Json::Json(const char * value) : m_ptr(make_shared(value)) {} 247 | Json::Json(const Json::array &values) : m_ptr(make_shared(values)) {} 248 | Json::Json(Json::array &&values) : m_ptr(make_shared(move(values))) {} 249 | Json::Json(const Json::object &values) : m_ptr(make_shared(values)) {} 250 | Json::Json(Json::object &&values) : m_ptr(make_shared(move(values))) {} 251 | 252 | /* * * * * * * * * * * * * * * * * * * * 253 | * Accessors 254 | */ 255 | 256 | Json::Type Json::type() const { return m_ptr->type(); } 257 | double Json::number_value() const { return m_ptr->number_value(); } 258 | int Json::int_value() const { return m_ptr->int_value(); } 259 | bool Json::bool_value() const { return m_ptr->bool_value(); } 260 | const string & Json::string_value() const { return m_ptr->string_value(); } 261 | const vector & Json::array_items() const { return m_ptr->array_items(); } 262 | const map & Json::object_items() const { return m_ptr->object_items(); } 263 | const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; } 264 | const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; } 265 | 266 | double JsonValue::number_value() const { return 0; } 267 | int JsonValue::int_value() const { return 0; } 268 | bool JsonValue::bool_value() const { return false; } 269 | const string & JsonValue::string_value() const { return statics().empty_string; } 270 | const vector & JsonValue::array_items() const { return statics().empty_vector; } 271 | const map & JsonValue::object_items() const { return statics().empty_map; } 272 | const Json & JsonValue::operator[] (size_t) const { return static_null(); } 273 | const Json & JsonValue::operator[] (const string &) const { return static_null(); } 274 | 275 | const Json & JsonObject::operator[] (const string &key) const { 276 | auto 
iter = m_value.find(key); 277 | return (iter == m_value.end()) ? static_null() : iter->second; 278 | } 279 | const Json & JsonArray::operator[] (size_t i) const { 280 | if (i >= m_value.size()) return static_null(); 281 | else return m_value[i]; 282 | } 283 | 284 | /* * * * * * * * * * * * * * * * * * * * 285 | * Comparison 286 | */ 287 | 288 | bool Json::operator== (const Json &other) const { 289 | if (m_ptr->type() != other.m_ptr->type()) 290 | return false; 291 | 292 | return m_ptr->equals(other.m_ptr.get()); 293 | } 294 | 295 | bool Json::operator< (const Json &other) const { 296 | if (m_ptr->type() != other.m_ptr->type()) 297 | return m_ptr->type() < other.m_ptr->type(); 298 | 299 | return m_ptr->less(other.m_ptr.get()); 300 | } 301 | 302 | /* * * * * * * * * * * * * * * * * * * * 303 | * Parsing 304 | */ 305 | 306 | /* esc(c) 307 | * 308 | * Format char c suitable for printing in an error message. 309 | */ 310 | static inline string esc(char c) { 311 | char buf[12]; 312 | if (static_cast(c) >= 0x20 && static_cast(c) <= 0x7f) { 313 | snprintf(buf, sizeof buf, "'%c' (%d)", c, c); 314 | } else { 315 | snprintf(buf, sizeof buf, "(%d)", c); 316 | } 317 | return string(buf); 318 | } 319 | 320 | static inline bool in_range(long x, long lower, long upper) { 321 | return (x >= lower && x <= upper); 322 | } 323 | 324 | /* JsonParser 325 | * 326 | * Object that tracks all state of an in-progress parse. 327 | */ 328 | struct JsonParser { 329 | 330 | /* State 331 | */ 332 | const string &str; 333 | size_t i; 334 | string &err; 335 | bool failed; 336 | 337 | /* fail(msg, err_ret = Json()) 338 | * 339 | * Mark this parse as failed. 
340 | */ 341 | Json fail(string &&msg) { 342 | return fail(move(msg), Json()); 343 | } 344 | 345 | template 346 | T fail(string &&msg, const T err_ret) { 347 | if (!failed) 348 | err = std::move(msg); 349 | failed = true; 350 | return err_ret; 351 | } 352 | 353 | /* consume_whitespace() 354 | * 355 | * Advance until the current character is non-whitespace. 356 | */ 357 | void consume_whitespace() { 358 | while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t') 359 | i++; 360 | } 361 | 362 | /* get_next_token() 363 | * 364 | * Return the next non-whitespace character. If the end of the input is reached, 365 | * flag an error and return 0. 366 | */ 367 | char get_next_token() { 368 | consume_whitespace(); 369 | if (i == str.size()) 370 | return (char)(fail("unexpected end of input", 0)); 371 | 372 | return str[i++]; 373 | } 374 | 375 | /* encode_utf8(pt, out) 376 | * 377 | * Encode pt as UTF-8 and add it to out. 378 | */ 379 | void encode_utf8(long pt, string & out) { 380 | if (pt < 0) 381 | return; 382 | 383 | if (pt < 0x80) { 384 | out += static_cast(pt); 385 | } else if (pt < 0x800) { 386 | out += static_cast((pt >> 6) | 0xC0); 387 | out += static_cast((pt & 0x3F) | 0x80); 388 | } else if (pt < 0x10000) { 389 | out += static_cast((pt >> 12) | 0xE0); 390 | out += static_cast(((pt >> 6) & 0x3F) | 0x80); 391 | out += static_cast((pt & 0x3F) | 0x80); 392 | } else { 393 | out += static_cast((pt >> 18) | 0xF0); 394 | out += static_cast(((pt >> 12) & 0x3F) | 0x80); 395 | out += static_cast(((pt >> 6) & 0x3F) | 0x80); 396 | out += static_cast((pt & 0x3F) | 0x80); 397 | } 398 | } 399 | 400 | /* parse_string() 401 | * 402 | * Parse a string, starting at the current position. 
403 | */ 404 | string parse_string() { 405 | string out; 406 | long last_escaped_codepoint = -1; 407 | while (true) { 408 | if (i == str.size()) 409 | return fail("unexpected end of input in string", ""); 410 | 411 | char ch = str[i++]; 412 | 413 | if (ch == '"') { 414 | encode_utf8(last_escaped_codepoint, out); 415 | return out; 416 | } 417 | 418 | if (in_range(ch, 0, 0x1f)) 419 | return fail("unescaped " + esc(ch) + " in string", ""); 420 | 421 | // The usual case: non-escaped characters 422 | if (ch != '\\') { 423 | encode_utf8(last_escaped_codepoint, out); 424 | last_escaped_codepoint = -1; 425 | out += ch; 426 | continue; 427 | } 428 | 429 | // Handle escapes 430 | if (i == str.size()) 431 | return fail("unexpected end of input in string", ""); 432 | 433 | ch = str[i++]; 434 | 435 | if (ch == 'u') { 436 | // Extract 4-byte escape sequence 437 | string esc = str.substr(i, 4); 438 | // Explicitly check length of the substring. The following loop 439 | // relies on std::string returning the terminating NUL when 440 | // accessing str[length]. Checking here reduces brittleness. 441 | if (esc.length() < 4) { 442 | return fail("bad \\u escape: " + esc, ""); 443 | } 444 | for (int j = 0; j < 4; j++) { 445 | if (!in_range(esc[(unsigned int)j], 'a', 'f') && !in_range(esc[(unsigned int)j], 'A', 'F') 446 | && !in_range(esc[(unsigned int)j], '0', '9')) 447 | return fail("bad \\u escape: " + esc, ""); 448 | } 449 | 450 | long codepoint = strtol(esc.data(), nullptr, 16); 451 | 452 | // JSON specifies that characters outside the BMP shall be encoded as a pair 453 | // of 4-hex-digit \u escapes encoding their surrogate pair components. Check 454 | // whether we're in the middle of such a beast: the previous codepoint was an 455 | // escaped lead (high) surrogate, and this is a trail (low) surrogate. 
456 | if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF) 457 | && in_range(codepoint, 0xDC00, 0xDFFF)) { 458 | // Reassemble the two surrogate pairs into one astral-plane character, per 459 | // the UTF-16 algorithm. 460 | encode_utf8((((last_escaped_codepoint - 0xD800) << 10) 461 | | (codepoint - 0xDC00)) + 0x10000, out); 462 | last_escaped_codepoint = -1; 463 | } else { 464 | encode_utf8(last_escaped_codepoint, out); 465 | last_escaped_codepoint = codepoint; 466 | } 467 | 468 | i += 4; 469 | continue; 470 | } 471 | 472 | encode_utf8(last_escaped_codepoint, out); 473 | last_escaped_codepoint = -1; 474 | 475 | if (ch == 'b') { 476 | out += '\b'; 477 | } else if (ch == 'f') { 478 | out += '\f'; 479 | } else if (ch == 'n') { 480 | out += '\n'; 481 | } else if (ch == 'r') { 482 | out += '\r'; 483 | } else if (ch == 't') { 484 | out += '\t'; 485 | } else if (ch == '"' || ch == '\\' || ch == '/') { 486 | out += ch; 487 | } else { 488 | return fail("invalid escape character " + esc(ch), ""); 489 | } 490 | } 491 | } 492 | 493 | /* parse_number() 494 | * 495 | * Parse a double. 496 | */ 497 | Json parse_number() { 498 | size_t start_pos = i; 499 | 500 | if (str[i] == '-') 501 | i++; 502 | 503 | // Integer part 504 | if (str[i] == '0') { 505 | i++; 506 | if (in_range(str[i], '0', '9')) 507 | return fail("leading 0s not permitted in numbers"); 508 | } else if (in_range(str[i], '1', '9')) { 509 | i++; 510 | while (in_range(str[i], '0', '9')) 511 | i++; 512 | } else { 513 | return fail("invalid " + esc(str[i]) + " in number"); 514 | } 515 | 516 | if (str[i] != '.' 
&& str[i] != 'e' && str[i] != 'E' 517 | && (i - start_pos) <= static_cast(std::numeric_limits::digits10)) { 518 | return std::atoi(str.c_str() + start_pos); 519 | } 520 | 521 | // Decimal part 522 | if (str[i] == '.') { 523 | i++; 524 | if (!in_range(str[i], '0', '9')) 525 | return fail("at least one digit required in fractional part"); 526 | 527 | while (in_range(str[i], '0', '9')) 528 | i++; 529 | } 530 | 531 | // Exponent part 532 | if (str[i] == 'e' || str[i] == 'E') { 533 | i++; 534 | 535 | if (str[i] == '+' || str[i] == '-') 536 | i++; 537 | 538 | if (!in_range(str[i], '0', '9')) 539 | return fail("at least one digit required in exponent"); 540 | 541 | while (in_range(str[i], '0', '9')) 542 | i++; 543 | } 544 | 545 | return std::strtod(str.c_str() + start_pos, nullptr); 546 | } 547 | 548 | /* expect(str, res) 549 | * 550 | * Expect that 'str' starts at the character that was just read. If it does, advance 551 | * the input and return res. If not, flag an error. 552 | */ 553 | Json expect(const string &expected, Json res) { 554 | assert(i != 0); 555 | i--; 556 | if (str.compare(i, expected.length(), expected) == 0) { 557 | i += expected.length(); 558 | return res; 559 | } else { 560 | return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length())); 561 | } 562 | } 563 | 564 | /* parse_json() 565 | * 566 | * Parse a JSON object. 
567 | */ 568 | Json parse_json(int depth) { 569 | if (depth > max_depth) { 570 | return fail("exceeded maximum nesting depth"); 571 | } 572 | 573 | char ch = get_next_token(); 574 | if (failed) 575 | return Json(); 576 | 577 | if (ch == '-' || (ch >= '0' && ch <= '9')) { 578 | i--; 579 | return parse_number(); 580 | } 581 | 582 | if (ch == 't') 583 | return expect("true", true); 584 | 585 | if (ch == 'f') 586 | return expect("false", false); 587 | 588 | if (ch == 'n') 589 | return expect("null", Json()); 590 | 591 | if (ch == '"') 592 | return parse_string(); 593 | 594 | if (ch == '{') { 595 | map data; 596 | ch = get_next_token(); 597 | if (ch == '}') 598 | return data; 599 | 600 | while (1) { 601 | if (ch != '"') 602 | return fail("expected '\"' in object, got " + esc(ch)); 603 | 604 | string key = parse_string(); 605 | if (failed) 606 | return Json(); 607 | 608 | ch = get_next_token(); 609 | if (ch != ':') 610 | return fail("expected ':' in object, got " + esc(ch)); 611 | 612 | data[std::move(key)] = parse_json(depth + 1); 613 | if (failed) 614 | return Json(); 615 | 616 | ch = get_next_token(); 617 | if (ch == '}') 618 | break; 619 | if (ch != ',') 620 | return fail("expected ',' in object, got " + esc(ch)); 621 | 622 | ch = get_next_token(); 623 | } 624 | return data; 625 | } 626 | 627 | if (ch == '[') { 628 | vector data; 629 | ch = get_next_token(); 630 | if (ch == ']') 631 | return data; 632 | 633 | while (1) { 634 | i--; 635 | data.push_back(parse_json(depth + 1)); 636 | if (failed) 637 | return Json(); 638 | 639 | ch = get_next_token(); 640 | if (ch == ']') 641 | break; 642 | if (ch != ',') 643 | return fail("expected ',' in list, got " + esc(ch)); 644 | 645 | ch = get_next_token(); 646 | (void)ch; 647 | } 648 | return data; 649 | } 650 | 651 | return fail("expected value, got " + esc(ch)); 652 | } 653 | }; 654 | 655 | Json Json::parse(const string &in, string &err) { 656 | JsonParser parser { in, 0, err, false }; 657 | Json result = parser.parse_json(0); 
658 | 659 | // Check for any trailing garbage 660 | parser.consume_whitespace(); 661 | if (parser.i != in.size()) 662 | return parser.fail("unexpected trailing " + esc(in[parser.i])); 663 | 664 | return result; 665 | } 666 | 667 | // Documented in json11.hpp 668 | vector Json::parse_multi(const string &in, string &err) { 669 | JsonParser parser { in, 0, err, false }; 670 | 671 | vector json_vec; 672 | while (parser.i != in.size() && !parser.failed) { 673 | json_vec.push_back(parser.parse_json(0)); 674 | // Check for another object 675 | parser.consume_whitespace(); 676 | } 677 | return json_vec; 678 | } 679 | 680 | /* * * * * * * * * * * * * * * * * * * * 681 | * Shape-checking 682 | */ 683 | 684 | bool Json::has_shape(const shape & types, string & err) const { 685 | if (!is_object()) { 686 | err = "expected JSON object, got " + dump(); 687 | return false; 688 | } 689 | 690 | for (auto & item : types) { 691 | if ((*this)[item.first].type() != item.second) { 692 | err = "bad type for " + item.first + " in " + dump(); 693 | return false; 694 | } 695 | } 696 | 697 | return true; 698 | } 699 | 700 | } // namespace json11 701 | -------------------------------------------------------------------------------- /src/lib/3rd/json11/json11.hpp: -------------------------------------------------------------------------------- 1 | /* json11 2 | * 3 | * json11 is a tiny JSON library for C++11, providing JSON parsing and serialization. 4 | * 5 | * The core object provided by the library is json11::Json. A Json object represents any JSON 6 | * value: null, bool, number (int or double), string (std::string), array (std::vector), or 7 | * object (std::map). 8 | * 9 | * Json objects act like values: they can be assigned, copied, moved, compared for equality or 10 | * order, etc. There are also helper methods Json::dump, to serialize a Json to a string, and 11 | * Json::parse (static) to parse a std::string as a Json object. 
12 | * 13 | * Internally, the various types of Json object are represented by the JsonValue class 14 | * hierarchy. 15 | * 16 | * A note on numbers - JSON specifies the syntax of number formatting but not its semantics, 17 | * so some JSON implementations distinguish between integers and floating-point numbers, while 18 | * some don't. In json11, we choose the latter. Because some JSON implementations (namely 19 | * Javascript itself) treat all numbers as the same type, distinguishing the two leads 20 | * to JSON that will be *silently* changed by a round-trip through those implementations. 21 | * Dangerous! To avoid that risk, json11 stores all numbers as double internally, but also 22 | * provides integer helpers. 23 | * 24 | * Fortunately, double-precision IEEE754 ('double') can precisely store any integer in the 25 | * range +/-2^53, which includes every 'int' on most systems. (Timestamps often use int64 26 | * or long long to avoid the Y2038K problem; a double storing microseconds since some epoch 27 | * will be exact for +/- 275 years.) 28 | */ 29 | 30 | /* Copyright (c) 2013 Dropbox, Inc. 31 | * 32 | * Permission is hereby granted, free of charge, to any person obtaining a copy 33 | * of this software and associated documentation files (the "Software"), to deal 34 | * in the Software without restriction, including without limitation the rights 35 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 36 | * copies of the Software, and to permit persons to whom the Software is 37 | * furnished to do so, subject to the following conditions: 38 | * 39 | * The above copyright notice and this permission notice shall be included in 40 | * all copies or substantial portions of the Software. 41 | * 42 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 43 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 44 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 45 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 46 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 47 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 48 | * THE SOFTWARE. 49 | */ 50 | 51 | #pragma once 52 | 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | 59 | namespace json11 { 60 | 61 | class JsonValue; 62 | 63 | class Json final { 64 | public: 65 | // Types 66 | enum Type { 67 | NUL, NUMBER, BOOL, STRING, ARRAY, OBJECT 68 | }; 69 | 70 | // Array and object typedefs 71 | typedef std::vector array; 72 | typedef std::map object; 73 | 74 | // Constructors for the various types of JSON value. 75 | Json() noexcept; // NUL 76 | Json(std::nullptr_t) noexcept; // NUL 77 | Json(double value); // NUMBER 78 | Json(int value); // NUMBER 79 | Json(bool value); // BOOL 80 | Json(const std::string &value); // STRING 81 | Json(std::string &&value); // STRING 82 | Json(const char * value); // STRING 83 | Json(const array &values); // ARRAY 84 | Json(array &&values); // ARRAY 85 | Json(const object &values); // OBJECT 86 | Json(object &&values); // OBJECT 87 | 88 | // Implicit constructor: anything with a to_json() function. 89 | template 90 | Json(const T & t) : Json(t.to_json()) {} 91 | 92 | // Implicit constructor: map-like objects (std::map, std::unordered_map, etc) 93 | template ::value 95 | && std::is_constructible::value, 96 | int>::type = 0> 97 | Json(const M & m) : Json(object(m.begin(), m.end())) {} 98 | 99 | // Implicit constructor: vector-like objects (std::list, std::vector, std::set, etc) 100 | template ::value, 102 | int>::type = 0> 103 | Json(const V & v) : Json(array(v.begin(), v.end())) {} 104 | 105 | // This prevents Json(some_pointer) from accidentally producing a bool. Use 106 | // Json(bool(some_pointer)) if that behavior is desired. 
107 | Json(void *) = delete; 108 | 109 | // Accessors 110 | Type type() const; 111 | 112 | bool is_null() const { return type() == NUL; } 113 | bool is_number() const { return type() == NUMBER; } 114 | bool is_bool() const { return type() == BOOL; } 115 | bool is_string() const { return type() == STRING; } 116 | bool is_array() const { return type() == ARRAY; } 117 | bool is_object() const { return type() == OBJECT; } 118 | 119 | // Return the enclosed value if this is a number, 0 otherwise. Note that json11 does not 120 | // distinguish between integer and non-integer numbers - number_value() and int_value() 121 | // can both be applied to a NUMBER-typed object. 122 | double number_value() const; 123 | int int_value() const; 124 | 125 | // Return the enclosed value if this is a boolean, false otherwise. 126 | bool bool_value() const; 127 | // Return the enclosed string if this is a string, "" otherwise. 128 | const std::string &string_value() const; 129 | // Return the enclosed std::vector if this is an array, or an empty vector otherwise. 130 | const array &array_items() const; 131 | // Return the enclosed std::map if this is an object, or an empty map otherwise. 132 | const object &object_items() const; 133 | 134 | // Return a reference to arr[i] if this is an array, Json() otherwise. 135 | const Json & operator[](size_t i) const; 136 | // Return a reference to obj[key] if this is an object, Json() otherwise. 137 | const Json & operator[](const std::string &key) const; 138 | 139 | // Serialize. 140 | void dump(std::string &out) const; 141 | std::string dump() const { 142 | std::string out; 143 | dump(out); 144 | return out; 145 | } 146 | 147 | // Parse. If parse fails, return Json() and assign an error message to err. 
148 | static Json parse(const std::string & in, std::string & err); 149 | static Json parse(const char * in, std::string & err) { 150 | if (in) { 151 | return parse(std::string(in), err); 152 | } else { 153 | err = "null input"; 154 | return nullptr; 155 | } 156 | } 157 | // Parse multiple objects, concatenated or separated by whitespace 158 | static std::vector parse_multi(const std::string & in, std::string & err); 159 | 160 | bool operator== (const Json &rhs) const; 161 | bool operator< (const Json &rhs) const; 162 | bool operator!= (const Json &rhs) const { return !(*this == rhs); } 163 | bool operator<= (const Json &rhs) const { return !(rhs < *this); } 164 | bool operator> (const Json &rhs) const { return (rhs < *this); } 165 | bool operator>= (const Json &rhs) const { return !(*this < rhs); } 166 | 167 | /* has_shape(types, err) 168 | * 169 | * Return true if this is a JSON object and, for each item in types, has a field of 170 | * the given type. If not, return false and set err to a descriptive message. 171 | */ 172 | typedef std::initializer_list> shape; 173 | bool has_shape(const shape & types, std::string & err) const; 174 | 175 | private: 176 | std::shared_ptr m_ptr; 177 | }; 178 | 179 | // Internal class hierarchy - JsonValue objects are not exposed to users of this API. 
180 | class JsonValue { 181 | protected: 182 | friend class Json; 183 | friend class JsonInt; 184 | friend class JsonDouble; 185 | virtual Json::Type type() const = 0; 186 | virtual bool equals(const JsonValue * other) const = 0; 187 | virtual bool less(const JsonValue * other) const = 0; 188 | virtual void dump(std::string &out) const = 0; 189 | virtual double number_value() const; 190 | virtual int int_value() const; 191 | virtual bool bool_value() const; 192 | virtual const std::string &string_value() const; 193 | virtual const Json::array &array_items() const; 194 | virtual const Json &operator[](size_t i) const; 195 | virtual const Json::object &object_items() const; 196 | virtual const Json &operator[](const std::string &key) const; 197 | virtual ~JsonValue() {} 198 | }; 199 | 200 | } // namespace json11 201 | -------------------------------------------------------------------------------- /src/lib/3rd/json11/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "json11.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace json11; 12 | using std::string; 13 | 14 | // Check that Json has the properties we want. 
15 | #include 16 | #define CHECK_TRAIT(x) static_assert(std::x::value, #x) 17 | CHECK_TRAIT(is_nothrow_constructible); 18 | CHECK_TRAIT(is_nothrow_default_constructible); 19 | CHECK_TRAIT(is_copy_constructible); 20 | CHECK_TRAIT(is_nothrow_move_constructible); 21 | CHECK_TRAIT(is_copy_assignable); 22 | CHECK_TRAIT(is_nothrow_move_assignable); 23 | CHECK_TRAIT(is_nothrow_destructible); 24 | 25 | void parse_from_stdin() { 26 | string buf; 27 | while (!std::cin.eof()) buf += std::cin.get(); 28 | 29 | string err; 30 | auto json = Json::parse(buf, err); 31 | if (!err.empty()) { 32 | printf("Failed: %s\n", err.c_str()); 33 | } else { 34 | printf("Result: %s\n", json.dump().c_str()); 35 | } 36 | } 37 | 38 | int main(int argc, char **argv) { 39 | if (argc == 2 && argv[1] == string("--stdin")) { 40 | parse_from_stdin(); 41 | return 0; 42 | } 43 | 44 | const string simple_test = 45 | R"({"k1":"v1", "k2":42, "k3":["a",123,true,false,null]})"; 46 | 47 | string err; 48 | auto json = Json::parse(simple_test, err); 49 | 50 | std::cout << "k1: " << json["k1"].string_value() << "\n"; 51 | std::cout << "k3: " << json["k3"].dump() << "\n"; 52 | 53 | for (auto &k : json["k3"].array_items()) { 54 | std::cout << " - " << k.dump() << "\n"; 55 | } 56 | 57 | std::list l1 { 1, 2, 3 }; 58 | std::vector l2 { 1, 2, 3 }; 59 | std::set l3 { 1, 2, 3 }; 60 | assert(Json(l1) == Json(l2)); 61 | assert(Json(l2) == Json(l3)); 62 | 63 | std::map m1 { { "k1", "v1" }, { "k2", "v2" } }; 64 | std::unordered_map m2 { { "k1", "v1" }, { "k2", "v2" } }; 65 | assert(Json(m1) == Json(m2)); 66 | 67 | // Json literals 68 | Json obj = Json::object({ 69 | { "k1", "v1" }, 70 | { "k2", 42.0 }, 71 | { "k3", Json::array({ "a", 123.0, true, false, nullptr }) }, 72 | }); 73 | 74 | std::cout << "obj: " << obj.dump() << "\n"; 75 | 76 | assert(Json("a").number_value() == 0); 77 | assert(Json("a").string_value() == "a"); 78 | assert(Json().number_value() == 0); 79 | 80 | assert(obj == json); 81 | assert(Json(42) == 
Json(42.0)); 82 | assert(Json(42) != Json(42.1)); 83 | 84 | const string unicode_escape_test = 85 | R"([ "blah\ud83d\udca9blah\ud83dblah\udca9blah\u0000blah\u1234" ])"; 86 | 87 | const char utf8[] = "blah" "\xf0\x9f\x92\xa9" "blah" "\xed\xa0\xbd" "blah" 88 | "\xed\xb2\xa9" "blah" "\0" "blah" "\xe1\x88\xb4"; 89 | 90 | Json uni = Json::parse(unicode_escape_test, err); 91 | assert(uni[0].string_value().size() == (sizeof utf8) - 1); 92 | assert(memcmp(uni[0].string_value().data(), utf8, sizeof utf8) == 0); 93 | 94 | Json my_json = Json::object { 95 | { "key1", "value1" }, 96 | { "key2", false }, 97 | { "key3", Json::array { 1, 2, 3 } }, 98 | }; 99 | std::string json_str = my_json.dump(); 100 | printf("%s\n", json_str.c_str()); 101 | 102 | class Point { 103 | public: 104 | int x; 105 | int y; 106 | Point (int x, int y) : x(x), y(y) {} 107 | Json to_json() const { return Json::array { x, y }; } 108 | }; 109 | 110 | std::vector points = { { 1, 2 }, { 10, 20 }, { 100, 200 } }; 111 | std::string points_json = Json(points).dump(); 112 | printf("%s\n", points_json.c_str()); 113 | } 114 | -------------------------------------------------------------------------------- /src/lib/helper/CmdlineOption.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "CmdlineOption.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | static bool 13 | isOption (const string& str) 14 | { 15 | return( str.size() >= 3 && // the shortest option "--x" 16 | '-' == str[0] && 17 | '-' == str[1] && 18 | '-' != str[2] ); 19 | } 20 | 21 | // cmdname --foo aa, for example, --foo is an option, aa is an argument. 
22 | // convention about command line option: 23 | // 0) option must begin with -- (so, the shortest option --x has three characters), and the argument cannot begin with --; 24 | // 1) an argument must follow after an option; 25 | // 2) an option can follow as an argument or not, E.G., some option for true or false. 26 | // one option by one more arguments? E.G., --bar a b c d. It's ok. 27 | CmdlineOption::CmdlineOption (unsigned argc, char* argv[]) 28 | { 29 | if (argc < 2) { 30 | return; 31 | } 32 | 33 | vector raw_options_list(argv + 1, argv + argc); 34 | 35 | string last_option; 36 | for (const auto& e : raw_options_list) { 37 | if (isOption(e)) { 38 | options_and_arguments_list_[e]; 39 | last_option = e; 40 | } else { 41 | if (!last_option.empty()) { 42 | options_and_arguments_list_[last_option].push_back(e); 43 | } 44 | } 45 | } 46 | 47 | //// DEBUG. show the result of parsing command options 48 | //for (const auto& e : options_and_arguments_list_) { 49 | //const vector& arguments_list = e.second; 50 | //cout << e.first << "(" << arguments_list.size() << "): "; 51 | //copy(e.second.cbegin(), e.second.cend(), ostream_iterator(cout, ",")); 52 | //cout << endl; 53 | //} 54 | } 55 | 56 | CmdlineOption::~CmdlineOption () 57 | { 58 | ; 59 | } 60 | 61 | bool 62 | CmdlineOption::hasOption (const string& option) const 63 | { 64 | return(options_and_arguments_list_.cend() != options_and_arguments_list_.find(option)); 65 | } 66 | 67 | const vector& 68 | CmdlineOption::getArgumentsList (const string& option) 69 | { 70 | static const vector empty_arguments_list; 71 | return(hasOption(option) ? 
options_and_arguments_list_[option] : empty_arguments_list); 72 | } 73 | 74 | -------------------------------------------------------------------------------- /src/lib/helper/CmdlineOption.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | #include 5 | #include 6 | #include 7 | 8 | using std::string; 9 | using std::pair; 10 | using std::vector; 11 | using std::unordered_map; 12 | 13 | class CmdlineOption 14 | { 15 | public: 16 | CmdlineOption (unsigned argc, char* argv[]); 17 | virtual ~CmdlineOption (); 18 | bool hasOption (const string& option) const; 19 | const vector& getArgumentsList (const string& option); 20 | 21 | private: 22 | unordered_map> options_and_arguments_list_; 23 | }; 24 | 25 | -------------------------------------------------------------------------------- /src/lib/helper/Misc.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "Misc.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | using std::bitset; 16 | using std::cout; 17 | using std::cerr; 18 | using std::endl; 19 | using std::make_pair; 20 | using std::ostream_iterator; 21 | 22 | // split raw string to more sub-str by token-chars. 23 | // note: 24 | // 0) case sensitive; 25 | // 1) if there are consecutive two token-chars in raw string, splitStr() 26 | // will make a empty sub-str into splited_substr_list. 
// parameters:
//   str                  - the raw string to split;
//   tokens_list          - every char in it is treated as a separator;
//   splited_substr_list  - receives the sub-strs, in order (appended to, never cleared);
//   appeared_tokens_list - receives each separator char met, in order (appended to).
// more notes:
//   2) if the first element of splited_substr_list is empty afterwards (E.G., str
//      begins with a token-char), that element is removed;
//   3) a token-char at the very end of str does NOT produce a trailing empty sub-str;
//   4) an empty str leaves both lists untouched.
void
splitStr ( const std::string& str,
           const std::string& tokens_list,
           std::vector<std::string>& splited_substr_list,
           std::vector<char>& appeared_tokens_list )
{
    std::string::size_type begin_pos = 0;
    while (begin_pos < str.size()) {
        const std::string::size_type token_pos = str.find_first_of(tokens_list, begin_pos);
        if (std::string::npos == token_pos) {
            // no separator left, the rest of str is the last sub-str
            splited_substr_list.push_back(str.substr(begin_pos));
            break;
        }

        appeared_tokens_list.push_back(str[token_pos]);
        splited_substr_list.push_back(str.substr(begin_pos, token_pos - begin_pos));

        begin_pos = token_pos + 1;
    }

    // the list may still be empty here (E.G., str is empty), so check before
    // touching its first element
    if (!splited_substr_list.empty() && splited_substr_list.front().empty()) {
        splited_substr_list.erase(splited_substr_list.begin());
    }
}

// first return is the string between keyword_begin and keyword_end;
// second return is end_pos + keyword_end.size(), I.E., the offset just behind
// keyword_end, which can be used as from_pos of the next call.
// the search for keyword_begin starts at from_pos, the search for keyword_end
// starts just behind keyword_begin.
// if either keyword cannot be found, return ("", 0).
std::pair<std::string, size_t>
fetchStringBetweenKeywords ( const std::string& txt,
                             const std::string& keyword_begin,
                             const std::string& keyword_end,
                             size_t from_pos )
{
    const std::string::size_type begin_pos = txt.find(keyword_begin, from_pos);
    if (std::string::npos == begin_pos) {
        // keyword_begin not found
        return(std::make_pair(std::string(), size_t(0)));
    }

    const std::string::size_type content_pos = begin_pos + keyword_begin.size();
    const std::string::size_type end_pos = txt.find(keyword_end, content_pos);
    if (std::string::npos == end_pos) {
        // keyword_end not found
        return(std::make_pair(std::string(), size_t(0)));
    }


    return(std::make_pair( txt.substr(content_pos, end_pos - content_pos),
                           end_pos + keyword_end.size() ));
}

// get file size by FILE*.
// return -1 if failure
// the current read/write offset of fs is saved and restored, so the call is
// transparent to the caller.
long
getFileSize (FILE* fs)
{
    if (nullptr == fs) {
        return(-1);
    }

    // backup current offset
    const long offset_bak = ftell(fs);
    if (offset_bak < 0) {
        return(-1);
    }

    // get the filesize
    if (0 != fseek(fs, 0, SEEK_END)) {
        return(-1);
    }
    const long file_size = ftell(fs);

    // restore last offset
    if (0 != fseek(fs, offset_bak, SEEK_SET)) {
        return(-1);
    }


    return(file_size);
}

// process_name + process_id + thread_id + rand
extern char *__progname;
std::string
makeRandomFilename (void)
{
    // seed rand() once, on the first call
    static bool b_first = true;
    if (b_first) {
        srand((unsigned)time(nullptr));
        b_first = false;
    }

    std::ostringstream oss;
#ifdef CYGWIN
    oss << "c:\\";
#else
    oss << "/tmp/";
#endif
    oss << __progname << "_" << getpid() << "_" << pthread_self() << "_" << rand();

    return(oss.str());
}

// conversion rules between unicode and UTF8:
// =================================================================================
//   |  unicode range        |  UTF8 encoding
// n |  (hexadecimal)        |  (binary)
// --+-----------------------+------------------------------------------------------
// 1 | 0000 0000 - 0000 007F |                                              0xxxxxxx
// 2 | 0000 0080 - 0000 07FF |                                     110xxxxx 10xxxxxx
// 3 | 0000 0800 - 0000 FFFF |                            1110xxxx 10xxxxxx 10xxxxxx
// 4 | 0001 0000 - 0010 FFFF |                   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
// 5 | 0020 0000 - 03FF FFFF |          111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
// 6 | 0400 0000 - 7FFF FFFF | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
// =================================================================================
// the x positions are filled with the bits of the unicode value from right to
// left, any x left over is set to 0. e.g. the unicode of "严" is 4E25
// (100111000100101); 4E25 falls in row 3 (0000 0800 - 0000 FFFF), so its UTF8
// form needs three bytes, "1110xxxx 10xxxxxx 10xxxxxx". filling the x from the
// last bit of 4E25 backwards gives "11100100 10111000 10100101", i.e. E4B8A5.
//
// return: UTF8 is a variable-length encoding, so the first member is the number
// of valid UTF8 bytes and the second member is the encoded value itself (the
// leading byte is the most significant byte of the value). (0, 0) is returned
// when unicode is outside every range in the table.
//
// note:
//  0) little-endian storage is assumed;
//  1) unicode needs at most 4 bytes and UTF8 at most 6 bytes, that is why the
//     parameter is unsigned int and the value is returned as unsigned long long;
//
// more details: http://www.ruanyifeng.com/blog/2007/10/ascii_unicode_and_utf-8.html
//
// NOTE(review): the template arguments of the return type were lost in this
// copy of the file; pair<unsigned, unsigned long long> is reconstructed from
// the comment above - confirm against the declaration in Misc.h.
std::pair<unsigned, unsigned long long>
convertUnicodeToUtf8 (unsigned int unicode)
{
    typedef std::pair<unsigned, unsigned long long> Result;

    unsigned bytes_cnt = 0;
    if (unicode <= 0x0000007F) {
        return(Result(1, unicode));
    } else if (unicode <= 0x000007FF) {
        bytes_cnt = 2;
    } else if (unicode <= 0x0000FFFF) {
        bytes_cnt = 3;
    } else if (unicode <= 0x0010FFFF) {
        bytes_cnt = 4;
    } else if (0x00200000 <= unicode && unicode <= 0x03FFFFFF) {
        bytes_cnt = 5;
    } else if (0x04000000 <= unicode && unicode <= 0x7FFFFFFF) {
        bytes_cnt = 6;
    } else {
        std::cerr << "WARNING! " << unicode << " is not a valid unicode. " << std::endl;
        return(Result(0, 0));
    }

    // continuation bytes "10xxxxxx": six payload bits each, lowest byte first
    unsigned long long utf8 = 0;
    unsigned remaining_bits = unicode;
    for (unsigned i = 0; i + 1 < bytes_cnt; ++i) {
        utf8 |= (0x80ULL | (remaining_bits & 0x3F)) << (8 * i);
        remaining_bits >>= 6;
    }

    // leading byte: bytes_cnt one-bits, a zero-bit, then the remaining payload
    const unsigned long long leading_marker = (0xFFu << (8 - bytes_cnt)) & 0xFFu;
    utf8 |= (leading_marker | remaining_bits) << (8 * (bytes_cnt - 1));

    return(Result(bytes_cnt, utf8));
}

// run cmd (looked up through PATH) with the arguments argv and wait for it.
// returns true only if the command exited normally with EXIT_SUCCESS.
// params:
//  cmd         program to execute
//  argv        full argument vector, argv[0] included
//  p_exitCode  if not NULL, receives the exit code when the command exited
//              normally (127 when the command could not be executed)
//  b_echo      print the command line to stdout before running it
bool
wait_cmd ( const std::string& cmd,
           const std::vector<std::string>& argv,
           int* p_exitCode,
           bool b_echo )
{
    bool b_executed_success = false;

    // echo the command line
    if (b_echo) {
        for (const auto& arg : argv) {
            std::cout << arg << " ";
        }
        std::cout << std::endl;
    }

    // convert the arguments in the vector to a NULL-terminated char* [].
    // the vector owns the array, nothing to release by hand in either process
    std::vector<char*> argv_tmp;
    argv_tmp.reserve(argv.size() + 1);
    for (const auto& arg : argv) {
        argv_tmp.push_back(const_cast<char*>(arg.c_str()));
    }
    argv_tmp.push_back(nullptr);

    // run and wait for the child process
    const pid_t pid = fork();
    if (0 == pid) { // child process
        execvp(cmd.c_str(), argv_tmp.data());
        // execvp() only returns on failure. leave the child right here, or it
        // would return into the caller and run the rest of the program twice
        _exit(127);
    } else if (pid > 0) { // parent process
        int status = 0;
        // the command ended normally, i.e. through exit() and not killed by a
        // signal; this says nothing about the value passed to exit()
        if (pid == waitpid(pid, &status, 0) && WIFEXITED(status)) {
            const int exit_code = WEXITSTATUS(status); // value passed to exit()

            if (EXIT_SUCCESS == exit_code) {
                b_executed_success = true;
            }

            if (nullptr != p_exitCode) {
                *p_exitCode = exit_code;
            }
        }
    }

    return(b_executed_success);
}

// --------------------------------------------------------------------------------
// /src/lib/helper/Misc.h:
// --------------------------------------------------------------------------------
// last modified

#pragma once

// NOTE(review): the header names were stripped in this copy of the file; the
// three below are reconstructed from the identifiers used - confirm.
#include <string>
#include <sstream>
#include <vector>

using std::string;
using std::ostringstream;
using std::vector;
using std::pair;


// why not std::to_string()?
// you know, I have to port this linux code to win32 by cygwin, and there
// is a bug on cygwin case it cannot find to_string(), so, I must do it
// by myself
template <typename T>
string
convNumToStr (T num)
{
    ostringstream oss;
    oss << num;

    return(oss.str());
}

// split raw string to more sub-str by token-chars.
void
splitStr ( const string& str,
           const string& tokens_list,
           vector<string>& splited_substr_list,
           vector<char>& appeared_tokens_list );

// fetch string from txt between keyword_begin and keyword_end.
37 | // case sensitive 38 | pair 39 | fetchStringBetweenKeywords ( const string& txt, 40 | const string& keyword_begin, 41 | const string& keyword_end, 42 | size_t from_pos = 0 ); 43 | 44 | // get file size by FILE* 45 | long 46 | getFileSize (FILE* fs); 47 | 48 | // get random filename, include path 49 | string 50 | makeRandomFilename (void); 51 | 52 | // unicode 转 UTF8 53 | pair 54 | convertUnicodeToUtf8 (unsigned int unicode); 55 | 56 | // fork() 启动新进程后立即返回,而本函数将等待新进程执行完毕后再返回 57 | bool 58 | wait_cmd ( const string& cmd, 59 | const vector& argv, 60 | int* p_exitCode = nullptr, 61 | bool b_echo = false ); 62 | -------------------------------------------------------------------------------- /src/lib/helper/RichTxt.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | #include 5 | 6 | using std::string; 7 | 8 | namespace RichTxt 9 | { 10 | // bold 11 | static const string bold_on("\x1b[1m"); 12 | static const string bold_off("\x1b[21m"); 13 | 14 | // italic 15 | static const string italic_on("\x1b[3m"); 16 | static const string italic_off("\x1b[23m"); 17 | 18 | // underline 19 | static const string underline_on("\x1b[4m"); 20 | static const string underline_off("\x1b[24m"); 21 | 22 | // hide 23 | static const string hide_on("\x1b[8m"); 24 | static const string hide_off("\x1b[28m"); 25 | 26 | // deletline 27 | static const string deletline_on("\x1b[9m"); 28 | static const string deletline_off("\x1b[29m"); 29 | 30 | // foreground 31 | static const string foreground_black("\x1b[30m"); 32 | static const string foreground_red("\x1b[31m"); 33 | static const string foreground_green("\x1b[32m"); 34 | static const string foreground_yellow("\x1b[33m"); 35 | static const string foreground_blue("\x1b[34m"); 36 | static const string foreground_magenta("\x1b[35m"); 37 | static const string foreground_cyan("\x1b[36m"); 38 | static const string foreground_white("\x1b[37m"); 39 | 40 | // background 41 | 
static const string background_black("\x1b[40m"); 42 | static const string background_red("\x1b[41m"); 43 | static const string background_green("\x1b[42m"); 44 | static const string background_yellow("\x1b[43m"); 45 | static const string background_blue("\x1b[44m"); 46 | static const string background_magenta("\x1b[45m"); 47 | static const string background_cyan("\x1b[46m"); 48 | static const string background_white("\x1b[47m"); 49 | 50 | // reset all 51 | static const string reset_all("\x1b[0m"); 52 | }; 53 | 54 | // normal usage: 55 | // 0) cout << "email: " << RichTxt::bold_on << "yangyang.gnu@gmail.com" << RichTxt::bold_off << endl; 56 | // 1) string name("yangyang.gnu"); string name_italic = RichTxt::italic_on + RichTxt::background_green + name + RichTxt::italic_off; 57 | 58 | -------------------------------------------------------------------------------- /src/lib/helper/Time.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "Time.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | 12 | // why not std::to_string()? 
13 | // you know, I have to port this linux code to win32 by cygwin, and there 14 | // is a bug on cygwin case it cannot find to_string(), so, I must do it by myself 15 | static string 16 | convUnsignedToStr (unsigned num) 17 | { 18 | ostringstream oss; 19 | oss << num; 20 | 21 | return(oss.str()); 22 | } 23 | 24 | // string::resize() resize string from the first char to the last char, 25 | // resizeStringByEndian() resize string from the last char to the first char 26 | static string 27 | resizeStringByEndian (const string& str, unsigned digits, char ch = '0') 28 | { 29 | string strtmp(str.crbegin(), str.crend()); 30 | strtmp.resize(digits, ch); 31 | reverse(strtmp.begin(), strtmp.end()); 32 | 33 | return(strtmp); 34 | } 35 | 36 | Time::Time () 37 | { 38 | time_t raw_time = time(nullptr); 39 | const struct tm* p_st = localtime(&raw_time); 40 | 41 | year_ = (unsigned)p_st->tm_year + 1900; 42 | month_ = (unsigned)p_st->tm_mon + 1; 43 | day_in_month_ = (unsigned)p_st->tm_mday; 44 | day_in_year_ = (unsigned)p_st->tm_yday + 1; 45 | day_in_week_ = (unsigned)p_st->tm_wday; 46 | hour_ = (unsigned)p_st->tm_hour; 47 | minute_ = (unsigned)p_st->tm_min; 48 | second_ = (unsigned)p_st->tm_sec; 49 | } 50 | 51 | Time::~Time () 52 | { 53 | ; 54 | } 55 | 56 | unsigned 57 | Time::getYear (void) const 58 | { 59 | return(year_); 60 | } 61 | 62 | string 63 | Time::getYear (unsigned digits) const 64 | { 65 | return( 0 == digits ? 66 | convUnsignedToStr(getYear()) : resizeStringByEndian(convUnsignedToStr(getYear()), digits) ); 67 | } 68 | 69 | unsigned 70 | Time::getMonth (void) const 71 | { 72 | return(month_); 73 | } 74 | 75 | string 76 | Time::getMonth (unsigned digits) const 77 | { 78 | return( 0 == digits ? 
79 | convUnsignedToStr(getMonth()) : resizeStringByEndian(convUnsignedToStr(getMonth()), digits) ); 80 | } 81 | 82 | unsigned 83 | Time::getDayInWeek (void) const 84 | { 85 | return(day_in_week_); 86 | } 87 | 88 | string 89 | Time::getDayInWeek (bool b_abbr) const 90 | { 91 | switch (getDayInWeek()) { 92 | case 1: 93 | return(b_abbr ? "mon" : "monday"); 94 | case 2: 95 | return(b_abbr ? "tues" : "tuesday"); 96 | case 3: 97 | return(b_abbr ? "wed" : "wednesday"); 98 | case 4: 99 | return(b_abbr ? "thurs" : "thursday"); 100 | case 5: 101 | return(b_abbr ? "fri" : "friday"); 102 | case 6: 103 | return(b_abbr ? "sat" : "saturday"); 104 | case 0: 105 | return(b_abbr ? "sun" : "sunday"); 106 | default: 107 | return(""); 108 | } 109 | } 110 | 111 | unsigned 112 | Time::getDayInMonth (void) const 113 | { 114 | return(day_in_month_); 115 | } 116 | 117 | string 118 | Time::getDayInMonth (unsigned digits) const 119 | { 120 | return( 0 == digits ? 121 | convUnsignedToStr(getDayInMonth()) : resizeStringByEndian(convUnsignedToStr(getDayInMonth()), digits) ); 122 | } 123 | 124 | unsigned 125 | Time::getDayInYear (void) const 126 | { 127 | return(day_in_year_); 128 | } 129 | 130 | string 131 | Time::getDayInYear (unsigned digits) const 132 | { 133 | return( 0 == digits ? 134 | convUnsignedToStr(getDayInYear()) : resizeStringByEndian(convUnsignedToStr(getDayInYear()), digits) ); 135 | } 136 | unsigned 137 | Time::getHour (void) const 138 | { 139 | return(hour_); 140 | } 141 | 142 | string 143 | Time::getHour (unsigned digits) const 144 | { 145 | return( 0 == digits ? 146 | convUnsignedToStr(getHour()) : resizeStringByEndian(convUnsignedToStr(getHour()), digits) ); 147 | } 148 | 149 | unsigned 150 | Time::getMinute (void) const 151 | { 152 | return(minute_); 153 | } 154 | 155 | string 156 | Time::getMinute (unsigned digits) const 157 | { 158 | return( 0 == digits ? 
159 | convUnsignedToStr(getMinute()) : resizeStringByEndian(convUnsignedToStr(getMinute()), digits) ); 160 | } 161 | 162 | unsigned 163 | Time::getSecond (void) const 164 | { 165 | return(second_); 166 | } 167 | 168 | string 169 | Time::getSecond (unsigned digits) const 170 | { 171 | return( 0 == digits ? 172 | convUnsignedToStr(getSecond()) : resizeStringByEndian(convUnsignedToStr(getSecond()), digits) ); 173 | } 174 | 175 | -------------------------------------------------------------------------------- /src/lib/helper/Time.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | #include 5 | 6 | using std::string; 7 | 8 | class Time 9 | { 10 | public: 11 | Time (); 12 | virtual ~Time (); 13 | 14 | unsigned getYear (void) const; 15 | string getYear (unsigned digits) const; 16 | 17 | unsigned getMonth (void) const; 18 | string getMonth (unsigned digits) const; 19 | 20 | unsigned getDayInWeek (void) const; 21 | string getDayInWeek (bool b_abbr) const; 22 | unsigned getDayInMonth (void) const; 23 | string getDayInMonth (unsigned digits) const; 24 | unsigned getDayInYear (void) const; 25 | string getDayInYear (unsigned digits) const; 26 | 27 | unsigned getHour (void) const; 28 | string getHour (unsigned digits) const; 29 | 30 | unsigned getMinute (void) const; 31 | string getMinute (unsigned digits) const; 32 | 33 | unsigned getSecond (void) const; 34 | string getSecond (unsigned digits) const; 35 | 36 | private: 37 | unsigned year_; 38 | unsigned month_; 39 | unsigned day_in_month_; 40 | unsigned day_in_week_; 41 | unsigned day_in_year_; 42 | unsigned hour_; 43 | unsigned minute_; 44 | unsigned second_; 45 | }; 46 | 47 | -------------------------------------------------------------------------------- /src/lib/helper/Webpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | 
9 | using std::string; 10 | using std::vector; 11 | using std::pair; 12 | 13 | class Webpage 14 | { 15 | public: 16 | explicit Webpage ( const string& url, 17 | const string& filename = "", 18 | const string& proxy_addr = "", 19 | const unsigned timeout_second = 16, 20 | const unsigned retry_times = 2, 21 | const unsigned retry_sleep_second = 4, 22 | const string& user_agent = "Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0", 23 | const string& post_cookies = "", 24 | const vector>& post_sections_list = vector>(), 25 | bool b_redirct = true ); 26 | virtual ~Webpage (); 27 | 28 | string getProxyAddr (void) const; 29 | string checkProxyOutIpByThirdparty (void) const; 30 | string checkProxyOutRegionByThirdparty (void) const; 31 | 32 | string getUserAgent (void) const; 33 | string checkUserAgentByThirdparty (void) const; 34 | 35 | const string& getTxt (void) const; 36 | const string& getTitle (void) const; 37 | 38 | long getLatestHttpStatusCode (void) const; 39 | bool isValidLatestHttpStatusCode (void) const; 40 | 41 | string getHttpHeader (const string& url) const; 42 | string getRemoteFiletype (const string& url) const; 43 | string getRemoteFilecharset (const string& url) const; 44 | string getRemoteFilesize (const string& url) const; 45 | string getRemoteFilename (const string& url) const; 46 | string getRemoteFiletime (const string& url) const; 47 | 48 | double getAvarSpeedDownload (void) const; 49 | 50 | bool isLoaded (void) const; 51 | size_t convertCharset (const string& src_charset, const string& dest_charset); 52 | bool saveasFile (const string& filename) const; 53 | 54 | bool downloadFile ( const string& url, 55 | const string& filename, 56 | const string& referer = "", 57 | const unsigned timeout_second = 0, 58 | const unsigned retry_times = 4, 59 | const unsigned retry_sleep_second = 4 ); 60 | 61 | bool setMultiPostSectionsList (const vector>& post_sections_list); 62 | bool submitMultiPost ( const string& url, 63 | const string& 
filename, 64 | const vector>& post_sections_list, 65 | const unsigned timeout_second = 32, 66 | const unsigned retry_times = 4, 67 | const unsigned retry_sleep_second = 4 ); 68 | 69 | string escapeUrl (const string& raw_url) const; 70 | 71 | const vector& getCookies (void) const; 72 | 73 | private: 74 | bool download_ ( const string& raw_url, 75 | const string& filename, 76 | const string& referer, 77 | const unsigned timeout_second, 78 | const unsigned retry_times, 79 | const unsigned retry_sleep_second ); 80 | long parseLatestHttpStatusCode_ (void); 81 | 82 | private: 83 | enum HttpHeader_ {header, type, charset, length, name, modified}; 84 | string requestHttpHeader_ ( const string& raw_url, 85 | HttpHeader_ header_item, 86 | const unsigned timeout_second = 4, 87 | const unsigned retry_times = 2, 88 | const unsigned retry_sleep_second = 2 ) const; 89 | 90 | private: 91 | CURL* p_curl_; 92 | string url_; 93 | char libcurl_err_info_buff_[CURL_ERROR_SIZE]; 94 | string proxy_addr_; 95 | string txt_; 96 | string title_; 97 | bool b_loaded_ok_; 98 | long latest_http_status_code_; 99 | double aver_speed_download_; 100 | const string user_agent_; 101 | vector cookie_items_list_; 102 | }; 103 | 104 | 105 | string convertUnicodeTxtToUtf8 (const string& unicode_txt); 106 | string unescapeHtml (const string& raw_txt); 107 | 108 | -------------------------------------------------------------------------------- /src/lib/self/Aicheng.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "Aicheng.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "AichengTopicsListWebpage.h" 13 | #include "AichengTopicWebpage.h" 14 | #include "JandownSeedWebpage.h" 15 | #include "../helper/RichTxt.h" 16 | 17 | 18 | using namespace std; 19 | 20 | 21 | static mutex g_mtx; 22 | 23 | const string& 24 | Aicheng::getPortalWebpageUrl (void) const 25 | { 26 | 
return(portal_url_); 27 | } 28 | 29 | static const string& 30 | getTopicsListWebpagePartUrl (Aicheng::AvClass av_class) 31 | { 32 | static const string west_part_url("thread.php?fid=5"); 33 | static const string cartoon_part_url("thread.php?fid=6"); 34 | static const string asia_mosaicked_part_url("thread.php?fid=4"); 35 | static const string asia_non_mosaicked_part_url("thread.php?fid=16"); 36 | 37 | switch (av_class) { 38 | case Aicheng::west: 39 | return(west_part_url); 40 | case Aicheng::cartoon: 41 | return(cartoon_part_url); 42 | case Aicheng::asia_mosaicked: 43 | return(asia_mosaicked_part_url); 44 | case Aicheng::asia_non_mosaicked: 45 | return(asia_non_mosaicked_part_url); 46 | } 47 | } 48 | 49 | static const string 50 | getTopicsListWebpageUrl (const string& portal_url, Aicheng::AvClass av_class) 51 | { 52 | return(portal_url + getTopicsListWebpagePartUrl(av_class)); 53 | } 54 | 55 | static bool 56 | isThereInList ( const string& webpage_title, 57 | const vector& ignore_keywords_list, 58 | string& which_keyword ) 59 | { 60 | for (const auto& e : ignore_keywords_list) { 61 | if (!e.empty() && string::npos != webpage_title.find(e)) { 62 | which_keyword = e; 63 | return(true); 64 | } 65 | } 66 | 67 | return(false); 68 | } 69 | 70 | static bool 71 | parseValidTopicsUrls ( Aicheng::AvClass av_class, 72 | const string& portal_url, 73 | const string& proxy_addr, 74 | unsigned range_begin, unsigned range_end, 75 | const vector& hate_keywords_list, 76 | const vector& like_keywords_list, 77 | vector& valid_topics_urls_list, 78 | bool b_progress ) 79 | { 80 | valid_topics_urls_list.clear(); 81 | 82 | string current_url = getTopicsListWebpageUrl(portal_url, av_class); 83 | bool b_stop = false; 84 | unsigned topics_cnt = 0; 85 | while (!current_url.empty() && !b_stop) { 86 | AichengTopicsListWebpage aicheng_topicslist_webpage(portal_url, current_url, proxy_addr); 87 | if (!aicheng_topicslist_webpage.isLoaded()) { 88 | return(false); 89 | } 90 | 91 | const vector>& 
topics_title_and_url = aicheng_topicslist_webpage.getTitlesAndUrlsList(); 92 | for (const auto& e : topics_title_and_url) { 93 | if (++topics_cnt > range_end) { 94 | b_stop = true; 95 | break; 96 | } 97 | 98 | const string& topic_title = e.first; 99 | const string& topic_url = e.second; 100 | static const string o_flag(RichTxt::bold_on + "O" + RichTxt::bold_off); 101 | static const string x_flag("x"); 102 | 103 | // ignore the topics which do not in range 104 | if (topics_cnt < range_begin) { 105 | if (b_progress) { 106 | cout << x_flag << " " << flush; 107 | } 108 | continue; 109 | } 110 | // ignore the topics which contain hate keyword by user set 111 | string which_keyword; 112 | if (isThereInList(topic_title, hate_keywords_list, which_keyword)) { 113 | if (b_progress) { 114 | cout << x_flag << " " << flush; 115 | } 116 | continue; 117 | } 118 | // ignore the topics which do not contain like keyword by user set 119 | if ( !like_keywords_list.empty() && 120 | !isThereInList(topic_title, like_keywords_list, which_keyword) ) { 121 | if (b_progress) { 122 | cout << x_flag << " " << flush; 123 | } 124 | continue; 125 | } 126 | 127 | valid_topics_urls_list.push_back(topic_url); 128 | 129 | if (b_progress) { 130 | cout << o_flag << " " << flush; 131 | } 132 | } 133 | 134 | current_url = aicheng_topicslist_webpage.getNextpageUrl(); 135 | } 136 | 137 | 138 | return(true); 139 | } 140 | 141 | static void 142 | downloadTopicPicsAndSeed ( const string& topic_url, 143 | const string& proxy_addr, 144 | const string& path, 145 | unsigned timeout_download_pic, 146 | bool b_show_info ) 147 | { 148 | AichengTopicWebpage aicheng_topics_webpage(topic_url, proxy_addr); 149 | 150 | // ready for the basename of pictures and seed. 
151 | // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 152 | string base_name; // from topic title 153 | 154 | // 0) delete the web logo info; 155 | // 1) clear the "/" in topictitle string, if the "/" present in filename, 156 | // linux will treat it as directory, again, clear the "\" for windows; 157 | static const vector keyword_logos_list = { " 亚洲无码区 bt下载 - powered by phpwind.net", 158 | " 亚洲有码区 bt下载 - powered by phpwind.net", 159 | " 欧美区 bt下载 - powered by phpwind.net", 160 | " 动漫区 bt下载 - powered by phpwind.net", 161 | "|亚洲无码区 - bt下载 爱城 bt下载 ", 162 | "亚洲无码区 - bt下载 爱城 bt下载 ", 163 | "|亚洲有码区 - bt下载 爱城 bt下载 ", 164 | "亚洲有码区 - bt下载 爱城 bt下载 ", 165 | "|动漫区 - bt下载 爱城 bt下载 ", 166 | "动漫区 - bt下载 爱城 bt下载 ", 167 | "|欧美区 - bt下载 爱城 bt下载 ", 168 | "欧美区 - bt下载 爱城 bt下载 " }; 169 | const string& topic_webpage_title = aicheng_topics_webpage.getTitle(); 170 | auto keyword_logo_pos = string::npos; 171 | for (const auto& f : keyword_logos_list) { 172 | keyword_logo_pos = topic_webpage_title.find(f); 173 | if (string::npos != keyword_logo_pos) { 174 | break; 175 | } 176 | } 177 | remove_copy_if( topic_webpage_title.cbegin(), 178 | (string::npos == keyword_logo_pos) ? topic_webpage_title.cend() : topic_webpage_title.cbegin() + (int)keyword_logo_pos, 179 | back_inserter(base_name), 180 | [] (char ch) {return( '|' == ch || // invalid chars in windows-style filename 181 | '/' == ch || 182 | '<' == ch || 183 | '>' == ch || 184 | '?' == ch || 185 | '*' == ch || 186 | ':' == ch || 187 | '\\' == ch );} ); 188 | 189 | // 2) the path + filename max length must less than pathconf(, _PC_NAME_MAX) 190 | const unsigned filename_max_length_without_postfix = (unsigned)pathconf(path.c_str(), _PC_NAME_MAX) 191 | - string("99").size() // picture number 192 | - string(".torrent").size(); 193 | if (base_name.size() >= filename_max_length_without_postfix) { 194 | // the filename too long to create file. 
the way as following doesn't work, case filename encoding error: 195 | // base_name.resize(filename_max_length_without_postfix - 1), because this is string on char not wstring on wchar. 196 | // there is another stupid way, random name from 'a' to 'z' 197 | base_name.resize(16); 198 | generate( base_name.begin(), base_name.end(), 199 | [] () {return('a' + rand() % ('z' - 'a'));} ); 200 | base_name = "(rename)" + base_name; 201 | } 202 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 203 | 204 | // download all pictures 205 | vector fail_download_pics_urls_list; 206 | bool b_download_pics_success = aicheng_topics_webpage.downloadAllPictures( path, 207 | base_name, 208 | timeout_download_pic, 209 | fail_download_pics_urls_list, 210 | 32 ); 211 | 212 | // download seed 213 | bool b_downloaded_seed_success = false; 214 | if (!aicheng_topics_webpage.getSeedUrl().empty()) { 215 | JandownSeedWebpage jan_seed_webpage(aicheng_topics_webpage.getSeedUrl(), proxy_addr); 216 | b_downloaded_seed_success = jan_seed_webpage.downloadSeed(path, base_name); 217 | } 218 | 219 | // show result info 220 | if (!b_show_info) { 221 | return; 222 | } 223 | static const string success_info("success"); 224 | static const string fail_info = RichTxt::foreground_red + "failure" + RichTxt::reset_all; 225 | g_mtx.lock(); 226 | cout << " \"" << base_name << "\" - "; 227 | if (b_download_pics_success && b_downloaded_seed_success) { 228 | cout << success_info; 229 | } else { 230 | cout << fail_info << " (download error from " << topic_url << ". 
"; 231 | if (!b_download_pics_success) { 232 | cout << "pictures error: "; 233 | copy(fail_download_pics_urls_list.cbegin(), fail_download_pics_urls_list.cend(), ostream_iterator(cout, ", ")); 234 | cout << "\b\b"; 235 | } 236 | if (!b_downloaded_seed_success) { 237 | if (!b_download_pics_success) { 238 | cout << "; "; 239 | } 240 | cout << "seed error: " << aicheng_topics_webpage.getSeedUrl(); 241 | } 242 | cout << ")"; 243 | } 244 | cout << endl; 245 | g_mtx.unlock(); 246 | } 247 | 248 | static const string& 249 | getNextProxyAddr (const vector& proxy_addrs_list) 250 | { 251 | if (proxy_addrs_list.empty()) { 252 | static const string empty_str(""); 253 | return(empty_str); 254 | } 255 | 256 | static unsigned current_pos; 257 | if (current_pos >= proxy_addrs_list.size()) { 258 | current_pos = 0; 259 | } 260 | return(proxy_addrs_list[current_pos++]); 261 | } 262 | 263 | Aicheng::Aicheng ( const string& portal_url, 264 | AvClass av_class, 265 | const vector& proxy_addrs_list, 266 | unsigned range_begin, unsigned range_end, 267 | const vector& hate_keywords_list, 268 | const vector& like_keywords_list, 269 | unsigned threads_total, 270 | unsigned timeout_download_pic, 271 | const string& path ) 272 | : portal_url_(portal_url) 273 | { 274 | // parse the URLs of valid topics by: range, hate keywords, like keywords 275 | cout << "Parse the URLs of topics from " << range_begin << " to " << range_end << ": " << flush; 276 | vector valid_topics_urls_list; 277 | parseValidTopicsUrls( av_class, 278 | portal_url, 279 | getNextProxyAddr(proxy_addrs_list), 280 | range_begin, range_end, 281 | hate_keywords_list, 282 | like_keywords_list, 283 | valid_topics_urls_list, 284 | true ); 285 | if (valid_topics_urls_list.empty()) { 286 | cout << "(There is no topic which you like) " << endl; 287 | return; 288 | } 289 | cout << endl << endl; 290 | 291 | // download all pictures and seeds of topics 292 | cout << "Download the pictures and seeds of topics: " << endl; 293 | unsigned 
parsed_topics_cnt = 0; 294 | for (unsigned i = 0; i < (valid_topics_urls_list.size() / threads_total); ++i) { 295 | vector threads_list; 296 | for (unsigned j = 0; j < threads_total; ++j) { 297 | ++parsed_topics_cnt; 298 | threads_list.push_back(thread( &downloadTopicPicsAndSeed, 299 | ref(valid_topics_urls_list[i * threads_total + j]), 300 | ref(getNextProxyAddr(proxy_addrs_list)), 301 | ref(path), 302 | timeout_download_pic, 303 | true )); 304 | } 305 | for (auto& e : threads_list) { 306 | if (e.joinable()) { 307 | e.join(); 308 | } 309 | } 310 | 311 | if (!threads_list.empty()) { 312 | cout << setprecision(1) << setiosflags(ios::fixed); 313 | cout << " " << RichTxt::bold_on << RichTxt::underline_on << "<---- " 314 | << 100.0 * parsed_topics_cnt / valid_topics_urls_list.size() 315 | << "% ---->" << RichTxt::underline_off << RichTxt::bold_off << endl; 316 | cout << resetiosflags(ios::fixed); 317 | } 318 | } 319 | 320 | vector threads_list; 321 | for ( unsigned i = (valid_topics_urls_list.size() / threads_total) * threads_total; 322 | i < valid_topics_urls_list.size(); 323 | ++i ) { 324 | ++parsed_topics_cnt; 325 | threads_list.push_back(thread( &downloadTopicPicsAndSeed, 326 | ref(valid_topics_urls_list[i]), 327 | ref(getNextProxyAddr(proxy_addrs_list)), 328 | ref(path), 329 | timeout_download_pic, 330 | true )); 331 | } 332 | for (auto& e : threads_list) { 333 | if (e.joinable()) { 334 | e.join(); 335 | } 336 | } 337 | if (!threads_list.empty()) { 338 | cout << setprecision(1) << setiosflags(ios::fixed); 339 | cout << " " << RichTxt::bold_on << RichTxt::underline_on << "<---- " 340 | << 100.0 * parsed_topics_cnt / valid_topics_urls_list.size() 341 | << "% ---->" << RichTxt::underline_off << RichTxt::bold_off << endl; 342 | cout << resetiosflags(ios::fixed); 343 | } 344 | 345 | cout << endl; 346 | cout << "Hey kiddo, your hot babes " << path << ", enjoy it! 
" << endl; 347 | } 348 | 349 | Aicheng::~Aicheng () 350 | { 351 | ; 352 | } 353 | 354 | -------------------------------------------------------------------------------- /src/lib/self/Aicheng.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | using std::string; 9 | using std::vector; 10 | 11 | 12 | class Aicheng 13 | { 14 | public: 15 | enum AvClass {west, cartoon, asia_mosaicked, asia_non_mosaicked}; 16 | 17 | public: 18 | Aicheng ( const string& portal_url, 19 | AvClass av_class, 20 | const vector& proxy_addrs_list, 21 | unsigned range_begin, unsigned range_end, 22 | const vector& hate_keywords_list, 23 | const vector& like_keywords_list, 24 | unsigned threads_total, 25 | unsigned timeout_download_pic, 26 | const string& path ); 27 | virtual ~Aicheng (); 28 | 29 | const string& getPortalWebpageUrl (void) const; 30 | 31 | 32 | private: 33 | const string portal_url_; 34 | }; 35 | 36 | -------------------------------------------------------------------------------- /src/lib/self/AichengTopicWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "AichengTopicWebpage.h" 4 | #include 5 | #include 6 | #include 7 | #include "../helper/Misc.h" 8 | 9 | 10 | using namespace std; 11 | 12 | static bool 13 | parsePicturesUrlsHelper ( const string& webpage_txt, 14 | vector& pictures_urls_list, 15 | const string& keyword_begin, 16 | const string& keyword_end ) 17 | { 18 | bool b_ok = false; 19 | 20 | size_t keyword_pic_begin_pos = 0; 21 | while (true) { 22 | // parse picture URL 23 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt, 24 | keyword_begin, 25 | keyword_end, 26 | keyword_pic_begin_pos ); 27 | string pic_url = pair_tmp.first; 28 | if (pic_url.empty()) { 29 | break; 30 | } 31 | keyword_pic_begin_pos = pair_tmp.second; 32 | b_ok = true; 33 | 34 | // there are some bad 
picture-webspaces and logo pci, ignore them 35 | bool b_ignore_url = false; 36 | static const vector ignore_urls_keywords_list = { 37 | "iceimg.com", 38 | }; 39 | for (const auto& e : ignore_urls_keywords_list) { 40 | if (string::npos != pic_url.find(e)) { 41 | b_ignore_url = true; 42 | break; 43 | } 44 | } 45 | if (b_ignore_url) { 46 | continue; 47 | } 48 | 49 | // convert https to http 50 | static const string keyword_https("https://"); 51 | const auto https_pos = pic_url.find(keyword_https); 52 | if (string::npos != https_pos) { 53 | static const string keyword_http("http://"); 54 | pic_url.replace(https_pos, keyword_https.size(), keyword_http); 55 | } 56 | 57 | // save the picture URL 58 | pictures_urls_list.push_back(pic_url); 59 | } 60 | 61 | return(b_ok); 62 | } 63 | 64 | static bool 65 | parsePicturesUrls (const string& webpage_txt, vector& pictures_urls_list) 66 | { 67 | pictures_urls_list.clear(); 68 | 69 | // just parse the toptip 70 | static const string keyword_toptip_begin("本页主题:"); 71 | static const string keyword_toptip_end(">[楼 主]"); 72 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt, 73 | keyword_toptip_begin, 74 | keyword_toptip_end ); 75 | string toptip = pair_tmp.first; 76 | if (toptip.empty()) { 77 | cerr << "ERROR! there is no toptip. " << endl; 78 | return(false); 79 | } 80 | 81 | // the list may be on the webpage at the same time 82 | static const vector> begin_and_end_keywords_list = { make_pair(" keywords_seed_begin_list = { "http://www.jandown.com", 101 | "http://jandown.com", 102 | "http://www6.mimima.com", 103 | "http://mimima.com" }; 104 | 105 | const auto body_pos = webpage_txt.find(""); 106 | if (string::npos == body_pos) { 107 | //cerr << "warning! 
parseseedurl() cannot find the keyword \"\"" << endl; 108 | return(false); 109 | } 110 | const string& body = webpage_txt.substr(body_pos); 111 | 112 | for (const auto& e : keywords_seed_begin_list) { 113 | const string& keyword_seed_begin = e; 114 | static const string keyword_seed_end("\""); 115 | 116 | const pair& pair_tmp = fetchStringBetweenKeywords( body, 117 | keyword_seed_begin, 118 | keyword_seed_end ); 119 | if (!pair_tmp.first.empty()) { 120 | seed_url = keyword_seed_begin + pair_tmp.first; 121 | return(true); 122 | } 123 | } 124 | 125 | 126 | return(false); 127 | } 128 | 129 | AichengTopicWebpage::AichengTopicWebpage (const string& url, const string& proxy_addr) 130 | : TopicWebpage(url, parsePicturesUrls, parseSeedUrl, proxy_addr, "gbk", "UTF-8") 131 | { 132 | ; 133 | } 134 | 135 | AichengTopicWebpage::~AichengTopicWebpage () 136 | { 137 | ; 138 | } 139 | 140 | -------------------------------------------------------------------------------- /src/lib/self/AichengTopicWebpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include "TopicWebpage.h" 8 | 9 | using std::string; 10 | using std::vector; 11 | 12 | 13 | class AichengTopicWebpage : public TopicWebpage 14 | { 15 | public: 16 | AichengTopicWebpage (const string& url, const string& proxy_addr); 17 | virtual ~AichengTopicWebpage (); 18 | }; 19 | 20 | -------------------------------------------------------------------------------- /src/lib/self/AichengTopicsListWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "AichengTopicsListWebpage.h" 4 | #include 5 | #include 6 | #include "../helper/Misc.h" 7 | 8 | 9 | using namespace std; 10 | 11 | 12 | 13 | static bool 14 | parseTitlesAndUrls ( const string& webpage_txt, 15 | const string& portal_url, 16 | vector>& titles_and_urls_list ) 17 | { 18 | const unsigned 
size_back = titles_and_urls_list.size(); 19 | 20 | const auto topics_list_txt_pos = webpage_txt.find(R"(style="border-top:0">普通主题)"); 21 | size_t keyword_topic_url_begin_pos = ((string::npos == topics_list_txt_pos) ? 0 : topics_list_txt_pos); 22 | size_t keyword_topic_url_end_pos = 0; 23 | 24 | while (true) { 25 | // parse topic URL 26 | static const string keyword_topic_url_begin("

& pair_url = fetchStringBetweenKeywords( webpage_txt, 29 | keyword_topic_url_begin, 30 | keyword_topic_url_end, 31 | keyword_topic_url_begin_pos ); 32 | const string& topic_url_part = pair_url.first; 33 | if (topic_url_part.empty()) { 34 | break; 35 | } 36 | const string topic_url = portal_url + topic_url_part; 37 | keyword_topic_url_end_pos = pair_url.second; 38 | 39 | // parse topic title 40 | static const string keyword_topic_title_begin("target=_blank>"); 41 | static const string keyword_topic_title_end("

"); 42 | const pair& pair_title = fetchStringBetweenKeywords( webpage_txt, 43 | keyword_topic_title_begin, 44 | keyword_topic_title_end, 45 | //keyword_topic_url_end_pos - keyword_topic_title_begin.size() ); 46 | keyword_topic_url_end_pos ); 47 | const string& topic_title = pair_title.first; 48 | keyword_topic_url_begin_pos = pair_title.second; 49 | 50 | // save url and title of the topic 51 | titles_and_urls_list.push_back(make_pair(topic_title, topic_url)); 52 | } 53 | 54 | return(titles_and_urls_list.size() > size_back); 55 | } 56 | 57 | static bool 58 | parseNextpageUrl (const string& webpage_txt, const string& portal_url, string& nextpage_url) 59 | { 60 | nextpage_url.clear(); // reset the out-param; the former empty() call only queried it and discarded the result 61 | 62 | static const string keyword_nextpage_begin(""); 64 | const string& nextpage_url_part = fetchStringBetweenKeywords( webpage_txt, 65 | keyword_nextpage_begin, 66 | keyword_nextpage_end ).first; 67 | if (nextpage_url_part.empty()) { 68 | return(false); 69 | } 70 | 71 | // portal_url has an extra "/bt" at its end; drop it before appending the next-page part 72 | nextpage_url = string(portal_url.cbegin(), portal_url.cend() - (const int)string("/bt").length()) + nextpage_url_part; 73 | 74 | return(true); 75 | } 76 | 77 | AichengTopicsListWebpage::AichengTopicsListWebpage (const string& portal_url, const string& url, const string& proxy_addr) 78 | : TopicsListWebpage(portal_url, url, parseTitlesAndUrls, parseNextpageUrl, proxy_addr, "gbk", "UTF-8") 79 | { 80 | ; 81 | } 82 | 83 | AichengTopicsListWebpage::~AichengTopicsListWebpage () 84 | { 85 | ; 86 | } 87 | 88 | -------------------------------------------------------------------------------- /src/lib/self/AichengTopicsListWebpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include "TopicsListWebpage.h" 8 | 9 | using std::string; 10 | using std::vector; 11 | 12 | 13 | class AichengTopicsListWebpage : public TopicsListWebpage 14 | { 15 | public: 16 | AichengTopicsListWebpage (const 
string& portal_url, const string& url, const string& proxy_addr); 17 | virtual ~AichengTopicsListWebpage (); 18 | }; 19 | -------------------------------------------------------------------------------- /src/lib/self/Caoliu.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "Caoliu.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "CaoliuTopicsListWebpage.h" 13 | #include "CaoliuTopicWebpage.h" 14 | #include "RmdownSeedWebpage.h" 15 | #include "../helper/RichTxt.h" 16 | 17 | 18 | using namespace std; 19 | 20 | static mutex g_mtx; 21 | 22 | const string& 23 | Caoliu::getPortalWebpageUrl (void) const 24 | { 25 | return(portal_url_); 26 | } 27 | 28 | static const string& 29 | getTopicsListWebpagePartUrl (Caoliu::AvClass av_class) 30 | { 31 | // reposted 32 | static const string west_reposted_part_url("thread0806.php?fid=19"); 33 | static const string cartoon_reposted_part_url("thread0806.php?fid=24"); 34 | static const string asia_mosaicked_reposted_part_url("thread0806.php?fid=18"); 35 | static const string asia_non_mosaicked_reposted_part_url("thread0806.php?fid=17"); 36 | 37 | // original 38 | static const string west_original_part_url("thread0806.php?fid=4"); 39 | static const string cartoon_original_part_url("thread0806.php?fid=5"); 40 | static const string asia_mosaicked_original_part_url("thread0806.php?fid=15"); 41 | static const string asia_non_mosaicked_original_part_url("thread0806.php?fid=2"); 42 | 43 | // selfie 44 | static const string selfie_part_url("thread0806.php?fid=16"); 45 | 46 | switch (av_class) { 47 | case Caoliu::west_reposted: 48 | return(west_reposted_part_url); 49 | case Caoliu::cartoon_reposted: 50 | return(cartoon_reposted_part_url); 51 | case Caoliu::asia_mosaicked_reposted: 52 | return(asia_mosaicked_reposted_part_url); 53 | case Caoliu::asia_non_mosaicked_reposted: 54 | 
return(asia_non_mosaicked_reposted_part_url); 55 | case Caoliu::west_original: 56 | return(west_original_part_url); 57 | case Caoliu::cartoon_original: 58 | return(cartoon_original_part_url); 59 | case Caoliu::asia_mosaicked_original: 60 | return(asia_mosaicked_original_part_url); 61 | case Caoliu::asia_non_mosaicked_original: 62 | return(asia_non_mosaicked_original_part_url); 63 | case Caoliu::selfie: 64 | return(selfie_part_url); 65 | } static const string unknown_part_url; return(unknown_part_url); // only reached for a value outside AvClass: return an empty part instead of flowing off the end of a non-void function 66 | } 67 | 68 | static const string 69 | getTopicsListWebpageUrl (const string& portal_url, Caoliu::AvClass av_class) 70 | { 71 | return(portal_url + getTopicsListWebpagePartUrl(av_class)); 72 | } 73 | 74 | static bool 75 | isThereInList ( const string& webpage_title, 76 | const vector& ignore_keywords_list, 77 | string& which_keyword ) 78 | { 79 | for (const auto& e : ignore_keywords_list) { 80 | if (!e.empty() && string::npos != webpage_title.find(e)) { 81 | which_keyword = e; 82 | return(true); 83 | } 84 | } 85 | 86 | return(false); 87 | } 88 | 89 | static bool 90 | parseValidTopicsUrls ( Caoliu::AvClass av_class, 91 | const string& portal_url, 92 | const string& proxy_addr, 93 | unsigned range_begin, unsigned range_end, 94 | const vector& hate_keywords_list, 95 | const vector& like_keywords_list, 96 | vector& valid_topics_urls_list, 97 | bool b_progress ) 98 | { 99 | valid_topics_urls_list.clear(); 100 | 101 | string current_url = getTopicsListWebpageUrl(portal_url, av_class); 102 | bool b_stop = false; 103 | unsigned topics_cnt = 0; 104 | while (!current_url.empty() && !b_stop) { 105 | CaoliuTopicsListWebpage caoliu_topicslist_webpage(portal_url, current_url, proxy_addr); 106 | if (!caoliu_topicslist_webpage.isLoaded()) { 107 | return(false); 108 | } 109 | 110 | const vector>& topics_title_and_url = caoliu_topicslist_webpage.getTitlesAndUrlsList(); 111 | for (const auto& e : topics_title_and_url) { 112 | if (++topics_cnt > range_end) { 113 | b_stop = true; 114 | break; 115 | } 116 | 117 | const string& topic_title = e.first; 
118 | const string& topic_url = e.second; 119 | static const string o_flag(RichTxt::bold_on + "O" + RichTxt::bold_off); 120 | static const string x_flag("x"); 121 | 122 | // ignore the topics which do not in range 123 | if (topics_cnt < range_begin) { 124 | if (b_progress) { 125 | cout << x_flag << " " << flush; 126 | } 127 | continue; 128 | } 129 | // ignore the topics which contain hate keyword by user set 130 | string which_keyword; 131 | if (isThereInList(topic_title, hate_keywords_list, which_keyword)) { 132 | if (b_progress) { 133 | cout << x_flag << " " << flush; 134 | } 135 | continue; 136 | } 137 | // ignore the topics which do not contain like keyword by user set 138 | if ( !like_keywords_list.empty() && 139 | !isThereInList(topic_title, like_keywords_list, which_keyword) ) { 140 | if (b_progress) { 141 | cout << x_flag << " " << flush; 142 | } 143 | continue; 144 | } 145 | 146 | valid_topics_urls_list.push_back(topic_url); 147 | 148 | if (b_progress) { 149 | cout << o_flag << " " << flush; 150 | } 151 | } 152 | 153 | current_url = caoliu_topicslist_webpage.getNextpageUrl(); 154 | } 155 | 156 | 157 | return(true); 158 | } 159 | 160 | static void 161 | downloadTopicPicsAndSeed ( const string& topic_url, 162 | const string& proxy_addr, 163 | const string& path, 164 | unsigned timeout_download_pic, 165 | unsigned pictures_total, 166 | bool b_download_seed, 167 | bool b_show_info ) 168 | { 169 | CaoliuTopicWebpage caoliu_topics_webpage(topic_url, proxy_addr); 170 | 171 | // ready for the basename of pictures and seed. 
172 | // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 173 | string base_name; // from topic title 174 | 175 | // 0) delete the web logo info; 176 | // 1) clear the "/" in topictitle string, if the "/" present in filename, 177 | // linux will treat it as directory, again, clear the "\" for windows; 178 | static const vector keyword_logos_list = {" 草榴社區 - powered by phpwind.net"}; 179 | const string& topic_webpage_title = caoliu_topics_webpage.getTitle(); 180 | auto keyword_logo_pos = string::npos; 181 | for (const auto& f : keyword_logos_list) { 182 | keyword_logo_pos = topic_webpage_title.find(f); 183 | if (string::npos != keyword_logo_pos) { 184 | break; 185 | } 186 | } 187 | remove_copy_if( topic_webpage_title.cbegin(), 188 | (string::npos == keyword_logo_pos) ? topic_webpage_title.cend() : topic_webpage_title.cbegin() + (int)keyword_logo_pos, 189 | back_inserter(base_name), 190 | [] (char ch) {return( '|' == ch || // invalid chars in windows-sytle filename 191 | '/' == ch || 192 | '<' == ch || 193 | '>' == ch || 194 | '?' == ch || 195 | '*' == ch || 196 | ':' == ch || 197 | '\\' == ch );} ); 198 | 199 | // 2) the path + filename max length must less than pathconf(, _PC_NAME_MAX) 200 | const unsigned filename_max_length_without_postfix = (unsigned)pathconf(path.c_str(), _PC_NAME_MAX) 201 | - string("99").size() // picture number 202 | - string(".torrent").size(); 203 | if (base_name.size() >= filename_max_length_without_postfix) { 204 | // the filename too long to create file. the way as following doesn't work, case filename encoding error: 205 | // base_name.resize(filename_max_length_without_postfix - 1), because this is string on char not wstring on wchar. 
206 | // there is another stupid way, random name from 'a' to 'z' 207 | base_name.resize(16); 208 | generate( base_name.begin(), base_name.end(), 209 | [] () {return('a' + rand() % ('z' - 'a'));} ); 210 | base_name = "(rename)" + base_name; 211 | } 212 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 213 | 214 | // download all pictures 215 | //const vector& urls = caoliu_topics_webpage.getPicturesUrlsList(); 216 | //for (const auto& e : urls) { 217 | ////cout << "------------" << endl; 218 | //cout << e << "\n\t"; 219 | //cout << caoliu_topics_webpage.getRemoteFiletype(e) << endl; 220 | ////cout << "------------" << endl; 221 | //} 222 | vector fail_download_pics_urls_list; 223 | bool b_download_pics_success = caoliu_topics_webpage.downloadAllPictures( path, 224 | base_name, 225 | timeout_download_pic, 226 | fail_download_pics_urls_list, 227 | pictures_total ); 228 | 229 | // download seed 230 | bool b_downloaded_seed_success = true; 231 | if (b_download_seed) { 232 | b_downloaded_seed_success = false; 233 | if (!caoliu_topics_webpage.getSeedUrl().empty()) { 234 | RmdownSeedWebpage rm_seed_webpage(caoliu_topics_webpage.getSeedUrl(), proxy_addr); 235 | b_downloaded_seed_success = rm_seed_webpage.downloadSeed(path, base_name); 236 | } 237 | } 238 | 239 | // show result info 240 | if (!b_show_info) { 241 | return; 242 | } 243 | static const string success_info("success"); 244 | static const string fail_info = RichTxt::foreground_red + "failure" + RichTxt::reset_all; 245 | g_mtx.lock(); 246 | cout << " \"" << base_name << "\" - "; 247 | if (b_download_pics_success && b_downloaded_seed_success) { 248 | cout << success_info; 249 | } else { 250 | cout << fail_info << " (download error from " << topic_url << ". 
"; 251 | if (!b_download_pics_success) { 252 | cout << "pictures error: "; 253 | copy(fail_download_pics_urls_list.cbegin(), fail_download_pics_urls_list.cend(), ostream_iterator(cout, ", ")); 254 | cout << "\b\b"; 255 | } 256 | if (b_download_seed && !b_downloaded_seed_success) { 257 | if (!b_download_pics_success) { 258 | cout << "; "; 259 | } 260 | cout << "seed error: " << caoliu_topics_webpage.getSeedUrl(); 261 | } 262 | cout << ")"; 263 | } 264 | cout << endl; 265 | g_mtx.unlock(); 266 | } 267 | 268 | static const string& 269 | getNextProxyAddr (const vector& proxy_addrs_list) 270 | { 271 | if (proxy_addrs_list.empty()) { 272 | static const string empty_str(""); 273 | return(empty_str); 274 | } 275 | 276 | static unsigned current_pos; 277 | if (current_pos >= proxy_addrs_list.size()) { 278 | current_pos = 0; 279 | } 280 | return(proxy_addrs_list[current_pos++]); 281 | } 282 | 283 | Caoliu::Caoliu ( const string& portal_url, 284 | AvClass av_class, 285 | const vector& proxy_addrs_list, 286 | unsigned range_begin, unsigned range_end, 287 | const vector& hate_keywords_list, 288 | const vector& like_keywords_list, 289 | unsigned threads_total, 290 | unsigned timeout_download_pic, 291 | const string& path ) 292 | : portal_url_(portal_url) 293 | { 294 | // parse the URLs of valid topics by: range, hate keywords, like keywords 295 | cout << "Parse the URLs of topics from " << range_begin << " to " << range_end << ": " << flush; 296 | vector valid_topics_urls_list; 297 | parseValidTopicsUrls( av_class, 298 | portal_url, 299 | getNextProxyAddr(proxy_addrs_list), 300 | range_begin, range_end, 301 | hate_keywords_list, 302 | like_keywords_list, 303 | valid_topics_urls_list, 304 | true ); 305 | cout << endl; 306 | if (valid_topics_urls_list.empty()) { 307 | cout << "There is no topic which you like. " << endl; 308 | return; 309 | } 310 | cout << endl; 311 | 312 | // check just download picutures for dagaier? 
313 | unsigned pictures_total = 2; 314 | bool b_download_seed = true; 315 | if (Caoliu::selfie == av_class) { 316 | pictures_total = 1024; // the max total 317 | b_download_seed = false; 318 | } 319 | 320 | // download all pictures and seeds of topics 321 | cout << "Download the pictures and seeds of topics: " << endl; 322 | unsigned parsed_topics_cnt = 0; if (0 == threads_total) { threads_total = 1; } // zero workers would divide by zero in the batching below; fall back to a single worker 323 | for (unsigned i = 0; i < (valid_topics_urls_list.size() / threads_total); ++i) { 324 | vector threads_list; 325 | for (unsigned j = 0; j < threads_total; ++j) { 326 | ++parsed_topics_cnt; 327 | threads_list.push_back(thread( &downloadTopicPicsAndSeed, 328 | ref(valid_topics_urls_list[i * threads_total + j]), 329 | ref(getNextProxyAddr(proxy_addrs_list)), 330 | ref(path), 331 | timeout_download_pic, 332 | pictures_total, 333 | b_download_seed, 334 | true )); 335 | } 336 | for (auto& e : threads_list) { 337 | if (e.joinable()) { 338 | e.join(); 339 | } 340 | } 341 | 342 | if (!threads_list.empty()) { 343 | cout << setprecision(1) << setiosflags(ios::fixed); 344 | cout << " " << RichTxt::bold_on << RichTxt::underline_on << "<---- " 345 | << 100.0 * parsed_topics_cnt / valid_topics_urls_list.size() 346 | << "% ---->" << RichTxt::underline_off << RichTxt::bold_off << endl; 347 | cout << resetiosflags(ios::fixed); 348 | } 349 | } 350 | 351 | vector threads_list; 352 | for (unsigned i = (valid_topics_urls_list.size() / threads_total) * threads_total; i < valid_topics_urls_list.size(); ++i) { 353 | ++parsed_topics_cnt; 354 | threads_list.push_back(thread( &downloadTopicPicsAndSeed, 355 | ref(valid_topics_urls_list[i]), 356 | ref(getNextProxyAddr(proxy_addrs_list)), 357 | ref(path), 358 | timeout_download_pic, 359 | pictures_total, 360 | b_download_seed, 361 | true )); 362 | } 363 | for (auto& e : threads_list) { 364 | if (e.joinable()) { 365 | e.join(); 366 | } 367 | } 368 | if (!threads_list.empty()) { 369 | cout << setprecision(1) << setiosflags(ios::fixed); 370 | cout << " " << RichTxt::bold_on << 
RichTxt::underline_on << "<---- " 371 | << 100.0 * parsed_topics_cnt / valid_topics_urls_list.size() 372 | << "% ---->" << RichTxt::underline_off << RichTxt::bold_off << endl; 373 | cout << resetiosflags(ios::fixed); 374 | } 375 | 376 | cout << endl; 377 | cout << "Hey kiddo, your hot babes " << path << ", enjoy it! " << endl; 378 | } 379 | 380 | Caoliu::~Caoliu () 381 | { 382 | ; 383 | } 384 | 385 | -------------------------------------------------------------------------------- /src/lib/self/Caoliu.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | using std::string; 9 | using std::vector; 10 | 11 | 12 | class Caoliu 13 | { 14 | public: 15 | enum AvClass { west_reposted, cartoon_reposted, asia_mosaicked_reposted, asia_non_mosaicked_reposted, 16 | west_original, cartoon_original, asia_mosaicked_original, asia_non_mosaicked_original, 17 | selfie }; 18 | 19 | public: 20 | Caoliu ( const string& portal_url, 21 | AvClass av_class, 22 | const vector& proxy_addrs_list, 23 | unsigned range_begin, unsigned range_end, 24 | const vector& hate_keywords_list, 25 | const vector& like_keywords_list, 26 | unsigned threads_total, 27 | unsigned timeout_download_pic, 28 | const string& path ); 29 | virtual ~Caoliu (); 30 | 31 | const string& getPortalWebpageUrl (void) const; 32 | 33 | 34 | private: 35 | const string portal_url_; 36 | }; 37 | 38 | -------------------------------------------------------------------------------- /src/lib/self/CaoliuTopicWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "CaoliuTopicWebpage.h" 4 | #include 5 | #include 6 | #include 7 | #include "../helper/Misc.h" 8 | 9 | 10 | using namespace std; 11 | 12 | 13 | static bool 14 | parsePicturesUrlsHelper ( const string& webpage_txt, 15 | vector& pictures_urls_list, 16 | const string& keyword_begin, 17 | const 
string& keyword_end ) 18 | { 19 | bool b_ok = false; 20 | 21 | size_t start_pos = 0; 22 | while (true) { 23 | // parse picture URL 24 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt, 25 | keyword_begin, 26 | keyword_end, 27 | start_pos ); 28 | string pic_url = pair_tmp.first; 29 | if (pic_url.empty()) { 30 | break; 31 | } 32 | start_pos = pair_tmp.second; 33 | b_ok = true; 34 | 35 | // there are some bad picture-webspaces and logo pci, ignore them 36 | bool b_ignore_url = false; 37 | static const vector ignore_urls_keywords_list = { 38 | "iceimg.com", 39 | "picuphost.com", 40 | // caoliu froum selfie member's logo. 41 | // http://ww4.sinaimg.cn/mw690/005uMz33gw1egsm41zq6qj30f80b4gm9.jpg 42 | // >>>> 43 | "005uMz33gw1eh3a1r6ak0j30d005zt98.jpg", 44 | "005uMz33gw1egsm41zq6qj30f80b4gm9.jpg", 45 | // <<<< 46 | }; 47 | for (const auto& e : ignore_urls_keywords_list) { 48 | if (string::npos != pic_url.find(e)) { 49 | b_ignore_url = true; 50 | break; 51 | } 52 | } 53 | if (b_ignore_url) { 54 | continue; 55 | } 56 | 57 | // save the picture URL 58 | pictures_urls_list.push_back(pic_url); 59 | } 60 | 61 | return(b_ok); 62 | } 63 | 64 | static bool 65 | parsePicturesUrls (const string& webpage_txt, vector& pictures_urls_list) 66 | { 67 | pictures_urls_list.clear(); 68 | 69 | // just parse the toptip 70 | static const string keyword_toptip_begin("本頁主題:"); 71 | static const string keyword_toptip_end("[樓主]"); 72 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt, 73 | keyword_toptip_begin, 74 | keyword_toptip_end ); 75 | string toptip = pair_tmp.first; 76 | if (toptip.empty()) { 77 | cerr << "ERROR! there is no toptip. 
" << endl; 78 | return(false); 79 | } 80 | 81 | // the list may be on the webpage at the same time 82 | static const vector> begin_and_end_keywords_list = { make_pair(" keywords_seed_begin_list = { "http://www.rmdown.com/link.php?hash=", 101 | "http://rmdown.com/link.php?hash=", 102 | "http://www.xunfs.com/link.php?hash=", 103 | "http://xunfs.com/link.php?hash=" }; 104 | for (const auto& e : keywords_seed_begin_list) { 105 | const string& keyword_seed_begin = e; 106 | static const string keyword_seed_end(""); 107 | 108 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt, 109 | keyword_seed_begin, 110 | keyword_seed_end ); 111 | if (!pair_tmp.first.empty()) { 112 | seed_url = keyword_seed_begin + pair_tmp.first; 113 | return(true); 114 | } 115 | } 116 | 117 | 118 | return(false); 119 | } 120 | 121 | CaoliuTopicWebpage::CaoliuTopicWebpage (const string& url, const string& proxy_addr) 122 | : TopicWebpage(url, parsePicturesUrls, parseSeedUrl, proxy_addr, "gbk", "UTF-8") 123 | { 124 | ; 125 | } 126 | 127 | CaoliuTopicWebpage::~CaoliuTopicWebpage () 128 | { 129 | ; 130 | } 131 | 132 | -------------------------------------------------------------------------------- /src/lib/self/CaoliuTopicWebpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include "TopicWebpage.h" 8 | 9 | using std::string; 10 | using std::vector; 11 | 12 | 13 | class CaoliuTopicWebpage : public TopicWebpage 14 | { 15 | public: 16 | CaoliuTopicWebpage (const string& url, const string& proxy_addr); 17 | virtual ~CaoliuTopicWebpage (); 18 | }; 19 | 20 | -------------------------------------------------------------------------------- /src/lib/self/CaoliuTopicsListWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "CaoliuTopicsListWebpage.h" 4 | #include 5 | #include 6 | #include "../helper/Misc.h" 7 
| 8 | using namespace std; 9 | 10 | 11 | 12 | static bool 13 | parseTitlesAndUrls ( const string& webpage_txt, 14 | const string& portal_url, 15 | vector>& titles_and_urls_list ) 16 | { 17 | const unsigned size_back = titles_and_urls_list.size(); 18 | 19 | size_t keyword_topic_url_begin_pos = 0, keyword_topic_url_end_pos = 0; 20 | keyword_topic_url_begin_pos = webpage_txt.find("普通主題"); 21 | if (string::npos == keyword_topic_url_begin_pos) { 22 | keyword_topic_url_begin_pos = 0; 23 | } 24 | 25 | while (true) { 26 | // parse topic URL 27 | static const string keyword_topic_url_begin("

& pair_url = fetchStringBetweenKeywords( webpage_txt, 31 | keyword_topic_url_begin + keyword_topic_url_begin2, 32 | keyword_topic_url_end, 33 | keyword_topic_url_begin_pos ); 34 | const string& topic_url_part = pair_url.first; 35 | if (topic_url_part.empty()) { 36 | break; 37 | } 38 | const string& topic_url = portal_url + keyword_topic_url_begin2 + topic_url_part; 39 | keyword_topic_url_end_pos = pair_url.second; 40 | 41 | // parse topic title 42 | static const string keyword_topic_title_begin("id=\"\">"); 43 | static const string keyword_topic_title_end("

"); 44 | const pair& pair_title = fetchStringBetweenKeywords( webpage_txt, 45 | keyword_topic_title_begin, 46 | keyword_topic_title_end, 47 | keyword_topic_url_end_pos ); 48 | const string& topic_title = pair_title.first; 49 | keyword_topic_url_begin_pos = pair_title.second; 50 | 51 | // save url and title of the topic 52 | titles_and_urls_list.push_back(make_pair(topic_title, topic_url)); 53 | } 54 | 55 | return(titles_and_urls_list.size() > size_back); 56 | } 57 | 58 | static bool 59 | parseNextpageUrl (const string& webpage_txt, const string& portal_url, string& nextpage_url) 60 | { 61 | nextpage_url.clear(); // reset the out-param; the former empty() call only queried it and discarded the result 62 | 63 | static const string keyword_nextpage("下一頁"); 64 | const auto keyword_nextpage_pos = webpage_txt.find(keyword_nextpage); 65 | if (string::npos == keyword_nextpage_pos) { 66 | return(false); 67 | } 68 | 69 | static const string keyword_href(" 6 | #include 7 | #include "TopicsListWebpage.h" 8 | 9 | using std::string; 10 | using std::vector; 11 | 12 | 13 | class CaoliuTopicsListWebpage : public TopicsListWebpage 14 | { 15 | public: 16 | CaoliuTopicsListWebpage (const string& portal_url, const string& url, const string& proxy_addr); 17 | virtual ~CaoliuTopicsListWebpage (); 18 | }; 19 | -------------------------------------------------------------------------------- /src/lib/self/JandownSeedWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "JandownSeedWebpage.h" 4 | #include 5 | #include 6 | #include 7 | #include "../helper/Misc.h" 8 | 9 | 10 | using namespace std; 11 | 12 | static bool 13 | parsePostMultiSections ( const string& webpage_txt, 14 | vector>& post_sections_list ) 15 | { 16 | // parse the code section 17 | static const string keyword_code_section_begin(""); 19 | const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt, 20 | keyword_code_section_begin, 21 | keyword_code_section_end ); 22 | const string& ref_content = pair_tmp.first; 23 | if 
(ref_content.empty()) { 24 | cerr << "WARNING! parsePostMultiSections() CANNOT find the keyword " 25 | << "\"" << keyword_code_section_begin << "\"" << " and " 26 | << "\"" << keyword_code_section_end << "\"" << endl; 27 | return(false); 28 | } 29 | 30 | post_sections_list.push_back(make_pair("code", ref_content)); 31 | return(true); 32 | } 33 | 34 | // seed fetch URL. http://www.jandown.com/ and http://www6.mimima.com/ are 35 | // the same one website, on the other word, from http://www.jandown.com/abcd 36 | // download the seed file same as from http://www6.mimima.com/abcd, so, I need 37 | // just ONE fetch URL 38 | JandownSeedWebpage::JandownSeedWebpage (const string& url, const string& proxy_addr) 39 | : SeedWebpage(url, proxy_addr, "http://www.jandown.com/fetch.php", parsePostMultiSections) 40 | { 41 | ; 42 | } 43 | 44 | JandownSeedWebpage::~JandownSeedWebpage () 45 | { 46 | ; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /src/lib/self/JandownSeedWebpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include "SeedWebpage.h" 7 | 8 | using std::string; 9 | 10 | 11 | class JandownSeedWebpage : public SeedWebpage 12 | { 13 | public: 14 | JandownSeedWebpage (const string& url, const string& proxy_addr); 15 | virtual ~JandownSeedWebpage (); 16 | }; 17 | -------------------------------------------------------------------------------- /src/lib/self/RmdownSeedWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "RmdownSeedWebpage.h" 4 | #include 5 | #include 6 | #include 7 | #include "../helper/Misc.h" 8 | 9 | 10 | using namespace std; 11 | 12 | static bool 13 | parsePostMultiSections ( const string& webpage_txt, 14 | vector>& post_sections_list ) 15 | { 16 | // parse the ref section 17 | static const string& keyword_ref_section_begin("& 
pair_tmp = fetchStringBetweenKeywords( webpage_txt, 20 | keyword_ref_section_begin, 21 | keyword_ref_section_end ); 22 | const string& ref_content = pair_tmp.first; 23 | if (ref_content.empty()) { 24 | cerr << "WARNING! parsePostMultiSections() CANNOT find the keyword " 25 | << "\"" << keyword_ref_section_begin << "\"" << " and " 26 | << "\"" << keyword_ref_section_end << "\"" << endl; 27 | return(false); 28 | } 29 | post_sections_list.push_back(make_pair("ref", ref_content)); 30 | const auto keyword_ref_section_end_pos = pair_tmp.second; 31 | 32 | // parse the reff section 33 | static const string& keyword_reff_section_begin("value=\""); 34 | static const string& keyword_reff_section_end("\""); 35 | const pair& pair_tmp2 = fetchStringBetweenKeywords( webpage_txt, 36 | keyword_reff_section_begin, 37 | keyword_reff_section_end, 38 | keyword_ref_section_end_pos ); 39 | const string& reff_content = pair_tmp2.first; 40 | if (reff_content.empty()) { 41 | cerr << "WARNING! parsePostMultiSections() CANNOT find the keyword " 42 | << "\"" << keyword_reff_section_begin << "\"" << " and " 43 | << "\"" << keyword_reff_section_end << "\"" << endl; 44 | return(false); 45 | } 46 | post_sections_list.push_back(make_pair("reff", reff_content)); 47 | 48 | 49 | return(true); 50 | } 51 | 52 | // seed fetch URL. 
http://www.rmdown.com/ and http://www.xunfs.com/ are 53 | // the same one website, on the other word, from http://www.rmdown.com/abcd 54 | // download the seed file same as from http://www.xunfs.com/abcd, so, I need 55 | // just ONE fetch URL 56 | RmdownSeedWebpage::RmdownSeedWebpage (const string& url, const string& proxy_addr) 57 | : SeedWebpage(url, proxy_addr, "http://www.rmdown.com/download.php", parsePostMultiSections) 58 | { 59 | ; 60 | } 61 | 62 | RmdownSeedWebpage::~RmdownSeedWebpage () 63 | { 64 | ; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /src/lib/self/RmdownSeedWebpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include "SeedWebpage.h" 7 | 8 | using std::string; 9 | 10 | 11 | class RmdownSeedWebpage : public SeedWebpage 12 | { 13 | public: 14 | RmdownSeedWebpage (const string& url, const string& proxy_addr); 15 | virtual ~RmdownSeedWebpage (); 16 | }; 17 | -------------------------------------------------------------------------------- /src/lib/self/SeedWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "SeedWebpage.h" 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | SeedWebpage::SeedWebpage ( const string& url, 11 | const string& proxy_addr, 12 | const string& post_url, 13 | ParsePostMultiSections parsePostMultiSections ) 14 | : Webpage(url, "", proxy_addr), post_url_(post_url) 15 | { 16 | if (!isLoaded()) { 17 | cerr << "WARNING! 
SeedWebpage::SeedWebpage() CANNOT load webpage \"" 18 | << url << "\"" << endl; 19 | return; 20 | } 21 | 22 | // parse the post method multi sections 23 | parsePostMultiSections(getTxt(), post_sections_list_); 24 | } 25 | 26 | SeedWebpage::~SeedWebpage () 27 | { 28 | ; 29 | } 30 | 31 | // this is a multipart/formdata style HTTP post method 32 | bool 33 | SeedWebpage::downloadSeed (const string& path, const string& base_name) 34 | { 35 | if (post_sections_list_.empty()) { 36 | return(false); 37 | } 38 | 39 | // make seed name 40 | static const string seed_postfix(".torrent"); 41 | string seed_filename = path + "/" + base_name + seed_postfix; 42 | 43 | 44 | return(submitMultiPost(post_url_, seed_filename, post_sections_list_)); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/lib/self/SeedWebpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include "../helper/Webpage.h" 8 | 9 | using std::string; 10 | using std::vector; 11 | using std::pair; 12 | 13 | 14 | class SeedWebpage : public Webpage 15 | { 16 | public: 17 | // callback function for parse the multi sections of post 18 | typedef bool (*ParsePostMultiSections) ( const string& webpage_txt, 19 | vector>& post_sections_list ); 20 | 21 | public: 22 | SeedWebpage ( const string& url, 23 | const string& proxy_addr, 24 | const string& post_url, 25 | ParsePostMultiSections parsePostMultiSections ); 26 | virtual ~SeedWebpage (); 27 | bool downloadSeed (const string& path, const string& base_name); 28 | 29 | private: 30 | const string post_url_; 31 | vector> post_sections_list_; 32 | }; 33 | -------------------------------------------------------------------------------- /src/lib/self/TopicWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "TopicWebpage.h" 4 | #include 5 | 
#include 6 | #include "../helper/Misc.h" 7 | 8 | #include 9 | 10 | using namespace std; 11 | 12 | 13 | TopicWebpage::TopicWebpage ( const string& url, 14 | ParsePicturesUrls parsePicturesUrls, 15 | ParseSeedUrl parseSeedUrl, 16 | const string& proxy_addr, 17 | const string& src_charset, 18 | const string& dest_charset ) 19 | : Webpage(url, "", proxy_addr) 20 | { 21 | if (!isLoaded()) { 22 | return; 23 | } 24 | 25 | // charset convert 26 | if (!src_charset.empty() && !dest_charset.empty()) { 27 | convertCharset(src_charset, dest_charset); 28 | } 29 | 30 | // parse the URLs of av pictures 31 | if (!parsePicturesUrls(getTxt(), pictures_urls_list_)) { 32 | //cerr << "WARNING! parsePicturesUrls() failure from " << url << endl; 33 | ; 34 | } 35 | 36 | // parse the URLs of seed 37 | if (!parseSeedUrl(getTxt(), seed_url_)) { 38 | //cerr << "WARNING! parseSeedUrl() failure from " << url << endl; 39 | ; 40 | } 41 | } 42 | 43 | TopicWebpage::~TopicWebpage () 44 | { 45 | ; 46 | } 47 | 48 | const vector& 49 | TopicWebpage::getPicturesUrlsList (void) const 50 | { 51 | return(pictures_urls_list_); 52 | } 53 | 54 | const string& 55 | TopicWebpage::getSeedUrl (void) const 56 | { 57 | return(seed_url_); 58 | } 59 | 60 | // the name rule of pictures: topictitle-0.jpg, topictitle-1.jpg, topictitle-[x].jpg 61 | bool 62 | TopicWebpage::downloadAllPictures ( const string& path, 63 | const string& base_name, 64 | unsigned timeout_download_pic, 65 | vector& fail_download_pics_urls_list, 66 | unsigned pictures_max_num ) 67 | { 68 | fail_download_pics_urls_list.clear(); 69 | 70 | for ( unsigned i = 0, sucess_cnt = 0; 71 | i < pictures_urls_list_.size() && sucess_cnt < pictures_max_num; 72 | ++i ) { 73 | const string& picture_url = pictures_urls_list_[i]; 74 | 75 | // make picture postfix name 76 | //string postfix_name("jpeg"); // sometime get the remote filetype failure, so I set the default postfix 77 | string postfix_name(""); 78 | static const unsigned get_remote_filetype_retry_times = 
2; 79 | static const unsigned get_remote_filetype_sleep_second = 2; 80 | for (unsigned j = 0; j < get_remote_filetype_retry_times; ++j) { 81 | const string& tmp = getRemoteFiletype(picture_url); 82 | static const string keyword("image/"); 83 | const auto pos = tmp.find(keyword); 84 | if (string::npos != pos) { 85 | postfix_name = tmp.substr(pos + keyword.size()); 86 | break; 87 | } 88 | sleep(get_remote_filetype_sleep_second); 89 | } 90 | // neither gif (because gifs almost be AD) nor cannot get the file type, ignore 91 | if ("gif" == postfix_name || "" == postfix_name) { 92 | continue; 93 | } 94 | 95 | // download pic 96 | const string& pic_filename = path + "/" + base_name + "-" + convNumToStr(sucess_cnt) + "." + postfix_name; 97 | if (downloadFile(picture_url, pic_filename, "", timeout_download_pic)) { 98 | ++sucess_cnt; 99 | continue; 100 | } 101 | 102 | //cerr << "WARNING! CANNOT download " << pictures_urls_list_[i] << endl; 103 | fail_download_pics_urls_list.push_back(pictures_urls_list_[i]); 104 | remove(pic_filename.c_str()); 105 | } 106 | 107 | 108 | return(fail_download_pics_urls_list.empty()); 109 | } 110 | 111 | -------------------------------------------------------------------------------- /src/lib/self/TopicWebpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include "../helper/Webpage.h" 8 | 9 | using std::string; 10 | using std::vector; 11 | 12 | 13 | class TopicWebpage : public Webpage 14 | { 15 | public: 16 | // callback function for parse the URLs of av pictures 17 | typedef bool (*ParsePicturesUrls) (const string& webpage_txt, vector& pictures_urls_list); 18 | // callback function for parse the URLs of seed 19 | typedef bool (*ParseSeedUrl) (const string& webpage_txt, string& seed_url); 20 | 21 | public: 22 | TopicWebpage ( const string& url, 23 | ParsePicturesUrls parsePicturesUrls, 24 | ParseSeedUrl parseSeedUrl, 25 | const 
string& proxy_addr, 26 | const string& src_charset, 27 | const string& dest_charset ); 28 | virtual ~TopicWebpage (); 29 | const string& getSeedUrl (void) const; 30 | const vector& getPicturesUrlsList (void) const; 31 | bool downloadAllPictures ( const string& path, 32 | const string& base_name, 33 | unsigned timeout_download_pic, 34 | vector& fail_download_pics_urls_list, 35 | unsigned pictures_max_num = 32 ); 36 | 37 | private: 38 | string seed_url_; 39 | vector pictures_urls_list_; 40 | }; 41 | 42 | -------------------------------------------------------------------------------- /src/lib/self/TopicsListWebpage.cpp: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #include "TopicsListWebpage.h" 4 | #include 5 | 6 | 7 | using namespace std; 8 | 9 | 10 | TopicsListWebpage::TopicsListWebpage ( const string& portal_url, 11 | const string& url, 12 | TopicsListWebpage::ParseTitlesAndUrls parseTitlesAndUrls, 13 | TopicsListWebpage::ParseNextpageUrl parseNextpageUrl, 14 | const string& proxy_addr, 15 | const string& src_charset, 16 | const string& dest_charset ) 17 | : Webpage(url, "", proxy_addr, 16, 4, 4), 18 | portal_url_(portal_url) 19 | { 20 | if (!isLoaded()) { 21 | return; 22 | } 23 | 24 | // charset convert 25 | if (!src_charset.empty() && !dest_charset.empty()) { 26 | convertCharset(src_charset, dest_charset); 27 | } 28 | 29 | // parse the URLs and titles of all topics on topicslist webpage 30 | const string& webpage_txt = getTxt(); 31 | parseTitlesAndUrls(webpage_txt, portal_url_, titles_and_urls_list_); 32 | 33 | // unescape html for title 34 | for (auto& e : titles_and_urls_list_) { 35 | string& title = e.first; 36 | title = unescapeHtml(title); 37 | } 38 | 39 | // parse the next topicslist webpage URL 40 | parseNextpageUrl(webpage_txt, portal_url_, nextpage_url_); 41 | } 42 | 43 | TopicsListWebpage::~TopicsListWebpage () 44 | { 45 | ; 46 | } 47 | 48 | // if there is no more topicslist page, return 
empty string 49 | const string& 50 | TopicsListWebpage::getNextpageUrl (void) const 51 | { 52 | return(nextpage_url_); 53 | } 54 | 55 | // first title, second url 56 | const vector>& 57 | TopicsListWebpage::getTitlesAndUrlsList (void) const 58 | { 59 | return(titles_and_urls_list_); 60 | } 61 | 62 | const string& 63 | TopicsListWebpage::getPortalWebpageUrl (void) const 64 | { 65 | return(portal_url_); 66 | } 67 | 68 | -------------------------------------------------------------------------------- /src/lib/self/TopicsListWebpage.h: -------------------------------------------------------------------------------- 1 | // last modified 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include "../helper/Webpage.h" 8 | 9 | using std::string; 10 | using std::vector; 11 | using std::pair; 12 | 13 | 14 | class TopicsListWebpage : public Webpage 15 | { 16 | public: 17 | // callback function for parse titles and URLs of topics 18 | typedef bool (*ParseTitlesAndUrls) ( const string& webpage_txt, 19 | const string& portal_url, 20 | vector>& titles_and_urls_list ); 21 | // callback function for parse next topicslist URL 22 | typedef bool (*ParseNextpageUrl) ( const string& webpage_txt, 23 | const string& portal_url, 24 | string& nextpage_url ); 25 | 26 | public: 27 | TopicsListWebpage ( const string& portal_url, 28 | const string& url, 29 | ParseTitlesAndUrls parseTitlesAndUrls, 30 | ParseNextpageUrl parseNextpageUrl, 31 | const string& proxy_addr = "", 32 | const string& src_charset = "", 33 | const string& dest_charset = "" ); 34 | virtual ~TopicsListWebpage (); 35 | const vector>& getTitlesAndUrlsList (void) const; 36 | const string& getNextpageUrl (void) const; 37 | const string& getPortalWebpageUrl (void) const; 38 | 39 | 40 | private: 41 | vector> titles_and_urls_list_; 42 | string nextpage_url_; 43 | const string portal_url_; 44 | }; 45 | -------------------------------------------------------------------------------- /src/main.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "lib/helper/Webpage.h" 14 | #include "lib/self/Aicheng.h" 15 | #include "lib/self/Caoliu.h" 16 | #include "lib/helper/Time.h" 17 | #include "lib/helper/CmdlineOption.h" 18 | #include "lib/helper/RichTxt.h" 19 | #include "lib/helper/Misc.h" 20 | #include "lib/3rd/json11/json11.hpp" 21 | 22 | 23 | using namespace std; 24 | using namespace json11; 25 | 26 | 27 | static const string g_softname(RichTxt::bold_on + "hardseed" + RichTxt::bold_off); 28 | static const string g_version("0.2.14"); 29 | static const string g_myemail("yangyangwithgnu@yeah.net"); 30 | static const string g_myemail_color(RichTxt::bold_on + RichTxt::foreground_green + g_myemail + RichTxt::reset_all); 31 | static const string g_mywebspace("http://yangyangwithgnu.github.io/"); 32 | static const string g_mywebspace_color(RichTxt::bold_on + RichTxt::foreground_green + g_mywebspace + RichTxt::reset_all); 33 | 34 | static void 35 | showSexyGirl (void) 36 | { 37 | cout << endl << 38 | ".================================================================." << endl << 39 | //"|| *hardseed* vx.y.z ||" << endl << 40 | "|| " << g_softname << " v" << g_version << " ||" << endl << 41 | "|'--------------------------------------------------------------'|" << endl << 42 | "|| -- SEX IS ZERO (0), so, who wanna be the ONE (1), aha? ||" << endl << 43 | "|'=============================================================='|" << endl << 44 | "|| .::::. ||" << endl << 45 | "|| .::::::::. ||" << endl << 46 | "|| ::::::::::: ||" << endl << 47 | "|| ':::::::::::.. ||" << endl << 48 | "|| .:::::::::::::::' ||" << endl << 49 | "|| '::::::::::::::.` ||" << endl << 50 | "|| .::::::::::::::::.' ||" << endl << 51 | "|| .::::::::::::.. 
||" << endl << 52 | "|| .::::::::::::::'' ||" << endl << 53 | "|| .:::. '::::::::'':::: ||" << endl << 54 | "|| .::::::::. ':::::' ':::: ||" << endl << 55 | "|| .::::':::::::. ::::: '::::. ||" << endl << 56 | "|| .:::::' ':::::::::. :::::. ':::. ||" << endl << 57 | "|| .:::::' ':::::::::.::::::. '::. ||" << endl << 58 | "|| .::::'' ':::::::::::::::' '::. ||" << endl << 59 | "|| .::'' '::::::::::::::' ::.. ||" << endl << 60 | "|| ..:::: ':::::::::::' :'''` ||" << endl << 61 | "|| ..''''':' '::::::.' ||" << endl << 62 | "|'=============================================================='|" << endl << 63 | //"|| yangyangwithgnu@yeah.net ||" << endl << 64 | "|| " << g_myemail_color << " ||" << endl << 65 | //"|| http://yangyangwithgnu.github.io/ ||" << endl << 66 | "|| " << g_mywebspace_color << " ||" << endl << 67 | "'================================================================'" << endl; 68 | } 69 | 70 | static void 71 | showHelpInfo (void) 72 | { 73 | cout << endl; 74 | cout << g_softname 75 | << " is a batch seeds and pictures download utiltiy from CaoLiu and AiCheng forum. " 76 | << "It's easy and simple to use. Usually, you could issue it as follow: " << endl 77 | << " $ hardseed" << endl 78 | << "or" << endl 79 | << " $ hardseed --saveas-path ~/downloads --concurrent-tasks 4 --topics-range 8 64" 80 | << " --av-class aicheng_west --timeout-download-picture 32 --hate X-Art --proxy http://127.0.0.1:8087" << endl; 81 | 82 | cout << endl; 83 | cout << " --help" << endl 84 | << " Show this help infomation what you are seeing. " << endl; 85 | 86 | cout << endl; 87 | cout << " --version" << endl 88 | << " Show current version. 
" << endl; 89 | 90 | cout << endl; 91 | cout << " --av-class" << endl 92 | << " There are 13 av classes: caoliu_west_reposted, caoliu_cartoon_reposted, " 93 | << "caoliu_asia_mosaicked_reposted, caoliu_asia_non_mosaicked_reposted, caoliu_west_original, " 94 | << "caoliu_cartoon_original, caoliu_asia_mosaicked_original, caoliu_asia_non_mosaicked_original, " 95 | << "caoliu_selfie, aicheng_west, aicheng_cartoon, aicheng_asia_mosaicked and aicheng_asia_non_mosaicked. " << endl 96 | << " As the name implies, \"caoliu\" stands for CaoLiu forum, \"aicheng\" for AiCheng forum, " 97 | << "\"reposted\" and \"original\" are clearity, and the \"selfie\" is photos by oneself, you konw " 98 | << "which one is your best lover (yes, only one). " << endl 99 | << " The default is aicheng_asia_mosaicked. " << endl; 100 | 101 | cout << endl; 102 | cout << " --concurrent-tasks" << endl 103 | << " You can set more than one proxy, each proxy could more than one concurrent tasks. This option " 104 | << "set the number of concurrent tasks of each prox. " << endl 105 | << " The default number is 8. " << endl; 106 | 107 | cout << endl; 108 | cout << " --timeout-download-picture" << endl 109 | << " Some pictures too big to download in few seconds. So, you should set the download picture " 110 | << "timeout seconds. " << endl 111 | << " The default timeout is 16 seconds." << endl; 112 | 113 | cout << endl; 114 | cout << " --topics-range" << endl 115 | << " Set the range of to download topics. E.G.: " << endl 116 | << " --topics-range 2 16" << endl 117 | << " --topics-range 8 (I.E., --topics-range 1 8)" << endl 118 | << " --topics-range -1 (I.E., all topics of this av class)" << endl 119 | << " The default topics range is 1 to 64. " << endl; 120 | 121 | cout << endl; 122 | cout << " --saveas-path" << endl 123 | << " Set the path to save seeds and pictures. The rule of dir: [avclass][range]@hhmmss. E.G., " 124 | << "[aicheng_west][2~32]@124908/. 
" << endl 125 | << " The default directory is home directory (or windows is C:\\). " << endl; 126 | 127 | cout << endl; 128 | cout << " --hate" << endl 129 | << " If you hate some subject topics, you can ignore them by setting this option with keywords " 130 | << "in topic title, split by space-char ' ', and case sensitive. E.G., --hate 孕妇 重口味. " 131 | << "When --hate keywords list conflict with --like, --hate first. " << endl; 132 | 133 | cout << endl; 134 | cout << " --like" << endl 135 | << " If you like some subject topics, you can grab them by setting this option with keywords " 136 | << "in topic title, split by space-char ' ', and case sensitive. E.G., --like 苍井空 小泽玛利亚. " 137 | << "When --like keywords list conflict with --hate, --hate first. " << endl; 138 | 139 | cout << endl; 140 | cout << " --proxy" << endl 141 | << " As you know, the government likes blocking adult websites, so, I do suggest you to set " 142 | << "--proxy option. Hardseed supports more proxys: " << endl 143 | << " a) GoAgent (STRONGLY recommended), --proxy http://127.0.0.1:8087" << endl 144 | << " b) shadowsocks, --proxy socks5://127.0.0.1:1080, or socks5h://127.0.0.1:1080" << endl 145 | << " c) SSH, --proxy socks4://127.0.0.1:7070" << endl 146 | << " d) VPN (PPTP and openVPN), --proxy \"\"" << endl 147 | << " It is important that you should know, you can set more proxys at the same time, split by " 148 | << "space-char ' '. As the --concurrent-tasks option says, each proxy could more than one concurrent " 149 | << "tasks, now, what about more proxys? Yes, yes, the speed of downloading seed and pictures is " 150 | << "very very fast. E.G., --concurrent-tasks 8 --proxy http://127.0.0.1:8087 socks5://127.0.0.1:1080 " 151 | << "socks4://127.0.0.1:7070, the number of concurrent tasks is 8*3. 
" << endl 152 | << " If you wanna how to install and configure various kinds of proxy, please access my homepage " 153 | << "\"3.2 搭梯翻墙\" https://github.com/yangyangwithgnu/the_new_world_linux#3.2 " << endl 154 | << " If you are not good at computer, there is a newest goagent for floks who are not good at computer " 155 | << "by me, yes, out of box. see https://github.com/yangyangwithgnu/goagent_out_of_box_yang " 156 | << " The default http://127.0.0.1:8087. " << endl; 157 | 158 | cout << endl; 159 | cout << " That's all. Any suggestions let me know by " 160 | << g_myemail_color 161 | << " or " 162 | << g_mywebspace_color 163 | << ", big thanks to you. Kiddo, take care of your body. :-)" << endl << endl; 164 | } 165 | 166 | static void 167 | showVersionInfo (void) 168 | { 169 | cout << "hardseed version " << g_version << endl 170 | << "email " << g_myemail << endl 171 | << "webspace " << g_mywebspace << endl << endl; 172 | } 173 | 174 | static bool 175 | parseTopicsRangeArgument ( const vector& topicsrange_arguments_list, 176 | unsigned& topics_range_begin, 177 | unsigned& topics_range_end ) 178 | { 179 | if (topicsrange_arguments_list.empty()) { 180 | return(false); 181 | } 182 | 183 | string begin_str = topicsrange_arguments_list[0]; 184 | string end_str; 185 | if (topicsrange_arguments_list.size() < 2) { 186 | end_str = begin_str; 187 | begin_str = "1"; 188 | } else { 189 | end_str = topicsrange_arguments_list[1]; 190 | } 191 | 192 | unsigned begin_tmp; 193 | if ( 0 == (begin_tmp = strtoul(begin_str.c_str(), nullptr, 0)) && 194 | '0' != begin_str[0] ) { 195 | return(false); 196 | } 197 | unsigned end_tmp; 198 | if ( 0 == (end_tmp = strtoul(end_str.c_str(), nullptr, 0)) && 199 | '0' != end_str[0] ) { 200 | return(false); 201 | } 202 | 203 | if (begin_tmp > end_tmp) { 204 | return(false); 205 | } 206 | 207 | topics_range_begin = begin_tmp; 208 | topics_range_end = end_tmp; 209 | 210 | 211 | return(true); 212 | } 213 | 214 | static void 215 | getPortalUrls (string& 
caoliu_portal_url, string& aicheng_portal_url) 216 | { 217 | //#ifdef CYGWIN 218 | caoliu_portal_url = "http://t66y.com/"; 219 | aicheng_portal_url = "http://www.ac168.info/bt/"; 220 | //#else 221 | //static const string portals_file_url("https://raw.githubusercontent.com/yangyangwithgnu/hardseed/master/config/portals_list.json"); 222 | //Webpage portals_list_webpage(portals_file_url); 223 | //if (!portals_list_webpage.isLoaded()) { 224 | //cerr << "ERROR! fail to load " << portals_file_url << endl; 225 | //exit(EXIT_FAILURE); 226 | //} 227 | //const string& portals_file_txt = portals_list_webpage.getTxt(); 228 | 229 | //string json_err_msg; 230 | //const auto json_portal_urls_list = Json::parse(portals_file_txt, json_err_msg); 231 | //if (!json_err_msg.empty()) { 232 | //cerr << "ERROR! fail to parse the portal URLs list JSON. because " 233 | //<< json_err_msg 234 | //<< endl; 235 | //exit(EXIT_FAILURE); 236 | //} 237 | 238 | //// caoliu 和 aicheng 论坛入口 URL 以 json 格式存放在本项目托管空间中,格式如下: 239 | /* 240 | * { 241 | * "caoliu": "http://cl.clme.me/", 242 | * "aicheng": "http://www.ac168.info/", 243 | * } 244 | */ 245 | //caoliu_portal_url = json_portal_urls_list["caoliu"].string_value(); 246 | //aicheng_portal_url = json_portal_urls_list["aicheng"].string_value(); 247 | //if (caoliu_portal_url.empty() || aicheng_portal_url.empty()) { 248 | //cerr << "ERROR! fail to parse caoliu and aicheng portal URL. 
" << endl; 249 | //exit(EXIT_FAILURE); 250 | //} 251 | //#endif 252 | } 253 | 254 | int 255 | main (int argc, char* argv[]) 256 | { 257 | // parse command line options 258 | // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 259 | // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 260 | 261 | CmdlineOption cmdline_options((unsigned)argc, argv); 262 | vector cmdline_arguments_list; 263 | 264 | // --help, first high priority, ignore other options 265 | if (cmdline_options.hasOption("--help")) { 266 | showHelpInfo(); 267 | return(EXIT_SUCCESS); 268 | } 269 | 270 | // --version, second high priority, ignore other options 271 | if (cmdline_options.hasOption("--version")) { 272 | showVersionInfo(); 273 | return(EXIT_SUCCESS); 274 | } 275 | 276 | // show the sexy girl ASCII art 277 | showSexyGirl(); 278 | cout << endl; 279 | sleep(2); 280 | 281 | // prompt turn on the goagent 282 | cout << RichTxt::bold_on 283 | << "************************ !! IMPORTANCE !! ************************" << endl 284 | << "******** please make sure the proxy program is running *********" << endl 285 | << "************************ !! IMPORTANCE !! 
************************" << endl 286 | << RichTxt::bold_off << endl; 287 | 288 | // --av-class 289 | // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 290 | cout << "Your command arguments: " << endl; 291 | string av_class_name("aicheng_asia_mosaicked"); 292 | cmdline_arguments_list = cmdline_options.getArgumentsList("--av-class"); 293 | if (!cmdline_arguments_list.empty()) { 294 | av_class_name = cmdline_arguments_list[0]; 295 | } 296 | 297 | bool b_aicheng = true; 298 | Caoliu::AvClass caoliu_av_class = Caoliu::asia_mosaicked_original; 299 | Aicheng::AvClass aicheng_av_class = Aicheng::asia_mosaicked; 300 | if ("caoliu_west_original" == av_class_name) { 301 | caoliu_av_class = Caoliu::west_original; 302 | b_aicheng = false; 303 | } else if ("caoliu_cartoon_original" == av_class_name) { 304 | caoliu_av_class = Caoliu::cartoon_original; 305 | b_aicheng = false; 306 | } else if ("caoliu_asia_mosaicked_original" == av_class_name) { 307 | caoliu_av_class = Caoliu::asia_mosaicked_original; 308 | b_aicheng = false; 309 | } else if ("caoliu_asia_non_mosaicked_original" == av_class_name) { 310 | caoliu_av_class = Caoliu::asia_non_mosaicked_original; 311 | b_aicheng = false; 312 | } else if ("caoliu_west_reposted" == av_class_name) { 313 | caoliu_av_class = Caoliu::west_reposted; 314 | b_aicheng = false; 315 | } else if ("caoliu_cartoon_reposted" == av_class_name) { 316 | caoliu_av_class = Caoliu::cartoon_reposted; 317 | b_aicheng = false; 318 | } else if ("caoliu_asia_mosaicked_reposted" == av_class_name) { 319 | caoliu_av_class = Caoliu::asia_mosaicked_reposted; 320 | b_aicheng = false; 321 | } else if ("caoliu_asia_non_mosaicked_reposted" == av_class_name) { 322 | caoliu_av_class = Caoliu::asia_non_mosaicked_reposted; 323 | b_aicheng = false; 324 | } else if ("caoliu_selfie" == av_class_name) { 325 | caoliu_av_class = Caoliu::selfie; 326 | b_aicheng = false; 327 | } else if ("aicheng_west" == av_class_name) { 328 | aicheng_av_class = Aicheng::west; 329 | } else if 
("aicheng_asia_mosaicked" == av_class_name) { 330 | aicheng_av_class = Aicheng::asia_mosaicked; 331 | } else if ("aicheng_cartoon" == av_class_name) { 332 | aicheng_av_class = Aicheng::cartoon; 333 | } else if ("aicheng_asia_non_mosaicked" == av_class_name) { 334 | aicheng_av_class = Aicheng::asia_non_mosaicked; 335 | } else { 336 | cerr << "ERROR! the --av-class argument invalid. More info please see --help. " << endl; 337 | return(EXIT_FAILURE); 338 | } 339 | 340 | cout << " the av class \"" << RichTxt::bold_on << av_class_name << RichTxt::bold_off << "\"; " << endl; 341 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 342 | 343 | // --timeout-download-picture 344 | unsigned timeout_download_pic = 16; // default timeout seconds 345 | cmdline_arguments_list = cmdline_options.getArgumentsList("--timeout-download-picture"); 346 | if (!cmdline_arguments_list.empty()) { 347 | unsigned tmp = strtoul(cmdline_arguments_list[0].c_str(), nullptr, 0); 348 | if (tmp > 0) { 349 | timeout_download_pic = tmp; 350 | } 351 | } 352 | cout << " the download picture timeout seconds " 353 | << "\"" << RichTxt::bold_on << timeout_download_pic << RichTxt::bold_off << "\"; " << endl; 354 | 355 | // --topics-range, E.G.: 356 | // --topics-range 2 16, 357 | // --topics-range 8 (I.E., --topics-range 1 8), 358 | // --topics-range 4 -1 (I.E., all topics of this class av) 359 | unsigned topics_range_begin = 1, topics_range_end = 64; // default range 360 | cmdline_arguments_list = cmdline_options.getArgumentsList("--topics-range"); 361 | if (!cmdline_arguments_list.empty()) { 362 | if (!parseTopicsRangeArgument(cmdline_arguments_list, topics_range_begin, topics_range_end)) { 363 | cerr << "ERROR! --topics-range argument setting wrong! " << endl; 364 | return(EXIT_FAILURE); 365 | } 366 | } 367 | cout << " the range of parsing topics " 368 | << RichTxt::bold_on << "[" << topics_range_begin << "~" << topics_range_end << "]" << RichTxt::bold_off 369 | << "; " << endl; 370 | 371 | // --saveas-path. 
372 | // create dir to save seeds and pictures. the rule of dir: [avclass][range]@hhmmss. 373 | // E.G., [aicheng_west][8~16]@211159. default home directory. 374 | // >>>>>>>>>>>>>>>>>> 375 | string path; 376 | 377 | cmdline_arguments_list = cmdline_options.getArgumentsList("--saveas-path"); 378 | if (cmdline_arguments_list.empty()) { 379 | #ifdef CYGWIN 380 | const char* p_home = "C:\\"; 381 | #else 382 | const char* p_home = getenv("HOME"); 383 | #endif 384 | if (nullptr == p_home) { 385 | cerr << "ERROR! --saveas-path argument setting wrong! " << endl; 386 | return(EXIT_FAILURE); 387 | } 388 | path = p_home; 389 | } else { 390 | path = cmdline_arguments_list[0]; 391 | } 392 | path += "/["; 393 | 394 | // 1st, av class 395 | path += av_class_name; 396 | path += "]"; 397 | 398 | // 2nd, range 399 | path += "[" + convNumToStr(topics_range_begin) + "~" + convNumToStr(topics_range_end) + "]@"; 400 | 401 | // 3rd, time 402 | Time current_time; 403 | path += current_time.getHour(2) + current_time.getMinute(2) + current_time.getSecond(2); 404 | path += "/"; 405 | 406 | #ifdef CYGWIN 407 | // windows path style 408 | replace(path.begin(), path.end(), '/', '\\'); 409 | #endif 410 | 411 | // create dir 412 | if (-1 == mkdir(path.c_str(), 0755)) { 413 | cerr << "ERROR! cannot create " << path << ", " << strerror(errno) << endl; 414 | return(EXIT_FAILURE); 415 | } 416 | 417 | #ifndef CYGWIN 418 | // convert raw path to standard absolute path. To call realpath() success, 419 | // path must have created. 
420 | char buffer[PATH_MAX]; 421 | realpath(path.c_str(), buffer); 422 | path = buffer; 423 | #endif 424 | 425 | cout << " the path to save seeds and pictures \"" << RichTxt::bold_on << path << RichTxt::bold_off << "\"; " << endl; 426 | // <<<<<<<<<<<<<<<<<< 427 | 428 | // --concurrent-tasks 429 | unsigned threads_total = 8; // the default number of threads 430 | cmdline_arguments_list = cmdline_options.getArgumentsList("--concurrent-tasks"); 431 | if (!cmdline_arguments_list.empty()) { 432 | unsigned tmp = strtoul(cmdline_arguments_list[0].c_str(), nullptr, 0); 433 | if (tmp <= 0) { 434 | cerr << "ERROR! --concurrent-tasks argument setting wrong. " << endl; 435 | return(EXIT_FAILURE); 436 | } 437 | threads_total = tmp; 438 | } 439 | cout << " the number of concurrent tasks \"" << RichTxt::bold_on << threads_total << RichTxt::bold_off << "\"; " << endl; 440 | 441 | // --hate. ignore the topics by user setting keywords in topic title, split by space-char ' '. 442 | // for example: --hate aa bb cc "d d". 443 | vector hate_keywords_list = { "连发", "連发", "连發", "連發", 444 | "连弹", "★㊣", "合辑", "合集", 445 | "合輯", "nike", "最新の美女骑兵㊣", 446 | "精選", "精选" }; // force to ignore the all-in-one topics 447 | cmdline_arguments_list = cmdline_options.getArgumentsList("--hate"); 448 | if (!cmdline_arguments_list.empty()) { 449 | hate_keywords_list.insert(hate_keywords_list.end(), cmdline_arguments_list.begin(), cmdline_arguments_list.end()); 450 | } 451 | cout << " ignore some topics which include the keywords as follow \"" << RichTxt::bold_on; 452 | if (hate_keywords_list.empty()) { 453 | cout << " "; 454 | } else { 455 | copy(hate_keywords_list.cbegin(), hate_keywords_list.cend(), ostream_iterator(cout, ", ")); 456 | } 457 | cout << RichTxt::bold_off << "\b\b\"; " << endl; 458 | 459 | // --like. 
460 | vector like_keywords_list; 461 | cmdline_arguments_list = cmdline_options.getArgumentsList("--like"); 462 | if (!cmdline_arguments_list.empty()) { 463 | like_keywords_list.assign(cmdline_arguments_list.begin(), cmdline_arguments_list.end()); 464 | } 465 | cout << " just parse topics which include the kewords as follow \"" << RichTxt::bold_on; 466 | if (like_keywords_list.empty()) { 467 | cout << " "; 468 | } else { 469 | copy(like_keywords_list.cbegin(), like_keywords_list.cend(), ostream_iterator(cout, ", ")); 470 | } 471 | cout << RichTxt::bold_off << "\b\b\"; " << endl; 472 | 473 | // --proxy. prompt user to use proxy, because the caoliu bbs maybe block IP 474 | vector proxy_addrs_list = {"http://127.0.0.1:8087"}; // the default proxy is GoAgent 475 | cmdline_arguments_list = cmdline_options.getArgumentsList("--proxy"); 476 | if (!cmdline_arguments_list.empty()) { 477 | proxy_addrs_list = cmdline_arguments_list; 478 | } 479 | cout << " the proxy \"" << RichTxt::bold_on; 480 | copy(proxy_addrs_list.cbegin(), proxy_addrs_list.cend(), ostream_iterator(cout, ", ")); 481 | cout << "\b\b" << RichTxt::bold_off << "\". 
" << endl << endl; 482 | 483 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 484 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 485 | 486 | // 获取 caoliu 和 aicheng 两个论坛的入口 URL 487 | string caoliu_portal_url, aicheng_portal_url; 488 | getPortalUrls(caoliu_portal_url, aicheng_portal_url); 489 | 490 | // download pictures and seed 491 | if (b_aicheng) { 492 | Aicheng aicheng( aicheng_portal_url, 493 | aicheng_av_class, 494 | proxy_addrs_list, 495 | topics_range_begin, topics_range_end, 496 | hate_keywords_list, 497 | like_keywords_list, 498 | threads_total * proxy_addrs_list.size(), 499 | timeout_download_pic, 500 | path ); 501 | } else { 502 | Caoliu caoliu ( caoliu_portal_url, 503 | caoliu_av_class, 504 | proxy_addrs_list, 505 | topics_range_begin, topics_range_end, 506 | hate_keywords_list, 507 | like_keywords_list, 508 | threads_total * proxy_addrs_list.size(), 509 | timeout_download_pic, 510 | path ); 511 | } 512 | 513 | 514 | cout << endl; 515 | return(EXIT_SUCCESS); 516 | } 517 | 518 | --------------------------------------------------------------------------------