├── .idea ├── compiler.xml ├── misc.xml ├── vcs.xml └── workspace.xml ├── README.md ├── nyaCrawler.iml ├── pom.xml ├── src └── main │ └── java │ └── com │ └── cn │ ├── main │ └── nyaPictureMain.java │ └── util │ └── HttpClientUtil.java ├── target └── classes │ ├── META-INF │ └── nyaCrawler.kotlin_module │ └── com │ └── cn │ ├── main │ └── nyaPictureMain.class │ └── util │ └── HttpClientUtil.class └── web └── WEB-INF └── web.xml /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 17 | 18 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 51 | 52 | 53 | 54 | main 55 | 56 | 57 | 58 | 65 | 66 | 67 | 77 | 78 | 79 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 128 | 129 | 130 | 131 | 143 | 144 | 156 | 157 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 1564232410913 238 | 300 | 301 | 1576766569859 302 | 307 | 308 | 1576766587111 309 | 314 | 317 | 318 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 
| 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 404 | 405 | 406 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 474 | 475 | 476 | 477 | 478 | 479 | No facets are configured 480 | 481 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 508 | 509 | 510 | 511 | 512 | 513 | Web|nyaCrawler 514 | 515 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 531 | 532 | 533 | 534 | 535 | 536 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nyaHentaiCrawler 2 | 喵绅士本子爬取(自用) 3 | 4 | 用于抓取喵绅士的本子,启用main输入本子对应网址即可使用,个人使用版 5 | 6 | //注意!!! 
7 | //因源站更新过几版,且添加了反爬延迟等措施,因此已不再适用,仅做参考 8 | 9 | 2021/1/7更新 10 | 11 | 现在添加代理,可以本地配置代理爬取,源站依然可用 12 | 13 | 2021/3/15 14 | 原网址git挂了,现有其他地址访问 15 | 16 | 2021/12/3 17 | 原网址git又可以使用了,地址直接谷歌搜索喵绅士就行 18 | 原本准备用线程爬取,无需等待,想了下没必要能用就行,现在能看本子的地方太多了 19 | 20 | 2022/1/25 21 | 喵绅士阵亡了,可惜 22 | -------------------------------------------------------------------------------- /nyaCrawler.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.cn 8 | nyaCrawler 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | org.apache.httpcomponents 14 | httpclient 15 | 4.5.2 16 | 17 | 18 | 19 | 20 | commons-io 21 | commons-io 22 | 2.5 23 | 24 | 25 | 26 | 27 | 28 | org.jsoup 29 | jsoup 30 | 1.11.3 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/main/java/com/cn/main/nyaPictureMain.java: -------------------------------------------------------------------------------- 1 | package com.cn.main; 2 | 3 | import com.cn.util.HttpClientUtil; 4 | import org.apache.commons.io.FileUtils; 5 | import org.apache.http.HttpEntity; 6 | import org.apache.http.HttpHost; 7 | import org.apache.http.client.config.RequestConfig; 8 | import org.apache.http.client.methods.CloseableHttpResponse; 9 | import org.apache.http.client.methods.HttpGet; 10 | import org.apache.http.impl.client.CloseableHttpClient; 11 | import org.apache.http.impl.client.HttpClients; 12 | import org.jsoup.Jsoup; 13 | import org.jsoup.nodes.Document; 14 | import org.jsoup.nodes.Element; 15 | import org.jsoup.select.Elements; 16 | 17 | import java.io.File; 18 | import java.io.IOException; 19 | import java.io.InputStream; 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | 23 | public
class nyaPictureMain { 24 | 25 | //存放目录 26 | private static String fileSource = "E://nyaManhua//new//"; 27 | 28 | public static void main(String[] args) throws Exception { 29 | 30 | 31 | List urlList = new ArrayList(); 32 | 33 | //地址 34 | urlList.add("https://zha.doghentai.com/g/338012/"); 35 | urlList.add(""); 36 | urlList.add(""); 37 | urlList.add(""); 38 | urlList.add(""); 39 | urlList.add(""); 40 | urlList.add(""); 41 | urlList.add(""); 42 | urlList.add(""); 43 | urlList.add(""); 44 | urlList.add(""); 45 | urlList.add(""); 46 | urlList.add(""); 47 | urlList.add(""); 48 | urlList.add(""); 49 | urlList.add(""); 50 | urlList.add(""); 51 | urlList.add(""); 52 | urlList.add(""); 53 | 54 | 55 | 56 | 57 | 58 | nyaPictureMain.crawlerNyaUrl(urlList); 59 | String exSite = "cmd /c start " + fileSource ; 60 | Runtime.getRuntime().exec(exSite); 61 | 62 | } 63 | 64 | 65 | public static void crawlerNyaPic(int picSum,String fileUrl,String intputFile,String suffix){ 66 | 67 | try { 68 | for (int i = 1; i <= picSum; i++) { 69 | // suffix = ".jpg"; //随时替换文件格式 70 | CloseableHttpClient httpClient = HttpClients.createDefault(); // 创建HttpClient实例 71 | HttpGet httpGet = new HttpGet(fileUrl+i+suffix); // 创建Httpget实例 72 | //设置Http报文头信息 73 | httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"); 74 | httpGet.setHeader("accept", "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"); 75 | httpGet.setHeader("accept-encoding", "gzip, deflate, br"); 76 | httpGet.setHeader("referer", "https://zha.doghentai.com/"); 77 | httpGet.setHeader("sec-fetch-dest", "image"); 78 | httpGet.setHeader("accept-language", "zh-CN,zh;q=0.9,en;q=0.8"); 79 | 80 | HttpHost proxy = new HttpHost("127.0.0.1", 7890); 81 | //超时时间单位为毫秒 82 | RequestConfig defaultRequestConfig = RequestConfig.custom() 83 | .setConnectTimeout(1000).setSocketTimeout(30000) 84 | .setProxy(proxy).build(); 85 | httpClient = 
HttpClients.custom().setDefaultRequestConfig(defaultRequestConfig).build(); 86 | 87 | 88 | CloseableHttpResponse response = null; 89 | response = httpClient.execute(httpGet); // 执行http get请求 90 | HttpEntity entity = response.getEntity(); // 获取返回实体 91 | if(null != entity){ 92 | InputStream inputStream = entity.getContent();//返回一个输入流 93 | //输出图片 94 | FileUtils.copyInputStreamToFile(inputStream, new File(intputFile+i+suffix));//引用org.apache.commons.io.FileUtils 95 | System.out.println(i+suffix); 96 | } 97 | response.close(); // 关闭response 98 | httpClient.close(); // 关闭HttpClient实体 99 | 100 | } 101 | 102 | }catch (Exception e){ 103 | System.out.println(e); 104 | } 105 | } 106 | 107 | 108 | public static void crawlerNyaUrl(List urlList) throws Exception { 109 | 110 | Integer rateDow = 1; 111 | for(String url:urlList){ 112 | String html = ""; 113 | if(url.length() != 0){ 114 | html = HttpClientUtil.getSource(url); 115 | 116 | Document document = Jsoup.parse(html); 117 | Element element = document.selectFirst("div.container").selectFirst("a"); 118 | String coverImgUrl = element.select("img").attr("data-src"); 119 | 120 | //获取图片载点 121 | String[] ourStr = coverImgUrl.split("/"); 122 | //获取后缀 123 | String[] oursuffix = coverImgUrl.split("\\."); 124 | //获取数量 125 | Elements picSum = document.select("div.thumb-container"); 126 | //获取本子名字 127 | String benziName = element.select("img").attr("alt"); 128 | benziName = benziName.replaceAll("\\?","").replaceAll(":","").replaceAll(" ","").replaceAll("\\*",""); 129 | 130 | int count = picSum.size(); 131 | int benziN = Integer.parseInt(ourStr[ourStr.length-2]); 132 | String suffix = "."+oursuffix[oursuffix.length-1]; 133 | String fileUrl = "https://i0.nyacdn.com/galleries/"+benziN+"/"; 134 | String intputFile = fileSource +benziName +"//"; 135 | nyaPictureMain.crawlerNyaPic(count,fileUrl,intputFile,suffix); 136 | 137 | //缓存完后暂停几秒 138 | Thread.sleep(3000); 139 | } 140 | } 141 | 142 | System.out.println("喵变态图片缓存成功!!!!"); 143 | 144 | 145 | 
146 | } 147 | 148 | 149 | } 150 | -------------------------------------------------------------------------------- /src/main/java/com/cn/util/HttpClientUtil.java: -------------------------------------------------------------------------------- 1 | package com.cn.util; 2 | 3 | import org.apache.http.HttpStatus; 4 | import org.apache.http.client.methods.CloseableHttpResponse; 5 | import org.apache.http.client.methods.HttpGet; 6 | import org.apache.http.impl.client.CloseableHttpClient; 7 | import org.apache.http.impl.client.HttpClients; 8 | import org.apache.http.util.EntityUtils; 9 | 10 | import java.io.IOException; 11 | 12 | public class HttpClientUtil { 13 | 14 | 15 | public static String getSource(String url) { 16 | String html = new String(); 17 | HttpGet httpget = new HttpGet(url); //创建Http请求实例,URL 如:https://cd.lianjia.com/ 18 | // 模拟浏览器,避免被服务器拒绝,返回返回403 forbidden的错误信息 19 | httpget.setHeader("User-Agent", 20 | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"); 21 | 22 | CloseableHttpResponse response = null; 23 | CloseableHttpClient httpclient = HttpClients.createDefault(); // 使用默认的HttpClient 24 | try { 25 | response = httpclient.execute(httpget); 26 | if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { // 返回 200 表示成功 27 | html = EntityUtils.toString(response.getEntity(), "utf-8"); // 获取服务器响应实体的内容 28 | } 29 | } catch (IOException e) { 30 | e.printStackTrace(); 31 | } finally { 32 | if (response != null) { 33 | try { 34 | response.close(); 35 | } catch (IOException e) { 36 | e.printStackTrace(); 37 | } 38 | } 39 | } 40 | return html; 41 | } 42 | 43 | 44 | 45 | 46 | } 47 | -------------------------------------------------------------------------------- /target/classes/META-INF/nyaCrawler.kotlin_module: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- 
/target/classes/com/cn/main/nyaPictureMain.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ERYhua/nyaHentaiCrawler/9e62548ae87a710b1a0588a07a43d43cef54657d/target/classes/com/cn/main/nyaPictureMain.class -------------------------------------------------------------------------------- /target/classes/com/cn/util/HttpClientUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ERYhua/nyaHentaiCrawler/9e62548ae87a710b1a0588a07a43d43cef54657d/target/classes/com/cn/util/HttpClientUtil.class -------------------------------------------------------------------------------- /web/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | --------------------------------------------------------------------------------