├── .idea
├── compiler.xml
├── misc.xml
├── vcs.xml
└── workspace.xml
├── README.md
├── nyaCrawler.iml
├── pom.xml
├── src
└── main
│ └── java
│ └── com
│ └── cn
│ ├── main
│ └── nyaPictureMain.java
│ └── util
│ └── HttpClientUtil.java
├── target
└── classes
│ ├── META-INF
│ └── nyaCrawler.kotlin_module
│ └── com
│ └── cn
│ ├── main
│ └── nyaPictureMain.class
│ └── util
│ └── HttpClientUtil.class
└── web
└── WEB-INF
└── web.xml
/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 | main
55 |
56 |
57 |
58 |
59 |
60 |
61 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 | 1564232410913
238 |
239 |
240 | 1564232410913
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 | 1576766569859
302 |
303 |
304 |
305 | 1576766569859
306 |
307 |
308 | 1576766587111
309 |
310 |
311 |
312 | 1576766587111
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 | No facets are configured
480 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 | Web|nyaCrawler
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # nyaHentaiCrawler
2 | 喵绅士本子爬取(自用)
3 |
4 | 用于抓取喵绅士的本子,启用main输入本子对应网址即可使用,个人使用版
5 |
6 | //注意!!!
7 | //因源站更新过几版,且添加了反爬延迟等措施,因此已不再适用,仅做参考
8 |
9 | 2021/1/7更新
10 |
11 | 现在添加代理,可以本地配置代理爬取,源站依然可用
12 |
13 | 2021/3/15
14 | 原网址git挂了,现有其他地址访问
15 |
16 | 2021/12/3
17 | 原网址git又可以使用了,地址直接谷歌搜索喵绅士就行
18 | 原本准备用线程爬取,无需等待,想了下没必要,能用就行,现在能看本子的地方太多了
19 |
20 | 2022/1/25
21 | 喵绅士阵亡了,可惜
22 |
--------------------------------------------------------------------------------
/nyaCrawler.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | com.cn
8 | nyaCrawler
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | org.apache.httpcomponents
14 | httpclient
15 | 4.5.2
16 |
17 |
18 |
19 |
20 | commons-io
21 | commons-io
22 | 2.5
23 |
24 |
25 |
26 |
27 |
28 | org.jsoup
29 | jsoup
30 | 1.11.3
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/src/main/java/com/cn/main/nyaPictureMain.java:
--------------------------------------------------------------------------------
1 | package com.cn.main;
2 |
3 | import com.cn.util.HttpClientUtil;
4 | import org.apache.commons.io.FileUtils;
5 | import org.apache.http.HttpEntity;
6 | import org.apache.http.HttpHost;
7 | import org.apache.http.client.config.RequestConfig;
8 | import org.apache.http.client.methods.CloseableHttpResponse;
9 | import org.apache.http.client.methods.HttpGet;
10 | import org.apache.http.impl.client.CloseableHttpClient;
11 | import org.apache.http.impl.client.HttpClients;
12 | import org.jsoup.Jsoup;
13 | import org.jsoup.nodes.Document;
14 | import org.jsoup.nodes.Element;
15 | import org.jsoup.select.Elements;
16 |
17 | import java.io.File;
18 | import java.io.IOException;
19 | import java.io.InputStream;
20 | import java.util.ArrayList;
21 | import java.util.List;
22 |
23 | public class nyaPictureMain {
24 |
25 | //存放目录
26 | private static String fileSource = "E://nyaManhua//new//";
27 |
28 | public static void main(String[] args) throws Exception {
29 |
30 |
31 | List urlList = new ArrayList();
32 |
33 | //地址
34 | urlList.add("https://zha.doghentai.com/g/338012/");
35 | urlList.add("");
36 | urlList.add("");
37 | urlList.add("");
38 | urlList.add("");
39 | urlList.add("");
40 | urlList.add("");
41 | urlList.add("");
42 | urlList.add("");
43 | urlList.add("");
44 | urlList.add("");
45 | urlList.add("");
46 | urlList.add("");
47 | urlList.add("");
48 | urlList.add("");
49 | urlList.add("");
50 | urlList.add("");
51 | urlList.add("");
52 | urlList.add("");
53 |
54 |
55 |
56 |
57 |
58 | nyaPictureMain.crawlerNyaUrl(urlList);
59 | String exSite = "cmd /c start " + fileSource ;
60 | Runtime.getRuntime().exec(exSite);
61 |
62 | }
63 |
64 |
65 | public static void crawlerNyaPic(int picSum,String fileUrl,String intputFile,String suffix){
66 |
67 | try {
68 | for (int i = 1; i <= picSum; i++) {
69 | // suffix = ".jpg"; //随时替换文件格式
70 | CloseableHttpClient httpClient = HttpClients.createDefault(); // 创建HttpClient实例
71 | HttpGet httpGet = new HttpGet(fileUrl+i+suffix); // 创建Httpget实例
72 | //设置Http报文头信息
73 | httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36");
74 | httpGet.setHeader("accept", "image/avif,image/webp,image/apng,image/*,*/*;q=0.8");
75 | httpGet.setHeader("accept-encoding", "gzip, deflate, br");
76 | httpGet.setHeader("referer", "https://zha.doghentai.com/");
77 | httpGet.setHeader("sec-fetch-dest", "image");
78 | httpGet.setHeader("accept-language", "zh-CN,zh;q=0.9,en;q=0.8");
79 |
80 | HttpHost proxy = new HttpHost("127.0.0.1", 7890);
81 | //超时时间单位为毫秒
82 | RequestConfig defaultRequestConfig = RequestConfig.custom()
83 | .setConnectTimeout(1000).setSocketTimeout(30000)
84 | .setProxy(proxy).build();
85 | httpClient = HttpClients.custom().setDefaultRequestConfig(defaultRequestConfig).build();
86 |
87 |
88 | CloseableHttpResponse response = null;
89 | response = httpClient.execute(httpGet); // 执行http get请求
90 | HttpEntity entity = response.getEntity(); // 获取返回实体
91 | if(null != entity){
92 | InputStream inputStream = entity.getContent();//返回一个输入流
93 | //输出图片
94 | FileUtils.copyInputStreamToFile(inputStream, new File(intputFile+i+suffix));//引用org.apache.commons.io.FileUtils
95 | System.out.println(i+suffix);
96 | }
97 | response.close(); // 关闭response
98 | httpClient.close(); // 关闭HttpClient实体
99 |
100 | }
101 |
102 | }catch (Exception e){
103 | System.out.println(e);
104 | }
105 | }
106 |
107 |
108 | public static void crawlerNyaUrl(List urlList) throws Exception {
109 |
110 | Integer rateDow = 1;
111 | for(String url:urlList){
112 | String html = "";
113 | if(url.length() != 0){
114 | html = HttpClientUtil.getSource(url);
115 |
116 | Document document = Jsoup.parse(html);
117 | Element element = document.selectFirst("div.container").selectFirst("a");
118 | String coverImgUrl = element.select("img").attr("data-src");
119 |
120 | //获取图片载点
121 | String[] ourStr = coverImgUrl.split("/");
122 | //获取后缀
123 | String[] oursuffix = coverImgUrl.split("\\.");
124 | //获取数量
125 | Elements picSum = document.select("div.thumb-container");
126 | //获取本子名字
127 | String benziName = element.select("img").attr("alt");
128 | benziName = benziName.replaceAll("\\?","").replaceAll(":","").replaceAll(" ","").replaceAll("\\*","");
129 |
130 | int count = picSum.size();
131 | int benziN = Integer.parseInt(ourStr[ourStr.length-2]);
132 | String suffix = "."+oursuffix[oursuffix.length-1];
133 | String fileUrl = "https://i0.nyacdn.com/galleries/"+benziN+"/";
134 | String intputFile = fileSource +benziName +"//";
135 | nyaPictureMain.crawlerNyaPic(count,fileUrl,intputFile,suffix);
136 |
137 | //缓存完后暂停几秒
138 | Thread.sleep(3000);
139 | }
140 | }
141 |
142 | System.out.println("喵变态图片缓存成功!!!!");
143 |
144 |
145 |
146 | }
147 |
148 |
149 | }
150 |
--------------------------------------------------------------------------------
/src/main/java/com/cn/util/HttpClientUtil.java:
--------------------------------------------------------------------------------
1 | package com.cn.util;
2 |
3 | import org.apache.http.HttpStatus;
4 | import org.apache.http.client.methods.CloseableHttpResponse;
5 | import org.apache.http.client.methods.HttpGet;
6 | import org.apache.http.impl.client.CloseableHttpClient;
7 | import org.apache.http.impl.client.HttpClients;
8 | import org.apache.http.util.EntityUtils;
9 |
10 | import java.io.IOException;
11 |
12 | public class HttpClientUtil {
13 |
14 |
15 | public static String getSource(String url) {
16 | String html = new String();
17 | HttpGet httpget = new HttpGet(url); //创建Http请求实例,URL 如:https://cd.lianjia.com/
18 | // 模拟浏览器,避免被服务器拒绝,返回返回403 forbidden的错误信息
19 | httpget.setHeader("User-Agent",
20 | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36");
21 |
22 | CloseableHttpResponse response = null;
23 | CloseableHttpClient httpclient = HttpClients.createDefault(); // 使用默认的HttpClient
24 | try {
25 | response = httpclient.execute(httpget);
26 | if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { // 返回 200 表示成功
27 | html = EntityUtils.toString(response.getEntity(), "utf-8"); // 获取服务器响应实体的内容
28 | }
29 | } catch (IOException e) {
30 | e.printStackTrace();
31 | } finally {
32 | if (response != null) {
33 | try {
34 | response.close();
35 | } catch (IOException e) {
36 | e.printStackTrace();
37 | }
38 | }
39 | }
40 | return html;
41 | }
42 |
43 |
44 |
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/target/classes/META-INF/nyaCrawler.kotlin_module:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/target/classes/com/cn/main/nyaPictureMain.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ERYhua/nyaHentaiCrawler/9e62548ae87a710b1a0588a07a43d43cef54657d/target/classes/com/cn/main/nyaPictureMain.class
--------------------------------------------------------------------------------
/target/classes/com/cn/util/HttpClientUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ERYhua/nyaHentaiCrawler/9e62548ae87a710b1a0588a07a43d43cef54657d/target/classes/com/cn/util/HttpClientUtil.class
--------------------------------------------------------------------------------
/web/WEB-INF/web.xml:
--------------------------------------------------------------------------------
1 |
2 |
6 |
--------------------------------------------------------------------------------