├── .gitignore
├── .idea
├── Java.iml
├── misc.xml
├── modules.xml
├── vcs.xml
└── workspace.xml
├── CrawlDouyuDanmu
├── .gitignore
├── .idea
│ ├── .name
│ ├── compiler.xml
│ ├── copyright
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── uiDesigner.xml
│ ├── vcs.xml
│ └── workspace.xml
├── CrawlDouyuDanmu.iml
├── pom.xml
├── readme.md
└── src
│ └── main
│ └── java
│ ├── DouyuBarrageHandler.java
│ ├── MessageType.java
│ └── Start.java
├── CrawlPandaDanmu
├── .gitignore
├── .idea
│ ├── .name
│ ├── compiler.xml
│ ├── copyright
│ │ └── profiles_settings.xml
│ ├── encodings.xml
│ ├── libraries
│ │ ├── Maven__com_alibaba_fastjson_1_2_44.xml
│ │ ├── Maven__log4j_log4j_1_2_17.xml
│ │ ├── Maven__org_jsoup_jsoup_1_8_3.xml
│ │ ├── Maven__org_slf4j_slf4j_api_1_8_0_beta0.xml
│ │ └── Maven__org_slf4j_slf4j_log4j12_1_8_0_beta0.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── uiDesigner.xml
│ ├── vcs.xml
│ └── workspace.xml
├── CrawlPandaDanmu.iml
├── pom.xml
├── readme.md
└── src
│ └── main
│ ├── java
│ ├── Crawl.java
│ ├── MessageHandler.java
│ ├── Start.java
│ └── Utils.java
│ └── resources
│ ├── config.properties
│ └── log4j.properties
├── DownloadImg.java
├── InsulinPump
├── InsulinPump.java
├── People.java
├── PumpMain.java
└── PumpWindow.java
├── Main.java
├── MicroWave
├── MicroWave.java
├── MicroWaveMain.java
└── MicroWaveWindow.java
├── PullBookinfo
├── GetBookInfoThread.java
├── Main.java
├── WriteBookInfoToFile.java
└── WriteInfoToDB.java
├── README.md
├── Struts2FileUpAndDown
├── src
│ ├── com
│ │ └── geekgao
│ │ │ └── file
│ │ │ └── FileAction.java
│ └── struts.xml
└── web
│ ├── WEB-INF
│ └── web.xml
│ ├── downFile.jsp
│ ├── index.jsp
│ └── upFile.jsp
├── com
└── crawl
│ └── comments
│ ├── CrawlComments.java
│ ├── CrawlUtils.java
│ └── Main.java
├── medical_question
├── GetAnswers.java
└── WriteAnswersToFile.java
├── 文本情感分析
├── CalcWeightAndDoc.java
└── EmotionJudge.java
└── 西邮导游系统源码
├── 数据
├── data.txt
└── view.txt
└── 源码
├── images
├── icon.png
└── map.jpg
└── org
└── geekgao
└── guide
├── GuideAlgorithm.java
├── GuideSystem.java
├── GuideUtil.java
└── Vertex.java
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.log
3 |
--------------------------------------------------------------------------------
/.idea/Java.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
34 |
35 |
36 |
37 |
38 | true
39 | DEFINITION_ORDER
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 | 1540563728647
165 |
166 |
167 | 1540563728647
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | .idea
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.idea/.name:
--------------------------------------------------------------------------------
1 | CrawlDouyuDanmu
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 | 1.8
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 | 1.8
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 | 继续读取真正的消息内容
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 | true
82 | DEFINITION_ORDER
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 | project
406 |
407 |
408 |
409 |
410 | true
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 | 1453816946069
425 |
426 |
427 | 1453816946069
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 | byteArray.toByteArray()
531 | JAVA
532 | CODE_FRAGMENT
533 |
534 |
535 | content.length()
536 | JAVA
537 | CODE_FRAGMENT
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
549 |
550 |
551 |
552 |
553 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
561 |
562 |
563 |
564 |
565 |
566 |
567 |
568 |
569 |
570 |
571 |
572 |
573 |
574 |
575 |
576 |
577 |
578 |
579 |
580 |
581 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 |
590 |
591 |
592 |
593 |
594 |
595 |
596 |
597 |
598 |
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 |
611 |
612 |
613 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 |
804 |
805 |
806 |
807 |
808 |
809 |
810 |
811 |
812 |
813 |
814 |
815 |
816 |
817 |
818 |
819 |
820 |
821 |
822 |
823 |
824 |
825 |
826 |
827 |
828 |
829 |
830 |
831 |
832 |
833 |
834 |
835 |
836 |
837 |
838 |
839 |
840 |
841 |
842 |
843 |
844 |
845 |
846 |
847 |
848 |
849 |
850 |
851 |
852 |
853 |
854 |
855 |
856 |
857 |
858 |
859 |
860 |
861 |
862 |
863 |
864 |
865 |
866 |
867 |
868 |
869 |
870 |
871 |
872 |
873 |
874 |
875 |
876 |
877 |
878 |
879 |
880 |
881 |
882 |
883 |
884 |
885 |
886 |
887 |
888 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/CrawlDouyuDanmu.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | org.geekgao
8 | DouyuDanmu
9 | 1.0-SNAPSHOT
10 |
11 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/readme.md:
--------------------------------------------------------------------------------
1 | 使用[斗鱼开放协议](http://dev-bbs.douyutv.com/forum.php?mod=viewthread&tid=399&extra=page%3D1)完成的DouyuTV弹幕抓取工具。
2 |
3 | [www.douyu.com](https://www.douyu.com) barrage crawler, run with Start.java.
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/src/main/java/DouyuBarrageHandler.java:
--------------------------------------------------------------------------------
1 | import java.io.ByteArrayOutputStream;
2 | import java.io.IOException;
3 | import java.net.Socket;
4 | import java.nio.ByteBuffer;
5 | import java.nio.ByteOrder;
6 |
7 | public class DouyuBarrageHandler {
8 | private static String host = "openbarrage.douyutv.com";
9 |
10 | private static int port = 8601;
11 |
12 | private Socket serverSocket;
13 |
14 | private String roomId;
15 |
16 | public DouyuBarrageHandler(String roomId) {
17 | this.roomId = roomId;
18 |
19 | try {
20 | connect();
21 | login();
22 | } catch (IOException e) {
23 | e.printStackTrace();
24 | }
25 | }
26 |
27 | private void connect() throws IOException {
28 | serverSocket = new Socket(host, port);
29 |
30 | new Thread(new Runnable() {
31 | public void run() {
32 | while (true) {
33 | try {
34 | send("type@=mrkl");
35 | Thread.sleep(30000);
36 | } catch (IOException e) {
37 | e.printStackTrace();
38 | } catch (InterruptedException e) {
39 | e.printStackTrace();
40 | }
41 | }
42 | }
43 | }).start();
44 | }
45 |
46 | private void login() throws IOException {
47 | send("type@=loginreq/roomid@=" + roomId);
48 | send("type@=joingroup/rid@=" + roomId + "/gid@=-9999");
49 | }
50 |
51 | public String read() throws IOException {
52 | int msgSize = ByteBuffer.wrap(getBytes(4)).order(ByteOrder.LITTLE_ENDIAN).getInt();
53 | byte[] msgBytes =getBytes(msgSize);
54 |
55 | return new String(msgBytes, 8, msgSize - 9);
56 | }
57 |
58 | public void send(String msg) throws IOException {
59 | serverSocket.getOutputStream().write(getSendBytes(msg));
60 | serverSocket.getOutputStream().flush();
61 | }
62 |
63 | private byte[] getBytes(int byteCount) throws IOException {
64 | byte[] result = new byte[byteCount];
65 | int alreadyReadSize = 0;
66 |
67 | while (alreadyReadSize != byteCount) {
68 | alreadyReadSize += serverSocket.getInputStream().read(result, alreadyReadSize, byteCount - alreadyReadSize);
69 | }
70 |
71 | return result;
72 | }
73 |
74 | private byte[] getSendBytes(String msg) throws IOException {
75 | ByteArrayOutputStream outBytes = new ByteArrayOutputStream(getPacketSize(msg));
76 | outBytes.write(intToggle(getPacketSize(msg)));
77 | outBytes.write(intToggle(getPacketSize(msg)));
78 | outBytes.write(shortToggle(MessageType.SEND.getCode()));
79 | outBytes.write(0);
80 | outBytes.write(0);
81 | outBytes.write(msg.getBytes());
82 | outBytes.write(0);
83 |
84 | return outBytes.toByteArray();
85 | }
86 |
87 | private int getPacketSize(String msg) {
88 | return 9 + msg.length();
89 | }
90 |
91 | private byte[] intToggle(int value) {
92 | byte[] result = new byte[4];
93 | result[3] = (byte) ((value >> 24) & 0xFF);
94 | result[2] = (byte) ((value >> 16) & 0xFF);
95 | result[1] = (byte) ((value >> 8) & 0xFF);
96 | result[0] = (byte) (value & 0xFF);
97 |
98 | return result;
99 | }
100 |
101 | private byte[] shortToggle(short value) {
102 | byte[] result = new byte[2];
103 | result[1] = (byte) ((value >> 8) & 0xFF);
104 | result[0] = (byte) (value & 0xFF);
105 |
106 | return result;
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/src/main/java/MessageType.java:
--------------------------------------------------------------------------------
/**
 * Numeric codes for the two-byte message type field of a barrage packet.
 */
public enum MessageType {
    /** Type code written into packets built by the client. */
    SEND(689),
    /** Counterpart code; presumably used by packets coming from the server (confirm against the protocol). */
    RECV(690);

    /** Wire value of this type; immutable, as enum constants are shared. */
    private final short code;

    MessageType(int code) {
        this.code = (short) code;
    }

    /**
     * @return the code to place in the packet's type field
     */
    public short getCode() {
        return code;
    }
}
14 |
--------------------------------------------------------------------------------
/CrawlDouyuDanmu/src/main/java/Start.java:
--------------------------------------------------------------------------------
1 | import java.io.IOException;
2 |
3 | public class Start {
4 | public static void main(String[] args) throws IOException, InterruptedException {
5 | DouyuBarrageHandler server = new DouyuBarrageHandler("288016");
6 |
7 | while (true) {
8 | System.out.println(server.read());
9 | Thread.sleep(1);
10 | }
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | .idea
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/.name:
--------------------------------------------------------------------------------
1 | CrawlPandaDanmu
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/libraries/Maven__com_alibaba_fastjson_1_2_44.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/libraries/Maven__log4j_log4j_1_2_17.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/libraries/Maven__org_jsoup_jsoup_1_8_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/libraries/Maven__org_slf4j_slf4j_api_1_8_0_beta0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/libraries/Maven__org_slf4j_slf4j_log4j12_1_8_0_beta0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/CrawlPandaDanmu.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | org.geekgao
8 | PandaDanmu
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | org.apache.maven.plugins
14 | maven-compiler-plugin
15 |
16 | 1.8
17 | 1.8
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | org.jsoup
26 | jsoup
27 | 1.8.3
28 |
29 |
30 | com.alibaba
31 | fastjson
32 | 1.2.44
33 |
34 |
35 | org.slf4j
36 | slf4j-log4j12
37 | 1.8.0-beta0
38 |
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/readme.md:
--------------------------------------------------------------------------------
1 | PandaTV弹幕抓取工具,具体说明见[知乎](https://www.zhihu.com/question/38807641/answer/84007935)
2 |
3 | [www.panda.tv](https://www.panda.tv/) barrage crawler, run with Start.java.
--------------------------------------------------------------------------------
/CrawlPandaDanmu/src/main/java/Crawl.java:
--------------------------------------------------------------------------------
1 | import com.alibaba.fastjson.JSON;
2 | import com.alibaba.fastjson.JSONArray;
3 | import com.alibaba.fastjson.JSONObject;
4 | import org.jsoup.Jsoup;
5 | import org.jsoup.nodes.Document;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | import java.io.ByteArrayOutputStream;
10 | import java.io.IOException;
11 | import java.io.OutputStream;
12 | import java.net.Socket;
13 | import java.util.List;
14 |
15 | /**
16 | * Created by geekgao on 16-1-29.
17 | * 进行抓取弹幕任务
18 | */
19 | public class Crawl extends Thread {
20 | private final static Logger LOGGER = LoggerFactory.getLogger(Crawl.class);
21 |
22 | //连接弹幕服务器的必要信息
23 | private String rid;
24 | private String appid;
25 | private String ts;
26 | private String sign;
27 | private String authType;
28 |
29 | //与弹幕服务器联系的socket
30 | private Socket socket;
31 | //弹幕服务器ip
32 | private String serverIp;
33 | //弹幕服务器端口
34 | private int port;
35 |
36 | /**
37 | * 初始化一些信息,注意是获取登录弹幕服务器的必要信息
38 | * @return 返回结果表示是否初始化成功
39 | */
40 | private boolean init() {
41 | String roomId = Utils.getRoomId();
42 | String url = "https://riven.panda.tv/chatroom/getinfo?roomid=" + roomId + "&app=1&_caller=panda-pc_web&_=" + System.currentTimeMillis();
43 | Document document;
44 | try {
45 | document = Jsoup.connect(url).get();
46 | LOGGER.info("从[" + url + "]获取登录弹幕服务器的必要信息");
47 | LOGGER.info("登录数据Json串:" + document.body().text());
48 | } catch (IOException e) {
49 | LOGGER.error("获取登录服务器的必要数据出错", e);
50 | return false;
51 | }
52 | JSONObject jsonObject = JSON.parseObject(document.body().text());
53 |
54 | int errno = jsonObject.getInteger("errno");
55 | if (errno == 0) {
56 | JSONObject tempJsonObject = jsonObject.getJSONObject("data");
57 | rid = String.valueOf(tempJsonObject.getLong("rid"));
58 | appid = tempJsonObject.getString("appid");
59 | ts = String.valueOf(tempJsonObject.getLong("ts"));
60 | sign = tempJsonObject.getString("sign");
61 | authType = tempJsonObject.getString("authType");
62 |
63 | JSONArray chatAddressList = tempJsonObject.getJSONArray("chat_addr_list");
64 | LOGGER.info("弹幕服务器数据:" + chatAddressList);
65 | //选第一个服务器登录
66 | serverIp = chatAddressList.getString(0).split(":",2)[0];
67 | port = Integer.parseInt(chatAddressList.getString(0).split(":", 2)[1]);
68 | } else {
69 | LOGGER.error("获取登录弹幕服务器的必要信息出错,程序将退出");
70 | return false;
71 | }
72 |
73 | return true;
74 | }
75 |
76 | /**
77 | * 与弹幕服务器取得联系,相当于登录弹幕服务器
78 | */
79 | private void login() throws IOException {
80 | socket = new Socket(serverIp,port);
81 | LOGGER.info("登录弹幕服务器:" + serverIp + ":" + port + "成功");
82 | String msg = "u:" + rid + "@" + appid + "\n" +
83 | "k:1\n" +
84 | "t:300\n" +
85 | "ts:" + ts + "\n" +
86 | "sign:" + sign + "\n" +
87 | "authtype:" + authType;
88 | ByteArrayOutputStream byteArray = new ByteArrayOutputStream();
89 | byte[] b = new byte[]{0x00, 0x06, 0x00, 0x02, 0x00, (byte) msg.length()};
90 | byteArray.write(b);
91 |
92 | byteArray.write(msg.getBytes("ISO-8859-1"));
93 | OutputStream outputStream = socket.getOutputStream();
94 | outputStream.write(byteArray.toByteArray());
95 |
96 | b = new byte[]{0x00, 0x06, 0x00, 0x00};
97 | outputStream.write(b);
98 | }
99 |
100 | @Override
101 | public void run() {
102 | MessageHandler messageHandler = null;
103 | OutputStream outputStream;
104 |
105 | try {
106 | if (!init()) {
107 | return;
108 | }
109 | login();
110 | messageHandler = new MessageHandler(socket);
111 | outputStream = socket.getOutputStream();
112 | long start = System.currentTimeMillis();
113 | while (true) {
114 | List messages = messageHandler.read();
115 | for (String msg: messages) {
116 | if (msg.equals("")) {
117 | continue;
118 | }
119 | try {
120 | JSONObject msgJsonObject = JSON.parseObject(msg);
121 | String type = msgJsonObject.getString("type");
122 | //发言弹幕type为1
123 | if (type.equals("1")) {
124 | String nickname = msgJsonObject.getJSONObject("data").getJSONObject("from").getString("nickName");
125 | String content = msgJsonObject.getJSONObject("data").getString("content");
126 | LOGGER.info("[" + nickname + "]:" + content);
127 | }
128 | } catch (Exception e) {
129 | LOGGER.error("获取消息内容时出错:" + msg, e);
130 | }
131 | }
132 |
133 | //心跳包
134 | if (System.currentTimeMillis() - start > 60000) {
135 | outputStream.write(new byte[]{0x00, 0x06, 0x00, 0x00});
136 | start = System.currentTimeMillis();
137 | }
138 | }
139 | } catch (IOException e) {
140 | LOGGER.error("获取弹幕时出错", e);
141 | } finally {
142 | try {
143 | if (messageHandler != null) {
144 | messageHandler.close();
145 | }
146 | } catch (IOException e) {
147 | LOGGER.error("调用MessageHandler close()方法时出错");
148 | }
149 | }
150 | }
151 | }
152 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/src/main/java/MessageHandler.java:
--------------------------------------------------------------------------------
1 | import java.io.ByteArrayOutputStream;
2 | import java.io.IOException;
3 | import java.io.InputStream;
4 | import java.net.Socket;
5 | import java.util.Arrays;
6 | import java.util.LinkedList;
7 | import java.util.List;
8 |
9 | /**
10 | * Created by geekgao on 16-1-27.
11 | */
12 | public class MessageHandler {
13 | private Socket socket;
14 | private InputStream inputStream;
15 |
16 | public MessageHandler(Socket socket) {
17 | this.socket = socket;
18 | }
19 |
20 | /**
21 | *
22 | * @return 返回人可阅读的json串
23 | * @throws IOException
24 | */
25 | public List read() throws IOException {
26 | if (inputStream == null) {
27 | inputStream = socket.getInputStream();
28 | }
29 | byte[] typeBytes = new byte[4];
30 |
31 | //读取前4个字节,得到数据类型信息
32 | for (int i = 0;i < 4;i++) {
33 | int tmp = inputStream.read();
34 | typeBytes[i] = (byte) tmp;
35 | }
36 |
37 | //最终的结果
38 | List result = new LinkedList();
39 | //这是一条弹幕信息
40 | if (typeBytes[0] == 0x00 && typeBytes[1] == 0x06 && typeBytes[2] == 0x00 && typeBytes[3] == 0x03) {
41 | //越过前面没用的字节,跳到标记内容长度的字节
42 | inputStream.skip(7);
43 | //下条内容的长度
44 | int contentLen = 0;
45 | //读取4个字节,得到数据长度
46 | for (int i = 3;i >= 0;i--) {
47 | int tmp = inputStream.read();
48 | contentLen += tmp * Math.pow(16,2 * i);
49 | }
50 |
51 | int len;
52 | int readLen = 0;
53 | byte[] bytes = new byte[contentLen];
54 | ByteArrayOutputStream byteArray = new ByteArrayOutputStream();
55 | while ((len = inputStream.read(bytes,0,contentLen - readLen)) != -1) {
56 | byteArray.write(bytes,0,len);
57 | readLen += len;
58 | if (readLen == contentLen) {
59 | break;
60 | }
61 | }
62 |
63 | bytes = byteArray.toByteArray();
64 | byte[] b = Arrays.copyOfRange(bytes, 8, 12);
65 | //找到人可识别的字符串放入结果集中
66 | for (int i = 0;i < bytes.length;) {
67 | //一段弹幕内容的开头
68 | if (bytes[i] == b[0] && bytes[i+1] == b[1] && bytes[i+2] == b[2] && bytes[i+3] == b[3]) {
69 | i += 4;
70 | //一段弹幕json字符串的长度
71 | int length = 0;
72 | //读取4个字节,得到弹幕数据长度
73 | for (int j = 0,k = 3;j < 4;j++,k--) {
74 | int n = bytes[i + j];
75 | /*
76 | 原数据一个字节可保存0~255的数,但是byte范围是-128~127,所以要变回原来的真实数据
77 | 后面的数据不变是因为后面的字符串都是ascii字符,都在0~127之内
78 | */
79 | if (n < 0) {
80 | n = 256 + bytes[i + j];
81 | }
82 |
83 | length += n * Math.pow(16,2 * k);
84 | }
85 | i += 4;
86 |
87 | result.add(Utils.unicode2String(new String(Arrays.copyOfRange(bytes,i,i + length))));
88 | i += length;
89 | } else {
90 | i++;
91 | }
92 | }
93 |
94 | } else if ((typeBytes[0] == 0x00 && typeBytes[1] == 0x06 && typeBytes[2] == 0x00 && typeBytes[3] == 0x06)) {
95 | //下条内容的长度
96 | int contentLen = 0;
97 | //读取2个字节,得到数据长度
98 | for (int i = 1;i >= 0;i--) {
99 | int tmp = inputStream.read();
100 | contentLen += tmp * Math.pow(16,2 * i);
101 | }
102 |
103 | inputStream.skip(contentLen);
104 | }
105 |
106 | return result;
107 | }
108 |
109 | public void close() throws IOException {
110 | socket.close();
111 | }
112 | }
113 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/src/main/java/Start.java:
--------------------------------------------------------------------------------
1 | import java.io.IOException;
2 |
3 | /**
4 | * Created by geekgao on 16-1-29.
5 | */
6 | public class Start {
7 | public static void main(String[] args) throws IOException {
8 | Crawl c = new Crawl();
9 | c.start();
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/src/main/java/Utils.java:
--------------------------------------------------------------------------------
1 | import java.io.IOException;
2 | import java.util.Properties;
3 |
4 | /**
5 | * Created by geekgao on 16-1-29.
6 | */
/**
 * Shared helpers: read access to {@code /config.properties} and decoding of
 * backslash-u escape sequences embedded in text.
 */
public class Utils {
    /** Classpath location of the configuration file. */
    private static final String CONFIG_RESOURCE = "/config.properties";

    /** Matches exactly four hexadecimal digits; compiled once and reused. */
    private static final java.util.regex.Pattern FOUR_HEX_DIGITS =
            java.util.regex.Pattern.compile("[0-9a-fA-F]{4}");

    private static final Properties config = new Properties();

    static {
        // getResourceAsStream returns null when the resource is missing; loading a
        // null stream would throw and make the whole class unusable, so it is
        // reported and the configuration is simply left empty.
        try (java.io.InputStream in = Utils.class.getResourceAsStream(CONFIG_RESOURCE)) {
            if (in == null) {
                System.err.println("Configuration resource not found: " + CONFIG_RESOURCE);
            } else {
                config.load(in);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the value of the {@code roomId} property.
     *
     * @return the configured value, or {@code null} when the property is absent
     */
    public static String getRoomId() {
        return config.getProperty("roomId");
    }

    /**
     * Replaces every escape made of a backslash, the letter {@code u} and four
     * hexadecimal digits with the character that has this code unit value.
     * Anything else, including an incomplete or malformed escape at any
     * position, is copied through unchanged.
     *
     * @param str text that may contain escapes; must not be {@code null}
     * @return the decoded text
     */
    public static String unicode2String(String str) {
        final int length = str.length();
        StringBuilder result = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char current = str.charAt(i);
            // The bounds check keeps a trailing backslash from reading past the end.
            boolean escapePrefix = current == '\\' && i + 1 < length && str.charAt(i + 1) == 'u';
            if (!escapePrefix) {
                result.append(current);
                i++;
                continue;
            }
            // Only decode when four more characters exist and all are hex digits.
            String hex = i + 6 <= length ? str.substring(i + 2, i + 6) : null;
            if (hex != null && FOUR_HEX_DIGITS.matcher(hex).matches()) {
                result.append((char) Integer.parseInt(hex, 16));
                i += 6;
            } else {
                // Not a real escape: keep the two prefix characters literally.
                result.append("\\u");
                i += 2;
            }
        }
        return result.toString();
    }

}
52 |
--------------------------------------------------------------------------------
/CrawlPandaDanmu/src/main/resources/config.properties:
--------------------------------------------------------------------------------
1 | #要抓取的熊猫房间ID
2 | roomId = 10015
--------------------------------------------------------------------------------
/CrawlPandaDanmu/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, CONSOLE, FILE
2 | ## for console
3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss}[%p][%l] - %m%n
6 | ## for file
7 | log4j.appender.FILE=org.apache.log4j.RollingFileAppender
8 | log4j.appender.FILE.File=danmu.log
9 | log4j.appender.FILE.MaxFileSize=100MB
10 | log4j.appender.FILE.Append = true
11 | log4j.appender.FILE.layout=org.apache.log4j.PatternLayout
12 | log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss}[%p][%l] - %m%n
--------------------------------------------------------------------------------
/DownloadImg.java:
--------------------------------------------------------------------------------
1 | import org.apache.http.HttpEntity;
2 | import org.apache.http.client.methods.CloseableHttpResponse;
3 | import org.apache.http.client.methods.HttpGet;
4 | import org.apache.http.impl.client.CloseableHttpClient;
5 | import org.apache.http.impl.client.HttpClients;
6 | import org.apache.http.util.EntityUtils;
7 |
8 | import java.io.*;
9 | import java.text.SimpleDateFormat;
10 | import java.util.Date;
11 | import java.util.regex.Matcher;
12 | import java.util.regex.Pattern;
13 |
14 | public class DownloadImg {
15 |
16 | public static void writeImgEntityToFile(HttpEntity imgEntity,String fileAddress) {
17 | File storeFile = new File(fileAddress);
18 | FileOutputStream output = null;
19 | try {
20 | output = new FileOutputStream(storeFile);
21 |
22 | if (imgEntity != null) {
23 | InputStream instream;
24 | instream = imgEntity.getContent();
25 | byte b[] = new byte[8 * 1024];
26 | int count;
27 | while ((count = instream.read(b)) != -1) {
28 | output.write(b, 0, count);
29 | }
30 |
31 | }
32 | } catch (FileNotFoundException e) {
33 | e.printStackTrace();
34 | } catch (IOException e) {
35 | e.printStackTrace();
36 | } finally {
37 | try {
38 | output.close();
39 | } catch (IOException e) {
40 | e.printStackTrace();
41 | }
42 | }
43 | }
44 |
45 | public static void main(String[] args) {
46 | System.out.println("获取Bing图片地址中……");
47 |
48 | SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
49 | CloseableHttpClient httpClient = HttpClients.createDefault();
50 | HttpGet httpGet = new HttpGet("http://cn.bing.com/");
51 | CloseableHttpResponse response = null;
52 | try {
53 | response = httpClient.execute(httpGet);
54 | Pattern p = Pattern.compile("g_img=\\{url:.*\\.jpg");
55 | Matcher m = p.matcher(EntityUtils.toString(response.getEntity()));
56 | String address = null;
57 | if (m.find()) {
58 | address = m.group().split("'")[1].split("'")[0];
59 | } else {
60 | System.exit(0);
61 | }
62 | System.out.println("图片地址:" + address);
63 | System.out.println("正在下载……");
64 | HttpGet getImage = new HttpGet(address);
65 | CloseableHttpResponse responseImg = httpClient.execute(getImage);
66 | HttpEntity entity = responseImg.getEntity();
67 |
68 | writeImgEntityToFile(entity,"/home/geekgao/图片/BingImg/" + dateFormat.format(new Date()) + ".jpg");
69 |
70 | System.out.println("下载完毕.");
71 | } catch (IOException e) {
72 | e.printStackTrace();
73 | } finally {
74 | try {
75 | httpClient.close();
76 | response.close();
77 | } catch (IOException e) {
78 | e.printStackTrace();
79 | }
80 | }
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/InsulinPump/InsulinPump.java:
--------------------------------------------------------------------------------
1 | import java.util.Timer;
2 | import java.util.TimerTask;
3 |
/**
 * Simulated insulin pump holding a reservoir level, a battery level and the
 * last blood-sugar reading. Its state is touched by several threads (its own
 * battery timer and whoever calls the accessors), so every access goes
 * through one private lock.
 */
class InsulinPump extends Thread {
    /** Guards all mutable fields; private so external code cannot contend on it. */
    private final Object lock = new Object();

    private double insulinQuantity; // units of insulin left in the reservoir
    private double battery;         // battery charge
    private double bloodSugar;      // last reading; 3.9--6.1 mmol/L is treated as normal
    private final double weight;    // body weight used to size a dose

    /**
     * Creates a pump with a reservoir of 1000, a battery of 100 and a blood
     * sugar reading of 5.
     *
     * @param weight body weight used by {@link #adjust()}
     */
    public InsulinPump(double weight) {
        insulinQuantity = 1000;
        battery = 100;
        bloodSugar = 5;
        this.weight = weight;
    }

    /**
     * Starts a timer that lowers the battery by 0.1 once per second, beginning
     * one second after the call (i.e. one full unit every ten seconds).
     */
    @Override
    public void run() {
        Timer timer = new Timer();
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                drainBattery();
            }
        }, 1000, 1000);
    }

    /** Lowers the battery by one tick; the read-modify-write happens under the lock. */
    private void drainBattery() {
        synchronized (lock) {
            battery -= 0.1;
        }
    }

    public double getInsulinQuantity() {
        synchronized (lock) {
            return insulinQuantity;
        }
    }

    public void setInsulinQuantity(double insulinQuantity) {
        synchronized (lock) {
            this.insulinQuantity = insulinQuantity;
        }
    }

    public double getBattery() {
        synchronized (lock) {
            return battery;
        }
    }

    public void setBattery(int battery) {
        synchronized (lock) {
            this.battery = battery;
        }
    }

    public double getBloodSugar() {
        synchronized (lock) {
            return bloodSugar;
        }
    }

    public void setBloodSugar(double bloodSugar) {
        synchronized (lock) {
            this.bloodSugar = bloodSugar;
        }
    }

    /**
     * Computes a dose from the current blood sugar and the weight, removes it
     * from the reservoir and returns it.
     *
     * <p>The formula yields a negative number for low readings; such a result
     * is clamped to zero so that a "dose" can never refill the reservoir.
     * NOTE(review): the factor 18 presumably converts mmol/L to mg/dL - confirm.
     *
     * @return the dose taken from the reservoir, never negative
     */
    public double adjust() {
        synchronized (lock) {
            double quantity = Math.max(0.0, (bloodSugar * 18 - 100) * weight * 6 / 2000);
            insulinQuantity -= quantity;
            return quantity;
        }
    }
}
60 |
--------------------------------------------------------------------------------
/InsulinPump/People.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 | import java.util.Timer;
3 | import java.util.TimerTask;
4 |
/**
 * Simulated person whose blood sugar is re-rolled once per second.
 */
class People extends Thread {
    /**
     * Current blood sugar. Written by the timer thread and read by other
     * threads, hence {@code volatile} so readers always see the latest value.
     */
    private volatile double bloodSugar;

    /** Source of the random readings; values fall in [3.9, 8.9). */
    private final Random random;

    /** Creates a person with an initial random reading. */
    public People() {
        random = new Random(System.currentTimeMillis());
        bloodSugar = nextReading();
    }

    /** Starts a timer that replaces the reading immediately and then every second. */
    @Override
    public void run() {
        Timer timer = new Timer();
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                bloodSugar = nextReading();
            }
        }, 0, 1000);
    }

    /** Draws a new reading: 3.9 plus a random fraction of 5. */
    private double nextReading() {
        return 3.9 + random.nextDouble() * 5;
    }

    /**
     * @return the most recent blood sugar reading
     */
    public double getBloodSugar() {
        return bloodSugar;
    }
}
29 |
--------------------------------------------------------------------------------
/InsulinPump/PumpMain.java:
--------------------------------------------------------------------------------
1 | import org.apache.log4j.FileAppender;
2 | import org.apache.log4j.Logger;
3 | import org.apache.log4j.PatternLayout;
4 |
5 | import javax.swing.*;
6 | import java.awt.*;
7 | import java.awt.event.ActionEvent;
8 | import java.awt.event.ActionListener;
9 | import java.awt.event.WindowAdapter;
10 | import java.awt.event.WindowEvent;
11 | import java.io.IOException;
12 | import java.util.Timer;
13 | import java.util.TimerTask;
14 |
15 | public class PumpMain {
16 | public static void main(String[] args) {
17 |
18 | final Logger log = Logger.getLogger(PumpMain.class);
19 | try {
20 | log.addAppender(new FileAppender(new PatternLayout("[%d{yyyy/MM/dd-HH:mm:ss}]-%m%n"), "/home/geekgao/insulinPumpLog", true));
21 | } catch (IOException e) {
22 | e.printStackTrace();
23 | }
24 |
25 | final People people = new People();
26 |
27 | final JFrame jFrame = new JFrame("输入体重");
28 | final double[] weight = new double[1];
29 | jFrame.setLayout(new FlowLayout());
30 | jFrame.addWindowListener(new WindowAdapter() {
31 | @Override
32 | public void windowClosing(WindowEvent e) {
33 | weight[0] = 60;//关闭窗口的话,默认60公斤
34 | super.windowClosing(e);
35 | }
36 | });
37 | final JTextField textArea = new JTextField("输入您的体重(默认60公斤)");
38 | jFrame.add(textArea);
39 | JButton jButton = new JButton("确定");
40 | jFrame.add(jButton);
41 |
42 | jButton.addActionListener(new ActionListener() {
43 | public void actionPerformed(ActionEvent e) {
44 | try {
45 | weight[0] = Double.valueOf(textArea.getText());
46 | jFrame.setVisible(false);
47 | } catch (NumberFormatException e1) {
48 | JOptionPane.showMessageDialog(null, "数字格式不对");
49 | }
50 | }
51 | });
52 | jFrame.pack();
53 | jFrame.setLocationRelativeTo(null);
54 | jFrame.setResizable(false);
55 | jFrame.setVisible(true);
56 |
57 | while (weight[0] == 0 && jFrame.isVisible() == true) {
58 | try {
59 | Thread.sleep(50);
60 | } catch (InterruptedException e) {
61 | e.printStackTrace();
62 | }
63 | }
64 |
65 | log.info("开机,用户体重为" + weight[0] + "Kg");
66 |
67 | final InsulinPump insulinPump = new InsulinPump(weight[0]);
68 | final PumpWindow pumpWindow = new PumpWindow(insulinPump);
69 |
70 | people.start(); //人体运行
71 | insulinPump.start(); //胰岛素泵运行
72 |
73 | Timer timer = new Timer();
74 | timer.schedule(new TimerTask() {
75 | @Override
76 | public void run() {
77 | insulinPump.setBloodSugar(people.getBloodSugar());
78 |
79 | pumpWindow.updateTime();
80 | pumpWindow.setBattery(insulinPump.getBattery());
81 | pumpWindow.setBloodSugar(people.getBloodSugar());
82 | pumpWindow.setinsulinQuantity(insulinPump.getInsulinQuantity());
83 | pumpWindow.setStatus("无");
84 |
85 | if (insulinPump.getBloodSugar() <= 3.9 || insulinPump.getBloodSugar() >= 6.1) {
86 | double insertQuantity = insulinPump.adjust();
87 | String insertQuantityStr = String.valueOf(insertQuantity);
88 | pumpWindow.setStatus("正在注射" + insertQuantityStr.substring(0, insertQuantityStr.indexOf(".") + 2) + "个单位的胰岛素");
89 | log.info("注射" + insertQuantityStr.substring(0, insertQuantityStr.indexOf(".") + 2) + "个单位的胰岛素");
90 | pumpWindow.setinsulinQuantity(insulinPump.getInsulinQuantity());
91 | }
92 |
93 | if (insulinPump.getBattery() <= 0) {
94 | pumpWindow.setBattery(0);
95 | log.info("因未及时充电,本仪器将自动关机。");
96 | JOptionPane.showMessageDialog(null, "因未及时充电,本仪器将自动关机。");
97 | System.exit(0);
98 | }
99 |
100 | if (insulinPump.getInsulinQuantity() <= 0) {
101 | pumpWindow.setinsulinQuantity(0);
102 | log.info("胰岛素量不足!!!本仪器将自动关机");
103 | JOptionPane.showMessageDialog(null, "胰岛素量不足!!!本仪器将自动关机");
104 | System.exit(0);
105 | }
106 | }
107 | }, 0, 1000); //每秒更新一次显示的数据
108 | }
109 | }
110 |
--------------------------------------------------------------------------------
/InsulinPump/PumpWindow.java:
--------------------------------------------------------------------------------
1 | import org.apache.log4j.FileAppender;
2 | import org.apache.log4j.Logger;
3 | import org.apache.log4j.PatternLayout;
4 |
5 | import javax.swing.*;
6 | import java.awt.*;
7 | import java.awt.event.ActionEvent;
8 | import java.awt.event.ActionListener;
9 | import java.awt.event.WindowAdapter;
10 | import java.awt.event.WindowEvent;
11 | import java.io.IOException;
12 | import java.text.SimpleDateFormat;
13 | import java.util.Date;
14 |
15 | class PumpWindow extends JFrame{
16 | private JLabel time;
17 | private JLabel battery;
18 | private JLabel bloodSugar;
19 | private JLabel insulinQuantity;
20 | private JLabel status;
21 |
22 | private JButton charge;
23 | private JButton insertInsulin;
24 |
25 | private InsulinPump pump;
26 |
27 | private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy/MM/dd-HH:mm:ss");
28 | private Logger log;
29 |
30 | public void setBloodSugar(double bloodSugar) {
31 | String bloodSugarStr = String.valueOf(bloodSugar);
32 | this.bloodSugar.setText("血糖值:" + bloodSugarStr.substring(0, bloodSugarStr.indexOf(".") + 2) + "mmol/L");
33 | }
34 |
35 | public void setinsulinQuantity(double insulinQuantity) {
36 | String insulinQuantityStr = String.valueOf(insulinQuantity);
37 | this.insulinQuantity.setText("胰岛素量:" + insulinQuantityStr.substring(0, insulinQuantityStr.indexOf(".") + 2) + "单位");
38 | }
39 |
40 | public void setStatus(String status) {
41 | this.status.setText("当前状态:" + status);
42 | }
43 |
44 | public void setBattery(double battery) {
45 | String batteryStr = String.valueOf(battery);
46 | this.battery.setText("电量:" + batteryStr.substring(0, batteryStr.indexOf(".") + 2));//只获取小数点后1位
47 | }
48 |
49 | public void updateTime() {
50 | this.time.setText("时间:" + simpleDateFormat.format(new Date()));
51 | }
52 |
53 | //参数是这个窗口显示的泵子的引用
54 | public PumpWindow(final InsulinPump pump) {
55 | log = Logger.getLogger(PumpWindow.class);
56 | try {
57 | log.addAppender(new FileAppender(new PatternLayout("[%d{yyyy/MM/dd-HH:mm:ss}]-%m%n"), "/home/geekgao/insulinPumpLog", true));
58 | } catch (IOException e) {
59 | e.printStackTrace();
60 | }
61 | this.pump = pump;
62 |
63 | this.setLayout(new BorderLayout());
64 | this.addWindowListener(new WindowAdapter() {
65 | @Override
66 | public void windowClosing(WindowEvent e) {
67 | log.info("用户关机");
68 | System.exit(0);
69 | }
70 | });
71 |
72 | JPanel upPanel = new JPanel();
73 | upPanel.setLayout(new BorderLayout());
74 | this.add(upPanel, BorderLayout.NORTH);
75 | time = new JLabel();
76 | battery = new JLabel();
77 | upPanel.add(time, BorderLayout.WEST);
78 | upPanel.add(battery, BorderLayout.EAST);
79 |
80 |
81 | JPanel downPanel = new JPanel();
82 | this.add(downPanel, BorderLayout.SOUTH);
83 | charge = new JButton("充电");
84 | charge.addActionListener(new ActionListener() {
85 | public void actionPerformed(ActionEvent e) {
86 | log.info("充电完毕");
87 | pump.setBattery(100);
88 | }
89 | });
90 | downPanel.add(charge);
91 | insertInsulin = new JButton("加满胰岛素");
92 | insertInsulin.addActionListener(new ActionListener() {
93 | public void actionPerformed(ActionEvent e) {
94 | log.info("加满胰岛素");
95 | pump.setInsulinQuantity(1000);
96 | }
97 | });
98 | downPanel.add(insertInsulin);
99 |
100 | JPanel midPanel = new JPanel();
101 | midPanel.setLayout(new GridLayout(3, 1, 0, 0));
102 | this.add(midPanel, BorderLayout.CENTER);
103 | bloodSugar = new JLabel();
104 | midPanel.add(bloodSugar);
105 | insulinQuantity = new JLabel();
106 | midPanel.add(insulinQuantity);
107 | status = new JLabel();
108 | midPanel.add(status);
109 |
110 | this.setSize(400, 250);
111 | this.setResizable(false);
112 | this.setLocationRelativeTo(null);
113 | this.setVisible(true);
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/Main.java:
--------------------------------------------------------------------------------
1 | import org.apache.http.Consts;
2 | import org.apache.http.NameValuePair;
3 | import org.apache.http.client.config.CookieSpecs;
4 | import org.apache.http.client.config.RequestConfig;
5 | import org.apache.http.client.entity.UrlEncodedFormEntity;
6 | import org.apache.http.client.methods.CloseableHttpResponse;
7 | import org.apache.http.client.methods.HttpGet;
8 | import org.apache.http.client.methods.HttpPost;
9 | import org.apache.http.impl.client.CloseableHttpClient;
10 | import org.apache.http.impl.client.HttpClients;
11 | import org.apache.http.impl.client.SystemDefaultCredentialsProvider;
12 | import org.apache.http.message.BasicNameValuePair;
13 | import org.apache.http.util.EntityUtils;
14 |
15 | import java.io.File;
16 | import java.io.FileOutputStream;
17 | import java.io.IOException;
18 | import java.util.LinkedList;
19 | import java.util.List;
20 | import java.util.Scanner;
21 |
22 | public class Main {
23 | public static void main(String[] args) {
24 | RequestConfig requestConfig = RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD_STRICT).build();
25 | CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(requestConfig).build();
26 |
27 | HttpGet getHomePage = new HttpGet("http://www.zhihu.com/");
28 | try {
29 | //填充登陆请求中基本的参数
30 | CloseableHttpResponse response = httpClient.execute(getHomePage);
31 | String responseHtml = EntityUtils.toString(response.getEntity());
32 | String xsrfValue = responseHtml.split("")[0];
33 | System.out.println("_xsrf:" + xsrfValue);
34 | response.close();
35 | List valuePairs = new LinkedList();
36 | valuePairs.add(new BasicNameValuePair("_xsrf" , xsrfValue));
37 | valuePairs.add(new BasicNameValuePair("email", 用户名));
38 | valuePairs.add(new BasicNameValuePair("password", 密码));
39 | valuePairs.add(new BasicNameValuePair("rememberme", "true"));
40 |
41 | //获取验证码
42 | HttpGet getCaptcha = new HttpGet("http://www.zhihu.com/captcha.gif?r=" + System.currentTimeMillis() + "&type=login");
43 | CloseableHttpResponse imageResponse = httpClient.execute(getCaptcha);
44 | FileOutputStream out = new FileOutputStream("/tmp/zhihu.gif");
45 | byte[] bytes = new byte[8192];
46 | int len;
47 | while ((len = imageResponse.getEntity().getContent().read(bytes)) != -1) {
48 | out.write(bytes,0,len);
49 | }
50 | out.close();
51 | Runtime.getRuntime().exec("eog /tmp/zhihu.gif");//ubuntu下看图片的命令是eog
52 |
53 | //请用户输入验证码
54 | System.out.print("请输入验证码:");
55 | Scanner scanner = new Scanner(System.in);
56 | String captcha = scanner.next();
57 | valuePairs.add(new BasicNameValuePair("captcha", captcha));
58 |
59 | //完成登陆请求的构造
60 | UrlEncodedFormEntity entity = new UrlEncodedFormEntity(valuePairs, Consts.UTF_8);
61 | HttpPost post = new HttpPost("http://www.zhihu.com/login/email");
62 | post.setEntity(entity);
63 | httpClient.execute(post);//登录
64 |
65 | HttpGet g = new HttpGet("http://www.zhihu.com/question/following");//获取“我关注的问题”页面
66 | CloseableHttpResponse r = httpClient.execute(g);
67 | System.out.println(EntityUtils.toString(r.getEntity()));
68 | r.close();
69 | } catch (IOException e) {
70 | e.printStackTrace();
71 | } finally {
72 | try {
73 | httpClient.close();
74 | } catch (IOException e) {
75 | e.printStackTrace();
76 | }
77 | }
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/MicroWave/MicroWave.java:
--------------------------------------------------------------------------------
1 | import java.util.Timer;
2 | import java.util.TimerTask;
3 |
/**
 * Countdown model of a microwave oven: a remaining time in hours, minutes and
 * seconds plus a running flag. The time is changed by a timer thread and by
 * callers on other threads, so all state is guarded by one private lock.
 */
public class MicroWave extends Thread {
    /** Guards every field below. */
    private final Object lock = new Object();

    private int hour;      // remaining hours
    private int minutes;   // remaining minutes
    private int second;    // remaining seconds
    private boolean isRun; // whether the countdown is active

    /** Creates a stopped oven with no time set. */
    public MicroWave() {
        hour = 0;
        minutes = 0;
        second = 0;
        isRun = false;
    }

    /** Starts a timer that advances the countdown once per second. */
    @Override
    public void run() {
        Timer timer = new Timer();
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                tick();
            }
        }, 0, 1000);
    }

    /**
     * Removes one second from the remaining time while running. A tick that
     * finds the time already at zero clears the running flag instead. The
     * whole step happens under the lock, so readers never observe an
     * intermediate borrow state.
     */
    private void tick() {
        synchronized (lock) {
            if (!isRun) {
                return;
            }
            if (second >= 1) {
                second--;
            } else if (minutes >= 1) {
                minutes--;
                second = 59;
            } else if (hour >= 1) {
                hour--;
                minutes = 59;
                second = 59;
            } else {
                hour = 0;
                minutes = 0;
                second = 0;
                isRun = false;
            }
        }
    }

    /** Carries full groups of 60 minutes into the hours. Caller holds the lock. */
    private void carryMinutes() {
        if (minutes >= 60) {
            hour += minutes / 60;
            minutes %= 60;
        }
    }

    public int getHour() {
        synchronized (lock) {
            return hour;
        }
    }

    /** Adds the given number of hours. */
    public void addHour(int hour) {
        synchronized (lock) {
            this.hour += hour;
        }
    }

    public int getMinutes() {
        synchronized (lock) {
            return minutes;
        }
    }

    /** Adds the given number of minutes, carrying any overflow into the hours. */
    public void addMinutes(int minutes) {
        synchronized (lock) {
            this.minutes += minutes;
            carryMinutes();
        }
    }

    public int getSecond() {
        synchronized (lock) {
            return second;
        }
    }

    public void setHour(int hour) {
        synchronized (lock) {
            this.hour = hour;
        }
    }

    public void setMinutes(int minutes) {
        synchronized (lock) {
            this.minutes = minutes;
        }
    }

    public void setSecond(int second) {
        synchronized (lock) {
            this.second = second;
        }
    }

    /** Adds the given number of seconds, carrying overflow into minutes and hours. */
    public void addSecond(int second) {
        synchronized (lock) {
            this.second += second;
            if (this.second >= 60) {
                minutes += this.second / 60;
                this.second %= 60;
                carryMinutes();
            }
        }
    }

    public void setIsRun(boolean isRun) {
        synchronized (lock) {
            this.isRun = isRun;
        }
    }

    public boolean isRun() {
        synchronized (lock) {
            return isRun;
        }
    }
}
108 |
--------------------------------------------------------------------------------
/MicroWave/MicroWaveMain.java:
--------------------------------------------------------------------------------
1 | import java.util.Timer;
2 | import java.util.TimerTask;
3 |
4 | public class MicroWaveMain {
5 | public static void main(String[] args) {
6 | final MicroWave microWave = new MicroWave();
7 | final MicroWaveWindow microWaveWindow = new MicroWaveWindow(microWave);
8 |
9 | microWave.run();
10 | Timer timer = new Timer();
11 | timer.schedule(new TimerTask() {
12 | @Override
13 | public void run() {
14 | microWaveWindow.setTimeLable(microWave.getHour(), microWave.getMinutes(),microWave.getSecond());
15 | microWaveWindow.pack();//动态调整窗口大小
16 | }
17 | },0,50);
18 | }
19 | }
--------------------------------------------------------------------------------
/MicroWave/MicroWaveWindow.java:
--------------------------------------------------------------------------------
1 | import javax.swing.*;
2 | import java.awt.*;
3 | import java.awt.event.ActionEvent;
4 | import java.awt.event.ActionListener;
5 | import java.awt.event.WindowAdapter;
6 | import java.awt.event.WindowEvent;
7 |
8 | public class MicroWaveWindow extends JFrame{
9 |
10 | JLabel timeLable;//倒计时的时间标签
11 |
12 | public void setTimeLable(int hour,int minutes,int second) {
13 | String hourStr = String.format("%2d",hour);
14 | String minutesStr = String.format("%2d",minutes);
15 | String secondStr = String.format("%2d", second);
16 |
17 | hourStr = hourStr.replace(' ','0');
18 | minutesStr = minutesStr.replace(' ','0');
19 | secondStr = secondStr.replace(' ','0');
20 |
21 | this.timeLable.setText(hourStr + ":" + minutesStr + ":" + secondStr);
22 | }
23 |
24 | //传入它控制的微波炉
25 | public MicroWaveWindow(final MicroWave microWave) {
26 | this.setTitle("微波炉仿真程序");
27 | this.addWindowListener(new WindowAdapter() {
28 | @Override
29 | public void windowClosing(WindowEvent e) {
30 | System.exit(0);
31 | }
32 | });
33 |
34 | JPanel upPanel = new JPanel();
35 | JPanel midPanel = new JPanel();
36 | JPanel downPanel = new JPanel();
37 | this.setLayout(new BorderLayout());
38 | this.add(upPanel, BorderLayout.NORTH);
39 | timeLable = new JLabel();
40 | upPanel.add(timeLable);
41 | timeLable.setFont(new Font("Dialog", 1, 100));
42 | timeLable.setText(microWave.getHour() + ":" + microWave.getMinutes() + ":" + microWave.getSecond());
43 |
44 | this.add(midPanel, BorderLayout.CENTER);
45 | midPanel.setLayout(new BorderLayout());
46 | JPanel midUpPanel = new JPanel();
47 | JPanel midDownPanel = new JPanel();
48 | midPanel.add(midUpPanel,BorderLayout.NORTH);
49 | midPanel.add(midDownPanel,BorderLayout.SOUTH);
50 |
51 | JLabel fire = new JLabel("火力");
52 | JRadioButton bigFire = new JRadioButton("大火");
53 | JRadioButton midFire = new JRadioButton("中火");
54 | JRadioButton smallFire = new JRadioButton("小火");
55 | midUpPanel.add(fire);
56 | midUpPanel.add(bigFire);
57 | midUpPanel.add(midFire);
58 | midUpPanel.add(smallFire);
59 |
60 | ButtonGroup buttonGroup = new ButtonGroup();
61 | buttonGroup.add(bigFire);
62 | buttonGroup.add(midFire);
63 | buttonGroup.add(smallFire);
64 |
65 | JLabel addTime = new JLabel("时间");
66 | midDownPanel.add(addTime);
67 | JButton addHour = new JButton("+1小时");
68 | JButton addMinutes = new JButton("+1分钟");
69 | JButton addSecond = new JButton("+1秒");
70 | midDownPanel.add(addHour);
71 | midDownPanel.add(addMinutes);
72 | midDownPanel.add(addSecond);
73 |
74 | addHour.addActionListener(new ActionListener() {
75 | public void actionPerformed(ActionEvent e) {
76 | microWave.addHour(1);
77 | }
78 | });
79 | addMinutes.addActionListener(new ActionListener() {
80 | public void actionPerformed(ActionEvent e) {
81 | microWave.addMinutes(1);
82 | }
83 | });
84 | addSecond.addActionListener(new ActionListener() {
85 | public void actionPerformed(ActionEvent e) {
86 | microWave.addSecond(1);
87 | }
88 | });
89 |
90 |
91 | this.add(downPanel, BorderLayout.SOUTH);
92 | JLabel function = new JLabel("功能");
93 | JButton startButton = new JButton("开始");
94 | JButton pauseButton = new JButton("暂停");
95 | JButton stopButton = new JButton("停止");
96 | downPanel.add(function);
97 | downPanel.add(startButton);
98 | downPanel.add(pauseButton);
99 | downPanel.add(stopButton);
100 |
101 | startButton.addActionListener(new ActionListener() {
102 | public void actionPerformed(ActionEvent e) {
103 | if (microWave.getHour() == 0 && microWave.getMinutes() == 0 && microWave.getSecond() == 0) {
104 | JOptionPane.showMessageDialog(null,"时间为0!");
105 | } else {
106 | microWave.setIsRun(true);
107 | }
108 | }
109 | });
110 |
111 | pauseButton.addActionListener(new ActionListener() {
112 | public void actionPerformed(ActionEvent e) {
113 | microWave.setIsRun(false);
114 | }
115 | });
116 |
117 | stopButton.addActionListener(new ActionListener() {
118 | public void actionPerformed(ActionEvent e) {
119 | microWave.setIsRun(false);
120 | microWave.setHour(0);
121 | microWave.setMinutes(0);
122 | microWave.setSecond(0);
123 | }
124 | });
125 |
126 | this.pack();
127 | this.setLocationRelativeTo(null);
128 | this.setResizable(false);
129 | this.setVisible(true);
130 | }
131 | }
132 |
--------------------------------------------------------------------------------
/PullBookinfo/GetBookInfoThread.java:
--------------------------------------------------------------------------------
1 | import org.apache.http.HttpHeaders;
2 | import org.apache.http.client.methods.CloseableHttpResponse;
3 | import org.apache.http.client.methods.HttpGet;
4 | import org.apache.http.impl.client.CloseableHttpClient;
5 | import org.apache.http.util.EntityUtils;
6 | import org.dom4j.Element;
7 |
8 | import java.io.IOException;
9 | import java.util.regex.Matcher;
10 | import java.util.regex.Pattern;
11 |
12 | public class GetBookInfoThread extends Thread{
13 | private CloseableHttpClient httpClient;
14 | private String webAddress;
15 | private Element rootElement;
16 | private Pattern bookAuthorRegex;
17 | private Pattern bookPublishRegex;
18 | private Pattern bookIsbnRegex;
19 | private Pattern bookImgRegex;
20 | private String bookName;
21 |
22 | /**
23 | *
24 | * @param httpClient 用这个操作抓取
25 | * @param webAddress 这个是抓取的网址
26 | * @param rootElement 这个是一个xml文档的根节点,用这个来操作加入新的子节点
27 | */
28 | public GetBookInfoThread(CloseableHttpClient httpClient,String webAddress,String bookName,Element rootElement,Pattern bookAuthorRegex,Pattern bookPublishRegex,Pattern bookIsbnRegex,Pattern bookImgRegex) {
29 | this.httpClient = httpClient;
30 | this.webAddress = webAddress;
31 | this.rootElement = rootElement;
32 | this.bookAuthorRegex = bookAuthorRegex;
33 | this.bookPublishRegex = bookPublishRegex;
34 | this.bookIsbnRegex = bookIsbnRegex;
35 | this.bookName = bookName;
36 | this.bookImgRegex = bookImgRegex;
37 | }
38 |
39 | @Override
40 | public void run() {
41 | HttpGet getBookInfo = new HttpGet(webAddress);
42 | getBookInfo.addHeader(HttpHeaders.USER_AGENT, "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30");
43 | CloseableHttpResponse bookInfoResponse;
44 | String bookInfoCode = null;//书籍具体信息网页源码
45 | try {
46 | bookInfoResponse = httpClient.execute(getBookInfo);
47 | if (bookInfoResponse.getStatusLine().getStatusCode() != 200) {
48 | System.out.println("获取书本具体信息时出错,页面地址:" + webAddress + "错误信息" + bookInfoResponse.getStatusLine());
49 | return;
50 | }
51 |
52 | bookInfoCode = EntityUtils.toString(bookInfoResponse.getEntity());
53 | } catch (IOException e) {
54 | e.printStackTrace();
55 | }
56 |
57 | Matcher bookAuthorMatcher = bookAuthorRegex.matcher(bookInfoCode); //匹配作者
58 | Matcher bookPublishMatcher = bookPublishRegex.matcher(bookInfoCode); //匹配出版商
59 | Matcher bookIsbnMatcher = bookIsbnRegex.matcher(bookInfoCode); //匹配isbn
60 | Matcher bookImgMatcher = bookImgRegex.matcher(bookInfoCode); //匹配图片地址
61 |
62 | String bookName = this.bookName;
63 | String bookAuthor = "";
64 | String bookPublish = "";
65 | String bookIsbn = "";
66 | String bookLink = webAddress;
67 | String bookImg = "";
68 |
69 | if (bookAuthorMatcher.find()) {
70 | bookAuthor = bookAuthorMatcher.group(1);
71 | }
72 | if (bookPublishMatcher.find()) {
73 | bookPublish = bookPublishMatcher.group(1);
74 | }
75 | if (bookIsbnMatcher.find()) {
76 | bookIsbn = bookIsbnMatcher.group(1);
77 | }
78 | if (bookImgMatcher.find()) {
79 | bookImg = bookImgMatcher.group(1);
80 | }
81 |
82 | // System.out.println(bookName + "-" + bookAuthor + "-" + bookPublish + "-" + bookIsbn);
83 |
84 | Element bookElement = rootElement.addElement("book");//新建一个书的标签
85 | bookElement.addAttribute("id",String.valueOf(Main.bookId++));
86 | bookElement.addElement("name").setText(bookName);
87 | bookElement.addElement("author").setText(bookAuthor);
88 | bookElement.addElement("publish").setText(bookPublish);
89 | bookElement.addElement("isbn").setText(bookIsbn);
90 | bookElement.addElement("count").setText(String.valueOf((int)(Math.random() * 10) + 3));
91 | bookElement.addElement("link").setText(bookLink);
92 | bookElement.addElement("img").setText(bookImg);
93 |
94 | System.out.println("抓取了:" + webAddress + " " + bookName);
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/PullBookinfo/Main.java:
--------------------------------------------------------------------------------
1 | import org.apache.http.HttpHeaders;
2 | import org.apache.http.HttpHost;
3 | import org.apache.http.client.methods.CloseableHttpResponse;
4 | import org.apache.http.client.methods.HttpGet;
5 | import org.apache.http.impl.client.CloseableHttpClient;
6 | import org.apache.http.impl.client.HttpClients;
7 | import org.apache.http.util.EntityUtils;
8 | import org.dom4j.Document;
9 | import org.dom4j.DocumentHelper;
10 | import org.dom4j.Element;
11 |
12 | import java.io.IOException;
13 | import java.util.*;
14 | import java.util.regex.Matcher;
15 | import java.util.regex.Pattern;
16 |
17 | public class Main {
18 |
19 | CloseableHttpClient httpClient;
20 | static int bookId = 496;
21 | Map proxyMap;//ip->端口
22 | List ipList;//从这个list中读出ip,再由ip从map中读出端口
23 | int i = 0;//根据这个从list中取出ip,换上对应的代理
24 |
25 | public static void main(String[] args) {
26 | Main m = new Main();
27 |
28 | // List tagList = m.getTagList();
29 | List tagList = new LinkedList();
30 | // tagList.add("经典");
31 | // tagList.add("日本文学");
32 | // tagList.add("散文");
33 | // tagList.add("中国文学");
34 | // tagList.add("算法");
35 | // tagList.add("童话");
36 | // tagList.add("外国文学");
37 | // tagList.add("文学");
38 | // tagList.add("小说");
39 | // tagList.add("漫画");
40 | // tagList.add("诗词");
41 | // tagList.add("心理学");
42 | tagList.add("摄影");
43 | tagList.add("理财");
44 | tagList.add("经济学");
45 | m.pullAndWrite(tagList,10);
46 | }
47 |
/**
 * Creates a crawler with a default HTTP client and fills the proxy table.
 */
public Main() {
    // To route through a fixed proxy instead:
    // HttpHost proxy = new HttpHost("122.225.106.35",80);
    // httpClient = HttpClients.custom().setProxy(proxy).build();
    this.httpClient = HttpClients.createDefault();
    this.setProxyMap();
}
54 |
55 | public void setProxyMap() {
56 | proxyMap = new HashMap();
57 | ipList = new LinkedList();
58 | proxyMap.put("211.68.122.171",80);ipList.add("211.68.122.171");
59 | }
60 |
61 | public List getTagList() {
62 | HttpGet getTag = new HttpGet("http://book.douban.com/tag/");
63 | getTag.addHeader(HttpHeaders.USER_AGENT, "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30");
64 | CloseableHttpResponse tagPageResponse = null;
65 | String tagPageCode = null;//网页源码
66 | try {
67 | tagPageResponse = httpClient.execute(getTag);
68 | tagPageCode = EntityUtils.toString(tagPageResponse.getEntity());
69 | tagPageResponse.close();
70 | } catch (IOException e) {
71 | e.printStackTrace();
72 | } finally {
73 | try {
74 | tagPageResponse.close();
75 | } catch (IOException e) {
76 | e.printStackTrace();
77 | }
78 | }
79 |
80 | Pattern p = Pattern.compile("class=\"tag\">(.*?)");
81 | Matcher m = p.matcher(tagPageCode);
82 | List resultTagList = new LinkedList();
83 | while (m.find()) {
84 | resultTagList.add(m.group(1));
85 | }
86 |
87 | return resultTagList;
88 | }
89 |
90 | /**
91 | *
92 | * @param tagList 要抓的图书的类别
93 | * @param maxPageNum 每种图书最多抓取的页数
94 | */
95 | public void pullAndWrite(List tagList,int maxPageNum) {
96 | Pattern bookAddressRegex = Pattern.compile("href=\"(.*?)\" class=\"title\" target=\"_blank\">(.*?)"); //获取具体书籍网址的正则
97 | Pattern bookAuthorRegex = Pattern.compile("(?s) 作者:.*?>(.*?)");//匹配作者
98 | Pattern bookPublishRegex = Pattern.compile("出版社: (.*?)
");
99 | Pattern bookIsbnRegex = Pattern.compile("ISBN: (.*?)
");
100 | Pattern bookImgRegex = Pattern.compile("
threadList = new LinkedList();
133 | while (m.find()) {
134 | threadList.add(new GetBookInfoThread(httpClient, m.group(1), m.group(2), rootElement, bookAuthorRegex, bookPublishRegex, bookIsbnRegex,bookImgRegex));
135 | findCount++;
136 | }
137 | //没有知道到代表这种类别的书都找完了,那么直接退出此类书籍的查找
138 | if (findCount == 0) {
139 | break;
140 | }
141 |
142 | for (Thread thread:threadList) {
143 | thread.start();
144 | }
145 | for (Thread thread:threadList) {
146 | try {
147 | thread.join();
148 | } catch (InterruptedException e) {
149 | e.printStackTrace();
150 | }
151 | }
152 | nowPageNum++;
153 | }
154 | //一个类别爬完了再写入
155 | new WriteBookInfoToFile(rootElement,"/home/geekgao/book/" + tag + ".xml").start(); //另开一个线程写入文件
156 |
157 | }
158 | }
159 |
160 | private void changeProxy() {
161 | if (i >= ipList.size()) {
162 | System.out.println("代理用完了,退出");
163 | System.exit(0);
164 | }
165 | String ip = ipList.get(i++);
166 | httpClient = HttpClients.custom().setProxy(new HttpHost(ip,proxyMap.get(ip))).build();
167 | System.out.println("换代理啦,使用代理:" + ip + ",端口:" + proxyMap.get(ip));
168 | }
169 |
170 | }
171 |
--------------------------------------------------------------------------------
/PullBookinfo/WriteBookInfoToFile.java:
--------------------------------------------------------------------------------
1 | import org.dom4j.Element;
2 | import org.dom4j.io.XMLWriter;
3 |
4 | import java.io.FileWriter;
5 | import java.io.IOException;
6 | import java.io.Writer;
7 |
8 | public class WriteBookInfoToFile extends Thread {
9 | private Element root;
10 | private String fileAddress;
11 |
12 | public WriteBookInfoToFile(Element root,String fileAddress) {
13 | this.root = root;
14 | this.fileAddress = fileAddress;
15 | }
16 |
17 | @Override
18 | public void run() {
19 | Writer fileWriter;
20 | try {
21 | fileWriter = new FileWriter(fileAddress);
22 | XMLWriter xmlWriter = new XMLWriter(fileWriter);
23 | xmlWriter.write(root);
24 | xmlWriter.close();
25 | System.out.println("[" + fileAddress + "]写入成功");
26 | } catch (IOException e) {
27 | e.printStackTrace();
28 | }
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/PullBookinfo/WriteInfoToDB.java:
--------------------------------------------------------------------------------
1 | import org.dom4j.Document;
2 | import org.dom4j.DocumentException;
3 | import org.dom4j.Element;
4 | import org.dom4j.io.SAXReader;
5 |
6 | import java.io.File;
7 | import java.sql.DriverManager;
8 | import java.sql.SQLException;
9 | import java.sql.Statement;
10 | import java.util.List;
11 |
12 | public class WriteInfoToDB {
13 | public static void main(String[] args) {
14 | File folder = new File("/home/geekgao/book");
15 | File[] XMLS = folder.listFiles();
16 | SAXReader reader = new SAXReader();
17 | Statement statement = null; //用这个执行sql语句
18 | try {
19 | Class.forName("com.mysql.jdbc.Driver");// 动态加载mysql驱动
20 | statement = DriverManager.getConnection("jdbc:mysql://localhost:3306/BookManage?user=root&password=root").createStatement();
21 | } catch (SQLException e) {
22 | e.printStackTrace();
23 | } catch (ClassNotFoundException e) {
24 | e.printStackTrace();
25 | }
26 |
27 | for (File f:XMLS) {
28 | if (f.isDirectory()) {
29 | continue;
30 | }
31 | Document document = null;
32 | try {
33 | document = reader.read(f);
34 | } catch (DocumentException e) {
35 | e.printStackTrace();
36 | }
37 |
38 | Element root = document.getRootElement();
39 | List books = root.elements();
40 | for (Element book:books) {
41 | String name = null;
42 | String author = null;
43 | String publish = null;
44 | String isbn = null;
45 | String count = null;
46 | String link = null;
47 | String img = null;
48 | List b = book.elements();
49 | for (Element info:b) {
50 | if (info.getName().equals("name")) {
51 | name = info.getText();
52 | } else if (info.getName().equals("author")) {
53 | author = info.getText();
54 | } else if (info.getName().equals("publish")) {
55 | publish = info.getText();
56 | } else if (info.getName().equals("isbn")) {
57 | isbn = info.getText();
58 | } else if (info.getName().equals("count")) {
59 | count = info.getText();
60 | } else if (info.getName().equals("link")) {
61 | link = info.getText();
62 | } else if (info.getName().equals("img")) {
63 | img = info.getText();
64 | }
65 | // System.out.println(info.getName() + ": " + info.getText());
66 | }
67 | String sql = "INSERT INTO Book(bookPublish,bookName,bookAuthor,bookTag,bookIsbn,bookCount,bookRestCount,bookLink,bookImg) VALUES ('" + publish + "','" + name + "','" + author + "','" + f.getName().split("\\.")[0] + "','" + isbn + "','" + count + "','" + count + "','" + link + "','" + img + "');";
68 | try {
69 | statement.execute(sql);
70 | } catch (SQLException e) {
71 | System.err.println("sql语句处错误:" + e.getMessage());
72 | System.err.println("sql语句:" + sql);
73 | }
74 | }
75 | }
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Stargazers over time
2 | [![Stargazers over time](https://starchart.cc/gaopu/Java.svg)](https://starchart.cc/gaopu/Java)
3 |
4 | ## 这个仓库内的代码是什么东西
5 | 都是以前上大学时自己写的一些东西,有的为了交作业,有的为了玩😊。
6 |
7 | ## 为什么很多份不相关的代码放在一个代码库
8 | 因为当初还不会合理正确的使用github代码库🙉。
9 |
10 | ## 值不值得star
11 | 不值得(不过也感谢star了的朋友❤️)。
12 |
13 | ## 最后
14 | 欢迎大家访问我的私人博客:[www.geekgao.cn](https://www.geekgao.cn)
15 |
16 | 我的博客搭建在腾讯云,新用户有优惠:[优惠购买腾讯云](https://curl.qcloud.com/JNxboKJ3)(不是新用户用新的账号登陆,用老身份认证也能享受优惠,听说是可这么操作三次),我用的是2核4G5M的配置。
17 |
--------------------------------------------------------------------------------
/Struts2FileUpAndDown/src/com/geekgao/file/FileAction.java:
--------------------------------------------------------------------------------
1 | package com.geekgao.file;
2 |
3 | import com.opensymphony.xwork2.ActionSupport;
4 |
5 | import java.io.*;
6 |
7 | public class FileAction extends ActionSupport {
8 |
9 | private File file;
10 | private String fileFileName;
11 | private String fileContentType;
12 |
13 | public File getFile() {
14 | return file;
15 | }
16 |
17 | public void setFile(File file) {
18 | this.file = file;
19 | }
20 |
21 | public String getFileFileName() {
22 | return fileFileName;
23 | }
24 |
25 | public void setFileFileName(String fileFileName) {
26 | this.fileFileName = fileFileName;
27 | }
28 |
29 | public String getFileContentType() {
30 | return fileContentType;
31 | }
32 |
33 | public void setFileContentType(String fileContentType) {
34 | this.fileContentType = fileContentType;
35 | }
36 |
37 | public String upFile() throws IOException {
38 | if (file == null) {
39 | return INPUT;
40 | }
41 |
42 | FileInputStream inFile = new FileInputStream(file);
43 | FileOutputStream outFle = new FileOutputStream(new File("/home/geekgao/" + fileFileName));
44 | byte[] b = new byte[8192];
45 | int bLength;
46 |
47 | while (-1 != (bLength = inFile.read(b))) {
48 | outFle.write(b,0,bLength);
49 | }
50 | inFile.close();
51 | outFle.close();
52 | return SUCCESS;
53 | }
54 |
55 | public String downFile() {
56 | return SUCCESS;
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/Struts2FileUpAndDown/src/struts.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | /{1}File.jsp
13 |
14 |
15 |
16 | /index.jsp
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/Struts2FileUpAndDown/web/WEB-INF/web.xml:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 | struts2
8 | org.apache.struts2.dispatcher.ng.filter.StrutsPrepareAndExecuteFilter
9 |
10 |
11 | struts2
12 | /*
13 |
14 |
--------------------------------------------------------------------------------
/Struts2FileUpAndDown/web/downFile.jsp:
--------------------------------------------------------------------------------
1 | <%--
2 | Created by IntelliJ IDEA.
3 | User: geekgao
4 | Date: 15-7-25
5 | Time: 上午10:29
6 | To change this template use File | Settings | File Templates.
7 | --%>
8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %>
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/Struts2FileUpAndDown/web/index.jsp:
--------------------------------------------------------------------------------
1 | <%--
2 | Created by IntelliJ IDEA.
3 | User: geekgao
4 | Date: 15-7-25
5 | Time: 上午10:13
6 | To change this template use File | Settings | File Templates.
7 | --%>
8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %>
9 |
10 |
11 |
12 |
13 |
14 | 上传文件
15 | 下载文件
16 |
17 |
18 |
--------------------------------------------------------------------------------
/Struts2FileUpAndDown/web/upFile.jsp:
--------------------------------------------------------------------------------
1 | <%@ taglib prefix="s" uri="/struts-tags" %>
2 | <%--
3 | Created by IntelliJ IDEA.
4 | User: geekgao
5 | Date: 15-7-25
6 | Time: 上午10:29
7 | To change this template use File | Settings | File Templates.
8 | --%>
9 | <%@ page contentType="text/html;charset=UTF-8" language="java" %>
10 |
11 |
12 |
13 |
14 |
15 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/com/crawl/comments/CrawlComments.java:
--------------------------------------------------------------------------------
1 | package com.crawl.comments;
2 |
3 | import org.apache.http.client.ClientProtocolException;
4 | import org.apache.http.client.config.RequestConfig;
5 | import org.apache.http.client.methods.HttpGet;
6 | import org.apache.http.conn.ConnectionPoolTimeoutException;
7 | import org.apache.http.impl.client.CloseableHttpClient;
8 | import org.apache.http.impl.client.HttpClients;
9 | import org.apache.http.util.EntityUtils;
10 | import org.dom4j.Element;
11 | import org.json.JSONArray;
12 | import org.json.JSONObject;
13 |
14 | import java.io.IOException;
15 | import java.net.SocketTimeoutException;
16 |
17 | /**
18 | * Created by geekgao on 15-10-19.
19 | */
20 | public class CrawlComments implements Runnable {
21 | private Element app;
22 | private int start;
23 | private int count;
24 | private int appId;
25 |
26 | public CrawlComments(Element app, int start, int count, int appId) {
27 | this.app = app;
28 | this.start = start;
29 | this.count = count;
30 | this.appId = appId;
31 | }
32 |
33 | private void setAppXml() throws IOException {
34 | //设置超时
35 | RequestConfig requestConfig = RequestConfig.custom().setConnectionRequestTimeout(2000).setSocketTimeout(6000).setConnectTimeout(2000).build();
36 | //建立client
37 | CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(requestConfig).build();
38 |
39 | HttpGet getContentJson = new HttpGet("http://comment.mobilem.360.cn/comment/getComments?baike=" + appId + "&level=0&start=" + start + "&count=" + count + "&fm=home_jingjia_3&m=c1804fc5ca4ded8293acd1151efaf3db&m2=61f3c1e4d105b55aff323b20a8136c4e&v=3.2.50&re=1&nt=1&ch=493041&os=21&model=MX4+Pro&sn=4.66476154040931&cu=m76&ca1=armeabi-v7a&ca2=armeabi&ppi=1536x2560&cpc=1&startCount=4");
40 | String contentJson = EntityUtils.toString(client.execute(getContentJson).getEntity());
41 |
42 | JSONObject jsonObject = new JSONObject(contentJson);
43 | JSONArray contentJsonArray = jsonObject.getJSONObject("data").getJSONArray("messages");
44 |
45 | for (int i = 0;i < contentJsonArray.length();i++) {
46 | JSONObject messageJsonObject = contentJsonArray.getJSONObject(i);
47 |
48 | String userid = messageJsonObject.getString("username");
49 | String time = messageJsonObject.getString("create_time");
50 | String score = String.valueOf(messageJsonObject.getInt("score"));
51 | String review = messageJsonObject.getString("content");
52 | String agreecount = messageJsonObject.getString("likes");
53 |
54 | Element comment = app.addElement("comment");
55 | comment.addElement("userid").setText(userid);
56 | comment.addElement("time").setText(time);
57 | comment.addElement("score").setText(score);
58 | comment.addElement("review").setText(review);
59 | comment.addElement("agreecount").setText(agreecount);
60 | }
61 | client.close();
62 | }
63 |
64 | public void run() {
65 | try {
66 | setAppXml();
67 | } catch (ConnectionPoolTimeoutException e) {
68 | System.err.println(appId + "号app从" + start + "开始的评论发生-ConnectionPoolTimeoutException");
69 | return;
70 | } catch (ClientProtocolException e) {
71 | e.printStackTrace();
72 | return;
73 | } catch (SocketTimeoutException e) {
74 | System.err.println(appId + "号app从" + start + "开始的评论发生-SocketTimeoutException");
75 | return;
76 | } catch (IOException e) {
77 | e.printStackTrace();
78 | return;
79 | }
80 | System.out.println(appId + "号app从" + start + "开始的评论抓取完毕");
81 | }
82 | }
--------------------------------------------------------------------------------
/com/crawl/comments/CrawlUtils.java:
--------------------------------------------------------------------------------
1 | package com.crawl.comments;
2 |
3 | import org.apache.http.client.methods.CloseableHttpResponse;
4 | import org.apache.http.client.methods.HttpGet;
5 | import org.apache.http.impl.client.CloseableHttpClient;
6 | import org.apache.http.impl.client.HttpClients;
7 | import org.apache.http.util.EntityUtils;
8 | import org.dom4j.Element;
9 | import org.dom4j.io.XMLWriter;
10 | import org.json.JSONObject;
11 |
12 | import java.io.FileWriter;
13 | import java.io.IOException;
14 | import java.io.Writer;
15 | import java.util.HashSet;
16 | import java.util.Set;
17 |
18 | /**
19 | * Created by geekgao on 15-10-25.
20 | */
21 | public class CrawlUtils {
22 | /**
23 | *
24 | * @param id appid
25 | * @return app名字
26 | */
27 | public static String getAppName(String id) throws IOException {
28 | CloseableHttpClient client = HttpClients.createDefault();
29 |
30 | HttpGet get = new HttpGet("http://zhushou.360.cn/detail/index/soft_id/" + id);
31 | CloseableHttpResponse response;
32 | try {
33 | response = client.execute(get);
34 | } catch (java.net.UnknownHostException e) {
35 | return null;
36 | }
37 | return EntityUtils.toString(response.getEntity()).split("")[1].split("<")[0];
38 | }
39 |
40 | /**
41 | *
42 | * @param xml xml文档
43 | * @param fileName 存储到这个地方
44 | */
45 | public static void writeXmlToFile(Element xml,String fileName) throws IOException {
46 | Writer fileWriter = new FileWriter(fileName);
47 | XMLWriter xmlWriter = new XMLWriter(fileWriter);
48 | xmlWriter.write(xml);
49 | xmlWriter.close();
50 | }
51 |
52 | /**
53 | * 获取需要下载的app的id
54 | * @param uri app类别页
55 | * @param limit 获取前limit个app的评论
56 | * @return
57 | */
58 | public static Set getAppIds(String uri,int limit) throws IOException {
59 | /*//因为根据网页源码每个appid会匹配到两次,所以获取limit个就必须获取2*limit次
60 | limit = limit * 2;
61 | Set appIds = null;
62 |
63 | //获取网页源码,得到appid
64 | HttpGet get = new HttpGet(uri);
65 | CloseableHttpResponse response = client.execute(get);
66 | String html = EntityUtils.toString(response.getEntity());
67 |
68 | Pattern getAppIdRegex = Pattern.compile("(?m)/detail/index/soft_id/(.*?)\"");
69 | Matcher matcher = getAppIdRegex.matcher(html);
70 |
71 | //至少有一个结果才new一个set
72 | if (matcher.find()) {
73 | appIds = new HashSet();
74 | } else {
75 | return appIds;
76 | }
77 |
78 | //控制获取的appid个数
79 | int count = 0;
80 | //把所有匹配到的appid加入到结果中
81 | do {
82 | if (count < limit) {
83 | appIds.add(matcher.group(1));
84 | count++;
85 | }
86 | } while (matcher.find());
87 |
88 | return appIds;*/
89 | Set s = new HashSet();
90 | // s.add("3581");
91 | // s.add("778702");
92 | // s.add("1586");
93 | // s.add("6276");
94 | // s.add("122437");
95 | // s.add("5632");
96 | // s.add("4107");
97 | // s.add("98008");
98 | // s.add("3100672");
99 | // s.add("2345172");
100 | // s.add("1343");
101 | // s.add("3094256");
102 | // s.add("101594");
103 | // s.add("1840672");
104 | // s.add("1643");
105 | // s.add("893686");
106 | // s.add("3032510");
107 | s.add("1936882");
108 | // s.add("7256");
109 | // s.add("727030");
110 |
111 | return s;
112 | }
113 |
114 | public static int getCommentCount(int appId) throws IOException {
115 | CloseableHttpClient client = HttpClients.createDefault();
116 |
117 | HttpGet getJson = new HttpGet("http://comment.mobilem.360.cn/comment/getComments?baike=" + appId + "&level=0&start=0&count=1&fm=home_jingjia_3&m=c1804fc5ca4ded8293acd1151efaf3db&m2=61f3c1e4d105b55aff323b20a8136c4e&v=3.2.50&re=1&nt=1&ch=493041&os=21&model=MX4+Pro&sn=4.66476154040931&cu=m76&ca1=armeabi-v7a&ca2=armeabi&ppi=1536x2560&cpc=1&startCount=4");
118 | CloseableHttpResponse response = client.execute(getJson);
119 | String json = EntityUtils.toString(response.getEntity());
120 | JSONObject jsonObject = new JSONObject(json);
121 |
122 | return jsonObject.getJSONObject("data").getInt("total");
123 | }
124 | }
--------------------------------------------------------------------------------
/com/crawl/comments/Main.java:
--------------------------------------------------------------------------------
1 | package com.crawl.comments;
2 |
3 | import org.apache.http.client.config.RequestConfig;
4 | import org.apache.http.client.methods.CloseableHttpResponse;
5 | import org.apache.http.client.methods.HttpGet;
6 | import org.apache.http.impl.client.CloseableHttpClient;
7 | import org.apache.http.impl.client.HttpClients;
8 | import org.apache.http.util.EntityUtils;
9 | import org.dom4j.DocumentHelper;
10 | import org.dom4j.Element;
11 | import org.json.JSONArray;
12 | import org.json.JSONObject;
13 |
14 | import java.io.IOException;
15 | import java.util.Set;
16 | import java.util.concurrent.ExecutorService;
17 | import java.util.concurrent.Executors;
18 |
19 | /**
20 | * Created by geekgao on 15-10-25.
21 | */
22 | public class Main {
23 | public static void main(String[] args) throws IOException, InterruptedException {
24 | //获取要抓取的app的id
25 | Set appIds = CrawlUtils.getAppIds("",1);
26 | //设置超时
27 | RequestConfig requestConfig = RequestConfig.custom().setConnectionRequestTimeout(2000).setSocketTimeout(2000).setConnectTimeout(2000).build();
28 | //建立client
29 | CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(requestConfig).build();
30 |
31 | for (String id:appIds) {
32 | //建立线程池
33 | ExecutorService executorService = Executors.newFixedThreadPool(30);
34 | //建立xml根节点
35 | Element app = DocumentHelper.createDocument().addElement("app");
36 |
37 | //添加appid节点
38 | String appName = CrawlUtils.getAppName(id);
39 | if (appName == null) {
40 | System.out.println(id + "号app名称评论抓取失败,所以跳过抓取评论");
41 | continue;
42 | }
43 |
44 | app.addElement("appid").setText(appName);
45 | System.out.println("开始抓取[" + appName + "],id=" + id);
46 |
47 | //获取app评分和各类型的评论数目信息
48 | HttpGet getJson = new HttpGet("http://comment.mobilem.360.cn/comment/getCommentTags?objid=" + id + "&fm=home_jingjia_3&m=c1804fc5ca4ded8293acd1151efaf3db&m2=61f3c1e4d105b55aff323b20a8136c4e&v=3.2.50&re=1&nt=1&ch=493041&os=21&model=MX4+Pro&sn=4.66476154040931&cu=m76&ca1=armeabi-v7a&ca2=armeabi&ppi=1536x2560&cpc=1&startCount=4");
49 | CloseableHttpResponse response = client.execute(getJson);
50 | String json = EntityUtils.toString(response.getEntity());
51 | JSONObject jsonObject = new JSONObject(json);
52 |
53 | //获取分数
54 | double overallrating = (Double.valueOf(jsonObject.getJSONObject("data").getJSONObject("score").getString("score"))) / 10;
55 | JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONArray("tag");
56 | String good = String.valueOf(jsonArray.getJSONObject(1).get("num"));
57 | String neutral = String.valueOf(jsonArray.getJSONObject(2).get("num"));
58 | String poor = String.valueOf(jsonArray.getJSONObject(3).get("num"));
59 |
60 | app.addElement("overallrating").setText(String.valueOf(overallrating));
61 | app.addElement("good").setText(String.valueOf(good));
62 | app.addElement("neutral").setText(neutral);
63 | app.addElement("poor").setText(poor);
64 |
65 | int commentsCount = CrawlUtils.getCommentCount(Integer.valueOf(id));
66 | System.out.println("[" + appName + "]总共" + commentsCount + "条评论");
67 | //每次获取的评论个数
68 | int count = 25;
69 | for (int start = 0;start < commentsCount;start += count) {
70 | //如果最后一次不够count个评论
71 | if (start + count > commentsCount) {
72 | count = commentsCount - start;
73 | }
74 |
75 | // System.out.println("从第" + start + "个评论开始抓取");
76 | executorService.submit(new CrawlComments(app, start, count, Integer.valueOf(id)));
77 | }
78 |
79 | executorService.shutdown();
80 | while (true) {
81 | if (executorService.isTerminated()) {
82 | break;
83 | }
84 | Thread.sleep(1000);
85 | }
86 |
87 | CrawlUtils.writeXmlToFile(app,"/home/geekgao/comments/" + System.currentTimeMillis() + ".xml");
88 | }
89 | client.close();
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/medical_question/GetAnswers.java:
--------------------------------------------------------------------------------
1 | import org.dom4j.DocumentHelper;
2 | import org.jsoup.Jsoup;
3 | import org.jsoup.nodes.Document;
4 | import org.jsoup.nodes.Element;
5 | import org.jsoup.select.Elements;
6 |
7 | import java.io.File;
8 | import java.io.IOException;
9 | import java.net.SocketTimeoutException;
10 | import java.util.*;
11 |
12 | public class GetAnswers {
13 |
14 | Map categoryLink;//获取八个"类别"的名字和链接
15 |
16 | public static void main(String[] args) {
17 | new GetAnswers().launch();
18 | }
19 |
20 | public void launch() {
21 | getCategoryLink();
22 | getAllAnswer();
23 | }
24 |
25 | private void getAllAnswer() {
26 | Set categoryName = categoryLink.keySet();
27 | for (String name:categoryName) {
28 | if (name.equals("呼吸内科") || name.equals("内分泌科") || name.equals("肾内科") || name.equals("消化内科") || name.equals("血液科") || name.equals("风湿科")) {
29 | continue;
30 | }
31 | String webAddress = categoryLink.get(name);//“页”的链接
32 |
33 | //链接地址不为空就表明还有下一页(到最后一页后设置为空)
34 | //循环遍历每一页
35 | int pageCount = 1;//记录抓取的页数
36 | int allPageCount = 0;//记录总共需要抓取的网页页数
37 |
38 | try {
39 | Document tempDom = Jsoup.connect(webAddress).userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 QIHU 360EE").timeout(5000).get();
40 | allPageCount = Integer.parseInt(tempDom.select(".pager-last").attr("href").split("=")[1].split("#")[0])/25 + 1;
41 | } catch (IOException e) {
42 | e.printStackTrace();
43 | }
44 |
45 | for (int i = 1;i <= allPageCount;i++) {
46 | List answerAddressList = new LinkedList();//先获取一页之内每一个有回答的问题的链接,存储在这个List里
47 | Document dom = null;
48 | try {
49 | dom = Jsoup.connect(webAddress + "?pn=" + (i - 1) * 25 + "#list").userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 QIHU 360EE").timeout(5000).get();
50 | Elements allQuestion = dom.select(".question-list").select(".question-item");
51 | for (Element question:allQuestion) {
52 | if (!question.select(".title-line").select(".question-answer-num").text().equals("0回答")) {
53 | answerAddressList.add(question.select(".title-line").select(".title-container").select(".question-title").attr("abs:href"));
54 | }
55 | }
56 | } catch (SocketTimeoutException e) {
57 | System.out.println("连接超时:" + webAddress);
58 | } catch (IOException e) {
59 | e.printStackTrace();
60 | System.out.println("意外错误:" + webAddress);
61 | }
62 | //上面这部分获取了一页上所有问题的链接
63 |
64 | //下面就进每一个问题的页面,抓取信息放到dom4j的Document中,最后用多线程写入文件
65 | Map answerDom4jDocument = new HashMap();//问题题目对应题目的document(题目是文件的文件名)
66 | for (String answerAddress:answerAddressList) {
67 | //System.out.println("正在抓[" + name + "]类别下的" + answerAddress);
68 | try {
69 | Document answerDom = Jsoup.connect(answerAddress).userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 QIHU 360EE").timeout(5000).get();
70 | String type = name;
71 | String title = answerDom.title();
72 | String answerNum = answerDom.select("#wgt-answers").select(".hd").select("h2").text().split("条")[0];
73 |
74 | org.dom4j.Document document = DocumentHelper.createDocument();//这个问题的dom树
75 | answerDom4jDocument.put(title,document);
76 | org.dom4j.Element root = document.addElement("root");
77 | root.addElement("question").addText(title).addAttribute("type",name);
78 | org.dom4j.Element answers = root.addElement("answers").addAttribute("number",answerNum);
79 |
80 | //答案有三种【题主选择的最佳答案,网友选择的最佳答案,其他答案】
81 |
82 | Elements allAnswer = answerDom.select(".bd.answer");
83 | //分别处理每一页的所有答案
84 | for (Element answer:allAnswer) {
85 | String text = answer.select(".answer-text").text();
86 | String username = answer.select(".line.info.f-aid").select(".user-name").text();
87 | if (username.equals("")) {
88 | username = answer.select(".line.info.f-aid").select(".mavin-name").text();
89 | }
90 | String grade = answer.select(".line.info.f-aid").text().split(" ")[answer.select(".line.info.f-aid").text().split(" ").length - 1];
91 | if (grade.equals("最快回答")) {
92 | grade = answer.select(".line.info.f-aid").text().split(" ")[answer.select(".line.info.f-aid").text().split(" ").length - 2];
93 | }
94 |
95 | //不是“*级”那就是一个专家的称号
96 | String author = "null";
97 | if (!grade.contains("级")) {
98 | author = grade;
99 | grade = answer.select(".line.info.f-aid").select(".f-orange.f-yahei.ml-5").select("span").text();
100 | }
101 |
102 | String support = answer.select(".line.content").select(".grid-r.f-aid").select(".evaluate").attr("data-evaluate");
103 | String unsupport = answer.select(".line.content").select(".grid-r.f-aid").select(".evaluate.evaluate-bad").attr("data-evaluate");
104 |
105 | org.dom4j.Element ans = answers.addElement("answer").addAttribute("username",username).addAttribute("grade",grade).addAttribute("author",author);
106 | ans.addElement("text").addText(text);
107 | ans.addElement("support").addText(support);
108 | ans.addElement("unsupport").addText(unsupport);
109 |
110 | if (answer.hasClass("wgt-replyer-best")) {
111 | ans.addElement("best_answer").addText("yes");
112 | } else {
113 | ans.addElement("best_answer").addText("no");
114 | }
115 | }
116 |
117 |
118 | } catch (SocketTimeoutException e) {
119 | System.out.println("连接超时:" + answerAddress);
120 | } catch (IOException e) {
121 | e.printStackTrace();
122 | System.out.println("意外错误:" + answerAddress);
123 | }
124 | }
125 |
126 | Set answerTitle = answerDom4jDocument.keySet();
127 | //多线程写入文件
128 | for (String title:answerTitle) {
129 | //分类目录不存在时创建文件夹
130 | if (!new File("/home/geekgao/medical_question/" + name).exists()) {
131 | new File("/home/geekgao/medical_question/" + name).mkdir();
132 | }
133 |
134 | new WriteAnswersToFile("/home/geekgao/medical_question/" + name + "/" + System.currentTimeMillis() + ".xml",answerDom4jDocument.get(title)).start();
135 | }
136 |
137 | System.out.println("[" + name + "]类别第" + pageCount++ +"页已写入文件.");
138 |
139 | }
140 | }
141 | }
142 |
143 | public GetAnswers() {
144 | categoryLink = new HashMap();
145 | }
146 |
147 | private void getCategoryLink() {
148 | try {
149 | Document dom = Jsoup.connect("http://zhidao.baidu.com/browse/790").userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 QIHU 360EE").timeout(5000).get();
150 | Element categorys = dom.select(".category-list").first();
151 | Elements allLi = categorys.select("li");
152 |
153 | for (Element li:allLi) {
154 | categoryLink.put(li.text(), "http://zhidao.baidu.com" + li.select("a").attr("href"));
155 | }
156 |
157 | } catch (SocketTimeoutException e) {
158 | System.out.println("连接超时:http://zhidao.baidu.com/browse/790");
159 | } catch (IOException e) {
160 | e.printStackTrace();
161 | System.out.println("连接超时:http://zhidao.baidu.com/browse/790");
162 | }
163 | }
164 | }
165 |
--------------------------------------------------------------------------------
/medical_question/WriteAnswersToFile.java:
--------------------------------------------------------------------------------
1 | import org.dom4j.Document;
2 | import org.dom4j.io.OutputFormat;
3 | import org.dom4j.io.XMLWriter;
4 |
5 | import java.io.File;
6 | import java.io.FileWriter;
7 | import java.io.IOException;
8 |
9 |
10 | public class WriteAnswersToFile extends Thread {
11 |
12 | private String address;//输出文件到哪个地址
13 | private Document dom;//将这个dom放入文件里存储
14 |
15 | /**
16 | *
17 | * @param address 文件将存储到这个地址
18 | * @param dom asd 即将存储到硬盘的xml文件
19 | */
20 | public WriteAnswersToFile(String address,Document dom) {
21 | this.address = address;
22 | this.dom = dom;
23 | }
24 |
25 | public void run() {
26 | OutputFormat outFormat = OutputFormat.createPrettyPrint();
27 | outFormat.setEncoding("UTF-8");
28 |
29 | try {
30 | XMLWriter xml = new XMLWriter(new FileWriter(new File(address)),outFormat);
31 | xml.write(dom);
32 | xml.close();
33 | } catch (IOException e) {
34 | e.printStackTrace();
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/文本情感分析/CalcWeightAndDoc.java:
--------------------------------------------------------------------------------
1 | package org.geekgao.one;
2 |
3 | import org.dom4j.Document;
4 | import org.dom4j.DocumentException;
5 | import org.dom4j.DocumentHelper;
6 | import org.dom4j.Element;
7 | import org.dom4j.io.OutputFormat;
8 | import org.dom4j.io.SAXReader;
9 | import org.dom4j.io.XMLWriter;
10 |
11 | import java.io.*;
12 | import java.util.*;
13 |
14 | public class CalcWeightAndDoc {
15 | //这三个常量是训练文章的存储的地方
16 | private final String positiveArticlePath = "/home/geekgao/practice/positive";
17 | private final String negativeArticlePath = "/home/geekgao/practice/negative";
18 | private final String unsureArticlePath = "/home/geekgao/practice/unsure";
19 |
20 | //这两个是词典的位置
21 | private final String posiDictPath = "/home/geekgao/朴素贝叶斯/台湾大学情感词典/ntusd-positive.txt";
22 | private final String negaDictPath = "/home/geekgao/朴素贝叶斯/台湾大学情感词典/ntusd-negative.txt";
23 |
24 | private Map positiveWord;//存储积极词汇的map
25 | private Map negativeWord;//存储消极词汇的map
26 | private Map unsureWord;//存储不确定词汇的map
27 |
28 | //这两个存储词典中的词语
29 | private Set positiveDict;
30 | private Set negativeDict;
31 |
32 | //需要的全局变量
33 | private boolean isGroup = false;
34 | String strTemp;//从xml文件解析词语时用到的临时变量
35 |
36 | public static void main(String[] args) {
37 | new CalcWeightAndDoc().launch();
38 | }
39 |
40 | public void launch() {
41 | positiveDict = new HashSet();
42 | negativeDict = new HashSet();
43 |
44 | readEmotionWord(positiveDict,posiDictPath);
45 | readEmotionWord(negativeDict,negaDictPath);
46 |
47 | //这里两个地址是目标地址,生成的文件就在下面两个地址里
48 | calcDoc("/home/geekgao/doc.xml");
49 | calcWeight("/home/geekgao/weight.xml");
50 |
51 | System.out.println("执行完毕!");
52 | }
53 |
54 | public void readEmotionWord(Set Dict, String dictPath) {
55 | File file = new File(dictPath);
56 | BufferedReader reader = null;
57 | try {
58 | String t;
59 | reader = new BufferedReader(new FileReader(file));
60 | while ((t = reader.readLine()) != null) {
61 | Dict.add(t);
62 | }
63 | } catch (IOException e) {
64 | e.printStackTrace();
65 | } finally {
66 | if (reader != null) {
67 | try {
68 | reader.close();
69 | } catch (IOException e) {
70 |
71 | }
72 | }
73 | }
74 | }
75 |
76 | //参数是生成的xml文件的路径与名字
77 | public void calcDoc(String resultPath) {
78 | File negative[] = new File(negativeArticlePath).listFiles();
79 | File positive[] = new File(positiveArticlePath).listFiles();
80 | File unsure[] = new File(unsureArticlePath).listFiles();
81 | double negCount = 0;
82 | double posCount = 0;
83 | double unsCount = 0;
84 |
85 | try {
86 | for (File file : negative) {
87 | if (file.isFile()) {
88 | negCount++;
89 | }
90 | }
91 |
92 | for (File file : positive) {
93 | if (file.isFile()) {
94 | posCount++;
95 | }
96 | }
97 |
98 | for (File file : unsure) {
99 | if (file.isFile()) {
100 | unsCount++;
101 | }
102 | }
103 | } catch(NullPointerException e){
104 | System.out.println("程序因为空引用结束!");
105 | System.exit(1);
106 | }
107 |
108 | //建立document对象
109 | try {
110 | Document document = DocumentHelper.createDocument();
111 |
112 | Element root = document.addElement("root");//添加文档根
113 | Element request = root.addElement("prior"); //添加root的子节点
114 | request.addAttribute("pNegative", String.valueOf(negCount/(negCount + posCount + unsCount)));
115 | request.addAttribute("pPositive", String.valueOf(posCount/(negCount + posCount + unsCount)));
116 | request.addAttribute("pUnsure", String.valueOf(unsCount/(negCount + posCount + unsCount)));
117 |
118 | OutputFormat format = OutputFormat.createPrettyPrint();
119 | format.setEncoding("UTF-8");//根据需要设置编码
120 | // 输出全部原始数据,并用它生成新的我们需要的XML文件
121 | XMLWriter writer2 = new XMLWriter(new FileWriter(new File(resultPath)), format);
122 | writer2.write(document); //输出到文件
123 | writer2.close();
124 | } catch (UnsupportedEncodingException e) {
125 | e.printStackTrace();
126 | } catch (IOException e) {
127 | e.printStackTrace();
128 | }
129 | }
130 |
131 | //参数是生成的xml文件的路径与名字
132 | public void calcWeight(String resultPath) {
133 | positiveWord = new HashMap();
134 | negativeWord = new HashMap();
135 | unsureWord = new HashMap();
136 |
137 | //计算各自类别所有文章中每个词汇出现的次数
138 | getWordMap(positiveWord,positiveArticlePath);
139 | getWordMap(negativeWord,negativeArticlePath);
140 | getWordMap(unsureWord,unsureArticlePath);
141 |
142 | //存储计算后验概率公式中的分母的第一部分,第二部分等于1
143 | double allPosWeight = 0;
144 | double allNegWeight = 0;
145 | double allUnsWeight = 0;
146 |
147 | //保留各个Map的情感词汇
148 | keepEmotionWord(positiveWord);
149 | keepEmotionWord(negativeWord);
150 | keepEmotionWord(unsureWord);
151 |
152 | /*System.out.println(positiveWord);
153 | System.out.println(negativeWord);
154 | System.out.println(unsureWord);*/
155 |
156 | /*
157 | (1)遍历positiveWord这个Map,得到里面的各个词语在积极词汇中的次数,再在其他两个Map中查看是否有这个词语,有,就把其他的那个
158 | 次数加到当前Map的当前词语的value上,并且删除那个Map中的当前词语;没有这个词的话,那么在那个;类别中出现的次数就是0.
159 | (2)遍历negativeWord,不用看positiveWord了,只需看unsureWord,处理方法同上。
160 | (3)遍历unsureWord,这些词在其他两个类别中都是0,直接得到在当前类别中的值
161 | */
162 |
163 | try {
164 | Document xmlFile = DocumentHelper.createDocument();//建立一个xml文档
165 | Element root = xmlFile.addElement("root");
166 |
167 | Set word = positiveWord.keySet();
168 | for (Iterator it = word.iterator();it.hasNext();) {
169 | String tmp = (String)it.next();
170 | Integer count = positiveWord.get(tmp);
171 | allPosWeight += count;
172 | }
173 |
174 | word = negativeWord.keySet();
175 | for (Iterator it = word.iterator();it.hasNext();) {
176 | String tmp = (String)it.next();
177 | Integer count = negativeWord.get(tmp);
178 | allNegWeight += count;
179 | }
180 |
181 | word = unsureWord.keySet();
182 | for (Iterator it = word.iterator();it.hasNext();) {
183 | String tmp = (String)it.next();
184 | Integer count = unsureWord.get(tmp);
185 | allUnsWeight += count;
186 | }
187 |
188 | word = positiveWord.keySet();
189 | for (Iterator it = word.iterator(); it.hasNext(); ) {
190 | Element wd = root.addElement("word");//建立新的词语节点
191 | String tmp = (String) it.next();
192 | wd.addAttribute("data",tmp);
193 | Integer count;
194 |
195 | count = positiveWord.get(tmp);
196 | wd.addAttribute("pPositive",String.valueOf(count / (allPosWeight + 1)));
197 |
198 | if (negativeWord.containsKey(tmp)) {
199 | count = negativeWord.get(tmp);
200 | negativeWord.remove(tmp);
201 | wd.addAttribute("pNegative",String .valueOf(count / (allNegWeight + 1)));
202 | } else {
203 | wd.addAttribute("pNegative","0");
204 | }
205 |
206 | if (unsureWord.containsKey(tmp)) {
207 | count = unsureWord.get(tmp);
208 | unsureWord.remove(tmp);
209 | wd.addAttribute("pUnsure",String.valueOf(count / (allUnsWeight + 1)));
210 | } else {
211 | wd.addAttribute("pUnsure","0");
212 | }
213 | }
214 |
215 | word = negativeWord.keySet();
216 | for (Iterator it = word.iterator(); it.hasNext(); ) {
217 | Element wd = root.addElement("word");//建立新的词语节点
218 | String tmp = (String) it.next();
219 | wd.addAttribute("data",tmp);
220 | Integer count;
221 |
222 | wd.addAttribute("pPositive","0");
223 | count = negativeWord.get(tmp);
224 | wd.addAttribute("pNegative",String .valueOf(count / (allNegWeight + 1)));
225 |
226 | if (unsureWord.containsKey(tmp)) {
227 | count = unsureWord.get(tmp);
228 | unsureWord.remove(tmp);
229 | wd.addAttribute("pUnsure",String.valueOf(count / (allUnsWeight + 1)));
230 | } else {
231 | wd.addAttribute("pUnsure","0");
232 | }
233 | }
234 |
235 | word = unsureWord.keySet();
236 | for (Iterator it = word.iterator(); it.hasNext(); ) {
237 | Element wd = root.addElement("word");//建立新的词语节点
238 | String tmp = (String) it.next();
239 | wd.addAttribute("data",tmp);
240 | Integer count;
241 |
242 | wd.addAttribute("pPositive","0");
243 | wd.addAttribute("pNegative","0");
244 | count = unsureWord.get(tmp);
245 | wd.addAttribute("pUnsure",String.valueOf(count / (allUnsWeight + 1)));
246 | }
247 |
248 | //输出全部原始数据,在编译器中显示
249 | OutputFormat format = OutputFormat.createPrettyPrint();
250 | format.setEncoding("UTF-8");//根据需要设置编码
251 | // 输出全部原始数据,并用它生成新的我们需要的XML文件
252 | XMLWriter writer2 = new XMLWriter(new FileWriter(new File(resultPath)), format);
253 | writer2.write(xmlFile); //输出到文件
254 | writer2.close();
255 | } catch (UnsupportedEncodingException e) {
256 | e.printStackTrace();
257 | } catch (IOException e) {
258 | e.printStackTrace();
259 | }
260 | }
261 |
262 | public void getWordMap(Map wordMap,String articlePath) {
263 | File articleArray[] = new File(articlePath).listFiles();//将文件夹中的文件都读取进来,下面就一个个的分析
264 |
265 | for (int i = 0;i < articleArray.length;i++) {
266 | calcFreauency(wordMap,articleArray[i]);
267 | }
268 | }
269 |
270 | //解析出文章中的词语,并且映射上频数
271 | public void calcFreauency(Map wordMap,File article) {
272 | try {
273 | //取得dom4j的解析器
274 | SAXReader reader = new SAXReader();
275 | //取得代表文档的Document对象
276 | Document document = reader.read(article);
277 | //取得根结点
278 | Element root = document.getRootElement();//取得根节点
279 |
280 | List> list1 = root.elements();//取得的子节点
281 | List> sentence_list = ((Element)list1.get(0)).elements();//下的集合
282 |
283 | List> tok_list;//下的集合
284 | //Dom4jDemo t = new Dom4jDemo();
285 | //遍历节点
286 | for (int i = 0; i < sentence_list.size(); i++) {
287 | tok_list = ((Element)sentence_list.get(i)).elements();//获得每个sentence的tok集合
288 | for (int j = 0;j < tok_list.size();j++) {
289 | setWordMap((Element)tok_list.get(j),wordMap);
290 | }
291 | }
292 | } catch (DocumentException e) {
293 | e.printStackTrace();
294 | }
295 | }
296 |
297 | public void setWordMap(Element tok,Map wordMap) {
298 | String type,text;
299 | List> list;
300 |
301 | if (!(tok.getName().equals("tok"))) {//如果不是tok节点,那么就不用处理了
302 | return ;
303 | }
304 | //获取属性type
305 | type = tok.attributeValue("type");
306 | //只访问原子节点
307 | if (type.equals("atom") && isGroup) {
308 | text = tok.getText();
309 | text = text.replace("\t", "");
310 | text = text.replace("\n", "");
311 | /*System.out.print(text + " ");*/
312 | strTemp = strTemp + text;
313 | } else if (type.equals("group")) {
314 | isGroup = true;
315 | strTemp = "";
316 | list = tok.elements();
317 | for (int k = 0,size3 = list.size();k < size3;k++) {
318 | tok = (Element)list.get(k);
319 | setWordMap(tok,wordMap);
320 | }
321 | Integer count = wordMap.get(strTemp);//计算当前map里面的当前text对应的次数
322 | wordMap.put(strTemp,count == null?1:count + 1);
323 | isGroup = false;
324 | }
325 | }
326 |
327 | public void keepEmotionWord(Map wordMap) {
328 | Set word = wordMap.keySet();
329 |
330 | for (Iterator it = word.iterator();it.hasNext();) {
331 | String tmp = (String)it.next();
332 | //两个情感词典都不包含这个词语,那么就把这个词语去掉
333 | if (!positiveDict.contains(tmp) && !negativeDict.contains(tmp)) {
334 | it.remove();
335 | }
336 | }
337 | }
338 | }
339 |
--------------------------------------------------------------------------------
/文本情感分析/EmotionJudge.java:
--------------------------------------------------------------------------------
1 | package org.geekgao.one;
2 |
3 | import org.dom4j.Document;
4 | import org.dom4j.DocumentException;
5 | import org.dom4j.Element;
6 | import org.dom4j.io.SAXReader;
7 |
8 | import java.io.BufferedReader;
9 | import java.io.File;
10 | import java.io.FileReader;
11 | import java.io.IOException;
12 | import java.util.*;
13 |
14 | public class EmotionJudge {
15 | private double priorPositive;//积极先验概率
16 | private double priorNegative;//消极先验概率
17 | private double priorUnsure;//不确定先验概率
18 |
19 | private Map backPositive;//词语的后验概率
20 | private Map backNegative;//同上
21 | private Map backUnsure;//同上
22 |
23 | private boolean isGroup = false;
24 | private String strTemp;
25 | private Map articleWordMap;
26 |
27 | //这两个是词典的位置
28 | private final String posiDictPath = "/home/geekgao/朴素贝叶斯/台湾大学情感词典/ntusd-positive.txt";
29 | private final String negaDictPath = "/home/geekgao/朴素贝叶斯/台湾大学情感词典/ntusd-negative.txt";
30 |
31 | //这两个存储词典中的词语
32 | private Set positiveDict;
33 | private Set negativeDict;
34 |
35 | public static void main(String [] args) {
36 | new EmotionJudge().launch();
37 | }
38 |
39 | public void launch() {
40 | getPrior();
41 | getBack();
42 |
43 | positiveDict = new HashSet();
44 | negativeDict = new HashSet();
45 | readEmotionWord(positiveDict, posiDictPath);
46 | readEmotionWord(negativeDict, negaDictPath);
47 | calc();
48 | }
49 |
50 | //获得先验概率
51 | public void getPrior() {
52 | SAXReader sax = new SAXReader();
53 | try {
54 | //从这读取doc的值
55 | Document document = sax.read(new File("/home/geekgao/doc.xml"));
56 | Element root = document.getRootElement();
57 | List prior = root.elements();
58 |
59 | priorPositive = Double.valueOf(prior.get(0).attributeValue("pPositive"));
60 | priorNegative = Double.valueOf(prior.get(0).attributeValue("pNegative"));
61 | priorUnsure = Double.valueOf(prior.get(0).attributeValue("pUnsure"));
62 |
63 | } catch (DocumentException e) {
64 | e.printStackTrace();
65 | }
66 | }
67 |
68 | //获得后验概率
69 | public void getBack() {
70 | SAXReader sax = new SAXReader();
71 | try {
72 | //从这读取weight的值
73 | Document document = sax.read(new File("/home/geekgao/weight.xml"));
74 | Element root = document.getRootElement();
75 | List back = root.elements();
76 |
77 | backNegative = new HashMap();
78 | backPositive = new HashMap();
79 | backUnsure = new HashMap();
80 |
81 | double backPos;//积极后验概率
82 | double backNeg;//消极后验概率
83 | double backUns;//不确定后验概率
84 | String word;
85 |
86 | for (int i = 0;i < back.size();i++) {
87 | backPos = Double.valueOf(back.get(i).attributeValue("pPositive"));
88 | backNeg = Double.valueOf(back.get(i).attributeValue("pNegative"));
89 | backUns = Double.valueOf(back.get(i).attributeValue("pUnsure"));
90 | word = back.get(i).attributeValue("data");
91 |
92 | backPositive.put(word,backPos);
93 | backNegative.put(word,backNeg);
94 | backUnsure.put(word,backUns);
95 | }
96 | } catch (DocumentException e) {
97 | e.printStackTrace();
98 | }
99 | }
100 |
101 | public void calc() {
102 | articleWordMap = new HashMap();
103 |
104 | //读取文章
105 | calcFreauency(articleWordMap,new File("/home/geekgao/朴素贝叶斯/500trainblogxml/positiveout/1377331000713.txt"));
106 | keepEmotionWord(articleWordMap);
107 |
108 | double allBackPos = 1;
109 | double allBackNeg = 1;
110 | double allBackUns = 1;
111 |
112 | Set word = articleWordMap.keySet();
113 |
114 | for (Iterator it = word.iterator();it.hasNext();) {
115 | String tmp = (String)it.next();
116 | double back;
117 | if (backPositive.containsKey(tmp)) {
118 | back = backPositive.get(tmp);
119 | allBackPos = Math.pow(back,articleWordMap.get(tmp)) * allBackPos;
120 | }
121 |
122 | if (backNegative.containsKey(tmp)) {
123 | back = backNegative.get(tmp);
124 | allBackNeg= Math.pow(back,articleWordMap.get(tmp)) * allBackNeg;
125 | }
126 |
127 | if (backUnsure.containsKey(tmp)) {
128 | back = backUnsure.get(tmp);
129 | allBackUns = Math.pow(back,articleWordMap.get(tmp)) * allBackUns;
130 | }
131 | }
132 |
133 | double resultPositive;
134 | double resultNegative;
135 | double resultUnsure;
136 |
137 | resultPositive = priorPositive * allBackPos;
138 | resultNegative = priorNegative * allBackNeg;
139 | resultUnsure = priorUnsure * allBackUns;
140 |
141 | System.out.println("积极:" + resultPositive);
142 | System.out.println("消极:" + resultNegative);
143 | System.out.println("不确定:" + resultUnsure);
144 | }
145 |
146 | //解析出文章中的词语,并且映射上频数
147 | public void calcFreauency(Map wordMap,File article) {
148 | try {
149 | //取得dom4j的解析器
150 | SAXReader reader = new SAXReader();
151 | //取得代表文档的Document对象
152 | Document document = reader.read(article);
153 | //取得根结点
154 | Element root = document.getRootElement();//取得根节点
155 |
156 | List> list1 = root.elements();//取得的子节点
157 | List> sentence_list = ((Element)list1.get(0)).elements();//下的集合
158 |
159 | List> tok_list;//下的集合
160 | //遍历节点
161 | for (int i = 0; i < sentence_list.size(); i++) {
162 | tok_list = ((Element)sentence_list.get(i)).elements();//获得每个sentence的tok集合
163 | for (int j = 0;j < tok_list.size();j++) {
164 | setWordMap((Element)tok_list.get(j),wordMap);
165 | }
166 | }
167 | } catch (DocumentException e) {
168 | e.printStackTrace();
169 | }
170 | }
171 |
172 | public void setWordMap(Element tok,Map wordMap) {
173 | String type,text;
174 | List> list;
175 |
176 | if (!(tok.getName().equals("tok"))) {//如果不是tok节点,那么就不用处理了
177 | return ;
178 | }
179 | //获取属性type
180 | type = tok.attributeValue("type");
181 | //只访问原子节点
182 | if (type.equals("atom") && isGroup) {
183 | text = tok.getText();
184 | text = text.replace("\t", "");
185 | text = text.replace("\n", "");
186 | /*System.out.print(text + " ");*/
187 | strTemp = strTemp + text;
188 | } else if (type.equals("group")) {
189 | isGroup = true;
190 | strTemp = "";
191 | list = tok.elements();
192 | for (int k = 0,size3 = list.size();k < size3;k++) {
193 | tok = (Element)list.get(k);
194 | setWordMap(tok,wordMap);
195 | }
196 | Integer count = wordMap.get(strTemp);//计算当前map里面的当前text对应的次数
197 | wordMap.put(strTemp,count == null?1:count + 1);
198 | isGroup = false;
199 | }
200 | }
201 |
202 | public void keepEmotionWord(Map wordMap) {
203 | Set word = wordMap.keySet();
204 |
205 | for (Iterator it = word.iterator();it.hasNext();) {
206 | String tmp = (String)it.next();
207 | //两个情感词典都不包含这个词语,那么就把这个词语去掉
208 | if (!positiveDict.contains(tmp) && !negativeDict.contains(tmp)) {
209 | it.remove();
210 | }
211 | }
212 | }
213 |
214 | public void readEmotionWord(Set Dict, String dictPath) {
215 | File file = new File(dictPath);
216 | BufferedReader reader = null;
217 | try {
218 | String t;
219 | reader = new BufferedReader(new FileReader(file));
220 | while ((t = reader.readLine()) != null) {
221 | Dict.add(t);
222 | }
223 | } catch (IOException e) {
224 | e.printStackTrace();
225 | } finally {
226 | if (reader != null) {
227 | try {
228 | reader.close();
229 | } catch (IOException e) {
230 |
231 | }
232 | }
233 | }
234 | }
235 | }
236 |
--------------------------------------------------------------------------------
/西邮导游系统源码/数据/data.txt:
--------------------------------------------------------------------------------
1 | 1 114 486 2 45
2 | 2 114 441 1 45 3 39 4 71
3 | 3 153 441 2 39
4 | 4 114 370 2 71 5 38
5 | 5 76 370 4 38 6 56
6 | 6 76 314 5 56 7 13 9 73
7 | 7 63 314 6 13
8 | 8 63 241 9 13
9 | 9 76 241 6 73 8 13 10 26 68 13
10 | 10 76 215 9 26 11 70
11 | 11 146 215 10 70 12 36
12 | 12 172 241 11 36 13 43
13 | 13 211 222 64 58 34 32 12 43
14 | 14 211 109 64 55 15 55
15 | 15 266 109 16 45 19 56 14 55
16 | 16 311 109 17 61 20 56 15 45
17 | 17 372 109 16 61 21 56 26 44
18 | 18 224 53 19 42
19 | 19 266 53 18 42 20 45 15 56
20 | 20 311 53 16 56 19 45 21 61
21 | 21 372 53 17 56 20 61 22 37
22 | 22 409 53 21 37 23 36
23 | 23 445 53 22 36 24 32
24 | 24 445 85 23 32 25 91 27 24
25 | 25 536 85 24 91
26 | 26 416 109 17 44 27 29 28 19
27 | 27 445 109 24 24 26 29 29 19
28 | 28 416 128 33 47 26 19 29 29
29 | 29 445 128 27 19 28 29 30 55
30 | 30 500 128 29 55 31 47
31 | 31 500 175 32 28 38 47 30 47
32 | 32 528 175 31 28
33 | 33 416 175 67 50 36 47 28 47
34 | 34 243 222 35 65 40 106 13 32
35 | 35 308 222 34 65 66 32 36 108 41 93
36 | 36 416 222 33 47 35 108 37 67 43 93
37 | 37 483 222 36 67 38 17 44 62
38 | 38 500 222 37 17 39 25 31 47
39 | 39 525 222 38 25
40 | 40 205 321 34 106 53 128
41 | 41 308 315 48 46 35 93 42 49
42 | 42 357 315 49 27 41 49 43 59
43 | 43 416 315 36 93 42 59 45 26
44 | 44 458 279 37 62 45 39
45 | 45 442 315 43 26 44 39 46 30
46 | 46 428 342 49 71 45 30 47 21
47 | 47 419 361 50 62 52 27 46 21
48 | 48 308 361 50 49 41 46
49 | 49 357 342 50 19 42 27 46 71
50 | 50 357 361 48 49 49 19 51 25 47 62
51 | 51 357 386 50 25 52 51 54 44
52 | 52 408 386 51 51 55 48 56 93 47 27
53 | 53 267 433 40 128 57 67
54 | 54 357 430 51 44 55 31 57 34
55 | 55 388 430 52 48 54 31 58 36
56 | 56 492 428 52 93 63 71
57 | 57 331 453 53 67 54 34 59 38
58 | 58 402 464 55 36 59 46 62 69
59 | 59 358 480 57 38 58 46 60 27
60 | 60 358 507 59 27 61 67
61 | 61 291 507 60 67
62 | 62 447 517 58 69 63 80
63 | 63 523 492 56 71 62 80
64 | 64 211 164 65 34 13 58 14 55
65 | 65 245 164 64 34
66 | 66 308 190 35 32
67 | 67 366 175 33 50
68 | 68 89 241 9 13
--------------------------------------------------------------------------------
/西邮导游系统源码/数据/view.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/数据/view.txt
--------------------------------------------------------------------------------
/西邮导游系统源码/源码/images/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/images/icon.png
--------------------------------------------------------------------------------
/西邮导游系统源码/源码/images/map.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/images/map.jpg
--------------------------------------------------------------------------------
/西邮导游系统源码/源码/org/geekgao/guide/GuideAlgorithm.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/org/geekgao/guide/GuideAlgorithm.java
--------------------------------------------------------------------------------
/西邮导游系统源码/源码/org/geekgao/guide/GuideSystem.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/org/geekgao/guide/GuideSystem.java
--------------------------------------------------------------------------------
/西邮导游系统源码/源码/org/geekgao/guide/GuideUtil.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/org/geekgao/guide/GuideUtil.java
--------------------------------------------------------------------------------
/西邮导游系统源码/源码/org/geekgao/guide/Vertex.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/org/geekgao/guide/Vertex.java
--------------------------------------------------------------------------------