├── .gitignore ├── .idea ├── Java.iml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── CrawlDouyuDanmu ├── .gitignore ├── .idea │ ├── .name │ ├── compiler.xml │ ├── copyright │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── uiDesigner.xml │ ├── vcs.xml │ └── workspace.xml ├── CrawlDouyuDanmu.iml ├── pom.xml ├── readme.md └── src │ └── main │ └── java │ ├── DouyuBarrageHandler.java │ ├── MessageType.java │ └── Start.java ├── CrawlPandaDanmu ├── .gitignore ├── .idea │ ├── .name │ ├── compiler.xml │ ├── copyright │ │ └── profiles_settings.xml │ ├── encodings.xml │ ├── libraries │ │ ├── Maven__com_alibaba_fastjson_1_2_44.xml │ │ ├── Maven__log4j_log4j_1_2_17.xml │ │ ├── Maven__org_jsoup_jsoup_1_8_3.xml │ │ ├── Maven__org_slf4j_slf4j_api_1_8_0_beta0.xml │ │ └── Maven__org_slf4j_slf4j_log4j12_1_8_0_beta0.xml │ ├── misc.xml │ ├── modules.xml │ ├── uiDesigner.xml │ ├── vcs.xml │ └── workspace.xml ├── CrawlPandaDanmu.iml ├── pom.xml ├── readme.md └── src │ └── main │ ├── java │ ├── Crawl.java │ ├── MessageHandler.java │ ├── Start.java │ └── Utils.java │ └── resources │ ├── config.properties │ └── log4j.properties ├── DownloadImg.java ├── InsulinPump ├── InsulinPump.java ├── People.java ├── PumpMain.java └── PumpWindow.java ├── Main.java ├── MicroWave ├── MicroWave.java ├── MicroWaveMain.java └── MicroWaveWindow.java ├── PullBookinfo ├── GetBookInfoThread.java ├── Main.java ├── WriteBookInfoToFile.java └── WriteInfoToDB.java ├── README.md ├── Struts2FileUpAndDown ├── src │ ├── com │ │ └── geekgao │ │ │ └── file │ │ │ └── FileAction.java │ └── struts.xml └── web │ ├── WEB-INF │ └── web.xml │ ├── downFile.jsp │ ├── index.jsp │ └── upFile.jsp ├── com └── crawl │ └── comments │ ├── CrawlComments.java │ ├── CrawlUtils.java │ └── Main.java ├── medical_question ├── GetAnswers.java └── WriteAnswersToFile.java ├── 文本情感分析 ├── CalcWeightAndDoc.java └── EmotionJudge.java └── 西邮导游系统源码 ├── 数据 ├── data.txt └── view.txt └── 源码 ├── images ├── icon.png └── 
map.jpg └── org └── geekgao └── guide ├── GuideAlgorithm.java ├── GuideSystem.java ├── GuideUtil.java └── Vertex.java /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.log 3 | -------------------------------------------------------------------------------- /.idea/Java.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 28 | 29 | 34 | 35 | 36 | 37 | 38 | true 39 | DEFINITION_ORDER 40 | 41 | 42 | 43 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 48 | 49 | 50 | 51 | 继续读取真正的消息内容 52 | 53 | 54 | 55 | 57 | 58 | 77 | 78 | 79 | 80 | 81 | true 82 | DEFINITION_ORDER 83 | 84 | 85 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 
169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 187 | 188 | 189 | 190 | 192 | 193 | 197 | 198 | 200 | 201 | 222 | 234 | 235 | 244 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 270 | 271 | 272 | 273 | 289 | 294 | 295 | 296 | 314 | 315 | 316 | 335 | 336 | 337 | 354 | 355 | 356 | 363 | 366 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | project 406 | 407 | 408 | 409 | 410 | true 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 1453816946069 425 | 439 | 440 | 441 | 442 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 507 | 508 | 511 | 514 | 515 | 516 | 518 | 519 | 520 | 522 | 523 | 524 | 527 | 528 | 529 | 530 | byteArray.toByteArray() 531 | JAVA 532 | CODE_FRAGMENT 533 | 534 | 535 | content.length() 536 | JAVA 537 | CODE_FRAGMENT 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 
| 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | 707 | 708 | 709 | 710 | 711 | 712 | 713 | 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | 731 | 732 | 733 | 734 | 735 | 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | 750 | 751 | 752 | 753 | 754 | 755 | 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | 764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | 788 | 789 | 790 | 791 | 792 | 793 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | 802 | 803 | 804 | 805 | 806 | 807 | 808 | 809 | 810 | 811 | 812 | 813 | 814 | 815 | 816 | 817 | 818 | 819 | 820 | 821 | 822 | 823 | 824 | 825 | 826 | 827 | 828 | 829 | 830 | 831 | 832 | 833 | 834 | 835 | 836 | 837 | 838 | 839 | 840 | 841 | 842 | 843 | 844 | 845 | 846 | 847 | 848 | 849 | 850 | 851 | 852 | 853 | 854 | 855 | 856 | 857 | 858 | 859 | 860 | 861 | 862 | 863 | 864 | 865 | 866 | 867 | 868 | 869 | 870 | 871 | 872 | 873 | 874 | 875 | 876 | 877 | 878 | 879 | 880 | 881 | 882 | 883 | 884 | 885 | 886 | 887 | 888 | -------------------------------------------------------------------------------- /CrawlDouyuDanmu/CrawlDouyuDanmu.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /CrawlDouyuDanmu/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.geekgao 8 | DouyuDanmu 9 | 1.0-SNAPSHOT 10 | 11 | -------------------------------------------------------------------------------- 
/CrawlDouyuDanmu/readme.md: -------------------------------------------------------------------------------- 1 | 使用[斗鱼开放协议](http://dev-bbs.douyutv.com/forum.php?mod=viewthread&tid=399&extra=page%3D1)完成的DouyuTV弹幕抓取工具。 2 | 3 | [www.douyu.com](https://www.douyu.com) barrage crawler, run with Start.java. -------------------------------------------------------------------------------- /CrawlDouyuDanmu/src/main/java/DouyuBarrageHandler.java: -------------------------------------------------------------------------------- 1 | import java.io.ByteArrayOutputStream; 2 | import java.io.IOException; 3 | import java.net.Socket; 4 | import java.nio.ByteBuffer; 5 | import java.nio.ByteOrder; 6 | 7 | public class DouyuBarrageHandler { 8 | private static String host = "openbarrage.douyutv.com"; 9 | 10 | private static int port = 8601; 11 | 12 | private Socket serverSocket; 13 | 14 | private String roomId; 15 | 16 | public DouyuBarrageHandler(String roomId) { 17 | this.roomId = roomId; 18 | 19 | try { 20 | connect(); 21 | login(); 22 | } catch (IOException e) { 23 | e.printStackTrace(); 24 | } 25 | } 26 | 27 | private void connect() throws IOException { 28 | serverSocket = new Socket(host, port); 29 | 30 | new Thread(new Runnable() { 31 | public void run() { 32 | while (true) { 33 | try { 34 | send("type@=mrkl"); 35 | Thread.sleep(30000); 36 | } catch (IOException e) { 37 | e.printStackTrace(); 38 | } catch (InterruptedException e) { 39 | e.printStackTrace(); 40 | } 41 | } 42 | } 43 | }).start(); 44 | } 45 | 46 | private void login() throws IOException { 47 | send("type@=loginreq/roomid@=" + roomId); 48 | send("type@=joingroup/rid@=" + roomId + "/gid@=-9999"); 49 | } 50 | 51 | public String read() throws IOException { 52 | int msgSize = ByteBuffer.wrap(getBytes(4)).order(ByteOrder.LITTLE_ENDIAN).getInt(); 53 | byte[] msgBytes =getBytes(msgSize); 54 | 55 | return new String(msgBytes, 8, msgSize - 9); 56 | } 57 | 58 | public void send(String msg) throws IOException { 59 | 
serverSocket.getOutputStream().write(getSendBytes(msg)); 60 | serverSocket.getOutputStream().flush(); 61 | } 62 | 63 | private byte[] getBytes(int byteCount) throws IOException { 64 | byte[] result = new byte[byteCount]; 65 | int alreadyReadSize = 0; 66 | 67 | while (alreadyReadSize != byteCount) { 68 | alreadyReadSize += serverSocket.getInputStream().read(result, alreadyReadSize, byteCount - alreadyReadSize); 69 | } 70 | 71 | return result; 72 | } 73 | 74 | private byte[] getSendBytes(String msg) throws IOException { 75 | ByteArrayOutputStream outBytes = new ByteArrayOutputStream(getPacketSize(msg)); 76 | outBytes.write(intToggle(getPacketSize(msg))); 77 | outBytes.write(intToggle(getPacketSize(msg))); 78 | outBytes.write(shortToggle(MessageType.SEND.getCode())); 79 | outBytes.write(0); 80 | outBytes.write(0); 81 | outBytes.write(msg.getBytes()); 82 | outBytes.write(0); 83 | 84 | return outBytes.toByteArray(); 85 | } 86 | 87 | private int getPacketSize(String msg) { 88 | return 9 + msg.length(); 89 | } 90 | 91 | private byte[] intToggle(int value) { 92 | byte[] result = new byte[4]; 93 | result[3] = (byte) ((value >> 24) & 0xFF); 94 | result[2] = (byte) ((value >> 16) & 0xFF); 95 | result[1] = (byte) ((value >> 8) & 0xFF); 96 | result[0] = (byte) (value & 0xFF); 97 | 98 | return result; 99 | } 100 | 101 | private byte[] shortToggle(short value) { 102 | byte[] result = new byte[2]; 103 | result[1] = (byte) ((value >> 8) & 0xFF); 104 | result[0] = (byte) (value & 0xFF); 105 | 106 | return result; 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /CrawlDouyuDanmu/src/main/java/MessageType.java: -------------------------------------------------------------------------------- 1 | public enum MessageType { 2 | SEND(689), RECV(690); 3 | 4 | private short code; 5 | 6 | MessageType(int code) { 7 | this.code = (short) code; 8 | } 9 | 10 | public short getCode() { 11 | return code; 12 | } 13 | } 14 | 
-------------------------------------------------------------------------------- /CrawlDouyuDanmu/src/main/java/Start.java: -------------------------------------------------------------------------------- 1 | import java.io.IOException; 2 | 3 | public class Start { 4 | public static void main(String[] args) throws IOException, InterruptedException { 5 | DouyuBarrageHandler server = new DouyuBarrageHandler("288016"); 6 | 7 | while (true) { 8 | System.out.println(server.read()); 9 | Thread.sleep(1); 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .idea -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/.name: -------------------------------------------------------------------------------- 1 | CrawlPandaDanmu -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/libraries/Maven__com_alibaba_fastjson_1_2_44.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/libraries/Maven__log4j_log4j_1_2_17.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/libraries/Maven__org_jsoup_jsoup_1_8_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/libraries/Maven__org_slf4j_slf4j_api_1_8_0_beta0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/libraries/Maven__org_slf4j_slf4j_log4j12_1_8_0_beta0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/uiDesigner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/CrawlPandaDanmu.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.geekgao 8 | PandaDanmu 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | org.apache.maven.plugins 14 | maven-compiler-plugin 15 | 16 | 1.8 17 | 1.8 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | org.jsoup 26 | jsoup 27 | 1.8.3 28 | 29 | 30 | com.alibaba 31 | fastjson 32 | 1.2.44 33 | 34 | 35 | org.slf4j 36 | slf4j-log4j12 37 | 1.8.0-beta0 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/readme.md: -------------------------------------------------------------------------------- 1 | PandaTV弹幕抓取工具,具体说明见[知乎](https://www.zhihu.com/question/38807641/answer/84007935) 2 | 3 | [www.panda.tv](https://www.panda.tv/) barrage crawler, run with Start.java. 
-------------------------------------------------------------------------------- /CrawlPandaDanmu/src/main/java/Crawl.java: -------------------------------------------------------------------------------- 1 | import com.alibaba.fastjson.JSON; 2 | import com.alibaba.fastjson.JSONArray; 3 | import com.alibaba.fastjson.JSONObject; 4 | import org.jsoup.Jsoup; 5 | import org.jsoup.nodes.Document; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.io.ByteArrayOutputStream; 10 | import java.io.IOException; 11 | import java.io.OutputStream; 12 | import java.net.Socket; 13 | import java.util.List; 14 | 15 | /** 16 | * Created by geekgao on 16-1-29. 17 | * 进行抓取弹幕任务 18 | */ 19 | public class Crawl extends Thread { 20 | private final static Logger LOGGER = LoggerFactory.getLogger(Crawl.class); 21 | 22 | //连接弹幕服务器的必要信息 23 | private String rid; 24 | private String appid; 25 | private String ts; 26 | private String sign; 27 | private String authType; 28 | 29 | //与弹幕服务器联系的socket 30 | private Socket socket; 31 | //弹幕服务器ip 32 | private String serverIp; 33 | //弹幕服务器端口 34 | private int port; 35 | 36 | /** 37 | * 初始化一些信息,注意是获取登录弹幕服务器的必要信息 38 | * @return 返回结果表示是否初始化成功 39 | */ 40 | private boolean init() { 41 | String roomId = Utils.getRoomId(); 42 | String url = "https://riven.panda.tv/chatroom/getinfo?roomid=" + roomId + "&app=1&_caller=panda-pc_web&_=" + System.currentTimeMillis(); 43 | Document document; 44 | try { 45 | document = Jsoup.connect(url).get(); 46 | LOGGER.info("从[" + url + "]获取登录弹幕服务器的必要信息"); 47 | LOGGER.info("登录数据Json串:" + document.body().text()); 48 | } catch (IOException e) { 49 | LOGGER.error("获取登录服务器的必要数据出错", e); 50 | return false; 51 | } 52 | JSONObject jsonObject = JSON.parseObject(document.body().text()); 53 | 54 | int errno = jsonObject.getInteger("errno"); 55 | if (errno == 0) { 56 | JSONObject tempJsonObject = jsonObject.getJSONObject("data"); 57 | rid = String.valueOf(tempJsonObject.getLong("rid")); 58 | appid = 
tempJsonObject.getString("appid"); 59 | ts = String.valueOf(tempJsonObject.getLong("ts")); 60 | sign = tempJsonObject.getString("sign"); 61 | authType = tempJsonObject.getString("authType"); 62 | 63 | JSONArray chatAddressList = tempJsonObject.getJSONArray("chat_addr_list"); 64 | LOGGER.info("弹幕服务器数据:" + chatAddressList); 65 | //选第一个服务器登录 66 | serverIp = chatAddressList.getString(0).split(":",2)[0]; 67 | port = Integer.parseInt(chatAddressList.getString(0).split(":", 2)[1]); 68 | } else { 69 | LOGGER.error("获取登录弹幕服务器的必要信息出错,程序将退出"); 70 | return false; 71 | } 72 | 73 | return true; 74 | } 75 | 76 | /** 77 | * 与弹幕服务器取得联系,相当于登录弹幕服务器 78 | */ 79 | private void login() throws IOException { 80 | socket = new Socket(serverIp,port); 81 | LOGGER.info("登录弹幕服务器:" + serverIp + ":" + port + "成功"); 82 | String msg = "u:" + rid + "@" + appid + "\n" + 83 | "k:1\n" + 84 | "t:300\n" + 85 | "ts:" + ts + "\n" + 86 | "sign:" + sign + "\n" + 87 | "authtype:" + authType; 88 | ByteArrayOutputStream byteArray = new ByteArrayOutputStream(); 89 | byte[] b = new byte[]{0x00, 0x06, 0x00, 0x02, 0x00, (byte) msg.length()}; 90 | byteArray.write(b); 91 | 92 | byteArray.write(msg.getBytes("ISO-8859-1")); 93 | OutputStream outputStream = socket.getOutputStream(); 94 | outputStream.write(byteArray.toByteArray()); 95 | 96 | b = new byte[]{0x00, 0x06, 0x00, 0x00}; 97 | outputStream.write(b); 98 | } 99 | 100 | @Override 101 | public void run() { 102 | MessageHandler messageHandler = null; 103 | OutputStream outputStream; 104 | 105 | try { 106 | if (!init()) { 107 | return; 108 | } 109 | login(); 110 | messageHandler = new MessageHandler(socket); 111 | outputStream = socket.getOutputStream(); 112 | long start = System.currentTimeMillis(); 113 | while (true) { 114 | List messages = messageHandler.read(); 115 | for (String msg: messages) { 116 | if (msg.equals("")) { 117 | continue; 118 | } 119 | try { 120 | JSONObject msgJsonObject = JSON.parseObject(msg); 121 | String type = 
msgJsonObject.getString("type"); 122 | //发言弹幕type为1 123 | if (type.equals("1")) { 124 | String nickname = msgJsonObject.getJSONObject("data").getJSONObject("from").getString("nickName"); 125 | String content = msgJsonObject.getJSONObject("data").getString("content"); 126 | LOGGER.info("[" + nickname + "]:" + content); 127 | } 128 | } catch (Exception e) { 129 | LOGGER.error("获取消息内容时出错:" + msg, e); 130 | } 131 | } 132 | 133 | //心跳包 134 | if (System.currentTimeMillis() - start > 60000) { 135 | outputStream.write(new byte[]{0x00, 0x06, 0x00, 0x00}); 136 | start = System.currentTimeMillis(); 137 | } 138 | } 139 | } catch (IOException e) { 140 | LOGGER.error("获取弹幕时出错", e); 141 | } finally { 142 | try { 143 | if (messageHandler != null) { 144 | messageHandler.close(); 145 | } 146 | } catch (IOException e) { 147 | LOGGER.error("调用MessageHandler close()方法时出错"); 148 | } 149 | } 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/src/main/java/MessageHandler.java: -------------------------------------------------------------------------------- 1 | import java.io.ByteArrayOutputStream; 2 | import java.io.IOException; 3 | import java.io.InputStream; 4 | import java.net.Socket; 5 | import java.util.Arrays; 6 | import java.util.LinkedList; 7 | import java.util.List; 8 | 9 | /** 10 | * Created by geekgao on 16-1-27. 
11 | */ 12 | public class MessageHandler { 13 | private Socket socket; 14 | private InputStream inputStream; 15 | 16 | public MessageHandler(Socket socket) { 17 | this.socket = socket; 18 | } 19 | 20 | /** 21 | * 22 | * @return 返回人可阅读的json串 23 | * @throws IOException 24 | */ 25 | public List read() throws IOException { 26 | if (inputStream == null) { 27 | inputStream = socket.getInputStream(); 28 | } 29 | byte[] typeBytes = new byte[4]; 30 | 31 | //读取前4个字节,得到数据类型信息 32 | for (int i = 0;i < 4;i++) { 33 | int tmp = inputStream.read(); 34 | typeBytes[i] = (byte) tmp; 35 | } 36 | 37 | //最终的结果 38 | List result = new LinkedList(); 39 | //这是一条弹幕信息 40 | if (typeBytes[0] == 0x00 && typeBytes[1] == 0x06 && typeBytes[2] == 0x00 && typeBytes[3] == 0x03) { 41 | //越过前面没用的字节,跳到标记内容长度的字节 42 | inputStream.skip(7); 43 | //下条内容的长度 44 | int contentLen = 0; 45 | //读取4个字节,得到数据长度 46 | for (int i = 3;i >= 0;i--) { 47 | int tmp = inputStream.read(); 48 | contentLen += tmp * Math.pow(16,2 * i); 49 | } 50 | 51 | int len; 52 | int readLen = 0; 53 | byte[] bytes = new byte[contentLen]; 54 | ByteArrayOutputStream byteArray = new ByteArrayOutputStream(); 55 | while ((len = inputStream.read(bytes,0,contentLen - readLen)) != -1) { 56 | byteArray.write(bytes,0,len); 57 | readLen += len; 58 | if (readLen == contentLen) { 59 | break; 60 | } 61 | } 62 | 63 | bytes = byteArray.toByteArray(); 64 | byte[] b = Arrays.copyOfRange(bytes, 8, 12); 65 | //找到人可识别的字符串放入结果集中 66 | for (int i = 0;i < bytes.length;) { 67 | //一段弹幕内容的开头 68 | if (bytes[i] == b[0] && bytes[i+1] == b[1] && bytes[i+2] == b[2] && bytes[i+3] == b[3]) { 69 | i += 4; 70 | //一段弹幕json字符串的长度 71 | int length = 0; 72 | //读取4个字节,得到弹幕数据长度 73 | for (int j = 0,k = 3;j < 4;j++,k--) { 74 | int n = bytes[i + j]; 75 | /* 76 | 原数据一个字节可保存0~255的数,但是byte范围是-128~127,所以要变回原来的真实数据 77 | 后面的数据不变是因为后面的字符串都是ascii字符,都在0~127之内 78 | */ 79 | if (n < 0) { 80 | n = 256 + bytes[i + j]; 81 | } 82 | 83 | length += n * Math.pow(16,2 * k); 84 | } 85 | i += 4; 86 | 87 | 
result.add(Utils.unicode2String(new String(Arrays.copyOfRange(bytes,i,i + length)))); 88 | i += length; 89 | } else { 90 | i++; 91 | } 92 | } 93 | 94 | } else if ((typeBytes[0] == 0x00 && typeBytes[1] == 0x06 && typeBytes[2] == 0x00 && typeBytes[3] == 0x06)) { 95 | //下条内容的长度 96 | int contentLen = 0; 97 | //读取2个字节,得到数据长度 98 | for (int i = 1;i >= 0;i--) { 99 | int tmp = inputStream.read(); 100 | contentLen += tmp * Math.pow(16,2 * i); 101 | } 102 | 103 | inputStream.skip(contentLen); 104 | } 105 | 106 | return result; 107 | } 108 | 109 | public void close() throws IOException { 110 | socket.close(); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/src/main/java/Start.java: -------------------------------------------------------------------------------- 1 | import java.io.IOException; 2 | 3 | /** 4 | * Created by geekgao on 16-1-29. 5 | */ 6 | public class Start { 7 | public static void main(String[] args) throws IOException { 8 | Crawl c = new Crawl(); 9 | c.start(); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /CrawlPandaDanmu/src/main/java/Utils.java: -------------------------------------------------------------------------------- 1 | import java.io.IOException; 2 | import java.util.Properties; 3 | 4 | /** 5 | * Created by geekgao on 16-1-29. 
/**
 * Configuration access and string helpers for the Panda TV barrage crawler.
 */
public class Utils {
    private static Properties config = new Properties();

    static {
        // getResourceAsStream returns null when the resource is missing; passing that to
        // Properties.load threw a NullPointerException and made the whole class unusable.
        try (java.io.InputStream in = Utils.class.getResourceAsStream("/config.properties")) {
            if (in == null) {
                System.err.println("config.properties not found on the classpath");
            } else {
                config.load(in);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * @return the configured {@code roomId} without surrounding whitespace,
     *         or {@code null} when it is not configured
     */
    public static String getRoomId() {
        String roomId = config.getProperty("roomId");
        return roomId == null ? null : roomId.trim();
    }

    /**
     * Replaces every {@code \\uXXXX} escape (four hexadecimal digits) in {@code str} with the
     * character it denotes. Anything else, including an incomplete or non-hexadecimal escape
     * and a trailing backslash, is copied unchanged.
     *
     * @param str text that may contain unicode escapes; must not be {@code null}
     * @return the text with all complete escapes decoded
     */
    public static String unicode2String(String str) {
        int n = str.length();
        StringBuilder result = new StringBuilder(n);
        for (int i = 0; i < n;) {
            char c = str.charAt(i);
            // Bounds are checked before looking ahead: the original indexed past the end of
            // the string for a trailing '\' or an escape with fewer than four characters.
            if (c == '\\' && i + 1 < n && str.charAt(i + 1) == 'u') {
                if (i + 6 <= n && isHex4(str, i + 2)) {
                    // Parse the four hex digits and append the resulting character
                    result.append((char) Integer.parseInt(str.substring(i + 2, i + 6), 16));
                    i += 6;
                } else {
                    result.append("\\u");
                    i += 2;
                }
            } else {
                result.append(c);
                i++;
            }
        }

        return result.toString();
    }

    /** Tells whether the four characters starting at {@code start} are all ASCII hex digits. */
    private static boolean isHex4(String s, int start) {
        for (int k = start; k < start + 4; k++) {
            char ch = s.charAt(k);
            boolean hex = (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
            if (!hex) {
                return false;
            }
        }
        return true;
    }

}
log4j.appender.FILE.MaxFileSize=100MB 10 | log4j.appender.FILE.Append = true 11 | log4j.appender.FILE.layout=org.apache.log4j.PatternLayout 12 | log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss}[%p][%l] - %m%n -------------------------------------------------------------------------------- /DownloadImg.java: -------------------------------------------------------------------------------- 1 | import org.apache.http.HttpEntity; 2 | import org.apache.http.client.methods.CloseableHttpResponse; 3 | import org.apache.http.client.methods.HttpGet; 4 | import org.apache.http.impl.client.CloseableHttpClient; 5 | import org.apache.http.impl.client.HttpClients; 6 | import org.apache.http.util.EntityUtils; 7 | 8 | import java.io.*; 9 | import java.text.SimpleDateFormat; 10 | import java.util.Date; 11 | import java.util.regex.Matcher; 12 | import java.util.regex.Pattern; 13 | 14 | public class DownloadImg { 15 | 16 | public static void writeImgEntityToFile(HttpEntity imgEntity,String fileAddress) { 17 | File storeFile = new File(fileAddress); 18 | FileOutputStream output = null; 19 | try { 20 | output = new FileOutputStream(storeFile); 21 | 22 | if (imgEntity != null) { 23 | InputStream instream; 24 | instream = imgEntity.getContent(); 25 | byte b[] = new byte[8 * 1024]; 26 | int count; 27 | while ((count = instream.read(b)) != -1) { 28 | output.write(b, 0, count); 29 | } 30 | 31 | } 32 | } catch (FileNotFoundException e) { 33 | e.printStackTrace(); 34 | } catch (IOException e) { 35 | e.printStackTrace(); 36 | } finally { 37 | try { 38 | output.close(); 39 | } catch (IOException e) { 40 | e.printStackTrace(); 41 | } 42 | } 43 | } 44 | 45 | public static void main(String[] args) { 46 | System.out.println("获取Bing图片地址中……"); 47 | 48 | SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); 49 | CloseableHttpClient httpClient = HttpClients.createDefault(); 50 | HttpGet httpGet = new HttpGet("http://cn.bing.com/"); 51 | CloseableHttpResponse response = 
/**
 * Simulated insulin pump: tracks the insulin reservoir, the battery charge and the most
 * recently reported blood-sugar value, and computes the dose for one injection.
 *
 * <p>The state is read and written from several threads (the battery timer started in
 * {@link #run()}, the display timer in PumpMain and the Swing listeners in PumpWindow),
 * so every access goes through a private lock.
 */
class InsulinPump extends Thread {
    /** Guards all mutable fields; a private object is used instead of the Thread monitor. */
    private final Object lock = new Object();

    private double insulinQuantity; // insulin left in the reservoir
    private double battery;         // battery charge
    private double bloodSugar;      // latest blood-sugar value; normal range is 3.9--6.1 mmol/L
    private final double weight;    // body weight, used to scale the injected dose

    /**
     * Creates a pump with a full reservoir (1000), a full battery (100) and a blood-sugar
     * value of 5.
     *
     * @param weight body weight of the user
     */
    public InsulinPump(double weight) {
        insulinQuantity = 1000;
        battery = 100;
        bloodSugar = 5;
        this.weight = weight;
    }

    /**
     * Starts the pump: after a one-second delay the battery loses 0.1 every second,
     * i.e. one unit of charge every ten seconds.
     */
    @Override
    public void run() {
        Timer timer = new Timer();
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                synchronized (lock) {
                    battery -= 0.1;
                }
            }
        }, 1000, 1000);
    }

    /** @return the amount of insulin left in the reservoir */
    public double getInsulinQuantity() {
        synchronized (lock) {
            return insulinQuantity;
        }
    }

    /** @param insulinQuantity new reservoir content */
    public void setInsulinQuantity(double insulinQuantity) {
        synchronized (lock) {
            this.insulinQuantity = insulinQuantity;
        }
    }

    /** @return the current battery charge (may drop below zero if never recharged) */
    public double getBattery() {
        synchronized (lock) {
            return battery;
        }
    }

    /** @param battery new battery charge */
    public void setBattery(int battery) {
        synchronized (lock) {
            this.battery = battery;
        }
    }

    /** @return the blood-sugar value last passed to {@link #setBloodSugar(double)} */
    public double getBloodSugar() {
        synchronized (lock) {
            return bloodSugar;
        }
    }

    /** @param bloodSugar latest blood-sugar value */
    public void setBloodSugar(double bloodSugar) {
        synchronized (lock) {
            this.bloodSugar = bloodSugar;
        }
    }

    /**
     * Computes the dose for the current blood-sugar value and takes it out of the reservoir.
     *
     * <p>The formula yields a negative number whenever {@code bloodSugar * 18 < 100}.
     * A negative dose used to be subtracted as well, which silently refilled the
     * reservoir; it is now treated as "nothing to inject".
     *
     * @return the injected amount, or 0 when the formula gives no positive dose
     */
    public double adjust() {
        synchronized (lock) {
            // NOTE(review): the factor 18 presumably converts mmol/L to mg/dL -- confirm.
            double quantity = (bloodSugar * 18 - 100) * weight * 6 / 2000;
            if (quantity <= 0) {
                return 0;
            }
            insulinQuantity -= quantity;
            return quantity;
        }
    }
}
java.util.TimerTask; 14 | 15 | public class PumpMain { 16 | public static void main(String[] args) { 17 | 18 | final Logger log = Logger.getLogger(PumpMain.class); 19 | try { 20 | log.addAppender(new FileAppender(new PatternLayout("[%d{yyyy/MM/dd-HH:mm:ss}]-%m%n"), "/home/geekgao/insulinPumpLog", true)); 21 | } catch (IOException e) { 22 | e.printStackTrace(); 23 | } 24 | 25 | final People people = new People(); 26 | 27 | final JFrame jFrame = new JFrame("输入体重"); 28 | final double[] weight = new double[1]; 29 | jFrame.setLayout(new FlowLayout()); 30 | jFrame.addWindowListener(new WindowAdapter() { 31 | @Override 32 | public void windowClosing(WindowEvent e) { 33 | weight[0] = 60;//关闭窗口的话,默认60公斤 34 | super.windowClosing(e); 35 | } 36 | }); 37 | final JTextField textArea = new JTextField("输入您的体重(默认60公斤)"); 38 | jFrame.add(textArea); 39 | JButton jButton = new JButton("确定"); 40 | jFrame.add(jButton); 41 | 42 | jButton.addActionListener(new ActionListener() { 43 | public void actionPerformed(ActionEvent e) { 44 | try { 45 | weight[0] = Double.valueOf(textArea.getText()); 46 | jFrame.setVisible(false); 47 | } catch (NumberFormatException e1) { 48 | JOptionPane.showMessageDialog(null, "数字格式不对"); 49 | } 50 | } 51 | }); 52 | jFrame.pack(); 53 | jFrame.setLocationRelativeTo(null); 54 | jFrame.setResizable(false); 55 | jFrame.setVisible(true); 56 | 57 | while (weight[0] == 0 && jFrame.isVisible() == true) { 58 | try { 59 | Thread.sleep(50); 60 | } catch (InterruptedException e) { 61 | e.printStackTrace(); 62 | } 63 | } 64 | 65 | log.info("开机,用户体重为" + weight[0] + "Kg"); 66 | 67 | final InsulinPump insulinPump = new InsulinPump(weight[0]); 68 | final PumpWindow pumpWindow = new PumpWindow(insulinPump); 69 | 70 | people.start(); //人体运行 71 | insulinPump.start(); //胰岛素泵运行 72 | 73 | Timer timer = new Timer(); 74 | timer.schedule(new TimerTask() { 75 | @Override 76 | public void run() { 77 | insulinPump.setBloodSugar(people.getBloodSugar()); 78 | 79 | pumpWindow.updateTime(); 80 | 
pumpWindow.setBattery(insulinPump.getBattery()); 81 | pumpWindow.setBloodSugar(people.getBloodSugar()); 82 | pumpWindow.setinsulinQuantity(insulinPump.getInsulinQuantity()); 83 | pumpWindow.setStatus("无"); 84 | 85 | if (insulinPump.getBloodSugar() <= 3.9 || insulinPump.getBloodSugar() >= 6.1) { 86 | double insertQuantity = insulinPump.adjust(); 87 | String insertQuantityStr = String.valueOf(insertQuantity); 88 | pumpWindow.setStatus("正在注射" + insertQuantityStr.substring(0, insertQuantityStr.indexOf(".") + 2) + "个单位的胰岛素"); 89 | log.info("注射" + insertQuantityStr.substring(0, insertQuantityStr.indexOf(".") + 2) + "个单位的胰岛素"); 90 | pumpWindow.setinsulinQuantity(insulinPump.getInsulinQuantity()); 91 | } 92 | 93 | if (insulinPump.getBattery() <= 0) { 94 | pumpWindow.setBattery(0); 95 | log.info("因未及时充电,本仪器将自动关机。"); 96 | JOptionPane.showMessageDialog(null, "因未及时充电,本仪器将自动关机。"); 97 | System.exit(0); 98 | } 99 | 100 | if (insulinPump.getInsulinQuantity() <= 0) { 101 | pumpWindow.setinsulinQuantity(0); 102 | log.info("胰岛素量不足!!!本仪器将自动关机"); 103 | JOptionPane.showMessageDialog(null, "胰岛素量不足!!!本仪器将自动关机"); 104 | System.exit(0); 105 | } 106 | } 107 | }, 0, 1000); //每秒更新一次显示的数据 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /InsulinPump/PumpWindow.java: -------------------------------------------------------------------------------- 1 | import org.apache.log4j.FileAppender; 2 | import org.apache.log4j.Logger; 3 | import org.apache.log4j.PatternLayout; 4 | 5 | import javax.swing.*; 6 | import java.awt.*; 7 | import java.awt.event.ActionEvent; 8 | import java.awt.event.ActionListener; 9 | import java.awt.event.WindowAdapter; 10 | import java.awt.event.WindowEvent; 11 | import java.io.IOException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | 15 | class PumpWindow extends JFrame{ 16 | private JLabel time; 17 | private JLabel battery; 18 | private JLabel bloodSugar; 19 | private JLabel insulinQuantity; 20 | 
private JLabel status; 21 | 22 | private JButton charge; 23 | private JButton insertInsulin; 24 | 25 | private InsulinPump pump; 26 | 27 | private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy/MM/dd-HH:mm:ss"); 28 | private Logger log; 29 | 30 | public void setBloodSugar(double bloodSugar) { 31 | String bloodSugarStr = String.valueOf(bloodSugar); 32 | this.bloodSugar.setText("血糖值:" + bloodSugarStr.substring(0, bloodSugarStr.indexOf(".") + 2) + "mmol/L"); 33 | } 34 | 35 | public void setinsulinQuantity(double insulinQuantity) { 36 | String insulinQuantityStr = String.valueOf(insulinQuantity); 37 | this.insulinQuantity.setText("胰岛素量:" + insulinQuantityStr.substring(0, insulinQuantityStr.indexOf(".") + 2) + "单位"); 38 | } 39 | 40 | public void setStatus(String status) { 41 | this.status.setText("当前状态:" + status); 42 | } 43 | 44 | public void setBattery(double battery) { 45 | String batteryStr = String.valueOf(battery); 46 | this.battery.setText("电量:" + batteryStr.substring(0, batteryStr.indexOf(".") + 2));//只获取小数点后1位 47 | } 48 | 49 | public void updateTime() { 50 | this.time.setText("时间:" + simpleDateFormat.format(new Date())); 51 | } 52 | 53 | //参数是这个窗口显示的泵子的引用 54 | public PumpWindow(final InsulinPump pump) { 55 | log = Logger.getLogger(PumpWindow.class); 56 | try { 57 | log.addAppender(new FileAppender(new PatternLayout("[%d{yyyy/MM/dd-HH:mm:ss}]-%m%n"), "/home/geekgao/insulinPumpLog", true)); 58 | } catch (IOException e) { 59 | e.printStackTrace(); 60 | } 61 | this.pump = pump; 62 | 63 | this.setLayout(new BorderLayout()); 64 | this.addWindowListener(new WindowAdapter() { 65 | @Override 66 | public void windowClosing(WindowEvent e) { 67 | log.info("用户关机"); 68 | System.exit(0); 69 | } 70 | }); 71 | 72 | JPanel upPanel = new JPanel(); 73 | upPanel.setLayout(new BorderLayout()); 74 | this.add(upPanel, BorderLayout.NORTH); 75 | time = new JLabel(); 76 | battery = new JLabel(); 77 | upPanel.add(time, BorderLayout.WEST); 78 | upPanel.add(battery, 
BorderLayout.EAST); 79 | 80 | 81 | JPanel downPanel = new JPanel(); 82 | this.add(downPanel, BorderLayout.SOUTH); 83 | charge = new JButton("充电"); 84 | charge.addActionListener(new ActionListener() { 85 | public void actionPerformed(ActionEvent e) { 86 | log.info("充电完毕"); 87 | pump.setBattery(100); 88 | } 89 | }); 90 | downPanel.add(charge); 91 | insertInsulin = new JButton("加满胰岛素"); 92 | insertInsulin.addActionListener(new ActionListener() { 93 | public void actionPerformed(ActionEvent e) { 94 | log.info("加满胰岛素"); 95 | pump.setInsulinQuantity(1000); 96 | } 97 | }); 98 | downPanel.add(insertInsulin); 99 | 100 | JPanel midPanel = new JPanel(); 101 | midPanel.setLayout(new GridLayout(3, 1, 0, 0)); 102 | this.add(midPanel, BorderLayout.CENTER); 103 | bloodSugar = new JLabel(); 104 | midPanel.add(bloodSugar); 105 | insulinQuantity = new JLabel(); 106 | midPanel.add(insulinQuantity); 107 | status = new JLabel(); 108 | midPanel.add(status); 109 | 110 | this.setSize(400, 250); 111 | this.setResizable(false); 112 | this.setLocationRelativeTo(null); 113 | this.setVisible(true); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /Main.java: -------------------------------------------------------------------------------- 1 | import org.apache.http.Consts; 2 | import org.apache.http.NameValuePair; 3 | import org.apache.http.client.config.CookieSpecs; 4 | import org.apache.http.client.config.RequestConfig; 5 | import org.apache.http.client.entity.UrlEncodedFormEntity; 6 | import org.apache.http.client.methods.CloseableHttpResponse; 7 | import org.apache.http.client.methods.HttpGet; 8 | import org.apache.http.client.methods.HttpPost; 9 | import org.apache.http.impl.client.CloseableHttpClient; 10 | import org.apache.http.impl.client.HttpClients; 11 | import org.apache.http.impl.client.SystemDefaultCredentialsProvider; 12 | import org.apache.http.message.BasicNameValuePair; 13 | import org.apache.http.util.EntityUtils; 14 | 15 | 
import java.io.File; 16 | import java.io.FileOutputStream; 17 | import java.io.IOException; 18 | import java.util.LinkedList; 19 | import java.util.List; 20 | import java.util.Scanner; 21 | 22 | public class Main { 23 | public static void main(String[] args) { 24 | RequestConfig requestConfig = RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD_STRICT).build(); 25 | CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(requestConfig).build(); 26 | 27 | HttpGet getHomePage = new HttpGet("http://www.zhihu.com/"); 28 | try { 29 | //填充登陆请求中基本的参数 30 | CloseableHttpResponse response = httpClient.execute(getHomePage); 31 | String responseHtml = EntityUtils.toString(response.getEntity()); 32 | String xsrfValue = responseHtml.split("")[0]; 33 | System.out.println("_xsrf:" + xsrfValue); 34 | response.close(); 35 | List valuePairs = new LinkedList(); 36 | valuePairs.add(new BasicNameValuePair("_xsrf" , xsrfValue)); 37 | valuePairs.add(new BasicNameValuePair("email", 用户名)); 38 | valuePairs.add(new BasicNameValuePair("password", 密码)); 39 | valuePairs.add(new BasicNameValuePair("rememberme", "true")); 40 | 41 | //获取验证码 42 | HttpGet getCaptcha = new HttpGet("http://www.zhihu.com/captcha.gif?r=" + System.currentTimeMillis() + "&type=login"); 43 | CloseableHttpResponse imageResponse = httpClient.execute(getCaptcha); 44 | FileOutputStream out = new FileOutputStream("/tmp/zhihu.gif"); 45 | byte[] bytes = new byte[8192]; 46 | int len; 47 | while ((len = imageResponse.getEntity().getContent().read(bytes)) != -1) { 48 | out.write(bytes,0,len); 49 | } 50 | out.close(); 51 | Runtime.getRuntime().exec("eog /tmp/zhihu.gif");//ubuntu下看图片的命令是eog 52 | 53 | //请用户输入验证码 54 | System.out.print("请输入验证码:"); 55 | Scanner scanner = new Scanner(System.in); 56 | String captcha = scanner.next(); 57 | valuePairs.add(new BasicNameValuePair("captcha", captcha)); 58 | 59 | //完成登陆请求的构造 60 | UrlEncodedFormEntity entity = new UrlEncodedFormEntity(valuePairs, Consts.UTF_8); 61 | 
/**
 * Countdown model of a microwave oven. {@link #run()} starts a timer that takes one
 * second off the remaining time every second while the oven is running.
 *
 * <p>The remaining time is changed by the timer thread and by the Swing button listeners
 * and is read by the display timer, so every access goes through a private lock.
 */
public class MicroWave extends Thread {
    /** Guards all fields; a private object is used instead of the Thread monitor. */
    private final Object lock = new Object();

    private int hour;      // remaining hours
    private int minutes;   // remaining minutes
    private int second;    // remaining seconds
    private boolean isRun; // whether the countdown is active

    /** Creates a stopped oven with a remaining time of 0:0:0. */
    public MicroWave() {
        hour = 0;
        minutes = 0;
        second = 0;
        isRun = false;
    }

    /** Schedules the once-per-second countdown; returns immediately. */
    @Override
    public void run() {
        Timer timer = new Timer();
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                tick();
            }
        }, 0, 1000);
    }

    /**
     * Takes one second off the remaining time when the oven is running. When the time is
     * already 0:0:0 the oven is switched off instead.
     */
    private void tick() {
        synchronized (lock) {
            if (!isRun) {
                return;
            }
            if (second >= 1) {
                second--;
            } else if (minutes >= 1) {
                minutes--;
                second = 59;
            } else if (hour >= 1) {
                hour--;
                minutes = 59;
                second = 59;
            } else {
                hour = 0;
                minutes = 0;
                second = 0;
                isRun = false;
            }
        }
    }

    /**
     * Carries surplus seconds into minutes and surplus minutes into hours; a negative
     * total is clamped to 0:0:0. Must be called with {@code lock} held.
     */
    private void normalize() {
        long total = hour * 3600L + minutes * 60L + second;
        if (total < 0) {
            total = 0;
        }
        hour = (int) (total / 3600);
        minutes = (int) ((total % 3600) / 60);
        second = (int) (total % 60);
    }

    public int getHour() {
        synchronized (lock) {
            return hour;
        }
    }

    /** Adds {@code hour} hours to the remaining time. */
    public void addHour(int hour) {
        synchronized (lock) {
            this.hour += hour;
            normalize();
        }
    }

    public int getMinutes() {
        synchronized (lock) {
            return minutes;
        }
    }

    /**
     * Adds {@code minutes} minutes to the remaining time. Any amount is carried into the
     * hours; the carry used to happen only when the sum was exactly 60.
     */
    public void addMinutes(int minutes) {
        synchronized (lock) {
            this.minutes += minutes;
            normalize();
        }
    }

    public int getSecond() {
        synchronized (lock) {
            return second;
        }
    }

    /** Sets the hour field as given, without normalization. */
    public void setHour(int hour) {
        synchronized (lock) {
            this.hour = hour;
        }
    }

    /** Sets the minute field as given, without normalization. */
    public void setMinutes(int minutes) {
        synchronized (lock) {
            this.minutes = minutes;
        }
    }

    /** Sets the second field as given, without normalization. */
    public void setSecond(int second) {
        synchronized (lock) {
            this.second = second;
        }
    }

    /**
     * Adds {@code second} seconds to the remaining time. Any amount is carried into the
     * minutes and hours; the carry used to happen only when the sum was exactly 60.
     */
    public void addSecond(int second) {
        synchronized (lock) {
            this.second += second;
            normalize();
        }
    }

    /** Starts ({@code true}) or pauses ({@code false}) the countdown. */
    public void setIsRun(boolean isRun) {
        synchronized (lock) {
            this.isRun = isRun;
        }
    }

    /** @return whether the countdown is currently active */
    public boolean isRun() {
        synchronized (lock) {
            return isRun;
        }
    }
}
hourStr = String.format("%2d",hour); 14 | String minutesStr = String.format("%2d",minutes); 15 | String secondStr = String.format("%2d", second); 16 | 17 | hourStr = hourStr.replace(' ','0'); 18 | minutesStr = minutesStr.replace(' ','0'); 19 | secondStr = secondStr.replace(' ','0'); 20 | 21 | this.timeLable.setText(hourStr + ":" + minutesStr + ":" + secondStr); 22 | } 23 | 24 | //传入它控制的微波炉 25 | public MicroWaveWindow(final MicroWave microWave) { 26 | this.setTitle("微波炉仿真程序"); 27 | this.addWindowListener(new WindowAdapter() { 28 | @Override 29 | public void windowClosing(WindowEvent e) { 30 | System.exit(0); 31 | } 32 | }); 33 | 34 | JPanel upPanel = new JPanel(); 35 | JPanel midPanel = new JPanel(); 36 | JPanel downPanel = new JPanel(); 37 | this.setLayout(new BorderLayout()); 38 | this.add(upPanel, BorderLayout.NORTH); 39 | timeLable = new JLabel(); 40 | upPanel.add(timeLable); 41 | timeLable.setFont(new Font("Dialog", 1, 100)); 42 | timeLable.setText(microWave.getHour() + ":" + microWave.getMinutes() + ":" + microWave.getSecond()); 43 | 44 | this.add(midPanel, BorderLayout.CENTER); 45 | midPanel.setLayout(new BorderLayout()); 46 | JPanel midUpPanel = new JPanel(); 47 | JPanel midDownPanel = new JPanel(); 48 | midPanel.add(midUpPanel,BorderLayout.NORTH); 49 | midPanel.add(midDownPanel,BorderLayout.SOUTH); 50 | 51 | JLabel fire = new JLabel("火力"); 52 | JRadioButton bigFire = new JRadioButton("大火"); 53 | JRadioButton midFire = new JRadioButton("中火"); 54 | JRadioButton smallFire = new JRadioButton("小火"); 55 | midUpPanel.add(fire); 56 | midUpPanel.add(bigFire); 57 | midUpPanel.add(midFire); 58 | midUpPanel.add(smallFire); 59 | 60 | ButtonGroup buttonGroup = new ButtonGroup(); 61 | buttonGroup.add(bigFire); 62 | buttonGroup.add(midFire); 63 | buttonGroup.add(smallFire); 64 | 65 | JLabel addTime = new JLabel("时间"); 66 | midDownPanel.add(addTime); 67 | JButton addHour = new JButton("+1小时"); 68 | JButton addMinutes = new JButton("+1分钟"); 69 | JButton addSecond = new 
JButton("+1秒"); 70 | midDownPanel.add(addHour); 71 | midDownPanel.add(addMinutes); 72 | midDownPanel.add(addSecond); 73 | 74 | addHour.addActionListener(new ActionListener() { 75 | public void actionPerformed(ActionEvent e) { 76 | microWave.addHour(1); 77 | } 78 | }); 79 | addMinutes.addActionListener(new ActionListener() { 80 | public void actionPerformed(ActionEvent e) { 81 | microWave.addMinutes(1); 82 | } 83 | }); 84 | addSecond.addActionListener(new ActionListener() { 85 | public void actionPerformed(ActionEvent e) { 86 | microWave.addSecond(1); 87 | } 88 | }); 89 | 90 | 91 | this.add(downPanel, BorderLayout.SOUTH); 92 | JLabel function = new JLabel("功能"); 93 | JButton startButton = new JButton("开始"); 94 | JButton pauseButton = new JButton("暂停"); 95 | JButton stopButton = new JButton("停止"); 96 | downPanel.add(function); 97 | downPanel.add(startButton); 98 | downPanel.add(pauseButton); 99 | downPanel.add(stopButton); 100 | 101 | startButton.addActionListener(new ActionListener() { 102 | public void actionPerformed(ActionEvent e) { 103 | if (microWave.getHour() == 0 && microWave.getMinutes() == 0 && microWave.getSecond() == 0) { 104 | JOptionPane.showMessageDialog(null,"时间为0!"); 105 | } else { 106 | microWave.setIsRun(true); 107 | } 108 | } 109 | }); 110 | 111 | pauseButton.addActionListener(new ActionListener() { 112 | public void actionPerformed(ActionEvent e) { 113 | microWave.setIsRun(false); 114 | } 115 | }); 116 | 117 | stopButton.addActionListener(new ActionListener() { 118 | public void actionPerformed(ActionEvent e) { 119 | microWave.setIsRun(false); 120 | microWave.setHour(0); 121 | microWave.setMinutes(0); 122 | microWave.setSecond(0); 123 | } 124 | }); 125 | 126 | this.pack(); 127 | this.setLocationRelativeTo(null); 128 | this.setResizable(false); 129 | this.setVisible(true); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /PullBookinfo/GetBookInfoThread.java: 
-------------------------------------------------------------------------------- 1 | import org.apache.http.HttpHeaders; 2 | import org.apache.http.client.methods.CloseableHttpResponse; 3 | import org.apache.http.client.methods.HttpGet; 4 | import org.apache.http.impl.client.CloseableHttpClient; 5 | import org.apache.http.util.EntityUtils; 6 | import org.dom4j.Element; 7 | 8 | import java.io.IOException; 9 | import java.util.regex.Matcher; 10 | import java.util.regex.Pattern; 11 | 12 | public class GetBookInfoThread extends Thread{ 13 | private CloseableHttpClient httpClient; 14 | private String webAddress; 15 | private Element rootElement; 16 | private Pattern bookAuthorRegex; 17 | private Pattern bookPublishRegex; 18 | private Pattern bookIsbnRegex; 19 | private Pattern bookImgRegex; 20 | private String bookName; 21 | 22 | /** 23 | * 24 | * @param httpClient 用这个操作抓取 25 | * @param webAddress 这个是抓取的网址 26 | * @param rootElement 这个是一个xml文档的根节点,用这个来操作加入新的子节点 27 | */ 28 | public GetBookInfoThread(CloseableHttpClient httpClient,String webAddress,String bookName,Element rootElement,Pattern bookAuthorRegex,Pattern bookPublishRegex,Pattern bookIsbnRegex,Pattern bookImgRegex) { 29 | this.httpClient = httpClient; 30 | this.webAddress = webAddress; 31 | this.rootElement = rootElement; 32 | this.bookAuthorRegex = bookAuthorRegex; 33 | this.bookPublishRegex = bookPublishRegex; 34 | this.bookIsbnRegex = bookIsbnRegex; 35 | this.bookName = bookName; 36 | this.bookImgRegex = bookImgRegex; 37 | } 38 | 39 | @Override 40 | public void run() { 41 | HttpGet getBookInfo = new HttpGet(webAddress); 42 | getBookInfo.addHeader(HttpHeaders.USER_AGENT, "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30"); 43 | CloseableHttpResponse bookInfoResponse; 44 | String bookInfoCode = null;//书籍具体信息网页源码 45 | try { 46 | bookInfoResponse = httpClient.execute(getBookInfo); 47 | if (bookInfoResponse.getStatusLine().getStatusCode() != 200) { 48 | 
System.out.println("获取书本具体信息时出错,页面地址:" + webAddress + "错误信息" + bookInfoResponse.getStatusLine()); 49 | return; 50 | } 51 | 52 | bookInfoCode = EntityUtils.toString(bookInfoResponse.getEntity()); 53 | } catch (IOException e) { 54 | e.printStackTrace(); 55 | } 56 | 57 | Matcher bookAuthorMatcher = bookAuthorRegex.matcher(bookInfoCode); //匹配作者 58 | Matcher bookPublishMatcher = bookPublishRegex.matcher(bookInfoCode); //匹配出版商 59 | Matcher bookIsbnMatcher = bookIsbnRegex.matcher(bookInfoCode); //匹配isbn 60 | Matcher bookImgMatcher = bookImgRegex.matcher(bookInfoCode); //匹配图片地址 61 | 62 | String bookName = this.bookName; 63 | String bookAuthor = ""; 64 | String bookPublish = ""; 65 | String bookIsbn = ""; 66 | String bookLink = webAddress; 67 | String bookImg = ""; 68 | 69 | if (bookAuthorMatcher.find()) { 70 | bookAuthor = bookAuthorMatcher.group(1); 71 | } 72 | if (bookPublishMatcher.find()) { 73 | bookPublish = bookPublishMatcher.group(1); 74 | } 75 | if (bookIsbnMatcher.find()) { 76 | bookIsbn = bookIsbnMatcher.group(1); 77 | } 78 | if (bookImgMatcher.find()) { 79 | bookImg = bookImgMatcher.group(1); 80 | } 81 | 82 | // System.out.println(bookName + "-" + bookAuthor + "-" + bookPublish + "-" + bookIsbn); 83 | 84 | Element bookElement = rootElement.addElement("book");//新建一个书的标签 85 | bookElement.addAttribute("id",String.valueOf(Main.bookId++)); 86 | bookElement.addElement("name").setText(bookName); 87 | bookElement.addElement("author").setText(bookAuthor); 88 | bookElement.addElement("publish").setText(bookPublish); 89 | bookElement.addElement("isbn").setText(bookIsbn); 90 | bookElement.addElement("count").setText(String.valueOf((int)(Math.random() * 10) + 3)); 91 | bookElement.addElement("link").setText(bookLink); 92 | bookElement.addElement("img").setText(bookImg); 93 | 94 | System.out.println("抓取了:" + webAddress + " " + bookName); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /PullBookinfo/Main.java: 
-------------------------------------------------------------------------------- 1 | import org.apache.http.HttpHeaders; 2 | import org.apache.http.HttpHost; 3 | import org.apache.http.client.methods.CloseableHttpResponse; 4 | import org.apache.http.client.methods.HttpGet; 5 | import org.apache.http.impl.client.CloseableHttpClient; 6 | import org.apache.http.impl.client.HttpClients; 7 | import org.apache.http.util.EntityUtils; 8 | import org.dom4j.Document; 9 | import org.dom4j.DocumentHelper; 10 | import org.dom4j.Element; 11 | 12 | import java.io.IOException; 13 | import java.util.*; 14 | import java.util.regex.Matcher; 15 | import java.util.regex.Pattern; 16 | 17 | public class Main { 18 | 19 | CloseableHttpClient httpClient; 20 | static int bookId = 496; 21 | Map proxyMap;//ip->端口 22 | List ipList;//从这个list中读出ip,再由ip从map中读出端口 23 | int i = 0;//根据这个从list中取出ip,换上对应的代理 24 | 25 | public static void main(String[] args) { 26 | Main m = new Main(); 27 | 28 | // List tagList = m.getTagList(); 29 | List tagList = new LinkedList(); 30 | // tagList.add("经典"); 31 | // tagList.add("日本文学"); 32 | // tagList.add("散文"); 33 | // tagList.add("中国文学"); 34 | // tagList.add("算法"); 35 | // tagList.add("童话"); 36 | // tagList.add("外国文学"); 37 | // tagList.add("文学"); 38 | // tagList.add("小说"); 39 | // tagList.add("漫画"); 40 | // tagList.add("诗词"); 41 | // tagList.add("心理学"); 42 | tagList.add("摄影"); 43 | tagList.add("理财"); 44 | tagList.add("经济学"); 45 | m.pullAndWrite(tagList,10); 46 | } 47 | 48 | public Main() { 49 | // HttpHost proxy = new HttpHost("122.225.106.35",80); 50 | // httpClient = HttpClients.custom().setProxy(proxy).build(); 51 | httpClient = HttpClients.createDefault(); 52 | setProxyMap(); 53 | } 54 | 55 | public void setProxyMap() { 56 | proxyMap = new HashMap(); 57 | ipList = new LinkedList(); 58 | proxyMap.put("211.68.122.171",80);ipList.add("211.68.122.171"); 59 | } 60 | 61 | public List getTagList() { 62 | HttpGet getTag = new HttpGet("http://book.douban.com/tag/"); 63 | 
getTag.addHeader(HttpHeaders.USER_AGENT, "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30"); 64 | CloseableHttpResponse tagPageResponse = null; 65 | String tagPageCode = null;//网页源码 66 | try { 67 | tagPageResponse = httpClient.execute(getTag); 68 | tagPageCode = EntityUtils.toString(tagPageResponse.getEntity()); 69 | tagPageResponse.close(); 70 | } catch (IOException e) { 71 | e.printStackTrace(); 72 | } finally { 73 | try { 74 | tagPageResponse.close(); 75 | } catch (IOException e) { 76 | e.printStackTrace(); 77 | } 78 | } 79 | 80 | Pattern p = Pattern.compile("class=\"tag\">(.*?)"); 81 | Matcher m = p.matcher(tagPageCode); 82 | List resultTagList = new LinkedList(); 83 | while (m.find()) { 84 | resultTagList.add(m.group(1)); 85 | } 86 | 87 | return resultTagList; 88 | } 89 | 90 | /** 91 | * 92 | * @param tagList 要抓的图书的类别 93 | * @param maxPageNum 每种图书最多抓取的页数 94 | */ 95 | public void pullAndWrite(List tagList,int maxPageNum) { 96 | Pattern bookAddressRegex = Pattern.compile("href=\"(.*?)\" class=\"title\" target=\"_blank\">(.*?)"); //获取具体书籍网址的正则 97 | Pattern bookAuthorRegex = Pattern.compile("(?s) 作者:.*?>(.*?)");//匹配作者 98 | Pattern bookPublishRegex = Pattern.compile("出版社: (.*?)
"); 99 | Pattern bookIsbnRegex = Pattern.compile("ISBN: (.*?)
"); 100 | Pattern bookImgRegex = Pattern.compile(" threadList = new LinkedList(); 133 | while (m.find()) { 134 | threadList.add(new GetBookInfoThread(httpClient, m.group(1), m.group(2), rootElement, bookAuthorRegex, bookPublishRegex, bookIsbnRegex,bookImgRegex)); 135 | findCount++; 136 | } 137 | //没有知道到代表这种类别的书都找完了,那么直接退出此类书籍的查找 138 | if (findCount == 0) { 139 | break; 140 | } 141 | 142 | for (Thread thread:threadList) { 143 | thread.start(); 144 | } 145 | for (Thread thread:threadList) { 146 | try { 147 | thread.join(); 148 | } catch (InterruptedException e) { 149 | e.printStackTrace(); 150 | } 151 | } 152 | nowPageNum++; 153 | } 154 | //一个类别爬完了再写入 155 | new WriteBookInfoToFile(rootElement,"/home/geekgao/book/" + tag + ".xml").start(); //另开一个线程写入文件 156 | 157 | } 158 | } 159 | 160 | private void changeProxy() { 161 | if (i >= ipList.size()) { 162 | System.out.println("代理用完了,退出"); 163 | System.exit(0); 164 | } 165 | String ip = ipList.get(i++); 166 | httpClient = HttpClients.custom().setProxy(new HttpHost(ip,proxyMap.get(ip))).build(); 167 | System.out.println("换代理啦,使用代理:" + ip + ",端口:" + proxyMap.get(ip)); 168 | } 169 | 170 | } 171 | -------------------------------------------------------------------------------- /PullBookinfo/WriteBookInfoToFile.java: -------------------------------------------------------------------------------- 1 | import org.dom4j.Element; 2 | import org.dom4j.io.XMLWriter; 3 | 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import java.io.Writer; 7 | 8 | public class WriteBookInfoToFile extends Thread { 9 | private Element root; 10 | private String fileAddress; 11 | 12 | public WriteBookInfoToFile(Element root,String fileAddress) { 13 | this.root = root; 14 | this.fileAddress = fileAddress; 15 | } 16 | 17 | @Override 18 | public void run() { 19 | Writer fileWriter; 20 | try { 21 | fileWriter = new FileWriter(fileAddress); 22 | XMLWriter xmlWriter = new XMLWriter(fileWriter); 23 | xmlWriter.write(root); 24 | 
xmlWriter.close(); 25 | System.out.println("[" + fileAddress + "]写入成功"); 26 | } catch (IOException e) { 27 | e.printStackTrace(); 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /PullBookinfo/WriteInfoToDB.java: -------------------------------------------------------------------------------- 1 | import org.dom4j.Document; 2 | import org.dom4j.DocumentException; 3 | import org.dom4j.Element; 4 | import org.dom4j.io.SAXReader; 5 | 6 | import java.io.File; 7 | import java.sql.DriverManager; 8 | import java.sql.SQLException; 9 | import java.sql.Statement; 10 | import java.util.List; 11 | 12 | public class WriteInfoToDB { 13 | public static void main(String[] args) { 14 | File folder = new File("/home/geekgao/book"); 15 | File[] XMLS = folder.listFiles(); 16 | SAXReader reader = new SAXReader(); 17 | Statement statement = null; //用这个执行sql语句 18 | try { 19 | Class.forName("com.mysql.jdbc.Driver");// 动态加载mysql驱动 20 | statement = DriverManager.getConnection("jdbc:mysql://localhost:3306/BookManage?user=root&password=root").createStatement(); 21 | } catch (SQLException e) { 22 | e.printStackTrace(); 23 | } catch (ClassNotFoundException e) { 24 | e.printStackTrace(); 25 | } 26 | 27 | for (File f:XMLS) { 28 | if (f.isDirectory()) { 29 | continue; 30 | } 31 | Document document = null; 32 | try { 33 | document = reader.read(f); 34 | } catch (DocumentException e) { 35 | e.printStackTrace(); 36 | } 37 | 38 | Element root = document.getRootElement(); 39 | List books = root.elements(); 40 | for (Element book:books) { 41 | String name = null; 42 | String author = null; 43 | String publish = null; 44 | String isbn = null; 45 | String count = null; 46 | String link = null; 47 | String img = null; 48 | List b = book.elements(); 49 | for (Element info:b) { 50 | if (info.getName().equals("name")) { 51 | name = info.getText(); 52 | } else if (info.getName().equals("author")) { 53 | author = info.getText(); 54 | } else if 
(info.getName().equals("publish")) { 55 | publish = info.getText(); 56 | } else if (info.getName().equals("isbn")) { 57 | isbn = info.getText(); 58 | } else if (info.getName().equals("count")) { 59 | count = info.getText(); 60 | } else if (info.getName().equals("link")) { 61 | link = info.getText(); 62 | } else if (info.getName().equals("img")) { 63 | img = info.getText(); 64 | } 65 | // System.out.println(info.getName() + ": " + info.getText()); 66 | } 67 | String sql = "INSERT INTO Book(bookPublish,bookName,bookAuthor,bookTag,bookIsbn,bookCount,bookRestCount,bookLink,bookImg) VALUES ('" + publish + "','" + name + "','" + author + "','" + f.getName().split("\\.")[0] + "','" + isbn + "','" + count + "','" + count + "','" + link + "','" + img + "');"; 68 | try { 69 | statement.execute(sql); 70 | } catch (SQLException e) { 71 | System.err.println("sql语句处错误:" + e.getMessage()); 72 | System.err.println("sql语句:" + sql); 73 | } 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Stargazers over time 2 | [![Stargazers over time](https://starchart.cc/gaopu/Java.svg)](https://starchart.cc/gaopu/Java) 3 | 4 | ## 这个仓库内的代码是什么东西 5 | 都是以前上大学时自己写的一些东西,有的为了交作业,有的为了玩😊。 6 | 7 | ## 为什么很多份不相关的代码放在一个代码库 8 | 因为当初还不会合理正确的使用github代码库🙉。 9 | 10 | ## 值不值得star 11 | 不值得(不过也感谢star了的朋友❤️)。 12 | 13 | ## 最后 14 | 欢迎大家访问我的私人博客:[www.geekgao.cn](https://www.geekgao.cn) 15 | 16 | 我的博客搭建在腾讯云,新用户有优惠:[优惠购买腾讯云](https://curl.qcloud.com/JNxboKJ3)(不是新用户用新的账号登陆,用老身份认证也能享受优惠,听说是可这么操作三次),我用的是2核4G5M的配置。 17 | -------------------------------------------------------------------------------- /Struts2FileUpAndDown/src/com/geekgao/file/FileAction.java: -------------------------------------------------------------------------------- 1 | package com.geekgao.file; 2 | 3 | import com.opensymphony.xwork2.ActionSupport; 4 | 5 | import java.io.*; 6 | 7 | public class 
FileAction extends ActionSupport { 8 | 9 | private File file; 10 | private String fileFileName; 11 | private String fileContentType; 12 | 13 | public File getFile() { 14 | return file; 15 | } 16 | 17 | public void setFile(File file) { 18 | this.file = file; 19 | } 20 | 21 | public String getFileFileName() { 22 | return fileFileName; 23 | } 24 | 25 | public void setFileFileName(String fileFileName) { 26 | this.fileFileName = fileFileName; 27 | } 28 | 29 | public String getFileContentType() { 30 | return fileContentType; 31 | } 32 | 33 | public void setFileContentType(String fileContentType) { 34 | this.fileContentType = fileContentType; 35 | } 36 | 37 | public String upFile() throws IOException { 38 | if (file == null) { 39 | return INPUT; 40 | } 41 | 42 | FileInputStream inFile = new FileInputStream(file); 43 | FileOutputStream outFle = new FileOutputStream(new File("/home/geekgao/" + fileFileName)); 44 | byte[] b = new byte[8192]; 45 | int bLength; 46 | 47 | while (-1 != (bLength = inFile.read(b))) { 48 | outFle.write(b,0,bLength); 49 | } 50 | inFile.close(); 51 | outFle.close(); 52 | return SUCCESS; 53 | } 54 | 55 | public String downFile() { 56 | return SUCCESS; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /Struts2FileUpAndDown/src/struts.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | /{1}File.jsp 13 | 14 | 15 | 16 | /index.jsp 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Struts2FileUpAndDown/web/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | struts2 8 | org.apache.struts2.dispatcher.ng.filter.StrutsPrepareAndExecuteFilter 9 | 10 | 11 | struts2 12 | /* 13 | 14 | -------------------------------------------------------------------------------- /Struts2FileUpAndDown/web/downFile.jsp: 
-------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: geekgao 4 | Date: 15-7-25 5 | Time: 上午10:29 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Struts2FileUpAndDown/web/index.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: geekgao 4 | Date: 15-7-25 5 | Time: 上午10:13 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 上传文件 15 | 下载文件 16 | 17 | 18 | -------------------------------------------------------------------------------- /Struts2FileUpAndDown/web/upFile.jsp: -------------------------------------------------------------------------------- 1 | <%@ taglib prefix="s" uri="/struts-tags" %> 2 | <%-- 3 | Created by IntelliJ IDEA. 4 | User: geekgao 5 | Date: 15-7-25 6 | Time: 上午10:29 7 | To change this template use File | Settings | File Templates. 8 | --%> 9 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 10 | 11 | 12 | 13 | 14 | 15 |
16 | 17 | 18 |
19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /com/crawl/comments/CrawlComments.java: -------------------------------------------------------------------------------- 1 | package com.crawl.comments; 2 | 3 | import org.apache.http.client.ClientProtocolException; 4 | import org.apache.http.client.config.RequestConfig; 5 | import org.apache.http.client.methods.HttpGet; 6 | import org.apache.http.conn.ConnectionPoolTimeoutException; 7 | import org.apache.http.impl.client.CloseableHttpClient; 8 | import org.apache.http.impl.client.HttpClients; 9 | import org.apache.http.util.EntityUtils; 10 | import org.dom4j.Element; 11 | import org.json.JSONArray; 12 | import org.json.JSONObject; 13 | 14 | import java.io.IOException; 15 | import java.net.SocketTimeoutException; 16 | 17 | /** 18 | * Created by geekgao on 15-10-19. 19 | */ 20 | public class CrawlComments implements Runnable { 21 | private Element app; 22 | private int start; 23 | private int count; 24 | private int appId; 25 | 26 | public CrawlComments(Element app, int start, int count, int appId) { 27 | this.app = app; 28 | this.start = start; 29 | this.count = count; 30 | this.appId = appId; 31 | } 32 | 33 | private void setAppXml() throws IOException { 34 | //设置超时 35 | RequestConfig requestConfig = RequestConfig.custom().setConnectionRequestTimeout(2000).setSocketTimeout(6000).setConnectTimeout(2000).build(); 36 | //建立client 37 | CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(requestConfig).build(); 38 | 39 | HttpGet getContentJson = new HttpGet("http://comment.mobilem.360.cn/comment/getComments?baike=" + appId + "&level=0&start=" + start + "&count=" + count + "&fm=home_jingjia_3&m=c1804fc5ca4ded8293acd1151efaf3db&m2=61f3c1e4d105b55aff323b20a8136c4e&v=3.2.50&re=1&nt=1&ch=493041&os=21&model=MX4+Pro&sn=4.66476154040931&cu=m76&ca1=armeabi-v7a&ca2=armeabi&ppi=1536x2560&cpc=1&startCount=4"); 40 | String contentJson = 
EntityUtils.toString(client.execute(getContentJson).getEntity()); 41 | 42 | JSONObject jsonObject = new JSONObject(contentJson); 43 | JSONArray contentJsonArray = jsonObject.getJSONObject("data").getJSONArray("messages"); 44 | 45 | for (int i = 0;i < contentJsonArray.length();i++) { 46 | JSONObject messageJsonObject = contentJsonArray.getJSONObject(i); 47 | 48 | String userid = messageJsonObject.getString("username"); 49 | String time = messageJsonObject.getString("create_time"); 50 | String score = String.valueOf(messageJsonObject.getInt("score")); 51 | String review = messageJsonObject.getString("content"); 52 | String agreecount = messageJsonObject.getString("likes"); 53 | 54 | Element comment = app.addElement("comment"); 55 | comment.addElement("userid").setText(userid); 56 | comment.addElement("time").setText(time); 57 | comment.addElement("score").setText(score); 58 | comment.addElement("review").setText(review); 59 | comment.addElement("agreecount").setText(agreecount); 60 | } 61 | client.close(); 62 | } 63 | 64 | public void run() { 65 | try { 66 | setAppXml(); 67 | } catch (ConnectionPoolTimeoutException e) { 68 | System.err.println(appId + "号app从" + start + "开始的评论发生-ConnectionPoolTimeoutException"); 69 | return; 70 | } catch (ClientProtocolException e) { 71 | e.printStackTrace(); 72 | return; 73 | } catch (SocketTimeoutException e) { 74 | System.err.println(appId + "号app从" + start + "开始的评论发生-SocketTimeoutException"); 75 | return; 76 | } catch (IOException e) { 77 | e.printStackTrace(); 78 | return; 79 | } 80 | System.out.println(appId + "号app从" + start + "开始的评论抓取完毕"); 81 | } 82 | } -------------------------------------------------------------------------------- /com/crawl/comments/CrawlUtils.java: -------------------------------------------------------------------------------- 1 | package com.crawl.comments; 2 | 3 | import org.apache.http.client.methods.CloseableHttpResponse; 4 | import org.apache.http.client.methods.HttpGet; 5 | import 
org.apache.http.impl.client.CloseableHttpClient; 6 | import org.apache.http.impl.client.HttpClients; 7 | import org.apache.http.util.EntityUtils; 8 | import org.dom4j.Element; 9 | import org.dom4j.io.XMLWriter; 10 | import org.json.JSONObject; 11 | 12 | import java.io.FileWriter; 13 | import java.io.IOException; 14 | import java.io.Writer; 15 | import java.util.HashSet; 16 | import java.util.Set; 17 | 18 | /** 19 | * Created by geekgao on 15-10-25. 20 | */ 21 | public class CrawlUtils { 22 | /** 23 | * 24 | * @param id appid 25 | * @return app名字 26 | */ 27 | public static String getAppName(String id) throws IOException { 28 | CloseableHttpClient client = HttpClients.createDefault(); 29 | 30 | HttpGet get = new HttpGet("http://zhushou.360.cn/detail/index/soft_id/" + id); 31 | CloseableHttpResponse response; 32 | try { 33 | response = client.execute(get); 34 | } catch (java.net.UnknownHostException e) { 35 | return null; 36 | } 37 | return EntityUtils.toString(response.getEntity()).split("")[1].split("<")[0]; 38 | } 39 | 40 | /** 41 | * 42 | * @param xml xml文档 43 | * @param fileName 存储到这个地方 44 | */ 45 | public static void writeXmlToFile(Element xml,String fileName) throws IOException { 46 | Writer fileWriter = new FileWriter(fileName); 47 | XMLWriter xmlWriter = new XMLWriter(fileWriter); 48 | xmlWriter.write(xml); 49 | xmlWriter.close(); 50 | } 51 | 52 | /** 53 | * 获取需要下载的app的id 54 | * @param uri app类别页 55 | * @param limit 获取前limit个app的评论 56 | * @return 57 | */ 58 | public static Set<String> getAppIds(String uri,int limit) throws IOException { 59 | /*//因为根据网页源码每个appid会匹配到两次,所以获取limit个就必须获取2*limit次 60 | limit = limit * 2; 61 | Set<String> appIds = null; 62 | 63 | //获取网页源码,得到appid 64 | HttpGet get = new HttpGet(uri); 65 | CloseableHttpResponse response = client.execute(get); 66 | String html = EntityUtils.toString(response.getEntity()); 67 | 68 | Pattern getAppIdRegex = Pattern.compile("(?m)/detail/index/soft_id/(.*?)\""); 69 | Matcher matcher = 
getAppIdRegex.matcher(html); 70 | 71 | //至少有一个结果才new一个set 72 | if (matcher.find()) { 73 | appIds = new HashSet<String>(); 74 | } else { 75 | return appIds; 76 | } 77 | 78 | //控制获取的appid个数 79 | int count = 0; 80 | //把所有匹配到的appid加入到结果中 81 | do { 82 | if (count < limit) { 83 | appIds.add(matcher.group(1)); 84 | count++; 85 | } 86 | } while (matcher.find()); 87 | 88 | return appIds;*/ 89 | Set<String> s = new HashSet<String>(); 90 | // s.add("3581"); 91 | // s.add("778702"); 92 | // s.add("1586"); 93 | // s.add("6276"); 94 | // s.add("122437"); 95 | // s.add("5632"); 96 | // s.add("4107"); 97 | // s.add("98008"); 98 | // s.add("3100672"); 99 | // s.add("2345172"); 100 | // s.add("1343"); 101 | // s.add("3094256"); 102 | // s.add("101594"); 103 | // s.add("1840672"); 104 | // s.add("1643"); 105 | // s.add("893686"); 106 | // s.add("3032510"); 107 | s.add("1936882"); 108 | // s.add("7256"); 109 | // s.add("727030"); 110 | 111 | return s; 112 | } 113 | 114 | public static int getCommentCount(int appId) throws IOException { 115 | CloseableHttpClient client = HttpClients.createDefault(); 116 | 117 | HttpGet getJson = new HttpGet("http://comment.mobilem.360.cn/comment/getComments?baike=" + appId + "&level=0&start=0&count=1&fm=home_jingjia_3&m=c1804fc5ca4ded8293acd1151efaf3db&m2=61f3c1e4d105b55aff323b20a8136c4e&v=3.2.50&re=1&nt=1&ch=493041&os=21&model=MX4+Pro&sn=4.66476154040931&cu=m76&ca1=armeabi-v7a&ca2=armeabi&ppi=1536x2560&cpc=1&startCount=4"); 118 | CloseableHttpResponse response = client.execute(getJson); 119 | String json = EntityUtils.toString(response.getEntity()); 120 | JSONObject jsonObject = new JSONObject(json); 121 | 122 | return jsonObject.getJSONObject("data").getInt("total"); 123 | } 124 | } -------------------------------------------------------------------------------- /com/crawl/comments/Main.java: -------------------------------------------------------------------------------- 1 | package com.crawl.comments; 2 | 3 | import 
org.apache.http.client.config.RequestConfig; 4 | import org.apache.http.client.methods.CloseableHttpResponse; 5 | import org.apache.http.client.methods.HttpGet; 6 | import org.apache.http.impl.client.CloseableHttpClient; 7 | import org.apache.http.impl.client.HttpClients; 8 | import org.apache.http.util.EntityUtils; 9 | import org.dom4j.DocumentHelper; 10 | import org.dom4j.Element; 11 | import org.json.JSONArray; 12 | import org.json.JSONObject; 13 | 14 | import java.io.IOException; 15 | import java.util.Set; 16 | import java.util.concurrent.ExecutorService; 17 | import java.util.concurrent.Executors; 18 | 19 | /** 20 | * Created by geekgao on 15-10-25. 21 | */ 22 | public class Main { 23 | public static void main(String[] args) throws IOException, InterruptedException { 24 | //获取要抓取的app的id 25 | Set<String> appIds = CrawlUtils.getAppIds("",1); 26 | //设置超时 27 | RequestConfig requestConfig = RequestConfig.custom().setConnectionRequestTimeout(2000).setSocketTimeout(2000).setConnectTimeout(2000).build(); 28 | //建立client 29 | CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(requestConfig).build(); 30 | 31 | for (String id:appIds) { 32 | //建立线程池 33 | ExecutorService executorService = Executors.newFixedThreadPool(30); 34 | //建立xml根节点 35 | Element app = DocumentHelper.createDocument().addElement("app"); 36 | 37 | //添加appid节点 38 | String appName = CrawlUtils.getAppName(id); 39 | if (appName == null) { 40 | System.out.println(id + "号app名称评论抓取失败,所以跳过抓取评论"); 41 | continue; 42 | } 43 | 44 | app.addElement("appid").setText(appName); 45 | System.out.println("开始抓取[" + appName + "],id=" + id); 46 | 47 | //获取app评分和各类型的评论数目信息 48 | HttpGet getJson = new HttpGet("http://comment.mobilem.360.cn/comment/getCommentTags?objid=" + id + "&fm=home_jingjia_3&m=c1804fc5ca4ded8293acd1151efaf3db&m2=61f3c1e4d105b55aff323b20a8136c4e&v=3.2.50&re=1&nt=1&ch=493041&os=21&model=MX4+Pro&sn=4.66476154040931&cu=m76&ca1=armeabi-v7a&ca2=armeabi&ppi=1536x2560&cpc=1&startCount=4"); 49 
| CloseableHttpResponse response = client.execute(getJson); 50 | String json = EntityUtils.toString(response.getEntity()); 51 | JSONObject jsonObject = new JSONObject(json); 52 | 53 | //获取分数 54 | double overallrating = (Double.valueOf(jsonObject.getJSONObject("data").getJSONObject("score").getString("score"))) / 10; 55 | JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONArray("tag"); 56 | String good = String.valueOf(jsonArray.getJSONObject(1).get("num")); 57 | String neutral = String.valueOf(jsonArray.getJSONObject(2).get("num")); 58 | String poor = String.valueOf(jsonArray.getJSONObject(3).get("num")); 59 | 60 | app.addElement("overallrating").setText(String.valueOf(overallrating)); 61 | app.addElement("good").setText(String.valueOf(good)); 62 | app.addElement("neutral").setText(neutral); 63 | app.addElement("poor").setText(poor); 64 | 65 | int commentsCount = CrawlUtils.getCommentCount(Integer.valueOf(id)); 66 | System.out.println("[" + appName + "]总共" + commentsCount + "条评论"); 67 | //每次获取的评论个数 68 | int count = 25; 69 | for (int start = 0;start < commentsCount;start += count) { 70 | //如果最后一次不够count个评论 71 | if (start + count > commentsCount) { 72 | count = commentsCount - start; 73 | } 74 | 75 | // System.out.println("从第" + start + "个评论开始抓取"); 76 | executorService.submit(new CrawlComments(app, start, count, Integer.valueOf(id))); 77 | } 78 | 79 | executorService.shutdown(); 80 | while (true) { 81 | if (executorService.isTerminated()) { 82 | break; 83 | } 84 | Thread.sleep(1000); 85 | } 86 | 87 | CrawlUtils.writeXmlToFile(app,"/home/geekgao/comments/" + System.currentTimeMillis() + ".xml"); 88 | } 89 | client.close(); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /medical_question/GetAnswers.java: -------------------------------------------------------------------------------- 1 | import org.dom4j.DocumentHelper; 2 | import org.jsoup.Jsoup; 3 | import org.jsoup.nodes.Document; 4 | import 
org.jsoup.nodes.Element; 5 | import org.jsoup.select.Elements; 6 | 7 | import java.io.File; 8 | import java.io.IOException; 9 | import java.net.SocketTimeoutException; 10 | import java.util.*; 11 | 12 | public class GetAnswers { 13 | 14 | Map<String,String> categoryLink;//获取八个"类别"的名字和链接 15 | 16 | public static void main(String[] args) { 17 | new GetAnswers().launch(); 18 | } 19 | 20 | public void launch() { 21 | getCategoryLink(); 22 | getAllAnswer(); 23 | } 24 | 25 | private void getAllAnswer() { 26 | Set<String> categoryName = categoryLink.keySet(); 27 | for (String name:categoryName) { 28 | if (name.equals("呼吸内科") || name.equals("内分泌科") || name.equals("肾内科") || name.equals("消化内科") || name.equals("血液科") || name.equals("风湿科")) { 29 | continue; 30 | } 31 | String webAddress = categoryLink.get(name);//“页”的链接 32 | 33 | //链接地址不为空就表明还有下一页(到最后一页后设置为空) 34 | //循环遍历每一页 35 | int pageCount = 1;//记录抓取的页数 36 | int allPageCount = 0;//记录总共需要抓取的网页页数 37 | 38 | try { 39 | Document tempDom = Jsoup.connect(webAddress).userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 QIHU 360EE").timeout(5000).get(); 40 | allPageCount = Integer.parseInt(tempDom.select(".pager-last").attr("href").split("=")[1].split("#")[0])/25 + 1; 41 | } catch (IOException e) { 42 | e.printStackTrace(); 43 | } 44 | 45 | for (int i = 1;i <= allPageCount;i++) { 46 | List<String> answerAddressList = new LinkedList<String>();//先获取一页之内每一个有回答的问题的链接,存储在这个List里 47 | Document dom = null; 48 | try { 49 | dom = Jsoup.connect(webAddress + "?pn=" + (i - 1) * 25 + "#list").userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 QIHU 360EE").timeout(5000).get(); 50 | Elements allQuestion = dom.select(".question-list").select(".question-item"); 51 | for (Element question:allQuestion) { 52 | if (!question.select(".title-line").select(".question-answer-num").text().equals("0回答")) { 53 | 
answerAddressList.add(question.select(".title-line").select(".title-container").select(".question-title").attr("abs:href")); 54 | } 55 | } 56 | } catch (SocketTimeoutException e) { 57 | System.out.println("连接超时:" + webAddress); 58 | } catch (IOException e) { 59 | e.printStackTrace(); 60 | System.out.println("意外错误:" + webAddress); 61 | } 62 | //上面这部分获取了一页上所有问题的链接 63 | 64 | //下面就进每一个问题的页面,抓取信息放到dom4j的Document中,最后用多线程写入文件 65 | Map<String, org.dom4j.Document> answerDom4jDocument = new HashMap<String, org.dom4j.Document>();//问题题目对应题目的document(题目是文件的文件名) 66 | for (String answerAddress:answerAddressList) { 67 | //System.out.println("正在抓[" + name + "]类别下的" + answerAddress); 68 | try { 69 | Document answerDom = Jsoup.connect(answerAddress).userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 QIHU 360EE").timeout(5000).get(); 70 | String type = name; 71 | String title = answerDom.title(); 72 | String answerNum = answerDom.select("#wgt-answers").select(".hd").select("h2").text().split("条")[0]; 73 | 74 | org.dom4j.Document document = DocumentHelper.createDocument();//这个问题的dom树 75 | answerDom4jDocument.put(title,document); 76 | org.dom4j.Element root = document.addElement("root"); 77 | root.addElement("question").addText(title).addAttribute("type",name); 78 | org.dom4j.Element answers = root.addElement("answers").addAttribute("number",answerNum); 79 | 80 | //答案有三种【题主选择的最佳答案,网友选择的最佳答案,其他答案】 81 | 82 | Elements allAnswer = answerDom.select(".bd.answer"); 83 | //分别处理每一页的所有答案 84 | for (Element answer:allAnswer) { 85 | String text = answer.select(".answer-text").text(); 86 | String username = answer.select(".line.info.f-aid").select(".user-name").text(); 87 | if (username.equals("")) { 88 | username = answer.select(".line.info.f-aid").select(".mavin-name").text(); 89 | } 90 | String grade = answer.select(".line.info.f-aid").text().split(" ")[answer.select(".line.info.f-aid").text().split(" ").length - 1]; 91 | if 
(grade.equals("最快回答")) { 92 | grade = answer.select(".line.info.f-aid").text().split(" ")[answer.select(".line.info.f-aid").text().split(" ").length - 2]; 93 | } 94 | 95 | //不是“*级”那就是一个专家的称号 96 | String author = "null"; 97 | if (!grade.contains("级")) { 98 | author = grade; 99 | grade = answer.select(".line.info.f-aid").select(".f-orange.f-yahei.ml-5").select("span").text(); 100 | } 101 | 102 | String support = answer.select(".line.content").select(".grid-r.f-aid").select(".evaluate").attr("data-evaluate"); 103 | String unsupport = answer.select(".line.content").select(".grid-r.f-aid").select(".evaluate.evaluate-bad").attr("data-evaluate"); 104 | 105 | org.dom4j.Element ans = answers.addElement("answer").addAttribute("username",username).addAttribute("grade",grade).addAttribute("author",author); 106 | ans.addElement("text").addText(text); 107 | ans.addElement("support").addText(support); 108 | ans.addElement("unsupport").addText(unsupport); 109 | 110 | if (answer.hasClass("wgt-replyer-best")) { 111 | ans.addElement("best_answer").addText("yes"); 112 | } else { 113 | ans.addElement("best_answer").addText("no"); 114 | } 115 | } 116 | 117 | 118 | } catch (SocketTimeoutException e) { 119 | System.out.println("连接超时:" + answerAddress); 120 | } catch (IOException e) { 121 | e.printStackTrace(); 122 | System.out.println("意外错误:" + answerAddress); 123 | } 124 | } 125 | 126 | Set<String> answerTitle = answerDom4jDocument.keySet(); 127 | //多线程写入文件 128 | for (String title:answerTitle) { 129 | //分类目录不存在时创建文件夹 130 | if (!new File("/home/geekgao/medical_question/" + name).exists()) { 131 | new File("/home/geekgao/medical_question/" + name).mkdir(); 132 | } 133 | 134 | new WriteAnswersToFile("/home/geekgao/medical_question/" + name + "/" + System.currentTimeMillis() + ".xml",answerDom4jDocument.get(title)).start(); 135 | } 136 | 137 | System.out.println("[" + name + "]类别第" + pageCount++ +"页已写入文件."); 138 | 139 | } 140 | } 141 | } 142 | 143 | public GetAnswers() { 144 | categoryLink = 
new HashMap<String, String>(); 145 | } 146 | 147 | private void getCategoryLink() { 148 | try { 149 | Document dom = Jsoup.connect("http://zhidao.baidu.com/browse/790").userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 QIHU 360EE").timeout(5000).get(); 150 | Element categorys = dom.select(".category-list").first(); 151 | Elements allLi = categorys.select("li"); 152 | 153 | for (Element li:allLi) { 154 | categoryLink.put(li.text(), "http://zhidao.baidu.com" + li.select("a").attr("href")); 155 | } 156 | 157 | } catch (SocketTimeoutException e) { 158 | System.out.println("连接超时:http://zhidao.baidu.com/browse/790"); 159 | } catch (IOException e) { 160 | e.printStackTrace(); 161 | System.out.println("连接超时:http://zhidao.baidu.com/browse/790"); 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /medical_question/WriteAnswersToFile.java: -------------------------------------------------------------------------------- 1 | import org.dom4j.Document; 2 | import org.dom4j.io.OutputFormat; 3 | import org.dom4j.io.XMLWriter; 4 | 5 | import java.io.File; 6 | import java.io.FileWriter; 7 | import java.io.IOException; 8 | 9 | 10 | public class WriteAnswersToFile extends Thread { 11 | 12 | private String address;//输出文件到哪个地址 13 | private Document dom;//将这个dom放入文件里存储 14 | 15 | /** 16 | * 17 | * @param address 文件将存储到这个地址 18 | * @param dom asd 即将存储到硬盘的xml文件 19 | */ 20 | public WriteAnswersToFile(String address,Document dom) { 21 | this.address = address; 22 | this.dom = dom; 23 | } 24 | 25 | public void run() { 26 | OutputFormat outFormat = OutputFormat.createPrettyPrint(); 27 | outFormat.setEncoding("UTF-8"); 28 | 29 | try { 30 | XMLWriter xml = new XMLWriter(new FileWriter(new File(address)),outFormat); 31 | xml.write(dom); 32 | xml.close(); 33 | } catch (IOException e) { 34 | e.printStackTrace(); 35 | } 36 | } 37 | } 38 | 
-------------------------------------------------------------------------------- /文本情感分析/CalcWeightAndDoc.java: -------------------------------------------------------------------------------- 1 | package org.geekgao.one; 2 | 3 | import org.dom4j.Document; 4 | import org.dom4j.DocumentException; 5 | import org.dom4j.DocumentHelper; 6 | import org.dom4j.Element; 7 | import org.dom4j.io.OutputFormat; 8 | import org.dom4j.io.SAXReader; 9 | import org.dom4j.io.XMLWriter; 10 | 11 | import java.io.*; 12 | import java.util.*; 13 | 14 | public class CalcWeightAndDoc { 15 | //这三个常量是训练文章的存储的地方 16 | private final String positiveArticlePath = "/home/geekgao/practice/positive"; 17 | private final String negativeArticlePath = "/home/geekgao/practice/negative"; 18 | private final String unsureArticlePath = "/home/geekgao/practice/unsure"; 19 | 20 | //这两个是词典的位置 21 | private final String posiDictPath = "/home/geekgao/朴素贝叶斯/台湾大学情感词典/ntusd-positive.txt"; 22 | private final String negaDictPath = "/home/geekgao/朴素贝叶斯/台湾大学情感词典/ntusd-negative.txt"; 23 | 24 | private Map<String,Integer> positiveWord;//存储积极词汇的map 25 | private Map<String,Integer> negativeWord;//存储消极词汇的map 26 | private Map<String,Integer> unsureWord;//存储不确定词汇的map 27 | 28 | //这两个存储词典中的词语 29 | private Set<String> positiveDict; 30 | private Set<String> negativeDict; 31 | 32 | //需要的全局变量 33 | private boolean isGroup = false; 34 | String strTemp;//从xml文件解析词语时用到的临时变量 35 | 36 | public static void main(String[] args) { 37 | new CalcWeightAndDoc().launch(); 38 | } 39 | 40 | public void launch() { 41 | positiveDict = new HashSet<String>(); 42 | negativeDict = new HashSet<String>(); 43 | 44 | readEmotionWord(positiveDict,posiDictPath); 45 | readEmotionWord(negativeDict,negaDictPath); 46 | 47 | //这里两个地址是目标地址,生成的文件就在下面两个地址里 48 | calcDoc("/home/geekgao/doc.xml"); 49 | calcWeight("/home/geekgao/weight.xml"); 50 | 51 | System.out.println("执行完毕!"); 52 | } 53 | 54 | public void readEmotionWord(Set<String> Dict, String dictPath) { 55 | File 
file = new File(dictPath); 56 | BufferedReader reader = null; 57 | try { 58 | String t; 59 | reader = new BufferedReader(new FileReader(file)); 60 | while ((t = reader.readLine()) != null) { 61 | Dict.add(t); 62 | } 63 | } catch (IOException e) { 64 | e.printStackTrace(); 65 | } finally { 66 | if (reader != null) { 67 | try { 68 | reader.close(); 69 | } catch (IOException e) { 70 | 71 | } 72 | } 73 | } 74 | } 75 | 76 | //参数是生成的xml文件的路径与名字 77 | public void calcDoc(String resultPath) { 78 | File negative[] = new File(negativeArticlePath).listFiles(); 79 | File positive[] = new File(positiveArticlePath).listFiles(); 80 | File unsure[] = new File(unsureArticlePath).listFiles(); 81 | double negCount = 0; 82 | double posCount = 0; 83 | double unsCount = 0; 84 | 85 | try { 86 | for (File file : negative) { 87 | if (file.isFile()) { 88 | negCount++; 89 | } 90 | } 91 | 92 | for (File file : positive) { 93 | if (file.isFile()) { 94 | posCount++; 95 | } 96 | } 97 | 98 | for (File file : unsure) { 99 | if (file.isFile()) { 100 | unsCount++; 101 | } 102 | } 103 | } catch(NullPointerException e){ 104 | System.out.println("程序因为空引用结束!"); 105 | System.exit(1); 106 | } 107 | 108 | //建立document对象 109 | try { 110 | Document document = DocumentHelper.createDocument(); 111 | 112 | Element root = document.addElement("root");//添加文档根 113 | Element request = root.addElement("prior"); //添加root的子节点 114 | request.addAttribute("pNegative", String.valueOf(negCount/(negCount + posCount + unsCount))); 115 | request.addAttribute("pPositive", String.valueOf(posCount/(negCount + posCount + unsCount))); 116 | request.addAttribute("pUnsure", String.valueOf(unsCount/(negCount + posCount + unsCount))); 117 | 118 | OutputFormat format = OutputFormat.createPrettyPrint(); 119 | format.setEncoding("UTF-8");//根据需要设置编码 120 | // 输出全部原始数据,并用它生成新的我们需要的XML文件 121 | XMLWriter writer2 = new XMLWriter(new FileWriter(new File(resultPath)), format); 122 | writer2.write(document); //输出到文件 123 | writer2.close(); 124 | } 
catch (UnsupportedEncodingException e) { 125 | e.printStackTrace(); 126 | } catch (IOException e) { 127 | e.printStackTrace(); 128 | } 129 | } 130 | 131 | //参数是生成的xml文件的路径与名字 132 | public void calcWeight(String resultPath) { 133 | positiveWord = new HashMap<String, Integer>(); 134 | negativeWord = new HashMap<String, Integer>(); 135 | unsureWord = new HashMap<String, Integer>(); 136 | 137 | //计算各自类别所有文章中每个词汇出现的次数 138 | getWordMap(positiveWord,positiveArticlePath); 139 | getWordMap(negativeWord,negativeArticlePath); 140 | getWordMap(unsureWord,unsureArticlePath); 141 | 142 | //存储计算后验概率公式中的分母的第一部分,第二部分等于1 143 | double allPosWeight = 0; 144 | double allNegWeight = 0; 145 | double allUnsWeight = 0; 146 | 147 | //保留各个Map的情感词汇 148 | keepEmotionWord(positiveWord); 149 | keepEmotionWord(negativeWord); 150 | keepEmotionWord(unsureWord); 151 | 152 | /*System.out.println(positiveWord); 153 | System.out.println(negativeWord); 154 | System.out.println(unsureWord);*/ 155 | 156 | /* 157 | (1)遍历positiveWord这个Map,得到里面的各个词语在积极词汇中的次数,再在其他两个Map中查看是否有这个词语,有,就把其他的那个 158 | 次数加到当前Map的当前词语的value上,并且删除那个Map中的当前词语;没有这个词的话,那么在那个;类别中出现的次数就是0. 
159 | (2)遍历negativeWord,不用看positiveWord了,只需看unsureWord,处理方法同上。 160 | (3)遍历unsureWord,这些词在其他两个类别中都是0,直接得到在当前类别中的值 161 | */ 162 | 163 | try { 164 | Document xmlFile = DocumentHelper.createDocument();//建立一个xml文档 165 | Element root = xmlFile.addElement("root"); 166 | 167 | Set<String> word = positiveWord.keySet(); 168 | for (Iterator it = word.iterator();it.hasNext();) { 169 | String tmp = (String)it.next(); 170 | Integer count = positiveWord.get(tmp); 171 | allPosWeight += count; 172 | } 173 | 174 | word = negativeWord.keySet(); 175 | for (Iterator it = word.iterator();it.hasNext();) { 176 | String tmp = (String)it.next(); 177 | Integer count = negativeWord.get(tmp); 178 | allNegWeight += count; 179 | } 180 | 181 | word = unsureWord.keySet(); 182 | for (Iterator it = word.iterator();it.hasNext();) { 183 | String tmp = (String)it.next(); 184 | Integer count = unsureWord.get(tmp); 185 | allUnsWeight += count; 186 | } 187 | 188 | word = positiveWord.keySet(); 189 | for (Iterator it = word.iterator(); it.hasNext(); ) { 190 | Element wd = root.addElement("word");//建立新的词语节点 191 | String tmp = (String) it.next(); 192 | wd.addAttribute("data",tmp); 193 | Integer count; 194 | 195 | count = positiveWord.get(tmp); 196 | wd.addAttribute("pPositive",String.valueOf(count / (allPosWeight + 1))); 197 | 198 | if (negativeWord.containsKey(tmp)) { 199 | count = negativeWord.get(tmp); 200 | negativeWord.remove(tmp); 201 | wd.addAttribute("pNegative",String .valueOf(count / (allNegWeight + 1))); 202 | } else { 203 | wd.addAttribute("pNegative","0"); 204 | } 205 | 206 | if (unsureWord.containsKey(tmp)) { 207 | count = unsureWord.get(tmp); 208 | unsureWord.remove(tmp); 209 | wd.addAttribute("pUnsure",String.valueOf(count / (allUnsWeight + 1))); 210 | } else { 211 | wd.addAttribute("pUnsure","0"); 212 | } 213 | } 214 | 215 | word = negativeWord.keySet(); 216 | for (Iterator it = word.iterator(); it.hasNext(); ) { 217 | Element wd = root.addElement("word");//建立新的词语节点 218 | String tmp = 
(String) it.next(); 219 | wd.addAttribute("data",tmp); 220 | Integer count; 221 | 222 | wd.addAttribute("pPositive","0"); 223 | count = negativeWord.get(tmp); 224 | wd.addAttribute("pNegative",String .valueOf(count / (allNegWeight + 1))); 225 | 226 | if (unsureWord.containsKey(tmp)) { 227 | count = unsureWord.get(tmp); 228 | unsureWord.remove(tmp); 229 | wd.addAttribute("pUnsure",String.valueOf(count / (allUnsWeight + 1))); 230 | } else { 231 | wd.addAttribute("pUnsure","0"); 232 | } 233 | } 234 | 235 | word = unsureWord.keySet(); 236 | for (Iterator it = word.iterator(); it.hasNext(); ) { 237 | Element wd = root.addElement("word");//建立新的词语节点 238 | String tmp = (String) it.next(); 239 | wd.addAttribute("data",tmp); 240 | Integer count; 241 | 242 | wd.addAttribute("pPositive","0"); 243 | wd.addAttribute("pNegative","0"); 244 | count = unsureWord.get(tmp); 245 | wd.addAttribute("pUnsure",String.valueOf(count / (allUnsWeight + 1))); 246 | } 247 | 248 | //输出全部原始数据,在编译器中显示 249 | OutputFormat format = OutputFormat.createPrettyPrint(); 250 | format.setEncoding("UTF-8");//根据需要设置编码 251 | // 输出全部原始数据,并用它生成新的我们需要的XML文件 252 | XMLWriter writer2 = new XMLWriter(new FileWriter(new File(resultPath)), format); 253 | writer2.write(xmlFile); //输出到文件 254 | writer2.close(); 255 | } catch (UnsupportedEncodingException e) { 256 | e.printStackTrace(); 257 | } catch (IOException e) { 258 | e.printStackTrace(); 259 | } 260 | } 261 | 262 | public void getWordMap(Map<String,Integer> wordMap,String articlePath) { 263 | File articleArray[] = new File(articlePath).listFiles();//将文件夹中的文件都读取进来,下面就一个个的分析 264 | 265 | for (int i = 0;i < articleArray.length;i++) { 266 | calcFreauency(wordMap,articleArray[i]); 267 | } 268 | } 269 | 270 | //解析出文章中的词语,并且映射上频数 271 | public void calcFreauency(Map<String,Integer> wordMap,File article) { 272 | try { 273 | //取得dom4j的解析器 274 | SAXReader reader = new SAXReader(); 275 | //取得代表文档的Document对象 276 | Document document = reader.read(article); 277 | //取得根结点 278 | 
Element root = document.getRootElement();//取得根节点<document> 279 | 280 | List<?> list1 = root.elements();//取得<document>的子节点 281 | List<?> sentence_list = ((Element)list1.get(0)).elements();//<content>下的<sentence>集合 282 | 283 | List<?> tok_list;//<sentence>下的<tok>集合 284 | //Dom4jDemo t = new Dom4jDemo(); 285 | //遍历<sentence>节点 286 | for (int i = 0; i < sentence_list.size(); i++) { 287 | tok_list = ((Element)sentence_list.get(i)).elements();//获得每个sentence的tok集合 288 | for (int j = 0;j < tok_list.size();j++) { 289 | setWordMap((Element)tok_list.get(j),wordMap); 290 | } 291 | } 292 | } catch (DocumentException e) { 293 | e.printStackTrace(); 294 | } 295 | } 296 | 297 | public void setWordMap(Element tok,Map<String,Integer> wordMap) { 298 | String type,text; 299 | List<?> list; 300 | 301 | if (!(tok.getName().equals("tok"))) {//如果不是tok节点,那么就不用处理了 302 | return ; 303 | } 304 | //获取属性type 305 | type = tok.attributeValue("type"); 306 | //只访问原子节点 307 | if (type.equals("atom") && isGroup) { 308 | text = tok.getText(); 309 | text = text.replace("\t", ""); 310 | text = text.replace("\n", ""); 311 | /*System.out.print(text + " ");*/ 312 | strTemp = strTemp + text; 313 | } else if (type.equals("group")) { 314 | isGroup = true; 315 | strTemp = ""; 316 | list = tok.elements(); 317 | for (int k = 0,size3 = list.size();k < size3;k++) { 318 | tok = (Element)list.get(k); 319 | setWordMap(tok,wordMap); 320 | } 321 | Integer count = wordMap.get(strTemp);//计算当前map里面的当前text对应的次数 322 | wordMap.put(strTemp,count == null?1:count + 1); 323 | isGroup = false; 324 | } 325 | } 326 | 327 | public void keepEmotionWord(Map<String,Integer> wordMap) { 328 | Set<String> word = wordMap.keySet(); 329 | 330 | for (Iterator it = word.iterator();it.hasNext();) { 331 | String tmp = (String)it.next(); 332 | //两个情感词典都不包含这个词语,那么就把这个词语去掉 333 | if (!positiveDict.contains(tmp) && !negativeDict.contains(tmp)) { 334 | it.remove(); 335 | } 336 | } 337 | } 338 | } 339 | 
-------------------------------------------------------------------------------- /文本情感分析/EmotionJudge.java: --------------------------------------------------------------------------------
package org.geekgao.one;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

/**
 * Scores a single article against three sentiment classes (positive, negative, unsure).
 *
 * <p>Class priors are read from {@code doc.xml}, per-word probabilities (called
 * "posterior" in this code base) from {@code weight.xml}. The article is tokenised with
 * dom4j, reduced to the words found in the two sentiment dictionaries, and each class
 * score is computed as {@code prior * product(p(word)^count)}. The three scores are
 * printed to standard output.
 */
public class EmotionJudge {
    private double priorPositive; // prior probability of the positive class
    private double priorNegative; // prior probability of the negative class
    private double priorUnsure;   // prior probability of the unsure class

    // Per-word probabilities for each class. Initialised eagerly so that calc() works on
    // empty maps (instead of throwing NullPointerException) when weight.xml cannot be read.
    private Map<String,Double> backPositive = new HashMap<String, Double>();
    private Map<String,Double> backNegative = new HashMap<String, Double>();
    private Map<String,Double> backUnsure = new HashMap<String, Double>();

    // Parsing state shared between recursive setWordMap() calls:
    // isGroup is true while the children of a "group" token are being visited,
    // strTemp accumulates the text of the atoms seen inside that group.
    private boolean isGroup = false;
    private String strTemp;
    private Map<String,Integer> articleWordMap;

    // Locations of the two sentiment dictionaries.
    private final String posiDictPath = "/home/geekgao/朴素贝叶斯/台湾大学情感词典/ntusd-positive.txt";
    private final String negaDictPath = "/home/geekgao/朴素贝叶斯/台湾大学情感词典/ntusd-negative.txt";

    // Words loaded from the dictionaries. Initialised eagerly so keepEmotionWord() is
    // safe to call even before launch() has populated them.
    private Set<String> positiveDict = new HashSet<String>();
    private Set<String> negativeDict = new HashSet<String>();

    public static void main(String [] args) {
        new EmotionJudge().launch();
    }

    /**
     * Runs the whole pipeline: load priors, load word probabilities, load both
     * dictionaries, then score the article and print the result.
     */
    public void launch() {
        getPrior();
        getBack();

        positiveDict = new HashSet<String>();
        negativeDict = new HashSet<String>();
        readEmotionWord(positiveDict, posiDictPath);
        readEmotionWord(negativeDict, negaDictPath);
        calc();
    }

    /**
     * Loads the class priors from the {@code pPositive}, {@code pNegative} and
     * {@code pUnsure} attributes of the first child of the root element of doc.xml.
     * On a parse failure, or when the root has no children, the priors keep their
     * previous values (0.0 on a fresh instance).
     */
    public void getPrior() {
        SAXReader sax = new SAXReader();
        try {
            // The priors are read from this file.
            Document document = sax.read(new File("/home/geekgao/doc.xml"));
            Element root = document.getRootElement();
            List<Element> prior = root.elements();
            if (prior.isEmpty()) {
                // Nothing to read; avoid IndexOutOfBoundsException on get(0).
                System.err.println("doc.xml has no child element holding the prior probabilities");
                return;
            }

            Element first = prior.get(0);
            priorPositive = Double.parseDouble(first.attributeValue("pPositive"));
            priorNegative = Double.parseDouble(first.attributeValue("pNegative"));
            priorUnsure = Double.parseDouble(first.attributeValue("pUnsure"));
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the per-word probabilities from weight.xml. Every child of the root is
     * expected to carry the attributes {@code data} (the word), {@code pPositive},
     * {@code pNegative} and {@code pUnsure}. On a parse failure the previously held
     * maps are left untouched.
     */
    public void getBack() {
        SAXReader sax = new SAXReader();
        try {
            // The word weights are read from this file.
            Document document = sax.read(new File("/home/geekgao/weight.xml"));
            Element root = document.getRootElement();
            List<Element> back = root.elements();

            // Replace the maps only after the document was parsed successfully.
            backNegative = new HashMap<String, Double>();
            backPositive = new HashMap<String, Double>();
            backUnsure = new HashMap<String, Double>();

            for (Element entry : back) {
                double backPos = Double.parseDouble(entry.attributeValue("pPositive"));
                double backNeg = Double.parseDouble(entry.attributeValue("pNegative"));
                double backUns = Double.parseDouble(entry.attributeValue("pUnsure"));
                String word = entry.attributeValue("data");

                backPositive.put(word, backPos);
                backNegative.put(word, backNeg);
                backUnsure.put(word, backUns);
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scores the article and prints one line per class.
     *
     * <p>Each class score is {@code prior * product over words of p(word)^count}.
     * NOTE(review): the product is computed in linear space, so it can underflow to 0
     * for articles with many sentiment words, and a single word with probability 0
     * zeroes the whole class. Summing logarithms would avoid the underflow but would
     * change the printed values, so it is left as is.
     */
    public void calc() {
        articleWordMap = new HashMap<String, Integer>();

        // Read the article and keep only the words known to a sentiment dictionary.
        calcFreauency(articleWordMap,new File("/home/geekgao/朴素贝叶斯/500trainblogxml/positiveout/1377331000713.txt"));
        keepEmotionWord(articleWordMap);

        double allBackPos = 1;
        double allBackNeg = 1;
        double allBackUns = 1;

        for (Map.Entry<String,Integer> entry : articleWordMap.entrySet()) {
            String word = entry.getKey();
            int times = entry.getValue();

            // Words missing from a class map simply do not contribute to that class.
            Double pos = backPositive.get(word);
            if (pos != null) {
                allBackPos = Math.pow(pos, times) * allBackPos;
            }

            Double neg = backNegative.get(word);
            if (neg != null) {
                allBackNeg = Math.pow(neg, times) * allBackNeg;
            }

            Double uns = backUnsure.get(word);
            if (uns != null) {
                allBackUns = Math.pow(uns, times) * allBackUns;
            }
        }

        double resultPositive = priorPositive * allBackPos;
        double resultNegative = priorNegative * allBackNeg;
        double resultUnsure = priorUnsure * allBackUns;

        System.out.println("积极:" + resultPositive);
        System.out.println("消极:" + resultNegative);
        System.out.println("不确定:" + resultUnsure);
    }

    /**
     * Parses an article and adds the frequency of every word it contains to
     * {@code wordMap}. The children of the root's first child are treated as sentences
     * and their children as tokens, which are handed to {@link #setWordMap}.
     *
     * @param wordMap word -> occurrence count, updated in place
     * @param article XML file produced by the tokenizer
     */
    public void calcFreauency(Map<String,Integer> wordMap,File article) {
        try {
            SAXReader reader = new SAXReader();
            Document document = reader.read(article);
            Element root = document.getRootElement();

            List<?> rootChildren = root.elements();
            if (rootChildren.isEmpty()) {
                // An empty document contributes no words; avoid get(0) on an empty list.
                return;
            }
            List<?> sentences = ((Element) rootChildren.get(0)).elements();

            for (Object sentence : sentences) {
                for (Object tok : ((Element) sentence).elements()) {
                    setWordMap((Element) tok, wordMap);
                }
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Processes one token element. Elements not named {@code tok} are ignored.
     * A token of type "group" is visited recursively: the text of the "atom" tokens
     * below it is concatenated (tabs and newlines removed) and the resulting string is
     * counted once in {@code wordMap}. Atoms outside a group are ignored, as are
     * tokens without a {@code type} attribute.
     *
     * @param tok     element to inspect
     * @param wordMap word -> occurrence count, updated in place
     */
    public void setWordMap(Element tok,Map<String,Integer> wordMap) {
        if (!"tok".equals(tok.getName())) {
            return;
        }

        // Constant-first comparison: a missing "type" attribute yields null and must
        // not raise NullPointerException.
        String type = tok.attributeValue("type");
        if ("atom".equals(type) && isGroup) {
            String text = tok.getText();
            text = text.replace("\t", "");
            text = text.replace("\n", "");
            strTemp = strTemp + text;
        } else if ("group".equals(type)) {
            isGroup = true;
            strTemp = "";
            for (Object child : tok.elements()) {
                setWordMap((Element) child, wordMap);
            }
            // Count the word assembled from this group's atoms.
            Integer count = wordMap.get(strTemp);
            wordMap.put(strTemp,count == null?1:count + 1);
            isGroup = false;
        }
    }

    /**
     * Removes from {@code wordMap} every word that appears in neither the positive nor
     * the negative dictionary.
     *
     * @param wordMap word -> occurrence count, filtered in place
     */
    public void keepEmotionWord(Map<String,Integer> wordMap) {
        for (Iterator<String> it = wordMap.keySet().iterator(); it.hasNext();) {
            String candidate = it.next();
            if (!positiveDict.contains(candidate) && !negativeDict.contains(candidate)) {
                it.remove();
            }
        }
    }

    /**
     * Reads a dictionary file, one word per line, into {@code Dict}. Lines are trimmed
     * and blank lines are skipped so that stray whitespace cannot prevent a match and
     * the empty string never counts as a sentiment word.
     *
     * @param Dict     set receiving the words
     * @param dictPath path of the dictionary file
     */
    public void readEmotionWord(Set<String> Dict, String dictPath) {
        File file = new File(dictPath);
        BufferedReader reader = null;
        try {
            // NOTE(review): FileReader decodes with the platform default charset;
            // confirm that it matches the encoding of the dictionary files.
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                String entry = line.trim();
                if (entry.length() > 0) {
                    Dict.add(entry);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    // Deliberately ignored: nothing useful can be done if close fails
                    // after the content has already been read.
                }
            }
        }
    }
}
-------------------------------------------------------------------------------- /西邮导游系统源码/数据/data.txt: -------------------------------------------------------------------------------- 1 | 1 114 486 2 45 2 | 2 114 441 1 45 3 39 4 71 3 | 3 153 441 2 39 4 | 4 114 370 2 71 5 38 5 | 5 76 370 4 38 6 56 6 | 6 76 314 5 56 7 13 9 73 7 | 7 63 314 6 13 8 | 8 63 241 9 13 9 | 9 76 241 6 73 8 13 10 26 68 13 10 | 10 76 215 9 26 11 70 11 | 11 146 215 10 70 12 36 12 | 12 172 241 11 36 13 43 13 | 13 211 222 64 58 34 32 12 43 14 | 14 211 109 64 55 15 55 15 | 15 266 109 16 45 
19 56 14 55 16 | 16 311 109 17 61 20 56 15 45 17 | 17 372 109 16 61 21 56 26 44 18 | 18 224 53 19 42 19 | 19 266 53 18 42 20 45 15 56 20 | 20 311 53 16 56 19 45 21 61 21 | 21 372 53 17 56 20 61 22 37 22 | 22 409 53 21 37 23 36 23 | 23 445 53 22 36 24 32 24 | 24 445 85 23 32 25 91 27 24 25 | 25 536 85 24 91 26 | 26 416 109 17 44 27 29 28 19 27 | 27 445 109 24 24 26 29 29 19 28 | 28 416 128 33 47 26 19 29 29 29 | 29 445 128 27 19 28 29 30 55 30 | 30 500 128 29 55 31 47 31 | 31 500 175 32 28 38 47 30 47 32 | 32 528 175 31 28 33 | 33 416 175 67 50 36 47 28 47 34 | 34 243 222 35 65 40 106 13 32 35 | 35 308 222 34 65 66 32 36 108 41 93 36 | 36 416 222 33 47 35 108 37 67 43 93 37 | 37 483 222 36 67 38 17 44 62 38 | 38 500 222 37 17 39 25 31 47 39 | 39 525 222 38 25 40 | 40 205 321 34 106 53 128 41 | 41 308 315 48 46 35 93 42 49 42 | 42 357 315 49 27 41 49 43 59 43 | 43 416 315 36 93 42 59 45 26 44 | 44 458 279 37 62 45 39 45 | 45 442 315 43 26 44 39 46 30 46 | 46 428 342 49 71 45 30 47 21 47 | 47 419 361 50 62 52 27 46 21 48 | 48 308 361 50 49 41 46 49 | 49 357 342 50 19 42 27 46 71 50 | 50 357 361 48 49 49 19 51 25 47 62 51 | 51 357 386 50 25 52 51 54 44 52 | 52 408 386 51 51 55 48 56 93 47 27 53 | 53 267 433 40 128 57 67 54 | 54 357 430 51 44 55 31 57 34 55 | 55 388 430 52 48 54 31 58 36 56 | 56 492 428 52 93 63 71 57 | 57 331 453 53 67 54 34 59 38 58 | 58 402 464 55 36 59 46 62 69 59 | 59 358 480 57 38 58 46 60 27 60 | 60 358 507 59 27 61 67 61 | 61 291 507 60 67 62 | 62 447 517 58 69 63 80 63 | 63 523 492 56 71 62 80 64 | 64 211 164 65 34 13 58 14 55 65 | 65 245 164 64 34 66 | 66 308 190 35 32 67 | 67 366 175 33 50 68 | 68 89 241 9 13 -------------------------------------------------------------------------------- /西邮导游系统源码/数据/view.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/数据/view.txt 
-------------------------------------------------------------------------------- /西邮导游系统源码/源码/images/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/images/icon.png -------------------------------------------------------------------------------- /西邮导游系统源码/源码/images/map.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/images/map.jpg -------------------------------------------------------------------------------- /西邮导游系统源码/源码/org/geekgao/guide/GuideAlgorithm.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/org/geekgao/guide/GuideAlgorithm.java -------------------------------------------------------------------------------- /西邮导游系统源码/源码/org/geekgao/guide/GuideSystem.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/org/geekgao/guide/GuideSystem.java -------------------------------------------------------------------------------- /西邮导游系统源码/源码/org/geekgao/guide/GuideUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/org/geekgao/guide/GuideUtil.java -------------------------------------------------------------------------------- /西邮导游系统源码/源码/org/geekgao/guide/Vertex.java: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/elenakozlova28/Java1/40f269fbe8ca23fbb07ac642cd8cad9410cdd08d/西邮导游系统源码/源码/org/geekgao/guide/Vertex.java --------------------------------------------------------------------------------