├── .gitattributes ├── .gitignore ├── .idea ├── $CACHE_FILE$ ├── .gitignore ├── compiler.xml ├── encodings.xml ├── inspectionProfiles │ └── Project_Default.xml ├── libraries │ ├── Maven__antlr_antlr_2_7_7.xml │ ├── Maven__ch_qos_logback_logback_classic_1_2_3.xml │ ├── Maven__ch_qos_logback_logback_core_1_2_3.xml │ ├── Maven__com_alibaba_fastjson_1_2_47.xml │ ├── Maven__com_fasterxml_classmate_1_3_4.xml │ ├── Maven__com_fasterxml_jackson_core_jackson_annotations_2_9_0.xml │ ├── Maven__com_fasterxml_jackson_core_jackson_core_2_9_6.xml │ ├── Maven__com_fasterxml_jackson_core_jackson_databind_2_9_6.xml │ ├── Maven__com_fasterxml_jackson_datatype_jackson_datatype_jdk8_2_9_6.xml │ ├── Maven__com_fasterxml_jackson_datatype_jackson_datatype_jsr310_2_9_6.xml │ ├── Maven__com_fasterxml_jackson_module_jackson_module_parameter_names_2_9_6.xml │ ├── Maven__com_google_protobuf_protobuf_java_2_6_0.xml │ ├── Maven__com_jayway_jsonpath_json_path_2_4_0.xml │ ├── Maven__com_vaadin_external_google_android_json_0_0_20131108_vaadin1.xml │ ├── Maven__com_zaxxer_HikariCP_2_7_9.xml │ ├── Maven__commons_codec_commons_codec_1_11.xml │ ├── Maven__dom4j_dom4j_1_6_1.xml │ ├── Maven__io_lettuce_lettuce_core_5_1_0_M1.xml │ ├── Maven__io_netty_netty_buffer_4_1_27_Final.xml │ ├── Maven__io_netty_netty_codec_4_1_27_Final.xml │ ├── Maven__io_netty_netty_common_4_1_27_Final.xml │ ├── Maven__io_netty_netty_handler_4_1_27_Final.xml │ ├── Maven__io_netty_netty_resolver_4_1_27_Final.xml │ ├── Maven__io_netty_netty_transport_4_1_27_Final.xml │ ├── Maven__io_projectreactor_reactor_core_3_1_8_RELEASE.xml │ ├── Maven__javax_annotation_javax_annotation_api_1_3_2.xml │ ├── Maven__javax_transaction_javax_transaction_api_1_2.xml │ ├── Maven__javax_validation_validation_api_2_0_1_Final.xml │ ├── Maven__junit_junit_4_12.xml │ ├── Maven__mysql_mysql_connector_java_8_0_11.xml │ ├── Maven__net_bytebuddy_byte_buddy_1_7_11.xml │ ├── Maven__net_bytebuddy_byte_buddy_agent_1_7_11.xml │ ├── 
Maven__net_minidev_accessors_smart_1_2.xml │ ├── Maven__net_minidev_json_smart_2_3.xml │ ├── Maven__org_apache_commons_commons_lang3_3_10.xml │ ├── Maven__org_apache_commons_commons_pool2_2_5_0.xml │ ├── Maven__org_apache_httpcomponents_httpclient_4_5_2.xml │ ├── Maven__org_apache_httpcomponents_httpcore_4_4_10.xml │ ├── Maven__org_apache_logging_log4j_log4j_api_2_10_0.xml │ ├── Maven__org_apache_logging_log4j_log4j_to_slf4j_2_10_0.xml │ ├── Maven__org_apache_tomcat_embed_tomcat_embed_core_8_5_32.xml │ ├── Maven__org_apache_tomcat_embed_tomcat_embed_el_8_5_32.xml │ ├── Maven__org_apache_tomcat_embed_tomcat_embed_websocket_8_5_32.xml │ ├── Maven__org_aspectj_aspectjweaver_1_8_13.xml │ ├── Maven__org_assertj_assertj_core_3_9_1.xml │ ├── Maven__org_attoparser_attoparser_2_0_4_RELEASE.xml │ ├── Maven__org_hamcrest_hamcrest_core_1_3.xml │ ├── Maven__org_hamcrest_hamcrest_library_1_3.xml │ ├── Maven__org_hibernate_common_hibernate_commons_annotations_5_0_1_Final.xml │ ├── Maven__org_hibernate_hibernate_core_5_2_17_Final.xml │ ├── Maven__org_hibernate_javax_persistence_hibernate_jpa_2_1_api_1_0_2_Final.xml │ ├── Maven__org_hibernate_validator_hibernate_validator_6_0_11_Final.xml │ ├── Maven__org_javassist_javassist_3_22_0_GA.xml │ ├── Maven__org_jboss_jandex_2_0_3_Final.xml │ ├── Maven__org_jboss_logging_jboss_logging_3_3_2_Final.xml │ ├── Maven__org_jsoup_jsoup_1_11_2.xml │ ├── Maven__org_mockito_mockito_core_2_15_0.xml │ ├── Maven__org_objenesis_objenesis_2_6.xml │ ├── Maven__org_openjfx_javafx_base_11_0_0_SNAPSHOT.xml │ ├── Maven__org_openjfx_javafx_base_linux_11_0_0_SNAPSHOT.xml │ ├── Maven__org_openjfx_javafx_base_mac_11_0_0_SNAPSHOT.xml │ ├── Maven__org_openjfx_javafx_base_win_11_0_0_SNAPSHOT.xml │ ├── Maven__org_ow2_asm_asm_5_0_4.xml │ ├── Maven__org_projectlombok_lombok_1_16_22.xml │ ├── Maven__org_reactivestreams_reactive_streams_1_0_2.xml │ ├── Maven__org_skyscreamer_jsonassert_1_5_0.xml │ ├── Maven__org_slf4j_jul_to_slf4j_1_7_25.xml │ ├── 
Maven__org_slf4j_slf4j_api_1_7_25.xml │ ├── Maven__org_springframework_boot_spring_boot_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_autoconfigure_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_aop_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_data_jpa_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_data_redis_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_jdbc_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_json_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_logging_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_test_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_thymeleaf_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_tomcat_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_starter_web_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_test_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_boot_spring_boot_test_autoconfigure_2_0_4_RELEASE.xml │ ├── Maven__org_springframework_data_spring_data_commons_2_0_9_RELEASE.xml │ ├── Maven__org_springframework_data_spring_data_jpa_2_0_9_RELEASE.xml │ ├── Maven__org_springframework_data_spring_data_keyvalue_2_0_9_RELEASE.xml │ ├── Maven__org_springframework_data_spring_data_redis_2_0_9_RELEASE.xml │ ├── Maven__org_springframework_spring_aop_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_aspects_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_beans_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_context_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_context_support_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_core_5_0_8_RELEASE.xml │ ├── 
Maven__org_springframework_spring_expression_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_jcl_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_jdbc_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_orm_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_oxm_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_test_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_tx_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_web_5_0_8_RELEASE.xml │ ├── Maven__org_springframework_spring_webmvc_5_0_8_RELEASE.xml │ ├── Maven__org_thymeleaf_extras_thymeleaf_extras_java8time_3_0_1_RELEASE.xml │ ├── Maven__org_thymeleaf_thymeleaf_3_0_9_RELEASE.xml │ ├── Maven__org_thymeleaf_thymeleaf_spring5_3_0_9_RELEASE.xml │ ├── Maven__org_unbescape_unbescape_1_1_5_RELEASE.xml │ ├── Maven__org_xmlunit_xmlunit_core_2_5_1.xml │ └── Maven__org_yaml_snakeyaml_1_19.xml ├── misc.xml ├── modules.xml ├── uiDesigner.xml └── vcs.xml ├── README.md ├── db └── crawler.sql ├── pom.xml ├── proxy-pool.iml └── src ├── main ├── java │ └── com │ │ └── chenerzhu │ │ └── crawler │ │ └── proxy │ │ └── pool │ │ ├── ProxyPoolApplication.java │ │ ├── common │ │ ├── HttpMethod.java │ │ └── RedisKey.java │ │ ├── config │ │ ├── RedisConfig.java │ │ ├── SpringConfig.java │ │ └── WebConfig.java │ │ ├── context │ │ └── SpringContextHolder.java │ │ ├── controller │ │ ├── BaseController.java │ │ └── ProxyIpController.java │ │ ├── entity │ │ ├── AuthorizationKey.java │ │ ├── IPWhiteList.java │ │ ├── ProxyApi.java │ │ ├── ProxyConfig.java │ │ ├── ProxyIp.java │ │ ├── Result.java │ │ ├── SysDataSource.java │ │ └── WebPage.java │ │ ├── exception │ │ ├── ProxyPoolException.java │ │ └── ProxyPoolExceptionHandler.java │ │ ├── interceptor │ │ ├── IPInterceptor.java │ │ └── WebConfiguration.java │ │ ├── job │ │ ├── crawler │ │ │ ├── AbstractCrawler.java │ │ │ ├── CrawlerJob.java │ │ │ ├── Data5uCrawlerJob.java │ │ │ ├── FreeProxyListCrawlerJob.java │ │ │ ├── 
GaoKeYongCrawlerJob.java │ │ │ ├── GatherproxyCrawlerJob.java │ │ │ ├── ICrawler.java │ │ │ ├── IP366CrawlerJob.java │ │ │ ├── IPHaiCrawlerJob.java │ │ │ ├── KuaidailiCrawlerJob.java │ │ │ ├── MyProxyCrawlerJob.java │ │ │ ├── PrivateTXTJob.java │ │ │ ├── Proxy4FreeCrawlerJob.java │ │ │ ├── ProxyListCrawlerJob.java │ │ │ ├── ProxynovaCrawlerJob.java │ │ │ ├── QuanWangCrawlerJob.java │ │ │ ├── SpysOneCrawlerJob.java │ │ │ ├── WebSiteJob.java │ │ │ └── XicidailiCrawlerJob.java │ │ ├── execute │ │ │ ├── ISchedulerJobExecutor.java │ │ │ └── impl │ │ │ │ └── SchedulerJobExecutor.java │ │ └── scheduler │ │ │ ├── AbstractSchedulerJob.java │ │ │ ├── AuthSchedulerJob.java │ │ │ ├── SchedulerJob.java │ │ │ ├── SyncDbSchedulerJob.java │ │ │ ├── SyncRedisSchedulerJob.java │ │ │ ├── UpdateWhiteListSchedulerJob.java │ │ │ └── ValidateRedisSchedulerJob.java │ │ ├── listener │ │ ├── JobContextListener.java │ │ └── SpringContextListener.java │ │ ├── repository │ │ ├── IPWhiteListRepository.java │ │ ├── IProxyApiRepository.java │ │ ├── IProxyConfigRepository.java │ │ └── IProxyIpRepository.java │ │ ├── service │ │ ├── IPWhiteListService.java │ │ ├── IProxyApiService.java │ │ ├── IProxyConfigService.java │ │ ├── IProxyIpRedisService.java │ │ ├── IProxyIpService.java │ │ └── impl │ │ │ ├── IPWhiteListServiceImpl.java │ │ │ ├── ProxyApiServiceImpl.java │ │ │ ├── ProxyConfigServiceImpl.java │ │ │ ├── ProxyIpRedisServiceImpl.java │ │ │ └── ProxyIpServiceImpl.java │ │ ├── thread │ │ └── ThreadFactory.java │ │ └── util │ │ ├── HttpClientUtils.java │ │ ├── HttpsUtils.java │ │ ├── IPUtils.java │ │ ├── MultiDBUtils.java │ │ ├── ProxyUtils.java │ │ └── RedisUtil.java └── resources │ ├── application.properties │ ├── banner.txt │ ├── static │ ├── css │ │ ├── bootstrap-table.css │ │ └── bootstrap.min.css │ ├── img │ │ ├── crawler.PNG │ │ ├── glyphicons-halflings-white.png │ │ ├── glyphicons-halflings.png │ │ └── home.PNG │ └── js │ │ ├── bootstrap-table.js │ │ ├── bootstrap.min.js │ │ └── 
jquery-3.1.1.min.js │ └── templates │ ├── error │ └── 500.html │ ├── index.html │ └── test.html └── test └── java └── com └── chenerzhu └── crawler └── proxy └── pool └── ProxyPoolApplicationTest.java /.gitattributes: -------------------------------------------------------------------------------- 1 | *.js linguist-language=Java 2 | *.css linguist-language=Java 3 | *.html linguist-language=Java 4 | *.vue linguist-language=Java 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | target/ 3 | !.mvn/wrapper/maven-wrapper.jar 4 | !**/src/main/** 5 | !**/src/test/** 6 | 7 | ### STS ### 8 | .apt_generated 9 | .classpath 10 | .factorypath 11 | .project 12 | .settings 13 | .springBeans 14 | .sts4-cache 15 | 16 | ### IntelliJ IDEA ### 17 | .idea 18 | *.iws 19 | *.iml 20 | *.ipr 21 | 22 | ### NetBeans ### 23 | /nbproject/private/ 24 | /nbbuild/ 25 | /dist/ 26 | /nbdist/ 27 | /.nb-gradle/ 28 | build/ 29 | 30 | ### VS Code ### 31 | .vscode/ 32 | -------------------------------------------------------------------------------- /.idea/$CACHE_FILE$: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 20 | 21 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 
| -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 36 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__antlr_antlr_2_7_7.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__ch_qos_logback_logback_classic_1_2_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__ch_qos_logback_logback_core_1_2_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_alibaba_fastjson_1_2_47.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_classmate_1_3_4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_annotations_2_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- 
/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_core_2_9_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_datatype_jackson_datatype_jdk8_2_9_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_datatype_jackson_datatype_jsr310_2_9_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_module_jackson_module_parameter_names_2_9_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_google_protobuf_protobuf_java_2_6_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_jayway_jsonpath_json_path_2_4_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 
| 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_vaadin_external_google_android_json_0_0_20131108_vaadin1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_zaxxer_HikariCP_2_7_9.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_codec_commons_codec_1_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__dom4j_dom4j_1_6_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_lettuce_lettuce_core_5_1_0_M1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_netty_netty_buffer_4_1_27_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_netty_netty_codec_4_1_27_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__io_netty_netty_common_4_1_27_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_netty_netty_handler_4_1_27_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_netty_netty_resolver_4_1_27_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_netty_netty_transport_4_1_27_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_projectreactor_reactor_core_3_1_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__javax_annotation_javax_annotation_api_1_3_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__javax_transaction_javax_transaction_api_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 
12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__javax_validation_validation_api_2_0_1_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__junit_junit_4_12.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__mysql_mysql_connector_java_8_0_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_bytebuddy_byte_buddy_1_7_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_bytebuddy_byte_buddy_agent_1_7_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_minidev_accessors_smart_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_minidev_json_smart_2_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_commons_commons_lang3_3_10.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_commons_commons_pool2_2_5_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_httpcomponents_httpclient_4_5_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_httpcomponents_httpcore_4_4_10.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_logging_log4j_log4j_api_2_10_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_logging_log4j_log4j_to_slf4j_2_10_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_tomcat_embed_tomcat_embed_core_8_5_32.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 
| 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_tomcat_embed_tomcat_embed_el_8_5_32.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_tomcat_embed_tomcat_embed_websocket_8_5_32.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_aspectj_aspectjweaver_1_8_13.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_assertj_assertj_core_3_9_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_attoparser_attoparser_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_hamcrest_hamcrest_library_1_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 
| 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_hibernate_common_hibernate_commons_annotations_5_0_1_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_hibernate_hibernate_core_5_2_17_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_hibernate_javax_persistence_hibernate_jpa_2_1_api_1_0_2_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_hibernate_validator_hibernate_validator_6_0_11_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_javassist_javassist_3_22_0_GA.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_jboss_jandex_2_0_3_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_jboss_logging_jboss_logging_3_3_2_Final.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_jsoup_jsoup_1_11_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_mockito_mockito_core_2_15_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_objenesis_objenesis_2_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_openjfx_javafx_base_11_0_0_SNAPSHOT.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_openjfx_javafx_base_linux_11_0_0_SNAPSHOT.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_openjfx_javafx_base_mac_11_0_0_SNAPSHOT.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_openjfx_javafx_base_win_11_0_0_SNAPSHOT.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_ow2_asm_asm_5_0_4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_projectlombok_lombok_1_16_22.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_reactivestreams_reactive_streams_1_0_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_skyscreamer_jsonassert_1_5_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_slf4j_jul_to_slf4j_1_7_25.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_25.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_2_0_4_RELEASE.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_autoconfigure_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_aop_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_data_jpa_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_data_redis_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_jdbc_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 
12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_json_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_logging_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_test_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_thymeleaf_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_tomcat_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_starter_web_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- 
/.idea/libraries/Maven__org_springframework_boot_spring_boot_test_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_boot_spring_boot_test_autoconfigure_2_0_4_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_data_spring_data_commons_2_0_9_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_data_spring_data_jpa_2_0_9_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_data_spring_data_keyvalue_2_0_9_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_data_spring_data_redis_2_0_9_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_aop_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 
| 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_aspects_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_beans_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_context_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_context_support_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_core_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_expression_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_jcl_5_0_8_RELEASE.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_jdbc_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_orm_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_oxm_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_test_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_tx_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_springframework_spring_web_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- 
/.idea/libraries/Maven__org_springframework_spring_webmvc_5_0_8_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_thymeleaf_extras_thymeleaf_extras_java8time_3_0_1_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_thymeleaf_thymeleaf_3_0_9_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_thymeleaf_thymeleaf_spring5_3_0_9_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_unbescape_unbescape_1_1_5_RELEASE.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_xmlunit_xmlunit_core_2_5_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_yaml_snakeyaml_1_19.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # proxy-pool 代理IP - 改进版(增加私密接口 可配置化) 2 | 3 | **详细信息 转** 4 | https://www.bedebug.com/archives/ipproxypool 5 | 6 | ### 背景 7 | 前段时间,写java爬虫来爬网易云音乐的评论。不料,爬了一段时间后ip被封禁了。由此,想到了使用ip代理,但是找了很多的ip代理网站,很少有可以用的代理ip。于是,抱着边学习的心态,自己开发了一个代理ip池。 8 | 9 | ### 相关技术及环境 10 | **技术:** SpringBoot,SpringMVC, Hibernate, MySQL, Redis , Maven, Lombok, BootStrap-table,多线程并发 11 | **环境:** JDK1.8 , IDEA 12 | 13 | ### 实现功能 14 | 通过ip代理池,提供高可用的代理ip,可用率达到95%以上。 15 | - 通过接口获取代理ip 16 | 通过访问接口,如:http://127.0.0.1:8080/proxyIp 返回代理ip的json格式 17 | ```json 18 | { 19 | "code":200, 20 | "data":[ 21 | { 22 | "available":true, 23 | "ip":"1.10.186.214", 24 | "lastValidateTime":"2018-09-25 20:31:52", 25 | "location":"THThailand", 26 | "port":57677, 27 | "requestTime":0, 28 | "responseTime":0, 29 | "type":"https", 30 | "useTime":3671 31 | } 32 | ], 33 | "message":"success" 34 | } 35 | ``` 36 | 37 | - 通过页面获取代理ip 38 | 通过访问url,如:http://127.0.0.1:8080 返回代理ip列表页面。 39 | 40 | 41 | - 提供代理ip测试接口及页面 42 | 通过访问url, 如:http://127.0.0.1:8080/test (get)测试代理ip的可用性;通过接口 http://127.0.0.1:8080/test ](post data: {"ip": "127.0.0.1","port":8080} ) 测试代理ip的可用性。 43 | 44 | ### 设计思路 45 | #### 模块划分 46 | - 爬虫模块:爬取代理ip网站的代理IP信息,先通过队列再保存进数据库。 47 | - 
数据库同步模块:设置一定时间间隔同步数据库IP到redis缓存中。 48 | - 缓存redis同步模块:设置一定时间间隔同步redis缓存到另一块redis缓存中。 49 | - 缓存redis代理ip校验模块:设置一定时间间隔redis缓存代理ip池校验。 50 | - 前端显示及接口控制模块:显示可用ip页面,及提供ip获取api接口。 51 | 52 | #### 架构图 53 | 54 | 55 | ### IP来源 56 | 代理ip均来自爬虫爬取,有些国内爬取的ip大多都不能用,代理池的ip可用ip大多是国外的ip。爬取的网站有:http://www.xicidaili.com/nn ,http://www.data5u.com/free/index.shtml ,https://free-proxy-list.net ,https://www.my-proxy.com/free-proxy-list.html ,http://spys.one/en/free-proxy-list/ , https://www.proxynova.com/proxy-server-list/ ,https://www.proxy4free.com/list/webproxy1.html ,http://www.gatherproxy.com/ 。 57 | ### 如何使用 58 | **前提:** 已经安装JDK1.8环境,MySQL数据库,Redis。 59 | 先使用maven编译成jar,proxy-pool-1.0.jar。 60 | 使用SpringBoot启动方式,启动即可。 61 | ```java 62 | java -jar proxy-pool-1.0.jar 63 | ``` 64 | -------------------------------------------------------------------------------- /db/crawler.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat Premium Data Transfer 3 | 4 | Source Server : 本地数据库 5 | Source Server Type : MySQL 6 | Source Server Version : 50709 7 | Source Host : localhost:3306 8 | Source Schema : crawler 9 | 10 | Target Server Type : MySQL 11 | Target Server Version : 50709 12 | File Encoding : 65001 13 | 14 | Date: 21/04/2020 15:35:10 15 | */ 16 | 17 | SET NAMES utf8mb4; 18 | SET FOREIGN_KEY_CHECKS = 0; 19 | 20 | -- ---------------------------- 21 | -- Table structure for ip_white_list 22 | -- ---------------------------- 23 | DROP TABLE IF EXISTS `ip_white_list`; 24 | CREATE TABLE `ip_white_list` ( 25 | `id` int(11) NOT NULL AUTO_INCREMENT, 26 | `ip` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '白名单IP', 27 | `is_usable` smallint(6) NOT NULL COMMENT '可用状态 否 0 / 是 1', 28 | PRIMARY KEY (`id`) USING BTREE 29 | ) ENGINE = MyISAM AUTO_INCREMENT = 4 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic; 30 | 31 | -- ---------------------------- 32 | -- Records of ip_white_list 33 | -- 
---------------------------- 34 | INSERT INTO `ip_white_list` VALUES (1, '39.254.12.176', 1); 35 | 36 | -- ---------------------------- 37 | -- Table structure for proxy_api 38 | -- ---------------------------- 39 | DROP TABLE IF EXISTS `proxy_api`; 40 | CREATE TABLE `proxy_api` ( 41 | `id` bigint(20) NOT NULL AUTO_INCREMENT, 42 | `ip_api` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL, 43 | `type` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL, 44 | PRIMARY KEY (`id`) USING BTREE 45 | ) ENGINE = MyISAM AUTO_INCREMENT = 2 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic; 46 | 47 | -- ---------------------------- 48 | -- Records of proxy_api 49 | -- ---------------------------- 50 | INSERT INTO `proxy_api` VALUES (1, 'http://ent.kdlapi.com/api/getproxy/?orderid=938745381844768&num=10&protocol=1&method=2&an_an=1&an_ha=1&sep=1', 'txt'); 51 | 52 | -- ---------------------------- 53 | -- Table structure for proxy_config 54 | -- ---------------------------- 55 | DROP TABLE IF EXISTS `proxy_config`; 56 | CREATE TABLE `proxy_config` ( 57 | `id` int(11) NOT NULL AUTO_INCREMENT, 58 | `validate_url` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '验证网址', 59 | `validate_count` int(6) NOT NULL COMMENT '验证次数(默认为3次验证),如果代理IP存活时间较短 可该小验证次数', 60 | `delay_time` int(6) NOT NULL COMMENT '如果为0 则开启随机', 61 | `private_username` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '私有接口-认证用户名', 62 | `private_password` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '私有接口-认证用户密码', 63 | `auth` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '授权key', 64 | PRIMARY KEY (`id`) USING BTREE 65 | ) ENGINE = MyISAM AUTO_INCREMENT = 2 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic; 66 | 67 | -- ---------------------------- 68 | -- Records of proxy_config 69 | -- ---------------------------- 70 | INSERT INTO 
`proxy_config` VALUES (1, 'http://www.blyuan.com/', 1, 0, 'meet.parker', 'ca0ngogx', ''); 71 | 72 | -- ---------------------------- 73 | -- Table structure for proxyip 74 | -- ---------------------------- 75 | DROP TABLE IF EXISTS `proxyip`; 76 | CREATE TABLE `proxyip` ( 77 | `id` bigint(20) NOT NULL AUTO_INCREMENT, 78 | `anonymity` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, 79 | `available` bit(1) NOT NULL, 80 | `availableCount` int(11) NOT NULL, 81 | `availableRate` double NULL DEFAULT NULL, 82 | `country` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, 83 | `createTime` datetime(0) NULL DEFAULT NULL, 84 | `ip` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, 85 | `lastValidateTime` datetime(0) NULL DEFAULT NULL, 86 | `location` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, 87 | `port` int(11) NOT NULL, 88 | `requestTime` bigint(20) NOT NULL, 89 | `responseTime` bigint(20) NOT NULL, 90 | `type` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, 91 | `unAvailableCount` int(11) NOT NULL, 92 | `useTime` bigint(20) NOT NULL, 93 | `validateCount` int(11) NOT NULL DEFAULT 0, 94 | PRIMARY KEY (`id`) USING BTREE 95 | ) ENGINE = MyISAM AUTO_INCREMENT = 4549 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic; 96 | 97 | SET FOREIGN_KEY_CHECKS = 1; 98 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.chenerzhu.crawler 7 | proxy-pool 8 | 2.0-SNAPSHOT 9 | jar 10 | 11 | proxy-pool 12 | proxy-pool 13 | 14 | 15 | org.springframework.boot 16 | spring-boot-starter-parent 17 | 2.0.4.RELEASE 18 | 19 | 20 | 21 | 22 | UTF-8 23 | UTF-8 24 | 1.8 25 | 26 | 27 | 28 | 29 | org.springframework.boot 30 | spring-boot-starter-data-jpa 31 | 32 | 33 | org.springframework.boot 34 | 
spring-boot-starter-data-redis 35 | 36 | 37 | org.apache.commons 38 | commons-pool2 39 | 40 | 41 | org.springframework.boot 42 | spring-boot-starter-web 43 | 44 | 45 | org.springframework.boot 46 | spring-boot-starter-thymeleaf 47 | 48 | 49 | io.lettuce 50 | lettuce-core 51 | 5.1.0.M1 52 | 53 | 54 | mysql 55 | mysql-connector-java 56 | 8.0.11 57 | 58 | 59 | org.projectlombok 60 | lombok 61 | true 62 | 63 | 64 | org.springframework.boot 65 | spring-boot-starter-test 66 | test 67 | 68 | 69 | com.alibaba 70 | fastjson 71 | 1.2.47 72 | 73 | 74 | 75 | org.apache.commons 76 | commons-lang3 77 | 3.10 78 | 79 | 80 | org.apache.httpcomponents 81 | httpclient 82 | 4.5.2 83 | 84 | 85 | org.jsoup 86 | jsoup 87 | 1.11.2 88 | 89 | 90 | 91 | junit 92 | junit 93 | 4.12 94 | test 95 | 96 | 97 | 98 | 99 | commons-dbcp 100 | commons-dbcp 101 | 1.4 102 | 103 | 104 | 105 | 106 | proxy-pool-2.0 107 | 108 | 109 | org.springframework.boot 110 | spring-boot-maven-plugin 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/ProxyPoolApplication.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | import org.springframework.boot.web.servlet.ServletComponentScan; 6 | 7 | @SpringBootApplication 8 | @ServletComponentScan("com.chenerzhu.crawler.proxy.pool.listener") 9 | public class ProxyPoolApplication { 10 | 11 | public static void main(String[] args) { 12 | SpringApplication.run(ProxyPoolApplication.class, args); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/common/HttpMethod.java: -------------------------------------------------------------------------------- 1 | 
package com.chenerzhu.crawler.proxy.pool.common; 2 | 3 | /** 4 | * @author chenerzhu 5 | * @create 2018-09-08 17:54 6 | **/ 7 | public enum HttpMethod { 8 | GET, 9 | POST, 10 | PUT, 11 | PATCH, 12 | DELETE; 13 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/common/RedisKey.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.common; 2 | 3 | /** 4 | * @author chenerzhu 5 | * @create 2018-08-31 20:08 6 | **/ 7 | public final class RedisKey { 8 | public static final String PROXY_IP_KEY="PROXY_IP_KEY"; 9 | public static final String PROXY_IP_RT_KEY="PROXY_IP_RT_KEY"; 10 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/config/RedisConfig.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.config; 2 | 3 | import org.springframework.boot.autoconfigure.AutoConfigureAfter; 4 | import org.springframework.boot.autoconfigure.data.redis.RedisAutoConfiguration; 5 | import org.springframework.context.annotation.Bean; 6 | import org.springframework.context.annotation.Configuration; 7 | import org.springframework.data.redis.connection.lettuce.LettuceConnectionFactory; 8 | import org.springframework.data.redis.core.RedisTemplate; 9 | import org.springframework.data.redis.serializer.GenericJackson2JsonRedisSerializer; 10 | import org.springframework.data.redis.serializer.StringRedisSerializer; 11 | 12 | import java.io.Serializable; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-08-31 16:05 17 | **/ 18 | @Configuration 19 | @AutoConfigureAfter(RedisAutoConfiguration.class) 20 | public class RedisConfig { 21 | @Bean 22 | public RedisTemplate redisRedisTemplate(LettuceConnectionFactory redisConnectionFactory) { 23 | RedisTemplate template = new 
RedisTemplate<>(); 24 | template.setKeySerializer(new StringRedisSerializer()); 25 | template.setValueSerializer(new GenericJackson2JsonRedisSerializer()); 26 | template.setConnectionFactory(redisConnectionFactory); 27 | return template; 28 | } 29 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/config/SpringConfig.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.config; 2 | 3 | import org.springframework.context.annotation.Configuration; 4 | 5 | /** 6 | * @author chenerzhu 7 | * @create 2018-08-30 12:38 8 | **/ 9 | @Configuration 10 | public class SpringConfig { 11 | 12 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/config/WebConfig.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.config; 2 | 3 | import com.alibaba.fastjson.support.spring.FastJsonHttpMessageConverter; 4 | import org.springframework.context.annotation.Bean; 5 | import org.springframework.context.annotation.Configuration; 6 | import org.springframework.http.MediaType; 7 | import org.springframework.http.converter.HttpMessageConverter; 8 | import org.springframework.http.converter.StringHttpMessageConverter; 9 | import org.springframework.http.converter.json.Jackson2ObjectMapperBuilder; 10 | import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; 11 | import org.springframework.web.servlet.config.annotation.DefaultServletHandlerConfigurer; 12 | import org.springframework.web.servlet.config.annotation.EnableWebMvc; 13 | import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry; 14 | import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; 15 | 16 | import java.nio.charset.Charset; 17 | import 
java.util.ArrayList; 18 | import java.util.List; 19 | 20 | /** 21 | * @author chenerzhu 22 | * @create 2018-05-27 14:10 23 | **/ 24 | @Configuration 25 | @EnableWebMvc // 启用MVC Java config的支持. 相当于 26 | public class WebConfig implements WebMvcConfigurer { 27 | 28 | // 设置响应头信息 29 | private static List buildDefaultMediaTypes() { 30 | List list = new ArrayList<>(); 31 | list.add(MediaType.TEXT_HTML); // 这个必须设置在第一位 32 | list.add(MediaType.APPLICATION_JSON_UTF8); 33 | return list; 34 | } 35 | 36 | @Override 37 | public void addResourceHandlers(ResourceHandlerRegistry registry) { 38 | registry.addResourceHandler("/static/**").addResourceLocations("classpath:/static/"); 39 | registry.addResourceHandler("/js/**").addResourceLocations("classpath:/static/js/"); 40 | registry.addResourceHandler("/css/**").addResourceLocations("classpath:/static/css/"); 41 | } 42 | 43 | // 配置处理静态资源 44 | @Override 45 | public void configureDefaultServletHandling(DefaultServletHandlerConfigurer configurer) { 46 | configurer.enable(); 47 | } 48 | 49 | // 设置MessageConverter 50 | @Override 51 | public void configureMessageConverters(List> converters) { 52 | converters.add(stringHttpMessageConverter()); 53 | converters.add(httpMessageConverter()); 54 | } 55 | 56 | @Bean 57 | public StringHttpMessageConverter stringHttpMessageConverter() { 58 | // 设置默认编码为UTF-8 59 | Charset default_charset = Charset.forName("UTF-8"); 60 | StringHttpMessageConverter converter = new StringHttpMessageConverter(default_charset); 61 | List list = buildDefaultMediaTypes(); 62 | converter.setSupportedMediaTypes(list); 63 | return converter; 64 | } 65 | @Bean 66 | public FastJsonHttpMessageConverter httpMessageConverter() { 67 | FastJsonHttpMessageConverter converter=new FastJsonHttpMessageConverter(); 68 | List list = buildDefaultMediaTypes(); 69 | converter.setSupportedMediaTypes(list); 70 | return converter; 71 | } 72 | } -------------------------------------------------------------------------------- 
/src/main/java/com/chenerzhu/crawler/proxy/pool/context/SpringContextHolder.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.context; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.springframework.beans.BeansException; 5 | import org.springframework.beans.factory.DisposableBean; 6 | import org.springframework.context.ApplicationContext; 7 | import org.springframework.context.ApplicationContextAware; 8 | import org.springframework.stereotype.Component; 9 | 10 | /** 11 | * @author chenerzhu 12 | * @create 2018-08-30 21:09 13 | **/ 14 | @Slf4j 15 | public class SpringContextHolder implements ApplicationContextAware, DisposableBean { 16 | private static ApplicationContext applicationContext; 17 | 18 | private SpringContextHolder() { 19 | } 20 | 21 | public static void initApplicationContext(ApplicationContext applicationContext) { 22 | if(SpringContextHolder.applicationContext==null){ 23 | SpringContextHolder.applicationContext = applicationContext; 24 | } 25 | } 26 | 27 | public static ApplicationContext getApplicationContext() { 28 | return applicationContext; 29 | } 30 | 31 | @Override 32 | public void setApplicationContext(ApplicationContext applicationContext) throws BeansException { 33 | if(this.applicationContext==null){ 34 | SpringContextHolder.applicationContext = applicationContext; 35 | } 36 | } 37 | 38 | @SuppressWarnings("unchecked") 39 | public static T getBean(String name) { 40 | return (T) getApplicationContext().getBean(name); 41 | } 42 | 43 | 44 | @SuppressWarnings("unchecked") 45 | public static T getBean(Class clazz) { 46 | return (T) getApplicationContext().getBeansOfType(clazz); 47 | } 48 | 49 | @Override 50 | public void destroy() throws Exception { 51 | SpringContextHolder.clear(); 52 | } 53 | 54 | public static void clear() { 55 | log.debug("Clear ApplicationContext of SpringContextHolder:" + applicationContext); 56 | applicationContext = null; 57 | } 
58 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/controller/BaseController.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.controller; 2 | 3 | import org.springframework.web.bind.annotation.RestController; 4 | 5 | /** 6 | * @author chenerzhu 7 | * @create 2018-08-29 19:52 8 | **/ 9 | public class BaseController { 10 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/entity/AuthorizationKey.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.entity; 2 | 3 | import com.alibaba.fastjson.annotation.JSONField; 4 | import lombok.Data; 5 | import lombok.ToString; 6 | 7 | import javax.persistence.*; 8 | import java.io.Serializable; 9 | 10 | /** 11 | * @author parker 12 | * @create 2020年4月20日14:51:17 13 | **/ 14 | @Data 15 | @ToString 16 | @Entity 17 | @Table(name = "authorization_key") 18 | public class AuthorizationKey implements Serializable { 19 | private static final long serialVersionUID = 1L; 20 | @Id 21 | @GeneratedValue(strategy = GenerationType.IDENTITY) 22 | @JSONField(serialize = false) 23 | private long id; 24 | 25 | @Column(name="key" ,nullable=false) 26 | private String key; 27 | 28 | @Column(name="is_usable" ,nullable=false) 29 | private String is_usable; 30 | 31 | @Column(name="contact") 32 | private String contact; 33 | 34 | @Column(name="remarks" ) 35 | private String remarks; 36 | 37 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/entity/IPWhiteList.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.entity; 2 | 3 | import com.alibaba.fastjson.annotation.JSONField; 
4 | import lombok.Data; 5 | import lombok.ToString; 6 | 7 | import javax.persistence.*; 8 | import java.io.Serializable; 9 | 10 | /** 11 | * @author parker 12 | * 13 | * IP白名单 14 | * @create 2018-08-29 21:00 15 | **/ 16 | @Data 17 | @ToString 18 | @Entity 19 | @Table(name = "ip_white_list") 20 | public class IPWhiteList implements Serializable { 21 | private static final long serialVersionUID = 1L; 22 | @Id 23 | @GeneratedValue(strategy = GenerationType.IDENTITY) 24 | @JSONField(serialize = false) 25 | private Long id; 26 | 27 | /** 28 | * 白名单IP 29 | */ 30 | @Column(name="ip" ,nullable=false) 31 | private String ip; 32 | 33 | /** 34 | * 是否可用 35 | */ 36 | @Column(name="is_usable" ,nullable=false) 37 | private Integer isUsable; 38 | 39 | 40 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/entity/ProxyApi.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.entity; 2 | 3 | import com.alibaba.fastjson.annotation.JSONField; 4 | import com.fasterxml.jackson.annotation.JsonIgnore; 5 | import lombok.Data; 6 | import lombok.ToString; 7 | 8 | import javax.persistence.*; 9 | import java.io.Serializable; 10 | import java.util.Date; 11 | 12 | /** 13 | * @author parker 14 | * @create 2020年4月20日14:51:17 15 | **/ 16 | @Data 17 | @ToString 18 | @Entity 19 | @Table(name = "proxy_api") 20 | public class ProxyApi implements Serializable { 21 | private static final long serialVersionUID = 1L; 22 | @Id 23 | @GeneratedValue(strategy = GenerationType.IDENTITY) 24 | @JSONField(serialize = false) 25 | private long id; 26 | 27 | @Column(name="ip_api" ,nullable=false) 28 | private String ipApi; 29 | 30 | @Column(name="type" ,nullable=false) 31 | private String type; 32 | 33 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/entity/ProxyConfig.java: 
-------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.entity; 2 | 3 | import com.alibaba.fastjson.annotation.JSONField; 4 | import lombok.Data; 5 | import lombok.ToString; 6 | 7 | import javax.persistence.*; 8 | import java.io.Serializable; 9 | 10 | /** 11 | * @author parker 12 | * @create 2020年4月20日14:51:17 13 | **/ 14 | @Data 15 | @ToString 16 | @Entity 17 | @Table(name = "proxy_config") 18 | public class ProxyConfig implements Serializable { 19 | private static final long serialVersionUID = 1L; 20 | @Id 21 | @GeneratedValue(strategy = GenerationType.IDENTITY) 22 | @JSONField(serialize = false) 23 | private long id; 24 | 25 | /** 26 | * 验证地址 27 | */ 28 | @Column(name="validate_url" ,nullable=false) 29 | private String validateUrl; 30 | 31 | /** 32 | * 验证次数 33 | * 34 | * (默认为3次验证),如果代理IP存活时间较短 可该小验证次数 35 | */ 36 | @Column(name="validate_count" ,nullable=false) 37 | private Integer validateCount; 38 | 39 | /** 40 | * 延迟时间 41 | * 42 | * 如果为0 则开启随机 43 | */ 44 | @Column(name="delay_time" ,nullable=false) 45 | private Integer delayTime; 46 | 47 | /** 48 | * 私有接口-认证用户名 49 | */ 50 | @Column(name="private_username" ,nullable=false) 51 | private String privateUserName; 52 | 53 | 54 | /** 55 | * 私有接口-认证用户密码 56 | */ 57 | @Column(name="private_password" ,nullable=false) 58 | private String privatePassword; 59 | 60 | 61 | /** 62 | * 授权 63 | */ 64 | @Column(name="auth") 65 | private String auth; 66 | 67 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/entity/ProxyIp.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.entity; 2 | 3 | import com.alibaba.fastjson.annotation.JSONField; 4 | import com.fasterxml.jackson.annotation.JsonIgnore; 5 | import lombok.Data; 6 | import lombok.ToString; 7 | import org.hibernate.annotations.CreationTimestamp; 8 | 
import org.hibernate.annotations.UpdateTimestamp;

import javax.persistence.*;
import java.io.Serializable;
import java.util.Date;

/**
 * JPA entity mapped to the {@code ProxyIp} table: one crawled proxy endpoint
 * together with its validation counters and timing figures.
 *
 * @author chenerzhu
 * @create 2018-08-29 21:00
 **/
@Entity
@Table(name = "ProxyIp")
@Data
@ToString
public class ProxyIp implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Database-generated primary key; excluded from fastjson output. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @JSONField(serialize = false)
    private long id;

    private String ip;

    private int port;

    /** Country. */
    private String country;

    /** Location. */
    private String location;

    /** Proxy type, e.g. https / http. */
    private String type;

    /** Anonymity level. */
    private String anonymity;

    @Column(name = "available", nullable = false)
    private boolean available;

    // The @Temporal(TemporalType.TIMESTAMP) / @CreationTimestamp annotations are
    // commented out in the original, so this value is not filled in by Hibernate.
    @JsonIgnore
    @JSONField(serialize = false)
    private Date createTime;

    // Likewise @UpdateTimestamp / @Temporal(TemporalType.TIMESTAMP) are disabled here.
    @JSONField(format = "yyyy-MM-dd HH:mm:ss")
    private Date lastValidateTime;

    /** Number of validations performed. */
    @Column(name = "validateCount", nullable = false, columnDefinition = "INT default 0")
    @JsonIgnore
    @JSONField(serialize = false)
    private int validateCount;

    /** Number of validations that found the proxy available. */
    @JsonIgnore
    @JSONField(serialize = false)
    private int availableCount;

    /** Number of validations that found the proxy unavailable. */
    @JsonIgnore
    @JSONField(serialize = false)
    private int unAvailableCount;

    /** Response time. */
    private long responseTime;

    /** Request time. */
    private long requestTime;

    /** Total time needed for a request through the proxy. */
    private long useTime;

    /** Availability rate. */
    @Column(scale = 3, precision = 5)
    @JSONField(serialize = false)
    private double availableRate;
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/entity/Result.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.entity;

import lombok.Data;
| import lombok.ToString; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * @author chenerzhu 10 | * @create 2018-09-05 22:09 11 | **/ 12 | @ToString 13 | @Data 14 | public class Result { 15 | private String message; 16 | private int code; 17 | private List data; 18 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/entity/SysDataSource.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © Edwin All rights reserved. 3 | */ 4 | package com.chenerzhu.crawler.proxy.pool.entity; 5 | 6 | 7 | import lombok.Data; 8 | import org.springframework.beans.factory.annotation.Value; 9 | import org.springframework.context.annotation.Configuration; 10 | import org.springframework.stereotype.Component; 11 | 12 | import java.io.Serializable; 13 | 14 | /** 15 | * 多数据源Entity 16 | * @author parker 17 | * @version 2017-07-27 18 | */ 19 | @Data 20 | public class SysDataSource implements Serializable { 21 | 22 | private static final long serialVersionUID = 1L; 23 | 24 | 25 | /** 数据库用户名 */ 26 | private String dbUserName = "proxy"; 27 | /** 数据库密码 */ 28 | private String dbPassword = "PROXY!@#123"; 29 | /** 数据库链接 */ 30 | private String dbUrl = "jdbc:mysql://39.97.162.240:3306/proxy?useUnicode=true&characterEncoding=utf-8&allowMultiQueries=true&serverTimezone=UTC"; 31 | /** 数据库驱动类 */ 32 | private String dbDriver = "com.mysql.cj.jdbc.Driver"; 33 | 34 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/entity/WebPage.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.entity; 2 | 3 | import lombok.Data; 4 | import lombok.ToString; 5 | import org.jsoup.nodes.Document; 6 | 7 | import java.io.Serializable; 8 | import java.util.Date; 9 | 10 | /** 11 | * @author chenerzhu 12 | * @create 2018-09-02 15:14 13 
| **/ 14 | @Data 15 | @ToString 16 | public class WebPage implements Serializable { 17 | private static final long serialVersionUID = 23454787L; 18 | private Date crawlTime; 19 | private String page; 20 | private Document document; 21 | private String html; 22 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/exception/ProxyPoolException.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.exception; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.springframework.http.HttpStatus; 5 | import org.springframework.web.bind.annotation.ControllerAdvice; 6 | import org.springframework.web.bind.annotation.ExceptionHandler; 7 | import org.springframework.web.bind.annotation.ResponseStatus; 8 | import org.springframework.web.servlet.ModelAndView; 9 | 10 | /** 11 | * @author chenerzhu 12 | * @create 2018-05-26 19:46 13 | **/ 14 | public class ProxyPoolException extends RuntimeException{ 15 | public ProxyPoolException(){ 16 | super(); 17 | } 18 | 19 | public ProxyPoolException(String message){ 20 | super(message); 21 | } 22 | 23 | public ProxyPoolException(String message,Throwable e){ 24 | super(message,e); 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/exception/ProxyPoolExceptionHandler.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.exception; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.springframework.http.HttpStatus; 5 | import org.springframework.web.bind.annotation.ControllerAdvice; 6 | import org.springframework.web.bind.annotation.ExceptionHandler; 7 | import org.springframework.web.bind.annotation.ResponseStatus; 8 | import org.springframework.web.servlet.ModelAndView; 9 | 10 | /** 11 | * @author chenerzhu 12 
| * @create 2018-08-29 20:29 13 | **/ 14 | @Slf4j 15 | @ControllerAdvice 16 | public class ProxyPoolExceptionHandler { 17 | @ExceptionHandler(ProxyPoolException.class) 18 | @ResponseStatus(HttpStatus.OK) 19 | public ModelAndView processProxyPool(Exception e){ 20 | log.info("自定义异常处理-ProxyPoolException"); 21 | ModelAndView m = new ModelAndView(); 22 | log.error("error:",e); 23 | m.addObject("exception", e.getMessage()); 24 | m.setViewName("error/500"); 25 | return m; 26 | } 27 | @ExceptionHandler(Exception.class) 28 | @ResponseStatus(HttpStatus.OK) 29 | public ModelAndView processException(Exception e){ 30 | ModelAndView m = new ModelAndView(); 31 | log.error("error:",e); 32 | m.addObject("exception", e.getMessage()); 33 | m.setViewName("error/500"); 34 | return m; 35 | } 36 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/interceptor/IPInterceptor.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.interceptor; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.util.IPUtils; 4 | import com.chenerzhu.crawler.proxy.pool.util.RedisUtil; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.apache.commons.lang3.StringUtils; 7 | import org.springframework.beans.factory.annotation.Value; 8 | import org.springframework.web.servlet.HandlerInterceptor; 9 | import org.springframework.web.servlet.ModelAndView; 10 | 11 | import javax.servlet.http.HttpServletRequest; 12 | import javax.servlet.http.HttpServletResponse; 13 | 14 | /** 15 | * Created Date by 2020/4/21 0021. 
16 | * 17 | * @author Parker 18 | */ 19 | @Slf4j 20 | public class IPInterceptor implements HandlerInterceptor{ 21 | 22 | /** Redis ip key */ 23 | private static final String REDIS_IP_KEY = "ip_white_list"; 24 | 25 | @Value("${ip-interceptor.errorMsg}") 26 | private String errorMsg; 27 | 28 | @Override 29 | public boolean preHandle(HttpServletRequest request, HttpServletResponse response, Object handler) throws Exception { 30 | //过滤ip,若用户在白名单内,则放行 31 | String ipAddress= IPUtils.getRealIP(request); 32 | log.info("USER IP ADDRESS IS => {}",ipAddress); 33 | if(!StringUtils.isNotBlank(ipAddress)) { 34 | return false; 35 | } 36 | 37 | // 等于 本地IP 直接放行 38 | if("127.0.0.1".equals(ipAddress) || "localhost".equals(ipAddress)){ 39 | return true; 40 | } 41 | 42 | // 白名单是否存在 43 | boolean flag = RedisUtil.sHasKey(REDIS_IP_KEY, ipAddress); 44 | if(!flag){ 45 | response.setHeader("Content-type", "text/html;charset=UTF-8"); 46 | response.setCharacterEncoding("UTF-8"); 47 | response.getWriter().append("

"+errorMsg+"

"); 48 | return false; 49 | } 50 | return true; 51 | } 52 | 53 | 54 | @Override 55 | public void postHandle(HttpServletRequest request, HttpServletResponse response, Object handler, ModelAndView modelAndView) throws Exception { 56 | 57 | } 58 | 59 | 60 | @Override 61 | public void afterCompletion(HttpServletRequest request, HttpServletResponse response, Object handler, Exception ex) throws Exception { 62 | 63 | } 64 | 65 | 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/interceptor/WebConfiguration.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.interceptor; 2 | 3 | import org.springframework.context.annotation.Bean; 4 | import org.springframework.context.annotation.Configuration; 5 | import org.springframework.context.annotation.Primary; 6 | import org.springframework.web.servlet.HandlerInterceptor; 7 | import org.springframework.web.servlet.config.annotation.CorsRegistry; 8 | import org.springframework.web.servlet.config.annotation.InterceptorRegistry; 9 | import org.springframework.web.servlet.config.annotation.WebMvcConfigurerAdapter; 10 | 11 | /** 12 | * Created Date by 2020/4/21 0021. 
*
* @author Parker
*/
// MVC configuration that registers IPInterceptor for every request path.
// NOTE(review): the bean name "admimWebConfig" looks like a typo of "adminWebConfig";
// it is kept as is because other code may refer to the bean by this name.
@Configuration("admimWebConfig")
@Primary
public class WebConfiguration extends WebMvcConfigurerAdapter {


    /** Exposes the custom interceptor as a bean. */
    @Bean
    public HandlerInterceptor getMyInterceptor(){
        return new IPInterceptor();
    }

    /**
     * Adds the IP interceptor to the interceptor chain.
     *
     * @param registry the registry the interceptor is added to
     */
    @Override
    public void addInterceptors(InterceptorRegistry registry){
        // Several interceptors form a chain.
        // addPathPatterns adds the intercept rules; here every path is matched.
        // excludePathPatterns would exclude paths from interception.
        registry.addInterceptor(getMyInterceptor()).addPathPatterns("/**");
        super.addInterceptors(registry);
    }


}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/Data5uCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.Date;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;

/**
 * Crawler for the data5u proxy list: every element with class {@code l2} is
 * read as one proxy entry.
 *
 * @author chenerzhu
 * @create 2018-09-03 20:11
 **/
@Slf4j
public class Data5uCrawlerJob extends AbstractCrawler {

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public Data5uCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * Parses the page and offers one {@link ProxyIp} per entry to the queue.
     * An entry that cannot be parsed is logged and skipped.
     *
     * @param webPage the fetched page; its document is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        Elements rows = webPage.getDocument().getElementsByClass("l2");
        for (Element row : rows) {
            try {
                ProxyIp proxyIp = new ProxyIp();
                proxyIp.setIp(row.child(0).text());
                proxyIp.setPort(Integer.parseInt(row.child(1).text()));
                proxyIp.setLocation(row.child(4).text() + "-" + row.child(5).text());
                proxyIp.setType(row.child(3).text());
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // SLF4J has no "{0}" placeholder; a trailing Throwable is logged with its stack trace.
                log.error("data5uCrawlerJob error", e);
            }
        }
    }

    /** Manual smoke run against the live site. */
    public static void main(String[] args) {
        ConcurrentLinkedQueue<ProxyIp> proxyIpQueue = new ConcurrentLinkedQueue<>();

        Data5uCrawlerJob data5uCrawlerJob = new Data5uCrawlerJob(proxyIpQueue, "http://www.data5u.com/");

        data5uCrawlerJob.run();
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/FreeProxyListCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import com.chenerzhu.crawler.proxy.pool.job.crawler.AbstractCrawler;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.Date;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;

/**
 * Crawler for https://free-proxy-list.net/: reads the rows of the table with
 * id {@code proxylisttable}.
 *
 * @author chenerzhu
 * @create 2018-09-04 14:06
 **/
@Slf4j
public class FreeProxyListCrawlerJob extends AbstractCrawler {

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public FreeProxyListCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * Parses the table rows, skipping the first and the last row, and offers one
     * {@link ProxyIp} per row to the queue. A row that cannot be parsed is logged
     * and skipped; a page without the table is logged and ignored.
     *
     * @param webPage the fetched page; its document is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        Element table = webPage.getDocument().getElementById("proxylisttable");
        if (table == null) {
            // getElementById returns null when the id is absent (layout change, block page).
            log.error("freeProxyListCrawlerJob error: table 'proxylisttable' not found");
            return;
        }
        Elements rows = table.getElementsByTag("tr");
        for (int i = 1; i < rows.size() - 1; i++) {
            try {
                Element row = rows.get(i);
                ProxyIp proxyIp = new ProxyIp();
                proxyIp.setIp(row.child(0).text());
                proxyIp.setPort(Integer.parseInt(row.child(1).text()));
                proxyIp.setLocation(row.child(2).text() + "-" + row.child(3).text());
                proxyIp.setType("yes".equalsIgnoreCase(row.child(6).text()) ? "https" : "http");
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                log.error("freeProxyListCrawlerJob error", e);
            }
        }
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/GaoKeYongCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.Date;
import java.util.concurrent.ConcurrentLinkedQueue;

/**
 * Crawler that reads proxies from the {@code tr} rows of a page.
 * NOTE(review): the original header cites https://www.kuaidaili.com/free/inha/1/
 * while {@code main} targets http://ip.jiangxianli.com/?page=1 — confirm the
 * intended site.
 *
 * @author vincent
 * @create 2019-11-11
 **/
@Slf4j
public class GaoKeYongCrawlerJob extends AbstractCrawler {

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public GaoKeYongCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     * @param pageCount    page count handed to the base crawler
     */
    public GaoKeYongCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl, int pageCount) {
        super(proxyIpQueue, pageUrl, pageCount);
    }

    /**
     * Parses every row after the first and offers one {@link ProxyIp} per row to
     * the queue. A row that cannot be parsed is logged and skipped.
     *
     * @param webPage the fetched page; its document is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        Elements rows = webPage.getDocument().getElementsByTag("tr");
        for (int i = 1; i < rows.size(); i++) {
            try {
                Element row = rows.get(i);
                ProxyIp proxyIp = new ProxyIp();

                proxyIp.setIp(row.child(0).text());
                proxyIp.setPort(Integer.parseInt(row.child(1).text()));
                proxyIp.setLocation(row.child(4).text());
                proxyIp.setType(row.child(3).text());
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // The message used to name the kuaidaili job (copy-paste).
                log.error("gaoKeYongCrawlerJob error", e);
            }
        }
    }

    /** Manual smoke run against the live site. */
    public static void main(String[] args) {
        ConcurrentLinkedQueue<ProxyIp> proxyIpQueue = new ConcurrentLinkedQueue<>();

        GaoKeYongCrawlerJob gaoKeYongCrawlerJob = new GaoKeYongCrawlerJob(proxyIpQueue, "http://ip.jiangxianli.com/?page=1");

        gaoKeYongCrawlerJob.run();
    }


}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/GatherproxyCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.alibaba.fastjson.JSONObject;
import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;

import java.util.Date;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Crawler for http://www.gatherproxy.com/: extracts the JSON objects embedded
 * in the page html and reads one proxy from each.
 *
 * @author chenerzhu
 * @create 2018-09-09 9:09
 **/
@Slf4j
public class GatherproxyCrawlerJob extends AbstractCrawler {

    /** Matches one embedded JSON object, starting at its "PROXY_CITY" key. */
    private static final Pattern PROXY_JSON = Pattern.compile("\\{\"PROXY_CITY\".*?\"}");

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public GatherproxyCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * Offers one {@link ProxyIp} per embedded JSON object to the queue. An object
     * that cannot be parsed is logged and skipped.
     *
     * @param webPage the fetched page; its html string is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        Matcher matcher = PROXY_JSON.matcher(webPage.getHtml());
        while (matcher.find()) {
            try {
                JSONObject json = JSONObject.parseObject(matcher.group(0));
                ProxyIp proxyIp = new ProxyIp();
                proxyIp.setIp(json.getString("PROXY_IP"));
                // The port is parsed as a hexadecimal number.
                proxyIp.setPort(Integer.parseInt(json.getString("PROXY_PORT"), 16));
                proxyIp.setType("SOCKS");
                proxyIp.setLocation(json.getString("PROXY_COUNTRY"));
                proxyIp.setCountry(json.getString("PROXY_COUNTRY"));
                proxyIp.setAnonymity(json.getString("PROXY_TYPE"));
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // The message used to name the free-proxy-list job (copy-paste).
                log.error("gatherproxyCrawlerJob error", e);
            }

        }
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/ICrawler.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.WebPage;

/**
 * Contract of a crawler: obtain a page, then parse it.
 *
 * @author chenerzhu
 * @create 2018-09-02 13:40
 **/
public interface ICrawler {

    /** Returns the page to be parsed. */
    WebPage getPage();

    /**
     * Parses a page.
     *
     * @param webPage the page to parse
     */
    void parsePage(WebPage webPage);
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/IP366CrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.Date;
import java.util.concurrent.ConcurrentLinkedQueue;

/**
 * Crawler for http://www.ip3366.net/?stype=1&page=1: reads proxies from the
 * {@code tr} rows of the page.
 *
 * @author vincent
 * @create 2019-11-11
 **/
@Slf4j
public class IP366CrawlerJob extends AbstractCrawler {

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public IP366CrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     * @param pageCount    page count handed to the base crawler
     */
    public IP366CrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl, int pageCount) {
        super(proxyIpQueue, pageUrl, pageCount);
    }

    /**
     * Parses every row after the first and offers one {@link ProxyIp} per row to
     * the queue. A row that cannot be parsed is logged and skipped.
     *
     * @param webPage the fetched page; its document is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        Elements rows = webPage.getDocument().getElementsByTag("tr");
        for (int i = 1; i < rows.size(); i++) {
            try {
                Element row = rows.get(i);
                ProxyIp proxyIp = new ProxyIp();

                proxyIp.setIp(row.child(0).text());
                proxyIp.setPort(Integer.parseInt(row.child(1).text()));
                proxyIp.setType(row.child(3).text());
                proxyIp.setLocation(row.child(5).text());

                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // SLF4J has no "{0}" placeholder; the message also used to name the kuaidaili job.
                log.error("ip366CrawlerJob error", e);
            }
        }
    }

    /** Manual smoke run against the live site. */
    public static void main(String[] args) {
        ConcurrentLinkedQueue<ProxyIp> proxyIpQueue = new ConcurrentLinkedQueue<>();

        IP366CrawlerJob ip366CrawlerJob = new IP366CrawlerJob(proxyIpQueue, "http://www.ip3366.net/?stype=1&page=1");

        ip366CrawlerJob.run();
    }


}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/IPHaiCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.Date;
import java.util.concurrent.ConcurrentLinkedQueue;

/**
 * Crawler that reads proxies from the {@code tr} rows of a page.
 * NOTE(review): the original header cites https://www.kuaidaili.com/free/inha/1/
 * while {@code main} targets http://www.iphai.com/ — confirm the intended site.
 *
 * @author vincent
 * @create 2019-11-11
 **/
@Slf4j
public class IPHaiCrawlerJob extends AbstractCrawler {

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public IPHaiCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     * @param pageCount    page count handed to the base crawler
     */
    public IPHaiCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl, int pageCount) {
        super(proxyIpQueue, pageUrl, pageCount);
    }

    /**
     * Parses every row after the first and offers one {@link ProxyIp} per row to
     * the queue. A row that cannot be parsed is logged and skipped.
     *
     * @param webPage the fetched page; its document is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        Elements rows = webPage.getDocument().getElementsByTag("tr");
        for (int i = 1; i < rows.size(); i++) {
            try {
                Element row = rows.get(i);
                ProxyIp proxyIp = new ProxyIp();

                proxyIp.setIp(row.child(0).text());
                proxyIp.setPort(Integer.parseInt(row.child(1).text()));
                proxyIp.setLocation(row.child(5).text());
                proxyIp.setType(row.child(3).text());
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // The message used to name the kuaidaili job (copy-paste).
                log.error("ipHaiCrawlerJob error", e);
            }
        }
    }

    /** Manual smoke run against the live site. */
    public static void main(String[] args) {
        ConcurrentLinkedQueue<ProxyIp> proxyIpQueue = new ConcurrentLinkedQueue<>();

        IPHaiCrawlerJob ipHaiCrawlerJob = new IPHaiCrawlerJob(proxyIpQueue, "http://www.iphai.com/");

        ipHaiCrawlerJob.run();
    }


}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/KuaidailiCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.Date;
import java.util.concurrent.ConcurrentLinkedQueue;

/**
 * Crawler for https://www.kuaidaili.com/free/inha/1/: reads proxies from the
 * {@code tr} rows of the page.
 *
 * @author vincent
 * @create 2019-11-11
 **/
@Slf4j
public class KuaidailiCrawlerJob extends AbstractCrawler {

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public KuaidailiCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     * @param pageCount    page count handed to the base crawler
     */
    public KuaidailiCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl, int pageCount) {
        super(proxyIpQueue, pageUrl, pageCount);
    }

    /**
     * Parses every row after the first and offers one {@link ProxyIp} per row to
     * the queue. A row that cannot be parsed is logged and skipped.
     *
     * @param webPage the fetched page; its document is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        Elements rows = webPage.getDocument().getElementsByTag("tr");
        for (int i = 1; i < rows.size(); i++) {
            try {
                Element row = rows.get(i);
                ProxyIp proxyIp = new ProxyIp();

                proxyIp.setIp(row.child(0).text());
                proxyIp.setPort(Integer.parseInt(row.child(1).text()));
                proxyIp.setLocation(row.child(4).text());
                proxyIp.setType(row.child(3).text());
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                log.error("kuaidailiCrawlerJob error", e);
            }
        }
    }

    /** Manual smoke run against the live site. */
    public static void main(String[] args) {
        ConcurrentLinkedQueue<ProxyIp> proxyIpQueue = new ConcurrentLinkedQueue<>();

        KuaidailiCrawlerJob kuaidailiCrawlerJob = new KuaidailiCrawlerJob(proxyIpQueue, "https://www.kuaidaili.com/free/inha/1/");

        kuaidailiCrawlerJob.run();
    }


}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/MyProxyCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import com.chenerzhu.crawler.proxy.pool.job.crawler.AbstractCrawler;
import lombok.extern.slf4j.Slf4j;

import java.util.Date;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

/**
 * Crawler for https://www.my-proxy.com/free-proxy-list.html: reads entries of
 * the form {@code ip:port#country} from the elements with class {@code list}.
 *
 * @author chenerzhu
 * @create 2018-09-08 16:35
 **/
@Slf4j
public class MyProxyCrawlerJob extends AbstractCrawler {

    /** Upper bound on the number of entries read from one page (was a literal in the loop). */
    private static final int MAX_ENTRIES = 43;

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public MyProxyCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * Parses at most {@link #MAX_ENTRIES} entries and offers one {@link ProxyIp}
     * per entry to the queue. An entry that cannot be parsed is logged and skipped.
     *
     * @param webPage the fetched page; its document is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        // NOTE(review): the split delimiter is missing in this copy of the file (the
        // literal is broken across lines); "<br>" is presumed from the entry format
        // shown below — confirm against version control.
        String[] entries = webPage.getDocument().getElementsByClass("list")
                .html().split("<br>");
        // Bound the loop by the actual entry count: iterating a fixed 43 times threw
        // (and logged) an ArrayIndexOutOfBoundsException for every missing entry.
        int limit = Math.min(entries.length, MAX_ENTRIES);
        for (int i = 0; i < limit; i++) {
            try {
                // e.g. 185.120.37.186:55143#AL
                String entry = entries[i];
                String[] addressAndCountry = entry.split("#");
                String[] ipAndPort = addressAndCountry[0].split(":");
                String country = addressAndCountry[1];

                ProxyIp proxyIp = new ProxyIp();
                proxyIp.setIp(ipAndPort[0]);
                proxyIp.setPort(Integer.parseInt(ipAndPort[1]));
                proxyIp.setType("http");
                proxyIp.setCountry(country);
                proxyIp.setLocation(country);
                proxyIp.setCreateTime(new Date());
                proxyIp.setAvailable(true);
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                log.error("myProxyCrawlerJob error", e);
            }
        }
    }

    /** Manual smoke run against the live site. */
    public static void main(String[] args) {
        ConcurrentLinkedQueue<ProxyIp> proxyIpQueue = new ConcurrentLinkedQueue<>();

        MyProxyCrawlerJob myProxyCrawlerJob = new MyProxyCrawlerJob(proxyIpQueue, "https://www.my-proxy.com/free-proxy-list.html");

        myProxyCrawlerJob.run();
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/PrivateTXTJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;

import java.util.Date;
import java.util.concurrent.ConcurrentLinkedQueue;

/**
 * Reader for a standalone plain-text interface that returns one
 * {@code ip:port} entry per line.
 *
 * @author parker
 * @create 2019-11-11
 **/
@Slf4j
public class PrivateTXTJob extends AbstractCrawler {

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the text resource
     */
    public PrivateTXTJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the text resource
     * @param pageCount    page count handed to the base crawler
     */
    public PrivateTXTJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl, int pageCount) {
        super(proxyIpQueue, pageUrl, pageCount);
    }

    /**
     * Splits the page text into lines and offers one {@link ProxyIp} per
     * well-formed {@code ip:port} line to the queue. Blank lines and lines without
     * exactly one colon are ignored; a line whose port is not a number is logged
     * and skipped. Nothing happens when the page has no text.
     *
     * @param webPage the fetched page; its page text is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        String body = webPage.getPage();
        if (body == null) {
            return;
        }
        // Accept both CRLF and LF line endings; splitting on "\r\n" alone turned an
        // LF-only response into one unusable entry.
        for (String line : body.split("\\r?\\n")) {
            String entry = line.trim();
            if (StringUtils.isEmpty(entry)) {
                continue;
            }
            String[] ipAndPort = entry.split(":");
            if (ipAndPort.length != 2) {
                continue;
            }
            try {
                ProxyIp proxyIp = new ProxyIp();
                proxyIp.setIp(ipAndPort[0]);
                proxyIp.setPort(Integer.parseInt(ipAndPort[1]));
                proxyIp.setLocation("私有化API");
                proxyIp.setType("HTTP");
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // The message used to name the kuaidaili job (copy-paste).
                log.error("privateTXTJob error", e);
            }
        }
    }

    /** Manual smoke run against the live endpoint. */
    public static void main(String[] args) {
        ConcurrentLinkedQueue<ProxyIp> proxyIpQueue = new ConcurrentLinkedQueue<>();

        PrivateTXTJob privateJob = new PrivateTXTJob(proxyIpQueue, "http://218.78.97.2:8181/IP.txt");

        privateJob.run();
    }


}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/Proxy4FreeCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.Date;
import java.util.concurrent.BlockingQueue;
import
java.util.concurrent.ConcurrentLinkedQueue; 12 | import java.util.concurrent.TimeUnit; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-09-09 8:43 17 | * https://www.proxy4free.com/list/webproxy1.html 18 | **/ 19 | @Slf4j 20 | public class Proxy4FreeCrawlerJob extends AbstractCrawler { 21 | public Proxy4FreeCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 22 | super(proxyIpQueue, pageUrl); 23 | } 24 | 25 | @Override 26 | public void parsePage(WebPage webPage) { 27 | Elements elements = webPage.getDocument().getElementsByTag("tr"); 28 | Element element; 29 | ProxyIp proxyIp; 30 | for (int i = 2; i < elements.size(); i++) { 31 | try { 32 | element = elements.get(i); 33 | proxyIp = new ProxyIp(); 34 | proxyIp.setIp(element.child(0).child(0).attr("href").replaceAll("\"", "").split("=")[1]); 35 | proxyIp.setPort(80); 36 | proxyIp.setLocation(element.child(3).text()); 37 | proxyIp.setCountry(element.child(3).text()); 38 | proxyIp.setAnonymity(element.child(9).text()); 39 | proxyIp.setType("unKnow"); 40 | proxyIp.setAvailable(true); 41 | proxyIp.setCreateTime(new Date()); 42 | proxyIp.setLastValidateTime(new Date()); 43 | proxyIp.setValidateCount(0); 44 | proxyIpQueue.offer(proxyIp); 45 | } catch (Exception e) { 46 | log.error("proxy4FreeCrawlerJob error:{0}",e); 47 | } 48 | } 49 | 50 | } 51 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/ProxyListCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | 9 | import java.util.Date; 10 | import java.util.concurrent.ConcurrentLinkedQueue; 11 | 12 | /** 13 | * @author 
vincent
 * @create 2019-11-11
 * https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1 Slow sometimes
 **/
// Crawler that reads proxies from the "tr" rows of the page above.
@Slf4j
public class ProxyListCrawlerJob extends AbstractCrawler {

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     */
    public ProxyListCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * @param proxyIpQueue queue that receives every parsed proxy
     * @param pageUrl      URL of the page to crawl
     * @param pageCount    page count handed to the base crawler
     */
    public ProxyListCrawlerJob(ConcurrentLinkedQueue<ProxyIp> proxyIpQueue, String pageUrl, int pageCount) {
        super(proxyIpQueue, pageUrl, pageCount);
    }

    /**
     * Parses every row after the first and offers one {@link ProxyIp} per row to
     * the queue. A row that cannot be parsed is logged and skipped.
     *
     * @param webPage the fetched page; its document is read
     */
    @Override
    public void parsePage(WebPage webPage) {
        Elements rows = webPage.getDocument().getElementsByTag("tr");
        for (int i = 1; i < rows.size(); i++) {
            try {
                Element row = rows.get(i);
                ProxyIp proxyIp = new ProxyIp();

                proxyIp.setIp(row.child(1).text());
                proxyIp.setPort(Integer.parseInt(row.child(2).text()));
                proxyIp.setLocation(row.child(4).text());
                proxyIp.setType(row.child(3).text());
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // SLF4J has no "{0}" placeholder; a trailing Throwable is logged with its stack trace.
                log.error("ProxyListCrawlerJob error", e);
            }
        }
    }

    /** Manual smoke run against the live site. */
    public static void main(String[] args) {
        ConcurrentLinkedQueue<ProxyIp> proxyIpQueue = new ConcurrentLinkedQueue<>();

        ProxyListCrawlerJob proxyListCrawlerJob = new ProxyListCrawlerJob(proxyIpQueue, "https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1");

        proxyListCrawlerJob.run();
    }


}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/ProxynovaCrawlerJob.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.crawler;

import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | 9 | import javax.script.ScriptEngine; 10 | import javax.script.ScriptEngineManager; 11 | import javax.script.ScriptException; 12 | import java.util.Date; 13 | import java.util.concurrent.BlockingQueue; 14 | import java.util.concurrent.ConcurrentLinkedQueue; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.regex.Matcher; 17 | import java.util.regex.Pattern; 18 | 19 | /** 20 | * @author chenerzhu 21 | * @create 2018-09-08 23:25 22 | * https://www.proxynova.com/proxy-server-list/ 23 | **/ 24 | @Slf4j 25 | public class ProxynovaCrawlerJob extends AbstractCrawler { 26 | public ProxynovaCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 27 | super(proxyIpQueue, pageUrl); 28 | } 29 | 30 | @Override 31 | public void parsePage(WebPage webPage) { 32 | Elements elements = webPage.getDocument().getElementsByTag("tbody") 33 | .get(0).getElementsByTag("tr"); 34 | Element element; 35 | ProxyIp proxyIp; 36 | for (int i = 0; i < elements.size(); i++) { 37 | try { 38 | element = elements.get(i); 39 | proxyIp = new ProxyIp(); 40 | String ip = getIp(element); 41 | if ("".equals(ip)) { 42 | continue; 43 | } 44 | proxyIp.setIp(ip); 45 | proxyIp.setPort(Integer.parseInt(element.child(1).text())); 46 | proxyIp.setLocation(element.child(5).text()); 47 | proxyIp.setCountry(element.child(5).text().split("-")[0]); 48 | proxyIp.setAnonymity(element.child(6).text()); 49 | proxyIp.setType("unKnow"); 50 | proxyIp.setAvailable(true); 51 | proxyIp.setCreateTime(new Date()); 52 | proxyIp.setLastValidateTime(new Date()); 53 | proxyIp.setValidateCount(0); 54 | proxyIpQueue.offer(proxyIp); 55 | } catch (Exception e) { 56 | log.error("proxynovaCrawlerJob error:{0}",e); 57 | } 58 | } 59 | } 60 | 61 | private String getIp(Element element) throws ScriptException { 62 | String ip = ""; 63 | 
ScriptEngineManager manager = new ScriptEngineManager(); 64 | ScriptEngine engine = manager.getEngineByName("js"); 65 | Pattern pattern = Pattern.compile("\\(.*?\\);<"); 66 | Matcher matcher = null; 67 | matcher = pattern.matcher(element.child(0).html()); 68 | if (matcher.find()) { 69 | String ipScript = matcher.group(0).substring(1, matcher.group(0).length() - 1); 70 | ip = (String) engine.eval(ipScript.replaceAll("\\);", "")); 71 | } 72 | return ip; 73 | } 74 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/QuanWangCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | 9 | import java.util.Date; 10 | import java.util.concurrent.ConcurrentLinkedQueue; 11 | 12 | /** 13 | * @author vincent 14 | * @create 2019-11-11 15 | * http://www.goubanjia.com/ 16 | **/ 17 | @Slf4j 18 | public class QuanWangCrawlerJob extends AbstractCrawler { 19 | public QuanWangCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 20 | super(proxyIpQueue, pageUrl); 21 | } 22 | 23 | @Override 24 | public void parsePage(WebPage webPage) { 25 | Elements elements = webPage.getDocument().getElementsByTag("tr"); 26 | Element element; 27 | ProxyIp proxyIp; 28 | for (int i = 1; i < elements.size(); i++) { 29 | try { 30 | element = elements.get(i); 31 | proxyIp = new ProxyIp(); 32 | 33 | // Remove duplicate number in

34 | Element childElement = element.child(0); 35 | childElement.select("p").remove(); 36 | String ipAndPort = childElement.text().replaceAll("\\s+", "").replaceAll("\n", ""); 37 | 38 | proxyIp.setIp(ipAndPort.split(":")[0]); 39 | proxyIp.setPort(Integer.parseInt(ipAndPort.split(":")[1])); 40 | 41 | proxyIp.setLocation(element.child(3).text()); 42 | proxyIp.setType(element.child(2).text()); 43 | proxyIp.setAvailable(true); 44 | proxyIp.setCreateTime(new Date()); 45 | proxyIp.setLastValidateTime(new Date()); 46 | proxyIp.setValidateCount(0); 47 | proxyIpQueue.offer(proxyIp); 48 | } catch (Exception e) { 49 | log.error("quanwangCrawlerJob error:{0}",e); 50 | } 51 | } 52 | } 53 | 54 | public static void main(String[] args) { 55 | ConcurrentLinkedQueue proxyIpQueue = new ConcurrentLinkedQueue<>(); 56 | 57 | QuanWangCrawlerJob quanWangCrawlerJob = new QuanWangCrawlerJob(proxyIpQueue, "http://www.goubanjia.com/"); 58 | 59 | quanWangCrawlerJob.run(); 60 | } 61 | 62 | 63 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/SpysOneCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.common.HttpMethod; 4 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 5 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.jsoup.nodes.Document; 8 | import org.jsoup.nodes.Element; 9 | import org.jsoup.select.Elements; 10 | 11 | import javax.script.ScriptEngine; 12 | import javax.script.ScriptEngineManager; 13 | import javax.script.ScriptException; 14 | import java.util.Date; 15 | import java.util.HashMap; 16 | import java.util.Map; 17 | import java.util.concurrent.BlockingQueue; 18 | import java.util.concurrent.ConcurrentLinkedQueue; 19 | import java.util.concurrent.TimeUnit; 20 | import 
java.util.regex.Matcher; 21 | import java.util.regex.Pattern; 22 | 23 | /** 24 | * @author chenerzhu 25 | * @create 2018-09-08 17:25 26 | * http://spys.one/en/free-proxy-list/ 27 | * form:xpp=5&xf1=0&xf2=0&xf4=0&xf5=1 28 | **/ 29 | @Slf4j 30 | public class SpysOneCrawlerJob extends AbstractCrawler { 31 | public SpysOneCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 32 | super(proxyIpQueue, pageUrl); 33 | this.httpMethd=HttpMethod.POST; 34 | this.formParamMap=new HashMap(){{ 35 | put("xpp","5"); 36 | put("xf1","0"); 37 | put("xf2","0"); 38 | put("xf4","0"); 39 | put("xf5","1"); 40 | }}; 41 | } 42 | 43 | @Override 44 | public void parsePage(WebPage webPage) { 45 | Elements elements = webPage.getDocument().getElementsByClass("spy1xx"); 46 | Element element; 47 | ProxyIp proxyIp; 48 | for (int i = 1; i < elements.size(); i++) { 49 | try { 50 | element = elements.get(i); 51 | proxyIp = new ProxyIp(); 52 | proxyIp.setIp(element.child(0).selectFirst(".spy14").text()); 53 | int port = getPort(element); 54 | if (port == -1) { 55 | continue; 56 | } 57 | proxyIp.setPort(port); 58 | proxyIp.setCountry(element.child(3).selectFirst(".spy14").text()); 59 | proxyIp.setLocation(element.child(3).text()); 60 | proxyIp.setType(element.child(1).text()); 61 | proxyIp.setAnonymity(element.child(2).text()); 62 | proxyIp.setAvailable(true); 63 | proxyIp.setCreateTime(new Date()); 64 | proxyIp.setLastValidateTime(new Date()); 65 | proxyIp.setValidateCount(0); 66 | proxyIpQueue.offer(proxyIp); 67 | } catch (Exception e) { 68 | log.error("spysOneCrawlerJob error:{0}",e); 69 | } 70 | } 71 | } 72 | 73 | private int getPort(Element element) throws ScriptException { 74 | int port = -1; 75 | ScriptEngineManager manager = new ScriptEngineManager(); 76 | ScriptEngine engine = manager.getEngineByName("js"); 77 | Pattern pattern = Pattern.compile("\\+.*?<"); 78 | Matcher matcher = null; 79 | Document document = webPage.getDocument(); 80 | String scrpit = 
document.getElementsByTag("script").get(2).data(); 81 | engine.eval(scrpit); 82 | matcher = pattern.matcher(element.child(0).html()); 83 | if (matcher.find()) { 84 | String portScript = matcher.group(0).substring(1, matcher.group(0).length() - 2); 85 | Object obj=engine.eval(portScript.replaceAll("\\+", "+''+")); 86 | port = Integer.parseInt((String)obj); 87 | } 88 | return port; 89 | } 90 | 91 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/WebSiteJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | 9 | import java.util.Date; 10 | import java.util.concurrent.ConcurrentLinkedQueue; 11 | 12 | /** 13 | * @author vincent 14 | * @create 2019-11-11 15 | * http://www.ip3366.net/?stype=1&page=1 16 | **/ 17 | @Slf4j 18 | public class WebSiteJob extends AbstractCrawler { 19 | public WebSiteJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 20 | super(proxyIpQueue, pageUrl); 21 | } 22 | 23 | public WebSiteJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl, int pageCount) { 24 | super(proxyIpQueue, pageUrl, pageCount); 25 | } 26 | 27 | @Override 28 | public void parsePage(WebPage webPage) { 29 | Elements elements = webPage.getDocument().getElementsByTag("div"); 30 | log.info("website:{}",super.pageUrl); 31 | } 32 | 33 | public static void main(String[] args) { 34 | ConcurrentLinkedQueue proxyIpQueue = new ConcurrentLinkedQueue<>(); 35 | 36 | WebSiteJob webSiteJob = new WebSiteJob(proxyIpQueue, "https://www.arcinbj.com"); 37 | 38 | webSiteJob.run(); 39 | } 40 | 41 | 42 | } 
-------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/XicidailiCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.common.HttpMethod; 4 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 5 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 6 | import com.chenerzhu.crawler.proxy.pool.job.crawler.AbstractCrawler; 7 | import com.chenerzhu.crawler.proxy.pool.util.HttpClientUtils; 8 | import lombok.extern.slf4j.Slf4j; 9 | import org.jsoup.Jsoup; 10 | import org.jsoup.nodes.Element; 11 | import org.jsoup.select.Elements; 12 | 13 | import java.util.Date; 14 | import java.util.concurrent.BlockingQueue; 15 | import java.util.concurrent.ConcurrentLinkedQueue; 16 | import java.util.concurrent.TimeUnit; 17 | 18 | /** 19 | * @author chenerzhu 20 | * @create 2018-09-02 15:23 21 | * http://www.xicidaili.com 22 | **/ 23 | @Slf4j 24 | public class XicidailiCrawlerJob extends AbstractCrawler { 25 | public XicidailiCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 26 | super(proxyIpQueue, pageUrl); 27 | } 28 | 29 | public XicidailiCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl, int pageCount) { 30 | super(proxyIpQueue, pageUrl, pageCount); 31 | } 32 | 33 | @Override 34 | public void parsePage(WebPage webPage) { 35 | Elements elements = webPage.getDocument().getElementsByTag("tr"); 36 | Element element; 37 | ProxyIp proxyIp; 38 | for (int i = 1; i < elements.size(); i++) { 39 | try { 40 | element = elements.get(i); 41 | proxyIp = new ProxyIp(); 42 | proxyIp.setIp(element.child(1).text()); 43 | proxyIp.setPort(Integer.parseInt(element.child(2).text())); 44 | proxyIp.setLocation(element.child(3).text()); 45 | proxyIp.setType(element.child(5).text()); 46 | proxyIp.setAvailable(true); 47 | 
proxyIp.setCreateTime(new Date()); 48 | proxyIp.setLastValidateTime(new Date()); 49 | proxyIp.setValidateCount(0); 50 | proxyIpQueue.offer(proxyIp); 51 | } catch (Exception e) { 52 | log.error("xicidailiCrawlerJob error:{0}",e); 53 | } 54 | } 55 | } 56 | 57 | public static void main(String[] args) { 58 | ConcurrentLinkedQueue proxyIpQueue = new ConcurrentLinkedQueue<>(); 59 | 60 | XicidailiCrawlerJob xicidailiCrawlerJob = new XicidailiCrawlerJob(proxyIpQueue, "https://www.xicidaili.com/nn"); 61 | 62 | xicidailiCrawlerJob.run(); 63 | } 64 | 65 | 66 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/execute/ISchedulerJobExecutor.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.execute; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.job.scheduler.AbstractSchedulerJob; 4 | 5 | import java.util.concurrent.TimeUnit; 6 | 7 | /** 8 | * @author chenerzhu 9 | * @create 2018-08-30 12:14 10 | **/ 11 | public interface ISchedulerJobExecutor { 12 | void execute(AbstractSchedulerJob schedulerJob, long delayTime, long intervalTime, TimeUnit timeUnit); 13 | void executeDelay(AbstractSchedulerJob schedulerJob, long delayTime, long intervalTime, TimeUnit timeUnit); 14 | void shutdown(); 15 | //void execute(Runnable runnable); 16 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/execute/impl/SchedulerJobExecutor.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.execute.impl; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.job.execute.ISchedulerJobExecutor; 4 | import com.chenerzhu.crawler.proxy.pool.job.scheduler.AbstractSchedulerJob; 5 | import com.chenerzhu.crawler.proxy.pool.thread.ThreadFactory; 6 | 7 | import java.util.concurrent.*; 8 | 9 
| /** 10 | * @author chenerzhu 11 | * @create 2018-08-30 12:15 12 | **/ 13 | public class SchedulerJobExecutor implements ISchedulerJobExecutor { 14 | 15 | private ScheduledExecutorService scheduledExecutorService; 16 | public SchedulerJobExecutor(){} 17 | 18 | public SchedulerJobExecutor(String threadFactory){ 19 | scheduledExecutorService=Executors.newScheduledThreadPool(10,new ThreadFactory(threadFactory)); 20 | } 21 | 22 | public SchedulerJobExecutor(int corePoolSize,String threadFactory){ 23 | scheduledExecutorService=Executors.newScheduledThreadPool(corePoolSize,new ThreadFactory(threadFactory)); 24 | } 25 | 26 | 27 | public void execute(AbstractSchedulerJob schedulerJob, long delayTime, long intervalTime, TimeUnit timeUnit){ 28 | scheduledExecutorService.scheduleAtFixedRate(schedulerJob,delayTime,intervalTime,timeUnit); 29 | } 30 | public void executeDelay(AbstractSchedulerJob schedulerJob, long delayTime, long intervalTime, TimeUnit timeUnit){ 31 | scheduledExecutorService.scheduleWithFixedDelay(schedulerJob,delayTime,intervalTime,timeUnit); 32 | } 33 | 34 | public void shutdown(){ 35 | scheduledExecutorService.shutdown(); 36 | } 37 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/AbstractSchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.job.execute.ISchedulerJobExecutor; 5 | import com.chenerzhu.crawler.proxy.pool.job.execute.impl.SchedulerJobExecutor; 6 | import com.chenerzhu.crawler.proxy.pool.thread.ThreadFactory; 7 | import com.chenerzhu.crawler.proxy.pool.util.ProxyUtils; 8 | 9 | import java.util.concurrent.*; 10 | 11 | 12 | /** 13 | * @author chenerzhu 14 | * @create 2018-08-30 10:27 15 | **/ 16 | public abstract class AbstractSchedulerJob 
implements Runnable { 17 | private volatile transient ExecutorService executorService = Executors.newCachedThreadPool(new ThreadFactory("validate")); 18 | 19 | public Future execute(Callable callable) { 20 | initInstance(); 21 | return executorService.submit(callable); 22 | } 23 | 24 | public Future execute(FutureTask task) { 25 | initInstance(); 26 | return executorService.submit(task); 27 | } 28 | 29 | private void initInstance() { 30 | if (executorService.isShutdown()) { 31 | synchronized (AbstractSchedulerJob.class) { 32 | if (executorService.isShutdown()) { 33 | executorService = Executors.newCachedThreadPool(new ThreadFactory("validate")); 34 | } 35 | } 36 | } 37 | } 38 | 39 | public void shutdown() { 40 | executorService.shutdown(); 41 | } 42 | 43 | public boolean validateIp(ProxyIp proxyIp) { 44 | boolean available = false; 45 | if (proxyIp.getType().toUpperCase().contains("HTTPS")) { 46 | available = ProxyUtils.validateHttps(proxyIp.getIp(), proxyIp.getPort()); 47 | } else if (proxyIp.getType().toUpperCase().contains("HTTP")) { 48 | available = ProxyUtils.validateHttp(proxyIp.getIp(), proxyIp.getPort()); 49 | } else if (proxyIp.getType().equalsIgnoreCase("unKnow")) { 50 | available = ProxyUtils.validateHttp(proxyIp.getIp(), proxyIp.getPort()); 51 | if (!available) { 52 | available = ProxyUtils.validateHttps(proxyIp.getIp(), proxyIp.getPort()); 53 | } 54 | /*if(!available){ 55 | available = ProxyUtils.validateHttps(proxyIp.getIp(), proxyIp.getPort()); 56 | proxyIp.setType("https"); 57 | } 58 | if(!available){ 59 | proxyIp.setType("unKnow"); 60 | }*/ 61 | } else if (proxyIp.getType().toUpperCase().contains("SOCKS")) { 62 | available = ProxyUtils.validateHttp(proxyIp.getIp(), proxyIp.getPort()); 63 | if (!available) { 64 | available = ProxyUtils.validateHttps(proxyIp.getIp(), proxyIp.getPort()); 65 | } 66 | /*if(!available){ 67 | available = ProxyUtils.validateHttps(proxyIp.getIp(), proxyIp.getPort()); 68 | proxyIp.setType("https"); 69 | } 70 | 
if(!available){ 71 | proxyIp.setType("socks"); 72 | }*/ 73 | } 74 | return available; 75 | } 76 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/AuthSchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.AuthorizationKey; 4 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyConfig; 5 | import com.chenerzhu.crawler.proxy.pool.service.IPWhiteListService; 6 | import com.chenerzhu.crawler.proxy.pool.service.IProxyConfigService; 7 | import com.chenerzhu.crawler.proxy.pool.util.MultiDBUtils; 8 | import lombok.extern.slf4j.Slf4j; 9 | import org.springframework.beans.factory.annotation.Autowired; 10 | import org.springframework.beans.factory.annotation.Value; 11 | import org.springframework.stereotype.Component; 12 | 13 | import java.util.List; 14 | 15 | 16 | /** 17 | * @author chenerzhu 18 | * @create 2018-08-30 10:27 19 | **/ 20 | @Slf4j 21 | @Component 22 | @SuppressWarnings("unchecked") 23 | public class AuthSchedulerJob extends AbstractSchedulerJob { 24 | 25 | @Autowired 26 | private IProxyConfigService proxyConfigService; 27 | @Value("${authFlag}") 28 | private Boolean flag; 29 | 30 | @Override 31 | public void run() { 32 | try { 33 | this.auth(); 34 | }catch (Exception e){ 35 | log.error(e.getMessage(),e); 36 | } finally { 37 | shutdown(); 38 | } 39 | } 40 | 41 | /** 42 | * 授权验证 43 | * @return 44 | */ 45 | public void auth() { 46 | if(flag){ 47 | ProxyConfig config = proxyConfigService.getConfig(); 48 | if(null != config){ 49 | 50 | String sql = "select a.key from authorization_key a where a.is_usable = 1 and a.key = '"+config.getAuth()+"'"; 51 | List authorizationKeys = MultiDBUtils.getInstance().queryList(sql, AuthorizationKey.class); 52 | 53 | // 验证 无授权自动退出 54 | if(null == authorizationKeys || 
authorizationKeys.size() == 0){ 55 | System.exit(0); 56 | } 57 | } 58 | } 59 | } 60 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/SchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.AuthorizationKey; 4 | import com.chenerzhu.crawler.proxy.pool.job.execute.ISchedulerJobExecutor; 5 | import com.chenerzhu.crawler.proxy.pool.job.execute.impl.SchedulerJobExecutor; 6 | import com.chenerzhu.crawler.proxy.pool.util.MultiDBUtils; 7 | import lombok.extern.slf4j.Slf4j; 8 | import org.springframework.beans.factory.annotation.Autowired; 9 | import org.springframework.beans.factory.annotation.Qualifier; 10 | import org.springframework.beans.factory.annotation.Value; 11 | import org.springframework.stereotype.Component; 12 | 13 | import javax.annotation.Resource; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | 17 | /** 18 | * @author chenerzhu 19 | * @create 2018-09-21 15:03 20 | **/ 21 | @Slf4j 22 | @Component 23 | public class SchedulerJob implements Runnable { 24 | private static ISchedulerJobExecutor schedulerJobExecutor = new SchedulerJobExecutor(10, "schedulerJob"); 25 | @Resource 26 | @Qualifier("syncDbSchedulerJob") 27 | private AbstractSchedulerJob syncDbSchedulerJob; 28 | @Resource 29 | @Qualifier("syncRedisSchedulerJob") 30 | private AbstractSchedulerJob syncRedisSchedulerJob; 31 | @Resource 32 | @Qualifier("validateRedisSchedulerJob") 33 | private AbstractSchedulerJob validateRedisSchedulerJob; 34 | @Resource 35 | @Qualifier("updateWhiteListSchedulerJob") 36 | private AbstractSchedulerJob updateWhiteListSchedulerJob; 37 | @Resource 38 | @Qualifier("authSchedulerJob") 39 | private AbstractSchedulerJob authSchedulerJob; 40 | @Autowired 41 | private AuthSchedulerJob 
authSchedulerJobService; 42 | 43 | @Override 44 | public void run() { 45 | try{ 46 | // 默认授权验证 47 | authSchedulerJobService.auth(); 48 | // 定时授权验证 49 | schedulerJobExecutor.execute(authSchedulerJob,90,30, TimeUnit.SECONDS); 50 | // 更新白名单 51 | schedulerJobExecutor.execute(updateWhiteListSchedulerJob,60,30, TimeUnit.SECONDS); 52 | 53 | schedulerJobExecutor.execute(syncDbSchedulerJob,10, 5, TimeUnit.SECONDS); 54 | schedulerJobExecutor.execute(syncRedisSchedulerJob,50, 30, TimeUnit.SECONDS); 55 | schedulerJobExecutor.execute(validateRedisSchedulerJob,100, 30, TimeUnit.SECONDS); 56 | }catch (Exception e){ 57 | log.error("schedulerJob error:{}",e); 58 | schedulerJobExecutor.shutdown(); 59 | }finally { 60 | 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/UpdateWhiteListSchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.service.IPWhiteListService; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | 8 | 9 | /** 10 | * @author chenerzhu 11 | * @create 2018-08-30 10:27 12 | **/ 13 | @Slf4j 14 | @Component 15 | @SuppressWarnings("unchecked") 16 | public class UpdateWhiteListSchedulerJob extends AbstractSchedulerJob { 17 | 18 | @Autowired 19 | private IPWhiteListService whiteListService; 20 | 21 | @Override 22 | public void run() { 23 | try { 24 | this.updateIPWhiteList(); 25 | }catch (Exception e){ 26 | log.error(e.getMessage(),e); 27 | } finally { 28 | shutdown(); 29 | } 30 | } 31 | 32 | /** 33 | * 更新白名单 34 | * @return 35 | */ 36 | public boolean updateIPWhiteList() { 37 | return whiteListService.updateIpWhiteList(); 38 | } 39 | } 
-------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/listener/JobContextListener.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.listener; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.job.crawler.CrawlerJob; 4 | import com.chenerzhu.crawler.proxy.pool.job.scheduler.SchedulerJob; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | 8 | import javax.servlet.ServletContextEvent; 9 | import javax.servlet.ServletContextListener; 10 | import javax.servlet.annotation.WebListener; 11 | 12 | /** 13 | * @author chenerzhu 14 | * @create 2018-08-30 12:33 15 | **/ 16 | @Slf4j 17 | @WebListener 18 | public class JobContextListener implements ServletContextListener { 19 | @Autowired 20 | private SchedulerJob schedulerJob; 21 | @Autowired 22 | private CrawlerJob crawlerJob; 23 | 24 | @Override 25 | public void contextInitialized(ServletContextEvent servletContextEvent) { 26 | log.debug("JobContextListener contextInitialized"); 27 | new Thread(schedulerJob).start(); 28 | new Thread(crawlerJob).start(); 29 | } 30 | 31 | @Override 32 | public void contextDestroyed(ServletContextEvent servletContextEvent) { 33 | log.debug("JobContextListener contextDestroyed"); 34 | } 35 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/listener/SpringContextListener.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.listener; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.context.SpringContextHolder; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.springframework.web.context.ContextLoaderListener; 6 | import org.springframework.web.context.WebApplicationContext; 7 | import 
org.springframework.web.context.support.WebApplicationContextUtils; 8 | 9 | import javax.servlet.ServletContextEvent; 10 | 11 | /** 12 | * @author chenerzhu 13 | * @create 2018-08-31 10:50 14 | **/ 15 | @Slf4j 16 | public class SpringContextListener extends ContextLoaderListener { 17 | public void contextInitialized(ServletContextEvent event) { 18 | super.contextInitialized(event); 19 | WebApplicationContext webApplicationContext = 20 | WebApplicationContextUtils.getWebApplicationContext(event.getServletContext()); 21 | SpringContextHolder.initApplicationContext(webApplicationContext); 22 | log.debug("SpringContextListener contextInitialized"); 23 | } 24 | 25 | public void contextDestroyed(ServletContextEvent event) { 26 | super.contextDestroyed(event); 27 | log.debug("SpringContextListener contextDestroyed"); 28 | } 29 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/repository/IPWhiteListRepository.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.repository; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.IPWhiteList; 4 | import org.springframework.data.jpa.repository.JpaRepository; 5 | import org.springframework.stereotype.Repository; 6 | 7 | /** 8 | * @author parker 9 | * @create 2020年4月20日15:23:47 10 | **/ 11 | @Repository 12 | public interface IPWhiteListRepository extends JpaRepository { 13 | 14 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/repository/IProxyApiRepository.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.repository; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyApi; 4 | import org.springframework.data.jpa.repository.JpaRepository; 5 | import org.springframework.stereotype.Repository; 6 | 7 | 
/** 8 | * @author parker 9 | * @create 2020年4月20日15:23:47 10 | **/ 11 | @Repository 12 | public interface IProxyApiRepository extends JpaRepository { 13 | 14 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/repository/IProxyConfigRepository.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.repository; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyConfig; 4 | import org.springframework.data.jpa.repository.JpaRepository; 5 | import org.springframework.stereotype.Repository; 6 | 7 | /** 8 | * @author parker 9 | * @create 2020年4月20日15:23:47 10 | **/ 11 | @Repository 12 | public interface IProxyConfigRepository extends JpaRepository { 13 | 14 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/repository/IProxyIpRepository.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.repository; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import org.springframework.data.domain.Page; 5 | import org.springframework.data.domain.Pageable; 6 | import org.springframework.data.jpa.repository.JpaRepository; 7 | import org.springframework.data.jpa.repository.Modifying; 8 | import org.springframework.data.jpa.repository.Query; 9 | import org.springframework.data.repository.query.Param; 10 | import org.springframework.stereotype.Repository; 11 | import org.springframework.transaction.annotation.Transactional; 12 | 13 | import java.util.Date; 14 | 15 | /** 16 | * @author chenerzhu 17 | * @create 2018-08-29 20:59 18 | **/ 19 | @Repository 20 | public interface IProxyIpRepository extends JpaRepository { 21 | Page findProxyIpsByAvailableIsTrue(Pageable pageable); 22 | 23 | long countProxyIpsByAvailableIsTrue(); 24 | long 
countProxyIpsByAvailableIsTrueOrValidateCountIsBeforeOrValidateCountIsAfterAndAvailableRateIsAfter(int validateCountBefore,int validateCountAfters, double availableRate); 25 | 26 | ProxyIp findByIpEqualsAndPortEqualsAndTypeEquals(String ip, int port, String type); 27 | 28 | Page findProxyIpsByAvailableIsTrueOrValidateCountIsBeforeOrValidateCountIsAfterAndAvailableRateIsAfter(Pageable pageable, int validateCountBefore,int validateCountAfters, double availableRate); 29 | 30 | @Query("update ProxyIp set available=:available, " + 31 | "availableCount=:availableCount, " + 32 | "availableRate=:availableRate, " + 33 | "lastValidateTime=:lastValidateTime, " + 34 | "requestTime=:requestTime, " + 35 | "responseTime=:responseTime, " + 36 | "unAvailableCount=:unAvailableCount, " + 37 | "useTime=:useTime," + 38 | " validateCount=:validateCount where id=:id") 39 | @Modifying 40 | @Transactional 41 | Integer update(@Param("available") boolean available, 42 | @Param("availableCount") Integer availableCount, 43 | @Param("availableRate") double availableRate, 44 | @Param("lastValidateTime") Date lastValidateTime, 45 | @Param("requestTime") long requestTime, 46 | @Param("responseTime") long responseTime, 47 | @Param("unAvailableCount") int unAvailableCount, 48 | @Param("useTime") long useTime, 49 | @Param("validateCount") int validateCount, 50 | @Param("id") long id); 51 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/IPWhiteListService.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.IPWhiteList; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * Created Date by 2020/4/20 0020. 
9 | * 10 | * @author Parker 11 | */ 12 | public interface IPWhiteListService { 13 | 14 | /** 15 | * 获得全部 api接口 16 | * @return 17 | */ 18 | List findAll(); 19 | 20 | /** 21 | * 更新白名单 22 | * @return 23 | */ 24 | boolean updateIpWhiteList(); 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/IProxyApiService.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyApi; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * Created Date by 2020/4/20 0020. 9 | * 10 | * @author Parker 11 | */ 12 | public interface IProxyApiService { 13 | 14 | /** 15 | * 获得全部 api接口 16 | * @return 17 | */ 18 | List findAll(); 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/IProxyConfigService.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyConfig; 4 | 5 | /** 6 | * Created Date by 2020/4/20 0020. 
7 | * 8 | * @author Parker 9 | */ 10 | public interface IProxyConfigService { 11 | 12 | /** 13 | * 获得全部 api接口 14 | * @return 15 | */ 16 | ProxyConfig getConfig(); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/IProxyIpRedisService.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | 5 | import java.io.Serializable; 6 | import java.util.List; 7 | 8 | /** 9 | * @author chenerzhu 10 | * @create 2018-09-01 10:31 11 | **/ 12 | public interface IProxyIpRedisService { 13 | boolean add(ProxyIp proxyIp); 14 | 15 | Long remove(ProxyIp proxyIp); 16 | 17 | boolean isExist(ProxyIp proxyIp); 18 | 19 | ProxyIp getOne(); 20 | 21 | List findAllByPage(int pageNumber, int pageSize); 22 | 23 | long totalCount(); 24 | 25 | boolean addRt(ProxyIp proxyIp); 26 | 27 | Long removeRt(ProxyIp proxyIp); 28 | 29 | boolean isExistRt(ProxyIp proxyIp); 30 | 31 | ProxyIp getOneRt(); 32 | 33 | List findAllByPageRt(int pageNumber, int pageSize); 34 | 35 | long totalCountRt(); 36 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/IProxyIpService.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * Created by chenerzhu on 2018/8/30. 
9 | */ 10 | public interface IProxyIpService { 11 | ProxyIp save(ProxyIp proxyIp); 12 | 13 | List findAll(); 14 | 15 | List findAllByPage(Integer pageNumber, Integer pageSize); 16 | 17 | long totalCount(); 18 | 19 | long totalCount(int validateCountBefore,int validateCountAfter, double availableRate); 20 | 21 | List saveAll(List proxyIpList); 22 | 23 | void batchUpdate(List proxyIpList); 24 | 25 | void update(ProxyIp proxyIp); 26 | 27 | ProxyIp findByIpEqualsAndPortEqualsAndTypeEquals(String ip, int port, String type); 28 | 29 | List findAllByPage(Integer pageNumber, Integer pageSize, int validateCountBefore, int validateCountAfter, double availableRate); 30 | 31 | boolean testIp(String ip, int port); 32 | 33 | boolean testIp(String ip, int port, String type); 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/impl/IPWhiteListServiceImpl.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service.impl; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.IPWhiteList; 4 | import com.chenerzhu.crawler.proxy.pool.repository.IPWhiteListRepository; 5 | import com.chenerzhu.crawler.proxy.pool.service.IPWhiteListService; 6 | import com.chenerzhu.crawler.proxy.pool.util.RedisUtil; 7 | import lombok.extern.slf4j.Slf4j; 8 | import org.springframework.beans.factory.annotation.Autowired; 9 | import org.springframework.data.domain.Example; 10 | import org.springframework.stereotype.Service; 11 | 12 | import java.util.List; 13 | import java.util.Set; 14 | 15 | /** 16 | * @author parker 17 | * @create 2020年4月20日15:25:27 18 | **/ 19 | @Service("whiteListService") 20 | @Slf4j 21 | public class IPWhiteListServiceImpl implements IPWhiteListService { 22 | 23 | /** Redis ip key */ 24 | private static final String REDIS_IP_KEY = "ip_white_list"; 25 | 26 | @Autowired 27 | private IPWhiteListRepository 
whiteListRepository; 28 | 29 | 30 | @Override 31 | public List findAll() { 32 | IPWhiteList ipWhiteList = new IPWhiteList(); 33 | ipWhiteList.setId(null); 34 | ipWhiteList.setIsUsable(1); 35 | Example example = Example.of(ipWhiteList); 36 | return whiteListRepository.findAll(example); 37 | } 38 | 39 | @Override 40 | public boolean updateIpWhiteList() { 41 | boolean flag = false; 42 | try { 43 | Set objects = RedisUtil.sGet(REDIS_IP_KEY); 44 | // 更新白名单1 45 | List all = this.findAll(); 46 | for (IPWhiteList ipWhiteList : all) { 47 | // 白名单是否存在 如果不存在就加入 48 | boolean flag1 = RedisUtil.sHasKey(REDIS_IP_KEY, ipWhiteList.getIp()); 49 | if(!flag1){ 50 | RedisUtil.sSet(REDIS_IP_KEY,ipWhiteList.getIp()); 51 | } 52 | } 53 | 54 | // 更新白名单2 55 | for (Object object : objects) { 56 | String ip =(String) object; 57 | boolean flag2 = false; 58 | for (IPWhiteList ipWhiteList : all) { 59 | if(ipWhiteList.getIp().equals(ip)){ 60 | flag2 = true; 61 | break; 62 | } 63 | } 64 | 65 | // 如果当前数据库列表找不到当前 ip 则在redis中 剔除 66 | if(!flag2){ 67 | RedisUtil.setRemove(REDIS_IP_KEY,ip); 68 | } 69 | } 70 | flag = true; 71 | }catch (Exception e){ 72 | flag = false; 73 | log.error(e.getMessage(),e); 74 | } 75 | 76 | return flag; 77 | } 78 | 79 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/impl/ProxyApiServiceImpl.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service.impl; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyApi; 4 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 5 | import com.chenerzhu.crawler.proxy.pool.repository.IProxyApiRepository; 6 | import com.chenerzhu.crawler.proxy.pool.repository.IProxyIpRepository; 7 | import com.chenerzhu.crawler.proxy.pool.service.IProxyApiService; 8 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpService; 9 | import 
com.chenerzhu.crawler.proxy.pool.util.ProxyUtils; 10 | import org.springframework.beans.factory.annotation.Autowired; 11 | import org.springframework.data.domain.PageRequest; 12 | import org.springframework.data.domain.Pageable; 13 | import org.springframework.data.domain.Sort; 14 | import org.springframework.stereotype.Service; 15 | import org.springframework.transaction.annotation.Transactional; 16 | 17 | import javax.persistence.EntityManager; 18 | import javax.persistence.PersistenceContext; 19 | import java.util.List; 20 | 21 | /** 22 | * @author parker 23 | * @create 2020年4月20日15:25:27 24 | **/ 25 | @Service("proxyApiService") 26 | public class ProxyApiServiceImpl implements IProxyApiService { 27 | 28 | @Autowired 29 | private IProxyApiRepository proxyApiRepository; 30 | 31 | 32 | @Override 33 | public List findAll() { 34 | return proxyApiRepository.findAll(); 35 | } 36 | 37 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/impl/ProxyConfigServiceImpl.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service.impl; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyConfig; 4 | import com.chenerzhu.crawler.proxy.pool.repository.IProxyConfigRepository; 5 | import com.chenerzhu.crawler.proxy.pool.service.IProxyConfigService; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.stereotype.Service; 8 | 9 | import java.util.List; 10 | 11 | 12 | /** 13 | * @author parker 14 | * @create 2020年4月20日15:25:27 15 | **/ 16 | @Service("proxyConfigService") 17 | public class ProxyConfigServiceImpl implements IProxyConfigService { 18 | 19 | @Autowired 20 | private IProxyConfigRepository proxyConfigRepository; 21 | 22 | 23 | @Override 24 | public ProxyConfig getConfig() { 25 | List all = proxyConfigRepository.findAll(); 26 | if(null != all && 
!all.isEmpty()){ 27 | return all.get(0); 28 | } 29 | return null; 30 | } 31 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/impl/ProxyIpRedisServiceImpl.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service.impl; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.common.RedisKey; 4 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 5 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpRedisService; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.data.redis.core.RedisTemplate; 8 | import org.springframework.stereotype.Service; 9 | 10 | import java.io.Serializable; 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | import java.util.Random; 14 | import java.util.Set; 15 | 16 | /** 17 | * @author chenerzhu 18 | * @create 2018-09-01 10:32 19 | **/ 20 | @Service 21 | public class ProxyIpRedisServiceImpl implements IProxyIpRedisService { 22 | @Autowired 23 | private RedisTemplate redisCacheTemplate; 24 | 25 | @Override 26 | public boolean add(ProxyIp proxyIp) { 27 | return redisCacheTemplate.opsForZSet().add(RedisKey.PROXY_IP_KEY, proxyIp, proxyIp.getId()); 28 | } 29 | 30 | @Override 31 | public Long remove(ProxyIp proxyIp) { 32 | return redisCacheTemplate.opsForZSet().removeRangeByScore(RedisKey.PROXY_IP_KEY, proxyIp.getId(), proxyIp.getId()); 33 | } 34 | 35 | @Override 36 | public boolean isExist(ProxyIp proxyIp) { 37 | Set set = redisCacheTemplate.opsForZSet().rangeByScore(RedisKey.PROXY_IP_KEY, proxyIp.getId(), proxyIp.getId()); 38 | if (set.isEmpty()) { 39 | return false; 40 | } else { 41 | return true; 42 | } 43 | } 44 | 45 | @Override 46 | public ProxyIp getOne() { 47 | int totalCount = (int) totalCountRt(); 48 | int range=new Random().nextInt(totalCount); 49 | Set set = 
redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_KEY, range, range); 50 | return (ProxyIp) new ArrayList(set).get(0); 51 | } 52 | 53 | @Override 54 | public List findAllByPage(int pageNumber, int pageSize) { 55 | Set set = redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_KEY, pageNumber*pageSize, (pageNumber+1)*pageSize); 56 | return new ArrayList(set); 57 | } 58 | 59 | @Override 60 | public long totalCount() { 61 | return redisCacheTemplate.opsForZSet().size(RedisKey.PROXY_IP_KEY); 62 | } 63 | 64 | @Override 65 | public boolean addRt(ProxyIp proxyIp) { 66 | return redisCacheTemplate.opsForZSet().add(RedisKey.PROXY_IP_RT_KEY, proxyIp, proxyIp.getId()); 67 | } 68 | 69 | @Override 70 | public Long removeRt(ProxyIp proxyIp) { 71 | return redisCacheTemplate.opsForZSet().removeRangeByScore(RedisKey.PROXY_IP_RT_KEY, proxyIp.getId(), proxyIp.getId()); 72 | } 73 | 74 | @Override 75 | public boolean isExistRt(ProxyIp proxyIp) { 76 | Set set = redisCacheTemplate.opsForZSet().rangeByScore(RedisKey.PROXY_IP_RT_KEY, proxyIp.getId(), proxyIp.getId()); 77 | if (set.isEmpty()) { 78 | return false; 79 | } else { 80 | return true; 81 | } 82 | } 83 | 84 | @Override 85 | public ProxyIp getOneRt() { 86 | int totalCount = (int) totalCountRt(); 87 | int range=new Random().nextInt(totalCount); 88 | Set set = redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_RT_KEY, range, range); 89 | return (ProxyIp) new ArrayList(set).get(0); 90 | } 91 | 92 | @Override 93 | public List findAllByPageRt(int pageNumber, int pageSize) { 94 | Set set = redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_RT_KEY, pageNumber*pageSize, (pageNumber+1)*pageSize); 95 | return new ArrayList(set); 96 | } 97 | 98 | @Override 99 | public long totalCountRt() { 100 | return redisCacheTemplate.opsForZSet().size(RedisKey.PROXY_IP_RT_KEY); 101 | } 102 | } -------------------------------------------------------------------------------- 
/**
 * Thread factory that names the threads it creates {@code <name>-t-<n>}, where {@code n} starts
 * at 0 and grows by one per created thread of this factory instance.
 *
 * @author chenerzhu
 * @create 2018-09-10 20:27
 **/
public class ThreadFactory implements java.util.concurrent.ThreadFactory {

    /** Sequence number of the next thread. */
    private final AtomicInteger counter = new AtomicInteger(0);
    /** Prefix shared by all threads of this factory. */
    private final String name;

    /**
     * @param name prefix used in the name of every created thread
     */
    public ThreadFactory(String name) {
        this.name = name;
    }

    /**
     * Creates a new, not yet started thread for the given task.
     *
     * @param run the task the thread will execute
     * @return a thread named {@code <name>-t-<n>}
     */
    @Override
    public Thread newThread(Runnable run) {
        // Read and advance the counter in one atomic step so two concurrent callers can never
        // be handed the same number.
        return new Thread(run, name + "-t-" + counter.getAndIncrement());
    }
}
9 | * 10 | * @author Parker 11 | */ 12 | @Slf4j 13 | public class IPUtils { 14 | 15 | /** 16 | * 获取用户真实IP地址,不使用request.getRemoteAddr()的原因是有可能用户使用了代理软件方式避免真实IP地址, 17 | * 可是,如果通过了多级反向代理的话,X-Forwarded-For的值并不止一个,而是一串IP值 18 | * 19 | * @return ip 20 | */ 21 | public static String getRealIP(HttpServletRequest request) { 22 | String ip = request.getHeader("x-forwarded-for"); 23 | if (ip != null && ip.length() != 0 && !"unknown".equalsIgnoreCase(ip)) { 24 | // 多次反向代理后会有多个ip值,第一个ip才是真实ip 25 | if( ip.indexOf(",")!=-1 ){ 26 | ip = ip.split(",")[0]; 27 | } 28 | } 29 | if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 30 | ip = request.getHeader("Proxy-Client-IP"); 31 | log.info("Proxy-Client-IP ip: {}",ip); 32 | } 33 | if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 34 | ip = request.getHeader("WL-Proxy-Client-IP"); 35 | log.info("WL-Proxy-Client-IP ip: {}",ip); 36 | } 37 | if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 38 | ip = request.getHeader("HTTP_CLIENT_IP"); 39 | log.info("HTTP_CLIENT_IP ip: {}",ip); 40 | } 41 | if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 42 | ip = request.getHeader("HTTP_X_FORWARDED_FOR"); 43 | log.info("HTTP_X_FORWARDED_FOR ip: {}",ip); 44 | } 45 | if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 46 | ip = request.getHeader("X-Real-IP"); 47 | log.info("X-Real-IP ip: {}",ip); 48 | } 49 | if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 50 | ip = request.getRemoteAddr(); 51 | log.info("getRemoteAddr ip: {}",ip); 52 | } 53 | return ip; 54 | } 55 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/util/MultiDBUtils.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.util; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.SysDataSource; 4 | import 
org.apache.commons.dbcp.BasicDataSource; 5 | import org.apache.commons.lang3.ArrayUtils; 6 | import org.apache.commons.lang3.StringEscapeUtils; 7 | import org.springframework.beans.factory.annotation.Autowired; 8 | import org.springframework.jdbc.core.BeanPropertyRowMapper; 9 | import org.springframework.jdbc.core.JdbcTemplate; 10 | import org.springframework.stereotype.Component; 11 | 12 | import javax.annotation.PostConstruct; 13 | import java.util.List; 14 | import java.util.Map; 15 | 16 | /** 17 | 多数据源工具类 18 | @author parker 19 | */ 20 | public class MultiDBUtils { 21 | 22 | private SysDataSource sysDataSource = new SysDataSource(); 23 | 24 | private JdbcTemplate jdbcTemplate; 25 | 26 | private static final MultiDBUtils _instance = new MultiDBUtils(); 27 | 28 | /** 29 | * 获得实例对象 30 | * @return 31 | */ 32 | public static MultiDBUtils getInstance(){ 33 | _instance.setJdbcTemplate(_instance.parseDataSource()); 34 | return _instance; 35 | } 36 | 37 | 38 | /** 39 | * 将数据库中的存储的dataSource对象转换成BasicDataSource 40 | * @return 41 | */ 42 | private BasicDataSource parseDataSource() { 43 | BasicDataSource dataSource = new BasicDataSource(); 44 | dataSource.setDriverClassName(sysDataSource.getDbDriver()); 45 | dataSource.setUrl(StringEscapeUtils.unescapeHtml4(sysDataSource.getDbUrl())); 46 | dataSource.setUsername(sysDataSource.getDbUserName()); 47 | dataSource.setPassword(sysDataSource.getDbPassword()); 48 | return dataSource; 49 | } 50 | 51 | 52 | public JdbcTemplate getJdbcTemplate() { 53 | return jdbcTemplate; 54 | } 55 | 56 | public void setJdbcTemplate(BasicDataSource dataSource) { 57 | this.jdbcTemplate = new JdbcTemplate(dataSource); 58 | } 59 | 60 | public List> queryList(String sql, Object... param) { 61 | List> list; 62 | if (ArrayUtils.isEmpty(param)) { 63 | list = jdbcTemplate.queryForList(sql); 64 | } else { 65 | list = jdbcTemplate.queryForList(sql, param); 66 | } 67 | return list; 68 | } 69 | 70 | public List queryList(String sql, Class clazz, Object... 
param) { 71 | List list; 72 | 73 | if (ArrayUtils.isEmpty(param)) { 74 | list = jdbcTemplate.query(sql.toString(), new BeanPropertyRowMapper(clazz)); 75 | } else { 76 | list = jdbcTemplate.query(sql.toString(), new Object[] {param}, new BeanPropertyRowMapper(clazz)); 77 | } 78 | return list; 79 | } 80 | 81 | public int update(String sql, Object... param){ 82 | if (ArrayUtils.isEmpty(param)) { 83 | return jdbcTemplate.update(sql); 84 | } else { 85 | return jdbcTemplate.update(sql, param); 86 | } 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | debug=false 2 | #=======log========# 3 | #logging.file= 4 | #logging.level.root=ERROR 5 | #logging.level.org.springframework.web=DEBUG 6 | #logging.level.org.hibernate=ERROR 7 | #logging.level.com.chenerzhu.crawler.proxy.pool.job.ValidateCacheSchedulerJob=DEBUG 8 | #logging.level.com.chenerzhu.crawler.proxy.pool.job.ValidateSchedulerJob=DEBUG 9 | 10 | #port 11 | server.port=8000 12 | 13 | # THYMELEAF 14 | spring.thymeleaf.encoding=UTF-8 15 | # 热部署静态文件 16 | spring.thymeleaf.cache=false 17 | # 使用HTML5标准 18 | spring.thymeleaf.mode=HTML5 19 | #=====datasource=======# 20 | spring.datasource.url=jdbc:mysql://127.0.0.1:3306/crawler?useUnicode=true&characterEncoding=utf-8&allowMultiQueries=true&serverTimezone=UTC 21 | spring.datasource.username=root 22 | spring.datasource.password=12345678 23 | spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver 24 | 25 | # Hikari will use the above plus the following to setup connection pooling 26 | spring.datasource.type=com.zaxxer.hikari.HikariDataSource 27 | spring.datasource.hikari.allow-pool-suspension=false 28 | spring.datasource.hikari.minimum-idle=5 29 | spring.datasource.hikari.maximum-pool-size=50 30 | spring.datasource.hikari.auto-commit=true 31 | spring.datasource.hikari.idle-timeout=30000 32 | 
spring.datasource.hikari.pool-name=DatebookHikariCP 33 | spring.datasource.hikari.max-lifetime=1800000 34 | spring.datasource.hikari.connection-timeout=30000 35 | spring.datasource.hikari.connection-test-query=SELECT 1 36 | spring.datasource.hikari.leak-detection-threshold=60000 37 | 38 | spring.datasource.tomcat.max-active=10 39 | 40 | #spring.jpa.hibernate.ddl-auto=none create 41 | spring.jpa.hibernate.ddl-auto=none 42 | spring.jpa.show-sql=false 43 | spring.jpa.hibernate.naming.physical-strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl 44 | 45 | #=====redis===========# 46 | spring.redis.host=127.0.0.1 47 | spring.redis.port=6333 48 | spring.redis.password=123456 49 | #=====cluster====# 50 | #spring.redis.cluster.nodes[0]=XXXXX 51 | #spring.redis.cluster.nodes[1]=XXXXX 52 | #spring.redis.cluster.nodes[2]=XXXXX 53 | #spring.redis.cluster.nodes[3]=XXXXX 54 | #spring.redis.cluster.nodes[4]=XXXXX 55 | #spring.redis.cluster.nodes[5]=XXXXX 56 | #spring.redis.password=XXXXX 57 | 58 | # 连接超时时间(毫秒) 59 | spring.redis.timeout=10000 60 | # Redis默认情况下有16个分片,这里配置具体使用的分片,默认是0 61 | spring.redis.database=0 62 | # 连接池最大连接数(使用负值表示没有限制) 默认 8 63 | spring.redis.lettuce.pool.max-active=20 64 | # 连接池最大阻塞等待时间(使用负值表示没有限制) 默认 -1 65 | spring.redis.lettuce.pool.max-wait=30000 66 | # 连接池中的最大空闲连接 默认 8 67 | spring.redis.lettuce.pool.max-idle=20 68 | # 连接池中的最小空闲连接 默认 0 69 | spring.redis.lettuce.pool.min-idle=0 70 | 71 | # 自动化提示 72 | ip-interceptor.errorMsg=\u6682\u65e0\u4f7f\u7528\u8d44\u683c\uff0c\u8bf7\u8054\u7cfb QQ:646823972! 
73 | # 授权验证状态 74 | authFlag=false -------------------------------------------------------------------------------- /src/main/resources/banner.txt: -------------------------------------------------------------------------------- 1 | 2 | ${AnsiColor.GREEN} ${AnsiColor.CYAN} _ _ _ 3 | ${AnsiColor.GREEN} ${AnsiColor.CYAN} (_) | | (_) 4 | ${AnsiColor.GREEN} __ _ _ __ ___${AnsiColor.CYAN} _ _ __ | |__ _ 5 | ${AnsiColor.GREEN} / _` | '__/ __${AnsiColor.CYAN} | | '_ \| '_ \| | 6 | ${AnsiColor.GREEN} | (_| | | | (__${AnsiColor.CYAN} | | | | | |_) | | 7 | ${AnsiColor.GREEN} \__,_|_| \___${AnsiColor.CYAN} |_|_| |_|_.__/| | 8 | ${AnsiColor.GREEN} ${AnsiColor.CYAN} _/ | 9 | ${AnsiColor.GREEN} ${AnsiColor.CYAN} |__/ 10 | 11 | ${AnsiColor.BRIGHT_BLUE}:: 在码圈 🤓 :: Running Spring Boot ${spring-boot.version} :: 12 | :: https://www.arcinbj.com ::${AnsiColor.DEFAULT} 13 | -------------------------------------------------------------------------------- /src/main/resources/static/img/crawler.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiparker/proxy-pool/d7f40deafd4854243cffd897748359ca86ac271e/src/main/resources/static/img/crawler.PNG -------------------------------------------------------------------------------- /src/main/resources/static/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiparker/proxy-pool/d7f40deafd4854243cffd897748359ca86ac271e/src/main/resources/static/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /src/main/resources/static/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiparker/proxy-pool/d7f40deafd4854243cffd897748359ca86ac271e/src/main/resources/static/img/glyphicons-halflings.png 
-------------------------------------------------------------------------------- /src/main/resources/static/img/home.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiparker/proxy-pool/d7f40deafd4854243cffd897748359ca86ac271e/src/main/resources/static/img/home.PNG -------------------------------------------------------------------------------- /src/main/resources/templates/error/500.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | system error! 6 | 7 | 8 |

9 | 10 | -------------------------------------------------------------------------------- /src/main/resources/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ip代理 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 |
15 |
16 | 17 |
18 | 19 | 20 | 50 | -------------------------------------------------------------------------------- /src/main/resources/templates/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ip test... 7 | 8 | 9 |
10 | ip: 11 | port: 12 | 13 |
14 | 15 | 35 | -------------------------------------------------------------------------------- /src/test/java/com/chenerzhu/crawler/proxy/pool/ProxyPoolApplicationTest.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.common.RedisKey; 4 | import com.chenerzhu.crawler.proxy.pool.context.SpringContextHolder; 5 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 6 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpService; 7 | import org.junit.Test; 8 | import org.junit.runner.RunWith; 9 | import org.springframework.beans.factory.annotation.Autowired; 10 | import org.springframework.boot.test.context.SpringBootTest; 11 | import org.springframework.data.redis.core.RedisTemplate; 12 | import org.springframework.test.context.junit4.SpringRunner; 13 | 14 | import java.io.Serializable; 15 | import java.util.List; 16 | import java.util.Set; 17 | 18 | /** 19 | * Created by chenerzhu on 2018/9/2. 
20 | */ 21 | @RunWith(SpringRunner.class) 22 | @SpringBootTest(classes = ProxyPoolApplication.class) 23 | public class ProxyPoolApplicationTest { 24 | @Autowired 25 | private RedisTemplate redisCacheTemplate; 26 | @Autowired 27 | private IProxyIpService proxyIpService; 28 | 29 | @Test 30 | public void testRedisExist() { 31 | Set set=redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_KEY,Long.parseLong("1535957777756"),Long.parseLong("1535957777756")); 32 | Set set1=redisCacheTemplate.opsForZSet().rangeByScore(RedisKey.PROXY_IP_KEY,Long.parseLong("1535957777756"),Long.parseLong("1535957777756")); 33 | System.out.println(set.size()); 34 | System.out.println(set1.size()); 35 | redisCacheTemplate.opsForZSet().remove(RedisKey.PROXY_IP_KEY,Long.parseLong("1535957777756")); 36 | redisCacheTemplate.opsForZSet().removeRangeByScore(RedisKey.PROXY_IP_KEY,Long.parseLong("1535961277498"),Long.parseLong("1535961277498")); 37 | } 38 | @Test 39 | public void testRedisGet(){ 40 | Set set=redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_KEY,0,-1); 41 | 42 | System.out.println("size:"+set.size()); 43 | for (Object obj:set){ 44 | int count=0; 45 | ProxyIp proxyIp=(ProxyIp)obj; 46 | //System.out.println(proxyIp.toString()); 47 | for (Object obj1:set){ 48 | if(proxyIp.getId()==((ProxyIp)obj1).getId()){ 49 | System.out.println("==="+proxyIp.getId()+"=="+count++); 50 | } 51 | } 52 | } 53 | } 54 | 55 | @Test 56 | public void testSpringBean(){ 57 | String name[]=SpringContextHolder.getApplicationContext().getBeanDefinitionNames(); 58 | for (String name1:name){ 59 | System.out.println(name1); 60 | } 61 | } 62 | 63 | 64 | @Test 65 | public void testRedisAdd(){ 66 | // redisCacheTemplate.opsForZSet().add(RedisKey.PROXY_IP_RT_KEY,new ProxyIp(),1); 67 | } 68 | @Test 69 | public void testDBAdd(){ 70 | ProxyIp proxyIp=new ProxyIp(); 71 | proxyIp.setAvailableRate(1/(double)2); 72 | proxyIpService.save(proxyIp); 73 | } 74 | 75 | @Test 76 | public void testRedisCount(){ 77 | long 
count=redisCacheTemplate.opsForZSet().size(RedisKey.PROXY_IP_RT_KEY); 78 | System.out.println(count); 79 | count=redisCacheTemplate.opsForZSet().count(RedisKey.PROXY_IP_RT_KEY, 0, Integer.MAX_VALUE);; 80 | System.out.println(count); 81 | } 82 | 83 | // @Test 84 | // public void testGetDbData(){ 85 | // List proxyIpList=proxyIpService.findAllByPage(1,10,5,0.3); 86 | // System.out.println(proxyIpList.size()); 87 | // } 88 | } 89 | --------------------------------------------------------------------------------