├── README.md ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── chenerzhu │ │ └── crawler │ │ └── proxy │ │ └── pool │ │ ├── ProxyPoolApplication.java │ │ ├── common │ │ ├── HttpMethod.java │ │ └── RedisKey.java │ │ ├── config │ │ ├── RedisConfig.java │ │ ├── SpringConfig.java │ │ └── WebConfig.java │ │ ├── context │ │ └── SpringContextHolder.java │ │ ├── controller │ │ ├── BaseController.java │ │ └── ProxyIpController.java │ │ ├── entity │ │ ├── ProxyIp.java │ │ ├── Result.java │ │ └── WebPage.java │ │ ├── exception │ │ ├── ProxyPoolException.java │ │ └── ProxyPoolExceptionHandler.java │ │ ├── job │ │ ├── crawler │ │ │ ├── AbstractCrawler.java │ │ │ ├── CrawlerJob.java │ │ │ ├── Data5uCrawlerJob.java │ │ │ ├── FreeProxyListCrawlerJob.java │ │ │ ├── GatherproxyCrawlerJob.java │ │ │ ├── ICrawler.java │ │ │ ├── MyProxyCrawlerJob.java │ │ │ ├── Proxy4FreeCrawlerJob.java │ │ │ ├── ProxynovaCrawlerJob.java │ │ │ ├── SpysOneCrawlerJob.java │ │ │ └── XicidailiCrawlerJob.java │ │ ├── execute │ │ │ ├── ISchedulerJobExecutor.java │ │ │ └── impl │ │ │ │ └── SchedulerJobExecutor.java │ │ └── scheduler │ │ │ ├── AbstractSchedulerJob.java │ │ │ ├── SchedulerJob.java │ │ │ ├── SyncDbSchedulerJob.java │ │ │ ├── SyncRedisSchedulerJob.java │ │ │ └── ValidateRedisSchedulerJob.java │ │ ├── listener │ │ ├── JobContextListener.java │ │ └── SpringContextListener.java │ │ ├── repository │ │ └── IProxyIpRepository.java │ │ ├── service │ │ ├── IProxyIpRedisService.java │ │ ├── IProxyIpService.java │ │ └── impl │ │ │ ├── ProxyIpRedisServiceImpl.java │ │ │ └── ProxyIpServiceImpl.java │ │ ├── thread │ │ └── ThreadFactory.java │ │ └── util │ │ ├── HttpClientUtils.java │ │ ├── HttpsUtils.java │ │ └── ProxyUtils.java └── resources │ ├── application.properties │ ├── static │ ├── css │ │ ├── bootstrap-table.css │ │ └── bootstrap.min.css │ ├── img │ │ ├── crawler.PNG │ │ ├── glyphicons-halflings-white.png │ │ ├── glyphicons-halflings.png │ │ └── home.PNG │ └── js │ │ ├── bootstrap-table.js │ │ 
├── bootstrap.min.js │ │ └── jquery-3.1.1.min.js │ └── templates │ ├── error │ └── 500.html │ ├── index.html │ └── test.html └── test └── java └── com └── chenerzhu └── crawler └── proxy └── pool └── ProxyPoolApplicationTest.java /README.md: -------------------------------------------------------------------------------- 1 | # proxy-pool 代理IP 2 | ### 背景 3 | 前段时间,写java爬虫来爬网易云音乐的评论。不料,爬了一段时间后ip被封禁了。由此,想到了使用ip代理,但是找了很多的ip代理网站,很少有可以用的代理ip。于是,抱着边做边学的心态,自己开发了一个代理ip池。 4 | 5 | ### 相关技术及环境 6 | **技术:** SpringBoot,SpringMVC, Hibernate, MySQL, Redis , Maven, Lombok, BootStrap-table,多线程并发 7 | **环境:** JDK1.8 , IDEA 8 | 9 | ### 实现功能 10 | 通过ip代理池,提供高可用的代理ip,可用率达到95%以上。 11 | - 通过接口获取代理ip 12 | 通过访问接口,如:http://127.0.0.1:8080/proxyIp 返回json格式的代理ip 13 | ```json 14 | { 15 | "code":200, 16 | "data":[ 17 | { 18 | "available":true, 19 | "ip":"1.10.186.214", 20 | "lastValidateTime":"2018-09-25 20:31:52", 21 | "location":"THThailand", 22 | "port":57677, 23 | "requestTime":0, 24 | "responseTime":0, 25 | "type":"https", 26 | "useTime":3671 27 | } 28 | ], 29 | "message":"success" 30 | } 31 | ``` 32 | 33 | - 通过页面获取代理ip 34 | 通过访问url,如:http://127.0.0.1:8080 返回代理ip列表页面。 35 | 36 | 37 | - 提供代理ip测试接口及页面 38 | 通过访问url, 如:http://127.0.0.1:8080/test (get)打开代理ip测试页面;通过接口 http://127.0.0.1:8080/test (post data: {"ip": "127.0.0.1","port":8080} ) 测试代理ip的可用性。 39 | 40 | ### 设计思路 41 | #### 模块划分 42 | - 爬虫模块:爬取代理ip网站的代理IP信息,先放入队列,再保存进数据库。 43 | - 数据库同步模块:按一定时间间隔将数据库中的IP同步到redis缓存中。 44 | - 缓存redis同步模块:按一定时间间隔将redis缓存同步到另一块redis缓存中。 45 | - 缓存redis代理ip校验模块:按一定时间间隔校验redis缓存中的代理ip池。 46 | - 前端显示及接口控制模块:显示可用ip页面,及提供ip获取api接口。 47 | 48 | #### 架构图 49 | 50 | 51 | ### IP来源 52 | 代理ip均来自爬虫爬取,国内爬取的ip大多不可用,代理池中的可用ip大多是国外的ip。爬取的网站有:http://www.xicidaili.com/nn ,http://www.data5u.com/free/index.shtml ,https://free-proxy-list.net ,https://www.my-proxy.com/free-proxy-list.html ,http://spys.one/en/free-proxy-list/ , https://www.proxynova.com/proxy-server-list/ ,https://www.proxy4free.com/list/webproxy1.html ,http://www.gatherproxy.com/ 。 
53 | ### 如何使用 54 | **前提:** 已经安装JDK1.8环境,MySQL数据库,Redis。 55 | 先使用maven编译成jar,proxy-pool-1.0.jar。 56 | 使用SpringBoot启动方式,启动即可。 57 | ```java 58 | java -jar proxy-pool-1.0.jar 59 | ``` 60 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.chenerzhu.crawler 7 | proxy-pool 8 | 1.0-SNAPSHOT 9 | jar 10 | 11 | proxy-pool 12 | proxy-pool 13 | 14 | 15 | org.springframework.boot 16 | spring-boot-starter-parent 17 | 2.0.4.RELEASE 18 | 19 | 20 | 21 | 22 | UTF-8 23 | UTF-8 24 | 1.8 25 | 26 | 27 | 28 | 29 | org.springframework.boot 30 | spring-boot-starter-data-jpa 31 | 32 | 33 | org.springframework.boot 34 | spring-boot-starter-data-redis 35 | 36 | 37 | org.apache.commons 38 | commons-pool2 39 | 40 | 41 | org.springframework.boot 42 | spring-boot-starter-web 43 | 44 | 45 | org.springframework.boot 46 | spring-boot-starter-thymeleaf 47 | 48 | 49 | io.lettuce 50 | lettuce-core 51 | 5.1.0.M1 52 | 53 | 54 | mysql 55 | mysql-connector-java 56 | 8.0.11 57 | 58 | 59 | org.projectlombok 60 | lombok 61 | true 62 | 63 | 64 | org.springframework.boot 65 | spring-boot-starter-test 66 | test 67 | 68 | 69 | com.alibaba 70 | fastjson 71 | 1.2.47 72 | 73 | 74 | commons-lang 75 | commons-lang 76 | 2.5 77 | 78 | 79 | org.apache.httpcomponents 80 | httpclient 81 | 4.5.2 82 | 83 | 84 | org.jsoup 85 | jsoup 86 | 1.11.2 87 | 88 | 89 | 90 | junit 91 | junit 92 | 4.12 93 | test 94 | 95 | 96 | 97 | 98 | proxy-pool-1.0 99 | 100 | 101 | org.springframework.boot 102 | spring-boot-maven-plugin 103 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/ProxyPoolApplication.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool; 2 | 3 | import 
/**
 * Names of the Redis keys used by the proxy pool.
 *
 * <p>This class only holds constants, so it is final and cannot be instantiated.
 *
 * @author chenerzhu
 * @create 2018-08-31 20:08
 **/
public final class RedisKey {
    /** Redis key whose literal name is {@code "PROXY_IP_KEY"}. */
    public static final String PROXY_IP_KEY = "PROXY_IP_KEY";

    /** Redis key whose literal name is {@code "PROXY_IP_RT_KEY"}. */
    public static final String PROXY_IP_RT_KEY = "PROXY_IP_RT_KEY";

    /** Constants holder: prevents accidental instantiation. */
    private RedisKey() {
    }
}
org.springframework.context.annotation.Configuration; 7 | import org.springframework.data.redis.connection.lettuce.LettuceConnectionFactory; 8 | import org.springframework.data.redis.core.RedisTemplate; 9 | import org.springframework.data.redis.serializer.GenericJackson2JsonRedisSerializer; 10 | import org.springframework.data.redis.serializer.StringRedisSerializer; 11 | 12 | import java.io.Serializable; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-08-31 16:05 17 | **/ 18 | @Configuration 19 | @AutoConfigureAfter(RedisAutoConfiguration.class) 20 | public class RedisConfig { 21 | @Bean 22 | public RedisTemplate redisRedisTemplate(LettuceConnectionFactory redisConnectionFactory) { 23 | RedisTemplate template = new RedisTemplate<>(); 24 | template.setKeySerializer(new StringRedisSerializer()); 25 | template.setValueSerializer(new GenericJackson2JsonRedisSerializer()); 26 | template.setConnectionFactory(redisConnectionFactory); 27 | return template; 28 | } 29 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/config/SpringConfig.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.config; 2 | 3 | import org.springframework.context.annotation.Configuration; 4 | 5 | /** 6 | * @author chenerzhu 7 | * @create 2018-08-30 12:38 8 | **/ 9 | @Configuration 10 | public class SpringConfig { 11 | 12 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/config/WebConfig.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.config; 2 | 3 | import com.alibaba.fastjson.support.spring.FastJsonHttpMessageConverter; 4 | import org.springframework.context.annotation.Bean; 5 | import org.springframework.context.annotation.Configuration; 6 | import 
org.springframework.http.MediaType; 7 | import org.springframework.http.converter.HttpMessageConverter; 8 | import org.springframework.http.converter.StringHttpMessageConverter; 9 | import org.springframework.http.converter.json.Jackson2ObjectMapperBuilder; 10 | import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; 11 | import org.springframework.web.servlet.config.annotation.DefaultServletHandlerConfigurer; 12 | import org.springframework.web.servlet.config.annotation.EnableWebMvc; 13 | import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry; 14 | import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; 15 | 16 | import java.nio.charset.Charset; 17 | import java.util.ArrayList; 18 | import java.util.List; 19 | 20 | /** 21 | * @author chenerzhu 22 | * @create 2018-05-27 14:10 23 | **/ 24 | @Configuration 25 | @EnableWebMvc // 启用MVC Java config的支持. 相当于 26 | public class WebConfig implements WebMvcConfigurer { 27 | 28 | // 设置响应头信息 29 | private static List buildDefaultMediaTypes() { 30 | List list = new ArrayList<>(); 31 | list.add(MediaType.TEXT_HTML); // 这个必须设置在第一位 32 | list.add(MediaType.APPLICATION_JSON_UTF8); 33 | return list; 34 | } 35 | 36 | @Override 37 | public void addResourceHandlers(ResourceHandlerRegistry registry) { 38 | registry.addResourceHandler("/static/**").addResourceLocations("classpath:/static/"); 39 | registry.addResourceHandler("/js/**").addResourceLocations("classpath:/static/js/"); 40 | registry.addResourceHandler("/css/**").addResourceLocations("classpath:/static/css/"); 41 | } 42 | 43 | // 配置处理静态资源 44 | @Override 45 | public void configureDefaultServletHandling(DefaultServletHandlerConfigurer configurer) { 46 | configurer.enable(); 47 | } 48 | 49 | // 设置MessageConverter 50 | @Override 51 | public void configureMessageConverters(List> converters) { 52 | converters.add(stringHttpMessageConverter()); 53 | converters.add(httpMessageConverter()); 54 | } 55 | 56 | @Bean 57 | 
public StringHttpMessageConverter stringHttpMessageConverter() { 58 | // 设置默认编码为UTF-8 59 | Charset default_charset = Charset.forName("UTF-8"); 60 | StringHttpMessageConverter converter = new StringHttpMessageConverter(default_charset); 61 | List list = buildDefaultMediaTypes(); 62 | converter.setSupportedMediaTypes(list); 63 | return converter; 64 | } 65 | @Bean 66 | public FastJsonHttpMessageConverter httpMessageConverter() { 67 | FastJsonHttpMessageConverter converter=new FastJsonHttpMessageConverter(); 68 | List list = buildDefaultMediaTypes(); 69 | converter.setSupportedMediaTypes(list); 70 | return converter; 71 | } 72 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/context/SpringContextHolder.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.context; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.springframework.beans.BeansException; 5 | import org.springframework.beans.factory.DisposableBean; 6 | import org.springframework.context.ApplicationContext; 7 | import org.springframework.context.ApplicationContextAware; 8 | import org.springframework.stereotype.Component; 9 | 10 | /** 11 | * @author chenerzhu 12 | * @create 2018-08-30 21:09 13 | **/ 14 | @Slf4j 15 | public class SpringContextHolder implements ApplicationContextAware, DisposableBean { 16 | private static ApplicationContext applicationContext; 17 | 18 | private SpringContextHolder() { 19 | } 20 | 21 | public static void initApplicationContext(ApplicationContext applicationContext) { 22 | if(SpringContextHolder.applicationContext==null){ 23 | SpringContextHolder.applicationContext = applicationContext; 24 | } 25 | } 26 | 27 | public static ApplicationContext getApplicationContext() { 28 | return applicationContext; 29 | } 30 | 31 | @Override 32 | public void setApplicationContext(ApplicationContext applicationContext) throws 
BeansException { 33 | if(this.applicationContext==null){ 34 | SpringContextHolder.applicationContext = applicationContext; 35 | } 36 | } 37 | 38 | @SuppressWarnings("unchecked") 39 | public static T getBean(String name) { 40 | return (T) getApplicationContext().getBean(name); 41 | } 42 | 43 | 44 | @SuppressWarnings("unchecked") 45 | public static T getBean(Class clazz) { 46 | return (T) getApplicationContext().getBeansOfType(clazz); 47 | } 48 | 49 | @Override 50 | public void destroy() throws Exception { 51 | SpringContextHolder.clear(); 52 | } 53 | 54 | public static void clear() { 55 | log.debug("Clear ApplicationContext of SpringContextHolder:" + applicationContext); 56 | applicationContext = null; 57 | } 58 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/controller/BaseController.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.controller; 2 | 3 | import org.springframework.web.bind.annotation.RestController; 4 | 5 | /** 6 | * @author chenerzhu 7 | * @create 2018-08-29 19:52 8 | **/ 9 | public class BaseController { 10 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/controller/ProxyIpController.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.controller; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import com.alibaba.fastjson.serializer.SerializeConfig; 6 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 7 | import com.chenerzhu.crawler.proxy.pool.entity.Result; 8 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpRedisService; 9 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpService; 10 | import lombok.extern.slf4j.Slf4j; 11 | import 
org.springframework.beans.factory.annotation.Autowired; 12 | import org.springframework.http.ResponseEntity; 13 | import org.springframework.stereotype.Controller; 14 | import org.springframework.ui.ModelMap; 15 | import org.springframework.web.bind.annotation.GetMapping; 16 | import org.springframework.web.bind.annotation.PostMapping; 17 | import org.springframework.web.bind.annotation.RequestMapping; 18 | import org.springframework.web.bind.annotation.ResponseBody; 19 | 20 | import javax.annotation.Resource; 21 | import javax.servlet.http.HttpServletRequest; 22 | import javax.servlet.http.HttpServletResponse; 23 | import java.util.ArrayList; 24 | import java.util.Arrays; 25 | import java.util.List; 26 | 27 | /** 28 | * @author chenerzhu 29 | * @create 2018-08-29 19:51 30 | **/ 31 | @Slf4j 32 | @Controller 33 | public class ProxyIpController extends BaseController { 34 | @Autowired 35 | private IProxyIpRedisService proxyIpRedisService; 36 | 37 | @Resource 38 | private IProxyIpService proxyIpService; 39 | 40 | @GetMapping("/") 41 | public String index(ModelMap modelMap){ 42 | List proxyIpList=proxyIpRedisService.findAllByPageRt(0,20); 43 | modelMap.put("proxyIpList", JSON.toJSON(proxyIpList)); 44 | return "index"; 45 | } 46 | 47 | @GetMapping("/proxyIpLow") 48 | @ResponseBody 49 | public Object getProxyIpLow(HttpServletRequest request, HttpServletResponse response, ModelMap modelMap) throws Exception { 50 | ProxyIp proxyIp = proxyIpRedisService.getOne(); 51 | boolean available = proxyIpService.testIp(proxyIp.getIp(), proxyIp.getPort(),proxyIp.getType()); 52 | while (!available){ 53 | proxyIp = proxyIpRedisService.getOne(); 54 | available = proxyIpService.testIp(proxyIp.getIp(), proxyIp.getPort(),proxyIp.getType()); 55 | } 56 | Result result=new Result(); 57 | result.setCode(200); 58 | result.setMessage("success"); 59 | result.setData(Arrays.asList(proxyIp)); 60 | return result; 61 | } 62 | 63 | @GetMapping("/proxyIp") 64 | @ResponseBody 65 | public Object 
getProxyIp(HttpServletRequest request, HttpServletResponse response, ModelMap modelMap) throws Exception { 66 | ProxyIp proxyIp = proxyIpRedisService.getOneRt(); 67 | Result result=new Result(); 68 | result.setCode(200); 69 | result.setMessage("success"); 70 | result.setData(Arrays.asList(proxyIp)); 71 | return result; 72 | } 73 | 74 | @PostMapping("/test") 75 | @ResponseBody 76 | public Object testIp(HttpServletRequest request, HttpServletResponse response, ModelMap modelMap) throws Exception { 77 | String ip = request.getParameter("ip").trim(); 78 | String port = request.getParameter("port").trim(); 79 | boolean available = proxyIpService.testIp(ip, Integer.parseInt(port)); 80 | Result result=new Result(); 81 | result.setCode(200); 82 | result.setData(new ArrayList()); 83 | result.setMessage(available==true?"available":"unavailable"); 84 | return result; 85 | } 86 | 87 | @GetMapping("/test") 88 | public String test() { 89 | return "test"; 90 | } 91 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/entity/ProxyIp.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.entity; 2 | 3 | import com.alibaba.fastjson.annotation.JSONField; 4 | import com.fasterxml.jackson.annotation.JsonIgnore; 5 | import lombok.Data; 6 | import lombok.ToString; 7 | import org.hibernate.annotations.CreationTimestamp; 8 | import org.hibernate.annotations.UpdateTimestamp; 9 | 10 | import javax.persistence.*; 11 | import java.io.Serializable; 12 | import java.util.Date; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-08-29 21:00 17 | **/ 18 | @Data 19 | @ToString 20 | @Entity 21 | @Table(name = "ProxyIp") 22 | public class ProxyIp implements Serializable { 23 | private static final long serialVersionUID = 1L; 24 | @Id 25 | @GeneratedValue(strategy = GenerationType.IDENTITY) 26 | @JSONField(serialize = false) 27 | private 
/**
 * Response envelope returned by the JSON endpoints: a status code, a human-readable message
 * and a list payload.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated by Lombok
 * from the fields below.
 *
 * @author chenerzhu
 * @create 2018-09-05 22:09
 **/
@ToString
@Data
public class Result {
    // Short outcome text, e.g. "success", "available" or "unavailable".
    private String message;
    // Application-level status code; the controller sets 200 for successful calls.
    private int code;
    // Payload list; the controller puts either the returned ProxyIp or nothing in it.
    private List data;
}
/**
 * Unchecked exception for failures raised by the proxy pool itself.
 *
 * @author chenerzhu
 * @create 2018-05-26 19:46
 **/
public class ProxyPoolException extends RuntimeException {
    // Exceptions are Serializable; pin the version instead of relying on the computed default.
    private static final long serialVersionUID = 1L;

    /** Creates an exception without message or cause. */
    public ProxyPoolException() {
        super();
    }

    /**
     * @param message description of the failure
     */
    public ProxyPoolException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param e       the underlying cause
     */
    public ProxyPoolException(String message, Throwable e) {
        super(message, e);
    }

    /**
     * Wraps a cause without adding a message of its own.
     *
     * @param cause the underlying cause
     */
    public ProxyPoolException(Throwable cause) {
        super(cause);
    }
}
org.springframework.web.bind.annotation.ControllerAdvice; 6 | import org.springframework.web.bind.annotation.ExceptionHandler; 7 | import org.springframework.web.bind.annotation.ResponseStatus; 8 | import org.springframework.web.servlet.ModelAndView; 9 | 10 | /** 11 | * @author chenerzhu 12 | * @create 2018-08-29 20:29 13 | **/ 14 | @Slf4j 15 | @ControllerAdvice 16 | public class ProxyPoolExceptionHandler { 17 | @ExceptionHandler(ProxyPoolException.class) 18 | @ResponseStatus(HttpStatus.OK) 19 | public ModelAndView processProxyPool(Exception e){ 20 | log.info("自定义异常处理-ProxyPoolException"); 21 | ModelAndView m = new ModelAndView(); 22 | log.error("error:",e); 23 | m.addObject("exception", e.getMessage()); 24 | m.setViewName("error/500"); 25 | return m; 26 | } 27 | @ExceptionHandler(Exception.class) 28 | @ResponseStatus(HttpStatus.OK) 29 | public ModelAndView processException(Exception e){ 30 | ModelAndView m = new ModelAndView(); 31 | log.error("error:",e); 32 | m.addObject("exception", e.getMessage()); 33 | m.setViewName("error/500"); 34 | return m; 35 | } 36 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/AbstractCrawler.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.common.HttpMethod; 4 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 5 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 6 | import com.chenerzhu.crawler.proxy.pool.job.scheduler.AbstractSchedulerJob; 7 | import com.chenerzhu.crawler.proxy.pool.util.HttpClientUtils; 8 | import lombok.extern.slf4j.Slf4j; 9 | import org.jsoup.Jsoup; 10 | 11 | import java.util.Date; 12 | import java.util.HashMap; 13 | import java.util.Map; 14 | import java.util.concurrent.ConcurrentLinkedQueue; 15 | 16 | /** 17 | * @author chenerzhu 18 | * @create 2018-09-02 13:40 
19 | **/ 20 | @Slf4j 21 | public abstract class AbstractCrawler extends AbstractSchedulerJob implements ICrawler, Runnable { 22 | protected ConcurrentLinkedQueue proxyIpQueue; 23 | protected String pageUrl; 24 | protected WebPage webPage; 25 | protected HttpMethod httpMethd=HttpMethod.GET; 26 | protected Map formParamMap; 27 | private Map headerMap = new HashMap() {{ 28 | put("Connection", "keep-alive"); 29 | put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"); 30 | put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"); 31 | put("Accept-Encoding", "gzip, deflate, sdch"); 32 | put("Accept-Language", "zh-CN,zh;q=0.9"); 33 | put("Redis-Control", "max-age=0"); 34 | put("Upgrade-Insecure-Requests", "1"); 35 | }}; 36 | 37 | public AbstractCrawler(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 38 | this.proxyIpQueue = proxyIpQueue; 39 | this.pageUrl = pageUrl; 40 | this.httpMethd=HttpMethod.GET; 41 | } 42 | 43 | public AbstractCrawler(ConcurrentLinkedQueue proxyIpQueue, String pageUrl,HttpMethod httpMethd,Map formParamMap) { 44 | this.proxyIpQueue = proxyIpQueue; 45 | this.pageUrl = pageUrl; 46 | this.httpMethd=httpMethd; 47 | this.formParamMap=formParamMap; 48 | } 49 | 50 | @Override 51 | public void run() { 52 | try { 53 | getPage(); 54 | parsePage(webPage); 55 | }catch (Exception e){ 56 | log.error("{} page process error",pageUrl,e); 57 | } 58 | 59 | } 60 | 61 | @Override 62 | public WebPage getPage() { 63 | WebPage webPage = null; 64 | try { 65 | log.debug("start get page:{}", pageUrl); 66 | headerMap.put("Referer", pageUrl); 67 | String pageContent=""; 68 | if(httpMethd==HttpMethod.GET){ 69 | pageContent= HttpClientUtils.sendGet(pageUrl, headerMap); 70 | }else if(httpMethd==HttpMethod.POST){ 71 | pageContent= HttpClientUtils.sendPostForm(pageUrl, "",headerMap,formParamMap); 72 | } 73 | webPage = new WebPage(); 74 | webPage.setCrawlTime(new 
Date()); 75 | webPage.setPage(pageContent); 76 | webPage.setDocument(Jsoup.parse(pageContent)); 77 | webPage.setHtml(Jsoup.parse(pageContent).html()); 78 | this.webPage = webPage; 79 | log.debug("end get page:{}", pageUrl); 80 | } catch (Exception e) { 81 | log.error("get page:{}", pageUrl, e); 82 | } 83 | return webPage; 84 | } 85 | 86 | public String getPageUrl() { 87 | return pageUrl; 88 | } 89 | 90 | public void setPageUrl(String pageUrl) { 91 | this.pageUrl = pageUrl; 92 | } 93 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/CrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.job.execute.ISchedulerJobExecutor; 5 | import com.chenerzhu.crawler.proxy.pool.job.execute.impl.SchedulerJobExecutor; 6 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpService; 7 | import lombok.extern.slf4j.Slf4j; 8 | import org.springframework.beans.factory.annotation.Autowired; 9 | import org.springframework.stereotype.Component; 10 | 11 | import java.util.concurrent.*; 12 | import com.chenerzhu.crawler.proxy.pool.thread.ThreadFactory; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-09-02 20:16 17 | **/ 18 | @Slf4j 19 | @Component 20 | @SuppressWarnings("unchecked") 21 | public class CrawlerJob implements Runnable { 22 | private volatile static ExecutorService executorService= Executors.newFixedThreadPool(5,new ThreadFactory("crawlerJob-consumer")); 23 | 24 | private ISchedulerJobExecutor schedulerJobExecutor=new SchedulerJobExecutor(30,"crawlerJob-producer"); 25 | 26 | @Autowired 27 | private IProxyIpService proxyIpService; 28 | 29 | @Override 30 | public void run() { 31 | try{ 32 | ConcurrentLinkedQueue proxyIpQueue = new ConcurrentLinkedQueue<>(); 33 | //生产者 
34 | //schedulerJobExecutor.execute(new XicidailiCrawlerJob(proxyIpQueue, "http://www.xicidaili.com/nn"), 0, 100, TimeUnit.SECONDS); 35 | 36 | //schedulerJobExecutor.execute(new Data5uCrawlerJob(proxyIpQueue, "http://www.data5u.com/free/index.shtml"), 10, 100, TimeUnit.SECONDS); 37 | 38 | schedulerJobExecutor.execute(new FreeProxyListCrawlerJob(proxyIpQueue, "https://free-proxy-list.net"), 20, 100, TimeUnit.SECONDS); 39 | 40 | schedulerJobExecutor.execute(new MyProxyCrawlerJob(proxyIpQueue, "https://www.my-proxy.com/free-proxy-list.html"), 30, 100, TimeUnit.SECONDS); 41 | 42 | //schedulerJobExecutor.execute(new SpysOneCrawlerJob(proxyIpQueue, "http://spys.one/en/free-proxy-list/"), 40, 100, TimeUnit.SECONDS); 43 | 44 | schedulerJobExecutor.execute(new ProxynovaCrawlerJob(proxyIpQueue, "https://www.proxynova.com/proxy-server-list/"), 50, 100, TimeUnit.SECONDS); 45 | 46 | schedulerJobExecutor.execute(new Proxy4FreeCrawlerJob(proxyIpQueue, "https://www.proxy4free.com/list/webproxy1.html"), 60, 100, TimeUnit.SECONDS); 47 | 48 | schedulerJobExecutor.execute(new GatherproxyCrawlerJob(proxyIpQueue, "http://www.gatherproxy.com/"), 70, 100, TimeUnit.SECONDS); 49 | 50 | //消费者 51 | for (int i = 0; i < 5; i++) { 52 | executorService.execute(new Runnable() { 53 | @Override 54 | public void run() { 55 | while (true && !Thread.currentThread().isInterrupted()) { 56 | try { 57 | log.info("the proxyIpQueue current size:{}", proxyIpQueue.size()); 58 | ProxyIp proxyIp = proxyIpQueue.poll(); 59 | if (proxyIp != null) { 60 | log.debug("get proxy ip:{}", proxyIp.toString()); 61 | if (proxyIpService.findByIpEqualsAndPortEqualsAndTypeEquals(proxyIp.getIp(), proxyIp.getPort(), proxyIp.getType()) == null) { 62 | proxyIpService.save(proxyIp); 63 | } else { 64 | log.debug("the proxy ip exist:{}", proxyIp.toString()); 65 | } 66 | }else{ 67 | TimeUnit.SECONDS.sleep(3); 68 | } 69 | } catch (Exception e) { 70 | log.error("get the proxy ip failed! 
error:{}",e.getMessage()); 71 | //e.printStackTrace(); 72 | try { 73 | TimeUnit.SECONDS.sleep(3); 74 | } catch (InterruptedException e1) { 75 | e1.printStackTrace(); 76 | } 77 | } 78 | } 79 | } 80 | }); 81 | } 82 | }catch (Exception e){ 83 | log.error("crawler error:{}",e); 84 | executorService.shutdown(); 85 | schedulerJobExecutor.shutdown(); 86 | }finally { 87 | 88 | } 89 | } 90 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/Data5uCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | 9 | import java.util.Date; 10 | import java.util.concurrent.BlockingQueue; 11 | import java.util.concurrent.ConcurrentLinkedQueue; 12 | import java.util.concurrent.TimeUnit; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-09-03 20:11 17 | **/ 18 | @Slf4j 19 | public class Data5uCrawlerJob extends AbstractCrawler { 20 | public Data5uCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 21 | super(proxyIpQueue, pageUrl); 22 | } 23 | 24 | @Override 25 | public void parsePage(WebPage webPage) { 26 | Elements elements = webPage.getDocument().getElementsByClass("l2"); 27 | Element element; 28 | ProxyIp proxyIp; 29 | for (int i = 0; i < elements.size(); i++) { 30 | try { 31 | element = elements.get(i); 32 | proxyIp = new ProxyIp(); 33 | proxyIp.setIp(element.child(0).text()); 34 | proxyIp.setPort(Integer.parseInt(element.child(1).text())); 35 | proxyIp.setLocation(element.child(4).text() + "-" + element.child(5).text()); 36 | proxyIp.setType(element.child(3).text()); 37 | proxyIp.setAvailable(true); 38 | proxyIp.setCreateTime(new 
Date()); 39 | proxyIp.setLastValidateTime(new Date()); 40 | proxyIp.setValidateCount(0); 41 | proxyIpQueue.offer(proxyIp); 42 | } catch (Exception e) { 43 | log.error("data5uCrawlerJob error:{0}",e); 44 | } 45 | } 46 | } 47 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/FreeProxyListCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import com.chenerzhu.crawler.proxy.pool.job.crawler.AbstractCrawler; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | 10 | import java.util.Date; 11 | import java.util.concurrent.BlockingQueue; 12 | import java.util.concurrent.ConcurrentLinkedQueue; 13 | import java.util.concurrent.TimeUnit; 14 | 15 | /** 16 | * @author chenerzhu 17 | * @create 2018-09-04 14:06 18 | * https://free-proxy-list.net/ 19 | **/ 20 | @Slf4j 21 | public class FreeProxyListCrawlerJob extends AbstractCrawler { 22 | public FreeProxyListCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 23 | super(proxyIpQueue, pageUrl); 24 | } 25 | 26 | @Override 27 | public void parsePage(WebPage webPage) { 28 | Elements elements = webPage.getDocument().getElementById("proxylisttable").getElementsByTag("tr"); 29 | Element element; 30 | ProxyIp proxyIp; 31 | for (int i = 1; i < elements.size() - 1; i++) { 32 | try { 33 | element = elements.get(i); 34 | proxyIp = new ProxyIp(); 35 | proxyIp.setIp(element.child(0).text()); 36 | proxyIp.setPort(Integer.parseInt(element.child(1).text())); 37 | proxyIp.setLocation(element.child(2).text() + "-" + element.child(3).text()); 38 | proxyIp.setType("yes".equalsIgnoreCase(element.child(6).text()) == true ? 
"https" : "http"); 39 | proxyIp.setAvailable(true); 40 | proxyIp.setCreateTime(new Date()); 41 | proxyIp.setLastValidateTime(new Date()); 42 | proxyIp.setValidateCount(0); 43 | proxyIpQueue.offer(proxyIp); 44 | } catch (Exception e) { 45 | log.error("freeProxyListCrawlerJob error:{0}",e); 46 | } 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/GatherproxyCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 5 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 6 | import lombok.extern.slf4j.Slf4j; 7 | 8 | import java.util.Date; 9 | import java.util.concurrent.BlockingQueue; 10 | import java.util.concurrent.ConcurrentLinkedQueue; 11 | import java.util.concurrent.TimeUnit; 12 | import java.util.regex.Matcher; 13 | import java.util.regex.Pattern; 14 | 15 | /** 16 | * @author chenerzhu 17 | * @create 2018-09-09 9:09 18 | * http://www.gatherproxy.com/ 19 | **/ 20 | @Slf4j 21 | public class GatherproxyCrawlerJob extends AbstractCrawler { 22 | public GatherproxyCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 23 | super(proxyIpQueue, pageUrl); 24 | } 25 | 26 | @Override 27 | public void parsePage(WebPage webPage) { 28 | Pattern pattern = Pattern.compile("\\{\"PROXY_CITY\".*?\"}"); 29 | Matcher matcher = null; 30 | matcher = pattern.matcher(webPage.getHtml()); 31 | ProxyIp proxyIp = null; 32 | while (matcher.find()) { 33 | try { 34 | JSONObject jsonObject = JSONObject.parseObject(matcher.group(0)); 35 | proxyIp = new ProxyIp(); 36 | proxyIp.setIp(jsonObject.getString("PROXY_IP")); 37 | proxyIp.setPort(Integer.parseInt(jsonObject.getString("PROXY_PORT"), 16)); 38 | proxyIp.setType("SOCKS");// 39 | 
proxyIp.setLocation(jsonObject.getString("PROXY_COUNTRY")); 40 | proxyIp.setCountry(jsonObject.getString("PROXY_COUNTRY")); 41 | proxyIp.setAnonymity(jsonObject.getString("PROXY_TYPE")); 42 | proxyIp.setAvailable(true); 43 | proxyIp.setCreateTime(new Date()); 44 | proxyIp.setLastValidateTime(new Date()); 45 | proxyIp.setValidateCount(0); 46 | proxyIpQueue.offer(proxyIp); 47 | } catch (Exception e) { 48 | log.error("freeProxyListCrawlerJob error:{0}",e); 49 | } 50 | 51 | } 52 | } 53 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/ICrawler.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 4 | 5 | /** 6 | * @author chenerzhu 7 | * @create 2018-09-02 13:40 8 | **/ 9 | public interface ICrawler { 10 | WebPage getPage(); 11 | 12 | void parsePage(WebPage webPage); 13 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/MyProxyCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import com.chenerzhu.crawler.proxy.pool.job.crawler.AbstractCrawler; 6 | import lombok.extern.slf4j.Slf4j; 7 | 8 | import java.util.Date; 9 | import java.util.concurrent.BlockingQueue; 10 | import java.util.concurrent.ConcurrentLinkedQueue; 11 | import java.util.concurrent.LinkedBlockingQueue; 12 | import java.util.concurrent.TimeUnit; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-09-08 16:35 17 | * https://www.my-proxy.com/free-proxy-list.html 18 | **/ 19 | @Slf4j 20 | public class MyProxyCrawlerJob extends 
AbstractCrawler { 21 | public MyProxyCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 22 | super(proxyIpQueue, pageUrl); 23 | } 24 | 25 | @Override 26 | public void parsePage(WebPage webPage) { 27 | String[] elements = webPage.getDocument().getElementsByClass("list") 28 | .html().split("
"); 29 | ProxyIp proxyIp; 30 | String element; 31 | for (int i = 0; i < 43; i++) { 32 | try { 33 | //185.120.37.186:55143#AL 34 | element = elements[i]; 35 | String ipPort = element.split("#")[0]; 36 | String ip = ipPort.split(":")[0]; 37 | String port = ipPort.split(":")[1]; 38 | String country = element.split("#")[1]; 39 | proxyIp = new ProxyIp(); 40 | proxyIp.setIp(ip); 41 | proxyIp.setPort(Integer.parseInt(port)); 42 | proxyIp.setType("http"); 43 | proxyIp.setCountry(country); 44 | proxyIp.setLocation(country); 45 | proxyIp.setCreateTime(new Date()); 46 | proxyIp.setAvailable(true); 47 | proxyIp.setLastValidateTime(new Date()); 48 | proxyIp.setValidateCount(0); 49 | proxyIpQueue.offer(proxyIp); 50 | } catch (Exception e) { 51 | log.error("myProxyCrawlerJob error:{0}",e); 52 | } 53 | } 54 | 55 | 56 | } 57 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/Proxy4FreeCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | 9 | import java.util.Date; 10 | import java.util.concurrent.BlockingQueue; 11 | import java.util.concurrent.ConcurrentLinkedQueue; 12 | import java.util.concurrent.TimeUnit; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-09-09 8:43 17 | * https://www.proxy4free.com/list/webproxy1.html 18 | **/ 19 | @Slf4j 20 | public class Proxy4FreeCrawlerJob extends AbstractCrawler { 21 | public Proxy4FreeCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 22 | super(proxyIpQueue, pageUrl); 23 | } 24 | 25 | @Override 26 | public void parsePage(WebPage webPage) { 27 | Elements elements = 
webPage.getDocument().getElementsByTag("tr"); 28 | Element element; 29 | ProxyIp proxyIp; 30 | for (int i = 2; i < elements.size(); i++) { 31 | try { 32 | element = elements.get(i); 33 | proxyIp = new ProxyIp(); 34 | proxyIp.setIp(element.child(0).child(0).attr("href").replaceAll("\"", "").split("=")[1]); 35 | proxyIp.setPort(80); 36 | proxyIp.setLocation(element.child(3).text()); 37 | proxyIp.setCountry(element.child(3).text()); 38 | proxyIp.setAnonymity(element.child(9).text()); 39 | proxyIp.setType("unKnow"); 40 | proxyIp.setAvailable(true); 41 | proxyIp.setCreateTime(new Date()); 42 | proxyIp.setLastValidateTime(new Date()); 43 | proxyIp.setValidateCount(0); 44 | proxyIpQueue.offer(proxyIp); 45 | } catch (Exception e) { 46 | log.error("proxy4FreeCrawlerJob error:{0}",e); 47 | } 48 | } 49 | 50 | } 51 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/ProxynovaCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | 9 | import javax.script.ScriptEngine; 10 | import javax.script.ScriptEngineManager; 11 | import javax.script.ScriptException; 12 | import java.util.Date; 13 | import java.util.concurrent.BlockingQueue; 14 | import java.util.concurrent.ConcurrentLinkedQueue; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.regex.Matcher; 17 | import java.util.regex.Pattern; 18 | 19 | /** 20 | * @author chenerzhu 21 | * @create 2018-09-08 23:25 22 | * https://www.proxynova.com/proxy-server-list/ 23 | **/ 24 | @Slf4j 25 | public class ProxynovaCrawlerJob extends AbstractCrawler { 26 | public 
ProxynovaCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) { 27 | super(proxyIpQueue, pageUrl); 28 | } 29 | 30 | @Override 31 | public void parsePage(WebPage webPage) { 32 | Elements elements = webPage.getDocument().getElementsByTag("tbody") 33 | .get(0).getElementsByTag("tr"); 34 | Element element; 35 | ProxyIp proxyIp; 36 | for (int i = 0; i < elements.size(); i++) { 37 | try { 38 | element = elements.get(i); 39 | proxyIp = new ProxyIp(); 40 | String ip = getIp(element); 41 | if ("".equals(ip)) { 42 | continue; 43 | } 44 | proxyIp.setIp(ip); 45 | proxyIp.setPort(Integer.parseInt(element.child(1).text())); 46 | proxyIp.setLocation(element.child(5).text()); 47 | proxyIp.setCountry(element.child(5).text().split("-")[0]); 48 | proxyIp.setAnonymity(element.child(6).text()); 49 | proxyIp.setType("unKnow"); 50 | proxyIp.setAvailable(true); 51 | proxyIp.setCreateTime(new Date()); 52 | proxyIp.setLastValidateTime(new Date()); 53 | proxyIp.setValidateCount(0); 54 | proxyIpQueue.offer(proxyIp); 55 | } catch (Exception e) { 56 | log.error("proxynovaCrawlerJob error:{0}",e); 57 | } 58 | } 59 | } 60 | 61 | private String getIp(Element element) throws ScriptException { 62 | String ip = ""; 63 | ScriptEngineManager manager = new ScriptEngineManager(); 64 | ScriptEngine engine = manager.getEngineByName("js"); 65 | Pattern pattern = Pattern.compile("\\(.*?\\);<"); 66 | Matcher matcher = null; 67 | matcher = pattern.matcher(element.child(0).html()); 68 | if (matcher.find()) { 69 | String ipScript = matcher.group(0).substring(1, matcher.group(0).length() - 1); 70 | ip = (String) engine.eval(ipScript.replaceAll("\\);", "")); 71 | } 72 | return ip; 73 | } 74 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/SpysOneCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | 
import com.chenerzhu.crawler.proxy.pool.common.HttpMethod;
import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.entity.WebPage;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Crawler job that POSTs a filter form and decodes each row's port by
 * evaluating script expressions against the variables defined by the page's
 * own script.
 *
 * @author chenerzhu
 * @create 2018-09-08 17:25
 * http://spys.one/en/free-proxy-list/
 * form:xpp=5&xf1=0&xf2=0&xf4=0&xf5=1
 **/
@Slf4j
public class SpysOneCrawlerJob extends AbstractCrawler {
    /** Matches the port expression in the first cell: from the first "+" up to the next "<". */
    private static final Pattern PORT_SCRIPT_PATTERN = Pattern.compile("\\+.*?<");

    public SpysOneCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
        this.httpMethd=HttpMethod.POST;
        this.formParamMap=new HashMap(){{
            put("xpp","5");
            put("xf1","0");
            put("xf2","0");
            put("xf4","0");
            put("xf5","1");
        }};
    }

    /**
     * Reads one proxy per "spy1xx" element, starting at the second one:
     * child 0 = ip (".spy14") and encoded port, child 1 = type,
     * child 2 = anonymity, child 3 = location / country (".spy14").
     * The page script is evaluated once per page; rows without a decodable
     * port are skipped silently, rows that fail to parse are logged and skipped.
     *
     * @param webPage fetched page whose document is scanned
     */
    @Override
    public void parsePage(WebPage webPage) {
        // NOTE(review): the previous getPort read the document from the inherited webPage
        // field; the parameter is used here on the assumption that both are the same page.
        Document document = webPage.getDocument();
        ScriptEngine engine;
        try {
            engine = preparePortDecoder(document);
        } catch (Exception e) {
            // Without the page script no port can be decoded, so no row would succeed anyway.
            log.error("spysOneCrawlerJob error: cannot evaluate the page script", e);
            return;
        }
        Elements rows = document.getElementsByClass("spy1xx");
        for (int i = 1; i < rows.size(); i++) {
            try {
                Element row = rows.get(i);
                ProxyIp proxyIp = new ProxyIp();
                proxyIp.setIp(row.child(0).selectFirst(".spy14").text());
                int port = getPort(engine, row);
                if (port == -1) {
                    continue;
                }
                proxyIp.setPort(port);
                proxyIp.setCountry(row.child(3).selectFirst(".spy14").text());
                proxyIp.setLocation(row.child(3).text());
                proxyIp.setType(row.child(1).text());
                proxyIp.setAnonymity(row.child(2).text());
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // "{0}" is not an SLF4J placeholder; log the row index and the exception instead.
                log.error("spysOneCrawlerJob error at row {}", i, e);
            }
        }
    }

    /**
     * Creates a JavaScript engine and evaluates the page's third script
     * element in it, so that the per-row port expressions can be evaluated.
     * Done once per page instead of once per row.
     *
     * @param document parsed page
     * @return an engine primed with the page script
     * @throws ScriptException when the page script cannot be evaluated
     */
    private ScriptEngine preparePortDecoder(Document document) throws ScriptException {
        ScriptEngine engine = new ScriptEngineManager().getEngineByName("js");
        if (engine == null) {
            // getEngineByName returns null when no matching engine is installed.
            throw new IllegalStateException("no JavaScript ScriptEngine available");
        }
        String script = document.getElementsByTag("script").get(2).data();
        engine.eval(script);
        return engine;
    }

    /**
     * Decodes the port of one row.
     *
     * @param engine  engine already primed with the page script
     * @param element table row
     * @return the port, or -1 when the first cell contains no port expression
     * @throws ScriptException when the port expression cannot be evaluated
     */
    private int getPort(ScriptEngine engine, Element element) throws ScriptException {
        Matcher matcher = PORT_SCRIPT_PATTERN.matcher(element.child(0).html());
        if (!matcher.find()) {
            return -1;
        }
        String matched = matcher.group(0);
        // Drop the leading "+" and the last two characters of the match.
        String portScript = matched.substring(1, matched.length() - 2);
        // Interleaving '' forces string concatenation of the individual terms.
        Object value = engine.eval(portScript.replaceAll("\\+", "+''+"));
        // String.valueOf also covers a non-String result, where the former (String) cast threw.
        return Integer.parseInt(String.valueOf(value));
    }

}
-------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/crawler/XicidailiCrawlerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.crawler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.entity.WebPage; 5 | import com.chenerzhu.crawler.proxy.pool.job.crawler.AbstractCrawler; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | 10 | import java.util.Date; 11 | import java.util.concurrent.BlockingQueue; 12 | import java.util.concurrent.ConcurrentLinkedQueue; 13 | import java.util.concurrent.TimeUnit; 14 | 15 | /** 16 | * @author chenerzhu 17 | * @create 2018-09-02 15:23 18 | * http://www.xicidaili.com 19 | 
**/
@Slf4j
public class XicidailiCrawlerJob extends AbstractCrawler {
    public XicidailiCrawlerJob(ConcurrentLinkedQueue proxyIpQueue, String pageUrl) {
        super(proxyIpQueue, pageUrl);
    }

    /**
     * Reads one proxy per "tr" element, skipping the first row:
     * child 1 = ip, child 2 = port, child 3 = location, child 5 = type.
     * A row that cannot be parsed is logged and skipped.
     *
     * @param webPage fetched page whose document is scanned
     */
    @Override
    public void parsePage(WebPage webPage) {
        Elements rows = webPage.getDocument().getElementsByTag("tr");
        for (int i = 1; i < rows.size(); i++) {
            try {
                Element row = rows.get(i);
                ProxyIp proxyIp = new ProxyIp();
                proxyIp.setIp(row.child(1).text());
                proxyIp.setPort(Integer.parseInt(row.child(2).text()));
                proxyIp.setLocation(row.child(3).text());
                proxyIp.setType(row.child(5).text());
                proxyIp.setAvailable(true);
                proxyIp.setCreateTime(new Date());
                proxyIp.setLastValidateTime(new Date());
                proxyIp.setValidateCount(0);
                proxyIpQueue.offer(proxyIp);
            } catch (Exception e) {
                // "{0}" is not an SLF4J placeholder; log the row index and the exception instead.
                log.error("xicidailiCrawlerJob error at row {}", i, e);
            }
        }
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/execute/ISchedulerJobExecutor.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.execute;

import com.chenerzhu.crawler.proxy.pool.job.scheduler.AbstractSchedulerJob;

import java.util.concurrent.TimeUnit;

/**
 * Schedules {@link AbstractSchedulerJob}s for repeated execution.
 *
 * @author chenerzhu
 * @create 2018-08-30 12:14
 **/
public interface ISchedulerJobExecutor {
    /**
     * Schedules a job for repeated execution.
     *
     * @param schedulerJob job to run
     * @param delayTime    delay before the first run
     * @param intervalTime interval between runs
     * @param timeUnit     unit of both time values
     */
    void execute(AbstractSchedulerJob schedulerJob, long delayTime, long intervalTime, TimeUnit timeUnit);

    /**
     * Schedules a job for repeated execution; the name suggests the interval
     * is a delay between runs (how it is measured is up to the implementation).
     *
     * @param schedulerJob job to run
     * @param delayTime    delay before the first run
     * @param intervalTime interval between runs
     * @param timeUnit     unit of both time values
     */
    void executeDelay(AbstractSchedulerJob schedulerJob, long delayTime, long intervalTime, TimeUnit timeUnit);

    /** Stops the executor. */
    void shutdown();
    //void execute(Runnable runnable);
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/job/execute/impl/SchedulerJobExecutor.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.job.execute.impl;

import com.chenerzhu.crawler.proxy.pool.job.execute.ISchedulerJobExecutor;
import com.chenerzhu.crawler.proxy.pool.job.scheduler.AbstractSchedulerJob;
import com.chenerzhu.crawler.proxy.pool.thread.ThreadFactory;

import java.util.concurrent.*;

/**
 * {@link ISchedulerJobExecutor} backed by a {@link ScheduledExecutorService}.
 *
 * @author chenerzhu
 * @create 2018-08-30 12:15
 **/
public class SchedulerJobExecutor implements ISchedulerJobExecutor {

    /** Null only when the instance was created with the no-arg constructor. */
    private ScheduledExecutorService scheduledExecutorService;

    /**
     * Creates an executor without a thread pool. Scheduling on such an
     * instance fails with an {@link IllegalStateException}; {@link #shutdown()}
     * is a no-op.
     */
    public SchedulerJobExecutor(){}

    /**
     * Creates an executor with a pool of 10 threads.
     *
     * @param threadFactory value passed to the project's ThreadFactory
     */
    public SchedulerJobExecutor(String threadFactory){
        this(10, threadFactory);
    }

    /**
     * @param corePoolSize  number of threads kept in the scheduling pool
     * @param threadFactory value passed to the project's ThreadFactory
     */
    public SchedulerJobExecutor(int corePoolSize,String threadFactory){
        scheduledExecutorService=Executors.newScheduledThreadPool(corePoolSize,new ThreadFactory(threadFactory));
    }

    /**
     * Schedules the job at a fixed rate. Per the ScheduledExecutorService
     * contract, a run that throws suppresses all subsequent runs of that job.
     */
    @Override
    public void execute(AbstractSchedulerJob schedulerJob, long delayTime, long intervalTime, TimeUnit timeUnit){
        requireExecutor().scheduleAtFixedRate(schedulerJob,delayTime,intervalTime,timeUnit);
    }

    /**
     * Schedules the job with a fixed delay between the end of one run and the
     * start of the next.
     */
    @Override
    public void executeDelay(AbstractSchedulerJob schedulerJob, long delayTime, long intervalTime, TimeUnit timeUnit){
        requireExecutor().scheduleWithFixedDelay(schedulerJob,delayTime,intervalTime,timeUnit);
    }

    /**
     * Shuts the pool down. Safe on an instance without a pool: callers invoke
     * this from catch blocks, where an NPE would mask the original error.
     */
    @Override
    public void shutdown(){
        if (scheduledExecutorService != null) {
            scheduledExecutorService.shutdown();
        }
    }

    /** Returns the pool or fails with a descriptive error instead of a bare NPE. */
    private ScheduledExecutorService requireExecutor() {
        if (scheduledExecutorService == null) {
            throw new IllegalStateException(
                    "SchedulerJobExecutor was created without a thread pool; use a constructor that takes a thread factory name");
        }
        return scheduledExecutorService;
    }
}
-------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/AbstractSchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import 
com.chenerzhu.crawler.proxy.pool.entity.ProxyIp;
import com.chenerzhu.crawler.proxy.pool.job.execute.ISchedulerJobExecutor;
import com.chenerzhu.crawler.proxy.pool.job.execute.impl.SchedulerJobExecutor;
import com.chenerzhu.crawler.proxy.pool.thread.ThreadFactory;
import com.chenerzhu.crawler.proxy.pool.util.ProxyUtils;

import java.util.concurrent.*;


/**
 * Base class of the scheduled jobs: owns a cached thread pool for the
 * per-proxy tasks and provides the proxy validation routine.
 *
 * @author chenerzhu
 * @create 2018-08-30 10:27
 **/
public abstract class AbstractSchedulerJob implements Runnable {
    private volatile transient ExecutorService executorService = Executors.newCachedThreadPool(new ThreadFactory("validate"));

    /**
     * Submits a task to the worker pool, recreating the pool first if it has
     * been shut down.
     *
     * @param callable task to run
     * @return the future returned by the pool
     */
    public Future execute(Callable callable) {
        initInstance();
        return executorService.submit(callable);
    }

    /**
     * Submits a task to the worker pool, recreating the pool first if it has
     * been shut down.
     *
     * @param task task to run
     * @return the future returned by the pool (not the task itself)
     */
    public Future execute(FutureTask task) {
        initInstance();
        return executorService.submit(task);
    }

    /** Replaces a shut-down pool with a fresh one; the check is repeated under the class lock. */
    private void initInstance() {
        if (executorService.isShutdown()) {
            synchronized (AbstractSchedulerJob.class) {
                if (executorService.isShutdown()) {
                    executorService = Executors.newCachedThreadPool(new ThreadFactory("validate"));
                }
            }
        }
    }

    /** Shuts the worker pool down; the next execute call creates a new one. */
    public void shutdown() {
        executorService.shutdown();
    }

    /**
     * Checks whether a proxy is usable, choosing the probe by its type:
     * a type containing "HTTPS" is probed over https, one containing "HTTP"
     * over http, "unKnow" and types containing "SOCKS" over http and then
     * https. Any other type yields false.
     *
     * @param proxyIp proxy to validate
     * @return true when a probe succeeded
     */
    public boolean validateIp(ProxyIp proxyIp) {
        String type = proxyIp.getType();
        if (type == null) {
            // Used to throw a NullPointerException. A missing type is handled like
            // "unKnow" (try both probes) -- TODO confirm this is the intended policy.
            return validateHttpThenHttps(proxyIp);
        }
        String upperType = type.toUpperCase();
        if (upperType.contains("HTTPS")) {
            return ProxyUtils.validateHttps(proxyIp.getIp(), proxyIp.getPort());
        }
        if (upperType.contains("HTTP")) {
            return ProxyUtils.validateHttp(proxyIp.getIp(), proxyIp.getPort());
        }
        if (type.equalsIgnoreCase("unKnow") || upperType.contains("SOCKS")) {
            return validateHttpThenHttps(proxyIp);
        }
        return false;
    }

    /** Probes over http first and falls back to https when that fails. */
    private boolean validateHttpThenHttps(ProxyIp proxyIp) {
        boolean available = ProxyUtils.validateHttp(proxyIp.getIp(), proxyIp.getPort());
        if (!available) {
            available = ProxyUtils.validateHttps(proxyIp.getIp(), proxyIp.getPort());
        }
        return available;
    }
}
-------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/SchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.job.execute.ISchedulerJobExecutor; 4 | import com.chenerzhu.crawler.proxy.pool.job.execute.impl.SchedulerJobExecutor; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.springframework.beans.factory.annotation.Qualifier; 7 | import org.springframework.stereotype.Component; 8 | 9 | import javax.annotation.Resource; 10 | import java.text.SimpleDateFormat; 11 | import java.util.Date; 12 | import java.util.concurrent.TimeUnit; 13 | 14 | /** 15 | * @author chenerzhu 16 | * @create 2018-09-21 15:03 17 | **/ 18 | @Slf4j 19 | @Component 20 | public class SchedulerJob implements Runnable { 21 | private static ISchedulerJobExecutor schedulerJobExecutor = new SchedulerJobExecutor(10, "schedulerJob"); 22 | @Resource 23 | @Qualifier("syncDbSchedulerJob") 24 | private AbstractSchedulerJob syncDbSchedulerJob; 25 | @Resource 26 | @Qualifier("syncRedisSchedulerJob") 27 | private AbstractSchedulerJob syncRedisSchedulerJob; 28 | @Resource 29 | @Qualifier("validateRedisSchedulerJob") 30 | private AbstractSchedulerJob validateRedisSchedulerJob; 31 | 
@Override 32 | public void run() { 33 | try{ 34 | schedulerJobExecutor.execute(syncDbSchedulerJob,10, 5, TimeUnit.SECONDS); 35 | schedulerJobExecutor.execute(syncRedisSchedulerJob,50, 30, TimeUnit.SECONDS); 36 | schedulerJobExecutor.execute(validateRedisSchedulerJob,100, 30, TimeUnit.SECONDS); 37 | }catch (Exception e){ 38 | log.error("schedulerJob error:{}",e); 39 | schedulerJobExecutor.shutdown(); 40 | }finally { 41 | 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/SyncDbSchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpRedisService; 5 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpService; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.springframework.beans.factory.annotation.Autowired; 8 | import org.springframework.stereotype.Component; 9 | 10 | import java.util.ArrayList; 11 | import java.util.Date; 12 | import java.util.List; 13 | import java.util.concurrent.Callable; 14 | import java.util.concurrent.CopyOnWriteArrayList; 15 | import java.util.concurrent.FutureTask; 16 | import java.util.concurrent.TimeUnit; 17 | import java.util.concurrent.atomic.AtomicInteger; 18 | import java.util.stream.IntStream; 19 | 20 | /** 21 | * @author chenerzhu 22 | * @create 2018-09-07 17:25 23 | **/ 24 | @Slf4j 25 | @Component 26 | @SuppressWarnings("unchecked") 27 | public class SyncDbSchedulerJob extends AbstractSchedulerJob { 28 | 29 | @Autowired 30 | private IProxyIpRedisService proxyIpRedisService; 31 | @Autowired 32 | private IProxyIpService proxyIpService; 33 | 34 | 35 | @Override 36 | public void run() { 37 | try { 38 | List availableIpList = new CopyOnWriteArrayList(); 39 | List unAvailableIpList = new 
CopyOnWriteArrayList(); 40 | int validateCountBefore = 3; 41 | int validateCountAfter = 100; 42 | double availableRate=0.5;//可用率大于0.5的重新取出来 43 | long totalCount = proxyIpService.totalCount(validateCountBefore,validateCountAfter,availableRate); 44 | log.info("proxyIp total count:{}", totalCount); 45 | AtomicInteger availableIpCount=new AtomicInteger(0); 46 | AtomicInteger unAvailableIpCount=new AtomicInteger(0); 47 | int pageSize = 200; 48 | int pageCount = (int) ((int) (totalCount % pageSize) == 0 ? totalCount / pageSize : totalCount / pageSize + 1); 49 | List> taskList = new ArrayList<>(); 50 | long start = System.currentTimeMillis(); 51 | IntStream.range(0, pageCount).forEach(pageNumber -> { 52 | List proxyIpList = proxyIpService.findAllByPage(pageNumber, pageSize, validateCountBefore,validateCountAfter ,availableRate); 53 | proxyIpList.forEach(proxyIp -> { 54 | FutureTask task = new FutureTask(new Callable() { 55 | @Override 56 | public ProxyIp call() { 57 | try{ 58 | long startTime = System.currentTimeMillis(); 59 | boolean available = validateIp(proxyIp); 60 | long endTime = System.currentTimeMillis(); 61 | log.info("validateIp ==> ip:{} port:{} available:{} total time:{}", proxyIp.getIp(), proxyIp.getPort(), available, (endTime - startTime)); 62 | if (available) { 63 | if (proxyIpRedisService.isExist(proxyIp)) { 64 | log.info("redis exist ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 65 | proxyIpRedisService.remove(proxyIp); 66 | } 67 | proxyIp.setLastValidateTime(new Date()); 68 | proxyIp.setAvailable(available); 69 | proxyIp.setValidateCount(proxyIp.getValidateCount() + 1); 70 | proxyIp.setAvailableCount(proxyIp.getAvailableCount()+1); 71 | proxyIp.setAvailableRate(proxyIp.getAvailableCount()/(double)proxyIp.getValidateCount()); 72 | proxyIp.setUseTime(endTime - startTime); 73 | proxyIpRedisService.add(proxyIp); 74 | log.info("redis add or update ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 75 | availableIpList.add(proxyIp); 76 | 
availableIpCount.incrementAndGet(); 77 | } else { 78 | //proxyIpRedisService.remove(proxyIp);//第一层校验不删除缓存,通过第二层校验删除 79 | //log.info("redis remove ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 80 | proxyIp.setLastValidateTime(new Date()); 81 | proxyIp.setAvailable(available); 82 | proxyIp.setValidateCount(proxyIp.getValidateCount() + 1); 83 | proxyIp.setUnAvailableCount(proxyIp.getUnAvailableCount()+1); 84 | proxyIp.setAvailableRate(proxyIp.getAvailableCount()/(double)proxyIp.getValidateCount()); 85 | proxyIp.setUseTime(endTime - startTime); 86 | unAvailableIpList.add(proxyIp); 87 | unAvailableIpCount.incrementAndGet(); 88 | } 89 | return proxyIp; 90 | }catch (Exception e){ 91 | log.error("syncDb task proxyIP:{}",proxyIp.getIp(),e); 92 | try { 93 | TimeUnit.SECONDS.sleep(1); 94 | } catch (InterruptedException e1) { 95 | e1.printStackTrace(); 96 | } 97 | } 98 | return null; 99 | } 100 | }); 101 | taskList.add(task); 102 | execute(task); 103 | }); 104 | try { 105 | TimeUnit.SECONDS.sleep(1); 106 | } catch (InterruptedException e1) { 107 | e1.printStackTrace(); 108 | } 109 | }); 110 | List proxyIpList = new ArrayList<>(); 111 | taskList.forEach(proxyIpFuture -> { 112 | try { 113 | ProxyIp proxyIp = proxyIpFuture.get(6, TimeUnit.SECONDS); 114 | if(proxyIp!=null){ 115 | proxyIpList.add(proxyIp); 116 | } 117 | } catch (InterruptedException e) { 118 | log.error("Interrupted ", e); 119 | } catch (Exception e) { 120 | log.error("error:", e); 121 | } 122 | }); 123 | refreshDataBase(availableIpList,unAvailableIpList); 124 | long end = System.currentTimeMillis(); 125 | log.info("validate over total time:{}", (end - start)); 126 | log.info("availableIp size:{}", availableIpCount.get()); 127 | log.info("unAvailableIp size:{}", unAvailableIpCount.get()); 128 | } catch (Exception e) { 129 | log.error("error:", e); 130 | } finally { 131 | shutdown(); 132 | } 133 | } 134 | 135 | private void refreshDataBase(List availableIpList,List unAvailableIpList) { 136 | int batchSize = 
100; 137 | List> taskList = new ArrayList<>(); 138 | long startTime=System.currentTimeMillis(); 139 | log.info("refreshDataBase start..."); 140 | batchUpdate(availableIpList, batchSize, taskList); 141 | batchUpdate(unAvailableIpList, batchSize, taskList); 142 | 143 | taskList.forEach(proxyIpFuture -> { 144 | try { 145 | ProxyIp proxyIp = proxyIpFuture.get(10, TimeUnit.MINUTES); 146 | } catch (InterruptedException e) { 147 | log.error("refreshDataBase Interrupted ", e); 148 | } catch (Exception e) { 149 | log.error("refreshDataBase error:", e); 150 | } 151 | }); 152 | long endTime=System.currentTimeMillis(); 153 | log.info("refreshDataBase time:{}",endTime-startTime); 154 | log.info("refreshDataBase proxyIp size:{}", availableIpList.size()+unAvailableIpList.size()); 155 | } 156 | 157 | private void batchUpdate(List ipList, int batchSize, List> taskList) { 158 | CopyOnWriteArrayList cowIpList=new CopyOnWriteArrayList(ipList); 159 | for (int i = 0; i < cowIpList.size(); i++) { 160 | if ((i != 0) && i % batchSize == 0 || (i + 1 == cowIpList.size())) { 161 | if(i() { 163 | @Override 164 | public Object call() throws Exception { 165 | proxyIpService.batchUpdate(cowIpList); 166 | return null; 167 | } 168 | }); 169 | taskList.add(task); 170 | execute(task); 171 | }else{ 172 | final int start=i; 173 | FutureTask task = new FutureTask(new Callable() { 174 | @Override 175 | public Object call() throws Exception { 176 | proxyIpService.batchUpdate(cowIpList.subList(start-batchSize,start)); 177 | return null; 178 | } 179 | }); 180 | taskList.add(task); 181 | execute(task); 182 | } 183 | } 184 | } 185 | } 186 | 187 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/SyncRedisSchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import 
com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpRedisService; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.stereotype.Component; 8 | 9 | import java.io.Serializable; 10 | import java.util.ArrayList; 11 | import java.util.Date; 12 | import java.util.List; 13 | import java.util.concurrent.Callable; 14 | import java.util.concurrent.CopyOnWriteArrayList; 15 | import java.util.concurrent.FutureTask; 16 | import java.util.concurrent.TimeUnit; 17 | import java.util.stream.IntStream; 18 | 19 | /** 20 | * @author chenerzhu 21 | * @create 2018-09-07 17:25 22 | **/ 23 | @Slf4j 24 | @Component 25 | @SuppressWarnings("unchecked") 26 | public class SyncRedisSchedulerJob extends AbstractSchedulerJob { 27 | @Autowired 28 | private IProxyIpRedisService proxyIpRedisService; 29 | 30 | @Override 31 | public void run() { 32 | try { 33 | List availableIpList = new CopyOnWriteArrayList(); 34 | List unAvailableIpList = new CopyOnWriteArrayList(); 35 | long totalCount = proxyIpRedisService.totalCount(); 36 | log.info("the proxyIp total count:{}", totalCount); 37 | long redisTotalCount = proxyIpRedisService.totalCountRt(); 38 | log.info("the redis's proxyIp total count:{}", redisTotalCount); 39 | int pageSize = 100; 40 | int pageCount = (int) ((int) (totalCount % pageSize) == 0 ? 
totalCount / pageSize : totalCount / pageSize + 1); 41 | List> taskList = new ArrayList<>(); 42 | long start = System.currentTimeMillis(); 43 | IntStream.range(0, pageCount).forEach(pageNumber -> { 44 | List proxyIpList = proxyIpRedisService.findAllByPage(pageNumber, pageSize); 45 | proxyIpList.forEach(serializable -> { 46 | FutureTask task = new FutureTask(new Callable() { 47 | ProxyIp proxyIp = (ProxyIp) serializable; 48 | @Override 49 | public ProxyIp call(){ 50 | try{ 51 | long startTime = System.currentTimeMillis(); 52 | boolean available = validateIp(proxyIp); 53 | long endTime = System.currentTimeMillis(); 54 | log.info("validateIp redis ==> ip:{} port:{} available:{} total time:{}", proxyIp.getIp(), proxyIp.getPort(), available, (endTime - startTime)); 55 | if (available) { 56 | if (proxyIpRedisService.isExistRt(proxyIp)) { 57 | log.info("redis rt exist ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 58 | proxyIpRedisService.removeRt(proxyIp); 59 | } 60 | proxyIp.setLastValidateTime(new Date()); 61 | proxyIp.setAvailable(available); 62 | proxyIp.setValidateCount(proxyIp.getValidateCount() + 1); 63 | proxyIp.setAvailableCount(proxyIp.getAvailableCount() + 1); 64 | proxyIp.setAvailableRate(proxyIp.getAvailableCount() / (double) proxyIp.getValidateCount()); 65 | proxyIp.setUseTime(endTime - startTime); 66 | proxyIpRedisService.addRt(proxyIp); 67 | log.info("redis rt add or update ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 68 | availableIpList.add(proxyIp); 69 | } else { 70 | proxyIpRedisService.removeRt(proxyIp); 71 | log.info("redis rt remove ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 72 | proxyIpRedisService.remove(proxyIp); 73 | log.info("redis remove ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 74 | proxyIp.setLastValidateTime(new Date()); 75 | proxyIp.setAvailable(available); 76 | proxyIp.setValidateCount(proxyIp.getValidateCount() + 1); 77 | proxyIp.setUnAvailableCount(proxyIp.getUnAvailableCount() + 1); 78 | 
proxyIp.setAvailableRate(proxyIp.getAvailableCount() / (double) proxyIp.getValidateCount()); 79 | proxyIp.setUseTime(endTime - startTime); 80 | unAvailableIpList.add(proxyIp); 81 | } 82 | return proxyIp; 83 | }catch (Exception e){ 84 | log.error("syncRedis task proxyIP:{}",proxyIp.getIp(),e); 85 | try { 86 | TimeUnit.MILLISECONDS.sleep(100); 87 | } catch (InterruptedException e1) { 88 | e1.printStackTrace(); 89 | } 90 | } 91 | return null; 92 | } 93 | }); 94 | taskList.add(task); 95 | execute(task); 96 | }); 97 | try { 98 | TimeUnit.SECONDS.sleep(1); 99 | } catch (InterruptedException e1) { 100 | e1.printStackTrace(); 101 | } 102 | }); 103 | List proxyIpList = new ArrayList<>(); 104 | taskList.forEach(proxyIpFuture -> { 105 | try { 106 | ProxyIp proxyIp = proxyIpFuture.get(10, TimeUnit.SECONDS); 107 | if(proxyIp!=null){ 108 | proxyIpList.add(proxyIp); 109 | } 110 | } catch (InterruptedException e) { 111 | log.error("Interrupted ", e); 112 | } catch (Exception e) { 113 | log.error("error:", e); 114 | } 115 | }); 116 | long end = System.currentTimeMillis(); 117 | log.info("the redis's ip validate over, total time:{}", (end - start)); 118 | log.info("the redis's availableIp size:{}", availableIpList.size()); 119 | log.info("the redis's unAvailableIp size:{}", unAvailableIpList.size()); 120 | log.info("refresh redis's proxyIp size:{}", proxyIpList.size()); 121 | } catch (Exception e) { 122 | log.error("error:", e); 123 | } finally { 124 | shutdown(); 125 | } 126 | } 127 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/job/scheduler/ValidateRedisSchedulerJob.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.job.scheduler; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpRedisService; 5 | import lombok.extern.slf4j.Slf4j; 6 | import 
org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.stereotype.Component; 8 | 9 | import java.io.Serializable; 10 | import java.util.ArrayList; 11 | import java.util.Date; 12 | import java.util.List; 13 | import java.util.concurrent.Callable; 14 | import java.util.concurrent.CopyOnWriteArrayList; 15 | import java.util.concurrent.FutureTask; 16 | import java.util.concurrent.TimeUnit; 17 | import java.util.stream.IntStream; 18 | 19 | /** 20 | * @author chenerzhu 21 | * @create 2018-09-06 10:53 22 | **/ 23 | @Slf4j 24 | @Component 25 | @SuppressWarnings("unchecked") 26 | public class ValidateRedisSchedulerJob extends AbstractSchedulerJob { 27 | @Autowired 28 | private IProxyIpRedisService proxyIpRedisService; 29 | 30 | @Override 31 | public void run() { 32 | try { 33 | List availableIpList = new CopyOnWriteArrayList(); 34 | List unAvailableIpList = new CopyOnWriteArrayList(); 35 | long redisTotalCount = proxyIpRedisService.totalCountRt(); 36 | log.info("the redis's proxyIp total count:{}", redisTotalCount); 37 | int pageSize = 100; 38 | int pageCount = (int) ((int) (redisTotalCount % pageSize) == 0 ? 
redisTotalCount / pageSize : redisTotalCount / pageSize + 1); 39 | List> taskList = new ArrayList<>(); 40 | long start = System.currentTimeMillis(); 41 | IntStream.range(0, pageCount).forEach(pageNumber -> { 42 | 43 | List proxyIpList = proxyIpRedisService.findAllByPageRt(pageNumber, pageSize); 44 | proxyIpList.forEach(serializable -> { 45 | FutureTask task = new FutureTask(new Callable() { 46 | ProxyIp proxyIp = (ProxyIp) serializable; 47 | 48 | @Override 49 | public ProxyIp call() { 50 | try{ 51 | long startTime = System.currentTimeMillis(); 52 | boolean available = validateIp(proxyIp); 53 | long endTime = System.currentTimeMillis(); 54 | log.info("validateIp redis rt ==> ip:{} port:{} available:{} total time:{}", proxyIp.getIp(), proxyIp.getPort(), available, (endTime - startTime)); 55 | if (available) { 56 | if (proxyIpRedisService.isExistRt(proxyIp)) { 57 | log.info("redis rt exist ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 58 | proxyIpRedisService.removeRt(proxyIp); 59 | } 60 | proxyIp.setLastValidateTime(new Date()); 61 | proxyIp.setAvailable(available); 62 | proxyIp.setValidateCount(proxyIp.getValidateCount() + 1); 63 | proxyIp.setAvailableCount(proxyIp.getAvailableCount() + 1); 64 | proxyIp.setAvailableRate(proxyIp.getAvailableCount() / (double) proxyIp.getValidateCount()); 65 | proxyIp.setUseTime(endTime - startTime); 66 | proxyIpRedisService.addRt(proxyIp); 67 | log.info("redis rt add or update ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 68 | availableIpList.add(proxyIp); 69 | } else { 70 | proxyIpRedisService.removeRt(proxyIp); 71 | log.info("redis rt remove ip:{} port:{}", proxyIp.getIp(), proxyIp.getPort()); 72 | proxyIp.setLastValidateTime(new Date()); 73 | proxyIp.setAvailable(available); 74 | proxyIp.setValidateCount(proxyIp.getValidateCount() + 1); 75 | proxyIp.setUnAvailableCount(proxyIp.getUnAvailableCount() + 1); 76 | proxyIp.setAvailableRate(proxyIp.getAvailableCount() / (double) proxyIp.getValidateCount()); 77 | 
proxyIp.setUseTime(endTime - startTime); 78 | unAvailableIpList.add(proxyIp); 79 | } 80 | return proxyIp; 81 | }catch (Exception e){ 82 | log.error("syncRedis task proxyIP:{}",proxyIp.getIp(),e); 83 | } 84 | return null; 85 | 86 | } 87 | }); 88 | taskList.add(task); 89 | execute(task); 90 | }); 91 | try { 92 | TimeUnit.SECONDS.sleep(1); 93 | } catch (InterruptedException e1) { 94 | e1.printStackTrace(); 95 | } 96 | }); 97 | List proxyIpList = new ArrayList<>(); 98 | taskList.forEach(proxyIpFuture -> { 99 | try { 100 | ProxyIp proxyIp = proxyIpFuture.get(10, TimeUnit.SECONDS); 101 | if(proxyIp!=null){ 102 | proxyIpList.add(proxyIp); 103 | } 104 | } catch (InterruptedException e) { 105 | log.error("Interrupted ", e); 106 | } catch (Exception e) { 107 | log.error("error:", e); 108 | } 109 | }); 110 | long end = System.currentTimeMillis(); 111 | log.info("the redis's ip validate over, total time:{}", (end - start)); 112 | log.info("the redis's availableIp size:{}", availableIpList.size()); 113 | log.info("the redis's unAvailableIp size:{}", unAvailableIpList.size()); 114 | log.info("refresh redis's proxyIp size:{}", proxyIpList.size()); 115 | } catch (Exception e) { 116 | log.error("error:", e); 117 | } finally { 118 | shutdown(); 119 | } 120 | } 121 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/listener/JobContextListener.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.listener; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.job.crawler.CrawlerJob; 4 | import com.chenerzhu.crawler.proxy.pool.job.scheduler.SchedulerJob; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | 8 | import javax.servlet.ServletContextEvent; 9 | import javax.servlet.ServletContextListener; 10 | import javax.servlet.annotation.WebListener; 11 | 12 | /** 13 | * @author 
chenerzhu 14 | * @create 2018-08-30 12:33 15 | **/ 16 | @Slf4j 17 | @WebListener 18 | public class JobContextListener implements ServletContextListener { 19 | @Autowired 20 | private SchedulerJob schedulerJob; 21 | @Autowired 22 | private CrawlerJob crawlerJob; 23 | 24 | @Override 25 | public void contextInitialized(ServletContextEvent servletContextEvent) { 26 | log.debug("JobContextListener contextInitialized"); 27 | new Thread(schedulerJob).start(); 28 | new Thread(crawlerJob).start(); 29 | } 30 | 31 | @Override 32 | public void contextDestroyed(ServletContextEvent servletContextEvent) { 33 | log.debug("JobContextListener contextDestroyed"); 34 | } 35 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/listener/SpringContextListener.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.listener; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.context.SpringContextHolder; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.springframework.web.context.ContextLoaderListener; 6 | import org.springframework.web.context.WebApplicationContext; 7 | import org.springframework.web.context.support.WebApplicationContextUtils; 8 | 9 | import javax.servlet.ServletContextEvent; 10 | 11 | /** 12 | * @author chenerzhu 13 | * @create 2018-08-31 10:50 14 | **/ 15 | @Slf4j 16 | public class SpringContextListener extends ContextLoaderListener { 17 | public void contextInitialized(ServletContextEvent event) { 18 | super.contextInitialized(event); 19 | WebApplicationContext webApplicationContext = 20 | WebApplicationContextUtils.getWebApplicationContext(event.getServletContext()); 21 | SpringContextHolder.initApplicationContext(webApplicationContext); 22 | log.debug("SpringContextListener contextInitialized"); 23 | } 24 | 25 | public void contextDestroyed(ServletContextEvent event) { 26 | super.contextDestroyed(event); 27 | 
log.debug("SpringContextListener contextDestroyed"); 28 | } 29 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/repository/IProxyIpRepository.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.repository; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import org.springframework.data.domain.Page; 5 | import org.springframework.data.domain.Pageable; 6 | import org.springframework.data.jpa.repository.JpaRepository; 7 | import org.springframework.data.jpa.repository.Modifying; 8 | import org.springframework.data.jpa.repository.Query; 9 | import org.springframework.data.repository.query.Param; 10 | import org.springframework.stereotype.Repository; 11 | import org.springframework.transaction.annotation.Transactional; 12 | 13 | import java.util.Date; 14 | 15 | /** 16 | * @author chenerzhu 17 | * @create 2018-08-29 20:59 18 | **/ 19 | @Repository 20 | public interface IProxyIpRepository extends JpaRepository { 21 | Page findProxyIpsByAvailableIsTrue(Pageable pageable); 22 | 23 | long countProxyIpsByAvailableIsTrue(); 24 | long countProxyIpsByAvailableIsTrueOrValidateCountIsBeforeOrValidateCountIsAfterAndAvailableRateIsAfter(int validateCountBefore,int validateCountAfters, double availableRate); 25 | 26 | ProxyIp findByIpEqualsAndPortEqualsAndTypeEquals(String ip, int port, String type); 27 | 28 | Page findProxyIpsByAvailableIsTrueOrValidateCountIsBeforeOrValidateCountIsAfterAndAvailableRateIsAfter(Pageable pageable, int validateCountBefore,int validateCountAfters, double availableRate); 29 | 30 | @Query("update ProxyIp set available=:available, " + 31 | "availableCount=:availableCount, " + 32 | "availableRate=:availableRate, " + 33 | "lastValidateTime=:lastValidateTime, " + 34 | "requestTime=:requestTime, " + 35 | "responseTime=:responseTime, " + 36 | "unAvailableCount=:unAvailableCount, " 
+ 37 | "useTime=:useTime," + 38 | " validateCount=:validateCount where id=:id") 39 | @Modifying 40 | @Transactional 41 | Integer update(@Param("available") boolean available, 42 | @Param("availableCount") Integer availableCount, 43 | @Param("availableRate") double availableRate, 44 | @Param("lastValidateTime") Date lastValidateTime, 45 | @Param("requestTime") long requestTime, 46 | @Param("responseTime") long responseTime, 47 | @Param("unAvailableCount") int unAvailableCount, 48 | @Param("useTime") long useTime, 49 | @Param("validateCount") int validateCount, 50 | @Param("id") long id); 51 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/IProxyIpRedisService.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | 5 | import java.io.Serializable; 6 | import java.util.List; 7 | 8 | /** 9 | * @author chenerzhu 10 | * @create 2018-09-01 10:31 11 | **/ 12 | public interface IProxyIpRedisService { 13 | boolean add(ProxyIp proxyIp); 14 | 15 | Long remove(ProxyIp proxyIp); 16 | 17 | boolean isExist(ProxyIp proxyIp); 18 | 19 | ProxyIp getOne(); 20 | 21 | List findAllByPage(int pageNumber, int pageSize); 22 | 23 | long totalCount(); 24 | 25 | boolean addRt(ProxyIp proxyIp); 26 | 27 | Long removeRt(ProxyIp proxyIp); 28 | 29 | boolean isExistRt(ProxyIp proxyIp); 30 | 31 | ProxyIp getOneRt(); 32 | 33 | List findAllByPageRt(int pageNumber, int pageSize); 34 | 35 | long totalCountRt(); 36 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/IProxyIpService.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service; 2 | 3 | import 
com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * Created by chenerzhu on 2018/8/30. 9 | */ 10 | public interface IProxyIpService { 11 | ProxyIp save(ProxyIp proxyIp); 12 | 13 | List findAll(); 14 | 15 | List findAllByPage(Integer pageNumber, Integer pageSize); 16 | 17 | long totalCount(); 18 | 19 | long totalCount(int validateCountBefore,int validateCountAfter, double availableRate); 20 | 21 | List saveAll(List proxyIpList); 22 | 23 | void batchUpdate(List proxyIpList); 24 | 25 | void update(ProxyIp proxyIp); 26 | 27 | ProxyIp findByIpEqualsAndPortEqualsAndTypeEquals(String ip, int port, String type); 28 | 29 | List findAllByPage(Integer pageNumber, Integer pageSize, int validateCountBefore, int validateCountAfter, double availableRate); 30 | 31 | boolean testIp(String ip, int port); 32 | 33 | boolean testIp(String ip, int port, String type); 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/impl/ProxyIpRedisServiceImpl.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service.impl; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.common.RedisKey; 4 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 5 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpRedisService; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.data.redis.core.RedisTemplate; 8 | import org.springframework.stereotype.Service; 9 | 10 | import java.io.Serializable; 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | import java.util.Random; 14 | import java.util.Set; 15 | 16 | /** 17 | * @author chenerzhu 18 | * @create 2018-09-01 10:32 19 | **/ 20 | @Service 21 | public class ProxyIpRedisServiceImpl implements IProxyIpRedisService { 22 | @Autowired 23 | private RedisTemplate redisCacheTemplate; 24 
| 25 | @Override 26 | public boolean add(ProxyIp proxyIp) { 27 | return redisCacheTemplate.opsForZSet().add(RedisKey.PROXY_IP_KEY, proxyIp, proxyIp.getId()); 28 | } 29 | 30 | @Override 31 | public Long remove(ProxyIp proxyIp) { 32 | return redisCacheTemplate.opsForZSet().removeRangeByScore(RedisKey.PROXY_IP_KEY, proxyIp.getId(), proxyIp.getId()); 33 | } 34 | 35 | @Override 36 | public boolean isExist(ProxyIp proxyIp) { 37 | Set set = redisCacheTemplate.opsForZSet().rangeByScore(RedisKey.PROXY_IP_KEY, proxyIp.getId(), proxyIp.getId()); 38 | if (set.isEmpty()) { 39 | return false; 40 | } else { 41 | return true; 42 | } 43 | } 44 | 45 | @Override 46 | public ProxyIp getOne() { 47 | int totalCount = (int) totalCountRt(); 48 | int range=new Random().nextInt(totalCount); 49 | Set set = redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_KEY, range, range); 50 | return (ProxyIp) new ArrayList(set).get(0); 51 | } 52 | 53 | @Override 54 | public List findAllByPage(int pageNumber, int pageSize) { 55 | Set set = redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_KEY, pageNumber*pageSize, (pageNumber+1)*pageSize); 56 | return new ArrayList(set); 57 | } 58 | 59 | @Override 60 | public long totalCount() { 61 | return redisCacheTemplate.opsForZSet().size(RedisKey.PROXY_IP_KEY); 62 | } 63 | 64 | @Override 65 | public boolean addRt(ProxyIp proxyIp) { 66 | return redisCacheTemplate.opsForZSet().add(RedisKey.PROXY_IP_RT_KEY, proxyIp, proxyIp.getId()); 67 | } 68 | 69 | @Override 70 | public Long removeRt(ProxyIp proxyIp) { 71 | return redisCacheTemplate.opsForZSet().removeRangeByScore(RedisKey.PROXY_IP_RT_KEY, proxyIp.getId(), proxyIp.getId()); 72 | } 73 | 74 | @Override 75 | public boolean isExistRt(ProxyIp proxyIp) { 76 | Set set = redisCacheTemplate.opsForZSet().rangeByScore(RedisKey.PROXY_IP_RT_KEY, proxyIp.getId(), proxyIp.getId()); 77 | if (set.isEmpty()) { 78 | return false; 79 | } else { 80 | return true; 81 | } 82 | } 83 | 84 | @Override 85 | public ProxyIp 
getOneRt() { 86 | int totalCount = (int) totalCountRt(); 87 | int range=new Random().nextInt(totalCount); 88 | Set set = redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_RT_KEY, range, range); 89 | return (ProxyIp) new ArrayList(set).get(0); 90 | } 91 | 92 | @Override 93 | public List findAllByPageRt(int pageNumber, int pageSize) { 94 | Set set = redisCacheTemplate.opsForZSet().range(RedisKey.PROXY_IP_RT_KEY, pageNumber*pageSize, (pageNumber+1)*pageSize); 95 | return new ArrayList(set); 96 | } 97 | 98 | @Override 99 | public long totalCountRt() { 100 | return redisCacheTemplate.opsForZSet().size(RedisKey.PROXY_IP_RT_KEY); 101 | } 102 | } -------------------------------------------------------------------------------- /src/main/java/com/chenerzhu/crawler/proxy/pool/service/impl/ProxyIpServiceImpl.java: -------------------------------------------------------------------------------- 1 | package com.chenerzhu.crawler.proxy.pool.service.impl; 2 | 3 | import com.chenerzhu.crawler.proxy.pool.entity.ProxyIp; 4 | import com.chenerzhu.crawler.proxy.pool.repository.IProxyIpRepository; 5 | import com.chenerzhu.crawler.proxy.pool.service.IProxyIpService; 6 | import com.chenerzhu.crawler.proxy.pool.util.ProxyUtils; 7 | import org.springframework.beans.factory.annotation.Autowired; 8 | import org.springframework.data.domain.PageRequest; 9 | import org.springframework.data.domain.Pageable; 10 | import org.springframework.data.domain.Sort; 11 | import org.springframework.jdbc.core.JdbcTemplate; 12 | import org.springframework.orm.hibernate5.HibernateTemplate; 13 | import org.springframework.stereotype.Service; 14 | import org.springframework.transaction.annotation.Transactional; 15 | 16 | import javax.persistence.EntityManager; 17 | import javax.persistence.PersistenceContext; 18 | import java.util.Date; 19 | import java.util.List; 20 | 21 | /** 22 | * @author chenerzhu 23 | * @create 2018-08-30 19:05 24 | **/ 25 | @Service("proxyIpService") 26 | public class 
ProxyIpServiceImpl implements IProxyIpService { 27 | @Autowired 28 | private IProxyIpRepository proxyIpRepository; 29 | 30 | @PersistenceContext 31 | private EntityManager entityManager; 32 | 33 | 34 | @Transactional(rollbackFor = Exception.class) 35 | @Override 36 | public ProxyIp save(ProxyIp proxyIp) { 37 | return proxyIpRepository.save(proxyIp); 38 | } 39 | @Override 40 | public List findAll() { 41 | return proxyIpRepository.findAll(); 42 | } 43 | 44 | @Override 45 | public List findAllByPage(Integer pageNumber, Integer pageSize) { 46 | Sort sort = new Sort(Sort.Direction.ASC, "id"); 47 | Pageable pageable = PageRequest.of(pageNumber, pageSize, sort); 48 | return proxyIpRepository.findProxyIpsByAvailableIsTrue(pageable).getContent(); 49 | } 50 | @Override 51 | public long totalCount(){ 52 | return proxyIpRepository.countProxyIpsByAvailableIsTrue(); 53 | } 54 | 55 | @Override 56 | public long totalCount(int validateCountBefore,int validateCountAfter, double availableRate){ 57 | return proxyIpRepository.countProxyIpsByAvailableIsTrueOrValidateCountIsBeforeOrValidateCountIsAfterAndAvailableRateIsAfter(validateCountBefore,validateCountAfter, availableRate); 58 | } 59 | 60 | @Transactional(rollbackFor = Exception.class) 61 | @Override 62 | public List saveAll(List proxyIpList){ 63 | return proxyIpRepository.saveAll(proxyIpList); 64 | } 65 | 66 | /*@Transactional(rollbackFor = Exception.class)*/ 67 | @Override 68 | public void batchUpdate(List proxyIpList) { 69 | for(int i = 0; i < proxyIpList.size(); i++) { 70 | update(proxyIpList.get(i)); 71 | } 72 | } 73 | @Override 74 | public void update(ProxyIp proxyIp){ 75 | proxyIpRepository.update(proxyIp.isAvailable(), 76 | proxyIp.getAvailableCount(), 77 | proxyIp.getAvailableRate(), 78 | proxyIp.getLastValidateTime(), 79 | proxyIp.getRequestTime(), 80 | proxyIp.getResponseTime(), 81 | proxyIp.getUnAvailableCount(), 82 | proxyIp.getUseTime(), 83 | proxyIp.getValidateCount(), 84 | proxyIp.getId() 85 | ); 86 | 87 | } 88 | 89 
/**
 * Thread factory that names its threads {@code <name>-t-<n>}, where {@code n} starts at 0 and
 * grows by one for every thread created by this factory instance.
 *
 * @author chenerzhu
 * @create 2018-09-10 20:27
 **/
public class ThreadFactory implements java.util.concurrent.ThreadFactory {

    private final AtomicInteger counter = new AtomicInteger(0);
    private final String name;

    /**
     * @param name prefix used for the names of all threads created by this factory
     */
    public ThreadFactory(String name) {
        this.name = name;
    }

    /**
     * Creates a new, not yet started thread for {@code run}.
     * The sequence number is taken and advanced in one atomic step, so concurrent callers
     * never receive the same thread name.
     */
    @Override
    public Thread newThread(Runnable run) {
        return new Thread(run, name + "-t-" + counter.getAndIncrement());
    }
}
java.security.cert.X509Certificate;
import java.util.*;

/**
 * Static helpers that send a single HTTP(S) request with Apache HttpClient and return the
 * response body as a string.
 * <p>
 * Every call builds its own {@link CloseableHttpClient} and closes it before returning.
 * A {@code null} result means the request failed with a protocol or I/O error (which is
 * logged) or that the server answered with a status other than 200.
 * <p>
 * NOTE(security): for URLs starting with "https" the client is built with a trust manager
 * that accepts every certificate chain (see {@link #initSSL(HttpClientBuilder)}).
 *
 * @author chenerzhu
 * @create 2018-08-11 11:25
 **/
@Slf4j
public class HttpClientUtils {
    private static final String DEFAULT_CHARSET = "UTF-8";
    private static RequestConfig reqConf = null;
    private static StandardHttpRequestRetryHandler standardHandler = null;

    static {
        // Single source of truth for the defaults: the public reset method below.
        requestConfig();
    }

    /**
     * (Re)installs the shared request configuration and retry handler: 5000 ms socket and
     * connect timeouts, 2000 ms connection-request timeout, redirects disabled, and a retry
     * handler created with {@code (3, true)}.
     */
    public static void requestConfig() {
        reqConf = RequestConfig.custom()
                .setSocketTimeout(5000)
                .setConnectTimeout(5000)
                .setConnectionRequestTimeout(2000)
                .setRedirectsEnabled(false)
                .setMaxRedirects(0)
                .build();
        standardHandler = new StandardHttpRequestRetryHandler(3, true);
    }

    /**
     * Sends one request and returns the response body.
     *
     * @param url            target URL; a URL starting with "https" (case-insensitive) gets the SSL setup
     * @param content        raw request body for POST (without form parameters), PUT and PATCH;
     *                       {@code null} is treated as an empty body
     * @param headerMap      request headers, may be {@code null} or empty
     * @param formParamMap   form parameters for POST; when non-empty they replace {@code content}
     * @param contentCharset charset of the request body, defaults to UTF-8 when empty
     * @param resultCharset  charset used to decode the response, defaults to UTF-8 when empty
     * @param method         HTTP method to use
     * @return the response body when the status is 200 (empty string if the response carries
     * no entity); {@code null} on any other status or on a logged protocol/I/O error
     * @throws IllegalArgumentException if {@code method} is {@code null} or not supported
     */
    public static String send(final String url, String content, Map<String, String> headerMap, Map<String, String> formParamMap, String contentCharset, String resultCharset, HttpMethod method) {
        if (StringUtils.isEmpty(contentCharset)) {
            contentCharset = DEFAULT_CHARSET;
        }
        if (StringUtils.isEmpty(resultCharset)) {
            resultCharset = DEFAULT_CHARSET;
        }
        CloseableHttpClient httpClient = null;
        try {
            httpClient = buildHttpClient(url);
            HttpRequestBase request = buildRequest(url, content, formParamMap, contentCharset, method);
            request.setConfig(reqConf);
            addHeader(request, headerMap);
            HttpResponse httpResponse = httpClient.execute(request);
            log.info("request url:" + url + "; response status:" + httpResponse.getStatusLine());
            if (httpResponse.getStatusLine().getStatusCode() == 200) {
                if (httpResponse.getEntity() == null) {
                    // A 200 without a body is still a success; wrapping a null entity would throw.
                    return "";
                }
                // Read the response content.
                BufferedHttpEntity entity = new BufferedHttpEntity(httpResponse.getEntity());
                return EntityUtils.toString(entity, resultCharset);
            }
        } catch (ClientProtocolException e) {
            log.error("Protocol error", e);
        } catch (IOException e) {
            log.error("Network error", e);
        } finally {
            closeQuietly(httpClient);
        }
        return null;
    }

    /**
     * Builds the client for one request, applying the SSL setup to the builder itself so that
     * it actually reaches the client that executes the request.
     */
    private static CloseableHttpClient buildHttpClient(String url) {
        HttpClientBuilder builder = HttpClientBuilder.create().setRetryHandler(standardHandler);
        if (url.toLowerCase().startsWith("https")) {
            initSSL(builder);
        }
        return builder.build();
    }

    /**
     * Creates the request object for {@code method} and attaches the body where the method has one.
     */
    private static HttpRequestBase buildRequest(String url, String content, Map<String, String> formParamMap, String contentCharset, HttpMethod method) throws IOException {
        if (method == null) {
            throw new IllegalArgumentException("HTTP method must not be null");
        }
        // StringEntity rejects a null source string, so normalise it once here.
        String body = content == null ? "" : content;
        switch (method) {
            case GET:
                return new HttpGet(url);
            case DELETE:
                return new HttpDelete(url);
            case POST:
                HttpPost httpPost = new HttpPost(url);
                if (formParamMap == null || formParamMap.isEmpty()) {
                    httpPost.setEntity(new StringEntity(body, contentCharset));
                } else {
                    List<BasicNameValuePair> params = new ArrayList<>();
                    for (Map.Entry<String, String> param : formParamMap.entrySet()) {
                        params.add(new BasicNameValuePair(param.getKey(), param.getValue()));
                    }
                    // Honour the caller's charset instead of a hard-coded "UTF-8".
                    httpPost.setEntity(new UrlEncodedFormEntity(params, contentCharset));
                }
                return httpPost;
            case PUT:
                HttpPut httpPut = new HttpPut(url);
                httpPut.setEntity(new StringEntity(body, contentCharset));
                return httpPut;
            case PATCH:
                HttpPatch httpPatch = new HttpPatch(url);
                httpPatch.setEntity(new StringEntity(body, contentCharset));
                return httpPatch;
            default:
                throw new IllegalArgumentException("Unsupported HTTP method: " + method);
        }
    }

    /**
     * Closes the client, logging (not propagating) a failure so that it can never replace
     * the result of the request itself.
     */
    private static void closeQuietly(CloseableHttpClient httpClient) {
        if (httpClient == null) {
            return;
        }
        try {
            httpClient.close();
        } catch (Exception e) {
            log.warn("Failed to close http client", e);
        }
    }

    /**
     * Adds the request headers.
     *
     * @param httpRequest request to decorate
     * @param headerMap   header name/value pairs, may be {@code null} or empty
     * @return the same request instance
     */
    private static HttpRequestBase addHeader(HttpRequestBase httpRequest, Map<String, String> headerMap) {
        if (headerMap != null) {
            for (Map.Entry<String, String> header : headerMap.entrySet()) {
                httpRequest.addHeader(header.getKey(), header.getValue());
            }
        }
        return httpRequest;
    }

    /**
     * Configures the builder for HTTPS requests.
     * <p>
     * NOTE(security): the trust manager installed here accepts every client and server
     * certificate chain. Only the chain check is disabled; no hostname verifier is passed to
     * the socket factory. Previously this method built a second client and discarded it, so
     * the configuration never took effect.
     *
     * @param builder builder of the client that will execute the request
     */
    private static void initSSL(HttpClientBuilder builder) {
        try {
            SSLContext sslContext = SSLContext.getInstance("TLS");
            final X509TrustManager trustManager = new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
                }

                @Override
                public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    // The X509TrustManager contract asks for a non-null (possibly empty) array.
                    return new X509Certificate[0];
                }
            };
            // Initialise the context with the trust manager; it is only used by the SSL sockets.
            sslContext.init(null, new TrustManager[]{trustManager}, null);
            ConnectionSocketFactory ssf = new SSLConnectionSocketFactory(sslContext);
            Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create().register("https", ssf).build();
            builder.setConnectionManager(new BasicHttpClientConnectionManager(registry));
        } catch (KeyManagementException | NoSuchAlgorithmException e) {
            // Leave the builder untouched so the request still goes out with the default SSL setup.
            log.error("Failed to initialise SSL context", e);
        }
    }

    /** GET with UTF-8 for both request and response. */
    public static String sendGet(final String url, Map<String, String> headerMap) {
        return sendGet(url, headerMap, DEFAULT_CHARSET, DEFAULT_CHARSET);
    }

    /** GET with explicit charsets. */
    public static String sendGet(final String url, Map<String, String> headerMap, String contentCharset, String resultCharset) {
        return send(url, "", headerMap, null, contentCharset, resultCharset, HttpMethod.GET);
    }

    /** POST of a raw body with UTF-8 for both request and response. */
    public static String sendPost(final String url, String content, Map<String, String> headerMap) {
        return sendPost(url, content, headerMap, DEFAULT_CHARSET, DEFAULT_CHARSET);
    }

    /** POST of form parameters with UTF-8; {@code content} is only used when the form map is empty. */
    public static String sendPostForm(final String url, String content, Map<String, String> headerMap, Map<String, String> formParamMap) {
        return send(url, content, headerMap, formParamMap, DEFAULT_CHARSET, DEFAULT_CHARSET, HttpMethod.POST);
    }

    /** POST of a raw body with explicit charsets. */
    public static String sendPost(final String url, String content, Map<String, String> headerMap, String contentCharset, String resultCharset) {
        return send(url, content, headerMap, null, contentCharset, resultCharset, HttpMethod.POST);
    }

    /** POST of form parameters with explicit charsets. */
    public static String sendPostForm(final String url, String content, Map<String, String> headerMap, Map<String, String> formParamMap, String contentCharset, String resultCharset) {
        return send(url, content, headerMap, formParamMap, contentCharset, resultCharset, HttpMethod.POST);
    }

    /** DELETE with UTF-8; {@code content} is accepted for symmetry but no body is attached. */
    public static String sendDelete(final String url, String content, Map<String, String> headerMap) {
        return sendDelete(url, content, headerMap, DEFAULT_CHARSET, DEFAULT_CHARSET);
    }

    /** DELETE with explicit charsets; {@code content} is accepted for symmetry but no body is attached. */
    public static String sendDelete(final String url, String content, Map<String, String> headerMap, String contentCharset, String resultCharset) {
        return send(url, content, headerMap, null, contentCharset, resultCharset, HttpMethod.DELETE);
    }

    /** PUT of a raw body with UTF-8 for both request and response. */
    public static String sendPut(final String url, String content, Map<String, String> headerMap) {
        return sendPut(url, content, headerMap, DEFAULT_CHARSET, DEFAULT_CHARSET);
    }

    /** PUT of a raw body with explicit charsets. */
    public static String sendPut(final String url, String content, Map<String, String> headerMap, String contentCharset, String resultCharset) {
        return send(url, content, headerMap, null, contentCharset, resultCharset, HttpMethod.PUT);
    }

    /** PATCH of a raw body with UTF-8 for both request and response. */
    public static String sendPatch(final String url, String content, Map<String, String> headerMap) {
        return sendPatch(url, content, headerMap, DEFAULT_CHARSET, DEFAULT_CHARSET);
    }

    /** PATCH of a raw body with explicit charsets. */
    public static String sendPatch(final String url, String content, Map<String, String> headerMap, String contentCharset, String resultCharset) {
        return send(url, content, headerMap, null, contentCharset, resultCharset, HttpMethod.PATCH);
    }

    /** Manual smoke test: fetches one HTTPS page and prints the body (requires network access). */
    public static void main(String[] args) {
        String result = HttpClientUtils.sendGet("https://www.baidu.com", null);
        System.out.println(result);
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/util/HttpsUtils.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.util;

import sun.net.www.protocol.https.Handler;

import javax.net.ssl.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.security.*;
import java.security.cert.CertificateException;
import java.security.cert.CertificateFactory;
import java.security.cert.X509Certificate;

/**
 * Factory methods for the SSL building blocks used by the proxy validation code: socket
 * factories, trust managers and a hostname verifier.
 * <p>
 * NOTE(security): {@link UnSafeTrustManager} and the verifier returned by
 * {@link #getTrustAnyHostnameVerifier()} accept everything.
 */
public class HttpsUtils {
    /**
     * Builds a TLS socket factory from optional trust certificates and an optional client key store.
     *
     * @param certificates certificate streams to trust in addition to the platform defaults;
     *                     {@code null} or empty selects the trust-everything manager. The
     *                     streams are closed by this call.
     * @param bksFile      "BKS" key store holding the client key, or {@code null} for none
     * @param password     key store password, or {@code null} for none
     * @return the socket factory together with the trust manager it was initialised with
     * @throws IllegalArgumentException if certificates were supplied but could not be loaded
     *                                  (previously this silently fell back to trusting everything)
     * @throws AssertionError           if the TLS context cannot be created or initialised
     */
    public static SSLParams getSslSocketFactory(InputStream[] certificates, InputStream bksFile, String password) {
        SSLParams sslParams = new SSLParams();
        try {
            TrustManager[] trustManagers = prepareTrustManager(certificates);
            KeyManager[] keyManagers = prepareKeyManager(bksFile, password);
            SSLContext sslContext = SSLContext.getInstance("TLS");
            X509TrustManager trustManager;
            if (trustManagers != null) {
                X509TrustManager localTrustManager = chooseTrustManager(trustManagers);
                if (localTrustManager == null) {
                    // Without this check the failure would surface as an NPE during the handshake.
                    throw new IllegalArgumentException("No X509TrustManager available for the supplied certificates");
                }
                trustManager = new MyTrustManager(localTrustManager);
            } else {
                // Reached only when no certificates were supplied at all.
                trustManager = new UnSafeTrustManager();
            }
            sslContext.init(keyManagers, new TrustManager[]{trustManager}, null);
            sslParams.sSLSocketFactory = sslContext.getSocketFactory();
            sslParams.trustManager = trustManager;
            return sslParams;
        } catch (NoSuchAlgorithmException | KeyManagementException | KeyStoreException e) {
            throw new AssertionError(e);
        }
    }

    /**
     * Builds a TLS socket factory that trusts every certificate.
     *
     * @return the socket factory, never {@code null}
     * @throws IllegalStateException if the TLS context cannot be created or initialised
     */
    public static SSLSocketFactory getSslSocketFactory() {
        try {
            SSLContext sslContext = SSLContext.getInstance("TLS");
            // Trust any https certificate.
            sslContext.init(null, new TrustManager[]{new UnSafeTrustManager()}, new java.security.SecureRandom());
            return sslContext.getSocketFactory();
        } catch (NoSuchAlgorithmException | KeyManagementException e) {
            // Fail with the real cause instead of dereferencing an unusable context.
            throw new IllegalStateException("Unable to create trust-all SSL socket factory", e);
        }
    }

    /**
     * @return a hostname verifier that accepts every host name
     */
    public static HostnameVerifier getTrustAnyHostnameVerifier() {
        return new UnSafeHostnameVerifier();
    }

    /**
     * Loads the given certificates into an in-memory key store and returns trust managers for it.
     * All supplied streams are closed before this method returns.
     *
     * @return the trust managers, or {@code null} when no certificates were supplied
     * @throws IllegalArgumentException if any certificate cannot be read or the trust
     *                                  managers cannot be created
     */
    private static TrustManager[] prepareTrustManager(InputStream... certificates) {
        if (certificates == null || certificates.length <= 0) return null;
        try {
            CertificateFactory certificateFactory = CertificateFactory.getInstance("X.509");
            KeyStore keyStore = KeyStore.getInstance(KeyStore.getDefaultType());
            keyStore.load(null);
            int index = 0;
            for (InputStream certificate : certificates) {
                String certificateAlias = Integer.toString(index++);
                keyStore.setCertificateEntry(certificateAlias, certificateFactory.generateCertificate(certificate));
            }
            TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
            trustManagerFactory.init(keyStore);
            return trustManagerFactory.getTrustManagers();
        } catch (Exception e) {
            throw new IllegalArgumentException("Unable to load the supplied trust certificates", e);
        } finally {
            // Close every stream, including the ones after a failing certificate.
            for (InputStream certificate : certificates) {
                closeQuietly(certificate);
            }
        }
    }

    /** Closes the stream if present, ignoring a failure to close. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close here.
        }
    }

    /**
     * Loads the client key store and returns key managers for it.
     *
     * @return the key managers, or {@code null} when no key store/password was given or the
     * key store could not be loaded (the failure is printed, not propagated)
     */
    private static KeyManager[] prepareKeyManager(InputStream bksFile, String password) {
        if (bksFile == null || password == null) return null;
        try {
            KeyStore clientKeyStore = KeyStore.getInstance("BKS");
            clientKeyStore.load(bksFile, password.toCharArray());
            KeyManagerFactory keyManagerFactory = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm());
            keyManagerFactory.init(clientKeyStore, password.toCharArray());
            return keyManagerFactory.getKeyManagers();
        } catch (Exception e) {
            // Best effort, as before: continue without a client certificate.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * @return the first {@link X509TrustManager} in the array, or {@code null} if there is none
     */
    private static X509TrustManager chooseTrustManager(TrustManager[] trustManagers) {
        for (TrustManager trustManager : trustManagers) {
            if (trustManager instanceof X509TrustManager) {
                return (X509TrustManager) trustManager;
            }
        }
        return null;
    }

    /** Result holder: the socket factory and the trust manager it was initialised with. */
    public static class SSLParams {
        public SSLSocketFactory sSLSocketFactory;
        public X509TrustManager trustManager;
    }

    /** Trust manager whose checks are empty, i.e. every certificate chain is accepted. */
    public static class UnSafeTrustManager implements X509TrustManager {
        @Override
        public void checkClientTrusted(X509Certificate[] chain, String authType)
                throws CertificateException {
        }

        @Override
        public void checkServerTrusted(X509Certificate[] chain, String authType)
                throws CertificateException {
        }

        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return new X509Certificate[]{};
        }
    }

    /**
     * Trust manager that first asks the platform default trust manager and, if that rejects
     * the server chain, asks the trust manager built from the supplied certificates.
     */
    private static class MyTrustManager implements X509TrustManager {
        private X509TrustManager defaultTrustManager;
        private X509TrustManager localTrustManager;

        public MyTrustManager(X509TrustManager localTrustManager) throws NoSuchAlgorithmException, KeyStoreException {
            TrustManagerFactory defaultFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
            // A null key store makes the factory use the platform's default trust store.
            defaultFactory.init((KeyStore) null);
            defaultTrustManager = chooseTrustManager(defaultFactory.getTrustManagers());
            this.localTrustManager = localTrustManager;
        }

        @Override
        public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {

        }

        @Override
        public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
            if (defaultTrustManager == null) {
                // No platform X509 trust manager was found; rely on the supplied certificates only.
                localTrustManager.checkServerTrusted(chain, authType);
                return;
            }
            try {
                defaultTrustManager.checkServerTrusted(chain, authType);
            } catch (CertificateException ce) {
                localTrustManager.checkServerTrusted(chain, authType);
            }
        }

        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return new X509Certificate[0];
        }
    }

    /** Hostname verifier that accepts every host name. */
    private static class UnSafeHostnameVerifier implements HostnameVerifier {
        @Override
        public boolean verify(String hostname, SSLSession session) {
            return true;
        }
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/chenerzhu/crawler/proxy/pool/util/ProxyUtils.java:
--------------------------------------------------------------------------------
package com.chenerzhu.crawler.proxy.pool.util;

import com.alibaba.fastjson.JSONObject;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import sun.net.www.protocol.https.Handler;

import javax.net.ssl.HttpsURLConnection;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Checks whether a proxy is usable by fetching a fixed page through it.
 * <p>
 * A proxy counts as available when the response body contains "baidu.com" and the status
 * code is 200; every failure (including timeouts) counts as unavailable.
 *
 * @author chenerzhu
 * @create 2018-09-05 21:14
 **/
@Slf4j
public final class ProxyUtils {
    //private static final String VALIDATE_URL = "http://115.239.211.112";
    private static final String VALIDATE_URL = "http://www.baidu.com/";

    /**
     * Validates a proxy according to its declared type.
     *
     * @param ip        proxy host
     * @param port      proxy port
     * @param proxyType proxy type; only HTTP and HTTPS are checked
     * @return {@code true} if the proxy passed the check for its type; {@code false} for a
     * {@code null} or unchecked type (e.g. SOCKS) or a failed check
     */
    public static boolean validateIp(String ip, int port, ProxyType proxyType) {
        if (proxyType == null) {
            return false;
        }
        switch (proxyType) {
            case HTTP:
                return validateHttp(ip, port);
            case HTTPS:
                return validateHttps(ip, port);
            default:
                return false;
        }
    }

    /**
     * Fetches the validation page over plain HTTP through the proxy.
     *
     * @return {@code true} if the page was fetched and looks like the expected page
     */
    public static boolean validateHttp(String ip, int port) {
        HttpURLConnection connection = null;
        try {
            URL url = new URL(VALIDATE_URL);
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
            connection = (HttpURLConnection) url.openConnection(proxy);
            boolean available = fetchAndCheck(connection);
            log.info("validateHttp ==> ip:{} port:{} info:{}", ip, port, connection.getResponseMessage());
            return available;
        } catch (Exception e) {
            // An unusable proxy is the expected common case, so keep this at debug level.
            log.debug("validateHttp failed ==> ip:{} port:{} cause:{}", ip, port, e.toString());
            return false;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Fetches the validation page through the proxy using an HTTPS connection that trusts
     * every certificate and host name.
     *
     * @return {@code true} if the page was fetched and looks like the expected page
     */
    public static boolean validateHttps(String ip, int port) {
        HttpsURLConnection httpsURLConnection = null;
        try {
            // NOTE(review): VALIDATE_URL is an http:// URL; the explicit https Handler is what
            // makes the cast to HttpsURLConnection below possible. sun.net.www.* is JDK-internal;
            // consider an https:// URL with the default handler once behaviour is confirmed.
            URL url = new URL(null, VALIDATE_URL, new Handler());
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
            httpsURLConnection = (HttpsURLConnection) url.openConnection(proxy);
            httpsURLConnection.setSSLSocketFactory(HttpsUtils.getSslSocketFactory());
            httpsURLConnection.setHostnameVerifier(HttpsUtils.getTrustAnyHostnameVerifier());
            boolean available = fetchAndCheck(httpsURLConnection);
            log.info("validateHttps ==> ip:{} port:{} info:{}", ip, port, httpsURLConnection.getResponseMessage());
            return available;
        } catch (Exception e) {
            log.debug("validateHttps failed ==> ip:{} port:{} cause:{}", ip, port, e.toString());
            return false;
        } finally {
            if (httpsURLConnection != null) {
                httpsURLConnection.disconnect();
            }
        }
    }

    /**
     * Applies the shared request settings (headers, 2 s connect / 3 s read timeout, no
     * redirects), reads the whole response and checks it.
     *
     * @return {@code true} if the body contains "baidu.com" and the status code is 200
     * @throws IOException if the request fails or times out
     */
    private static boolean fetchAndCheck(HttpURLConnection connection) throws IOException {
        connection.setRequestProperty("accept", "");
        connection.setRequestProperty("connection", "Keep-Alive");
        connection.setRequestProperty("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36");
        connection.setConnectTimeout(2 * 1000);
        connection.setReadTimeout(3 * 1000);
        connection.setInstanceFollowRedirects(false);
        StringBuilder body = new StringBuilder();
        // try-with-resources closes the reader (and the response stream) on every path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        }
        return body.toString().contains("baidu.com") && connection.getResponseCode() == 200;
    }

    /**
     * Manual check: validates one hard-coded proxy from 100 threads and prints the share of
     * threads for which at least one of the two checks succeeded (requires network access).
     */
    public static void main(String[] args) {
        AtomicInteger counter = new AtomicInteger(0);
        CountDownLatch latch = new CountDownLatch(100);
        for (int i = 0; i < 100; i++) {
            Thread thread = new Thread(new Runnable() {
                @Override
                public void run() {
                    String ip = "185.28.248.238";
                    int port = 23500;
                    boolean availableHttp = ProxyUtils.validateHttp(ip, port);
                    boolean availableHttps = ProxyUtils.validateHttps(ip, port);
                    if (availableHttp || availableHttps) {
                        counter.incrementAndGet();
                    }
                    latch.countDown();
                    System.out.println("http:" + availableHttp + " https:" + availableHttps);
                }
            });
            thread.start();
        }
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Preserve the interrupt for whoever runs this thread.
            Thread.currentThread().interrupt();
        }
        System.out.println("========" + counter.get() / 100.0);
    }

    /** Proxy protocol as stored with a proxy entry. */
    public enum ProxyType {
        HTTP("HTTP"),
        HTTPS("HTTPS"),
        SOCKS("SOCKS");
        private final String type;

        ProxyType(String proxyType) {
            this.type = proxyType;
        }

        public String getType() {
            return type;
        }
    }
}
-------------------------------------------------------------------------------- /src/main/resources/application.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenerzhu/proxy-pool/d764fad02bbc85c1781fe31e644e468753bb1e7a/src/main/resources/application.properties -------------------------------------------------------------------------------- /src/main/resources/static/css/bootstrap-table.css: -------------------------------------------------------------------------------- 1 | /** 2 | * @author zhixin wen 3 | * version: 1.12.1 4 | * https://github.com/wenzhixin/bootstrap-table/ 5 | */ 6 | 7 | .bootstrap-table .table { 8 | margin-bottom: 0 !important; 9 | border-bottom: 1px solid #dddddd; 10 | border-collapse: collapse !important; 11 | border-radius: 1px; 12 | } 13 | 14 | .bootstrap-table .table:not(.table-condensed), 15 | .bootstrap-table .table:not(.table-condensed) > tbody > tr > th, 16 | .bootstrap-table .table:not(.table-condensed) > tfoot > tr > th, 17 | .bootstrap-table .table:not(.table-condensed) > thead > tr > td, 18 | .bootstrap-table .table:not(.table-condensed) > tbody > tr > td, 19 | .bootstrap-table .table:not(.table-condensed) > tfoot > tr > td { 20 | padding: 8px; 21 | } 22 | 23 | .bootstrap-table .table.table-no-bordered > thead > tr > th, 24 | .bootstrap-table .table.table-no-bordered > tbody > tr > td { 25 | border-right: 2px solid transparent; 26 | } 27 | 28 | .bootstrap-table .table.table-no-bordered > tbody > tr > td:last-child { 29 | 
border-right: none; 30 | } 31 | 32 | .fixed-table-container { 33 | position: relative; 34 | clear: both; 35 | border: 1px solid #dddddd; 36 | border-radius: 4px; 37 | -webkit-border-radius: 4px; 38 | -moz-border-radius: 4px; 39 | } 40 | 41 | .fixed-table-container.table-no-bordered { 42 | border: 1px solid transparent; 43 | } 44 | 45 | .fixed-table-footer, 46 | .fixed-table-header { 47 | overflow: hidden; 48 | } 49 | 50 | .fixed-table-footer { 51 | border-top: 1px solid #dddddd; 52 | } 53 | 54 | .fixed-table-body { 55 | overflow-x: auto; 56 | overflow-y: auto; 57 | height: 100%; 58 | } 59 | 60 | .fixed-table-container table { 61 | width: 100%; 62 | } 63 | 64 | .fixed-table-container thead th { 65 | height: 0; 66 | padding: 0; 67 | margin: 0; 68 | border-left: 1px solid #dddddd; 69 | } 70 | 71 | .fixed-table-container thead th:focus { 72 | outline: 0 solid transparent; 73 | } 74 | 75 | .fixed-table-container thead th:first-child:not([data-not-first-th]) { 76 | border-left: none; 77 | border-top-left-radius: 4px; 78 | -webkit-border-top-left-radius: 4px; 79 | -moz-border-radius-topleft: 4px; 80 | } 81 | 82 | .fixed-table-container thead th .th-inner, 83 | .fixed-table-container tbody td .th-inner { 84 | padding: 8px; 85 | line-height: 24px; 86 | vertical-align: top; 87 | overflow: hidden; 88 | text-overflow: ellipsis; 89 | white-space: nowrap; 90 | } 91 | 92 | .fixed-table-container thead th .sortable { 93 | cursor: pointer; 94 | background-position: right; 95 | background-repeat: no-repeat; 96 | padding-right: 30px; 97 | } 98 | 99 | .fixed-table-container thead th .both { 100 | background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAQAAADYWf5HAAAAkElEQVQoz7X QMQ5AQBCF4dWQSJxC5wwax1Cq1e7BAdxD5SL+Tq/QCM1oNiJidwox0355mXnG/DrEtIQ6azioNZQxI0ykPhTQIwhCR+BmBYtlK7kLJYwWCcJA9M4qdrZrd8pPjZWPtOqdRQy320YSV17OatFC4euts6z39GYMKRPCTKY9UnPQ6P+GtMRfGtPnBCiqhAeJPmkqAAAAAElFTkSuQmCC'); 101 | } 102 | 103 | .fixed-table-container thead th .asc { 104 | 
background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAYAAAByUDbMAAAAZ0lEQVQ4y2NgGLKgquEuFxBPAGI2ahhWCsS/gDibUoO0gPgxEP8H4ttArEyuQYxAPBdqEAxPBImTY5gjEL9DM+wTENuQahAvEO9DMwiGdwAxOymGJQLxTyD+jgWDxCMZRsEoGAVoAADeemwtPcZI2wAAAABJRU5ErkJggg=='); 105 | } 106 | 107 | .fixed-table-container thead th .desc { 108 | background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAYAAAByUDbMAAAAZUlEQVQ4y2NgGAWjYBSggaqGu5FA/BOIv2PBIPFEUgxjB+IdQPwfC94HxLykus4GiD+hGfQOiB3J8SojEE9EM2wuSJzcsFMG4ttQgx4DsRalkZENxL+AuJQaMcsGxBOAmGvopk8AVz1sLZgg0bsAAAAASUVORK5CYII= '); 109 | } 110 | 111 | .fixed-table-container th.detail { 112 | width: 30px; 113 | } 114 | 115 | .fixed-table-container tbody td { 116 | border-left: 1px solid #dddddd; 117 | } 118 | 119 | .fixed-table-container tbody tr:first-child td { 120 | border-top: none; 121 | } 122 | 123 | .fixed-table-container tbody td:first-child { 124 | border-left: none; 125 | } 126 | 127 | /* the same color with .active */ 128 | .fixed-table-container tbody .selected td { 129 | background-color: #f5f5f5; 130 | } 131 | 132 | .fixed-table-container .bs-checkbox { 133 | text-align: center; 134 | } 135 | 136 | .fixed-table-container input[type="radio"], 137 | .fixed-table-container input[type="checkbox"] { 138 | margin: 0 auto !important; 139 | } 140 | 141 | .fixed-table-container .no-records-found { 142 | text-align: center; 143 | } 144 | 145 | .fixed-table-pagination div.pagination, 146 | .fixed-table-pagination .pagination-detail { 147 | margin-top: 10px; 148 | margin-bottom: 10px; 149 | } 150 | 151 | .fixed-table-pagination div.pagination .pagination { 152 | margin: 0; 153 | } 154 | 155 | .fixed-table-pagination .pagination a { 156 | padding: 6px 12px; 157 | line-height: 1.428571429; 158 | } 159 | 160 | .fixed-table-pagination .pagination-info { 161 | line-height: 34px; 162 | margin-right: 5px; 163 | } 164 | 165 | .fixed-table-pagination .btn-group { 166 | position: relative; 167 | display: inline-block; 
168 | vertical-align: middle; 169 | } 170 | 171 | .fixed-table-pagination .dropup .dropdown-menu { 172 | margin-bottom: 0; 173 | } 174 | 175 | .fixed-table-pagination .page-list { 176 | display: inline-block; 177 | } 178 | 179 | .fixed-table-toolbar .columns-left { 180 | margin-right: 5px; 181 | } 182 | 183 | .fixed-table-toolbar .columns-right { 184 | margin-left: 5px; 185 | } 186 | 187 | .fixed-table-toolbar .columns label { 188 | display: block; 189 | padding: 3px 20px; 190 | clear: both; 191 | font-weight: normal; 192 | line-height: 1.428571429; 193 | } 194 | 195 | .fixed-table-toolbar .bs-bars, 196 | .fixed-table-toolbar .search, 197 | .fixed-table-toolbar .columns { 198 | position: relative; 199 | margin-top: 10px; 200 | margin-bottom: 10px; 201 | line-height: 34px; 202 | } 203 | 204 | .fixed-table-pagination li.disabled a { 205 | pointer-events: none; 206 | cursor: default; 207 | } 208 | 209 | .fixed-table-loading { 210 | display: none; 211 | position: absolute; 212 | top: 42px; 213 | right: 0; 214 | bottom: 0; 215 | left: 0; 216 | z-index: 99; 217 | background-color: #fff; 218 | text-align: center; 219 | } 220 | 221 | .fixed-table-body .card-view .title { 222 | font-weight: bold; 223 | display: inline-block; 224 | min-width: 30%; 225 | text-align: left !important; 226 | } 227 | 228 | /* support bootstrap 2 */ 229 | .fixed-table-body thead th .th-inner { 230 | box-sizing: border-box; 231 | } 232 | 233 | .table th, .table td { 234 | vertical-align: middle; 235 | box-sizing: border-box; 236 | } 237 | 238 | .fixed-table-toolbar .dropdown-menu { 239 | text-align: left; 240 | max-height: 300px; 241 | overflow: auto; 242 | } 243 | 244 | .fixed-table-toolbar .btn-group > .btn-group { 245 | display: inline-block; 246 | margin-left: -1px !important; 247 | } 248 | 249 | .fixed-table-toolbar .btn-group > .btn-group > .btn { 250 | border-radius: 0; 251 | } 252 | 253 | .fixed-table-toolbar .btn-group > .btn-group:first-child > .btn { 254 | border-top-left-radius: 4px; 
255 | border-bottom-left-radius: 4px; 256 | } 257 | 258 | .fixed-table-toolbar .btn-group > .btn-group:last-child > .btn { 259 | border-top-right-radius: 4px; 260 | border-bottom-right-radius: 4px; 261 | } 262 | 263 | .bootstrap-table .table > thead > tr > th { 264 | vertical-align: bottom; 265 | border-bottom: 1px solid #ddd; 266 | } 267 | 268 | /* support bootstrap 3 */ 269 | .bootstrap-table .table thead > tr > th { 270 | padding: 0; 271 | margin: 0; 272 | } 273 | 274 | .bootstrap-table .fixed-table-footer tbody > tr > td { 275 | padding: 0 !important; 276 | } 277 | 278 | .bootstrap-table .fixed-table-footer .table { 279 | border-bottom: none; 280 | border-radius: 0; 281 | padding: 0 !important; 282 | } 283 | 284 | .bootstrap-table .pull-right .dropdown-menu { 285 | right: 0; 286 | left: auto; 287 | } 288 | 289 | /* calculate scrollbar width */ 290 | p.fixed-table-scroll-inner { 291 | width: 100%; 292 | height: 200px; 293 | } 294 | 295 | div.fixed-table-scroll-outer { 296 | top: 0; 297 | left: 0; 298 | visibility: hidden; 299 | width: 200px; 300 | height: 150px; 301 | overflow: hidden; 302 | } 303 | 304 | /* for get correct heights */ 305 | .fixed-table-toolbar:after, .fixed-table-pagination:after { 306 | content: ""; 307 | display: block; 308 | clear: both; 309 | } 310 | 311 | .fullscreen { 312 | position: fixed; 313 | top: 0; 314 | left: 0; 315 | z-index: 1050; 316 | width: 100%!important; 317 | background: #FFF; 318 | } 319 | -------------------------------------------------------------------------------- /src/main/resources/static/img/crawler.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenerzhu/proxy-pool/d764fad02bbc85c1781fe31e644e468753bb1e7a/src/main/resources/static/img/crawler.PNG -------------------------------------------------------------------------------- /src/main/resources/static/img/glyphicons-halflings-white.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenerzhu/proxy-pool/d764fad02bbc85c1781fe31e644e468753bb1e7a/src/main/resources/static/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /src/main/resources/static/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenerzhu/proxy-pool/d764fad02bbc85c1781fe31e644e468753bb1e7a/src/main/resources/static/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /src/main/resources/static/img/home.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenerzhu/proxy-pool/d764fad02bbc85c1781fe31e644e468753bb1e7a/src/main/resources/static/img/home.PNG -------------------------------------------------------------------------------- /src/main/resources/static/js/bootstrap.min.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap v3.2.0 (http://getbootstrap.com) 3 | * Copyright 2011-2014 Twitter, Inc. 
4 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) 5 | */ 6 | if ("undefined" == typeof jQuery) 7 | throw new Error("Bootstrap's JavaScript requires jQuery"); 8 | +function(a) { 9 | "use strict"; 10 | function b() { 11 | var a = document.createElement("bootstrap") 12 | , b = { 13 | WebkitTransition: "webkitTransitionEnd", 14 | MozTransition: "transitionend", 15 | OTransition: "oTransitionEnd otransitionend", 16 | transition: "transitionend" 17 | }; 18 | for (var c in b) 19 | if (void 0 !== a.style[c]) 20 | return { 21 | end: b[c] 22 | }; 23 | return !1 24 | } 25 | a.fn.emulateTransitionEnd = function(b) { 26 | var c = !1 27 | , d = this; 28 | a(this).one("bsTransitionEnd", function() { 29 | c = !0 30 | }); 31 | var e = function() { 32 | c || a(d).trigger(a.support.transition.end) 33 | }; 34 | return setTimeout(e, b), 35 | this 36 | } 37 | , 38 | a(function() { 39 | a.support.transition = b(), 40 | a.support.transition && (a.event.special.bsTransitionEnd = { 41 | bindType: a.support.transition.end, 42 | delegateType: a.support.transition.end, 43 | handle: function(b) { 44 | return a(b.target).is(this) ? b.handleObj.handler.apply(this, arguments) : void 0 45 | } 46 | }) 47 | }) 48 | }(jQuery), 49 | +function(a) { 50 | "use strict"; 51 | function b(b) { 52 | return this.each(function() { 53 | var c = a(this) 54 | , e = c.data("bs.alert"); 55 | e || c.data("bs.alert", e = new d(this)), 56 | "string" == typeof b && e[b].call(c) 57 | }) 58 | } 59 | var c = '[data-dismiss="alert"]' 60 | , d = function(b) { 61 | a(b).on("click", c, this.close) 62 | }; 63 | d.VERSION = "3.2.0", 64 | d.prototype.close = function(b) { 65 | function c() { 66 | f.detach().trigger("closed.bs.alert").remove() 67 | } 68 | var d = a(this) 69 | , e = d.attr("data-target"); 70 | e || (e = d.attr("href"), 71 | e = e && e.replace(/.*(?=#[^\s]*$)/, "")); 72 | var f = a(e); 73 | b && b.preventDefault(), 74 | f.length || (f = d.hasClass("alert") ? 
d : d.parent()), 75 | f.trigger(b = a.Event("close.bs.alert")), 76 | b.isDefaultPrevented() || (f.removeClass("in"), 77 | a.support.transition && f.hasClass("fade") ? f.one("bsTransitionEnd", c).emulateTransitionEnd(150) : c()) 78 | } 79 | ; 80 | var e = a.fn.alert; 81 | a.fn.alert = b, 82 | a.fn.alert.Constructor = d, 83 | a.fn.alert.noConflict = function() { 84 | return a.fn.alert = e, 85 | this 86 | } 87 | , 88 | a(document).on("click.bs.alert.data-api", c, d.prototype.close) 89 | }(jQuery), 90 | +function(a) { 91 | "use strict"; 92 | function b(b) { 93 | return this.each(function() { 94 | var d = a(this) 95 | , e = d.data("bs.button") 96 | , f = "object" == typeof b && b; 97 | e || d.data("bs.button", e = new c(this,f)), 98 | "toggle" == b ? e.toggle() : b && e.setState(b) 99 | }) 100 | } 101 | var c = function(b, d) { 102 | this.$element = a(b), 103 | this.options = a.extend({}, c.DEFAULTS, d), 104 | this.isLoading = !1 105 | }; 106 | c.VERSION = "3.2.0", 107 | c.DEFAULTS = { 108 | loadingText: "loading..." 109 | }, 110 | c.prototype.setState = function(b) { 111 | var c = "disabled" 112 | , d = this.$element 113 | , e = d.is("input") ? "val" : "html" 114 | , f = d.data(); 115 | b += "Text", 116 | null == f.resetText && d.data("resetText", d[e]()), 117 | d[e](null == f[b] ? this.options[b] : f[b]), 118 | setTimeout(a.proxy(function() { 119 | "loadingText" == b ? (this.isLoading = !0, 120 | d.addClass(c).attr(c, c)) : this.isLoading && (this.isLoading = !1, 121 | d.removeClass(c).removeAttr(c)) 122 | }, this), 0) 123 | } 124 | , 125 | c.prototype.toggle = function() { 126 | var a = !0 127 | , b = this.$element.closest('[data-toggle="buttons"]'); 128 | if (b.length) { 129 | var c = this.$element.find("input"); 130 | "radio" == c.prop("type") && (c.prop("checked") && this.$element.hasClass("active") ? 
a = !1 : b.find(".active").removeClass("active")), 131 | a && c.prop("checked", !this.$element.hasClass("active")).trigger("change") 132 | } 133 | a && this.$element.toggleClass("active") 134 | } 135 | ; 136 | var d = a.fn.button; 137 | a.fn.button = b, 138 | a.fn.button.Constructor = c, 139 | a.fn.button.noConflict = function() { 140 | return a.fn.button = d, 141 | this 142 | } 143 | , 144 | a(document).on("click.bs.button.data-api", '[data-toggle^="button"]', function(c) { 145 | var d = a(c.target); 146 | d.hasClass("btn") || (d = d.closest(".btn")), 147 | b.call(d, "toggle"), 148 | c.preventDefault() 149 | }) 150 | }(jQuery), 151 | +function(a) { 152 | "use strict"; 153 | function b(b) { 154 | return this.each(function() { 155 | var d = a(this) 156 | , e = d.data("bs.carousel") 157 | , f = a.extend({}, c.DEFAULTS, d.data(), "object" == typeof b && b) 158 | , g = "string" == typeof b ? b : f.slide; 159 | e || d.data("bs.carousel", e = new c(this,f)), 160 | "number" == typeof b ? e.to(b) : g ? 
e[g]() : f.interval && e.pause().cycle() 161 | }) 162 | } 163 | var c = function(b, c) { 164 | this.$element = a(b).on("keydown.bs.carousel", a.proxy(this.keydown, this)), 165 | this.$indicators = this.$element.find(".carousel-indicators"), 166 | this.options = c, 167 | this.paused = this.sliding = this.interval = this.$active = this.$items = null, 168 | "hover" == this.options.pause && this.$element.on("mouseenter.bs.carousel", a.proxy(this.pause, this)).on("mouseleave.bs.carousel", a.proxy(this.cycle, this)) 169 | }; 170 | c.VERSION = "3.2.0", 171 | c.DEFAULTS = { 172 | interval: 5e3, 173 | pause: "hover", 174 | wrap: !0 175 | }, 176 | c.prototype.keydown = function(a) { 177 | switch (a.which) { 178 | case 37: 179 | this.prev(); 180 | break; 181 | case 39: 182 | this.next(); 183 | break; 184 | default: 185 | return 186 | } 187 | a.preventDefault() 188 | } 189 | , 190 | c.prototype.cycle = function(b) { 191 | return b || (this.paused = !1), 192 | this.interval && clearInterval(this.interval), 193 | this.options.interval && !this.paused && (this.interval = setInterval(a.proxy(this.next, this), this.options.interval)), 194 | this 195 | } 196 | , 197 | c.prototype.getItemIndex = function(a) { 198 | return this.$items = a.parent().children(".item"), 199 | this.$items.index(a || this.$active) 200 | } 201 | , 202 | c.prototype.to = function(b) { 203 | var c = this 204 | , d = this.getItemIndex(this.$active = this.$element.find(".item.active")); 205 | return b > this.$items.length - 1 || 0 > b ? void 0 : this.sliding ? this.$element.one("slid.bs.carousel", function() { 206 | c.to(b) 207 | }) : d == b ? this.pause().cycle() : this.slide(b > d ? 
"next" : "prev", a(this.$items[b])) 208 | } 209 | , 210 | c.prototype.pause = function(b) { 211 | return b || (this.paused = !0), 212 | this.$element.find(".next, .prev").length && a.support.transition && (this.$element.trigger(a.support.transition.end), 213 | this.cycle(!0)), 214 | this.interval = clearInterval(this.interval), 215 | this 216 | } 217 | , 218 | c.prototype.next = function() { 219 | return this.sliding ? void 0 : this.slide("next") 220 | } 221 | , 222 | c.prototype.prev = function() { 223 | return this.sliding ? void 0 : this.slide("prev") 224 | } 225 | , 226 | c.prototype.slide = function(b, c) { 227 | var d = this.$element.find(".item.active") 228 | , e = c || d[b]() 229 | , f = this.interval 230 | , g = "next" == b ? "left" : "right" 231 | , h = "next" == b ? "first" : "last" 232 | , i = this; 233 | if (!e.length) { 234 | if (!this.options.wrap) 235 | return; 236 | e = this.$element.find(".item")[h]() 237 | } 238 | if (e.hasClass("active")) 239 | return this.sliding = !1; 240 | var j = e[0] 241 | , k = a.Event("slide.bs.carousel", { 242 | relatedTarget: j, 243 | direction: g 244 | }); 245 | if (this.$element.trigger(k), 246 | !k.isDefaultPrevented()) { 247 | if (this.sliding = !0, 248 | f && this.pause(), 249 | this.$indicators.length) { 250 | this.$indicators.find(".active").removeClass("active"); 251 | var l = a(this.$indicators.children()[this.getItemIndex(e)]); 252 | l && l.addClass("active") 253 | } 254 | var m = a.Event("slid.bs.carousel", { 255 | relatedTarget: j, 256 | direction: g 257 | }); 258 | return a.support.transition && this.$element.hasClass("slide") ? 
(e.addClass(b), 259 | e[0].offsetWidth, 260 | d.addClass(g), 261 | e.addClass(g), 262 | d.one("bsTransitionEnd", function() { 263 | e.removeClass([b, g].join(" ")).addClass("active"), 264 | d.removeClass(["active", g].join(" ")), 265 | i.sliding = !1, 266 | setTimeout(function() { 267 | i.$element.trigger(m) 268 | }, 0) 269 | }).emulateTransitionEnd(1e3 * d.css("transition-duration").slice(0, -1))) : (d.removeClass("active"), 270 | e.addClass("active"), 271 | this.sliding = !1, 272 | this.$element.trigger(m)), 273 | f && this.cycle(), 274 | this 275 | } 276 | } 277 | ; 278 | var d = a.fn.carousel; 279 | a.fn.carousel = b, 280 | a.fn.carousel.Constructor = c, 281 | a.fn.carousel.noConflict = function() { 282 | return a.fn.carousel = d, 283 | this 284 | } 285 | , 286 | a(document).on("click.bs.carousel.data-api", "[data-slide], [data-slide-to]", function(c) { 287 | var d, e = a(this), f = a(e.attr("data-target") || (d = e.attr("href")) && d.replace(/.*(?=#[^\s]+$)/, "")); 288 | if (f.hasClass("carousel")) { 289 | var g = a.extend({}, f.data(), e.data()) 290 | , h = e.attr("data-slide-to"); 291 | h && (g.interval = !1), 292 | b.call(f, g), 293 | h && f.data("bs.carousel").to(h), 294 | c.preventDefault() 295 | } 296 | }), 297 | a(window).on("load", function() { 298 | a('[data-ride="carousel"]').each(function() { 299 | var c = a(this); 300 | b.call(c, c.data()) 301 | }) 302 | }) 303 | }(jQuery), 304 | +function(a) { 305 | "use strict"; 306 | function b(b) { 307 | return this.each(function() { 308 | var d = a(this) 309 | , e = d.data("bs.collapse") 310 | , f = a.extend({}, c.DEFAULTS, d.data(), "object" == typeof b && b); 311 | !e && f.toggle && "show" == b && (b = !b), 312 | e || d.data("bs.collapse", e = new c(this,f)), 313 | "string" == typeof b && e[b]() 314 | }) 315 | } 316 | var c = function(b, d) { 317 | this.$element = a(b), 318 | this.options = a.extend({}, c.DEFAULTS, d), 319 | this.transitioning = null, 320 | this.options.parent && (this.$parent = 
a(this.options.parent)), 321 | this.options.toggle && this.toggle() 322 | }; 323 | c.VERSION = "3.2.0", 324 | c.DEFAULTS = { 325 | toggle: !0 326 | }, 327 | c.prototype.dimension = function() { 328 | var a = this.$element.hasClass("width"); 329 | return a ? "width" : "height" 330 | } 331 | , 332 | c.prototype.show = function() { 333 | if (!this.transitioning && !this.$element.hasClass("in")) { 334 | var c = a.Event("show.bs.collapse"); 335 | if (this.$element.trigger(c), 336 | !c.isDefaultPrevented()) { 337 | var d = this.$parent && this.$parent.find("> .panel > .in"); 338 | if (d && d.length) { 339 | var e = d.data("bs.collapse"); 340 | if (e && e.transitioning) 341 | return; 342 | b.call(d, "hide"), 343 | e || d.data("bs.collapse", null) 344 | } 345 | var f = this.dimension(); 346 | this.$element.removeClass("collapse").addClass("collapsing")[f](0), 347 | this.transitioning = 1; 348 | var g = function() { 349 | this.$element.removeClass("collapsing").addClass("collapse in")[f](""), 350 | this.transitioning = 0, 351 | this.$element.trigger("shown.bs.collapse") 352 | }; 353 | if (!a.support.transition) 354 | return g.call(this); 355 | var h = a.camelCase(["scroll", f].join("-")); 356 | this.$element.one("bsTransitionEnd", a.proxy(g, this)).emulateTransitionEnd(350)[f](this.$element[0][h]) 357 | } 358 | } 359 | } 360 | , 361 | c.prototype.hide = function() { 362 | if (!this.transitioning && this.$element.hasClass("in")) { 363 | var b = a.Event("hide.bs.collapse"); 364 | if (this.$element.trigger(b), 365 | !b.isDefaultPrevented()) { 366 | var c = this.dimension(); 367 | this.$element[c](this.$element[c]())[0].offsetHeight, 368 | this.$element.addClass("collapsing").removeClass("collapse").removeClass("in"), 369 | this.transitioning = 1; 370 | var d = function() { 371 | this.transitioning = 0, 372 | this.$element.trigger("hidden.bs.collapse").removeClass("collapsing").addClass("collapse") 373 | }; 374 | return a.support.transition ? 
void this.$element[c](0).one("bsTransitionEnd", a.proxy(d, this)).emulateTransitionEnd(350) : d.call(this) 375 | } 376 | } 377 | } 378 | , 379 | c.prototype.toggle = function() { 380 | this[this.$element.hasClass("in") ? "hide" : "show"]() 381 | } 382 | ; 383 | var d = a.fn.collapse; 384 | a.fn.collapse = b, 385 | a.fn.collapse.Constructor = c, 386 | a.fn.collapse.noConflict = function() { 387 | return a.fn.collapse = d, 388 | this 389 | } 390 | , 391 | a(document).on("click.bs.collapse.data-api", '[data-toggle="collapse"]', function(c) { 392 | var d, e = a(this), f = e.attr("data-target") || c.preventDefault() || (d = e.attr("href")) && d.replace(/.*(?=#[^\s]+$)/, ""), g = a(f), h = g.data("bs.collapse"), i = h ? "toggle" : e.data(), j = e.attr("data-parent"), k = j && a(j); 393 | h && h.transitioning || (k && k.find('[data-toggle="collapse"][data-parent="' + j + '"]').not(e).addClass("collapsed"), 394 | e[g.hasClass("in") ? "addClass" : "removeClass"]("collapsed")), 395 | b.call(g, i) 396 | }) 397 | }(jQuery), 398 | +function(a) { 399 | "use strict"; 400 | function b(b) { 401 | b && 3 === b.which || (a(e).remove(), 402 | a(f).each(function() { 403 | var d = c(a(this)) 404 | , e = { 405 | relatedTarget: this 406 | }; 407 | d.hasClass("open") && (d.trigger(b = a.Event("hide.bs.dropdown", e)), 408 | b.isDefaultPrevented() || d.removeClass("open").trigger("hidden.bs.dropdown", e)) 409 | })) 410 | } 411 | function c(b) { 412 | var c = b.attr("data-target"); 413 | c || (c = b.attr("href"), 414 | c = c && /#[A-Za-z]/.test(c) && c.replace(/.*(?=#[^\s]*$)/, "")); 415 | var d = c && a(c); 416 | return d && d.length ? 
d : b.parent() 417 | } 418 | function d(b) { 419 | return this.each(function() { 420 | var c = a(this) 421 | , d = c.data("bs.dropdown"); 422 | d || c.data("bs.dropdown", d = new g(this)), 423 | "string" == typeof b && d[b].call(c) 424 | }) 425 | } 426 | var e = ".dropdown-backdrop" 427 | , f = '[data-toggle="dropdown"]' 428 | , g = function(b) { 429 | a(b).on("click.bs.dropdown", this.toggle) 430 | }; 431 | g.VERSION = "3.2.0", 432 | g.prototype.toggle = function(d) { 433 | var e = a(this); 434 | if (!e.is(".disabled, :disabled")) { 435 | var f = c(e) 436 | , g = f.hasClass("open"); 437 | if (b(), 438 | !g) { 439 | "ontouchstart"in document.documentElement && !f.closest(".navbar-nav").length && a('