proxies = new ArrayList<>(100);
92 |
93 |
/**
 * Creates a handler and records the name it was given.
 *
 * @param threadName value stored in this instance's {@code threadName} field
 */
public ProxyRecheckHandlerThread(String threadName) {
    this.threadName = threadName;
}
97 |
98 | /**
99 | * When an object implementing interface Runnable
is used
100 | * to create a thread, starting the thread causes the object's
101 | * run
method to be called in that separately executing
102 | * thread.
103 | *
104 | * The general contract of the method run
is that it may
105 | * take any action whatsoever.
106 | *
107 | * @see Thread#run()
108 | */
109 | @Override
110 | public void run() {
111 | while (true) {
112 | doReCheckProxies();
113 | }
114 | }
115 |
116 | public void doReCheckProxies() {
117 | ProxyCheck proxyCheck = ProxyCheck.getInstance();
118 | try {
119 | while (!queueIsEmpty) {
120 | Proxy proxy = proxyBlockingDeque.poll();
121 | if (proxy != null) {
122 | long begin = System.currentTimeMillis();
123 | boolean valid = proxyCheck.checkProxyBySocket(new HttpHost(proxy.getIp(), proxy.getPort()), false);
124 | long end = System.currentTimeMillis();
125 | proxy.setValid(valid);
126 | if (!valid) {
127 | proxy.setInvalidTime(end);
128 | if (proxy.getLastSurviveTime() == null || proxy.getLastSurviveTime() <= 0) {
129 | proxy.setLastSurviveTime(end - proxy.getCheckTime());
130 | }
131 | }
132 | proxy.setCheckStatus(1);
133 | proxy.setCheckTime(begin);
134 | proxy.setResponseTime(end - begin);
135 | if (proxy.getLastSurviveTime() == null) {
136 | proxy.setLastSurviveTime(-1L);
137 | }
138 | if (proxy.getInvalidTime() == null) {
139 | proxy.setInvalidTime(-1L);
140 | }
141 | if (proxy.getValidTime() == null) {
142 | proxy.setValidTime(1);
143 | } else {
144 | proxy.setValidTime(proxy.getValidTime() + 1);
145 | }
146 |
147 | proxies.add(proxy);
148 | if (proxies.size() >= BATCH_UPDATE_SIEZ) {
149 | int size = proxies.size();
150 | proxyService.updateProxies(proxies);
151 | proxies.clear();
152 | LOG.info("批量检测代理成功!数量:" + size);
153 | }
154 | // LOG.info(this.threadName + " 校验代理结果>>> " + proxy.getIp() + ":" + proxy.getProtocolType() + ",是否有效: " + proxy.isValid());
155 | }
156 | }
157 | if (CollectionUtils.isNotEmpty(proxies)) {
158 | proxyService.updateProxies(proxies);
159 | proxies.clear();
160 | }
161 | Thread.sleep(10000);
162 | } catch (Exception e) {
163 | LOG.warn("检验代理异常:", e);
164 | }
165 | }
166 |
167 | }
168 |
169 | }
170 |
--------------------------------------------------------------------------------
/src/main/java/com/meow/proxy/check/ProxyRecheckSender.java:
--------------------------------------------------------------------------------
1 | package com.meow.proxy.check;
2 |
3 | import com.meow.proxy.entity.Proxy;
4 | import org.slf4j.Logger;
5 | import org.slf4j.LoggerFactory;
6 | import org.springframework.beans.factory.annotation.Autowired;
7 | import org.springframework.stereotype.Component;
8 |
9 | import java.util.List;
10 |
11 | /**
12 | * @author Alex
13 | * date:2017/12/19
14 | * email:jwnie@foxmail.com
15 | */
16 | @Component
17 | public class ProxyRecheckSender implements ProxyRecheckCallBack {
18 | private final static Logger LOG = LoggerFactory.getLogger(ProxyRecheckSender.class);
19 |
20 | @Autowired
21 | ProxyRecheckHandler proxyRecheckHandler;
22 |
23 | public void sendRecheckProxies(List proxyList){
24 | proxyRecheckHandler.handleMessage(this,proxyList);
25 | }
26 |
27 |
28 | @Override
29 | public void process(String handleStatus) {
30 | LOG.info("代理重新检测状态:"+handleStatus);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/meow/proxy/configure/Configure.java:
--------------------------------------------------------------------------------
1 | package com.meow.proxy.configure;
2 |
3 | import org.springframework.beans.factory.annotation.Value;
4 | import org.springframework.stereotype.Component;
5 |
6 | /**
7 | * Created by Jwnie on 2017/12/17.
8 | */
9 | @Component
10 | public class Configure {
11 | @Value("${com.meow.proxy.configure.chromedriver.path}")
12 | private String chromeDriverPath;
13 |
14 | public String getChromeDriverPath()
15 | {
16 | return this.chromeDriverPath;
17 | }
18 |
19 | public void setChromeDriverPath(String chromeDriverPath) {
20 | this.chromeDriverPath = chromeDriverPath;
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/com/meow/proxy/configure/ScheduleConfig.java:
--------------------------------------------------------------------------------
1 | package com.meow.proxy.configure;
2 |
3 | import org.springframework.context.annotation.Bean;
4 | import org.springframework.context.annotation.Configuration;
5 | import org.springframework.scheduling.annotation.EnableScheduling;
6 | import org.springframework.scheduling.annotation.SchedulingConfigurer;
7 | import org.springframework.scheduling.config.ScheduledTaskRegistrar;
8 |
9 | import java.util.concurrent.Executor;
10 | import java.util.concurrent.Executors;
11 |
12 | /**
13 | * Springboot本身默认的执行方式是串行执行,使用线程池使之并行
14 | * @author Alex
15 | * date:2017/12/19
16 | * email:jwnie@foxmail.com
17 | */
18 | @Configuration
19 | @EnableScheduling
20 | public class ScheduleConfig implements SchedulingConfigurer {
21 | @Override
22 | public void configureTasks(ScheduledTaskRegistrar scheduledTaskRegistrar) {
23 | scheduledTaskRegistrar.setScheduler(taskExecutor());
24 | }
25 |
26 | @Bean(destroyMethod="shutdown")
27 | public Executor taskExecutor() {
28 | return Executors.newScheduledThreadPool(10);
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/java/com/meow/proxy/configure/TaskHolder.java:
--------------------------------------------------------------------------------
1 | package com.meow.proxy.configure;
2 |
3 | import com.meow.proxy.entity.Task;
4 | import com.meow.proxy.enums.ProxySite;
5 |
6 | import java.util.ArrayList;
7 | import java.util.List;
8 |
9 | /**
10 | * 待爬取代理网站的配置
11 | * Created by Jwnie on 2017/12/17.
12 | */
13 | public class TaskHolder {
14 | private static TaskHolder ourInstance = new TaskHolder();
15 |
16 | public static TaskHolder getInstance() {
17 | return ourInstance;
18 | }
19 |
20 | private List taskList = new ArrayList<>(50);
21 |
22 | private TaskHolder() {
23 | this.taskList.add(new Task("http://www.xicidaili.com/", true, 2, "xicidailiDownLoader", "xicidailiExtractor", ProxySite.xicidaili.getProxySiteName()));
24 | this.taskList.add(new Task("http://www.goubanjia.com/", true, 10, "goubanjiaDownLoader", "goubanjiaExtractor", ProxySite.goubanjia.getProxySiteName()));
25 | this.taskList.add(new Task("http://www.ip3366.net", true, 4, "ip3366DownLoader", "ip3366Extractor", ProxySite.ip3366.getProxySiteName()));
26 | this.taskList.add(new Task("http://www.data5u.com/", true, 10, "data5uDownLoader", "data5uExtractor", ProxySite.data5u.getProxySiteName()));
27 | this.taskList.add(new Task("http://www.xdaili.cn/ipagent/freeip/getFreeIps", false, 1, "baseDownLoader", "xdailiExtractor", ProxySite.xdaili.getProxySiteName()));
28 | this.taskList.add(new Task("http://www.nianshao.me/", true, 8, "nianshaoDownLoader", "nianshaoExtractor", ProxySite.nianshao.getProxySiteName()));
29 | this.taskList.add(new Task("http://proxydb.net/", true, 6, "proxydbDownLoader", "proxydbExtractor", ProxySite.proxydb.getProxySiteName()));
30 | this.taskList.add(new Task("http://www.kxdaili.com/dailiip.html", true, 8, "kxdailiDownLoader", "kxdailiExtractor", ProxySite.kxdaili.getProxySiteName()));
31 | this.taskList.add(new Task("https://proxy.coderbusy.com/", true, 6, "coderbusyDownLoader", "coderbusyExtractor", ProxySite.coderbusy.getProxySiteName()));
32 |
33 | //境外的代理網站(部分url需要VPN)
34 | this.taskList.add(new Task("https://free-proxy-list.net", false, 1, "freeProxyListDownLoader", "freeProxyListExtractor", ProxySite.freeProxyList.getProxySiteName()));
35 | }
36 |
37 | public List getTaskList() {
38 | return taskList;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/com/meow/proxy/controller/ProxyControllor.java:
--------------------------------------------------------------------------------
1 | package com.meow.proxy.controller;
2 |
3 | import com.alibaba.fastjson.JSONArray;
4 | import com.meow.proxy.entity.Proxy;
5 | import com.meow.proxy.entity.ProxyQueryResult;
6 | import com.meow.proxy.service.ProxyService;
7 | import org.apache.commons.collections.CollectionUtils;
8 | import org.apache.ibatis.annotations.Param;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 | import org.springframework.beans.factory.annotation.Autowired;
12 | import org.springframework.web.bind.annotation.RequestMapping;
13 | import org.springframework.web.bind.annotation.RequestMethod;
14 | import org.springframework.web.bind.annotation.RestController;
15 |
16 | import java.util.ArrayList;
17 | import java.util.List;
18 | import java.util.Map;
19 |
20 | /**
21 | * @author Alex
22 | * date:2017/12/20
23 | * email:jwnie@foxmail.com
24 | */
25 | @RestController
26 | @RequestMapping(value = "/proxy")
27 | public class ProxyControllor {
28 | private final static Logger LOG = LoggerFactory.getLogger(ProxyControllor.class);
29 |
30 | @Autowired
31 | ProxyService proxyService;
32 |
33 | @RequestMapping(value = "/getProxy", method = RequestMethod.GET)
34 | public ProxyQueryResult getProxy(@Param("protocolType") String protocolType, @Param("isDemostic") String isDemostic, @Param("anonymousType") String anonymousType) {
35 | ProxyQueryResult proxyQueryResult = new ProxyQueryResult();
36 | List proxies = new ArrayList();
37 | try {
38 | proxies = proxyService.queryProxy(protocolType, isDemostic, anonymousType);
39 | if (CollectionUtils.isNotEmpty(proxies)) {
40 | int totalCount = proxyService.queryValidProxyCount(protocolType,isDemostic,anonymousType);
41 | proxyQueryResult.setTotalProxyCount(totalCount);
42 | proxyQueryResult.setProxies(proxies);
43 | proxyQueryResult.setResProxyCount(proxies.size());
44 | }
45 | proxyQueryResult.setStatus("success");
46 | } catch (Exception e) {
47 | LOG.error("查询代理异常:", e);
48 | proxyQueryResult.setProxies(proxies);
49 | proxyQueryResult.setResProxyCount(proxies.size());
50 | proxyQueryResult.setStatus("failed");
51 | }
52 | return proxyQueryResult;
53 | }
54 |
55 | @RequestMapping(value = "proxyStatistic", method = RequestMethod.GET)
56 | public JSONArray proxyStatistic() {
57 | JSONArray js = new JSONArray();
58 | try {
59 | List