├── ip代理与定点爬取(重构)
├── target
│ ├── classes
│ │ ├── db-config.properties
│ │ ├── IPModel
│ │ │ ├── IPMessage.class
│ │ │ └── SerializeUtil.class
│ │ ├── database
│ │ │ ├── MyRedis.class
│ │ │ └── RedisDB.class
│ │ ├── htmlparse
│ │ │ ├── IPPool.class
│ │ │ ├── IPThread.class
│ │ │ └── URLFecter.class
│ │ ├── ipfilter
│ │ │ ├── IPFilter.class
│ │ │ └── IPUtils.class
│ │ ├── timeutils
│ │ │ ├── MyTimer.class
│ │ │ └── MyTimeJob.class
│ │ └── httpbrowser
│ │ │ └── MyHttpResponse.class
│ └── test-classes
│ │ ├── testRedis.class
│ │ └── testTimer.class
├── src
│ ├── main
│ │ ├── resources
│ │ │ └── db-config.properties
│ │ └── java
│ │ │ ├── .idea
│ │ │ ├── copyright
│ │ │ │ └── profiles_settings.xml
│ │ │ ├── modules.xml
│ │ │ ├── compiler.xml
│ │ │ ├── misc.xml
│ │ │ └── src.iml
│ │ │ ├── timeutils
│ │ │ ├── MyTimer.java
│ │ │ └── MyTimeJob.java
│ │ │ ├── htmlparse
│ │ │ ├── IPThread.java
│ │ │ ├── IPPool.java
│ │ │ └── URLFecter.java
│ │ │ ├── ipfilter
│ │ │ ├── IPFilter.java
│ │ │ └── IPUtils.java
│ │ │ ├── database
│ │ │ ├── MyRedis.java
│ │ │ └── RedisDB.java
│ │ │ ├── IPModel
│ │ │ ├── IPMessage.java
│ │ │ └── SerializeUtil.java
│ │ │ └── httpbrowser
│ │ │ └── MyHttpResponse.java
│ └── test
│ │ └── java
│ │ ├── testTimer.java
│ │ └── testRedis.java
├── .idea
│ ├── copyright
│ │ └── profiles_settings.xml
│ ├── kotlinc.xml
│ ├── modules.xml
│ ├── misc.xml
│ ├── libraries
│ │ ├── Maven__redis_clients_jedis_2_9_0.xml
│ │ └── Maven__org_apache_commons_commons_pool2_2_4_2.xml
│ ├── compiler.xml
│ ├── dataSources.local.xml
│ ├── dataSources.xml
│ ├── dataSources
│ │ └── c37050e9-c728-46b8-9a05-b8b36e206d80.xml
│ └── uiDesigner.xml
├── README.md
├── pom.xml
└── ip代理与定点爬取.iml
├── ip代理与定点爬取
├── .idea
│ ├── copyright
│ │ └── profiles_settings.xml
│ ├── modules.xml
│ ├── compiler.xml
│ ├── dataSources.local.xml
│ ├── dataSources.xml
│ ├── misc.xml
│ └── dataSources
│ │ └── 7bc77221-9c0f-4103-8fcc-36aa3de003b6.xml
├── src
│ ├── .idea
│ │ ├── copyright
│ │ │ └── profiles_settings.xml
│ │ ├── modules.xml
│ │ ├── compiler.xml
│ │ ├── misc.xml
│ │ └── src.iml
│ ├── ipfilter
│ │ ├── IPFilter.java
│ │ └── IPUtils.java
│ ├── IPModel
│ │ ├── IPMessage.java
│ │ └── DatabaseMessage.java
│ ├── timeutils
│ │ ├── TimeUpdate.java
│ │ └── MyTimeJob.java
│ ├── htmlparse
│ │ └── URLFecter.java
│ ├── database
│ │ └── DataBaseDemo.java
│ └── httpbrowser
│ │ └── HttpResponseDemo.java
└── ip代理与定点爬取.iml
└── README.md
/ip代理与定点爬取(重构)/target/classes/db-config.properties:
--------------------------------------------------------------------------------
1 | jedis.addr=127.0.0.1
2 | jedis.port=6379
3 | jedis.passwd=6204576387
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/resources/db-config.properties:
--------------------------------------------------------------------------------
1 | jedis.addr=127.0.0.1
2 | jedis.port=6379
3 | jedis.passwd=********
4 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/test-classes/testRedis.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/test-classes/testRedis.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/test-classes/testTimer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/test-classes/testTimer.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/IPModel/IPMessage.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/IPModel/IPMessage.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/database/MyRedis.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/database/MyRedis.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/database/RedisDB.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/database/RedisDB.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/htmlparse/IPPool.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/htmlparse/IPPool.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/ipfilter/IPFilter.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/ipfilter/IPFilter.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/ipfilter/IPUtils.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/ipfilter/IPUtils.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/timeutils/MyTimer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/timeutils/MyTimer.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/htmlparse/IPThread.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/htmlparse/IPThread.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/htmlparse/URLFecter.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/htmlparse/URLFecter.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/timeutils/MyTimeJob.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/timeutils/MyTimeJob.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/IPModel/SerializeUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/IPModel/SerializeUtil.class
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/target/classes/httpbrowser/MyHttpResponse.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/httpbrowser/MyHttpResponse.class
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ip-proxy-pools-regularly
2 | 实现定时爬取与IP代理池
3 |
4 | **更详细的README.md请参考:**[ip-proxy-pools-regularly](https://github.com/championheng/ip-proxy-pools-regularly/tree/master/ip%E4%BB%A3%E7%90%86%E4%B8%8E%E5%AE%9A%E7%82%B9%E7%88%AC%E5%8F%96(%E9%87%8D%E6%9E%84))
5 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/kotlinc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/libraries/Maven__redis_clients_jedis_2_9_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/timeutils/MyTimer.java:
--------------------------------------------------------------------------------
1 | package timeutils;
2 |
3 | import java.util.Calendar;
4 | import java.util.Date;
5 | import java.util.Timer;
6 |
7 | /**
8 | * Created by paranoid on 17-4-13.
9 | */
10 |
11 | public class MyTimer {
12 | public static void main(String[] args) {
13 | MyTimeJob job = new MyTimeJob();
14 | Timer timer = new Timer();
15 |
16 | Calendar calendar = Calendar.getInstance();
17 | Date date = calendar.getTime();
18 |
19 | //设置定时任务,从现在开始,每24小时执行一次
20 | timer.schedule(job, date, 24*60*60*1000);
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/libraries/Maven__org_apache_commons_commons_pool2_2_4_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/test/java/testTimer.java:
--------------------------------------------------------------------------------
1 | import java.util.Calendar;
2 | import java.util.Date;
3 |
4 | import static java.lang.System.out;
5 |
6 | /**
7 | * Created by hg_yi on 17-8-9.
8 | */
/**
 * Small manual check: prints today's date with the time of day set to 06:00:00.
 */
public class testTimer {
    public static void main(String[] args) {
        Calendar taskStart = Calendar.getInstance();

        // Keep today's year/month/day and pin the clock fields to the task start time.
        taskStart.set(Calendar.HOUR_OF_DAY, 6);
        taskStart.set(Calendar.MINUTE, 0);
        taskStart.set(Calendar.SECOND, 0);

        out.println(taskStart.getTime());
    }
}
23 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/htmlparse/IPThread.java:
--------------------------------------------------------------------------------
1 | package htmlparse;
2 |
3 | import java.util.List;
4 |
5 | import static java.lang.System.out;
6 |
7 | /**
8 | * Created by hg_yi on 17-8-8.
9 | */
10 | public class IPThread extends Thread {
11 | private List urls;
12 | private IPPool ipPool;
13 |
14 | public IPThread(List urls, IPPool ipPool) {
15 | this.urls = urls;
16 | this.ipPool = ipPool;
17 | }
18 |
19 | @Override
20 | public void run() {
21 | //进行ip的抓取
22 | for (String url : urls) {
23 | out.println(Thread.currentThread().getName() + "爬取的地址为:" + url);
24 | }
25 | ipPool.getIP(urls);
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/.idea/dataSources.local.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #@
7 | `
8 |
9 |
10 | master_key
11 | root
12 | mysql:
13 |
14 |
15 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/dataSources.local.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #@
7 | `
8 |
9 |
10 | master_key
11 | root
12 | *:IPProxyPool
13 |
14 |
15 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | mysql
6 | true
7 | com.mysql.jdbc.Driver
8 | jdbc:mysql://localhost:3306/mysql
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | mysql
6 | true
7 | com.mysql.jdbc.Driver
8 | jdbc:mysql://localhost:3306/IPProxyPool
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/test/java/testRedis.java:
--------------------------------------------------------------------------------
1 | import IPModel.IPMessage;
2 | import database.MyRedis;
3 |
4 | import java.util.ArrayList;
5 | import java.util.List;
6 |
7 | import static java.lang.System.out;
8 |
9 | /**
10 | * Created by hg_yi on 17-8-9.
11 | */
12 | public class testRedis {
13 | public static void main(String[] args) {
14 | List ipMessages = new ArrayList<>();
15 | // IPMessage ipMessage = new IPMessage();
16 |
17 | // ipMessage.setIPAddress("175.172.212.178");
18 | // ipMessage.setIPPort("80");
19 | // ipMessage.setIPType("HTTPS");
20 | // ipMessage.setIPSpeed("3.837秒");
21 | //
22 | // ipMessages.add(ipMessage);
23 | //
24 | MyRedis redis = new MyRedis();
25 | // redis.setIPToList(ipMessages);
26 | IPMessage ipMessage = redis.getIPByList();
27 |
28 | out.println(ipMessage.getIPAddress());
29 | out.println(ipMessage.getIPPort());
30 |
31 | redis.close();
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/ipfilter/IPFilter.java:
--------------------------------------------------------------------------------
1 | package ipfilter;
2 |
3 | import IPModel.IPMessage;
4 |
5 | import java.util.*;
6 |
7 | import static java.lang.System.out;
8 |
9 | /**
10 | * Created by paranoid on 17-4-14.
11 | * 对得到的IP进行筛选,将IP速度在两秒以内的并且类型是https的留下,其余删除
12 | */
13 |
14 | public class IPFilter {
15 | //对IP进行过滤
16 | public static List Filter(List ipMessages1) {
17 | List newIPMessages = new ArrayList<>();
18 |
19 | for (int i = 0; i < ipMessages1.size(); i++) {
20 | String ipType = ipMessages1.get(i).getIPType();
21 | String ipSpeed = ipMessages1.get(i).getIPSpeed();
22 |
23 | ipSpeed = ipSpeed.substring(0, ipSpeed.indexOf('秒'));
24 | double Speed = Double.parseDouble(ipSpeed);
25 |
26 | if (ipType.equals("HTTPS") && Speed <= 2.0) {
27 | newIPMessages.add(ipMessages1.get(i));
28 | }
29 | }
30 |
31 | return newIPMessages;
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/README.md:
--------------------------------------------------------------------------------
1 | # IP代理池(ip-proxy-pools)
2 | 平时在写爬虫的时候,最怕的事情就是IP封禁。这是我自制的一个IP代理池,使用Java语言进行编写,结合Redis数据库对代理IP进行存储。被抓取的代理IP来源于[xici代理网](http://www.xicidaili.com/)。
3 |
4 | **对于技术上的实现细节,参考本人所写的博客链接**:
5 | [Java网络爬虫(十一)--重构定时爬取以及IP代理池(多线程+Redis+代码优化)](http://blog.csdn.net/championhengyi/article/details/77053448)
6 |
7 | ## 环境需求
8 | >- JDK 1.8
9 | >- Redis 3.0.6
10 | >- IDEA
11 | >- Maven
12 |
13 | ## 实现架构
14 | 
15 |
16 | ## 使用说明
17 | 要使用此IP代理池,只能将本项目clone至本地,然后使用IDEA运行源代码。运行结果如下图:
18 |
19 | 
20 |
21 | 就目前来说,如果想要真正的将此IP代理池运用到其它工程中,还需要对代码做额外的补充,最基本也要考虑使用`通知/等待机制`。
22 |
23 | 对于将此IP代理池如何运用到一个工程中,可以参考:[multithreading-crawlers](https://github.com/championheng/multithreading-crawlers)
24 |
25 | ## TODO
26 | 1. 优化任务分配策略
27 | 2. 对外提供接口与使用文档
28 | 3. 可视化管理... ...
29 |
30 | **注:此IP代理池真正运用在工程中的版本,[multithreading-crawlers](https://github.com/championheng/multithreading-crawlers),可以称为第三版,与此版本差别还是挺大的,对于版本3,我会尽快push到这个仓库中... ...**
31 |
32 | ## 版本说明
33 | 
34 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | groupId
8 | ip代理与定点爬取
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | org.apache.maven.plugins
14 | maven-compiler-plugin
15 |
16 | 1.7
17 | 1.7
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | redis.clients
26 | jedis
27 | 2.9.0
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/ipfilter/IPFilter.java:
--------------------------------------------------------------------------------
1 | package ipfilter;
2 |
3 | import IPModel.IPMessage;
4 |
5 | import java.util.*;
6 |
7 | import static java.lang.System.loadLibrary;
8 | import static java.lang.System.out;
9 |
10 | /**
11 | * Created by paranoid on 17-4-14.
12 | * 对于Java已经规定的常用的类如String我们不可能对它进行重新编译,在不能使用Comparable
13 | * 的情况下我们需要自己操作Comparator,重新定义它的compare方法.
14 | *
15 | * String的compareTo方法自动升序排列.
16 | */
17 |
18 | public class IPFilter {
19 | //对IP进行过滤,选取1000个IP中速度排名前六百的IP(升序),其余的舍弃
20 | public static List Filter(List list) {
21 | List newlist = new ArrayList<>();
22 |
23 | Collections.sort(list, new Comparator() {
24 | @Override
25 | public int compare(IPMessage o1, IPMessage o2) {
26 | return o1.getIPSpeed().compareTo(o2.getIPSpeed());
27 | }
28 | });
29 |
30 | //只返回容器中前100的对象
31 | for(int i = 0; i < list.size(); i++) {
32 | if(i < 100) {
33 | newlist.add(list.get(i));
34 | }else {
35 | break;
36 | }
37 | }
38 |
39 | return newlist;
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/IPModel/IPMessage.java:
--------------------------------------------------------------------------------
1 | package IPModel;
2 |
3 | /**
4 | * Created by paranoid on 17-4-10.
5 | */
/**
 * Mutable value holder describing one proxy entry: address, port, server address,
 * type and speed, all kept as strings.
 */
public class IPMessage {
    private String IPAddress;
    private String IPPort;
    private String ServerAddress;
    private String IPType;
    private String IPSpeed;

    // --- address -------------------------------------------------------------

    /** @return the proxy address, or null if it has not been set */
    public String getIPAddress() {
        return this.IPAddress;
    }

    public void setIPAddress(String IPAddress) {
        this.IPAddress = IPAddress;
    }

    // --- port ----------------------------------------------------------------

    /** @return the proxy port as text, or null if it has not been set */
    public String getIPPort() {
        return this.IPPort;
    }

    public void setIPPort(String IPPort) {
        this.IPPort = IPPort;
    }

    // --- server address ------------------------------------------------------

    /** @return the server address, or null if it has not been set */
    public String getServerAddress() {
        return this.ServerAddress;
    }

    public void setServerAddress(String serverAddress) {
        this.ServerAddress = serverAddress;
    }

    // --- type ----------------------------------------------------------------

    /** @return the proxy type, or null if it has not been set */
    public String getIPType() {
        return this.IPType;
    }

    public void setIPType(String IPType) {
        this.IPType = IPType;
    }

    // --- speed ---------------------------------------------------------------

    /** @return the speed text, or null if it has not been set */
    public String getIPSpeed() {
        return this.IPSpeed;
    }

    public void setIPSpeed(String IPSpeed) {
        this.IPSpeed = IPSpeed;
    }
}
54 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/database/MyRedis.java:
--------------------------------------------------------------------------------
1 | package database;
2 |
3 | import IPModel.IPMessage;
4 | import IPModel.SerializeUtil;
5 | import redis.clients.jedis.Jedis;
6 |
7 | import java.util.List;
8 |
9 | import static java.lang.System.out;
10 |
11 | /**
12 | * Created by hg_yi on 17-8-9.
13 | */
14 | public class MyRedis {
15 | Jedis jedis = RedisDB.getJedis();
16 |
17 | //将ip信息保存在Redis列表中
18 | public void setIPToList(List ipMessages) {
19 | for (IPMessage ipMessage : ipMessages) {
20 | //首先将ipMessage进行序列化
21 | byte[] bytes = SerializeUtil.serialize(ipMessage);
22 |
23 | jedis.rpush("IPPool".getBytes(), bytes);
24 | }
25 | }
26 |
27 | //将Redis中保存的对象进行反序列化
28 | public IPMessage getIPByList() {
29 | int rand = (int)(Math.random()*jedis.llen("IPPool"));
30 |
31 | Object o = SerializeUtil.unserialize(jedis.lindex("IPPool".getBytes(), 0));
32 | if (o instanceof IPMessage) {
33 | return (IPMessage)o;
34 | } else {
35 | out.println("不是IPMessage的一个实例~");
36 | return null;
37 | }
38 | }
39 |
40 | public void deleteKey(String key) {
41 | jedis.del(key);
42 | }
43 |
44 | public void close() {
45 | RedisDB.close(jedis);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/IPModel/DatabaseMessage.java:
--------------------------------------------------------------------------------
1 | package IPModel;
2 |
3 | /**
4 | * Created by paranoid on 17-4-21.
5 | */
/**
 * Mutable value holder for one stored proxy row: an id plus address, port,
 * server address, type and speed, all kept as strings.
 */
public class DatabaseMessage {
    private String id;
    private String IPAddress;
    private String IPPort;
    private String ServerAddress;
    private String IPType;
    private String IPSpeed;

    // --- id ------------------------------------------------------------------

    /** @return the row id, or null if it has not been set */
    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    // --- address -------------------------------------------------------------

    /** @return the proxy address, or null if it has not been set */
    public String getIPAddress() {
        return this.IPAddress;
    }

    public void setIPAddress(String IPAddress) {
        this.IPAddress = IPAddress;
    }

    // --- port ----------------------------------------------------------------

    /** @return the proxy port as text, or null if it has not been set */
    public String getIPPort() {
        return this.IPPort;
    }

    public void setIPPort(String IPPort) {
        this.IPPort = IPPort;
    }

    // --- server address ------------------------------------------------------

    /** @return the server address, or null if it has not been set */
    public String getServerAddress() {
        return this.ServerAddress;
    }

    public void setServerAddress(String serverAddress) {
        this.ServerAddress = serverAddress;
    }

    // --- type ----------------------------------------------------------------

    /** @return the proxy type, or null if it has not been set */
    public String getIPType() {
        return this.IPType;
    }

    public void setIPType(String IPType) {
        this.IPType = IPType;
    }

    // --- speed ---------------------------------------------------------------

    /** @return the speed text, or null if it has not been set */
    public String getIPSpeed() {
        return this.IPSpeed;
    }

    public void setIPSpeed(String IPSpeed) {
        this.IPSpeed = IPSpeed;
    }
}
62 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/IPModel/IPMessage.java:
--------------------------------------------------------------------------------
1 | package IPModel;
2 |
3 | import java.io.Serializable;
4 |
5 | /**
6 | * Created by paranoid on 17-4-10.
7 | *
8 | * 显式地定义serialVersionUID有两种用途:
9 | * 1、在某些场合,希望类的不同版本对序列化兼容,因此需要确保类的不同版本具有相同的serialVersionUID;
10 | * 2、在某些场合,不希望类的不同版本对序列化兼容,因此需要确保类的不同版本具有不同的serialVersionUID。
11 | *
12 | * 具体详情希望大家百度
13 | */
14 |
// Instances are stored in a Redis list, so the class must be serializable and
// deserializable; it therefore implements Serializable.
public class IPMessage implements Serializable {
    // Explicit version id for the serialized form.
    private static final long serialVersionUID = 1L;

    private String IPAddress;
    private String IPPort;
    private String IPType;
    private String IPSpeed;

    // --- address -------------------------------------------------------------

    /** @return the proxy address, or null if it has not been set */
    public String getIPAddress() {
        return this.IPAddress;
    }

    public void setIPAddress(String IPAddress) {
        this.IPAddress = IPAddress;
    }

    // --- port ----------------------------------------------------------------

    /** @return the proxy port as text, or null if it has not been set */
    public String getIPPort() {
        return this.IPPort;
    }

    public void setIPPort(String IPPort) {
        this.IPPort = IPPort;
    }

    // --- type ----------------------------------------------------------------

    /** @return the proxy type, or null if it has not been set */
    public String getIPType() {
        return this.IPType;
    }

    public void setIPType(String IPType) {
        this.IPType = IPType;
    }

    // --- speed ---------------------------------------------------------------

    /** @return the speed text, or null if it has not been set */
    public String getIPSpeed() {
        return this.IPSpeed;
    }

    public void setIPSpeed(String IPSpeed) {
        this.IPSpeed = IPSpeed;
    }

    /** @return the entry rendered as {@code address:port} */
    @Override
    public String toString() {
        return String.format("%s:%s", this.IPAddress, this.IPPort);
    }
}
62 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/IPModel/SerializeUtil.java:
--------------------------------------------------------------------------------
1 | package IPModel;
2 |
3 | import java.io.*;
4 |
5 | /**
6 | * Created by hg_yi on 17-8-9.
7 | *
8 | * java.io.ObjectOutputStream代表对象输出流,它的writeObject(Object obj)方法
9 | * 可对参数指定的obj对象进行序列化,把得到的字节序列写到一个目标输出流中。
10 | *
11 | * java.io.ObjectInputStream代表对象输入流,它的readObject()方法一个源输入流中读
12 | * 取字节序列,再把它们反序列化为一个对象,并将其返回。
13 | *
14 | * 对象序列化包括如下步骤:
15 | * 1)创建一个对象输出流,它可以包装一个其他类型的目标输出流,如文件输出流(我这里是字节流);
16 | * 2)通过对象输出流的writeObject()方法写对象。
17 | *
18 | * 对象反序列化的步骤如下:
19 | * 1)创建一个对象输入流,它可以包装一个其他类型的源输入流,如文件输入流(我这里是字节流);
20 | * 2)通过对象输入流的readObject()方法读取对象。
21 | */
22 |
/**
 * Helpers that convert objects to and from their Java-serialized byte form.
 *
 * <p>NOTE(review): Java-native deserialization must not be applied to untrusted bytes;
 * confirm that everything passed to {@link #unserialize(byte[])} was produced by
 * {@link #serialize(Object)}.
 */
public class SerializeUtil {
    /**
     * Serializes an object into a byte array.
     *
     * @param object the object to write; it must be serializable
     * @return the serialized bytes, or null when serialization fails (the stack trace is printed)
     */
    public static byte[] serialize(Object object) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        // try-with-resources closes the object stream on every path.
        try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
            oos.writeObject(object);
            // Make sure buffered data has reached the byte array before it is copied.
            oos.flush();

            return baos.toByteArray();
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Rebuilds an object from bytes produced by {@link #serialize(Object)}.
     *
     * @param bytes the serialized form; may be null
     * @return the deserialized object, or null when {@code bytes} is null or cannot be read
     *         (the stack trace is printed for unreadable input)
     */
    public static Object unserialize(byte[] bytes) {
        if (bytes == null) {
            return null;
        }

        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            return ois.readObject();
        } catch (IOException | ClassNotFoundException | RuntimeException e) {
            e.printStackTrace();
            return null;
        }
    }
}
61 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/timeutils/TimeUpdate.java:
--------------------------------------------------------------------------------
1 | package timeutils;
2 |
3 | import org.quartz.CronTrigger;
4 | import org.quartz.JobDetail;
5 | import org.quartz.Scheduler;
6 | import org.quartz.SchedulerFactory;
7 | import org.quartz.impl.StdSchedulerFactory;
8 |
9 | import java.text.SimpleDateFormat;
10 | import java.util.Date;
11 |
12 | import static org.quartz.CronScheduleBuilder.cronSchedule;
13 | import static org.quartz.JobBuilder.newJob;
14 | import static org.quartz.TriggerBuilder.newTrigger;
15 |
16 | /**
17 | * Created by paranoid on 17-4-13.
18 | */
19 |
20 | public class TimeUpdate {
21 | public void go() throws Exception {
22 | // 首先,必需要取得一个Scheduler的引用(设置一个工厂)
23 | SchedulerFactory sf = new StdSchedulerFactory();
24 |
25 | //从工厂里面拿到一个scheduler实例
26 | Scheduler sched = sf.getScheduler();
27 |
28 | //真正执行的任务并不是Job接口的实例,而是用反射的方式实例化的一个JobDetail实例
29 | JobDetail job = newJob(MyTimeJob.class).withIdentity("job1", "group1").build();
30 | // 定义一个触发器,job 1将每隔执行一次
31 | CronTrigger trigger = newTrigger().withIdentity("trigger1", "group1").
32 | withSchedule(cronSchedule("30 04 18 * * ?")).build();
33 |
34 | //执行任务和触发器
35 | Date ft = sched.scheduleJob(job, trigger);
36 |
37 | //格式化日期显示格式
38 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");
39 | System.out.println(job.getKey() + " 已被安排执行于: " + sdf.format(ft) + "," +
40 | "并且以如下重复规则重复执行: " + trigger.getCronExpression());
41 |
42 | sched.start();
43 | }
44 |
45 | public static void main(String[] args) throws Exception {
46 | TimeUpdate test = new TimeUpdate();
47 | test.go();
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/dataSources/c37050e9-c728-46b8-9a05-b8b36e206d80.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | 1
7 | 1
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | 1
19 | int(11)|0
20 | 1
21 | 1
22 |
23 |
24 | 2
25 | char(60)|0
26 | 1
27 |
28 |
29 | 3
30 | int(11)|0
31 | 1
32 |
33 |
34 | 4
35 | char(20)|0
36 | 1
37 |
38 |
39 | 5
40 | int(11)|0
41 | 1
42 | '100'
43 |
44 |
45 | 1
46 | id
47 | 1
48 |
49 |
50 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/database/RedisDB.java:
--------------------------------------------------------------------------------
1 | package database;
2 |
3 | import redis.clients.jedis.Jedis;
4 | import redis.clients.jedis.JedisPool;
5 | import redis.clients.jedis.JedisPoolConfig;
6 |
7 | import java.util.ResourceBundle;
8 |
9 | /**
10 | * Created by paranoid on 17-4-12.
11 | */
12 | public class RedisDB {
13 | // private static JedisPool jedisPool;
14 | private static String addr;
15 | private static int port;
16 | private static String passwd;
17 |
18 | //加载配置文件
19 | private static ResourceBundle rb = ResourceBundle.getBundle("db-config");
20 |
21 | //初始化连接
22 | static {
23 | addr = rb.getString("jedis.addr");
24 | port = Integer.parseInt(rb.getString("jedis.port"));
25 | passwd = rb.getString("jedis.passwd");
26 |
27 | // try {
28 | // //先进行redis数据的参数配置
29 | // JedisPoolConfig config = new JedisPoolConfig();
30 | // //链接耗尽时是否阻塞,false时抛出异常,默认是true,阻塞超时之后抛出异常
31 | // config.setBlockWhenExhausted(true);
32 | // //逐出策略类名,当连接超过最大空闲时间或最大空闲数抛出异常
33 | // config.setEvictionPolicyClassName("org.apache.commons.pool2." +
34 | // "impl.DefaultEvictionPolicy");
35 | // //是否启用pool的jmx管理功能,默认是true
36 | // config.setJmxEnabled(true);
37 | // //最大空闲数,默认为8,一个pool最多有多少空闲的Jedis实例
38 | // config.setMaxIdle(8);
39 | // //最大连接数
40 | // config.setMaxTotal(100);
41 | // //当引入一个Jedis实例时,最大的等待时间,如果超过等待时间,抛出异常
42 | // config.setMaxWaitMillis(1000*10);
43 | // //获得一个jedis实例的时候是否检查连接可用性(ping())
44 | // config.setTestOnBorrow(true);
45 | // } catch(Exception e) {
46 | // e.printStackTrace();
47 | // }
48 | }
49 |
50 | //获取Jedis实例
51 | public synchronized static Jedis getJedis() {
52 | //连接本地的 Redis 服务
53 | Jedis jedis = new Jedis(addr, port);
54 | //权限认证
55 | jedis.auth(passwd);
56 |
57 | return jedis;
58 | }
59 |
60 | //释放Jedis资源
61 | public static void close(final Jedis jedis) {
62 | if (jedis != null) {
63 | jedis.close();
64 | }
65 | }
66 | }
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/ipfilter/IPUtils.java:
--------------------------------------------------------------------------------
1 | package ipfilter;
2 |
3 | import IPModel.IPMessage;
4 | import org.apache.http.HttpHost;
5 | import org.apache.http.client.config.RequestConfig;
6 | import org.apache.http.client.methods.CloseableHttpResponse;
7 | import org.apache.http.client.methods.HttpGet;
8 | import org.apache.http.impl.client.CloseableHttpClient;
9 | import org.apache.http.impl.client.HttpClients;
10 |
11 | import java.io.IOException;
12 | import java.util.List;
13 |
14 | import static java.lang.System.out;
15 |
16 | /**
17 | * Created by paranoid on 17-4-21.
18 | * 测试此Ip是否有效
19 | */
20 |
21 | public class IPUtils {
22 | public static List IPIsable(List ipMessages) {
23 | String ip;
24 | String port;
25 |
26 | CloseableHttpClient httpClient = HttpClients.createDefault();
27 | CloseableHttpResponse response = null;
28 |
29 | for(int i = 0; i < ipMessages.size(); i++) {
30 | ip = ipMessages.get(i).getIPAddress();
31 | port = ipMessages.get(i).getIPPort();
32 |
33 | HttpHost proxy = new HttpHost(ip, Integer.parseInt(port));
34 | RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(3000).
35 | setSocketTimeout(3000).build();
36 | HttpGet httpGet = new HttpGet("https://www.baidu.com");
37 | httpGet.setConfig(config);
38 |
39 | httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
40 | "q=0.9,image/webp,*/*;q=0.8");
41 | httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
42 | httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
43 | httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit" +
44 | "/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
45 |
46 | try {
47 | response = httpClient.execute(httpGet);
48 | } catch (IOException e) {
49 | out.println("不可用代理已删除" + ipMessages.get(i).getIPAddress() + ": " + ipMessages.get(i).getIPPort());
50 | ipMessages.remove(ipMessages.get(i));
51 | i--;
52 | }
53 | }
54 |
55 | try {
56 | httpClient.close();
57 | response.close();
58 | } catch (IOException e) {
59 | e.printStackTrace();
60 | }
61 |
62 | return ipMessages;
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/htmlparse/IPPool.java:
--------------------------------------------------------------------------------
1 | package htmlparse;
2 |
3 | import IPModel.IPMessage;
4 | import ipfilter.IPFilter;
5 | import ipfilter.IPUtils;
6 |
7 | import java.util.ArrayList;
8 | import java.util.List;
9 |
10 | import static java.lang.System.out;
11 |
12 | /**
13 | * Created by hg_yi on 17-8-3.
14 | */
15 | public class IPPool {
16 | //成员变量(非线程安全)
17 | private List ipMessages;
18 |
19 | public IPPool(List ipMessages) {
20 | this.ipMessages = ipMessages;
21 | }
22 |
23 | public void getIP(List urls) {
24 | String ipAddress;
25 | String ipPort;
26 |
27 | for (int i = 0; i < urls.size(); i++) {
28 | /** 随机挑选代理IP(仔细想了想,本步骤由于其他线程有可能在位置确定之后对ipMessages数量进行
29 | * 增加,虽说不会改变已经选择的ip代理的位置,但合情合理还是在对共享变量进行读写的时候要保证
30 | * 其原子性,否则极易发生脏读)
31 | */
32 | //每个线程先将自己抓取下来的ip保存下来并进行过滤与检测
33 | List ipMessages1 = new ArrayList<>();
34 | String url = urls.get(i);
35 |
36 | synchronized (ipMessages) {
37 | int rand = (int) (Math.random()*ipMessages.size());
38 | out.println("当前线程 " + Thread.currentThread().getName() + " rand值: " + rand +
39 | " ipMessages 大小: " + ipMessages.size());
40 |
41 | ipAddress = ipMessages.get(rand).getIPAddress();
42 | ipPort = ipMessages.get(rand).getIPPort();
43 | }
44 |
45 | //这里要注意Java中非基本类型的参数传递方式,实际上都是同一个对象
46 | boolean status = URLFecter.urlParse(url, ipAddress, ipPort, ipMessages1);
47 | //如果ip代理池里面的ip不能用,则切换下一个IP对本页进行重新抓取
48 | if (status == false) {
49 | i--;
50 | continue;
51 | } else {
52 | out.println("线程:" + Thread.currentThread().getName() + "已成功抓取 " +
53 | url + " ipMessage1:" + ipMessages1.size());
54 | }
55 |
56 | //对ip重新进行过滤,只要速度在两秒以内的并且类型为HTTPS的
57 | ipMessages1 = IPFilter.Filter(ipMessages1);
58 |
59 | //对ip进行质量检测,将质量不合格的ip在List里进行删除
60 | IPUtils.IPIsable(ipMessages1);
61 |
62 | //将质量合格的ip合并到共享变量ipMessages中,进行合并的时候保证原子性
63 | synchronized (ipMessages) {
64 | out.println("线程" + Thread.currentThread().getName() + "已进入合并区 " +
65 | "待合并大小 ipMessages1:" + ipMessages1.size());
66 | ipMessages.addAll(ipMessages1);
67 | }
68 | }
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/ipfilter/IPUtils.java:
--------------------------------------------------------------------------------
1 | package ipfilter;
2 |
3 | import IPModel.IPMessage;
4 | import org.apache.http.HttpHost;
5 | import org.apache.http.client.config.RequestConfig;
6 | import org.apache.http.client.methods.CloseableHttpResponse;
7 | import org.apache.http.client.methods.HttpGet;
8 | import org.apache.http.impl.client.CloseableHttpClient;
9 | import org.apache.http.impl.client.HttpClients;
10 |
11 | import java.io.IOException;
12 | import java.util.List;
13 |
14 | import static java.lang.System.out;
15 |
16 | /**
17 | * Created by paranoid on 17-4-21.
18 | * 测试此IP是否有效
19 | */
20 |
21 | public class IPUtils {
22 | public static void IPIsable(List ipMessages1) {
23 | CloseableHttpClient httpClient = HttpClients.createDefault();
24 | CloseableHttpResponse response = null;
25 |
26 | for(int i = 0; i < ipMessages1.size(); i++) {
27 | String ip = ipMessages1.get(i).getIPAddress();
28 | String port = ipMessages1.get(i).getIPPort();
29 |
30 | HttpHost proxy = new HttpHost(ip, Integer.parseInt(port));
31 | RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(5000).
32 | setSocketTimeout(5000).build();
33 | HttpGet httpGet = new HttpGet("https://www.baidu.com");
34 | httpGet.setConfig(config);
35 |
36 | httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
37 | "q=0.9,image/webp,*/*;q=0.8");
38 | httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
39 | httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
40 | httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit" +
41 | "/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
42 |
43 | try {
44 | response = httpClient.execute(httpGet);
45 | } catch (IOException e) {
46 | out.println("不可用代理已删除" + ipMessages1.get(i).getIPAddress()
47 | + ": " + ipMessages1.get(i).getIPPort());
48 | ipMessages1.remove(ipMessages1.get(i));
49 | i--;
50 | }
51 | }
52 |
53 | try {
54 | if (httpClient != null) {
55 | httpClient.close();
56 | }
57 | if (response != null) {
58 | response.close();
59 | }
60 | } catch (IOException e) {
61 | e.printStackTrace();
62 | }
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/.idea/src.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/.idea/src.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/timeutils/MyTimeJob.java:
--------------------------------------------------------------------------------
1 | package timeutils;
2 |
3 | import IPModel.IPMessage;
4 | import database.MyRedis;
5 | import htmlparse.IPPool;
6 | import htmlparse.IPThread;
7 | import htmlparse.URLFecter;
8 | import ipfilter.IPFilter;
9 | import ipfilter.IPUtils;
10 | import java.util.ArrayList;
11 | import java.util.List;
12 | import java.util.TimerTask;
13 |
14 | import static java.lang.System.out;
15 |
16 | /**
17 | * Created by paranoid on 17-4-13.
18 | *
19 | * ip代理池里面最少保存200个代理ip
20 | *
21 | * 多线程主要考虑的就是合理的任务分配以及线程安全性。
22 | *
23 | * implements Job
24 | */
25 |
26 | public class MyTimeJob extends TimerTask {
27 | MyRedis redis = new MyRedis();
28 |
29 | @Override
30 | public void run() {
31 | //首先清空redis数据库中的key
32 | redis.deleteKey("IPPool");
33 |
34 | //存放爬取下来的ip信息
35 | List ipMessages = new ArrayList<>();
36 | List urls = new ArrayList<>();
37 | //对创建的子线程进行收集
38 | List threads = new ArrayList<>();
39 |
40 | //首先使用本机ip爬取xici代理网第一页
41 | ipMessages = URLFecter.urlParse(ipMessages);
42 |
43 | //对得到的IP进行筛选,将IP速度在两秒以内的并且类型是https的留下,其余删除
44 | ipMessages = IPFilter.Filter(ipMessages);
45 |
46 | //对拿到的ip进行质量检测,将质量不合格的ip在List里进行删除
47 | IPUtils.IPIsable(ipMessages);
48 |
49 | //构造种子url(4000条ip)
50 | for (int i = 2; i <= 41; i++) {
51 | urls.add("http://www.xicidaili.com/nn/" + i);
52 | }
53 |
54 | /**
55 | * 对urls进行解析并进行过滤,拿到所有目标IP(使用多线程)
56 | *
57 | * 基本思路是给每个线程分配自己的任务,在这个过程中List ipMessages
58 | * 应该是共享变量,每个线程更新其中数据的时候应该注意线程安全
59 | */
60 | IPPool ipPool = new IPPool(ipMessages);
61 | for (int i = 0; i < 20; i++) {
62 | //给每个线程进行任务的分配
63 | Thread IPThread = new IPThread(urls.subList(i*2, i*2+2), ipPool);
64 | threads.add(IPThread);
65 | IPThread.start();
66 | }
67 |
68 | for (Thread thread : threads) {
69 | try {
70 | thread.join();
71 | } catch (InterruptedException e) {
72 | e.printStackTrace();
73 | }
74 | }
75 |
76 | for(IPMessage ipMessage : ipMessages){
77 | out.println(ipMessage.getIPAddress());
78 | out.println(ipMessage.getIPPort());
79 | out.println(ipMessage.getIPType());
80 | out.println(ipMessage.getIPSpeed());
81 | }
82 |
83 | //将爬取下来的ip信息写进Redis数据库中(List集合)
84 | redis.setIPToList(ipMessages);
85 |
86 | //从redis数据库中随机拿出一个IP
87 | IPMessage ipMessage = redis.getIPByList();
88 | out.println(ipMessage.getIPAddress());
89 | out.println(ipMessage.getIPPort());
90 | redis.close();
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/ip代理与定点爬取.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/htmlparse/URLFecter.java:
--------------------------------------------------------------------------------
1 | package htmlparse;
2 |
3 | import IPModel.IPMessage;
4 | import httpbrowser.MyHttpResponse;
5 | import org.jsoup.Jsoup;
6 | import org.jsoup.nodes.Document;
7 | import org.jsoup.select.Elements;
8 |
9 | import java.io.IOException;
10 | import java.util.List;
11 |
12 | import static java.lang.System.out;
13 |
14 | /**
15 | * Created by paranoid on 17-4-10.
16 | */
17 |
18 | public class URLFecter {
19 | //使用代理进行爬取
20 | public static boolean urlParse(String url, String ip, String port,
21 | List ipMessages1) {
22 | //调用一个类使其返回html源码
23 | String html = MyHttpResponse.getHtml(url, ip, port);
24 |
25 | if(html != null) {
26 | //将html解析成DOM结构
27 | Document document = Jsoup.parse(html);
28 |
29 | //提取所需要的数据
30 | Elements trs = document.select("table[id=ip_list]").select("tbody").select("tr");
31 |
32 | for (int i = 1; i < trs.size(); i++) {
33 | IPMessage ipMessage = new IPMessage();
34 | String ipAddress = trs.get(i).select("td").get(1).text();
35 | String ipPort = trs.get(i).select("td").get(2).text();
36 | String ipType = trs.get(i).select("td").get(5).text();
37 | String ipSpeed = trs.get(i).select("td").get(6).select("div[class=bar]").
38 | attr("title");
39 |
40 | ipMessage.setIPAddress(ipAddress);
41 | ipMessage.setIPPort(ipPort);
42 | ipMessage.setIPType(ipType);
43 | ipMessage.setIPSpeed(ipSpeed);
44 |
45 |
46 | ipMessages1.add(ipMessage);
47 | }
48 |
49 | return true;
50 | } else {
51 | out.println(ip+ ": " + port + " 代理不可用");
52 |
53 | return false;
54 | }
55 | }
56 |
57 | //使用本机IP爬取xici代理网站的第一页
58 | public static List urlParse(List ipMessages) {
59 | String url = "http://www.xicidaili.com/nn/1";
60 | String html = MyHttpResponse.getHtml(url);
61 |
62 | //将html解析成DOM结构
63 | Document document = Jsoup.parse(html);
64 |
65 | //提取所需要的数据
66 | Elements trs = document.select("table[id=ip_list]").select("tbody").select("tr");
67 |
68 | for (int i = 1; i < trs.size(); i++) {
69 | IPMessage ipMessage = new IPMessage();
70 | String ipAddress = trs.get(i).select("td").get(1).text();
71 | String ipPort = trs.get(i).select("td").get(2).text();
72 | String ipType = trs.get(i).select("td").get(5).text();
73 | String ipSpeed = trs.get(i).select("td").get(6).select("div[class=bar]").
74 | attr("title");
75 |
76 | ipMessage.setIPAddress(ipAddress);
77 | ipMessage.setIPPort(ipPort);
78 | ipMessage.setIPType(ipType);
79 | ipMessage.setIPSpeed(ipSpeed);
80 |
81 | ipMessages.add(ipMessage);
82 | }
83 |
84 | return ipMessages;
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/htmlparse/URLFecter.java:
--------------------------------------------------------------------------------
1 | package htmlparse;
2 |
3 | import IPModel.IPMessage;
4 | import httpbrowser.HttpResponseDemo;
5 | import org.apache.http.impl.client.CloseableHttpClient;
6 | import org.apache.http.impl.client.HttpClients;
7 | import org.jsoup.Jsoup;
8 | import org.jsoup.nodes.Document;
9 | import org.jsoup.select.Elements;
10 |
11 | import java.io.IOException;
12 | import java.util.List;
13 |
14 | import static java.lang.System.out;
15 |
16 | /**
17 | * Created by paranoid on 17-4-10.
18 | */
19 |
20 | public class URLFecter {
21 | //使用代理进行爬取
22 | public static List urlParse
23 | (String url, String ip, String port,
24 | List ipMessages) throws ClassNotFoundException, IOException {
25 | //调用一个类使其返回html源码
26 | String html = HttpResponseDemo.getHtml(url, ip, port);
27 |
28 | if(html != null) {
29 | //将html解析成DOM结构
30 | Document document = Jsoup.parse(html);
31 |
32 | //提取所需要的数据
33 | Elements trs = document.select("table[id=ip_list]").select("tbody").select("tr");
34 |
35 | for (int i = 1; i < trs.size(); i++) {
36 | IPMessage ipMessage = new IPMessage();
37 | String ipAddress = trs.get(i).select("td").get(1).text();
38 | String ipPort = trs.get(i).select("td").get(2).text();
39 | String serverAddress = trs.get(i).select("td").get(3).text();
40 | String ipType = trs.get(i).select("td").get(5).text();
41 | String ipSpeed = trs.get(i).select("td").get(6).select("div[class=bar]").
42 | attr("title");
43 |
44 | ipMessage.setIPAddress(ipAddress);
45 | ipMessage.setIPPort(ipPort);
46 | ipMessage.setServerAddress(serverAddress);
47 | ipMessage.setIPType(ipType);
48 | ipMessage.setIPSpeed(ipSpeed);
49 |
50 | ipMessages.add(ipMessage);
51 | }
52 | } else {
53 | out.println(ip+ ": " + port + " 代理不可用");
54 | }
55 |
56 | return ipMessages;
57 | }
58 |
59 | //使用本机IP爬取xici代理网站的第一页
60 | public static List urlParse(String url, List list)
61 | throws IOException, ClassNotFoundException {
62 | String html = HttpResponseDemo.getHtml(url);
63 |
64 | //将html解析成DOM结构
65 | Document document = Jsoup.parse(html);
66 |
67 | //提取所需要的数据
68 | Elements trs = document.select("table[id=ip_list]").select("tbody").select("tr");
69 |
70 | for (int i = 1; i < trs.size(); i++) {
71 | IPMessage ipMessage = new IPMessage();
72 | String ipAddress = trs.get(i).select("td").get(1).text();
73 | String ipPort = trs.get(i).select("td").get(2).text();
74 | String serverAddress = trs.get(i).select("td").get(3).text();
75 | String ipType = trs.get(i).select("td").get(5).text();
76 | String ipSpeed = trs.get(i).select("td").get(6).select("div[class=bar]").
77 | attr("title");
78 |
79 | ipMessage.setIPAddress(ipAddress);
80 | ipMessage.setIPPort(ipPort);
81 | ipMessage.setServerAddress(serverAddress);
82 | ipMessage.setIPType(ipType);
83 | ipMessage.setIPSpeed(ipSpeed);
84 |
85 | list.add(ipMessage);
86 | }
87 |
88 | return list;
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/ip代理与定点爬取.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/timeutils/MyTimeJob.java:
--------------------------------------------------------------------------------
1 | package timeutils;
2 |
3 | import IPModel.DatabaseMessage;
4 | import IPModel.IPMessage;
5 | import database.DataBaseDemo;
6 | import htmlparse.URLFecter;
7 | import ipfilter.IPFilter;
8 | import ipfilter.IPUtils;
9 | import org.quartz.Job;
10 | import org.quartz.JobExecutionContext;
11 | import org.quartz.JobExecutionException;
12 |
13 | import java.io.IOException;
14 | import java.util.ArrayList;
15 | import java.util.List;
16 |
17 | import static java.lang.System.out;
18 |
19 | /**
20 | * Created by paranoid on 17-4-13.
21 | */
22 |
23 | public class MyTimeJob implements Job {
24 | public void execute(JobExecutionContext argv) throws JobExecutionException {
25 | List Urls = new ArrayList<>();
26 | List databaseMessages = new ArrayList<>();
27 | List list = new ArrayList<>();
28 | List ipMessages = new ArrayList<>();
29 | String url = "http://www.xicidaili.com/nn/1";
30 | String IPAddress;
31 | String IPPort;
32 | int k, j;
33 |
34 | //首先使用本机ip进行爬取
35 | try {
36 | list = URLFecter.urlParse(url, list);
37 | } catch (IOException e) {
38 | e.printStackTrace();
39 | } catch (ClassNotFoundException e) {
40 | e.printStackTrace();
41 | }
42 |
43 | //对得到的IP进行筛选,选取链接速度前100名的
44 | list = IPFilter.Filter(list);
45 |
46 | //构造种子Url
47 | for (int i = 1; i <= 5; i++) {
48 | Urls.add("http://www.xicidaili.com/nn/" + i);
49 | }
50 |
51 | //得到所需要的数据
52 | for (k = 0, j = 0; j < Urls.size(); k++) {
53 | url = Urls.get(j);
54 |
55 | IPAddress = list.get(k).getIPAddress();
56 | IPPort = list.get(k).getIPPort();
57 | //每次爬取前的大小
58 | int preIPMessSize = ipMessages.size();
59 | try {
60 | ipMessages = URLFecter.urlParse(url, IPAddress, IPPort, ipMessages);
61 | //每次爬取后的大小
62 | int lastIPMessSize = ipMessages.size();
63 | if(preIPMessSize != lastIPMessSize){
64 | j++;
65 | }
66 |
67 | //对IP进行轮寻调用
68 | if (k >= list.size()) {
69 | k = 0;
70 | }
71 | } catch (ClassNotFoundException e) {
72 | e.printStackTrace();
73 | } catch (IOException e) {
74 | e.printStackTrace();
75 | }
76 | }
77 |
78 | //对得到的IP进行筛选,选取链接速度前100名的
79 | ipMessages = IPFilter.Filter(ipMessages);
80 |
81 | //对ip进行测试,不可用的从数组中删除
82 | ipMessages = IPUtils.IPIsable(ipMessages);
83 |
84 | for(IPMessage ipMessage : ipMessages){
85 | out.println(ipMessage.getIPAddress());
86 | out.println(ipMessage.getIPPort());
87 | out.println(ipMessage.getServerAddress());
88 | out.println(ipMessage.getIPType());
89 | out.println(ipMessage.getIPSpeed());
90 | }
91 |
92 | //将得到的IP存储在数据库中(每次先清空数据库)
93 | try {
94 | DataBaseDemo.delete();
95 | DataBaseDemo.add(ipMessages);
96 | } catch (ClassNotFoundException e) {
97 | e.printStackTrace();
98 | }
99 |
100 | //从数据库中将IP取到
101 | try {
102 | databaseMessages = DataBaseDemo.query();
103 | } catch (ClassNotFoundException e) {
104 | e.printStackTrace();
105 | }
106 |
107 | for (DatabaseMessage databaseMessage: databaseMessages) {
108 | out.println(databaseMessage.getId());
109 | out.println(databaseMessage.getIPAddress());
110 | out.println(databaseMessage.getIPPort());
111 | out.println(databaseMessage.getServerAddress());
112 | out.println(databaseMessage.getIPType());
113 | out.println(databaseMessage.getIPSpeed());
114 | }
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/database/DataBaseDemo.java:
--------------------------------------------------------------------------------
1 | package database;
2 |
3 | import IPModel.DatabaseMessage;
4 | import IPModel.IPMessage;
5 |
6 | import java.sql.*;
7 | import java.util.ArrayList;
8 | import java.util.List;
9 |
10 | /**
11 | * Created by paranoid on 17-4-12.
12 | */
13 | public class DataBaseDemo {
14 | private static String driver = "com.mysql.jdbc.Driver"; //数据库驱动
15 | private static String dbURL = "jdbc:mysql://127.0.0.1:3306/IPProxy" +
16 | "?characterEncoding=utf8&useSSL=true"; //操作的数据库地址,端口及库名
17 | private static String user = "**********"; //数据库用户名
18 | private static String password = "********"; //数据库密码
19 |
20 | //数据库添加功能
21 | public static void add(List list) throws ClassNotFoundException {
22 | Class.forName(driver); //加载数据库驱动
23 |
24 | try(Connection conn = DriverManager.getConnection(dbURL, user, password);
25 | PreparedStatement statement = conn.prepareStatement("INSERT INTO " +
26 | "ProxyPool (IPAddress, IPPort, serverAddress, IPType, IPSpeed)" +
27 | " VALUES (?, ?, ?, ?, ?)")) {
28 |
29 | for(IPMessage ipMessage : list) {
30 | statement.setString(1, ipMessage.getIPAddress());
31 | statement.setString(2, ipMessage.getIPPort());
32 | statement.setString(3, ipMessage.getServerAddress());
33 | statement.setString(4, ipMessage.getIPType());
34 | statement.setString(5, ipMessage.getIPSpeed());
35 |
36 | statement.executeUpdate();
37 | }
38 |
39 | statement.close();
40 | conn.close();
41 | } catch (SQLException e) {
42 | e.printStackTrace();
43 | }
44 | }
45 |
46 | //删除数据库指定IP
47 | public static void deleteIP(int IPid) {
48 | String sql = "DELETE FROM ProxyPool WHERE id = " + IPid;
49 | try(Connection conn = DriverManager.getConnection(dbURL, user, password);
50 | Statement statement = conn.createStatement()) {
51 | statement.executeUpdate(sql);
52 |
53 | statement.close();
54 | conn.close();
55 | } catch (SQLException e) {
56 | e.printStackTrace();
57 | }
58 | }
59 |
60 | //数据库表清除功能(id也一并清除)
61 | public static void delete() {
62 | try(Connection conn = DriverManager.getConnection(dbURL, user, password);
63 | Statement statement = conn.createStatement()) {
64 | statement.executeUpdate("TRUNCATE TABLE ProxyPool");
65 |
66 | statement.close();
67 | conn.close();
68 | }
69 | catch(SQLException e){
70 | e.printStackTrace();
71 | }
72 | }
73 |
74 | //数据库查找功能
75 | public static List query() throws ClassNotFoundException {
76 | Class.forName(driver); //加载数据库驱动
77 | List list = new ArrayList<>();
78 |
79 | try(Connection conn = DriverManager.getConnection(dbURL, user, password);
80 | Statement statement = conn.createStatement()) {
81 | ResultSet resultSet = statement.executeQuery("SELECT * FROM ProxyPool");
82 |
83 | while(resultSet.next()){
84 | DatabaseMessage databaseMessage = new DatabaseMessage();
85 |
86 | databaseMessage.setId(resultSet.getString(1));
87 | databaseMessage.setIPAddress(resultSet.getString(2));
88 | databaseMessage.setIPPort(resultSet.getString(3));
89 | databaseMessage.setServerAddress(resultSet.getString(4));
90 | databaseMessage.setIPType(resultSet.getString(5));
91 | databaseMessage.setIPSpeed(resultSet.getString(6));
92 |
93 | list.add(databaseMessage);
94 | }
95 |
96 | resultSet.close();
97 | statement.close();
98 | conn.close();
99 | } catch (SQLException e) {
100 | e.printStackTrace();
101 | }
102 |
103 | return list;
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/src/main/java/httpbrowser/MyHttpResponse.java:
--------------------------------------------------------------------------------
1 | package httpbrowser;
2 |
3 | import org.apache.http.HttpHost;
4 | import org.apache.http.client.ClientProtocolException;
5 | import org.apache.http.client.config.RequestConfig;
6 | import org.apache.http.client.methods.CloseableHttpResponse;
7 | import org.apache.http.client.methods.HttpGet;
8 | import org.apache.http.impl.client.CloseableHttpClient;
9 | import org.apache.http.impl.client.HttpClients;
10 | import org.apache.http.util.EntityUtils;
11 |
12 | import java.io.IOException;
13 |
14 | import static java.lang.System.out;
15 |
16 | /**
17 | * Created by paranoid on 17-4-10.
18 | * 进行代理访问
19 | *
20 | * setConnectTimeout:设置连接超时时间,单位毫秒.
21 | * setConnectionRequestTimeout:设置从connect Manager获取Connection 超时时间,单位毫秒.
22 | * 这个属性是新加的属性,因为目前版本是可以共享连接池的.
23 | * setSocketTimeout:请求获取数据的超时时间,单位毫秒.如果访问一个接口,多少时间内无法返回数据,
24 | * 就直接放弃此次调用。
25 | */
26 |
27 | public class MyHttpResponse {
28 | public static String getHtml( String url, String ip, String port) {
29 | String entity = null;
30 | CloseableHttpClient httpClient = HttpClients.createDefault();
31 |
32 | //设置代理访问和超时处理
33 | out.println("此时线程: " + Thread.currentThread().getName() + " 爬取所使用的代理为: "
34 | + ip + ":" + port);
35 | HttpHost proxy = new HttpHost(ip, Integer.parseInt(port));
36 | RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(3000).
37 | setSocketTimeout(3000).build();
38 | HttpGet httpGet = new HttpGet(url);
39 | httpGet.setConfig(config);
40 |
41 | httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
42 | "q=0.9,image/webp,*/*;q=0.8");
43 | httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
44 | httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
45 | httpGet.setHeader("Cache-Control", "no-cache");
46 | httpGet.setHeader("Connection", "keep-alive");
47 | httpGet.setHeader("Host", "www.xicidaili.com");
48 | httpGet.setHeader("Pragma", "no-cache");
49 | httpGet.setHeader("Upgrade-Insecure-Requests", "1");
50 | httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " +
51 | "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
52 |
53 | try {
54 | //客户端执行httpGet方法,返回响应
55 | CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
56 |
57 | //得到服务响应状态码
58 | if (httpResponse.getStatusLine().getStatusCode() == 200) {
59 | entity = EntityUtils.toString(httpResponse.getEntity(), "utf-8");
60 | }
61 |
62 | httpResponse.close();
63 | httpClient.close();
64 | } catch (ClientProtocolException e) {
65 | entity = null;
66 | } catch (IOException e) {
67 | entity = null;
68 | }
69 |
70 | return entity;
71 | }
72 |
73 | //对上一个方法的重载,使用本机ip进行网站爬取
74 | public static String getHtml(String url) {
75 | String entity = null;
76 | CloseableHttpClient httpClient = HttpClients.createDefault();
77 |
78 | //设置超时处理
79 | RequestConfig config = RequestConfig.custom().setConnectTimeout(3000).
80 | setSocketTimeout(3000).build();
81 | HttpGet httpGet = new HttpGet(url);
82 | httpGet.setConfig(config);
83 |
84 | httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
85 | "q=0.9,image/webp,*/*;q=0.8");
86 | httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
87 | httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
88 | httpGet.setHeader("Cache-Control", "no-cache");
89 | httpGet.setHeader("Connection", "keep-alive");
90 | httpGet.setHeader("Host", "www.xicidaili.com");
91 | httpGet.setHeader("Pragma", "no-cache");
92 | httpGet.setHeader("Upgrade-Insecure-Requests", "1");
93 | httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " +
94 | "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
95 |
96 | try {
97 | //客户端执行httpGet方法,返回响应
98 | CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
99 |
100 | //得到服务响应状态码
101 | if (httpResponse.getStatusLine().getStatusCode() == 200) {
102 | entity = EntityUtils.toString(httpResponse.getEntity(), "utf-8");
103 | }
104 |
105 | httpResponse.close();
106 | httpClient.close();
107 | } catch (ClientProtocolException e) {
108 | e.printStackTrace();
109 | } catch (IOException e) {
110 | e.printStackTrace();
111 | }
112 |
113 | return entity;
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/src/httpbrowser/HttpResponseDemo.java:
--------------------------------------------------------------------------------
1 | package httpbrowser;
2 |
3 | import org.apache.http.HttpHost;
4 | import org.apache.http.client.ClientProtocolException;
5 | import org.apache.http.client.config.RequestConfig;
6 | import org.apache.http.client.methods.CloseableHttpResponse;
7 | import org.apache.http.client.methods.HttpGet;
8 | import org.apache.http.impl.client.CloseableHttpClient;
9 | import org.apache.http.impl.client.HttpClients;
10 | import org.apache.http.util.EntityUtils;
11 |
12 | import java.io.IOException;
13 |
14 | /**
15 | * Created by paranoid on 17-4-10.
16 | * 进行代理访问
17 | *
18 | * setConnectTimeout:设置连接超时时间,单位毫秒.
19 | * setConnectionRequestTimeout:设置从connect Manager获取Connection 超时时间,单位毫秒.
20 | * 这个属性是新加的属性,因为目前版本是可以共享连接池的.
21 | * setSocketTimeout:请求获取数据的超时时间,单位毫秒.如果访问一个接口,多少时间内无法返回数据,就直接放弃此次调用。
22 | */
23 |
24 | public class HttpResponseDemo {
25 | public static String getHtml( String url, String ip, String port) {
26 | String entity = null;
27 | CloseableHttpClient httpClient = HttpClients.createDefault();
28 |
29 | //设置代理访问和超时处理
30 | HttpHost proxy = new HttpHost(ip, Integer.parseInt(port));
31 | RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(3000).
32 | setSocketTimeout(3000).build();
33 | HttpGet httpGet = new HttpGet(url);
34 | httpGet.setConfig(config);
35 |
36 | httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
37 | "q=0.9,image/webp,*/*;q=0.8");
38 | httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
39 | httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
40 | httpGet.setHeader("Cache-Control", "no-cache");
41 | httpGet.setHeader("Connection", "keep-alive");
42 | httpGet.setHeader("Cookie", "_free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJTRkYjMyM" +
43 | "TU3NGRjMWVhM2JlMDA5Y2IyNzZlZmVlZTYwBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMUhtT0pjcnRT" +
44 | "bm9CZEllSXNTYkNZZWk2Nnp3NGNDcFFSQVFodzk1dmpLZWM9BjsARg%3D%3D--09d8736fbfb9a8544" +
45 | "b46eef48bb320c2b40ee721; Hm_lvt_0cf76c77469e965d2957f0553e6ecf59=1492128157,149" +
46 | "2160558,1492347839,1492764281; Hm_lpvt_0cf76c77469e965d2957f0553e6ecf59=1492764295");
47 | httpGet.setHeader("Host", "www.xicidaili.com");
48 | httpGet.setHeader("Pragma", "no-cache");
49 | httpGet.setHeader("Upgrade-Insecure-Requests", "1");
50 | httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " +
51 | "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
52 |
53 | try {
54 | //客户端执行httpGet方法,返回响应
55 | CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
56 |
57 | //得到服务响应状态码
58 | if (httpResponse.getStatusLine().getStatusCode() == 200) {
59 | entity = EntityUtils.toString(httpResponse.getEntity(), "utf-8");
60 | }
61 |
62 | httpResponse.close();
63 | httpClient.close();
64 | } catch (ClientProtocolException e) {
65 | entity = null;
66 | } catch (IOException e) {
67 | entity = null;
68 | }
69 |
70 | return entity;
71 | }
72 |
73 | //对上一个方法的重载,使用本机ip进行网站爬取
74 | public static String getHtml(String url) throws ClassNotFoundException,
75 | IOException {
76 | String entity = null;
77 | CloseableHttpClient httpClient = HttpClients.createDefault();
78 |
79 | //设置超时处理
80 | RequestConfig config = RequestConfig.custom().setConnectTimeout(5000).
81 | setSocketTimeout(5000).build();
82 | HttpGet httpGet = new HttpGet(url);
83 | httpGet.setConfig(config);
84 |
85 | httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
86 | "q=0.9,image/webp,*/*;q=0.8");
87 | httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
88 | httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
89 | httpGet.setHeader("Cache-Control", "no-cache");
90 | httpGet.setHeader("Connection", "keep-alive");
91 | httpGet.setHeader("Cookie", "_free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJTRkYjMyM" +
92 | "TU3NGRjMWVhM2JlMDA5Y2IyNzZlZmVlZTYwBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMUhtT0pjcnRT" +
93 | "bm9CZEllSXNTYkNZZWk2Nnp3NGNDcFFSQVFodzk1dmpLZWM9BjsARg%3D%3D--09d8736fbfb9a8544" +
94 | "b46eef48bb320c2b40ee721; Hm_lvt_0cf76c77469e965d2957f0553e6ecf59=1492128157,149" +
95 | "2160558,1492347839,1492764281; Hm_lpvt_0cf76c77469e965d2957f0553e6ecf59=1492764295");
96 | httpGet.setHeader("Host", "www.xicidaili.com");
97 | httpGet.setHeader("Pragma", "no-cache");
98 | httpGet.setHeader("Upgrade-Insecure-Requests", "1");
99 | httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " +
100 | "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
101 |
102 | try {
103 | //客户端执行httpGet方法,返回响应
104 | CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
105 |
106 | //得到服务响应状态码
107 | if (httpResponse.getStatusLine().getStatusCode() == 200) {
108 | entity = EntityUtils.toString(httpResponse.getEntity(), "utf-8");
109 | }
110 |
111 | httpResponse.close();
112 | httpClient.close();
113 | } catch (ClientProtocolException e) {
114 | e.printStackTrace();
115 | }
116 |
117 | return entity;
118 | }
119 | }
120 |
--------------------------------------------------------------------------------
/ip代理与定点爬取(重构)/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/ip代理与定点爬取/.idea/dataSources/7bc77221-9c0f-4103-8fcc-36aa3de003b6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 1
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | Column privileges
14 |
15 |
16 | Database privileges
17 |
18 |
19 |
22 |
23 | User defined functions
24 |
25 |
28 |
29 |
30 | help categories
31 |
32 |
35 |
36 | keyword-topic relation
37 |
38 |
41 |
42 |
43 |
44 |
47 |
48 | Stored Procedures
49 |
50 |
51 | Procedure privileges
52 |
53 |
54 | User proxy privileges
55 |
56 |
57 |
58 | MySQL Foreign Servers table
59 |
60 |
61 | Master Information
62 |
63 |
64 | Relay Log Information
65 |
66 |
67 | Worker Information
68 |
69 |
72 |
73 | Table privileges
74 |
75 |
78 |
79 | Leap seconds information for time zones
80 |
81 |
82 | Time zone names
83 |
84 |
85 | Time zone transitions
86 |
87 |
88 | Time zone transition types
89 |
90 |
91 | Users and global privileges
92 |
93 |
94 | 1
95 | char(60)|0
96 | ''
97 |
98 |
99 | 1
100 | char(64)|0
101 | ''
102 |
103 |
104 | 1
105 | char(32)|0
106 | ''
107 |
108 |
109 | 1
110 | char(64)|0
111 | ''
112 |
113 |
114 | 1
115 | char(64)|0
116 | ''
117 |
118 |
119 | 1
120 | timestamp|0
121 | CURRENT_TIMESTAMP
122 |
123 |
124 | 1
125 | set('Select', 'Insert', 'Update', 'References')|0
126 | ''
127 |
128 |
129 | Host
130 | Db
131 | User
132 | Table_name
133 | Column_name
134 | 1
135 |
136 |
137 | 1
138 | char(60)|0
139 | ''
140 |
141 |
142 | 1
143 | char(64)|0
144 | ''
145 |
146 |
147 | 1
148 | char(32)|0
149 | ''
150 |
151 |
152 | 1
153 | enum('N', 'Y')|0
154 | 'N'
155 |
156 |
157 | 1
158 | enum('N', 'Y')|0
159 | 'N'
160 |
161 |
162 | 1
163 | enum('N', 'Y')|0
164 | 'N'
165 |
166 |
167 | 1
168 | enum('N', 'Y')|0
169 | 'N'
170 |
171 |
172 | 1
173 | enum('N', 'Y')|0
174 | 'N'
175 |
176 |
177 | 1
178 | enum('N', 'Y')|0
179 | 'N'
180 |
181 |
182 | 1
183 | enum('N', 'Y')|0
184 | 'N'
185 |
186 |
187 | 1
188 | enum('N', 'Y')|0
189 | 'N'
190 |
191 |
192 | 1
193 | enum('N', 'Y')|0
194 | 'N'
195 |
196 |
197 | 1
198 | enum('N', 'Y')|0
199 | 'N'
200 |
201 |
202 | 1
203 | enum('N', 'Y')|0
204 | 'N'
205 |
206 |
207 | 1
208 | enum('N', 'Y')|0
209 | 'N'
210 |
211 |
212 | 1
213 | enum('N', 'Y')|0
214 | 'N'
215 |
216 |
217 | 1
218 | enum('N', 'Y')|0
219 | 'N'
220 |
221 |
222 | 1
223 | enum('N', 'Y')|0
224 | 'N'
225 |
226 |
227 | 1
228 | enum('N', 'Y')|0
229 | 'N'
230 |
231 |
232 | 1
233 | enum('N', 'Y')|0
234 | 'N'
235 |
236 |
237 | 1
238 | enum('N', 'Y')|0
239 | 'N'
240 |
241 |
242 | 1
243 | enum('N', 'Y')|0
244 | 'N'
245 |
246 |
247 | User
248 |
249 |
250 |
251 | Host
252 | Db
253 | User
254 | 1
255 |
256 |
257 | 1
258 | varchar(64)|0
259 |
260 |
261 | 1
262 | int(11)|0
263 |
264 |
265 | 1
266 | varchar(64)|0
267 |
268 |
269 | float|0
270 |
271 |
272 | 1
273 | timestamp|0
274 | CURRENT_TIMESTAMP
275 |
276 |
277 | varchar(1024)|0
278 |
279 |
280 | cost_name
281 | engine_name
282 | device_type
283 | 1
284 |
285 |
286 | 1
287 | char(64)|0
288 | ''
289 |
290 |
291 | 1
292 | char(64)|0
293 | ''
294 |
295 |
296 | 1
297 | longblob|0
298 |
299 |
300 | 1
301 | char(93)|0
302 | ''
303 |
304 |
305 | datetime|0
306 |
307 |
308 | int(11)|0
309 |
310 |
311 | enum('YEAR', 'QUARTER', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'WEEK', 'SECOND', 'MICROSECOND', 'YEAR_MONTH', 'DAY_HOUR', 'DAY_MINUTE', 'DAY_SECOND', 'HOUR_MINUTE', 'HOUR_SECOND', 'MINUTE_SECOND', 'DAY_MICROSECOND', 'HOUR_MICROSECOND', 'MINUTE_MICROSECOND', 'SECOND_MICROSECOND')|0
312 |
313 |
314 | 1
315 | timestamp|0
316 | CURRENT_TIMESTAMP
317 |
318 |
319 | 1
320 | timestamp|0
321 | '0000-00-00 00:00:00'
322 |
323 |
324 | datetime|0
325 |
326 |
327 | datetime|0
328 |
329 |
330 | datetime|0
331 |
332 |
333 | 1
334 | enum('ENABLED', 'DISABLED', 'SLAVESIDE_DISABLED')|0
335 | 'ENABLED'
336 |
337 |
338 | 1
339 | enum('DROP', 'PRESERVE')|0
340 | 'DROP'
341 |
342 |
343 | 1
344 | set('REAL_AS_FLOAT', 'PIPES_AS_CONCAT', 'ANSI_QUOTES', 'IGNORE_SPACE', 'NOT_USED', 'ONLY_FULL_GROUP_BY', 'NO_UNSIGNED_SUBTRACTION', 'NO_DIR_IN_CREATE', 'POSTGRESQL', 'ORACLE', 'MSSQL', 'DB2', 'MAXDB', 'NO_KEY_OPTIONS', 'NO_TABLE_OPTIONS', 'NO_FIELD_OPTIONS', 'MYSQL323', 'MYSQL40', 'ANSI', 'NO_AUTO_VALUE_ON_ZERO', 'NO_BACKSLASH_ESCAPES', 'STRICT_TRANS_TABLES', 'STRICT_ALL_TABLES', 'NO_ZERO_IN_DATE', 'NO_ZERO_DATE', 'INVALID_DATES', 'ERROR_FOR_DIVISION_BY_ZERO', 'TRADITIONAL', 'NO_AUTO_CREATE_USER', 'HIGH_NOT_PRECEDENCE', 'NO_ENGINE_SUBSTITUTION', 'PAD_CHAR_TO_FULL_LENGTH')|0
345 | ''
346 |
347 |
348 | 1
349 | char(64)|0
350 | ''
351 |
352 |
353 | 1
354 | int(10) unsigned|0
355 |
356 |
357 | 1
358 | char(64)|0
359 | 'SYSTEM'
360 |
361 |
362 | char(32)|0
363 |
364 |
365 | char(32)|0
366 |
367 |
368 | char(32)|0
369 |
370 |
371 | longblob|0
372 |
373 |
374 | db
375 | name
376 | 1
377 |
378 |
379 | 1
380 | char(64)|0
381 | ''
382 |
383 |
384 | 1
385 | tinyint(1)|0
386 | '0'
387 |
388 |
389 | 1
390 | char(128)|0
391 | ''
392 |
393 |
394 | 1
395 | enum('function', 'aggregate')|0
396 |
397 |
398 | name
399 | 1
400 |
401 |
402 | 1
403 | timestamp(6)|0
404 | CURRENT_TIMESTAMP(6)
405 |
406 |
407 | 1
408 | mediumtext|0
409 |
410 |
411 | 1
412 | bigint(21) unsigned|0
413 |
414 |
415 | 1
416 | int(10) unsigned|0
417 |
418 |
419 | 1
420 | varchar(64)|0
421 |
422 |
423 | 1
424 | mediumblob|0
425 |
426 |
427 | uuid of the source where the transaction was originally executed.
428 | 1
429 | char(36)|0
430 |
431 |
432 | First number of interval.
433 | 1
434 | bigint(20)|0
435 |
436 |
437 | Last number of interval.
438 | 1
439 | bigint(20)|0
440 |
441 |
442 | source_uuid
443 | interval_start
444 | 1
445 |
446 |
447 | 1
448 | smallint(5) unsigned|0
449 |
450 |
451 | 1
452 | char(64)|0
453 |
454 |
455 | smallint(5) unsigned|0
456 |
457 |
458 | 1
459 | text|0
460 |
461 |
462 | name
463 |
464 | 1
465 |
466 |
467 | help_category_id
468 | 1
469 |
470 |
471 | name
472 | name
473 |
474 |
475 | 1
476 | int(10) unsigned|0
477 |
478 |
479 | 1
480 | char(64)|0
481 |
482 |
483 | name
484 |
485 | 1
486 |
487 |
488 | help_keyword_id
489 | 1
490 |
491 |
492 | name
493 | name
494 |
495 |
496 | 1
497 | int(10) unsigned|0
498 |
499 |
500 | 1
501 | int(10) unsigned|0
502 |
503 |
504 | help_keyword_id
505 | help_topic_id
506 | 1
507 |
508 |
509 | 1
510 | int(10) unsigned|0
511 |
512 |
513 | 1
514 | char(64)|0
515 |
516 |
517 | 1
518 | smallint(5) unsigned|0
519 |
520 |
521 | 1
522 | text|0
523 |
524 |
525 | 1
526 | text|0
527 |
528 |
529 | 1
530 | text|0
531 |
532 |
533 | name
534 |
535 | 1
536 |
537 |
538 | help_topic_id
539 | 1
540 |
541 |
542 | name
543 | name
544 |
545 |
546 | 1
547 | varchar(64)|0
548 |
549 |
550 | 1
551 | varchar(64)|0
552 |
553 |
554 | 1
555 | varchar(64)|0
556 |
557 |
558 | 1
559 | timestamp|0
560 | CURRENT_TIMESTAMP
561 |
562 |
563 | 1
564 | varchar(64)|0
565 |
566 |
567 | 1
568 | bigint(20) unsigned|0
569 |
570 |
571 | bigint(20) unsigned|0
572 |
573 |
574 | 1
575 | varchar(1024)|0
576 |
577 |
578 | database_name
579 | table_name
580 | index_name
581 | stat_name
582 | 1
583 |
584 |
585 | 1
586 | varchar(64)|0
587 |
588 |
589 | 1
590 | varchar(64)|0
591 |
592 |
593 | 1
594 | timestamp|0
595 | CURRENT_TIMESTAMP
596 |
597 |
598 | 1
599 | bigint(20) unsigned|0
600 |
601 |
602 | 1
603 | bigint(20) unsigned|0
604 |
605 |
606 | 1
607 | bigint(20) unsigned|0
608 |
609 |
610 | database_name
611 | table_name
612 | 1
613 |
614 |
615 | 1
616 | bigint(20) unsigned|0
617 |
618 |
619 | 1
620 | varchar(255)|0
621 |
622 |
623 | 1
624 | bigint(20) unsigned|0
625 |
626 |
627 | 1
628 | int(10) unsigned|0
629 |
630 |
631 | 1
632 | int(10) unsigned|0
633 |
634 |
635 | 1
636 | int(10) unsigned|0
637 |
638 |
639 | 1
640 | int(10) unsigned|0
641 |
642 |
643 | 1
644 | int(10) unsigned|0
645 |
646 |
647 | 1
648 | bigint(20) unsigned|0
649 |
650 |
651 | 1
652 | int(10) unsigned|0
653 |
654 |
655 | 1
656 | bigint(20) unsigned|0
657 |
658 |
659 | 1
660 | varchar(255)|0
661 |
662 |
663 | epoch
664 | orig_server_id
665 | orig_epoch
666 | 1
667 |
668 |
669 | 1
670 | varchar(64)|0
671 | ''
672 |
673 |
674 | 1
675 | varchar(128)|0
676 | ''
677 |
678 |
679 | name
680 | 1
681 |
682 |
683 | 1
684 | char(64)|0
685 | ''
686 |
687 |
688 | 1
689 | char(64)|0
690 | ''
691 |
692 |
693 | 1
694 | enum('FUNCTION', 'PROCEDURE')|0
695 |
696 |
697 | 1
698 | char(64)|0
699 | ''
700 |
701 |
702 | 1
703 | enum('SQL')|0
704 | 'SQL'
705 |
706 |
707 | 1
708 | enum('CONTAINS_SQL', 'NO_SQL', 'READS_SQL_DATA', 'MODIFIES_SQL_DATA')|0
709 | 'CONTAINS_SQL'
710 |
711 |
712 | 1
713 | enum('YES', 'NO')|0
714 | 'NO'
715 |
716 |
717 | 1
718 | enum('INVOKER', 'DEFINER')|0
719 | 'DEFINER'
720 |
721 |
722 | 1
723 | blob|0
724 |
725 |
726 | 1
727 | longblob|0
728 |
729 |
730 | 1
731 | longblob|0
732 |
733 |
734 | 1
735 | char(93)|0
736 | ''
737 |
738 |
739 | 1
740 | timestamp|0
741 | CURRENT_TIMESTAMP
742 |
743 |
744 | 1
745 | timestamp|0
746 | '0000-00-00 00:00:00'
747 |
748 |
749 | 1
750 | set('REAL_AS_FLOAT', 'PIPES_AS_CONCAT', 'ANSI_QUOTES', 'IGNORE_SPACE', 'NOT_USED', 'ONLY_FULL_GROUP_BY', 'NO_UNSIGNED_SUBTRACTION', 'NO_DIR_IN_CREATE', 'POSTGRESQL', 'ORACLE', 'MSSQL', 'DB2', 'MAXDB', 'NO_KEY_OPTIONS', 'NO_TABLE_OPTIONS', 'NO_FIELD_OPTIONS', 'MYSQL323', 'MYSQL40', 'ANSI', 'NO_AUTO_VALUE_ON_ZERO', 'NO_BACKSLASH_ESCAPES', 'STRICT_TRANS_TABLES', 'STRICT_ALL_TABLES', 'NO_ZERO_IN_DATE', 'NO_ZERO_DATE', 'INVALID_DATES', 'ERROR_FOR_DIVISION_BY_ZERO', 'TRADITIONAL', 'NO_AUTO_CREATE_USER', 'HIGH_NOT_PRECEDENCE', 'NO_ENGINE_SUBSTITUTION', 'PAD_CHAR_TO_FULL_LENGTH')|0
751 | ''
752 |
753 |
754 | 1
755 | text|0
756 |
757 |
758 | char(32)|0
759 |
760 |
761 | char(32)|0
762 |
763 |
764 | char(32)|0
765 |
766 |
767 | longblob|0
768 |
769 |
770 | db
771 | name
772 | type
773 | 1
774 |
775 |
776 | 1
777 | char(60)|0
778 | ''
779 |
780 |
781 | 1
782 | char(64)|0
783 | ''
784 |
785 |
786 | 1
787 | char(32)|0
788 | ''
789 |
790 |
791 | 1
792 | char(64)|0
793 | ''
794 |
795 |
796 | 1
797 | enum('FUNCTION', 'PROCEDURE')|0
798 |
799 |
800 | 1
801 | char(93)|0
802 | ''
803 |
804 |
805 | 1
806 | set('Execute', 'Alter Routine', 'Grant')|0
807 | ''
808 |
809 |
810 | 1
811 | timestamp|0
812 | CURRENT_TIMESTAMP
813 |
814 |
815 | Grantor
816 |
817 |
818 |
819 | Host
820 | Db
821 | User
822 | Routine_name
823 | Routine_type
824 | 1
825 |
826 |
827 | 1
828 | char(60)|0
829 | ''
830 |
831 |
832 | 1
833 | char(32)|0
834 | ''
835 |
836 |
837 | 1
838 | char(60)|0
839 | ''
840 |
841 |
842 | 1
843 | char(32)|0
844 | ''
845 |
846 |
847 | 1
848 | tinyint(1)|0
849 | '0'
850 |
851 |
852 | 1
853 | char(93)|0
854 | ''
855 |
856 |
857 | 1
858 | timestamp|0
859 | CURRENT_TIMESTAMP
860 |
861 |
862 | Grantor
863 |
864 |
865 |
866 | Host
867 | User
868 | Proxied_host
869 | Proxied_user
870 | 1
871 |
872 |
873 | 1
874 | varchar(64)|0
875 |
876 |
877 | float|0
878 |
879 |
880 | 1
881 | timestamp|0
882 | CURRENT_TIMESTAMP
883 |
884 |
885 | varchar(1024)|0
886 |
887 |
888 | cost_name
889 | 1
890 |
891 |
892 | 1
893 | char(64)|0
894 | ''
895 |
896 |
897 | 1
898 | char(64)|0
899 | ''
900 |
901 |
902 | 1
903 | char(64)|0
904 | ''
905 |
906 |
907 | 1
908 | char(64)|0
909 | ''
910 |
911 |
912 | 1
913 | char(64)|0
914 | ''
915 |
916 |
917 | 1
918 | int(4)|0
919 | '0'
920 |
921 |
922 | 1
923 | char(64)|0
924 | ''
925 |
926 |
927 | 1
928 | char(64)|0
929 | ''
930 |
931 |
932 | 1
933 | char(64)|0
934 | ''
935 |
936 |
937 | Server_name
938 | 1
939 |
940 |
941 | Number of lines in the file.
942 | 1
943 | int(10) unsigned|0
944 |
945 |
946 | The name of the master binary log currently being read from the master.
947 | 1
948 | text|0
949 |
950 |
951 | The master log position of the last read event.
952 | 1
953 | bigint(20) unsigned|0
954 |
955 |
956 | The host name of the master.
957 | char(64)|0
958 |
959 |
960 | The user name used to connect to the master.
961 | text|0
962 |
963 |
964 | The password used to connect to the master.
965 | text|0
966 |
967 |
968 | The network port used to connect to the master.
969 | 1
970 | int(10) unsigned|0
971 |
972 |
973 | The period (in seconds) that the slave will wait before trying to reconnect to the master.
974 | 1
975 | int(10) unsigned|0
976 |
977 |
978 | Indicates whether the server supports SSL connections.
979 | 1
980 | tinyint(1)|0
981 |
982 |
983 | The file used for the Certificate Authority (CA) certificate.
984 | text|0
985 |
986 |
987 | The path to the Certificate Authority (CA) certificates.
988 | text|0
989 |
990 |
991 | The name of the SSL certificate file.
992 | text|0
993 |
994 |
995 | The name of the cipher in use for the SSL connection.
996 | text|0
997 |
998 |
999 | The name of the SSL key file.
1000 | text|0
1001 |
1002 |
1003 | Whether to verify the server certificate.
1004 | 1
1005 | tinyint(1)|0
1006 |
1007 |
1008 | 1
1009 | float|0
1010 |
1011 |
1012 | Displays which interface is employed when connecting to the MySQL server
1013 | text|0
1014 |
1015 |
1016 | The number of server IDs to be ignored, followed by the actual server IDs
1017 | text|0
1018 |
1019 |
1020 | The master server uuid.
1021 | text|0
1022 |
1023 |
1024 | Number of reconnect attempts, to the master, before giving up.
1025 | 1
1026 | bigint(20) unsigned|0
1027 |
1028 |
1029 | The file used for the Certificate Revocation List (CRL)
1030 | text|0
1031 |
1032 |
1033 | The path used for Certificate Revocation List (CRL) files
1034 | text|0
1035 |
1036 |
1037 | Indicates whether GTIDs will be used to retrieve events from the master.
1038 | 1
1039 | tinyint(1)|0
1040 |
1041 |
1042 | The channel on which the slave is connected to a source. Used in Multisource Replication
1043 | 1
1044 | char(64)|0
1045 |
1046 |
1047 | Tls version
1048 | text|0
1049 |
1050 |
1051 | Channel_name
1052 | 1
1053 |
1054 |
1055 | Number of lines in the file or rows in the table. Used to version table definitions.
1056 | 1
1057 | int(10) unsigned|0
1058 |
1059 |
1060 | The name of the current relay log file.
1061 | 1
1062 | text|0
1063 |
1064 |
1065 | The relay log position of the last executed event.
1066 | 1
1067 | bigint(20) unsigned|0
1068 |
1069 |
1070 | The name of the master binary log file from which the events in the relay log file were read.
1071 | 1
1072 | text|0
1073 |
1074 |
1075 | The master log position of the last executed event.
1076 | 1
1077 | bigint(20) unsigned|0
1078 |
1079 |
1080 | The number of seconds that the slave must lag behind the master.
1081 | 1
1082 | int(11)|0
1083 |
1084 |
1085 | 1
1086 | int(10) unsigned|0
1087 |
1088 |
1089 | Internal Id that uniquely identifies this record.
1090 | 1
1091 | int(10) unsigned|0
1092 |
1093 |
1094 | The channel on which the slave is connected to a source. Used in Multisource Replication
1095 | 1
1096 | char(64)|0
1097 |
1098 |
1099 | Channel_name
1100 | 1
1101 |
1102 |
1103 | 1
1104 | int(10) unsigned|0
1105 |
1106 |
1107 | 1
1108 | text|0
1109 |
1110 |
1111 | 1
1112 | bigint(20) unsigned|0
1113 |
1114 |
1115 | 1
1116 | text|0
1117 |
1118 |
1119 | 1
1120 | bigint(20) unsigned|0
1121 |
1122 |
1123 | 1
1124 | text|0
1125 |
1126 |
1127 | 1
1128 | bigint(20) unsigned|0
1129 |
1130 |
1131 | 1
1132 | text|0
1133 |
1134 |
1135 | 1
1136 | bigint(20) unsigned|0
1137 |
1138 |
1139 | 1
1140 | int(10) unsigned|0
1141 |
1142 |
1143 | 1
1144 | int(10) unsigned|0
1145 |
1146 |
1147 | 1
1148 | blob|0
1149 |
1150 |
1151 | The channel on which the slave is connected to a source. Used in Multisource Replication
1152 | 1
1153 | char(64)|0
1154 |
1155 |
1156 | Channel_name
1157 | Id
1158 | 1
1159 |
1160 |
1161 | 1
1162 | timestamp(6)|0
1163 | CURRENT_TIMESTAMP(6)
1164 |
1165 |
1166 | 1
1167 | mediumtext|0
1168 |
1169 |
1170 | 1
1171 | time(6)|0
1172 |
1173 |
1174 | 1
1175 | time(6)|0
1176 |
1177 |
1178 | 1
1179 | int(11)|0
1180 |
1181 |
1182 | 1
1183 | int(11)|0
1184 |
1185 |
1186 | 1
1187 | varchar(512)|0
1188 |
1189 |
1190 | 1
1191 | int(11)|0
1192 |
1193 |
1194 | 1
1195 | int(11)|0
1196 |
1197 |
1198 | 1
1199 | int(10) unsigned|0
1200 |
1201 |
1202 | 1
1203 | mediumblob|0
1204 |
1205 |
1206 | 1
1207 | bigint(21) unsigned|0
1208 |
1209 |
1210 | 1
1211 | char(60)|0
1212 | ''
1213 |
1214 |
1215 | 1
1216 | char(64)|0
1217 | ''
1218 |
1219 |
1220 | 1
1221 | char(32)|0
1222 | ''
1223 |
1224 |
1225 | 1
1226 | char(64)|0
1227 | ''
1228 |
1229 |
1230 | 1
1231 | char(93)|0
1232 | ''
1233 |
1234 |
1235 | 1
1236 | timestamp|0
1237 | CURRENT_TIMESTAMP
1238 |
1239 |
1240 | 1
1241 | set('Select', 'Insert', 'Update', 'Delete', 'Create', 'Drop', 'Grant', 'References', 'Index', 'Alter', 'Create View', 'Show view', 'Trigger')|0
1242 | ''
1243 |
1244 |
1245 | 1
1246 | set('Select', 'Insert', 'Update', 'References')|0
1247 | ''
1248 |
1249 |
1250 | Grantor
1251 |
1252 |
1253 |
1254 | Host
1255 | Db
1256 | User
1257 | Table_name
1258 | 1
1259 |
1260 |
1261 | 1
1262 | int(10) unsigned|0
1263 | 1
1264 |
1265 |
1266 | 1
1267 | enum('Y', 'N')|0
1268 | 'N'
1269 |
1270 |
1271 | Time_zone_id
1272 | 1
1273 |
1274 |
1275 | 1
1276 | bigint(20)|0
1277 |
1278 |
1279 | 1
1280 | int(11)|0
1281 |
1282 |
1283 | Transition_time
1284 | 1
1285 |
1286 |
1287 | 1
1288 | char(64)|0
1289 |
1290 |
1291 | 1
1292 | int(10) unsigned|0
1293 |
1294 |
1295 | Name
1296 | 1
1297 |
1298 |
1299 | 1
1300 | int(10) unsigned|0
1301 |
1302 |
1303 | 1
1304 | bigint(20)|0
1305 |
1306 |
1307 | 1
1308 | int(10) unsigned|0
1309 |
1310 |
1311 | Time_zone_id
1312 | Transition_time
1313 | 1
1314 |
1315 |
1316 | 1
1317 | int(10) unsigned|0
1318 |
1319 |
1320 | 1
1321 | int(10) unsigned|0
1322 |
1323 |
1324 | 1
1325 | int(11)|0
1326 | '0'
1327 |
1328 |
1329 | 1
1330 | tinyint(3) unsigned|0
1331 | '0'
1332 |
1333 |
1334 | 1
1335 | char(8)|0
1336 | ''
1337 |
1338 |
1339 | Time_zone_id
1340 | Transition_type_id
1341 | 1
1342 |
1343 |
1344 | 1
1345 | char(60)|0
1346 | ''
1347 |
1348 |
1349 | 1
1350 | char(32)|0
1351 | ''
1352 |
1353 |
1354 | 1
1355 | enum('N', 'Y')|0
1356 | 'N'
1357 |
1358 |
1359 | 1
1360 | enum('N', 'Y')|0
1361 | 'N'
1362 |
1363 |
1364 | 1
1365 | enum('N', 'Y')|0
1366 | 'N'
1367 |
1368 |
1369 | 1
1370 | enum('N', 'Y')|0
1371 | 'N'
1372 |
1373 |
1374 | 1
1375 | enum('N', 'Y')|0
1376 | 'N'
1377 |
1378 |
1379 | 1
1380 | enum('N', 'Y')|0
1381 | 'N'
1382 |
1383 |
1384 | 1
1385 | enum('N', 'Y')|0
1386 | 'N'
1387 |
1388 |
1389 | 1
1390 | enum('N', 'Y')|0
1391 | 'N'
1392 |
1393 |
1394 | 1
1395 | enum('N', 'Y')|0
1396 | 'N'
1397 |
1398 |
1399 | 1
1400 | enum('N', 'Y')|0
1401 | 'N'
1402 |
1403 |
1404 | 1
1405 | enum('N', 'Y')|0
1406 | 'N'
1407 |
1408 |
1409 | 1
1410 | enum('N', 'Y')|0
1411 | 'N'
1412 |
1413 |
1414 | 1
1415 | enum('N', 'Y')|0
1416 | 'N'
1417 |
1418 |
1419 | 1
1420 | enum('N', 'Y')|0
1421 | 'N'
1422 |
1423 |
1424 | 1
1425 | enum('N', 'Y')|0
1426 | 'N'
1427 |
1428 |
1429 | 1
1430 | enum('N', 'Y')|0
1431 | 'N'
1432 |
1433 |
1434 | 1
1435 | enum('N', 'Y')|0
1436 | 'N'
1437 |
1438 |
1439 | 1
1440 | enum('N', 'Y')|0
1441 | 'N'
1442 |
1443 |
1444 | 1
1445 | enum('N', 'Y')|0
1446 | 'N'
1447 |
1448 |
1449 | 1
1450 | enum('N', 'Y')|0
1451 | 'N'
1452 |
1453 |
1454 | 1
1455 | enum('N', 'Y')|0
1456 | 'N'
1457 |
1458 |
1459 | 1
1460 | enum('N', 'Y')|0
1461 | 'N'
1462 |
1463 |
1464 | 1
1465 | enum('N', 'Y')|0
1466 | 'N'
1467 |
1468 |
1469 | 1
1470 | enum('N', 'Y')|0
1471 | 'N'
1472 |
1473 |
1474 | 1
1475 | enum('N', 'Y')|0
1476 | 'N'
1477 |
1478 |
1479 | 1
1480 | enum('N', 'Y')|0
1481 | 'N'
1482 |
1483 |
1484 | 1
1485 | enum('N', 'Y')|0
1486 | 'N'
1487 |
1488 |
1489 | 1
1490 | enum('N', 'Y')|0
1491 | 'N'
1492 |
1493 |
1494 | 1
1495 | enum('N', 'Y')|0
1496 | 'N'
1497 |
1498 |
1499 | 1
1500 | enum('', 'ANY', 'X509', 'SPECIFIED')|0
1501 | ''
1502 |
1503 |
1504 | 1
1505 | blob|0
1506 |
1507 |
1508 | 1
1509 | blob|0
1510 |
1511 |
1512 | 1
1513 | blob|0
1514 |
1515 |
1516 | 1
1517 | int(11) unsigned|0
1518 | '0'
1519 |
1520 |
1521 | 1
1522 | int(11) unsigned|0
1523 | '0'
1524 |
1525 |
1526 | 1
1527 | int(11) unsigned|0
1528 | '0'
1529 |
1530 |
1531 | 1
1532 | int(11) unsigned|0
1533 | '0'
1534 |
1535 |
1536 | 1
1537 | char(64)|0
1538 | 'mysql_native_password'
1539 |
1540 |
1541 | text|0
1542 |
1543 |
1544 | 1
1545 | enum('N', 'Y')|0
1546 | 'N'
1547 |
1548 |
1549 | timestamp|0
1550 |
1551 |
1552 | smallint(5) unsigned|0
1553 |
1554 |
1555 | 1
1556 | enum('N', 'Y')|0
1557 | 'N'
1558 |
1559 |
1560 | Host
1561 | User
1562 | 1
1563 |
1564 |
1565 |
--------------------------------------------------------------------------------