├── ip代理与定点爬取(重构) ├── target │ ├── classes │ │ ├── db-config.properties │ │ ├── IPModel │ │ │ ├── IPMessage.class │ │ │ └── SerializeUtil.class │ │ ├── database │ │ │ ├── MyRedis.class │ │ │ └── RedisDB.class │ │ ├── htmlparse │ │ │ ├── IPPool.class │ │ │ ├── IPThread.class │ │ │ └── URLFecter.class │ │ ├── ipfilter │ │ │ ├── IPFilter.class │ │ │ └── IPUtils.class │ │ ├── timeutils │ │ │ ├── MyTimer.class │ │ │ └── MyTimeJob.class │ │ └── httpbrowser │ │ │ └── MyHttpResponse.class │ └── test-classes │ │ ├── testRedis.class │ │ └── testTimer.class ├── src │ ├── main │ │ ├── resources │ │ │ └── db-config.properties │ │ └── java │ │ │ ├── .idea │ │ │ ├── copyright │ │ │ │ └── profiles_settings.xml │ │ │ ├── modules.xml │ │ │ ├── compiler.xml │ │ │ ├── misc.xml │ │ │ └── src.iml │ │ │ ├── timeutils │ │ │ ├── MyTimer.java │ │ │ └── MyTimeJob.java │ │ │ ├── htmlparse │ │ │ ├── IPThread.java │ │ │ ├── IPPool.java │ │ │ └── URLFecter.java │ │ │ ├── ipfilter │ │ │ ├── IPFilter.java │ │ │ └── IPUtils.java │ │ │ ├── database │ │ │ ├── MyRedis.java │ │ │ └── RedisDB.java │ │ │ ├── IPModel │ │ │ ├── IPMessage.java │ │ │ └── SerializeUtil.java │ │ │ └── httpbrowser │ │ │ └── MyHttpResponse.java │ └── test │ │ └── java │ │ ├── testTimer.java │ │ └── testRedis.java ├── .idea │ ├── copyright │ │ └── profiles_settings.xml │ ├── kotlinc.xml │ ├── modules.xml │ ├── misc.xml │ ├── libraries │ │ ├── Maven__redis_clients_jedis_2_9_0.xml │ │ └── Maven__org_apache_commons_commons_pool2_2_4_2.xml │ ├── compiler.xml │ ├── dataSources.local.xml │ ├── dataSources.xml │ ├── dataSources │ │ └── c37050e9-c728-46b8-9a05-b8b36e206d80.xml │ └── uiDesigner.xml ├── README.md ├── pom.xml └── ip代理与定点爬取.iml ├── ip代理与定点爬取 ├── .idea │ ├── copyright │ │ └── profiles_settings.xml │ ├── modules.xml │ ├── compiler.xml │ ├── dataSources.local.xml │ ├── dataSources.xml │ ├── misc.xml │ └── dataSources │ │ └── 7bc77221-9c0f-4103-8fcc-36aa3de003b6.xml ├── src │ ├── .idea │ │ ├── copyright │ │ │ └── 
profiles_settings.xml │ │ ├── modules.xml │ │ ├── compiler.xml │ │ ├── misc.xml │ │ └── src.iml │ ├── ipfilter │ │ ├── IPFilter.java │ │ └── IPUtils.java │ ├── IPModel │ │ ├── IPMessage.java │ │ └── DatabaseMessage.java │ ├── timeutils │ │ ├── TimeUpdate.java │ │ └── MyTimeJob.java │ ├── htmlparse │ │ └── URLFecter.java │ ├── database │ │ └── DataBaseDemo.java │ └── httpbrowser │ │ └── HttpResponseDemo.java └── ip代理与定点爬取.iml └── README.md /ip代理与定点爬取(重构)/target/classes/db-config.properties: -------------------------------------------------------------------------------- 1 | jedis.addr=127.0.0.1 2 | jedis.port=6379 3 | jedis.passwd=6204576387 -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/resources/db-config.properties: -------------------------------------------------------------------------------- 1 | jedis.addr=127.0.0.1 2 | jedis.port=6379 3 | jedis.passwd=******** 4 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /ip代理与定点爬取/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/test-classes/testRedis.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/test-classes/testRedis.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/test-classes/testTimer.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/test-classes/testTimer.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/IPModel/IPMessage.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/IPModel/IPMessage.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/database/MyRedis.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/database/MyRedis.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/database/RedisDB.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/database/RedisDB.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/htmlparse/IPPool.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/htmlparse/IPPool.class -------------------------------------------------------------------------------- 
/ip代理与定点爬取(重构)/target/classes/ipfilter/IPFilter.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/ipfilter/IPFilter.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/ipfilter/IPUtils.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/ipfilter/IPUtils.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/timeutils/MyTimer.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/timeutils/MyTimer.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/htmlparse/IPThread.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/htmlparse/IPThread.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/htmlparse/URLFecter.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/htmlparse/URLFecter.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/timeutils/MyTimeJob.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/timeutils/MyTimeJob.class 
-------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/IPModel/SerializeUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/IPModel/SerializeUtil.class -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/target/classes/httpbrowser/MyHttpResponse.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhengyi/ip-proxy-pools-regularly/HEAD/ip代理与定点爬取(重构)/target/classes/httpbrowser/MyHttpResponse.class -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ip-proxy-pools-regularly 2 | 实现定时爬取与IP代理池 3 | 4 | **更详细的README.md请参考:**[ip-proxy-pools-regularly](https://github.com/championheng/ip-proxy-pools-regularly/tree/master/ip%E4%BB%A3%E7%90%86%E4%B8%8E%E5%AE%9A%E7%82%B9%E7%88%AC%E5%8F%96(%E9%87%8D%E6%9E%84)) 5 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/kotlinc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | -------------------------------------------------------------------------------- /ip代理与定点爬取/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/.idea/modules.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/libraries/Maven__redis_clients_jedis_2_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/timeutils/MyTimer.java: -------------------------------------------------------------------------------- 1 | package timeutils; 2 | 3 | import java.util.Calendar; 4 | import java.util.Date; 5 | import java.util.Timer; 6 | 7 | /** 8 | * Created by paranoid on 17-4-13. 
9 | */ 10 | 11 | public class MyTimer { 12 | public static void main(String[] args) { 13 | MyTimeJob job = new MyTimeJob(); 14 | Timer timer = new Timer(); 15 | 16 | Calendar calendar = Calendar.getInstance(); 17 | Date date = calendar.getTime(); 18 | 19 | //设置定时任务,从现在开始,每24小时执行一次 20 | timer.schedule(job, date, 24*60*60*1000); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/libraries/Maven__org_apache_commons_commons_pool2_2_4_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/test/java/testTimer.java: -------------------------------------------------------------------------------- 1 | import java.util.Calendar; 2 | import java.util.Date; 3 | 4 | import static java.lang.System.out; 5 | 6 | /** 7 | * Created by hg_yi on 17-8-9. 
/**
 * Scratch program: prints today's date with the clock fields set to 06:00:00.
 */
public class testTimer {
    public static void main(String[] args) {
        Calendar today = Calendar.getInstance();

        // Keep today's year/month/day and move the time of day to 06:00:00
        // (the intended start time of the scheduled task).
        today.set(
                today.get(Calendar.YEAR),
                today.get(Calendar.MONTH),
                today.get(Calendar.DAY_OF_MONTH),
                6, 0, 0);

        Date startTime = today.getTime();
        out.println(startTime);
    }
}
9 | */ 10 | public class IPThread extends Thread { 11 | private List urls; 12 | private IPPool ipPool; 13 | 14 | public IPThread(List urls, IPPool ipPool) { 15 | this.urls = urls; 16 | this.ipPool = ipPool; 17 | } 18 | 19 | @Override 20 | public void run() { 21 | //进行ip的抓取 22 | for (String url : urls) { 23 | out.println(Thread.currentThread().getName() + "爬取的地址为:" + url); 24 | } 25 | ipPool.getIP(urls); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /ip代理与定点爬取/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /ip代理与定点爬取/.idea/dataSources.local.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | #@ 7 | ` 8 | 9 | 10 | master_key 11 | root 12 | mysql: 13 | 14 | 15 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/dataSources.local.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | #@ 7 | ` 8 | 9 | 10 | master_key 11 | root 12 | *:IPProxyPool 13 | 14 | 15 | 
-------------------------------------------------------------------------------- /ip代理与定点爬取/.idea/dataSources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | mysql 6 | true 7 | com.mysql.jdbc.Driver 8 | jdbc:mysql://localhost:3306/mysql 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/dataSources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | mysql 6 | true 7 | com.mysql.jdbc.Driver 8 | jdbc:mysql://localhost:3306/IPProxyPool 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/test/java/testRedis.java: -------------------------------------------------------------------------------- 1 | import IPModel.IPMessage; 2 | import database.MyRedis; 3 | 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import static java.lang.System.out; 8 | 9 | /** 10 | * Created by hg_yi on 17-8-9. 
11 | */ 12 | public class testRedis { 13 | public static void main(String[] args) { 14 | List ipMessages = new ArrayList<>(); 15 | // IPMessage ipMessage = new IPMessage(); 16 | 17 | // ipMessage.setIPAddress("175.172.212.178"); 18 | // ipMessage.setIPPort("80"); 19 | // ipMessage.setIPType("HTTPS"); 20 | // ipMessage.setIPSpeed("3.837秒"); 21 | // 22 | // ipMessages.add(ipMessage); 23 | // 24 | MyRedis redis = new MyRedis(); 25 | // redis.setIPToList(ipMessages); 26 | IPMessage ipMessage = redis.getIPByList(); 27 | 28 | out.println(ipMessage.getIPAddress()); 29 | out.println(ipMessage.getIPPort()); 30 | 31 | redis.close(); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/ipfilter/IPFilter.java: -------------------------------------------------------------------------------- 1 | package ipfilter; 2 | 3 | import IPModel.IPMessage; 4 | 5 | import java.util.*; 6 | 7 | import static java.lang.System.out; 8 | 9 | /** 10 | * Created by paranoid on 17-4-14. 
11 | * 对得到的IP进行筛选,将IP速度在两秒以内的并且类型是https的留下,其余删除 12 | */ 13 | 14 | public class IPFilter { 15 | //对IP进行过滤 16 | public static List Filter(List ipMessages1) { 17 | List newIPMessages = new ArrayList<>(); 18 | 19 | for (int i = 0; i < ipMessages1.size(); i++) { 20 | String ipType = ipMessages1.get(i).getIPType(); 21 | String ipSpeed = ipMessages1.get(i).getIPSpeed(); 22 | 23 | ipSpeed = ipSpeed.substring(0, ipSpeed.indexOf('秒')); 24 | double Speed = Double.parseDouble(ipSpeed); 25 | 26 | if (ipType.equals("HTTPS") && Speed <= 2.0) { 27 | newIPMessages.add(ipMessages1.get(i)); 28 | } 29 | } 30 | 31 | return newIPMessages; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/README.md: -------------------------------------------------------------------------------- 1 | # IP代理池(ip-proxy-pools) 2 | 平时在写爬虫的时候,最怕的事情就是IP封禁。这是我自制的一个IP代理池,使用Java语言进行编写,结合Redis数据库对代理IP进行存储。被抓取的代理IP来源于[xici代理网](http://www.xicidaili.com/)。 3 | 4 | **对于技术上的实现细节,参考本人所写的博客链接**: 5 | [Java网络爬虫(十一)--重构定时爬取以及IP代理池(多线程+Redis+代码优化)](http://blog.csdn.net/championhengyi/article/details/77053448) 6 | 7 | ## 环境需求 8 | >- JDK 1.8 9 | >- Redis 3.0.6 10 | >- IDEA 11 | >- Maven 12 | 13 | ## 实现架构 14 | ![架构说明](http://on-img.com/chart_image/598c1f86e4b02cf2fc84c11a.png) 15 | 16 | ## 使用说明 17 | 要使用此IP代理池,只能将本项目clone至本地,然后使用IDEA运行源代码。运行结果如下图: 18 | 19 | ![运行结果](http://i4.bvimg.com/633787/dbbaab4034d2b5f5.png) 20 | 21 | 就目前来说,如果想要真正的将此IP代理池运用到其它工程中,还需要对代码做额外的补充,最基本也要考虑使用`通知/等待机制`。 22 | 23 | 对于将此IP代理池如何运用到一个工程中,可以参考:[multithreading-crawlers](https://github.com/championheng/multithreading-crawlers) 24 | 25 | ## TODO 26 | 1. 优化任务分配策略 27 | 2. 对外提供接口与使用文档 28 | 3. 可视化管理... ... 29 | 30 | **注:此IP代理池真正运用在工程中的版本,[multithreading-crawlers](https://github.com/championheng/multithreading-crawlers),可以称为第三版,与此版本差别还是挺大的,对于版本3,我会尽快push到这个仓库中... 
...** 31 | 32 | ## 版本说明 33 | ![version 2.0](https://img.shields.io/badge/version-2.0-blue.svg) 34 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | groupId 8 | ip代理与定点爬取 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | org.apache.maven.plugins 14 | maven-compiler-plugin 15 | 16 | 1.7 17 | 1.7 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | redis.clients 26 | jedis 27 | 2.9.0 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/ipfilter/IPFilter.java: -------------------------------------------------------------------------------- 1 | package ipfilter; 2 | 3 | import IPModel.IPMessage; 4 | 5 | import java.util.*; 6 | 7 | import static java.lang.System.loadLibrary; 8 | import static java.lang.System.out; 9 | 10 | /** 11 | * Created by paranoid on 17-4-14. 12 | * 对于Java已经规定的常用的类如String我们不可能对它进行重新编译,在不能使用Comparable 13 | * 的情况下我们需要自己操作Comparator,重新定义它的compare方法. 14 | * 15 | * String的compareTo方法自动升序排列. 
16 | */ 17 | 18 | public class IPFilter { 19 | //对IP进行过滤,选取1000个IP中速度排名前六百的IP(升序),其余的舍弃 20 | public static List Filter(List list) { 21 | List newlist = new ArrayList<>(); 22 | 23 | Collections.sort(list, new Comparator() { 24 | @Override 25 | public int compare(IPMessage o1, IPMessage o2) { 26 | return o1.getIPSpeed().compareTo(o2.getIPSpeed()); 27 | } 28 | }); 29 | 30 | //只返回容器中前100的对象 31 | for(int i = 0; i < list.size(); i++) { 32 | if(i < 100) { 33 | newlist.add(list.get(i)); 34 | }else { 35 | break; 36 | } 37 | } 38 | 39 | return newlist; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /ip代理与定点爬取/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/IPModel/IPMessage.java: -------------------------------------------------------------------------------- 1 | package IPModel; 2 | 3 | /** 4 | * Created by paranoid on 17-4-10. 
/**
 * Mutable value holder for one crawled proxy entry. All five properties
 * (address, port, server address, type, speed) are stored as strings and
 * start out as null.
 */
public class IPMessage {
    private String address;
    private String port;
    private String serverAddress;
    private String type;
    private String speed;

    // ---- address ----

    public String getIPAddress() {
        return this.address;
    }

    public void setIPAddress(String IPAddress) {
        this.address = IPAddress;
    }

    // ---- port ----

    public String getIPPort() {
        return this.port;
    }

    public void setIPPort(String IPPort) {
        this.port = IPPort;
    }

    // ---- server address ----

    public String getServerAddress() {
        return this.serverAddress;
    }

    public void setServerAddress(String serverAddress) {
        this.serverAddress = serverAddress;
    }

    // ---- type ----

    public String getIPType() {
        return this.type;
    }

    public void setIPType(String IPType) {
        this.type = IPType;
    }

    // ---- speed ----

    public String getIPSpeed() {
        return this.speed;
    }

    public void setIPSpeed(String IPSpeed) {
        this.speed = IPSpeed;
    }
}
13 | */ 14 | public class MyRedis { 15 | Jedis jedis = RedisDB.getJedis(); 16 | 17 | //将ip信息保存在Redis列表中 18 | public void setIPToList(List ipMessages) { 19 | for (IPMessage ipMessage : ipMessages) { 20 | //首先将ipMessage进行序列化 21 | byte[] bytes = SerializeUtil.serialize(ipMessage); 22 | 23 | jedis.rpush("IPPool".getBytes(), bytes); 24 | } 25 | } 26 | 27 | //将Redis中保存的对象进行反序列化 28 | public IPMessage getIPByList() { 29 | int rand = (int)(Math.random()*jedis.llen("IPPool")); 30 | 31 | Object o = SerializeUtil.unserialize(jedis.lindex("IPPool".getBytes(), 0)); 32 | if (o instanceof IPMessage) { 33 | return (IPMessage)o; 34 | } else { 35 | out.println("不是IPMessage的一个实例~"); 36 | return null; 37 | } 38 | } 39 | 40 | public void deleteKey(String key) { 41 | jedis.del(key); 42 | } 43 | 44 | public void close() { 45 | RedisDB.close(jedis); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/IPModel/DatabaseMessage.java: -------------------------------------------------------------------------------- 1 | package IPModel; 2 | 3 | /** 4 | * Created by paranoid on 17-4-21. 
/**
 * Mutable value holder with an id plus the five proxy properties (address,
 * port, server address, type, speed). Every property is a string and is null
 * until set.
 */
public class DatabaseMessage {
    private String rowId;
    private String address;
    private String port;
    private String serverAddress;
    private String type;
    private String speed;

    // ---- id ----

    public String getId() {
        return this.rowId;
    }

    public void setId(String id) {
        this.rowId = id;
    }

    // ---- address ----

    public String getIPAddress() {
        return this.address;
    }

    public void setIPAddress(String IPAddress) {
        this.address = IPAddress;
    }

    // ---- port ----

    public String getIPPort() {
        return this.port;
    }

    public void setIPPort(String IPPort) {
        this.port = IPPort;
    }

    // ---- server address ----

    public String getServerAddress() {
        return this.serverAddress;
    }

    public void setServerAddress(String serverAddress) {
        this.serverAddress = serverAddress;
    }

    // ---- type ----

    public String getIPType() {
        return this.type;
    }

    public void setIPType(String IPType) {
        this.type = IPType;
    }

    // ---- speed ----

    public String getIPSpeed() {
        return this.speed;
    }

    public void setIPSpeed(String IPSpeed) {
        this.speed = IPSpeed;
    }
}
/**
 * Proxy entry (address, port, type, speed) that is stored in a Redis list.
 * Storing it there requires turning the object into bytes and back, hence the
 * class implements {@link Serializable}.
 *
 * Declaring serialVersionUID explicitly serves two purposes:
 * 1. when different versions of the class should stay serialization-compatible,
 *    they must share the same serialVersionUID;
 * 2. when they should not be compatible, they must use different values.
 */
public class IPMessage implements Serializable {
    private static final long serialVersionUID = 1L;

    // The field names below are part of the serialized form; keep them as they are.
    private String IPAddress;
    private String IPPort;
    private String IPType;
    private String IPSpeed;

    public void setIPAddress(String IPAddress) {
        this.IPAddress = IPAddress;
    }

    public String getIPAddress() {
        return this.IPAddress;
    }

    public void setIPPort(String IPPort) {
        this.IPPort = IPPort;
    }

    public String getIPPort() {
        return this.IPPort;
    }

    public void setIPType(String IPType) {
        this.IPType = IPType;
    }

    public String getIPType() {
        return this.IPType;
    }

    public void setIPSpeed(String IPSpeed) {
        this.IPSpeed = IPSpeed;
    }

    public String getIPSpeed() {
        return this.IPSpeed;
    }

    /** Renders the entry as "address:port". */
    @Override
    public String toString() {
        return String.format("%s:%s", this.IPAddress, this.IPPort);
    }
}
/**
 * Java-serialization helpers that convert an object to a byte array and back.
 *
 * Serialization wraps a byte-array output stream in an
 * {@link ObjectOutputStream} and calls writeObject; deserialization wraps a
 * byte-array input stream in an {@link ObjectInputStream} and calls readObject.
 * Both methods are best-effort: a failure is printed and reported to the
 * caller as a null result.
 */
public class SerializeUtil {
    /**
     * Serializes {@code object}.
     *
     * @param object object to serialize; must be serializable (null is allowed)
     * @return the serialized bytes, or null if serialization failed
     */
    public static byte[] serialize(Object object) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        // try-with-resources closes (and thereby flushes) the object stream
        // before the bytes are read from the buffer.
        try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
            oos.writeObject(object);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }

        return baos.toByteArray();
    }

    /**
     * Deserializes an object previously produced by {@link #serialize(Object)}.
     *
     * NOTE(review): ObjectInputStream must only be fed trusted data; the bytes
     * read here come from the Redis instance this project writes to.
     *
     * @param bytes serialized form; null or empty input yields null
     * @return the deserialized object, or null if the input is absent or invalid
     */
    public static Object unserialize(byte[] bytes) {
        // Explicit guard instead of relying on a swallowed NullPointerException.
        if (bytes == null || bytes.length == 0) {
            return null;
        }

        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            return ois.readObject();
        } catch (Exception e) {
            e.printStackTrace();
        }

        return null;
    }
}
paranoid on 17-4-13. 18 | */ 19 | 20 | public class TimeUpdate { 21 | public void go() throws Exception { 22 | // 首先,必需要取得一个Scheduler的引用(设置一个工厂) 23 | SchedulerFactory sf = new StdSchedulerFactory(); 24 | 25 | //从工厂里面拿到一个scheduler实例 26 | Scheduler sched = sf.getScheduler(); 27 | 28 | //真正执行的任务并不是Job接口的实例,而是用反射的方式实例化的一个JobDetail实例 29 | JobDetail job = newJob(MyTimeJob.class).withIdentity("job1", "group1").build(); 30 | // 定义一个触发器,job 1将每隔执行一次 31 | CronTrigger trigger = newTrigger().withIdentity("trigger1", "group1"). 32 | withSchedule(cronSchedule("30 04 18 * * ?")).build(); 33 | 34 | //执行任务和触发器 35 | Date ft = sched.scheduleJob(job, trigger); 36 | 37 | //格式化日期显示格式 38 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS"); 39 | System.out.println(job.getKey() + " 已被安排执行于: " + sdf.format(ft) + "," + 40 | "并且以如下重复规则重复执行: " + trigger.getCronExpression()); 41 | 42 | sched.start(); 43 | } 44 | 45 | public static void main(String[] args) throws Exception { 46 | TimeUpdate test = new TimeUpdate(); 47 | test.go(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/dataSources/c37050e9-c728-46b8-9a05-b8b36e206d80.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 1 7 | 1 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 1 19 | int(11)|0 20 | 1 21 | 1 22 | 23 | 24 | 2 25 | char(60)|0 26 | 1 27 | 28 | 29 | 3 30 | int(11)|0 31 | 1 32 | 33 | 34 | 4 35 | char(20)|0 36 | 1 37 | 38 | 39 | 5 40 | int(11)|0 41 | 1 42 | '100' 43 | 44 | 45 | 1 46 | id 47 | 1 48 | 49 | 50 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/database/RedisDB.java: -------------------------------------------------------------------------------- 1 | package database; 2 | 3 | import redis.clients.jedis.Jedis; 4 | import redis.clients.jedis.JedisPool; 5 | import redis.clients.jedis.JedisPoolConfig; 6 | 7 
| import java.util.ResourceBundle; 8 | 9 | /** 10 | * Created by paranoid on 17-4-12. 11 | */ 12 | public class RedisDB { 13 | // private static JedisPool jedisPool; 14 | private static String addr; 15 | private static int port; 16 | private static String passwd; 17 | 18 | //加载配置文件 19 | private static ResourceBundle rb = ResourceBundle.getBundle("db-config"); 20 | 21 | //初始化连接 22 | static { 23 | addr = rb.getString("jedis.addr"); 24 | port = Integer.parseInt(rb.getString("jedis.port")); 25 | passwd = rb.getString("jedis.passwd"); 26 | 27 | // try { 28 | // //先进行redis数据的参数配置 29 | // JedisPoolConfig config = new JedisPoolConfig(); 30 | // //链接耗尽时是否阻塞,false时抛出异常,默认是true,阻塞超时之后抛出异常 31 | // config.setBlockWhenExhausted(true); 32 | // //逐出策略类名,当连接超过最大空闲时间或最大空闲数抛出异常 33 | // config.setEvictionPolicyClassName("org.apache.commons.pool2." + 34 | // "impl.DefaultEvictionPolicy"); 35 | // //是否启用pool的jmx管理功能,默认是true 36 | // config.setJmxEnabled(true); 37 | // //最大空闲数,默认为8,一个pool最多有多少空闲的Jedis实例 38 | // config.setMaxIdle(8); 39 | // //最大连接数 40 | // config.setMaxTotal(100); 41 | // //当引入一个Jedis实例时,最大的等待时间,如果超过等待时间,抛出异常 42 | // config.setMaxWaitMillis(1000*10); 43 | // //获得一个jedis实例的时候是否检查连接可用性(ping()) 44 | // config.setTestOnBorrow(true); 45 | // } catch(Exception e) { 46 | // e.printStackTrace(); 47 | // } 48 | } 49 | 50 | //获取Jedis实例 51 | public synchronized static Jedis getJedis() { 52 | //连接本地的 Redis 服务 53 | Jedis jedis = new Jedis(addr, port); 54 | //权限认证 55 | jedis.auth(passwd); 56 | 57 | return jedis; 58 | } 59 | 60 | //释放Jedis资源 61 | public static void close(final Jedis jedis) { 62 | if (jedis != null) { 63 | jedis.close(); 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /ip代理与定点爬取/src/ipfilter/IPUtils.java: -------------------------------------------------------------------------------- 1 | package ipfilter; 2 | 3 | import IPModel.IPMessage; 4 | import org.apache.http.HttpHost; 5 | import 
org.apache.http.client.config.RequestConfig; 6 | import org.apache.http.client.methods.CloseableHttpResponse; 7 | import org.apache.http.client.methods.HttpGet; 8 | import org.apache.http.impl.client.CloseableHttpClient; 9 | import org.apache.http.impl.client.HttpClients; 10 | 11 | import java.io.IOException; 12 | import java.util.List; 13 | 14 | import static java.lang.System.out; 15 | 16 |
/**
 * Created by paranoid on 17-4-21.
 * Tests whether proxy IPs are usable.
 */

public class IPUtils {
    /**
     * Probes every proxy in the list by requesting https://www.baidu.com through it and
     * removes, in place, every proxy that cannot complete the request.
     *
     * A proxy is kept as soon as any HTTP response comes back (the status code is not
     * inspected). It is removed when the request fails with an IOException or when its
     * address/port cannot be turned into a proxy host.
     *
     * @param ipMessages candidate proxies; modified in place
     * @return the same list instance, now holding only the proxies that answered
     */
    public static List<IPMessage> IPIsable(List<IPMessage> ipMessages) {
        // One client for the whole run; try-with-resources closes it on every path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            int i = 0;
            while (i < ipMessages.size()) {
                IPMessage candidate = ipMessages.get(i);

                if (answersThroughProxy(httpClient, candidate)) {
                    i++;
                } else {
                    out.println("不可用代理已删除" + candidate.getIPAddress() + ": " + candidate.getIPPort());
                    // Remove by index so that exactly this entry goes away; the next
                    // element slides into position i, hence no increment.
                    ipMessages.remove(i);
                }
            }
        } catch (IOException e) {
            // Only reachable when closing the client itself fails.
            e.printStackTrace();
        }

        return ipMessages;
    }

    // Sends one GET through the given proxy; true when any response is received.
    private static boolean answersThroughProxy(CloseableHttpClient httpClient, IPMessage candidate) {
        HttpHost proxy;
        try {
            proxy = new HttpHost(candidate.getIPAddress(), Integer.parseInt(candidate.getIPPort()));
        } catch (IllegalArgumentException e) {
            // Includes NumberFormatException: a port that is not a number cannot be used.
            return false;
        }

        RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(3000).
                setSocketTimeout(3000).build();
        HttpGet httpGet = new HttpGet("https://www.baidu.com");
        httpGet.setConfig(config);

        httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
                "q=0.9,image/webp,*/*;q=0.8");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit" +
                "/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");

        // Every response is closed here. Previously only the last one was closed (and a
        // null one dereferenced), so each successful probe kept its connection leased.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            return true;
        } catch (IOException e) {
            return false;
        }
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/htmlparse/IPPool.java: -------------------------------------------------------------------------------- 1 | package htmlparse; 2 | 3 | import IPModel.IPMessage; 4 | import ipfilter.IPFilter; 5 | import ipfilter.IPUtils; 6 | 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import static java.lang.System.out; 11 |
/**
 * Created by hg_yi on 17-8-3.
 *
 * Crawls proxy-list pages through proxies drawn at random from a shared list and merges
 * the proxies found on those pages (after filtering and probing) back into that list.
 */
public class IPPool {
    // Shared proxy list. The list itself is not thread-safe, so every read and write in
    // this class happens while holding the list's monitor.
    private List<IPMessage> ipMessages;

    public IPPool(List<IPMessage> ipMessages) {
        this.ipMessages = ipMessages;
    }

    /**
     * Crawls every url in order. For each url a random proxy is drawn from the shared list;
     * when the fetch fails, another random proxy is drawn and the same url is retried.
     * The proxies parsed from the page are filtered, probed and appended to the shared list.
     *
     * NOTE(review): the retry is unbounded, so a url that no proxy can fetch keeps this
     * thread busy forever. Confirm whether a retry limit is wanted.
     *
     * @param urls pages to crawl
     */
    public void getIP(List<String> urls) {
        String ipAddress;
        String ipPort;

        for (int i = 0; i < urls.size(); i++) {
            // Each thread first collects the proxies it scraped into its own list, and
            // only that private list is filtered and probed.
            List<IPMessage> ipMessages1 = new ArrayList<>();
            String url = urls.get(i);

            // Pick a random proxy. Other threads may append to the shared list at any
            // time, so the size read and the element reads are done atomically.
            synchronized (ipMessages) {
                if (ipMessages.isEmpty()) {
                    // Nothing to draw from; get(0) used to fail with IndexOutOfBoundsException.
                    out.println("代理池为空,线程 " + Thread.currentThread().getName() + " 停止抓取");
                    return;
                }

                int rand = (int) (Math.random()*ipMessages.size());
                out.println("当前线程 " + Thread.currentThread().getName() + " rand值: " + rand +
                        " ipMessages 大小: " + ipMessages.size());

                ipAddress = ipMessages.get(rand).getIPAddress();
                ipPort = ipMessages.get(rand).getIPPort();
            }

            // The list is passed by reference: urlParse fills ipMessages1 directly.
            boolean status = URLFecter.urlParse(url, ipAddress, ipPort, ipMessages1);
            // The chosen proxy did not work: retry this page with another proxy.
            if (!status) {
                i--;
                continue;
            } else {
                out.println("线程:" + Thread.currentThread().getName() + "已成功抓取 " +
                        url + " ipMessage1:" + ipMessages1.size());
            }

            // Filter the scraped proxies again (per the original note: HTTPS type, speed
            // within two seconds).
            ipMessages1 = IPFilter.Filter(ipMessages1);

            // Probe the proxies and drop the unusable ones from the private list.
            IPUtils.IPIsable(ipMessages1);

            // Merge the good proxies into the shared list atomically.
            synchronized (ipMessages) {
                out.println("线程" + Thread.currentThread().getName() + "已进入合并区 " +
                        "待合并大小 ipMessages1:" + ipMessages1.size());
                ipMessages.addAll(ipMessages1);
            }
        }
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/ipfilter/IPUtils.java: -------------------------------------------------------------------------------- 1 | package ipfilter; 2 | 3 | import IPModel.IPMessage; 4 | import org.apache.http.HttpHost; 5 | import org.apache.http.client.config.RequestConfig; 6 | import org.apache.http.client.methods.CloseableHttpResponse; 7 | import org.apache.http.client.methods.HttpGet; 8 | import org.apache.http.impl.client.CloseableHttpClient; 9 | import org.apache.http.impl.client.HttpClients; 10 | 11 | import java.io.IOException; 12 | import java.util.List; 13 | 14 | import static java.lang.System.out; 15 |
/**
 * Created by paranoid on 17-4-21.
 * Tests whether proxy IPs are usable.
 */

public class IPUtils {
    /**
     * Probes every proxy in the list by requesting https://www.baidu.com through it and
     * removes, in place, every proxy that cannot complete the request.
     *
     * A proxy is kept as soon as any HTTP response comes back (the status code is not
     * inspected). It is removed when the request fails with an IOException or when its
     * address/port cannot be turned into a proxy host.
     *
     * @param ipMessages1 candidate proxies; modified in place
     */
    public static void IPIsable(List<IPMessage> ipMessages1) {
        // One client for the whole run; try-with-resources closes it on every path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            int i = 0;
            while (i < ipMessages1.size()) {
                IPMessage candidate = ipMessages1.get(i);

                if (answersThroughProxy(httpClient, candidate)) {
                    i++;
                } else {
                    out.println("不可用代理已删除" + candidate.getIPAddress()
                            + ": " + candidate.getIPPort());
                    // Remove by index; the next element slides into position i.
                    ipMessages1.remove(i);
                }
            }
        } catch (IOException e) {
            // Only reachable when closing the client itself fails.
            e.printStackTrace();
        }
    }

    // Sends one GET through the given proxy; true when any response is received.
    private static boolean answersThroughProxy(CloseableHttpClient httpClient, IPMessage candidate) {
        HttpHost proxy;
        try {
            proxy = new HttpHost(candidate.getIPAddress(), Integer.parseInt(candidate.getIPPort()));
        } catch (IllegalArgumentException e) {
            // Includes NumberFormatException: a port that is not a number cannot be used.
            return false;
        }

        RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(5000).
                setSocketTimeout(5000).build();
        HttpGet httpGet = new HttpGet("https://www.baidu.com");
        httpGet.setConfig(config);

        httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
                "q=0.9,image/webp,*/*;q=0.8");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit" +
                "/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");

        // Every response is closed here; previously only the last one ever was, so each
        // successful probe kept its connection leased from the client's pool.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            return true;
        } catch (IOException e) {
            return false;
        }
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取/src/.idea/src.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/.idea/src.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51
52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/timeutils/MyTimeJob.java: -------------------------------------------------------------------------------- 1 | package timeutils; 2 | 3 | import IPModel.IPMessage; 4 | import database.MyRedis; 5 | import htmlparse.IPPool; 6 | import htmlparse.IPThread; 7 | import htmlparse.URLFecter; 8 | import ipfilter.IPFilter; 9 | import ipfilter.IPUtils; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | import java.util.TimerTask; 13 | 14 | import static java.lang.System.out; 15 |
/**
 * Created by paranoid on 17-4-13.
 *
 * The proxy pool should hold at least 200 proxy IPs.
 *
 * The main multithreading concerns are a sensible distribution of the work and
 * thread safety.
 *
 * implements Job
 */

public class MyTimeJob extends TimerTask {
    // Seed pages crawled through proxies: /nn/2 up to and including /nn/41.
    private static final int FIRST_SEED_PAGE = 2;
    private static final int LAST_SEED_PAGE = 41;
    // Worker layout: WORKER_COUNT threads, each given URLS_PER_WORKER consecutive seed urls.
    private static final int WORKER_COUNT = 20;
    private static final int URLS_PER_WORKER = 2;

    MyRedis redis = new MyRedis();

    /**
     * Rebuilds the proxy pool: clears the "IPPool" key, crawls the first page with the local
     * IP, crawls the seed pages with worker threads that share one proxy list, prints the
     * result, stores it in Redis and finally reads one proxy back.
     */
    @Override
    public void run() {
        // Start by clearing the key in the redis database
        redis.deleteKey("IPPool");

        // Holds the crawled proxy records
        List<IPMessage> ipMessages = new ArrayList<>();
        List<String> urls = new ArrayList<>();
        // Collects the worker threads so they can be joined later
        List<Thread> threads = new ArrayList<>();

        // First crawl page one of the xici proxy site with the local IP
        ipMessages = URLFecter.urlParse(ipMessages);

        // Filter the proxies (per the original note: keep https ones with speed within two seconds)
        ipMessages = IPFilter.Filter(ipMessages);

        // Probe the proxies and drop the unusable ones from the list
        IPUtils.IPIsable(ipMessages);

        // Build the seed urls (about 4000 IPs according to the original note)
        for (int i = FIRST_SEED_PAGE; i <= LAST_SEED_PAGE; i++) {
            urls.add("http://www.xicidaili.com/nn/" + i);
        }

        /**
         * Parse and filter all seed urls with several threads.
         *
         * Every thread gets its own slice of urls, while the ipMessages list is shared
         * between them through the IPPool instance, which guards its updates.
         */
        IPPool ipPool = new IPPool(ipMessages);
        for (int i = 0; i < WORKER_COUNT; i++) {
            // Hand each worker its slice of the seed urls
            Thread worker = new IPThread(
                    urls.subList(i * URLS_PER_WORKER, (i + 1) * URLS_PER_WORKER), ipPool);
            threads.add(worker);
            worker.start();
        }

        // Wait for the workers. An interrupt is remembered and re-asserted after the loop
        // instead of being lost, and the remaining workers are still joined.
        boolean interrupted = false;
        for (Thread thread : threads) {
            try {
                thread.join();
            } catch (InterruptedException e) {
                interrupted = true;
                e.printStackTrace();
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }

        for(IPMessage ipMessage : ipMessages){
            out.println(ipMessage.getIPAddress());
            out.println(ipMessage.getIPPort());
            out.println(ipMessage.getIPType());
            out.println(ipMessage.getIPSpeed());
        }

        // Write the crawled proxies into Redis (a List)
        redis.setIPToList(ipMessages);

        // Read one proxy back from redis
        // NOTE(review): presumably getIPByList() can return null for an empty list; confirm in MyRedis.
        IPMessage ipMessage = redis.getIPByList();
        out.println(ipMessage.getIPAddress());
        out.println(ipMessage.getIPPort());
        redis.close();
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取/ip代理与定点爬取.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/htmlparse/URLFecter.java: -------------------------------------------------------------------------------- 1 | package htmlparse; 2 | 3 | import IPModel.IPMessage; 4 | import httpbrowser.MyHttpResponse; 5 | import org.jsoup.Jsoup; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.select.Elements; 8 | 9 | import java.io.IOException; 10 | import java.util.List; 11 | 12 | import static java.lang.System.out; 13 | 14 | /** 15 | * Created by paranoid on 17-4-10.
16 | */ 17 |
public class URLFecter {
    /**
     * Fetches a proxy-list page through the given proxy and appends every proxy found on it
     * to ipMessages1.
     *
     * @param url         page to fetch
     * @param ip          proxy address used for the request
     * @param port        proxy port used for the request
     * @param ipMessages1 receives the parsed proxies; modified in place
     * @return true when the page was fetched, false when no html came back
     */
    public static boolean urlParse(String url, String ip, String port,
                                   List<IPMessage> ipMessages1) {
        // Delegate the download; null means the request did not yield a page
        String html = MyHttpResponse.getHtml(url, ip, port);

        if (html == null) {
            out.println(ip+ ": " + port + " 代理不可用");

            return false;
        }

        appendProxies(html, ipMessages1);

        return true;
    }

    /**
     * Fetches page one of the xici proxy site with the local IP and appends every proxy
     * found on it to ipMessages. When the page cannot be fetched the list is returned
     * unchanged; passing the null html on to Jsoup.parse used to throw.
     *
     * @param ipMessages receives the parsed proxies; modified in place
     * @return the same list instance
     */
    public static List<IPMessage> urlParse(List<IPMessage> ipMessages) {
        String url = "http://www.xicidaili.com/nn/1";
        String html = MyHttpResponse.getHtml(url);

        if (html != null) {
            appendProxies(html, ipMessages);
        }

        return ipMessages;
    }

    // Parses the ip_list table of a page and appends one IPMessage per data row to sink.
    private static void appendProxies(String html, List<IPMessage> sink) {
        // Parse the html into a DOM
        Document document = Jsoup.parse(html);

        // Select the table rows that carry the data
        Elements trs = document.select("table[id=ip_list]").select("tbody").select("tr");

        // Row 0 is skipped, as in the original code (presumably the header row).
        for (int i = 1; i < trs.size(); i++) {
            Elements cells = trs.get(i).select("td");
            // Columns up to index 6 are read below; a shorter row cannot be parsed.
            if (cells.size() < 7) {
                continue;
            }

            IPMessage ipMessage = new IPMessage();
            ipMessage.setIPAddress(cells.get(1).text());
            ipMessage.setIPPort(cells.get(2).text());
            ipMessage.setIPType(cells.get(5).text());
            ipMessage.setIPSpeed(cells.get(6).select("div[class=bar]").attr("title"));

            sink.add(ipMessage);
        }
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取/src/htmlparse/URLFecter.java: -------------------------------------------------------------------------------- 1 | package htmlparse; 2 | 3 | import IPModel.IPMessage; 4 | import httpbrowser.HttpResponseDemo; 5 | import org.apache.http.impl.client.CloseableHttpClient; 6 | import org.apache.http.impl.client.HttpClients; 7 | import org.jsoup.Jsoup; 8 | import org.jsoup.nodes.Document; 9 | import org.jsoup.select.Elements; 10 | 11 | import java.io.IOException; 12 | import java.util.List; 13 | 14 | import static java.lang.System.out; 15 |
/**
 * Created by paranoid on 17-4-10.
 */

public class URLFecter {
    /**
     * Fetches a proxy-list page through the given proxy and appends every proxy found on it
     * to ipMessages. When no html comes back, a message is printed and the list is left
     * unchanged, so callers can detect failure by comparing the list size.
     *
     * @param url        page to fetch
     * @param ip         proxy address used for the request
     * @param port       proxy port used for the request
     * @param ipMessages receives the parsed proxies; modified in place
     * @return the same list instance
     */
    public static List<IPMessage> urlParse
            (String url, String ip, String port,
             List<IPMessage> ipMessages) throws ClassNotFoundException, IOException {
        // Delegate the download; null means the request did not yield a page
        String html = HttpResponseDemo.getHtml(url, ip, port);

        if (html != null) {
            appendProxies(html, ipMessages);
        } else {
            out.println(ip+ ": " + port + " 代理不可用");
        }

        return ipMessages;
    }

    /**
     * Fetches the given page with the local IP and appends every proxy found on it to list.
     * When the page cannot be fetched the list is returned unchanged; passing the null
     * html on to Jsoup.parse used to throw.
     *
     * @param url  page to fetch
     * @param list receives the parsed proxies; modified in place
     * @return the same list instance
     */
    public static List<IPMessage> urlParse(String url, List<IPMessage> list)
            throws IOException, ClassNotFoundException {
        String html = HttpResponseDemo.getHtml(url);

        if (html != null) {
            appendProxies(html, list);
        }

        return list;
    }

    // Parses the ip_list table of a page and appends one IPMessage per data row to sink.
    private static void appendProxies(String html, List<IPMessage> sink) {
        // Parse the html into a DOM
        Document document = Jsoup.parse(html);

        // Select the table rows that carry the data
        Elements trs = document.select("table[id=ip_list]").select("tbody").select("tr");

        // Row 0 is skipped, as in the original code (presumably the header row).
        for (int i = 1; i < trs.size(); i++) {
            Elements cells = trs.get(i).select("td");
            // Columns up to index 6 are read below; a shorter row cannot be parsed.
            if (cells.size() < 7) {
                continue;
            }

            IPMessage ipMessage = new IPMessage();
            ipMessage.setIPAddress(cells.get(1).text());
            ipMessage.setIPPort(cells.get(2).text());
            ipMessage.setServerAddress(cells.get(3).text());
            ipMessage.setIPType(cells.get(5).text());
            ipMessage.setIPSpeed(cells.get(6).select("div[class=bar]").attr("title"));

            sink.add(ipMessage);
        }
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/ip代理与定点爬取.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/timeutils/MyTimeJob.java: -------------------------------------------------------------------------------- 1 | package timeutils; 2 | 3 | import IPModel.DatabaseMessage; 4 | import IPModel.IPMessage; 5 | import database.DataBaseDemo; 6 | import htmlparse.URLFecter; 7 | import ipfilter.IPFilter; 8 | import ipfilter.IPUtils; 9 | import org.quartz.Job; 10 | import org.quartz.JobExecutionContext; 11 | import org.quartz.JobExecutionException; 12 | 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | 17 | import static java.lang.System.out; 18 | 19 | /** 20 | * Created by paranoid on 17-4-13.
21 | */ 22 | 23 | public class MyTimeJob implements Job { 24 | public void execute(JobExecutionContext argv) throws JobExecutionException { 25 | List Urls = new ArrayList<>(); 26 | List databaseMessages = new ArrayList<>(); 27 | List list = new ArrayList<>(); 28 | List ipMessages = new ArrayList<>(); 29 | String url = "http://www.xicidaili.com/nn/1"; 30 | String IPAddress; 31 | String IPPort; 32 | int k, j; 33 | 34 | //首先使用本机ip进行爬取 35 | try { 36 | list = URLFecter.urlParse(url, list); 37 | } catch (IOException e) { 38 | e.printStackTrace(); 39 | } catch (ClassNotFoundException e) { 40 | e.printStackTrace(); 41 | } 42 | 43 | //对得到的IP进行筛选,选取链接速度前100名的 44 | list = IPFilter.Filter(list); 45 | 46 | //构造种子Url 47 | for (int i = 1; i <= 5; i++) { 48 | Urls.add("http://www.xicidaili.com/nn/" + i); 49 | } 50 | 51 | //得到所需要的数据 52 | for (k = 0, j = 0; j < Urls.size(); k++) { 53 | url = Urls.get(j); 54 | 55 | IPAddress = list.get(k).getIPAddress(); 56 | IPPort = list.get(k).getIPPort(); 57 | //每次爬取前的大小 58 | int preIPMessSize = ipMessages.size(); 59 | try { 60 | ipMessages = URLFecter.urlParse(url, IPAddress, IPPort, ipMessages); 61 | //每次爬取后的大小 62 | int lastIPMessSize = ipMessages.size(); 63 | if(preIPMessSize != lastIPMessSize){ 64 | j++; 65 | } 66 | 67 | //对IP进行轮寻调用 68 | if (k >= list.size()) { 69 | k = 0; 70 | } 71 | } catch (ClassNotFoundException e) { 72 | e.printStackTrace(); 73 | } catch (IOException e) { 74 | e.printStackTrace(); 75 | } 76 | } 77 | 78 | //对得到的IP进行筛选,选取链接速度前100名的 79 | ipMessages = IPFilter.Filter(ipMessages); 80 | 81 | //对ip进行测试,不可用的从数组中删除 82 | ipMessages = IPUtils.IPIsable(ipMessages); 83 | 84 | for(IPMessage ipMessage : ipMessages){ 85 | out.println(ipMessage.getIPAddress()); 86 | out.println(ipMessage.getIPPort()); 87 | out.println(ipMessage.getServerAddress()); 88 | out.println(ipMessage.getIPType()); 89 | out.println(ipMessage.getIPSpeed()); 90 | } 91 | 92 | //将得到的IP存储在数据库中(每次先清空数据库) 93 | try { 94 | DataBaseDemo.delete(); 95 | 
DataBaseDemo.add(ipMessages); 96 | } catch (ClassNotFoundException e) { 97 | e.printStackTrace(); 98 | } 99 | 100 | //从数据库中将IP取到 101 | try { 102 | databaseMessages = DataBaseDemo.query(); 103 | } catch (ClassNotFoundException e) { 104 | e.printStackTrace(); 105 | } 106 | 107 | for (DatabaseMessage databaseMessage: databaseMessages) { 108 | out.println(databaseMessage.getId()); 109 | out.println(databaseMessage.getIPAddress()); 110 | out.println(databaseMessage.getIPPort()); 111 | out.println(databaseMessage.getServerAddress()); 112 | out.println(databaseMessage.getIPType()); 113 | out.println(databaseMessage.getIPSpeed()); 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /ip代理与定点爬取/src/database/DataBaseDemo.java: -------------------------------------------------------------------------------- 1 | package database; 2 | 3 | import IPModel.DatabaseMessage; 4 | import IPModel.IPMessage; 5 | 6 | import java.sql.*; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | /** 11 | * Created by paranoid on 17-4-12. 
12 | */
public class DataBaseDemo {
    private static String driver = "com.mysql.jdbc.Driver";         // JDBC driver class
    private static String dbURL = "jdbc:mysql://127.0.0.1:3306/IPProxy" +
            "?characterEncoding=utf8&useSSL=true";                  // database host, port and schema
    private static String user = "**********";                      // database user name
    private static String password = "********";                    // database password

    /**
     * Inserts every proxy of the list into the ProxyPool table, one row per proxy.
     * SQL errors are printed and not rethrown.
     *
     * @param list proxies to insert
     * @throws ClassNotFoundException when the JDBC driver class cannot be loaded
     */
    public static void add(List<IPMessage> list) throws ClassNotFoundException {
        Class.forName(driver);                                      // load the JDBC driver

        // try-with-resources closes the statement and the connection; the explicit
        // close() calls that used to follow were redundant.
        try(Connection conn = DriverManager.getConnection(dbURL, user, password);
            PreparedStatement statement = conn.prepareStatement("INSERT INTO " +
                    "ProxyPool (IPAddress, IPPort, serverAddress, IPType, IPSpeed)" +
                    " VALUES (?, ?, ?, ?, ?)")) {

            for(IPMessage ipMessage : list) {
                statement.setString(1, ipMessage.getIPAddress());
                statement.setString(2, ipMessage.getIPPort());
                statement.setString(3, ipMessage.getServerAddress());
                statement.setString(4, ipMessage.getIPType());
                statement.setString(5, ipMessage.getIPSpeed());

                statement.executeUpdate();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes the row with the given id from ProxyPool. SQL errors are printed.
     *
     * @param IPid id of the row to delete
     */
    public static void deleteIP(int IPid) {
        // Bound parameter instead of string concatenation, matching add().
        try(Connection conn = DriverManager.getConnection(dbURL, user, password);
            PreparedStatement statement = conn.prepareStatement(
                    "DELETE FROM ProxyPool WHERE id = ?")) {
            statement.setInt(1, IPid);
            statement.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Empties the ProxyPool table with TRUNCATE (per the original note, the ids are
     * reset as well). SQL errors are printed.
     */
    public static void delete() {
        try(Connection conn = DriverManager.getConnection(dbURL, user, password);
            Statement statement = conn.createStatement()) {
            statement.executeUpdate("TRUNCATE TABLE ProxyPool");
        }
        catch(SQLException e){
            e.printStackTrace();
        }
    }

    /**
     * Reads every row of ProxyPool. Columns are taken by position: id, address, port,
     * server address, type, speed. SQL errors are printed, and the rows read so far
     * are returned.
     *
     * @return the rows read, possibly empty
     * @throws ClassNotFoundException when the JDBC driver class cannot be loaded
     */
    public static List<DatabaseMessage> query() throws ClassNotFoundException {
        Class.forName(driver);                                      // load the JDBC driver
        List<DatabaseMessage> list = new ArrayList<>();

        // The result set is now managed too, so it is closed even when a read fails.
        try(Connection conn = DriverManager.getConnection(dbURL, user, password);
            Statement statement = conn.createStatement();
            ResultSet resultSet = statement.executeQuery("SELECT * FROM ProxyPool")) {

            while(resultSet.next()){
                DatabaseMessage databaseMessage = new DatabaseMessage();

                databaseMessage.setId(resultSet.getString(1));
                databaseMessage.setIPAddress(resultSet.getString(2));
                databaseMessage.setIPPort(resultSet.getString(3));
                databaseMessage.setServerAddress(resultSet.getString(4));
                databaseMessage.setIPType(resultSet.getString(5));
                databaseMessage.setIPSpeed(resultSet.getString(6));

                list.add(databaseMessage);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }

        return list;
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/src/main/java/httpbrowser/MyHttpResponse.java: -------------------------------------------------------------------------------- 1 | package httpbrowser; 2 | 3 | import org.apache.http.HttpHost; 4 | import org.apache.http.client.ClientProtocolException; 5 | import org.apache.http.client.config.RequestConfig; 6 | import org.apache.http.client.methods.CloseableHttpResponse; 7 | import org.apache.http.client.methods.HttpGet; 8 | import org.apache.http.impl.client.CloseableHttpClient; 9 | import org.apache.http.impl.client.HttpClients; 10 | import org.apache.http.util.EntityUtils; 11 | 12 | import java.io.IOException; 13 | 14 | import static java.lang.System.out; 15 | 16 | /** 17 | * Created by paranoid on 17-4-10. 18 | * 进行代理访问 19 | * 20 | * setConnectTimeout:设置连接超时时间,单位毫秒.
21 | * setConnectionRequestTimeout:设置从connect Manager获取Connection 超时时间,单位毫秒. 22 | * 这个属性是新加的属性,因为目前版本是可以共享连接池的. 23 | * setSocketTimeout:请求获取数据的超时时间,单位毫秒.如果访问一个接口,多少时间内无法返回数据, 24 | * 就直接放弃此次调用。 25 | */ 26 |
public class MyHttpResponse {
    /**
     * Downloads a page through the given proxy.
     *
     * @param url  page to download
     * @param ip   proxy address
     * @param port proxy port, as decimal text
     * @return the body decoded as utf-8 when the status code is 200; null for any other
     *         status, for any I/O failure, and when ip/port do not form a valid proxy host
     */
    public static String getHtml( String url, String ip, String port) {
        // Route the request through the proxy and apply the timeouts
        out.println("此时线程: " + Thread.currentThread().getName() + " 爬取所使用的代理为: "
                + ip + ":" + port);

        HttpHost proxy;
        try {
            proxy = new HttpHost(ip, Integer.parseInt(port));
        } catch (IllegalArgumentException e) {
            // Includes NumberFormatException. A proxy that cannot even be addressed is
            // reported like any other unusable proxy, instead of crashing the caller.
            return null;
        }
        RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(3000).
                setSocketTimeout(3000).build();

        try {
            return fetch(url, config);
        } catch (IOException e) {
            // Covers ClientProtocolException as well; failures are reported as null.
            return null;
        }
    }

    /**
     * Overload of the method above that downloads the page with the local IP.
     *
     * @param url page to download
     * @return the body decoded as utf-8 when the status code is 200; otherwise null
     *         (I/O failures are printed)
     */
    public static String getHtml(String url) {
        // Apply the timeouts
        RequestConfig config = RequestConfig.custom().setConnectTimeout(3000).
                setSocketTimeout(3000).build();

        try {
            return fetch(url, config);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return null;
    }

    // Executes one GET with the shared browser-like headers; client and response are
    // closed on every path (they used to leak whenever the request or the read threw).
    private static String fetch(String url, RequestConfig config) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setConfig(config);

        httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
                "q=0.9,image/webp,*/*;q=0.8");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        httpGet.setHeader("Cache-Control", "no-cache");
        httpGet.setHeader("Connection", "keep-alive");
        httpGet.setHeader("Host", "www.xicidaili.com");
        httpGet.setHeader("Pragma", "no-cache");
        httpGet.setHeader("Upgrade-Insecure-Requests", "1");
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " +
                "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");

        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             // The client executes the GET and returns the response
             CloseableHttpResponse httpResponse = httpClient.execute(httpGet)) {
            // Only a 200 response yields a body
            if (httpResponse.getStatusLine().getStatusCode() == 200) {
                return EntityUtils.toString(httpResponse.getEntity(), "utf-8");
            }

            return null;
        }
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取/src/httpbrowser/HttpResponseDemo.java: -------------------------------------------------------------------------------- 1 | package httpbrowser; 2 | 3 | import org.apache.http.HttpHost; 4 | import org.apache.http.client.ClientProtocolException; 5 | import org.apache.http.client.config.RequestConfig; 6 | import
org.apache.http.client.methods.CloseableHttpResponse; 7 | import org.apache.http.client.methods.HttpGet; 8 | import org.apache.http.impl.client.CloseableHttpClient; 9 | import org.apache.http.impl.client.HttpClients; 10 | import org.apache.http.util.EntityUtils; 11 | 12 | import java.io.IOException; 13 |
/**
 * Created by paranoid on 17-4-10.
 * Performs page requests, optionally through a proxy.
 *
 * setConnectTimeout: connection timeout, in milliseconds.
 * setConnectionRequestTimeout: timeout for obtaining a connection from the connection
 * manager, in milliseconds. The original note calls it a newer property that exists
 * because connection pools can be shared.
 * setSocketTimeout: timeout for receiving data, in milliseconds. When the remote side
 * does not return data within this time, the call is abandoned.
 */

public class HttpResponseDemo {
    /**
     * Downloads a page through the given proxy.
     *
     * @param url  page to download
     * @param ip   proxy address
     * @param port proxy port, as decimal text
     * @return the body decoded as utf-8 when the status code is 200; null for any other
     *         status, for any I/O failure, and when ip/port do not form a valid proxy host
     */
    public static String getHtml( String url, String ip, String port) {
        // Route the request through the proxy and apply the timeouts
        HttpHost proxy;
        try {
            proxy = new HttpHost(ip, Integer.parseInt(port));
        } catch (IllegalArgumentException e) {
            // Includes NumberFormatException. A proxy that cannot even be addressed is
            // reported like any other unusable proxy, instead of crashing the caller.
            return null;
        }
        RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(3000).
                setSocketTimeout(3000).build();

        try {
            return fetch(url, config);
        } catch (IOException e) {
            // Covers ClientProtocolException as well; failures are reported as null.
            return null;
        }
    }

    /**
     * Overload of the method above that downloads the page with the local IP.
     *
     * @param url page to download
     * @return the body decoded as utf-8 when the status code is 200; otherwise null
     *         (a ClientProtocolException is printed)
     * @throws IOException            when the request or the read fails for another reason
     * @throws ClassNotFoundException declared for callers; not thrown by this code
     */
    public static String getHtml(String url) throws ClassNotFoundException,
            IOException {
        // Apply the timeouts
        RequestConfig config = RequestConfig.custom().setConnectTimeout(5000).
                setSocketTimeout(5000).build();

        try {
            return fetch(url, config);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        }

        return null;
    }

    // Executes one GET with the shared browser-like headers; client and response are
    // closed on every path (they used to leak whenever the request or the read threw).
    private static String fetch(String url, RequestConfig config) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setConfig(config);

        httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
                "q=0.9,image/webp,*/*;q=0.8");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        httpGet.setHeader("Cache-Control", "no-cache");
        httpGet.setHeader("Connection", "keep-alive");
        // NOTE(review): hard-coded session cookie captured from a browser; presumably
        // stale by now. Confirm whether the site still needs it.
        httpGet.setHeader("Cookie", "_free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJTRkYjMyM" +
                "TU3NGRjMWVhM2JlMDA5Y2IyNzZlZmVlZTYwBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMUhtT0pjcnRT" +
                "bm9CZEllSXNTYkNZZWk2Nnp3NGNDcFFSQVFodzk1dmpLZWM9BjsARg%3D%3D--09d8736fbfb9a8544" +
                "b46eef48bb320c2b40ee721; Hm_lvt_0cf76c77469e965d2957f0553e6ecf59=1492128157,149" +
                "2160558,1492347839,1492764281; Hm_lpvt_0cf76c77469e965d2957f0553e6ecf59=1492764295");
        httpGet.setHeader("Host", "www.xicidaili.com");
        httpGet.setHeader("Pragma", "no-cache");
        httpGet.setHeader("Upgrade-Insecure-Requests", "1");
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " +
                "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");

        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             // The client executes the GET and returns the response
             CloseableHttpResponse httpResponse = httpClient.execute(httpGet)) {
            // Only a 200 response yields a body
            if (httpResponse.getStatusLine().getStatusCode() == 200) {
                return EntityUtils.toString(httpResponse.getEntity(), "utf-8");
            }

            return null;
        }
    }
}
-------------------------------------------------------------------------------- /ip代理与定点爬取(重构)/.idea/uiDesigner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19
| 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /ip代理与定点爬取/.idea/dataSources/7bc77221-9c0f-4103-8fcc-36aa3de003b6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 1 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | Column privileges 14 |
15 | 16 | Database privileges 17 |
18 | 19 |
20 | Events 21 |
22 | 23 | User defined functions 24 |
25 | 26 | General log 27 |
28 | 29 |
30 | help categories 31 |
32 | 33 | help keywords 34 |
35 | 36 | keyword-topic relation 37 |
38 | 39 | help topics 40 |
41 | 42 |
43 |
44 |
45 | MySQL plugins 46 |
47 | 48 | Stored Procedures 49 |
50 | 51 | Procedure privileges 52 |
53 | 54 | User proxy privileges 55 |
56 | 57 |
58 | MySQL Foreign Servers table 59 |
60 | 61 | Master Information 62 |
63 | 64 | Relay Log Information 65 |
66 | 67 | Worker Information 68 |
69 | 70 | Slow log 71 |
72 | 73 | Table privileges 74 |
75 | 76 | Time zones 77 |
78 | 79 | Leap seconds information for time zones 80 |
81 | 82 | Time zone names 83 |
84 | 85 | Time zone transitions 86 |
87 | 88 | Time zone transition types 89 |
90 | 91 | Users and global privileges 92 |
93 | 94 | 1 95 | char(60)|0 96 | '' 97 | 98 | 99 | 1 100 | char(64)|0 101 | '' 102 | 103 | 104 | 1 105 | char(32)|0 106 | '' 107 | 108 | 109 | 1 110 | char(64)|0 111 | '' 112 | 113 | 114 | 1 115 | char(64)|0 116 | '' 117 | 118 | 119 | 1 120 | timestamp|0 121 | CURRENT_TIMESTAMP 122 | 123 | 124 | 1 125 | set('Select', 'Insert', 'Update', 'References')|0 126 | '' 127 | 128 | 129 | Host 130 | Db 131 | User 132 | Table_name 133 | Column_name 134 | 1 135 | 136 | 137 | 1 138 | char(60)|0 139 | '' 140 | 141 | 142 | 1 143 | char(64)|0 144 | '' 145 | 146 | 147 | 1 148 | char(32)|0 149 | '' 150 | 151 | 152 | 1 153 | enum('N', 'Y')|0 154 | 'N' 155 | 156 | 157 | 1 158 | enum('N', 'Y')|0 159 | 'N' 160 | 161 | 162 | 1 163 | enum('N', 'Y')|0 164 | 'N' 165 | 166 | 167 | 1 168 | enum('N', 'Y')|0 169 | 'N' 170 | 171 | 172 | 1 173 | enum('N', 'Y')|0 174 | 'N' 175 | 176 | 177 | 1 178 | enum('N', 'Y')|0 179 | 'N' 180 | 181 | 182 | 1 183 | enum('N', 'Y')|0 184 | 'N' 185 | 186 | 187 | 1 188 | enum('N', 'Y')|0 189 | 'N' 190 | 191 | 192 | 1 193 | enum('N', 'Y')|0 194 | 'N' 195 | 196 | 197 | 1 198 | enum('N', 'Y')|0 199 | 'N' 200 | 201 | 202 | 1 203 | enum('N', 'Y')|0 204 | 'N' 205 | 206 | 207 | 1 208 | enum('N', 'Y')|0 209 | 'N' 210 | 211 | 212 | 1 213 | enum('N', 'Y')|0 214 | 'N' 215 | 216 | 217 | 1 218 | enum('N', 'Y')|0 219 | 'N' 220 | 221 | 222 | 1 223 | enum('N', 'Y')|0 224 | 'N' 225 | 226 | 227 | 1 228 | enum('N', 'Y')|0 229 | 'N' 230 | 231 | 232 | 1 233 | enum('N', 'Y')|0 234 | 'N' 235 | 236 | 237 | 1 238 | enum('N', 'Y')|0 239 | 'N' 240 | 241 | 242 | 1 243 | enum('N', 'Y')|0 244 | 'N' 245 | 246 | 247 | User 248 | 249 | 250 | 251 | Host 252 | Db 253 | User 254 | 1 255 | 256 | 257 | 1 258 | varchar(64)|0 259 | 260 | 261 | 1 262 | int(11)|0 263 | 264 | 265 | 1 266 | varchar(64)|0 267 | 268 | 269 | float|0 270 | 271 | 272 | 1 273 | timestamp|0 274 | CURRENT_TIMESTAMP 275 | 276 | 277 | varchar(1024)|0 278 | 279 | 280 | cost_name 281 | engine_name 282 | device_type 283 | 1 284 | 285 | 
286 | 1 287 | char(64)|0 288 | '' 289 | 290 | 291 | 1 292 | char(64)|0 293 | '' 294 | 295 | 296 | 1 297 | longblob|0 298 | 299 | 300 | 1 301 | char(93)|0 302 | '' 303 | 304 | 305 | datetime|0 306 | 307 | 308 | int(11)|0 309 | 310 | 311 | enum('YEAR', 'QUARTER', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'WEEK', 'SECOND', 'MICROSECOND', 'YEAR_MONTH', 'DAY_HOUR', 'DAY_MINUTE', 'DAY_SECOND', 'HOUR_MINUTE', 'HOUR_SECOND', 'MINUTE_SECOND', 'DAY_MICROSECOND', 'HOUR_MICROSECOND', 'MINUTE_MICROSECOND', 'SECOND_MICROSECOND')|0 312 | 313 | 314 | 1 315 | timestamp|0 316 | CURRENT_TIMESTAMP 317 | 318 | 319 | 1 320 | timestamp|0 321 | '0000-00-00 00:00:00' 322 | 323 | 324 | datetime|0 325 | 326 | 327 | datetime|0 328 | 329 | 330 | datetime|0 331 | 332 | 333 | 1 334 | enum('ENABLED', 'DISABLED', 'SLAVESIDE_DISABLED')|0 335 | 'ENABLED' 336 | 337 | 338 | 1 339 | enum('DROP', 'PRESERVE')|0 340 | 'DROP' 341 | 342 | 343 | 1 344 | set('REAL_AS_FLOAT', 'PIPES_AS_CONCAT', 'ANSI_QUOTES', 'IGNORE_SPACE', 'NOT_USED', 'ONLY_FULL_GROUP_BY', 'NO_UNSIGNED_SUBTRACTION', 'NO_DIR_IN_CREATE', 'POSTGRESQL', 'ORACLE', 'MSSQL', 'DB2', 'MAXDB', 'NO_KEY_OPTIONS', 'NO_TABLE_OPTIONS', 'NO_FIELD_OPTIONS', 'MYSQL323', 'MYSQL40', 'ANSI', 'NO_AUTO_VALUE_ON_ZERO', 'NO_BACKSLASH_ESCAPES', 'STRICT_TRANS_TABLES', 'STRICT_ALL_TABLES', 'NO_ZERO_IN_DATE', 'NO_ZERO_DATE', 'INVALID_DATES', 'ERROR_FOR_DIVISION_BY_ZERO', 'TRADITIONAL', 'NO_AUTO_CREATE_USER', 'HIGH_NOT_PRECEDENCE', 'NO_ENGINE_SUBSTITUTION', 'PAD_CHAR_TO_FULL_LENGTH')|0 345 | '' 346 | 347 | 348 | 1 349 | char(64)|0 350 | '' 351 | 352 | 353 | 1 354 | int(10) unsigned|0 355 | 356 | 357 | 1 358 | char(64)|0 359 | 'SYSTEM' 360 | 361 | 362 | char(32)|0 363 | 364 | 365 | char(32)|0 366 | 367 | 368 | char(32)|0 369 | 370 | 371 | longblob|0 372 | 373 | 374 | db 375 | name 376 | 1 377 | 378 | 379 | 1 380 | char(64)|0 381 | '' 382 | 383 | 384 | 1 385 | tinyint(1)|0 386 | '0' 387 | 388 | 389 | 1 390 | char(128)|0 391 | '' 392 | 393 | 394 | 1 395 | enum('function', 
'aggregate')|0 396 | 397 | 398 | name 399 | 1 400 | 401 | 402 | 1 403 | timestamp(6)|0 404 | CURRENT_TIMESTAMP(6) 405 | 406 | 407 | 1 408 | mediumtext|0 409 | 410 | 411 | 1 412 | bigint(21) unsigned|0 413 | 414 | 415 | 1 416 | int(10) unsigned|0 417 | 418 | 419 | 1 420 | varchar(64)|0 421 | 422 | 423 | 1 424 | mediumblob|0 425 | 426 | 427 | uuid of the source where the transaction was originally executed. 428 | 1 429 | char(36)|0 430 | 431 | 432 | First number of interval. 433 | 1 434 | bigint(20)|0 435 | 436 | 437 | Last number of interval. 438 | 1 439 | bigint(20)|0 440 | 441 | 442 | source_uuid 443 | interval_start 444 | 1 445 | 446 | 447 | 1 448 | smallint(5) unsigned|0 449 | 450 | 451 | 1 452 | char(64)|0 453 | 454 | 455 | smallint(5) unsigned|0 456 | 457 | 458 | 1 459 | text|0 460 | 461 | 462 | name 463 | 464 | 1 465 | 466 | 467 | help_category_id 468 | 1 469 | 470 | 471 | name 472 | name 473 | 474 | 475 | 1 476 | int(10) unsigned|0 477 | 478 | 479 | 1 480 | char(64)|0 481 | 482 | 483 | name 484 | 485 | 1 486 | 487 | 488 | help_keyword_id 489 | 1 490 | 491 | 492 | name 493 | name 494 | 495 | 496 | 1 497 | int(10) unsigned|0 498 | 499 | 500 | 1 501 | int(10) unsigned|0 502 | 503 | 504 | help_keyword_id 505 | help_topic_id 506 | 1 507 | 508 | 509 | 1 510 | int(10) unsigned|0 511 | 512 | 513 | 1 514 | char(64)|0 515 | 516 | 517 | 1 518 | smallint(5) unsigned|0 519 | 520 | 521 | 1 522 | text|0 523 | 524 | 525 | 1 526 | text|0 527 | 528 | 529 | 1 530 | text|0 531 | 532 | 533 | name 534 | 535 | 1 536 | 537 | 538 | help_topic_id 539 | 1 540 | 541 | 542 | name 543 | name 544 | 545 | 546 | 1 547 | varchar(64)|0 548 | 549 | 550 | 1 551 | varchar(64)|0 552 | 553 | 554 | 1 555 | varchar(64)|0 556 | 557 | 558 | 1 559 | timestamp|0 560 | CURRENT_TIMESTAMP 561 | 562 | 563 | 1 564 | varchar(64)|0 565 | 566 | 567 | 1 568 | bigint(20) unsigned|0 569 | 570 | 571 | bigint(20) unsigned|0 572 | 573 | 574 | 1 575 | varchar(1024)|0 576 | 577 | 578 | database_name 579 | table_name 
580 | index_name 581 | stat_name 582 | 1 583 | 584 | 585 | 1 586 | varchar(64)|0 587 | 588 | 589 | 1 590 | varchar(64)|0 591 | 592 | 593 | 1 594 | timestamp|0 595 | CURRENT_TIMESTAMP 596 | 597 | 598 | 1 599 | bigint(20) unsigned|0 600 | 601 | 602 | 1 603 | bigint(20) unsigned|0 604 | 605 | 606 | 1 607 | bigint(20) unsigned|0 608 | 609 | 610 | database_name 611 | table_name 612 | 1 613 | 614 | 615 | 1 616 | bigint(20) unsigned|0 617 | 618 | 619 | 1 620 | varchar(255)|0 621 | 622 | 623 | 1 624 | bigint(20) unsigned|0 625 | 626 | 627 | 1 628 | int(10) unsigned|0 629 | 630 | 631 | 1 632 | int(10) unsigned|0 633 | 634 | 635 | 1 636 | int(10) unsigned|0 637 | 638 | 639 | 1 640 | int(10) unsigned|0 641 | 642 | 643 | 1 644 | int(10) unsigned|0 645 | 646 | 647 | 1 648 | bigint(20) unsigned|0 649 | 650 | 651 | 1 652 | int(10) unsigned|0 653 | 654 | 655 | 1 656 | bigint(20) unsigned|0 657 | 658 | 659 | 1 660 | varchar(255)|0 661 | 662 | 663 | epoch 664 | orig_server_id 665 | orig_epoch 666 | 1 667 | 668 | 669 | 1 670 | varchar(64)|0 671 | '' 672 | 673 | 674 | 1 675 | varchar(128)|0 676 | '' 677 | 678 | 679 | name 680 | 1 681 | 682 | 683 | 1 684 | char(64)|0 685 | '' 686 | 687 | 688 | 1 689 | char(64)|0 690 | '' 691 | 692 | 693 | 1 694 | enum('FUNCTION', 'PROCEDURE')|0 695 | 696 | 697 | 1 698 | char(64)|0 699 | '' 700 | 701 | 702 | 1 703 | enum('SQL')|0 704 | 'SQL' 705 | 706 | 707 | 1 708 | enum('CONTAINS_SQL', 'NO_SQL', 'READS_SQL_DATA', 'MODIFIES_SQL_DATA')|0 709 | 'CONTAINS_SQL' 710 | 711 | 712 | 1 713 | enum('YES', 'NO')|0 714 | 'NO' 715 | 716 | 717 | 1 718 | enum('INVOKER', 'DEFINER')|0 719 | 'DEFINER' 720 | 721 | 722 | 1 723 | blob|0 724 | 725 | 726 | 1 727 | longblob|0 728 | 729 | 730 | 1 731 | longblob|0 732 | 733 | 734 | 1 735 | char(93)|0 736 | '' 737 | 738 | 739 | 1 740 | timestamp|0 741 | CURRENT_TIMESTAMP 742 | 743 | 744 | 1 745 | timestamp|0 746 | '0000-00-00 00:00:00' 747 | 748 | 749 | 1 750 | set('REAL_AS_FLOAT', 'PIPES_AS_CONCAT', 'ANSI_QUOTES', 
'IGNORE_SPACE', 'NOT_USED', 'ONLY_FULL_GROUP_BY', 'NO_UNSIGNED_SUBTRACTION', 'NO_DIR_IN_CREATE', 'POSTGRESQL', 'ORACLE', 'MSSQL', 'DB2', 'MAXDB', 'NO_KEY_OPTIONS', 'NO_TABLE_OPTIONS', 'NO_FIELD_OPTIONS', 'MYSQL323', 'MYSQL40', 'ANSI', 'NO_AUTO_VALUE_ON_ZERO', 'NO_BACKSLASH_ESCAPES', 'STRICT_TRANS_TABLES', 'STRICT_ALL_TABLES', 'NO_ZERO_IN_DATE', 'NO_ZERO_DATE', 'INVALID_DATES', 'ERROR_FOR_DIVISION_BY_ZERO', 'TRADITIONAL', 'NO_AUTO_CREATE_USER', 'HIGH_NOT_PRECEDENCE', 'NO_ENGINE_SUBSTITUTION', 'PAD_CHAR_TO_FULL_LENGTH')|0 751 | '' 752 | 753 | 754 | 1 755 | text|0 756 | 757 | 758 | char(32)|0 759 | 760 | 761 | char(32)|0 762 | 763 | 764 | char(32)|0 765 | 766 | 767 | longblob|0 768 | 769 | 770 | db 771 | name 772 | type 773 | 1 774 | 775 | 776 | 1 777 | char(60)|0 778 | '' 779 | 780 | 781 | 1 782 | char(64)|0 783 | '' 784 | 785 | 786 | 1 787 | char(32)|0 788 | '' 789 | 790 | 791 | 1 792 | char(64)|0 793 | '' 794 | 795 | 796 | 1 797 | enum('FUNCTION', 'PROCEDURE')|0 798 | 799 | 800 | 1 801 | char(93)|0 802 | '' 803 | 804 | 805 | 1 806 | set('Execute', 'Alter Routine', 'Grant')|0 807 | '' 808 | 809 | 810 | 1 811 | timestamp|0 812 | CURRENT_TIMESTAMP 813 | 814 | 815 | Grantor 816 | 817 | 818 | 819 | Host 820 | Db 821 | User 822 | Routine_name 823 | Routine_type 824 | 1 825 | 826 | 827 | 1 828 | char(60)|0 829 | '' 830 | 831 | 832 | 1 833 | char(32)|0 834 | '' 835 | 836 | 837 | 1 838 | char(60)|0 839 | '' 840 | 841 | 842 | 1 843 | char(32)|0 844 | '' 845 | 846 | 847 | 1 848 | tinyint(1)|0 849 | '0' 850 | 851 | 852 | 1 853 | char(93)|0 854 | '' 855 | 856 | 857 | 1 858 | timestamp|0 859 | CURRENT_TIMESTAMP 860 | 861 | 862 | Grantor 863 | 864 | 865 | 866 | Host 867 | User 868 | Proxied_host 869 | Proxied_user 870 | 1 871 | 872 | 873 | 1 874 | varchar(64)|0 875 | 876 | 877 | float|0 878 | 879 | 880 | 1 881 | timestamp|0 882 | CURRENT_TIMESTAMP 883 | 884 | 885 | varchar(1024)|0 886 | 887 | 888 | cost_name 889 | 1 890 | 891 | 892 | 1 893 | char(64)|0 894 | '' 895 | 896 | 897 | 
1 898 | char(64)|0 899 | '' 900 | 901 | 902 | 1 903 | char(64)|0 904 | '' 905 | 906 | 907 | 1 908 | char(64)|0 909 | '' 910 | 911 | 912 | 1 913 | char(64)|0 914 | '' 915 | 916 | 917 | 1 918 | int(4)|0 919 | '0' 920 | 921 | 922 | 1 923 | char(64)|0 924 | '' 925 | 926 | 927 | 1 928 | char(64)|0 929 | '' 930 | 931 | 932 | 1 933 | char(64)|0 934 | '' 935 | 936 | 937 | Server_name 938 | 1 939 | 940 | 941 | Number of lines in the file. 942 | 1 943 | int(10) unsigned|0 944 | 945 | 946 | The name of the master binary log currently being read from the master. 947 | 1 948 | text|0 949 | 950 | 951 | The master log position of the last read event. 952 | 1 953 | bigint(20) unsigned|0 954 | 955 | 956 | The host name of the master. 957 | char(64)|0 958 | 959 | 960 | The user name used to connect to the master. 961 | text|0 962 | 963 | 964 | The password used to connect to the master. 965 | text|0 966 | 967 | 968 | The network port used to connect to the master. 969 | 1 970 | int(10) unsigned|0 971 | 972 | 973 | The period (in seconds) that the slave will wait before trying to reconnect to the master. 974 | 1 975 | int(10) unsigned|0 976 | 977 | 978 | Indicates whether the server supports SSL connections. 979 | 1 980 | tinyint(1)|0 981 | 982 | 983 | The file used for the Certificate Authority (CA) certificate. 984 | text|0 985 | 986 | 987 | The path to the Certificate Authority (CA) certificates. 988 | text|0 989 | 990 | 991 | The name of the SSL certificate file. 992 | text|0 993 | 994 | 995 | The name of the cipher in use for the SSL connection. 996 | text|0 997 | 998 | 999 | The name of the SSL key file. 1000 | text|0 1001 | 1002 | 1003 | Whether to verify the server certificate. 
1004 | 1 1005 | tinyint(1)|0 1006 | 1007 | 1008 | 1 1009 | float|0 1010 | 1011 | 1012 | Displays which interface is employed when connecting to the MySQL server 1013 | text|0 1014 | 1015 | 1016 | The number of server IDs to be ignored, followed by the actual server IDs 1017 | text|0 1018 | 1019 | 1020 | The master server uuid. 1021 | text|0 1022 | 1023 | 1024 | Number of reconnect attempts, to the master, before giving up. 1025 | 1 1026 | bigint(20) unsigned|0 1027 | 1028 | 1029 | The file used for the Certificate Revocation List (CRL) 1030 | text|0 1031 | 1032 | 1033 | The path used for Certificate Revocation List (CRL) files 1034 | text|0 1035 | 1036 | 1037 | Indicates whether GTIDs will be used to retrieve events from the master. 1038 | 1 1039 | tinyint(1)|0 1040 | 1041 | 1042 | The channel on which the slave is connected to a source. Used in Multisource Replication 1043 | 1 1044 | char(64)|0 1045 | 1046 | 1047 | Tls version 1048 | text|0 1049 | 1050 | 1051 | Channel_name 1052 | 1 1053 | 1054 | 1055 | Number of lines in the file or rows in the table. Used to version table definitions. 1056 | 1 1057 | int(10) unsigned|0 1058 | 1059 | 1060 | The name of the current relay log file. 1061 | 1 1062 | text|0 1063 | 1064 | 1065 | The relay log position of the last executed event. 1066 | 1 1067 | bigint(20) unsigned|0 1068 | 1069 | 1070 | The name of the master binary log file from which the events in the relay log file were read. 1071 | 1 1072 | text|0 1073 | 1074 | 1075 | The master log position of the last executed event. 1076 | 1 1077 | bigint(20) unsigned|0 1078 | 1079 | 1080 | The number of seconds that the slave must lag behind the master. 1081 | 1 1082 | int(11)|0 1083 | 1084 | 1085 | 1 1086 | int(10) unsigned|0 1087 | 1088 | 1089 | Internal Id that uniquely identifies this record. 1090 | 1 1091 | int(10) unsigned|0 1092 | 1093 | 1094 | The channel on which the slave is connected to a source. 
Used in Multisource Replication 1095 | 1 1096 | char(64)|0 1097 | 1098 | 1099 | Channel_name 1100 | 1 1101 | 1102 | 1103 | 1 1104 | int(10) unsigned|0 1105 | 1106 | 1107 | 1 1108 | text|0 1109 | 1110 | 1111 | 1 1112 | bigint(20) unsigned|0 1113 | 1114 | 1115 | 1 1116 | text|0 1117 | 1118 | 1119 | 1 1120 | bigint(20) unsigned|0 1121 | 1122 | 1123 | 1 1124 | text|0 1125 | 1126 | 1127 | 1 1128 | bigint(20) unsigned|0 1129 | 1130 | 1131 | 1 1132 | text|0 1133 | 1134 | 1135 | 1 1136 | bigint(20) unsigned|0 1137 | 1138 | 1139 | 1 1140 | int(10) unsigned|0 1141 | 1142 | 1143 | 1 1144 | int(10) unsigned|0 1145 | 1146 | 1147 | 1 1148 | blob|0 1149 | 1150 | 1151 | The channel on which the slave is connected to a source. Used in Multisource Replication 1152 | 1 1153 | char(64)|0 1154 | 1155 | 1156 | Channel_name 1157 | Id 1158 | 1 1159 | 1160 | 1161 | 1 1162 | timestamp(6)|0 1163 | CURRENT_TIMESTAMP(6) 1164 | 1165 | 1166 | 1 1167 | mediumtext|0 1168 | 1169 | 1170 | 1 1171 | time(6)|0 1172 | 1173 | 1174 | 1 1175 | time(6)|0 1176 | 1177 | 1178 | 1 1179 | int(11)|0 1180 | 1181 | 1182 | 1 1183 | int(11)|0 1184 | 1185 | 1186 | 1 1187 | varchar(512)|0 1188 | 1189 | 1190 | 1 1191 | int(11)|0 1192 | 1193 | 1194 | 1 1195 | int(11)|0 1196 | 1197 | 1198 | 1 1199 | int(10) unsigned|0 1200 | 1201 | 1202 | 1 1203 | mediumblob|0 1204 | 1205 | 1206 | 1 1207 | bigint(21) unsigned|0 1208 | 1209 | 1210 | 1 1211 | char(60)|0 1212 | '' 1213 | 1214 | 1215 | 1 1216 | char(64)|0 1217 | '' 1218 | 1219 | 1220 | 1 1221 | char(32)|0 1222 | '' 1223 | 1224 | 1225 | 1 1226 | char(64)|0 1227 | '' 1228 | 1229 | 1230 | 1 1231 | char(93)|0 1232 | '' 1233 | 1234 | 1235 | 1 1236 | timestamp|0 1237 | CURRENT_TIMESTAMP 1238 | 1239 | 1240 | 1 1241 | set('Select', 'Insert', 'Update', 'Delete', 'Create', 'Drop', 'Grant', 'References', 'Index', 'Alter', 'Create View', 'Show view', 'Trigger')|0 1242 | '' 1243 | 1244 | 1245 | 1 1246 | set('Select', 'Insert', 'Update', 'References')|0 1247 | '' 1248 | 1249 | 1250 | 
Grantor 1251 | 1252 | 1253 | 1254 | Host 1255 | Db 1256 | User 1257 | Table_name 1258 | 1 1259 | 1260 | 1261 | 1 1262 | int(10) unsigned|0 1263 | 1 1264 | 1265 | 1266 | 1 1267 | enum('Y', 'N')|0 1268 | 'N' 1269 | 1270 | 1271 | Time_zone_id 1272 | 1 1273 | 1274 | 1275 | 1 1276 | bigint(20)|0 1277 | 1278 | 1279 | 1 1280 | int(11)|0 1281 | 1282 | 1283 | Transition_time 1284 | 1 1285 | 1286 | 1287 | 1 1288 | char(64)|0 1289 | 1290 | 1291 | 1 1292 | int(10) unsigned|0 1293 | 1294 | 1295 | Name 1296 | 1 1297 | 1298 | 1299 | 1 1300 | int(10) unsigned|0 1301 | 1302 | 1303 | 1 1304 | bigint(20)|0 1305 | 1306 | 1307 | 1 1308 | int(10) unsigned|0 1309 | 1310 | 1311 | Time_zone_id 1312 | Transition_time 1313 | 1 1314 | 1315 | 1316 | 1 1317 | int(10) unsigned|0 1318 | 1319 | 1320 | 1 1321 | int(10) unsigned|0 1322 | 1323 | 1324 | 1 1325 | int(11)|0 1326 | '0' 1327 | 1328 | 1329 | 1 1330 | tinyint(3) unsigned|0 1331 | '0' 1332 | 1333 | 1334 | 1 1335 | char(8)|0 1336 | '' 1337 | 1338 | 1339 | Time_zone_id 1340 | Transition_type_id 1341 | 1 1342 | 1343 | 1344 | 1 1345 | char(60)|0 1346 | '' 1347 | 1348 | 1349 | 1 1350 | char(32)|0 1351 | '' 1352 | 1353 | 1354 | 1 1355 | enum('N', 'Y')|0 1356 | 'N' 1357 | 1358 | 1359 | 1 1360 | enum('N', 'Y')|0 1361 | 'N' 1362 | 1363 | 1364 | 1 1365 | enum('N', 'Y')|0 1366 | 'N' 1367 | 1368 | 1369 | 1 1370 | enum('N', 'Y')|0 1371 | 'N' 1372 | 1373 | 1374 | 1 1375 | enum('N', 'Y')|0 1376 | 'N' 1377 | 1378 | 1379 | 1 1380 | enum('N', 'Y')|0 1381 | 'N' 1382 | 1383 | 1384 | 1 1385 | enum('N', 'Y')|0 1386 | 'N' 1387 | 1388 | 1389 | 1 1390 | enum('N', 'Y')|0 1391 | 'N' 1392 | 1393 | 1394 | 1 1395 | enum('N', 'Y')|0 1396 | 'N' 1397 | 1398 | 1399 | 1 1400 | enum('N', 'Y')|0 1401 | 'N' 1402 | 1403 | 1404 | 1 1405 | enum('N', 'Y')|0 1406 | 'N' 1407 | 1408 | 1409 | 1 1410 | enum('N', 'Y')|0 1411 | 'N' 1412 | 1413 | 1414 | 1 1415 | enum('N', 'Y')|0 1416 | 'N' 1417 | 1418 | 1419 | 1 1420 | enum('N', 'Y')|0 1421 | 'N' 1422 | 1423 | 1424 | 1 1425 | enum('N', 
'Y')|0 1426 | 'N' 1427 | 1428 | 1429 | 1 1430 | enum('N', 'Y')|0 1431 | 'N' 1432 | 1433 | 1434 | 1 1435 | enum('N', 'Y')|0 1436 | 'N' 1437 | 1438 | 1439 | 1 1440 | enum('N', 'Y')|0 1441 | 'N' 1442 | 1443 | 1444 | 1 1445 | enum('N', 'Y')|0 1446 | 'N' 1447 | 1448 | 1449 | 1 1450 | enum('N', 'Y')|0 1451 | 'N' 1452 | 1453 | 1454 | 1 1455 | enum('N', 'Y')|0 1456 | 'N' 1457 | 1458 | 1459 | 1 1460 | enum('N', 'Y')|0 1461 | 'N' 1462 | 1463 | 1464 | 1 1465 | enum('N', 'Y')|0 1466 | 'N' 1467 | 1468 | 1469 | 1 1470 | enum('N', 'Y')|0 1471 | 'N' 1472 | 1473 | 1474 | 1 1475 | enum('N', 'Y')|0 1476 | 'N' 1477 | 1478 | 1479 | 1 1480 | enum('N', 'Y')|0 1481 | 'N' 1482 | 1483 | 1484 | 1 1485 | enum('N', 'Y')|0 1486 | 'N' 1487 | 1488 | 1489 | 1 1490 | enum('N', 'Y')|0 1491 | 'N' 1492 | 1493 | 1494 | 1 1495 | enum('N', 'Y')|0 1496 | 'N' 1497 | 1498 | 1499 | 1 1500 | enum('', 'ANY', 'X509', 'SPECIFIED')|0 1501 | '' 1502 | 1503 | 1504 | 1 1505 | blob|0 1506 | 1507 | 1508 | 1 1509 | blob|0 1510 | 1511 | 1512 | 1 1513 | blob|0 1514 | 1515 | 1516 | 1 1517 | int(11) unsigned|0 1518 | '0' 1519 | 1520 | 1521 | 1 1522 | int(11) unsigned|0 1523 | '0' 1524 | 1525 | 1526 | 1 1527 | int(11) unsigned|0 1528 | '0' 1529 | 1530 | 1531 | 1 1532 | int(11) unsigned|0 1533 | '0' 1534 | 1535 | 1536 | 1 1537 | char(64)|0 1538 | 'mysql_native_password' 1539 | 1540 | 1541 | text|0 1542 | 1543 | 1544 | 1 1545 | enum('N', 'Y')|0 1546 | 'N' 1547 | 1548 | 1549 | timestamp|0 1550 | 1551 | 1552 | smallint(5) unsigned|0 1553 | 1554 | 1555 | 1 1556 | enum('N', 'Y')|0 1557 | 'N' 1558 | 1559 | 1560 | Host 1561 | User 1562 | 1 1563 | 1564 |
1565 |
--------------------------------------------------------------------------------