├── src
├── cn
│ └── aidou
│ │ ├── Algorithm
│ │ ├── AstarAlgorithm
│ │ │ ├── AStar.java
│ │ │ ├── ASta算法
│ │ │ ├── Node.java
│ │ │ ├── NodeFComparator.java
│ │ │ └── Test.java
│ │ ├── BubbleSort
│ │ │ ├── BubbleSort.java
│ │ │ └── 冒泡排序
│ │ ├── MergeSort
│ │ │ ├── MergeSort.java
│ │ │ └── 归并排序
│ │ └── 算法
│ │ ├── Entry
│ │ ├── EntryClass.java
│ │ └── Manager.java
│ │ ├── TaskDistributor
│ │ ├── SpiderContainer.java
│ │ └── spider
│ │ │ ├── ISpiderMan.java
│ │ │ └── Spiders
│ │ │ ├── CreateSpider.java
│ │ │ └── CreateSpider1.java
│ │ ├── aop
│ │ ├── AbstractHandler.java
│ │ ├── AfterHandler.java
│ │ ├── AfterHandlerImpl.java
│ │ ├── BeforeHandler.java
│ │ ├── BeforeHandlerImpl.java
│ │ ├── Calculator.java
│ │ ├── CalculatorImpl.java
│ │ ├── ProxyFactory.java
│ │ └── TestAopInJDK.java
│ │ ├── bean
│ │ ├── SpiderBean.java
│ │ ├── TaskBean.java
│ │ └── UrlQueue.java
│ │ ├── dao
│ │ ├── BaseDao.java
│ │ ├── ConnectionPool.java
│ │ ├── PooledConnection.java
│ │ └── URLLinkDao.java
│ │ ├── listener
│ │ ├── ExampleListener.java
│ │ ├── RunTimeEvent.java
│ │ └── RunTimeListener.java
│ │ ├── robot
│ │ ├── DownResource.java
│ │ ├── GetPageEncoding.java
│ │ ├── IDownResource.java
│ │ └── ReadLabelHandler.java
│ │ ├── thread
│ │ ├── PooledThread.java
│ │ ├── SuchThread.java
│ │ └── ThreadPool.java
│ │ └── util
│ │ ├── Array.java
│ │ ├── Encrypt.java
│ │ ├── ObjFactory.java
│ │ ├── PackageParamObj.java
│ │ ├── ParamObject.java
│ │ └── PrivateUtil.java
└── log4j.properties
├── 分布式网络爬虫架构实现.docx
├── 分布式网络爬虫框架架构资料
├── 1.png
├── 2.png
├── 3.jpg
└── 4.jpg
└── 自定义分布式爬虫架构原理图.png
/src/cn/aidou/Algorithm/AstarAlgorithm/AStar.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.Algorithm.AstarAlgorithm;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.List;
6 |
7 | public class AStar {
8 | private int[][] map;//地图(1可通过 0不可通过)
9 | private List< Node> openList;//开启列表
10 | private List< Node> closeList;//关闭列表
11 | private final int COST_STRAIGHT = 10;//垂直方向或水平方向移动的路径评分
12 | private final int COST_DIAGONAL = 14;//斜方向移动的路径评分
13 | private int row;//行
14 | private int column;//列
15 |
16 | public AStar(int[][] map,int row,int column){
17 | this.map=map;
18 | this.row=row;
19 | this.column=column;
20 | openList=new ArrayList< Node>();
21 | closeList=new ArrayList< Node>();
22 | }
23 | //查找坐标(-1:错误,0:没找到,1:找到了)
24 | public int search(int x1,int y1,int x2,int y2){
25 | //验证坐标是否合法
26 | if(x1< 0||x1>=row||x2< 0||x2>=row||y1< 0||y1>=column||y2< 0||y2>=column){
27 | return -1;
28 | }
29 | //验证坐标是否是“墙”
30 | if(map[x1][y1]==0||map[x2][y2]==0){
31 | return -1;
32 | }
33 | Node sNode=new Node(x1,y1,null);//初始化开始节点
34 | Node eNode=new Node(x2,y2,null);//初始化结束节点
35 | openList.add(sNode);//将开始节点增加到开启列表当中
36 | List< Node> resultList=search(sNode, eNode);
37 | if(resultList.size()==0){
38 | return 0;
39 | }
40 | for(Node node:resultList){
41 | map[node.getX()][node.getY()]=2;
42 | }
43 | return 1;
44 | }
45 | //查找核心算法
46 | private List< Node> search(Node sNode,Node eNode){
47 | List< Node> resultList=new ArrayList< Node>();
48 | boolean isFind=false;
49 | Node node=null;
50 | while(openList.size()>0){
51 | // System.out.println(openList);
52 | //取出开启列表中最低F值,即第一个存储的值的F为最低的
53 | node=openList.get(0);
54 | //判断是否找到目标点
55 | if(node.getX()==eNode.getX()&&node.getY()==eNode.getY()){//本节点找到目标节点的要素就是节点的x\y值相等
56 | isFind=true;
57 | break;
58 | }
59 | //上
60 | if((node.getY()-1)>=0){
61 | checkPath(node.getX(),node.getY()-1,node, eNode, COST_STRAIGHT);
62 | }
63 | //下
64 | if((node.getY()+1)< column){
65 | checkPath(node.getX(),node.getY()+1,node, eNode, COST_STRAIGHT);
66 | }
67 | //左
68 | if((node.getX()-1)>=0){
69 | checkPath(node.getX()-1,node.getY(),node, eNode, COST_STRAIGHT);
70 | }
71 | //右
72 | if((node.getX()+1)< row){
73 | checkPath(node.getX()+1,node.getY(),node, eNode, COST_STRAIGHT);
74 | }
75 | //左上
76 | if((node.getX()-1)>=0&&(node.getY()-1)>=0){
77 | checkPath(node.getX()-1,node.getY()-1,node, eNode, COST_DIAGONAL);
78 | }
79 | //左下
80 | if((node.getX()-1)>=0&&(node.getY()+1)< column){
81 | checkPath(node.getX()-1,node.getY()+1,node, eNode, COST_DIAGONAL);
82 | }
83 | //右上
84 | if((node.getX()+1)< row&&(node.getY()-1)>=0){
85 | checkPath(node.getX()+1,node.getY()-1,node, eNode, COST_DIAGONAL);
86 | }
87 | //右下
88 | if((node.getX()+1)< row&&(node.getY()+1)< column){
89 | checkPath(node.getX()+1,node.getY()+1,node, eNode, COST_DIAGONAL);
90 | }
91 | //从开启列表中删除
92 | //添加到关闭列表中
93 | closeList.add(openList.remove(0));
94 | //开启列表中排序,把F值最低的放到最底端
95 | Collections.sort(openList, new NodeFComparator());
96 | //System.out.println(openList);
97 | }
98 | if(isFind){
99 | getPath(resultList, node);
100 | }
101 | return resultList;
102 | }
103 |
104 | //查询此路是否能走通
105 | private boolean checkPath(int x,int y,Node parentNode,Node eNode,int cost){
106 | Node node=new Node(x, y, parentNode);
107 | //查找地图中是否能通过
108 | if(map[x][y]==0){
109 | closeList.add(node);
110 | return false;
111 | }
112 | //查找关闭列表中是否存在
113 | if(isListContains(closeList, x, y)!=-1){
114 | return false;
115 | }
116 | //查找开启列表中是否存在
117 | int index=-1;
118 | if((index=isListContains(openList, x, y))!=-1){
119 | //G值是否更小,即是否更新G,F值
120 | /**
121 | * 开启列表中的g值比父节点的g值大
122 | */
123 | if((parentNode.getG()+cost)< openList.get(index).getG()){
124 | node.setParentNode(parentNode);
125 | countG(node, eNode, cost);
126 | countF(node);
127 |
128 | openList.set(index, node);
129 | /**
130 | * add方法是在某个指定的位置加上某个对象,并将原来的位置的那个对象向后挤了一格.
131 | * set方法是将原来位置上的那个给取代了,并将原来位置上对象的返回.
132 | */
133 | }
134 | }else{
135 | /**
136 | * 开启列表中的g值比父节点的g值小,则找到最小g值的节点设置为父节点
137 | */
138 | //添加到开启列表中
139 | node.setParentNode(parentNode);
140 | count(node, eNode, cost);
141 | openList.add(node);
142 | }
143 | return true;
144 | }
145 |
146 | //集合中是否包含某个元素(-1:没有找到,否则返回所在的索引)
147 | private int isListContains(List< Node> list,int x,int y){
148 | for(int i=0;i< list.size();i++){
149 | Node node=list.get(i);
150 | if(node.getX()==x&&node.getY()==y){
151 | return i;
152 | }
153 | }
154 | return -1;
155 | }
156 |
157 | //从终点往返回到起点
158 | private void getPath(List< Node> resultList,Node node){
159 | if(node.getParentNode()!=null){
160 | getPath(resultList, node.getParentNode());
161 | }
162 | resultList.add(node);
163 | }
164 |
165 | //计算G,H,F值
166 | private void count(Node node,Node eNode,int cost){
167 | countG(node, eNode, cost);
168 | countH(node, eNode);
169 | countF(node);
170 | }
171 | //计算G值
172 | private void countG(Node node,Node eNode,int cost){
173 | if(node.getParentNode()==null){
174 | node.setG(cost);
175 | }else{
176 | node.setG(node.getParentNode().getG()+cost);
177 | }
178 | }
179 | //计算H值
180 | private void countH(Node node,Node eNode){
181 | node.setH((Math.abs(node.getX()-eNode.getX())+Math.abs(node.getY()-eNode.getY()))*10);
182 | }
183 | //计算F值
184 | private void countF(Node node){
185 | node.setF(node.getG()+node.getH());
186 | }
187 |
188 | }
189 |
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/AstarAlgorithm/ASta算法:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenkai1100/SpiderFrame/15485cf172aa0b91a2e78a505aa66c305a0d9242/src/cn/aidou/Algorithm/AstarAlgorithm/ASta算法
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/AstarAlgorithm/Node.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.Algorithm.AstarAlgorithm;
/**
 * One cell of the search grid together with its A* bookkeeping values.
 */
class Node {
    /** X coordinate. */
    private int x;
    /** Y coordinate. */
    private int y;
    /** Node this one was reached from; {@code null} if none was given. */
    private Node parentNode;
    /** Movement cost from the start to this node. */
    private int g;
    /** Estimated cost from this node to the goal (Manhattan distance, obstacles ignored). */
    private int h;
    /** Total score, f = g + h. */
    private int f;

    /**
     * Creates a node at (x, y); g, h and f start at 0.
     */
    public Node(int x, int y, Node parentNode) {
        this.parentNode = parentNode;
        this.x = x;
        this.y = y;
    }

    // ---- coordinates ----

    public int getX() {
        return this.x;
    }

    public void setX(int x) {
        this.x = x;
    }

    public int getY() {
        return this.y;
    }

    public void setY(int y) {
        this.y = y;
    }

    // ---- parent link ----

    public Node getParentNode() {
        return this.parentNode;
    }

    public void setParentNode(Node parentNode) {
        this.parentNode = parentNode;
    }

    // ---- scores ----

    public int getG() {
        return this.g;
    }

    public void setG(int g) {
        this.g = g;
    }

    public int getH() {
        return this.h;
    }

    public void setH(int h) {
        this.h = h;
    }

    public int getF() {
        return this.f;
    }

    public void setF(int f) {
        this.f = f;
    }

    /** Renders the node as {@code (x,y,f)}. */
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append('(').append(x);
        text.append(',').append(y);
        text.append(',').append(f);
        text.append(')');
        return text.toString();
    }
}
56 |
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/AstarAlgorithm/NodeFComparator.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.Algorithm.AstarAlgorithm;
2 |
3 | import java.util.Comparator;
4 |
5 | //节点比较类
6 | class NodeFComparator implements Comparator< Node>{
7 | @Override
8 | public int compare(Node o1, Node o2) {
9 | return o1.getF()-o2.getF();
10 | }
11 |
12 | }
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/AstarAlgorithm/Test.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.Algorithm.AstarAlgorithm;
2 | import java.util.*;
3 | public class Test {
4 |
5 | public static void main(String[] args){
6 | int[][] map=new int[][]{ //地图数组
7 | {1,1,1,1,1,1,1,1,1,1},//map[0]
8 | {1,1,1,1,0,1,1,1,1,1},//map[1]
9 | {1,1,1,1,0,1,1,1,1,1},//map[2]
10 | {1,1,1,1,0,1,1,1,1,1},//map[3]
11 | {1,1,1,1,0,1,1,1,1,1},//map[4]
12 | {1,1,1,1,0,1,1,1,1,1},//map[5]
13 | {1,1,1,1,0,1,1,1,1,1},//map[6]
14 | {1,1,1,1,0,1,1,1,1,1},//map[7]
15 | {1,1,1,1,1,1,1,1,1,1} //map[8]
16 | };
17 | AStar aStar=new AStar(map, 6, 10);
18 | int flag=aStar.search(5, 3, 3, 8);//int x1,int y1,int x2,int y2
19 | if(flag==-1){
20 | System.out.println("传输数据有误!");
21 | }else if(flag==0){
22 | System.out.println("没找到!");
23 | }else{
24 | for(int x=0;x< 6;x++){
25 | for(int y=0;y< 10;y++){
26 | if(map[x][y]==1){
27 | System.out.print(" ");
28 | }else if(map[x][y]==0){
29 | System.out.print("〓");
30 | }else if(map[x][y]==2){//输出搜索路径
31 | System.out.print("※");
32 | }
33 | }
34 | System.out.println();
35 | }
36 | }
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/BubbleSort/BubbleSort.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.Algorithm.BubbleSort;
2 |
3 | import java.util.Scanner;
4 |
/**
 * Bubble sort demo (written 2016-01-10).
 *
 * @author aidou
 */
public class BubbleSort {

    /** Sorts a sample array and prints it on one line, each value followed by a space. */
    public static void main(String[] args) {
        Integer[] list = { 49, 38, 65, 97, 76, 13, 27, 14, 10 };
        bubble(list);
        for (int i = 0; i < list.length; i++) {
            System.out.print(list[i] + " ");
        }
        System.out.println();
    }

    /**
     * Sorts {@code data} in place into ascending order.
     *
     * <p>Each pass lets the largest remaining value sink to the end of the unsorted part. The
     * method stops as soon as a pass performs no swap, so sorted input costs a single pass.
     *
     * @param data the array to sort, e.g. 49, 38, 65, 97, 76, 13, 27, 14, 10; must not be
     *             {@code null} and must not contain {@code null} elements
     */
    public static void bubble(Integer[] data) {
        // After length-1 passes the remaining first element is necessarily in place.
        for (int pass = 0; pass < data.length - 1; pass++) {
            boolean swapped = false;
            for (int j = 0; j < data.length - 1 - pass; j++) {
                if (data[j] > data[j + 1]) {
                    // Swap the references directly; no need to unbox and re-box the value.
                    Integer tmp = data[j];
                    data[j] = data[j + 1];
                    data[j + 1] = tmp;
                    swapped = true;
                }
            }
            if (!swapped) {
                return;
            }
        }
    }
}
42 |
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/BubbleSort/冒泡排序:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenkai1100/SpiderFrame/15485cf172aa0b91a2e78a505aa66c305a0d9242/src/cn/aidou/Algorithm/BubbleSort/冒泡排序
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/MergeSort/MergeSort.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.Algorithm.MergeSort;
2 |
3 | import java.util.Arrays;
4 |
5 | /**
6 | * 归并排序算法
7 | * @author aidou
8 | * 开始时间:2015\12\4 10:05
9 | * 结束时间:
10 | */
11 | /**
12 | * 归并排序 简介:将两个(或两个以上)有序表合并成一个新的有序表 即把待排序序列分为若干个子序列,每个子序列是有序的。然后再把有序子序列合并为整体有序序列
13 | * 时间复杂度为O(nlogn) 稳定排序方式
14 | *
15 | * @param nums
16 | * 待排序数组
17 | * @return 输出有序数组
18 | */
19 | public class MergeSort {
20 |
21 | public static int[] sort(int[] nums, int low, int high) {
22 | int mid = (low + high) / 2;
23 | if (low < high) {
24 | // 左边
25 | sort(nums, low, mid);
26 | // 右边
27 | sort(nums, mid + 1, high);
28 | // 左右归并
29 | merge(nums, low, mid, high);
30 | }
31 | return nums;
32 | }
33 |
34 | public static void merge(int[] nums, int low, int mid, int high) {
35 | //创建一个临时数组用来存储合并后的数据
36 | int[] temp = new int[high - low + 1];
37 | int i = low;// 左指针
38 | int j = mid + 1;// 右指针
39 | int k = 0;
40 |
41 | // 把较小的数先移到新数组中
42 | while (i <= mid && j <= high) {
43 | if (nums[i] < nums[j]) {
44 | temp[k++] = nums[i++];
45 | } else {
46 | temp[k++] = nums[j++];
47 | }
48 | }
49 |
50 | // 把左边剩余的数移入数组
51 | while (i <= mid) {
52 | temp[k++] = nums[i++];
53 | }
54 |
55 | // 把右边边剩余的数移入数组
56 | while (j <= high) {
57 | temp[k++] = nums[j++];
58 | }
59 |
60 | // 把新数组中的数覆盖nums数组
61 | for (int k2 = 0; k2 < temp.length; k2++) {
62 | nums[k2 + low] = temp[k2];
63 | }
64 | }
65 |
66 | // 归并排序的实现
67 | public static void main(String[] args) {
68 |
69 | int[] nums = { 2, 3, 8, 1, 7, 6, 9, 0, 5, 4 };
70 |
71 | MergeSort.sort(nums, 0, nums.length - 1);
72 | System.out.println(Arrays.toString(nums));
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/MergeSort/归并排序:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenkai1100/SpiderFrame/15485cf172aa0b91a2e78a505aa66c305a0d9242/src/cn/aidou/Algorithm/MergeSort/归并排序
--------------------------------------------------------------------------------
/src/cn/aidou/Algorithm/算法:
--------------------------------------------------------------------------------
1 | /*****************************************/
2 | //start time 201512011349
3 | A* 算法
4 | //end time
5 | /*****************************************/
--------------------------------------------------------------------------------
/src/cn/aidou/Entry/EntryClass.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.Entry;
2 |
3 |
4 | import org.apache.log4j.Logger;
5 |
6 | /**
7 | * 程序有哪些不足 多线程怎样对任务管理的比较好?效率会高一些? 一个线程负责一个对象,一个对象拥有1把锁,当锁中的对象出现故障的时候有监听器自行断电,
8 | * 终止线程,进而抛出异常给开发人员,当线程与任务链接状况很好的情况下(需要有很好的监听
9 | * 排错机制),每个需要访问任务的线程拥有对象任务的一把锁,实现了线程对任务的互斥管理, 方便了责任的划分。
10 | * @author aidou
11 | */
12 | public class EntryClass
13 | {
14 | /**
15 | * [数据流处理]
16 | * 源:以流的形式到达处理器
17 | * execute 符合特定业务流程的中间处理过程
18 | * 目标:将任务结果输出到指定地方
19 | * [监控运行状态层面]
20 | * 在运行过程中加入log4j日志处理,把每一关健步骤都监控起来---log4j
21 | * 由运行状态各项参数将过程及结果动态展现出来---类似ganglia
22 | * [引入的第三方服务]
23 | * 1.flume系统日志收集系统
24 | * 2.kafka消息队列
25 | * 3.zookeeper分布式应用程序协调服务
26 | * 4.流处理引擎 spark streaming or JStorm
27 | * 5.hdfs存储数据/mapreduce计算数据
28 | * @param args
29 | */
30 | //args 输入参数按照:driver url username password 顺序输入
31 | private static Logger logger = Logger.getLogger(EntryClass.class);
32 | public static void main(String[] args) throws Exception
33 | {
34 |
35 | /********************************************************/
36 | /**
37 | * 开源框架集成情况:
38 | * a.将开源任务调度框架集成进来
39 | * b.kafka消息队列的源码阅读与集成
40 | * c.流处理技术应用到项目当中
41 | * d.hdfs分布式文件系统存储数据以及mapreduce过程
42 | * f.flume项目的集成情况
43 | * g.zookeeper
44 | *
45 | * #############阅读源码与学习源码以及应用情况
46 | * #############积累算法和整个架构的思想
47 | */
48 | /********************************************************/
49 | /**
50 | * 1.输入流的产生
51 | */
52 | /**
53 | * 2.输入流格式化切分
54 | */
55 | /**
56 | * 3.任务调度容器处理爬虫作业--------------------------开源框架
57 | */
58 | /***********************初始化基础运行环境开始***********************/
59 | /**
60 | * #.1管理器初始化数据库运行环境
61 | */
62 | Manager sm = new Manager(args);
63 | /**
64 | * #.2初始化线程池环境
65 | */
66 | sm.initWorkSpace();
67 | /**
68 | * #.3初始化爬虫群的各项参数
69 | */
70 | sm.initMember(10, 4);
71 | /***********************初始化基础运行环境结束***********************/
72 | /**
73 | * 4.执行爬虫任务!
74 | */
75 | sm.executeWork();
76 | /**
77 | * #############JVM+爬虫的心跳机制+分布式缓存机制+任务的负载均衡策略
78 | */
79 | /**
80 | * #############scala+python+并发编程学习+大数据技术源码学习
81 | */
82 |
83 | /**********************借鉴************************/
84 | //时间触发基于内存的处理引擎,符合条件的开仓处理。和database不同
85 | //实时风险控制 跟踪
86 | //开仓模式识别
87 | /************************************************/
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/src/cn/aidou/Entry/Manager.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.Entry;
2 |
3 | import java.util.*;
4 |
5 | import org.apache.log4j.Logger;
6 |
7 | import cn.aidou.TaskDistributor.SpiderContainer;
8 | import cn.aidou.TaskDistributor.spider.Spiders.CreateSpider;
9 | import cn.aidou.bean.SpiderBean;
10 | import cn.aidou.dao.BaseDao;
11 | import cn.aidou.thread.SuchThread;
12 | import cn.aidou.thread.ThreadPool;
13 | import cn.aidou.util.Encrypt;
14 |
15 |
16 | /**
17 | * 做爬去任务时 线程和爬虫均由任务管理器来管理维护。
18 | * 一个线程负责一个爬虫,线程从线程池中获得,并由线程池管理线程的生命周期
19 | * member代指爬虫
20 | * workspace代指线程
21 | *
22 | * @author aidou
23 | */
24 |
25 | /**
26 | * 现状:线程是否能够有效地管理:线程置于线程队列当中, 一个任务过来会激活一个爬虫和一个线程
27 | *
28 | * @author aidou 问题:如何处理线程交叉处理任务的过程? 线程用多少初始化多少? 爬虫依据执行不同的执行策略和任务种类去选择合适的执行环境?
29 | * 线程依据用户输入后再去第二个任务的执行,? 动态创建线程的问题 ,是否需要动态代理的实现?
30 | *
31 | *
32 | * URL任务队列、已爬取的URL表. 爬虫中多线程的管理实际是需要维护一个线程池;URL去重也是使用MD5结合布隆过滤器进行实现的.
33 | * 抓取线程主动去任务队列找活干,如果没活就等待,有活了就通知那些等待的抓取线程。
34 | *
35 | *
36 | * 采用生产者与消费者模式,生产者负责生产爬虫,爬虫存在于爬虫队列中,消费者用于管理执行线程,生产一个那么就会消费一个,
37 | * 在应用程序初始化的时候会初始化爬虫参数和线程参数,至于爬虫与线程之间的执行任务部分则会在线程生命周期中完成执行,并
38 | * 将需要的结果持久化到数据库。与此同时,当需要二次 数据加工的时候,可以从数据库中将数据取出后有爬虫管理者定义爬虫管理类的对象
39 | * 再次执行相同的任务。
40 | */
41 | public class Manager {
42 | private static Logger logger = Logger.getLogger(Manager.class);
43 | private int spiderCount;// 爬虫数量=任务数量 //一种类型的任务是由1个爬虫来完成的
44 | private CreateSpider suchSpider;
45 | private ThreadPool threadPool = null;
46 | private List threadQueue;
47 | private SpiderContainer sc = new SpiderContainer();
48 |
49 | /**
50 | * 1.初始化运行数据库参数
51 | */
52 | public Manager(String[] args) {
53 | new BaseDao(getDefaultParam_mysql());
54 | // if (checkNotNull(args))
55 | // {
56 | // new BaseDao(initDB(args));
57 | // } else
58 | // {
59 | // System.exit(-1);
60 | // }
61 | }
62 |
63 | private Map getDefaultParam_mysql() {
64 | String driver = "com.mysql.jdbc.Driver";
65 | String url = "jdbc:mysql://localhost:3306/Spider";
66 | String username = "root";
67 | String password = "1992";
68 | Map mysqlnfo = new HashMap();
69 | mysqlnfo.put("driver", driver);
70 | mysqlnfo.put("dbUrl", url);
71 | mysqlnfo.put("dbUsername", username);
72 | mysqlnfo.put("dbPassword", password);
73 | return mysqlnfo;
74 | }
75 |
76 | /**
77 | * //String driver = "com.mysql.jdbc.Driver"; //String url =
78 | * "jdbc:mysql://localhost:3306/Spider"; //String username = "root";
79 | * //String password = "1992";
80 | *
81 | * @param args
82 | * @return
83 | */
84 | public static Map initDB(String[] args) {
85 | Map DBinfo = new HashMap();
86 | DBinfo.put("driver", args[0]);
87 | DBinfo.put("url", args[1]);
88 | DBinfo.put("username", args[2]);
89 | DBinfo.put("password", args[3]);
90 | return DBinfo;
91 | }
92 |
93 | private static boolean checkNotNull(String[] args) {
94 | if (args[0] == null || args[1] == null || args[2] == null || args[3] == null) {
95 | try {
96 | throw new NullPointerException("初始运行参数不能为空!");
97 | } catch (Exception e) {
98 | e.printStackTrace();
99 | } finally {
100 | return false;
101 | }
102 | }
103 | return true;
104 | }
105 |
106 | /**
107 | * 3.创建指定深度和数量的爬虫,初始化爬虫队列
108 | *
109 | * @param spiderCount
110 | * 爬虫数量
111 | * @param Depth
112 | * 爬虫爬取深度
113 | */
114 | public void initMember(int spiderCount, int Depth) {
115 | this.spiderCount = spiderCount;
116 | for (int i = 0; i < spiderCount; i++) {
117 | SpiderBean sb = new SpiderBean(getSpiderName(), Depth);
118 | logger.info("第" + i + "个爬虫ID为" + sb.getspiderID());
119 | suchSpider = new CreateSpider(sb);
120 | sc.addSpider(suchSpider);
121 | }
122 | }
123 |
124 | public String getSpiderName() {
125 | return Encrypt.md5AndSha(String.valueOf(Math.random() * 50) + 50000 + System.nanoTime());
126 | }
127 |
128 | /**
129 | * 2.创建线程池
130 | *
131 | * @注:无参代表默认标准
132 | * @【线程池定义参考标准】 默认为 initialThreads 10,incrementalThreads 5,maxThreads 50
133 | */
134 | public void initWorkSpace() {
135 | threadPool = new ThreadPool();
136 | try {
137 | threadPool.createPool();
138 | } catch (Exception e) {
139 | e.printStackTrace();
140 | }
141 | }
142 |
143 | public void initWorkSpace(int initialThreads, int incrementalThreads, int maxThreads) {
144 | threadPool = new ThreadPool(initialThreads, incrementalThreads, maxThreads);
145 | try {
146 | threadPool.createPool();
147 | } catch (Exception e) {
148 | e.printStackTrace();
149 | }
150 | }
151 |
152 | public void checkRunningEnv() {
153 | /**
154 | * 0.对任务的初始化工作
155 | */
156 | /**
157 | * input [对输入信息进行处理,将信息呈递给需要的目标] 01.获得任务 a.通过文件方式 b.网页源代码方式
158 | * c.进行网页分析获得数据流 d.视频流技术鉴别 e.图像流输入。如 对牌照图像识别进而识别套牌车 f.对声音进行识别,去除噪音后的语音识别
159 | * g.传感器输入信号,对输入信号的处理 h.对文件夹进行监控,将文件信息上传至hdfs
160 | */
161 | /**
162 | * 如果涉及集群,还需要考虑分布式对任务处理的问题。
163 | */
164 | /**
165 | * 02.对任务根据情况来解压缩
166 | */
167 | // System.out.println("开始执行任务【 " + taskName+"】");
168 | /**
169 | * 1.由任务分发器取出适合当前任务的爬虫 这里需要增加爬虫管理的容器,把爬虫的状态交由容器管理,应用程序不对爬虫负责!
170 | */
171 | }
172 |
173 | /**
174 | * 4.处理线程中的任务 前提是任务+线程+爬虫都准备好了!
175 | */
176 | public void executeWork() {
177 | try {
178 | checkRunningEnv();// 对初始化参数进行检查
179 | threadQueue = new ArrayList();
180 | for (int i = 0; i < spiderCount; i++) {
181 | Thread thread = threadPool.getThread();
182 | ((SuchThread) thread).setRunningFlag(true);
183 | threadQueue.add(thread);
184 | }
185 | } catch (Exception e) {
186 | e.printStackTrace();
187 | }
188 | }
189 | }
190 |
--------------------------------------------------------------------------------
/src/cn/aidou/TaskDistributor/SpiderContainer.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.TaskDistributor;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Iterator;
5 | import java.util.List;
6 |
7 | import cn.aidou.TaskDistributor.spider.Spiders.CreateSpider;
8 |
9 | /**
10 | * 爬虫容器
11 | * 对爬虫的生命周期负责
12 | * @author aidou
13 | */
14 | public class SpiderContainer
15 | {
16 | private List spiderQueue = new ArrayList();
17 | private CreateSpider cs = null;
18 |
19 | public void addSpider(CreateSpider cs)
20 | {
21 | spiderQueue.add(cs);
22 | }
23 |
24 | public void setSpiderQueue(List spiderQueue)
25 | {
26 | this.spiderQueue = spiderQueue;
27 | }
28 |
29 | public SpiderContainer() {
30 | }
31 |
32 | /**
33 | * 由存储适配器从爬虫队列中循环侦测出空闲的爬虫
34 | *
35 | * @return
36 | */
37 | public synchronized CreateSpider getSpiderObj() {
38 | return CircleCheck();
39 | }
40 |
41 | /**
42 | * 循环检测爬虫队列中的空闲爬虫
43 | *
44 | * @return
45 | */
46 | public synchronized CreateSpider CircleCheck() {
47 | cs = getSpderstate();
48 | while (cs == null) {
49 | try {
50 | wait(500);
51 | cs = getSpderstate();
52 | } catch (InterruptedException e) {
53 | e.printStackTrace();
54 | }
55 | }
56 | return cs;
57 | }
58 |
59 | /**
60 | * 得到爬虫当前的使用状态等信息
61 | */
62 | public synchronized CreateSpider getSpderstate() {
63 | Iterator it = spiderQueue.iterator();
64 | while (it.hasNext()) {
65 | CreateSpider CreateSpide = it.next();
66 | if (!CreateSpide.isUsed()) {
67 | return CreateSpide;
68 | }
69 | }
70 | System.out.println("爬虫处于忙碌状态");
71 | return null;
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/src/cn/aidou/TaskDistributor/spider/ISpiderMan.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.TaskDistributor.spider;
2 |
3 | public interface ISpiderMan {
4 | //设置创建爬虫时间
5 | public void setCreateObjectTime();
6 | //控制爬虫状态
7 | public void setUsed(boolean flag);
8 | //判断爬虫是否被使用
9 | public boolean isUsed();
10 | //得到爬虫URL的方式
11 | public void spiderManURL();
12 | //爬虫处理方法
13 | public void spiderManHandler(String url);
14 | }
15 |
--------------------------------------------------------------------------------
/src/cn/aidou/TaskDistributor/spider/Spiders/CreateSpider.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.TaskDistributor.spider.Spiders;
2 |
3 | import java.text.SimpleDateFormat;
4 | import java.util.Date;
5 | import java.util.List;
6 |
7 | import org.apache.log4j.Logger;
8 |
9 | import cn.aidou.TaskDistributor.spider.ISpiderMan;
10 | import cn.aidou.bean.SpiderBean;
11 | import cn.aidou.bean.UrlQueue;
12 | import cn.aidou.dao.URLLinkDao;
13 | import cn.aidou.robot.DownResource;
14 | import cn.aidou.robot.IDownResource;
15 | import cn.aidou.robot.ReadLabelHandler;
16 | import cn.aidou.util.PackageParamObj;
17 | import cn.aidou.util.ParamObject;
18 | /**
19 | * 持久化部分有太多的数据库连接了
20 | * @author aidou
21 | *
22 | */
23 |
24 | public class CreateSpider implements ISpiderMan{
25 | private String SpiderID;//爬虫ID
26 | private int Depth;// 爬虫爬取深度;
27 | private boolean used = false;// 爬虫是否被使用
28 | private String StartTime;// 爬虫开始时间;
29 | private static Logger logger = Logger.getLogger(CreateSpider.class);
30 | //private URLLinkDao uRLLinkDao = null;
31 | /**
32 | * 设置创建爬虫时间
33 | */
34 | public void setCreateObjectTime() {
35 | Date date = new Date();
36 | SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd HH:mm:ss ");
37 | StartTime = sdf.format(date);
38 | logger.info("当前时间" + StartTime);
39 | }
40 | /**
41 | * 控制爬虫状态
42 | * @param flag
43 | * false 休眠
44 | * true 忙碌
45 | */
46 | public void setUsed(boolean flag){
47 | this.used = flag;
48 | }
49 | /**
50 | * 判断爬虫是否被使用
51 | * @return
52 | */
53 | public boolean isUsed(){
54 | return used;
55 | }
56 | public CreateSpider(SpiderBean sb) {
57 | this.SpiderID=sb.getspiderID();
58 | this.Depth = sb.getSpiderDepth();
59 | setCreateObjectTime();
60 | }
61 |
62 | /**
63 | * 【注意:】多线程环境下抛出异常:待解决
64 | */
65 | public void spiderManURL(){
66 | List li = null;
67 | for(UrlQueue list : li){
68 | String url = list.getUrl();
69 | spiderManHandler(url);
70 | }
71 | }
72 | public void spiderManHandler(String url) {
73 | /**
74 | * 0.链接输入(该模块针对不同的输入源备用)
75 | */
76 |
77 | /**
78 | * 1.链接处理:获取网页源码
79 | */
80 | IDownResource dps = new DownResource();
81 | dps.downPageSourceHandler(url);
82 | /**
83 | * 2.对获取的源代码的处理
84 | */
85 | ReadLabelHandler rlh = new ReadLabelHandler();
86 | rlh.readALabel(url);
87 | /**
88 | * 3.链接输出:持久化操作、存入本地磁盘
89 | */
90 | //uRLLinkDao.insertURL(url, 0);
91 | /**
92 | * 4.因为线程不需要知道自己执行的是哪一个爬虫,只需要知道自己执行的是哪种类型的任务即可
93 | * 具体哪种类型的爬虫执行了什么特征的任务由任务分发器负责,所以此处不需要关心爬虫是由
94 | * 那个线程执行的!也就是说线程对爬虫是透明的!
95 | */
96 | //【作废】System.out.println(SpiderName + "是由:" + ThreadName + "执行的!");
97 | }
98 | }
99 |
--------------------------------------------------------------------------------
/src/cn/aidou/TaskDistributor/spider/Spiders/CreateSpider1.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.TaskDistributor.spider.Spiders;
2 |
3 | import cn.aidou.TaskDistributor.spider.ISpiderMan;
4 |
5 | public class CreateSpider1 implements ISpiderMan{
6 | @Override
7 | public void setCreateObjectTime()
8 | {
9 |
10 | }
11 |
12 | @Override
13 | public void setUsed(boolean flag)
14 | {
15 |
16 | }
17 |
18 | @Override
19 | public boolean isUsed()
20 | {
21 | return false;
22 | }
23 |
24 | @Override
25 | public void spiderManURL()
26 | {
27 |
28 | }
29 |
30 | @Override
31 | public void spiderManHandler(String url)
32 | {
33 |
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/cn/aidou/aop/AbstractHandler.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 |
3 | import java.lang.reflect.InvocationHandler;
4 |
/**
 * Base class for the home-grown AOP handlers: an {@link InvocationHandler} that remembers the
 * object the intercepted calls are eventually delegated to.
 *
 * @author Debadatta Mishra
 */
public abstract class AbstractHandler implements InvocationHandler {

    /** Object that receives the real method calls; {@code null} until set. */
    private Object targetObject;

    /**
     * Returns the delegation target.
     *
     * @return the target object, or {@code null} if none has been set
     */
    public Object getTargetObject() {
        return this.targetObject;
    }

    /**
     * Replaces the delegation target.
     *
     * @param targetObject the new target object
     */
    public void setTargetObject(Object targetObject) {
        this.targetObject = targetObject;
    }
}
33 |
--------------------------------------------------------------------------------
/src/cn/aidou/aop/AfterHandler.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 |
3 | import java.lang.reflect.Method;
4 |
5 | /**
6 | * The Class AfterHandler provides a template for After concern.
7 | *
8 | * @author Debadatta Mishra
9 | */
10 | public abstract class AfterHandler extends AbstractHandler {
11 |
12 | /**
13 | * Handles after the execution of method.
14 | *
15 | * @param proxy the proxy
16 | * @param method the method
17 | * @param args the args
18 | */
19 | public abstract void handleAfter(Object proxy, Method method, Object[] args);
20 |
21 | /* (non-Javadoc)
22 | * @see java.lang.reflect.InvocationHandler#invoke(java.lang.Object, java.lang.reflect.Method, java.lang.Object[])
23 | */
24 | @Override
25 | public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
26 |
27 | Object result = method.invoke(getTargetObject(), args);
28 | handleAfter(proxy, method, args);
29 | return result;
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/cn/aidou/aop/AfterHandlerImpl.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 |
3 | import java.lang.reflect.Method;
4 | /**
5 | * The Class AfterHandlerImpl provides an implementation of business logic which
6 | * will be executed after the actual method execution.
7 | *
8 | * @author Debadatta Mishra
9 | */
10 | public class AfterHandlerImpl extends AfterHandler {
11 |
12 | /* (non-Javadoc)
13 | * @see com.ddlab.rnd.aop.AfterHandler#handleAfter(java.lang.Object, java.lang.reflect.Method, java.lang.Object[])
14 | */
15 | @Override
16 | public void handleAfter(Object proxy, Method method, Object[] args) {
17 | //Provide your own cross cutting concern
18 | System.out.println("Handling after actual method execution ........");
19 | }
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/src/cn/aidou/aop/BeforeHandler.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 |
3 | import java.lang.reflect.Method;
4 |
5 | /**
6 | * The Class BeforeHandler provides a template for the before execution
7 | *
8 | * @author Debadatta Mishra
9 | */
10 | public abstract class BeforeHandler extends AbstractHandler {
11 |
12 | /**
13 | * Handles before execution of actual method.
14 | *
15 | * @param proxy the proxy
16 | * @param method the method
17 | * @param args the args
18 | */
19 | public abstract void handleBefore(Object proxy, Method method, Object[] args);
20 |
21 | /* (non-Javadoc)
22 | * @see java.lang.reflect.InvocationHandler#invoke(java.lang.Object, java.lang.reflect.Method, java.lang.Object[])
23 | */
24 | @Override
25 | public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
26 | handleBefore(proxy, method, args);
27 | return method.invoke(getTargetObject(), args);
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/cn/aidou/aop/BeforeHandlerImpl.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 |
3 | import java.lang.reflect.Method;
4 |
5 |
6 |
7 | /**
8 |  * Sample before-advice: prints a fixed marker line. Per the original
9 |  * description it is meant to run before the actual method execution.
10 |  *
11 |  * @author Debadatta Mishra
12 |  */
13 | public class BeforeHandlerImpl extends BeforeHandler {
14 | 
15 |     /**
16 |      * Writes the before-advice marker to standard output.
17 |      *
18 |      * @param proxy  the proxy instance (not used here)
19 |      * @param method the intercepted method (not used here)
20 |      * @param args   the call arguments (not used here)
21 |      */
22 |     @Override
23 |     public void handleBefore(Object proxy, Method method, Object[] args) {
24 |         // Replace this with a real cross-cutting concern.
25 |         final String marker = "Handling before actual method execution ........";
26 |         System.out.println(marker);
27 |     }
28 | }
29 | 
--------------------------------------------------------------------------------
/src/cn/aidou/aop/Calculator.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 |
3 | /**
4 |  * Contract for a calculation on two integer operands.
5 |  *
6 |  * @author Debadatta Mishra
7 |  */
8 | public interface Calculator {
9 | 
10 |     /**
11 |      * Performs the calculation on the two operands.
12 |      *
13 |      * @param a the first operand
14 |      * @param b the second operand
15 |      * @return the integer result
16 |      */
17 |     int calculate(int a, int b);
18 | }
19 | 
--------------------------------------------------------------------------------
/src/cn/aidou/aop/CalculatorImpl.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 |
3 | /**
4 | * The Class CalculatorImpl.
5 | * @author Debadatta Mishra
6 | */
7 | public class CalculatorImpl implements Calculator {
8 | public CalculatorImpl(){
9 | }
10 | @Override
11 | public int calculate(int a, int b) {
12 | System.out.println("**********Actual Method Execution**********");
13 | return a/b;
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/src/cn/aidou/aop/ProxyFactory.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 | import java.lang.reflect.Proxy;
3 | import java.util.List;
4 |
5 | /**
6 | * A factory for creating Proxy objects.
7 | * @author Debadatta Mishra
8 | */
9 | public class ProxyFactory {
10 |
11 | /**
12 | * Gets the proxy.
13 | *
14 | * @param targetObject 目标对象
15 | * @param handlers 处理类集合
16 | * @return Object 对象
17 | */
18 | public static Object getProxy(Object targetObject,
19 | List handlers) {
20 | Object proxyObject = null;
21 | if (handlers.size() > 0) {
22 | proxyObject = targetObject;
23 | for (int i = 0; i < handlers.size(); i++) {
24 | handlers.get(i).setTargetObject(proxyObject);
25 | proxyObject = Proxy.newProxyInstance(targetObject.getClass()
26 | .getClassLoader(), targetObject.getClass()
27 | .getInterfaces(), handlers.get(i));
28 | }
29 | return proxyObject;
30 | } else {
31 | return targetObject;
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/src/cn/aidou/aop/TestAopInJDK.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.aop;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | /**
6 |  * Aspect-oriented programming demo built on {@link ProxyFactory}.
7 |  *
8 |  * @author aidou
9 |  */
10 | public class TestAopInJDK {
11 | 
12 |     /**
13 |      * Wraps a {@link CalculatorImpl} with a before- and an after-handler and
14 |      * prints the result of one call made through the resulting proxy.
15 |      *
16 |      * @param args ignored
17 |      */
18 |     public static void main(String[] args) {
19 |         CalculatorImpl calcImpl = new CalculatorImpl();
20 |         BeforeHandler before = new BeforeHandlerImpl();
21 |         AfterHandler after = new AfterHandlerImpl();
22 | 
23 |         // Advice handlers to be distributed around the target method.
24 |         List<AbstractHandler> handlers = new ArrayList<AbstractHandler>();
25 |         handlers.add(before); // before advice
26 |         handlers.add(after);  // after advice
27 | 
28 |         Calculator proxy = (Calculator) ProxyFactory.getProxy(calcImpl, handlers);
29 |         int result = proxy.calculate(20, 10);
30 |         System.out.println("Final Result :::" + result);
31 |     }
32 | 
33 | }
34 | 
--------------------------------------------------------------------------------
/src/cn/aidou/bean/SpiderBean.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.bean;
2 |
3 | /**
4 |  * Value object describing one spider: its id, status and depth.
5 |  *
6 |  * Created by aidou on 2016/4/30.
7 |  */
8 | public class SpiderBean
9 | {
10 |     private String spiderID;
11 |     private int spiderStatus;
12 |     private int spiderDepth;
13 | 
14 |     /**
15 |      * Creates a spider whose status is initialised to {@code 1}.
16 |      *
17 |      * @param spiderID    the spider's identifier
18 |      * @param spiderDepth the spider's depth
19 |      */
20 |     public SpiderBean(String spiderID, int spiderDepth)
21 |     {
22 |         this(spiderID, 1, spiderDepth);
23 |     }
24 | 
25 |     /**
26 |      * Creates a spider with every property given explicitly.
27 |      *
28 |      * @param spiderID     the spider's identifier
29 |      * @param spiderStatus the spider's status
30 |      * @param spiderDepth  the spider's depth
31 |      */
32 |     public SpiderBean(String spiderID, int spiderStatus, int spiderDepth)
33 |     {
34 |         this.spiderID = spiderID;
35 |         this.spiderStatus = spiderStatus;
36 |         this.spiderDepth = spiderDepth;
37 |     }
38 | 
39 |     // Accessor names (including the lower-case "spiderID" pair) are kept as
40 |     // they are because other classes may call them.
41 | 
42 |     public String getspiderID()
43 |     {
44 |         return spiderID;
45 |     }
46 | 
47 |     public void setspiderID(String spiderID)
48 |     {
49 |         this.spiderID = spiderID;
50 |     }
51 | 
52 |     public int getSpiderStatus()
53 |     {
54 |         return spiderStatus;
55 |     }
56 | 
57 |     public void setSpiderStatus(int spiderStatus)
58 |     {
59 |         this.spiderStatus = spiderStatus;
60 |     }
61 | 
62 |     public int getSpiderDepth()
63 |     {
64 |         return spiderDepth;
65 |     }
66 | 
67 |     public void setSpiderDepth(int spiderDepth)
68 |     {
69 |         this.spiderDepth = spiderDepth;
70 |     }
71 | 
72 |     /**
73 |      * Renders all three properties. The string id is wrapped in a balanced
74 |      * pair of single quotes; the two numeric properties are left unquoted.
75 |      *
76 |      * @return e.g. {@code SpiderInfo{spiderID='s1', spiderStatus=1, spiderDepth=3}}
77 |      */
78 |     @Override
79 |     public String toString()
80 |     {
81 |         return "SpiderInfo{" +
82 |                 "spiderID='" + spiderID + '\'' +
83 |                 ", spiderStatus=" + spiderStatus +
84 |                 ", spiderDepth=" + spiderDepth +
85 |                 '}';
86 |     }
87 | }
88 | 
--------------------------------------------------------------------------------
/src/cn/aidou/bean/TaskBean.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.bean;
2 |
3 | /**
4 |  * Plain data holder for a task: id, name, start and end time strings and
5 |  * five event strings. All properties start out {@code null}.
6 |  *
7 |  * Created by aidou on 2016/4/30.
8 |  */
9 | public class TaskBean {
10 | 
11 |     private String taskID;
12 |     private String taskName;
13 |     private String taskStartTime;
14 |     private String taskEndTime;
15 |     private String event1;
16 |     private String event2;
17 |     private String event3;
18 |     private String event4;
19 |     private String event5;
20 | 
21 |     // ---- task identity -------------------------------------------------
22 | 
23 |     public String getTaskID() {
24 |         return this.taskID;
25 |     }
26 | 
27 |     public void setTaskID(String taskID) {
28 |         this.taskID = taskID;
29 |     }
30 | 
31 |     public String getTaskName() {
32 |         return this.taskName;
33 |     }
34 | 
35 |     public void setTaskName(String taskName) {
36 |         this.taskName = taskName;
37 |     }
38 | 
39 |     // ---- task time window ----------------------------------------------
40 | 
41 |     public String getTaskStartTime() {
42 |         return this.taskStartTime;
43 |     }
44 | 
45 |     public void setTaskStartTime(String taskStartTime) {
46 |         this.taskStartTime = taskStartTime;
47 |     }
48 | 
49 |     public String getTaskEndTime() {
50 |         return this.taskEndTime;
51 |     }
52 | 
53 |     public void setTaskEndTime(String taskEndTime) {
54 |         this.taskEndTime = taskEndTime;
55 |     }
56 | 
57 |     // ---- event strings -------------------------------------------------
58 | 
59 |     public String getEvent1() {
60 |         return this.event1;
61 |     }
62 | 
63 |     public void setEvent1(String event1) {
64 |         this.event1 = event1;
65 |     }
66 | 
67 |     public String getEvent2() {
68 |         return this.event2;
69 |     }
70 | 
71 |     public void setEvent2(String event2) {
72 |         this.event2 = event2;
73 |     }
74 | 
75 |     public String getEvent3() {
76 |         return this.event3;
77 |     }
78 | 
79 |     public void setEvent3(String event3) {
80 |         this.event3 = event3;
81 |     }
82 | 
83 |     public String getEvent4() {
84 |         return this.event4;
85 |     }
86 | 
87 |     public void setEvent4(String event4) {
88 |         this.event4 = event4;
89 |     }
90 | 
91 |     public String getEvent5() {
92 |         return this.event5;
93 |     }
94 | 
95 |     public void setEvent5(String event5) {
96 |         this.event5 = event5;
97 |     }
98 | }
99 | 
--------------------------------------------------------------------------------
/src/cn/aidou/bean/UrlQueue.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.bean;
2 |
3 | /**
4 |  * Plain data holder with a numeric id, a URL string and a status code.
5 |  */
6 | public class UrlQueue {
7 | 
8 |     private int urlid;
9 |     private String url;
10 |     private int status;
11 | 
12 |     /** @return the numeric id (0 until set) */
13 |     public int getUrlid() {
14 |         return this.urlid;
15 |     }
16 | 
17 |     /** @param urlid the numeric id to store */
18 |     public void setUrlid(int urlid) {
19 |         this.urlid = urlid;
20 |     }
21 | 
22 |     /** @return the URL string ({@code null} until set) */
23 |     public String getUrl() {
24 |         return this.url;
25 |     }
26 | 
27 |     /** @param url the URL string to store */
28 |     public void setUrl(String url) {
29 |         this.url = url;
30 |     }
31 | 
32 |     /** @return the status code (0 until set) */
33 |     public int getStatus() {
34 |         return this.status;
35 |     }
36 | 
37 |     /** @param status the status code to store */
38 |     public void setStatus(int status) {
39 |         this.status = status;
40 |     }
41 | }
42 | 
--------------------------------------------------------------------------------
/src/cn/aidou/dao/BaseDao.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.dao;
2 |
3 | import cn.aidou.util.PackageParamObj;
4 | import cn.aidou.util.ParamObject;
5 |
6 | import java.sql.PreparedStatement;
7 | import java.sql.ResultSet;
8 | import java.sql.Statement;
9 | import java.util.HashMap;
10 | import java.util.Map;
11 |
12 | public class BaseDao
13 | {
14 | Map map = new HashMap();
15 | ParamObject po = PackageParamObj.getParamObject();
16 | /**
17 | * 初始化数据库连接池
18 | */
19 | public BaseDao(Map dbinfo)
20 | {
21 | try
22 | {
23 | ConnectionPool cp = new ConnectionPool(dbinfo);
24 | map.put("connPool", cp);
25 | po.setEnvRoot(map);
26 | cp.createPool();
27 | } catch (Exception e)
28 | {
29 | e.printStackTrace();
30 | }
31 | }
32 |
33 | /**
34 | * 释放资源
35 | *
36 | * @param ResultSet rs
37 | * @param PreparedStatement pst
38 | * @param Statement st
39 | */
40 | public static void releaseResource(ResultSet rs, PreparedStatement pst, Statement st)
41 | {
42 | try
43 | {
44 | if (rs != null)
45 | {
46 | rs.close();
47 | }
48 | if (st != null)
49 | {
50 | st.close();
51 | }
52 | if (pst != null)
53 | {
54 | pst.close();
55 | }
56 | } catch (Exception e)
57 | {
58 | e.printStackTrace();
59 | }
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/cn/aidou/dao/ConnectionPool.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.dao;
2 |
3 | //
4 | //一个效果非常不错的JAVA数据库连接池.
5 | //from:http://www.jxer.com/home/?uid-195-action-viewspace-itemid-332
6 | //虽然现在用APACHE COMMONS DBCP可以非常方便的建立数据库连接池,
7 | //但是像这篇文章把数据库连接池的内部原理写的这么透彻,注视这么完整,
8 | //真是非常难得,让开发人员可以更深层次的理解数据库连接池,真是非常感
9 | //谢这篇文章的作者。
10 | //
11 |
12 | import cn.aidou.Algorithm.AstarAlgorithm.Test;
13 |
14 | import java.sql.Connection;
15 | import java.sql.DatabaseMetaData;
16 | import java.sql.Driver;
17 | import java.sql.DriverManager;
18 | import java.sql.SQLException;
19 | import java.sql.Statement;
20 | import java.util.Enumeration;
21 | import java.util.Map;
22 | import java.util.Vector;
23 |
24 | import org.apache.log4j.Logger;
25 |
26 | /**
27 |  * A small hand-rolled JDBC connection pool.
28 |  *
29 |  * <p>Connections are kept in a vector of {@link PooledConnection} entries.
30 |  * Handing a connection out does not remove it from the vector; the entry is
31 |  * only flagged busy, and {@link #returnConnection} clears that flag again.
32 |  */
33 | public class ConnectionPool
34 | {
35 | 
36 |     private String driver = ""; // JDBC driver class name
37 |     private String dbUrl = ""; // JDBC URL
38 |     private String dbUsername = ""; // database user name
39 |     private String dbPassword = ""; // database password
40 |     private String testTable = "urlqueue"; // table queried to validate a connection; "" disables the query
41 |     private int initialConnections = 10; // number of connections opened by createPool()
42 |     private int incrementalConnections = 5; // number of connections added when none is free
43 |     private int maxConnections = 50; // upper bound of the pool; 0 or negative means unlimited
44 |     private Vector<PooledConnection> connections = null; // pool entries; null until createPool() has run
45 |     private static Logger logger = Logger.getLogger(ConnectionPool.class);
46 | 
47 |     /**
48 |      * Reads the connection settings from {@code DBinfo}.
49 |      *
50 |      * @param DBinfo map holding the keys {@code driver}, {@code dbUrl},
51 |      *               {@code dbUsername} and {@code dbPassword}
52 |      */
53 |     public ConnectionPool(Map<String, String> DBinfo)
54 |     {
55 |         this.driver = DBinfo.get("driver");
56 |         this.dbUrl = DBinfo.get("dbUrl");
57 |         this.dbUsername = DBinfo.get("dbUsername");
58 |         this.dbPassword = DBinfo.get("dbPassword");
59 |     }
60 | 
61 |     public int getInitialConnections()
62 |     {
63 |         return this.initialConnections;
64 |     }
65 | 
66 |     public void setInitialConnections(int initialConnections)
67 |     {
68 |         this.initialConnections = initialConnections;
69 |     }
70 | 
71 |     public int getIncrementalConnections()
72 |     {
73 |         return this.incrementalConnections;
74 |     }
75 | 
76 |     public void setIncrementalConnections(int incrementalConnections)
77 |     {
78 |         this.incrementalConnections = incrementalConnections;
79 |     }
80 | 
81 |     public int getMaxConnections()
82 |     {
83 |         return this.maxConnections;
84 |     }
85 | 
86 |     public void setMaxConnections(int maxConnections)
87 |     {
88 |         this.maxConnections = maxConnections;
89 |     }
90 | 
91 |     public String getTestTable()
92 |     {
93 |         return this.testTable;
94 |     }
95 | 
96 |     public void setTestTable(String testTable)
97 |     {
98 |         this.testTable = testTable;
99 |     }
100 | 
101 |     /**
102 |      * Registers the JDBC driver and opens the initial connections. Does
103 |      * nothing when the pool already exists.
104 |      *
105 |      * @throws Exception if the driver cannot be loaded or a connection cannot
106 |      *                   be opened
107 |      */
108 |     public synchronized void createPool() throws Exception
109 |     {
110 |         if (connections != null)
111 |         {
112 |             return; // already created
113 |         }
114 |         Driver driver = (Driver) (Class.forName(this.driver).newInstance());
115 |         DriverManager.registerDriver(driver);
116 |         connections = new Vector<PooledConnection>();
117 |         createConnections(this.initialConnections);
118 |         logger.info("Database connection pool creation successed !");
119 |     }
120 | 
121 |     /**
122 |      * Adds up to {@code numConnections} new entries to the pool, stopping
123 |      * early once {@code maxConnections} is reached (when that limit is
124 |      * positive).
125 |      *
126 |      * @throws SQLException the original failure reported by the driver
127 |      */
128 |     private void createConnections(int numConnections) throws SQLException
129 |     {
130 |         for (int x = 0; x < numConnections; x++)
131 |         {
132 |             if (this.maxConnections > 0 && this.connections.size() >= this.maxConnections)
133 |             {
134 |                 break;
135 |             }
136 |             try
137 |             {
138 |                 connections.addElement(new PooledConnection(newConnection()));
139 |             } catch (SQLException e)
140 |             {
141 |                 logger.error("Database connection pool creation fail !" + e.getMessage(), e);
142 |                 // Rethrow the driver's exception so SQL state and cause are not lost.
143 |                 throw e;
144 |             }
145 |             logger.info("Database connection pool being created ......!");
146 |         }
147 |     }
148 | 
149 |     /**
150 |      * Opens one physical connection. While the pool is still empty it also
151 |      * asks the database for its connection limit and lowers
152 |      * {@code maxConnections} to that limit when the limit is positive and
153 |      * smaller.
154 |      */
155 |     private Connection newConnection() throws SQLException
156 |     {
157 |         Connection conn = DriverManager.getConnection(dbUrl, dbUsername, dbPassword);
158 |         if (connections.size() == 0)
159 |         {
160 |             // Only needs to be checked once, for the very first connection.
161 |             DatabaseMetaData metaData = conn.getMetaData();
162 |             int driverMaxConnections = metaData.getMaxConnections();
163 |             // 0 means the database has no limit or the limit is unknown.
164 |             if (driverMaxConnections > 0 && this.maxConnections > driverMaxConnections)
165 |             {
166 |                 this.maxConnections = driverMaxConnections;
167 |             }
168 |         }
169 |         return conn;
170 |     }
171 | 
172 |     /**
173 |      * Returns a free connection, retrying every 250 ms until one is
174 |      * available.
175 |      *
176 |      * @return a connection flagged busy, or {@code null} when the pool has
177 |      *         not been created yet
178 |      * @throws SQLException if the pool has to grow and opening a connection
179 |      *                      fails
180 |      */
181 |     public synchronized Connection getConnection() throws SQLException
182 |     {
183 |         if (connections == null)
184 |         {
185 |             System.out.println("连接池还没创建!");
186 |             return null;
187 |         }
188 |         Connection conn = getFreeConnection();
189 |         while (conn == null)
190 |         {
191 |             // Everything is in use and the pool could not grow: wait and retry.
192 |             wait(250);
193 |             conn = getFreeConnection();
194 |         }
195 |         return conn;
196 |     }
197 | 
198 |     /**
199 |      * Looks for a free connection; when none is found the pool is grown by
200 |      * {@code incrementalConnections} and searched once more.
201 |      *
202 |      * @return a free connection, or {@code null} if there still is none
203 |      */
204 |     private Connection getFreeConnection() throws SQLException
205 |     {
206 |         Connection conn = findFreeConnection();
207 |         if (conn == null)
208 |         {
209 |             createConnections(incrementalConnections);
210 |             conn = findFreeConnection();
211 |         }
212 |         return conn;
213 |     }
214 | 
215 |     /**
216 |      * Scans the pool for the first entry that is not busy, validates its
217 |      * connection and flags the entry busy.
218 |      *
219 |      * <p>A connection that fails validation is replaced by a fresh one. If
220 |      * the replacement cannot be opened, the entry is left free and the scan
221 |      * continues, so a closed connection is never handed to a caller.
222 |      *
223 |      * @return a usable connection, or {@code null} when none is available
224 |      */
225 |     private Connection findFreeConnection() throws SQLException
226 |     {
227 |         for (PooledConnection pConn : connections)
228 |         {
229 |             if (pConn.isBusy())
230 |             {
231 |                 continue;
232 |             }
233 |             Connection conn = pConn.getConnection();
234 |             if (!testConnection(conn))
235 |             {
236 |                 try
237 |                 {
238 |                     conn = newConnection();
239 |                 } catch (SQLException e)
240 |                 {
241 |                     System.out.println(" 创建数据库连接失败! " + e.getMessage());
242 |                     continue; // this entry cannot be repaired right now
243 |                 }
244 |                 pConn.setConnection(conn);
245 |             }
246 |             pConn.setBusy(true);
247 |             return conn;
248 |         }
249 |         return null;
250 |     }
251 | 
252 |     /**
253 |      * Checks whether {@code conn} is still usable. With a test table
254 |      * configured a count query is run against it; otherwise
255 |      * {@code setAutoCommit(true)} is used as a probe. A connection that
256 |      * fails the check is closed.
257 |      *
258 |      * @return {@code true} if the probe succeeded, {@code false} if
259 |      *         {@code conn} is {@code null} or the probe failed
260 |      */
261 |     private boolean testConnection(Connection conn)
262 |     {
263 |         if (conn == null)
264 |         {
265 |             return false;
266 |         }
267 |         Statement stmt = null;
268 |         try
269 |         {
270 |             if (testTable.equals(""))
271 |             {
272 |                 conn.setAutoCommit(true);
273 |             } else
274 |             {
275 |                 stmt = conn.createStatement();
276 |                 stmt.executeQuery("select count(*) from " + testTable);
277 |             }
278 |             return true;
279 |         } catch (SQLException e)
280 |         {
281 |             closeConnection(conn);
282 |             return false;
283 |         } finally
284 |         {
285 |             // The probe statement must not stay open on a pooled connection.
286 |             if (stmt != null)
287 |             {
288 |                 try
289 |                 {
290 |                     stmt.close();
291 |                 } catch (SQLException ignored)
292 |                 {
293 |                     // nothing useful can be done if the probe statement fails to close
294 |                 }
295 |             }
296 |         }
297 |     }
298 | 
299 |     /**
300 |      * Marks the pool entry holding {@code conn} as free again. Connections
301 |      * that do not belong to the pool are ignored.
302 |      *
303 |      * <p>NOTE(review): this method is not synchronized although
304 |      * {@link #getConnection()} is; getConnection() sleeps while holding the
305 |      * monitor, so simply adding {@code synchronized} here would block returns
306 |      * during that wait. Confirm the intended locking before changing it.
307 |      *
308 |      * @param conn the connection obtained from {@link #getConnection()}
309 |      */
310 |     public void returnConnection(Connection conn)
311 |     {
312 |         if (connections == null)
313 |         {
314 |             System.out.println(" 连接池不存在,无法返回此连接到连接池中 !");
315 |             return;
316 |         }
317 |         for (PooledConnection pConn : connections)
318 |         {
319 |             if (conn == pConn.getConnection())
320 |             {
321 |                 pConn.setBusy(false);
322 |                 break;
323 |             }
324 |         }
325 |     }
326 | 
327 |     /**
328 |      * Replaces every pooled connection with a fresh one. A busy entry is
329 |      * given five seconds before it is replaced regardless.
330 |      *
331 |      * @throws SQLException if a replacement connection cannot be opened
332 |      */
333 |     public synchronized void refreshConnections() throws SQLException
334 |     {
335 |         if (connections == null)
336 |         {
337 |             System.out.println(" 连接池不存在,无法刷新 !");
338 |             return;
339 |         }
340 |         for (PooledConnection pConn : connections)
341 |         {
342 |             if (pConn.isBusy())
343 |             {
344 |                 wait(5000);
345 |             }
346 |             closeConnection(pConn.getConnection());
347 |             pConn.setConnection(newConnection());
348 |             pConn.setBusy(false);
349 |         }
350 |     }
351 | 
352 |     /**
353 |      * Closes every pooled connection and discards the pool. A busy entry is
354 |      * given five seconds before it is closed regardless.
355 |      *
356 |      * <p>All entries are closed first and the vector is emptied afterwards;
357 |      * removing entries while iterating would skip some of them and leave
358 |      * their connections open.
359 |      */
360 |     public synchronized void closeConnectionPool() throws SQLException
361 |     {
362 |         if (connections == null)
363 |         {
364 |             System.out.println(" 连接池不存在,无法关闭 !");
365 |             return;
366 |         }
367 |         for (PooledConnection pConn : connections)
368 |         {
369 |             if (pConn.isBusy())
370 |             {
371 |                 wait(5000);
372 |             }
373 |             closeConnection(pConn.getConnection());
374 |         }
375 |         connections.clear();
376 |         connections = null;
377 |     }
378 | 
379 |     /** Closes {@code conn}, printing (not propagating) any failure; {@code null} is ignored. */
380 |     private void closeConnection(Connection conn)
381 |     {
382 |         if (conn == null)
383 |         {
384 |             return;
385 |         }
386 |         try
387 |         {
388 |             conn.close();
389 |         } catch (SQLException e)
390 |         {
391 |             System.out.println(" 关闭数据库连接出错: " + e.getMessage());
392 |         }
393 |     }
394 | 
395 |     /**
396 |      * Sleeps for {@code mSeconds} milliseconds. Despite its name this is a
397 |      * plain {@code Thread.sleep}; it does not release the object's monitor.
398 |      */
399 |     private void wait(int mSeconds)
400 |     {
401 |         try
402 |         {
403 |             Thread.sleep(mSeconds);
404 |         } catch (InterruptedException ignored)
405 |         {
406 |             // Deliberately not re-interrupting: the retry loop in getConnection()
407 |             // would otherwise spin without pausing, since every later sleep would
408 |             // fail immediately.
409 |         }
410 |     }
411 | }
412 | 
--------------------------------------------------------------------------------
/src/cn/aidou/dao/PooledConnection.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.dao;
2 |
3 | import java.sql.Connection;
4 |
5 | /**
6 |  * Pairs a JDBC connection with a flag telling whether it is currently in use.
7 |  */
8 | public class PooledConnection {
9 | 
10 |     Connection connection = null; // the wrapped database connection
11 |     boolean busy = false; // true while the connection is in use; starts out false
12 | 
13 |     /**
14 |      * Wraps the given connection; the busy flag starts out {@code false}.
15 |      *
16 |      * @param connection the connection to wrap
17 |      */
18 |     public PooledConnection(Connection connection) {
19 |         this.connection = connection;
20 |     }
21 | 
22 |     /** @return the wrapped connection */
23 |     public Connection getConnection() {
24 |         return this.connection;
25 |     }
26 | 
27 |     /** @param connection the connection this entry should hold from now on */
28 |     public void setConnection(Connection connection) {
29 |         this.connection = connection;
30 |     }
31 | 
32 |     /** @return {@code true} while the connection is flagged as in use */
33 |     public boolean isBusy() {
34 |         return this.busy;
35 |     }
36 | 
37 |     /** @param busy the new value of the in-use flag */
38 |     public void setBusy(boolean busy) {
39 |         this.busy = busy;
40 |     }
41 | }
42 | 
--------------------------------------------------------------------------------
/src/cn/aidou/dao/URLLinkDao.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.dao;
2 |
3 | import java.sql.Connection;
4 | import java.sql.ResultSet;
5 | import java.sql.SQLException;
6 | import java.sql.Statement;
7 | import java.util.ArrayList;
8 | import java.util.List;
9 |
10 | import cn.aidou.bean.UrlQueue;
11 | import cn.aidou.util.ObjFactory;
12 | import cn.aidou.util.PackageParamObj;
13 |
14 | import static cn.aidou.dao.BaseDao.releaseResource;
15 |
16 | /**
17 |  * Data access for the {@code urlqueue} table, using connections borrowed
18 |  * from {@code ObjFactory.connPool}.
19 |  */
20 | public class URLLinkDao{
21 | 
22 |     private ConnectionPool connPool = ObjFactory.connPool;
23 | 
24 |     public URLLinkDao()
25 |     {
26 |     }
27 | 
28 |     /**
29 |      * Reads every row of {@code urlqueue}.
30 |      *
31 |      * <p>Each call builds its own list, so repeated calls do not accumulate
32 |      * duplicate rows. The statement that ran the query is the one that gets
33 |      * closed, and the borrowed connection is always handed back to the pool.
34 |      *
35 |      * @return one {@link UrlQueue} per row, or {@code null} if anything fails
36 |      *         (the failure is printed)
37 |      */
38 |     public List<UrlQueue> selectURL(){
39 |         List<UrlQueue> rows = new ArrayList<UrlQueue>();
40 |         Connection conn = null;
41 |         Statement st = null;
42 |         ResultSet rs = null;
43 |         try {
44 |             String sql = " select * from urlqueue ";
45 |             conn = connPool.getConnection();
46 |             st = conn.createStatement();
47 |             rs = st.executeQuery(sql);
48 |             while(rs.next()){
49 |                 UrlQueue uq = (UrlQueue) ObjFactory.createObj("cn.aidou.bean.UrlQueue");
50 |                 uq.setUrlid(rs.getInt("urlid"));
51 |                 uq.setUrl(rs.getString("url"));
52 |                 uq.setStatus(rs.getInt("status"));
53 |                 rows.add(uq);
54 |             }
55 |             return rows;
56 |         } catch (Exception e) {
57 |             e.printStackTrace();
58 |             return null;
59 |         }finally{
60 |             releaseResource(rs, null, st);
61 |             if (conn != null) {
62 |                 connPool.returnConnection(conn);
63 |             }
64 |         }
65 |     }
66 | 
67 |     /** Not implemented yet. */
68 |     public void updateURL(){
69 | 
70 |     }
71 | 
72 |     /** Not implemented yet. */
73 |     public void deleteURL(){
74 | 
75 |     }
76 | 
77 |     /**
78 |      * Inserts one row into {@code urlqueue} and prints the affected row count.
79 |      *
80 |      * <p>The values are bound as statement parameters instead of being
81 |      * concatenated into the SQL text, so a URL containing quotes can neither
82 |      * break nor alter the statement. The borrowed connection is remembered
83 |      * locally so that it can be returned to the pool afterwards.
84 |      *
85 |      * @param url    the URL to store
86 |      * @param status the status value to store
87 |      */
88 |     public void insertURL(String url, int status){
89 |         String sql = "insert into urlqueue(url,status) values(?, ?)";
90 |         Connection conn = null;
91 |         java.sql.PreparedStatement pst = null;
92 |         try {
93 |             conn = connPool.getConnection();
94 |             pst = conn.prepareStatement(sql);
95 |             pst.setString(1, url);
96 |             pst.setInt(2, status);
97 |             int j = pst.executeUpdate();
98 |             System.out.println("被影响的行数:" + j);
99 |         } catch (SQLException e) {
100 |             e.printStackTrace();
101 |         }finally{
102 |             BaseDao.releaseResource(null, pst, null);
103 |             if (conn != null) {
104 |                 connPool.returnConnection(conn);
105 |             }
106 |         }
107 |     }
108 | }
109 | 
--------------------------------------------------------------------------------
/src/cn/aidou/listener/ExampleListener.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.listener;
2 |
3 | import java.util.EventListener;
4 |
5 | /**
6 |  * Skeleton event listener: both the constructor and the callback are empty
7 |  * placeholders.
8 |  */
9 | public class ExampleListener implements EventListener {
10 | 
11 |     /** Placeholder for initialising the listener's configuration. */
12 |     public ExampleListener() {
13 |         super();
14 |     }
15 | 
16 |     /**
17 |      * Callback to run once the event has happened; currently does nothing.
18 |      *
19 |      * @param runTimeEvent the event that occurred
20 |      */
21 |     public void callBackFunction(RunTimeEvent runTimeEvent) {
22 |         // Listener-specific handling of the event goes here.
23 |     }
24 | }
25 | 
--------------------------------------------------------------------------------
/src/cn/aidou/listener/RunTimeEvent.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.listener;
2 |
3 | import java.util.EventObject;
4 |
5 |
6 | /**
7 |  * Event object carrying the event source. It keeps its own replaceable
8 |  * reference to the source next to the one stored by {@link EventObject}.
9 |  *
10 |  * @author Eric
11 |  */
12 | public class RunTimeEvent extends EventObject {
13 | 
14 |     private static final long serialVersionUID = 1L;
15 | 
16 |     /** The event source as seen through {@link #getSource()}. */
17 |     private Object source;
18 | 
19 |     /**
20 |      * @param source the object on which the event occurred; the superclass
21 |      *               constructor rejects {@code null}
22 |      */
23 |     public RunTimeEvent(Object source){
24 |         super(source);
25 |         setSource(source);
26 |     }
27 | 
28 |     /** @return the source held by this class's own field */
29 |     public Object getSource() {
30 |         return this.source;
31 |     }
32 | 
33 |     /** @param source the new source; only this class's own field is updated */
34 |     public void setSource(Object source) {
35 |         this.source = source;
36 |     }
37 | }
38 | 
--------------------------------------------------------------------------------
/src/cn/aidou/listener/RunTimeListener.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.listener;
2 |
3 | import java.util.EventListener;
4 | import cn.aidou.thread.SuchThread;
5 | /**
6 |  * Listener that reports a spider's run time once a {@link RunTimeEvent}
7 |  * arrives.
8 |  */
9 | public class RunTimeListener implements EventListener {
10 | 
11 |     /** Prints a message announcing that the listener has been initialised. */
12 |     public RunTimeListener() {
13 |         final String initMessage = "初始化蜘蛛人运行时间监控的事件监听器";
14 |         System.out.println(initMessage);
15 |     }
16 | 
17 |     /**
18 |      * Callback invoked after the event has happened: prints two banner lines,
19 |      * then the timeout notice and the run time read from the event source.
20 |      *
21 |      * @param runTimeEvent event whose source must be a {@link SuchThread}
22 |      * @throws ClassCastException if the source is of any other type
23 |      */
24 |     public void callBackFunction(RunTimeEvent runTimeEvent) {
25 |         System.out.println("蜘蛛人运行时间监控的事件监听器被调用!");
26 |         System.out.println("事件源发生相应的事件后调用此回调函数,对监听器监听到的事件源的改变的处理事件的代码存放到监听器端");
27 | 
28 |         Object eventSource = runTimeEvent.getSource();
29 |         SuchThread suchThread = (SuchThread) eventSource;
30 | 
31 |         System.out.println("运行超时了!");
32 |         String runTimeText = suchThread.getSpiderRunTime().toString();
33 |         System.out.println("爬虫运行时间为:" + runTimeText + "\"");
34 |     }
35 | }
36 | 
--------------------------------------------------------------------------------
/src/cn/aidou/robot/DownResource.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.robot;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.BufferedWriter;
5 | import java.io.File;
6 | import java.io.FileOutputStream;
7 | import java.io.IOException;
8 | import java.io.InputStreamReader;
9 | import java.io.OutputStreamWriter;
10 | import java.net.HttpURLConnection;
11 | import java.net.URL;
12 |
13 | import org.jsoup.Jsoup;
14 | import org.jsoup.nodes.Document;
15 |
16 | /**
17 |  * 2015.10.8 Downloads the source of a web page.
18 |  */
19 | 
20 | public class DownResource implements IDownResource {
21 |     String link = null;
22 |     URL url = null;
23 |     HttpURLConnection urlConnection;
24 |     BufferedReader reader = null;
25 |     BufferedWriter writer = null;
26 | 
27 |     /**
28 |      * Downloads {@code http://<link>} and stores it line by line in
29 |      * {@code D:\<page title>.html}.
30 |      *
31 |      * <p>The response code is checked before the body stream is opened, so a
32 |      * non-200 answer is reported through the dedicated message and no output
33 |      * file is created for it. The page is read and written as UTF-8. Streams
34 |      * that were never opened are skipped during cleanup, and the HTTP
35 |      * connection is released afterwards.
36 |      *
37 |      * @param link host and path of the page, without the {@code http://} prefix
38 |      */
39 |     @Override
40 |     public void downPageSourceHandler(String link) {
41 |         int responsecode;
42 |         String line;
43 |         try {
44 |             url = new URL("http://" + link);
45 |             urlConnection = (HttpURLConnection) url.openConnection();
46 |             responsecode = urlConnection.getResponseCode();
47 |             if (responsecode != 200) {
48 |                 System.out.println("获取不到网页的源码,服务器响应代码为:" + responsecode);
49 |                 return;
50 |             }
51 |             // The page title (fetched through Jsoup) names the output file.
52 |             Document doc = Jsoup.connect("http://" + link).get();
53 |             String title = doc.title();
54 |             reader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream(), "UTF-8"));
55 |             writer = new BufferedWriter(
56 |                     new OutputStreamWriter(new FileOutputStream(new File("D:\\" + title + ".html")), "UTF-8"));
57 |             while ((line = reader.readLine()) != null) {
58 |                 writer.write(line);
59 |                 writer.newLine();
60 |             }
61 |             System.out.println("下载成功!");
62 |         } catch (Exception e) {
63 |             System.out.println("获取不到网页的源码,出现异常:" + e);
64 |         } finally {
65 |             closeQuietly(reader);
66 |             closeQuietly(writer);
67 |             // Do not keep closed streams around for the next call.
68 |             reader = null;
69 |             writer = null;
70 |             if (urlConnection != null) {
71 |                 urlConnection.disconnect();
72 |             }
73 |         }
74 |     }
75 | 
76 |     /** Closes {@code resource} if it is non-null, printing any failure. */
77 |     private static void closeQuietly(java.io.Closeable resource) {
78 |         if (resource == null) {
79 |             return;
80 |         }
81 |         try {
82 |             resource.close();
83 |         } catch (IOException e) {
84 |             e.printStackTrace();
85 |         }
86 |     }
87 | 
88 |     /**
89 |      * Downloads image resources. Not implemented yet.
90 |      */
91 |     @Override
92 |     public void downImageSource() {
93 | 
94 |     }
95 | }
96 | 
--------------------------------------------------------------------------------
/src/cn/aidou/robot/GetPageEncoding.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.robot;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.BufferedWriter;
5 | import java.io.InputStreamReader;
6 | import java.net.HttpURLConnection;
7 | import java.net.URL;
8 |
9 | /**
10 |  * Determines the character set a web page declares.
11 |  */
12 | public class GetPageEncoding {
13 | 
14 |     String link = null;
15 |     URL url = null;
16 |     HttpURLConnection urlConnection;
17 |     BufferedReader reader = null;
18 |     BufferedWriter writer = null;
19 | 
20 |     /**
21 |      * 2015.10.8 Looks up the page's encoding.
22 |      *
23 |      * <p>The {@code Content-Type} response header is inspected first. When it
24 |      * is missing or carries no charset, the page is read line by line and the
25 |      * first line containing {@code Content-Type} is inspected instead.
26 |      *
27 |      * @param link the full URL of the page
28 |      * @return the declared charset name, or {@code null} if none was found or
29 |      *         the page could not be fetched (the failure is printed)
30 |      */
31 | 
32 |     public String getCharset(String link) {
33 |         String result = null;
34 |         BufferedReader pageReader = null;
35 |         urlConnection = null; // never disconnect a connection left over from an earlier call
36 |         try {
37 |             url = new URL(link);
38 |             urlConnection = (HttpURLConnection) url.openConnection();
39 |             // The User-Agent header identifies the requesting client.
40 |             urlConnection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)");
41 |             urlConnection.connect();
42 |             String contentType = urlConnection.getContentType();
43 |             result = findCharset(contentType);
44 |             if (result == null) {
45 |                 // Only ASCII markers are searched for, so a byte-preserving
46 |                 // decoding is sufficient and independent of the platform default.
47 |                 pageReader = new BufferedReader(
48 |                         new InputStreamReader(urlConnection.getInputStream(), "ISO-8859-1"));
49 |                 String line = pageReader.readLine();
50 |                 while (line != null) {
51 |                     if (line.contains("Content-Type")) {
52 |                         result = findCharset(line);
53 |                         break;
54 |                     }
55 |                     line = pageReader.readLine();
56 |                 }
57 |             }
58 |         } catch (Exception e) {
59 |             e.printStackTrace();
60 |         } finally {
61 |             if (pageReader != null) {
62 |                 try {
63 |                     pageReader.close();
64 |                 } catch (java.io.IOException e) {
65 |                     e.printStackTrace();
66 |                 }
67 |             }
68 |             if (urlConnection != null) {
69 |                 urlConnection.disconnect();
70 |             }
71 |         }
72 |         return result;
73 |     }
74 | 
75 |     /**
76 |      * 2015.10.8 Helper: extracts the value following {@code charset=}.
77 |      *
78 |      * <p>The key is matched case-insensitively. An optional opening quote is
79 |      * skipped and the value ends at the first quote, semicolon, {@code >},
80 |      * {@code /} or whitespace character, or at the end of the line.
81 |      *
82 |      * @param line a header value or a line of HTML; may be {@code null}
83 |      * @return the charset name, or {@code null} if the line is {@code null},
84 |      *         has no {@code charset=} or the value is empty
85 |      */
86 |     private String findCharset(String line) {
87 |         System.out.println(line);
88 |         if (line == null) {
89 |             return null;
90 |         }
91 |         final String key = "charset=";
92 |         int keyAt = -1;
93 |         for (int i = 0; i + key.length() <= line.length(); i++) {
94 |             if (line.regionMatches(true, i, key, 0, key.length())) {
95 |                 keyAt = i;
96 |                 break;
97 |             }
98 |         }
99 |         if (keyAt < 0) {
100 |             return null;
101 |         }
102 |         int start = keyAt + key.length();
103 |         if (start < line.length() && (line.charAt(start) == '"' || line.charAt(start) == '\'')) {
104 |             start++;
105 |         }
106 |         int end = start;
107 |         while (end < line.length() && "\"';>/ \t".indexOf(line.charAt(end)) < 0) {
108 |             end++;
109 |         }
110 |         return end > start ? line.substring(start, end) : null;
111 |     }
112 | }
113 | 
--------------------------------------------------------------------------------
/src/cn/aidou/robot/IDownResource.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.robot;
2 |
3 |
4 | /**
5 |  * Operations for downloading resources.
6 |  */
7 | public interface IDownResource {
8 | 
9 |     /**
10 |      * Downloads the source of the page identified by {@code link}.
11 |      *
12 |      * @param link the page to download
13 |      */
14 |     void downPageSourceHandler(String link);
15 | 
16 |     /** Downloads image resources. */
17 |     void downImageSource();
18 | }
19 | 
--------------------------------------------------------------------------------
/src/cn/aidou/robot/ReadLabelHandler.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.robot;
2 |
3 | import java.io.IOException;
4 |
5 | import org.jsoup.Jsoup;
6 | import org.jsoup.nodes.Document;
7 | import org.jsoup.nodes.Element;
8 | import org.jsoup.select.Elements;
9 |
10 |
11 | /**
12 |  * Reads the hyperlink tags of a page.
13 |  */
14 | public class ReadLabelHandler {
15 | 
16 |     /**
17 |      * 2015.10.9 Fetches {@code http://<link>}, walks over every {@code a}
18 |      * element that has an {@code href} attribute and prints the page title.
19 |      *
20 |      * <p>The href values are only read, not used yet. Still to do: store each
21 |      * href in the database, optionally after filtering with a rule such as
22 |      * {@code [a-zA-z]+://[^\s]*}.
23 |      *
24 |      * @param link host and path of the page, without the {@code http://} prefix
25 |      */
26 |     public void readALabel(String link) {
27 |         try {
28 |             Document page = Jsoup.connect("http://" + link).get();
29 |             String pageTitle = page.title();
30 | 
31 |             // Anchors carrying an href attribute.
32 |             Elements anchors = page.select("a[href]");
33 |             String linkHref = "";
34 |             for (int i = 0; i < anchors.size(); i++) {
35 |                 linkHref = anchors.get(i).attr("href");
36 |             }
37 | 
38 |             System.out.println("title:" + pageTitle);
39 |         } catch (IOException e) {
40 |             e.printStackTrace();
41 |         }
42 |     }
43 | }
44 | 
--------------------------------------------------------------------------------
/src/cn/aidou/thread/PooledThread.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.thread;
2 |
3 | /**
4 |  * Pairs a thread with a flag telling whether it is currently in use.
5 |  */
6 | public class PooledThread {
7 | 
8 |     Thread thread = null; // the wrapped thread
9 |     boolean busy = false; // true while the thread is in use; starts out false
10 | 
11 |     /**
12 |      * Wraps the given thread; the busy flag starts out {@code false}.
13 |      *
14 |      * @param thread the thread to wrap
15 |      */
16 |     public PooledThread(Thread thread) {
17 |         this.thread = thread;
18 |     }
19 | 
20 |     /** @return the wrapped thread */
21 |     public Thread getThread() {
22 |         return this.thread;
23 |     }
24 | 
25 |     /** @param thread the thread this entry should hold from now on */
26 |     public void setThread(Thread thread) {
27 |         this.thread = thread;
28 |     }
29 | 
30 |     /** @return {@code true} while the thread is flagged as in use */
31 |     public boolean isBusy() {
32 |         return this.busy;
33 |     }
34 | 
35 |     /** @param busy the new value of the in-use flag */
36 |     public void setBusy(boolean busy) {
37 |         this.busy = busy;
38 |     }
39 | }
40 | 
--------------------------------------------------------------------------------
/src/cn/aidou/thread/SuchThread.java:
--------------------------------------------------------------------------------
1 | package cn.aidou.thread;
2 |
3 | import java.math.BigDecimal;
4 | import java.util.HashSet;
5 | import java.util.Set;
6 |
7 | import cn.aidou.TaskDistributor.spider.Spiders.CreateSpider;
8 | import cn.aidou.listener.ExampleListener;
9 | import cn.aidou.listener.RunTimeEvent;
10 | import cn.aidou.listener.RunTimeListener;
11 |
12 | /**
13 | * 一个线程由一个或多个监听器监控其内部各个关节的状态,视情况做出相应的响应!
14 | *
15 | * @author aidou
16 | *
17 | */
18 |
19 | public class SuchThread extends Thread{
20 | private boolean runningFlag;// 线程状态
21 | private String threadName;
22 | private BigDecimal SpiderRunTime;
23 | // 给线程(事件源)添加监听器容器
24 | private Set