├── cockroach-samples ├── README.md ├── src │ └── main │ │ ├── resources │ │ └── log4j.properties │ │ └── java │ │ └── com │ │ └── zhangyingwei │ │ └── cockroach │ │ └── samples │ │ ├── juejin │ │ ├── store │ │ │ └── JueJinStore.java │ │ └── JueJinApplication.java │ │ ├── oschina │ │ ├── OschinaApplicatoin.java │ │ └── store │ │ │ └── OsChinaStore.java │ │ └── douban │ │ └── movie │ │ ├── DMovieApplication.java │ │ ├── Movie.java │ │ └── store │ │ └── DMovieStore.java └── pom.xml ├── cockroach-test ├── README.md ├── hello.txt ├── src │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── zhangyingwei │ │ │ └── cockroach │ │ │ ├── executer │ │ │ ├── listener │ │ │ │ └── IExecutersListenerTest.java │ │ │ ├── TaskTest.java │ │ │ ├── response │ │ │ │ └── filter │ │ │ │ │ ├── ResponseFilterTest.java │ │ │ │ │ └── ITaskResponseFilterTest.java │ │ │ └── TaskExecuterTest.java │ │ │ ├── store │ │ │ ├── PrintTestStore.java │ │ │ ├── ZhihuStore.java │ │ │ ├── TestStore.java │ │ │ ├── SelecterTestStore.java │ │ │ ├── ImageStore.java │ │ │ ├── IpStore.java │ │ │ ├── ZhiHuMeiZhi.java │ │ │ ├── MeiZhiStore.java │ │ │ └── NameStore.java │ │ │ ├── queue │ │ │ ├── TestQueueTaskFilter.java │ │ │ ├── DefaultQueueTaskFilterTest.java │ │ │ ├── DefaultQueueTaskDeepTest.java │ │ │ └── RedisTaskQueueTest.java │ │ │ ├── common │ │ │ ├── CookieGeneratorTest.java │ │ │ ├── HeaderGeneratorTest.java │ │ │ └── utils │ │ │ │ ├── ImageStore.java │ │ │ │ └── FileUtilsTest.java │ │ │ ├── http │ │ │ ├── COkIHttpClientTest.java │ │ │ └── handler │ │ │ │ └── DefaultTaskErrorHandlerTest.java │ │ │ ├── CockroachContextErrorTest.java │ │ │ ├── CockroachContextIPProxyTest.java │ │ │ ├── CockroachContextQueueRetryTest.java │ │ │ ├── CockroachContextExecutersListenerTest.java │ │ │ ├── annotation │ │ │ └── AnnotationTest.java │ │ │ ├── CockroachContextZhihuTest.java │ │ │ ├── CockroachContextImageTest.java │ │ │ ├── CockroachContextIPTest.java │ │ │ ├── CockroachContextTest.java │ │ │ └── 
CockroachContextGithubIssueTest.java │ └── main │ │ └── resources │ │ └── log4j.properties └── pom.xml ├── NOTE.md ├── cockroach-annotation ├── README.md ├── src │ └── main │ │ ├── java │ │ └── com │ │ │ └── zhangyingwei │ │ │ └── cockroach │ │ │ ├── annotation │ │ │ ├── Store.java │ │ │ ├── AppName.java │ │ │ ├── EnableAutoConfiguration.java │ │ │ ├── ProxyConfig.java │ │ │ ├── TaskResponseFiltersConfig.java │ │ │ ├── AutoClose.java │ │ │ ├── ThreadConfig.java │ │ │ ├── TaskErrorHandlerConfig.java │ │ │ ├── HttpConfig.java │ │ │ ├── CookieConfig.java │ │ │ ├── ExecutersListener.java │ │ │ └── HttpHeaderConfig.java │ │ │ ├── CockroachApplication.java │ │ │ └── config │ │ │ └── CockroachConfigBuilder.java │ │ └── resources │ │ └── log4j.properties └── pom.xml ├── cockroach-core ├── README.md ├── src │ └── main │ │ ├── java │ │ └── com │ │ │ └── zhangyingwei │ │ │ └── cockroach │ │ │ ├── common │ │ │ ├── generators │ │ │ │ ├── StringGenerator.java │ │ │ │ ├── MapGenerator.java │ │ │ │ ├── CockroachGenerator.java │ │ │ │ ├── NameGenerator.java │ │ │ │ ├── NoCookieGenerator.java │ │ │ │ └── NoHeaderGenerator.java │ │ │ ├── interfaces │ │ │ │ └── IBox.java │ │ │ ├── utils │ │ │ │ ├── CockroachUtils.java │ │ │ │ ├── NameUtils.java │ │ │ │ └── FileUtils.java │ │ │ └── exception │ │ │ │ └── HttpException.java │ │ │ ├── store │ │ │ ├── IStore.java │ │ │ ├── PrintStore.java │ │ │ └── DescribeStore.java │ │ │ ├── executer │ │ │ ├── listener │ │ │ │ ├── IExecutersListener.java │ │ │ │ ├── DefaultExecutersListener.java │ │ │ │ └── BootstrapExecutersListener.java │ │ │ ├── response │ │ │ │ ├── filter │ │ │ │ │ ├── ITaskResponseFilter.java │ │ │ │ │ └── TaskResponseFilterBox.java │ │ │ │ ├── ICockroachResponse.java │ │ │ │ ├── TaskErrorResponse.java │ │ │ │ ├── ResponseContent.java │ │ │ │ └── TaskResponse.java │ │ │ ├── task │ │ │ │ ├── TaskCompatator.java │ │ │ │ ├── TaskExecuter.java │ │ │ │ └── Task.java │ │ │ └── ExecuterManager.java │ │ │ ├── http │ │ │ ├── handler │ │ │ │ 
├── ITaskErrorHandler.java │ │ │ │ ├── TaskErrorHandlerBox.java │ │ │ │ └── DefaultTaskErrorHandler.java │ │ │ ├── IHttpProxy.java │ │ │ ├── ProxyTuple.java │ │ │ ├── client │ │ │ │ ├── IHttpClient.java │ │ │ │ ├── AbstractHttpClient.java │ │ │ │ ├── okhttp │ │ │ │ │ ├── CookieManager.java │ │ │ │ │ └── COkHttpClient.java │ │ │ │ └── HttpClientProxy.java │ │ │ ├── HttpProxy.java │ │ │ └── HttpParams.java │ │ │ ├── queue │ │ │ ├── filter │ │ │ │ ├── IQueueTaskFilter.java │ │ │ │ ├── DefaultQueueTaskFilter.java │ │ │ │ ├── DefaultRepeatFilter.java │ │ │ │ └── TaskFilterBox.java │ │ │ ├── AbstractCockroachQueue.java │ │ │ ├── CockroachQueue.java │ │ │ └── TaskQueue.java │ │ │ ├── CockroachContext.java │ │ │ └── config │ │ │ ├── Constants.java │ │ │ └── CockroachConfig.java │ │ └── resources │ │ └── log4j.properties └── pom.xml ├── cockroach-queue-redis ├── src │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── zhangyingwei │ │ │ └── cockroach │ │ │ └── queue │ │ │ └── RedisTaskQueueTest.java │ └── main │ │ └── java │ │ └── com │ │ └── zhangyingwei │ │ └── cockroach │ │ └── queue │ │ └── RedisTaskQueue.java ├── README.md └── pom.xml ├── .travis.yml ├── .gitignore ├── TASK.md ├── UPDATE.md ├── README.md ├── pom.xml └── LICENSE /cockroach-samples/README.md: -------------------------------------------------------------------------------- 1 | # cockroach-samples 2 | 3 | cockroach 实例部分 -------------------------------------------------------------------------------- /cockroach-test/README.md: -------------------------------------------------------------------------------- 1 | # cockroach-test 2 | 3 | cockroach 测试部分,主要包含了所有测试用例,当然,写的很垃圾 -------------------------------------------------------------------------------- /NOTE.md: -------------------------------------------------------------------------------- 1 | # 备忘录 2 | 3 | * 发布 jar 包到 maven 中央仓库 4 | 5 | > mvn clean deploy -P sonatype-oss-release -------------------------------------------------------------------------------- 
/cockroach-annotation/README.md: -------------------------------------------------------------------------------- 1 | # cockroach-annotation 2 | 3 | cockroach 注解部分,主要实现了注解的定义以及注解的解析 -------------------------------------------------------------------------------- /cockroach-test/hello.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | -------------------------------------------------------------------------------- /cockroach-core/README.md: -------------------------------------------------------------------------------- 1 | # cockroach-core 2 | 3 | cockroach 核心部分,主要包含了核心的爬虫代码,包括: 队列、http客户端、解析器 等等 -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/generators/StringGenerator.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.generators; 2 | 3 | /** 4 | * Created by zhangyw on 2017/12/19. 5 | */ 6 | public interface StringGenerator extends CockroachGenerator {} 7 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/generators/MapGenerator.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.generators; 2 | 3 | import java.util.Map; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/19. 7 | */ 8 | public interface MapGenerator extends CockroachGenerator {} 9 | -------------------------------------------------------------------------------- /cockroach-queue-redis/src/test/java/com/zhangyingwei/cockroach/queue/RedisTaskQueueTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | /** 4 | * Created by zhangyw on 2018/5/31. 
5 | */ 6 | public class RedisTaskQueueTest { 7 | public static void main(String[] args) { 8 | 9 | } 10 | } -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | jdk: 4 | - oraclejdk8 5 | 6 | addons: 7 | apt: 8 | packages: 9 | - oracle-java8-installer # Updates JDK 8 to the latest available. 10 | 11 | notifications: 12 | email: false 13 | 14 | sudo: false 15 | 16 | cache: 17 | directories: 18 | - $HOME/.m2 -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/interfaces/IBox.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.interfaces; 2 | 3 | /** 4 | * @author: zhangyw 5 | * @date: 2018/2/10 6 | * @time: 下午2:21 7 | * @desc: 8 | */ 9 | public interface IBox { 10 | T add(T model); 11 | } 12 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/store/IStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | /** 6 | * Created by zhangyw on 2017/8/10. 7 | */ 8 | public interface IStore { 9 | void store(TaskResponse response) throws Exception; 10 | } 11 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/generators/CockroachGenerator.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.generators; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/19. 
7 | */ 8 | public interface CockroachGenerator { 9 | T get(Task task); 10 | } 11 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/generators/NameGenerator.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.generators; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/12. 7 | */ 8 | public interface NameGenerator { 9 | String name(); 10 | } 11 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/listener/IExecutersListener.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.listener; 2 | 3 | /** 4 | * @author: zhangyw 5 | * @date: 2018/2/1 6 | * @time: 下午9:40 7 | * @desc: 8 | */ 9 | public interface IExecutersListener { 10 | void onStart(); 11 | 12 | void onEnd(); 13 | } 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Java template 3 | # Compiled class file 4 | *.class 5 | 6 | # Log file 7 | *.log 8 | 9 | # Package Files # 10 | *.jar 11 | *.war 12 | *.ear 13 | *.zip 14 | *.tar.gz 15 | *.rar 16 | *.iml 17 | 18 | /.idea 19 | /target 20 | \target 21 | cockroach.iml 22 | 23 | cockroach-annotation/target/apidocs/allclasses-frame.html 24 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/handler/ITaskErrorHandler.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.handler; 2 | 3 | import 
com.zhangyingwei.cockroach.executer.response.TaskErrorResponse; 4 | 5 | /** 6 | * Created by zhangyw on 2017/8/16. 7 | * 任务失败回调 8 | */ 9 | public interface ITaskErrorHandler { 10 | void error(TaskErrorResponse response); 11 | } 12 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/queue/filter/IQueueTaskFilter.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue.filter; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | 5 | /** 6 | * @author: zhangyw 7 | * @date: 2018/1/19 8 | * @time: 下午2:22 9 | * @desc: 10 | */ 11 | public interface IQueueTaskFilter { 12 | boolean accept(Task task); 13 | } 14 | -------------------------------------------------------------------------------- /cockroach-samples/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=INFO,stdout 3 | # Direct log messages to stdout 4 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 5 | log4j.appender.stdout.Target=System.out 6 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 7 | log4j.appender.stdout.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss}] %-5p %c{1}:%L - %m%n -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/Store.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import java.lang.annotation.*; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/8. 
7 | */ 8 | @Documented 9 | @Target( {ElementType.TYPE}) 10 | @Inherited 11 | @Retention(RetentionPolicy.RUNTIME) 12 | public @interface Store { 13 | Class value(); 14 | } 15 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/AppName.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import java.lang.annotation.*; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/8. 7 | */ 8 | @Target({ElementType.TYPE}) 9 | @Retention(RetentionPolicy.RUNTIME) 10 | @Documented 11 | @Inherited 12 | public @interface AppName { 13 | String value(); 14 | } 15 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/EnableAutoConfiguration.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import java.lang.annotation.*; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/8. 7 | */ 8 | @Target({ElementType.TYPE}) 9 | @Retention(RetentionPolicy.RUNTIME) 10 | @Documented 11 | @Inherited 12 | public @interface EnableAutoConfiguration { 13 | } 14 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/ProxyConfig.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import java.lang.annotation.*; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/8. 
7 | */ 8 | 9 | @Target({ElementType.TYPE}) 10 | @Retention(RetentionPolicy.RUNTIME) 11 | @Documented 12 | @Inherited 13 | public @interface ProxyConfig { 14 | String value(); 15 | } 16 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/generators/NoCookieGenerator.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.generators; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/19. 7 | */ 8 | public class NoCookieGenerator implements StringGenerator { 9 | @Override 10 | public String get(Task task) { 11 | return null; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/response/filter/ITaskResponseFilter.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.response.filter; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | /** 6 | * @author: zhangyw 7 | * @date: 2018/1/24 8 | * @time: 下午3:12 9 | * @desc: 10 | */ 11 | public interface ITaskResponseFilter { 12 | boolean accept(TaskResponse response); 13 | } 14 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/TaskResponseFiltersConfig.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import java.lang.annotation.*; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/8. 
7 | */ 8 | 9 | @Target({ElementType.TYPE}) 10 | @Retention(RetentionPolicy.RUNTIME) 11 | @Documented 12 | @Inherited 13 | public @interface TaskResponseFiltersConfig { 14 | Class[] value() default {}; 15 | } 16 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/task/TaskCompatator.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.task; 2 | 3 | import java.util.Comparator; 4 | 5 | /** 6 | * Created by zhangyw on 2018/1/23. 7 | * 决定 task 出队的优先级 8 | */ 9 | public class TaskCompatator implements Comparator { 10 | @Override 11 | public int compare(Task task1, Task task2) { 12 | return task1.compareTo(task2); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/generators/NoHeaderGenerator.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.generators; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | 5 | import java.util.Map; 6 | 7 | /** 8 | * Created by zhangyw on 2017/12/19. 
9 | */ 10 | public class NoHeaderGenerator implements MapGenerator { 11 | @Override 12 | public Map get(Task task) { 13 | return null; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/executer/listener/IExecutersListenerTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.listener; 2 | 3 | /** 4 | * @author: zhangyw 5 | * @date: 2018/2/1 6 | * @time: 下午10:21 7 | * @desc: 8 | */ 9 | public class IExecutersListenerTest implements IExecutersListener{ 10 | 11 | @Override 12 | public void onStart() { 13 | 14 | } 15 | 16 | @Override 17 | public void onEnd() { 18 | 19 | } 20 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/PrintTestStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | /** 6 | * @author: zhangyw 7 | * @date: 2018/2/1 8 | * @time: 下午10:30 9 | * @desc: 10 | */ 11 | public class PrintTestStore implements IStore { 12 | @Override 13 | public void store(TaskResponse response) throws Exception { 14 | System.out.println("haha..."); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/ZhihuStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | import java.io.IOException; 6 | 7 | /** 8 | * Created by zhangyw on 2017/8/10. 
9 | */ 10 | public class ZhihuStore implements IStore { 11 | @Override 12 | public void store(TaskResponse response) throws IOException { 13 | System.out.println(response.getContent()); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/AutoClose.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import com.zhangyingwei.cockroach.config.Constants; 4 | 5 | import java.lang.annotation.*; 6 | 7 | /** 8 | * Created by zhangyw on 2017/12/8. 9 | */ 10 | 11 | @Target({ElementType.TYPE}) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Documented 14 | @Inherited 15 | public @interface AutoClose { 16 | boolean value() default Constants.DEFAULT_AUTO_CLOSE; 17 | } 18 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/store/PrintStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | import java.io.IOException; 6 | 7 | /** 8 | * Created by zhangyw on 2017/8/10. 
9 | */ 10 | public class PrintStore implements IStore { 11 | @Override 12 | public void store(TaskResponse response) throws IOException { 13 | System.out.println(response.getContent().string()); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /TASK.md: -------------------------------------------------------------------------------- 1 | # cockroach 爬虫 2 | 3 | * httpclient 设计的还是不够优雅 √ 4 | * httperrorhandler 可以考虑增加代理模式使用 exception 的方式,统一管理起来 √ 5 | 6 | 接下来 7 | 8 | * 替换打印日志的方式 改用 log4j 或者 logback √ 9 | * 增加login方法,获取登录 cookie (考虑要不要做) 10 | * task 优先级 √ 11 | * 代理IP这里需要抽象一个接口出来,可以实现实时获取 12 | * queue 持久化 13 | * 支持 POST 请求 14 | * 添加 watcher 支持监控 15 | * 失败重试还是太low 16 | * httpclient 这里需要重构 17 | 18 | --- 19 | 20 | 新年新气象,所有的工作重新计算吧 21 | 22 | * httpclient 感觉还是需要从新搞一下 重新搞了一下,先这样吧 √ 23 | * 页面渲染这里迫切需要 24 | * 正则匹配这里,要不要到底 25 | * watcher监控需要从长计议 26 | * 代理IP这里需要抽出来做成服务吧 27 | 28 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/ThreadConfig.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import com.zhangyingwei.cockroach.config.Constants; 4 | 5 | import java.lang.annotation.*; 6 | 7 | /** 8 | * Created by zhangyw on 2017/12/8. 
9 | */ 10 | 11 | @Target({ElementType.TYPE}) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Documented 14 | @Inherited 15 | public @interface ThreadConfig { 16 | int num(); //线程数量 17 | int sleep() default Constants.DEFAULT_THREAD_SLEEP; //线程睡眠时间 18 | } 19 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/TaskErrorHandlerConfig.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import com.zhangyingwei.cockroach.http.handler.DefaultTaskErrorHandler; 4 | 5 | import java.lang.annotation.*; 6 | 7 | /** 8 | * Created by zhangyw on 2017/12/8. 9 | */ 10 | 11 | @Target({ElementType.TYPE}) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Documented 14 | @Inherited 15 | public @interface TaskErrorHandlerConfig { 16 | Class value() default DefaultTaskErrorHandler.class; 17 | } 18 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/HttpConfig.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import com.zhangyingwei.cockroach.http.client.okhttp.COkHttpClient; 4 | 5 | import java.lang.annotation.*; 6 | 7 | /** 8 | * Created by zhangyw on 2017/12/8. 
9 | */ 10 | 11 | @Target({ElementType.TYPE}) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Documented 14 | @Inherited 15 | public @interface HttpConfig { 16 | Class value() default COkHttpClient.class; 17 | boolean progress() default false; 18 | } 19 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/CookieConfig.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.NoCookieGenerator; 4 | 5 | import java.lang.annotation.*; 6 | 7 | /** 8 | * Created by zhangyw on 2017/12/8. 9 | */ 10 | 11 | @Target({ElementType.TYPE}) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Documented 14 | @Inherited 15 | public @interface CookieConfig { 16 | String value() default ""; 17 | Class cookieGenerator() default NoCookieGenerator.class; 18 | } 19 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/IHttpProxy.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http; 2 | 3 | /** 4 | * Created by zhangyw on 2017/8/11. 
5 | */ 6 | public interface IHttpProxy { 7 | 8 | /** 9 | * 随机获取一个代理 10 | * @return 11 | */ 12 | public ProxyTuple randomProxy(); 13 | 14 | /** 15 | * 如果代理失效,从代理池中删除代理 16 | * 17 | * @param proxy 18 | */ 19 | public void disable(ProxyTuple proxy); 20 | 21 | /** 22 | * 是否为空 23 | * @return 24 | */ 25 | public boolean isEmpty(); 26 | } 27 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/queue/filter/DefaultQueueTaskFilter.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue.filter; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import org.apache.commons.lang.StringUtils; 5 | 6 | /** 7 | * @author: zhangyw 8 | * @date: 2018/1/19 9 | * @time: 下午2:27 10 | * @desc: 11 | */ 12 | public class DefaultQueueTaskFilter implements IQueueTaskFilter { 13 | @Override 14 | public boolean accept(Task task) { 15 | return task != null && StringUtils.isNotBlank(task.getUrl()); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/ExecutersListener.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import com.zhangyingwei.cockroach.executer.listener.DefaultExecutersListener; 4 | 5 | import java.lang.annotation.*; 6 | 7 | /** 8 | * @author: zhangyw 9 | * @date: 2018/2/1 10 | * @time: 下午10:10 11 | * @desc: 12 | */ 13 | @Documented 14 | @Target( {ElementType.TYPE}) 15 | @Inherited 16 | @Retention(RetentionPolicy.RUNTIME) 17 | public @interface ExecutersListener { 18 | Class value() default DefaultExecutersListener.class; 19 | } 20 | -------------------------------------------------------------------------------- 
/cockroach-test/src/test/java/com/zhangyingwei/cockroach/queue/TestQueueTaskFilter.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.queue.filter.IQueueTaskFilter; 5 | import org.apache.commons.lang.StringUtils; 6 | 7 | /** 8 | * @author: zhangyw 9 | * @date: 2018/1/19 10 | * @time: 下午2:37 11 | * @desc: 12 | */ 13 | public class TestQueueTaskFilter implements IQueueTaskFilter { 14 | @Override 15 | public boolean accept(Task task) { 16 | return StringUtils.isNotBlank(task.getUrl()) && task.getUrl().contains("baidu"); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/annotation/HttpHeaderConfig.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.MapGenerator; 4 | import com.zhangyingwei.cockroach.common.generators.NoHeaderGenerator; 5 | import java.lang.annotation.*; 6 | 7 | /** 8 | * Created by zhangyw on 2017/12/8. 9 | */ 10 | 11 | @Target({ElementType.TYPE}) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Documented 14 | @Inherited 15 | public @interface HttpHeaderConfig { 16 | String[] value() default {}; 17 | Class headerGenerator() default NoHeaderGenerator.class; 18 | } 19 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/utils/CockroachUtils.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.utils; 2 | 3 | 4 | /** 5 | * Created by zhangyw on 2017/9/14. 
6 | * @author zhangyw 7 | */ 8 | public class CockroachUtils { 9 | public static void addSystemPropertie(String key,Object value) { 10 | System.setProperty(key, value + ""); 11 | } 12 | 13 | public static String exceptionMessage(int code,String message){ 14 | return String.format("code:%d - message:%s",code,message); 15 | } 16 | 17 | public static boolean validHttpCode(int code) { 18 | return code == 200 || code == 304; 19 | } 20 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/TestStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | /** 6 | * Created by zhangyw on 2017/12/8. 7 | */ 8 | public class TestStore implements IStore { 9 | @Override 10 | public void store(TaskResponse response) throws Exception { 11 | System.out.println("hello store"); 12 | // System.out.println(response.charset("gbk").getContent()); 13 | response.charset("gbk").select("span").stream().forEach(element -> { 14 | System.out.println(element.text()); 15 | }); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/listener/DefaultExecutersListener.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.listener; 2 | 3 | import org.apache.log4j.Logger; 4 | 5 | /** 6 | * @author: zhangyw 7 | * @date: 2018/2/1 8 | * @time: 下午9:42 9 | * @desc: 10 | */ 11 | public class DefaultExecutersListener implements IExecutersListener { 12 | private Logger logger = Logger.getLogger(DefaultExecutersListener.class); 13 | @Override 14 | public void onStart() { 15 | logger.info("executers start..."); 16 | } 17 | 18 | @Override 19 | public void onEnd() { 20 | 
logger.info("executers end..."); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/SelecterTestStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | /** 6 | * @author: zhangyw 7 | * @date: 2018/1/21 8 | * @time: 下午3:06 9 | * @desc: 10 | */ 11 | public class SelecterTestStore implements IStore { 12 | @Override 13 | public void store(TaskResponse response) throws Exception { 14 | String title = response.select("title").text(); 15 | System.out.println(title); 16 | String res = response.xpath("//*[@id='cnblogs_post_body']/h2").get(2).text(); 17 | System.out.println(res); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/listener/BootstrapExecutersListener.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.listener; 2 | 3 | import org.apache.log4j.Logger; 4 | 5 | /** 6 | * Created by zhangyw on 2018/2/5. 
7 | * 初始化一些系统功能 8 | */ 9 | public class BootstrapExecutersListener implements IExecutersListener { 10 | private Logger logger = Logger.getLogger(BootstrapExecutersListener.class); 11 | @Override 12 | public void onStart() { 13 | logger.info("BootstrapExecutersListener.onStart"); 14 | } 15 | 16 | @Override 17 | public void onEnd() { 18 | logger.info("BootstrapExecutersListener.onEnd"); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/executer/TaskTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import org.junit.Test; 5 | 6 | import java.util.HashMap; 7 | 8 | /** 9 | * Created by zhangyw on 2017/9/20. 10 | */ 11 | public class TaskTest { 12 | @Test 13 | public void getUrl() throws Exception { 14 | Task task = new Task("http://zhangyingwei.com"); 15 | task.setParams(new HashMap(){ 16 | { 17 | put("key", "hello"); 18 | put("value", "nihao"); 19 | } 20 | }); 21 | System.out.println(task.getUrl()); 22 | } 23 | } -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/exception/HttpException.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.exception; 2 | 3 | import com.zhangyingwei.cockroach.common.utils.CockroachUtils; 4 | 5 | /** 6 | * Created by zhangyw on 2017/8/17. 
7 | * Multiple Choices/多重选择 8 | */ 9 | public class HttpException extends Exception { 10 | public HttpException() { 11 | } 12 | 13 | public HttpException(String message,int code) { 14 | super(CockroachUtils.exceptionMessage(code,message)); 15 | } 16 | 17 | public HttpException(String message,int code, Throwable cause) { 18 | super(CockroachUtils.exceptionMessage(code,message), cause); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/common/CookieGeneratorTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.StringGenerator; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import org.junit.Test; 6 | 7 | import java.util.UUID; 8 | 9 | /** 10 | * Created by zhangyw on 2017/12/19. 11 | */ 12 | public class CookieGeneratorTest implements StringGenerator { 13 | 14 | @Override 15 | public String get(Task task) { 16 | String cookie = "v="+ UUID.randomUUID().toString(); 17 | System.out.println(cookie); 18 | return cookie; 19 | } 20 | 21 | @Test 22 | public void test() {} 23 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/ImageStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | import java.io.*; 6 | 7 | /** 8 | * Created by zhangyw on 2017/9/18. 
9 | */ 10 | public class ImageStore implements IStore { 11 | 12 | @Override 13 | public void store(TaskResponse response) throws Exception { 14 | byte[] bytes = response.getContent().bytes(); 15 | File file = new File("image.jpeg"); 16 | DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(file)); 17 | outputStream.write(bytes); 18 | outputStream.close(); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/executer/response/filter/ResponseFilterTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.response.filter; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | 5 | import java.io.IOException; 6 | 7 | /** 8 | * @author: zhangyw 9 | * @date: 2018/1/24 10 | * @time: 下午3:39 11 | * @desc: 12 | */ 13 | public class ResponseFilterTest implements ITaskResponseFilter{ 14 | @Override 15 | public boolean accept(TaskResponse response) { 16 | try { 17 | return response.select("title").text().contains("百度"); 18 | } catch (IOException e) { 19 | e.printStackTrace(); 20 | } 21 | return false; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/http/COkIHttpClientTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 5 | import com.zhangyingwei.cockroach.http.client.okhttp.COkHttpClient; 6 | import org.junit.Assert; 7 | import org.junit.Test; 8 | 9 | /** 10 | * Created by zhangyw on 2017/8/10. 
11 | */ 12 | public class COkIHttpClientTest { 13 | @Test 14 | public void doGet() throws Exception { 15 | COkHttpClient client = new COkHttpClient(); 16 | TaskResponse resp = (TaskResponse) client.doGet(new Task("https://luolei.org")); 17 | Assert.assertNotNull(resp.select("a")); 18 | } 19 | } -------------------------------------------------------------------------------- /UPDATE.md: -------------------------------------------------------------------------------- 1 | # cockroach 爬虫 更新日志 2 | 3 | * 修改了 task 中 get url 编译 url 的 bug 4 | 5 | ## 2018-02-28 6 | * 增加了 queue-redis 模块 7 | * 新年快乐 8 | * 需改为模块 9 | * 修改了 httpclient 实现 10 | 11 | ## 2018-02-10 12 | * 增加了失败任务队列 13 | * 增加了失败任务重试功能 14 | * 调整了 task 的包位置 15 | * task 增加了 deep 参数 16 | * 换掉 ArrayBlockingQueue 使用可以定义优先级的 PriorityBlockingQueue,并结合 task 的 deep 参数实现任务的优先级 17 | * 增加了 response filter 18 | * 增加了 ExecutersListener 监听任务开始与完成。初衷是为了有一个方法能表示出程序是否已经执行完毕,主要作用类似于:将所有爬取内容最后打包统一发邮件。 19 | 20 | ## 2018-01-19 21 | 22 | * 修改了 response 的 close 方法到 finally 方法块中 23 | * 增加了队列过滤器 24 | 25 | ## 2017-12-24 26 | * 增加了 TaskResponse 中指定编码格式的接口 27 | * 去掉了 taskqueue 中的单例 28 | * 删除了一些无用类 29 | * 增加了 cookie 生成器与 header 生成器 30 | 31 | ## 2017-09-13 32 | 33 | * 增加了 log4j 为默认日志组件 34 | * 修改了一些架构上的问题 35 | -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/CockroachApplication.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.config.CockroachConfig; 4 | import com.zhangyingwei.cockroach.config.CockroachConfigBuilder; 5 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 6 | 7 | import java.lang.annotation.Annotation; 8 | 9 | /** 10 | * Created by zhangyw on 2017/12/8. 
11 | */ 12 | public class CockroachApplication { 13 | public static void run(Class clazz, CockroachQueue queue) throws Exception { 14 | Annotation[] annotations = clazz.getAnnotations(); 15 | CockroachConfig config = new CockroachConfigBuilder(annotations).bulid(); 16 | CockroachContext context = new CockroachContext(config); 17 | context.start(queue); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/queue/AbstractCockroachQueue.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.queue.filter.IQueueTaskFilter; 5 | import com.zhangyingwei.cockroach.queue.filter.TaskFilterBox; 6 | 7 | import java.util.List; 8 | 9 | /** 10 | * Created by zhangyw on 2017/9/13. 11 | * 队列接口 12 | */ 13 | public abstract class AbstractCockroachQueue implements CockroachQueue { 14 | protected TaskFilterBox filterBox; 15 | 16 | public AbstractCockroachQueue() { 17 | this.filterBox = new TaskFilterBox(); 18 | } 19 | 20 | @Override 21 | public CockroachQueue filter(IQueueTaskFilter filter) throws Exception { 22 | this.filterBox.add(filter); 23 | return this; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/http/handler/DefaultTaskErrorHandlerTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.handler; 2 | 3 | import com.zhangyingwei.cockroach.CockroachApplication; 4 | import com.zhangyingwei.cockroach.annotation.EnableAutoConfiguration; 5 | import com.zhangyingwei.cockroach.executer.task.Task; 6 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 7 | import com.zhangyingwei.cockroach.queue.TaskQueue; 8 | 9 | /** 10 
| * Created by zhangyw on 2017/12/25. 11 | */ 12 | @EnableAutoConfiguration 13 | public class DefaultTaskErrorHandlerTest { 14 | public static void main(String[] args) throws Exception { 15 | CockroachQueue queue = TaskQueue.of(); 16 | queue.push(new Task("https://google.com")); 17 | CockroachApplication.run(DefaultTaskErrorHandler.class,queue); 18 | } 19 | } -------------------------------------------------------------------------------- /cockroach-annotation/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | cockroach 7 | com.github.zhangyingwei 8 | 1.0.6-Beta 9 | 10 | 4.0.0 11 | 12 | cockroach-annotation 13 | 14 | 15 | 16 | com.github.zhangyingwei 17 | cockroach-core 18 | 1.0.6-Beta 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/response/ICockroachResponse.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.response; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.http.client.IHttpClient; 5 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 6 | 7 | import java.io.IOException; 8 | import java.util.List; 9 | 10 | /** 11 | * @author: zhangyw 12 | * @date: 2017/12/18 13 | * @time: 下午7:54 14 | * @desc: 15 | */ 16 | public interface ICockroachResponse { 17 | ResponseContent getContent() throws IOException; 18 | Task getTask(); 19 | boolean isGroup(String group); 20 | boolean isGroupStartWith(String groupPrefix); 21 | boolean isGroupEndWith(String end); 22 | boolean isGroupContains(String str); 23 | CockroachQueue getQueue(); 24 | List header(String key); 25 | } 26 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/ProxyTuple.java: 
/**
 * Created by zhangyw on 2017/8/11.
 * Holds a proxy address as an IP string and a port.
 */
public class ProxyTuple {
    private static final Integer PORT_DEFAULT = 80;
    private String ip;
    private Integer port;

    /**
     * @param ip   proxy host address
     * @param port proxy port; may be {@code null}, see {@link #port()}
     */
    public ProxyTuple(String ip, Integer port) {
        this.ip = ip;
        this.port = port;
    }

    /** @return the IP given at construction */
    public String ip() {
        return this.ip;
    }

    /**
     * Returns the port. If none was given, the default 80 is stored on first
     * access, so later calls and {@link #toString()} report 80 as well.
     *
     * @return the configured or default port
     */
    public Integer port() {
        if (this.port != null) {
            return this.port;
        }
        this.port = PORT_DEFAULT;
        return this.port;
    }

    /** Prints the stored fields as they are; a port not yet defaulted prints as null. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("ProxyTuple{");
        text.append("ip='").append(ip).append('\'');
        text.append(", port=").append(port);
        text.append('}');
        return text.toString();
    }
}
8 | */ 9 | public class DescribeStore implements IStore { 10 | private Logger logger = Logger.getLogger(DescribeStore.class); 11 | @Override 12 | public void store(TaskResponse response) throws Exception { 13 | logger.info("==================desc================="); 14 | logger.info(String.format("task id: %s",response.getTask().getId())); 15 | logger.info(String.format("thread name: %s",Thread.currentThread().getName())); 16 | logger.info(String.format("bytes: %d",response.getContent().bytes().length)); 17 | logger.info("======================================="); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /cockroach-samples/src/main/java/com/zhangyingwei/cockroach/samples/juejin/store/JueJinStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.samples.juejin.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | import com.zhangyingwei.cockroach.store.IStore; 5 | import net.sf.json.JSONObject; 6 | 7 | /** 8 | * @author: zhangyw 9 | * @date: 2018/2/26 10 | * @time: 下午8:49 11 | * @desc: 12 | */ 13 | public class JueJinStore implements IStore{ 14 | @Override 15 | public void store(TaskResponse response) throws Exception { 16 | response.getContent().toJsonObject() 17 | .getJSONObject("d") 18 | .getJSONArray("entrylist") 19 | .forEach(item -> { 20 | JSONObject itemJson = JSONObject.fromObject(item); 21 | System.out.println(itemJson.getString("title")); 22 | }); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/common/HeaderGeneratorTest.java: -------------------------------------------------------------------------------- 1 | 2 | package com.zhangyingwei.cockroach.common; 3 | 4 | import com.zhangyingwei.cockroach.common.generators.MapGenerator; 5 | import com.zhangyingwei.cockroach.executer.task.Task; 
6 | import org.junit.Test; 7 | 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | 11 | /** 12 | * Created by zhangyw on 2017/12/19. 13 | */ 14 | public class HeaderGeneratorTest implements MapGenerator { 15 | private Map headers = new HashMap(); 16 | @Override 17 | public Map get(Task task) { 18 | if ("jobs.lagou".equals(task.getGroup())) { 19 | return headers; 20 | } else { 21 | System.out.println("text/json; charset=utf-8"); 22 | headers.put("content-type", "text/json; charset=utf-8"); 23 | return headers; 24 | } 25 | } 26 | @Test 27 | public void test() {} 28 | } -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/client/IHttpClient.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.client; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.ICockroachResponse; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 6 | import com.zhangyingwei.cockroach.http.ProxyTuple; 7 | 8 | import java.util.Map; 9 | 10 | /** 11 | * Created by zhangyw on 2017/8/10. 
12 | */ 13 | public interface IHttpClient { 14 | TaskResponse doGet(Task task) throws Exception; 15 | 16 | IHttpClient proxy(ProxyTuple proxy) throws Exception; 17 | 18 | TaskResponse doPost(Task task) throws Exception; 19 | 20 | IHttpClient setCookie(String cookie) throws Exception; 21 | 22 | IHttpClient setHttpHeader(Map httpHeader) throws Exception; 23 | 24 | ProxyTuple getCurrentProxyTuple() throws Exception; 25 | } 26 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/queue/filter/DefaultRepeatFilter.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue.filter; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import org.apache.log4j.Logger; 5 | 6 | import java.util.HashSet; 7 | import java.util.Set; 8 | 9 | /** 10 | * @author: zhangyw 11 | * @date: 2018/1/24 12 | * @time: 下午2:37 13 | * @desc: 14 | */ 15 | public class DefaultRepeatFilter implements IQueueTaskFilter { 16 | private Set urls; 17 | private Logger logger = Logger.getLogger(DefaultRepeatFilter.class); 18 | 19 | public DefaultRepeatFilter() { 20 | this.urls = new HashSet(); 21 | } 22 | 23 | @Override 24 | public boolean accept(Task task) { 25 | if (urls.contains(task.getUrl())) { 26 | return false; 27 | } else { 28 | urls.add(task.getUrl()); 29 | } 30 | return true; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/queue/DefaultQueueTaskFilterTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import org.junit.Test; 5 | 6 | /** 7 | * @author: zhangyw 8 | * @date: 2018/1/19 9 | * @time: 下午2:32 10 | * @desc: 11 | */ 12 | public class DefaultQueueTaskFilterTest { 13 | 
@Test 14 | public void accept() throws Exception { 15 | CockroachQueue queue = TaskQueue.of().filter(new TestQueueTaskFilter()); 16 | queue.push(new Task(null)); 17 | queue.push(new Task("http://baidu.com")); 18 | queue.push(new Task("http://baidu.com")); 19 | queue.push(new Task("http://baidu.com")); 20 | queue.push(new Task("http://baidu.com")); 21 | queue.push(new Task("https://google.com")); 22 | queue.push(new Task("https://google.com")); 23 | queue.push(new Task("https://google.com")); 24 | } 25 | } -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/handler/TaskErrorHandlerBox.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.handler; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskErrorResponse; 4 | import com.zhangyingwei.cockroach.common.interfaces.IBox; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | /** 10 | * @author: zhangyw 11 | * @date: 2018/2/10 12 | * @time: 下午2:12 13 | * @desc: 14 | */ 15 | public class TaskErrorHandlerBox implements ITaskErrorHandler,IBox { 16 | private List errorHandlers; 17 | 18 | public TaskErrorHandlerBox() { 19 | this.errorHandlers = new ArrayList(); 20 | } 21 | 22 | @Override 23 | public void error(TaskErrorResponse response) { 24 | errorHandlers.forEach(handler -> { 25 | handler.error(response); 26 | }); 27 | } 28 | 29 | @Override 30 | public ITaskErrorHandler add(ITaskErrorHandler model) { 31 | this.errorHandlers.add(model); 32 | return this; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/CockroachContextErrorTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.config.CockroachConfig; 4 
| import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.queue.TaskQueue; 6 | import com.zhangyingwei.cockroach.store.PrintStore; 7 | import com.zhangyingwei.cockroach.store.SelecterTestStore; 8 | import org.junit.Test; 9 | 10 | /** 11 | * Created by zhangyw on 2017/8/10. 12 | */ 13 | public class CockroachContextErrorTest { 14 | @Test 15 | public void test() {} 16 | public static void main(String[] args) throws Exception { 17 | CockroachConfig config = new CockroachConfig() 18 | .setAppName("test error") 19 | .setThread(1) 20 | .setAutoClose(true) 21 | .setStore(PrintStore.class); 22 | CockroachContext context = new CockroachContext(config); 23 | TaskQueue queue = TaskQueue.of(); 24 | queue.push(new Task("https://www.123123.com")); 25 | context.start(queue); 26 | } 27 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/CockroachContextIPProxyTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.annotation.*; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 6 | import com.zhangyingwei.cockroach.queue.TaskQueue; 7 | import org.junit.Test; 8 | 9 | /** 10 | * Created by zhangyw on 2017/8/10. 
11 | */ 12 | @EnableAutoConfiguration 13 | @AppName("proxy test") 14 | @ProxyConfig("183.222.102.105,183.222.102.108,183.222.102.107,183.222.102.106,183.222.102.104,183.222.102.109") 15 | @ThreadConfig(num = 1, sleep = 2000) 16 | @AutoClose(false) 17 | public class CockroachContextIPProxyTest { 18 | public static void main(String[] args) throws Exception { 19 | CockroachQueue queue = TaskQueue.of(); 20 | for (int i = 0; i < 100; i++) { 21 | queue.push(new Task("https://www.google.com.hk/"+i)); 22 | } 23 | CockroachApplication.run(CockroachContextIPProxyTest.class, queue); 24 | } 25 | 26 | @Test 27 | public void trest() {} 28 | } -------------------------------------------------------------------------------- /cockroach-queue-redis/README.md: -------------------------------------------------------------------------------- 1 | # cockroach-queue-redis 2 | 3 | 使用 redis 作为消息队列,使用方法为: 4 | 5 | ```java 6 | @EnableAutoConfiguration 7 | @AppName("redis") 8 | @ThreadConfig(num = 5, sleep = 100) 9 | @AutoClose(false) 10 | @Store(DescribeStore.class) 11 | public class RedisTaskQueueTest { 12 | private static CockroachQueue queue = RedisTaskQueue.of("172.30.154.75", 6379, "cockroach"); 13 | 14 | public static void main(String[] args) throws Exception { 15 | CockroachApplication.run(RedisTaskQueueTest.class,queue); 16 | } 17 | 18 | @Test 19 | public void push() throws Exception { 20 | queue.filter(new IQueueTaskFilter() { 21 | @Override 22 | public boolean accept(Task task) { 23 | return task.getUrl().contains("zhangyingwei"); 24 | } 25 | }); 26 | for (int i = 0; i < 100; i++) { 27 | Task task = new Task("http://blog.zhangyingwei.com","zhangyingwei").retry(10).addDeep(20); 28 | queue.push(task); 29 | } 30 | queue.push(new Task("http://baidu.com")); 31 | } 32 | } 33 | 34 | ``` -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/CockroachContextQueueRetryTest.java: 
-------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.annotation.AppName; 4 | import com.zhangyingwei.cockroach.annotation.AutoClose; 5 | import com.zhangyingwei.cockroach.annotation.EnableAutoConfiguration; 6 | import com.zhangyingwei.cockroach.annotation.ThreadConfig; 7 | import com.zhangyingwei.cockroach.executer.task.Task; 8 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 9 | import com.zhangyingwei.cockroach.queue.TaskQueue; 10 | 11 | /** 12 | * Created by zhangyw on 2017/8/10. 13 | */ 14 | @EnableAutoConfiguration 15 | @AppName("test spider") 16 | @ThreadConfig(num = 1,sleep = 1000) 17 | @AutoClose(true) 18 | public class CockroachContextQueueRetryTest { 19 | public static void main(String[] args) throws Exception { 20 | CockroachQueue queue = TaskQueue.of(); 21 | for (int i = 0; i < 5; i++) { 22 | queue.push(new Task("http://hello.com"+i).retry(5)); 23 | } 24 | CockroachApplication.run(CockroachContextQueueRetryTest.class,queue); 25 | } 26 | } -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/client/AbstractHttpClient.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.client; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 5 | import com.zhangyingwei.cockroach.http.HttpProxy; 6 | import com.zhangyingwei.cockroach.http.ProxyTuple; 7 | import com.zhangyingwei.cockroach.http.handler.ITaskErrorHandler; 8 | 9 | import java.util.HashMap; 10 | import java.util.Map; 11 | 12 | /** 13 | * Created by zhangyw on 2017/8/10. 
14 | */ 15 | public abstract class AbstractHttpClient implements IHttpClient { 16 | protected ProxyTuple currentProxy; 17 | protected String cookie; 18 | protected Map httpHeader = new HashMap(); 19 | 20 | @Override 21 | public IHttpClient setHttpHeader(Map httpHeader) throws Exception { 22 | this.httpHeader = httpHeader; 23 | return this; 24 | } 25 | 26 | @Override 27 | public ProxyTuple getCurrentProxyTuple() throws Exception { 28 | return currentProxy; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /cockroach-queue-redis/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | cockroach 7 | com.github.zhangyingwei 8 | 1.0.6-Beta 9 | 10 | 4.0.0 11 | 12 | cockroach-queue-redis 13 | 14 | 15 | 16 | com.github.zhangyingwei 17 | cockroach-core 18 | 1.0.6-Beta 19 | 20 | 21 | 22 | redis.clients 23 | jedis 24 | 2.9.0 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/executer/response/filter/ITaskResponseFilterTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.response.filter; 2 | 3 | 4 | import com.zhangyingwei.cockroach.CockroachApplication; 5 | import com.zhangyingwei.cockroach.annotation.AppName; 6 | import com.zhangyingwei.cockroach.annotation.EnableAutoConfiguration; 7 | import com.zhangyingwei.cockroach.annotation.TaskResponseFiltersConfig; 8 | import com.zhangyingwei.cockroach.executer.task.Task; 9 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 10 | import com.zhangyingwei.cockroach.queue.TaskQueue; 11 | 12 | /** 13 | * @author: zhangyw 14 | * @date: 2018/1/24 15 | * @time: 下午3:37 16 | * @desc: 17 | */ 18 | @EnableAutoConfiguration 19 | @AppName("test") 20 | @TaskResponseFiltersConfig({ 21 | ResponseFilterTest.class 22 | }) 23 | public class 
/**
 * Created by zhangyw on 2017/8/11.
 * Update by zhangye on 2017/9/13
 * Name generator based on an incrementing sequence per class simple name.
 */
public class NameUtils {
    // Type arguments restored: with raw Map fields getOrDefault returns Object,
    // so neither "Integer index = ..." nor the String return of id() compiles.
    // Both maps are keyed by Class.getSimpleName(), so classes from different
    // packages that share a simple name share an entry.
    private static Map<String, Integer> nameMap = new HashMap<>();
    private static Map<String, String> idMap = new HashMap<>();

    /**
     * Returns {@code <SimpleName>-<n>} where n starts at 1 and grows by one on
     * every call for the same simple name.
     *
     * @param clazz class whose simple name prefixes the result
     * @return the next sequential name
     */
    public synchronized static String name(Class clazz){
        String name = clazz.getSimpleName();
        Integer index = nameMap.getOrDefault(name, 1);
        nameMap.put(name, index + 1);
        return bulidName(name,index);
    }

    /**
     * Returns a UUID string for the class.
     *
     * NOTE(review): idMap is never written, so the lookup always falls through
     * to the default and every call returns a fresh random UUID. If one stable
     * id per class was intended this should use computeIfAbsent; callers may
     * rely on unique ids, so the behaviour is left as is - confirm the intent.
     *
     * @param clazz class used for the lookup key
     * @return a UUID string
     */
    public synchronized static String id(Class clazz){
        String name = clazz.getSimpleName();
        return idMap.getOrDefault(name, UUID.randomUUID().toString());
    }

    // Joins the simple name and the sequence number with a dash.
    private static String bulidName(String name, Integer index) {
        return name + "-" + index;
    }

}
com.zhangyingwei.cockroach.annotation.AutoClose; 5 | import com.zhangyingwei.cockroach.annotation.EnableAutoConfiguration; 6 | import com.zhangyingwei.cockroach.annotation.Store; 7 | import com.zhangyingwei.cockroach.executer.task.Task; 8 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 9 | import com.zhangyingwei.cockroach.queue.TaskQueue; 10 | import com.zhangyingwei.cockroach.store.PrintTestStore; 11 | 12 | /** 13 | * @author: zhangyw 14 | * @date: 2018/2/1 15 | * @time: 下午10:17 16 | * @desc: 17 | */ 18 | @EnableAutoConfiguration 19 | @AppName("listener test") 20 | //@ExecutersListener() 21 | @Store(PrintTestStore.class) 22 | @AutoClose(true) 23 | public class CockroachContextExecutersListenerTest { 24 | public static void main(String[] args) throws Exception { 25 | CockroachQueue queue = TaskQueue.of(); 26 | for (int i = 0; i < 10; i++) { 27 | queue.push(new Task("http://baidu.com/?"+i)); 28 | } 29 | CockroachApplication.run(CockroachContextExecutersListenerTest.class, queue); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/queue/DefaultQueueTaskDeepTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import org.junit.Test; 5 | 6 | import java.util.concurrent.PriorityBlockingQueue; 7 | 8 | /** 9 | * @author: zhangyw 10 | * @date: 2018/1/19 11 | * @time: 下午2:32 12 | * @desc: 13 | */ 14 | public class DefaultQueueTaskDeepTest { 15 | @Test 16 | public void accept() throws Exception { 17 | CockroachQueue queue = TaskQueue.of(); 18 | queue.push(new Task("1").addDeep(1)); 19 | queue.push(new Task("2").addDeep(3)); 20 | queue.push(new Task("3").addDeep(5)); 21 | queue.push(new Task("4").addDeep(2)); 22 | queue.push(new Task("5").addDeep(4)); 23 | queue.push(new Task("6").addDeep(1)); 24 | queue.push(new 
Task("7").addDeep(1)); 25 | 26 | for (int i = 0; i < 7; i++) { 27 | queue.take(); 28 | } 29 | } 30 | 31 | @Test 32 | public void queueDeepTest() { 33 | PriorityBlockingQueue queue = new PriorityBlockingQueue(10); 34 | for (int i = 0; i < 100; i++) { 35 | queue.put(i); 36 | } 37 | System.out.println(queue.size()); 38 | } 39 | } -------------------------------------------------------------------------------- /cockroach-samples/src/main/java/com/zhangyingwei/cockroach/samples/oschina/OschinaApplicatoin.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.samples.oschina; 2 | 3 | import com.zhangyingwei.cockroach.CockroachApplication; 4 | import com.zhangyingwei.cockroach.annotation.AppName; 5 | import com.zhangyingwei.cockroach.annotation.EnableAutoConfiguration; 6 | import com.zhangyingwei.cockroach.annotation.Store; 7 | import com.zhangyingwei.cockroach.annotation.ThreadConfig; 8 | import com.zhangyingwei.cockroach.executer.task.Task; 9 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 10 | import com.zhangyingwei.cockroach.queue.TaskQueue; 11 | import com.zhangyingwei.cockroach.samples.oschina.store.OsChinaStore; 12 | 13 | /** 14 | * Created by zhangyw on 2018/2/26. 
15 | */ 16 | 17 | @EnableAutoConfiguration 18 | @AppName("开源中国博客爬虫") 19 | @ThreadConfig(num = 10,sleep = 500) 20 | @Store(OsChinaStore.class) 21 | public class OschinaApplicatoin { 22 | public static void main(String[] args) throws Exception { 23 | CockroachQueue queue = TaskQueue.of(); 24 | for (int i = 1; i <= 10; i++) { 25 | queue.push(new Task("https://www.oschina.net/action/ajax/get_more_recommend_blog?classification=0&p="+i,"oschina.blog").retry(5)); 26 | } 27 | CockroachApplication.run(OschinaApplicatoin.class, queue); 28 | } 29 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/common/utils/ImageStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.utils; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.NameGenerator; 4 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 5 | import com.zhangyingwei.cockroach.store.IStore; 6 | 7 | /** 8 | * Created by zhangyw on 2017/12/12. 
9 | */ 10 | public class ImageStore implements IStore { 11 | @Override 12 | public void store(TaskResponse response) throws Exception { 13 | String name = FileUtils.getFileName(response); 14 | System.out.println(name); 15 | String name2 = FileUtils.getFileNameOrUuid(response); 16 | // System.out.println(name2); 17 | String name3 = FileUtils.getFileNameOr(response, new NameGenerator() { 18 | @Override 19 | public String name() { 20 | String url = response.getTask().getUrl(); 21 | // url.split("/") 22 | return "generator"; 23 | } 24 | }); 25 | // System.out.println(name3); 26 | 27 | // System.out.println("use:"+name2); 28 | // 29 | // FileUtils.save(response.getContentBytes(),"/Users/zhangyw/IdeaProjects/zhangyw/Projects/java/cockroach/src/main/resources",name2+".flv"); 30 | // System.out.println("end"); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /cockroach-samples/src/main/java/com/zhangyingwei/cockroach/samples/douban/movie/DMovieApplication.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.samples.douban.movie; 2 | 3 | import com.zhangyingwei.cockroach.CockroachApplication; 4 | import com.zhangyingwei.cockroach.annotation.*; 5 | import com.zhangyingwei.cockroach.executer.task.Task; 6 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 7 | import com.zhangyingwei.cockroach.queue.TaskQueue; 8 | import com.zhangyingwei.cockroach.samples.douban.movie.store.DMovieStore; 9 | 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | 13 | /** 14 | * Created by zhangyw on 2018/2/28. 
15 | */ 16 | @EnableAutoConfiguration 17 | @AppName("豆瓣电影") 18 | @ThreadConfig(num = 8,sleep = 100) 19 | @Store(DMovieStore.class) 20 | @AutoClose(false) 21 | public class DMovieApplication { 22 | public static void main(String[] args) throws Exception { 23 | CockroachQueue queue = TaskQueue.of(); 24 | Map params = new HashMap(); 25 | params.put("sort", "T"); 26 | params.put("range", "0,20"); 27 | params.put("tag", ""); 28 | params.put("start", 0); 29 | Task task = new Task("https://movie.douban.com/j/new_search_subjects", "douban.movie", params); 30 | queue.push(task); 31 | CockroachApplication.run(DMovieApplication.class, queue); 32 | } 33 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/annotation/AnnotationTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.annotation; 2 | 3 | import com.zhangyingwei.cockroach.CockroachApplication; 4 | import com.zhangyingwei.cockroach.common.CookieGeneratorTest; 5 | import com.zhangyingwei.cockroach.common.HeaderGeneratorTest; 6 | import com.zhangyingwei.cockroach.executer.task.Task; 7 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 8 | import com.zhangyingwei.cockroach.queue.TaskQueue; 9 | import com.zhangyingwei.cockroach.store.TestStore; 10 | import org.junit.Test; 11 | 12 | /** 13 | * Created by zhangyw on 2017/12/8. 
14 | */ 15 | 16 | @EnableAutoConfiguration 17 | @AppName("hello spider") 18 | @Store(TestStore.class) 19 | @AutoClose(false) 20 | @ThreadConfig(num = 1) 21 | @CookieConfig(cookieGenerator = CookieGeneratorTest.class) 22 | @HttpHeaderConfig(headerGenerator = HeaderGeneratorTest.class) 23 | public class AnnotationTest { 24 | public static void main(String[] args) throws Exception { 25 | CockroachQueue queue = TaskQueue.of(20); 26 | queue.push(new Task("http://search.51job.com/jobsearch/search_result.php?fromJs=1&jobarea=010000&industrytype=32&keyword=Java%E5%BC%80%E5%8F%91&keywordtype=2&lang=c&stype=2&postchannel=0000&fromType=1&confirmdate=9")); 27 | CockroachApplication.run(AnnotationTest.class,queue); 28 | } 29 | 30 | @Test 31 | public void test() {} 32 | } 33 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/CockroachContextZhihuTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.config.CockroachConfig; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.queue.TaskQueue; 6 | import com.zhangyingwei.cockroach.store.ZhihuStore; 7 | 8 | /** 9 | * Created by zhangyw on 2017/8/10. 
10 | */ 11 | public class CockroachContextZhihuTest { 12 | public static void main(String[] args) throws Exception { 13 | String cockie = ""; 14 | CockroachConfig config = new CockroachConfig() 15 | .setAppName("haha") 16 | .setThread(1) 17 | .setCookie(cockie) 18 | .setAutoClose(true) 19 | .addHttpHeader("Host","www.zhihu.com") 20 | .addHttpHeader("Upgrade-Insecure-Requests","1") 21 | .addHttpHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8") 22 | .setStore(ZhihuStore.class); 23 | CockroachContext context = new CockroachContext(config); 24 | TaskQueue queue = TaskQueue.of(); 25 | // queue.push(new Task("https://www.zhihu.com/people/wmhsr/activities")); 26 | queue.push(new Task("https://www.zhihu.com/api/v4/members/excited-vczh/followees?offset=0&limit=20")); 27 | context.start(queue); 28 | } 29 | } -------------------------------------------------------------------------------- /cockroach-samples/src/main/java/com/zhangyingwei/cockroach/samples/oschina/store/OsChinaStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.samples.oschina.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.store.IStore; 6 | import org.jsoup.select.Elements; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * Created by zhangyw on 2018/2/26. 
12 | */ 13 | public class OsChinaStore implements IStore { 14 | @Override 15 | public void store(TaskResponse response) throws Exception { 16 | if (response.isGroup("oschina.blog")) { 17 | response.select(".item").forEach(item -> { 18 | String href = item.select(".blog-title-link").attr("href"); 19 | try { 20 | response.getQueue().push(new Task(href,"oschina.blog.item")); 21 | } catch (Exception e) { 22 | e.printStackTrace(); 23 | } 24 | }); 25 | } else if (response.isGroup("oschina.blog.item")) { 26 | Elements content = response.select(".article-detail"); 27 | String title = content.select(".header").text().replace("顶 原 荐",""); 28 | String autor = content.select("blog-meta > div:nth-child(1) > a").text(); 29 | System.out.println(String.format("文章标题: %s 作者: %s",title,autor)); 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/client/okhttp/CookieManager.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.client.okhttp; 2 | 3 | import okhttp3.Cookie; 4 | import okhttp3.CookieJar; 5 | import okhttp3.HttpUrl; 6 | 7 | import java.util.*; 8 | import java.util.stream.Collectors; 9 | 10 | /** 11 | * Created by zhangyw on 2017/8/11. 
12 | */ 13 | public class CookieManager implements CookieJar { 14 | 15 | private Map> cookies = new HashMap>(); 16 | 17 | private String cookie; 18 | 19 | public CookieManager() { 20 | } 21 | 22 | public CookieManager(String cookie) { 23 | this.cookie = cookie; 24 | } 25 | 26 | @Override 27 | public void saveFromResponse(HttpUrl httpUrl, List list) { 28 | cookies.put(httpUrl.host(), list); 29 | } 30 | 31 | @Override 32 | public List loadForRequest(HttpUrl httpUrl) { 33 | List list = new ArrayList(); 34 | if (this.cookie != null) { 35 | list = Arrays.stream(this.cookie.split(";")) 36 | .map(line -> line.split("=")) 37 | .filter(item -> item.length > 1) 38 | .map(item -> new Cookie.Builder().name(item[0].trim()).value(item[1].trim()).domain(httpUrl.host().trim()).build()).collect(Collectors.toList()); 39 | } 40 | return Optional.ofNullable(cookies.get(httpUrl.host())).orElse(list); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/CockroachContextImageTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.config.CockroachConfig; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.queue.TaskQueue; 6 | import com.zhangyingwei.cockroach.store.ZhiHuMeiZhi; 7 | 8 | /** 9 | * Created by zhangyw on 2017/8/10. 
10 | */ 11 | public class CockroachContextImageTest { 12 | public static final String hello = "hello"; 13 | public static void main(String[] args) throws Exception { 14 | CockroachConfig config = new CockroachConfig() 15 | .setAppName("妹子下载器") 16 | .setThread(20, 3000) 17 | .setStore(ZhiHuMeiZhi.class) 18 | .setAutoClose(true); 19 | CockroachContext context = new CockroachContext(config); 20 | TaskQueue queue = TaskQueue.of(); 21 | new Thread(() -> { 22 | int index = 1; 23 | while(true){ 24 | try { 25 | Task task = new Task("https://www.zhihu.com/collection/61633672?page="+index++); 26 | queue.push(task); 27 | } catch (InterruptedException e) { 28 | e.printStackTrace(); 29 | } 30 | if(index > 199){ 31 | break; 32 | } 33 | } 34 | }).start(); 35 | context.start(queue); 36 | } 37 | } -------------------------------------------------------------------------------- /cockroach-samples/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | cockroach 7 | com.github.zhangyingwei 8 | 1.0.6-Beta 9 | 10 | 4.0.0 11 | 12 | cockroach-samples 13 | 14 | 15 | 16 | 17 | com.github.zhangyingwei 18 | cockroach-core 19 | 1.0.6-Beta 20 | 21 | 22 | com.github.zhangyingwei 23 | cockroach-annotation 24 | 1.0.6-Beta 25 | 26 | 27 | 28 | 29 | 30 | 31 | org.apache.maven.plugins 32 | maven-deploy-plugin 33 | 2.8.2 34 | 35 | true 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /cockroach-samples/src/main/java/com/zhangyingwei/cockroach/samples/juejin/JueJinApplication.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.samples.juejin; 2 | 3 | import com.zhangyingwei.cockroach.CockroachApplication; 4 | import com.zhangyingwei.cockroach.annotation.AppName; 5 | import com.zhangyingwei.cockroach.annotation.EnableAutoConfiguration; 6 | import com.zhangyingwei.cockroach.annotation.Store; 7 | import 
com.zhangyingwei.cockroach.annotation.ThreadConfig; 8 | import com.zhangyingwei.cockroach.executer.task.Task; 9 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 10 | import com.zhangyingwei.cockroach.queue.TaskQueue; 11 | import com.zhangyingwei.cockroach.samples.juejin.store.JueJinStore; 12 | 13 | /** 14 | * @author: zhangyw 15 | * @date: 2018/2/26 16 | * @time: 下午8:43 17 | * @desc: 18 | */ 19 | 20 | @EnableAutoConfiguration 21 | @AppName("掘金") 22 | @ThreadConfig(num = 1,sleep = 500) 23 | @Store(JueJinStore.class) 24 | public class JueJinApplication { 25 | public static void main(String[] args) throws Exception { 26 | CockroachQueue queue = TaskQueue.of(); 27 | queue.push(new Task("https://timeline-merger-ms.juejin.im/v1/get_entry_by_rank?src=web&uid=58368a0461ff4b475bd600bc&device_id=1519648660286&token=eyJhY2Nlc3NfdG9rZW4iOiJqQVlaSWxIN1U3aGJnV3YzIiwicmVmcmVzaF90b2tlbiI6IlV5RnA3eDFaVWp4bk9jRVEiLCJ0b2tlbl90eXBlIjoibWFjIiwiZXhwaXJlX2luIjoyNTkyMDAwfQ%3D%3D&limit=20&category=all&recomment=1")); 28 | CockroachApplication.run(JueJinApplication.class, queue); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/response/filter/TaskResponseFilterBox.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.response.filter; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | import com.zhangyingwei.cockroach.common.interfaces.IBox; 5 | import org.apache.log4j.Logger; 6 | 7 | import java.util.HashSet; 8 | import java.util.Set; 9 | 10 | /** 11 | * @author: zhangyw 12 | * @date: 2018/1/24 13 | * @time: 下午3:24 14 | * @desc: 15 | */ 16 | public class TaskResponseFilterBox implements ITaskResponseFilter,IBox { 17 | private Set responseFilters; 18 | private Logger logger = Logger.getLogger(TaskResponseFilterBox.class); 19 | public TaskResponseFilterBox() { 
20 | this.responseFilters = new HashSet(); 21 | } 22 | 23 | @Override 24 | public synchronized boolean accept(TaskResponse response) { 25 | for (ITaskResponseFilter responseFilter : responseFilters) { 26 | if (!responseFilter.accept(response)) { 27 | logger.info(Thread.currentThread().getName() + " response of " + response.getTask() + " is not accepted by " + responseFilter.getClass()); 28 | return false; 29 | } 30 | } 31 | return true; 32 | } 33 | 34 | @Override 35 | public ITaskResponseFilter add(ITaskResponseFilter model) { 36 | this.responseFilters.add(model); 37 | return this; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/executer/TaskExecuterTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.filter.TaskResponseFilterBox; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.executer.task.TaskExecuter; 6 | import com.zhangyingwei.cockroach.http.client.HttpClientProxy; 7 | import com.zhangyingwei.cockroach.http.client.okhttp.COkHttpClient; 8 | import com.zhangyingwei.cockroach.http.handler.DefaultTaskErrorHandler; 9 | import com.zhangyingwei.cockroach.queue.TaskQueue; 10 | import com.zhangyingwei.cockroach.store.DescribeStore; 11 | import org.junit.Test; 12 | 13 | import java.util.concurrent.ExecutorService; 14 | import java.util.concurrent.Executors; 15 | 16 | /** 17 | * Created by zhangyw on 2017/8/10. 
18 | */ 19 | public class TaskExecuterTest { 20 | 21 | @Test 22 | public void test() throws InterruptedException { 23 | TaskQueue queue = TaskQueue.of(); 24 | queue.push(new Task("http://zhangyingwei.com")); 25 | // ExecutorService service = Executors.newCachedThreadPool(); 26 | // service.execute(new TaskExecuter(queue, new HttpClientProxy(new COkHttpClient()),new DescribeStore(),new DefaultTaskErrorHandler(),1000,false, new TaskResponseFilterBox())); 27 | TaskExecuter executer = new TaskExecuter(queue, new HttpClientProxy(new COkHttpClient()), new DescribeStore(), new DefaultTaskErrorHandler(), 1000, true, new TaskResponseFilterBox()); 28 | executer.run(); 29 | } 30 | } -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/queue/filter/TaskFilterBox.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue.filter; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.common.interfaces.IBox; 5 | import org.apache.log4j.Logger; 6 | 7 | import java.util.HashSet; 8 | import java.util.Set; 9 | 10 | /** 11 | * @author: zhangyw 12 | * @date: 2018/1/24 13 | * @time: 下午2:58 14 | * @desc: 15 | */ 16 | public class TaskFilterBox implements IQueueTaskFilter,IBox { 17 | private static Logger logger = Logger.getLogger(TaskFilterBox.class); 18 | private Set filters; 19 | 20 | public TaskFilterBox() { 21 | this.filters = new HashSet(); 22 | this.filtersInit(); 23 | } 24 | 25 | /** 26 | * 初始化过滤器 27 | */ 28 | private void filtersInit() { 29 | this.filters.add(new DefaultQueueTaskFilter()); 30 | this.filters.add(new DefaultRepeatFilter()); 31 | } 32 | 33 | @Override 34 | public synchronized boolean accept(Task task) { 35 | for (IQueueTaskFilter filter : filters) { 36 | if (!filter.accept(task)) { 37 | logger.info(Thread.currentThread().getName() + " " + task + " is not accepted by 
" + filter.getClass()); 38 | return false; 39 | } 40 | } 41 | return true; 42 | } 43 | 44 | @Override 45 | public IQueueTaskFilter add(IQueueTaskFilter filter) { 46 | this.filters.add(filter); 47 | return this; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /cockroach-core/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | cockroach 7 | com.github.zhangyingwei 8 | 1.0.6-Beta 9 | 10 | 4.0.0 11 | 12 | cockroach-core 13 | 14 | 15 | 16 | com.squareup.okhttp3 17 | okhttp 18 | 3.8.1 19 | 20 | 21 | 22 | net.sf.json-lib 23 | json-lib 24 | 2.4 25 | jdk15 26 | 27 | 28 | 29 | org.jsoup 30 | jsoup 31 | 1.11.2 32 | 33 | 34 | 35 | cn.wanghaomiao 36 | JsoupXpath 37 | 0.3.2 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/IpStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | import com.zhangyingwei.cockroach.common.utils.NameUtils; 5 | import org.jsoup.select.Elements; 6 | 7 | import java.io.FileWriter; 8 | import java.io.IOException; 9 | import java.io.PrintWriter; 10 | import java.util.Arrays; 11 | import java.util.List; 12 | import java.util.stream.Collectors; 13 | 14 | /** 15 | * Created by zhangyw on 2017/8/11. 
16 | */
/**
 * Test store that extracts "ip:port" pairs from the rows of the
 * {@code #ip_list} table and appends them to a per-instance text file.
 */
public class IpStore implements IStore {

    /** Name generated once per store instance; used as the output file name. */
    private String id = NameUtils.name(IpStore.class);

    public IpStore() throws IOException {}

    /**
     * Appends one "ip:port" line per usable table row to D://iplist/&lt;id&gt;.txt.
     *
     * @param response page expected to contain an element with id "ip_list"
     * @throws IOException if the output file cannot be opened
     */
    @Override
    public void store(TaskResponse response) throws IOException {
        // try-with-resources: the file handle is released even when parsing throws.
        try (PrintWriter writer = new PrintWriter(new FileWriter("D://iplist/"+id+".txt",true),true)) {
            Elements rows = response.select("#ip_list tr");
            rows.stream()
                    .filter(row -> row.select("td").size() > 2)
                    .map(row -> row.select("td").stream()
                            .map(td -> td.text())
                            .filter(text -> text.trim().length() > 0)
                            .collect(Collectors.toList()))
                    // Rows without at least three non-empty cells carry no ip/port;
                    // skip them instead of writing a bare ":" line.
                    .filter(texts -> texts.size() > 2)
                    .forEach(texts -> {
                        String[] pair = new String[]{texts.get(0), texts.get(1)};
                        System.out.println(Arrays.toString(pair));
                        writer.println(pair[0] + ":" + pair[1]);
                    });
        }
    }
}
-------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/ZhiHuMeiZhi.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 5 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 6 | import org.jsoup.select.Elements; 7 | 8 | import java.io.DataOutputStream; 9 | import java.io.File; 10 | import java.io.FileOutputStream; 11 | import java.util.UUID; 12 | 13 | /** 14 | * Created by zhangyw on 2017/9/18. 
15 | */
/**
 * Test store for an image crawl. Pages that are not in group "image" are
 * scanned for img elements, each queued as an "image" task; responses in
 * group "image" are written to disk as .jpg files.
 */
public class ZhiHuMeiZhi implements IStore {
    /**
     * @param response either a listing page or a downloaded image
     * @throws Exception if reading the response body or writing the file fails
     */
    @Override
    public void store(TaskResponse response) throws Exception {
        System.out.println("store"+response.getTask());
        // Constant-first equals: a task without a group is treated as a page
        // instead of failing with a NullPointerException.
        if (!"image".equals(response.getTask().getGroup())) {
            CockroachQueue queue = response.getQueue();
            Elements images = response.select("div").select("img");
            images.stream().forEach(element -> {
                Task task = new Task(element.attr("data-original"));
                task.setGroup("image");
                try {
                    queue.push(task);
                    System.out.println("push:" + task);
                } catch (Exception e) {
                    // Best effort: keep queueing the remaining images.
                    e.printStackTrace();
                }
            });
        } else {
            File file = new File("D://zhihumeizitu2/"+ UUID.randomUUID()+".jpg");
            byte[] bytes = response.getContent().bytes();
            // try-with-resources closes the stream even when write() throws.
            try (DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(file))) {
                outputStream.write(bytes);
            }
        }
    }
}
-------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/MeiZhiStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 5 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 6 | import org.jsoup.select.Elements; 7 | 8 | import java.io.DataOutputStream; 9 | import java.io.File; 10 | import java.io.FileOutputStream; 11 | import java.util.UUID; 12 | 13 | /** 14 | * Created by zhangyw on 2017/9/18. 
15 | */ 16 | public class MeiZhiStore implements IStore { 17 | @Override 18 | public void store(TaskResponse response) throws Exception { 19 | System.out.println("store"+response.getTask()); 20 | if (!response.getTask().getGroup().equals("image")) { 21 | CockroachQueue queue = response.getQueue(); 22 | Elements as = response.select(".main-content").select("a").select("img"); 23 | as.stream().forEach(element -> { 24 | Task task = new Task(element.attr("src")); 25 | task.setGroup("image"); 26 | try { 27 | queue.push(task); 28 | System.out.println("push:" + task); 29 | } catch (Exception e) { 30 | e.printStackTrace(); 31 | } 32 | }); 33 | } else { 34 | File file = new File("D://meizitu/"+UUID.randomUUID()+".png"); 35 | byte[] bytes = response.getContent().bytes(); 36 | DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(file)); 37 | outputStream.write(bytes); 38 | outputStream.close(); 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/CockroachContextIPTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.config.CockroachConfig; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.queue.TaskQueue; 6 | import com.zhangyingwei.cockroach.http.client.okhttp.COkHttpClient; 7 | import com.zhangyingwei.cockroach.store.IpStore; 8 | 9 | /** 10 | * Created by zhangyw on 2017/8/10. 
11 | */ 12 | public class CockroachContextIPTest { 13 | public static void main(String[] args) throws Exception { 14 | CockroachConfig config = new CockroachConfig() 15 | .setAppName("haha") 16 | .setThread(5,5000) 17 | .setProxys("183.222.102.105,183.222.102.108,183.222.102.107,183.222.102.106,183.222.102.104,183.222.102.109") 18 | .setHttpClient(COkHttpClient.class) 19 | .setStore(IpStore.class) 20 | .setAutoClose(true); 21 | CockroachContext context = new CockroachContext(config); 22 | TaskQueue queue = TaskQueue.of(); 23 | 24 | new Thread(() -> { 25 | int i = 167; 26 | while(true){ 27 | i++; 28 | try { 29 | // Thread.sleep(1000); 30 | String url = "http://www.xicidaili.com/wt/"+i; 31 | queue.push(new Task(url)); 32 | // System.out.println("push "+url); 33 | } catch (InterruptedException e) { 34 | e.printStackTrace(); 35 | } 36 | if (i > 200) { 37 | break; 38 | } 39 | } 40 | }).start(); 41 | 42 | context.start(queue); 43 | 44 | } 45 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/common/utils/FileUtilsTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.utils; 2 | 3 | import com.zhangyingwei.cockroach.CockroachApplication; 4 | import com.zhangyingwei.cockroach.annotation.EnableAutoConfiguration; 5 | import com.zhangyingwei.cockroach.annotation.HttpConfig; 6 | import com.zhangyingwei.cockroach.annotation.Store; 7 | import com.zhangyingwei.cockroach.annotation.ThreadConfig; 8 | import com.zhangyingwei.cockroach.executer.task.Task; 9 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 10 | import com.zhangyingwei.cockroach.queue.TaskQueue; 11 | import org.junit.Assert; 12 | import org.junit.Test; 13 | 14 | import java.io.File; 15 | import java.io.IOException; 16 | import java.util.concurrent.TimeUnit; 17 | 18 | /** 19 | * Created by zhangyw on 2017/12/12. 
20 | */ 21 | @EnableAutoConfiguration 22 | @Store(ImageStore.class) 23 | @ThreadConfig(num = 1) 24 | @HttpConfig(progress = true) 25 | public class FileUtilsTest { 26 | public static void main(String[] args) throws Exception { 27 | CockroachQueue queue = TaskQueue.of(); 28 | queue.push(new Task("http://util.zhangyingwei.com//cockroach/1/carbon.png")); 29 | queue.push(new Task("http://img.dmc.csdn.net/B3DF79B6065EC826F2EC278369F31F6E.jpg")); 30 | CockroachApplication.run(FileUtilsTest.class,queue); 31 | } 32 | 33 | @Test 34 | public void getOrCreateTest() throws IOException, InterruptedException { 35 | File file = FileUtils.openOrCreate("./", "hello.txt"); 36 | FileUtils.clearFile(file); 37 | for (int i = 0; i < 10; i++) { 38 | FileUtils.append(file,i+"\n"); 39 | } 40 | FileUtils.closeWriters(); 41 | // Assert.assertTrue(FileUtils.delete(file)); 42 | } 43 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/CockroachContextTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.config.CockroachConfig; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.queue.TaskQueue; 6 | import com.zhangyingwei.cockroach.store.SelecterTestStore; 7 | import org.junit.Test; 8 | 9 | /** 10 | * Created by zhangyw on 2017/8/10. 
11 | */ 12 | public class CockroachContextTest { 13 | @Test 14 | public void test() {} 15 | public static void main(String[] args) throws Exception { 16 | CockroachConfig config = new CockroachConfig() 17 | .setAppName("haha") 18 | .setThread(1) 19 | .setAutoClose(true) 20 | // .setProxys("121.232.145.21:9000") 21 | // .setHttpClient(COkHttpClient.class) 22 | .setStore(SelecterTestStore.class); 23 | CockroachContext context = new CockroachContext(config); 24 | TaskQueue queue = TaskQueue.of(); 25 | queue.push(new Task("https://www.cnblogs.com/wanghaomiao/p/4899355.html")); 26 | context.start(queue); 27 | 28 | // new Thread(() -> { 29 | // int i = 1; 30 | // while(true){ 31 | // i++; 32 | // try { 33 | // Thread.sleep(1); 34 | // String url = "http://op.5068.com/qb/118368_" + i + ".html"; 35 | // System.out.println(url); 36 | // queue.push(new Task(url)); 37 | // } catch (InterruptedException e) { 38 | // e.printStackTrace(); 39 | // } 40 | // if (i > 9) { 41 | // break; 42 | // } 43 | // } 44 | // }).start(); 45 | 46 | 47 | } 48 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/store/NameStore.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | import com.zhangyingwei.cockroach.common.utils.NameUtils; 5 | import org.jsoup.select.Elements; 6 | 7 | import java.io.FileWriter; 8 | import java.io.IOException; 9 | import java.io.PrintWriter; 10 | 11 | /** 12 | * Created by zhangyw on 2017/8/11. 
13 | */ 14 | public class NameStore implements IStore { 15 | 16 | private String id = NameUtils.name(NameStore.class); 17 | 18 | public NameStore() throws IOException {} 19 | 20 | @Override 21 | public void store(TaskResponse response) throws IOException { 22 | PrintWriter writer = new PrintWriter(new FileWriter("D://"+id+".txt",true),true); 23 | Elements els = response.select("strong"); 24 | els.stream().map(el -> el.text().trim()) 25 | .filter(name -> !name.contains("第")) 26 | .filter(name -> !name.startsWith("热门")) 27 | .filter(name -> !name.startsWith("找动画")) 28 | .filter(name -> !name.startsWith("凹凸")) 29 | .filter(name -> !name.contains("更多")) 30 | .filter(name -> !name.contains("5068")) 31 | .filter(name -> !name.contains("热播动画")) 32 | .filter(name -> !name.contains("点击浏览")) 33 | .filter(name -> !name.contains("上一页")) 34 | .filter(name -> !name.contains("关于我们")) 35 | .filter(name -> name.length() > 0) 36 | .map(name -> name.split("(")[0].trim().replaceAll(" ","")) 37 | .forEach(name -> { 38 | System.out.println(id+":"+name); 39 | writer.println(name); 40 | }); 41 | writer.close(); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/handler/DefaultTaskErrorHandler.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.handler; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskErrorResponse; 4 | import com.zhangyingwei.cockroach.http.HttpProxy; 5 | import com.zhangyingwei.cockroach.http.client.HttpClientProxy; 6 | import org.apache.log4j.Logger; 7 | 8 | import java.io.IOException; 9 | 10 | /** 11 | * Created by zhangyw on 2017/8/16. 
12 | */ 13 | public class DefaultTaskErrorHandler implements ITaskErrorHandler { 14 | private Logger logger = Logger.getLogger(DefaultTaskErrorHandler.class); 15 | 16 | @Override 17 | public void error(TaskErrorResponse response) { 18 | try { 19 | response.getQueue().falied(response.getTask()); 20 | logger.info("task error: "+ response.getContent()); 21 | this.validProxy(response); 22 | } catch (IOException e) { 23 | e.printStackTrace(); 24 | } catch (Exception e) { 25 | e.printStackTrace(); 26 | } 27 | } 28 | 29 | private void validProxy(TaskErrorResponse response) throws IOException { 30 | String message = response.getContent().string(); 31 | HttpClientProxy clientProxy = (HttpClientProxy) response.response().getHttpClient(); 32 | HttpProxy proxy = clientProxy.getProxy(); 33 | if (message != null && 34 | ( 35 | message.toLowerCase().contains("timeout") 36 | || message.toLowerCase().contains("time out") 37 | || message.toLowerCase().contains("connect: 403") 38 | )) { 39 | if (proxy != null && !proxy.isEmpty()) { 40 | proxy.disable(clientProxy.getCurrentProxyTuple()); 41 | } 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/queue/CockroachQueue.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.queue.filter.IQueueTaskFilter; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * Created by zhangyw on 2017/9/13. 
10 | * 队列接口 11 | */ 12 | public interface CockroachQueue { 13 | 14 | /** 15 | * 如果队列为空 结果为 null 16 | * @return 17 | * @throws Exception 18 | */ 19 | public Task poll() throws Exception; 20 | 21 | /** 22 | * 如果队列为空 阻塞等待 直到队列不为空 23 | * @return 24 | * @throws Exception 25 | */ 26 | public Task take() throws Exception; 27 | 28 | /** 29 | * 入队 30 | * @param task 31 | * @throws Exception 32 | */ 33 | public void push(Task task) throws Exception; 34 | 35 | /** 36 | * 入队 37 | * 是否应用 filter 38 | * @param task 39 | * @param withFilter 40 | */ 41 | public void push(Task task, Boolean withFilter) throws Exception; 42 | 43 | /** 44 | * 失败 task 入队 45 | * @param task 46 | * @throws InterruptedException 47 | */ 48 | public void falied(Task task) throws Exception; 49 | 50 | /** 51 | * 批量入队 52 | * @param tasks 53 | * @throws Exception 54 | */ 55 | public void pushAll(List tasks) throws Exception; 56 | 57 | /** 58 | * 批量入队 59 | * @param urls 60 | * @throws Exception 61 | */ 62 | public void push(List urls) throws Exception; 63 | 64 | /** 65 | * 清空队列 66 | * @throws Exception 67 | */ 68 | public void clear() throws Exception; 69 | 70 | /** 71 | * push 的时候先经过过滤器 72 | * @throws Exception 73 | */ 74 | public CockroachQueue filter(IQueueTaskFilter filter) throws Exception; 75 | 76 | Boolean isEmpty(); 77 | } 78 | -------------------------------------------------------------------------------- /cockroach-test/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | cockroach 7 | com.github.zhangyingwei 8 | 1.0.6-Beta 9 | 10 | 4.0.0 11 | 12 | cockroach-test 13 | 14 | 15 | 16 | com.github.zhangyingwei 17 | cockroach-core 18 | 1.0.6-Beta 19 | 20 | 21 | com.github.zhangyingwei 22 | cockroach-annotation 23 | 1.0.6-Beta 24 | 25 | 26 | com.github.zhangyingwei 27 | cockroach-queue-redis 28 | 1.0.6-Beta 29 | 30 | 31 | junit 32 | junit 33 | 4.12 34 | 35 | 36 | 37 | 38 | 39 | 40 | org.apache.maven.plugins 41 | maven-deploy-plugin 42 | 2.8.2 43 | 44 | 
/**
 * Plain value holder for one movie entry of the Douban sample.
 */
public class Movie {
    private String title;
    private String rate;
    private String url;
    private String id;
    // directors
    private List directors;
    // leading cast
    private List casts;

    /** @return the movie title */
    public String getTitle() {
        return title;
    }

    /** @return the rating text */
    public String getRate() {
        return rate;
    }

    /** @return the movie page URL */
    public String getUrl() {
        return url;
    }

    /** @return the movie id */
    public String getId() {
        return id;
    }

    /** @return the directors list */
    public List getDirectors() {
        return directors;
    }

    /** @return the cast list */
    public List getCasts() {
        return casts;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public void setRate(String rate) {
        this.rate = rate;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public void setId(String id) {
        this.id = id;
    }

    public void setDirectors(List directors) {
        this.directors = directors;
    }

    public void setCasts(List casts) {
        this.casts = casts;
    }

    /**
     * Renders every field in the form {@code Movie{title='..', ...}}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Movie{");
        text.append("title='").append(title).append('\'');
        text.append(", rate='").append(rate).append('\'');
        text.append(", url='").append(url).append('\'');
        text.append(", id='").append(id).append('\'');
        text.append(", directors=").append(directors);
        text.append(", casts=").append(casts);
        text.append('}');
        return text.toString();
    }
}
com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.config.CockroachConfig; 4 | import com.zhangyingwei.cockroach.executer.ExecuterManager; 5 | import com.zhangyingwei.cockroach.executer.listener.BootstrapExecutersListener; 6 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 7 | import org.apache.log4j.Logger; 8 | 9 | /** 10 | * Created by zhangyw on 2017/8/10. 11 | */ 12 | public class CockroachContext { 13 | private Logger logger = Logger.getLogger(CockroachContext.class); 14 | private CockroachConfig config; 15 | private boolean started = false; 16 | private ExecuterManager executerManager; 17 | 18 | public CockroachContext(final CockroachConfig config) { 19 | this.config = config; 20 | this.executerManager = new ExecuterManager(this.config); 21 | } 22 | 23 | /** 24 | * 启动爬虫程序 25 | * 只能启动一次,启动之前先判断之前有没有启动过 26 | * @param queue 27 | * @throws IllegalAccessException 28 | * @throws InstantiationException 29 | */ 30 | public void start(CockroachQueue queue) { 31 | if(!started){ 32 | logger.info("starting..."); 33 | config.print(); 34 | try { 35 | this.executerManager.bindListener(BootstrapExecutersListener.class).bindListener(this.config.getExecutersListener()).start(queue); 36 | } catch (Exception e) { 37 | logger.info("start faild"); 38 | logger.debug(e.getMessage()); 39 | e.printStackTrace(); 40 | } 41 | this.started = true; 42 | logger.info("start success"); 43 | }else{ 44 | logger.warn("the cockroach has already started"); 45 | } 46 | } 47 | 48 | /** 49 | * 停止执行爬虫程序 50 | * 但是队列中的任务不会丢,除非整个程序停止了 51 | */ 52 | public void stop() { 53 | this.executerManager.stop(); 54 | this.started = false; 55 | } 56 | } -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/HttpProxy.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http; 2 | 3 | import org.apache.log4j.Logger; 4 | import 
java.util.Arrays; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import java.util.Random; 8 | import java.util.stream.Collectors; 9 | 10 | /** 11 | * Created by zhangyw on 2017/8/11. 12 | */ 13 | public class HttpProxy implements IHttpProxy{ 14 | private Logger logger = Logger.getLogger(HttpProxy.class); 15 | private Map proxys; 16 | private Random random = new Random(); 17 | 18 | public HttpProxy(String proxys) { 19 | this.proxys = new HashMap(); 20 | Arrays.stream(proxys.split(",")).map(item -> item.split(":")).forEach(item -> { 21 | Integer ip = null; 22 | if(item.length > 1){ 23 | ip = Integer.parseInt(item[1]); 24 | } 25 | this.proxys.put(item[0], ip); 26 | }); 27 | } 28 | 29 | /** 30 | * 随机获取一个代理 31 | * @return 32 | */ 33 | @Override 34 | public ProxyTuple randomProxy(){ 35 | synchronized (this.proxys){ 36 | if (this.proxys.size() == 0) { 37 | return null; 38 | } 39 | Map.Entry entity = this.proxys.entrySet().stream().collect(Collectors.toList()).get(random.nextInt(proxys.size())); 40 | return new ProxyTuple(entity.getKey(), entity.getValue()); 41 | } 42 | } 43 | 44 | /** 45 | * 如果代理失效,从代理池中删除代理 46 | * 47 | * @param proxy 48 | */ 49 | @Override 50 | public void disable(ProxyTuple proxy) { 51 | synchronized (this.proxys) { 52 | logger.info("disable-" + proxy); 53 | this.proxys.remove(proxy.ip()); 54 | if (this.isEmpty()) { 55 | logger.info("代理全部失效"); 56 | } 57 | } 58 | } 59 | 60 | @Override 61 | public boolean isEmpty(){ 62 | return this.proxys.isEmpty(); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/response/TaskErrorResponse.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.response; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import com.zhangyingwei.cockroach.http.client.IHttpClient; 5 | import 
com.zhangyingwei.cockroach.queue.CockroachQueue; 6 | 7 | import java.io.IOException; 8 | import java.util.List; 9 | 10 | /** 11 | * Created by zhangyw on 2017/9/19. 12 | * 失败任务封装 13 | */ 14 | public class TaskErrorResponse implements ICockroachResponse { 15 | private Task task; 16 | private String message; 17 | private CockroachQueue queue; 18 | private String charset; 19 | private TaskResponse response; 20 | private IHttpClient httpClient; 21 | 22 | public TaskErrorResponse(TaskResponse response) throws IOException { 23 | this.response = response; 24 | } 25 | 26 | @Override 27 | public ResponseContent getContent() throws IOException { 28 | return this.response.getContent(); 29 | } 30 | 31 | @Override 32 | public Task getTask() { 33 | return this.response.getTask(); 34 | } 35 | 36 | @Override 37 | public boolean isGroup(String group) { 38 | return this.response.isGroup(group); 39 | } 40 | 41 | @Override 42 | public boolean isGroupStartWith(String groupPrefix) { 43 | return this.response.isGroupStartWith(groupPrefix); 44 | } 45 | 46 | @Override 47 | public boolean isGroupEndWith(String groupEnd) { 48 | return this.response.isGroupEndWith(groupEnd); 49 | } 50 | 51 | @Override 52 | public boolean isGroupContains(String str) { 53 | return this.response.isGroupContains(str); 54 | } 55 | 56 | @Override 57 | public CockroachQueue getQueue() { 58 | return this.response.getQueue(); 59 | } 60 | 61 | @Override 62 | public List header(String key) { 63 | return this.response.header(key); 64 | } 65 | 66 | public TaskResponse response(){ 67 | return this.response; 68 | } 69 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/CockroachContextGithubIssueTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach; 2 | 3 | import com.zhangyingwei.cockroach.config.CockroachConfig; 4 | import 
com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 6 | import com.zhangyingwei.cockroach.queue.TaskQueue; 7 | import com.zhangyingwei.cockroach.store.PrintStore; 8 | import org.junit.Test; 9 | 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | 13 | /** 14 | * @author zhangyw 15 | * @date: 2018/9/4 16 | * @desc: 17 | */ 18 | public class CockroachContextGithubIssueTest { 19 | @Test 20 | public void test(){} 21 | public static final CockroachQueue queue = TaskQueue.of(); 22 | 23 | public static void main(String[] args) throws Exception { 24 | CockroachConfig cockroachConfig = new CockroachConfig() 25 | .setAppName("豆瓣电影") 26 | .setAutoClose(false) 27 | .setThread(3, 2) 28 | .setStore(PrintStore.class); 29 | CockroachContext cockroachContext = new CockroachContext(cockroachConfig); 30 | cockroachContext.start(queue); 31 | System.err.println("1111111111111");//下面的代码都不执行 32 | Map params = new HashMap(); 33 | params.put("sort", "T"); 34 | params.put("range", "0,20"); 35 | params.put("tag", ""); 36 | params.put("start", 0); 37 | Task task = new Task("https://movie.douban.com/j/new_search_subjects", "douban.movie", params); 38 | queue.push(task); 39 | // CockroachApplication.run(DMovieApplication.class, queue); 40 | Thread.sleep(5000); 41 | queue.push(new Task("https://timeline-merger-ms.juejin.im/v1/get_entry_by_rank?src=web&uid=58368a0461ff4b475bd600bc&device_id=1519648660286&token=eyJhY2Nlc3NfdG9rZW4iOiJqQVlaSWxIN1U3aGJnV3YzIiwicmVmcmVzaF90b2tlbiI6IlV5RnA3eDFaVWp4bk9jRVEiLCJ0b2tlbl90eXBlIjoibWFjIiwiZXhwaXJlX2luIjoyNTkyMDAwfQ%3D%3D&limit=20&category=all&recomment=1")); 42 | 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /cockroach-samples/src/main/java/com/zhangyingwei/cockroach/samples/douban/movie/store/DMovieStore.java: -------------------------------------------------------------------------------- 1 | package 
com.zhangyingwei.cockroach.samples.douban.movie.store; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.samples.douban.movie.Movie; 6 | import com.zhangyingwei.cockroach.store.IStore; 7 | import net.sf.json.JSONArray; 8 | import net.sf.json.JSONObject; 9 | 10 | import java.util.Map; 11 | 12 | /** 13 | * Created by zhangyw on 2018/2/28. 14 | */ 15 | public class DMovieStore implements IStore { 16 | @Override 17 | public void store(TaskResponse response) throws Exception { 18 | if (response.isGroup("douban.movie")) { 19 | JSONObject resJson = response.getContent().toJsonObject(); 20 | JSONArray subjects = resJson.getJSONArray("data"); 21 | for (Object subject : subjects) { 22 | JSONObject movie = JSONObject.fromObject(subject); 23 | Movie movieObj = new Movie(); 24 | movieObj.setId(movie.getString("id")); 25 | movieObj.setTitle(movie.getString("title")); 26 | movieObj.setRate(movie.getString("rate")); 27 | movieObj.setUrl(movie.getString("url")); 28 | movieObj.setDirectors(movie.getJSONArray("directors")); 29 | movieObj.setCasts(movie.getJSONArray("casts")); 30 | System.out.println(movieObj); 31 | } 32 | if (subjects.size() > 0) { 33 | Task task = new Task(); 34 | task.setUrl(response.getTask().getUrl()); 35 | task.setGroup(response.getTask().getGroup()); 36 | Map params = response.getTask().getParams(); 37 | Integer page_start = (Integer) params.get("start"); 38 | params.put("start", page_start + 20); 39 | task.setParams(params); 40 | response.getQueue().push(task); 41 | } 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /cockroach-test/src/test/java/com/zhangyingwei/cockroach/queue/RedisTaskQueueTest.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | import com.zhangyingwei.cockroach.CockroachApplication; 
4 | import com.zhangyingwei.cockroach.annotation.*; 5 | import com.zhangyingwei.cockroach.executer.task.Task; 6 | import com.zhangyingwei.cockroach.queue.filter.IQueueTaskFilter; 7 | import com.zhangyingwei.cockroach.store.DescribeStore; 8 | import net.sf.json.JSONObject; 9 | import org.junit.Test; 10 | 11 | /** 12 | * Created by zhangyw on 2018/2/27. 13 | */ 14 | 15 | @EnableAutoConfiguration 16 | @AppName("redis") 17 | @ThreadConfig(num = 5, sleep = 100) 18 | @AutoClose(false) 19 | @Store(DescribeStore.class) 20 | public class RedisTaskQueueTest { 21 | // private static CockroachQueue queue = RedisTaskQueue.of("172.30.154.75", 6379,"cockroach","cockroach-error"); 22 | 23 | public static void main(String[] args) throws Exception { 24 | // CockroachApplication.run(RedisTaskQueueTest.class,queue); 25 | } 26 | 27 | // @Test 28 | public void take() throws Exception { 29 | // System.out.println(queue.take()); 30 | } 31 | 32 | // @Test 33 | public void push() throws Exception { 34 | // queue.filter(new IQueueTaskFilter() { 35 | // @Override 36 | // public boolean accept(Task task) { 37 | // return task.getUrl().contains("zhangyingwei"); 38 | // } 39 | // }); 40 | // for (int i = 0; i < 100; i++) { 41 | // Task task = new Task("http://blog.zhangyingwei.com","zhangyingwei").retry(10).addDeep(20); 42 | // queue.push(task); 43 | // } 44 | // queue.push(new Task("http://baidu.com")); 45 | } 46 | 47 | @Test 48 | public void test(){ 49 | Task task = new Task("http://baidu.com"); 50 | System.out.println(task); 51 | JSONObject jsonObject = JSONObject.fromObject(task); 52 | System.out.println(jsonObject); 53 | Task task1 = (Task) JSONObject.toBean(jsonObject, Task.class); 54 | System.out.println(task1); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/response/ResponseContent.java: -------------------------------------------------------------------------------- 1 | 
package com.zhangyingwei.cockroach.executer.response; 2 | 3 | import net.sf.json.JSONArray; 4 | import net.sf.json.JSONObject; 5 | import org.jsoup.Jsoup; 6 | import org.jsoup.nodes.Document; 7 | 8 | import java.io.IOException; 9 | import java.io.UnsupportedEncodingException; 10 | import java.util.Optional; 11 | 12 | /** 13 | * @author: zhangyw 14 | * @date: 2018/2/26 15 | * @time: 下午8:15 16 | * @desc: 17 | */ 18 | public class ResponseContent { 19 | private byte[] contentBytes; 20 | private String content; 21 | private String charset; 22 | private Document document; 23 | 24 | public ResponseContent() {} 25 | 26 | public ResponseContent(byte[] contentBytes) { 27 | this.contentBytes = contentBytes; 28 | } 29 | 30 | public byte[] bytes() { 31 | return contentBytes; 32 | } 33 | 34 | public String string() throws UnsupportedEncodingException { 35 | if (null == this.content) { 36 | if (null != this.charset) { 37 | this.content = new String(this.contentBytes, this.charset); 38 | } else { 39 | this.content = new String(this.contentBytes); 40 | } 41 | } 42 | return this.content; 43 | } 44 | 45 | public Document toDocument() throws IOException { 46 | if(this.document == null){ 47 | this.document = Jsoup.parse(Optional.ofNullable(this.string()).orElse("")); 48 | } 49 | return this.document; 50 | } 51 | 52 | public void setContentBytes(byte[] contentBytes) { 53 | this.contentBytes = contentBytes; 54 | } 55 | 56 | public ResponseContent charset(String charset) { 57 | this.charset = charset; 58 | return this; 59 | } 60 | 61 | public JSONObject toJsonObject() throws UnsupportedEncodingException { 62 | return JSONObject.fromObject(this.string()); 63 | } 64 | 65 | public JSONArray toJsonArray() throws UnsupportedEncodingException { 66 | return JSONArray.fromObject(this.string()); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/config/Constants.java: 
-------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.config; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.MapGenerator; 4 | import com.zhangyingwei.cockroach.common.generators.NoCookieGenerator; 5 | import com.zhangyingwei.cockroach.common.generators.NoHeaderGenerator; 6 | import com.zhangyingwei.cockroach.executer.listener.DefaultExecutersListener; 7 | import com.zhangyingwei.cockroach.executer.listener.IExecutersListener; 8 | import com.zhangyingwei.cockroach.http.client.okhttp.COkHttpClient; 9 | import com.zhangyingwei.cockroach.http.handler.DefaultTaskErrorHandler; 10 | import com.zhangyingwei.cockroach.http.handler.ITaskErrorHandler; 11 | import com.zhangyingwei.cockroach.store.PrintStore; 12 | 13 | /** 14 | * Created by zhangyw on 2017/9/13. 15 | * 常量统一管理 16 | */ 17 | public class Constants { 18 | public static final String APP_NAME_KEY = "cockroach.app.name"; 19 | public static final String APP_PROXY_KEY = "cockroach.app.proxy"; 20 | public static final String APP_AUTOCLOSE_KEY = "cockroach.app.autoclose"; 21 | public static final String APP_THREAD_KEY = "cockroach.app.thread"; 22 | public static final String APP_TASK_GROUP_DEFAULT = "default"; 23 | public static final String APP_THREAD_SLEEP_KEY = "cockroach.app.thread.sleep"; 24 | public static final String APP_HTTPCLIENT_KEY = "cockroach.app.httpclient"; 25 | public static final String APP_STORE_KEY = "cockroach.app.store"; 26 | public static final String APP_COOKIE_KEY = "cockroach.app.cookie"; 27 | public static final String APP_TASK_ERROR_KEY = "cockroach.app.task.error"; 28 | 29 | public static final String DEFAULT_APP_NAME = "cockroach"; 30 | public static final Class DEFAULT_TASKERROR_HANDLER = DefaultTaskErrorHandler.class; 31 | public static final int DEFAULT_THREAD_NUM = 10; 32 | public static final int DEFAULT_THREAD_SLEEP = 500; //毫秒 33 | public static final boolean DEFAULT_AUTO_CLOSE = false; 34 | 35 | public 
static final Boolean HTTP_SHOWHTTPCLIENTPROGRESS = false; 36 | 37 | public static final Class HTTP_CLIENT = COkHttpClient.class; 38 | public static final Class STORE = PrintStore.class; 39 | public static final Class COOKIDGENERATOR = NoCookieGenerator.class; 40 | public static final Class HEADERGENERATOR = NoHeaderGenerator.class; 41 | 42 | public static final Integer DEFAULT_TASK_RETRY = 0; 43 | public static final Integer DEFAULT_TASK_DEEP = 0; 44 | 45 | public static final Integer DEFAULT_QUEUE_CALACITY = 11; 46 | public static final Class DEFAULT_EXECUTERSLISTENER = DefaultExecutersListener.class; 47 | } -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/HttpParams.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http; 2 | 3 | import org.apache.log4j.Logger; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import java.util.Random; 8 | 9 | /** 10 | * Created by zhangyw on 2017/8/10. 
11 | */ 12 | public class HttpParams { 13 | 14 | private static Random random = new Random(); 15 | private static Logger logger = Logger.getLogger(HttpParams.class); 16 | 17 | private static String[] agents = new String[]{ 18 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36", 19 | "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", 20 | "Mozilla/5.0 (Windows NT 6.0; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1", 21 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2a1pre) Gecko/20081222 Fennec/1.0a2", 22 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1b2pre) Gecko/20081015 Fennec/1.0a1", 23 | "Opera/9.80 (Windows NT 6.0; Opera Mobi/49; U; en) Presto/2.4.18 Version/10.00", 24 | "Mozilla/5.0 (Windows NT 6.0) yi; AppleWebKit/345667.12221 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/453667.1221", 25 | "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5", 26 | "Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19", 27 | "Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", 28 | "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1", 29 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.107 Safari/535.1", 30 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.3 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/533.3", 31 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.4 (KHTML, like Gecko) Chrome/4.0.241.0 Safari/532.4", 32 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/531.3 (KHTML, like Gecko) Chrome/3.0.193.0 Safari/531.3", 33 | "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.13 (KHTML, like 
Gecko) Chrome/24.0.1290.1 Safari/537.13", 34 | "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11" 35 | }; 36 | 37 | public static Map headers(Map httpHeader) { 38 | Map headers = new HashMap(); 39 | if(httpHeader != null){ 40 | headers.putAll(httpHeader); 41 | } 42 | if (!headers.containsKey("User-Agent")) { 43 | headers.put("User-Agent", randouAgent()); 44 | } 45 | return headers; 46 | } 47 | 48 | private static String randouAgent() { 49 | String agent = agents[random.nextInt(agents.length)]; 50 | logger.debug("user agent: "+agent); 51 | return agent; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/client/okhttp/COkHttpClient.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.client.okhttp; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.ICockroachResponse; 4 | import com.zhangyingwei.cockroach.executer.task.Task; 5 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 6 | import com.zhangyingwei.cockroach.http.HttpParams; 7 | import com.zhangyingwei.cockroach.http.ProxyTuple; 8 | import com.zhangyingwei.cockroach.http.client.AbstractHttpClient; 9 | import com.zhangyingwei.cockroach.http.client.IHttpClient; 10 | import net.sf.json.JSONObject; 11 | import okhttp3.*; 12 | import org.apache.log4j.Logger; 13 | 14 | import java.net.InetSocketAddress; 15 | import java.net.Proxy; 16 | 17 | 18 | /** 19 | * Created by zhangyw on 2017/8/10. 
20 | */ 21 | public class COkHttpClient extends AbstractHttpClient { 22 | private Logger logger = Logger.getLogger(COkHttpClient.class); 23 | private OkHttpClient.Builder clientBuilder; 24 | 25 | public COkHttpClient() { 26 | this.clientBuilder = new OkHttpClient.Builder().cookieJar(new CookieManager()); 27 | this.httpHeader.put("cockroach", "hello-cockroach"); 28 | } 29 | 30 | @Override 31 | public TaskResponse doGet(Task task) throws Exception { 32 | Request request = new Request.Builder() 33 | .url(task.realUrl()) 34 | .headers(Headers.of(HttpParams.headers(this.httpHeader))) 35 | .get() 36 | .build(); 37 | Response response = this.clientBuilder.build().newCall(request).execute(); 38 | TaskResponse taskResponse = new TaskResponse(response.body().bytes(), response.headers().toMultimap(), response.code(), task); 39 | response.close(); 40 | return taskResponse; 41 | } 42 | 43 | @Override 44 | public IHttpClient proxy(ProxyTuple proxy) { 45 | super.currentProxy = proxy; 46 | if (proxy != null) { 47 | this.clientBuilder = this.clientBuilder 48 | .cookieJar(new CookieManager(this.cookie)) 49 | .proxy( 50 | new Proxy( 51 | Proxy.Type.HTTP, 52 | new InetSocketAddress(super.currentProxy.ip(), super.currentProxy.port()) 53 | ) 54 | ); 55 | logger.info("代理:" + super.currentProxy); 56 | } 57 | return this; 58 | } 59 | 60 | @Override 61 | public TaskResponse doPost(Task task) throws Exception { 62 | RequestBody requestBody = RequestBody.create( 63 | MediaType.parse("application/json; charset=utf-8"), 64 | JSONObject.fromObject(task.getParams()).toString() 65 | ); 66 | Request request = new Request.Builder() 67 | .url(task.getUrl()) 68 | .headers(Headers.of(HttpParams.headers(this.httpHeader))) 69 | .post(requestBody) 70 | .build(); 71 | Response response = this.clientBuilder.build().newCall(request).execute(); 72 | TaskResponse taskResponse = new TaskResponse(response.body().bytes(), response.headers().toMultimap(), response.code(), task); 73 | response.close(); 74 | return 
taskResponse; 75 | } 76 | 77 | @Override 78 | public IHttpClient setCookie(String cookie) { 79 | this.cookie = cookie; 80 | this.clientBuilder = this.clientBuilder.cookieJar(new CookieManager(this.cookie)); 81 | return this; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cockroach 爬虫:又一个 java 爬虫实现 2 | 3 | [![](https://travis-ci.org/zhangyingwei/cockroach.svg?branch=master)](https://travis-ci.org/zhangyingwei/cockroach) 4 | [![](https://img.shields.io/badge/language-java-orange.svg)]() 5 | [![](https://img.shields.io/badge/jdk-1.8-green.svg)]() 6 | [![License](http://img.shields.io/:license-apache-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0.html) 7 | 8 | ![](http://util.zhangyingwei.com//cockroach/1/carbon.png) 9 | 10 | 重构了 11 | [cockroach2](https://github.com/zhangyingwei/cockroach2) 12 | 13 | ## 简介 14 | 15 | cockroach[小强] 当时不知道为啥选了这么个名字,又长又难记,导致编码的过程中因为单词的拼写问题耽误了好长时间。 16 | 17 | 这个项目算是我的又一个坑吧,算起来挖的坑多了去了,多一个不多少一个不少。 18 | 19 | 一个小巧、灵活、健壮的内容(pa)获取(chong)框架,暂且叫做框架吧。 20 | 21 | 简单到什么程度呢,几句话就可以创建一个内容(pa)获取(chong)程序。 22 | 23 | ### 依赖部分 24 | 25 | ```xml 26 | 27 | com.github.zhangyingwei 28 | cockroach-core 29 | 1.0.6-Beta 30 | 31 | 32 | 33 | com.github.zhangyingwei 34 | cockroach-annotation 35 | 1.0.6-Beta 36 | 37 | ``` 38 | 39 | ### 代码部分: 40 | 41 | ```java 42 | @EnableAutoConfiguration 43 | public class CockroachApplicationTest { 44 | public static void main(String[] args) throws Exception { 45 | TaskQueue queue = TaskQueue.of(); 46 | queue.push(new Task("http://blog.zhangyingwei.com")); 47 | CockroachApplication.run(CockroachApplicationTest.class,queue); 48 | } 49 | } 50 | ``` 51 | 没错,就是这么简单。这个内容(pa)获取(chong)程序就是获(pa)取 `http://blog.zhangyingwei.com` 这个页面的内容并将结果打印出来。 52 | 在结果处理这个问题上,程序中默认使用 PringStore 这个类将所有结果打印出来。 53 | 54 | ## scala & kotlin 55 | 56 | 作为目前使用的 jvm 系语言几大巨头,scala 与 kotlin 这里基本上对跟 java 
的互调做的很好,但是这里还是给几个 demo。 57 | 58 | ### scala 59 | 60 | ```scala 61 | /** 62 | * Created by zhangyw on 2017/12/25. 63 | */ 64 | class TTTStore extends IStore{ 65 | override def store(taskResponse: TaskResponse): Unit = { 66 | println("ttt store") 67 | } 68 | } 69 | 70 | object TTTStore{} 71 | ``` 72 | 73 | ```scala 74 | /** 75 | * Created by zhangyw on 2017/12/25. 76 | */ 77 | @EnableAutoConfiguration 78 | @ThreadConfig(num = 1) 79 | @Store(classOf[TTTStore]) 80 | object MainApplication { 81 | def main(args: Array[String]): Unit = { 82 | println("hello scala spider") 83 | val queue = TaskQueue.of() 84 | queue.push(new Task("http://blog.zhangyingwei.com")) 85 | CockroachApplication.run(MainApplication.getClass(),queue) 86 | } 87 | } 88 | ``` 89 | 90 | ### kotlin 91 | 92 | ```kotlin 93 | class TTTStore :IStore{ 94 | override fun store(response: TaskResponse) { 95 | print("ttt store") 96 | } 97 | } 98 | ``` 99 | 100 | ```kotlin 101 | 102 | /** 103 | * Created by zhangyw on 2017/12/25. 104 | */ 105 | @EnableAutoConfiguration 106 | @ThreadConfig(num = 1) 107 | @Store(TTTStore::class) 108 | object MainApplication { 109 | @JvmStatic 110 | fun main(args: Array<String>) { 111 | print("hello kotlin spider") 112 | val queue = TaskQueue.of() 113 | queue.push(Task("http://blog.zhangyingwei.com")) 114 | CockroachApplication.run(MainApplication::class.java, queue) 115 | } 116 | } 117 | ``` 118 | 119 | ## 联系方式 120 | * 邮箱: zhangyw001@gmail.com 121 | * 微信: fengche361 122 | 123 | ## License 124 | 125 | Licensed under the [Apache 2.0 license](./LICENSE) 126 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/task/TaskExecuter.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.task; 2 | 3 | import com.zhangyingwei.cockroach.executer.response.TaskErrorResponse; 4 | import
com.zhangyingwei.cockroach.executer.response.TaskResponse; 5 | import com.zhangyingwei.cockroach.executer.response.filter.TaskResponseFilterBox; 6 | import com.zhangyingwei.cockroach.http.HttpProxy; 7 | import com.zhangyingwei.cockroach.http.ProxyTuple; 8 | import com.zhangyingwei.cockroach.http.client.HttpClientProxy; 9 | import com.zhangyingwei.cockroach.http.client.IHttpClient; 10 | import com.zhangyingwei.cockroach.http.handler.ITaskErrorHandler; 11 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 12 | import com.zhangyingwei.cockroach.store.IStore; 13 | import com.zhangyingwei.cockroach.common.utils.NameUtils; 14 | import org.apache.log4j.Logger; 15 | 16 | import java.io.IOException; 17 | import java.util.concurrent.TimeUnit; 18 | 19 | /** 20 | * Created by zhangyw on 2017/8/10. 21 | * 任务执行器,主要工作是从队列中取出任务然后执行任务 22 | */ 23 | public class TaskExecuter implements Runnable { 24 | private final ITaskErrorHandler errorHandlerBox; 25 | private final TaskResponseFilterBox filterBox; 26 | private Logger logger = Logger.getLogger(TaskExecuter.class); 27 | private CockroachQueue queue; 28 | private HttpClientProxy httpClient; 29 | private IStore store; 30 | private String id; 31 | private boolean autoClose; 32 | private int sleep; 33 | private boolean flag = true; 34 | 35 | public TaskExecuter(CockroachQueue queue, HttpClientProxy httpClient, IStore store, ITaskErrorHandler errorHandlerBox, int sleep, boolean autoClose, TaskResponseFilterBox filterBox) { 36 | this.queue = queue; 37 | this.httpClient = httpClient; 38 | this.store = store; 39 | this.id = NameUtils.name(TaskExecuter.class); 40 | this.errorHandlerBox = errorHandlerBox; 41 | this.autoClose = autoClose; 42 | this.sleep = sleep; 43 | this.filterBox = filterBox; 44 | } 45 | 46 | @Override 47 | public void run() { 48 | loop:while (flag) { 49 | TaskResponse response = null; 50 | try { 51 | Task task = null; 52 | if(autoClose){ 53 | task = this.queue.poll(); 54 | if(task == null){ 55 | flag = false; 56 | 
break loop; 57 | } 58 | }else{ 59 | task = this.queue.take(); 60 | } 61 | TimeUnit.MILLISECONDS.sleep(sleep); 62 | logger.info(this.getId()+" GET - "+task); 63 | response = (TaskResponse) this.httpClient.doGet(task); 64 | response.setQueue(this.queue); 65 | } catch (Exception e) { 66 | logger.error(this.getId()+" - "+ e.getLocalizedMessage()); 67 | }finally { 68 | try { 69 | if(response.isFalied()){ 70 | this.errorHandlerBox.error(new TaskErrorResponse(response)); 71 | }else{ 72 | if (this.filterBox.accept(response)) { 73 | this.store.store(response); 74 | } 75 | } 76 | } catch (Exception e) {} 77 | } 78 | } 79 | logger.info(id+" : over"); 80 | } 81 | 82 | public void stop() { 83 | this.flag = false; 84 | } 85 | 86 | public String getId() { 87 | return id; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /cockroach-queue-redis/src/main/java/com/zhangyingwei/cockroach/queue/RedisTaskQueue.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | import com.zhangyingwei.cockroach.executer.task.Task; 4 | import net.sf.json.JSONObject; 5 | import redis.clients.jedis.Jedis; 6 | 7 | import java.util.List; 8 | import java.util.stream.Collectors; 9 | 10 | /** 11 | * Created by zhangyw on 2018/2/27. 
12 | */ 13 | public class RedisTaskQueue extends AbstractCockroachQueue { 14 | private String key; 15 | private String failedKey; 16 | private Jedis jedis; 17 | 18 | public RedisTaskQueue(String host, Integer port,String key,String failedKey) { 19 | this.key = key; 20 | this.failedKey = failedKey; 21 | this.jedis = new Jedis(host, port); 22 | } 23 | 24 | public RedisTaskQueue(String host, Integer port, String auth, String key,String failedKey) { 25 | this.key = key; 26 | this.failedKey = failedKey; 27 | this.jedis = new Jedis(host, port); 28 | this.jedis.auth(auth); 29 | } 30 | 31 | public RedisTaskQueue(String host, Integer port, String auth, Integer index, String key,String failedKey) { 32 | this.key = key; 33 | this.failedKey = failedKey; 34 | this.jedis = new Jedis(host, port); 35 | this.jedis.auth(auth); 36 | this.jedis.select(index); 37 | } 38 | 39 | public static RedisTaskQueue of(String host, Integer port,String key,String failedKey){ 40 | return new RedisTaskQueue(host,port,key,failedKey); 41 | } 42 | 43 | public static RedisTaskQueue of(String host, Integer port,String auth,String key,String failedKey){ 44 | return new RedisTaskQueue(host, port, auth, key, failedKey); 45 | } 46 | 47 | public static RedisTaskQueue of(String host, Integer port,String auth,Integer index,String key,String failedKey){ 48 | return new RedisTaskQueue(host, port, auth, index, key, failedKey); 49 | } 50 | 51 | @Override 52 | public synchronized Task poll() throws Exception { 53 | String json = this.jedis.lpop(this.key); 54 | JSONObject jsonObject = JSONObject.fromObject(json); 55 | return (Task) JSONObject.toBean(jsonObject, Task.class); 56 | } 57 | 58 | @Override 59 | public synchronized Task take() throws Exception { 60 | List json = this.jedis.blpop(Integer.MAX_VALUE,this.key); 61 | JSONObject jsonObject = JSONObject.fromObject(json.get(1)); 62 | return (Task) JSONObject.toBean(jsonObject, Task.class); 63 | } 64 | 65 | @Override 66 | public void push(Task task) throws Exception 
{ 67 | this.push(task,true); 68 | } 69 | 70 | @Override 71 | public void push(Task task, Boolean withFilter) throws Exception { 72 | if (withFilter) { 73 | if (super.filterBox.accept(task)) { 74 | JSONObject json = JSONObject.fromObject(task); 75 | this.jedis.lpush(this.key, json.toString()); 76 | } 77 | } else { 78 | JSONObject json = JSONObject.fromObject(task); 79 | this.jedis.lpush(this.key, json.toString()); 80 | } 81 | } 82 | 83 | @Override 84 | public void falied(Task task) throws Exception { 85 | JSONObject json = JSONObject.fromObject(task); 86 | this.jedis.lpush(this.failedKey, json.toString()); 87 | } 88 | 89 | @Override 90 | public void pushAll(List tasks) throws Exception { 91 | for (Task task : tasks) { 92 | push(task); 93 | } 94 | } 95 | 96 | @Override 97 | public void push(List urls) throws Exception { 98 | List tasks = urls.stream().map(url -> { 99 | return new Task(url); 100 | }).collect(Collectors.toList()); 101 | pushAll(tasks); 102 | } 103 | 104 | @Override 105 | public void clear() throws Exception { 106 | this.jedis.del(this.key); 107 | this.jedis.del(this.failedKey); 108 | } 109 | 110 | @Override 111 | public Boolean isEmpty() { 112 | long size = this.jedis.llen(this.key); 113 | long faildSize = this.jedis.llen(this.failedKey); 114 | return size + faildSize == 0; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/queue/TaskQueue.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.queue; 2 | 3 | 4 | import com.zhangyingwei.cockroach.config.Constants; 5 | import com.zhangyingwei.cockroach.executer.task.Task; 6 | import com.zhangyingwei.cockroach.executer.task.TaskCompatator; 7 | import com.zhangyingwei.cockroach.queue.filter.IQueueTaskFilter; 8 | import com.zhangyingwei.cockroach.queue.filter.TaskFilterBox; 9 | import org.apache.log4j.Logger; 10 | import 
java.util.List; 11 | import java.util.concurrent.BlockingQueue; 12 | import java.util.concurrent.PriorityBlockingQueue; 13 | 14 | /** 15 | * Created by zhangyw on 2017/8/10. 16 | * 消息队列 17 | */ 18 | public class TaskQueue extends AbstractCockroachQueue { 19 | private Logger logger = Logger.getLogger(TaskQueue.class); 20 | 21 | private BlockingQueue queue; 22 | private BlockingQueue faildQueue; 23 | 24 | public static TaskQueue of(){ 25 | return TaskQueue.of(Constants.DEFAULT_QUEUE_CALACITY); 26 | } 27 | 28 | public static TaskQueue of(int calacity){ 29 | return new TaskQueue(calacity); 30 | } 31 | 32 | public TaskQueue(Integer calacity) { 33 | this.queue = new PriorityBlockingQueue(calacity,new TaskCompatator()); 34 | this.faildQueue = new PriorityBlockingQueue(); 35 | logger.info("create queue whith calacity " + calacity); 36 | } 37 | 38 | @Override 39 | public Task poll() throws InterruptedException { 40 | this.queueValid(); 41 | Task task = this.queue.poll(); 42 | logger.info(Thread.currentThread().getName() + " pull task " + task); 43 | return task; 44 | } 45 | 46 | @Override 47 | public Task take() throws InterruptedException { 48 | this.queueValid(); 49 | Task task = this.queue.take(); 50 | logger.info(Thread.currentThread().getName() + " take task " + task); 51 | return task; 52 | } 53 | 54 | /** 55 | * 如果 queue 为空 56 | * 如果 task retry 次数小于系统设置的 DEFAULT_TASK_RESTY 57 | * 把失败任务重新添加到队列中 58 | */ 59 | private synchronized void queueValid() throws InterruptedException { 60 | if(this.queue.isEmpty() && !this.faildQueue.isEmpty()){ 61 | for (Task task : this.faildQueue) { 62 | this.push(task, false); 63 | } 64 | this.faildQueue.clear(); 65 | } 66 | if (this.queue.isEmpty()) { 67 | logger.info(Thread.currentThread().getName() + " queue is empty"); 68 | } 69 | } 70 | 71 | @Override 72 | public void push(Task task) throws InterruptedException { 73 | this.queue.put(task); 74 | logger.info(Thread.currentThread().getName() + " push task " + task); 75 | } 76 | 77 | 
@Override 78 | public void push(Task task, Boolean withFilter) throws InterruptedException { 79 | Boolean allow = true; 80 | if (withFilter) { 81 | allow = super.filterBox.accept(task); 82 | } 83 | if (allow) { 84 | this.push(task); 85 | } 86 | } 87 | 88 | @Override 89 | public synchronized void falied(Task task) throws InterruptedException { 90 | if (task.getRetry() > 0) { 91 | this.faildQueue.put(task.retry()); 92 | logger.info(Thread.currentThread().getName() + " push failed task " + task); 93 | } 94 | } 95 | 96 | @Override 97 | public void pushAll(List tasks) throws InterruptedException { 98 | for (Task task : tasks) { 99 | this.push(task); 100 | } 101 | } 102 | 103 | @Override 104 | public void push(List urls) { 105 | urls.stream().map(url -> new Task(url)).forEach(task -> { 106 | try { 107 | this.push(task); 108 | } catch (InterruptedException e) { 109 | e.printStackTrace(); 110 | } 111 | }); 112 | } 113 | 114 | @Override 115 | public void clear(){ 116 | this.queue.clear(); 117 | logger.info(Thread.currentThread().getName() + " clear queue"); 118 | } 119 | 120 | @Override 121 | public Boolean isEmpty() { 122 | return this.queue.isEmpty(); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/response/TaskResponse.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.response; 2 | 3 | import cn.wanghaomiao.xpath.exception.XpathSyntaxErrorException; 4 | import cn.wanghaomiao.xpath.model.JXDocument; 5 | import com.zhangyingwei.cockroach.common.exception.HttpException; 6 | import com.zhangyingwei.cockroach.executer.task.Task; 7 | import com.zhangyingwei.cockroach.http.client.IHttpClient; 8 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 9 | import com.zhangyingwei.cockroach.common.utils.CockroachUtils; 10 | import org.jsoup.nodes.Document; 11 | import 
org.jsoup.nodes.Element; 12 | import org.jsoup.select.Elements; 13 | import java.io.IOException; 14 | import java.util.*; 15 | import java.util.stream.Collectors; 16 | 17 | /** 18 | * Created by zhangyw on 2017/8/10. 19 | * 请求返回结构 20 | */ 21 | public class TaskResponse implements ICockroachResponse { 22 | private Map> headers; 23 | private Task task; 24 | private JXDocument xdocument; 25 | private CockroachQueue queue; 26 | private ResponseContent content; 27 | private boolean failed = false; 28 | private IHttpClient httpClient; 29 | 30 | public TaskResponse(byte[] contentBytes, Map> headers, int code, Task task) throws IOException, HttpException { 31 | this.content = new ResponseContent(); 32 | this.content.setContentBytes(contentBytes); 33 | this.task = task; 34 | this.headers = headers; 35 | if (!CockroachUtils.validHttpCode(code)) { 36 | throw new HttpException(this.getContent().string(),code); 37 | } 38 | } 39 | 40 | public TaskResponse() { 41 | this.content = new ResponseContent(); 42 | } 43 | 44 | @Override 45 | public ResponseContent getContent() throws IOException { 46 | return this.content; 47 | } 48 | 49 | private JXDocument parseJXDocument() throws IOException { 50 | if (this.xdocument == null) { 51 | Document doc = this.content.toDocument(); 52 | this.xdocument = new JXDocument(doc); 53 | } 54 | return this.xdocument; 55 | } 56 | 57 | @Override 58 | public Task getTask() { 59 | return task; 60 | } 61 | 62 | public TaskResponse setTask(Task task) { 63 | this.task = task; 64 | return this; 65 | } 66 | 67 | public TaskResponse charset(String charset) { 68 | this.content.charset(charset); 69 | return this; 70 | } 71 | 72 | public Elements select(String cssSelect) throws IOException { 73 | return this.content.toDocument().select(cssSelect); 74 | } 75 | 76 | public Elements xpath(String xpath) throws IOException, XpathSyntaxErrorException { 77 | List elements = this.parseJXDocument().sel(xpath).stream().map(obj -> { 78 | return (Element) obj; 79 | 
}).collect(Collectors.toList()); 80 | return new Elements(Optional.of(elements).orElse(new ArrayList())); 81 | } 82 | 83 | @Override 84 | public boolean isGroup(String group){ 85 | return task.getGroup().equals(group); 86 | } 87 | 88 | @Override 89 | public boolean isGroupStartWith(String groupPrefix) { 90 | return task.getGroup().startsWith(groupPrefix); 91 | } 92 | 93 | @Override 94 | public boolean isGroupEndWith(String end) { 95 | return task.getGroup().endsWith(end); 96 | } 97 | 98 | @Override 99 | public boolean isGroupContains(String str) { 100 | return task.getGroup().contains(str); 101 | } 102 | 103 | public void setQueue(CockroachQueue queue) { 104 | this.queue = queue; 105 | } 106 | 107 | @Override 108 | public CockroachQueue getQueue() { 109 | return queue; 110 | } 111 | 112 | @Override 113 | public List header(String key) { 114 | if (this.headers.containsKey(key)) { 115 | return this.headers.get(key); 116 | } 117 | return null; 118 | } 119 | 120 | public boolean isFalied() { 121 | return this.failed; 122 | } 123 | 124 | public TaskResponse falied(String message) { 125 | this.failed = true; 126 | this.content.setContentBytes(message.getBytes()); 127 | return this; 128 | } 129 | 130 | public IHttpClient getHttpClient() { 131 | return httpClient; 132 | } 133 | 134 | public TaskResponse setHttpClient(IHttpClient httpClient) { 135 | this.httpClient = httpClient; 136 | return this; 137 | } 138 | } -------------------------------------------------------------------------------- /cockroach-annotation/src/main/java/com/zhangyingwei/cockroach/config/CockroachConfigBuilder.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.config; 2 | 3 | import com.zhangyingwei.cockroach.annotation.*; 4 | import com.zhangyingwei.cockroach.executer.listener.IExecutersListener; 5 | import com.zhangyingwei.cockroach.executer.response.filter.ITaskResponseFilter; 6 | 7 | import java.lang.annotation.Annotation; 
8 | import java.util.HashSet; 9 | import java.util.Set; 10 | 11 | /** 12 | * Created by zhangyw on 2017/12/8. 13 | */ 14 | public class CockroachConfigBuilder { 15 | private Annotation[] annotations; 16 | private CockroachConfig config; 17 | 18 | public CockroachConfigBuilder(Annotation[] annotations) { 19 | this.config = new CockroachConfig(); 20 | this.annotations = annotations; 21 | } 22 | 23 | public CockroachConfig bulid() throws Exception { 24 | if(this.isAutoConfiguration()){ 25 | this.autoConfig(); 26 | } 27 | for (Annotation annotation : annotations) { 28 | if (annotation instanceof AppName) { 29 | this.config.setAppName(((AppName) annotation).value()); 30 | } else if (annotation instanceof ProxyConfig) { 31 | this.config.setProxys(((ProxyConfig) annotation).value()); 32 | } else if (annotation instanceof AutoClose) { 33 | this.config.setAutoClose(((AutoClose) annotation).value()); 34 | } else if (annotation instanceof ThreadConfig) { 35 | this.config.setThread(((ThreadConfig) annotation).num(), ((ThreadConfig) annotation).sleep()); 36 | } else if (annotation instanceof HttpConfig) { 37 | this.config.setHttpClient(((HttpConfig) annotation).value()); 38 | this.config.setShowHttpClientProgress(((HttpConfig) annotation).progress()); 39 | } else if (annotation instanceof Store) { 40 | this.config.setStore(((Store) annotation).value()); 41 | } else if (annotation instanceof CookieConfig) { 42 | this.config.setCookie(((CookieConfig) annotation).value()); 43 | this.config.setCookieGenerator(((CookieConfig) annotation).cookieGenerator()); 44 | } else if (annotation instanceof HttpHeaderConfig) { 45 | String[] headers = ((HttpHeaderConfig) annotation).value(); 46 | this.config.setHeaderGenerator(((HttpHeaderConfig) annotation).headerGenerator()); 47 | if (headers.length > 0) { 48 | for (String header : headers) { 49 | if (header.indexOf("=") < 0) { 50 | throw new Exception("require header like key=value, but get "+header); 51 | } 52 | String[] kv = 
header.split("="); 53 | this.config.addHttpHeader(kv[0], kv[1]); 54 | } 55 | } 56 | } else if (annotation instanceof TaskErrorHandlerConfig) { 57 | this.config.setTaskErrorHandler(((TaskErrorHandlerConfig) annotation).value()); 58 | } else if (annotation instanceof TaskResponseFiltersConfig) { 59 | Set> filters = new HashSet>(); 60 | Class[] values = ((TaskResponseFiltersConfig) annotation).value(); 61 | if (values != null && values.length > 0) { 62 | for (Class value : values) { 63 | filters.add(value); 64 | } 65 | } 66 | this.config.setResponseFilters(filters); 67 | } else if (annotation instanceof ExecutersListener) { 68 | Class listener = ((ExecutersListener) annotation).value(); 69 | this.config.setExecutersListener(listener); 70 | } 71 | } 72 | return this.config; 73 | } 74 | 75 | /** 76 | * 判断是否配置 autoconfiguration 77 | * @return 78 | */ 79 | private boolean isAutoConfiguration() { 80 | for (Annotation annotation : this.annotations) { 81 | if (annotation instanceof EnableAutoConfiguration) { 82 | return true; 83 | } 84 | } 85 | return false; 86 | } 87 | 88 | private void autoConfig() { 89 | this.config 90 | .setAppName(Constants.DEFAULT_APP_NAME) 91 | .setAutoClose(Constants.DEFAULT_AUTO_CLOSE) 92 | .setTaskErrorHandler(Constants.DEFAULT_TASKERROR_HANDLER) 93 | .setThread(Constants.DEFAULT_THREAD_NUM, Constants.DEFAULT_THREAD_SLEEP); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/http/client/HttpClientProxy.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.http.client; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.MapGenerator; 4 | import com.zhangyingwei.cockroach.common.generators.NoCookieGenerator; 5 | import com.zhangyingwei.cockroach.common.generators.NoHeaderGenerator; 6 | import com.zhangyingwei.cockroach.common.generators.StringGenerator; 7 | import 
com.zhangyingwei.cockroach.executer.response.ICockroachResponse; 8 | import com.zhangyingwei.cockroach.executer.task.Task; 9 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 10 | import com.zhangyingwei.cockroach.http.HttpProxy; 11 | import com.zhangyingwei.cockroach.http.ProxyTuple; 12 | import org.apache.log4j.Logger; 13 | import java.util.Map; 14 | 15 | /** 16 | * @author: zhangyw 17 | * @date: 2017/8/16 18 | * @time: 下午8:52 19 | * @desc: 20 | */ 21 | public class HttpClientProxy implements IHttpClient { 22 | private Logger logger = Logger.getLogger(HttpClientProxy.class); 23 | private IHttpClient client; 24 | private StringGenerator cookieGenerator; 25 | private MapGenerator headerGenerator; 26 | private HttpProxy proxy; 27 | 28 | public HttpClientProxy(IHttpClient client) { 29 | this.client = client; 30 | } 31 | 32 | @Override 33 | public TaskResponse doGet(Task task) { 34 | this.makeGenerators(task); 35 | String message = ""; 36 | TaskResponse response = null; 37 | try { 38 | response = this.client.proxy(this.randomProxy()).doGet(task); 39 | } catch (Exception e) { 40 | message = e.getMessage(); 41 | logger.error(task + " - " + message); 42 | response = new TaskResponse().setTask(task).falied(message); 43 | } 44 | return response.setHttpClient(this); 45 | } 46 | 47 | public HttpClientProxy setProxy(HttpProxy proxy) { 48 | this.proxy = proxy; 49 | return this; 50 | } 51 | 52 | public HttpProxy getProxy(){ 53 | return this.proxy; 54 | } 55 | 56 | /** 57 | * 这个方法暂时用不到,没用 58 | * @param proxy 59 | * @return 60 | * @throws Exception 61 | */ 62 | @Override 63 | public HttpClientProxy proxy(ProxyTuple proxy) throws Exception { 64 | this.client.proxy(proxy); 65 | return this; 66 | } 67 | 68 | /** 69 | * 随机一个代理 70 | * @return 71 | */ 72 | private ProxyTuple randomProxy() { 73 | if(this.proxy != null && !this.proxy.isEmpty()) { 74 | return this.proxy.randomProxy(); 75 | } 76 | return null; 77 | } 78 | 79 | /** 80 | * 如果配置了生成器,则在请求之前调用生成器 81 | * @param 
task 82 | */ 83 | private void makeGenerators(Task task) { 84 | if (this.cookieGenerator != null) { 85 | if (!(this.cookieGenerator instanceof NoCookieGenerator)) { 86 | this.setCookie(this.cookieGenerator.get(task)); 87 | } 88 | } 89 | if (this.headerGenerator != null) { 90 | if (!(this.headerGenerator instanceof NoHeaderGenerator)) { 91 | Map headers = this.headerGenerator.get(task); 92 | this.setHttpHeader(headers); 93 | } 94 | } 95 | } 96 | 97 | @Override 98 | public TaskResponse doPost(Task task) { 99 | try { 100 | return this.client.doPost(task); 101 | } catch (Exception e) { 102 | e.printStackTrace(); 103 | return new TaskResponse().setTask(task).falied(e.getMessage()); 104 | } 105 | } 106 | 107 | @Override 108 | public HttpClientProxy setCookie(String cookie) { 109 | try { 110 | this.client.setCookie(cookie); 111 | } catch (Exception e) { 112 | logger.error(e.getMessage()); 113 | } 114 | return this; 115 | } 116 | 117 | @Override 118 | public HttpClientProxy setHttpHeader(Map httpHeader) { 119 | try { 120 | this.client.setHttpHeader(httpHeader); 121 | } catch (Exception e) { 122 | logger.error(e.getMessage()); 123 | } 124 | return this; 125 | } 126 | 127 | @Override 128 | public ProxyTuple getCurrentProxyTuple() { 129 | try { 130 | return this.client.getCurrentProxyTuple(); 131 | } catch (Exception e) { 132 | logger.error(e.getMessage()); 133 | } 134 | return null; 135 | } 136 | 137 | public HttpClientProxy setCookieGenerator(StringGenerator cookieGenerator){ 138 | this.cookieGenerator = cookieGenerator; 139 | return this; 140 | } 141 | 142 | public HttpClientProxy setHeaderGenerator(MapGenerator headerGenerator) { 143 | this.headerGenerator = headerGenerator; 144 | return this; 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | cockroach-core 8 | cockroach-annotation 9 | cockroach-test 10 | 
cockroach-samples 11 | cockroach-queue-redis 12 | 13 | 14 | 15 | 16 | org.sonatype.oss 17 | oss-parent 18 | 7 19 | 20 | 21 | 4.0.0 22 | com.github.zhangyingwei 23 | cockroach 24 | 1.0.6-Beta 25 | pom 26 | cockroach 27 | https://github.com/zhangyingwei/cockroach 28 | 又一个 java 爬虫 29 | 30 | 31 | 32 | The Apache Software License, Version 2.0 33 | http://www.apache.org/licenses/LICENSE-2.0.txt 34 | repo 35 | 36 | 37 | 38 | 39 | https://github.com/zhangyingwei/cockroach 40 | scm:git:https://github.com/zhangyingwei/cockroach.git 41 | scm:git:https://github.com/zhangyingwei/cockroach.git 42 | 43 | 44 | 45 | 46 | zhangyingwei 47 | zhangyw001@gmail.com 48 | https://www.zhangyingwei.com 49 | 50 | 51 | 52 | 53 | 54 | log4j 55 | log4j 56 | 1.2.17 57 | 58 | 59 | 60 | 61 | UTF-8 62 | UTF-8 63 | UTF-8 64 | -Xdoclint:none 65 | 66 | 67 | 68 | 69 | 70 | oss 71 | OSS Snapshots Repository 72 | 73 | https://oss.sonatype.org/content/repositories/snapshots/ 74 | 75 | 76 | oss 77 | OSS Staging Repository 78 | 79 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | org.apache.maven.plugins 88 | maven-compiler-plugin 89 | 90 | 1.8 91 | 1.8 92 | 93 | 94 | 95 | 96 | org.apache.maven.plugins 97 | maven-source-plugin 98 | 2.2.1 99 | 100 | 101 | attach-sources 102 | 103 | jar 104 | 105 | 106 | 107 | 108 | 109 | 110 | org.apache.maven.plugins 111 | maven-javadoc-plugin 112 | 2.9 113 | 114 | UTF-8 115 | UTF-8 116 | 117 | 118 | 119 | attach-javadocs 120 | 121 | jar 122 | 123 | 124 | 125 | 126 | 127 | org.apache.maven.plugins 128 | maven-release-plugin 129 | 2.5.1 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /cockroach-core/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #①配置根Logger,其语法为: 3 | # 4 | #log4j.rootLogger 
=[level],appenderName,appenderName2,... 5 | #level是日志记录的优先级,分为OFF,TRACE,DEBUG,INFO,WARN,ERROR,FATAL,ALL 6 | ##Log4j建议只使用四个级别,优先级从低到高分别是DEBUG,INFO,WARN,ERROR 7 | #通过在这里定义的级别,您可以控制到应用程序中相应级别的日志信息的开关 8 | #比如在这里定义了INFO级别,则应用程序中所有DEBUG级别的日志信息将不被打印出来 9 | #appenderName就是指定日志信息输出到哪个地方。可同时指定多个输出目的 10 | ################################################################################ 11 | ################################################################################ 12 | #②配置日志信息输出目的地Appender,其语法为: 13 | # 14 | #log4j.appender.appenderName =fully.qualified.name.of.appender.class 15 | #log4j.appender.appenderName.optionN =valueN 16 | # 17 | #Log4j提供的appender有以下几种: 18 | #1)org.apache.log4j.ConsoleAppender(输出到控制台) 19 | #2)org.apache.log4j.FileAppender(输出到文件) 20 | #3)org.apache.log4j.DailyRollingFileAppender(每天产生一个日志文件) 21 | #4)org.apache.log4j.RollingFileAppender(文件大小到达指定尺寸的时候产生一个新的文件) 22 | #5)org.apache.log4j.WriterAppender(将日志信息以流格式发送到任意指定的地方) 23 | # 24 | #1)ConsoleAppender选项属性 25 | # -Threshold = DEBUG:指定日志消息的输出最低层次 26 | # -ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 27 | # -Target = System.err:默认值System.out,输出到控制台(err为红色,out为黑色) 28 | # 29 | #2)FileAppender选项属性 30 | # -Threshold = INFO:指定日志消息的输出最低层次 31 | # -ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 32 | # -File = C:\log4j.log:指定消息输出到C:\log4j.log文件 33 | # -Append = FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 34 | # -Encoding = UTF-8:可以指定文件编码格式 35 | # 36 | #3)DailyRollingFileAppender选项属性 37 | #-Threshold = WARN:指定日志消息的输出最低层次 38 | #-ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 39 | # -File =C:\log4j.log:指定消息输出到C:\log4j.log文件 40 | # -Append= FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 41 | #-DatePattern='.'yyyy-ww:每周滚动一次文件,即每周产生一个新的文件。还可以按用以下参数: 42 | # '.'yyyy-MM:每月 43 | # '.'yyyy-ww:每周 44 | # '.'yyyy-MM-dd:每天 45 | # '.'yyyy-MM-dd-a:每天两次 46 | # '.'yyyy-MM-dd-HH:每小时 47 | # '.'yyyy-MM-dd-HH-mm:每分钟 48 | #-Encoding = UTF-8:可以指定文件编码格式 49 | # 50 | #4)RollingFileAppender选项属性 51 | #-Threshold = ERROR:指定日志消息的输出最低层次 52 | 
#-ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 53 | # -File =C:/log4j.log:指定消息输出到C:/log4j.log文件 54 | # -Append= FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 55 | #-MaxFileSize = 100KB:后缀可以是KB,MB,GB.在日志文件到达该大小时,将会自动滚动.如:log4j.log.1 56 | #-MaxBackupIndex = 2:指定可以产生的滚动文件的最大数 57 | #-Encoding = UTF-8:可以指定文件编码格式 58 | ################################################################################ 59 | ################################################################################ 60 | #③配置日志信息的格式(布局),其语法为: 61 | # 62 | #log4j.appender.appenderName.layout=fully.qualified.name.of.layout.class 63 | #log4j.appender.appenderName.layout.optionN= valueN 64 | # 65 | #Log4j提供的layout有以下几种: 66 | #5)org.apache.log4j.HTMLLayout(以HTML表格形式布局) 67 | #6)org.apache.log4j.PatternLayout(可以灵活地指定布局模式) 68 | #7)org.apache.log4j.SimpleLayout(包含日志信息的级别和信息字符串) 69 | #8)org.apache.log4j.TTCCLayout(包含日志产生的时间、线程、类别等等信息) 70 | #9)org.apache.log4j.xml.XMLLayout(以XML形式布局) 71 | # 72 | #5)HTMLLayout选项属性 73 | #-LocationInfo = TRUE:默认值false,输出java文件名称和行号 74 | #-Title=Struts Log Message:默认值 Log4JLog Messages 75 | # 76 | #6)PatternLayout选项属性 77 | #-ConversionPattern = %m%n:格式化指定的消息(参数意思下面有) 78 | # 79 | #9)XMLLayout选项属性 80 | #-LocationInfo = TRUE:默认值false,输出java文件名称和行号 81 | # 82 | #Log4J采用类似C语言中的printf函数的打印格式格式化日志信息,打印参数如下: 83 | #%m 输出代码中指定的消息 84 | #%p 输出优先级,即DEBUG,INFO,WARN,ERROR,FATAL 85 | #%r 输出自应用启动到输出该log信息耗费的毫秒数 86 | #%c 输出所属的类目,通常就是所在类的全名 87 | #%t 输出产生该日志事件的线程名 88 | #%n 输出一个回车换行符,Windows平台为“\r\n”,Unix平台为“\n” 89 | #%d 输出日志时间点的日期或时间,默认格式为ISO8601,也可以在其后指定格式 90 | # 如:%d{yyyy年MM月dd日HH:mm:ss,SSS},输出类似:2012年01月05日 22:10:28,921 91 | #%l 输出日志事件的发生位置,包括类目名、发生的线程,以及在代码中的行数 92 | # 如:Testlog.main(TestLog.java:10) 93 | #%F 输出日志消息产生时所在的文件名称 94 | #%L 输出代码中的行号 95 | #%x 输出和当前线程相关联的NDC(嵌套诊断环境),像javaservlets多客户多线程的应用中 96 | #%% 输出一个"%"字符 97 | # 98 | # 可以在%与模式字符之间加上修饰符来控制其最小宽度、最大宽度、和文本的对齐方式。如: 99 | # %5c: 输出category名称,最小宽度是5,category<5,默认的情况下右对齐 100 | # %-5c:输出category名称,最小宽度是5,category<5,"-"号指定左对齐,会有空格 101 | # 
%.5c:输出category名称,最大宽度是5,category>5,就会将左边多出的字符截掉,<5不会有空格 102 | # %20.30c:category名称<20补空格,并且右对齐,>30字符,就从左边交远销出的字符截掉 103 | ################################################################################ 104 | ################################################################################ 105 | #④指定特定包的输出特定的级别 106 | #log4j.logger.org.springframework=DEBUG 107 | ################################################################################ 108 | 109 | #OFF,systemOut,logFile,logDailyFile,logRollingFile,logMail,logDB,ALL 110 | 111 | #log4j.rootLogger=INFO,systemOut 112 | 113 | #输出到控制台 114 | #log4j.appender.systemOut= org.apache.log4j.ConsoleAppender 115 | #log4j.appender.systemOut.layout= org.apache.log4j.PatternLayout 116 | #log4j.appender.systemOut.layout.ConversionPattern= [%-5p][%-20d{yyyy/MM/dd HH:mm:ss}][%c] %m%n 117 | #log4j.appender.systemOut.Threshold= INFO 118 | #log4j.appender.systemOut.ImmediateFlush= TRUE 119 | #log4j.appender.systemOut.Target= System.out 120 | 121 | 122 | # Root logger option 123 | log4j.rootLogger=INFO,stdout 124 | # Direct log messages to stdout 125 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 126 | log4j.appender.stdout.Target=System.out 127 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 128 | log4j.appender.stdout.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss}] %-5p %c{1}:%L - %m%n -------------------------------------------------------------------------------- /cockroach-test/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #①配置根Logger,其语法为: 3 | # 4 | #log4j.rootLogger =[level],appenderName,appenderName2,... 
5 | #level是日志记录的优先级,分为OFF,TRACE,DEBUG,INFO,WARN,ERROR,FATAL,ALL 6 | ##Log4j建议只使用四个级别,优先级从低到高分别是DEBUG,INFO,WARN,ERROR 7 | #通过在这里定义的级别,您可以控制到应用程序中相应级别的日志信息的开关 8 | #比如在这里定义了INFO级别,则应用程序中所有DEBUG级别的日志信息将不被打印出来 9 | #appenderName就是指定日志信息输出到哪个地方。可同时指定多个输出目的 10 | ################################################################################ 11 | ################################################################################ 12 | #②配置日志信息输出目的地Appender,其语法为: 13 | # 14 | #log4j.appender.appenderName =fully.qualified.name.of.appender.class 15 | #log4j.appender.appenderName.optionN =valueN 16 | # 17 | #Log4j提供的appender有以下几种: 18 | #1)org.apache.log4j.ConsoleAppender(输出到控制台) 19 | #2)org.apache.log4j.FileAppender(输出到文件) 20 | #3)org.apache.log4j.DailyRollingFileAppender(每天产生一个日志文件) 21 | #4)org.apache.log4j.RollingFileAppender(文件大小到达指定尺寸的时候产生一个新的文件) 22 | #5)org.apache.log4j.WriterAppender(将日志信息以流格式发送到任意指定的地方) 23 | # 24 | #1)ConsoleAppender选项属性 25 | # -Threshold = DEBUG:指定日志消息的输出最低层次 26 | # -ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 27 | # -Target = System.err:默认值System.out,输出到控制台(err为红色,out为黑色) 28 | # 29 | #2)FileAppender选项属性 30 | # -Threshold = INFO:指定日志消息的输出最低层次 31 | # -ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 32 | # -File = C:\log4j.log:指定消息输出到C:\log4j.log文件 33 | # -Append = FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 34 | # -Encoding = UTF-8:可以指定文件编码格式 35 | # 36 | #3)DailyRollingFileAppender选项属性 37 | #-Threshold = WARN:指定日志消息的输出最低层次 38 | #-ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 39 | # -File =C:\log4j.log:指定消息输出到C:\log4j.log文件 40 | # -Append= FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 41 | #-DatePattern='.'yyyy-ww:每周滚动一次文件,即每周产生一个新的文件。还可以按用以下参数: 42 | # '.'yyyy-MM:每月 43 | # '.'yyyy-ww:每周 44 | # '.'yyyy-MM-dd:每天 45 | # '.'yyyy-MM-dd-a:每天两次 46 | # '.'yyyy-MM-dd-HH:每小时 47 | # '.'yyyy-MM-dd-HH-mm:每分钟 48 | #-Encoding = UTF-8:可以指定文件编码格式 49 | # 50 | #4)RollingFileAppender选项属性 51 | #-Threshold = ERROR:指定日志消息的输出最低层次 52 | #-ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 53 | # 
-File =C:/log4j.log:指定消息输出到C:/log4j.log文件 54 | # -Append= FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 55 | #-MaxFileSize = 100KB:后缀可以是KB,MB,GB.在日志文件到达该大小时,将会自动滚动.如:log4j.log.1 56 | #-MaxBackupIndex = 2:指定可以产生的滚动文件的最大数 57 | #-Encoding = UTF-8:可以指定文件编码格式 58 | ################################################################################ 59 | ################################################################################ 60 | #③配置日志信息的格式(布局),其语法为: 61 | # 62 | #log4j.appender.appenderName.layout=fully.qualified.name.of.layout.class 63 | #log4j.appender.appenderName.layout.optionN= valueN 64 | # 65 | #Log4j提供的layout有以下几种: 66 | #5)org.apache.log4j.HTMLLayout(以HTML表格形式布局) 67 | #6)org.apache.log4j.PatternLayout(可以灵活地指定布局模式) 68 | #7)org.apache.log4j.SimpleLayout(包含日志信息的级别和信息字符串) 69 | #8)org.apache.log4j.TTCCLayout(包含日志产生的时间、线程、类别等等信息) 70 | #9)org.apache.log4j.xml.XMLLayout(以XML形式布局) 71 | # 72 | #5)HTMLLayout选项属性 73 | #-LocationInfo = TRUE:默认值false,输出java文件名称和行号 74 | #-Title=Struts Log Message:默认值 Log4JLog Messages 75 | # 76 | #6)PatternLayout选项属性 77 | #-ConversionPattern = %m%n:格式化指定的消息(参数意思下面有) 78 | # 79 | #9)XMLLayout选项属性 80 | #-LocationInfo = TRUE:默认值false,输出java文件名称和行号 81 | # 82 | #Log4J采用类似C语言中的printf函数的打印格式格式化日志信息,打印参数如下: 83 | #%m 输出代码中指定的消息 84 | #%p 输出优先级,即DEBUG,INFO,WARN,ERROR,FATAL 85 | #%r 输出自应用启动到输出该log信息耗费的毫秒数 86 | #%c 输出所属的类目,通常就是所在类的全名 87 | #%t 输出产生该日志事件的线程名 88 | #%n 输出一个回车换行符,Windows平台为“\r\n”,Unix平台为“\n” 89 | #%d 输出日志时间点的日期或时间,默认格式为ISO8601,也可以在其后指定格式 90 | # 如:%d{yyyy年MM月dd日HH:mm:ss,SSS},输出类似:2012年01月05日 22:10:28,921 91 | #%l 输出日志事件的发生位置,包括类目名、发生的线程,以及在代码中的行数 92 | # 如:Testlog.main(TestLog.java:10) 93 | #%F 输出日志消息产生时所在的文件名称 94 | #%L 输出代码中的行号 95 | #%x 输出和当前线程相关联的NDC(嵌套诊断环境),像javaservlets多客户多线程的应用中 96 | #%% 输出一个"%"字符 97 | # 98 | # 可以在%与模式字符之间加上修饰符来控制其最小宽度、最大宽度、和文本的对齐方式。如: 99 | # %5c: 输出category名称,最小宽度是5,category<5,默认的情况下右对齐 100 | # %-5c:输出category名称,最小宽度是5,category<5,"-"号指定左对齐,会有空格 101 | # %.5c:输出category名称,最大宽度是5,category>5,就会将左边多出的字符截掉,<5不会有空格 102 | # 
%20.30c:category名称<20补空格,并且右对齐,>30字符,就从左边交远销出的字符截掉 103 | ################################################################################ 104 | ################################################################################ 105 | #④指定特定包的输出特定的级别 106 | #log4j.logger.org.springframework=DEBUG 107 | ################################################################################ 108 | 109 | #OFF,systemOut,logFile,logDailyFile,logRollingFile,logMail,logDB,ALL 110 | 111 | #log4j.rootLogger=INFO,systemOut 112 | 113 | #输出到控制台 114 | #log4j.appender.systemOut= org.apache.log4j.ConsoleAppender 115 | #log4j.appender.systemOut.layout= org.apache.log4j.PatternLayout 116 | #log4j.appender.systemOut.layout.ConversionPattern= [%-5p][%-20d{yyyy/MM/dd HH:mm:ss}][%c] %m%n 117 | #log4j.appender.systemOut.Threshold= INFO 118 | #log4j.appender.systemOut.ImmediateFlush= TRUE 119 | #log4j.appender.systemOut.Target= System.out 120 | 121 | 122 | # Root logger option 123 | log4j.rootLogger=INFO,stdout 124 | # Direct log messages to stdout 125 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 126 | log4j.appender.stdout.Target=System.out 127 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 128 | log4j.appender.stdout.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss}] %-5p %c{1}:%L - %m%n -------------------------------------------------------------------------------- /cockroach-annotation/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #①配置根Logger,其语法为: 3 | # 4 | #log4j.rootLogger =[level],appenderName,appenderName2,... 
5 | #level是日志记录的优先级,分为OFF,TRACE,DEBUG,INFO,WARN,ERROR,FATAL,ALL 6 | ##Log4j建议只使用四个级别,优先级从低到高分别是DEBUG,INFO,WARN,ERROR 7 | #通过在这里定义的级别,您可以控制到应用程序中相应级别的日志信息的开关 8 | #比如在这里定义了INFO级别,则应用程序中所有DEBUG级别的日志信息将不被打印出来 9 | #appenderName就是指定日志信息输出到哪个地方。可同时指定多个输出目的 10 | ################################################################################ 11 | ################################################################################ 12 | #②配置日志信息输出目的地Appender,其语法为: 13 | # 14 | #log4j.appender.appenderName =fully.qualified.name.of.appender.class 15 | #log4j.appender.appenderName.optionN =valueN 16 | # 17 | #Log4j提供的appender有以下几种: 18 | #1)org.apache.log4j.ConsoleAppender(输出到控制台) 19 | #2)org.apache.log4j.FileAppender(输出到文件) 20 | #3)org.apache.log4j.DailyRollingFileAppender(每天产生一个日志文件) 21 | #4)org.apache.log4j.RollingFileAppender(文件大小到达指定尺寸的时候产生一个新的文件) 22 | #5)org.apache.log4j.WriterAppender(将日志信息以流格式发送到任意指定的地方) 23 | # 24 | #1)ConsoleAppender选项属性 25 | # -Threshold = DEBUG:指定日志消息的输出最低层次 26 | # -ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 27 | # -Target = System.err:默认值System.out,输出到控制台(err为红色,out为黑色) 28 | # 29 | #2)FileAppender选项属性 30 | # -Threshold = INFO:指定日志消息的输出最低层次 31 | # -ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 32 | # -File = C:\log4j.log:指定消息输出到C:\log4j.log文件 33 | # -Append = FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 34 | # -Encoding = UTF-8:可以指定文件编码格式 35 | # 36 | #3)DailyRollingFileAppender选项属性 37 | #-Threshold = WARN:指定日志消息的输出最低层次 38 | #-ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 39 | # -File =C:\log4j.log:指定消息输出到C:\log4j.log文件 40 | # -Append= FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 41 | #-DatePattern='.'yyyy-ww:每周滚动一次文件,即每周产生一个新的文件。还可以按用以下参数: 42 | # '.'yyyy-MM:每月 43 | # '.'yyyy-ww:每周 44 | # '.'yyyy-MM-dd:每天 45 | # '.'yyyy-MM-dd-a:每天两次 46 | # '.'yyyy-MM-dd-HH:每小时 47 | # '.'yyyy-MM-dd-HH-mm:每分钟 48 | #-Encoding = UTF-8:可以指定文件编码格式 49 | # 50 | #4)RollingFileAppender选项属性 51 | #-Threshold = ERROR:指定日志消息的输出最低层次 52 | #-ImmediateFlush = TRUE:默认值是true,所有的消息都会被立即输出 53 | # 
-File =C:/log4j.log:指定消息输出到C:/log4j.log文件 54 | # -Append= FALSE:默认值true,将消息追加到指定文件中,false指将消息覆盖指定的文件内容 55 | #-MaxFileSize = 100KB:后缀可以是KB,MB,GB.在日志文件到达该大小时,将会自动滚动.如:log4j.log.1 56 | #-MaxBackupIndex = 2:指定可以产生的滚动文件的最大数 57 | #-Encoding = UTF-8:可以指定文件编码格式 58 | ################################################################################ 59 | ################################################################################ 60 | #③配置日志信息的格式(布局),其语法为: 61 | # 62 | #log4j.appender.appenderName.layout=fully.qualified.name.of.layout.class 63 | #log4j.appender.appenderName.layout.optionN= valueN 64 | # 65 | #Log4j提供的layout有以下几种: 66 | #5)org.apache.log4j.HTMLLayout(以HTML表格形式布局) 67 | #6)org.apache.log4j.PatternLayout(可以灵活地指定布局模式) 68 | #7)org.apache.log4j.SimpleLayout(包含日志信息的级别和信息字符串) 69 | #8)org.apache.log4j.TTCCLayout(包含日志产生的时间、线程、类别等等信息) 70 | #9)org.apache.log4j.xml.XMLLayout(以XML形式布局) 71 | # 72 | #5)HTMLLayout选项属性 73 | #-LocationInfo = TRUE:默认值false,输出java文件名称和行号 74 | #-Title=Struts Log Message:默认值 Log4JLog Messages 75 | # 76 | #6)PatternLayout选项属性 77 | #-ConversionPattern = %m%n:格式化指定的消息(参数意思下面有) 78 | # 79 | #9)XMLLayout选项属性 80 | #-LocationInfo = TRUE:默认值false,输出java文件名称和行号 81 | # 82 | #Log4J采用类似C语言中的printf函数的打印格式格式化日志信息,打印参数如下: 83 | #%m 输出代码中指定的消息 84 | #%p 输出优先级,即DEBUG,INFO,WARN,ERROR,FATAL 85 | #%r 输出自应用启动到输出该log信息耗费的毫秒数 86 | #%c 输出所属的类目,通常就是所在类的全名 87 | #%t 输出产生该日志事件的线程名 88 | #%n 输出一个回车换行符,Windows平台为“\r\n”,Unix平台为“\n” 89 | #%d 输出日志时间点的日期或时间,默认格式为ISO8601,也可以在其后指定格式 90 | # 如:%d{yyyy年MM月dd日HH:mm:ss,SSS},输出类似:2012年01月05日 22:10:28,921 91 | #%l 输出日志事件的发生位置,包括类目名、发生的线程,以及在代码中的行数 92 | # 如:Testlog.main(TestLog.java:10) 93 | #%F 输出日志消息产生时所在的文件名称 94 | #%L 输出代码中的行号 95 | #%x 输出和当前线程相关联的NDC(嵌套诊断环境),像javaservlets多客户多线程的应用中 96 | #%% 输出一个"%"字符 97 | # 98 | # 可以在%与模式字符之间加上修饰符来控制其最小宽度、最大宽度、和文本的对齐方式。如: 99 | # %5c: 输出category名称,最小宽度是5,category<5,默认的情况下右对齐 100 | # %-5c:输出category名称,最小宽度是5,category<5,"-"号指定左对齐,会有空格 101 | # %.5c:输出category名称,最大宽度是5,category>5,就会将左边多出的字符截掉,<5不会有空格 102 | # 
%20.30c:category名称<20补空格,并且右对齐,>30字符,就会将左边多出的字符截掉 103 | ################################################################################ 104 | ################################################################################ 105 | #④指定特定包的输出特定的级别 106 | #log4j.logger.org.springframework=DEBUG 107 | ################################################################################ 108 | 109 | #OFF,systemOut,logFile,logDailyFile,logRollingFile,logMail,logDB,ALL 110 | 111 | #log4j.rootLogger=INFO,systemOut 112 | 113 | #输出到控制台 114 | #log4j.appender.systemOut= org.apache.log4j.ConsoleAppender 115 | #log4j.appender.systemOut.layout= org.apache.log4j.PatternLayout 116 | #log4j.appender.systemOut.layout.ConversionPattern= [%-5p][%-20d{yyyy/MM/dd HH:mm:ss}][%c] %m%n 117 | #log4j.appender.systemOut.Threshold= INFO 118 | #log4j.appender.systemOut.ImmediateFlush= TRUE 119 | #log4j.appender.systemOut.Target= System.out 120 | 121 | 122 | # Root logger option 123 | log4j.rootLogger=INFO,stdout 124 | # Direct log messages to stdout 125 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 126 | log4j.appender.stdout.Target=System.out 127 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 128 | log4j.appender.stdout.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss}] %-5p %c{1}:%L - %m%n -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/ExecuterManager.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.MapGenerator; 4 | import com.zhangyingwei.cockroach.common.generators.StringGenerator; 5 | import com.zhangyingwei.cockroach.config.CockroachConfig; 6 | import com.zhangyingwei.cockroach.executer.listener.IExecutersListener; 7 | import com.zhangyingwei.cockroach.executer.response.filter.ITaskResponseFilter; 8 | import 
com.zhangyingwei.cockroach.executer.response.filter.TaskResponseFilterBox; 9 | import com.zhangyingwei.cockroach.executer.task.TaskExecuter; 10 | import com.zhangyingwei.cockroach.http.HttpProxy; 11 | import com.zhangyingwei.cockroach.http.client.IHttpClient; 12 | import com.zhangyingwei.cockroach.http.client.HttpClientProxy; 13 | import com.zhangyingwei.cockroach.http.handler.TaskErrorHandlerBox; 14 | import com.zhangyingwei.cockroach.queue.CockroachQueue; 15 | import org.apache.log4j.Logger; 16 | 17 | import java.util.ArrayList; 18 | import java.util.List; 19 | import java.util.Set; 20 | import java.util.concurrent.ExecutorService; 21 | import java.util.concurrent.Executors; 22 | import java.util.concurrent.TimeUnit; 23 | 24 | /** 25 | * @author: zhangyw 26 | * @date: 2018/1/29 27 | * @time: 下午2:14 28 | * @desc: 29 | */ 30 | public class ExecuterManager { 31 | private int thread; 32 | private HttpProxy proxy = null; 33 | private ExecutorService service = Executors.newCachedThreadPool(); 34 | private CockroachConfig config; 35 | private Logger logger = Logger.getLogger(ExecuterManager.class); 36 | private List executerListeners; 37 | private List executerList = new ArrayList(); 38 | 39 | public ExecuterManager(CockroachConfig config) { 40 | this.config = config; 41 | executerListeners = new ArrayList(); 42 | } 43 | 44 | /** 45 | * 发车 46 | * @param queue 47 | * @throws Exception 48 | */ 49 | public void start(CockroachQueue queue) throws Exception { 50 | this.thread = config.getThread(); 51 | TaskResponseFilterBox filterBox = this.bulidResponseFilters(); 52 | this.executerListeners.forEach(IExecutersListener::onStart); 53 | for (int i = 0; i < thread; i++) { 54 | TaskExecuter executer = new TaskExecuter( 55 | queue, 56 | this.bulidHttpClient(), 57 | this.config.getStore().newInstance(), 58 | new TaskErrorHandlerBox().add(this.config.getTaskErrorHandler().newInstance()), 59 | this.config.getThreadSleep(), 60 | this.config.isAutoClose(), 61 | filterBox 62 | ); 63 | 
logger.info("new thread:" + executer.getId()); 64 | service.execute(executer); 65 | executerList.add(executer); 66 | } 67 | /** 68 | * 不可以再继续 提交新的任务 已经提交的任务不影响 69 | */ 70 | service.shutdown(); 71 | 72 | /** 73 | * 每 5 秒检测是否全部线程执行完毕 74 | */ 75 | new Thread(() -> { 76 | try { 77 | while (true) { 78 | TimeUnit.SECONDS.sleep(5); 79 | if (service.isTerminated()) { 80 | logger.info("任务已经全部执行完毕"); 81 | this.executerListeners.forEach(IExecutersListener::onEnd); 82 | break; 83 | } 84 | } 85 | } catch (InterruptedException e) { 86 | e.printStackTrace(); 87 | } 88 | }).start(); 89 | } 90 | 91 | /** 92 | * 停车,我要下车 93 | */ 94 | public void stop() { 95 | this.executerList.forEach(executer -> { 96 | executer.stop(); 97 | }); 98 | } 99 | 100 | /** 101 | * bind executer listener 102 | * @param listener 103 | * @return 104 | */ 105 | public ExecuterManager bindListener(Class listener) throws IllegalAccessException, InstantiationException { 106 | if (listener != null) { 107 | this.executerListeners.add(listener.newInstance()); 108 | } 109 | return this; 110 | } 111 | 112 | private TaskResponseFilterBox bulidResponseFilters() throws IllegalAccessException, InstantiationException { 113 | logger.info("bulid response filters"); 114 | TaskResponseFilterBox filterBox = new TaskResponseFilterBox(); 115 | Set> filters = this.config.getResponseFilters(); 116 | for (Class filter : filters) { 117 | filterBox.add(filter.newInstance()); 118 | } 119 | return filterBox; 120 | } 121 | 122 | private HttpClientProxy bulidHttpClient() throws Exception { 123 | logger.info("bulid httpclient"); 124 | if(this.config.getProxys() != null && this.proxy ==null){ 125 | this.proxy = new HttpProxy(this.config.getProxys()); 126 | } 127 | IHttpClient client = this.config.getHttpClient().newInstance(); 128 | 129 | StringGenerator cookieGenerator = null; 130 | if (this.config.getCookieGenerator() != null) { 131 | cookieGenerator = (StringGenerator) this.config.getCookieGenerator().newInstance(); 132 | } 133 | 134 | 
MapGenerator headerGenerator = null; 135 | if (this.config.getHeaderGenerator() != null) { 136 | headerGenerator = (MapGenerator) this.config.getHeaderGenerator().newInstance(); 137 | } 138 | 139 | return new HttpClientProxy(client) 140 | .setProxy(this.proxy) 141 | .setCookieGenerator(cookieGenerator) 142 | .setHeaderGenerator(headerGenerator) 143 | .setCookie(this.config.getCookie()) 144 | .setHttpHeader(this.config.getHttpHeader()); 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/common/utils/FileUtils.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.common.utils; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.NameGenerator; 4 | import com.zhangyingwei.cockroach.executer.response.TaskResponse; 5 | import org.apache.log4j.Logger; 6 | import java.io.*; 7 | import java.nio.file.Path; 8 | import java.nio.file.Paths; 9 | import java.util.*; 10 | import java.util.stream.Collectors; 11 | 12 | /** 13 | * Created by zhangyw on 2017/10/18/018. 
14 | * 操作文件的工具类 15 | */ 16 | public class FileUtils { 17 | 18 | private static Logger logger = Logger.getLogger(FileUtils.class); 19 | private static Map writerCache = new HashMap(); 20 | 21 | /** 22 | * save bytes into file 23 | * @param bytes 24 | * @param filePath 25 | * @param fileName 26 | * @throws IOException 27 | */ 28 | public static void save(byte[] bytes,String filePath,String fileName) throws IOException { 29 | Path path = Paths.get(filePath, fileName); 30 | mkirDirs(path.getParent()); 31 | String pathStr = path.toString(); 32 | File file = new File(pathStr); 33 | write(bytes,file); 34 | } 35 | 36 | /** 37 | * if dir is not exists,make it 38 | * @param parent 39 | */ 40 | private static void mkirDirs(Path parent) { 41 | if(!parent.toFile().exists()){ 42 | parent.toFile().mkdirs(); 43 | } 44 | } 45 | 46 | /** 47 | * wtire bytes into file 48 | * @param bytes 49 | * @param file 50 | * @throws IOException 51 | */ 52 | public static void write(byte[] bytes,File file) throws IOException { 53 | BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(file)); 54 | outputStream.write(bytes); 55 | outputStream.close(); 56 | } 57 | 58 | /** 59 | * 获取文件名称 60 | * @param response 61 | * @return 62 | */ 63 | public static String getFileName(TaskResponse response){ 64 | List name = response.header("content-disposition"); 65 | System.out.println(name); 66 | if (null != name) { 67 | return Arrays.stream(name.get(0).split("; ")).filter(item -> { 68 | return item.trim().startsWith("filename=\""); 69 | }).map(filename -> { 70 | return filename.replaceAll("filename=","").replaceAll("\"",""); 71 | }).limit(1).collect(Collectors.toList()).get(0); 72 | } 73 | return null; 74 | } 75 | 76 | /** 77 | * 获取文件名称,如果不存在,使用 UUID 78 | * @param response 79 | * @return 80 | */ 81 | public static String getFileNameOrUuid(TaskResponse response) { 82 | String name = getFileName(response); 83 | if (null == name) { 84 | name = UUID.randomUUID().toString(); 85 | } 86 | 
return name; 87 | } 88 | 89 | /** 90 | * 获取文件名称,如果获取不到,使用自定义接口生成一个名字 91 | * @param response 92 | * @param nameGenerator 93 | * @return 94 | */ 95 | public static String getFileNameOr(TaskResponse response, NameGenerator nameGenerator) { 96 | String name = getFileName(response); 97 | if (null == name) { 98 | name = nameGenerator.name(); 99 | } 100 | return name; 101 | } 102 | 103 | /** 104 | * 打开或者创建 105 | * 如果存在就打开,如果不存在就创建 106 | * @param path 107 | * @param fileName 108 | * @return 109 | * @throws IOException 110 | */ 111 | public static File openOrCreate(String path,String fileName) throws IOException { 112 | Path filePath = Paths.get(path, fileName); 113 | File file = filePath.toFile(); 114 | if (file.exists()) { 115 | if (file.isFile()) { 116 | return file; 117 | } 118 | throw new FileNotFoundException(path+" is discroty"); 119 | }else { 120 | mkirDirs(filePath.getParent()); 121 | file.createNewFile(); 122 | return file; 123 | } 124 | } 125 | 126 | /** 127 | * 文件中追加内容 128 | * @param file 129 | * @param content 130 | * @throws IOException 131 | */ 132 | public synchronized static void append(File file,String content) throws IOException { 133 | Writer writer = writerCache.getOrDefault(file.getPath(), new FileWriter(file,true)); 134 | writer.write(content); 135 | writer.flush(); 136 | writerCache.put(file.getPath(), writer); 137 | } 138 | 139 | /** 140 | * 关闭所有 writer 141 | */ 142 | public synchronized static void closeWriters() { 143 | writerCache.values().forEach(writer -> { 144 | try { 145 | writer.close(); 146 | } catch (IOException e) { 147 | e.printStackTrace(); 148 | } 149 | }); 150 | writerCache.clear(); 151 | } 152 | 153 | /** 154 | * 关闭相应 file 的 writer 155 | * @param filePath 156 | */ 157 | public static void closeWriter(String filePath) { 158 | Writer writer = writerCache.get(filePath); 159 | if (writer != null) { 160 | try { 161 | writer.close(); 162 | } catch (IOException e) { 163 | e.printStackTrace(); 164 | } 165 | } 166 | 
writerCache.remove(writer); 167 | } 168 | 169 | /** 170 | * 清空文件内容 171 | * @param file 172 | * @throws IOException 173 | */ 174 | public static void clearFile(File file) throws IOException { 175 | write(new byte[0],file); 176 | } 177 | 178 | /** 179 | * delete file 180 | * @param file 181 | */ 182 | public static boolean delete(File file) { 183 | boolean result = false; 184 | if (file.exists()) { 185 | result = file.delete(); 186 | if (!result) { 187 | logger.info("delete " + file.getName() + " error"); 188 | } 189 | } 190 | return result; 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/executer/task/Task.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.executer.task; 2 | 3 | 4 | import com.zhangyingwei.cockroach.config.Constants; 5 | import com.zhangyingwei.cockroach.common.utils.NameUtils; 6 | import org.apache.commons.lang.StringUtils; 7 | import org.apache.log4j.Logger; 8 | 9 | import java.util.*; 10 | import java.util.stream.Collectors; 11 | 12 | /** 13 | * Created by zhangyw on 2017/8/10. 
14 | * 爬虫任务描述类 15 | */ 16 | public class Task implements Comparable { 17 | private Logger logger = Logger.getLogger(Task.class); 18 | //每一个任务都会生成一个编号,编号是一个递增的连续序列 19 | private String id = NameUtils.name(Task.class); 20 | //每一个任务都会有一个分组,如果没有设置,默认为 default 21 | private String group = Constants.APP_TASK_GROUP_DEFAULT; 22 | private String url; 23 | private Map params = new HashMap();; 24 | private List selects; 25 | private Object extr; 26 | private Integer retry = Constants.DEFAULT_TASK_RETRY; 27 | private Integer deep = Constants.DEFAULT_TASK_DEEP; 28 | 29 | public Task(String url, Map params) { 30 | this.url = url; 31 | this.params = params; 32 | } 33 | 34 | public Task(String url) { 35 | this.url = url; 36 | } 37 | 38 | public Task(String url, String group) { 39 | this.url = url; 40 | this.group = group; 41 | } 42 | 43 | public Task(String url, String group, Map params) { 44 | this.group = group; 45 | this.url = url; 46 | this.params = params; 47 | } 48 | 49 | public Task() { 50 | } 51 | 52 | public String getGroup() { 53 | return group; 54 | } 55 | 56 | public void setGroup(String group) { 57 | this.group = group; 58 | } 59 | 60 | public String getId() { 61 | return id; 62 | } 63 | 64 | public void setId(String id) { 65 | this.id = id; 66 | } 67 | 68 | public String getUrl() { 69 | return this.url; 70 | } 71 | 72 | public void setUrl(String url) { 73 | this.url = url; 74 | } 75 | 76 | public Map getParams() { 77 | return params; 78 | } 79 | 80 | public void setParams(Map params) { 81 | this.params = params; 82 | } 83 | 84 | public List getSelects() { 85 | return selects; 86 | } 87 | 88 | public Task addSelect(String cssSelect) { 89 | this.selects = Optional.ofNullable(this.selects).orElse(new ArrayList()); 90 | this.selects.add(cssSelect); 91 | return this; 92 | } 93 | 94 | public T getExtr() { 95 | return (T) extr; 96 | } 97 | 98 | public void setExtr(Object extr) { 99 | this.extr = extr; 100 | } 101 | 102 | @Override 103 | public String toString() { 104 | return 
"Task{" + 105 | "id='" + id + '\'' + 106 | ", group='" + group + '\'' + 107 | ", url='" + url + '\'' + 108 | ", params=" + params + 109 | ", selects=" + selects + 110 | ", extr=" + extr + 111 | ", retry=" + retry + 112 | ", deep=" + deep + 113 | '}'; 114 | } 115 | 116 | @Override 117 | public boolean equals(Object o) { 118 | if (this == o) return true; 119 | if (!(o instanceof Task)) return false; 120 | 121 | Task task = (Task) o; 122 | 123 | if (getId() != null ? !getId().equals(task.getId()) : task.getId() != null) return false; 124 | if (getGroup() != null ? !getGroup().equals(task.getGroup()) : task.getGroup() != null) return false; 125 | if (getUrl() != null ? !getUrl().equals(task.getUrl()) : task.getUrl() != null) return false; 126 | if (getParams() != null ? !getParams().equals(task.getParams()) : task.getParams() != null) return false; 127 | if (getSelects() != null ? !getSelects().equals(task.getSelects()) : task.getSelects() != null) return false; 128 | if (getExtr() != null ? !getExtr().equals(task.getExtr()) : task.getExtr() != null) return false; 129 | return getRetry() != null ? getRetry().equals(task.getRetry()) : task.getRetry() == null; 130 | } 131 | 132 | @Override 133 | public int hashCode() { 134 | int result = getId() != null ? getId().hashCode() : 0; 135 | result = 31 * result + (getGroup() != null ? getGroup().hashCode() : 0); 136 | result = 31 * result + (getUrl() != null ? getUrl().hashCode() : 0); 137 | result = 31 * result + (getParams() != null ? getParams().hashCode() : 0); 138 | result = 31 * result + (getSelects() != null ? getSelects().hashCode() : 0); 139 | result = 31 * result + (getExtr() != null ? getExtr().hashCode() : 0); 140 | result = 31 * result + (getRetry() != null ? 
getRetry().hashCode() : 0); 141 | return result; 142 | } 143 | 144 | public Integer getRetry() { 145 | return retry; 146 | } 147 | 148 | public Task retry() { 149 | this.retry -= 1; 150 | return this; 151 | } 152 | 153 | public Task retry(Integer retry) { 154 | this.retry = retry; 155 | return this; 156 | } 157 | 158 | public Integer getDeep() { 159 | return deep; 160 | } 161 | 162 | public Task addDeep(int deep) { 163 | if (deep > 0) { 164 | this.deep += deep; 165 | } else { 166 | logger.info("deep is not valid: " + deep); 167 | } 168 | return this; 169 | } 170 | 171 | public Task nextDeepBy(Task task) { 172 | this.deep = task.getDeep() + 1; 173 | return this; 174 | } 175 | 176 | @Override 177 | public int compareTo(Task task) { 178 | return task.getDeep() - this.getDeep(); 179 | } 180 | 181 | public void setSelects(List selects) { 182 | this.selects = selects; 183 | } 184 | 185 | public void setRetry(Integer retry) { 186 | this.retry = retry; 187 | } 188 | 189 | public void setDeep(Integer deep) { 190 | this.deep = deep; 191 | } 192 | 193 | public String realUrl() { 194 | if (this.getParams().isEmpty()) { 195 | return url; 196 | } else { 197 | List paramsList = this.getParams().entrySet().stream().map(entity -> String.format("%s=%s", entity.getKey(), entity.getValue())).collect(Collectors.toList()); 198 | String param = StringUtils.join(paramsList.toArray(), "&"); 199 | return String.format("%s?%s", url, param); 200 | } 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /cockroach-core/src/main/java/com/zhangyingwei/cockroach/config/CockroachConfig.java: -------------------------------------------------------------------------------- 1 | package com.zhangyingwei.cockroach.config; 2 | 3 | import com.zhangyingwei.cockroach.common.generators.MapGenerator; 4 | import com.zhangyingwei.cockroach.common.generators.StringGenerator; 5 | import com.zhangyingwei.cockroach.executer.listener.IExecutersListener; 6 | import 
com.zhangyingwei.cockroach.executer.response.filter.ITaskResponseFilter; 7 | import com.zhangyingwei.cockroach.http.client.IHttpClient; 8 | import com.zhangyingwei.cockroach.http.handler.DefaultTaskErrorHandler; 9 | import com.zhangyingwei.cockroach.http.handler.ITaskErrorHandler; 10 | import com.zhangyingwei.cockroach.store.IStore; 11 | import com.zhangyingwei.cockroach.common.utils.CockroachUtils; 12 | import org.apache.log4j.ConsoleAppender; 13 | import org.apache.log4j.Level; 14 | import org.apache.log4j.Logger; 15 | import org.apache.log4j.PatternLayout; 16 | 17 | import java.util.*; 18 | 19 | /** 20 | * Created by zhangyw on 2017/8/10. 21 | * cockroach 爬虫 配置类,主要配置有 22 | * 应用名 http客户端 结果处理类 代理 线程数 任务处理完毕线程操作(等待/结束) cookie httpheader 任务失败处理逻辑 23 | */ 24 | public class CockroachConfig { 25 | private Logger logger = Logger.getLogger(CockroachConfig.class); 26 | private String appName; 27 | private String proxys = null; 28 | private int thread = 1; 29 | private int threadSleep = 0; 30 | private Class httpClient = Constants.HTTP_CLIENT; 31 | private Boolean showHttpClientProgress = Constants.HTTP_SHOWHTTPCLIENTPROGRESS; 32 | private Class store = Constants.STORE; 33 | private String cookie; 34 | private Class cookieGenerator = Constants.COOKIDGENERATOR; 35 | private Class headerGenerator = Constants.HEADERGENERATOR; 36 | private Map httpHeader; 37 | private boolean autoClose = false; 38 | private Class taskErrorHandler; 39 | private Set> responseFilters = new HashSet>(); 40 | private Class executersListener = Constants.DEFAULT_EXECUTERSLISTENER; 41 | 42 | /** 43 | * 如果找不到 log4j 的配置,就使用默认配置 44 | */ 45 | static { 46 | Logger elogger = Logger.getLogger(CockroachConfig.class); 47 | if(!elogger.getParent().getAllAppenders().hasMoreElements()){ 48 | Logger logger = Logger.getRootLogger(); 49 | System.err.println("log4j.properties is not found , use default log4j config"); 50 | logger.setLevel(Level.DEBUG); 51 | logger.addAppender(new ConsoleAppender(new 
PatternLayout("[%-5p][%-20d{yyyy/MM/dd HH:mm:ss}][%c] %m%n"))); 52 | } 53 | } 54 | 55 | public String getProxys() { 56 | return proxys; 57 | } 58 | 59 | public CockroachConfig setProxys(String proxys) { 60 | CockroachUtils.addSystemPropertie(Constants.APP_PROXY_KEY,proxys); 61 | this.proxys = proxys; 62 | return this; 63 | } 64 | 65 | public boolean isAutoClose() { 66 | return autoClose; 67 | } 68 | 69 | public CockroachConfig setAutoClose(boolean autoClose) { 70 | CockroachUtils.addSystemPropertie(Constants.APP_AUTOCLOSE_KEY,autoClose); 71 | this.autoClose = autoClose; 72 | return this; 73 | } 74 | 75 | public String getAppName() { 76 | return appName; 77 | } 78 | 79 | public CockroachConfig setAppName(String appName) { 80 | CockroachUtils.addSystemPropertie(Constants.APP_NAME_KEY,appName); 81 | this.appName = appName; 82 | return this; 83 | } 84 | 85 | public int getThread() { 86 | return thread; 87 | } 88 | 89 | public CockroachConfig setThread(int thread) { 90 | CockroachUtils.addSystemPropertie(Constants.APP_THREAD_KEY,thread); 91 | this.thread = thread; 92 | return this; 93 | } 94 | 95 | public CockroachConfig setThread(int thread, int sleep) { 96 | CockroachUtils.addSystemPropertie(Constants.APP_THREAD_KEY,thread); 97 | CockroachUtils.addSystemPropertie(Constants.APP_THREAD_SLEEP_KEY,sleep); 98 | this.thread = thread; 99 | this.threadSleep = sleep; 100 | return this; 101 | } 102 | 103 | public int getThreadSleep() { 104 | return threadSleep; 105 | } 106 | 107 | public Class getHttpClient() { 108 | return httpClient; 109 | } 110 | 111 | @SuppressWarnings("not supported") 112 | public CockroachConfig setHttpClient(Class httpClient) { 113 | CockroachUtils.addSystemPropertie(Constants.APP_HTTPCLIENT_KEY,httpClient); 114 | this.httpClient = httpClient; 115 | return this; 116 | } 117 | 118 | public Class getStore() { 119 | return store; 120 | } 121 | 122 | public CockroachConfig setStore(Class store) { 123 | 
CockroachUtils.addSystemPropertie(Constants.APP_STORE_KEY,store); 124 | this.store = store; 125 | return this; 126 | } 127 | 128 | public CockroachConfig setCookie(String cookie) { 129 | CockroachUtils.addSystemPropertie(Constants.APP_COOKIE_KEY,cookie); 130 | this.cookie = cookie; 131 | this.addHttpHeader("Cookie", cookie); 132 | return this; 133 | } 134 | 135 | public String getCookie() { 136 | return cookie; 137 | } 138 | 139 | public CockroachConfig addHttpHeader(String key, String value) { 140 | if(this.httpHeader == null){ 141 | this.httpHeader = new HashMap(); 142 | } 143 | this.httpHeader.put(key, value); 144 | return this; 145 | } 146 | 147 | public Map getHttpHeader() { 148 | return httpHeader; 149 | } 150 | 151 | public Class getTaskErrorHandler() { 152 | return Optional.ofNullable(taskErrorHandler).orElse(DefaultTaskErrorHandler.class); 153 | } 154 | 155 | public CockroachConfig setTaskErrorHandler(Class taskErrorHandler) { 156 | CockroachUtils.addSystemPropertie(Constants.APP_TASK_ERROR_KEY,taskErrorHandler); 157 | this.taskErrorHandler = taskErrorHandler; 158 | return this; 159 | } 160 | 161 | public CockroachConfig setShowHttpClientProgress(Boolean showHttpClientProgress) { 162 | this.showHttpClientProgress = showHttpClientProgress; 163 | return this; 164 | } 165 | 166 | public Boolean getShowHttpClientProgress() { 167 | return showHttpClientProgress; 168 | } 169 | 170 | public Class getCookieGenerator() { 171 | return cookieGenerator; 172 | } 173 | 174 | public CockroachConfig setCookieGenerator(Class cookieGenerator) { 175 | this.cookieGenerator = (Class) cookieGenerator; 176 | return this; 177 | } 178 | 179 | public Class getHeaderGenerator() { 180 | return headerGenerator; 181 | } 182 | 183 | public CockroachConfig setHeaderGenerator(Class headerGenerator) { 184 | this.headerGenerator = headerGenerator; 185 | return this; 186 | } 187 | 188 | public Set> getResponseFilters() { 189 | return responseFilters; 190 | } 191 | 192 | public 
CockroachConfig setResponseFilters(Set> responseFilters) { 193 | this.responseFilters = responseFilters; 194 | return this; 195 | } 196 | 197 | public CockroachConfig addResponseFilters(Class responseFilter) { 198 | this.responseFilters.add(responseFilter); 199 | return this; 200 | } 201 | 202 | public void print() { 203 | logger.info("---------------------------config--------------------------"); 204 | logger.info("AppName: "+this.getAppName()); 205 | logger.info("Proxys: "+this.getProxys()); 206 | logger.info("Threads: "+this.getThread()); 207 | logger.info("ThreadSleep: "+this.getThreadSleep()); 208 | logger.info("IHttpClient: "+this.getHttpClient()); 209 | logger.info("HttpClientProgress: "+this.getShowHttpClientProgress()); 210 | logger.info("Store: "+this.getStore()); 211 | logger.info("Cookie: "+this.getCookie()); 212 | logger.info("CookieGenerator: "+this.getCookieGenerator()); 213 | logger.info("HttpHeaders: "+this.getHttpHeader()); 214 | logger.info("HttpHeadersGenerator: "+this.getHeaderGenerator()); 215 | logger.info("AutoClose: "+this.autoClose); 216 | logger.info("TaskErrorHandler: "+this.getTaskErrorHandler()); 217 | logger.info("ResponseFilters: "+this.getResponseFilters()); 218 | logger.info("-------------------------------------------------------------"); 219 | } 220 | 221 | public Class getExecutersListener() { 222 | return this.executersListener; 223 | } 224 | 225 | public void setExecutersListener(Class executersListener) { 226 | this.executersListener = executersListener; 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. --------------------------------------------------------------------------------