├── .scalafix.conf ├── .scalafmt.conf ├── .sonarcloud.properties ├── .travis.yml ├── README.md ├── bin ├── run-example.sh ├── stop.sh └── test.sh ├── crawler-core ├── build.sbt └── src │ ├── main │ ├── resources │ │ ├── log4j2.xml │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── github │ │ └── wtog │ │ └── crawler │ │ ├── actor │ │ └── ActorManager.scala │ │ ├── downloader │ │ ├── AsyncHttpClientDownloader.scala │ │ ├── ChromeHeadlessDownloader.scala │ │ ├── Downloader.scala │ │ ├── DownloaderActorReceiver.scala │ │ └── proxy │ │ │ ├── ProxyCrawlerPipeline.scala │ │ │ ├── ProxyProvider.scala │ │ │ └── crawler │ │ │ ├── A2UPageProcessor.scala │ │ │ ├── Data5UPageProcessor.scala │ │ │ ├── IP89Processor.scala │ │ │ └── ProxyProcessorTrait.scala │ │ ├── dto │ │ ├── Event.scala │ │ ├── Page.scala │ │ └── RequestSetting.scala │ │ ├── exceptions │ │ └── NonNullArgumentsException.scala │ │ ├── pipeline │ │ ├── ConsolePipeline.scala │ │ ├── Pipeline.scala │ │ └── PipelineActorReceiver.scala │ │ ├── processor │ │ ├── PageProcessor.scala │ │ └── PageProcessorActorReceiver.scala │ │ ├── queue │ │ ├── DuplicateRemovedQueue.scala │ │ ├── RequestQueue.scala │ │ ├── TargetRequestTaskQueue.scala │ │ └── duplicate │ │ │ ├── BitSetStrategy.scala │ │ │ ├── DuplicateRemovedStrategy.scala │ │ │ └── HashMapStrategy.scala │ │ ├── rest │ │ ├── NettyServer.scala │ │ ├── Router.scala │ │ └── Server.scala │ │ ├── schedule │ │ └── ScheduleJobs.scala │ │ ├── selector │ │ ├── HtmlParser.scala │ │ └── Selector.scala │ │ └── spider │ │ ├── Spider.scala │ │ └── SpiderPool.scala │ └── test │ ├── resources │ ├── application-test.conf │ └── log4j2-test.xml │ └── scala │ └── io │ └── github │ └── wtog │ └── crawler │ └── test │ ├── BaseCoreTest.scala │ ├── actor │ ├── ActorTestBase.scala │ └── PageProcessorActorTestkit.scala │ ├── download │ ├── AsyncHttpClientTest.scala │ └── ChromeHeadlessDownloaderTest.scala │ ├── processor │ └── HtmlParserSpec.scala │ ├── proxy │ └── ProxyProviderTest.scala │ ├── queue │ └── DuplicateStrategyTest.scala │ ├── schedule │ └── ScheduleTest.scala │ └── server │ └── TestMockServer.scala ├── crawler-example └── src │ └── main │ ├── resources │ ├── log4j2.xml │ └── reference.conf │ └── scala │ └── io │ └── github │ └── wtog │ └── example │ ├── ExampleTrait.scala │ ├── Main.scala │ └── impl │ ├── BaiduPageProcessor.scala │ ├── LianjiaErshouFangProcessor.scala │ ├── LianjiaRentingProcessor.scala │ ├── ZhihuAnswerPageProcessor.scala │ └── flight │ └── QunarPageProcessor.scala ├── crawler-pipeline └── src │ ├── main │ └── scala │ │ └── io │ │ └── github │ │ └── wtog │ │ └── crawler │ │ └── pipeline │ │ ├── db │ │ ├── DataSource.scala │ │ ├── DataSourceInfo.scala │ │ └── PostgreSQLPipeline.scala │ │ └── file │ │ ├── CsvFilePipeline.scala │ │ └── FilePipeline.scala │ └── test │ └── scala │ └── io │ └── github │ └── wtog │ └── crawler │ └── pipeline │ └── test │ ├── BasePipelineTest.scala │ └── DataSourceTest.scala ├── docker ├── Dockerfile └── build.sh ├── project ├── .gnupg │ ├── pubring.gpg │ └── secring.gpg ├── Dependencies.scala ├── Publish.scala ├── build.properties └── plugins.sbt ├── push.sh ├── utils └── src │ ├── main │ └── scala │ │ └── io │ │ └── github │ │ └── wtog │ │ └── utils │ │ ├── ConfigUtils.scala │ │ ├── JsonUtils.scala │ │ ├── ReflectionUtils.scala │ │ ├── RetryUtils.scala │ │ ├── StringUtils.scala │ │ └── logger │ │ └── Logging.scala │ └── test │ └── scala │ └── io │ └── github │ └── wtog │ └── utils │ └── test │ ├── BaseTest.scala │ ├── JsonUtilsTest.scala │ ├── RetryUtilsTest.scala │ ├── jmh │ └── StringUtilsBenchmark.scala │ └── reflection │ └── ReflectionUtilsTest.scala └── version.sbt /.scalafix.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/.scalafix.conf -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/.scalafmt.conf -------------------------------------------------------------------------------- /.sonarcloud.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/.sonarcloud.properties -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/.travis.yml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/README.md -------------------------------------------------------------------------------- /bin/run-example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/bin/run-example.sh -------------------------------------------------------------------------------- /bin/stop.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/bin/stop.sh -------------------------------------------------------------------------------- /bin/test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/bin/test.sh -------------------------------------------------------------------------------- /crawler-core/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/build.sbt -------------------------------------------------------------------------------- /crawler-core/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/resources/log4j2.xml -------------------------------------------------------------------------------- /crawler-core/src/main/resources/reference.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/resources/reference.conf -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/actor/ActorManager.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/actor/ActorManager.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/AsyncHttpClientDownloader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/AsyncHttpClientDownloader.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/ChromeHeadlessDownloader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/ChromeHeadlessDownloader.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/Downloader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/Downloader.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/DownloaderActorReceiver.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/DownloaderActorReceiver.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/ProxyCrawlerPipeline.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/ProxyCrawlerPipeline.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/ProxyProvider.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/ProxyProvider.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/crawler/A2UPageProcessor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/crawler/A2UPageProcessor.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/crawler/Data5UPageProcessor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/crawler/Data5UPageProcessor.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/crawler/IP89Processor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/crawler/IP89Processor.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/crawler/ProxyProcessorTrait.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/downloader/proxy/crawler/ProxyProcessorTrait.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/dto/Event.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/dto/Event.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/dto/Page.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/dto/Page.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/dto/RequestSetting.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/dto/RequestSetting.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/exceptions/NonNullArgumentsException.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/exceptions/NonNullArgumentsException.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/pipeline/ConsolePipeline.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/pipeline/ConsolePipeline.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/pipeline/Pipeline.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/pipeline/Pipeline.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/pipeline/PipelineActorReceiver.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/pipeline/PipelineActorReceiver.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/processor/PageProcessor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/processor/PageProcessor.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/processor/PageProcessorActorReceiver.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/processor/PageProcessorActorReceiver.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/queue/DuplicateRemovedQueue.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/queue/DuplicateRemovedQueue.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/queue/RequestQueue.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/queue/RequestQueue.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/queue/TargetRequestTaskQueue.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/queue/TargetRequestTaskQueue.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/queue/duplicate/BitSetStrategy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/queue/duplicate/BitSetStrategy.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/queue/duplicate/DuplicateRemovedStrategy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/queue/duplicate/DuplicateRemovedStrategy.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/queue/duplicate/HashMapStrategy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/queue/duplicate/HashMapStrategy.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/rest/NettyServer.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/rest/NettyServer.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/rest/Router.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/rest/Router.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/rest/Server.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/rest/Server.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/schedule/ScheduleJobs.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/schedule/ScheduleJobs.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/selector/HtmlParser.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/selector/HtmlParser.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/selector/Selector.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/selector/Selector.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/spider/Spider.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/spider/Spider.scala -------------------------------------------------------------------------------- /crawler-core/src/main/scala/io/github/wtog/crawler/spider/SpiderPool.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/main/scala/io/github/wtog/crawler/spider/SpiderPool.scala -------------------------------------------------------------------------------- /crawler-core/src/test/resources/application-test.conf: -------------------------------------------------------------------------------- 1 | include "reference.conf" 2 | -------------------------------------------------------------------------------- /crawler-core/src/test/resources/log4j2-test.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/resources/log4j2-test.xml -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/BaseCoreTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/BaseCoreTest.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/actor/ActorTestBase.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/actor/ActorTestBase.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/actor/PageProcessorActorTestkit.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/actor/PageProcessorActorTestkit.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/download/AsyncHttpClientTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/download/AsyncHttpClientTest.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/download/ChromeHeadlessDownloaderTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/download/ChromeHeadlessDownloaderTest.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/processor/HtmlParserSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/processor/HtmlParserSpec.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/proxy/ProxyProviderTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/proxy/ProxyProviderTest.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/queue/DuplicateStrategyTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/queue/DuplicateStrategyTest.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/schedule/ScheduleTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/schedule/ScheduleTest.scala -------------------------------------------------------------------------------- /crawler-core/src/test/scala/io/github/wtog/crawler/test/server/TestMockServer.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-core/src/test/scala/io/github/wtog/crawler/test/server/TestMockServer.scala -------------------------------------------------------------------------------- /crawler-example/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/resources/log4j2.xml -------------------------------------------------------------------------------- /crawler-example/src/main/resources/reference.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/resources/reference.conf -------------------------------------------------------------------------------- /crawler-example/src/main/scala/io/github/wtog/example/ExampleTrait.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/scala/io/github/wtog/example/ExampleTrait.scala -------------------------------------------------------------------------------- /crawler-example/src/main/scala/io/github/wtog/example/Main.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/scala/io/github/wtog/example/Main.scala -------------------------------------------------------------------------------- /crawler-example/src/main/scala/io/github/wtog/example/impl/BaiduPageProcessor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/scala/io/github/wtog/example/impl/BaiduPageProcessor.scala -------------------------------------------------------------------------------- /crawler-example/src/main/scala/io/github/wtog/example/impl/LianjiaErshouFangProcessor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/scala/io/github/wtog/example/impl/LianjiaErshouFangProcessor.scala -------------------------------------------------------------------------------- /crawler-example/src/main/scala/io/github/wtog/example/impl/LianjiaRentingProcessor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/scala/io/github/wtog/example/impl/LianjiaRentingProcessor.scala -------------------------------------------------------------------------------- /crawler-example/src/main/scala/io/github/wtog/example/impl/ZhihuAnswerPageProcessor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/scala/io/github/wtog/example/impl/ZhihuAnswerPageProcessor.scala -------------------------------------------------------------------------------- /crawler-example/src/main/scala/io/github/wtog/example/impl/flight/QunarPageProcessor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-example/src/main/scala/io/github/wtog/example/impl/flight/QunarPageProcessor.scala -------------------------------------------------------------------------------- /crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/db/DataSource.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/db/DataSource.scala -------------------------------------------------------------------------------- /crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/db/DataSourceInfo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/db/DataSourceInfo.scala -------------------------------------------------------------------------------- /crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/db/PostgreSQLPipeline.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/db/PostgreSQLPipeline.scala -------------------------------------------------------------------------------- /crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/file/CsvFilePipeline.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/file/CsvFilePipeline.scala -------------------------------------------------------------------------------- /crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/file/FilePipeline.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-pipeline/src/main/scala/io/github/wtog/crawler/pipeline/file/FilePipeline.scala -------------------------------------------------------------------------------- /crawler-pipeline/src/test/scala/io/github/wtog/crawler/pipeline/test/BasePipelineTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-pipeline/src/test/scala/io/github/wtog/crawler/pipeline/test/BasePipelineTest.scala -------------------------------------------------------------------------------- /crawler-pipeline/src/test/scala/io/github/wtog/crawler/pipeline/test/DataSourceTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/crawler-pipeline/src/test/scala/io/github/wtog/crawler/pipeline/test/DataSourceTest.scala -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/docker/build.sh -------------------------------------------------------------------------------- /project/.gnupg/pubring.gpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/project/.gnupg/pubring.gpg -------------------------------------------------------------------------------- /project/.gnupg/secring.gpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/project/.gnupg/secring.gpg -------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/project/Dependencies.scala -------------------------------------------------------------------------------- /project/Publish.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/project/Publish.scala -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.7.1 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/project/plugins.sbt -------------------------------------------------------------------------------- /push.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/push.sh -------------------------------------------------------------------------------- /utils/src/main/scala/io/github/wtog/utils/ConfigUtils.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/main/scala/io/github/wtog/utils/ConfigUtils.scala -------------------------------------------------------------------------------- /utils/src/main/scala/io/github/wtog/utils/JsonUtils.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/main/scala/io/github/wtog/utils/JsonUtils.scala -------------------------------------------------------------------------------- /utils/src/main/scala/io/github/wtog/utils/ReflectionUtils.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/main/scala/io/github/wtog/utils/ReflectionUtils.scala -------------------------------------------------------------------------------- /utils/src/main/scala/io/github/wtog/utils/RetryUtils.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/main/scala/io/github/wtog/utils/RetryUtils.scala -------------------------------------------------------------------------------- /utils/src/main/scala/io/github/wtog/utils/StringUtils.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/main/scala/io/github/wtog/utils/StringUtils.scala -------------------------------------------------------------------------------- /utils/src/main/scala/io/github/wtog/utils/logger/Logging.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/main/scala/io/github/wtog/utils/logger/Logging.scala -------------------------------------------------------------------------------- /utils/src/test/scala/io/github/wtog/utils/test/BaseTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/test/scala/io/github/wtog/utils/test/BaseTest.scala -------------------------------------------------------------------------------- /utils/src/test/scala/io/github/wtog/utils/test/JsonUtilsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/test/scala/io/github/wtog/utils/test/JsonUtilsTest.scala -------------------------------------------------------------------------------- /utils/src/test/scala/io/github/wtog/utils/test/RetryUtilsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/test/scala/io/github/wtog/utils/test/RetryUtilsTest.scala -------------------------------------------------------------------------------- /utils/src/test/scala/io/github/wtog/utils/test/jmh/StringUtilsBenchmark.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/test/scala/io/github/wtog/utils/test/jmh/StringUtilsBenchmark.scala -------------------------------------------------------------------------------- /utils/src/test/scala/io/github/wtog/utils/test/reflection/ReflectionUtilsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtog/web-crawler/HEAD/utils/src/test/scala/io/github/wtog/utils/test/reflection/ReflectionUtilsTest.scala -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | version in ThisBuild := "0.1.3-SNAPSHOT" 2 | --------------------------------------------------------------------------------