├── .gitignore ├── README.md ├── conf ├── fetch.xml ├── letv_conf_temp.xml ├── template.xml ├── test_youku_dm.xml ├── wasu_conf.xml ├── youku_conf.xml └── youku_conf_temp.xml ├── pom.xml ├── setpath.bat ├── src ├── main │ ├── java │ │ └── org │ │ │ └── sbs │ │ │ ├── goodcrawler │ │ │ ├── bootstrap │ │ │ │ ├── BootStrap.java │ │ │ │ ├── CrawlerStatus.java │ │ │ │ ├── foreman │ │ │ │ │ ├── ExtractForeman.java │ │ │ │ │ ├── FetchForeman.java │ │ │ │ │ ├── Foreman.java │ │ │ │ │ ├── StoreForeman.java │ │ │ │ │ └── package-info.java │ │ │ │ └── package-info.java │ │ │ ├── conf │ │ │ │ ├── Configurable.java │ │ │ │ ├── Configuration.java │ │ │ │ ├── GlobalConstants.java │ │ │ │ ├── JobConfigurationManager.java │ │ │ │ ├── PropertyConfigurationHelper.java │ │ │ │ ├── Worker.java │ │ │ │ └── package-info.java │ │ │ ├── exception │ │ │ │ ├── ConfigurationException.java │ │ │ │ ├── ExtractException.java │ │ │ │ ├── QueueException.java │ │ │ │ └── package-info.java │ │ │ ├── extractor │ │ │ │ ├── DefaultExtractWorker.java │ │ │ │ ├── DefaultExtractor.java │ │ │ │ ├── ExtractResult.java │ │ │ │ ├── ExtractWorker.java │ │ │ │ ├── Extractor.java │ │ │ │ ├── GCElement.java │ │ │ │ ├── GCPage.java │ │ │ │ ├── htmlelment │ │ │ │ │ ├── AbstractHtmlElement.java │ │ │ │ │ ├── CommonHtmlElement.java │ │ │ │ │ ├── HtmlAnchorElementOfPage.java │ │ │ │ │ ├── HtmlAnchorElementOfString.java │ │ │ │ │ ├── HtmlElementExtractType.java │ │ │ │ │ ├── HtmlElementType.java │ │ │ │ │ ├── HtmlPageElement.java │ │ │ │ │ └── package-info.java │ │ │ │ ├── package-info.java │ │ │ │ ├── selector │ │ │ │ │ ├── AbstractElementCssSelector.java │ │ │ │ │ ├── DateElementCssSelector.java │ │ │ │ │ ├── FileElementCssSelector.java │ │ │ │ │ ├── IFConditions.java │ │ │ │ │ ├── IntegerElementCssSelector.java │ │ │ │ │ ├── ListElementCssSelector.java │ │ │ │ │ ├── NumericaElementCssSelector.java │ │ │ │ │ ├── PageElementSelector.java │ │ │ │ │ ├── SelectPageElement.java │ │ │ │ │ ├── SelectorAttr.java │ │ │ │ │ ├── SelectorType.java │ │ │ │ │ ├── SetElementCssSelector.java │ │ │ │ │ ├── StringElementCssSelector.java │ │ │ │ │ ├── action │ │ │ │ │ │ ├── EmptyAction.java │ │ │ │ │ │ ├── FileSelectAction.java │ │ │ │ │ │ ├── IntegerSelectorAction.java │ │ │ │ │ │ ├── ListSelectorAction.java │ │ │ │ │ │ ├── SelectorAction.java │ │ │ │ │ │ ├── StringSelectorAction.java │ │ │ │ │ │ ├── file │ │ │ │ │ │ │ ├── DownLoadFileAction.java │ │ │ │ │ │ │ ├── DownLoadImageResizeAction.java │ │ │ │ │ │ │ └── FileActionType.java │ │ │ │ │ │ ├── integer │ │ │ │ │ │ │ ├── IntegerAbsAction.java │ │ │ │ │ │ │ ├── IntegerActionType.java │ │ │ │ │ │ │ ├── IntegerBetweenAction.java │ │ │ │ │ │ │ └── package-info.java │ │ │ │ │ │ ├── list │ │ │ │ │ │ │ ├── ListFilterAction.java │ │ │ │ │ │ │ └── package-info.java │ │ │ │ │ │ ├── package-info.java │ │ │ │ │ │ └── string │ │ │ │ │ │ │ ├── ActionFactory.java │ │ │ │ │ │ │ ├── StringActionType.java │ │ │ │ │ │ │ ├── StringAfterAction.java │ │ │ │ │ │ │ ├── StringAfterLastAction.java │ │ │ │ │ │ │ ├── StringBeforeAction.java │ │ │ │ │ │ │ ├── StringBeforeLastAction.java │ │ │ │ │ │ │ ├── StringBetweenAction.java │ │ │ │ │ │ │ ├── StringFilterAction.java │ │ │ │ │ │ │ ├── StringPerfixAction.java │ │ │ │ │ │ │ ├── StringReplaceAction.java │ │ │ │ │ │ │ ├── StringSplitAction.java │ │ │ │ │ │ │ ├── StringSubAction.java │ │ │ │ │ │ │ ├── StringSuffixAction.java │ │ │ │ │ │ │ └── package-info.java │ │ │ │ │ ├── exception │ │ │ │ │ │ ├── DownLoadException.java │ │ │ │ │ │ ├── IntegerBetweenExpressionException.java │ │ │ │ │ │ ├── SelectorConfigException.java │ │ │ │ │ │ └── package-info.java │ │ │ │ │ ├── expression │ │ │ │ │ │ ├── GrExpression.java │ │ │ │ │ │ ├── SimpleExpression.java │ │ │ │ │ │ └── SimpleExpressionExtent.java │ │ │ │ │ ├── factory │ │ │ │ │ │ └── ElementCssSelectorFactory.java │ │ │ │ │ └── package-info.java │ │ │ │ └── template │ │ │ │ │ ├── ExtractTemplate.java │ │ │ │ │ └── package-info.java │ │ │ ├── fetcher │ │ │ │ ├── AjaxCallFetcher.java │ │ │ │ ├── CustomFetchStatus.java │ │ │ │ ├── DefaultFetchWorker.java │ │ │ │ ├── FailedPageBackup.java │ │ │ │ ├── FetchStatus.java │ │ │ │ ├── FetchWorker.java │ │ │ │ ├── Fetcher.java │ │ │ │ ├── FetcherInstance.java │ │ │ │ ├── FetcherType.java │ │ │ │ ├── IdleConnectionMonitorThread.java │ │ │ │ ├── PageFetcher.java │ │ │ │ ├── ResynchronizingAjaxController.java │ │ │ │ └── package-info.java │ │ │ ├── jobconf │ │ │ │ ├── ExtractConfig.java │ │ │ │ ├── FetchConfig.java │ │ │ │ ├── JobConfig.java │ │ │ │ ├── StoreConfig.java │ │ │ │ └── package-info.java │ │ │ ├── page │ │ │ │ ├── BinaryParseData.java │ │ │ │ ├── ExtractedPage.java │ │ │ │ ├── ExtractedUrlAnchorPair.java │ │ │ │ ├── HtmlContentHandler.java │ │ │ │ ├── HtmlParseData.java │ │ │ │ ├── Page.java │ │ │ │ ├── PageFetchResult.java │ │ │ │ ├── ParseData.java │ │ │ │ ├── Parser.java │ │ │ │ ├── TextParseData.java │ │ │ │ └── package-info.java │ │ │ ├── plugin │ │ │ │ ├── EsClient.java │ │ │ │ ├── ExBulk.java │ │ │ │ ├── IndexScanner.java │ │ │ │ ├── ReIndex.java │ │ │ │ ├── classloader │ │ │ │ │ ├── CommonClassLoader.java │ │ │ │ │ └── PluginClassLoader.java │ │ │ │ ├── extract │ │ │ │ │ ├── ExtractYouku.java │ │ │ │ │ ├── Extractor66ys.java │ │ │ │ │ ├── ExtractorDytt8.java │ │ │ │ │ └── package-info.java │ │ │ │ ├── package-info.java │ │ │ │ └── storage │ │ │ │ │ ├── ElasticSearchStorage.java │ │ │ │ │ ├── Movie.java │ │ │ │ │ ├── MovieSource.java │ │ │ │ │ ├── Prepare.java │ │ │ │ │ ├── p │ │ │ │ │ ├── IESStoragePlugin.java │ │ │ │ │ └── WasuEsStorePlugin.java │ │ │ │ │ └── package-info.java │ │ │ ├── schedule │ │ │ │ ├── ReCraw.java │ │ │ │ └── RecrawFetherWorkor.java │ │ │ └── storage │ │ │ │ ├── DefaultStoreWorker.java │ │ │ │ ├── LocalFileStorage.java │ │ │ │ ├── Storage.java │ │ │ │ ├── StorageType.java │ │ │ │ ├── StoreResult.java │ │ │ │ ├── StoreWorker.java │ │ │ │ └── package-info.java │ │ │ ├── jetty │ │ │ ├── JettyFactory.java │ │ │ └── StartServer.java │ │ │ ├── pendingqueue │ │ │ ├── AbsPendingQueue.java │ │ │ ├── PendRecraw.java │ │ │ ├── PendingManager.java │ │ │ ├── PendingPages.java │ │ │ ├── PendingStore.java │ │ │ ├── PendingUrls.java │ │ │ └── package-info.java │ │ │ ├── robotstxt │ │ │ ├── HostDirectives.java │ │ │ ├── RobotstxtConfig.java │ │ │ ├── RobotstxtParser.java │ │ │ ├── RobotstxtServer.java │ │ │ └── RuleSet.java │ │ │ ├── url │ │ │ ├── TLDList.java │ │ │ ├── URLCanonicalizer.java │ │ │ ├── UlrFilters.java │ │ │ ├── UrlResolver.java │ │ │ ├── UrlSignatureSet.java │ │ │ ├── WebURL.java │ │ │ └── package-info.java │ │ │ ├── util │ │ │ ├── BinaryDateDwonLoader.java │ │ │ ├── BloomFilter.java │ │ │ ├── BloomfilterHelper.java │ │ │ ├── CharUtil.java │ │ │ ├── CheckIfUniqueUrl.java │ │ │ ├── CheckIfUniqueUrlByBloomfilter.java │ │ │ ├── CheckIfUniqueUrlByMd5.java │ │ │ ├── ChineseSpelling.java │ │ │ ├── DateTimeUtil.java │ │ │ ├── EncryptUtils.java │ │ │ ├── IO.java │ │ │ ├── ImageCompress.java │ │ │ ├── ImgUtil.java │ │ │ ├── JsonUtil.java │ │ │ ├── MD5Utils.java │ │ │ ├── MapUtils.java │ │ │ ├── MurmurHash.java │ │ │ ├── PinyinUtil.java │ │ │ ├── RegexList.java │ │ │ ├── Simhash.java │ │ │ ├── StringHelper.java │ │ │ ├── StringUtil.java │ │ │ ├── UrlUtils.java │ │ │ ├── Util.java │ │ │ ├── XmlConverUtil.java │ │ │ ├── download │ │ │ │ ├── DownLoadPool.java │ │ │ │ ├── DownloadInfo.java │ │ │ │ └── MultiThreadDownload.java │ │ │ └── image │ │ │ │ ├── ImageResize.java │ │ │ │ └── ImageResizePool.java │ │ │ └── web │ │ │ ├── ContextListener.java │ │ │ ├── CrawlerManager.java │ │ │ ├── GoodServlet.java │ │ │ ├── Start.java │ │ │ ├── Status.java │ │ │ ├── Stop.java │ │ │ └── package-info.java │ ├── resources │ │ ├── conf.properties │ │ ├── default_mapping.json │ │ ├── job_conf.xml │ │ ├── log4j.xml │ │ ├── logback.xml │ │ ├── mapping.json │ │ ├── tld-names.txt │ │ └── webdefault-windows.xml │ └── webapp │ │ ├── META-INF │ │ └── MANIFEST.MF │ │ ├── WEB-INF │ │ └── web.xml │ │ └── index.jsp └── test │ └── java │ └── org │ └── sbs │ ├── AppTest.java │ ├── ListLinks.java │ ├── T.java │ ├── extract │ ├── TestWasu.java │ ├── TestYouku.java │ └── Tester.java │ └── htmlunit │ ├── HtmlUnitTest.java │ ├── element │ ├── GcElementTest.java │ └── package-info.java │ └── package-info.java └── start.bat /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/README.md -------------------------------------------------------------------------------- /conf/fetch.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/conf/fetch.xml -------------------------------------------------------------------------------- /conf/letv_conf_temp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/conf/letv_conf_temp.xml -------------------------------------------------------------------------------- /conf/template.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/conf/template.xml -------------------------------------------------------------------------------- /conf/test_youku_dm.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/conf/test_youku_dm.xml -------------------------------------------------------------------------------- /conf/wasu_conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/conf/wasu_conf.xml -------------------------------------------------------------------------------- /conf/youku_conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/conf/youku_conf.xml -------------------------------------------------------------------------------- /conf/youku_conf_temp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/conf/youku_conf_temp.xml -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/pom.xml -------------------------------------------------------------------------------- /setpath.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/setpath.bat -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/bootstrap/BootStrap.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/bootstrap/BootStrap.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/bootstrap/CrawlerStatus.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/bootstrap/CrawlerStatus.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/bootstrap/foreman/ExtractForeman.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/bootstrap/foreman/ExtractForeman.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/bootstrap/foreman/FetchForeman.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/bootstrap/foreman/FetchForeman.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/bootstrap/foreman/Foreman.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/bootstrap/foreman/Foreman.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/bootstrap/foreman/StoreForeman.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/bootstrap/foreman/StoreForeman.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/bootstrap/foreman/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/bootstrap/foreman/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/bootstrap/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/bootstrap/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/conf/Configurable.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/conf/Configurable.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/conf/Configuration.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/conf/Configuration.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/conf/GlobalConstants.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/conf/GlobalConstants.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/conf/JobConfigurationManager.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/conf/JobConfigurationManager.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/conf/PropertyConfigurationHelper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/conf/PropertyConfigurationHelper.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/conf/Worker.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/conf/Worker.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/conf/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/conf/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/exception/ConfigurationException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/exception/ConfigurationException.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/exception/ExtractException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/exception/ExtractException.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/exception/QueueException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/exception/QueueException.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/exception/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/exception/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/DefaultExtractWorker.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/DefaultExtractWorker.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/DefaultExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/DefaultExtractor.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/ExtractResult.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/ExtractResult.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/ExtractWorker.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/ExtractWorker.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/Extractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/Extractor.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/GCElement.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/GCElement.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/GCPage.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/GCPage.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/htmlelment/AbstractHtmlElement.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/htmlelment/AbstractHtmlElement.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/htmlelment/CommonHtmlElement.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/htmlelment/CommonHtmlElement.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlAnchorElementOfPage.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlAnchorElementOfPage.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlAnchorElementOfString.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlAnchorElementOfString.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlElementExtractType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlElementExtractType.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlElementType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlElementType.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlPageElement.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/htmlelment/HtmlPageElement.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/htmlelment/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/htmlelment/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/AbstractElementCssSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/AbstractElementCssSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/DateElementCssSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/DateElementCssSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/FileElementCssSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/FileElementCssSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/IFConditions.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/IFConditions.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/IntegerElementCssSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/IntegerElementCssSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/ListElementCssSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/ListElementCssSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/NumericaElementCssSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/NumericaElementCssSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/PageElementSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/PageElementSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/SelectPageElement.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/SelectPageElement.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/SelectorAttr.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/SelectorAttr.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/SelectorType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/SelectorType.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/SetElementCssSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/SetElementCssSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/StringElementCssSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/StringElementCssSelector.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/EmptyAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/EmptyAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/FileSelectAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/FileSelectAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/IntegerSelectorAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/IntegerSelectorAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/ListSelectorAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/ListSelectorAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/SelectorAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/SelectorAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/StringSelectorAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/StringSelectorAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/file/DownLoadFileAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/file/DownLoadFileAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/file/DownLoadImageResizeAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/file/DownLoadImageResizeAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/file/FileActionType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/file/FileActionType.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/integer/IntegerAbsAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/integer/IntegerAbsAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/integer/IntegerActionType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/integer/IntegerActionType.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/integer/IntegerBetweenAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/integer/IntegerBetweenAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/integer/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/integer/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/list/ListFilterAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/list/ListFilterAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/list/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/list/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/ActionFactory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/ActionFactory.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringActionType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringActionType.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringAfterAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringAfterAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringAfterLastAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringAfterLastAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringBeforeAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringBeforeAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringBeforeLastAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringBeforeLastAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringBetweenAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringBetweenAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringFilterAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringFilterAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringPerfixAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringPerfixAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringReplaceAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringReplaceAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringSplitAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringSplitAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringSubAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringSubAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringSuffixAction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/StringSuffixAction.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/action/string/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/exception/DownLoadException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/exception/DownLoadException.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/exception/IntegerBetweenExpressionException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/exception/IntegerBetweenExpressionException.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/exception/SelectorConfigException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/exception/SelectorConfigException.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/exception/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/exception/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/expression/GrExpression.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/expression/GrExpression.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/expression/SimpleExpression.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/expression/SimpleExpression.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/expression/SimpleExpressionExtent.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/expression/SimpleExpressionExtent.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/factory/ElementCssSelectorFactory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/factory/ElementCssSelectorFactory.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/selector/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/selector/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/template/ExtractTemplate.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/template/ExtractTemplate.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/extractor/template/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/extractor/template/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/AjaxCallFetcher.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/AjaxCallFetcher.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/CustomFetchStatus.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/CustomFetchStatus.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/DefaultFetchWorker.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/DefaultFetchWorker.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/FailedPageBackup.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/FailedPageBackup.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/FetchStatus.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/FetchStatus.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/FetchWorker.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/FetchWorker.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/Fetcher.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/Fetcher.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/FetcherInstance.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/FetcherInstance.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/FetcherType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/FetcherType.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/IdleConnectionMonitorThread.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/IdleConnectionMonitorThread.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/PageFetcher.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/PageFetcher.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/ResynchronizingAjaxController.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/ResynchronizingAjaxController.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/fetcher/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/fetcher/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/jobconf/ExtractConfig.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/jobconf/ExtractConfig.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/jobconf/FetchConfig.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/jobconf/FetchConfig.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/jobconf/JobConfig.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/jobconf/JobConfig.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/jobconf/StoreConfig.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/jobconf/StoreConfig.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/jobconf/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/jobconf/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/BinaryParseData.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/BinaryParseData.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/ExtractedPage.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/ExtractedPage.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/ExtractedUrlAnchorPair.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/ExtractedUrlAnchorPair.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/HtmlContentHandler.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/HtmlContentHandler.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/HtmlParseData.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/HtmlParseData.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/Page.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/Page.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/PageFetchResult.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/PageFetchResult.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/ParseData.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/ParseData.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/Parser.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/Parser.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/TextParseData.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/TextParseData.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/page/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/page/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/EsClient.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/EsClient.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/ExBulk.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/ExBulk.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/IndexScanner.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/IndexScanner.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/ReIndex.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/ReIndex.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/classloader/CommonClassLoader.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/classloader/CommonClassLoader.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/classloader/PluginClassLoader.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/classloader/PluginClassLoader.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/extract/ExtractYouku.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/extract/ExtractYouku.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/extract/Extractor66ys.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/extract/Extractor66ys.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/extract/ExtractorDytt8.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/extract/ExtractorDytt8.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/extract/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/extract/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/storage/ElasticSearchStorage.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/storage/ElasticSearchStorage.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/storage/Movie.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/storage/Movie.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/storage/MovieSource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/storage/MovieSource.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/storage/Prepare.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/storage/Prepare.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/storage/p/IESStoragePlugin.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/storage/p/IESStoragePlugin.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/storage/p/WasuEsStorePlugin.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/storage/p/WasuEsStorePlugin.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/plugin/storage/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/plugin/storage/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/schedule/ReCraw.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/schedule/ReCraw.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/schedule/RecrawFetherWorkor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/schedule/RecrawFetherWorkor.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/storage/DefaultStoreWorker.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/storage/DefaultStoreWorker.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/storage/LocalFileStorage.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/storage/LocalFileStorage.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/storage/Storage.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/storage/Storage.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/storage/StorageType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/storage/StorageType.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/storage/StoreResult.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/storage/StoreResult.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/storage/StoreWorker.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/storage/StoreWorker.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/goodcrawler/storage/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/goodcrawler/storage/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/jetty/JettyFactory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/jetty/JettyFactory.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/jetty/StartServer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/jetty/StartServer.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/pendingqueue/AbsPendingQueue.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/pendingqueue/AbsPendingQueue.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/pendingqueue/PendRecraw.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/pendingqueue/PendRecraw.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/pendingqueue/PendingManager.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/pendingqueue/PendingManager.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/pendingqueue/PendingPages.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/pendingqueue/PendingPages.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/pendingqueue/PendingStore.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/pendingqueue/PendingStore.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/pendingqueue/PendingUrls.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/pendingqueue/PendingUrls.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/pendingqueue/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/pendingqueue/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/robotstxt/HostDirectives.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/robotstxt/HostDirectives.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/robotstxt/RobotstxtConfig.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/robotstxt/RobotstxtConfig.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/robotstxt/RobotstxtParser.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/robotstxt/RobotstxtParser.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/robotstxt/RobotstxtServer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/robotstxt/RobotstxtServer.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/robotstxt/RuleSet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/robotstxt/RuleSet.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/url/TLDList.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/url/TLDList.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/url/URLCanonicalizer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/url/URLCanonicalizer.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/url/UlrFilters.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/url/UlrFilters.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/url/UrlResolver.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/url/UrlResolver.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/url/UrlSignatureSet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/url/UrlSignatureSet.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/url/WebURL.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/url/WebURL.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/url/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/url/package-info.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/BinaryDateDwonLoader.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/BinaryDateDwonLoader.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/BloomFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/BloomFilter.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/BloomfilterHelper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/BloomfilterHelper.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/CharUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/CharUtil.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/CheckIfUniqueUrl.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/CheckIfUniqueUrl.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/CheckIfUniqueUrlByBloomfilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/CheckIfUniqueUrlByBloomfilter.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/CheckIfUniqueUrlByMd5.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/CheckIfUniqueUrlByMd5.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/ChineseSpelling.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/ChineseSpelling.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/DateTimeUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/DateTimeUtil.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/EncryptUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/EncryptUtils.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/IO.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/IO.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/ImageCompress.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/ImageCompress.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/ImgUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/ImgUtil.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/JsonUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/JsonUtil.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/MD5Utils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/MD5Utils.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/MapUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/MapUtils.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/MurmurHash.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/MurmurHash.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/PinyinUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/PinyinUtil.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/RegexList.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/RegexList.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/Simhash.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/Simhash.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/StringHelper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/StringHelper.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/StringUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/StringUtil.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/UrlUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/UrlUtils.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/Util.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/Util.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/XmlConverUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/XmlConverUtil.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/download/DownLoadPool.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/download/DownLoadPool.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/download/DownloadInfo.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/download/DownloadInfo.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/download/MultiThreadDownload.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/download/MultiThreadDownload.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/image/ImageResize.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/image/ImageResize.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/util/image/ImageResizePool.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/util/image/ImageResizePool.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/web/ContextListener.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/web/ContextListener.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/web/CrawlerManager.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/web/CrawlerManager.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/web/GoodServlet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/web/GoodServlet.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/web/Start.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/web/Start.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/web/Status.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/web/Status.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/web/Stop.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/web/Stop.java -------------------------------------------------------------------------------- /src/main/java/org/sbs/web/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/java/org/sbs/web/package-info.java -------------------------------------------------------------------------------- /src/main/resources/conf.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/resources/conf.properties -------------------------------------------------------------------------------- /src/main/resources/default_mapping.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/resources/default_mapping.json -------------------------------------------------------------------------------- /src/main/resources/job_conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/resources/job_conf.xml -------------------------------------------------------------------------------- /src/main/resources/log4j.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/resources/log4j.xml -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/resources/logback.xml -------------------------------------------------------------------------------- /src/main/resources/mapping.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/resources/mapping.json -------------------------------------------------------------------------------- /src/main/resources/tld-names.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/resources/tld-names.txt -------------------------------------------------------------------------------- /src/main/resources/webdefault-windows.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/resources/webdefault-windows.xml -------------------------------------------------------------------------------- /src/main/webapp/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Class-Path: 3 | 4 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/webapp/WEB-INF/web.xml -------------------------------------------------------------------------------- /src/main/webapp/index.jsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/main/webapp/index.jsp -------------------------------------------------------------------------------- /src/test/java/org/sbs/AppTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/AppTest.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/ListLinks.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/ListLinks.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/T.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/T.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/extract/TestWasu.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/extract/TestWasu.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/extract/TestYouku.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/extract/TestYouku.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/extract/Tester.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/extract/Tester.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/htmlunit/HtmlUnitTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/htmlunit/HtmlUnitTest.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/htmlunit/element/GcElementTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/htmlunit/element/GcElementTest.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/htmlunit/element/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/htmlunit/element/package-info.java -------------------------------------------------------------------------------- /src/test/java/org/sbs/htmlunit/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/src/test/java/org/sbs/htmlunit/package-info.java -------------------------------------------------------------------------------- /start.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenbaise/goodcrawler/HEAD/start.bat --------------------------------------------------------------------------------