├── .github ├── dependabot.yml └── workflows │ ├── docker-image.yml │ └── maven.yml ├── .gitignore ├── .readthedocs.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── RELEASING.md ├── SECURITY.md ├── commons ├── pom.xml └── src │ ├── main │ ├── java │ │ ├── org │ │ │ └── archive │ │ │ │ ├── bdb │ │ │ │ ├── AutoKryo.java │ │ │ │ ├── BdbModule.java │ │ │ │ ├── DisposableStoredSortedMap.java │ │ │ │ ├── KryoBinding.java │ │ │ │ └── StoredQueue.java │ │ │ │ ├── checkpointing │ │ │ │ ├── Checkpoint.java │ │ │ │ └── Checkpointable.java │ │ │ │ ├── io │ │ │ │ ├── Arc2Warc.java │ │ │ │ ├── CrawlerJournal.java │ │ │ │ ├── ReadSourceEditor.java │ │ │ │ └── Warc2Arc.java │ │ │ │ ├── net │ │ │ │ ├── ClientFTP.java │ │ │ │ ├── ClientSFTP.java │ │ │ │ ├── MitmProxy.java │ │ │ │ ├── UURI.java │ │ │ │ ├── UURIFactory.java │ │ │ │ └── webdriver │ │ │ │ │ ├── BiDiEvent.java │ │ │ │ │ ├── BiDiJson.java │ │ │ │ │ ├── BiDiModule.java │ │ │ │ │ ├── Browser.java │ │ │ │ │ ├── BrowsingContext.java │ │ │ │ │ ├── LocalWebDriverBiDi.java │ │ │ │ │ ├── Network.java │ │ │ │ │ ├── Script.java │ │ │ │ │ ├── Session.java │ │ │ │ │ ├── WebDriverBiDi.java │ │ │ │ │ ├── WebDriverException.java │ │ │ │ │ └── WebDriverTimeoutException.java │ │ │ │ ├── spring │ │ │ │ ├── BeanFieldsPatternValidator.java │ │ │ │ ├── ConfigFile.java │ │ │ │ ├── ConfigFileEditor.java │ │ │ │ ├── ConfigPath.java │ │ │ │ ├── ConfigPathConfigurer.java │ │ │ │ ├── ConfigPathEditor.java │ │ │ │ ├── ConfigString.java │ │ │ │ ├── HasKeyedProperties.java │ │ │ │ ├── HasValidator.java │ │ │ │ ├── HeritrixLifecycleProcessor.java │ │ │ │ ├── KeyedProperties.java │ │ │ │ ├── OverlayContext.java │ │ │ │ ├── OverlayMapsSource.java │ │ │ │ ├── PathSharingContext.java │ │ │ │ ├── Required.java │ │ │ │ ├── RequiredAnnotationBeanPostProcessor.java │ │ │ │ ├── Sheet.java │ │ │ │ └── WriteTarget.java │ │ │ │ ├── surt │ │ │ │ └── SURTTokenizer.java │ │ │ │ └── util │ │ │ │ ├── AbstractLongFPSet.java │ │ │ │ ├── Base32.java │ │ │ │ ├── BloomFilter.java │ │ │ │ ├── BloomFilter64bit.java │ │ │ │ ├── Histotable.java │ │ │ │ ├── IdentityCacheable.java │ │ │ │ ├── IdleBarrier.java │ │ │ │ ├── Iteratorable.java │ │ │ │ ├── JSONUtils.java │ │ │ │ ├── JndiUtils.java │ │ │ │ ├── KeyTool.java │ │ │ │ ├── LRU.java │ │ │ │ ├── LongToIntConsistentHash.java │ │ │ │ ├── ObjectIdentityBdbManualCache.java │ │ │ │ ├── ObjectIdentityCache.java │ │ │ │ ├── ObjectIdentityMemCache.java │ │ │ │ ├── OneLineSimpleLogger.java │ │ │ │ ├── PaddingStringBuffer.java │ │ │ │ ├── PrefixFinder.java │ │ │ │ ├── ReportUtils.java │ │ │ │ ├── Supplier.java │ │ │ │ ├── UriUtils.java │ │ │ │ ├── bdbje │ │ │ │ └── EnhancedEnvironment.java │ │ │ │ ├── fingerprint │ │ │ │ ├── ArrayLongFPCache.java │ │ │ │ ├── LongFPSet.java │ │ │ │ ├── LongFPSetCache.java │ │ │ │ └── MemLongFPSet.java │ │ │ │ ├── iterator │ │ │ │ └── CompositeIterator.java │ │ │ │ └── ms │ │ │ │ ├── BlockFileSystem.java │ │ │ │ ├── BlockInputStream.java │ │ │ │ ├── Cp1252.java │ │ │ │ ├── DefaultBlockFileSystem.java │ │ │ │ ├── DefaultEntry.java │ │ │ │ ├── Doc.java │ │ │ │ ├── Entry.java │ │ │ │ ├── HeaderBlock.java │ │ │ │ ├── Piece.java │ │ │ │ ├── PieceReader.java │ │ │ │ ├── PieceTable.java │ │ │ │ └── package.html │ │ └── st │ │ │ └── ata │ │ │ └── util │ │ │ └── FPGenerator.java │ └── resources │ │ └── org │ │ └── archive │ │ ├── i18n │ │ ├── LocaleCache_en.utf8 │ │ ├── LocaleCache_en_CA.utf8 │ │ ├── LocaleCache_en_US_borkborkbork.utf8 │ │ └── LocaleCache_jp.utf8 │ │ └── util │ │ ├── timestamp.txt │ │ └── version.txt │ ├── site │ └── overview.html │ └── test │ ├── java │ └── org │ │ └── archive │ │ ├── bdb │ │ └── StoredQueueTest.java │ │ ├── io │ │ └── ArchiveTest.java │ │ ├── net │ │ └── MitmProxyTest.java │ │ ├── settings │ │ └── file │ │ │ ├── BdbModuleTest.java │ │ │ └── PrefixFinderTest.java │ │ ├── spring │ │ └── PathSharingContextTest.java │ │ ├── surt │ │ └── SURTTokenizerTest.java │ │ └── util │ │ ├── BenchmarkBlooms.java │ │ ├── BloomFilter64bitTest.java │ │ ├── BloomFilterTest.java │ │ ├── IdentityCacheableWrapper.java │ │ ├── LongToIntConsistentHashTest.java │ │ ├── ObjectIdentityBdbManualCacheTest.java │ │ ├── PaddingStringBufferTest.java │ │ ├── SURTTest.java │ │ ├── SurtPrefixSetTest.java │ │ ├── TextUtilsTest.java │ │ ├── UriUtilsTest.java │ │ ├── fingerprint │ │ ├── ArrayLongFPCacheTest.java │ │ ├── LongFPSetCacheTest.java │ │ ├── LongFPSetTestCase.java │ │ └── MemLongFPSetTest.java │ │ └── ms │ │ ├── 15336-doc-preface.doc │ │ ├── 15336-doc-preface.txt │ │ ├── DocTest.java │ │ ├── PieceReaderTest.java │ │ ├── X.doc │ │ └── X.txt │ └── resources │ └── org │ └── archive │ ├── settings │ └── path │ │ ├── anonymous.resolved.txt │ │ ├── global.get.txt │ │ ├── global.resolved.txt │ │ ├── o1.get.txt │ │ └── o1.resolved.txt │ └── spring │ ├── PathSharingContextTestBeans.cxml │ └── PathSharingContextTestBeans.groovy ├── contrib ├── pom.xml └── src │ ├── main │ ├── assembly │ │ └── dist.xml │ └── java │ │ └── org │ │ └── archive │ │ ├── crawler │ │ ├── event │ │ │ ├── AMQPUrlPublishedEvent.java │ │ │ └── AMQPUrlReceivedEvent.java │ │ ├── frontier │ │ │ └── AMQPUrlReceiver.java │ │ ├── prefetch │ │ │ ├── HostQuotaEnforcer.java │ │ │ └── SourceQuotaEnforcer.java │ │ └── reporting │ │ │ └── XmlCrawlSummaryReport.java │ │ ├── modules │ │ ├── AMQPProducer.java │ │ ├── AMQPProducerProcessor.java │ │ ├── AMQPPublishProcessor.java │ │ ├── AMQPUrlWaiter.java │ │ ├── deciderules │ │ │ ├── DecideRuleSequenceWithAMQPFeed.java │ │ │ └── ExpressionDecideRule.java │ │ ├── extractor │ │ │ ├── ExtractorPDFContent.java │ │ │ ├── ExtractorYoutubeChannelFormatStream.java │ │ │ ├── ExtractorYoutubeDL.java │ │ │ ├── ExtractorYoutubeFormatStream.java │ │ │ └── KnowledgableExtractorJS.java │ │ ├── postprocessor │ │ │ ├── AMQPCrawlLogFeed.java │ │ │ ├── CrawlLogJsonBuilder.java │ │ │ ├── KafkaCrawlLogFeed.java │ │ │ ├── TroughCrawlLogFeed.java │ │ │ └── WARCLimitEnforcer.java │ │ └── recrawl │ │ │ ├── FetchHistoryHelper.java │ │ │ ├── TroughContentDigestHistory.java │ │ │ └── wbm │ │ │ └── WbmPersistLoadProcessor.java │ │ └── trough │ │ └── TroughClient.java │ └── test │ ├── java │ └── org │ │ └── archive │ │ └── modules │ │ ├── extractor │ │ ├── ExtractorPDFContentTest.java │ │ ├── ExtractorYoutubeChannelFormatStreamTest.java │ │ ├── ExtractorYoutubeDLTest.java │ │ └── ExtractorYoutubeFormatStreamTest.java │ │ └── recrawl │ │ └── wbm │ │ └── WbmPersistLoadProcessorTest.java │ └── resources │ ├── ExtractorPDFContentTest1.pdf │ ├── ExtractorPDFContentTest2.pdf │ ├── ExtractorPDFContentTest3.pdf │ ├── ExtractorPDFContentTest4.pdf │ ├── ExtractorYoutubeChannelFormatStream.txt │ ├── ExtractorYoutubeDL.json │ ├── ExtractorYoutubeFormatStream.txt │ └── ExtractorYoutubeFormatStream2.txt ├── dist ├── LICENSE.txt ├── pom.xml └── src │ ├── main │ ├── assembly │ │ ├── dist.xml │ │ └── src.xml │ ├── bin │ │ ├── arcreader │ │ ├── arcreader.cmd │ │ ├── dependencies.xsl │ │ ├── extractor │ │ ├── extractor.cmd │ │ ├── foreground_heritrix │ │ ├── foreground_heritrix.cmd │ │ ├── heritrix │ │ ├── heritrix.cmd │ │ ├── hoppath.pl │ │ ├── htmlextractor │ │ ├── htmlextractor.cmd │ │ ├── make_reports.pl │ │ ├── manifest_bundle.pl │ │ └── xdocToTxt.xsl │ ├── conf │ │ ├── heritrix.cacerts │ │ ├── jobs │ │ │ └── .gitignore │ │ └── logging.properties │ ├── extras │ │ └── pagerank │ │ │ ├── GenGraph.java │ │ │ ├── PageRank.java │ │ │ ├── README.txt │ │ │ ├── assignUrlIndex.pl │ │ │ ├── pageRankSetup.pl │ │ │ └── run-pr.sh │ └── licenses │ │ ├── ant.LICENSE │ │ ├── ant.NOTICE │ │ ├── bsh.LICENSE │ │ ├── commons-cli.LICENSE │ │ ├── commons-cli.NOTICE │ │ ├── commons-codec.LICENSE │ │ ├── commons-codec.NOTICE │ │ ├── commons-el.LICENSE │ │ ├── commons-el.NOTICE │ │ ├── commons-io.LICENSE │ │ ├── commons-io.NOTICE │ │ ├── commons-lang.LICENSE │ │ ├── commons-lang.NOTICE │ │ ├── commons-logging.LICENSE │ │ ├── commons-net.LICENSE │ │ ├── commons-net.NOTICE │ │ ├── dnsjava-2.0.3.README │ │ ├── fastutil-5.0.7.LICENSE │ │ ├── jasper.LICENSE │ │ ├── javaswf.LICENSE │ │ ├── je-3.2.44.LICENSE │ │ ├── jericho-html-2.3.LICENSE │ │ ├── jets3t-0.5.0.LICENSE │ │ ├── jetty.LICENSE │ │ ├── junit.LICENSE.HTML │ │ ├── libidn.LICENSE │ │ ├── oro-2.0.8.LICENSE │ │ └── servlet-4.1.34.LICENSE │ └── test │ └── java │ └── org │ └── archive │ └── crawler │ └── BasicProfileTest.java ├── docgen ├── pom.xml └── src │ └── main │ ├── java │ └── org │ │ └── archive │ │ └── crawler │ │ └── BeanDocProcessor.java │ └── resources │ └── META-INF │ └── services │ └── javax.annotation.processing.Processor ├── docker ├── Dockerfile ├── Dockerfile.contrib ├── Makefile ├── README.md ├── docker-compose.yml └── entrypoint.sh ├── docs ├── Makefile ├── README.md ├── _ext │ └── beandoc.py ├── api.rst ├── bean-reference.rst ├── conf.py ├── configuring-jobs.rst ├── getting-started.rst ├── glossary.rst ├── index.rst ├── operating.rst └── requirements.txt ├── engine ├── .cvsignore ├── pom.xml ├── src │ ├── design │ │ ├── credentials.gif │ │ └── credentials.zargo │ ├── main │ │ ├── assembly │ │ │ └── dist.xml │ │ ├── java │ │ │ ├── META-INF │ │ │ │ └── MANIFEST-MF │ │ │ ├── freemarker_implicit.ftl │ │ │ └── org │ │ │ │ └── archive │ │ │ │ ├── crawler │ │ │ │ ├── Heritrix.java │ │ │ │ ├── datamodel │ │ │ │ │ └── UriUniqFilter.java │ │ │ │ ├── deciderules │ │ │ │ │ ├── ClassKeyMatchesRegexDecideRule.java │ │ │ │ │ └── package.html │ │ │ │ ├── doc-files │ │ │ │ │ ├── processing_steps.dia │ │ │ │ │ └── processing_steps.png │ │ │ │ ├── event │ │ │ │ │ ├── CrawlStateEvent.java │ │ │ │ │ ├── CrawlURIDispositionEvent.java │ │ │ │ │ └── StatSnapshotEvent.java │ │ │ │ ├── framework │ │ │ │ │ ├── ActionDirectory.java │ │ │ │ │ ├── BeanLookupBindings.java │ │ │ │ │ ├── CheckpointService.java │ │ │ │ │ ├── CheckpointSuccessEvent.java │ │ │ │ │ ├── CheckpointValidator.java │ │ │ │ │ ├── CrawlController.java │ │ │ │ │ ├── CrawlJob.java │ │ │ │ │ ├── CrawlLimitEnforcer.java │ │ │ │ │ ├── CrawlStatus.java │ │ │ │ │ ├── Engine.java │ │ │ │ │ ├── Frontier.java │ │ │ │ │ ├── Scoper.java │ │ │ │ │ ├── ToePool.java │ │ │ │ │ └── ToeThread.java │ │ │ │ ├── frontier │ │ │ │ │ ├── AbstractFrontier.java │ │ │ │ │ ├── AntiCalendarCostAssignmentPolicy.java │ │ │ │ │ ├── AssignmentLevelSurtQueueAssignmentPolicy.java │ │ │ │ │ ├── BdbFrontier.java │ │ │ │ │ ├── BdbMultipleWorkQueues.java │ │ │ │ │ ├── BdbWorkQueue.java │ │ │ │ │ ├── BucketQueueAssignmentPolicy.java │ │ │ │ │ ├── CostAssignmentPolicy.java │ │ │ │ │ ├── DelayedWorkQueue.java │ │ │ │ │ ├── FrontierJournal.java │ │ │ │ │ ├── HostnameQueueAssignmentPolicy.java │ │ │ │ │ ├── HostnameQueueAssignmentPolicyWithLimits.java │ │ │ │ │ ├── IPQueueAssignmentPolicy.java │ │ │ │ │ ├── QueueAssignmentPolicy.java │ │ │ │ │ ├── RecyclingSerialBinding.java │ │ │ │ │ ├── SurtAuthorityQueueAssignmentPolicy.java │ │ │ │ │ ├── SurtAuthorityQueueAssignmentPolicyWithLimits.java │ │ │ │ │ ├── URIAuthorityBasedQueueAssignmentPolicy.java │ │ │ │ │ ├── UnitCostAssignmentPolicy.java │ │ │ │ │ ├── WagCostAssignmentPolicy.java │ │ │ │ │ ├── WorkQueue.java │ │ │ │ │ ├── WorkQueueFrontier.java │ │ │ │ │ ├── ZeroCostAssignmentPolicy.java │ │ │ │ │ └── precedence │ │ │ │ │ │ ├── BaseQueuePrecedencePolicy.java │ │ │ │ │ │ ├── BaseUriPrecedencePolicy.java │ │ │ │ │ │ ├── CostUriPrecedencePolicy.java │ │ │ │ │ │ ├── HighestUriQueuePrecedencePolicy.java │ │ │ │ │ │ ├── HopsUriPrecedencePolicy.java │ │ │ │ │ │ ├── PrecedenceLoader.java │ │ │ │ │ │ ├── PrecedenceProvider.java │ │ │ │ │ │ ├── PreloadedUriPrecedencePolicy.java │ │ │ │ │ │ ├── QueuePrecedencePolicy.java │ │ │ │ │ │ ├── SimplePrecedenceProvider.java │ │ │ │ │ │ ├── SuccessCountsQueuePrecedencePolicy.java │ │ │ │ │ │ └── UriPrecedencePolicy.java │ │ │ │ ├── io │ │ │ │ │ ├── NonFatalErrorFormatter.java │ │ │ │ │ ├── RuntimeErrorFormatter.java │ │ │ │ │ ├── StatisticsLogFormatter.java │ │ │ │ │ ├── UriErrorFormatter.java │ │ │ │ │ └── UriProcessingFormatter.java │ │ │ │ ├── migrate │ │ │ │ │ └── MigrateH1to3Tool.java │ │ │ │ ├── monitor │ │ │ │ │ ├── DiskSpaceMonitor.java │ │ │ │ │ └── package-info.java │ │ │ │ ├── package.html │ │ │ │ ├── postprocessor │ │ │ │ │ ├── CandidatesProcessor.java │ │ │ │ │ ├── DispositionProcessor.java │ │ │ │ │ ├── LowDiskPauseProcessor.java │ │ │ │ │ ├── ReschedulingProcessor.java │ │ │ │ │ └── SupplementaryLinksScoper.java │ │ │ │ ├── prefetch │ │ │ │ │ ├── CandidateScoper.java │ │ │ │ │ ├── FrontierPreparer.java │ │ │ │ │ ├── PreconditionEnforcer.java │ │ │ │ │ ├── Preselector.java │ │ │ │ │ ├── QuotaEnforcer.java │ │ │ │ │ └── RuntimeLimitEnforcer.java │ │ │ │ ├── processor │ │ │ │ │ ├── BrowserProcessor.java │ │ │ │ │ ├── CrawlMapper.java │ │ │ │ │ ├── HashCrawlMapper.java │ │ │ │ │ └── LexicalCrawlMapper.java │ │ │ │ ├── reporting │ │ │ │ │ ├── AlertHandler.java │ │ │ │ │ ├── AlertThreadGroup.java │ │ │ │ │ ├── CrawlStatSnapshot.java │ │ │ │ │ ├── CrawlSummaryReport.java │ │ │ │ │ ├── CrawlerLoggerModule.java │ │ │ │ │ ├── FrontierNonemptyReport.java │ │ │ │ │ ├── FrontierSummaryReport.java │ │ │ │ │ ├── HostsReport.java │ │ │ │ │ ├── MimetypesReport.java │ │ │ │ │ ├── ProcessorsReport.java │ │ │ │ │ ├── Report.java │ │ │ │ │ ├── ResponseCodeReport.java │ │ │ │ │ ├── SeedRecord.java │ │ │ │ │ ├── SeedsReport.java │ │ │ │ │ ├── SourceTagsReport.java │ │ │ │ │ ├── StatisticsTracker.java │ │ │ │ │ └── ToeThreadsReport.java │ │ │ │ ├── restlet │ │ │ │ │ ├── BaseResource.java │ │ │ │ │ ├── BeanBrowseResource.java │ │ │ │ │ ├── BeanDocResource.java │ │ │ │ │ ├── DescriptorUpdater.java │ │ │ │ │ ├── EditRepresentation.java │ │ │ │ │ ├── EngineApplication.java │ │ │ │ │ ├── EngineResource.java │ │ │ │ │ ├── EnhDirectory.java │ │ │ │ │ ├── EnhDirectoryResource.java │ │ │ │ │ ├── Flash.java │ │ │ │ │ ├── JobRelatedResource.java │ │ │ │ │ ├── JobResource.java │ │ │ │ │ ├── NoSniHostCheckHttpsServerHelper.java │ │ │ │ │ ├── PagedRepresentation.java │ │ │ │ │ ├── RateLimitGuard.java │ │ │ │ │ ├── ReportGenResource.java │ │ │ │ │ ├── ScriptResource.java │ │ │ │ │ ├── ScriptingConsole.java │ │ │ │ │ ├── WebJars.java │ │ │ │ │ ├── XmlMarshaller.java │ │ │ │ │ └── models │ │ │ │ │ │ ├── BeansModel.java │ │ │ │ │ │ ├── CrawlJobModel.java │ │ │ │ │ │ ├── EngineModel.java │ │ │ │ │ │ ├── ScriptModel.java │ │ │ │ │ │ └── ViewModel.java │ │ │ │ ├── spring │ │ │ │ │ ├── DecideRuledSheetAssociation.java │ │ │ │ │ ├── SheetAssociation.java │ │ │ │ │ ├── SheetOverlaysManager.java │ │ │ │ │ └── SurtPrefixesSheetAssociation.java │ │ │ │ └── util │ │ │ │ │ ├── BdbUriUniqFilter.java │ │ │ │ │ ├── BenchmarkUriUniqFilters.java │ │ │ │ │ ├── BloomUriUniqFilter.java │ │ │ │ │ ├── CheckpointUtils.java │ │ │ │ │ ├── DiskFPMergeUriUniqFilter.java │ │ │ │ │ ├── FPMergeUriUniqFilter.java │ │ │ │ │ ├── FPUriUniqFilter.java │ │ │ │ │ ├── LogReader.java │ │ │ │ │ ├── Logs.java │ │ │ │ │ ├── MemFPMergeUriUniqFilter.java │ │ │ │ │ ├── MemUriUniqFilter.java │ │ │ │ │ ├── NoopUriUniqFilter.java │ │ │ │ │ ├── RecoveryLogMapper.java │ │ │ │ │ ├── SeedUrlNotFoundException.java │ │ │ │ │ ├── SetBasedUriUniqFilter.java │ │ │ │ │ └── TopNSet.java │ │ │ │ └── overview.html │ │ └── resources │ │ │ ├── arcMetaheaderBody.xsl │ │ │ └── org │ │ │ └── archive │ │ │ └── crawler │ │ │ ├── extras │ │ │ └── adaptive │ │ │ │ ├── AdaptiveRevisitFrontier_en.utf8 │ │ │ │ └── WaitEvaluator_en.utf8 │ │ │ ├── framework │ │ │ ├── CrawlControllerImpl_en.utf8 │ │ │ ├── CrawlScope_en.utf8 │ │ │ ├── CrawlerProcessor_en.utf8 │ │ │ └── Scoper_en.utf8 │ │ │ ├── frontier │ │ │ ├── AbstractFrontier_en.utf8 │ │ │ ├── BdbFrontier_en.utf8 │ │ │ ├── BucketQueueAssignmentPolicy_en.utf8 │ │ │ ├── WorkQueueFrontier_en.utf8 │ │ │ └── precedence │ │ │ │ ├── BaseQueuePrecedencePolicy_en.utf8 │ │ │ │ ├── BaseUriPrecedencePolicy_en.utf8 │ │ │ │ ├── CostUriPrecedencePolicy_en.utf8 │ │ │ │ ├── HighestUriQueuePrecedencePolicy_en.utf8 │ │ │ │ ├── HopsUriPrecedencePolicy_en.utf8 │ │ │ │ └── SuccessCountsQueuePrecedencePolicy_en.utf8 │ │ │ ├── migrate │ │ │ ├── H1toH3.map │ │ │ └── migrate-template-crawler-beans.cxml │ │ │ ├── postprocessor │ │ │ ├── CrawlStateUpdater_en.utf8 │ │ │ ├── FrontierScheduler_en.utf8 │ │ │ ├── LinksScoper_en.utf8 │ │ │ ├── LowDiskPauseProcessor_en.utf8 │ │ │ └── SupplementaryLinksScoper_en.utf8 │ │ │ ├── prefetch │ │ │ ├── PreconditionEnforcer_en.utf8 │ │ │ ├── Preselector_en.utf8 │ │ │ ├── QuotaEnforcer_en.utf8 │ │ │ └── RuntimeLimitEnforcer_en.utf8 │ │ │ ├── processor │ │ │ ├── CrawlMapper_en.utf8 │ │ │ ├── HashCrawlMapper_en.utf8 │ │ │ └── LexicalCrawlMapper_en.utf8 │ │ │ └── restlet │ │ │ ├── Beans.ftl │ │ │ ├── Edit.ftl │ │ │ ├── Engine.ftl │ │ │ ├── Job.ftl │ │ │ ├── Script.ftl │ │ │ ├── css │ │ │ ├── foundation.css │ │ │ ├── foundation.min.css │ │ │ ├── heritrix.css │ │ │ └── normalize.css │ │ │ ├── img │ │ │ └── heritrix-logo.gif │ │ │ ├── js │ │ │ ├── foundation.min.js │ │ │ ├── foundation │ │ │ │ ├── foundation.alerts.js │ │ │ │ ├── foundation.clearing.js │ │ │ │ ├── foundation.cookie.js │ │ │ │ ├── foundation.dropdown.js │ │ │ │ ├── foundation.forms.js │ │ │ │ ├── foundation.joyride.js │ │ │ │ ├── foundation.js │ │ │ │ ├── foundation.magellan.js │ │ │ │ ├── foundation.orbit.js │ │ │ │ ├── foundation.placeholder.js │ │ │ │ ├── foundation.reveal.js │ │ │ │ ├── foundation.section.js │ │ │ │ ├── foundation.tooltips.js │ │ │ │ └── foundation.topbar.js │ │ │ └── vendor │ │ │ │ ├── custom.modernizr.js │ │ │ │ ├── jquery.js │ │ │ │ └── zepto.js │ │ │ ├── profile-crawler-beans.cxml │ │ │ └── profile-crawler-beans.groovy │ ├── test │ │ ├── java │ │ │ └── org │ │ │ │ └── archive │ │ │ │ ├── crawler │ │ │ │ ├── datamodel │ │ │ │ │ ├── CandidateURITest.java │ │ │ │ │ └── CrawlURITest.java │ │ │ │ ├── framework │ │ │ │ │ ├── CrawlControllerTest.java │ │ │ │ │ ├── CrawlLimitEnforcerTest.java │ │ │ │ │ ├── CrawlerProcessorTestBase.java │ │ │ │ │ └── EngineTest.java │ │ │ │ ├── frontier │ │ │ │ │ ├── BdbFrontierTest.java │ │ │ │ │ ├── BdbMultipleWorkQueuesTest.java │ │ │ │ │ ├── BucketQueueAssignmentPolicyTest.java │ │ │ │ │ ├── FrontierJournalTest.java │ │ │ │ │ └── precedence │ │ │ │ │ │ ├── BaseQueuePrecedencePolicyTest.java │ │ │ │ │ │ ├── BaseUriPrecedencePolicyTest.java │ │ │ │ │ │ ├── CostUriPrecedencePolicyTest.java │ │ │ │ │ │ ├── HighestUriQueuePrecedencePolicyTest.java │ │ │ │ │ │ ├── HopsUriPrecedencePolicyTest.java │ │ │ │ │ │ └── SuccessCountsQueuePrecedencePolicyTest.java │ │ │ │ ├── postprocessor │ │ │ │ │ ├── LowDiskPauseProcessorTest.java │ │ │ │ │ └── SupplementaryLinksScoperTest.java │ │ │ │ ├── prefetch │ │ │ │ │ ├── PreconditionEnforcerTest.java │ │ │ │ │ ├── PreselectorTest.java │ │ │ │ │ ├── QuotaEnforcerTest.java │ │ │ │ │ └── RuntimeLimitEnforcerTest.java │ │ │ │ ├── processor │ │ │ │ │ ├── BrowserProcessorTest.java │ │ │ │ │ ├── HashCrawlMapperTest.java │ │ │ │ │ └── LexicalCrawlMapperTest.java │ │ │ │ ├── restlet │ │ │ │ │ ├── ProfileCrawlerBeansTest.java │ │ │ │ │ ├── ScriptingConsoleTest.java │ │ │ │ │ └── XmlMarshallerTest.java │ │ │ │ ├── selftest │ │ │ │ │ ├── BackgroundImageExtractionSelfTestCase.java │ │ │ │ │ ├── BadURIsStopPageParsingSelfTest.java │ │ │ │ │ ├── CharsetSelfTest.java │ │ │ │ │ ├── CheckpointSelfTest.java │ │ │ │ │ ├── FormAuthSelfTest.java │ │ │ │ │ ├── FormAuthServlet.java │ │ │ │ │ ├── FormLoginSelfTest.java │ │ │ │ │ ├── FramesSelfTestCase.java │ │ │ │ │ ├── HttpAuthSelfTest.java │ │ │ │ │ ├── KeyWordProcessor.java │ │ │ │ │ ├── KeyWordUriPrecedencePolicy.java │ │ │ │ │ ├── MaxLinkHopsSelfTest.java │ │ │ │ │ ├── Precedence1SelfTest.java │ │ │ │ │ ├── Precedence2SelfTest.java │ │ │ │ │ ├── Precedence3SelfTest.java │ │ │ │ │ ├── Precedence4SelfTest.java │ │ │ │ │ ├── RandomServlet.java │ │ │ │ │ ├── RandomServletTest.java │ │ │ │ │ ├── SelfTestBase.java │ │ │ │ │ ├── SimpleSelfTest.java │ │ │ │ │ ├── StatisticsSelfTest.java │ │ │ │ │ ├── UserAgentSelfTest.java │ │ │ │ │ ├── UserAgentServlet.java │ │ │ │ │ └── package.html │ │ │ │ └── util │ │ │ │ │ ├── BdbUriUniqFilterTest.java │ │ │ │ │ ├── BloomUriUniqFilterTest.java │ │ │ │ │ ├── FPUriUniqFilterTest.java │ │ │ │ │ └── TopNSetTest.java │ │ │ │ └── modules │ │ │ │ └── fetcher │ │ │ │ └── FormAuthTest.java │ │ └── resources │ │ │ └── logging.properties │ ├── webapps │ │ └── selftest │ │ │ ├── Auth │ │ │ ├── basic │ │ │ │ ├── basic-loggedin.html │ │ │ │ └── index.html │ │ │ ├── form │ │ │ │ ├── get │ │ │ │ │ ├── error.html │ │ │ │ │ ├── get-loggedin.html │ │ │ │ │ ├── index.html │ │ │ │ │ └── success.jsp │ │ │ │ ├── index.html │ │ │ │ └── post │ │ │ │ │ ├── error.html │ │ │ │ │ ├── index.html │ │ │ │ │ ├── post-loggedin.html │ │ │ │ │ └── success.jsp │ │ │ └── index.html │ │ │ ├── BackgroundImageExtraction │ │ │ ├── example-background-image.jpeg │ │ │ └── index.html │ │ │ ├── BadURIsStopPageParsing │ │ │ ├── goodone.html │ │ │ ├── goodthree.html │ │ │ ├── goodtwo.html │ │ │ ├── index.html │ │ │ ├── one.html │ │ │ ├── three.html │ │ │ └── two.html │ │ │ ├── Charset │ │ │ ├── charsetselftest_end.html │ │ │ ├── index.html │ │ │ ├── shiftjis.jsp │ │ │ └── utf8.jsp │ │ │ ├── Checkpoint │ │ │ └── index.html │ │ │ ├── FlashParse │ │ │ ├── index.html │ │ │ ├── pirates.swf │ │ │ └── success.html │ │ │ ├── FormTagExtraction │ │ │ ├── index.html │ │ │ ├── inputtag.html │ │ │ └── optiontag.html │ │ │ ├── Frames │ │ │ ├── index.html │ │ │ ├── leftframe.html │ │ │ ├── noframe.html │ │ │ ├── rightframe.html │ │ │ └── topframe.html │ │ │ ├── MaxLinkHops │ │ │ ├── 1.html │ │ │ ├── 2.html │ │ │ ├── 3.html │ │ │ ├── 4.html │ │ │ ├── 5.html │ │ │ ├── 6.html │ │ │ └── index.html │ │ │ ├── Refresh │ │ │ ├── index.html │ │ │ ├── refresh.html │ │ │ └── refresh2.html │ │ │ ├── RobotsExclusion │ │ │ ├── README.txt │ │ │ ├── excluded.html │ │ │ ├── excluded │ │ │ │ └── level2 │ │ │ │ │ └── level3 │ │ │ │ │ └── excluded.html │ │ │ ├── included.html │ │ │ └── index.html │ │ │ ├── SimpleDocumentTypes │ │ │ ├── binaries │ │ │ │ ├── avi.avi │ │ │ │ ├── doc.doc │ │ │ │ ├── jpg.jpg │ │ │ │ ├── mp3.mp3 │ │ │ │ ├── pdf.pdf │ │ │ │ ├── ppt.ppt │ │ │ │ ├── ps.ps │ │ │ │ ├── rtf.rtf │ │ │ │ ├── wav.wav │ │ │ │ ├── wpd.wpd │ │ │ │ └── xls.xls │ │ │ ├── html.html │ │ │ ├── index.html │ │ │ └── txt.txt │ │ │ ├── SimpleJavascriptExtraction │ │ │ ├── index.html │ │ │ ├── jscriptOpenWindow.html │ │ │ └── jscriptOpenWindowArg.html │ │ │ ├── SpacesInHrefPath │ │ │ ├── index.html │ │ │ └── spaces in path.html │ │ │ ├── TrickyRelativeURIs │ │ │ ├── anothersub │ │ │ │ └── reluptarget.html │ │ │ ├── index.html │ │ │ └── reluptricky.html │ │ │ ├── WEB-INF │ │ │ └── web.xml │ │ │ ├── index.jsp │ │ │ └── robots.txt │ └── xsd │ │ └── arc │ │ └── 1.0 │ │ ├── arc.html │ │ ├── arc.xsd │ │ └── example.xml └── testdata │ └── selftest │ ├── BackgroundImageExtractionSelfTestCase │ └── htdocs │ │ ├── example-background-image.jpeg │ │ └── index.html │ ├── BadURIsStopPageParsingSelfTest │ └── htdocs │ │ ├── goodone.html │ │ ├── goodthree.html │ │ ├── goodtwo.html │ │ ├── index.html │ │ ├── one.html │ │ ├── three.html │ │ └── two.html │ ├── CharsetSelfTest │ └── htdocs │ │ ├── index.html │ │ └── link.html │ ├── CheckpointSelfTest │ └── profile │ │ ├── config.txt │ │ ├── seeds.txt │ │ └── sheets │ │ └── default.single │ ├── FlashParseSelfTest │ └── htdocs │ │ ├── index.html │ │ ├── pirates.swf │ │ └── success.html │ ├── FormAuthSelfTest │ └── htdocs │ │ ├── failure.html │ │ ├── index.html │ │ ├── link1.html │ │ ├── link2.html │ │ ├── link3.html │ │ └── success.html │ ├── FormLoginSelfTest │ └── htdocs │ │ ├── failure.html │ │ ├── index.html │ │ ├── link1.html │ │ ├── link2.html │ │ ├── link3.html │ │ └── success.html │ ├── FramesSelfTestCase │ └── htdocs │ │ ├── index.html │ │ ├── leftframe.html │ │ ├── noframe.html │ │ ├── rightframe.html │ │ └── topframe.html │ ├── HttpAuthSelfTest │ └── htdocs │ │ ├── basic │ │ ├── index.html │ │ ├── link1.html │ │ ├── link2.html │ │ └── link3.html │ │ ├── failure.html │ │ ├── index.html │ │ ├── link1.html │ │ ├── link2.html │ │ ├── link3.html │ │ └── success.html │ ├── MaxLinkHopsSelfTest │ └── htdocs │ │ ├── 1.html │ │ ├── 2.html │ │ ├── 3.html │ │ ├── 4.html │ │ ├── 5.html │ │ └── index.html │ ├── Precedence1SelfTest │ └── htdocs │ │ ├── five │ │ ├── a.html │ │ └── b.html │ │ ├── one │ │ ├── a.html │ │ └── b.html │ │ ├── seed.html │ │ └── ten │ │ ├── a.html │ │ └── b.html │ ├── Precedence2SelfTest │ ├── htdocs │ │ ├── five │ │ │ ├── a.html │ │ │ └── b.html │ │ ├── one │ │ │ ├── a.html │ │ │ └── b.html │ │ ├── seed.html │ │ └── ten │ │ │ ├── a.html │ │ │ └── b.html │ └── profile │ │ └── rank.txt │ ├── Precedence3SelfTest │ └── htdocs │ │ ├── A.html │ │ ├── B.html │ │ ├── C.html │ │ ├── D.html │ │ ├── E.html │ │ ├── F.html │ │ ├── G.html │ │ ├── H.html │ │ ├── I.html │ │ ├── J.html │ │ ├── K.html │ │ ├── L.html │ │ ├── M.html │ │ ├── N.html │ │ └── O.html │ ├── Precedence4SelfTest │ └── htdocs │ │ ├── five │ │ ├── a.html │ │ └── b.html │ │ ├── one │ │ ├── a.html │ │ └── b.html │ │ ├── seed.html │ │ └── ten │ │ ├── a.html │ │ └── b.html │ ├── SimpleSelfTest │ └── htdocs │ │ ├── index.html │ │ ├── link1.html │ │ ├── link2.html │ │ └── link3.html │ ├── StatisticsSelfTest │ └── htdocs │ │ ├── a.html │ │ ├── b.html │ │ ├── b1.html │ │ ├── b2.html │ │ └── b3.html │ ├── UserAgentSelfTest │ └── .gitignore │ └── conf │ ├── heritrix.properties │ └── selftest-crawler-beans.cxml ├── modules ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── archive │ │ │ ├── crawler │ │ │ └── util │ │ │ │ └── CrawledBytesHistotable.java │ │ │ ├── modules │ │ │ ├── CandidateChain.java │ │ │ ├── CoreAttributeConstants.java │ │ │ ├── CrawlMetadata.java │ │ │ ├── CrawlURI.java │ │ │ ├── DispositionChain.java │ │ │ ├── FetchChain.java │ │ │ ├── ProcessResult.java │ │ │ ├── Processor.java │ │ │ ├── ProcessorChain.java │ │ │ ├── ProcessorTestBase.java │ │ │ ├── SchedulingConstants.java │ │ │ ├── ScriptedProcessor.java │ │ │ ├── SimpleFileLoggerProvider.java │ │ │ ├── behaviors │ │ │ │ ├── Behavior.java │ │ │ │ ├── ExtractLinksBehavior.java │ │ │ │ ├── Page.java │ │ │ │ └── ScrollDownBehavior.java │ │ │ ├── canonicalize │ │ │ │ ├── BaseRule.java │ │ │ │ ├── CanonicalizationRule.java │ │ │ │ ├── FixupQueryString.java │ │ │ │ ├── LowercaseRule.java │ │ │ │ ├── RegexRule.java │ │ │ │ ├── RulesCanonicalizationPolicy.java │ │ │ │ ├── StripExtraSlashes.java │ │ │ │ ├── StripSessionCFIDs.java │ │ │ │ ├── StripSessionIDs.java │ │ │ │ ├── StripUserinfoRule.java │ │ │ │ ├── StripWWWNRule.java │ │ │ │ ├── StripWWWRule.java │ │ │ │ └── UriCanonicalizationPolicy.java │ │ │ ├── credential │ │ │ │ ├── Credential.java │ │ │ │ ├── CredentialStore.java │ │ │ │ ├── HtmlFormCredential.java │ │ │ │ ├── HttpAuthenticationCredential.java │ │ │ │ └── package.html │ │ │ ├── deciderules │ │ │ │ ├── AcceptDecideRule.java │ │ │ │ ├── AddRedirectFromRootServerToScope.java │ │ │ │ ├── ContentLengthDecideRule.java │ │ │ │ ├── ContentTypeMatchesRegexDecideRule.java │ │ │ │ ├── ContentTypeNotMatchesRegexDecideRule.java │ │ │ │ ├── DecideResult.java │ │ │ │ ├── DecideRule.java │ │ │ │ ├── DecideRuleSequence.java │ │ │ │ ├── ExternalGeoLocationDecideRule.java │ │ │ │ ├── ExternalGeoLookupInterface.java │ │ │ │ ├── FetchStatusDecideRule.java │ │ │ │ ├── FetchStatusMatchesRegexDecideRule.java │ │ │ │ ├── FetchStatusNotMatchesRegexDecideRule.java │ │ │ │ ├── HasViaDecideRule.java │ │ │ │ ├── HopCrossesAssignmentLevelDomainDecideRule.java │ │ │ │ ├── HopsPathMatchesRegexDecideRule.java │ │ │ │ ├── IpAddressSetDecideRule.java │ │ │ │ ├── MatchesFilePatternDecideRule.java │ │ │ │ ├── MatchesListRegexDecideRule.java │ │ │ │ ├── MatchesRegexDecideRule.java │ │ │ │ ├── MatchesStatusCodeDecideRule.java │ │ │ │ ├── NotMatchesFilePatternDecideRule.java │ │ │ │ ├── NotMatchesListRegexDecideRule.java │ │ │ │ ├── NotMatchesRegexDecideRule.java │ │ │ │ ├── NotMatchesStatusCodeDecideRule.java │ │ │ │ ├── PathologicalPathDecideRule.java │ │ │ │ ├── PredicatedDecideRule.java │ │ │ │ ├── PrerequisiteAcceptDecideRule.java │ │ │ │ ├── RejectDecideRule.java │ │ │ │ ├── ResourceLongerThanDecideRule.java │ │ │ │ ├── ResourceNoLongerThanDecideRule.java │ │ │ │ ├── ResponseContentLengthDecideRule.java │ │ │ │ ├── SchemeNotInSetDecideRule.java │ │ │ │ ├── ScriptedDecideRule.java │ │ │ │ ├── SeedAcceptDecideRule.java │ │ │ │ ├── SourceSeedDecideRule.java │ │ │ │ ├── TooManyHopsDecideRule.java │ │ │ │ ├── TooManyPathSegmentsDecideRule.java │ │ │ │ ├── TransclusionDecideRule.java │ │ │ │ ├── ViaSurtPrefixedDecideRule.java │ │ │ │ ├── recrawl │ │ │ │ │ └── IdenticalDigestDecideRule.java │ │ │ │ └── surt │ │ │ │ │ ├── NotOnDomainsDecideRule.java │ │ │ │ │ ├── NotOnHostsDecideRule.java │ │ │ │ │ ├── NotSurtPrefixedDecideRule.java │ │ │ │ │ ├── OnDomainsDecideRule.java │ │ │ │ │ ├── OnHostsDecideRule.java │ │ │ │ │ └── SurtPrefixedDecideRule.java │ │ │ ├── extractor │ │ │ │ ├── AggressiveExtractorHTML.java │ │ │ │ ├── ConfigurableExtractorJS.java │ │ │ │ ├── ContentExtractor.java │ │ │ │ ├── ContentExtractorTestBase.java │ │ │ │ ├── CustomSWFTags.java │ │ │ │ ├── Extractor.java │ │ │ │ ├── ExtractorCSS.java │ │ │ │ ├── ExtractorDOC.java │ │ │ │ ├── ExtractorHTML.java │ │ │ │ ├── ExtractorHTTP.java │ │ │ │ ├── ExtractorImpliedURI.java │ │ │ │ ├── ExtractorJS.java │ │ │ │ ├── ExtractorMultipleRegex.java │ │ │ │ ├── ExtractorPDF.java │ │ │ │ ├── ExtractorParameters.java │ │ │ │ ├── ExtractorRobotsTxt.java │ │ │ │ ├── ExtractorSWF.java │ │ │ │ ├── ExtractorSitemap.java │ │ │ │ ├── ExtractorURI.java │ │ │ │ ├── ExtractorUniversal.java │ │ │ │ ├── ExtractorXML.java │ │ │ │ ├── HTMLLinkContext.java │ │ │ │ ├── HTTPContentDigest.java │ │ │ │ ├── Hop.java │ │ │ │ ├── JerichoExtractorHTML.java │ │ │ │ ├── LinkContext.java │ │ │ │ ├── PDFParser.java │ │ │ │ ├── StringExtractorTestBase.java │ │ │ │ ├── TempDirProvider.java │ │ │ │ ├── TrapSuppressExtractor.java │ │ │ │ └── UriErrorLoggerModule.java │ │ │ ├── fetcher │ │ │ │ ├── AbstractCookieStore.java │ │ │ │ ├── BasicExecutionAwareEntityEnclosingRequest.java │ │ │ │ ├── BasicExecutionAwareRequest.java │ │ │ │ ├── BdbCookieStore.java │ │ │ │ ├── DefaultServerCache.java │ │ │ │ ├── FetchDNS.java │ │ │ │ ├── FetchErrors.java │ │ │ │ ├── FetchFTP.java │ │ │ │ ├── FetchHTTP.java │ │ │ │ ├── FetchHTTP2.java │ │ │ │ ├── FetchHTTPCookieStore.java │ │ │ │ ├── FetchHTTPRequest.java │ │ │ │ ├── FetchSFTP.java │ │ │ │ ├── FetchStats.java │ │ │ │ ├── FetchStatusCodes.java │ │ │ │ ├── FetchWhois.java │ │ │ │ ├── HostResolver.java │ │ │ │ ├── SimpleCookieStore.java │ │ │ │ ├── SocksSSLSocketFactory.java │ │ │ │ ├── SocksSocketFactory.java │ │ │ │ └── UserAgentProvider.java │ │ │ ├── forms │ │ │ │ ├── ExtractorHTMLForms.java │ │ │ │ ├── FormLoginProcessor.java │ │ │ │ └── HTMLForm.java │ │ │ ├── net │ │ │ │ ├── BdbServerCache.java │ │ │ │ ├── CrawlHost.java │ │ │ │ ├── CrawlServer.java │ │ │ │ ├── CustomRobotsPolicy.java │ │ │ │ ├── DefaultTempDirProvider.java │ │ │ │ ├── FirstNamedRobotsPolicy.java │ │ │ │ ├── IgnoreRobotsPolicy.java │ │ │ │ ├── MostFavoredRobotsPolicy.java │ │ │ │ ├── ObeyRobotsPolicy.java │ │ │ │ ├── RobotsDirectives.java │ │ │ │ ├── RobotsPolicy.java │ │ │ │ ├── RobotsTxtOnlyPolicy.java │ │ │ │ ├── Robotstxt.java │ │ │ │ └── ServerCache.java │ │ │ ├── package-info.java │ │ │ ├── recrawl │ │ │ │ ├── AbstractContentDigestHistory.java │ │ │ │ ├── AbstractPersistProcessor.java │ │ │ │ ├── BdbContentDigestHistory.java │ │ │ │ ├── ContentDigestHistoryLoader.java │ │ │ │ ├── ContentDigestHistoryStorer.java │ │ │ │ ├── FetchHistoryProcessor.java │ │ │ │ ├── PersistLoadProcessor.java │ │ │ │ ├── PersistLogProcessor.java │ │ │ │ ├── PersistOnlineProcessor.java │ │ │ │ ├── PersistProcessor.java │ │ │ │ ├── PersistStoreProcessor.java │ │ │ │ └── RecrawlAttributeConstants.java │ │ │ ├── revisit │ │ │ │ ├── AbstractProfile.java │ │ │ │ ├── IdenticalPayloadDigestRevisit.java │ │ │ │ ├── RevisitProfile.java │ │ │ │ └── ServerNotModifiedRevisit.java │ │ │ ├── seeds │ │ │ │ ├── SeedListener.java │ │ │ │ ├── SeedModule.java │ │ │ │ └── TextSeedModule.java │ │ │ ├── warc │ │ │ │ ├── BaseWARCRecordBuilder.java │ │ │ │ ├── DnsResponseRecordBuilder.java │ │ │ │ ├── FtpControlConversationRecordBuilder.java │ │ │ │ ├── FtpResponseRecordBuilder.java │ │ │ │ ├── HttpRequestRecordBuilder.java │ │ │ │ ├── HttpResponseRecordBuilder.java │ │ │ │ ├── MetadataRecordBuilder.java │ │ │ │ ├── RevisitRecordBuilder.java │ │ │ │ ├── WARCRecordBuilder.java │ │ │ │ └── WhoisResponseRecordBuilder.java │ │ │ └── writer │ │ │ │ ├── ARCWriterProcessor.java │ │ │ │ ├── BaseWARCWriterProcessor.java │ │ │ │ ├── Kw3Constants.java │ │ │ │ ├── Kw3WriterProcessor.java │ │ │ │ ├── MirrorWriterProcessor.java │ │ │ │ ├── WARCWriterChainProcessor.java │ │ │ │ ├── WARCWriterProcessor.java │ │ │ │ └── WriterPoolProcessor.java │ │ │ └── state │ │ │ └── ModuleTestBase.java │ └── resources │ │ └── org │ │ └── archive │ │ └── modules │ │ ├── BeanShellProcessor_en.utf8 │ │ ├── Processor_en.utf8 │ │ ├── canonicalize │ │ ├── BaseRule_en.utf8 │ │ ├── FixupQueryString_en.utf8 │ │ ├── LowercaseRule_en.utf8 │ │ ├── RegexRule_en.utf8 │ │ ├── StripExtraSlashes_en.utf8 │ │ ├── StripSessionCFIDs_en.utf8 │ │ ├── StripSessionIDs_en.utf8 │ │ ├── StripUserinfoRule_en.utf8 │ │ ├── StripWWWNRule_en.utf8 │ │ └── StripWWWRule_en.utf8 │ │ ├── credential │ │ ├── CredentialStore_en.utf8 │ │ ├── Credential_en.utf8 │ │ ├── HtmlFormCredential_en.utf8 │ │ └── Rfc2617Credential_en.utf8 │ │ ├── deciderules │ │ ├── DecideRuleSequence_en.utf8 │ │ ├── DecideRule_en.utf8 │ │ ├── HasViaDecideRule_en.utf8 │ │ ├── HopsPathMatchesRegExpDecideRule_en.utf8 │ │ ├── MatchesRegExpDecideRule_en.utf8 │ │ └── PredicatedAcceptDecideRule_en.utf8 │ │ ├── extractor │ │ ├── AggressiveExtractorHTML_en.utf8 │ │ ├── ExtractorCSS_en.utf8 │ │ ├── ExtractorDOC_en.utf8 │ │ ├── ExtractorHTML_en.utf8 │ │ ├── ExtractorHTTP_en.utf8 │ │ ├── ExtractorImpliedURI_en.utf8 │ │ ├── ExtractorJS_en.utf8 │ │ ├── ExtractorPDF_en.utf8 │ │ ├── ExtractorSWF_en.utf8 │ │ ├── ExtractorURI_en.utf8 │ │ ├── ExtractorUniversal_en.utf8 │ │ ├── ExtractorXML_en.utf8 │ │ ├── Extractor_en.utf8 │ │ ├── HTTPContentDigest_en.utf8 │ │ └── TrapSuppressExtractor_en.utf8 │ │ ├── fetcher │ │ ├── FetchDNS_en.utf8 │ │ ├── FetchFTP_en.utf8 │ │ └── FetchHTTP_en.utf8 │ │ ├── net │ │ └── RobotsHonoringPolicy_en.utf8 │ │ ├── recrawl │ │ ├── FetchHistoryProcessor_en.utf8 │ │ ├── PersistLoadProcessor_en.utf8 │ │ ├── PersistLogProcessor_en.utf8 │ │ ├── PersistOnlineProcessor_en.utf8 │ │ └── PersistStoreProcessor_en.utf8 │ │ └── writer │ │ ├── ARCWriterProcessor_en.utf8 │ │ ├── MirrorWriterProcessor_en.utf8 │ │ ├── WARCWriterProcessor_en.utf8 │ │ ├── WriterPoolProcessor_en.utf8 │ │ └── arc_metadata_template.xml │ └── test │ ├── java │ └── org │ │ └── archive │ │ └── modules │ │ ├── ScriptedProcessorTest.java │ │ ├── canonicalize │ │ ├── FixupQueryStringTest.java │ │ ├── LowercaseRuleTest.java │ │ ├── RegexRuleTest.java │ │ ├── RulesCanonicalizationPolicyTest.java │ │ ├── StripSessionCFIDsTest.java │ │ ├── StripSessionIDsTest.java │ │ ├── StripUserinfoRuleTest.java │ │ ├── StripWWWNRuleTest.java │ │ └── StripWWWRuleTest.java │ │ ├── credential │ │ ├── CredentialStoreTest.java │ │ ├── HtmlFormCredentialTest.java │ │ └── HttpAuthenticationCredentialTest.java │ │ ├── deciderules │ │ ├── DecideRuleSequenceTest.java │ │ ├── HasViaDecideRuleTest.java │ │ ├── HopsPathMatchesRegexDecideRuleTest.java │ │ ├── MatchesListRegexDecideRuleTest.java │ │ ├── MatchesStatusCodeDecideRuleTest.java │ │ ├── NotMatchesStatusCodeDecideRuleTest.java │ │ └── ViaSurtPrefixedDecideRuleTest.java │ │ ├── extractor │ │ ├── AggressiveExtractorHTMLTest.java │ │ ├── ExtractorCSSTest.java │ │ ├── ExtractorDOCTest.java │ │ ├── ExtractorHTMLTest.java │ │ ├── ExtractorHTTPTest.java │ │ ├── ExtractorImpliedURITest.java │ │ ├── ExtractorJSTest.java │ │ ├── ExtractorMultipleRegexTest.java │ │ ├── ExtractorPDFTest.java │ │ ├── ExtractorSWFTest.java │ │ ├── ExtractorURITest.java │ │ ├── ExtractorUniversalTest.java │ │ ├── ExtractorXMLTest.java │ │ ├── HTTPContentDigestTest.java │ │ ├── JerichoExtractorHTMLTest.java │ │ ├── PDFParserTest.java │ │ └── UnitTestUriLoggerModule.java │ │ ├── fetcher │ │ ├── CookieFetchHTTPIntegrationTest.java │ │ ├── CookieStoreTest.java │ │ ├── FetchDNSTest.java │ │ ├── FetchFTPTest.java │ │ ├── FetchHTTP2Test.java │ │ ├── FetchHTTPTest.java │ │ └── FetchHTTPTestServers.java │ │ ├── forms │ │ └── FormLoginProcessorTest.java │ │ ├── net │ │ ├── CrawlHostTest.java │ │ ├── CrawlServerTest.java │ │ ├── FirstNamedRobotsPolicyTest.java │ │ ├── RobotsPolicyTest.java │ │ ├── RobotstxtTest.java │ │ └── ServerCacheTest.java │ │ ├── recrawl │ │ ├── ContentDigestHistoryTest.java │ │ ├── FetchHistoryProcessorTest.java │ │ ├── PersistLoadProcessorTest.java │ │ ├── PersistLogProcessorTest.java │ │ └── PersistStoreProcessorTest.java │ │ └── writer │ │ ├── ARCWriterProcessorTest.java │ │ ├── MirrorWriterProcessorTest.java │ │ ├── WARCWriterChainProcessorTest.java │ │ └── WARCWriterProcessorTest.java │ └── resources │ └── org │ └── archive │ └── crawler │ └── modules │ └── extractor │ └── PDFParserTest.pdf └── pom.xml /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/.github/workflows/docker-image.yml -------------------------------------------------------------------------------- /.github/workflows/maven.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/.github/workflows/maven.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/.gitignore -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/.readthedocs.yml -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/README.md -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/RELEASING.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/SECURITY.md -------------------------------------------------------------------------------- /commons/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/pom.xml -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/bdb/AutoKryo.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/bdb/AutoKryo.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/bdb/BdbModule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/bdb/BdbModule.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/bdb/DisposableStoredSortedMap.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/bdb/DisposableStoredSortedMap.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/bdb/KryoBinding.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/bdb/KryoBinding.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/bdb/StoredQueue.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/bdb/StoredQueue.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/checkpointing/Checkpoint.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/checkpointing/Checkpoint.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/checkpointing/Checkpointable.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/checkpointing/Checkpointable.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/io/Arc2Warc.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/io/Arc2Warc.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/io/CrawlerJournal.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/io/CrawlerJournal.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/io/ReadSourceEditor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/io/ReadSourceEditor.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/io/Warc2Arc.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/io/Warc2Arc.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/ClientFTP.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/ClientFTP.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/ClientSFTP.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/ClientSFTP.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/MitmProxy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/MitmProxy.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/UURI.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/UURI.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/UURIFactory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/UURIFactory.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/BiDiEvent.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/BiDiEvent.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/BiDiJson.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/BiDiJson.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/BiDiModule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/BiDiModule.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/Browser.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/Browser.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/BrowsingContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/BrowsingContext.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/LocalWebDriverBiDi.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/LocalWebDriverBiDi.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/Network.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/Network.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/Script.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/Script.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/Session.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/Session.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/WebDriverBiDi.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/WebDriverBiDi.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/net/webdriver/WebDriverException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/net/webdriver/WebDriverException.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/BeanFieldsPatternValidator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/BeanFieldsPatternValidator.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/ConfigFile.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/ConfigFile.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/ConfigFileEditor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/ConfigFileEditor.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/ConfigPath.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/ConfigPath.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/ConfigPathConfigurer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/ConfigPathConfigurer.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/ConfigPathEditor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/ConfigPathEditor.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/ConfigString.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/ConfigString.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/HasKeyedProperties.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/HasKeyedProperties.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/HasValidator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/HasValidator.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/HeritrixLifecycleProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/HeritrixLifecycleProcessor.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/KeyedProperties.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/KeyedProperties.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/OverlayContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/OverlayContext.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/OverlayMapsSource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/OverlayMapsSource.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/PathSharingContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/PathSharingContext.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/Required.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/Required.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/Sheet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/Sheet.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/spring/WriteTarget.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/spring/WriteTarget.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/surt/SURTTokenizer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/surt/SURTTokenizer.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/AbstractLongFPSet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/AbstractLongFPSet.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/Base32.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/Base32.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/BloomFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/BloomFilter.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/BloomFilter64bit.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/BloomFilter64bit.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/Histotable.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/Histotable.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/IdentityCacheable.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/IdentityCacheable.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/IdleBarrier.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/IdleBarrier.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/Iteratorable.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/Iteratorable.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/JSONUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/JSONUtils.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/JndiUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/JndiUtils.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/KeyTool.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/KeyTool.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/LRU.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/LRU.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/LongToIntConsistentHash.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/LongToIntConsistentHash.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ObjectIdentityBdbManualCache.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ObjectIdentityBdbManualCache.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ObjectIdentityCache.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ObjectIdentityCache.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ObjectIdentityMemCache.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ObjectIdentityMemCache.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/OneLineSimpleLogger.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/OneLineSimpleLogger.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/PaddingStringBuffer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/PaddingStringBuffer.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/PrefixFinder.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/PrefixFinder.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ReportUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ReportUtils.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/Supplier.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/Supplier.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/UriUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/UriUtils.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/bdbje/EnhancedEnvironment.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/bdbje/EnhancedEnvironment.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/fingerprint/ArrayLongFPCache.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/fingerprint/ArrayLongFPCache.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/fingerprint/LongFPSet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/fingerprint/LongFPSet.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/fingerprint/LongFPSetCache.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/fingerprint/LongFPSetCache.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/fingerprint/MemLongFPSet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/fingerprint/MemLongFPSet.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/iterator/CompositeIterator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/iterator/CompositeIterator.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/BlockFileSystem.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/BlockFileSystem.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/BlockInputStream.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/BlockInputStream.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/Cp1252.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/Cp1252.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/DefaultBlockFileSystem.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/DefaultBlockFileSystem.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/DefaultEntry.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/DefaultEntry.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/Doc.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/Doc.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/Entry.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/Entry.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/HeaderBlock.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/HeaderBlock.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/Piece.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/Piece.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/PieceReader.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/PieceReader.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/PieceTable.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/PieceTable.java -------------------------------------------------------------------------------- /commons/src/main/java/org/archive/util/ms/package.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/org/archive/util/ms/package.html -------------------------------------------------------------------------------- /commons/src/main/java/st/ata/util/FPGenerator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/java/st/ata/util/FPGenerator.java -------------------------------------------------------------------------------- /commons/src/main/resources/org/archive/i18n/LocaleCache_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/resources/org/archive/i18n/LocaleCache_en.utf8 -------------------------------------------------------------------------------- /commons/src/main/resources/org/archive/i18n/LocaleCache_en_CA.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/resources/org/archive/i18n/LocaleCache_en_CA.utf8 -------------------------------------------------------------------------------- /commons/src/main/resources/org/archive/i18n/LocaleCache_jp.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/main/resources/org/archive/i18n/LocaleCache_jp.utf8 -------------------------------------------------------------------------------- /commons/src/main/resources/org/archive/util/timestamp.txt: -------------------------------------------------------------------------------- 1 | ${build.timestamp} 2 | -------------------------------------------------------------------------------- /commons/src/main/resources/org/archive/util/version.txt: -------------------------------------------------------------------------------- 1 | ${pom.version} 2 | -------------------------------------------------------------------------------- /commons/src/site/overview.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/site/overview.html -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/bdb/StoredQueueTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/bdb/StoredQueueTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/io/ArchiveTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/io/ArchiveTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/net/MitmProxyTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/net/MitmProxyTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/settings/file/BdbModuleTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/settings/file/BdbModuleTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/settings/file/PrefixFinderTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/settings/file/PrefixFinderTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/spring/PathSharingContextTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/spring/PathSharingContextTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/surt/SURTTokenizerTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/surt/SURTTokenizerTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/BenchmarkBlooms.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/BenchmarkBlooms.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/BloomFilter64bitTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/BloomFilter64bitTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/BloomFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/BloomFilterTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/IdentityCacheableWrapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/IdentityCacheableWrapper.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/LongToIntConsistentHashTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/LongToIntConsistentHashTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/ObjectIdentityBdbManualCacheTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/ObjectIdentityBdbManualCacheTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/PaddingStringBufferTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/PaddingStringBufferTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/SURTTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/SURTTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/SurtPrefixSetTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/SurtPrefixSetTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/TextUtilsTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/TextUtilsTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/UriUtilsTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/UriUtilsTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/fingerprint/ArrayLongFPCacheTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/fingerprint/ArrayLongFPCacheTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/fingerprint/LongFPSetCacheTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/fingerprint/LongFPSetCacheTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/fingerprint/LongFPSetTestCase.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/fingerprint/LongFPSetTestCase.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/fingerprint/MemLongFPSetTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/fingerprint/MemLongFPSetTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/ms/15336-doc-preface.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/ms/15336-doc-preface.doc -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/ms/15336-doc-preface.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/ms/15336-doc-preface.txt -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/ms/DocTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/ms/DocTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/ms/PieceReaderTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/ms/PieceReaderTest.java -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/ms/X.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/java/org/archive/util/ms/X.doc -------------------------------------------------------------------------------- /commons/src/test/java/org/archive/util/ms/X.txt: -------------------------------------------------------------------------------- 1 | X -------------------------------------------------------------------------------- /commons/src/test/resources/org/archive/settings/path/anonymous.resolved.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/resources/org/archive/settings/path/anonymous.resolved.txt -------------------------------------------------------------------------------- /commons/src/test/resources/org/archive/settings/path/global.get.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/resources/org/archive/settings/path/global.get.txt -------------------------------------------------------------------------------- /commons/src/test/resources/org/archive/settings/path/global.resolved.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/resources/org/archive/settings/path/global.resolved.txt -------------------------------------------------------------------------------- /commons/src/test/resources/org/archive/settings/path/o1.get.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/resources/org/archive/settings/path/o1.get.txt -------------------------------------------------------------------------------- /commons/src/test/resources/org/archive/settings/path/o1.resolved.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/commons/src/test/resources/org/archive/settings/path/o1.resolved.txt -------------------------------------------------------------------------------- /contrib/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/pom.xml -------------------------------------------------------------------------------- /contrib/src/main/assembly/dist.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/assembly/dist.xml -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/crawler/event/AMQPUrlPublishedEvent.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/crawler/event/AMQPUrlPublishedEvent.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/crawler/event/AMQPUrlReceivedEvent.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/crawler/event/AMQPUrlReceivedEvent.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/crawler/frontier/AMQPUrlReceiver.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/crawler/frontier/AMQPUrlReceiver.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/crawler/prefetch/HostQuotaEnforcer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/crawler/prefetch/HostQuotaEnforcer.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/crawler/prefetch/SourceQuotaEnforcer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/crawler/prefetch/SourceQuotaEnforcer.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/modules/AMQPProducer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/modules/AMQPProducer.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/modules/AMQPProducerProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/modules/AMQPProducerProcessor.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/modules/AMQPPublishProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/modules/AMQPPublishProcessor.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/modules/AMQPUrlWaiter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/modules/AMQPUrlWaiter.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/modules/extractor/ExtractorPDFContent.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/modules/extractor/ExtractorPDFContent.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/modules/recrawl/FetchHistoryHelper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/modules/recrawl/FetchHistoryHelper.java -------------------------------------------------------------------------------- /contrib/src/main/java/org/archive/trough/TroughClient.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/main/java/org/archive/trough/TroughClient.java -------------------------------------------------------------------------------- /contrib/src/test/resources/ExtractorPDFContentTest1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/test/resources/ExtractorPDFContentTest1.pdf -------------------------------------------------------------------------------- /contrib/src/test/resources/ExtractorPDFContentTest2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/test/resources/ExtractorPDFContentTest2.pdf -------------------------------------------------------------------------------- /contrib/src/test/resources/ExtractorPDFContentTest3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/test/resources/ExtractorPDFContentTest3.pdf -------------------------------------------------------------------------------- /contrib/src/test/resources/ExtractorPDFContentTest4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/test/resources/ExtractorPDFContentTest4.pdf -------------------------------------------------------------------------------- /contrib/src/test/resources/ExtractorYoutubeChannelFormatStream.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/test/resources/ExtractorYoutubeChannelFormatStream.txt -------------------------------------------------------------------------------- /contrib/src/test/resources/ExtractorYoutubeDL.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/test/resources/ExtractorYoutubeDL.json -------------------------------------------------------------------------------- /contrib/src/test/resources/ExtractorYoutubeFormatStream.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/test/resources/ExtractorYoutubeFormatStream.txt -------------------------------------------------------------------------------- /contrib/src/test/resources/ExtractorYoutubeFormatStream2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/contrib/src/test/resources/ExtractorYoutubeFormatStream2.txt -------------------------------------------------------------------------------- /dist/LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/LICENSE.txt -------------------------------------------------------------------------------- /dist/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/pom.xml -------------------------------------------------------------------------------- /dist/src/main/assembly/dist.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/assembly/dist.xml -------------------------------------------------------------------------------- /dist/src/main/assembly/src.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/assembly/src.xml -------------------------------------------------------------------------------- /dist/src/main/bin/arcreader: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/arcreader -------------------------------------------------------------------------------- /dist/src/main/bin/arcreader.cmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/arcreader.cmd -------------------------------------------------------------------------------- /dist/src/main/bin/dependencies.xsl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/dependencies.xsl -------------------------------------------------------------------------------- /dist/src/main/bin/extractor: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/extractor -------------------------------------------------------------------------------- /dist/src/main/bin/extractor.cmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/extractor.cmd -------------------------------------------------------------------------------- /dist/src/main/bin/foreground_heritrix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/foreground_heritrix -------------------------------------------------------------------------------- /dist/src/main/bin/foreground_heritrix.cmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/foreground_heritrix.cmd -------------------------------------------------------------------------------- /dist/src/main/bin/heritrix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/heritrix -------------------------------------------------------------------------------- /dist/src/main/bin/heritrix.cmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/heritrix.cmd -------------------------------------------------------------------------------- /dist/src/main/bin/hoppath.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/hoppath.pl -------------------------------------------------------------------------------- /dist/src/main/bin/htmlextractor: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/htmlextractor -------------------------------------------------------------------------------- /dist/src/main/bin/htmlextractor.cmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/htmlextractor.cmd -------------------------------------------------------------------------------- /dist/src/main/bin/make_reports.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/make_reports.pl -------------------------------------------------------------------------------- /dist/src/main/bin/manifest_bundle.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/manifest_bundle.pl -------------------------------------------------------------------------------- /dist/src/main/bin/xdocToTxt.xsl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/bin/xdocToTxt.xsl -------------------------------------------------------------------------------- /dist/src/main/conf/heritrix.cacerts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/conf/heritrix.cacerts -------------------------------------------------------------------------------- /dist/src/main/conf/jobs/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/conf/jobs/.gitignore -------------------------------------------------------------------------------- /dist/src/main/conf/logging.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/conf/logging.properties -------------------------------------------------------------------------------- /dist/src/main/extras/pagerank/GenGraph.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/extras/pagerank/GenGraph.java -------------------------------------------------------------------------------- /dist/src/main/extras/pagerank/PageRank.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/extras/pagerank/PageRank.java -------------------------------------------------------------------------------- /dist/src/main/extras/pagerank/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/extras/pagerank/README.txt -------------------------------------------------------------------------------- /dist/src/main/extras/pagerank/assignUrlIndex.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/extras/pagerank/assignUrlIndex.pl -------------------------------------------------------------------------------- /dist/src/main/extras/pagerank/pageRankSetup.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/extras/pagerank/pageRankSetup.pl -------------------------------------------------------------------------------- /dist/src/main/extras/pagerank/run-pr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/extras/pagerank/run-pr.sh -------------------------------------------------------------------------------- /dist/src/main/licenses/ant.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/ant.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/ant.NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/ant.NOTICE -------------------------------------------------------------------------------- /dist/src/main/licenses/bsh.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/bsh.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-cli.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-cli.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-cli.NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-cli.NOTICE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-codec.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-codec.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-codec.NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-codec.NOTICE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-el.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-el.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-el.NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-el.NOTICE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-io.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-io.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-io.NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-io.NOTICE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-lang.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-lang.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-lang.NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-lang.NOTICE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-logging.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-logging.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-net.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-net.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/commons-net.NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/commons-net.NOTICE -------------------------------------------------------------------------------- /dist/src/main/licenses/dnsjava-2.0.3.README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/dnsjava-2.0.3.README -------------------------------------------------------------------------------- /dist/src/main/licenses/fastutil-5.0.7.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/fastutil-5.0.7.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/jasper.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/jasper.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/javaswf.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/javaswf.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/je-3.2.44.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/je-3.2.44.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/jericho-html-2.3.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/jericho-html-2.3.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/jets3t-0.5.0.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/jets3t-0.5.0.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/jetty.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/jetty.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/junit.LICENSE.HTML: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/junit.LICENSE.HTML -------------------------------------------------------------------------------- /dist/src/main/licenses/libidn.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/libidn.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/oro-2.0.8.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/oro-2.0.8.LICENSE -------------------------------------------------------------------------------- /dist/src/main/licenses/servlet-4.1.34.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/main/licenses/servlet-4.1.34.LICENSE -------------------------------------------------------------------------------- /dist/src/test/java/org/archive/crawler/BasicProfileTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/dist/src/test/java/org/archive/crawler/BasicProfileTest.java -------------------------------------------------------------------------------- /docgen/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docgen/pom.xml -------------------------------------------------------------------------------- /docgen/src/main/java/org/archive/crawler/BeanDocProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docgen/src/main/java/org/archive/crawler/BeanDocProcessor.java -------------------------------------------------------------------------------- /docgen/src/main/resources/META-INF/services/javax.annotation.processing.Processor: -------------------------------------------------------------------------------- 1 | org.archive.crawler.BeanDocProcessor -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/Dockerfile.contrib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docker/Dockerfile.contrib -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docker/Makefile -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docker/README.md -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docker/docker-compose.yml -------------------------------------------------------------------------------- /docker/entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docker/entrypoint.sh -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/_ext/beandoc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/_ext/beandoc.py -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/api.rst -------------------------------------------------------------------------------- /docs/bean-reference.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/bean-reference.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/configuring-jobs.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/configuring-jobs.rst -------------------------------------------------------------------------------- /docs/getting-started.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/getting-started.rst -------------------------------------------------------------------------------- /docs/glossary.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/glossary.rst -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/operating.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/operating.rst -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /engine/.cvsignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/.cvsignore -------------------------------------------------------------------------------- /engine/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/pom.xml -------------------------------------------------------------------------------- /engine/src/design/credentials.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/design/credentials.gif -------------------------------------------------------------------------------- /engine/src/design/credentials.zargo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/design/credentials.zargo -------------------------------------------------------------------------------- /engine/src/main/assembly/dist.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/assembly/dist.xml -------------------------------------------------------------------------------- /engine/src/main/java/META-INF/MANIFEST-MF: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /engine/src/main/java/freemarker_implicit.ftl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/freemarker_implicit.ftl -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/Heritrix.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/Heritrix.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/datamodel/UriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/datamodel/UriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/deciderules/package.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/deciderules/package.html -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/doc-files/processing_steps.dia: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/doc-files/processing_steps.dia -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/doc-files/processing_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/doc-files/processing_steps.png -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/event/CrawlStateEvent.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/event/CrawlStateEvent.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/event/CrawlURIDispositionEvent.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/event/CrawlURIDispositionEvent.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/event/StatSnapshotEvent.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/event/StatSnapshotEvent.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/ActionDirectory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/ActionDirectory.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/BeanLookupBindings.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/BeanLookupBindings.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/CheckpointService.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/CheckpointService.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/CheckpointValidator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/CheckpointValidator.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/CrawlController.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/CrawlController.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/CrawlJob.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/CrawlJob.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/CrawlLimitEnforcer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/CrawlLimitEnforcer.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/CrawlStatus.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/CrawlStatus.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/Engine.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/Engine.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/Frontier.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/Frontier.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/Scoper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/Scoper.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/ToePool.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/ToePool.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/framework/ToeThread.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/framework/ToeThread.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/AbstractFrontier.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/AbstractFrontier.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/BdbFrontier.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/BdbFrontier.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/BdbMultipleWorkQueues.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/BdbMultipleWorkQueues.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/BdbWorkQueue.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/BdbWorkQueue.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/CostAssignmentPolicy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/CostAssignmentPolicy.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/DelayedWorkQueue.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/DelayedWorkQueue.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/FrontierJournal.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/FrontierJournal.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/QueueAssignmentPolicy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/QueueAssignmentPolicy.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/WorkQueue.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/WorkQueue.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/frontier/WorkQueueFrontier.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/frontier/WorkQueueFrontier.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/io/NonFatalErrorFormatter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/io/NonFatalErrorFormatter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/io/RuntimeErrorFormatter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/io/RuntimeErrorFormatter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/io/StatisticsLogFormatter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/io/StatisticsLogFormatter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/io/UriErrorFormatter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/io/UriErrorFormatter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/io/UriProcessingFormatter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/io/UriProcessingFormatter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/migrate/MigrateH1to3Tool.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/migrate/MigrateH1to3Tool.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/monitor/DiskSpaceMonitor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/monitor/DiskSpaceMonitor.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/monitor/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/monitor/package-info.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/package.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/package.html -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/prefetch/CandidateScoper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/prefetch/CandidateScoper.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/prefetch/FrontierPreparer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/prefetch/FrontierPreparer.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/prefetch/PreconditionEnforcer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/prefetch/PreconditionEnforcer.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/prefetch/Preselector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/prefetch/Preselector.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/prefetch/QuotaEnforcer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/prefetch/QuotaEnforcer.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/prefetch/RuntimeLimitEnforcer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/prefetch/RuntimeLimitEnforcer.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/processor/BrowserProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/processor/BrowserProcessor.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/processor/CrawlMapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/processor/CrawlMapper.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/processor/HashCrawlMapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/processor/HashCrawlMapper.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/processor/LexicalCrawlMapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/processor/LexicalCrawlMapper.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/AlertHandler.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/AlertHandler.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/AlertThreadGroup.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/AlertThreadGroup.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/CrawlStatSnapshot.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/CrawlStatSnapshot.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/CrawlSummaryReport.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/CrawlSummaryReport.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/CrawlerLoggerModule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/CrawlerLoggerModule.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/HostsReport.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/HostsReport.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/MimetypesReport.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/MimetypesReport.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/ProcessorsReport.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/ProcessorsReport.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/Report.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/Report.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/ResponseCodeReport.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/ResponseCodeReport.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/SeedRecord.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/SeedRecord.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/SeedsReport.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/SeedsReport.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/SourceTagsReport.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/SourceTagsReport.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/StatisticsTracker.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/StatisticsTracker.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/reporting/ToeThreadsReport.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/reporting/ToeThreadsReport.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/BaseResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/BaseResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/BeanBrowseResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/BeanBrowseResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/BeanDocResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/BeanDocResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/DescriptorUpdater.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/DescriptorUpdater.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/EditRepresentation.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/EditRepresentation.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/EngineApplication.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/EngineApplication.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/EngineResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/EngineResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/EnhDirectory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/EnhDirectory.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/EnhDirectoryResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/EnhDirectoryResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/Flash.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/Flash.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/JobRelatedResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/JobRelatedResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/JobResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/JobResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/PagedRepresentation.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/PagedRepresentation.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/RateLimitGuard.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/RateLimitGuard.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/ReportGenResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/ReportGenResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/ScriptResource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/ScriptResource.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/ScriptingConsole.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/ScriptingConsole.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/WebJars.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/WebJars.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/XmlMarshaller.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/XmlMarshaller.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/models/BeansModel.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/models/BeansModel.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/models/CrawlJobModel.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/models/CrawlJobModel.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/models/EngineModel.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/models/EngineModel.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/models/ScriptModel.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/models/ScriptModel.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/restlet/models/ViewModel.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/restlet/models/ViewModel.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/spring/SheetAssociation.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/spring/SheetAssociation.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/spring/SheetOverlaysManager.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/spring/SheetOverlaysManager.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/BdbUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/BdbUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/BenchmarkUriUniqFilters.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/BenchmarkUriUniqFilters.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/BloomUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/BloomUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/CheckpointUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/CheckpointUtils.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/DiskFPMergeUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/DiskFPMergeUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/FPMergeUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/FPMergeUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/FPUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/FPUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/LogReader.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/LogReader.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/Logs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/Logs.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/MemFPMergeUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/MemFPMergeUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/MemUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/MemUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/NoopUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/NoopUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/RecoveryLogMapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/RecoveryLogMapper.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/SeedUrlNotFoundException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/SeedUrlNotFoundException.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/SetBasedUriUniqFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/SetBasedUriUniqFilter.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/crawler/util/TopNSet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/crawler/util/TopNSet.java -------------------------------------------------------------------------------- /engine/src/main/java/org/archive/overview.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/java/org/archive/overview.html -------------------------------------------------------------------------------- /engine/src/main/resources/arcMetaheaderBody.xsl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/arcMetaheaderBody.xsl -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/framework/CrawlScope_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/framework/CrawlScope_en.utf8 -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/framework/Scoper_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/framework/Scoper_en.utf8 -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/frontier/BdbFrontier_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/frontier/BdbFrontier_en.utf8 -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/migrate/H1toH3.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/migrate/H1toH3.map -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/prefetch/Preselector_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/prefetch/Preselector_en.utf8 -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/prefetch/QuotaEnforcer_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/prefetch/QuotaEnforcer_en.utf8 -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/processor/CrawlMapper_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/processor/CrawlMapper_en.utf8 -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/Beans.ftl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/Beans.ftl -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/Edit.ftl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/Edit.ftl -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/Engine.ftl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/Engine.ftl -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/Job.ftl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/Job.ftl -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/Script.ftl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/Script.ftl -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/css/foundation.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/css/foundation.css -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/css/foundation.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/css/foundation.min.css -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/css/heritrix.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/css/heritrix.css -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/css/normalize.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/css/normalize.css -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/img/heritrix-logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/img/heritrix-logo.gif -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/js/foundation.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/js/foundation.min.js -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/js/vendor/jquery.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/js/vendor/jquery.js -------------------------------------------------------------------------------- /engine/src/main/resources/org/archive/crawler/restlet/js/vendor/zepto.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/main/resources/org/archive/crawler/restlet/js/vendor/zepto.js -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/datamodel/CandidateURITest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/datamodel/CandidateURITest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/datamodel/CrawlURITest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/datamodel/CrawlURITest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/framework/CrawlControllerTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/framework/CrawlControllerTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/framework/EngineTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/framework/EngineTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/frontier/BdbFrontierTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/frontier/BdbFrontierTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/frontier/FrontierJournalTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/frontier/FrontierJournalTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/prefetch/PreselectorTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/prefetch/PreselectorTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/prefetch/QuotaEnforcerTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/prefetch/QuotaEnforcerTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/processor/BrowserProcessorTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/processor/BrowserProcessorTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/processor/HashCrawlMapperTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/processor/HashCrawlMapperTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/restlet/ScriptingConsoleTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/restlet/ScriptingConsoleTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/restlet/XmlMarshallerTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/restlet/XmlMarshallerTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/CharsetSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/CharsetSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/CheckpointSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/CheckpointSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/FormAuthSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/FormAuthSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/FormAuthServlet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/FormAuthServlet.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/FormLoginSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/FormLoginSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/FramesSelfTestCase.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/FramesSelfTestCase.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/HttpAuthSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/HttpAuthSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/KeyWordProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/KeyWordProcessor.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/MaxLinkHopsSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/MaxLinkHopsSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/Precedence1SelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/Precedence1SelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/Precedence2SelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/Precedence2SelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/Precedence3SelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/Precedence3SelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/Precedence4SelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/Precedence4SelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/RandomServlet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/RandomServlet.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/RandomServletTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/RandomServletTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/SelfTestBase.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/SelfTestBase.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/SimpleSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/SimpleSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/StatisticsSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/StatisticsSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/UserAgentSelfTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/UserAgentSelfTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/UserAgentServlet.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/UserAgentServlet.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/selftest/package.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/selftest/package.html -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/util/BdbUriUniqFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/util/BdbUriUniqFilterTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/util/BloomUriUniqFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/util/BloomUriUniqFilterTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/util/FPUriUniqFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/util/FPUriUniqFilterTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/crawler/util/TopNSetTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/crawler/util/TopNSetTest.java -------------------------------------------------------------------------------- /engine/src/test/java/org/archive/modules/fetcher/FormAuthTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/java/org/archive/modules/fetcher/FormAuthTest.java -------------------------------------------------------------------------------- /engine/src/test/resources/logging.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/test/resources/logging.properties -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/basic/basic-loggedin.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/basic/basic-loggedin.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/basic/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/basic/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/get/error.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/get/error.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/get/get-loggedin.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/get/get-loggedin.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/get/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/get/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/get/success.jsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/get/success.jsp -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/post/error.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/post/error.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/post/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/post/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/post/post-loggedin.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/post/post-loggedin.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/form/post/success.jsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/form/post/success.jsp -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Auth/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Auth/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/BackgroundImageExtraction/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/BackgroundImageExtraction/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/BadURIsStopPageParsing/goodone.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/BadURIsStopPageParsing/goodone.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/BadURIsStopPageParsing/goodthree.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/BadURIsStopPageParsing/goodthree.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/BadURIsStopPageParsing/goodtwo.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/BadURIsStopPageParsing/goodtwo.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/BadURIsStopPageParsing/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/BadURIsStopPageParsing/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/BadURIsStopPageParsing/one.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/BadURIsStopPageParsing/one.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/BadURIsStopPageParsing/three.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/BadURIsStopPageParsing/three.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/BadURIsStopPageParsing/two.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/BadURIsStopPageParsing/two.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Charset/charsetselftest_end.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Charset/charsetselftest_end.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Charset/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Charset/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Charset/shiftjis.jsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Charset/shiftjis.jsp -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Charset/utf8.jsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Charset/utf8.jsp -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Checkpoint/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Checkpoint/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/FlashParse/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/FlashParse/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/FlashParse/pirates.swf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/FlashParse/pirates.swf -------------------------------------------------------------------------------- /engine/src/webapps/selftest/FlashParse/success.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/FlashParse/success.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/FormTagExtraction/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/FormTagExtraction/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/FormTagExtraction/inputtag.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/FormTagExtraction/inputtag.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/FormTagExtraction/optiontag.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/FormTagExtraction/optiontag.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Frames/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Frames/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Frames/leftframe.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Frames/leftframe.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Frames/noframe.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Frames/noframe.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Frames/rightframe.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Frames/rightframe.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Frames/topframe.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Frames/topframe.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/MaxLinkHops/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/MaxLinkHops/1.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/MaxLinkHops/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/MaxLinkHops/2.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/MaxLinkHops/3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/MaxLinkHops/3.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/MaxLinkHops/4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/MaxLinkHops/4.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/MaxLinkHops/5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/MaxLinkHops/5.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/MaxLinkHops/6.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/MaxLinkHops/6.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/MaxLinkHops/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/MaxLinkHops/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Refresh/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Refresh/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Refresh/refresh.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Refresh/refresh.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/Refresh/refresh2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/Refresh/refresh2.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/RobotsExclusion/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/RobotsExclusion/README.txt -------------------------------------------------------------------------------- /engine/src/webapps/selftest/RobotsExclusion/excluded.html: -------------------------------------------------------------------------------- 1 | This page should not be fetched. 2 | -------------------------------------------------------------------------------- /engine/src/webapps/selftest/RobotsExclusion/included.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/RobotsExclusion/included.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/RobotsExclusion/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/RobotsExclusion/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/avi.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/avi.avi -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/doc.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/doc.doc -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/jpg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/jpg.jpg -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/mp3.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/mp3.mp3 -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/pdf.pdf -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/ppt.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/ppt.ppt -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/ps.ps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/ps.ps -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/rtf.rtf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/rtf.rtf -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/wav.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/wav.wav -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/wpd.wpd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/wpd.wpd -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/binaries/xls.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/binaries/xls.xls -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/html.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/html.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleDocumentTypes/txt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleDocumentTypes/txt.txt -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SimpleJavascriptExtraction/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SimpleJavascriptExtraction/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SpacesInHrefPath/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SpacesInHrefPath/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/SpacesInHrefPath/spaces in path.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/SpacesInHrefPath/spaces in path.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/TrickyRelativeURIs/anothersub/reluptarget.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/TrickyRelativeURIs/anothersub/reluptarget.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/TrickyRelativeURIs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/TrickyRelativeURIs/index.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/TrickyRelativeURIs/reluptricky.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/TrickyRelativeURIs/reluptricky.html -------------------------------------------------------------------------------- /engine/src/webapps/selftest/WEB-INF/web.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/WEB-INF/web.xml -------------------------------------------------------------------------------- /engine/src/webapps/selftest/index.jsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/index.jsp -------------------------------------------------------------------------------- /engine/src/webapps/selftest/robots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/webapps/selftest/robots.txt -------------------------------------------------------------------------------- /engine/src/xsd/arc/1.0/arc.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/xsd/arc/1.0/arc.html -------------------------------------------------------------------------------- /engine/src/xsd/arc/1.0/arc.xsd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/xsd/arc/1.0/arc.xsd -------------------------------------------------------------------------------- /engine/src/xsd/arc/1.0/example.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/src/xsd/arc/1.0/example.xml -------------------------------------------------------------------------------- /engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/goodone.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/goodone.html -------------------------------------------------------------------------------- /engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/goodtwo.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/goodtwo.html -------------------------------------------------------------------------------- /engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/one.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/one.html -------------------------------------------------------------------------------- /engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/three.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/three.html -------------------------------------------------------------------------------- /engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/two.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/BadURIsStopPageParsingSelfTest/htdocs/two.html -------------------------------------------------------------------------------- /engine/testdata/selftest/CharsetSelfTest/htdocs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/CharsetSelfTest/htdocs/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/CharsetSelfTest/htdocs/link.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /engine/testdata/selftest/CheckpointSelfTest/profile/config.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /engine/testdata/selftest/CheckpointSelfTest/profile/seeds.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/CheckpointSelfTest/profile/seeds.txt -------------------------------------------------------------------------------- /engine/testdata/selftest/CheckpointSelfTest/profile/sheets/default.single: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/CheckpointSelfTest/profile/sheets/default.single -------------------------------------------------------------------------------- /engine/testdata/selftest/FlashParseSelfTest/htdocs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FlashParseSelfTest/htdocs/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FlashParseSelfTest/htdocs/pirates.swf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FlashParseSelfTest/htdocs/pirates.swf -------------------------------------------------------------------------------- /engine/testdata/selftest/FlashParseSelfTest/htdocs/success.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FlashParseSelfTest/htdocs/success.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormAuthSelfTest/htdocs/failure.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormAuthSelfTest/htdocs/failure.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormAuthSelfTest/htdocs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormAuthSelfTest/htdocs/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormAuthSelfTest/htdocs/link1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormAuthSelfTest/htdocs/link1.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormAuthSelfTest/htdocs/link2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormAuthSelfTest/htdocs/link2.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormAuthSelfTest/htdocs/link3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormAuthSelfTest/htdocs/link3.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormAuthSelfTest/htdocs/success.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormAuthSelfTest/htdocs/success.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormLoginSelfTest/htdocs/failure.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormLoginSelfTest/htdocs/failure.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormLoginSelfTest/htdocs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormLoginSelfTest/htdocs/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormLoginSelfTest/htdocs/link1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormLoginSelfTest/htdocs/link1.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormLoginSelfTest/htdocs/link2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormLoginSelfTest/htdocs/link2.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormLoginSelfTest/htdocs/link3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormLoginSelfTest/htdocs/link3.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FormLoginSelfTest/htdocs/success.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FormLoginSelfTest/htdocs/success.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FramesSelfTestCase/htdocs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FramesSelfTestCase/htdocs/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FramesSelfTestCase/htdocs/leftframe.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FramesSelfTestCase/htdocs/leftframe.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FramesSelfTestCase/htdocs/noframe.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FramesSelfTestCase/htdocs/noframe.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FramesSelfTestCase/htdocs/rightframe.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FramesSelfTestCase/htdocs/rightframe.html -------------------------------------------------------------------------------- /engine/testdata/selftest/FramesSelfTestCase/htdocs/topframe.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/FramesSelfTestCase/htdocs/topframe.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/basic/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/basic/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/basic/link1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/basic/link1.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/basic/link2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/basic/link2.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/basic/link3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/basic/link3.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/failure.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/failure.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/link1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/link1.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/link2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/link2.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/link3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/link3.html -------------------------------------------------------------------------------- /engine/testdata/selftest/HttpAuthSelfTest/htdocs/success.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/HttpAuthSelfTest/htdocs/success.html -------------------------------------------------------------------------------- /engine/testdata/selftest/MaxLinkHopsSelfTest/htdocs/1.html: -------------------------------------------------------------------------------- 1 | link -------------------------------------------------------------------------------- /engine/testdata/selftest/MaxLinkHopsSelfTest/htdocs/2.html: -------------------------------------------------------------------------------- 1 | link -------------------------------------------------------------------------------- /engine/testdata/selftest/MaxLinkHopsSelfTest/htdocs/3.html: -------------------------------------------------------------------------------- 1 | link -------------------------------------------------------------------------------- /engine/testdata/selftest/MaxLinkHopsSelfTest/htdocs/4.html: -------------------------------------------------------------------------------- 1 | link -------------------------------------------------------------------------------- /engine/testdata/selftest/MaxLinkHopsSelfTest/htdocs/5.html: -------------------------------------------------------------------------------- 1 | link -------------------------------------------------------------------------------- /engine/testdata/selftest/MaxLinkHopsSelfTest/htdocs/index.html: -------------------------------------------------------------------------------- 1 | link -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence1SelfTest/htdocs/five/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence1SelfTest/htdocs/five/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence1SelfTest/htdocs/five/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence1SelfTest/htdocs/five/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence1SelfTest/htdocs/one/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence1SelfTest/htdocs/one/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence1SelfTest/htdocs/one/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence1SelfTest/htdocs/one/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence1SelfTest/htdocs/seed.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence1SelfTest/htdocs/seed.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence1SelfTest/htdocs/ten/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence1SelfTest/htdocs/ten/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence1SelfTest/htdocs/ten/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence1SelfTest/htdocs/ten/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence2SelfTest/htdocs/five/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence2SelfTest/htdocs/five/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence2SelfTest/htdocs/five/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence2SelfTest/htdocs/five/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence2SelfTest/htdocs/one/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence2SelfTest/htdocs/one/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence2SelfTest/htdocs/one/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence2SelfTest/htdocs/one/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence2SelfTest/htdocs/seed.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence2SelfTest/htdocs/seed.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence2SelfTest/htdocs/ten/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence2SelfTest/htdocs/ten/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence2SelfTest/htdocs/ten/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence2SelfTest/htdocs/ten/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence2SelfTest/profile/rank.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence2SelfTest/profile/rank.txt -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/A.html: -------------------------------------------------------------------------------- 1 | keyword 2 | 3 | leaf 4 | -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/B.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence3SelfTest/htdocs/B.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/C.html: -------------------------------------------------------------------------------- 1 | leaf 2 | -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/D.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence3SelfTest/htdocs/D.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/E.html: -------------------------------------------------------------------------------- 1 | keyword 2 | 3 | leaf 4 | -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/F.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence3SelfTest/htdocs/F.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/G.html: -------------------------------------------------------------------------------- 1 | 2 | leaf 3 | -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/H.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence3SelfTest/htdocs/H.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/I.html: -------------------------------------------------------------------------------- 1 | keyword 2 | 3 | leaf 4 | -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/J.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence3SelfTest/htdocs/J.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/K.html: -------------------------------------------------------------------------------- 1 | 2 | leaf 3 | -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/L.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence3SelfTest/htdocs/L.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/M.html: -------------------------------------------------------------------------------- 1 | keyword 2 | 3 | leaf 4 | -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/N.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence3SelfTest/htdocs/N.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence3SelfTest/htdocs/O.html: -------------------------------------------------------------------------------- 1 | 2 | leaf 3 | -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence4SelfTest/htdocs/five/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence4SelfTest/htdocs/five/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence4SelfTest/htdocs/five/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence4SelfTest/htdocs/five/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence4SelfTest/htdocs/one/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence4SelfTest/htdocs/one/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence4SelfTest/htdocs/one/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence4SelfTest/htdocs/one/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence4SelfTest/htdocs/seed.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence4SelfTest/htdocs/seed.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence4SelfTest/htdocs/ten/a.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence4SelfTest/htdocs/ten/a.html -------------------------------------------------------------------------------- /engine/testdata/selftest/Precedence4SelfTest/htdocs/ten/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/Precedence4SelfTest/htdocs/ten/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/SimpleSelfTest/htdocs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/SimpleSelfTest/htdocs/index.html -------------------------------------------------------------------------------- /engine/testdata/selftest/SimpleSelfTest/htdocs/link1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/SimpleSelfTest/htdocs/link1.html -------------------------------------------------------------------------------- /engine/testdata/selftest/SimpleSelfTest/htdocs/link2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/SimpleSelfTest/htdocs/link2.html -------------------------------------------------------------------------------- /engine/testdata/selftest/SimpleSelfTest/htdocs/link3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/SimpleSelfTest/htdocs/link3.html -------------------------------------------------------------------------------- /engine/testdata/selftest/StatisticsSelfTest/htdocs/a.html: -------------------------------------------------------------------------------- 1 |

nothing here

2 | -------------------------------------------------------------------------------- /engine/testdata/selftest/StatisticsSelfTest/htdocs/b.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/StatisticsSelfTest/htdocs/b.html -------------------------------------------------------------------------------- /engine/testdata/selftest/StatisticsSelfTest/htdocs/b1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/StatisticsSelfTest/htdocs/b1.html -------------------------------------------------------------------------------- /engine/testdata/selftest/StatisticsSelfTest/htdocs/b2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/StatisticsSelfTest/htdocs/b2.html -------------------------------------------------------------------------------- /engine/testdata/selftest/StatisticsSelfTest/htdocs/b3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/StatisticsSelfTest/htdocs/b3.html -------------------------------------------------------------------------------- /engine/testdata/selftest/UserAgentSelfTest/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/UserAgentSelfTest/.gitignore -------------------------------------------------------------------------------- /engine/testdata/selftest/conf/heritrix.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/conf/heritrix.properties -------------------------------------------------------------------------------- /engine/testdata/selftest/conf/selftest-crawler-beans.cxml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/engine/testdata/selftest/conf/selftest-crawler-beans.cxml -------------------------------------------------------------------------------- /modules/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/pom.xml -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/crawler/util/CrawledBytesHistotable.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/crawler/util/CrawledBytesHistotable.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/CandidateChain.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/CandidateChain.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/CoreAttributeConstants.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/CoreAttributeConstants.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/CrawlMetadata.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/CrawlMetadata.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/CrawlURI.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/CrawlURI.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/DispositionChain.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/DispositionChain.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/FetchChain.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/FetchChain.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/ProcessResult.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/ProcessResult.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/Processor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/Processor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/ProcessorChain.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/ProcessorChain.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/ProcessorTestBase.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/ProcessorTestBase.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/SchedulingConstants.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/SchedulingConstants.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/ScriptedProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/ScriptedProcessor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/SimpleFileLoggerProvider.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/SimpleFileLoggerProvider.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/behaviors/Behavior.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/behaviors/Behavior.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/behaviors/Page.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/behaviors/Page.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/behaviors/ScrollDownBehavior.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/behaviors/ScrollDownBehavior.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/canonicalize/BaseRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/canonicalize/BaseRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/canonicalize/FixupQueryString.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/canonicalize/FixupQueryString.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/canonicalize/LowercaseRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/canonicalize/LowercaseRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/canonicalize/RegexRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/canonicalize/RegexRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/canonicalize/StripSessionIDs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/canonicalize/StripSessionIDs.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/canonicalize/StripWWWNRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/canonicalize/StripWWWNRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/canonicalize/StripWWWRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/canonicalize/StripWWWRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/credential/Credential.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/credential/Credential.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/credential/CredentialStore.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/credential/CredentialStore.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/credential/HtmlFormCredential.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/credential/HtmlFormCredential.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/credential/package.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/credential/package.html -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/deciderules/AcceptDecideRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/deciderules/AcceptDecideRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/deciderules/DecideResult.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/deciderules/DecideResult.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/deciderules/DecideRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/deciderules/DecideRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/deciderules/HasViaDecideRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/deciderules/HasViaDecideRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/deciderules/RejectDecideRule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/deciderules/RejectDecideRule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ContentExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ContentExtractor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/CustomSWFTags.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/CustomSWFTags.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/Extractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/Extractor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorCSS.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorCSS.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorDOC.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorDOC.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorHTML.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorHTML.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorHTTP.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorHTTP.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorImpliedURI.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorImpliedURI.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorJS.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorJS.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorPDF.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorPDF.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorParameters.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorParameters.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorSWF.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorSWF.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorSitemap.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorSitemap.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorURI.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorURI.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/ExtractorXML.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/ExtractorXML.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/HTMLLinkContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/HTMLLinkContext.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/Hop.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/Hop.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/LinkContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/LinkContext.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/PDFParser.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/PDFParser.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/extractor/TempDirProvider.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/extractor/TempDirProvider.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/BdbCookieStore.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/BdbCookieStore.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/DefaultServerCache.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/DefaultServerCache.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchDNS.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchDNS.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchErrors.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchErrors.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchFTP.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchFTP.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchHTTP.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchHTTP.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchHTTP2.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchHTTP2.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchHTTPRequest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchHTTPRequest.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchSFTP.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchSFTP.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchStats.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchStats.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchStatusCodes.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchStatusCodes.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/FetchWhois.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/FetchWhois.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/HostResolver.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/HostResolver.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/SimpleCookieStore.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/SimpleCookieStore.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/SocksSocketFactory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/SocksSocketFactory.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/fetcher/UserAgentProvider.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/fetcher/UserAgentProvider.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/forms/ExtractorHTMLForms.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/forms/ExtractorHTMLForms.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/forms/FormLoginProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/forms/FormLoginProcessor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/forms/HTMLForm.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/forms/HTMLForm.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/BdbServerCache.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/BdbServerCache.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/CrawlHost.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/CrawlHost.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/CrawlServer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/CrawlServer.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/CustomRobotsPolicy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/CustomRobotsPolicy.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/DefaultTempDirProvider.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/DefaultTempDirProvider.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/FirstNamedRobotsPolicy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/FirstNamedRobotsPolicy.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/IgnoreRobotsPolicy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/IgnoreRobotsPolicy.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/ObeyRobotsPolicy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/ObeyRobotsPolicy.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/RobotsDirectives.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/RobotsDirectives.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/RobotsPolicy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/RobotsPolicy.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/RobotsTxtOnlyPolicy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/RobotsTxtOnlyPolicy.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/Robotstxt.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/Robotstxt.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/net/ServerCache.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/net/ServerCache.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/package-info.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/recrawl/PersistProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/recrawl/PersistProcessor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/revisit/AbstractProfile.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/revisit/AbstractProfile.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/revisit/RevisitProfile.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/revisit/RevisitProfile.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/seeds/SeedListener.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/seeds/SeedListener.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/seeds/SeedModule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/seeds/SeedModule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/seeds/TextSeedModule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/seeds/TextSeedModule.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/warc/BaseWARCRecordBuilder.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/warc/BaseWARCRecordBuilder.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/warc/MetadataRecordBuilder.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/warc/MetadataRecordBuilder.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/warc/RevisitRecordBuilder.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/warc/RevisitRecordBuilder.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/warc/WARCRecordBuilder.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/warc/WARCRecordBuilder.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/writer/ARCWriterProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/writer/ARCWriterProcessor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/writer/Kw3Constants.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/writer/Kw3Constants.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/writer/Kw3WriterProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/writer/Kw3WriterProcessor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/writer/WARCWriterProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/writer/WARCWriterProcessor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/modules/writer/WriterPoolProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/modules/writer/WriterPoolProcessor.java -------------------------------------------------------------------------------- /modules/src/main/java/org/archive/state/ModuleTestBase.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/java/org/archive/state/ModuleTestBase.java -------------------------------------------------------------------------------- /modules/src/main/resources/org/archive/modules/BeanShellProcessor_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/resources/org/archive/modules/BeanShellProcessor_en.utf8 -------------------------------------------------------------------------------- /modules/src/main/resources/org/archive/modules/Processor_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/resources/org/archive/modules/Processor_en.utf8 -------------------------------------------------------------------------------- /modules/src/main/resources/org/archive/modules/fetcher/FetchDNS_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/resources/org/archive/modules/fetcher/FetchDNS_en.utf8 -------------------------------------------------------------------------------- /modules/src/main/resources/org/archive/modules/fetcher/FetchFTP_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/resources/org/archive/modules/fetcher/FetchFTP_en.utf8 -------------------------------------------------------------------------------- /modules/src/main/resources/org/archive/modules/fetcher/FetchHTTP_en.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/main/resources/org/archive/modules/fetcher/FetchHTTP_en.utf8 -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/ScriptedProcessorTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/ScriptedProcessorTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/canonicalize/RegexRuleTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/canonicalize/RegexRuleTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/extractor/ExtractorCSSTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/extractor/ExtractorCSSTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/extractor/ExtractorDOCTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/extractor/ExtractorDOCTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/extractor/ExtractorJSTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/extractor/ExtractorJSTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/extractor/ExtractorPDFTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/extractor/ExtractorPDFTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/extractor/ExtractorSWFTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/extractor/ExtractorSWFTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/extractor/ExtractorURITest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/extractor/ExtractorURITest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/extractor/ExtractorXMLTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/extractor/ExtractorXMLTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/extractor/PDFParserTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/extractor/PDFParserTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/fetcher/CookieStoreTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/fetcher/CookieStoreTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/fetcher/FetchDNSTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/fetcher/FetchDNSTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/fetcher/FetchFTPTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/fetcher/FetchFTPTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/fetcher/FetchHTTP2Test.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/fetcher/FetchHTTP2Test.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/fetcher/FetchHTTPTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/fetcher/FetchHTTPTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/net/CrawlHostTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/net/CrawlHostTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/net/CrawlServerTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/net/CrawlServerTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/net/RobotsPolicyTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/net/RobotsPolicyTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/net/RobotstxtTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/net/RobotstxtTest.java -------------------------------------------------------------------------------- /modules/src/test/java/org/archive/modules/net/ServerCacheTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/modules/src/test/java/org/archive/modules/net/ServerCacheTest.java -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/internetarchive/heritrix3/HEAD/pom.xml --------------------------------------------------------------------------------