├── .gitattributes ├── .github └── workflows │ └── main.yml ├── .gitignore ├── CocoCrawler.sln ├── CocoCrawler ├── Builders │ ├── CrawlerEngineBuilder.cs │ ├── EngineSettingsBuilder.cs │ └── PageCrawlJobBuilder.cs ├── CocoCrawler.csproj ├── CrawlJob │ ├── Cookie.cs │ ├── PageBrowserActions │ │ ├── PageAction.cs │ │ ├── PageActionType.cs │ │ ├── PageActions.cs │ │ └── PageActionsBuilder.cs │ ├── PageCrawlJob.cs │ └── PageTasks │ │ ├── CrawlPageExtractListTask.cs │ │ ├── CrawlPageExtractObjectTask.cs │ │ ├── CrawlPageOpenLinksTask.cs │ │ ├── CrawlPagePaginateTask.cs │ │ └── IPageCrawlTask.cs ├── CrawlOutputs │ ├── ConsoleCrawlOutput.cs │ ├── CsvFileCrawlOutput.cs │ └── ICrawlOutput.cs ├── Crawler │ ├── CrawlResult.cs │ ├── ICrawler.cs │ └── PuppeteerCrawler.cs ├── CrawlerEngine.cs ├── EngineSettings.cs ├── Exceptions │ ├── CocoCrawlerBuilderException.cs │ └── CocoCrawlerPageLimitReachedException.cs ├── Parser │ ├── AngleSharpParser.cs │ ├── CssSelector.cs │ └── IParser.cs ├── Scheduler │ ├── IScheduler.cs │ └── InMemoryScheduler.cs └── VisitedUrlTracker │ ├── FileVisitedUrlTracker.cs │ ├── IVisitedUrlTracker.cs │ └── InMemoryVisitedUrlTracker.cs ├── Examples ├── BackgroundServiceExtractList │ ├── BackgroundServiceExtractList.csproj │ ├── Program.cs │ └── RedditListingBackgroundService.cs └── BackgroundServiceOpenLinksAndExtractObject │ ├── BackgroundServiceOpenLinksAndExtractObjects.csproj │ ├── Program.cs │ └── RedditPostsBackgroundService.cs ├── LICENSE.txt ├── README.md └── Tests └── CocoCrawler.IntegrationTests ├── BrowserCollection.cs ├── CocoCrawler.IntegrationTests.csproj ├── ConfigureEngine ├── CookiesTests.cs ├── ThrowOnBuilderExceptionsTests.cs ├── UserAgentTests.cs └── VisitedLinksTests.cs ├── Outputs └── CsvOutputTests.cs ├── Scenarios ├── ExtractListAndPaginate │ ├── ExtractListAndPaginateTests.cs │ └── Responses │ │ ├── main-page.html │ │ └── page-2.html ├── OpenLinkAndClick │ └── OpenLinkAndClickTests.cs └── OpenLinksExtractObjectAndPaginate │ └── OpenLinksExtractObjectAndPaginateTests.cs └── WireMockExtensions.cs /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/.gitattributes -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/.github/workflows/main.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/.gitignore -------------------------------------------------------------------------------- /CocoCrawler.sln: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler.sln -------------------------------------------------------------------------------- /CocoCrawler/Builders/CrawlerEngineBuilder.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Builders/CrawlerEngineBuilder.cs -------------------------------------------------------------------------------- /CocoCrawler/Builders/EngineSettingsBuilder.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Builders/EngineSettingsBuilder.cs -------------------------------------------------------------------------------- /CocoCrawler/Builders/PageCrawlJobBuilder.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Builders/PageCrawlJobBuilder.cs -------------------------------------------------------------------------------- /CocoCrawler/CocoCrawler.csproj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CocoCrawler.csproj -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/Cookie.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/Cookie.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageBrowserActions/PageAction.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageBrowserActions/PageAction.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageBrowserActions/PageActionType.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageBrowserActions/PageActionType.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageBrowserActions/PageActions.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageBrowserActions/PageActions.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageBrowserActions/PageActionsBuilder.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageBrowserActions/PageActionsBuilder.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageCrawlJob.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageCrawlJob.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageTasks/CrawlPageExtractListTask.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageTasks/CrawlPageExtractListTask.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageTasks/CrawlPageExtractObjectTask.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageTasks/CrawlPageExtractObjectTask.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageTasks/CrawlPageOpenLinksTask.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageTasks/CrawlPageOpenLinksTask.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageTasks/CrawlPagePaginateTask.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageTasks/CrawlPagePaginateTask.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlJob/PageTasks/IPageCrawlTask.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlJob/PageTasks/IPageCrawlTask.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlOutputs/ConsoleCrawlOutput.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlOutputs/ConsoleCrawlOutput.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlOutputs/CsvFileCrawlOutput.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlOutputs/CsvFileCrawlOutput.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlOutputs/ICrawlOutput.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlOutputs/ICrawlOutput.cs -------------------------------------------------------------------------------- /CocoCrawler/Crawler/CrawlResult.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Crawler/CrawlResult.cs -------------------------------------------------------------------------------- /CocoCrawler/Crawler/ICrawler.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Crawler/ICrawler.cs -------------------------------------------------------------------------------- /CocoCrawler/Crawler/PuppeteerCrawler.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Crawler/PuppeteerCrawler.cs -------------------------------------------------------------------------------- /CocoCrawler/CrawlerEngine.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/CrawlerEngine.cs -------------------------------------------------------------------------------- /CocoCrawler/EngineSettings.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/EngineSettings.cs -------------------------------------------------------------------------------- /CocoCrawler/Exceptions/CocoCrawlerBuilderException.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Exceptions/CocoCrawlerBuilderException.cs -------------------------------------------------------------------------------- /CocoCrawler/Exceptions/CocoCrawlerPageLimitReachedException.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Exceptions/CocoCrawlerPageLimitReachedException.cs -------------------------------------------------------------------------------- /CocoCrawler/Parser/AngleSharpParser.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Parser/AngleSharpParser.cs -------------------------------------------------------------------------------- /CocoCrawler/Parser/CssSelector.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Parser/CssSelector.cs -------------------------------------------------------------------------------- /CocoCrawler/Parser/IParser.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Parser/IParser.cs -------------------------------------------------------------------------------- /CocoCrawler/Scheduler/IScheduler.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Scheduler/IScheduler.cs -------------------------------------------------------------------------------- /CocoCrawler/Scheduler/InMemoryScheduler.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/Scheduler/InMemoryScheduler.cs -------------------------------------------------------------------------------- /CocoCrawler/VisitedUrlTracker/FileVisitedUrlTracker.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/VisitedUrlTracker/FileVisitedUrlTracker.cs -------------------------------------------------------------------------------- /CocoCrawler/VisitedUrlTracker/IVisitedUrlTracker.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/VisitedUrlTracker/IVisitedUrlTracker.cs -------------------------------------------------------------------------------- /CocoCrawler/VisitedUrlTracker/InMemoryVisitedUrlTracker.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/CocoCrawler/VisitedUrlTracker/InMemoryVisitedUrlTracker.cs -------------------------------------------------------------------------------- /Examples/BackgroundServiceExtractList/BackgroundServiceExtractList.csproj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Examples/BackgroundServiceExtractList/BackgroundServiceExtractList.csproj -------------------------------------------------------------------------------- /Examples/BackgroundServiceExtractList/Program.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Examples/BackgroundServiceExtractList/Program.cs -------------------------------------------------------------------------------- /Examples/BackgroundServiceExtractList/RedditListingBackgroundService.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Examples/BackgroundServiceExtractList/RedditListingBackgroundService.cs -------------------------------------------------------------------------------- /Examples/BackgroundServiceOpenLinksAndExtractObject/BackgroundServiceOpenLinksAndExtractObjects.csproj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Examples/BackgroundServiceOpenLinksAndExtractObject/BackgroundServiceOpenLinksAndExtractObjects.csproj -------------------------------------------------------------------------------- /Examples/BackgroundServiceOpenLinksAndExtractObject/Program.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Examples/BackgroundServiceOpenLinksAndExtractObject/Program.cs -------------------------------------------------------------------------------- /Examples/BackgroundServiceOpenLinksAndExtractObject/RedditPostsBackgroundService.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Examples/BackgroundServiceOpenLinksAndExtractObject/RedditPostsBackgroundService.cs -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/README.md -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/BrowserCollection.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/BrowserCollection.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/CocoCrawler.IntegrationTests.csproj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/CocoCrawler.IntegrationTests.csproj -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/ConfigureEngine/CookiesTests.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/ConfigureEngine/CookiesTests.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/ConfigureEngine/ThrowOnBuilderExceptionsTests.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/ConfigureEngine/ThrowOnBuilderExceptionsTests.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/ConfigureEngine/UserAgentTests.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/ConfigureEngine/UserAgentTests.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/ConfigureEngine/VisitedLinksTests.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/ConfigureEngine/VisitedLinksTests.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/Outputs/CsvOutputTests.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/Outputs/CsvOutputTests.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/Scenarios/ExtractListAndPaginate/ExtractListAndPaginateTests.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/Scenarios/ExtractListAndPaginate/ExtractListAndPaginateTests.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/Scenarios/ExtractListAndPaginate/Responses/main-page.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/Scenarios/ExtractListAndPaginate/Responses/main-page.html -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/Scenarios/ExtractListAndPaginate/Responses/page-2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/Scenarios/ExtractListAndPaginate/Responses/page-2.html -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/Scenarios/OpenLinkAndClick/OpenLinkAndClickTests.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/Scenarios/OpenLinkAndClick/OpenLinkAndClickTests.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/Scenarios/OpenLinksExtractObjectAndPaginate/OpenLinksExtractObjectAndPaginateTests.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/Scenarios/OpenLinksExtractObjectAndPaginate/OpenLinksExtractObjectAndPaginateTests.cs -------------------------------------------------------------------------------- /Tests/CocoCrawler.IntegrationTests/WireMockExtensions.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Marcel0024/CocoCrawler/HEAD/Tests/CocoCrawler.IntegrationTests/WireMockExtensions.cs --------------------------------------------------------------------------------