├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── dockerhub.yml │ └── maven.yml ├── .gitignore ├── .mvn └── jvm.config ├── API ├── README.md ├── pom.xml ├── src │ └── main │ │ ├── java │ │ └── crawlercommons │ │ │ └── urlfrontier │ │ │ └── CrawlID.java │ │ └── protobuf │ │ └── urlfrontier.proto └── urlfrontier.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── client ├── README.md ├── pom.xml └── src │ └── main │ └── java │ └── crawlercommons │ └── urlfrontier │ └── client │ ├── Client.java │ ├── CountURLs.java │ ├── DeleteCrawl.java │ ├── DeleteQueue.java │ ├── GetActive.java │ ├── GetStats.java │ ├── GetURLStatus.java │ ├── GetURLs.java │ ├── ListCrawls.java │ ├── ListNodes.java │ ├── ListQueues.java │ ├── ListURLs.java │ ├── PutURLs.java │ ├── SetActive.java │ ├── SetCrawlLimit.java │ └── SetLogLevel.java ├── logo.png ├── logo.svg ├── pom.xml ├── service ├── README.md ├── config.ini ├── monitoring │ ├── README.md │ ├── docker-compose.yml │ ├── prometheus.yml │ └── provisioning │ │ ├── dashboards │ │ ├── URLFrontier-Prometheus.json │ │ └── dashboards.yml │ │ └── datasources │ │ ├── loki.yml │ │ └── prometheus.yml ├── pom.xml └── src │ ├── main │ ├── java │ │ └── crawlercommons │ │ │ └── urlfrontier │ │ │ └── service │ │ │ ├── AbstractFrontierService.java │ │ │ ├── CloseableIterator.java │ │ │ ├── ConcurrentInsertionOrderMap.java │ │ │ ├── ConcurrentOrderedMap.java │ │ │ ├── QueueInterface.java │ │ │ ├── QueueWithinCrawl.java │ │ │ ├── SynchronizedStreamObserver.java │ │ │ ├── URLFrontierServer.java │ │ │ ├── cluster │ │ │ ├── DistributedFrontierService.java │ │ │ ├── Hearbeat.java │ │ │ └── HeartbeatListener.java │ │ │ ├── memory │ │ │ ├── InternalURL.java │ │ │ ├── MemoryFrontierService.java │ │ │ └── URLQueue.java │ │ │ └── rocksdb │ │ │ ├── QueueMetadata.java │ │ │ ├── RocksDBService.java │ │ │ └── ShardedRocksDBService.java │ └── resources │ │ └── logback.xml │ └── test │ └── java │ └── crawlercommons │ └── urlfrontier │ └── service │ ├── ConcurrentOrderedMapTest.java │ ├── MemoryFrontierServiceTest.java │ ├── RocksDBServiceTest.java │ └── ServiceTestUtil.java └── tests ├── README.md ├── pom.xml └── src └── test └── java └── crawlercommons └── urlfrontier └── service └── URLFrontierServiceTest.java /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/workflows/dockerhub.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/.github/workflows/dockerhub.yml -------------------------------------------------------------------------------- /.github/workflows/maven.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/.github/workflows/maven.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/.gitignore -------------------------------------------------------------------------------- /.mvn/jvm.config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/.mvn/jvm.config -------------------------------------------------------------------------------- /API/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/API/README.md -------------------------------------------------------------------------------- /API/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/API/pom.xml -------------------------------------------------------------------------------- /API/src/main/java/crawlercommons/urlfrontier/CrawlID.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/API/src/main/java/crawlercommons/urlfrontier/CrawlID.java -------------------------------------------------------------------------------- /API/src/main/protobuf/urlfrontier.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/API/src/main/protobuf/urlfrontier.proto -------------------------------------------------------------------------------- /API/urlfrontier.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/API/urlfrontier.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/README.md -------------------------------------------------------------------------------- /client/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/README.md -------------------------------------------------------------------------------- /client/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/pom.xml -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/Client.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/Client.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/DeleteCrawl.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/DeleteCrawl.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/DeleteQueue.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/DeleteQueue.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/GetActive.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/GetActive.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/GetStats.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/GetStats.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/GetURLStatus.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/GetURLStatus.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/GetURLs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/GetURLs.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/ListCrawls.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/ListCrawls.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/ListNodes.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/ListNodes.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/ListQueues.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/ListQueues.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/ListURLs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/ListURLs.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/PutURLs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/PutURLs.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/SetActive.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/SetActive.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/SetCrawlLimit.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/SetCrawlLimit.java -------------------------------------------------------------------------------- /client/src/main/java/crawlercommons/urlfrontier/client/SetLogLevel.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/client/src/main/java/crawlercommons/urlfrontier/client/SetLogLevel.java -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/logo.png -------------------------------------------------------------------------------- /logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/logo.svg -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/pom.xml -------------------------------------------------------------------------------- /service/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/README.md -------------------------------------------------------------------------------- /service/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/config.ini -------------------------------------------------------------------------------- /service/monitoring/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/monitoring/README.md -------------------------------------------------------------------------------- /service/monitoring/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/monitoring/docker-compose.yml -------------------------------------------------------------------------------- /service/monitoring/prometheus.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/monitoring/prometheus.yml -------------------------------------------------------------------------------- /service/monitoring/provisioning/dashboards/URLFrontier-Prometheus.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/monitoring/provisioning/dashboards/URLFrontier-Prometheus.json -------------------------------------------------------------------------------- /service/monitoring/provisioning/dashboards/dashboards.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/monitoring/provisioning/dashboards/dashboards.yml -------------------------------------------------------------------------------- /service/monitoring/provisioning/datasources/loki.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/monitoring/provisioning/datasources/loki.yml -------------------------------------------------------------------------------- /service/monitoring/provisioning/datasources/prometheus.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/monitoring/provisioning/datasources/prometheus.yml -------------------------------------------------------------------------------- /service/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/pom.xml -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/CloseableIterator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/CloseableIterator.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/ConcurrentInsertionOrderMap.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/ConcurrentInsertionOrderMap.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/ConcurrentOrderedMap.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/ConcurrentOrderedMap.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/QueueInterface.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/QueueInterface.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/QueueWithinCrawl.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/QueueWithinCrawl.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/SynchronizedStreamObserver.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/SynchronizedStreamObserver.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/URLFrontierServer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/URLFrontierServer.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/cluster/DistributedFrontierService.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/cluster/DistributedFrontierService.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/cluster/Hearbeat.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/cluster/Hearbeat.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/cluster/HeartbeatListener.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/cluster/HeartbeatListener.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/memory/InternalURL.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/memory/InternalURL.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/memory/MemoryFrontierService.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/memory/MemoryFrontierService.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/memory/URLQueue.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/memory/URLQueue.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/rocksdb/QueueMetadata.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/rocksdb/QueueMetadata.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/rocksdb/RocksDBService.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/rocksdb/RocksDBService.java -------------------------------------------------------------------------------- /service/src/main/java/crawlercommons/urlfrontier/service/rocksdb/ShardedRocksDBService.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/java/crawlercommons/urlfrontier/service/rocksdb/ShardedRocksDBService.java -------------------------------------------------------------------------------- /service/src/main/resources/logback.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/main/resources/logback.xml -------------------------------------------------------------------------------- /service/src/test/java/crawlercommons/urlfrontier/service/ConcurrentOrderedMapTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/test/java/crawlercommons/urlfrontier/service/ConcurrentOrderedMapTest.java -------------------------------------------------------------------------------- /service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java -------------------------------------------------------------------------------- /service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java -------------------------------------------------------------------------------- /service/src/test/java/crawlercommons/urlfrontier/service/ServiceTestUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/service/src/test/java/crawlercommons/urlfrontier/service/ServiceTestUtil.java -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/tests/README.md -------------------------------------------------------------------------------- /tests/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/tests/pom.xml -------------------------------------------------------------------------------- /tests/src/test/java/crawlercommons/urlfrontier/service/URLFrontierServiceTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crawler-commons/url-frontier/HEAD/tests/src/test/java/crawlercommons/urlfrontier/service/URLFrontierServiceTest.java --------------------------------------------------------------------------------