├── .github ├── FUNDING.yml └── workflows │ ├── rust.yml │ └── release.yml ├── src ├── notifier │ ├── mod.rs │ └── nats.rs ├── crawler │ ├── mod.rs │ ├── routeviews.rs │ ├── riperis.rs │ └── common.rs ├── peer.rs ├── error.rs ├── cli │ ├── bootstrap.rs │ ├── utils.rs │ ├── backup.rs │ └── api.rs ├── db │ ├── utils.rs │ ├── meta.rs │ ├── latest_files.rs │ └── mod.rs ├── item.rs ├── query.rs ├── collector.rs ├── config.rs └── shortcuts.rs ├── .gitignore ├── docker-compose.yaml ├── examples ├── latest.rs ├── query.rs ├── peers.rs ├── health_check.rs ├── timestamps.rs ├── simple_config.rs └── shortcuts.rs ├── .dockerignore ├── deployment ├── nomad_api_raw.hcl └── nomad_backup_raw.hcl ├── Dockerfile ├── LICENSE ├── Cargo.toml └── CHANGELOG.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: bgpkit 4 | -------------------------------------------------------------------------------- /src/notifier/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "nats")] 2 | mod nats; 3 | 4 | #[cfg(feature = "nats")] 5 | pub use nats::NatsNotifier; 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | # Cargo.lock 3 | .idea 4 | 5 | *.sqlite3* 6 | *.db 7 | *shm 8 | *wal 9 | *.duckdb* 10 | *.parquet 11 | /.env 12 | .claude 13 | CLAUDE.md 14 | .env 15 | .DS_Store -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | services: 3 | bgpkit-broker: 4 | image: bgpkit/bgpkit-broker:latest 5 | ports: 6 | - "40064:40064" 7 | volumes: 8 | - ./data:/bgpkit-broker 9 | restart: unless-stopped 10 | 
-------------------------------------------------------------------------------- /examples/latest.rs: -------------------------------------------------------------------------------- 1 | use bgpkit_broker::BgpkitBroker; 2 | 3 | pub fn main() { 4 | let broker = BgpkitBroker::new(); 5 | 6 | // method 1: create iterator from reference (so that you can reuse the broker object) 7 | // same as `&broker.into_iter()` 8 | for item in broker.latest().unwrap() { 9 | println!("{}", item); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # flyctl launch added from .gitignore 2 | target 3 | **/Cargo.lock 4 | **/.idea 5 | 6 | **/*.sqlite3* 7 | **/*.duckdb* 8 | **/*.parquet 9 | .env 10 | 11 | # flyctl launch added from .idea/.gitignore 12 | # Default ignored files 13 | .idea/shelf 14 | .idea/workspace.xml 15 | # Editor-based HTTP Client requests 16 | .idea/httpRequests 17 | # Datasource local storage ignored files 18 | .idea/dataSources 19 | .idea/dataSources.local.xml 20 | fly.toml 21 | -------------------------------------------------------------------------------- /deployment/nomad_api_raw.hcl: -------------------------------------------------------------------------------- 1 | job "bgpkit-broker-api" { 2 | type = "service" 3 | group "broker" { 4 | task "api" { 5 | driver = "raw_exec" 6 | 7 | config { 8 | command = "/usr/local/bin/bgpkit-broker" 9 | args = [ 10 | "serve", 11 | "--port", "40064", 12 | "--env", "/usr/local/etc/bgpkit.d/broker.env", 13 | "/var/db/bgpkit/bgpkit_broker.sqlite3" 14 | ] 15 | } 16 | 17 | resources { 18 | memory = 4000 19 | } 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: 4 | push: 5 | branches: [ 
main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Build cli 22 | run: cargo build --features cli --verbose 23 | - name: Test SDK 24 | run: cargo test --no-default-features --verbose 25 | - name: Run clippy 26 | run: cargo clippy --all-features -- -D warnings 27 | -------------------------------------------------------------------------------- /deployment/nomad_backup_raw.hcl: -------------------------------------------------------------------------------- 1 | job "bgpkit-broker-backup" { 2 | type = "batch" 3 | periodic { 4 | crons = ["5 8 * * *"] 5 | prohibit_overlap = true 6 | } 7 | 8 | 9 | task "bgpkit-broker-backup" { 10 | driver = "raw_exec" 11 | 12 | config { 13 | command = "/usr/local/bin/bgpkit-broker" 14 | args = [ 15 | "backup", 16 | "--env", "/usr/local/etc/bgpkit.d/broker.env", 17 | "/var/db/bgpkit/bgpkit_broker.sqlite3", 18 | "s3://spaces/broker/bgpkit_broker.sqlite3", 19 | "--sqlite-cmd-path", "/usr/local/bin/sqlite3" 20 | ] 21 | } 22 | 23 | resources { 24 | memory = 4000 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # select build image 2 | FROM rust:1.90.0 AS build 3 | 4 | # create a new empty shell project 5 | RUN USER=root cargo new --bin my_project 6 | WORKDIR /my_project 7 | 8 | # copy your source tree 9 | COPY ./src ./src 10 | COPY ./Cargo.toml . 
11 | 12 | # build for release 13 | RUN cargo build --release --all-features 14 | 15 | # our final base 16 | FROM debian:trixie-slim 17 | 18 | # copy the build artifact from the build stage 19 | COPY --from=build /my_project/target/release/bgpkit-broker /usr/local/bin/bgpkit-broker 20 | 21 | RUN apt update && apt install -y curl tini sqlite3 22 | WORKDIR /bgpkit-broker 23 | 24 | EXPOSE 40064 25 | ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/bgpkit-broker"] 26 | CMD ["serve", "bgpkit-broker.sqlite3", "--bootstrap", "--silent"] 27 | -------------------------------------------------------------------------------- /src/crawler/mod.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | mod riperis; 3 | mod routeviews; 4 | 5 | use chrono::NaiveDate; 6 | use tracing::{debug, info}; 7 | 8 | use crate::{BrokerError, BrokerItem}; 9 | use riperis::crawl_ripe_ris; 10 | use routeviews::crawl_routeviews; 11 | 12 | use crate::Collector; 13 | 14 | pub async fn crawl_collector( 15 | collector: &Collector, 16 | from_ts: Option, 17 | ) -> Result, BrokerError> { 18 | debug!("crawl collector {} from {:?}", &collector.id, from_ts); 19 | if from_ts.is_none() { 20 | info!("bootstrap crawl for collector {}", &collector.id); 21 | } 22 | 23 | let items = match collector.project.as_str() { 24 | "riperis" => crawl_ripe_ris(collector, from_ts).await, 25 | "routeviews" => crawl_routeviews(collector, from_ts).await, 26 | _ => panic!("unknown project {}", collector.project), 27 | }; 28 | debug!( 29 | "crawl collector {} from {:?}... 
done", 30 | &collector.id, from_ts 31 | ); 32 | items 33 | } 34 | -------------------------------------------------------------------------------- /src/peer.rs: -------------------------------------------------------------------------------- 1 | use chrono::NaiveDate; 2 | use serde::{Deserialize, Serialize}; 3 | use std::net::IpAddr; 4 | 5 | /// MRT collector peer information 6 | /// 7 | /// Represents the information of an MRT collector peer. 8 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 9 | #[cfg_attr(feature = "cli", derive(tabled::Tabled))] 10 | pub struct BrokerPeer { 11 | /// The date of the latest available data. 12 | pub date: NaiveDate, 13 | /// The IP address of the collector peer. 14 | pub ip: IpAddr, 15 | /// The ASN (Autonomous System Number) of the collector peer. 16 | pub asn: u32, 17 | /// The name of the collector. 18 | pub collector: String, 19 | /// The number of IPv4 prefixes. 20 | pub num_v4_pfxs: u32, 21 | /// The number of IPv6 prefixes. 22 | pub num_v6_pfxs: u32, 23 | /// The number of connected ASNs. 
24 | pub num_connected_asns: u32, 25 | } 26 | #[derive(Debug, Clone, Serialize, Deserialize)] 27 | pub(crate) struct BrokerPeersResult { 28 | pub count: u32, 29 | pub data: Vec, 30 | } 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mingwei Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /examples/query.rs: -------------------------------------------------------------------------------- 1 | use bgpkit_broker::{BgpkitBroker, BrokerItem}; 2 | 3 | pub fn main() { 4 | let broker = BgpkitBroker::new() 5 | .broker_url("https://api.bgpkit.com/v3/broker") 6 | .ts_start("1634693400") 7 | .ts_end("1634693400") 8 | .collector_id("rrc00,route-views2"); 9 | 10 | // method 1: create iterator from reference (so that you can reuse the broker object) 11 | // same as `&broker.into_iter()` 12 | for item in &broker { 13 | println!("{}", item); 14 | } 15 | 16 | let broker = BgpkitBroker::new() 17 | .ts_start("1634693400") 18 | .ts_end("1634693400"); 19 | 20 | // method 2: create iterator from the broker object (taking ownership) 21 | let items = broker.into_iter().collect::>(); 22 | assert!(items.len() >= 53); 23 | 24 | // count total number of items 25 | let broker = BgpkitBroker::new() 26 | .ts_start("2024-01-01") 27 | .ts_end("2024-01-02") 28 | .collector_id("route-views2"); 29 | match broker.query_total_count() { 30 | Ok(total) => println!("total: {}", total), 31 | Err(err) => println!("error: {}", err), 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | //! Error handling module. 2 | use thiserror::Error; 3 | 4 | /// Broker error enum. 
5 | #[derive(Error, Debug)] 6 | pub enum BrokerError { 7 | #[error("NetworkError: {0}")] 8 | NetworkError(#[from] reqwest::Error), 9 | 10 | #[error("BrokerError: {0}")] 11 | BrokerError(String), 12 | 13 | #[error("ConfigurationError: {0}")] 14 | ConfigurationError(String), 15 | 16 | #[cfg(feature = "cli")] 17 | #[error("CrawlerError: {0}")] 18 | CrawlerError(String), 19 | 20 | #[cfg(feature = "cli")] 21 | #[error("IoError: {0}")] 22 | IoError(#[from] std::io::Error), 23 | 24 | #[cfg(feature = "cli")] 25 | #[error("ConfigConfigError: {0}")] 26 | ConfigJsonError(#[from] serde_json::Error), 27 | 28 | #[cfg(feature = "cli")] 29 | #[error("ConfigUnknownError: {0}")] 30 | ConfigUnknownError(String), 31 | 32 | #[error("DateTimeParseError: {0}")] 33 | DateTimeParseError(#[from] chrono::ParseError), 34 | 35 | #[cfg(feature = "backend")] 36 | #[error("DatabaseError: {0}")] 37 | DatabaseError(#[from] sqlx::Error), 38 | 39 | #[cfg(feature = "nats")] 40 | #[error("NotifierError: {0}")] 41 | NotifierError(String), 42 | } 43 | -------------------------------------------------------------------------------- /examples/peers.rs: -------------------------------------------------------------------------------- 1 | //! This example retrieves a list of full-feed MRT collector peers from route-views.amsix and print 2 | //! out the top 10 peers with the most connected ASNs. 3 | //! 4 | //! Example output 5 | //! ```text 6 | //! 2024-10-31,route-views.amsix,58511,80.249.212.104,2567,960791,0 7 | //! 2024-10-31,route-views.amsix,267613,80.249.213.223,2268,965321,0 8 | //! 2024-10-31,route-views.amsix,267613,2001:7f8:1:0:a500:26:7613:1,2011,0,206667 9 | //! 2024-10-31,route-views.amsix,12779,80.249.209.17,1932,951788,0 10 | //! 2024-10-31,route-views.amsix,9002,2001:7f8:1::a500:9002:1,1896,0,202069 11 | //! 2024-10-31,route-views.amsix,38880,80.249.212.75,1883,992214,0 12 | //! 2024-10-31,route-views.amsix,58511,2001:7f8:1::a505:8511:1,1853,0,216981 13 | //! 
2024-10-31,route-views.amsix,9002,80.249.209.216,1318,956345,0 14 | //! 2024-10-31,route-views.amsix,42541,80.249.212.84,1302,952091,0 15 | //! 2024-10-31,route-views.amsix,12779,2001:7f8:1::a501:2779:1,1247,0,201726 16 | //! ``` 17 | 18 | fn main() { 19 | let broker = bgpkit_broker::BgpkitBroker::new() 20 | .collector_id("route-views.amsix") 21 | .peers_only_full_feed(true); 22 | let mut peers = broker.get_peers().unwrap(); 23 | peers.sort_by(|a, b| b.num_connected_asns.cmp(&a.num_connected_asns)); 24 | for peer in peers.iter().take(10) { 25 | println!( 26 | "{},{},{},{},{},{},{}", 27 | peer.date, 28 | peer.collector, 29 | peer.asn, 30 | peer.ip, 31 | peer.num_connected_asns, 32 | peer.num_v4_pfxs, 33 | peer.num_v6_pfxs, 34 | ); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | permissions: 4 | contents: write 5 | 6 | on: 7 | push: 8 | tags: 9 | - v[0-9]+.* 10 | 11 | jobs: 12 | create-release: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: taiki-e/create-gh-release-action@v1 17 | with: 18 | # (optional) Path to changelog. 19 | changelog: CHANGELOG.md 20 | # (required) GitHub token for creating GitHub Releases. 
21 | token: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | cargo-publish: 24 | needs: create-release 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: Publish to crates.io 29 | run: > 30 | cargo publish 31 | --all-features 32 | --verbose 33 | --token ${{ secrets.CARGO_REGISTRY_TOKEN }} 34 | 35 | upload-assets: 36 | needs: create-release 37 | strategy: 38 | matrix: 39 | include: 40 | - target: aarch64-unknown-linux-gnu 41 | os: ubuntu-latest 42 | - target: x86_64-unknown-linux-gnu 43 | os: ubuntu-latest 44 | - target: universal-apple-darwin 45 | os: macos-latest 46 | runs-on: ${{ matrix.os }} 47 | steps: 48 | - uses: actions/checkout@v4 49 | - uses: taiki-e/upload-rust-binary-action@v1 50 | with: 51 | # (required) Comma-separated list of binary names (non-extension portion of filename) to build and upload. 52 | # Note that glob pattern is not supported yet. 53 | bin: bgpkit-broker 54 | features: cli 55 | checksum: sha256 56 | # (optional) Target triple, default is host triple. 57 | target: ${{ matrix.target }} 58 | # (required) GitHub token for uploading assets to GitHub Releases. 59 | token: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /examples/health_check.rs: -------------------------------------------------------------------------------- 1 | use bgpkit_broker::BgpkitBroker; 2 | 3 | /// Simple example demonstrating health check functionality. 
4 | fn main() { 5 | println!("=== BGPKIT Broker Health Check Demo ===\n"); 6 | 7 | // Check default API endpoint health 8 | let broker = BgpkitBroker::new(); 9 | 10 | println!("Checking broker API health at: {}", broker.broker_url); 11 | 12 | match broker.health_check() { 13 | Ok(()) => { 14 | println!("✓ Broker API is healthy and responding"); 15 | 16 | // Test a simple query to further verify functionality 17 | println!("\nTesting basic query functionality..."); 18 | match broker.latest() { 19 | Ok(items) => { 20 | println!("✓ Successfully retrieved {} latest files", items.len()); 21 | 22 | // Show first few items as examples 23 | for item in items.iter().take(3) { 24 | println!( 25 | " - {}: {} ({})", 26 | item.collector_id, item.data_type, item.url 27 | ); 28 | } 29 | } 30 | Err(e) => println!("✗ Query failed: {}", e), 31 | } 32 | } 33 | Err(e) => { 34 | println!("✗ Broker API health check failed: {}", e); 35 | println!(" This might indicate network issues or API downtime"); 36 | } 37 | } 38 | 39 | // Test with custom endpoint 40 | println!("\n=== Testing Custom Endpoint ==="); 41 | let custom_broker = BgpkitBroker::new().broker_url("https://invalid-endpoint.example.com/api"); 42 | 43 | println!("Checking invalid endpoint: {}", custom_broker.broker_url); 44 | match custom_broker.health_check() { 45 | Ok(()) => println!("✓ Unexpected success - endpoint responded"), 46 | Err(e) => println!("✓ Expected failure: {}", e), 47 | } 48 | 49 | println!("\n=== Health Check Complete ==="); 50 | } 51 | -------------------------------------------------------------------------------- /examples/timestamps.rs: -------------------------------------------------------------------------------- 1 | use bgpkit_broker::BgpkitBroker; 2 | 3 | /// This example demonstrates flexible timestamp parsing capabilities. 4 | /// The SDK accepts various timestamp formats for user convenience. 
5 | fn main() { 6 | println!("=== BGPKIT Broker Timestamp Formats Demo ===\n"); 7 | 8 | // Common timestamp formats supported 9 | let examples = [ 10 | ("Unix timestamp", "1640995200"), 11 | ("RFC3339 date", "2022-01-01T00:00:00Z"), 12 | ("Simple date", "2022-01-01"), 13 | ("Date with slashes", "2022/01/01"), 14 | ("Compact date", "20220101"), 15 | ]; 16 | 17 | println!("Supported timestamp formats:"); 18 | for (description, timestamp) in examples { 19 | println!(" ✓ {}: '{}'", description, timestamp); 20 | } 21 | 22 | // Demonstrate practical usage 23 | println!("\n=== Practical Example ==="); 24 | 25 | // Query using simple date format 26 | let broker = BgpkitBroker::new() 27 | .ts_start("2022-01-01") 28 | .ts_end("2022-01-02") 29 | .collector_id("route-views2") 30 | .data_type("rib") 31 | .page_size(5); 32 | 33 | match broker.query() { 34 | Ok(items) => { 35 | println!( 36 | "✓ Found {} BGP archive files using simple date format", 37 | items.len() 38 | ); 39 | for item in items.iter().take(2) { 40 | println!(" - {} at {}", item.data_type, item.ts_start); 41 | } 42 | } 43 | Err(e) => println!("✗ Query failed: {}", e), 44 | } 45 | 46 | // Show error handling for invalid format 47 | println!("\n=== Error Handling ==="); 48 | let invalid_broker = BgpkitBroker::new().ts_start("invalid-date").page_size(1); 49 | 50 | match invalid_broker.query() { 51 | Ok(_) => println!("✗ Unexpected success with invalid date"), 52 | Err(_) => println!("✓ Proper error handling: validation occurs at query time"), 53 | } 54 | 55 | println!("\n=== Demo Complete ==="); 56 | } 57 | -------------------------------------------------------------------------------- /src/cli/bootstrap.rs: -------------------------------------------------------------------------------- 1 | // https://spaces.bgpkit.org/broker/bgpkit_broker.sqlite3 2 | 3 | use futures_util::StreamExt; 4 | use indicatif::{ProgressBar, ProgressStyle}; 5 | use std::cmp::min; 6 | use std::fs::File; 7 | use std::io::Write; 8 | use 
std::time::Duration; 9 | use tracing::info; 10 | 11 | pub async fn download_file(url: &str, path: &str, silent: bool) -> Result<(), String> { 12 | info!("downloading bootstrap database file {} to {}", &url, &path); 13 | let client = reqwest::ClientBuilder::new() 14 | .user_agent("bgpkit-broker/3") 15 | .connect_timeout(Duration::from_secs(30)) 16 | .build() 17 | .or(Err("Failed to create reqwest client".to_string()))?; 18 | 19 | // Reqwest setup 20 | let res = client 21 | .get(url) 22 | .send() 23 | .await 24 | .or(Err(format!("Failed to GET from '{}'", &url)))?; 25 | let total_size = res 26 | .content_length() 27 | .ok_or(format!("Failed to get content length from '{}'", &url))?; 28 | 29 | // Indicatif setup 30 | let pb = ProgressBar::new(total_size); 31 | let style = ProgressStyle::default_bar() 32 | .template("{msg}\n{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})") 33 | .map_err(|e| format!("Failed to create progress bar style: {}", e))? 34 | .progress_chars("#>-"); 35 | pb.set_style(style); 36 | if !silent { 37 | pb.set_message(format!("Downloading {} to {}...", url, path)); 38 | } 39 | 40 | // download chunks 41 | let mut file = File::create(path).or(Err(format!("Failed to create file '{}'", path)))?; 42 | let mut downloaded: u64 = 0; 43 | let mut stream = res.bytes_stream(); 44 | 45 | while let Some(item) = stream.next().await { 46 | let chunk = item.or(Err("Error while downloading file".to_string()))?; 47 | file.write_all(&chunk) 48 | .or(Err("Error while writing to file".to_string()))?; 49 | let new = min(downloaded + (chunk.len() as u64), total_size); 50 | downloaded = new; 51 | if !silent { 52 | pb.set_position(new); 53 | } 54 | } 55 | 56 | if !silent { 57 | pb.finish_with_message(format!("Downloading {} to {}... 
Done", url, path)); 58 | } 59 | info!("bootstrap download finished"); 60 | Ok(()) 61 | } 62 | -------------------------------------------------------------------------------- /examples/simple_config.rs: -------------------------------------------------------------------------------- 1 | use bgpkit_broker::BgpkitBroker; 2 | 3 | /// This example demonstrates the simple configuration API. 4 | /// Configuration methods return Self for easy chaining, with validation occurring at query time. 5 | fn main() { 6 | println!("=== BGPKIT Broker Simple Configuration Demo ===\n"); 7 | 8 | // Example 1: Valid configuration with method chaining 9 | println!("1. Valid configuration:"); 10 | let broker = configure_broker(); 11 | println!("✓ Configuration successful!"); 12 | println!(" Query params: {}", broker.query_params); 13 | 14 | // Make a real query to trigger validation 15 | match broker.query() { 16 | Ok(items) => println!(" ✓ Found {} BGP archive files", items.len()), 17 | Err(e) => println!(" ✗ Query error: {}", e), 18 | } 19 | 20 | // Example 2: Invalid configurations (errors occur at query time) 21 | println!("\n2. Invalid configurations (errors detected at query time):"); 22 | 23 | let test_cases = [ 24 | ("Invalid timestamp", "invalid-timestamp"), 25 | ("Invalid collector", "nonexistent-collector"), 26 | ("Invalid date", "2022-13-01"), 27 | ]; 28 | 29 | for (description, invalid_value) in test_cases { 30 | let broker = BgpkitBroker::new() 31 | .ts_start(invalid_value) 32 | .page(1) 33 | .page_size(10); 34 | 35 | match broker.query() { 36 | Ok(_) => println!(" {} -> Unexpected success", description), 37 | Err(e) => println!(" {} -> ✓ Expected error: {}", description, e), 38 | } 39 | } 40 | 41 | // Example 3: Different ways to configure 42 | println!("\n3. 
Configuration patterns:"); 43 | 44 | // Single method calls 45 | let _broker = BgpkitBroker::new().ts_start("1634693400"); 46 | println!(" ✓ Single method call works"); 47 | 48 | // Method chaining 49 | let _broker = BgpkitBroker::new() 50 | .ts_start("1634693400") 51 | .ts_end("1634693500") 52 | .collector_id("rrc00") 53 | .page_size(50); 54 | println!(" ✓ Method chaining works"); 55 | 56 | println!("\n=== Current API Benefits ==="); 57 | println!("✓ Clean method names without prefixes/suffixes"); 58 | println!("✓ Simple method chaining without Result handling"); 59 | println!("✓ Validation occurs at query time with helpful error messages"); 60 | println!("✓ Flexible timestamp formats accepted"); 61 | println!("✓ Configuration is always successful, errors only on invalid queries"); 62 | 63 | println!("\n=== Demo Complete ==="); 64 | } 65 | 66 | /// Helper function demonstrating clean configuration 67 | fn configure_broker() -> BgpkitBroker { 68 | BgpkitBroker::new() 69 | .ts_start("2022-01-01T00:00:00Z") 70 | .ts_end("2022-01-01T01:00:00Z") 71 | .collector_id("route-views2") 72 | .project("routeviews") 73 | .data_type("rib") 74 | .page_size(5) 75 | } 76 | -------------------------------------------------------------------------------- /src/db/utils.rs: -------------------------------------------------------------------------------- 1 | use crate::query::BrokerCollector; 2 | use chrono::{Datelike, Duration, NaiveDateTime, Timelike}; 3 | 4 | pub(crate) fn infer_url( 5 | collector: &BrokerCollector, 6 | ts_start: &NaiveDateTime, 7 | is_rib: bool, 8 | ) -> (String, NaiveDateTime) { 9 | let project = collector.project.as_str(); 10 | let collector_url = collector 11 | .url 12 | .trim_end_matches('/') 13 | .trim_end_matches("bgpdata") 14 | .trim_end_matches('/'); 15 | let updates_interval = collector.updates_interval; 16 | 17 | let (url, ts_end) = match project { 18 | "route-views" => match is_rib { 19 | true => ( 20 | format!( 21 | 
"{}/bgpdata/{}.{:02}/RIBS/rib.{}{:02}{:02}.{:02}{:02}.bz2", 22 | collector_url, 23 | ts_start.year(), 24 | ts_start.month(), 25 | ts_start.year(), 26 | ts_start.month(), 27 | ts_start.day(), 28 | ts_start.hour(), 29 | ts_start.minute(), 30 | ), 31 | *ts_start, 32 | ), 33 | false => ( 34 | format!( 35 | "{}/bgpdata/{}.{:02}/UPDATES/updates.{}{:02}{:02}.{:02}{:02}.bz2", 36 | collector_url, 37 | ts_start.year(), 38 | ts_start.month(), 39 | ts_start.year(), 40 | ts_start.month(), 41 | ts_start.day(), 42 | ts_start.hour(), 43 | ts_start.minute(), 44 | ), 45 | *ts_start + Duration::seconds(updates_interval), 46 | ), 47 | }, 48 | "ripe-ris" => match is_rib { 49 | true => ( 50 | format!( 51 | "{}/{}.{:02}/bview.{}{:02}{:02}.{:02}{:02}.gz", 52 | collector_url, 53 | ts_start.year(), 54 | ts_start.month(), 55 | ts_start.year(), 56 | ts_start.month(), 57 | ts_start.day(), 58 | ts_start.hour(), 59 | ts_start.minute(), 60 | ), 61 | *ts_start, 62 | ), 63 | false => ( 64 | format!( 65 | "{}/{}.{:02}/updates.{}{:02}{:02}.{:02}{:02}.gz", 66 | collector_url, 67 | ts_start.year(), 68 | ts_start.month(), 69 | ts_start.year(), 70 | ts_start.month(), 71 | ts_start.day(), 72 | ts_start.hour(), 73 | ts_start.minute(), 74 | ), 75 | *ts_start + Duration::seconds(updates_interval), 76 | ), 77 | }, 78 | _ => { 79 | todo!() 80 | } 81 | }; 82 | (url, ts_end) 83 | } 84 | -------------------------------------------------------------------------------- /src/cli/utils.rs: -------------------------------------------------------------------------------- 1 | use bgpkit_broker::BrokerItem; 2 | use bgpkit_commons::mrt_collectors::MrtCollector; 3 | use chrono::NaiveDateTime; 4 | use itertools::Itertools; 5 | use serde::Serialize; 6 | use std::collections::{HashMap, HashSet}; 7 | use std::path::Path; 8 | 9 | #[derive(Serialize, tabled::Tabled)] 10 | pub struct CollectorInfo { 11 | pub project: String, 12 | pub name: String, 13 | pub country: String, 14 | pub activated_on: NaiveDateTime, 15 | pub data_url: 
/// Returns `true` if `path` looks like a local filesystem path, i.e. it
/// carries no URL scheme separator ("://").
///
/// The previous `path.is_absolute() || path.is_relative()` check was a
/// tautology (every path is one or the other), so the scheme test is the
/// only real condition.
pub fn is_local_path(path: &str) -> bool {
    !path.contains("://")
}

/// Splits an S3 URI like `s3://bucket/path/to/file` into
/// `(bucket, "/path/to/file")`; the object key keeps a leading `/`.
///
/// Returns `None` for non-s3 schemes, a missing/empty bucket, or a path
/// that names no object (e.g. `s3://bucket` or `s3://bucket/dir/`).
///
/// Bug fix: the original rejection check `parts.ends_with(&["/"])` could
/// never match — `split('/')` yields an empty string, not `"/"`, for a
/// trailing separator — so directory-style and bare-bucket inputs were
/// wrongly accepted.
pub fn parse_s3_path(path: &str) -> Option<(String, String)> {
    let parts = path.split("://").collect::<Vec<&str>>();
    if parts.len() != 2 || parts[0] != "s3" {
        return None;
    }
    let segments = parts[1].split('/').collect::<Vec<&str>>();
    let bucket = segments[0].to_string();
    // Re-join the remaining segments into the object key, prefixed by `/`.
    let key = format!("/{}", segments[1..].join("/"));
    if bucket.is_empty() || key == "/" || key.ends_with('/') {
        return None;
    }
    Some((bucket, key))
}
perform_periodic_backup( 59 | from: &str, 60 | backup_to: &str, 61 | sqlite_cmd_path: Option, 62 | ) -> Result<(), String> { 63 | info!("performing periodic backup from {} to {}", from, backup_to); 64 | 65 | if crate::utils::is_local_path(backup_to) { 66 | backup_database(from, backup_to, true, sqlite_cmd_path) 67 | } else if let Some((bucket, s3_path)) = crate::utils::parse_s3_path(backup_to) { 68 | perform_s3_backup(from, &bucket, &s3_path, sqlite_cmd_path).await 69 | } else { 70 | Err("invalid backup destination format".to_string()) 71 | } 72 | } 73 | 74 | async fn perform_s3_backup( 75 | from: &str, 76 | bucket: &str, 77 | s3_path: &str, 78 | sqlite_cmd_path: Option, 79 | ) -> Result<(), String> { 80 | let temp_dir = 81 | tempfile::tempdir().map_err(|e| format!("failed to create temporary directory: {}", e))?; 82 | let temp_file_path = temp_dir 83 | .path() 84 | .join("temp.db") 85 | .to_str() 86 | .ok_or("failed to convert temp file path to string")? 87 | .to_string(); 88 | 89 | match backup_database(from, &temp_file_path, true, sqlite_cmd_path) { 90 | Ok(_) => { 91 | info!( 92 | "uploading backup file {} to S3 at s3://{}/{}", 93 | &temp_file_path, bucket, s3_path 94 | ); 95 | match oneio::s3_upload(bucket, s3_path, &temp_file_path) { 96 | Ok(_) => { 97 | info!("periodic backup file uploaded to S3"); 98 | Ok(()) 99 | } 100 | Err(e) => { 101 | error!("failed to upload periodic backup file to S3: {}", e); 102 | Err(format!("failed to upload backup file to S3: {}", e)) 103 | } 104 | } 105 | } 106 | Err(e) => { 107 | error!("failed to create periodic backup database: {}", e); 108 | Err(e) 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bgpkit-broker" 3 | version = "0.10.1" 4 | edition = "2021" 5 | authors = ["Mingwei Zhang "] 6 | readme = "README.md" 7 | license = "MIT" 8 | repository 
= "https://github.com/bgpkit/bgpkit-broker" 9 | documentation = "https://docs.rs/bgpkit-broker" 10 | description = """ 11 | A library and command-line to provide indexing and searching functionalities for public BGP data archive files over time. 12 | """ 13 | keywords = ["bgp", "bgpkit", "api"] 14 | 15 | [[bin]] 16 | path = "src/cli/main.rs" 17 | name = "bgpkit-broker" 18 | required-features = ["cli"] 19 | 20 | [dependencies] 21 | 22 | ############################################# 23 | # Core Broker Rust SDK dependencies 24 | ############################################# 25 | chrono = { version = "0.4", features = ["serde"] } 26 | log = "0.4" 27 | reqwest = { version = "0.12", default-features = false, features = [ 28 | "blocking", 29 | "json", 30 | "stream", 31 | "rustls-tls-native-roots", 32 | ] } 33 | serde = { version = "1", features = ["derive"] } 34 | serde_json = "1" 35 | thiserror = "2.0" 36 | tracing = "0.1" 37 | lazy_static = "1" 38 | dotenvy = "0.15" 39 | 40 | ############################################# 41 | # Optional dependencies 42 | ############################################# 43 | 44 | # command-line interface dependencies 45 | clap = { version = "4.3", features = ["derive"], optional = true } 46 | dirs = { version = "6", optional = true } 47 | humantime = { version = "2.1", optional = true } 48 | num_cpus = { version = "1.15", optional = true } 49 | tabled = { version = "0.20.0", optional = true } 50 | tracing-subscriber = { version = "0.3", optional = true } 51 | indicatif = { version = "0.18.0", optional = true } 52 | futures-util = { version = "0.3.28", optional = true } 53 | itertools = { version = "0.14.0", optional = true } 54 | tempfile = { version = "3.8", optional = true } 55 | which = { version = "8.0.0", optional = true } 56 | bgpkit-commons = { version = "0.9.2", default-features = false, features = ["mrt_collectors", "countries"], optional = true } 57 | 58 | # crawler dependencies 59 | futures = { version = "0.3", optional = true } 
60 | oneio = { version = "0.20.0", default-features = false, features = ["https", "s3"], optional = true } 61 | regex = { version = "1", optional = true } 62 | scraper = { version = "0.24", optional = true } 63 | tokio = { version = "1", optional = true, features = ["full"] } 64 | 65 | # api dependencies 66 | axum = { version = "0.8", optional = true } 67 | tower-http = { version = "0.6", optional = true, features = ["cors"] } 68 | http = { version = "1.0", optional = true } 69 | axum-prometheus = { version = "0.9.0", optional = true } 70 | 71 | # database dependencies 72 | sqlx = { version = "0.8", features = [ 73 | "runtime-tokio", 74 | "sqlite", 75 | ], optional = true } 76 | async-nats = { version = "0.42.0", optional = true } 77 | 78 | [features] 79 | default = [] 80 | cli = [ 81 | # command-line interface 82 | "clap", 83 | "dirs", 84 | "humantime", 85 | "num_cpus", 86 | "tracing-subscriber", 87 | "tabled", 88 | "itertools", 89 | "tempfile", 90 | "which", 91 | "bgpkit-commons", 92 | # crawler 93 | "futures", 94 | "oneio", 95 | "regex", 96 | "scraper", 97 | "tokio", 98 | # notification 99 | "nats", 100 | # database 101 | "backend", 102 | # bootstrap 103 | "indicatif", 104 | "futures-util", 105 | # API 106 | "axum", 107 | "axum-prometheus", 108 | "http", 109 | "tower-http", 110 | ] 111 | backend = ["tokio", "sqlx"] 112 | 113 | # notification features 114 | nats = ["async-nats"] 115 | 116 | [dev-dependencies] 117 | tracing-subscriber = "0.3.17" 118 | 119 | [package.metadata.binstall] 120 | pkg-url = "{ repo }/releases/download/v{ version }/{ name }-{ target }.tar.gz" 121 | pkg-fmt = "tgz" 122 | 123 | [lints.clippy] 124 | uninlined_format_args = "allow" 125 | unwrap_used = "deny" -------------------------------------------------------------------------------- /src/db/meta.rs: -------------------------------------------------------------------------------- 1 | use crate::{BrokerError, LocalBrokerDb}; 2 | use serde::{Deserialize, Serialize}; 3 | use 
sqlx::sqlite::SqliteRow; 4 | use sqlx::Row; 5 | use tracing::{debug, info}; 6 | 7 | /// Default number of days to retain meta entries. 8 | const DEFAULT_META_RETENTION_DAYS: i64 = 30; 9 | 10 | /// Get the number of days to retain meta entries. 11 | /// Default is 30 days. Can be configured via BGPKIT_BROKER_META_RETENTION_DAYS. 12 | fn get_meta_retention_days() -> i64 { 13 | std::env::var("BGPKIT_BROKER_META_RETENTION_DAYS") 14 | .ok() 15 | .and_then(|s| s.parse().ok()) 16 | .unwrap_or(DEFAULT_META_RETENTION_DAYS) 17 | } 18 | 19 | #[derive(Debug, Clone, Serialize, Deserialize)] 20 | pub struct UpdatesMeta { 21 | /// database update timestamp 22 | pub update_ts: i64, 23 | /// database update duration in seconds 24 | pub update_duration: i32, 25 | /// number of items inserted 26 | pub insert_count: i32, 27 | } 28 | 29 | impl LocalBrokerDb { 30 | pub async fn insert_meta( 31 | &self, 32 | crawl_duration: i32, 33 | item_inserted: i32, 34 | ) -> Result, BrokerError> { 35 | debug!("Inserting meta information..."); 36 | let now_ts = chrono::Utc::now().timestamp(); 37 | let inserted: Vec = sqlx::query(&format!( 38 | r#" 39 | INSERT INTO meta (update_ts, update_duration, insert_count) 40 | VALUES ('{}', {}, {}) 41 | RETURNING update_ts, update_duration, insert_count 42 | "#, 43 | now_ts, crawl_duration, item_inserted 44 | )) 45 | .map(|row: SqliteRow| { 46 | let update_ts = row.get::(0); 47 | let update_duration = row.get::(1); 48 | let insert_count = row.get::(2); 49 | UpdatesMeta { 50 | update_ts, 51 | update_duration, 52 | insert_count, 53 | } 54 | }) 55 | .fetch_all(&self.conn_pool) 56 | .await?; 57 | Ok(inserted) 58 | } 59 | 60 | pub async fn get_latest_updates_meta(&self) -> Result, BrokerError> { 61 | let entries = sqlx::query( 62 | r#" 63 | SELECT update_ts, update_duration, insert_count FROM meta ORDER BY update_ts DESC LIMIT 1; 64 | "#, 65 | ).map(|row: SqliteRow| { 66 | let update_ts = row.get::(0); 67 | let update_duration = row.get::(1); 68 | let insert_count = 
row.get::(2); 69 | UpdatesMeta { 70 | update_ts, 71 | update_duration, 72 | insert_count, 73 | } 74 | }).fetch_all(&self.conn_pool).await?; 75 | if entries.is_empty() { 76 | Ok(None) 77 | } else { 78 | Ok(Some(entries[0].clone())) 79 | } 80 | } 81 | 82 | /// Retrieves the total number of entries in the `files` table. 83 | /// 84 | /// # Returns 85 | /// 86 | /// * `Ok(i64)` - If the query is successful, this contains the count of entries in the `files` table. 87 | /// * `Err(BrokerError)` - If there is an issue executing the query or fetching the result. 88 | pub async fn get_entry_count(&self) -> Result { 89 | let count = sqlx::query( 90 | r#" 91 | SELECT count(*) FROM files 92 | "#, 93 | ) 94 | .map(|row: SqliteRow| row.get::(0)) 95 | .fetch_one(&self.conn_pool) 96 | .await?; 97 | Ok(count) 98 | } 99 | 100 | /// Deletes meta table entries older than the configured retention period. 101 | /// 102 | /// # Environment Variables 103 | /// * `BGPKIT_BROKER_META_RETENTION_DAYS` - Number of days to retain meta entries (default: 30) 104 | /// 105 | /// # Returns 106 | /// * `Ok(u64)` - Number of deleted entries 107 | /// * `Err(BrokerError)` - If there is an issue executing the query 108 | pub async fn cleanup_old_meta_entries(&self) -> Result { 109 | let retention_days = get_meta_retention_days(); 110 | let cutoff_ts = chrono::Utc::now().timestamp() - (retention_days * 24 * 60 * 60); 111 | 112 | debug!( 113 | "Cleaning up meta entries older than {} days (before timestamp {})", 114 | retention_days, cutoff_ts 115 | ); 116 | 117 | let result = sqlx::query(&format!("DELETE FROM meta WHERE update_ts < {}", cutoff_ts)) 118 | .execute(&self.conn_pool) 119 | .await?; 120 | 121 | let deleted = result.rows_affected(); 122 | if deleted > 0 { 123 | info!( 124 | "Cleaned up {} old meta entries (older than {} days)", 125 | deleted, retention_days 126 | ); 127 | } 128 | 129 | Ok(deleted) 130 | } 131 | } 132 | 
-------------------------------------------------------------------------------- /examples/shortcuts.rs: -------------------------------------------------------------------------------- 1 | //! This example demonstrates the convenience shortcuts for common BGP data queries. 2 | //! 3 | //! The shortcuts module provides three main convenience methods: 4 | //! - daily_ribs(): Get RIB files captured at midnight (daily snapshots) 5 | //! - recent_updates(hours): Get update files from the last N hours 6 | //! - most_diverse_collectors(n, project): Find collectors with the most diverse peer ASNs, optionally filtered by project 7 | 8 | fn main() { 9 | let broker = bgpkit_broker::BgpkitBroker::new(); 10 | 11 | // Example 1: Get daily RIB files from January 1, 2024 12 | println!("=== Daily RIBs Example ==="); 13 | match broker 14 | .clone() 15 | .ts_start("2024-01-01") 16 | .ts_end("2024-01-02") 17 | .collector_id("route-views2") 18 | .daily_ribs() 19 | { 20 | Ok(daily_ribs) => { 21 | println!("Found {} daily RIB files", daily_ribs.len()); 22 | for item in daily_ribs.iter().take(3) { 23 | println!( 24 | " Daily RIB: {} from {} at {}", 25 | item.collector_id, 26 | item.ts_start.format("%Y-%m-%d %H:%M:%S"), 27 | item.url 28 | ); 29 | } 30 | } 31 | Err(e) => println!("Error getting daily RIBs: {}", e), 32 | } 33 | 34 | // Example 2: Get recent update files from the last 24 hours 35 | println!("\n=== Recent Updates Example ==="); 36 | match broker.clone().recent_updates(24) { 37 | Ok(recent_updates) => { 38 | println!( 39 | "Found {} update files from last 24 hours", 40 | recent_updates.len() 41 | ); 42 | for item in recent_updates.iter().take(5) { 43 | println!( 44 | " Update: {} from {} at {}", 45 | item.collector_id, 46 | item.ts_start.format("%Y-%m-%d %H:%M:%S"), 47 | item.url 48 | ); 49 | } 50 | } 51 | Err(e) => println!("Error getting recent updates: {}", e), 52 | } 53 | 54 | // Example 3: Find the most diverse collectors 55 | println!("\n=== Most Diverse Collectors 
Example ==="); 56 | match broker.clone().most_diverse_collectors(5, None) { 57 | Ok(diverse_collectors) => { 58 | println!("Top {} most diverse collectors:", diverse_collectors.len()); 59 | for (i, collector) in diverse_collectors.iter().enumerate() { 60 | println!(" {}. {}", i + 1, collector); 61 | } 62 | 63 | // Use the diverse collectors to get RIB files 64 | if !diverse_collectors.is_empty() { 65 | let collector_list = diverse_collectors.join(","); 66 | println!("\n--- Using diverse collectors to get RIB files ---"); 67 | 68 | match broker 69 | .clone() 70 | .collector_id(collector_list) 71 | .data_type("rib") 72 | .page_size(5) 73 | .query_single_page() 74 | { 75 | Ok(ribs) => { 76 | println!("Found {} RIB files from diverse collectors", ribs.len()); 77 | for item in ribs { 78 | println!( 79 | " RIB: {} from {} at {}", 80 | item.collector_id, 81 | item.ts_start.format("%Y-%m-%d %H:%M:%S"), 82 | item.url 83 | ); 84 | } 85 | } 86 | Err(e) => println!("Error getting RIBs from diverse collectors: {}", e), 87 | } 88 | } 89 | } 90 | Err(e) => println!("Error finding diverse collectors: {}", e), 91 | } 92 | 93 | // Example 4: Compare project-specific diverse collectors 94 | println!("\n=== Project-Specific Diverse Collectors Example ==="); 95 | 96 | // RouteViews collectors 97 | match broker 98 | .clone() 99 | .most_diverse_collectors(3, Some("routeviews")) 100 | { 101 | Ok(rv_collectors) => { 102 | println!("Top {} RouteViews diverse collectors:", rv_collectors.len()); 103 | for (i, collector) in rv_collectors.iter().enumerate() { 104 | println!(" {}. {}", i + 1, collector); 105 | } 106 | } 107 | Err(e) => println!("Error finding RouteViews collectors: {}", e), 108 | } 109 | 110 | // RIPE RIS collectors 111 | match broker.clone().most_diverse_collectors(3, Some("riperis")) { 112 | Ok(ripe_collectors) => { 113 | println!("Top {} RIPE RIS diverse collectors:", ripe_collectors.len()); 114 | for (i, collector) in ripe_collectors.iter().enumerate() { 115 | println!(" {}. 
{}", i + 1, collector); 116 | } 117 | } 118 | Err(e) => println!("Error finding RIPE RIS collectors: {}", e), 119 | } 120 | 121 | println!("\n=== Shortcuts Example Complete ==="); 122 | } 123 | -------------------------------------------------------------------------------- /src/notifier/nats.rs: -------------------------------------------------------------------------------- 1 | use crate::{BrokerError, BrokerItem}; 2 | use async_nats::Subscriber; 3 | use futures::StreamExt; 4 | use tracing::{error, info}; 5 | 6 | pub struct NatsNotifier { 7 | client: async_nats::Client, 8 | root_subject: String, 9 | subscriber: Option, 10 | } 11 | 12 | fn item_to_subject(root_subject: &str, item: &BrokerItem) -> String { 13 | let project = match item.collector_id.starts_with("rrc") { 14 | true => "riperis", 15 | false => "route-views", 16 | }; 17 | 18 | let subject = root_subject.strip_suffix('.').unwrap_or(root_subject); 19 | 20 | format!( 21 | "{}.{}.{}.{}", 22 | subject, project, item.collector_id, item.data_type 23 | ) 24 | } 25 | 26 | impl NatsNotifier { 27 | /// Creates a new NATS notifier. 
28 | pub async fn new(url: Option) -> Result { 29 | dotenvy::dotenv().ok(); 30 | 31 | let url = match url { 32 | None => match dotenvy::var("BGPKIT_BROKER_NATS_URL") { 33 | Ok(url) => url, 34 | Err(_) => { 35 | return Err(BrokerError::NotifierError( 36 | "BGPKIT_BROKER_NATS_URL env variable not set".to_string(), 37 | )); 38 | } 39 | }, 40 | Some(u) => u, 41 | }; 42 | let user = dotenvy::var("BGPKIT_BROKER_NATS_USER").unwrap_or("public".to_string()); 43 | let password = dotenvy::var("BGPKIT_BROKER_NATS_PASSWORD").unwrap_or("public".to_string()); 44 | 45 | let root_subject = dotenvy::var("BGPKIT_BROKER_NATS_ROOT_SUBJECT") 46 | .unwrap_or_else(|_| "public.broker".to_string()); 47 | 48 | let client = match async_nats::ConnectOptions::new() 49 | .user_and_password(user, password) 50 | .connect(url.clone()) 51 | .await 52 | { 53 | Ok(c) => { 54 | info!( 55 | "successfully connected to NATS server at {} with root subject '{}'", 56 | &url, root_subject 57 | ); 58 | c 59 | } 60 | Err(e) => { 61 | return Err(BrokerError::BrokerError(format!( 62 | "NATS connection error: {}", 63 | e 64 | ))); 65 | } 66 | }; 67 | 68 | Ok(Self { 69 | client, 70 | root_subject, 71 | subscriber: None, 72 | }) 73 | } 74 | 75 | /// Publishes broker items to NATS server. 76 | /// 77 | /// # Arguments 78 | /// 79 | /// * `items` - A reference to a vector of `BrokerItem` objects to be published. 80 | /// 81 | /// # Errors 82 | /// 83 | /// Returns an `async_nats::Error` if there was an error during the publishing process. 
84 | pub async fn send(&self, items: &[BrokerItem]) -> Result<(), BrokerError> { 85 | for item in items { 86 | let item_str = serde_json::to_string(item)?; 87 | let subject = item_to_subject(self.root_subject.as_str(), item); 88 | if let Err(e) = self.client.publish(subject, item_str.into()).await { 89 | return Err(BrokerError::NotifierError(format!( 90 | "NATS publish error: {}", 91 | e 92 | ))); 93 | } 94 | } 95 | if let Err(e) = self.client.flush().await { 96 | return Err(BrokerError::NotifierError(format!( 97 | "NATS flush error: {}", 98 | e 99 | ))); 100 | }; 101 | Ok(()) 102 | } 103 | 104 | pub async fn start_subscription(&mut self, subject: Option) -> Result<(), BrokerError> { 105 | let sub = match subject { 106 | Some(s) => s, 107 | None => format!( 108 | "{}.>", 109 | self.root_subject 110 | .strip_suffix('.') 111 | .unwrap_or(self.root_subject.as_str()) 112 | ), 113 | }; 114 | 115 | match self.client.subscribe(sub.clone()).await { 116 | Ok(subscriber) => { 117 | info!("subscribed to NATS subject: {}", sub); 118 | self.subscriber = Some(subscriber); 119 | Ok(()) 120 | } 121 | Err(e) => Err(BrokerError::BrokerError(format!( 122 | "NATS subscription error: {}", 123 | e 124 | ))), 125 | } 126 | } 127 | 128 | pub async fn next(&mut self) -> Option { 129 | match self.subscriber.as_mut() { 130 | None => None, 131 | Some(s) => match s.next().await { 132 | None => None, 133 | Some(msg) => { 134 | let msg_text = match std::str::from_utf8(msg.payload.as_ref()) { 135 | Ok(text) => text, 136 | Err(e) => { 137 | error!("NATS message UTF-8 decode error: {}", e); 138 | return None; 139 | } 140 | }; 141 | match serde_json::from_str::(msg_text) { 142 | Ok(item) => Some(item), 143 | Err(_e) => { 144 | error!("NATS message deserialization error: {}", msg_text); 145 | None 146 | } 147 | } 148 | } 149 | }, 150 | } 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/crawler/routeviews.rs: 
-------------------------------------------------------------------------------- 1 | use crate::crawler::common::{ 2 | crawl_months_list, extract_link_size, fetch_body, get_crawler_month_concurrency, 3 | remove_trailing_slash, 4 | }; 5 | use crate::crawler::Collector; 6 | use crate::{BrokerError, BrokerItem}; 7 | use chrono::{NaiveDate, NaiveDateTime}; 8 | use futures::stream::StreamExt; 9 | use regex::Regex; 10 | use tracing::debug; 11 | 12 | /// Crawl RouteViews MRT data dump for a given collector. 13 | /// 14 | /// Example: . 15 | /// A few things to note: 16 | /// - at the root level, there are one directory per month, e.g. `2001.01/` 17 | /// - this means a single crawl of the root page will give us all the months available 18 | /// - each month directory contains two subdirectories, `UPDATES/` and `RIBS/` 19 | /// - each subdirectory contains a list of files, e.g. `updates.20010101.0000.bz2` or `rib.20010101.0000.bz2` 20 | /// 21 | /// # Arguments 22 | /// 23 | /// * `collector`: the [Collector] to crawl 24 | /// * `from_ts`: optional start date for the crawl to start from, provide None for bootstrap 25 | /// 26 | /// returns: Result, Error> 27 | pub async fn crawl_routeviews( 28 | collector: &Collector, 29 | from_ts: Option, 30 | ) -> Result, BrokerError> { 31 | let collector_url = remove_trailing_slash(collector.url.as_str()); 32 | 33 | let months_to_crawl = crawl_months_list(collector_url.as_str(), from_ts).await?; 34 | let mut stream = futures::stream::iter(months_to_crawl.into_iter().map(|month| { 35 | let url = format!("{}/{}", collector_url.as_str(), month.format("%Y.%m/")); 36 | crawl_month(url, collector.id.clone()) 37 | })) 38 | .buffer_unordered(get_crawler_month_concurrency()); 39 | 40 | let mut res = vec![]; 41 | while let Some(result) = stream.next().await { 42 | let items = result?; 43 | res.extend(items); 44 | } 45 | Ok(res) 46 | } 47 | 48 | async fn crawl_month(url: String, collector_id: String) -> Result, BrokerError> { 49 | let root_url = 
remove_trailing_slash(url.as_str()); 50 | debug!("crawling data for {} ...", root_url.as_str()); 51 | 52 | let mut all_items = vec![]; 53 | 54 | // RIBS 55 | for subdir in ["RIBS", "UPDATES"] { 56 | let url = format!("{}/{}", &root_url, subdir); 57 | let body = fetch_body(url.as_str()).await?; 58 | let collector_id_clone = collector_id.clone(); 59 | let data_items: Vec = tokio::task::spawn_blocking(move || { 60 | let items = extract_link_size(body.as_str()); 61 | items 62 | .iter() 63 | .filter_map(|(link, size)| { 64 | let url = format!("{}/{}", &url, link); 65 | #[allow(clippy::regex_creation_in_loops)] 66 | let link_time_pattern: Regex = 67 | Regex::new(r".*(........\.....)\.bz2.*").expect("invalid regex pattern"); 68 | let time_str = link_time_pattern.captures(&url)?.get(1)?.as_str(); 69 | let unix_time = NaiveDateTime::parse_from_str(time_str, "%Y%m%d.%H%M").ok()?; 70 | match link.contains("update") { 71 | true => Some(BrokerItem { 72 | ts_start: unix_time, 73 | ts_end: unix_time + chrono::Duration::seconds(15 * 60), 74 | url: url.clone(), 75 | rough_size: *size, 76 | collector_id: collector_id_clone.clone(), 77 | data_type: "updates".to_string(), 78 | exact_size: 0, 79 | }), 80 | false => Some(BrokerItem { 81 | ts_start: unix_time, 82 | ts_end: unix_time, 83 | url: url.clone(), 84 | rough_size: *size, 85 | collector_id: collector_id_clone.clone(), 86 | data_type: "rib".to_string(), 87 | exact_size: 0, 88 | }), 89 | } 90 | }) 91 | .collect() 92 | }) 93 | .await 94 | .expect("blocking task panicked"); 95 | all_items.extend(data_items); 96 | } 97 | 98 | debug!("crawling data for {} ... 
finished", &root_url); 99 | Ok(all_items) 100 | } 101 | 102 | #[cfg(test)] 103 | mod tests { 104 | use super::*; 105 | use chrono::Utc; 106 | 107 | #[tokio::test] 108 | async fn test_crawl_routeviews() { 109 | let collector = Collector { 110 | id: "route-views2".to_string(), 111 | project: "routeviews".to_string(), 112 | url: "https://routeviews.org/bgpdata/".to_string(), 113 | }; 114 | 115 | let two_months_ago = Utc::now().date_naive() - chrono::Duration::days(60); 116 | let items = crawl_routeviews(&collector, Some(two_months_ago)) 117 | .await 118 | .unwrap(); 119 | dbg!(items); 120 | } 121 | 122 | #[tokio::test] 123 | async fn test_crawl_months() { 124 | let root_url = "https://routeviews.org/bgpdata/"; 125 | let months = crawl_months_list(root_url, None).await.unwrap(); 126 | dbg!(months); 127 | let current_month = crawl_months_list(root_url, Some(Utc::now().date_naive())) 128 | .await 129 | .unwrap(); 130 | assert!(!current_month.is_empty()); 131 | } 132 | 133 | #[tokio::test] 134 | async fn test_crawl_month() { 135 | let items = crawl_month( 136 | "https://routeviews.org/bgpdata/2016.11/".to_string(), 137 | "route-views2".to_string(), 138 | ) 139 | .await 140 | .unwrap(); 141 | for item in items { 142 | println!("{}", item.url); 143 | } 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/crawler/riperis.rs: -------------------------------------------------------------------------------- 1 | use crate::crawler::common::{ 2 | crawl_months_list, extract_link_size, fetch_body, get_crawler_month_concurrency, 3 | remove_trailing_slash, 4 | }; 5 | use crate::crawler::Collector; 6 | use crate::{BrokerError, BrokerItem}; 7 | use chrono::{NaiveDate, NaiveDateTime}; 8 | use futures::stream::StreamExt; 9 | use regex::Regex; 10 | use tracing::debug; 11 | 12 | /// Crawl RIPE RIS MRT data dump for a given collector. 13 | /// 14 | /// Example: . 
15 | /// A few things to note: 16 | /// - at the root level, there are one directory per month, e.g. `2001.01/` 17 | /// - this means a single crawl of the root page will give us all the months available 18 | /// - each month directory contains a list of files, e.g. `updates.20010101.0000.gz` or `bview.20010101.0000.gz` (the latter is a full dump, the former is an incremental update) 19 | /// 20 | /// # Arguments 21 | /// 22 | /// * `collector`: the [Collector] to crawl 23 | /// * `from_ts`: optional start date for the crawl to start from, provide None for bootstrap 24 | /// 25 | /// returns: Result, Error> 26 | pub async fn crawl_ripe_ris( 27 | collector: &Collector, 28 | from_ts: Option, 29 | ) -> Result, BrokerError> { 30 | let collector_url = remove_trailing_slash(collector.url.as_str()); 31 | 32 | let months_to_crawl = crawl_months_list(collector_url.as_str(), from_ts).await?; 33 | let mut stream = futures::stream::iter(months_to_crawl.into_iter().map(|month| { 34 | let url = format!("{}/{}", collector_url.as_str(), month.format("%Y.%m/")); 35 | crawl_month(url, collector.id.clone()) 36 | })) 37 | .buffer_unordered(get_crawler_month_concurrency()); 38 | 39 | let mut res = vec![]; 40 | while let Some(result) = stream.next().await { 41 | let items = result?; 42 | res.extend(items); 43 | } 44 | Ok(res) 45 | } 46 | 47 | async fn crawl_month(url: String, collector_id: String) -> Result, BrokerError> { 48 | let url = remove_trailing_slash(url.as_str()); 49 | debug!("crawling data for {} ...", url.as_str()); 50 | let body = fetch_body(url.as_str()).await?; 51 | debug!(" download for {} finished ", url.as_str()); 52 | 53 | let new_url = url.to_string(); 54 | 55 | let data_items: Vec = tokio::task::spawn_blocking(move || { 56 | let items = extract_link_size(body.as_str()); 57 | items 58 | .iter() 59 | .filter_map(|(link, size)| { 60 | let url = match url.as_str().contains("https") { 61 | true => format!("{}/{}", url, link), 62 | false => format!("{}/{}", url, 
link).replace("http", "https"), 63 | }; 64 | let updates_link_pattern: Regex = 65 | Regex::new(r".*(........\.....)\.gz.*").expect("invalid regex pattern"); 66 | let time_str = updates_link_pattern.captures(&url)?.get(1)?.as_str(); 67 | let unix_time = NaiveDateTime::parse_from_str(time_str, "%Y%m%d.%H%M").ok()?; 68 | match link.contains("update") { 69 | true => Some(BrokerItem { 70 | ts_start: unix_time, 71 | ts_end: unix_time + chrono::Duration::seconds(5 * 60), 72 | url: url.clone(), 73 | rough_size: *size, 74 | collector_id: collector_id.clone(), 75 | data_type: "updates".to_string(), 76 | exact_size: 0, 77 | }), 78 | false => Some(BrokerItem { 79 | ts_start: unix_time, 80 | ts_end: unix_time, 81 | url: url.clone(), 82 | rough_size: *size, 83 | collector_id: collector_id.clone(), 84 | data_type: "rib".to_string(), 85 | exact_size: 0, 86 | }), 87 | } 88 | }) 89 | .collect() 90 | }) 91 | .await 92 | .expect("blocking task panicked"); 93 | 94 | debug!("crawling data for {} ... finished", &new_url); 95 | Ok(data_items) 96 | } 97 | 98 | #[cfg(test)] 99 | mod tests { 100 | use super::*; 101 | use chrono::Utc; 102 | 103 | #[tokio::test] 104 | async fn test_crawl_ripe_ris() { 105 | tracing_subscriber::fmt::init(); 106 | let collector = Collector { 107 | id: "rrc00".to_string(), 108 | project: "riperis".to_string(), 109 | url: "https://data.ris.ripe.net/rrc00/".to_string(), 110 | }; 111 | 112 | let two_months_ago = Utc::now().date_naive() - chrono::Duration::days(60); 113 | let _items = crawl_ripe_ris(&collector, Some(two_months_ago)) 114 | .await 115 | .unwrap(); 116 | let _after_date = NaiveDate::from_ymd_opt(2023, 5, 3) 117 | .unwrap() 118 | .and_hms_opt(0, 0, 0) 119 | .unwrap(); 120 | } 121 | 122 | #[tokio::test] 123 | async fn test_crawl_months() { 124 | let months = crawl_months_list("https://data.ris.ripe.net/rrc00/", None) 125 | .await 126 | .unwrap(); 127 | dbg!(months); 128 | let current_month = crawl_months_list( 129 | "https://data.ris.ripe.net/rrc00/", 130 
| Some(Utc::now().date_naive()), 131 | ) 132 | .await 133 | .unwrap(); 134 | 135 | assert_eq!(current_month.len(), 1); 136 | } 137 | 138 | #[tokio::test] 139 | async fn test_crawl_month() { 140 | let items = crawl_month( 141 | "https://data.ris.ripe.net/rrc00/2008.09/".to_string(), 142 | "rrc00".to_string(), 143 | ) 144 | .await 145 | .unwrap(); 146 | for item in items { 147 | println!("{}", item.url); 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/item.rs: -------------------------------------------------------------------------------- 1 | //! BrokerItem module define the broker search results 2 | use serde::{Deserialize, Serialize}; 3 | use std::cmp::Ordering; 4 | use std::fmt::{Display, Formatter}; 5 | 6 | /// BGPKIT Broker data item. 7 | /// 8 | /// The fields are: 9 | /// - [ts_start][BrokerItem::ts_start]: the starting timestamp of the data file 10 | /// - [ts_end][BrokerItem::ts_end]: the ending timestamp of the data file 11 | /// - [collector_id][BrokerItem::collector_id]: the collector id of the item: e.g. `rrc00` 12 | /// - [data_type][BrokerItem::data_type]: type of the data item: `rib` or `updates` 13 | /// - [url][BrokerItem::url]: the URL to the data item file 14 | /// - [rough_size][BrokerItem::rough_size]: rough file size extracted from the collector webpage 15 | /// - [exact_size][BrokerItem::exact_size]: exact file size extracted by crawling the file 16 | /// 17 | /// An array of [BrokerItem]s can be sorted with the following order: 18 | /// 1. smaller timestamp before larger timestamp 19 | /// 2. RIB before updates 20 | /// 3. 
then alphabetical order on collector ID (route-views before rrc) 21 | #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] 22 | #[cfg_attr(feature = "cli", derive(tabled::Tabled))] 23 | pub struct BrokerItem { 24 | /// start timestamp 25 | pub ts_start: chrono::NaiveDateTime, 26 | /// end timestamps 27 | pub ts_end: chrono::NaiveDateTime, 28 | /// the collector id of the item: e.g. `rrc00` 29 | pub collector_id: String, 30 | /// type of the data item: `rib` or `updates` 31 | pub data_type: String, 32 | /// the URL to the data item file 33 | pub url: String, 34 | /// rough file size extracted from the hosting site page 35 | pub rough_size: i64, 36 | /// exact file size extracted by crawling the file 37 | pub exact_size: i64, 38 | } 39 | 40 | impl BrokerItem { 41 | /// Checks if the data type is "rib" (i.e. RIB dump). 42 | /// 43 | /// # Return 44 | /// Returns `true` if the data type is "rib", otherwise `false`. 45 | pub fn is_rib(&self) -> bool { 46 | self.data_type.as_str() == "rib" 47 | } 48 | } 49 | 50 | #[allow(clippy::unwrap_used)] 51 | impl Display for BrokerItem { 52 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 53 | write!(f, "{}", serde_json::to_string(self).unwrap()) 54 | } 55 | } 56 | 57 | impl PartialOrd for BrokerItem { 58 | fn partial_cmp(&self, other: &Self) -> Option { 59 | Some(self.cmp(other)) 60 | } 61 | } 62 | 63 | impl Ord for BrokerItem { 64 | fn cmp(&self, other: &Self) -> Ordering { 65 | // compare BrokerItems with the following sequence 66 | // 1. ts_start 67 | // 2. data_type 68 | // 3. 
collector_id 69 | self.ts_start 70 | .cmp(&other.ts_start) // smaller timestamp comes earlier 71 | .then(self.data_type.cmp(&other.data_type)) // RIB before updates on the same timestamp 72 | .then(self.collector_id.cmp(&other.collector_id)) // route-viewsX before rrcX 73 | } 74 | } 75 | 76 | #[cfg(test)] 77 | mod tests { 78 | use super::*; 79 | use chrono::DateTime; 80 | 81 | #[test] 82 | fn test_sorting() { 83 | let mut items = vec![ 84 | BrokerItem { 85 | ts_start: DateTime::from_timestamp(10, 0).unwrap().naive_utc(), 86 | ts_end: Default::default(), 87 | collector_id: "rrc00".to_string(), 88 | data_type: "updates".to_string(), 89 | url: "".to_string(), 90 | rough_size: 0, 91 | exact_size: 0, 92 | }, 93 | BrokerItem { 94 | ts_start: DateTime::from_timestamp(9, 0).unwrap().naive_utc(), 95 | ts_end: Default::default(), 96 | collector_id: "rrc00".to_string(), 97 | data_type: "updates".to_string(), 98 | url: "".to_string(), 99 | rough_size: 0, 100 | exact_size: 0, 101 | }, 102 | BrokerItem { 103 | ts_start: DateTime::from_timestamp(10, 0).unwrap().naive_utc(), 104 | ts_end: Default::default(), 105 | collector_id: "rrc00".to_string(), 106 | data_type: "rib".to_string(), 107 | url: "".to_string(), 108 | rough_size: 0, 109 | exact_size: 0, 110 | }, 111 | BrokerItem { 112 | ts_start: DateTime::from_timestamp(10, 0).unwrap().naive_utc(), 113 | ts_end: Default::default(), 114 | collector_id: "route-views2".to_string(), 115 | data_type: "rib".to_string(), 116 | url: "".to_string(), 117 | rough_size: 0, 118 | exact_size: 0, 119 | }, 120 | ]; 121 | let correct_items = vec![ 122 | BrokerItem { 123 | ts_start: DateTime::from_timestamp(9, 0).unwrap().naive_utc(), 124 | ts_end: Default::default(), 125 | collector_id: "rrc00".to_string(), 126 | data_type: "updates".to_string(), 127 | url: "".to_string(), 128 | rough_size: 0, 129 | exact_size: 0, 130 | }, 131 | BrokerItem { 132 | ts_start: DateTime::from_timestamp(10, 0).unwrap().naive_utc(), 133 | ts_end: Default::default(), 134 
| collector_id: "route-views2".to_string(), 135 | data_type: "rib".to_string(), 136 | url: "".to_string(), 137 | rough_size: 0, 138 | exact_size: 0, 139 | }, 140 | BrokerItem { 141 | ts_start: DateTime::from_timestamp(10, 0).unwrap().naive_utc(), 142 | ts_end: Default::default(), 143 | collector_id: "rrc00".to_string(), 144 | data_type: "rib".to_string(), 145 | url: "".to_string(), 146 | rough_size: 0, 147 | exact_size: 0, 148 | }, 149 | BrokerItem { 150 | ts_start: DateTime::from_timestamp(10, 0).unwrap().naive_utc(), 151 | ts_end: Default::default(), 152 | collector_id: "rrc00".to_string(), 153 | data_type: "updates".to_string(), 154 | url: "".to_string(), 155 | rough_size: 0, 156 | exact_size: 0, 157 | }, 158 | ]; 159 | 160 | assert_ne!(items, correct_items); 161 | items.sort(); 162 | assert_eq!(items, correct_items); 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/db/latest_files.rs: -------------------------------------------------------------------------------- 1 | use crate::db::utils::infer_url; 2 | use crate::query::BrokerCollector; 3 | use crate::{BrokerError, BrokerItem}; 4 | use chrono::{DateTime, NaiveDateTime}; 5 | use sqlx::sqlite::SqliteRow; 6 | use sqlx::Row; 7 | use std::collections::HashMap; 8 | use tracing::error; 9 | 10 | use super::LocalBrokerDb; 11 | 12 | impl LocalBrokerDb { 13 | /// get the latest timestamp (ts_start) of data entries in broker database 14 | pub async fn get_latest_timestamp(&self) -> Result, BrokerError> { 15 | // FIXME: handle empty database case 16 | let timestamp = sqlx::query( 17 | r#" 18 | SELECT MAX(timestamp) FROM files 19 | "#, 20 | ) 21 | .map(|row: SqliteRow| row.get::(0)) 22 | .fetch_one(&self.conn_pool) 23 | .await?; 24 | 25 | let datetime = DateTime::from_timestamp(timestamp, 0).map(|dt| dt.naive_utc()); 26 | Ok(datetime) 27 | } 28 | 29 | pub async fn bootstrap_latest_table(&self) { 30 | if let Err(e) = sqlx::query( 31 | r#" 32 | INSERT INTO "latest" 
("timestamp", "collector_name", "type", "rough_size", "exact_size") 33 | SELECT 34 | MAX("timestamp") AS timestamp, 35 | collector_name, 36 | type, 37 | MAX(rough_size) AS rough_size, 38 | MAX(exact_size) AS exact_size 39 | FROM 40 | files_view 41 | GROUP BY 42 | collector_name, type 43 | ON CONFLICT (collector_name, type) 44 | DO UPDATE SET 45 | "timestamp" = CASE 46 | WHEN excluded."timestamp" > "latest"."timestamp" THEN excluded."timestamp" 47 | ELSE "latest"."timestamp" 48 | END, 49 | "rough_size" = CASE 50 | WHEN excluded."timestamp" > "latest"."timestamp" THEN excluded."rough_size" 51 | ELSE "latest"."rough_size" 52 | END, 53 | "exact_size" = CASE 54 | WHEN excluded."timestamp" > "latest"."timestamp" THEN excluded."exact_size" 55 | ELSE "latest"."exact_size" 56 | END; 57 | "# 58 | ).execute(&self.conn_pool).await { 59 | error!("failed to bootstrap latest table: {}", e); 60 | } 61 | } 62 | 63 | pub async fn update_latest_files(&self, files: &[BrokerItem], bootstrap: bool) { 64 | let value_str = match bootstrap { 65 | true => r#" 66 | SELECT 67 | MAX("timestamp") AS timestamp, 68 | collector_name, 69 | type, 70 | MAX(rough_size) AS rough_size, 71 | MAX(exact_size) AS exact_size 72 | FROM 73 | files_view 74 | GROUP BY 75 | collector_name, type 76 | "# 77 | .to_string(), 78 | false => { 79 | if files.is_empty() { 80 | return; 81 | } 82 | let values = files 83 | .iter() 84 | .map(|item| { 85 | let ts = item.ts_start.and_utc().timestamp(); 86 | format!( 87 | "({}, '{}', '{}', {}, {})", 88 | ts, 89 | item.collector_id.as_str(), 90 | item.data_type.as_str(), 91 | item.rough_size, 92 | item.exact_size 93 | ) 94 | }) 95 | .collect::>() 96 | .join(", "); 97 | format!(" VALUES {} ", values) 98 | } 99 | }; 100 | let query_str = format!( 101 | r#" 102 | INSERT INTO "latest" ("timestamp", "collector_name", "type", "rough_size", "exact_size") 103 | {} 104 | ON CONFLICT (collector_name, type) 105 | DO UPDATE SET 106 | "timestamp" = CASE 107 | WHEN excluded."timestamp" > 
"latest"."timestamp" THEN excluded."timestamp" 108 | ELSE "latest"."timestamp" 109 | END, 110 | "rough_size" = CASE 111 | WHEN excluded."timestamp" > "latest"."timestamp" THEN excluded."rough_size" 112 | ELSE "latest"."rough_size" 113 | END, 114 | "exact_size" = CASE 115 | WHEN excluded."timestamp" > "latest"."timestamp" THEN excluded."exact_size" 116 | ELSE "latest"."exact_size" 117 | END; 118 | "#, 119 | value_str 120 | ); 121 | if let Err(e) = sqlx::query(query_str.as_str()) 122 | .execute(&self.conn_pool) 123 | .await 124 | { 125 | error!("failed to update latest files: {}", e); 126 | } 127 | } 128 | 129 | pub async fn get_latest_files(&self) -> Vec { 130 | let collector_name_to_info = self 131 | .collectors 132 | .iter() 133 | .map(|c| (c.name.clone(), c.clone())) 134 | .collect::>(); 135 | match sqlx::query( 136 | "select timestamp, collector_name, type, rough_size, exact_size from latest", 137 | ) 138 | .map(|row: SqliteRow| { 139 | let timestamp = row.get::(0); 140 | let collector_name = row.get::(1); 141 | let type_name = row.get::(2); 142 | let rough_size = row.get::(3); 143 | let exact_size = row.get::(4); 144 | 145 | // Skip if collector not found 146 | let collector = match collector_name_to_info.get(&collector_name) { 147 | Some(c) => c, 148 | None => return None, 149 | }; 150 | 151 | let is_rib = type_name.as_str() == "rib"; 152 | 153 | let ts_start = match DateTime::from_timestamp(timestamp, 0) { 154 | Some(dt) => dt.naive_utc(), 155 | None => return None, 156 | }; 157 | let (url, ts_end) = infer_url(collector, &ts_start, is_rib); 158 | 159 | Some(BrokerItem { 160 | ts_start, 161 | ts_end, 162 | collector_id: collector_name, 163 | data_type: type_name, 164 | url, 165 | rough_size, 166 | exact_size, 167 | }) 168 | }) 169 | .fetch_all(&self.conn_pool) 170 | .await 171 | { 172 | Ok(items) => items.into_iter().flatten().collect(), 173 | Err(e) => { 174 | error!("failed to get latest files: {}", e); 175 | Vec::new() 176 | } 177 | } 178 | } 179 | } 180 | 
-------------------------------------------------------------------------------- /src/query.rs: -------------------------------------------------------------------------------- 1 | //! Query-related structs and implementation. 2 | use crate::BrokerItem; 3 | use serde::{Deserialize, Serialize}; 4 | use std::fmt::{Display, Formatter}; 5 | use std::net::IpAddr; 6 | 7 | /// QueryParams represents the query parameters to the backend API. 8 | /// 9 | /// Example for constructing a QueryParams: 10 | /// ``` 11 | /// use bgpkit_broker::QueryParams; 12 | /// let mut params = QueryParams::new(); 13 | /// params = params.ts_start("1633046400"); 14 | /// params = params.ts_end("1633132800"); 15 | /// params = params.collector_id("rrc00"); 16 | /// params = params.project("riperis"); 17 | /// params = params.data_type("rib"); 18 | /// params = params.page(2); 19 | /// params = params.page_size(20); 20 | /// ``` 21 | /// The above example constructs a query that searches for BGP archive files that are: 22 | /// - after 2021-10-01T00:00:00 UTC 23 | /// - before 2021-10-02T00:00:00 UTC 24 | /// - from collector `rrc00` 25 | /// - from `riperis` collectors (already implied by collector=`rrc00` though) 26 | /// - rib table dump files 27 | /// - second page 28 | /// - each page contains 20 items 29 | #[derive(Debug, Serialize, Deserialize, Clone)] 30 | pub struct QueryParams { 31 | /// start unix timestamp: files with time after or equals to `ts_start` will match 32 | pub ts_start: Option, 33 | /// end unix timestamp: files with time before or equals to `ts_end` will match 34 | pub ts_end: Option, 35 | /// collector identifier, e.g. 
`rrc00` or `route-views2` 36 | pub collector_id: Option, 37 | /// archive project name: `riperis` or `routeviews` 38 | pub project: Option, 39 | /// archive data type: `rib` or `updates` 40 | pub data_type: Option, 41 | /// page number to seek to, starting from 1, default to 1 42 | pub page: i64, 43 | /// number of items each page contains, default to 10, max to 100000 44 | pub page_size: i64, 45 | /// collector peer IP address (for listing peers info) 46 | pub peers_ip: Option, 47 | /// collector peer ASN (for listing peers info) 48 | pub peers_asn: Option, 49 | /// collector peer full feed status (for listing peers info) 50 | pub peers_only_full_feed: bool, 51 | } 52 | 53 | /// Sorting order enum 54 | #[derive(Debug, Serialize, Deserialize, Clone)] 55 | pub enum SortOrder { 56 | /// `ASC` -> sort by increasing on timestamp 57 | ASC, 58 | /// `DESC` -> sort by decreasing on timestamp 59 | DESC, 60 | } 61 | 62 | /// Default [QueryParams] values 63 | impl Default for QueryParams { 64 | fn default() -> Self { 65 | QueryParams { 66 | ts_start: None, 67 | ts_end: None, 68 | collector_id: None, 69 | project: None, 70 | data_type: None, 71 | page: 1, 72 | page_size: 100, 73 | peers_ip: None, 74 | peers_asn: None, 75 | peers_only_full_feed: false, 76 | } 77 | } 78 | } 79 | 80 | impl Display for SortOrder { 81 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 82 | match self { 83 | SortOrder::ASC => { 84 | write!(f, "asc") 85 | } 86 | SortOrder::DESC => { 87 | write!(f, "desc") 88 | } 89 | } 90 | } 91 | } 92 | 93 | impl Display for QueryParams { 94 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 95 | let mut params_vec = vec![]; 96 | if let Some(v) = &self.ts_start { 97 | params_vec.push(format!("ts_start={}", v)); 98 | } 99 | if let Some(v) = &self.ts_end { 100 | params_vec.push(format!("ts_end={}", v)); 101 | } 102 | if let Some(v) = &self.collector_id { 103 | params_vec.push(format!("collector_id={}", v)); 104 | } 105 | if let Some(v) = 
&self.project { 106 | params_vec.push(format!("project={}", v)); 107 | } 108 | if let Some(v) = &self.data_type { 109 | params_vec.push(format!("data_type={}", v)); 110 | } 111 | params_vec.push(format!("page={}", self.page)); 112 | params_vec.push(format!("page_size={}", self.page_size)); 113 | 114 | if !params_vec.is_empty() { 115 | write!(f, "?{}", params_vec.join("&")) 116 | } else { 117 | write!(f, "") 118 | } 119 | } 120 | } 121 | 122 | impl QueryParams { 123 | pub fn new() -> QueryParams { 124 | QueryParams { 125 | ts_start: None, 126 | ts_end: None, 127 | collector_id: None, 128 | project: None, 129 | data_type: None, 130 | page: 1, 131 | page_size: 10, 132 | ..Default::default() 133 | } 134 | } 135 | 136 | /// set starting timestamp for the search and returns a new [QueryParams] object. 137 | /// 138 | /// ``` 139 | /// use bgpkit_broker::QueryParams; 140 | /// let mut params = QueryParams::new(); 141 | /// params = params.ts_start("1633046400"); 142 | /// ``` 143 | pub fn ts_start(self, ts_start: &str) -> Self { 144 | QueryParams { 145 | ts_start: Some(ts_start.to_string()), 146 | ..self 147 | } 148 | } 149 | 150 | /// set ending timestamp for the search and returns a new [QueryParams] object. 151 | /// 152 | /// ``` 153 | /// use bgpkit_broker::QueryParams; 154 | /// let mut params = QueryParams::new(); 155 | /// params = params.ts_end("1633046400"); 156 | /// ``` 157 | pub fn ts_end(self, ts_end: &str) -> Self { 158 | QueryParams { 159 | ts_end: Some(ts_end.to_string()), 160 | ..self 161 | } 162 | } 163 | 164 | /// set page number for the each for pagination. **the page number starts from 1**. 165 | /// 166 | /// ``` 167 | /// use bgpkit_broker::QueryParams; 168 | /// let mut params = QueryParams::new(); 169 | /// params = params.page(3); 170 | /// ``` 171 | pub fn page(self, page: i64) -> Self { 172 | QueryParams { page, ..self } 173 | } 174 | 175 | /// set each page's size (number of items per page). 
176 | /// 177 | /// ``` 178 | /// use bgpkit_broker::QueryParams; 179 | /// let mut params = QueryParams::new(); 180 | /// params = params.page_size(20); 181 | /// ``` 182 | pub fn page_size(self, page_size: i64) -> Self { 183 | QueryParams { page_size, ..self } 184 | } 185 | 186 | /// set the type of data to search for: 187 | /// - `rib`: table dump files 188 | /// - `updates`: BGP updates files 189 | /// 190 | /// Without specifying a data type, it defaults to search for all types. 191 | /// 192 | /// ``` 193 | /// use bgpkit_broker::QueryParams; 194 | /// let mut params = QueryParams::new(); 195 | /// params = params.data_type("rib"); 196 | /// ``` 197 | pub fn data_type(self, data_type: &str) -> Self { 198 | QueryParams { 199 | data_type: Some(data_type.to_string()), 200 | ..self 201 | } 202 | } 203 | 204 | /// set searching for only data from specific project: 205 | /// - `routeviews`: RouteViews 206 | /// - `riperis`: RIPE RIS 207 | /// 208 | /// ``` 209 | /// use bgpkit_broker::QueryParams; 210 | /// let mut params = QueryParams::new(); 211 | /// params = params.project("routeviews"); 212 | /// ``` 213 | pub fn project(self, project: &str) -> Self { 214 | QueryParams { 215 | project: Some(project.to_string()), 216 | ..self 217 | } 218 | } 219 | 220 | /// set searching for only data from specific collector, 221 | /// examples: `rrc00`, `route-views2` 222 | /// 223 | /// ``` 224 | /// use bgpkit_broker::QueryParams; 225 | /// let mut params = QueryParams::new(); 226 | /// params = params.collector_id("rrc00"); 227 | /// ``` 228 | pub fn collector_id(self, collector_id: &str) -> Self { 229 | QueryParams { 230 | collector_id: Some(collector_id.to_string()), 231 | ..self 232 | } 233 | } 234 | } 235 | 236 | #[allow(dead_code)] 237 | #[derive(Debug, Clone, Serialize, Deserialize)] 238 | #[cfg_attr(feature = "cli", derive(tabled::Tabled))] 239 | pub struct BrokerCollector { 240 | pub id: i64, 241 | pub name: String, 242 | pub url: String, 243 | pub project: String, 
244 | pub updates_interval: i64, 245 | } 246 | 247 | #[allow(dead_code)] 248 | #[derive(Debug, Clone, Serialize, Deserialize)] 249 | #[cfg_attr(feature = "cli", derive(tabled::Tabled))] 250 | pub struct BrokerItemType { 251 | pub id: i64, 252 | pub name: String, 253 | } 254 | 255 | #[derive(Debug, Serialize, Deserialize)] 256 | pub(crate) struct CollectorLatestResult { 257 | /// total number of items 258 | pub count: u32, 259 | 260 | /// array of [BrokerItem] 261 | pub data: Vec, 262 | } 263 | 264 | /// Query result struct that contains data or error message 265 | #[derive(Debug, Serialize, Deserialize)] 266 | pub(crate) struct BrokerQueryResult { 267 | /// total number of items 268 | pub total: Option, 269 | /// number of items returned in **current** call 270 | pub count: Option, 271 | /// the page number of the current call 272 | pub page: Option, 273 | /// the number of items per page 274 | pub page_size: Option, 275 | /// Error message 276 | pub error: Option, 277 | /// the returning data [Item]s 278 | pub data: Vec, 279 | } 280 | 281 | #[allow(clippy::unwrap_used)] 282 | impl Display for BrokerQueryResult { 283 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 284 | write!(f, "{}", serde_json::to_string(self).unwrap()) 285 | } 286 | } 287 | 288 | #[cfg(test)] 289 | mod tests { 290 | use super::*; 291 | 292 | #[test] 293 | fn test_param_to_string() { 294 | let param = QueryParams { 295 | ts_start: Some("1".to_string()), 296 | ts_end: Some("2".to_string()), 297 | collector_id: None, 298 | project: Some("test_project".to_string()), 299 | data_type: None, 300 | page: 1, 301 | page_size: 20, 302 | ..Default::default() 303 | }; 304 | 305 | assert_eq!( 306 | "?ts_start=1&ts_end=2&project=test_project&page=1&page_size=20".to_string(), 307 | param.to_string() 308 | ); 309 | 310 | let param = QueryParams { 311 | ts_start: None, 312 | ts_end: None, 313 | collector_id: None, 314 | project: None, 315 | data_type: None, 316 | page: 1, 317 | page_size: 20, 318 | 
..Default::default() 319 | }; 320 | 321 | assert_eq!("?page=1&page_size=20".to_string(), param.to_string()); 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /src/collector.rs: -------------------------------------------------------------------------------- 1 | use crate::BrokerError; 2 | use lazy_static::lazy_static; 3 | use serde::{Deserialize, Serialize}; 4 | use std::collections::HashMap; 5 | use tracing::debug; 6 | 7 | #[derive(Debug, Clone, Serialize, Deserialize)] 8 | pub struct Collector { 9 | pub id: String, 10 | pub project: String, 11 | pub url: String, 12 | } 13 | 14 | #[derive(Debug, Serialize, Deserialize, Clone)] 15 | pub struct Config { 16 | pub projects: Vec, 17 | } 18 | #[derive(Debug, Serialize, Deserialize, Clone)] 19 | pub struct ConfProject { 20 | name: String, 21 | collectors: Vec, 22 | } 23 | #[derive(Debug, Serialize, Deserialize, Clone)] 24 | pub struct ConfCollector { 25 | id: String, 26 | url: String, 27 | } 28 | 29 | impl Config { 30 | pub fn to_project_map(&self) -> HashMap { 31 | let mut map = HashMap::new(); 32 | for p in &self.projects { 33 | let project = p.name.clone(); 34 | for c in &p.collectors { 35 | map.insert(c.id.clone(), project.clone()); 36 | } 37 | } 38 | map 39 | } 40 | } 41 | 42 | pub fn load_collectors() -> Result, BrokerError> { 43 | // load config 44 | debug!("loading default collectors config..."); 45 | let config: Config = DEFAULT_COLLECTORS_CONFIG.clone(); 46 | 47 | Ok(config 48 | .projects 49 | .into_iter() 50 | .flat_map(|project| { 51 | assert!(["routeviews", "riperis"].contains(&project.name.as_str())); 52 | let project_name = project.name.clone(); 53 | project 54 | .collectors 55 | .into_iter() 56 | .map(|c| Collector { 57 | id: c.id, 58 | project: project_name.clone(), 59 | url: c.url, 60 | }) 61 | .collect::>() 62 | }) 63 | .collect()) 64 | } 65 | 66 | lazy_static! 
{ 67 | pub static ref DEFAULT_COLLECTORS_CONFIG: Config = serde_json::from_str( 68 | r#" 69 | { 70 | "projects": [ 71 | { 72 | "name": "riperis", 73 | "collectors": [ 74 | { 75 | "id": "rrc00", 76 | "url": "https://data.ris.ripe.net/rrc00" 77 | }, 78 | { 79 | "id": "rrc01", 80 | "url": "https://data.ris.ripe.net/rrc01" 81 | }, 82 | { 83 | "id": "rrc02", 84 | "url": "https://data.ris.ripe.net/rrc02" 85 | }, 86 | { 87 | "id": "rrc03", 88 | "url": "https://data.ris.ripe.net/rrc03" 89 | }, 90 | { 91 | "id": "rrc04", 92 | "url": "https://data.ris.ripe.net/rrc04" 93 | }, 94 | { 95 | "id": "rrc05", 96 | "url": "https://data.ris.ripe.net/rrc05" 97 | }, 98 | { 99 | "id": "rrc06", 100 | "url": "https://data.ris.ripe.net/rrc06" 101 | }, 102 | { 103 | "id": "rrc07", 104 | "url": "https://data.ris.ripe.net/rrc07" 105 | }, 106 | { 107 | "id": "rrc08", 108 | "url": "https://data.ris.ripe.net/rrc08" 109 | }, 110 | { 111 | "id": "rrc09", 112 | "url": "https://data.ris.ripe.net/rrc09" 113 | }, 114 | { 115 | "id": "rrc10", 116 | "url": "https://data.ris.ripe.net/rrc10" 117 | }, 118 | { 119 | "id": "rrc11", 120 | "url": "https://data.ris.ripe.net/rrc11" 121 | }, 122 | { 123 | "id": "rrc12", 124 | "url": "https://data.ris.ripe.net/rrc12" 125 | }, 126 | { 127 | "id": "rrc13", 128 | "url": "https://data.ris.ripe.net/rrc13" 129 | }, 130 | { 131 | "id": "rrc14", 132 | "url": "https://data.ris.ripe.net/rrc14" 133 | }, 134 | { 135 | "id": "rrc15", 136 | "url": "https://data.ris.ripe.net/rrc15" 137 | }, 138 | { 139 | "id": "rrc16", 140 | "url": "https://data.ris.ripe.net/rrc16" 141 | }, 142 | { 143 | "id": "rrc18", 144 | "url": "https://data.ris.ripe.net/rrc18" 145 | }, 146 | { 147 | "id": "rrc19", 148 | "url": "https://data.ris.ripe.net/rrc19" 149 | }, 150 | { 151 | "id": "rrc20", 152 | "url": "https://data.ris.ripe.net/rrc20" 153 | }, 154 | { 155 | "id": "rrc21", 156 | "url": "https://data.ris.ripe.net/rrc21" 157 | }, 158 | { 159 | "id": "rrc22", 160 | "url": 
"https://data.ris.ripe.net/rrc22" 161 | }, 162 | { 163 | "id": "rrc23", 164 | "url": "https://data.ris.ripe.net/rrc23" 165 | }, 166 | { 167 | "id": "rrc24", 168 | "url": "https://data.ris.ripe.net/rrc24" 169 | }, 170 | { 171 | "id": "rrc25", 172 | "url": "https://data.ris.ripe.net/rrc25" 173 | }, 174 | { 175 | "id": "rrc26", 176 | "url": "https://data.ris.ripe.net/rrc26" 177 | } 178 | ] 179 | }, 180 | { 181 | "name": "routeviews", 182 | "collectors": [ 183 | { 184 | "id": "amsix.ams", 185 | "url": "https://archive.routeviews.org/amsix.ams/bgpdata" 186 | }, 187 | { 188 | "id": "cix.atl", 189 | "url": "https://archive.routeviews.org/cix.atl/bgpdata" 190 | }, 191 | { 192 | "id": "decix.jhb", 193 | "url": "https://archive.routeviews.org/decix.jhb/bgpdata" 194 | }, 195 | { 196 | "id": "iraq-ixp.bgw", 197 | "url": "https://archive.routeviews.org/iraq-ixp.bgw/bgpdata" 198 | }, 199 | { 200 | "id": "pacwave.lax", 201 | "url": "https://archive.routeviews.org/pacwave.lax/bgpdata" 202 | }, 203 | { 204 | "id": "pit.scl", 205 | "url": "https://archive.routeviews.org/pit.scl/bgpdata" 206 | }, 207 | { 208 | "id": "pitmx.qro", 209 | "url": "https://archive.routeviews.org/pitmx.qro/bgpdata" 210 | }, 211 | { 212 | "id": "route-views2", 213 | "url": "https://archive.routeviews.org/bgpdata" 214 | }, 215 | { 216 | "id": "route-views3", 217 | "url": "https://archive.routeviews.org/route-views3/bgpdata" 218 | }, 219 | { 220 | "id": "route-views4", 221 | "url": "https://archive.routeviews.org/route-views4/bgpdata" 222 | }, 223 | { 224 | "id": "route-views5", 225 | "url": "https://archive.routeviews.org/route-views5/bgpdata" 226 | }, 227 | { 228 | "id": "route-views6", 229 | "url": "https://archive.routeviews.org/route-views6/bgpdata" 230 | }, 231 | { 232 | "id": "route-views7", 233 | "url": "https://archive.routeviews.org/route-views7/bgpdata" 234 | }, 235 | { 236 | "id": "route-views8", 237 | "url": "https://archive.routeviews.org/route-views8/bgpdata" 238 | }, 239 | { 240 | 
"id":"route-views.amsix", 241 | "url": "https://archive.routeviews.org/route-views.amsix/bgpdata" 242 | }, 243 | { 244 | "id":"route-views.chicago", 245 | "url": "https://archive.routeviews.org/route-views.chicago/bgpdata" 246 | }, 247 | { 248 | "id":"route-views.chile", 249 | "url": "https://archive.routeviews.org/route-views.chile/bgpdata" 250 | }, 251 | { 252 | "id":"route-views.eqix", 253 | "url": "https://archive.routeviews.org/route-views.eqix/bgpdata" 254 | }, 255 | { 256 | "id":"route-views.flix", 257 | "url": "https://archive.routeviews.org/route-views.flix/bgpdata" 258 | }, 259 | { 260 | "id":"route-views.gorex", 261 | "url": "https://archive.routeviews.org/route-views.gorex/bgpdata" 262 | }, 263 | { 264 | "id":"route-views.isc", 265 | "url": "https://archive.routeviews.org/route-views.isc/bgpdata" 266 | }, 267 | { 268 | "id":"route-views.kixp", 269 | "url": "https://archive.routeviews.org/route-views.kixp/bgpdata" 270 | }, 271 | { 272 | "id":"route-views.jinx", 273 | "url": "https://archive.routeviews.org/route-views.jinx/bgpdata" 274 | }, 275 | { 276 | "id":"route-views.linx", 277 | "url": "https://archive.routeviews.org/route-views.linx/bgpdata" 278 | }, 279 | { 280 | "id":"route-views.napafrica", 281 | "url": "https://archive.routeviews.org/route-views.napafrica/bgpdata" 282 | }, 283 | { 284 | "id":"route-views.nwax", 285 | "url": "https://archive.routeviews.org/route-views.nwax/bgpdata" 286 | }, 287 | { 288 | "id":"route-views.phoix", 289 | "url": "https://archive.routeviews.org/route-views.phoix/bgpdata" 290 | }, 291 | { 292 | "id":"route-views.telxatl", 293 | "url": "https://archive.routeviews.org/route-views.telxatl/bgpdata" 294 | }, 295 | { 296 | "id":"route-views.wide", 297 | "url": "https://archive.routeviews.org/route-views.wide/bgpdata" 298 | }, 299 | { 300 | "id":"route-views.sydney", 301 | "url": "https://archive.routeviews.org/route-views.sydney/bgpdata" 302 | }, 303 | { 304 | "id":"route-views.saopaulo", 305 | "url": 
"https://archive.routeviews.org/route-views.saopaulo/bgpdata" 306 | }, 307 | { 308 | "id":"route-views2.saopaulo", 309 | "url": "https://archive.routeviews.org/route-views2.saopaulo/bgpdata" 310 | }, 311 | { 312 | "id":"route-views.sg", 313 | "url": "https://archive.routeviews.org/route-views.sg/bgpdata" 314 | }, 315 | { 316 | "id":"route-views.perth", 317 | "url": "https://archive.routeviews.org/route-views.perth/bgpdata" 318 | }, 319 | { 320 | "id":"route-views.peru", 321 | "url": "https://archive.routeviews.org/route-views.peru/bgpdata" 322 | }, 323 | { 324 | "id":"route-views.sfmix", 325 | "url": "https://archive.routeviews.org/route-views.sfmix/bgpdata" 326 | }, 327 | { 328 | "id":"route-views.siex", 329 | "url": "https://archive.routeviews.org/route-views.siex/bgpdata" 330 | }, 331 | { 332 | "id":"route-views.soxrs", 333 | "url": "https://archive.routeviews.org/route-views.soxrs/bgpdata" 334 | }, 335 | { 336 | "id":"route-views.mwix", 337 | "url": "https://archive.routeviews.org/route-views.mwix/bgpdata" 338 | }, 339 | { 340 | "id":"route-views.rio", 341 | "url": "https://archive.routeviews.org/route-views.rio/bgpdata" 342 | }, 343 | { 344 | "id":"route-views.fortaleza", 345 | "url": "https://archive.routeviews.org/route-views.fortaleza/bgpdata" 346 | }, 347 | { 348 | "id":"route-views.gixa", 349 | "url": "https://archive.routeviews.org/route-views.gixa/bgpdata" 350 | }, 351 | { 352 | "id":"route-views.bdix", 353 | "url": "https://archive.routeviews.org/route-views.bdix/bgpdata" 354 | }, 355 | { 356 | "id":"route-views.bknix", 357 | "url": "https://archive.routeviews.org/route-views.bknix/bgpdata" 358 | }, 359 | { 360 | "id":"route-views.ny", 361 | "url": "https://archive.routeviews.org/route-views.ny/bgpdata" 362 | }, 363 | { 364 | "id":"route-views.uaeix", 365 | "url": "https://archive.routeviews.org/route-views.uaeix/bgpdata" 366 | }, 367 | { 368 | "id":"interlan.otp", 369 | "url": "https://archive.routeviews.org/interlan.otp/bgpdata" 370 | }, 371 | { 372 
| "id":"kinx.icn", 373 | "url": "https://archive.routeviews.org/kinx.icn/bgpdata" 374 | }, 375 | { 376 | "id": "iix.cgk", 377 | "url": "https://archive.routeviews.org/iix.cgk/bgpdata" 378 | }, 379 | { 380 | "id": "ixpn.lagos", 381 | "url": "https://archive.routeviews.org/ixpn.lagos/bgpdata" 382 | }, 383 | { 384 | "id":"namex.fco", 385 | "url": "https://archive.routeviews.org/namex.fco/bgpdata" 386 | }, 387 | { 388 | "id":"getafix.mnl", 389 | "url": "http://archive.routeviews.org/getafix.mnl/bgpdata" 390 | }, 391 | { 392 | "id":"ix-br.gru", 393 | "url": "http://archive.routeviews.org/ix-br.gru/bgpdata" 394 | }, 395 | { 396 | "id":"ix-br2.gru", 397 | "url": "http://archive.routeviews.org/ix-br2.gru/bgpdata" 398 | }, 399 | { 400 | "id":"hkix.hkg", 401 | "url": "http://archive.routeviews.org/hkix.hkg/bgpdata" 402 | }, 403 | { 404 | "id":"netnod.mmx", 405 | "url": "http://archive.routeviews.org/netnod.mmx/bgpdata" 406 | }, 407 | { 408 | "id":"crix.sjo", 409 | "url": "http://archive.routeviews.org/crix.sjo/bgpdata" 410 | } 411 | ] 412 | } 413 | ] 414 | } 415 | "# 416 | ) 417 | .expect("invalid default collectors configuration"); 418 | } 419 | -------------------------------------------------------------------------------- /src/crawler/common.rs: -------------------------------------------------------------------------------- 1 | use crate::BrokerError; 2 | use chrono::{Datelike, NaiveDate, Utc}; 3 | use regex::{Captures, Regex}; 4 | use scraper::{Html, Selector}; 5 | use std::time::Duration; 6 | use tracing::{debug, warn}; 7 | 8 | const SIZE_KB: u64 = u64::pow(1024, 1); 9 | const SIZE_MB: u64 = u64::pow(1024, 2); 10 | const SIZE_GB: u64 = u64::pow(1024, 3); 11 | 12 | /// Get the maximum number of retry attempts for crawling. 13 | /// Default is 3 attempts. Can be configured via BGPKIT_BROKER_CRAWLER_MAX_RETRIES. 
/// Parse an environment variable into `T`, falling back to `default` when the
/// variable is unset or fails to parse. Shared by the crawler config getters
/// below, which previously duplicated this read-and-parse logic.
fn env_parse_or<T: std::str::FromStr>(key: &str, default: T) -> T {
    std::env::var(key)
        .ok()
        .and_then(|s| s.parse::<T>().ok())
        .unwrap_or(default)
}

/// Get the maximum number of retry attempts for crawling.
/// Default is 3 attempts. Can be configured via `BGPKIT_BROKER_CRAWLER_MAX_RETRIES`.
pub(crate) fn get_crawler_max_retries() -> u32 {
    env_parse_or("BGPKIT_BROKER_CRAWLER_MAX_RETRIES", 3)
}

/// Get the initial backoff duration in milliseconds.
/// Default is 1000ms (1 second). Can be configured via `BGPKIT_BROKER_CRAWLER_BACKOFF_MS`.
pub(crate) fn get_crawler_backoff_ms() -> u64 {
    env_parse_or("BGPKIT_BROKER_CRAWLER_BACKOFF_MS", 1000)
}
42 | /// 43 | /// Recommended values: 44 | /// - 1-2: Conservative, suitable for avoiding rate limits 45 | /// - 3-5: Balanced performance and server load 46 | /// - 10+: Aggressive, may trigger rate limiting on some servers 47 | pub(crate) fn get_crawler_month_concurrency() -> usize { 48 | std::env::var("BGPKIT_BROKER_CRAWLER_MONTH_CONCURRENCY") 49 | .ok() 50 | .and_then(|s| s.parse().ok()) 51 | .unwrap_or(2) 52 | } 53 | 54 | fn size_str_to_bytes(size_str: &str, size_pattern: &Regex) -> Option { 55 | let cap: Captures = size_pattern.captures(size_str)?; 56 | let mut size = match cap[1].to_string().parse::() { 57 | Ok(x) => x, 58 | Err(_) => return None, 59 | }; 60 | size *= match cap[2].to_ascii_lowercase().as_str() { 61 | "k" => SIZE_KB, 62 | "m" => SIZE_MB, 63 | "g" => SIZE_GB, 64 | "" => 1, 65 | other => panic!("unknown file size multiplier {}", other), 66 | } as f64; 67 | Some(size as i64) 68 | } 69 | 70 | /// Extract the MRT file links and file sizes from the body of a RouteViews or RIPE RIS page. 
71 | /// 72 | /// # Arguments 73 | /// 74 | /// * `body`: RouteViews or RIPE RIS page body 75 | /// 76 | /// returns: Vec<(String, i64)> 77 | pub fn extract_link_size(body: &str) -> Vec<(String, i64)> { 78 | let mut res: Vec<(String, i64)> = vec![]; 79 | 80 | if body.contains("table") { 81 | let size_pattern: Regex = 82 | Regex::new(r" *([\d.]+)([MKGmkg]*)").expect("invalid regex pattern"); 83 | // table-based html pages, works with RouteViews and RIPE RIS old version 84 | let fragment = Html::parse_fragment(body); 85 | let row_selector = Selector::parse("tr").expect("invalid selector"); 86 | let link_selector = Selector::parse("a").expect("invalid selector"); 87 | for elem in fragment.select(&row_selector) { 88 | let text_arr = elem 89 | .text() 90 | .filter(|t| t.is_ascii() && !t.trim().is_empty()) 91 | .collect::>(); 92 | let text = text_arr.join(""); 93 | if text.is_empty() || text.contains("Name") || text.contains("Parent") { 94 | continue; 95 | } 96 | let href = match elem.select(&link_selector).next() { 97 | Some(e) => e.value().attr("href"), 98 | None => continue, 99 | }; 100 | let size = match size_str_to_bytes(text_arr[2], &size_pattern) { 101 | None => continue, 102 | Some(v) => v, 103 | }; 104 | if let Some(href_str) = href { 105 | res.push((href_str.to_string(), size)); 106 | } 107 | } 108 | } else { 109 | let size_pattern: Regex = 110 | Regex::new(r" +([\d.]+)([MKGmkg]*)$").expect("invalid regex pattern"); 111 | for line in body.lines() { 112 | let size = match size_str_to_bytes(line, &size_pattern) { 113 | Some(s) => s, 114 | None => continue, 115 | }; 116 | 117 | let fragment = Html::parse_fragment(line); 118 | let link_selector = Selector::parse("a").expect("invalid selector"); 119 | let mut link = "".to_string(); 120 | if let Some(elem) = fragment.select(&link_selector).next() { 121 | if let Some(href) = elem.value().attr("href") { 122 | link = href.to_string(); 123 | } 124 | } 125 | if !link.is_empty() { 126 | res.push((link, size)); 127 | } 128 
| } 129 | } 130 | res 131 | } 132 | 133 | /// Fetch the body of a URL with retry logic and exponential backoff. 134 | /// 135 | /// # Arguments 136 | /// * `url` - The URL to fetch 137 | /// 138 | /// # Environment Variables 139 | /// * `BGPKIT_BROKER_CRAWLER_MAX_RETRIES` - Maximum number of retry attempts (default: 3) 140 | /// * `BGPKIT_BROKER_CRAWLER_BACKOFF_MS` - Initial backoff duration in milliseconds (default: 1000) 141 | /// 142 | /// # Returns 143 | /// The body of the response as a string, or an error if all retries failed. 144 | pub(crate) async fn fetch_body(url: &str) -> Result { 145 | let max_retries = get_crawler_max_retries(); 146 | let initial_backoff_ms = get_crawler_backoff_ms(); 147 | 148 | let client = reqwest::ClientBuilder::new() 149 | .user_agent("bgpkit-broker/3") 150 | .pool_max_idle_per_host(0) 151 | .timeout(Duration::from_secs(30)) 152 | .build()?; 153 | 154 | let mut last_error: Option = None; 155 | 156 | for attempt in 0..max_retries { 157 | match client.get(url).send().await { 158 | Ok(response) => match response.text().await { 159 | Ok(body) => return Ok(body), 160 | Err(e) => { 161 | last_error = Some(e.into()); 162 | } 163 | }, 164 | Err(e) => { 165 | last_error = Some(e.into()); 166 | } 167 | } 168 | 169 | // If not the last attempt, wait with exponential backoff 170 | if attempt < max_retries - 1 { 171 | let backoff_ms = initial_backoff_ms * (1 << attempt); // 2^attempt * initial 172 | warn!( 173 | "fetch_body failed for {} (attempt {}/{}), retrying in {}ms: {:?}", 174 | url, 175 | attempt + 1, 176 | max_retries, 177 | backoff_ms, 178 | last_error 179 | ); 180 | tokio::time::sleep(Duration::from_millis(backoff_ms)).await; 181 | } 182 | } 183 | 184 | // All retries exhausted 185 | debug!( 186 | "fetch_body failed for {} after {} attempts", 187 | url, max_retries 188 | ); 189 | Err(last_error.unwrap_or_else(|| { 190 | BrokerError::BrokerError(format!( 191 | "fetch_body failed for {} after {} attempts", 192 | url, max_retries 193 | 
)) 194 | })) 195 | } 196 | 197 | /// Remove trailing slash from a string. 198 | /// 199 | /// # Arguments 200 | /// 201 | /// * `s`: 202 | /// 203 | /// returns: String 204 | pub(crate) fn remove_trailing_slash(s: impl ToString) -> String { 205 | let mut s = s.to_string(); 206 | if s.ends_with('/') { 207 | s.pop(); 208 | } 209 | s 210 | } 211 | 212 | pub(crate) async fn crawl_months_list( 213 | collector_root_url: &str, 214 | from_month: Option, 215 | ) -> Result, BrokerError> { 216 | let rounded_month = from_month.and_then(|d| NaiveDate::from_ymd_opt(d.year(), d.month(), 1)); 217 | 218 | let month_link_pattern: Regex = 219 | Regex::new(r#".*"#).expect("invalid regex pattern"); 220 | let body = fetch_body(collector_root_url).await?; 221 | let mut res = vec![]; 222 | for cap in month_link_pattern.captures_iter(body.as_str()) { 223 | let month = cap[1].to_owned(); 224 | let parsed_month = 225 | NaiveDate::parse_from_str(format!("{}.01", month.as_str()).as_str(), "%Y.%m.%d")?; 226 | if let Some(rounded) = rounded_month { 227 | if let Some(new_month) = NaiveDate::from_ymd_opt(rounded.year(), rounded.month(), 1) { 228 | if parsed_month < new_month { 229 | continue; 230 | } 231 | } 232 | } 233 | if parsed_month > Utc::now().naive_utc().date() { 234 | continue; 235 | } 236 | res.push(parsed_month); 237 | } 238 | Ok(res) 239 | } 240 | 241 | #[cfg(test)] 242 | mod tests { 243 | use super::*; 244 | 245 | #[test] 246 | fn test_extract_link_size() { 247 | const RIPE_OLD: &str = r#" 248 | 249 | 250 | Index of /rrc00/2022.11 251 | 252 | 253 |

Index of /rrc00/2022.11

254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 |
 NameLast modifiedSizeDescription

 Parent Directory  -  
 updates.20221128.2220.gz2022-11-28 22:25 6.4M 
 updates.20221128.2215.gz2022-11-28 22:20 3.8M 
 bview.20221102.0800.gz2022-11-02 10:14 1.5G 
 bview.20221102.0000.gz2022-11-02 02:13 1.5G 

264 | 265 | "#; 266 | 267 | const RIPE_NEW: &str = r#" 268 | Index of /rrc00/2001.01/ 269 | 270 |

Index of /rrc00/2001.01/


../
271 | bview.20010101.0609.gz                             01-Jan-2001 06:09     12M
272 | bview.20010101.1410.gz                             01-Jan-2001 14:10     12M
273 | updates.20010131.2236.gz                           31-Jan-2001 22:36     98K
274 | updates.20010131.2251.gz                           31-Jan-2001 22:51     97K
275 | 

276 | 277 | "#; 278 | let res = extract_link_size(RIPE_NEW); 279 | assert_eq!(res.len(), 4); 280 | 281 | const ROUTEVIEWS: &str = r#" 282 | 283 | 284 | Index of /route-views.bdix/bgpdata/2022.10/UPDATES 285 | 286 | 287 |

Index of /route-views.bdix/bgpdata/2022.10/UPDATES

288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 |
[ICO]NameLast modifiedSizeDescription

[PARENTDIR]Parent Directory   -  
[   ]updates.20221001.000..>2022-10-01 00:00 14  
[   ]updates.20221001.001..>2022-10-01 00:15 14  
[   ]updates.20221026.154..>2022-10-26 15:45 14  
[   ]updates.20221026.160..>2022-10-26 16:00 14  

298 | 299 | "#; 300 | 301 | let res = extract_link_size(RIPE_OLD); 302 | assert_eq!(res.len(), 4); 303 | let res = extract_link_size(ROUTEVIEWS); 304 | assert_eq!(res.len(), 4); 305 | } 306 | } 307 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | //! Configuration management for BGPKIT Broker. 2 | //! 3 | //! This module provides a centralized configuration struct that loads settings 4 | //! from environment variables. All configuration is loaded once at startup 5 | //! and can be displayed for logging purposes. 6 | 7 | use std::fmt; 8 | use std::time::Duration; 9 | 10 | /// Default values for crawler configuration 11 | const DEFAULT_CRAWLER_MAX_RETRIES: u32 = 3; 12 | const DEFAULT_CRAWLER_BACKOFF_MS: u64 = 1000; 13 | const DEFAULT_CRAWLER_COLLECTOR_CONCURRENCY: usize = 2; 14 | const DEFAULT_CRAWLER_MONTH_CONCURRENCY: usize = 2; 15 | 16 | /// Default values for backup configuration 17 | const DEFAULT_BACKUP_INTERVAL_HOURS: u64 = 24; 18 | 19 | /// Default values for database maintenance 20 | const DEFAULT_META_RETENTION_DAYS: i64 = 30; 21 | 22 | /// Crawler configuration settings. 23 | /// 24 | /// Controls how the broker crawls BGP archive servers for MRT files. 25 | #[derive(Debug, Clone)] 26 | pub struct CrawlerConfig { 27 | /// Maximum number of retry attempts for failed HTTP requests. 28 | /// Environment variable: `BGPKIT_BROKER_CRAWLER_MAX_RETRIES` 29 | pub max_retries: u32, 30 | 31 | /// Initial backoff duration in milliseconds between retries. 32 | /// Environment variable: `BGPKIT_BROKER_CRAWLER_BACKOFF_MS` 33 | pub backoff_ms: u64, 34 | 35 | /// Number of collectors to crawl simultaneously. 36 | /// Environment variable: `BGPKIT_BROKER_CRAWLER_COLLECTOR_CONCURRENCY` 37 | pub collector_concurrency: usize, 38 | 39 | /// Number of months to crawl in parallel per collector. 
40 | /// Primarily affects bootstrap crawls; regular updates typically only fetch 1-2 months. 41 | /// Environment variable: `BGPKIT_BROKER_CRAWLER_MONTH_CONCURRENCY` 42 | pub month_concurrency: usize, 43 | } 44 | 45 | impl Default for CrawlerConfig { 46 | fn default() -> Self { 47 | Self { 48 | max_retries: DEFAULT_CRAWLER_MAX_RETRIES, 49 | backoff_ms: DEFAULT_CRAWLER_BACKOFF_MS, 50 | collector_concurrency: DEFAULT_CRAWLER_COLLECTOR_CONCURRENCY, 51 | month_concurrency: DEFAULT_CRAWLER_MONTH_CONCURRENCY, 52 | } 53 | } 54 | } 55 | 56 | impl CrawlerConfig { 57 | /// Load crawler configuration from environment variables. 58 | pub fn from_env() -> Self { 59 | Self { 60 | max_retries: std::env::var("BGPKIT_BROKER_CRAWLER_MAX_RETRIES") 61 | .ok() 62 | .and_then(|s| s.parse().ok()) 63 | .unwrap_or(DEFAULT_CRAWLER_MAX_RETRIES), 64 | backoff_ms: std::env::var("BGPKIT_BROKER_CRAWLER_BACKOFF_MS") 65 | .ok() 66 | .and_then(|s| s.parse().ok()) 67 | .unwrap_or(DEFAULT_CRAWLER_BACKOFF_MS), 68 | collector_concurrency: std::env::var("BGPKIT_BROKER_CRAWLER_COLLECTOR_CONCURRENCY") 69 | .ok() 70 | .and_then(|s| s.parse().ok()) 71 | .unwrap_or(DEFAULT_CRAWLER_COLLECTOR_CONCURRENCY), 72 | month_concurrency: std::env::var("BGPKIT_BROKER_CRAWLER_MONTH_CONCURRENCY") 73 | .ok() 74 | .and_then(|s| s.parse().ok()) 75 | .unwrap_or(DEFAULT_CRAWLER_MONTH_CONCURRENCY), 76 | } 77 | } 78 | } 79 | 80 | impl fmt::Display for CrawlerConfig { 81 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 82 | write!( 83 | f, 84 | "collector_concurrency={}, month_concurrency={}, max_retries={}, backoff_ms={}", 85 | self.collector_concurrency, self.month_concurrency, self.max_retries, self.backoff_ms 86 | ) 87 | } 88 | } 89 | 90 | /// Backup configuration settings. 91 | /// 92 | /// Controls automatic database backups to local or S3 storage. 93 | #[derive(Debug, Clone)] 94 | pub struct BackupConfig { 95 | /// Destination path for backups (local path or S3 URL). 
96 | /// Environment variable: `BGPKIT_BROKER_BACKUP_TO` 97 | pub destination: Option, 98 | 99 | /// Interval between backups in hours. 100 | /// Environment variable: `BGPKIT_BROKER_BACKUP_INTERVAL_HOURS` 101 | pub interval_hours: u64, 102 | 103 | /// URL to ping on successful backup (for monitoring). 104 | /// Environment variable: `BGPKIT_BROKER_BACKUP_HEARTBEAT_URL` 105 | pub heartbeat_url: Option, 106 | } 107 | 108 | impl Default for BackupConfig { 109 | fn default() -> Self { 110 | Self { 111 | destination: None, 112 | interval_hours: DEFAULT_BACKUP_INTERVAL_HOURS, 113 | heartbeat_url: None, 114 | } 115 | } 116 | } 117 | 118 | impl BackupConfig { 119 | /// Load backup configuration from environment variables. 120 | pub fn from_env() -> Self { 121 | Self { 122 | destination: std::env::var("BGPKIT_BROKER_BACKUP_TO").ok(), 123 | interval_hours: std::env::var("BGPKIT_BROKER_BACKUP_INTERVAL_HOURS") 124 | .ok() 125 | .and_then(|s| s.parse().ok()) 126 | .unwrap_or(DEFAULT_BACKUP_INTERVAL_HOURS), 127 | heartbeat_url: std::env::var("BGPKIT_BROKER_BACKUP_HEARTBEAT_URL").ok(), 128 | } 129 | } 130 | 131 | /// Returns true if backup is configured. 132 | pub fn is_enabled(&self) -> bool { 133 | self.destination.is_some() 134 | } 135 | 136 | /// Get the backup interval as a Duration. 137 | pub fn interval(&self) -> Duration { 138 | Duration::from_secs(self.interval_hours * 60 * 60) 139 | } 140 | } 141 | 142 | /// Heartbeat configuration for monitoring. 143 | #[derive(Debug, Clone, Default)] 144 | pub struct HeartbeatConfig { 145 | /// General heartbeat URL (pinged after each update). 146 | /// Environment variable: `BGPKIT_BROKER_HEARTBEAT_URL` 147 | pub general_url: Option, 148 | 149 | /// Backup heartbeat URL (pinged after each backup). 150 | /// Environment variable: `BGPKIT_BROKER_BACKUP_HEARTBEAT_URL` 151 | pub backup_url: Option, 152 | } 153 | 154 | impl HeartbeatConfig { 155 | /// Load heartbeat configuration from environment variables. 
156 | pub fn from_env() -> Self { 157 | Self { 158 | general_url: std::env::var("BGPKIT_BROKER_HEARTBEAT_URL").ok(), 159 | backup_url: std::env::var("BGPKIT_BROKER_BACKUP_HEARTBEAT_URL").ok(), 160 | } 161 | } 162 | 163 | /// Returns true if any heartbeat is configured. 164 | pub fn is_any_enabled(&self) -> bool { 165 | self.general_url.is_some() || self.backup_url.is_some() 166 | } 167 | } 168 | 169 | /// NATS notification configuration. 170 | #[derive(Debug, Clone, Default)] 171 | pub struct NatsConfig { 172 | /// NATS server URL. 173 | /// Environment variable: `BGPKIT_BROKER_NATS_URL` 174 | pub url: Option, 175 | 176 | /// NATS username. 177 | /// Environment variable: `BGPKIT_BROKER_NATS_USER` 178 | pub user: Option, 179 | 180 | /// NATS root subject for messages. 181 | /// Environment variable: `BGPKIT_BROKER_NATS_ROOT_SUBJECT` 182 | pub root_subject: Option, 183 | } 184 | 185 | impl NatsConfig { 186 | /// Load NATS configuration from environment variables. 187 | pub fn from_env() -> Self { 188 | Self { 189 | url: std::env::var("BGPKIT_BROKER_NATS_URL").ok(), 190 | user: std::env::var("BGPKIT_BROKER_NATS_USER").ok(), 191 | root_subject: std::env::var("BGPKIT_BROKER_NATS_ROOT_SUBJECT").ok(), 192 | } 193 | } 194 | 195 | /// Returns true if NATS is configured. 196 | pub fn is_enabled(&self) -> bool { 197 | self.url.is_some() 198 | } 199 | } 200 | 201 | /// Database maintenance configuration. 202 | #[derive(Debug, Clone)] 203 | pub struct DatabaseConfig { 204 | /// Number of days to retain meta entries. 205 | /// Environment variable: `BGPKIT_BROKER_META_RETENTION_DAYS` 206 | pub meta_retention_days: i64, 207 | } 208 | 209 | impl Default for DatabaseConfig { 210 | fn default() -> Self { 211 | Self { 212 | meta_retention_days: DEFAULT_META_RETENTION_DAYS, 213 | } 214 | } 215 | } 216 | 217 | impl DatabaseConfig { 218 | /// Load database configuration from environment variables. 
219 | pub fn from_env() -> Self { 220 | Self { 221 | meta_retention_days: std::env::var("BGPKIT_BROKER_META_RETENTION_DAYS") 222 | .ok() 223 | .and_then(|s| s.parse().ok()) 224 | .unwrap_or(DEFAULT_META_RETENTION_DAYS), 225 | } 226 | } 227 | } 228 | 229 | /// Complete BGPKIT Broker configuration. 230 | /// 231 | /// This struct aggregates all configuration settings and provides methods 232 | /// for loading from environment variables and displaying configuration summaries. 233 | #[derive(Debug, Clone, Default)] 234 | pub struct BrokerConfig { 235 | /// Crawler settings 236 | pub crawler: CrawlerConfig, 237 | 238 | /// Backup settings 239 | pub backup: BackupConfig, 240 | 241 | /// Heartbeat settings 242 | pub heartbeat: HeartbeatConfig, 243 | 244 | /// NATS notification settings 245 | pub nats: NatsConfig, 246 | 247 | /// Database maintenance settings 248 | pub database: DatabaseConfig, 249 | } 250 | 251 | impl BrokerConfig { 252 | /// Create a new BrokerConfig with default values. 253 | pub fn new() -> Self { 254 | Self::default() 255 | } 256 | 257 | /// Load all configuration from environment variables. 258 | pub fn from_env() -> Self { 259 | Self { 260 | crawler: CrawlerConfig::from_env(), 261 | backup: BackupConfig::from_env(), 262 | heartbeat: HeartbeatConfig::from_env(), 263 | nats: NatsConfig::from_env(), 264 | database: DatabaseConfig::from_env(), 265 | } 266 | } 267 | 268 | /// Display configuration summary for logging. 269 | /// 270 | /// Returns a vector of log lines suitable for info-level logging. 
271 | pub fn display_summary( 272 | &self, 273 | do_update: bool, 274 | do_api: bool, 275 | update_interval: u64, 276 | host: &str, 277 | port: u16, 278 | ) -> Vec { 279 | let mut lines = Vec::new(); 280 | 281 | lines.push("=== BGPKIT Broker Configuration ===".to_string()); 282 | 283 | // Update service status 284 | if do_update { 285 | lines.push(format!( 286 | "Periodic updates: ENABLED (interval: {} seconds)", 287 | update_interval 288 | )); 289 | lines.push(format!("Crawler config: {}", self.crawler)); 290 | } else { 291 | lines.push("Periodic updates: DISABLED".to_string()); 292 | } 293 | 294 | // API service status 295 | if do_api { 296 | lines.push(format!("API service: ENABLED ({}:{})", host, port)); 297 | } else { 298 | lines.push("API service: DISABLED".to_string()); 299 | } 300 | 301 | // Backup configuration 302 | if let Some(ref dest) = self.backup.destination { 303 | let is_s3 = oneio::s3_url_parse(dest).is_ok(); 304 | let s3_ok = is_s3 && oneio::s3_env_check().is_ok(); 305 | 306 | if is_s3 && !s3_ok { 307 | lines.push(format!( 308 | "Backup: CONFIGURED to S3 ({}) every {} hours - WARNING: S3 env vars not set", 309 | dest, self.backup.interval_hours 310 | )); 311 | } else if is_s3 { 312 | lines.push(format!( 313 | "Backup: CONFIGURED to S3 ({}) every {} hours", 314 | dest, self.backup.interval_hours 315 | )); 316 | } else { 317 | lines.push(format!( 318 | "Backup: CONFIGURED to local path ({}) every {} hours", 319 | dest, self.backup.interval_hours 320 | )); 321 | } 322 | } else { 323 | lines.push("Backup: DISABLED".to_string()); 324 | } 325 | 326 | // Heartbeat configuration 327 | let general = self.heartbeat.general_url.is_some(); 328 | let backup = self.heartbeat.backup_url.is_some(); 329 | match (general, backup) { 330 | (true, true) => { 331 | lines.push("Heartbeats: CONFIGURED (both general and backup)".to_string()) 332 | } 333 | (true, false) => lines.push("Heartbeats: CONFIGURED (general only)".to_string()), 334 | (false, true) => 
lines.push("Heartbeats: CONFIGURED (backup only)".to_string()), 335 | (false, false) => lines.push("Heartbeats: DISABLED".to_string()), 336 | } 337 | 338 | // NATS configuration 339 | if self.nats.is_enabled() { 340 | lines.push("NATS notifications: CONFIGURED".to_string()); 341 | } else { 342 | lines.push("NATS notifications: DISABLED".to_string()); 343 | } 344 | 345 | // Database maintenance 346 | lines.push(format!( 347 | "Database: meta_retention_days={}", 348 | self.database.meta_retention_days 349 | )); 350 | 351 | lines.push("=====================================".to_string()); 352 | 353 | lines 354 | } 355 | } 356 | 357 | #[cfg(test)] 358 | mod tests { 359 | use super::*; 360 | 361 | #[test] 362 | fn test_default_config() { 363 | let config = BrokerConfig::default(); 364 | assert_eq!(config.crawler.max_retries, 3); 365 | assert_eq!(config.crawler.backoff_ms, 1000); 366 | assert_eq!(config.crawler.collector_concurrency, 2); 367 | assert_eq!(config.crawler.month_concurrency, 2); 368 | assert_eq!(config.backup.interval_hours, 24); 369 | assert_eq!(config.database.meta_retention_days, 30); 370 | assert!(!config.backup.is_enabled()); 371 | assert!(!config.nats.is_enabled()); 372 | } 373 | 374 | #[test] 375 | fn test_crawler_config_display() { 376 | let config = CrawlerConfig::default(); 377 | let display = format!("{}", config); 378 | assert!(display.contains("collector_concurrency=2")); 379 | assert!(display.contains("month_concurrency=2")); 380 | assert!(display.contains("max_retries=3")); 381 | assert!(display.contains("backoff_ms=1000")); 382 | } 383 | 384 | #[test] 385 | fn test_backup_interval() { 386 | let config = BackupConfig { 387 | destination: Some("test".to_string()), 388 | interval_hours: 12, 389 | heartbeat_url: None, 390 | }; 391 | assert_eq!(config.interval(), Duration::from_secs(12 * 60 * 60)); 392 | } 393 | } 394 | -------------------------------------------------------------------------------- /src/cli/api.rs: 
-------------------------------------------------------------------------------- 1 | use crate::utils::get_missing_collectors; 2 | use axum::extract::{Query, State}; 3 | use axum::response::IntoResponse; 4 | use axum::routing::get; 5 | use axum::{Json, Router}; 6 | use axum_prometheus::PrometheusMetricLayerBuilder; 7 | use bgpkit_broker::{BrokerItem, LocalBrokerDb, DEFAULT_PAGE_SIZE}; 8 | use chrono::{DateTime, NaiveDate, NaiveDateTime}; 9 | use clap::Args; 10 | use http::{Method, StatusCode}; 11 | use log::error; 12 | use serde::{Deserialize, Serialize}; 13 | use serde_json::json; 14 | use std::sync::Arc; 15 | use tower_http::cors::{Any, CorsLayer}; 16 | use tracing::info; 17 | 18 | struct AppState { 19 | database: LocalBrokerDb, 20 | } 21 | 22 | #[derive(Args, Debug, Serialize, Deserialize)] 23 | pub struct BrokerSearchQuery { 24 | /// Start timestamp 25 | #[clap(short = 't', long)] 26 | pub ts_start: Option, 27 | 28 | /// End timestamp 29 | #[clap(short = 'T', long)] 30 | pub ts_end: Option, 31 | 32 | /// Duration string, e.g. 1 hour 33 | #[clap(short = 'd', long)] 34 | pub duration: Option, 35 | 36 | /// filter by route collector projects, i.e. `route-views` or `riperis` 37 | #[clap(short, long)] 38 | pub project: Option, 39 | 40 | /// filter by collector IDs, e.g. 'rrc00', 'route-views2. use comma to separate multiple collectors 41 | #[clap(short, long)] 42 | pub collector_id: Option, 43 | 44 | /// filter by data types, i.e. 'updates', 'rib'. 
45 | #[clap(short = 'D', long)] 46 | pub data_type: Option, 47 | 48 | /// page number 49 | #[clap(long)] 50 | pub page: Option, 51 | 52 | /// page size 53 | #[clap(long)] 54 | pub page_size: Option, 55 | } 56 | 57 | #[derive(Debug, Serialize, Deserialize)] 58 | pub struct BrokerHealthQueryParams { 59 | /// maximum allowed delay in seconds 60 | pub max_delay_secs: Option, 61 | } 62 | 63 | #[derive(Serialize, Deserialize, Clone, Debug)] 64 | pub struct BrokerSearchResult { 65 | pub total: usize, 66 | pub count: usize, 67 | pub page: usize, 68 | pub page_size: usize, 69 | pub error: Option, 70 | pub data: Vec, 71 | pub meta: Option, 72 | } 73 | 74 | #[derive(Serialize, Deserialize)] 75 | enum BrokerApiError { 76 | BrokerNotHealthy(String), 77 | SearchError(String), 78 | } 79 | 80 | #[derive(Serialize, Deserialize, Clone, Debug)] 81 | pub struct Meta { 82 | pub latest_update_ts: NaiveDateTime, 83 | pub latest_update_duration: i32, 84 | } 85 | 86 | /// Search MRT files meta data from BGPKIT Broker database 87 | async fn search( 88 | query: Query, 89 | State(state): State>, 90 | ) -> impl IntoResponse { 91 | let (page, page_size) = ( 92 | query.page.unwrap_or(1), 93 | query.page_size.unwrap_or(DEFAULT_PAGE_SIZE), 94 | ); 95 | if page == 0 { 96 | return ( 97 | StatusCode::BAD_REQUEST, 98 | Json(BrokerApiError::SearchError( 99 | "page number start from 1".to_string(), 100 | )), 101 | ) 102 | .into_response(); 103 | } 104 | if page_size > 1000 { 105 | return ( 106 | StatusCode::BAD_REQUEST, 107 | Json(BrokerApiError::SearchError( 108 | "maximum page size is 1000".to_string(), 109 | )), 110 | ) 111 | .into_response(); 112 | } 113 | 114 | let mut ts_start = match &query.ts_start { 115 | Some(s) => match parse_time_str(s.as_str()) { 116 | Ok(ts) => Some(ts), 117 | Err(e) => { 118 | let err_msg = format!("cannot parse ts_start {}: {}", s, e); 119 | error!("{}", &err_msg); 120 | error!("{:?}", &query); 121 | return ( 122 | StatusCode::BAD_REQUEST, 123 | 
Json(BrokerApiError::SearchError(err_msg)), 124 | ) 125 | .into_response(); 126 | } 127 | }, 128 | None => None, 129 | }; 130 | 131 | let mut ts_end = match &query.ts_end { 132 | Some(s) => match parse_time_str(s.as_str()) { 133 | Ok(ts) => Some(ts), 134 | Err(e) => { 135 | let err_msg = format!("cannot parse ts_end {}: {}", s, e); 136 | error!("{}", &err_msg); 137 | error!("{:?}", &query); 138 | return ( 139 | StatusCode::BAD_REQUEST, 140 | Json(BrokerApiError::SearchError(err_msg)), 141 | ) 142 | .into_response(); 143 | } 144 | }, 145 | None => None, 146 | }; 147 | 148 | match (ts_start, ts_end) { 149 | (Some(start), None) => { 150 | if let Some(duration_str) = &query.duration { 151 | match humantime::parse_duration(duration_str.as_str()) { 152 | Ok(d) => { 153 | if let Ok(duration) = chrono::Duration::from_std(d) { 154 | ts_end = Some(start + duration); 155 | } 156 | } 157 | Err(_) => { 158 | return ( 159 | StatusCode::BAD_REQUEST, 160 | Json(BrokerApiError::SearchError(format!( 161 | "cannot parse time duration string: {}", 162 | duration_str 163 | ))), 164 | ) 165 | .into_response(); 166 | } 167 | } 168 | } 169 | } 170 | (None, Some(end)) => { 171 | if let Some(duration_str) = &query.duration { 172 | match humantime::parse_duration(duration_str.as_str()) { 173 | Ok(d) => { 174 | if let Ok(duration) = chrono::Duration::from_std(d) { 175 | ts_start = Some(end - duration); 176 | } 177 | } 178 | Err(_) => { 179 | return ( 180 | StatusCode::BAD_REQUEST, 181 | Json(BrokerApiError::SearchError(format!( 182 | "cannot parse time duration string: {}", 183 | duration_str 184 | ))), 185 | ) 186 | .into_response(); 187 | } 188 | } 189 | } 190 | } 191 | _ => {} 192 | }; 193 | 194 | let collectors = query 195 | .collector_id 196 | .as_ref() 197 | .map(|s| s.split(',').map(|s| s.trim().to_string()).collect()); 198 | 199 | let search_result = match state 200 | .database 201 | .search( 202 | collectors, 203 | query.project.clone(), 204 | query.data_type.clone(), 205 | ts_start, 
206 | ts_end, 207 | Some(page), 208 | Some(page_size), 209 | ) 210 | .await 211 | { 212 | Ok(result) => result, 213 | Err(e) => { 214 | return ( 215 | StatusCode::INTERNAL_SERVER_ERROR, 216 | Json(BrokerApiError::SearchError(format!( 217 | "database search failed: {}", 218 | e 219 | ))), 220 | ) 221 | .into_response(); 222 | } 223 | }; 224 | 225 | let meta = state 226 | .database 227 | .get_latest_updates_meta() 228 | .await 229 | .unwrap_or_default() 230 | .and_then(|data| { 231 | Some(Meta { 232 | latest_update_ts: chrono::DateTime::from_timestamp(data.update_ts, 0)?.naive_utc(), 233 | latest_update_duration: data.update_duration, 234 | }) 235 | }); 236 | 237 | Json(BrokerSearchResult { 238 | total: search_result.total, 239 | count: search_result.items.len(), 240 | page, 241 | page_size, 242 | error: None, 243 | data: search_result.items, 244 | meta, 245 | }) 246 | .into_response() 247 | } 248 | 249 | /// Get the latest MRT files meta information 250 | async fn latest(State(state): State>) -> impl IntoResponse { 251 | let items = state.database.get_latest_files().await; 252 | let meta = state 253 | .database 254 | .get_latest_updates_meta() 255 | .await 256 | .unwrap_or_default() 257 | .and_then(|data| { 258 | Some(Meta { 259 | latest_update_ts: chrono::DateTime::from_timestamp(data.update_ts, 0)?.naive_utc(), 260 | latest_update_duration: data.update_duration, 261 | }) 262 | }); 263 | 264 | Json(BrokerSearchResult { 265 | total: items.len(), 266 | count: items.len(), 267 | page: 0, 268 | page_size: items.len(), 269 | error: None, 270 | data: items, 271 | meta, 272 | }) 273 | } 274 | 275 | /// Return Broker API and database health 276 | async fn health( 277 | query: Query, 278 | State(state): State>, 279 | ) -> impl IntoResponse { 280 | match state.database.get_latest_timestamp().await { 281 | Ok(data) => match data { 282 | None => ( 283 | StatusCode::SERVICE_UNAVAILABLE, 284 | Json(BrokerApiError::BrokerNotHealthy( 285 | "database not bootstrap".to_string(), 286 
| )), 287 | ) 288 | .into_response(), 289 | Some(ts) => { 290 | // data is there, service is ok. 291 | // this endpoint does not check for data freshness, as there are applications 292 | // that do not require fresh data (e.g., historical analysis). 293 | 294 | let latest_file_ts = ts.and_utc().timestamp(); 295 | let now_ts = chrono::Utc::now().timestamp(); 296 | 297 | if let Some(max_delay) = query.max_delay_secs { 298 | if now_ts - latest_file_ts > max_delay as i64 { 299 | return ( 300 | StatusCode::SERVICE_UNAVAILABLE, 301 | Json(BrokerApiError::BrokerNotHealthy(format!( 302 | "database is not fresh, latest file timestamp: {}, delay: {}s", 303 | latest_file_ts, 304 | now_ts - latest_file_ts 305 | ))), 306 | ) 307 | .into_response(); 308 | } 309 | } 310 | 311 | Json( 312 | json!({"status": "OK", "message": "database is healthy", "meta": { 313 | "latest_file_ts": latest_file_ts, 314 | "delay_secs": now_ts - latest_file_ts, 315 | }}), 316 | ) 317 | .into_response() 318 | } 319 | }, 320 | Err(_) => ( 321 | StatusCode::SERVICE_UNAVAILABLE, 322 | Json(BrokerApiError::BrokerNotHealthy( 323 | "database connection error".to_string(), 324 | )), 325 | ) 326 | .into_response(), 327 | } 328 | } 329 | 330 | async fn missing_collectors(State(state): State>) -> impl IntoResponse { 331 | let latest_items = state.database.get_latest_files().await; 332 | let missing_collectors = get_missing_collectors(&latest_items); 333 | 334 | match missing_collectors.is_empty() { 335 | true => ( 336 | StatusCode::OK, 337 | Json(json!( 338 | { 339 | "status": "OK", 340 | "message": "no missing collectors", 341 | "missing_collectors": [] 342 | } 343 | )), 344 | ) 345 | .into_response(), 346 | false => ( 347 | StatusCode::SERVICE_UNAVAILABLE, 348 | Json(json!( 349 | { 350 | "status": "Need action", 351 | "message": "have missing collectors", 352 | "missing_collectors": missing_collectors 353 | } 354 | )), 355 | ) 356 | .into_response(), 357 | } 358 | } 359 | 360 | /// Parse a timestamp string into 
NaiveDateTime 361 | /// 362 | /// The timestamp string can be either unix timestamp or RFC3339 format string (e.g. 2020-01-01T00:00:00Z). 363 | fn parse_time_str(ts_str: &str) -> Result { 364 | if let Ok(ts_end) = ts_str.parse::() { 365 | // it's unix timestamp 366 | return DateTime::from_timestamp(ts_end, 0) 367 | .map(|dt| dt.naive_utc()) 368 | .ok_or_else(|| format!("invalid unix timestamp: {}", ts_end)); 369 | } 370 | 371 | if let Ok(d) = NaiveDate::parse_from_str(ts_str, "%Y-%m-%d") { 372 | // it's a date 373 | return d 374 | .and_hms_opt(0, 0, 0) 375 | .ok_or_else(|| format!("invalid date: {}", ts_str)); 376 | } 377 | 378 | if let Ok(t) = DateTime::parse_from_rfc3339(ts_str) { 379 | // it's a correct RFC3339 time 380 | return Ok(t.naive_utc()); 381 | } 382 | 383 | if let Ok(t) = DateTime::parse_from_rfc2822(ts_str) { 384 | // it's a correct RFC2822 time 385 | return Ok(t.naive_utc()); 386 | } 387 | 388 | // at this point, the input not any valid time string format. 389 | // we guess it could be a timezone-less time string, 390 | // so let's remove potential "Z" and add timezone and try again 391 | let ts_str = ts_str.trim_end_matches('Z').to_string() + "+00:00"; 392 | match DateTime::parse_from_rfc3339(ts_str.as_str()) { 393 | Ok(t) => Ok(t.naive_utc()), 394 | Err(_) => Err(format!( 395 | "Invalid timestamp format: {}, should be either unix timestamp or RFC3339", 396 | ts_str 397 | )), 398 | } 399 | } 400 | 401 | pub async fn start_api_service( 402 | database: LocalBrokerDb, 403 | host: String, 404 | port: u16, 405 | root: String, 406 | ) -> std::io::Result<()> { 407 | let (metric_layer, metric_handle) = PrometheusMetricLayerBuilder::new() 408 | .with_ignore_patterns(&["/metrics"]) 409 | .with_prefix("bgpkit_broker") 410 | .with_default_metrics() 411 | .build_pair(); 412 | let cors_layer = CorsLayer::new() 413 | // allow `GET` and `POST` when accessing the resource 414 | .allow_methods([Method::GET, Method::POST]) 415 | // allow requests from any origin 416 | 
.allow_origin(Any); 417 | 418 | let database = Arc::new(AppState { database }); 419 | let app = Router::new() 420 | .route("/search", get(search)) 421 | .route("/latest", get(latest)) 422 | .route("/health", get(health)) 423 | .route("/missing_collectors", get(missing_collectors)) 424 | .route("/metrics", get(|| async move { metric_handle.render() })) 425 | .with_state(database) 426 | .layer(metric_layer) 427 | .layer(cors_layer); 428 | info!("Starting API service on {}:{}", host, port); 429 | 430 | let root_app = if root == "/" { 431 | // If root is "/", just use the app router directly 432 | app 433 | } else { 434 | // Otherwise, nest under the specified path 435 | Router::new().nest(root.as_str(), app) 436 | }; 437 | 438 | let socket_str = format!("{}:{}", host, port); 439 | let listener = tokio::net::TcpListener::bind(socket_str).await?; 440 | info!("listening on {}", listener.local_addr()?); 441 | axum::serve(listener, root_app).await?; 442 | 443 | Ok(()) 444 | } 445 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 
4 | 5 | ## Unreleased 6 | 7 | ### New collector 8 | 9 | * Added RouteViews `ixpn.lagos` collector to the supported collectors list 10 | 11 | ## v0.10.1 - 2025-12-03 12 | 13 | ### Configuration management 14 | 15 | * Added `BrokerConfig` struct for centralized configuration management 16 | * Consolidates all environment variable configuration into a single struct 17 | * Provides `from_env()` method for loading configuration at startup 18 | * Includes `display_summary()` method for logging configuration state 19 | * Sub-configs: `CrawlerConfig`, `BackupConfig`, `HeartbeatConfig`, `NatsConfig`, `DatabaseConfig` 20 | 21 | ### Crawler improvements 22 | 23 | * Added configurable concurrency for crawler operations 24 | * `BGPKIT_BROKER_CRAWLER_COLLECTOR_CONCURRENCY` - Number of collectors to crawl simultaneously (default: 2) 25 | * `BGPKIT_BROKER_CRAWLER_MONTH_CONCURRENCY` - Number of months to crawl in parallel per collector (default: 2, primarily affects bootstrap) 26 | * Reduced default concurrency from 5 collectors / 10 months to 2/2 to avoid rate limiting 27 | 28 | ### CLI improvements 29 | 30 | * Added update timing and performance logging 31 | * Logs completion time, items inserted, and percentage of update interval used 32 | * Warns when update time exceeds 80% of interval or exceeds interval entirely 33 | * Enhanced startup logging to display all non-secret configuration values 34 | * Disabled ANSI color codes in log output for better compatibility with non-terminal environments 35 | 36 | ### Database improvements 37 | 38 | * Added index on `meta.update_ts` column for faster meta table queries and cleanup operations 39 | 40 | ## v0.10.0 - 2025-12-02 41 | 42 | ### New collector 43 | 44 | * Added RouteViews `crix.sjo` collector to the supported collectors list 45 | 46 | ### SDK improvements 47 | 48 | * Added `get_snapshot_files()` method to get MRT files needed to construct routing table snapshots 49 | * Takes an array of collector IDs and a target timestamp 50 | * 
Returns `Vec` with RIB URL and ordered updates URLs for each collector 51 | * Finds the closest RIB dump before the target timestamp and all updates files between RIB and target 52 | * Useful for applications that need to reconstruct exact routing table state at a specific point in time 53 | 54 | ### Crawler improvements 55 | 56 | * Added retry logic with exponential backoff for failed HTTP requests during crawling 57 | * Default: 3 retries with 1 second initial backoff (doubles each retry: 1s, 2s, 4s) 58 | * Configurable via `BGPKIT_BROKER_CRAWLER_MAX_RETRIES` and `BGPKIT_BROKER_CRAWLER_BACKOFF_MS` 59 | 60 | ### Database maintenance 61 | 62 | * Added automatic cleanup of old meta table entries 63 | * Entries older than 30 days are automatically deleted after each database update 64 | * Configurable via `BGPKIT_BROKER_META_RETENTION_DAYS` 65 | 66 | ### Code improvements 67 | 68 | * Replaced `.unwrap()` calls with proper error handling throughout the codebase 69 | * CLI now handles errors gracefully with informative error messages 70 | * Database operations log errors instead of panicking 71 | * Crawlers skip malformed entries instead of crashing 72 | * API endpoints handle invalid data gracefully 73 | 74 | ## v0.9.2 - 2025-11-18 75 | 76 | ### New collector 77 | 78 | * add RouteViews `iix.cgk` collector to the supported collectors list 79 | * official RouteViews news: https://www.routeviews.org/routeviews/2025/11/14/new-collector-at-iix-jakarta-indonesia/ 80 | 81 | ### Cli improvements 82 | 83 | * fixed a shorthand parameter collision for `bgpkit-broker search` command 84 | * associated `-D` for `--data-type`, and `-d` for `--duration` 85 | 86 | ## v0.9.1 - 2025-11-05 87 | 88 | ### New collector 89 | 90 | * add RouteViews `netnod.mmx` collector to the supported collectors list 91 | * official RouteViews news: https://www.routeviews.org/routeviews/2025/11/04/new-collector-at-netnod-stockholm-sweden-copenhagen-denmark/ 92 | 93 | ### SDK improvements 94 | 95 | * add 
user-agent with SDK library version to HTTP requests 96 | 97 | ### CLI improvements 98 | 99 | * `bgpkit-broker serve` now starts the API server before the initial backup is complete, reducing starting time significantly 100 | 101 | ### Bug fixes 102 | 103 | * **Collector validation improvements**: Fixed collector ID validation to be truly permissive 104 | * Unknown/new collectors are now allowed (permissive behavior as intended) 105 | * Collector IDs are properly normalized (trimmed, deduplicated, comma-joined) 106 | * Fixed multi-collector filtering in `latest()` method to correctly handle comma-separated lists 107 | * Example: `.collector_id("rrc00,route-views2")` now correctly matches items from either collector 108 | 109 | ## v0.9.0 - 2025-10-30 110 | 111 | ### SDK Changes 112 | 113 | * **Total count support**: Added `query_total_count()` method to fetch total matching item count 114 | * New `query_total_count()` method returns total count without fetching items 115 | * `BrokerQueryResult` now includes optional `total` field for pagination support 116 | * Enables efficient count queries for pagination UI components 117 | 118 | ### API Changes 119 | 120 | * **Search result enhancement**: Added `total` field to `/v3/search` endpoint response 121 | * API now returns the total count of matching items alongside paginated results 122 | * `BrokerSearchResult` includes `total` field for client-side pagination 123 | 124 | ### Code Improvements 125 | 126 | * Refactored bootstrap download logging to centralize and clean redundant code 127 | * Updated oneio dependency to version 0.20.0 with specific features 128 | 129 | ### Data Updates 130 | 131 | * Added the following recent RouteViews collectors: 132 | * `hkix.hkg` 133 | * `ix-br.gru` 134 | 135 | ## v0.8.1 - 2025-09-01 136 | 137 | ### Improvements 138 | 139 | * Updated dependencies to the latest versions 140 | 141 | ### Bug fixes 142 | 143 | * Fixed incorrect database initialization schema error 144 | 145 | ### Data 
updates 146 | 147 | * Added getafix.mnl collector 148 | * Added ix-br2.gru collector 149 | 150 | ## v0.8.0 - 2025-08-17 151 | 152 | ### Breaking Changes 153 | 154 | None - This release maintains API compatibility with previous versions. 155 | 156 | ### API Changes 157 | 158 | None - This release maintains API compatibility with previous versions. 159 | 160 | ### Backend Changes 161 | 162 | * **Periodic backup system**: Added automated database backup functionality to the serve command 163 | * Enabled via `BGPKIT_BROKER_BACKUP_TO` environment variable for backup destination 164 | * Supports both local file system and S3 destinations (e.g., `s3://bucket/path/backup.db`) 165 | * Configurable backup interval via `BGPKIT_BROKER_BACKUP_INTERVAL_HOURS` (default: 24 hours) 166 | * Backup-specific heartbeat notifications via `BGPKIT_BROKER_BACKUP_HEARTBEAT_URL` 167 | * Separate heartbeat endpoints for regular updates vs backup completion 168 | * Eliminates race conditions by running backups only after successful database updates 169 | 170 | * **Configuration summary display**: Added startup configuration summary for better operational visibility 171 | * Displays periodic update status, API service configuration, backup settings, and heartbeat configuration 172 | * Shows S3 environment validation status when S3 backup destinations are configured 173 | * NATS notification status and configuration validation 174 | * Helps operators quickly verify service configuration at startup 175 | 176 | * **Bootstrap sequence optimization**: Fixed duplicate database updates during bootstrap process 177 | * Eliminated redundant database update that occurred both during bootstrap and update thread startup 178 | * Streamlined bootstrap flow for cleaner startup sequence and reduced processing time 179 | 180 | ### SDK Changes 181 | 182 | * **Shortcuts module**: Added convenience methods for common BGP data queries 183 | * `daily_ribs()` - Filter RIB files captured at midnight (00:00:00) for 
daily snapshots 184 | * `recent_updates(hours)` - Get BGP update files from the last N hours 185 | * `most_diverse_collectors(n, project)` - Find collectors with maximum ASN diversity using greedy algorithm 186 | * All shortcuts integrate seamlessly with existing filtering methods and support method chaining 187 | * Enhanced diversity algorithm selects collectors based on unique ASN coverage from full-feed peers 188 | * Project filtering supported for targeted RouteViews or RIPE RIS analysis 189 | 190 | * **Configuration validation**: Restructured parameter validation for better error handling 191 | * Moved validation from configuration time to query execution time 192 | * Added `validate_configuration()` method with parameter checking 193 | * Validation includes timestamps, collectors, projects, data types, page numbers, and page sizes 194 | * Provides detailed error messages with valid options for invalid parameters 195 | * Maintains method chaining simplicity while ensuring data correctness at query time 196 | 197 | * **Test performance**: Improved test suite execution speed 198 | * Updated validation tests to call `validate_configuration()` directly instead of `query()` 199 | * Modified `test_recent_updates` to test parameter configuration without network calls 200 | * Tests now run significantly faster while maintaining coverage and functionality 201 | 202 | * **Timestamp parsing**: Enhanced timestamp parsing with timezone support and better validation 203 | * Added support for RFC3339 timestamps with timezone offsets (e.g., `2022-01-01T00:00:00+00:00`, 204 | `2022-01-01T05:00:00-05:00`) 205 | * Support for pure dates (e.g., `2022-01-01`), Unix timestamps, RFC3339 formats, and various date separators 206 | * Internal `parse_timestamp` function now returns `DateTime` with proper timezone conversion 207 | * Validation occurs at query time with helpful error messages for invalid timestamp formats 208 | * Pure dates automatically converted to start-of-day UTC 
timestamps 209 | 210 | * **Database tests**: Updated database tests to use temporary files with proper cleanup 211 | * Replace hardcoded test database paths with unique temporary file paths using system temp directory 212 | * Add automatic cleanup of SQLite database files including WAL and SHM files 213 | * Improve test isolation and prevent interference between test runs 214 | * Tests now suitable for CI/CD environments without leaving leftover files 215 | 216 | ## v0.7.11 - 2025-04-08 217 | 218 | ### Highlights 219 | 220 | * NATS notifier is now configured via the following env variables 221 | * `BGPKIT_BROKER_NATS_URL`: the URL for the NATS server, such as `nats.broker.bgpkit.com` 222 | * `BGPKIT_BROKER_NATS_USER`: NATS server user name 223 | * `BGPKIT_BROKER_NATS_PASSWORD`: NATS server password 224 | * `BGPKIT_BROKER_NATS_ROOT_SUBJECT`: NATS server root subject, such as `public.broker` 225 | 226 | ## v0.7.10 - 2025-03-26 227 | 228 | ### Highlights 229 | 230 | * add `route-views8` collector 231 | * add `/missing_collectors` endpoint to check for collectors that have not been added yet 232 | * remove `/docs` and `utopia` dependency to remove clutter 233 | * freshen up dependencies 234 | 235 | ### Bug fixes 236 | 237 | * fixed an issue where incorrectly formatted timestring may cause the API to panic 238 | 239 | ## v0.7.9 - 2025-03-24 240 | 241 | ### Highlights 242 | 243 | * `bgpkit-broker serve` and `bgpkit-broker backup` commands now runs SQLite `ANALYZE` command to once to ensure the 244 | performance is up to date to all the recently inserted data. 
245 | 246 | ## v0.7.8 - 2025-03-20 247 | 248 | ### Highlights 249 | 250 | * `bgpkit-broker backup` command now supports a bootstrapping source database 251 | * this is useful to set up independent backup executions separate from the running API services 252 | 253 | ## v0.7.7 - 2025-03-07 254 | 255 | ### Highlights 256 | 257 | * Fix installation instruction for cargo 258 | * Change `@0.7` to `@^0.7` to correctly use the latest `v0.7.x` version. 259 | * Add recently added RouteViews collectors 260 | * new collectors are `interlan.otp` (Romania),`kinx.icn` (South Korea), and `namex.fco` (Italy) 261 | * users update the version to `v0.7.7` can run the same `bgpkit-broker update` command to automatically bootstrap 262 | data for these collectors 263 | 264 | ## v0.7.6 - 2024-10-31 265 | 266 | ### Highlights 267 | 268 | * migrate default broker API endpoint to `https://api.bgpkit.com/v3/broker` 269 | * Full API docs is available at `https://api.bgpkit.com/docs` 270 | * add `get_peers` to `BgpkitBroker` struct 271 | * fetches the list of peers for a given collector 272 | * allow specifying filters the same way as querying MRT files 273 | * available filter functions include: 274 | * `.peers_asn(ASN)` 275 | * `.peers_ip(IP)` 276 | * `.collector_id(COLLECTOR_ID)` 277 | * `.peers_only_full_feed(TRUE/FALSE)` 278 | * returns `Vec` 279 | 280 | ```rust 281 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 282 | pub struct BrokerPeer { 283 | /// The date of the latest available data. 284 | pub date: NaiveDate, 285 | /// The IP address of the collector peer. 286 | pub ip: IpAddr, 287 | /// The ASN (Autonomous System Number) of the collector peer. 288 | pub asn: u32, 289 | /// The name of the collector. 290 | pub collector: String, 291 | /// The number of IPv4 prefixes. 292 | pub num_v4_pfxs: u32, 293 | /// The number of IPv6 prefixes. 294 | pub num_v6_pfxs: u32, 295 | /// The number of connected ASNs. 
296 | pub num_connected_asns: u32, 297 | } 298 | ``` 299 | 300 | ## v0.7.5 - 2024-08-23 301 | 302 | ### [NEW] deploy at fly.io 303 | 304 | * add a deployment config file for fly.io 305 | 306 | ### Bugfix 307 | 308 | * fix an issue where the API returns URL with additional `/bgpdata` for some new route-views collectors. 309 | 310 | ### Documentation 311 | 312 | * improved documentation on deploying bgpkit-broker services. 313 | 314 | ## v0.7.4 - 2024-08-22 315 | 316 | ### Highlights 317 | 318 | * add a new 30s timeout for fetching web page content for the crawler 319 | * the async `reqwest::get` function by default does not have a timeout and thus we could potentially stuck waiting 320 | forever. 321 | * add new `max_delay_secs` parameter to the `/health` endpoint to allow user specify a maximum delay in seconds and 322 | the API will return error code 503 if the delay for the latest file timestamp (any file) is greater than the specified 323 | value. 324 | * this allows better monitoring for the API's health without sending heartbeats. 
325 | 326 | ## v0.7.3 - 2024-08-14 327 | 328 | ### Hotfix 329 | 330 | * fix an issue where the main thread waits for updater thread and never starts the API thread 331 | 332 | ## v0.7.2 - 2024-08-13 333 | 334 | ### Highlights 335 | 336 | * panic if the cli updater thread failed for some reason 337 | * previously, the failed thread goes silently, and the main process continues running 338 | * reduce logging if not configuring NATS notifier 339 | * previously, the missing notifier message appear at every fetch 340 | 341 | ## v0.7.1 - 2024-08-05 342 | 343 | ### Highlights 344 | 345 | * updated the `bgpkit-broker update` command to allow bootstrapping new collectors on demand 346 | * a number of new RouteViews collectors have been added 347 | * `amsix.ams` `cix.atl` `decix.jhb` `iraq-ixp.bgw` `pacwave.lax` `pit.scl` `pitmx.qro` `route-views7` 348 | * added a new `allow_invalid_cert` function to the constructor and deprecated the `disable_ssl_check` function 349 | * they work the same way, but the new function name is more accurate 350 | * constructor also checks for `ONEIO_ACCEPT_INVALID_CERTS=true` environment variable to allow invalid certs (not 351 | recommended, use at your own risk) 352 | * improved project filter. it now matches the name of the collector to project names 353 | * this works with the newly added RouteViews collectors whose names do not prefix with `route-views` 354 | 355 | ## v0.7.0 - 2024-06-18 356 | 357 | ### [NEW] NATS notification 358 | 359 | Added new-file notification by publishing newly indexed BGP MRT file meta information in JSON format to user-specified 360 | NATS server. 
361 | 362 | The NATS URL and credentials is configured via environment variables: 363 | 364 | * `BGPKIT_BROKER_NATS_URL` like `nats://nats.bgpkit.com:4222` (default) 365 | * `BGPKIT_BROKER_NATS_ROOT_SUBJECT` such as `public.broker.` (default) 366 | 367 | The notification is published to `public.broker.{PROJECT}.{COLLECTOR}.{DATA_TYPE}` where 368 | 369 | * `PROJECT` is `riperis` or `route-views` 370 | * `COLLECTOR` is the route collector IDs like `rrc00` or `route-views2` 371 | * `DATA_TYPE` is `rib` or `updates` 372 | 373 | Example of what a subscriber can be notified for: 374 | 375 | * `public.broker.>`: any new MRT files 376 | * `public.broker.route-views.>`: new RouteViews updates files 377 | * `public.broker.rrc00.rib`: new RIPE RIS rrc00 RIB dump files 378 | * `public.broker.*.rib`: any new RIB dump files 379 | 380 | This PR also adds a new subcommand `bgpkit-broker live` that listens to the specified NATS server for new file 381 | notification. 382 | 383 | ``` 384 | Streaming live from a broker NATS server 385 | 386 | Usage: bgpkit-broker live [OPTIONS] 387 | 388 | Options: 389 | --no-log disable logging 390 | -u, --url URL to NATS server, e.g. nats://localhost:4222. If not specified, will try to read from BGPKIT_BROKER_NATS_URL env variable 391 | --env 392 | -s, --subject Subject to subscribe to, default to public.broker.> 393 | -p, --pretty Pretty print JSON output 394 | -h, --help Print help 395 | -V, --version Print version 396 | ``` 397 | 398 | ### [NEW] `bgpkit-broker doctor` subcommand 399 | 400 | Added `bgpkit-broker doctor` subcommand that checks the broker instance status and missing collectors. 401 | 402 | ```text 403 | Check broker instance health and missing collectors 404 | 405 | Usage: bgpkit-broker doctor [OPTIONS] 406 | 407 | Options: 408 | --no-log disable logging 409 | --env 410 | -h, --help Print help 411 | -V, --version Print version 412 | ``` 413 | 414 | Example output: 415 | 416 | ```text 417 | checking broker instance health... 
418 | broker instance at https://api.broker.bgpkit.com/v3 is healthy 419 | 420 | checking for missing collectors... 421 | missing the following collectors: 422 | | project | name | country | activated_on | data_url | 423 | |------------|--------------|-----------------|---------------------|----------------------------------------------------| 424 | | routeviews | decix.jhb | Malaysia | 2022-12-20 12:00:00 | http://archive.routeviews.org/decix.jhb/bgpdata | 425 | | routeviews | pacwave.lax | United States | 2023-03-30 12:00:00 | http://archive.routeviews.org/pacwave.lax/bgpdata | 426 | | routeviews | pit.scl | Chile | 2023-08-31 23:45:00 | http://archive.routeviews.org/pit.scl/bgpdata | 427 | | routeviews | amsix.ams | The Netherlands | 2024-02-22 23:20:00 | http://archive.routeviews.org/amsix.ams/bgpdata | 428 | | routeviews | pitmx.qro | Mexico | 2024-02-23 22:15:00 | http://archive.routeviews.org/pitmx.qro/bgpdata | 429 | | routeviews | iraq-ixp.bgw | Iraq | 2024-04-13 00:01:00 | http://archive.routeviews.org/iraq-ixp.bgw/bgpdata | 430 | ``` 431 | 432 | ### [NEW] Heartbeat URL support 433 | 434 | If `BGPKIT_BROKER_HEARTBEAT_URL` environment is set, when running the `bgpkit-broker serve` subcommand, the instance 435 | will periodically send a GET request to the configured heartbeat URL. 436 | 437 | ### [NEW] `bgpkit-broker latest` subcommand 438 | 439 | Added `latest` subcommand to CLI to display latest MRT files for all collectors. 440 | 441 | ### Developer experience improvements 442 | 443 | - add `.is_rib()` to `BrokerItem` struct 444 | - add strict ordering definition for `BrokerItem` struct 445 | 446 | An array of `BrokerItem`s can be sorted with the following order: 447 | 448 | 1. smaller timestamp before larger timestamp 449 | 2. RIB before updates 450 | 3. 
then alphabetical order on collector ID (route-views before rrc) 451 | 452 | ### Breaking changes 453 | 454 | - switch to `rustls` as the default TLS backend 455 | 456 | ## V0.6.1 457 | 458 | ### What's Changed 459 | 460 | * switch http lib to reqwest from ureq by @digizeph in https://github.com/bgpkit/bgpkit-broker/pull/20 461 | 462 | **Full Changelog**: https://github.com/bgpkit/bgpkit-broker/compare/v0.6.0...v0.6.1 -------------------------------------------------------------------------------- /src/shortcuts.rs: -------------------------------------------------------------------------------- 1 | //! Convenience methods for common BGP data queries. 2 | //! 3 | //! This module provides shortcuts for frequently used query patterns to make it easier 4 | //! to find specific types of BGP data without manually configuring filters. 5 | 6 | use crate::{BgpkitBroker, BrokerError, BrokerItem}; 7 | use chrono::{DateTime, Timelike, Utc}; 8 | use serde::{Deserialize, Serialize}; 9 | use std::collections::HashMap; 10 | use std::fmt::Display; 11 | 12 | /// MRT files needed to construct a routing table snapshot for a specific collector. 13 | /// 14 | /// This struct contains the RIB dump URL and the list of updates files that need to be 15 | /// applied to reconstruct the routing table state at a specific point in time. 
16 | /// 17 | /// # Fields 18 | /// 19 | /// * `collector_id` - The ID of the BGP collector (e.g., "route-views2", "rrc00") 20 | /// * `rib_url` - URL of the RIB dump file to use as the initial routing table 21 | /// * `updates_urls` - URLs of the updates MRT files to apply to the initial RIB, 22 | /// ordered chronologically from oldest to newest 23 | /// 24 | /// # Example 25 | /// 26 | /// ```no_run 27 | /// use bgpkit_broker::BgpkitBroker; 28 | /// 29 | /// let broker = BgpkitBroker::new(); 30 | /// let snapshots = broker.get_snapshot_files( 31 | /// &["route-views2", "rrc00"], 32 | /// "2024-01-01T12:00:00Z" 33 | /// ).unwrap(); 34 | /// 35 | /// for snapshot in snapshots { 36 | /// println!("Collector: {}", snapshot.collector_id); 37 | /// println!("RIB URL: {}", snapshot.rib_url); 38 | /// println!("Updates files: {}", snapshot.updates_urls.len()); 39 | /// } 40 | /// ``` 41 | #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] 42 | pub struct SnapshotFiles { 43 | /// The collector ID (e.g., "route-views2", "rrc00") 44 | pub collector_id: String, 45 | /// URL of the RIB dump file to build the initial routing table 46 | pub rib_url: String, 47 | /// URLs of the updates MRT files to apply to the initial RIB, in chronological order 48 | pub updates_urls: Vec, 49 | } 50 | 51 | impl Display for SnapshotFiles { 52 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 53 | write!( 54 | f, 55 | "SnapshotFiles {{ collector_id: {}, rib_url: {}, updates_count: {} }}", 56 | self.collector_id, 57 | self.rib_url, 58 | self.updates_urls.len() 59 | ) 60 | } 61 | } 62 | 63 | impl BgpkitBroker { 64 | /// Get daily RIB files that were captured at midnight (00:00:00). 65 | /// 66 | /// This filters for RIB dumps where both the hour and minute are 0, 67 | /// which typically represents the daily snapshots taken at midnight. 
68 | /// 69 | /// # Examples 70 | /// 71 | /// Get daily RIB files with date range and specific collector: 72 | /// ```no_run 73 | /// use bgpkit_broker::BgpkitBroker; 74 | /// 75 | /// let daily_ribs = BgpkitBroker::new() 76 | /// .ts_start("2024-01-01") 77 | /// .ts_end("2024-01-02") 78 | /// .collector_id("route-views2") 79 | /// .daily_ribs() 80 | /// .unwrap(); 81 | /// 82 | /// for item in daily_ribs { 83 | /// println!("Daily RIB: {} from {} at {}", 84 | /// item.collector_id, 85 | /// item.ts_start.format("%Y-%m-%d %H:%M:%S"), 86 | /// item.url); 87 | /// } 88 | /// ``` 89 | /// 90 | /// Chain with project filtering for RouteViews daily snapshots: 91 | /// ```no_run 92 | /// use bgpkit_broker::BgpkitBroker; 93 | /// 94 | /// let rv_daily_ribs = BgpkitBroker::new() 95 | /// .ts_start("2024-01-01") 96 | /// .ts_end("2024-01-31") 97 | /// .project("routeviews") 98 | /// .daily_ribs() 99 | /// .unwrap(); 100 | /// 101 | /// println!("Found {} RouteViews daily RIBs", rv_daily_ribs.len()); 102 | /// ``` 103 | pub fn daily_ribs(&self) -> Result, BrokerError> { 104 | // Clone the broker and add RIB filter 105 | let rib_broker = self.clone().data_type("rib"); 106 | 107 | // Get all RIB items and filter for midnight captures 108 | let all_ribs = rib_broker.query()?; 109 | 110 | let daily_ribs = all_ribs 111 | .into_iter() 112 | .filter(|item| { 113 | // Filter for files captured at midnight (00:00:00) 114 | item.ts_start.hour() == 0 && item.ts_start.minute() == 0 115 | }) 116 | .collect(); 117 | 118 | Ok(daily_ribs) 119 | } 120 | 121 | /// Get BGP update files from the last N hours. 122 | /// 123 | /// This method calculates the timestamp for N hours ago and queries for 124 | /// update files from that time until now. 
125 | /// 126 | /// # Arguments 127 | /// 128 | /// * `hours` - Number of hours to look back from current time 129 | /// 130 | /// # Examples 131 | /// 132 | /// Get recent updates from specific collectors with detailed output: 133 | /// ```no_run 134 | /// use bgpkit_broker::BgpkitBroker; 135 | /// 136 | /// let recent_updates = BgpkitBroker::new() 137 | /// .collector_id("route-views2,rrc00") 138 | /// .recent_updates(24) 139 | /// .unwrap(); 140 | /// 141 | /// println!("Found {} update files from last 24 hours", recent_updates.len()); 142 | /// for item in recent_updates.iter().take(5) { 143 | /// println!("Update: {} from {} at {}", 144 | /// item.collector_id, 145 | /// item.ts_start.format("%Y-%m-%d %H:%M:%S"), 146 | /// item.url); 147 | /// } 148 | /// ``` 149 | /// 150 | /// Chain with diverse collectors for comprehensive recent data: 151 | /// ```no_run 152 | /// use bgpkit_broker::BgpkitBroker; 153 | /// 154 | /// let broker = BgpkitBroker::new(); 155 | /// let diverse_collectors = broker.most_diverse_collectors(5, None).unwrap(); 156 | /// 157 | /// let comprehensive_updates = broker 158 | /// .collector_id(diverse_collectors.join(",")) 159 | /// .recent_updates(6) 160 | /// .unwrap(); 161 | /// 162 | /// println!("Got {} updates from {} diverse collectors", 163 | /// comprehensive_updates.len(), diverse_collectors.len()); 164 | /// ``` 165 | pub fn recent_updates(&self, hours: u32) -> Result, BrokerError> { 166 | let now = Utc::now(); 167 | let hours_ago = now - chrono::Duration::hours(hours as i64); 168 | 169 | // Clone the broker and add time range and updates filter 170 | let updates_broker = self 171 | .clone() 172 | .data_type("updates") 173 | .ts_start(hours_ago.format("%Y-%m-%dT%H:%M:%SZ").to_string()) 174 | .ts_end(now.format("%Y-%m-%dT%H:%M:%SZ").to_string()); 175 | 176 | updates_broker.query() 177 | } 178 | 179 | /// Get the N collectors with the most diverse peer ASNs from full-feed peers. 
180 | /// 181 | /// This method queries collectors' peer data, filters for full-feed peers only, 182 | /// and uses a greedy algorithm to find the combination of collectors that covers 183 | /// the most unique ASNs with minimal overlap. Optionally filters by project. 184 | /// 185 | /// # Arguments 186 | /// 187 | /// * `n` - Maximum number of collectors to return 188 | /// * `project` - Optional project filter: "riperis" or "routeviews" 189 | /// 190 | /// # Returns 191 | /// 192 | /// A vector of collector IDs optimized for maximum ASN diversity. 193 | /// If fewer than N collectors exist, returns all available collectors. 194 | /// 195 | /// # Examples 196 | /// 197 | /// Find diverse collectors across all projects and use them for daily RIB analysis: 198 | /// ```no_run 199 | /// use bgpkit_broker::BgpkitBroker; 200 | /// 201 | /// let broker = BgpkitBroker::new() 202 | /// .ts_start("2024-01-01") 203 | /// .ts_end("2024-01-02"); 204 | /// 205 | /// let diverse_collectors = broker.most_diverse_collectors(5, None).unwrap(); 206 | /// let collector_list = diverse_collectors.join(","); 207 | /// 208 | /// let daily_ribs = broker 209 | /// .clone() 210 | /// .collector_id(collector_list) 211 | /// .daily_ribs() 212 | /// .unwrap(); 213 | /// 214 | /// println!("Found {} daily RIBs from {} diverse collectors", 215 | /// daily_ribs.len(), diverse_collectors.len()); 216 | /// ``` 217 | /// 218 | /// Get diverse RouteViews collectors for recent update analysis: 219 | /// ```no_run 220 | /// use bgpkit_broker::BgpkitBroker; 221 | /// 222 | /// let broker = BgpkitBroker::new(); 223 | /// let rv_collectors = broker.most_diverse_collectors(3, Some("routeviews")).unwrap(); 224 | /// 225 | /// let recent_updates = broker 226 | /// .clone() 227 | /// .collector_id(rv_collectors.join(",")) 228 | /// .recent_updates(6) 229 | /// .unwrap(); 230 | /// ``` 231 | /// 232 | /// Compare diversity between RIPE RIS and RouteViews: 233 | /// ```no_run 234 | /// use 
bgpkit_broker::BgpkitBroker; 235 | /// 236 | /// let broker = BgpkitBroker::new(); 237 | /// let ripe_collectors = broker.most_diverse_collectors(3, Some("riperis")).unwrap(); 238 | /// let rv_collectors = broker.most_diverse_collectors(3, Some("routeviews")).unwrap(); 239 | /// 240 | /// println!("RIPE diverse collectors: {:?}", ripe_collectors); 241 | /// println!("RouteViews diverse collectors: {:?}", rv_collectors); 242 | /// ``` 243 | pub fn most_diverse_collectors( 244 | &self, 245 | n: usize, 246 | project: Option<&str>, 247 | ) -> Result, BrokerError> { 248 | // Get all full-feed peers, optionally filtered by project 249 | let mut full_feed_broker = self.clone().peers_only_full_feed(true); 250 | if let Some(proj) = project { 251 | full_feed_broker = full_feed_broker.project(proj); 252 | } 253 | let peers = full_feed_broker.get_peers()?; 254 | 255 | // Group ASNs by collector 256 | let mut collector_asn_sets: HashMap> = 257 | HashMap::new(); 258 | 259 | for peer in peers { 260 | collector_asn_sets 261 | .entry(peer.collector.clone()) 262 | .or_default() 263 | .insert(peer.asn); 264 | } 265 | 266 | if collector_asn_sets.is_empty() || n == 0 { 267 | return Ok(Vec::new()); 268 | } 269 | 270 | // Greedy algorithm: select collectors that maximize unique ASN coverage 271 | let mut selected_collectors = Vec::new(); 272 | let mut covered_asns = std::collections::HashSet::new(); 273 | let mut remaining_collectors = collector_asn_sets; 274 | 275 | for _ in 0..n { 276 | if remaining_collectors.is_empty() { 277 | break; 278 | } 279 | 280 | // Find collector that adds the most new ASNs 281 | let best_collector = remaining_collectors 282 | .iter() 283 | .max_by_key(|(_, asns)| asns.difference(&covered_asns).count()) 284 | .map(|(collector, _)| collector.clone()); 285 | 286 | if let Some(collector) = best_collector { 287 | if let Some(asns) = remaining_collectors.remove(&collector) { 288 | // Add new ASNs to covered set 289 | covered_asns.extend(&asns); 290 | 
selected_collectors.push(collector); 291 | } 292 | } else { 293 | break; 294 | } 295 | } 296 | 297 | Ok(selected_collectors) 298 | } 299 | 300 | /// Get the MRT files needed to construct routing table snapshots at a specific timestamp. 301 | /// 302 | /// This function finds the RIB dump and updates files needed to reconstruct the routing 303 | /// table state at the given timestamp for each specified collector. For each collector, 304 | /// it finds: 305 | /// - The closest RIB dump file before or at the target timestamp 306 | /// - All updates files between the RIB dump timestamp and the target timestamp 307 | /// 308 | /// This is useful for applications that need to reconstruct the exact routing table 309 | /// state at a specific point in time by replaying updates on top of a RIB snapshot. 310 | /// 311 | /// # Arguments 312 | /// 313 | /// * `collector_ids` - Array of collector IDs to get snapshot files for (e.g., `["route-views2", "rrc00"]`) 314 | /// * `timestamp` - Target timestamp for the routing table snapshot. Supports multiple formats: 315 | /// - Unix timestamp: `"1640995200"` 316 | /// - RFC3339: `"2022-01-01T12:00:00Z"` 317 | /// - Date with time: `"2022-01-01 12:00:00"` 318 | /// - Pure date: `"2022-01-01"` (uses start of day) 319 | /// 320 | /// # Returns 321 | /// 322 | /// A vector of [`SnapshotFiles`] structs, one for each collector that has available data. 323 | /// Collectors without a suitable RIB dump before the target timestamp are excluded. 
324 | /// 325 | /// # Examples 326 | /// 327 | /// ## Basic usage 328 | /// 329 | /// ```no_run 330 | /// use bgpkit_broker::BgpkitBroker; 331 | /// 332 | /// let broker = BgpkitBroker::new(); 333 | /// let snapshots = broker.get_snapshot_files( 334 | /// &["route-views2", "rrc00"], 335 | /// "2024-01-01T12:00:00Z" 336 | /// ).unwrap(); 337 | /// 338 | /// for snapshot in snapshots { 339 | /// println!("Collector: {}", snapshot.collector_id); 340 | /// println!("RIB URL: {}", snapshot.rib_url); 341 | /// println!("Updates to apply: {}", snapshot.updates_urls.len()); 342 | /// for url in &snapshot.updates_urls { 343 | /// println!(" - {}", url); 344 | /// } 345 | /// } 346 | /// ``` 347 | /// 348 | /// ## Using with bgpkit-parser for routing table reconstruction 349 | /// 350 | /// ```no_run 351 | /// use bgpkit_broker::BgpkitBroker; 352 | /// 353 | /// let broker = BgpkitBroker::new(); 354 | /// let snapshots = broker.get_snapshot_files( 355 | /// &["route-views2"], 356 | /// "2024-01-01T06:30:00Z" 357 | /// ).unwrap(); 358 | /// 359 | /// if let Some(snapshot) = snapshots.first() { 360 | /// // 1. Parse the RIB dump to get initial routing table 361 | /// println!("Load RIB from: {}", snapshot.rib_url); 362 | /// 363 | /// // 2. 
Apply updates in order to reach target timestamp 364 | /// for update_url in &snapshot.updates_urls { 365 | /// println!("Apply updates from: {}", update_url); 366 | /// } 367 | /// } 368 | /// ``` 369 | /// 370 | /// ## Get snapshot for a specific time using different timestamp formats 371 | /// 372 | /// ```no_run 373 | /// use bgpkit_broker::BgpkitBroker; 374 | /// 375 | /// let broker = BgpkitBroker::new(); 376 | /// 377 | /// // Using Unix timestamp 378 | /// let snapshots = broker.get_snapshot_files(&["rrc00"], "1704110400").unwrap(); 379 | /// 380 | /// // Using pure date (midnight) 381 | /// let snapshots = broker.get_snapshot_files(&["rrc00"], "2024-01-01").unwrap(); 382 | /// 383 | /// // Using RFC3339 format 384 | /// let snapshots = broker.get_snapshot_files(&["rrc00"], "2024-01-01T12:00:00Z").unwrap(); 385 | /// ``` 386 | pub fn get_snapshot_files, T: Display>( 387 | &self, 388 | collector_ids: &[S], 389 | timestamp: T, 390 | ) -> Result, BrokerError> { 391 | // Parse and validate the target timestamp 392 | let target_ts = Self::parse_timestamp(×tamp.to_string())?; 393 | 394 | // We need to search for RIB files that could be before the target timestamp. 395 | // RIB dumps typically happen every 2 hours (RouteViews) or every 8 hours (RIPE RIS). 396 | // To be safe, we search up to 24 hours before the target timestamp for RIB files. 
397 | let search_start = target_ts - chrono::Duration::hours(24); 398 | 399 | let mut results = Vec::new(); 400 | 401 | for collector_id in collector_ids { 402 | let collector_id_str = collector_id.as_ref(); 403 | 404 | // Query for RIB files from search_start to target_ts 405 | let rib_items = self 406 | .clone() 407 | .collector_id(collector_id_str) 408 | .data_type("rib") 409 | .ts_start(search_start.format("%Y-%m-%dT%H:%M:%SZ").to_string()) 410 | .ts_end(target_ts.format("%Y-%m-%dT%H:%M:%SZ").to_string()) 411 | .query()?; 412 | 413 | // Find the closest RIB dump at or before the target timestamp 414 | let closest_rib = rib_items 415 | .into_iter() 416 | .filter(|item| { 417 | let item_ts = DateTime::::from_naive_utc_and_offset(item.ts_start, Utc); 418 | item_ts <= target_ts 419 | }) 420 | .max_by_key(|item| item.ts_start); 421 | 422 | let Some(rib_item) = closest_rib else { 423 | // No RIB dump found for this collector, skip it 424 | continue; 425 | }; 426 | 427 | // Query for updates files between the RIB timestamp and target timestamp 428 | let rib_ts = DateTime::::from_naive_utc_and_offset(rib_item.ts_start, Utc); 429 | 430 | let updates_items = self 431 | .clone() 432 | .collector_id(collector_id_str) 433 | .data_type("updates") 434 | .ts_start(rib_ts.format("%Y-%m-%dT%H:%M:%SZ").to_string()) 435 | .ts_end(target_ts.format("%Y-%m-%dT%H:%M:%SZ").to_string()) 436 | .query()?; 437 | 438 | // Filter updates that start after the RIB and end at or before the target timestamp 439 | // Sort by timestamp to ensure chronological order 440 | let mut filtered_updates: Vec = updates_items 441 | .into_iter() 442 | .filter(|item| { 443 | let item_start = DateTime::::from_naive_utc_and_offset(item.ts_start, Utc); 444 | let item_end = DateTime::::from_naive_utc_and_offset(item.ts_end, Utc); 445 | // Updates file must start after or at the RIB time 446 | // and end at or before the target timestamp 447 | item_start >= rib_ts && item_end <= target_ts 448 | }) 449 | 
// NOTE(review): this chunk is part of a repository text dump ("NNN | "-numbered lines); the Rust
// code below is kept verbatim. The first lines are the tail of `get_snapshot_files` (its start is
// outside this chunk): per collector it picks the newest RIB at or before the target, then the
// updates files fully contained in [rib_ts, target_ts], sorted chronologically.
// NOTE(review): the `#[test]` functions below call `BgpkitBroker::new()` and query the live broker
// API — presumably intended as network integration tests; confirm CI allows outbound HTTP, and
// note they will fail offline rather than being skipped.
.collect(); 450 | 451 | filtered_updates.sort_by_key(|item| item.ts_start); 452 | 453 | let updates_urls: Vec = 454 | filtered_updates.into_iter().map(|item| item.url).collect(); 455 | 456 | results.push(SnapshotFiles { 457 | collector_id: collector_id_str.to_string(), 458 | rib_url: rib_item.url, 459 | updates_urls, 460 | }); 461 | } 462 | 463 | Ok(results) 464 | } 465 | } 466 | 467 | #[cfg(test)] 468 | mod tests { 469 | use super::*; 470 | 471 | #[test] 472 | fn test_daily_ribs() { 473 | let broker = BgpkitBroker::new() 474 | .ts_start("1634693400") // 2021-10-20 00:00:00 475 | .ts_end("1634693400"); 476 | 477 | let result = broker.daily_ribs(); 478 | assert!(result.is_ok()); 479 | 480 | let daily_ribs = result.unwrap(); 481 | // All returned items should be RIBs captured at midnight 482 | for item in &daily_ribs { 483 | assert!(item.is_rib()); 484 | assert_eq!(item.ts_start.hour(), 0); 485 | assert_eq!(item.ts_start.minute(), 0); 486 | } 487 | } 488 | 489 | #[test] 490 | fn test_recent_updates() { 491 | use chrono::{Duration, Utc}; 492 | let broker = BgpkitBroker::new(); 493 | 494 | // Test that the recent_updates method constructs the correct query parameters 495 | // instead of actually executing the slow query 496 | let now = Utc::now(); 497 | let hours_ago = now - Duration::hours(24); 498 | 499 | let updates_broker = broker 500 | .clone() 501 | .data_type("updates") 502 | .ts_start(hours_ago.format("%Y-%m-%dT%H:%M:%SZ").to_string()) 503 | .ts_end(now.format("%Y-%m-%dT%H:%M:%SZ").to_string()); 504 | 505 | // Verify the parameters are set correctly 506 | assert_eq!( 507 | updates_broker.query_params.data_type, 508 | Some("updates".to_string()) 509 | ); 510 | assert!(updates_broker.query_params.ts_start.is_some()); 511 | assert!(updates_broker.query_params.ts_end.is_some()); 512 | 513 | // Verify configuration validation passes 514 | let validation_result = updates_broker.validate_configuration(); 515 | assert!(validation_result.is_ok()); 516 | } 517 | 518 | 
#[test] 519 | fn test_most_diverse_collectors() { 520 | let broker = BgpkitBroker::new(); 521 | let result = broker.most_diverse_collectors(5, None); 522 | assert!(result.is_ok()); 523 | 524 | let collectors = result.unwrap(); 525 | assert!(!collectors.is_empty()); 526 | assert!(collectors.len() <= 5); 527 | 528 | // Should not contain duplicates 529 | let unique_collectors: std::collections::HashSet<_> = collectors.iter().collect(); 530 | assert_eq!(unique_collectors.len(), collectors.len()); 531 | } 532 | 533 | #[test] 534 | fn test_most_diverse_collectors_zero() { 535 | let broker = BgpkitBroker::new(); 536 | let result = broker.most_diverse_collectors(0, None); 537 | assert!(result.is_ok()); 538 | assert_eq!(result.unwrap().len(), 0); 539 | } 540 | 541 | #[test] 542 | fn test_most_diverse_collectors_project_filter() { 543 | let broker = BgpkitBroker::new(); 544 | 545 | // Test with routeviews filter 546 | let rv_result = broker.most_diverse_collectors(3, Some("routeviews")); 547 | assert!(rv_result.is_ok()); 548 | 549 | // Test with riperis filter 550 | let ripe_result = broker.most_diverse_collectors(3, Some("riperis")); 551 | assert!(ripe_result.is_ok()); 552 | 553 | // Results should not contain duplicates 554 | let rv_collectors = rv_result.unwrap(); 555 | let ripe_collectors = ripe_result.unwrap(); 556 | 557 | if !rv_collectors.is_empty() { 558 | let unique_rv: std::collections::HashSet<_> = rv_collectors.iter().collect(); 559 | assert_eq!(unique_rv.len(), rv_collectors.len()); 560 | } 561 | 562 | if !ripe_collectors.is_empty() { 563 | let unique_ripe: std::collections::HashSet<_> = ripe_collectors.iter().collect(); 564 | assert_eq!(unique_ripe.len(), ripe_collectors.len()); 565 | } 566 | } 567 | 568 | #[test] 569 | fn test_get_snapshot_files() { 570 | let broker = BgpkitBroker::new(); 571 | 572 | // Test with a known timestamp (2021-10-20 04:00:00 UTC) 573 | // This should find a RIB dump at 02:00:00 and updates between 02:00 and 04:00 574 | let result = 
broker.get_snapshot_files(&["route-views2"], "2021-10-20T04:00:00Z"); 575 | assert!(result.is_ok()); 576 | 577 | let snapshots = result.unwrap(); 578 | // Should have at least one snapshot (if data is available) 579 | if !snapshots.is_empty() { 580 | let snapshot = &snapshots[0]; 581 | assert_eq!(snapshot.collector_id, "route-views2"); 582 | assert!(!snapshot.rib_url.is_empty()); 583 | // Updates URLs should be in chronological order 584 | assert!(snapshot.updates_urls.iter().all(|url| !url.is_empty())); 585 | } 586 | } 587 | 588 | #[test] 589 | fn test_get_snapshot_files_multiple_collectors() { 590 | let broker = BgpkitBroker::new(); 591 | 592 | // Test with multiple collectors 593 | let result = broker.get_snapshot_files(&["route-views2", "rrc00"], "2021-10-20T04:00:00Z"); 594 | assert!(result.is_ok()); 595 | 596 | let snapshots = result.unwrap(); 597 | // Check that collector IDs are unique 598 | let collector_ids: std::collections::HashSet<_> = 599 | snapshots.iter().map(|s| &s.collector_id).collect(); 600 | assert_eq!(collector_ids.len(), snapshots.len()); 601 | } 602 | 603 | #[test] 604 | fn test_get_snapshot_files_invalid_timestamp() { 605 | let broker = BgpkitBroker::new(); 606 | 607 | // Test with an invalid timestamp 608 | let result = broker.get_snapshot_files(&["route-views2"], "invalid-timestamp"); 609 | assert!(result.is_err()); 610 | assert!(matches!( 611 | result.err(), 612 | Some(BrokerError::ConfigurationError(_)) 613 | )); 614 | } 615 | 616 | #[test] 617 | fn test_get_snapshot_files_empty_collectors() { 618 | let broker = BgpkitBroker::new(); 619 | 620 | // Test with empty collectors array 621 | let empty: &[&str] = &[]; 622 | let result = broker.get_snapshot_files(empty, "2021-10-20T04:00:00Z"); 623 | assert!(result.is_ok()); 624 | assert!(result.unwrap().is_empty()); 625 | } 626 | 627 | #[test] 628 | fn test_snapshot_files_display() { 629 | let snapshot = SnapshotFiles { 630 | collector_id: "route-views2".to_string(), 631 | rib_url: 
// NOTE(review): below, the dump transitions into src/db/mod.rs. `LocalBrokerDb` wraps an sqlx
// SQLite pool plus cached collector/type rows. `get_ts_start_clause` widens the updates window by
// the per-project dump interval (5 min RIPE RIS, 15 min RouteViews) so files overlapping the start
// timestamp are included — presumably intentional; confirm against the broker query semantics.
"http://example.com/rib.bz2".to_string(), 632 | updates_urls: vec![ 633 | "http://example.com/updates1.bz2".to_string(), 634 | "http://example.com/updates2.bz2".to_string(), 635 | ], 636 | }; 637 | 638 | let display = format!("{}", snapshot); 639 | assert!(display.contains("route-views2")); 640 | assert!(display.contains("http://example.com/rib.bz2")); 641 | assert!(display.contains("updates_count: 2")); 642 | } 643 | } 644 | -------------------------------------------------------------------------------- /src/db/mod.rs: -------------------------------------------------------------------------------- 1 | mod latest_files; 2 | mod meta; 3 | mod utils; 4 | 5 | use crate::db::utils::infer_url; 6 | use crate::query::{BrokerCollector, BrokerItemType}; 7 | use crate::{BrokerError, BrokerItem, Collector}; 8 | use chrono::{DateTime, Duration, NaiveDateTime}; 9 | use sqlx::sqlite::SqliteRow; 10 | use sqlx::Row; 11 | use sqlx::SqlitePool; 12 | use sqlx::{migrate::MigrateDatabase, Sqlite}; 13 | use std::collections::HashMap; 14 | use tracing::{debug, error, info}; 15 | 16 | pub use meta::UpdatesMeta; 17 | 18 | pub const DEFAULT_PAGE_SIZE: usize = 100; 19 | 20 | #[derive(Clone)] 21 | pub struct LocalBrokerDb { 22 | /// shared connection pool for reading and writing 23 | conn_pool: SqlitePool, 24 | collectors: Vec, 25 | types: Vec, 26 | } 27 | 28 | pub struct DbSearchResult { 29 | pub items: Vec, 30 | pub page: usize, 31 | pub page_size: usize, 32 | pub total: usize, 33 | } 34 | 35 | fn get_ts_start_clause(ts: i64) -> String { 36 | format!( 37 | r#" 38 | ( 39 | (project_name='ripe-ris' AND type='updates' AND timestamp > {} - {}) 40 | OR (project_name='route-views' AND type='updates' AND timestamp > {} - {}) 41 | OR (type='rib' AND timestamp >= {}) 42 | ) 43 | "#, 44 | ts, 45 | 5 * 60, 46 | ts, 47 | 15 * 60, 48 | ts 49 | ) 50 | } 51 | 52 | fn get_ts_end_clause(ts: i64) -> String { 53 | format!("timestamp < {}", ts) 54 | } 55 | 56 | impl LocalBrokerDb { 57 | pub async fn 
// NOTE(review): `new()` creates the SQLite file if missing, then runs schema setup via
// `initialize()`. `Err(error) => panic!(...)` on `create_database` aborts the process — library
// code should propagate a BrokerError instead of panicking on I/O failure.
// NOTE(review): `search()` builds its SQL by string formatting. Collector names are interpolated
// inside single quotes ("'{}'"), so a name containing a quote breaks the query — an injection
// surface if the list is user-supplied (e.g. via the HTTP API); prefer bound parameters.
// NOTE(review): pagination assumes `page >= 1`; `page_size * (page - 1)` on usize panics in debug
// (wraps in release) when page == 0 — validate the input or use checked_sub.
new(path: &str) -> Result { 58 | info!("open local broker db at {}", path); 59 | 60 | if !Sqlite::database_exists(path).await? { 61 | match Sqlite::create_database(path).await { 62 | Ok(_) => info!("Created db at {}", path), 63 | Err(error) => panic!("error: {}", error), 64 | } 65 | } 66 | let conn_pool = SqlitePool::connect(path).await?; 67 | 68 | let mut db = LocalBrokerDb { 69 | conn_pool, 70 | collectors: vec![], 71 | types: vec![], 72 | }; 73 | db.initialize().await?; 74 | 75 | Ok(db) 76 | } 77 | 78 | async fn initialize(&mut self) -> Result<(), BrokerError> { 79 | sqlx::query( 80 | r#" 81 | CREATE TABLE IF NOT EXISTS meta( 82 | update_ts INTEGER, 83 | update_duration INTEGER, 84 | insert_count INTEGER 85 | ); 86 | 87 | CREATE TABLE IF NOT EXISTS collectors ( 88 | id INTEGER PRIMARY KEY, 89 | name TEXT, 90 | url TEXT, 91 | project TEXT, 92 | updates_interval INTEGER 93 | ); 94 | 95 | CREATE TABLE IF NOT EXISTS types ( 96 | id INTEGER PRIMARY KEY, 97 | name TEXT 98 | ); 99 | 100 | CREATE TABLE IF NOT EXISTS files( 101 | timestamp INTEGER, 102 | collector_id INTEGER, 103 | type_id INTEGER, 104 | rough_size INTEGER, 105 | exact_size INTEGER, 106 | constraint files_unique_pk 107 | unique (timestamp, collector_id, type_id) 108 | ); 109 | 110 | CREATE TABLE IF NOT EXISTS latest( 111 | timestamp INTEGER, 112 | collector_name TEXT, 113 | type TEXT, 114 | rough_size INTEGER, 115 | exact_size INTEGER, 116 | constraint latest_unique_pk 117 | unique (collector_name, type) 118 | ); 119 | 120 | CREATE INDEX IF NOT EXISTS idx_files_timestamp 121 | ON files(timestamp); 122 | 123 | CREATE INDEX IF NOT EXISTS idx_meta_update_ts 124 | ON meta(update_ts); 125 | 126 | CREATE VIEW IF NOT EXISTS files_view AS 127 | SELECT 128 | i.timestamp, i.rough_size, i.exact_size, 129 | t.name AS type, 130 | c.name AS collector_name, 131 | c.url AS collector_url, 132 | c.project AS project_name, 133 | c.updates_interval AS updates_interval 134 | FROM collectors c 135 | JOIN files i ON c.id = 
i.collector_id 136 | JOIN types t ON t.id = i.type_id; 137 | 138 | PRAGMA journal_mode=WAL; 139 | "#, 140 | ) 141 | .execute(&self.conn_pool) 142 | .await?; 143 | 144 | self.reload_collectors().await; 145 | self.types = sqlx::query("select id, name from types") 146 | .map(|row: SqliteRow| BrokerItemType { 147 | id: row.get::("id"), 148 | name: row.get::("name"), 149 | }) 150 | .fetch_all(&self.conn_pool) 151 | .await?; 152 | 153 | Ok(()) 154 | } 155 | 156 | pub async fn reload_collectors(&mut self) { 157 | match sqlx::query("select id, name, url, project, updates_interval from collectors") 158 | .map(|row: SqliteRow| BrokerCollector { 159 | id: row.get::("id"), 160 | name: row.get::("name"), 161 | url: row.get::("url"), 162 | project: row.get::("project"), 163 | updates_interval: row.get::("updates_interval"), 164 | }) 165 | .fetch_all(&self.conn_pool) 166 | .await 167 | { 168 | Ok(collectors) => self.collectors = collectors, 169 | Err(e) => { 170 | error!("failed to reload collectors: {}", e); 171 | } 172 | } 173 | } 174 | 175 | async fn force_checkpoint(&self) { 176 | if let Err(e) = sqlx::query("PRAGMA wal_checkpoint(TRUNCATE);") 177 | .execute(&self.conn_pool) 178 | .await 179 | { 180 | error!("failed to force checkpoint: {}", e); 181 | } 182 | } 183 | 184 | #[allow(clippy::too_many_arguments)] 185 | pub async fn search( 186 | &self, 187 | collectors: Option>, 188 | project: Option, 189 | data_type: Option, 190 | ts_start: Option, 191 | ts_end: Option, 192 | page: Option, 193 | page_size: Option, 194 | ) -> Result { 195 | let mut where_clauses: Vec = vec![]; 196 | if let Some(collectors) = collectors { 197 | if !collectors.is_empty() { 198 | let collectors_array_str = collectors 199 | .into_iter() 200 | .map(|c| format!("'{}'", c)) 201 | .collect::>() 202 | .join(","); 203 | where_clauses.push(format!("collector_name IN ({})", collectors_array_str)); 204 | } 205 | } 206 | if let Some(project) = project { 207 | match project.to_lowercase().as_str() { 208 | "ris" 
| "riperis" | "ripe-ris" => { 209 | where_clauses.push("project_name='ripe-ris'".to_string()); 210 | } 211 | "routeviews" | "rv" | "route-views" => { 212 | where_clauses.push("project_name='route-views'".to_string()); 213 | } 214 | _ => { 215 | return Err(BrokerError::BrokerError(format!( 216 | "Unknown project: {}", 217 | project 218 | ))); 219 | } 220 | } 221 | } 222 | if let Some(data_type) = data_type { 223 | match data_type.as_str() { 224 | "updates" | "update" | "u" => { 225 | where_clauses.push("type = 'updates'".to_string()); 226 | } 227 | "rib" | "ribs" | "r" => { 228 | where_clauses.push("type = 'rib'".to_string()); 229 | } 230 | _ => { 231 | return Err(BrokerError::BrokerError(format!( 232 | "Unknown data_type: {}", 233 | data_type 234 | ))); 235 | } 236 | } 237 | } 238 | 239 | match (ts_start, ts_end) { 240 | (Some(ts_start), None) => { 241 | where_clauses.push(get_ts_start_clause(ts_start.and_utc().timestamp())); 242 | } 243 | (None, Some(ts_end)) => { 244 | where_clauses.push(get_ts_end_clause(ts_end.and_utc().timestamp())); 245 | } 246 | (Some(ts_start), Some(ts_end)) => { 247 | let start = ts_start; 248 | let end = match ts_start == ts_end { 249 | true => { 250 | // making sure when searching with the same timestamp, we always include the given timestamp 251 | ts_start + Duration::seconds(1) 252 | } 253 | false => ts_end, 254 | }; 255 | where_clauses.push(get_ts_start_clause(start.and_utc().timestamp())); 256 | where_clauses.push(get_ts_end_clause(end.and_utc().timestamp())); 257 | } 258 | (None, None) => {} 259 | } 260 | 261 | // page starting from 1 262 | let (limit, offset) = match (page, page_size) { 263 | (Some(page), Some(page_size)) => (page_size, page_size * (page - 1)), 264 | (Some(page), None) => (DEFAULT_PAGE_SIZE, DEFAULT_PAGE_SIZE * (page - 1)), 265 | (None, Some(page_size)) => (page_size, 0), 266 | (None, None) => (0, 0), 267 | }; 268 | 269 | let limit_clause = match limit { 270 | 0 => "".to_string(), 271 | _ => format!("LIMIT {} 
OFFSET {}", limit, offset), 272 | }; 273 | 274 | // Build the WHERE clause string once to use in both queries 275 | let where_clause_str = match where_clauses.len() { 276 | 0 => "".to_string(), 277 | _ => format!("WHERE {}", where_clauses.join(" AND ")), 278 | }; 279 | 280 | // First query: Get total count 281 | let count_query = format!( 282 | "SELECT COUNT(*) as total FROM files_view {}", 283 | where_clause_str 284 | ); 285 | debug!("Count query: {}", count_query.as_str()); 286 | 287 | let total_count = sqlx::query(count_query.as_str()) 288 | .map(|row: SqliteRow| row.get::("total") as usize) 289 | .fetch_one(&self.conn_pool) 290 | .await?; 291 | 292 | // Second query: Get paginated results 293 | let query_string = format!( 294 | r#" 295 | SELECT collector_name, collector_url, project_name, timestamp, type, rough_size, exact_size, updates_interval 296 | FROM files_view 297 | {} 298 | ORDER BY timestamp ASC, type, collector_name 299 | {} 300 | "#, 301 | where_clause_str, limit_clause, 302 | ); 303 | debug!("Data query: {}", query_string.as_str()); 304 | 305 | let collector_name_to_info = self 306 | .collectors 307 | .iter() 308 | .map(|c| (c.name.clone(), c.clone())) 309 | .collect::>(); 310 | 311 | let items: Vec> = sqlx::query(query_string.as_str()) 312 | .map(|row: SqliteRow| { 313 | let collector_name = row.get::("collector_name"); 314 | let _collector_url = row.get::("collector_url"); 315 | let _project_name = row.get::("project_name"); 316 | let timestamp = row.get::("timestamp"); 317 | let type_name = row.get::("type"); 318 | let rough_size = row.get::("rough_size"); 319 | let exact_size = row.get::("exact_size"); 320 | let _updates_interval = row.get::("updates_interval"); 321 | 322 | let collector = collector_name_to_info.get(collector_name.as_str())?; 323 | 324 | let ts_start = DateTime::from_timestamp(timestamp, 0)?.naive_utc(); 325 | 326 | let (url, ts_end) = infer_url(collector, &ts_start, type_name.as_str() == "rib"); 327 | Some(BrokerItem { 328 | 
// NOTE(review): `search()` silently drops rows whose collector name is absent from the cached
// `self.collectors` map (the closure returns None, flattened away) — presumably acceptable since
// the cache is reloaded at init, but stale caches would hide rows; consider logging.
// NOTE(review): `insert_items` batch-builds an INSERT ... VALUES list via format! — the values
// here are numeric ids/timestamps so the injection surface is low, but bound parameters would be
// safer and avoid SQL-length limits. Items with unknown collector/type names are dropped with only
// an error! log. `insert_collector` panics on an unknown project string; returning a BrokerError
// would match the rest of the API.
ts_start, 329 | ts_end, 330 | collector_id: collector_name, 331 | data_type: type_name, 332 | url, 333 | rough_size, 334 | exact_size, 335 | }) 336 | }) 337 | .fetch_all(&self.conn_pool) 338 | .await?; 339 | 340 | Ok(DbSearchResult { 341 | items: items.into_iter().flatten().collect(), 342 | page: page.unwrap_or(1), 343 | page_size: page_size.unwrap_or(DEFAULT_PAGE_SIZE), 344 | total: total_count, 345 | }) 346 | } 347 | 348 | /// Runs the SQLite `ANALYZE` command on the database connection pool. 349 | /// 350 | /// This method updates SQLite's internal statistics used for query planning, 351 | /// helping to optimize database query performance. 352 | /// 353 | /// # Returns 354 | /// 355 | /// * `Ok(())` - If the analysis operation executed successfully. 356 | /// * `Err(BrokerError)` - If an error occurred during the execution of the analysis command. 357 | pub async fn analyze(&self) -> Result<(), BrokerError> { 358 | info!("doing sqlite3 analyze..."); 359 | sqlx::query("ANALYZE").execute(&self.conn_pool).await?; 360 | info!("doing sqlite3 analyze...done"); 361 | Ok(()) 362 | } 363 | 364 | /// Inserts a batch of items into the "files" table. 365 | /// 366 | /// # Arguments 367 | /// 368 | /// * `items` - A reference to a vector of `BrokerItem` structs to be inserted. 369 | /// * `update_latest` - A boolean value indicating whether to update the latest files. 370 | /// 371 | /// # Returns 372 | /// 373 | /// Returns a `Result` containing a vector of inserted `BrokerItem` structs or a `BrokerError`. 374 | pub async fn insert_items( 375 | &self, 376 | items: &[BrokerItem], 377 | update_latest: bool, 378 | ) -> Result, BrokerError> { 379 | // 1. fetch all collectors, get collector name-to-id mapping 380 | let collector_name_to_id = self 381 | .collectors 382 | .iter() 383 | .map(|c| (c.name.clone(), c.id)) 384 | .collect::>(); 385 | let collector_id_to_info = self 386 | .collectors 387 | .iter() 388 | .map(|c| (c.id, c.clone())) 389 | .collect::>(); 390 | 391 | // 2. 
fetch all types, get file type name-to-id mapping 392 | let type_name_to_id = self 393 | .types 394 | .iter() 395 | .map(|t| (t.name.clone(), t.id)) 396 | .collect::>(); 397 | let type_id_to_name = self 398 | .types 399 | .iter() 400 | .map(|t| (t.id, t.name.clone())) 401 | .collect::>(); 402 | 403 | // 3. batch insert into "files" table 404 | debug!("Inserting {} items...", items.len()); 405 | let mut inserted: Vec = vec![]; 406 | for batch in items.chunks(1000) { 407 | let values_str = batch 408 | .iter() 409 | .filter_map(|item| { 410 | let collector_id = match collector_name_to_id.get(item.collector_id.as_str()) { 411 | Some(id) => *id, 412 | None => { 413 | error!( 414 | "Collector name to id mapping {} not found", 415 | item.collector_id 416 | ); 417 | return None; 418 | } 419 | }; 420 | let type_id = match type_name_to_id.get(item.data_type.as_str()) { 421 | Some(id) => *id, 422 | None => { 423 | error!("Type name to id mapping {} not found", item.data_type); 424 | return None; 425 | } 426 | }; 427 | Some(format!( 428 | "({}, {}, {}, {}, {})", 429 | item.ts_start.and_utc().timestamp(), 430 | collector_id, 431 | type_id, 432 | item.rough_size, 433 | item.exact_size, 434 | )) 435 | }) 436 | .collect::>() 437 | .join(", "); 438 | if values_str.is_empty() { 439 | continue; 440 | } 441 | let inserted_rows: Vec> = sqlx::query( 442 | format!( 443 | r#"INSERT OR IGNORE INTO files (timestamp, collector_id, type_id, rough_size, exact_size) VALUES {} 444 | RETURNING timestamp, collector_id, type_id, rough_size, exact_size 445 | "#, 446 | values_str 447 | ).as_str() 448 | ).map(|row: SqliteRow|{ 449 | let timestamp = row.get::(0); 450 | let collector_id = row.get::(1); 451 | let type_id = row.get::(2); 452 | let rough_size = row.get::(3); 453 | let exact_size = row.get::(4); 454 | 455 | let collector = collector_id_to_info.get(&collector_id)?; 456 | let type_name = type_id_to_name.get(&type_id)?.to_owned(); 457 | let is_rib = type_name.as_str() == "rib"; 458 | 459 | let 
ts_start = DateTime::from_timestamp(timestamp, 0)?.naive_utc(); 460 | let (url, ts_end) = infer_url( 461 | collector, 462 | &ts_start, 463 | is_rib, 464 | ); 465 | 466 | Some(BrokerItem{ 467 | ts_start, 468 | ts_end, 469 | collector_id: collector.name.clone(), 470 | data_type: type_name, 471 | url, 472 | rough_size, 473 | exact_size, 474 | }) 475 | }).fetch_all(&self.conn_pool).await?; 476 | inserted.extend(inserted_rows.into_iter().flatten()); 477 | } 478 | debug!("Inserted {} items", inserted.len()); 479 | if update_latest { 480 | self.update_latest_files(&inserted, false).await; 481 | } 482 | 483 | self.force_checkpoint().await; 484 | Ok(inserted) 485 | } 486 | 487 | pub async fn insert_collector(&self, collector: &Collector) -> Result<(), BrokerError> { 488 | let count = sqlx::query( 489 | r#" 490 | SELECT count(*) FROM collectors where name = ? 491 | "#, 492 | ) 493 | .bind(collector.id.as_str()) 494 | .map(|row: SqliteRow| row.get::(0)) 495 | .fetch_one(&self.conn_pool) 496 | .await?; 497 | if count > 0 { 498 | // the collector already exists 499 | return Ok(()); 500 | } 501 | 502 | let (project, interval) = match collector.project.to_lowercase().as_str() { 503 | "riperis" | "ripe-ris" => ("ripe-ris", 5 * 60), 504 | "routeviews" | "route-views" => ("route-views", 15 * 60), 505 | _ => panic!("Unknown project: {}", collector.project), 506 | }; 507 | 508 | sqlx::query( 509 | r#" 510 | INSERT INTO collectors (name, url, project, updates_interval) 511 | VALUES (?, ?, ?, ?) 
// NOTE(review): remainder of the db test suite. `create_temp_db_path` timestamps the filename to
// avoid collisions between parallel tests; `cleanup_db_file` also removes SQLite's companion
// files — `with_extension("sqlite3-wal")` turns "<name>.sqlite3" into "<name>.sqlite3-wal", which
// matches SQLite's "<db>-wal"/"<db>-shm" naming for a ".sqlite3" db. Tests reach into private
// fields (`conn_pool`, `collectors`, `types`) directly, which is fine inside the same module.
// Cleanup is skipped if a test panics before reaching it — consider a Drop-guard helper.
"route-views2".to_string()]), 579 | None, 580 | Some("rib".to_string()), 581 | Some(DateTime::from_timestamp(1672531200, 0).unwrap().naive_utc()), 582 | Some(DateTime::from_timestamp(1672617600, 0).unwrap().naive_utc()), 583 | None, 584 | None, 585 | ) 586 | .await 587 | .unwrap(); 588 | 589 | assert!(result.items.is_empty()); // No data in fresh database 590 | assert_eq!(result.total, 0); // Total should also be 0 591 | 592 | // Cleanup 593 | drop(db); 594 | cleanup_db_file(&db_path); 595 | } 596 | 597 | #[tokio::test] 598 | async fn test_get_mappings() { 599 | let db_path = create_temp_db_path("get_mappings"); 600 | let db_path_str = db_path.to_str().unwrap(); 601 | 602 | let db = LocalBrokerDb::new(db_path_str).await.unwrap(); 603 | 604 | // Verify collectors and types are loaded (should be empty in fresh database) 605 | assert!(db.collectors.is_empty()); 606 | assert!(db.types.is_empty()); 607 | 608 | // Cleanup 609 | drop(db); 610 | cleanup_db_file(&db_path); 611 | } 612 | 613 | #[tokio::test] 614 | async fn test_inserts() { 615 | let db_path = create_temp_db_path("inserts"); 616 | let db_path_str = db_path.to_str().unwrap(); 617 | 618 | let db = LocalBrokerDb::new(db_path_str).await.unwrap(); 619 | 620 | // First we need to populate collectors and types for the test data 621 | use crate::Collector; 622 | 623 | // Insert test collectors 624 | let test_collectors = vec![ 625 | Collector { 626 | id: "rrc00".to_string(), 627 | project: "riperis".to_string(), 628 | url: "https://data.ris.ripe.net/rrc00/".to_string(), 629 | }, 630 | Collector { 631 | id: "rrc01".to_string(), 632 | project: "riperis".to_string(), 633 | url: "https://data.ris.ripe.net/rrc01/".to_string(), 634 | }, 635 | Collector { 636 | id: "route-views2".to_string(), 637 | project: "routeviews".to_string(), 638 | url: "http://archive.routeviews.org/route-views2/".to_string(), 639 | }, 640 | ]; 641 | 642 | for collector in &test_collectors { 643 | db.insert_collector(collector).await.unwrap(); 644 | 
} 645 | 646 | // Insert test data types 647 | sqlx::query("INSERT INTO types (name) VALUES ('updates'), ('rib')") 648 | .execute(&db.conn_pool) 649 | .await 650 | .unwrap(); 651 | 652 | // Reload mappings after insertions 653 | let mut db = db; // Take ownership to call mutable method 654 | db.reload_collectors().await; 655 | db.types = sqlx::query("select id, name from types") 656 | .map(|row: SqliteRow| BrokerItemType { 657 | id: row.get::("id"), 658 | name: row.get::("name"), 659 | }) 660 | .fetch_all(&db.conn_pool) 661 | .await 662 | .unwrap(); 663 | 664 | // Now test item insertion 665 | let items = vec![ 666 | BrokerItem { 667 | ts_start: DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc(), // 2022-01-01 668 | ts_end: Default::default(), 669 | collector_id: "rrc00".to_string(), 670 | data_type: "updates".to_string(), 671 | url: "test.com".to_string(), 672 | rough_size: 1000, 673 | exact_size: 1024, 674 | }, 675 | BrokerItem { 676 | ts_start: DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc(), 677 | ts_end: Default::default(), 678 | collector_id: "rrc01".to_string(), 679 | data_type: "rib".to_string(), 680 | url: "test.com".to_string(), 681 | rough_size: 2000, 682 | exact_size: 2048, 683 | }, 684 | BrokerItem { 685 | ts_start: DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc(), 686 | ts_end: Default::default(), 687 | collector_id: "route-views2".to_string(), 688 | data_type: "updates".to_string(), 689 | url: "test.com".to_string(), 690 | rough_size: 3000, 691 | exact_size: 3072, 692 | }, 693 | ]; 694 | 695 | let inserted = db.insert_items(&items, true).await.unwrap(); 696 | assert_eq!(inserted.len(), 3); 697 | 698 | // Verify insertion worked 699 | let entry_count = db.get_entry_count().await.unwrap(); 700 | assert_eq!(entry_count, 3); 701 | 702 | // Cleanup 703 | drop(db); 704 | cleanup_db_file(&db_path); 705 | } 706 | 707 | #[tokio::test] 708 | async fn test_get_latest() { 709 | let db_path = create_temp_db_path("get_latest"); 
710 | let db_path_str = db_path.to_str().unwrap(); 711 | 712 | let db = LocalBrokerDb::new(db_path_str).await.unwrap(); 713 | 714 | // Test get_latest_files on empty database 715 | let files = db.get_latest_files().await; 716 | assert!(files.is_empty()); 717 | 718 | // Cleanup 719 | drop(db); 720 | cleanup_db_file(&db_path); 721 | } 722 | 723 | #[tokio::test] 724 | async fn test_update_latest() { 725 | let db_path = create_temp_db_path("update_latest"); 726 | let db_path_str = db_path.to_str().unwrap(); 727 | 728 | let db = LocalBrokerDb::new(db_path_str).await.unwrap(); 729 | 730 | // Test update_latest_files with empty items (should not crash) 731 | db.update_latest_files(&[], false).await; 732 | 733 | let files = db.get_latest_files().await; 734 | assert!(files.is_empty()); 735 | 736 | // Cleanup 737 | drop(db); 738 | cleanup_db_file(&db_path); 739 | } 740 | } 741 | --------------------------------------------------------------------------------