├── .cargo └── config.toml ├── .github └── workflows │ ├── push.yml │ └── release.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── flow.conf ├── rustfmt.toml ├── src ├── args.rs ├── bgp │ ├── flow.rs │ ├── mod.rs │ ├── msg.rs │ ├── nlri.rs │ └── route.rs ├── integration_tests │ ├── flowspec.rs │ ├── helpers │ │ ├── bird.rs │ │ ├── cli.rs │ │ ├── exabgp.rs │ │ ├── kernel │ │ │ ├── linux.rs │ │ │ ├── mod.rs │ │ │ └── rtnl.rs │ │ └── mod.rs │ ├── kernel_linux.rs │ └── mod.rs ├── ipc.rs ├── kernel │ ├── linux │ │ ├── mod.rs │ │ └── nft.rs │ ├── mod.rs │ └── rtnl.rs ├── main.rs ├── net.rs └── util.rs └── xtask ├── Cargo.toml └── main.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xtask = "run --package xtask --" 3 | -------------------------------------------------------------------------------- /.github/workflows/push.yml: -------------------------------------------------------------------------------- 1 | name: Push actions 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | env: 8 | CARGO_TERM_COLOR: always 9 | RUSTFLAGS: "-Dwarnings" 10 | 11 | jobs: 12 | rustfmt: 13 | name: Rustfmt 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: dtolnay/rust-toolchain@nightly 18 | with: 19 | components: rustfmt 20 | - name: Run Rustfmt 21 | run: cargo fmt -- --check 22 | clippy: 23 | name: Clippy 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | - uses: dtolnay/rust-toolchain@nightly 28 | with: 29 | components: clippy 30 | - name: Run Clippy 31 | run: cargo clippy 32 | stable-test: 33 | name: Run tests on stable channel 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: actions/checkout@v4 37 | - uses: dtolnay/rust-toolchain@stable 38 | - name: Install dependencies 39 | run: | 40 | sudo apt install exabgp libreadline-dev 41 | wget https://bird.network.cz/download/bird-3.0.0.tar.gz 42 | tar xf 
bird-3.0.0.tar.gz 43 | cd bird-3.0.0 44 | ./configure 45 | make -j$(nproc) 46 | sudo make install 47 | - name: Build 48 | run: cargo build --verbose 49 | - name: Run tests 50 | run: cargo test --verbose -- --skip integration_tests::kernel 51 | - name: Run tests with unshare 52 | run: | 53 | sudo sh -c 'echo 0 > /proc/sys/kernel/apparmor_restrict_unprivileged_userns' 54 | cargo xtask unshare test --verbose 55 | cross-test: 56 | name: Build for ${{ matrix.target }} 57 | runs-on: ubuntu-latest 58 | strategy: 59 | fail-fast: false 60 | matrix: 61 | target: 62 | - x86_64-unknown-linux-gnu 63 | - aarch64-unknown-linux-gnu 64 | - armv7-unknown-linux-gnueabihf 65 | - x86_64-unknown-linux-musl 66 | - aarch64-unknown-linux-musl 67 | - armv7-unknown-linux-musleabihf 68 | steps: 69 | - uses: actions/checkout@v4 70 | - uses: dtolnay/rust-toolchain@stable 71 | - uses: cargo-bins/cargo-binstall@main 72 | - name: Install cross-rs 73 | run: cargo binstall cross 74 | - name: Build debug binary 75 | run: cross build --target=${{ matrix.target }} --verbose 76 | - name: Run cross unit tests 77 | run: cross test --target=${{ matrix.target }} --verbose -- --skip integration_tests 78 | - name: Build release binary 79 | run: cross build --release --target=${{ matrix.target }} --verbose 80 | - name: Upload debug binary 81 | if: always() 82 | uses: actions/upload-artifact@v4 83 | with: 84 | name: flow-debug-${{ matrix.target }} 85 | path: target/${{ matrix.target }}/debug/flow 86 | - name: Upload release binary 87 | uses: actions/upload-artifact@v4 88 | with: 89 | name: flow-release-${{ matrix.target }} 90 | path: target/${{ matrix.target }}/release/flow 91 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release actions 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: write 9 | 10 | env: 11 | 
CARGO_TERM_COLOR: always 12 | 13 | jobs: 14 | build: 15 | name: Build release binary for ${{ matrix.target }} 16 | runs-on: ubuntu-latest 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | target: 21 | - x86_64-unknown-linux-gnu 22 | - aarch64-unknown-linux-gnu 23 | - armv7-unknown-linux-gnueabihf 24 | - x86_64-unknown-linux-musl 25 | - aarch64-unknown-linux-musl 26 | - armv7-unknown-linux-musleabihf 27 | steps: 28 | - uses: actions/checkout@v4 29 | - uses: cargo-bins/cargo-binstall@main 30 | - name: Install cross-rs 31 | run: cargo binstall cross 32 | - name: Build on ${{ matrix.target }} 33 | run: cross build --release --target=${{ matrix.target }} --verbose 34 | - name: Upload 35 | env: 36 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 37 | run: | 38 | _version="$(sed 's|refs/tags/v||' <(echo ${{ github.ref }}))" 39 | _archive="flow-${_version}-${{ matrix.target }}" 40 | mkdir "$_archive" 41 | cp target/${{ matrix.target }}/release/flow "$_archive" 42 | cp README.md LICENSE "$_archive" 43 | tar cJvf "$_archive.tar.xz" "$_archive" 44 | sha256sum "$_archive.tar.xz" > "$_archive.tar.xz.sha256" 45 | gh release upload "v$_version" "$_archive.tar.xz"{,.sha256} 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # These are backup files generated by rustfmt 7 | **/*.rs.bk 8 | 9 | # MSVC Windows builds of rustc generate these, which store debugging information 10 | *.pdb 11 | 12 | .vscode 13 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "flow" 3 | version = "0.2.0" 4 | edition = "2024" 5 | default-run = "flow" 6 | 7 | [workspace] 8 | members = [".", "xtask"] 9 | 10 | 
[workspace.dependencies] 11 | clap = { version = "4.5.20", features = ["derive"] } 12 | 13 | [features] 14 | __gen = ["dep:clap_complete", "dep:clap_mangen"] 15 | 16 | [dependencies] 17 | anstyle = "1.0.8" 18 | anyhow = "1.0.86" 19 | clap = { workspace = true } 20 | clap-verbosity-flag = "3.0.0" 21 | clap_complete = { version = "4.5.38", optional = true } 22 | clap_mangen = { version = "0.2.24", optional = true } 23 | either = "1.13.0" 24 | env_logger = { version = "0.11.5", default-features = false } 25 | futures = { version = "0.3.30", default-features = false, features = [ 26 | "std", 27 | "async-await", 28 | ] } 29 | itertools = "0.14.0" 30 | libc = "0.2.161" 31 | log = "0.4.21" 32 | nix = { version = "0.29.0", features = ["user"] } 33 | num-integer = "0.1.46" 34 | postcard = { version = "1.0.10", default-features = false, features = [ 35 | "use-std", 36 | ] } 37 | replace_with = "0.1.7" 38 | serde = { version = "1.0.215", features = ["derive"] } 39 | serde_json = "1.0.132" 40 | smallvec = { version = "1.13.2", features = [ 41 | "union", 42 | "const_generics", 43 | "const_new", 44 | "serde", 45 | ] } 46 | strum = { version = "0.26.2", features = ["derive"] } 47 | thiserror = "2.0.0" 48 | tokio = { version = "1.38.0", features = [ 49 | "rt", 50 | "net", 51 | "macros", 52 | "io-util", 53 | "signal", 54 | "sync", 55 | "time", 56 | ] } 57 | tokio-util = "0.7.11" 58 | 59 | [target.'cfg(target_os = "linux")'.dependencies] 60 | nftables = { version = "0.6.0", features = ["tokio"] } 61 | 62 | [target.'cfg(any(target_os = "linux", target_os = "freebsd"))'.dependencies] 63 | rtnetlink = { git = "https://github.com/hack3ric/rust-rtnetlink", branch = "for-flow" } 64 | 65 | [build-dependencies] 66 | cfg_aliases = "0.2.1" 67 | 68 | [dev-dependencies] 69 | async-tempfile = "0.6.0" 70 | macro_rules_attribute = "0.2.0" 71 | nix = { version = "0.29.0", features = ["net"] } 72 | rand = "0.9.0" 73 | test-case = "3.3.1" 74 | tokio = { version = "1.38.0", features = ["time"] } 75 | 
version-compare = "0.2.0" 76 | 77 | [profile.release] 78 | opt-level = 3 79 | lto = true 80 | codegen-units = 1 81 | panic = "abort" 82 | strip = true 83 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, Eric Long 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Flow 2 | 3 | Flow enables [BGP Flow Specification (flowspec)](https://www.rfc-editor.org/rfc/rfc8955.html) on Linux software routers/firewalls. It acts as a sink that receives routes from another BGP speaker and transforms flowspecs into [nftables](https://wiki.nftables.org) rules and kernel routing tables via [rtnetlink(7)](https://www.man7.org/linux/man-pages/man7/rtnetlink.7.html). 4 | 5 | It: 6 | 7 | - executes BGP flowspec on Linux, using nftables and rtnetlink; 8 | - enables what previously could only be done on commercial routers or [bulky routing software](https://frrouting.org) on lightweight Linux systems. 9 | 10 | It doesn't: 11 | 12 | - work as a full-blown BGP client; you will need another BGP implementation (likely running on the same node) like [BIRD](https://bird.network.cz), [OpenBGPD](https://www.openbgpd.org) or [GoBGP](https://osrg.github.io/gobgp/) to peer with others; 13 | - allow multiple BGP sessions, only one-on-one; 14 | - initiate BGP session actively; 15 | - currently support VRF and VPN routes. 16 | 17 | **Flow has yet to be tested thoroughly and is not suitable for production for now. 
Use and test at your own risk!** 18 | 19 | ## Implemented RFCs/Drafts 20 | 21 | - [RFC 8955](https://www.rfc-editor.org/rfc/rfc8955.html): Dissemination of Flow Specification Rules, except: 22 | - VPNv4 flowspec routes 23 | - `rt-redirect` traffic action 24 | - Validation procedure 25 | - [RFC 8956](https://www.rfc-editor.org/rfc/rfc8956.html): Dissemination of Flow Specification Rules for IPv6, except: 26 | - VPNv6 flowspec routes 27 | - `rt-redirect-ipv6` traffic action 28 | - [draft-ietf-idr-flowspec-redirect-ip-03](https://www.ietf.org/archive/id/draft-ietf-idr-flowspec-redirect-ip-03.html): BGP Flow-Spec Redirect-to-IP Action 29 | 30 | ## Usage 31 | 32 | Run Flow with default settings that listens to wildcard with port 179, local AS 65000, router ID 127.0.0.1, and no restriction to the other BGP speaker: 33 | 34 | ```console 35 | # flow run 36 | ``` 37 | 38 | Allow only local AS (IBGP) and IPv6 loopback incoming IP for peer, and change listening port to 1179: 39 | 40 | ```console 41 | # flow run -b [::]:1179 -l 65001 -r 65001 -a ::1/128 42 | ``` 43 | 44 | The configuration options can be stored in a file and passed directly to Flow using the `-f` option: 45 | 46 | ```console 47 | # flow run -f flow.conf 48 | ``` 49 | 50 | ``` 51 | # flow.conf 52 | 53 | # Each line is exactly one argument without `--`, and spaces are preserved as-is. 54 | # Empty lines and lines starting with '#' are ignored. 55 | 56 | bind=[::1]:1179 57 | local-as=65001 58 | remote-as=65001 59 | 60 | # The prefix length can be omitted if it contains only one IP 61 | allowed-ips=::1 62 | ``` 63 | 64 | Configure the remote BGP speaker so it connects and sends flowspecs to Flow. You may need to enable multihop if they are connecting through loopback. 
For example in BIRD: 65 | 66 | ``` 67 | flow4 table myflow4; 68 | flow6 table myflow6; 69 | 70 | protocol bgp flow { 71 | local port 1180 as 65001; 72 | neighbor ::1 port 1179 as 65001; 73 | multihop; 74 | 75 | flow4 { table myflow4; import none; export all; }; 76 | flow6 { table myflow6; import none; export all; }; 77 | } 78 | ``` 79 | 80 | Show information about the currently running Flow instance: 81 | 82 | ```console 83 | # flow show 84 | ``` 85 | 86 | ## Building 87 | 88 | Just use your general Cargo workflow: 89 | 90 | ```console 91 | $ cargo build 92 | $ cargo run 93 | $ cargo xtask sudo run -- run # short for: 94 | $ cargo --config "target.'cfg(target_os = \"\")'.runner = 'sudo -E'" run -- run 95 | ``` 96 | 97 | To generate manpages and shell autocompletions into the target/assets directory, run: 98 | 99 | ```console 100 | $ cargo xtask gen 101 | ``` 102 | 103 | ## Running Tests 104 | 105 | Integration tests involve exchanging information with BGP daemons and modifying kernel network interfaces. 
For example, on Linux, install BIRD (>2.x), ExaBGP (>4.x) and use [`unshare(1)`](https://www.man7.org/linux/man-pages/man1/unshare.1.html) to run the full set of tests: 106 | 107 | ```console 108 | $ cargo xtask unshare test # shortcut for: 109 | $ cargo --config "target.'cfg(target_os = \"linux\")'.runner = 'unshare -rn'" test 110 | ``` 111 | 112 | If `unshare` or similar unprivileged isolation methods are unavailable, be careful when running tests with root, since modifications to the host network may not be completely reverted in test code: 113 | 114 | ```console 115 | $ cargo xtask sudo test # shortcut for: 116 | $ cargo --config "target.'cfg(target_os = \"\")'.runner = 'sudo -E'" test 117 | ``` 118 | 119 | Or, skip integration tests and run only unit tests: 120 | 121 | ```console 122 | $ cargo test -- --skip integration_tests 123 | ``` 124 | 125 | BIRD and ExaBGP paths can be specified via the `FLOW_BIRD_PATH` and `FLOW_EXABGP_PATH` environment variables: 126 | 127 | ```console 128 | $ FLOW_BIRD_PATH=/path/to/my/bird FLOW_EXABGP_PATH=/path/to/my/exabgp cargo <...options> test 129 | ``` 130 | 131 | ## Future Work 132 | 133 | - **Programmatic handling**: custom traffic filter actions and route handling (not limited to flowspecs) 134 | - [**Validation procedure**](https://www.rfc-editor.org/rfc/rfc8955.html#name-validation-procedure): currently this can be done from the connecting BGP speaker, but for the sake of completeness and also future programmability it should also be done here 135 | - **VPN routes and VRF redirection**: not much knowledge about these right now, but certainly doable 136 | - **\*BSD support**: provide an alternative to Linux; first FreeBSD (which uses `pf` and can reuse the existing rtnetlink code), and then OpenBSD ([route(4)](https://man.openbsd.org/route.4)) 137 | 138 | ## License 139 | 140 | Flow is licensed under the BSD 2-Clause License. See the LICENSE file for details. 
141 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use cfg_aliases::cfg_aliases; 2 | 3 | fn main() { 4 | cfg_aliases! { 5 | linux: { target_os = "linux" }, 6 | freebsd: { target_os = "freebsd" }, 7 | kernel_supported: { linux }, 8 | rtnetlink_supported: { any(linux, freebsd) }, 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /flow.conf: -------------------------------------------------------------------------------- 1 | bind=[::1]:1179 2 | allowed-ips=::1/128 3 | local-as=65000 4 | remote-as=65000 5 | 6 | # Comment out `dry-run` to apply flowspecs to kernel 7 | dry-run 8 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | imports_granularity = "Module" 2 | tab_spaces = 2 3 | wrap_comments = true 4 | short_array_element_width_threshold = 16 5 | overflow_delimited_expr = true 6 | max_width = 120 7 | chain_width = 80 8 | struct_lit_width = 80 9 | struct_variant_width = 80 10 | -------------------------------------------------------------------------------- /src/args.rs: -------------------------------------------------------------------------------- 1 | use crate::net::IpPrefix; 2 | use clap::{Args, Parser, Subcommand}; 3 | use clap_verbosity_flag::{InfoLevel, Verbosity}; 4 | use serde::{Deserialize, Serialize}; 5 | use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; 6 | use std::path::PathBuf; 7 | 8 | #[cfg(kernel_supported)] 9 | use crate::kernel::KernelArgs; 10 | 11 | #[derive(Debug, Parser)] 12 | pub struct Cli { 13 | #[command(subcommand)] 14 | pub command: Command, 15 | #[command(flatten)] 16 | pub verbosity: Verbosity, 17 | 18 | /// Path of runtime directory. 
19 | #[arg(long, global = true, default_value = "/run/flow")] 20 | pub run_dir: PathBuf, 21 | } 22 | 23 | #[derive(Debug, Subcommand)] 24 | pub enum Command { 25 | Run(RunArgs), 26 | Show(ShowArgs), 27 | } 28 | 29 | #[derive(Debug, Clone, Parser, Serialize, Deserialize)] 30 | pub struct RunArgs { 31 | /// Address to bind. 32 | #[arg( 33 | short, long, 34 | value_name = "ADDR:PORT", 35 | value_parser = parse_bgp_bind, 36 | default_value_t = SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 179), 37 | )] 38 | pub bind: SocketAddr, 39 | 40 | /// Local AS. 41 | #[arg(short, long, value_name = "ASN", default_value_t = 65000)] 42 | pub local_as: u32, 43 | 44 | /// Allowed remote AS (optional). 45 | #[arg(short, long, value_name = "ASN")] 46 | pub remote_as: Option, 47 | 48 | /// Router ID. 49 | #[arg(short = 'i', long, value_name = "ID", default_value_t = [127, 0, 0, 1].into())] 50 | pub router_id: Ipv4Addr, 51 | 52 | /// Allowed incoming IP prefix. 53 | /// 54 | /// May be specified more than once. 55 | #[arg( 56 | short, long, 57 | value_name = "PREFIX", 58 | value_parser = parse_prefix, 59 | default_values_t = [IpPrefix::V4_ALL, IpPrefix::V6_ALL], 60 | )] 61 | pub allowed_ips: Vec, 62 | 63 | /// Time in seconds before shutdown since the last received keepalive. 64 | /// 65 | /// Keepalive time is set to 1/3 of hold time. Set 0 to disable keepalive 66 | /// mechanism. Hold time of 1 or 2 are invalid and will be rejected. 67 | #[arg(short = 'H', long, default_value_t = 240)] 68 | pub hold_time: u16, 69 | 70 | /// Do not apply flowspecs to kernel settings. 71 | /// 72 | /// On unsupported platforms, this is no-op and no changes will be applied to 73 | /// kernel. 74 | #[arg(short, long)] 75 | pub dry_run: bool, 76 | 77 | /// Platform-specific kernel settings. 78 | #[cfg(kernel_supported)] 79 | #[command(flatten)] 80 | pub kernel: KernelArgs, 81 | 82 | /// File to read arguments from. 83 | /// 84 | /// All CLI arguments except -v are ignored if `--file` is present. 
85 | #[arg(short, long)] 86 | pub file: Option, 87 | } 88 | 89 | fn parse_bgp_bind(bind: &str) -> anyhow::Result { 90 | let result = bind.parse().or_else(|_| bind.parse::().map(|ip| (ip, 179).into()))?; 91 | Ok(result) 92 | } 93 | 94 | fn parse_prefix(p: &str) -> anyhow::Result { 95 | let result = p.parse().or_else(|_| { 96 | p.parse::() 97 | .map(|x| IpPrefix::new(x, if x.is_ipv4() { 32 } else { 128 })) 98 | })?; 99 | Ok(result) 100 | } 101 | 102 | #[derive(Debug, Args)] 103 | pub struct ShowArgs {} 104 | -------------------------------------------------------------------------------- /src/bgp/flow.rs: -------------------------------------------------------------------------------- 1 | use super::Result; 2 | use crate::net::{Afi, IpPrefix, IpPrefixError, IpWithPrefix, IpWithPrefixErrorKind}; 3 | use serde::{Deserialize, Serialize}; 4 | use smallvec::{SmallVec, smallvec}; 5 | use std::borrow::Borrow; 6 | use std::cmp::Ordering; 7 | use std::collections::BTreeSet; 8 | use std::fmt::{self, Debug, Display, Formatter, Write}; 9 | use std::hash::{Hash, Hasher}; 10 | use std::io; 11 | use std::io::ErrorKind::UnexpectedEof; 12 | use std::marker::PhantomData; 13 | use std::net::IpAddr; 14 | use strum::{EnumDiscriminants, FromRepr}; 15 | use thiserror::Error; 16 | use tokio::io::{AsyncRead, AsyncReadExt}; 17 | 18 | #[derive(Clone, Serialize, Deserialize)] 19 | pub struct Flowspec { 20 | afi: Afi, 21 | inner: BTreeSet, 22 | } 23 | 24 | impl Flowspec { 25 | pub fn new(afi: Afi) -> Self { 26 | Self { afi, inner: Default::default() } 27 | } 28 | pub fn new_v4() -> Self { 29 | Self::new(Afi::Ipv4) 30 | } 31 | pub fn new_v6() -> Self { 32 | Self::new(Afi::Ipv6) 33 | } 34 | 35 | pub fn insert(&mut self, c: Component) -> Result<(), FlowError> { 36 | if !c.is_valid(self.afi) { 37 | return Err(FlowError::Invalid); 38 | } 39 | let kind = c.kind(); 40 | if !self.inner.insert(ComponentStore(c)) { 41 | return Err(FlowError::Duplicate(kind)); 42 | } 43 | Ok(()) 44 | } 45 | 46 | pub fn 
with(mut self, c: Component) -> Result { 47 | self.insert(c)?; 48 | Ok(self) 49 | } 50 | 51 | pub fn afi(&self) -> Afi { 52 | self.afi 53 | } 54 | pub fn is_ipv4(&self) -> bool { 55 | self.afi == Afi::Ipv4 56 | } 57 | pub fn is_ipv6(&self) -> bool { 58 | self.afi == Afi::Ipv6 59 | } 60 | 61 | pub fn dst_prefix(&self) -> IpPrefix { 62 | (self.inner) 63 | .get(&ComponentKind::DstPrefix) 64 | .and_then(|x| { 65 | let Component::DstPrefix(pat, offset) = x.0 else { 66 | unreachable!(); 67 | }; 68 | (offset == 0).then_some(pat) 69 | }) 70 | .unwrap_or(match self.afi { 71 | Afi::Ipv4 => IpPrefix::V4_ALL, 72 | Afi::Ipv6 => IpPrefix::V6_ALL, 73 | }) 74 | } 75 | 76 | pub fn write(&self, buf: &mut Vec) { 77 | let mut buf2 = Vec::new(); 78 | self.inner.iter().for_each(|c| c.0.write(&mut buf2)); 79 | let len: u16 = buf2.len().try_into().expect("flowspec length should fit in u16"); 80 | assert!(len < 0xf000); 81 | if len < 240 { 82 | buf.push(len.try_into().unwrap()); 83 | } else if len < 4096 { 84 | buf.extend((len | 0xf000).to_be_bytes()); 85 | } else { 86 | panic!("flowspec length exceeds 0xfff"); 87 | } 88 | buf.extend(buf2); 89 | } 90 | 91 | pub async fn read(reader: &mut R, afi: Afi) -> Result> { 92 | let mut len_bytes = [0; 2]; 93 | match reader.read_u8().await { 94 | Ok(n) => len_bytes[0] = n, 95 | Err(error) if error.kind() == UnexpectedEof => return Ok(None), 96 | Err(error) => return Err(error.into()), 97 | } 98 | let len = if len_bytes[0] & 0xf0 == 0xf0 { 99 | len_bytes[0] &= 0x0f; 100 | len_bytes[1] = reader.read_u8().await?; 101 | u16::from_be_bytes(len_bytes) 102 | } else { 103 | len_bytes[0].into() 104 | }; 105 | let mut flow_reader = reader.take(len.into()); 106 | let mut inner = BTreeSet::::new(); 107 | while let Some(comp) = Component::read(&mut flow_reader, afi).await? 
{ 108 | if inner.last().map(|x| x.0.kind() >= comp.kind()).unwrap_or(false) { 109 | return Err(FlowError::Unsorted.into()); // also probably duplicate 110 | } 111 | if !comp.is_valid(afi) { 112 | return Err(FlowError::Invalid.into()); 113 | } 114 | let kind = comp.kind(); 115 | if !inner.insert(ComponentStore(comp)) { 116 | return Err(FlowError::Duplicate(kind).into()); 117 | } 118 | } 119 | Ok(Some(Self { afi, inner })) 120 | } 121 | pub async fn read_v4(reader: &mut R) -> Result> { 122 | Self::read(reader, Afi::Ipv4).await 123 | } 124 | pub async fn read_v6(reader: &mut R) -> Result> { 125 | Self::read(reader, Afi::Ipv6).await 126 | } 127 | 128 | pub fn components(&self) -> impl Iterator { 129 | self.inner.iter().map(|c| &c.0) 130 | } 131 | 132 | pub fn component_set(&self) -> &BTreeSet { 133 | &self.inner 134 | } 135 | } 136 | 137 | impl Debug for Flowspec { 138 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 139 | Display::fmt(self, f) 140 | } 141 | } 142 | 143 | impl Display for Flowspec { 144 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 145 | match self.afi { 146 | Afi::Ipv4 => f.write_str("flow4 { ")?, 147 | Afi::Ipv6 => f.write_str("flow6 { ")?, 148 | } 149 | if let Some(ComponentStore(first)) = self.inner.first() { 150 | write!(f, "{first}")?; 151 | } else { 152 | f.write_str("")?; 153 | } 154 | for ComponentStore(c) in self.inner.iter().skip(1) { 155 | write!(f, "; {c}")?; 156 | } 157 | f.write_str(" }") 158 | } 159 | } 160 | 161 | impl PartialEq for Flowspec { 162 | fn eq(&self, other: &Self) -> bool { 163 | // ComponentStore's PartialEq only compares kind, so manually implement instead 164 | self.afi == other.afi 165 | && self.inner.len() == other.inner.len() 166 | && self.inner.iter().zip(other.inner.iter()).all(|(a, b)| a.0 == b.0) 167 | } 168 | } 169 | 170 | impl Eq for Flowspec {} 171 | 172 | impl PartialOrd for Flowspec { 173 | fn partial_cmp(&self, other: &Self) -> Option { 174 | Some(self.cmp(other)) 175 | } 176 | } 177 | 178 | 
impl Ord for Flowspec { 179 | fn cmp(&self, other: &Self) -> Ordering { 180 | match self.afi.cmp(&other.afi) { 181 | Ordering::Equal => {} 182 | ord => return ord, 183 | } 184 | self.inner.iter().map(|x| &x.0).cmp(other.inner.iter().map(|x| &x.0)) 185 | } 186 | } 187 | 188 | #[derive(Debug, Clone, Serialize, Deserialize)] 189 | pub struct ComponentStore(pub Component); 190 | 191 | impl PartialEq for ComponentStore { 192 | fn eq(&self, other: &Self) -> bool { 193 | self.0.kind() == other.0.kind() 194 | } 195 | } 196 | 197 | impl Eq for ComponentStore {} 198 | 199 | impl PartialOrd for ComponentStore { 200 | fn partial_cmp(&self, other: &Self) -> Option { 201 | Some(self.cmp(other)) 202 | } 203 | } 204 | 205 | impl Ord for ComponentStore { 206 | fn cmp(&self, other: &Self) -> Ordering { 207 | self.0.kind().cmp(&other.0.kind()) 208 | } 209 | } 210 | 211 | impl Borrow for ComponentStore { 212 | fn borrow(&self) -> &ComponentKind { 213 | use Component::*; 214 | use ComponentKind as CK; 215 | match self.0 { 216 | DstPrefix(..) => &CK::DstPrefix, 217 | SrcPrefix(..) => &CK::SrcPrefix, 218 | Protocol(..) => &CK::Protocol, 219 | Port(..) => &CK::Port, 220 | DstPort(..) => &CK::DstPort, 221 | SrcPort(..) => &CK::SrcPort, 222 | IcmpType(..) => &CK::IcmpType, 223 | IcmpCode(..) => &CK::IcmpCode, 224 | TcpFlags(..) => &CK::TcpFlags, 225 | PacketLen(..) => &CK::PacketLen, 226 | Dscp(..) => &CK::Dscp, 227 | Fragment(..) => &CK::Fragment, 228 | FlowLabel(..) 
=> &CK::FlowLabel, 229 | } 230 | } 231 | } 232 | 233 | #[derive(Clone, PartialEq, Eq, Hash, EnumDiscriminants, Serialize, Deserialize)] 234 | #[strum_discriminants(name(ComponentKind), derive(FromRepr, PartialOrd, Ord))] 235 | #[repr(u8)] 236 | pub enum Component { 237 | DstPrefix(IpPrefix, u8) = 1, 238 | SrcPrefix(IpPrefix, u8) = 2, 239 | Protocol(Ops) = 3, 240 | Port(Ops) = 4, 241 | DstPort(Ops) = 5, 242 | SrcPort(Ops) = 6, 243 | IcmpType(Ops) = 7, 244 | IcmpCode(Ops) = 8, 245 | TcpFlags(Ops) = 9, 246 | PacketLen(Ops) = 10, 247 | Dscp(Ops) = 11, 248 | Fragment(Ops) = 12, 249 | FlowLabel(Ops) = 13, 250 | } 251 | 252 | impl Component { 253 | #[allow(non_snake_case)] 254 | pub fn DstPrefixV4(p: IpPrefix) -> Self { 255 | assert!(p.is_ipv4()); 256 | Self::DstPrefix(p, 0) 257 | } 258 | 259 | #[allow(non_snake_case)] 260 | pub fn SrcPrefixV4(p: IpPrefix) -> Self { 261 | assert!(p.is_ipv4()); 262 | Self::SrcPrefix(p, 0) 263 | } 264 | 265 | pub fn kind(&self) -> ComponentKind { 266 | self.into() 267 | } 268 | 269 | pub fn write(&self, buf: &mut Vec) { 270 | buf.push(self.kind() as u8); 271 | match self { 272 | Self::DstPrefix(prefix, offset) | Self::SrcPrefix(prefix, offset) => { 273 | if let IpAddr::V6(v6) = prefix.prefix() { 274 | let pattern_bytes = (prefix.len() - offset).div_ceil(8); 275 | buf.extend([prefix.len(), *offset]); 276 | buf.extend((v6.to_bits() << offset).to_be_bytes().into_iter().take(pattern_bytes.into())); 277 | } else { 278 | prefix.write(buf); 279 | } 280 | } 281 | Self::Protocol(ops) 282 | | Self::Port(ops) 283 | | Self::DstPort(ops) 284 | | Self::SrcPort(ops) 285 | | Self::IcmpType(ops) 286 | | Self::IcmpCode(ops) 287 | | Self::PacketLen(ops) 288 | | Self::Dscp(ops) 289 | | Self::FlowLabel(ops) => ops.write(buf), 290 | Self::TcpFlags(ops) | Self::Fragment(ops) => ops.write(buf), 291 | } 292 | } 293 | 294 | pub async fn read(reader: &mut R, afi: Afi) -> Result> { 295 | use ComponentKind as CK; 296 | 297 | let kind = match reader.read_u8().await { 
298 | Ok(kind) => kind, 299 | Err(error) if error.kind() == UnexpectedEof => return Ok(None), 300 | Err(error) => return Err(error.into()), 301 | }; 302 | let result = match ComponentKind::from_repr(kind) { 303 | Some(CK::DstPrefix) if afi == Afi::Ipv4 => Self::parse_v4_prefix(Self::DstPrefix, reader).await?, 304 | Some(CK::SrcPrefix) if afi == Afi::Ipv4 => Self::parse_v4_prefix(Self::SrcPrefix, reader).await?, 305 | Some(CK::DstPrefix) if afi == Afi::Ipv6 => Self::parse_v6_prefix_pattern(Self::DstPrefix, reader).await?, 306 | Some(CK::SrcPrefix) if afi == Afi::Ipv6 => Self::parse_v6_prefix_pattern(Self::SrcPrefix, reader).await?, 307 | Some(CK::Protocol) => Self::Protocol(Ops::read(reader).await?), 308 | Some(CK::Port) => Self::Port(Ops::read(reader).await?), 309 | Some(CK::DstPort) => Self::DstPort(Ops::read(reader).await?), 310 | Some(CK::SrcPort) => Self::SrcPort(Ops::read(reader).await?), 311 | Some(CK::IcmpType) => Self::IcmpType(Ops::read(reader).await?), 312 | Some(CK::IcmpCode) => Self::IcmpCode(Ops::read(reader).await?), 313 | Some(CK::TcpFlags) => Self::TcpFlags(Ops::read(reader).await?), 314 | Some(CK::PacketLen) => Self::PacketLen(Ops::read(reader).await?), 315 | Some(CK::Dscp) => Self::Dscp(Ops::read(reader).await?), 316 | Some(CK::Fragment) => Self::Fragment(Ops::read(reader).await?), 317 | Some(CK::FlowLabel) => Self::FlowLabel(Ops::read(reader).await?), 318 | _ => return Err(FlowError::UnsupportedKind(kind).into()), 319 | }; 320 | Ok(Some(result)) 321 | } 322 | 323 | pub fn is_valid(&self, afi: Afi) -> bool { 324 | match afi { 325 | Afi::Ipv4 => self.is_valid_v4(), 326 | Afi::Ipv6 => self.is_valid_v6(), 327 | } 328 | } 329 | 330 | pub fn is_valid_v4(&self) -> bool { 331 | use Component::*; 332 | match self { 333 | DstPrefix(prefix, offset) | SrcPrefix(prefix, offset) => prefix.is_ipv4() && *offset == 0, 334 | Fragment(ops) => ops.0.iter().all(|x| x.value & !0b1111 == 0), 335 | FlowLabel(_) => false, 336 | _ => true, 337 | } 338 | } 339 | 340 | pub 
fn is_valid_v6(&self) -> bool {
        use Component::*;
        match self {
            // RFC 8956 section 3.1: length = 0 with offset = 0 matches every address; otherwise
            // the offset must be strictly below the prefix length. Both cases reduce to
            // "offset is zero, or offset < length".
            DstPrefix(prefix, offset) | SrcPrefix(prefix, offset) => {
                prefix.is_ipv6() && (*offset == 0 || *offset < prefix.len())
            }
            // Bit 0 is not accepted for IPv6 (the IPv4 check allows the low four bits).
            Fragment(ops) => ops.0.iter().all(|x| x.value & !0b1110 == 0),
            _ => true,
        }
    }

    /// Reads an IPv4 prefix component from `reader` and wraps it with constructor `f`.
    ///
    /// IPv4 prefixes carry no offset, so the offset passed to `f` is always 0.
    ///
    /// # Errors
    /// Returns an `UnexpectedEof` I/O error when the reader yields no prefix.
    async fn parse_v4_prefix(f: fn(IpPrefix, u8) -> Self, reader: &mut (impl AsyncRead + Unpin)) -> Result<Self> {
        let (prefix, _) = IpPrefix::read_v4(reader).await?.ok_or_else(|| io::Error::from(UnexpectedEof))?;
        Ok(f(prefix, 0))
    }

    /// Reads an IPv6 prefix component (`length`, `offset`, pattern bytes) from `reader`
    /// and wraps it with constructor `f`.
    ///
    /// # Errors
    /// - `PrefixLenTooLong` when `length > 128`.
    /// - [`FlowError::PrefixOffsetTooBig`] when a non-zero `offset` is not below `length`.
    /// - Any I/O error raised while reading.
    async fn parse_v6_prefix_pattern(
        f: fn(IpPrefix, u8) -> Self,
        reader: &mut (impl AsyncRead + Unpin),
    ) -> Result<Self> {
        let len = reader.read_u8().await?;
        if len > 128 {
            return Err(
                IpPrefixError { kind: IpWithPrefixErrorKind::PrefixLenTooLong(len, 128).into(), value: None }.into(),
            );
        }
        let offset = reader.read_u8().await?;
        // `offset == 0` is always acceptable, including the match-everything case
        // length = 0 / offset = 0; past this check `len - offset` cannot underflow.
        if offset != 0 && offset >= len {
            return Err(FlowError::PrefixOffsetTooBig(offset, len).into());
        }
        let mut buf = [0; 16];
        // Only the bits between `offset` and `len` are on the wire, rounded up to whole bytes.
        let pattern_bytes = (len - offset).div_ceil(8);
        reader.read_exact(&mut buf[0..pattern_bytes.into()]).await?;
        // The pattern is left-aligned in `buf`; shift it right so it starts at bit `offset`.
        let pattern = u128::from_be_bytes(buf) >> offset;
        let prefix = IpWithPrefix::new(IpAddr::V6(pattern.into()), len).prefix();
        Ok(f(prefix, offset))
    }

    /// Returns the prefix and offset for the two prefix components, `None` otherwise.
    fn prefix_offset(&self) -> Option<(IpPrefix, u8)> {
        use Component::*;
        match self {
            DstPrefix(p, o) | SrcPrefix(p, o) => Some((*p, *o)),
            _ => None,
        }
    }

    /// Returns the operator list of a numeric component, `None` for any other component.
    fn numeric_ops(&self) -> Option<&Ops<Numeric>> {
        use Component::*;
        match self {
            Protocol(ops) | Port(ops) | DstPort(ops) | SrcPort(ops) | IcmpType(ops) | IcmpCode(ops)
            | PacketLen(ops) | Dscp(ops) | FlowLabel(ops) => Some(ops),
            _ => None,
        }
    }

    /// Returns the operator list of a bitmask component, `None` for any other component.
    fn bitmask_ops(&self) -> Option<&Ops<Bitmask>> {
        use Component::*;
        match self {
            TcpFlags(ops) | Fragment(ops) => Some(ops),
            _ => None,
        }
    }
}
397 | impl Debug for Component { 398 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 399 | write!(f, "Component({self})") 400 | } 401 | } 402 | 403 | impl Display for Component { 404 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 405 | match self { 406 | Self::DstPrefix(pat, off) if pat.is_ipv6() && *off != 0 => { 407 | write!(f, "dst_ip in {}/{}-{}", pat.prefix(), off, pat.len()) 408 | } 409 | Self::DstPrefix(pat, _) => write!(f, "dst_ip in {pat}"), 410 | Self::SrcPrefix(pat, off) if pat.is_ipv6() && *off != 0 => { 411 | write!(f, "src_ip in {}/{}-{}", pat.prefix(), off, pat.len()) 412 | } 413 | Self::SrcPrefix(pat, _) => write!(f, "src_ip in {pat}"), 414 | Self::Protocol(ops) => write!(f, "protocol {ops}"), 415 | Self::Port(ops) => write!(f, "port {ops}"), 416 | Self::DstPort(ops) => write!(f, "dst_port {ops}"), 417 | Self::SrcPort(ops) => write!(f, "src_port {ops}"), 418 | Self::IcmpType(ops) => write!(f, "icmp.type {ops}"), 419 | Self::IcmpCode(ops) => write!(f, "icmp.code {ops}"), 420 | Self::TcpFlags(ops) => write!(f, "tcp.flags {ops}"), 421 | Self::PacketLen(ops) => write!(f, "len {ops}"), 422 | Self::Dscp(ops) => write!(f, "dscp {ops}"), 423 | Self::Fragment(ops) => write!(f, "frag {ops}"), 424 | Self::FlowLabel(ops) => write!(f, "flow_label {ops}"), 425 | } 426 | } 427 | } 428 | 429 | impl PartialOrd for Component { 430 | fn partial_cmp(&self, other: &Self) -> Option { 431 | Some(self.cmp(other)) 432 | } 433 | } 434 | 435 | impl Ord for Component { 436 | fn cmp(&self, other: &Self) -> Ordering { 437 | match self.kind().cmp(&other.kind()) { 438 | Ordering::Equal => {} 439 | ord => return ord, 440 | } 441 | if let (Some((ip1, off1)), Some((ip2, off2))) = (self.prefix_offset(), other.prefix_offset()) { 442 | match off1.cmp(&off2) { 443 | Ordering::Equal => ip1.cmp(&ip2), 444 | ord => ord, 445 | } 446 | } else if let (Some(ops1), Some(ops2)) = (self.numeric_ops(), other.numeric_ops()) { 447 | ops1.cmp(ops2) 448 | } else if let (Some(ops1), 
Some(ops2)) = (self.bitmask_ops(), other.bitmask_ops()) { 449 | ops1.cmp(ops2) 450 | } else { 451 | unreachable!() 452 | } 453 | } 454 | } 455 | 456 | /// Operator sequence with values. 457 | #[derive(Serialize, Deserialize)] 458 | pub struct Ops(pub SmallVec<[Op; 4]>); 459 | 460 | impl Ops { 461 | pub fn new(op: Op) -> Self { 462 | Self(smallvec![op.make_or()]) 463 | } 464 | 465 | pub fn with(mut self, op: Op) -> Self { 466 | self.0.push(op); 467 | self 468 | } 469 | pub fn and(self, op: Op) -> Self { 470 | self.with(op.make_and()) 471 | } 472 | pub fn or(self, op: Op) -> Self { 473 | self.with(op.make_or()) 474 | } 475 | 476 | pub fn write(&self, buf: &mut Vec) { 477 | self.0[..self.0.len() - 1].iter().for_each(|x| x.write(buf, false)); 478 | self.0.last().unwrap().write(buf, true); 479 | } 480 | 481 | pub async fn read(reader: &mut R) -> io::Result { 482 | let mut inner = Vec::new(); 483 | let mut first = true; 484 | loop { 485 | let (mut op, eol) = Op::read(reader).await?; 486 | if first { 487 | op = op.make_or(); 488 | } 489 | inner.push(op); 490 | if eol { 491 | break; 492 | } 493 | first = false; 494 | } 495 | assert!(!inner.is_empty()); 496 | inner[0].flags &= 0b1011_1111; // make sure first is always OR 497 | Ok(Self(inner.into())) 498 | } 499 | 500 | pub fn op(&self, data: u64) -> bool { 501 | let mut result = false; 502 | for op in &self.0 { 503 | if op.is_and() { 504 | result &= op.op(data); 505 | } else if result { 506 | return true; 507 | } else { 508 | result |= op.op(data); 509 | } 510 | } 511 | result 512 | } 513 | } 514 | 515 | impl From> for Ops { 516 | fn from(op: Op) -> Self { 517 | Self::new(op) 518 | } 519 | } 520 | 521 | impl Debug for Ops { 522 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 523 | write!(f, "Ops({self})") 524 | } 525 | } 526 | 527 | impl Display for Ops { 528 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 529 | if self.0.len() > 1 { 530 | f.write_char('(')?; 531 | } 532 | K::fmt(f, self.0[0].flags, 
self.0[0].value)?; 533 | if self.0.len() > 1 { 534 | for op in &self.0[1..] { 535 | if op.is_and() { 536 | f.write_str(" && ")?; 537 | } else { 538 | f.write_str(" || ")?; 539 | } 540 | K::fmt(f, op.flags, op.value)?; 541 | } 542 | } 543 | if self.0.len() > 1 { 544 | f.write_char(')')?; 545 | } 546 | Ok(()) 547 | } 548 | } 549 | 550 | impl Clone for Ops { 551 | fn clone(&self) -> Self { 552 | Self(self.0.clone()) 553 | } 554 | } 555 | 556 | impl PartialEq for Ops { 557 | fn eq(&self, other: &Self) -> bool { 558 | self.0 == other.0 559 | } 560 | } 561 | 562 | impl Eq for Ops {} 563 | 564 | impl PartialOrd for Ops { 565 | fn partial_cmp(&self, other: &Self) -> Option { 566 | Some(self.cmp(other)) 567 | } 568 | } 569 | 570 | impl Ord for Ops { 571 | fn cmp(&self, other: &Self) -> Ordering { 572 | let mut self_buf = Vec::new(); 573 | let mut other_buf = Vec::new(); 574 | self.write(&mut self_buf); 575 | other.write(&mut other_buf); 576 | self_buf.cmp(&other_buf) 577 | } 578 | } 579 | 580 | impl Hash for Ops { 581 | fn hash(&self, state: &mut H) { 582 | self.0.hash(state); 583 | } 584 | } 585 | 586 | #[derive(Serialize, Deserialize)] 587 | pub struct Op { 588 | pub flags: u8, 589 | pub value: u64, 590 | pub _k: PhantomData, 591 | } 592 | 593 | impl Op { 594 | pub const AND: u8 = 0b0100_0000; 595 | 596 | pub fn op(self, data: u64) -> bool { 597 | K::op(self.flags, data, self.value) 598 | } 599 | 600 | pub fn is_and(self) -> bool { 601 | self.flags & Self::AND != 0 602 | } 603 | pub fn is_or(self) -> bool { 604 | !self.is_and() 605 | } 606 | pub fn make_and(mut self) -> Self { 607 | self.flags |= Self::AND; 608 | self 609 | } 610 | pub fn make_or(mut self) -> Self { 611 | self.flags &= !Self::AND; 612 | self 613 | } 614 | pub fn and(self, op: Self) -> Ops { 615 | Ops::new(self).and(op) 616 | } 617 | pub fn or(self, op: Self) -> Ops { 618 | Ops::new(self).or(op) 619 | } 620 | 621 | fn write(self, buf: &mut Vec, eol: bool) { 622 | let op_pos = buf.len(); 623 | buf.push(0); 
624 | let len = match self.value { 625 | 0x0..=0xff => 0, 626 | 0x100..=0xffff => 1, 627 | 0x10000..=0xffffffff => 2, 628 | 0x100000000..=0xffffffffffffffff => 3, 629 | }; 630 | match len { 631 | 0 => buf.push(self.value as _), 632 | 1 => buf.extend(u16::to_be_bytes(self.value as _)), 633 | 2 => buf.extend(u32::to_be_bytes(self.value as _)), 634 | 3 => buf.extend(u64::to_be_bytes(self.value)), 635 | _ => unreachable!(), 636 | }; 637 | buf[op_pos] = (self.flags & K::FLAGS_MASK) | (len << 4) | (u8::from(eol) << 7); 638 | } 639 | 640 | async fn read(reader: &mut R) -> io::Result<(Self, bool)> { 641 | let flags = reader.read_u8().await?; 642 | let len = (flags & 0b0011_0000) >> 4; 643 | let value = match len { 644 | 0 => reader.read_u8().await?.into(), 645 | 1 => reader.read_u16().await?.into(), 646 | 2 => reader.read_u32().await?.into(), 647 | 3 => reader.read_u64().await?, 648 | _ => unreachable!(), 649 | }; 650 | let eol = flags & 0b1000_0000 != 0; 651 | let flags = flags & K::FLAGS_MASK; 652 | let _k = PhantomData; 653 | Ok((Self { flags, value, _k }, eol)) 654 | } 655 | } 656 | 657 | impl Op { 658 | pub fn num(flags: NumericFlags, value: u64) -> Self { 659 | Self { flags: flags as u8, value, _k: PhantomData } 660 | } 661 | pub fn lt(value: u64) -> Self { 662 | Self::num(NumericFlags::Lt, value) 663 | } 664 | pub fn gt(value: u64) -> Self { 665 | Self::num(NumericFlags::Gt, value) 666 | } 667 | pub fn eq(value: u64) -> Self { 668 | Self::num(NumericFlags::Eq, value) 669 | } 670 | pub fn le(value: u64) -> Self { 671 | Self::num(NumericFlags::Le, value) 672 | } 673 | pub fn ge(value: u64) -> Self { 674 | Self::num(NumericFlags::Ge, value) 675 | } 676 | pub fn ne(value: u64) -> Self { 677 | Self::num(NumericFlags::Ne, value) 678 | } 679 | } 680 | 681 | impl Op { 682 | pub const DONT_FRAG: u64 = 1; 683 | pub const IS_FRAG: u64 = 1 << 1; 684 | pub const FIRST_FRAG: u64 = 1 << 2; 685 | pub const LAST_FRAG: u64 = 1 << 3; 686 | 687 | pub const FIN: u64 = 1; 688 | pub const 
SYN: u64 = 1 << 1; 689 | pub const RST: u64 = 1 << 2; 690 | pub const PSH: u64 = 1 << 3; 691 | pub const ACK: u64 = 1 << 4; 692 | pub const URG: u64 = 1 << 5; 693 | pub const ECE: u64 = 1 << 6; 694 | pub const CWR: u64 = 1 << 7; 695 | 696 | pub fn bit(flags: BitmaskFlags, value: u64) -> Self { 697 | Self { flags: flags as u8, value, _k: PhantomData } 698 | } 699 | pub fn any(value: u64) -> Self { 700 | Self::bit(BitmaskFlags::Any, value) 701 | } 702 | pub fn not_any(value: u64) -> Self { 703 | Self::bit(BitmaskFlags::NotAny, value) 704 | } 705 | pub fn all(value: u64) -> Self { 706 | Self::bit(BitmaskFlags::All, value) 707 | } 708 | pub fn not_all(value: u64) -> Self { 709 | Self::bit(BitmaskFlags::NotAll, value) 710 | } 711 | } 712 | 713 | impl Debug for Op { 714 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 715 | write!(f, "Op({self})") 716 | } 717 | } 718 | 719 | impl Display for Op { 720 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 721 | if self.is_and() { 722 | f.write_str("&& ")?; 723 | } else { 724 | f.write_str("|| ")?; 725 | } 726 | K::fmt(f, self.flags, self.value) 727 | } 728 | } 729 | 730 | impl Clone for Op { 731 | fn clone(&self) -> Self { 732 | *self 733 | } 734 | } 735 | 736 | impl Copy for Op {} 737 | 738 | impl PartialEq for Op { 739 | fn eq(&self, other: &Self) -> bool { 740 | self.flags == other.flags && self.value == other.value 741 | } 742 | } 743 | 744 | impl Eq for Op {} 745 | 746 | impl Hash for Op { 747 | fn hash(&self, state: &mut H) { 748 | self.flags.hash(state); 749 | self.value.hash(state); 750 | } 751 | } 752 | 753 | #[derive(Debug, Clone, Copy, PartialEq, Eq, FromRepr)] 754 | #[repr(u8)] 755 | pub enum NumericFlags { 756 | False = 0b000, 757 | Lt = 0b100, 758 | Gt = 0b010, 759 | Eq = 0b001, 760 | Le = 0b101, 761 | Ge = 0b011, 762 | Ne = 0b110, 763 | True = 0b111, 764 | } 765 | 766 | #[derive(Serialize, Deserialize)] 767 | pub enum Numeric {} 768 | 769 | impl OpKind for Numeric { 770 | const FLAGS_MASK: u8 = 
0b0100_0111; 771 | 772 | fn op(flags: u8, data: u64, value: u64) -> bool { 773 | let mut result = false; 774 | result |= flags & 0b100 != 0 && data < value; 775 | result |= flags & 0b010 != 0 && data > value; 776 | result |= flags & 0b001 != 0 && data == value; 777 | result 778 | } 779 | 780 | fn fmt(f: &mut Formatter, flags: u8, value: u64) -> fmt::Result { 781 | use NumericFlags::*; 782 | match NumericFlags::from_repr(flags & NumericFlags::True as u8).unwrap() { 783 | False => f.write_str("false"), 784 | Lt => write!(f, "<{value}"), 785 | Gt => write!(f, ">{value}"), 786 | Eq => write!(f, "={value}"), 787 | Le => write!(f, "<={value}"), 788 | Ge => write!(f, ">={value}"), 789 | Ne => write!(f, "!={value}"), 790 | True => f.write_str("true"), 791 | } 792 | } 793 | } 794 | 795 | #[derive(Debug, Clone, Copy, PartialEq, Eq, FromRepr)] 796 | #[repr(u8)] 797 | pub enum BitmaskFlags { 798 | Any = 0b00, 799 | NotAny = 0b10, 800 | All = 0b01, 801 | NotAll = 0b11, 802 | } 803 | 804 | #[derive(Serialize, Deserialize)] 805 | pub enum Bitmask {} 806 | 807 | impl Bitmask { 808 | const NOT: u8 = 0b10; 809 | const MATCH: u8 = 0b01; 810 | } 811 | 812 | impl OpKind for Bitmask { 813 | const FLAGS_MASK: u8 = 0b0100_0011; 814 | 815 | fn op(flags: u8, d: u64, v: u64) -> bool { 816 | let result = if flags & Self::MATCH == 0 { 817 | d & v != 0 818 | } else { 819 | d & v == v 820 | }; 821 | if flags & Self::NOT == 0 { result } else { !result } 822 | } 823 | 824 | fn fmt(f: &mut Formatter, flags: u8, value: u64) -> fmt::Result { 825 | use BitmaskFlags::*; 826 | match BitmaskFlags::from_repr(flags & (Self::NOT | Self::MATCH)).unwrap() { 827 | Any => write!(f, "<0b{value:b}"), 828 | NotAny => write!(f, ">0b{value:b}"), 829 | All => write!(f, "=0b{value:b}"), 830 | NotAll => write!(f, "!=0b{value:b}"), 831 | } 832 | } 833 | } 834 | 835 | pub trait OpKind { 836 | const FLAGS_MASK: u8; 837 | fn op(flags: u8, data: u64, value: u64) -> bool; 838 | fn fmt(f: &mut Formatter, flags: u8, value: 
u64) -> fmt::Result; 839 | } 840 | 841 | #[derive(Debug, Error)] 842 | pub enum FlowError { 843 | #[error("invalid component")] 844 | Invalid, 845 | #[error("duplicate component {0:?}")] 846 | Duplicate(ComponentKind), 847 | #[error("components are not sorted")] 848 | Unsorted, 849 | #[error("unsupported component kind {0}")] 850 | UnsupportedKind(u8), 851 | #[error("IPv6 prefix component offset too big: {0} >= {1}")] 852 | PrefixOffsetTooBig(u8, u8), 853 | } 854 | 855 | #[cfg(test)] 856 | mod tests { 857 | use super::*; 858 | use test_case::test_case; 859 | 860 | #[tokio::test] 861 | async fn test_flowspec() -> anyhow::Result<()> { 862 | use Component::*; 863 | use ComponentKind as CK; 864 | 865 | let mut f = Flowspec::new_v6() 866 | .with(DstPrefix("::1:1234:5678:9800:0/104".parse()?, 63))? 867 | .with(DstPort(Op::ge(80).and(Op::le(443))))? 868 | .with(TcpFlags(Op::all(Op::SYN | Op::ACK).into()))?; 869 | 870 | f.insert(Component::SrcPrefixV4("10.0.0.0/8".parse()?)) 871 | .expect_err("IPv4 flowspec component should not be inserted to IPv6 flowspec"); 872 | 873 | let mut buf = Vec::new(); 874 | f.write(&mut buf); 875 | 876 | #[rustfmt::skip] 877 | let buf_expected = [ 878 | 18, 879 | CK::DstPrefix as u8, 0x68, 0x3f, 0x89, 0x1a, 0x2b, 0x3c, 0x4c, 0x00, 880 | CK::DstPort as u8, 0x03, 0x50, 0xd5, 0x01, 0xbb, 881 | CK::TcpFlags as u8, 0x81, (Op::SYN | Op::ACK).try_into().unwrap(), 882 | ]; 883 | 884 | println!("{f}"); 885 | println!("{buf:02x?}"); 886 | assert_eq!(buf, buf_expected); 887 | assert_eq!(f, Flowspec::read_v6(&mut &buf[..]).await?.unwrap()); 888 | 889 | Ok(()) 890 | } 891 | 892 | const OP_NUM: PhantomData = PhantomData; 893 | const OP_BIT: PhantomData = PhantomData; 894 | 895 | #[test_case(OP_NUM, &[0b00000011, 114, 0b01010100, 2, 2, 0b10000001, 1], &[1, 114, 200], &[0, 2, 514]; "n ge 114 AND n lt 514 OR n eq 1")] 896 | #[test_case(OP_BIT, &[0b10000001, 0b101], &[85, 1365, 65525, 65535], &[0, 1, 2, 114, 514]; "n bitand 0b101 eq 0b101")] 897 | #[tokio::test] 
898 | async fn test_ops(_op: PhantomData, mut seq: &[u8], aye: &[u64], nay: &[u64]) -> anyhow::Result<()> { 899 | let ops = Ops::::read(&mut seq).await?; 900 | aye.iter().for_each(|&n| assert!(ops.op(n), "!ops.op({n})")); 901 | nay.iter().for_each(|&n| assert!(!ops.op(n), "ops.op({n})")); 902 | Ok(()) 903 | } 904 | } 905 | -------------------------------------------------------------------------------- /src/bgp/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod flow; 2 | pub mod msg; 3 | pub mod nlri; 4 | pub mod route; 5 | 6 | use crate::args::RunArgs; 7 | use crate::kernel::{self, KernelAdapter}; 8 | use crate::net::{Afi, IpPrefixError, IpPrefixErrorKind}; 9 | use State::*; 10 | use either::Either; 11 | use flow::FlowError; 12 | use itertools::Itertools; 13 | use log::{debug, error, info, warn}; 14 | use msg::HeaderError::*; 15 | use msg::OpenError::*; 16 | use msg::{Message, MessageSend, Notification, OpenMessage, SendAndReturn, UpdateError}; 17 | use nlri::{Nlri, NlriContent, NlriError, NlriKind}; 18 | use num_integer::gcd; 19 | use replace_with::replace_with_or_abort; 20 | use route::Routes; 21 | use serde::{Deserialize, Serialize}; 22 | use smallvec::SmallVec; 23 | use std::borrow::Cow; 24 | use std::cmp::min; 25 | use std::fmt::Display; 26 | use std::future::{Future, pending}; 27 | use std::io; 28 | use std::net::{IpAddr, SocketAddr}; 29 | use std::rc::Rc; 30 | use strum::EnumDiscriminants; 31 | use thiserror::Error; 32 | use tokio::io::{AsyncRead, AsyncWrite, BufReader}; 33 | use tokio::net::TcpStream; 34 | use tokio::select; 35 | use tokio::time::{Duration, Instant, Interval, interval}; 36 | 37 | #[cfg(test)] 38 | use {crate::integration_tests::TestEvent, tokio::sync::mpsc}; 39 | 40 | /// A (currently passive only) BGP session. 
41 | /// 42 | /// Implemented RFCs: 43 | /// - RFC 4271: A Border Gateway Protocol 4 (BGP-4) \[partial\] 44 | /// - RFC 6793: BGP Support for Four-Octet Autonomous System (AS) Number Space 45 | /// - RFC 4760: Multiprotocol Extensions for BGP 46 | /// - RFC 2545: Use of BGP-4 Multiprotocol Extensions for IPv6 Inter-Domain 47 | /// Routing (?) 48 | /// - RFC 1997: BGP Communities Attribute 49 | /// - RFC 4360: BGP Extended Communities Attribute 50 | /// - RFC 5065: Autonomous System Confederations for BGP 51 | /// - RFC 5668: 4-Octet AS Specific BGP Extended Community 52 | /// - RFC 5701: IPv6 Address Specific BGP Extended Community Attribute 53 | /// - RFC 7606: Revised Error Handling for BGP UPDATE Messages 54 | /// - RFC 8092: BGP Large Communities Attribute 55 | /// - RFC 8955: Dissemination of Flow Specification Rules 56 | /// - RFC 8956: Dissemination of Flow Specification Rules for IPv6 57 | #[derive(Debug)] 58 | pub struct Session { 59 | config: RunArgs, 60 | state: State, 61 | routes: Routes, 62 | #[cfg(test)] 63 | event_tx: mpsc::Sender, 64 | } 65 | 66 | impl Session { 67 | pub async fn new(config: RunArgs, #[cfg(test)] event_tx: mpsc::Sender) -> Result { 68 | let kernel = if config.dry_run { 69 | KernelAdapter::Noop 70 | } else { 71 | #[cfg(linux)] 72 | let result = KernelAdapter::linux(config.kernel.clone()).await?; 73 | #[cfg(not(kernel_supported))] 74 | let result = KernelAdapter::Noop; 75 | result 76 | }; 77 | Ok(Self { 78 | config, 79 | state: Active, 80 | routes: Routes::new(kernel), 81 | #[cfg(test)] 82 | event_tx, 83 | }) 84 | } 85 | 86 | pub fn start(&mut self) { 87 | self.state = Active; 88 | } 89 | 90 | pub async fn stop(&mut self) -> Result<()> { 91 | match &mut self.state { 92 | Idle | Connect | Active => {} 93 | OpenSent { stream } | OpenConfirm { stream, .. } | Established { stream, .. 
} => { 94 | Notification::Cease.send(stream).await?; 95 | } 96 | } 97 | self.state = Idle; 98 | Ok(()) 99 | } 100 | 101 | pub fn config(&self) -> &RunArgs { 102 | &self.config 103 | } 104 | pub fn state(&self) -> &State { 105 | &self.state 106 | } 107 | pub fn routes(&self) -> &Routes { 108 | &self.routes 109 | } 110 | 111 | pub async fn accept(&mut self, mut stream: S, addr: SocketAddr) -> Result<()> { 112 | let ip = addr.ip(); 113 | if !self.config.allowed_ips.iter().any(|x| x.contains(ip)) { 114 | return Err(Error::UnacceptableAddr(ip)); 115 | } else if !matches!(self.state, Active) { 116 | return Err(Error::AlreadyRunning); 117 | } 118 | let open = OpenMessage::with_caps( 119 | self.config.local_as, 120 | self.config.hold_time, 121 | self.config.router_id.to_bits(), 122 | ); 123 | open.send(&mut stream).await?; 124 | replace_with_or_abort(&mut self.state, |_| OpenSent { stream }); 125 | info!("accepting BGP connection from {addr}"); 126 | Ok(()) 127 | } 128 | 129 | pub async fn process(&mut self) -> Result<()> { 130 | let result = self.process_inner().await; 131 | if result.is_err() { 132 | self.state = Active; 133 | self.routes.withdraw_all().await; 134 | } 135 | result 136 | } 137 | 138 | async fn process_inner(&mut self) -> Result<()> { 139 | fn bad_type<'a>(msg: Message, stream: &'a mut (impl AsyncWrite + Unpin)) -> impl Future> + 'a { 140 | BadType(msg.kind() as u8).send_and_return(stream) 141 | } 142 | 143 | match &mut self.state { 144 | Idle | Connect | Active => pending().await, 145 | OpenSent { stream } => match Message::read(stream).await? 
{ 146 | Message::Open(remote_open) => { 147 | if !remote_open.bgp_mp.contains(&(Afi::Ipv4 as _, NlriKind::Flow as _)) 148 | && !remote_open.bgp_mp.contains(&(Afi::Ipv6 as _, NlriKind::Flow as _)) 149 | { 150 | warn!("remote does not seem to support flowspec, is it enabled?"); 151 | } 152 | if !remote_open.supports_4b_asn { 153 | error!("remote does not support 4-octet AS number"); 154 | Unspecific.send_and_return(stream).await?; 155 | } else if self.config.remote_as.is_some_and(|x| remote_open.my_as != x) { 156 | BadPeerAs.send_and_return(stream).await?; 157 | } else if remote_open.hold_time == 1 || remote_open.hold_time == 2 { 158 | UnacceptableHoldTime.send_and_return(stream).await?; 159 | } else { 160 | Message::Keepalive.send(stream).await?; 161 | replace_with_or_abort(&mut self.state, |this| { 162 | let OpenSent { stream } = this else { unreachable!() }; 163 | let hold_time = min(self.config.hold_time, remote_open.hold_time); 164 | let timers = Timers::new(hold_time); 165 | OpenConfirm { stream, remote_open, timers } 166 | }); 167 | } 168 | } 169 | other => bad_type(other, stream).await?, 170 | }, 171 | OpenConfirm { stream, timers, .. } => select! { 172 | msg = Message::read(stream) => match msg? { 173 | Message::Keepalive => { 174 | replace_with_or_abort(&mut self.state, |this| { 175 | let OpenConfirm { stream, remote_open, mut timers } = this else { 176 | unreachable!() 177 | }; 178 | timers.as_mut().map(Timers::update_hold); 179 | Established { stream, remote_open, timers } 180 | }); 181 | info!("established"); 182 | } 183 | other => bad_type(other, stream).await?, 184 | }, 185 | inst = timers.as_mut().unwrap().tick(), if timers.is_some() => { 186 | timers.as_mut().unwrap().process_tick(inst, stream).await?; 187 | } 188 | }, 189 | Established { stream, timers, .. } => select! 
{ 190 | msg = Message::read(stream) => { 191 | match msg { 192 | Ok(Message::Update(msg)) => if let Some((afi, safi)) = msg.is_end_of_rib() { 193 | debug!("received End-of-RIB of ({afi}, {safi:?})"); 194 | #[cfg(test)] 195 | let _ = self.event_tx.send(TestEvent::EndOfRib(afi, safi)).await; 196 | 197 | } else { 198 | debug!("received update: {msg:?}"); 199 | #[cfg(test)] 200 | let _ = self.event_tx.send(TestEvent::Update(msg.clone())).await; 201 | 202 | // here `msg` is partially moved 203 | if msg.nlri.is_some() || msg.old_nlri.is_some() { 204 | let route_info = Rc::new(msg.route_info); 205 | for n in msg.nlri.into_iter().chain(msg.old_nlri) { 206 | self.routes.commit(n, route_info.clone()).await; 207 | } 208 | } 209 | if msg.withdrawn.is_some() || msg.old_withdrawn.is_some() { 210 | for n in msg.withdrawn.into_iter().chain(msg.old_withdrawn) { 211 | self.routes.withdraw(n).await; 212 | } 213 | } 214 | }, 215 | Err(Error::Withdraw(error, nlris)) => { 216 | error!("{error}"); 217 | for n in nlris { 218 | self.routes.withdraw(n).await; 219 | } 220 | }, 221 | Ok(Message::Keepalive) => timers.as_mut().map(Timers::update_hold).unwrap_or(()), 222 | other => bad_type(other?, stream).await?, 223 | }; 224 | } 225 | inst = timers.as_mut().unwrap().tick(), if timers.is_some() => { 226 | timers.as_mut().unwrap().process_tick(inst, stream).await?; 227 | } 228 | result = self.routes.process() => result?, 229 | }, 230 | } 231 | Ok(()) 232 | } 233 | 234 | pub async fn terminate(&mut self) { 235 | self.routes.terminate().await; 236 | } 237 | } 238 | 239 | #[derive(Debug)] 240 | pub enum State { 241 | Idle, 242 | Connect, // never used in passive mode 243 | Active, 244 | OpenSent { stream: S }, 245 | OpenConfirm { stream: S, remote_open: OpenMessage<'static>, timers: Option }, 246 | Established { stream: S, remote_open: OpenMessage<'static>, timers: Option }, 247 | } 248 | 249 | impl State { 250 | pub fn kind(&self) -> StateKind { 251 | match self { 252 | Idle => StateKind::Idle, 
Connect => StateKind::Connect,
            Active => StateKind::Active,
            OpenSent { .. } => StateKind::OpenSent,
            OpenConfirm { .. } => StateKind::OpenConfirm,
            Established { .. } => StateKind::Established,
        }
    }
}

impl State<BufReader<TcpStream>> {
    /// Builds a [`StateView`] of the current state: the peer's OPEN message is borrowed,
    /// and the socket addresses are included where a stream exists (`None` if the lookup fails).
    pub fn view(&self) -> StateView {
        match self {
            Idle => StateView::Idle,
            Connect => StateView::Connect,
            Active => StateView::Active,
            OpenSent { .. } => StateView::OpenSent,
            OpenConfirm { stream, remote_open, .. } => StateView::OpenConfirm {
                remote_open: Cow::Borrowed(remote_open),
                local_addr: stream.get_ref().local_addr().ok(),
                remote_addr: stream.get_ref().peer_addr().ok(),
            },
            Established { stream, remote_open, .. } => StateView::Established {
                remote_open: Cow::Borrowed(remote_open),
                local_addr: stream.get_ref().local_addr().ok(),
                remote_addr: stream.get_ref().peer_addr().ok(),
            },
        }
    }
}

/// Hold and keepalive timers, both checked from one periodic clock.
///
/// Each timer is stored as `(period, next deadline)`.
#[derive(Debug)]
pub struct Timers {
    clock: Interval,
    hold_timer: (Duration, Instant),
    keepalive_timer: (Duration, Instant),
}

impl Timers {
    /// Creates timers for the given hold time in seconds.
    ///
    /// Returns `None` when `hold_time` is 0, i.e. timers are disabled.
    pub fn new(hold_time: u16) -> Option<Self> {
        (hold_time != 0).then(|| {
            let now = Instant::now();
            // RFC 4271 section 4.4: KEEPALIVEs must not be sent more often than once per
            // second, so never let the keepalive period drop to zero.
            let keepalive_time = (hold_time / 3).max(1);
            let hold = Duration::from_secs(hold_time.into());
            let keepalive = Duration::from_secs(keepalive_time.into());
            // Halve the gcd as a `Duration`, not as integer seconds: the gcd is often 1
            // (hold time 3, 4, 5, 100, ...) and `1 / 2 == 0` seconds would make
            // `interval` panic on a zero period.
            let period = Duration::from_secs(u64::from(gcd(hold_time, keepalive_time))) / 2;
            Self {
                clock: interval(period),
                hold_timer: (hold, now + hold),
                keepalive_timer: (keepalive, now + keepalive),
            }
        })
    }

    /// Re-arms the hold timer; called whenever the peer proves it is alive.
    pub fn update_hold(&mut self) {
        let (dur, next) = &mut self.hold_timer;
        *next = Instant::now() + *dur;
    }

    /// Waits for the next clock tick and returns its instant.
    pub async fn tick(&mut self) -> Instant {
        self.clock.tick().await
    }

    /// Handles a clock tick that fired at `inst`.
    ///
    /// Sends a hold-timer-expired notification via `send_and_return` once the hold deadline
    /// has passed, and a KEEPALIVE once the keepalive deadline has passed. The keepalive
    /// deadline is then re-armed so the next KEEPALIVE goes out one full period later
    /// rather than on every following tick.
    ///
    /// # Errors
    /// Propagates whatever sending the notification or the KEEPALIVE returns.
    pub async fn process_tick(&mut self, inst: Instant, stream: &mut (impl AsyncWrite + Unpin)) -> Result<()> {
        if self.hold_timer.1 <= inst {
            Notification::HoldTimerExpired.send_and_return(stream).await?;
        }
        let (period, deadline) = &mut self.keepalive_timer;
        if *deadline <= inst {
            Message::Keepalive.send(stream).await?;
            *deadline = inst + *period;
        }
        Ok(())
    }
}

/// Serializable snapshot of a session state, without the stream or timers.
#[derive(Debug, Clone, EnumDiscriminants, Serialize, Deserialize)]
#[strum_discriminants(name(StateKind))]
pub enum StateView<'a> {
    Idle,
    Connect,
    Active,
    OpenSent,
    OpenConfirm {
        remote_open: Cow<'a, OpenMessage<'a>>,
        local_addr: Option<SocketAddr>,
        remote_addr: Option<SocketAddr>,
    },
    Established {
        remote_open: Cow<'a, OpenMessage<'a>>,
        local_addr: Option<SocketAddr>,
        remote_addr: Option<SocketAddr>,
    },
}

impl StateView<'_> {
    pub fn kind(&self) -> StateKind {
        self.into()
    }
}

// Utilities

/// Runs `extend` on `buf` and back-fills a one-byte length prefix covering what it wrote.
///
/// # Panics
/// Panics if `extend` writes more than 255 bytes.
#[inline]
fn extend_with_u8_len<F: FnOnce(&mut Vec<u8>)>(buf: &mut Vec<u8>, extend: F) {
    let len_pos = buf.len();
    buf.push(0);
    extend(buf);
    let len = buf.len() - len_pos - 1;
    buf[len_pos] = len.try_into().expect("length should fit in u8");
}

/// Runs `extend` on `buf` and back-fills a big-endian two-byte length prefix covering
/// what it wrote.
///
/// # Panics
/// Panics if `extend` writes more than 65535 bytes.
#[inline]
fn extend_with_u16_len<F: FnOnce(&mut Vec<u8>)>(buf: &mut Vec<u8>, extend: F) {
    let len_pos = buf.len();
    buf.extend([0; 2]);
    extend(buf);
    let len = u16::try_from(buf.len() - len_pos - 2).expect("length should fit in u16");
    buf[len_pos..len_pos + 2].copy_from_slice(&len.to_be_bytes())
}

#[derive(Debug, Error)]
pub enum Error {
    #[error("address {0} not acceptable")]
    UnacceptableAddr(IpAddr),
    #[error("session is already running")]
    AlreadyRunning,

    #[error(transparent)]
    Notification(#[from] Notification<'static>),
    #[error("remote said: {0}")]
    Remote(Notification<'static>),
    #[error("withdraw {}: {}", print_withdraw(.1.iter()), .0)]
    Withdraw(UpdateError<'static>, SmallVec<[Nlri; 1]>),

    #[error(transparent)]
    Io(#[from] io::Error),

#[error(transparent)] 387 | IpPrefix(IpPrefixError), 388 | #[error(transparent)] 389 | Flow(#[from] FlowError), 390 | #[error(transparent)] 391 | Nlri(#[from] NlriError), 392 | 393 | #[error(transparent)] 394 | Kernel(#[from] kernel::Error), 395 | } 396 | 397 | impl From for Error { 398 | fn from(e: IpPrefixError) -> Self { 399 | match e.kind { 400 | IpPrefixErrorKind::Io(e) => Self::Io(e), 401 | _ => Self::IpPrefix(e), 402 | } 403 | } 404 | } 405 | 406 | fn print_withdraw<'a>(nlris: impl Iterator + 'a) -> impl Display + 'a { 407 | use Either::*; 408 | use NlriContent::*; 409 | nlris 410 | .flat_map(|x| match &x.content { 411 | Unicast { prefixes, .. } => Left(prefixes.iter().map(|y| Box::new(y) as Box)), 412 | Flow { specs } => Right(specs.iter().map(|y| Box::new(y) as Box)), 413 | }) 414 | .format(", ") 415 | } 416 | 417 | pub type Result = std::result::Result; 418 | -------------------------------------------------------------------------------- /src/bgp/nlri.rs: -------------------------------------------------------------------------------- 1 | //! Network Layer Reachability Information (NLRI). 2 | 3 | use super::flow::Flowspec; 4 | use super::msg::{PF_EXT_LEN, PF_OPTIONAL, PathAttr}; 5 | use super::{Result, extend_with_u16_len}; 6 | use crate::net::{Afi, IpPrefix}; 7 | use serde::{Deserialize, Serialize}; 8 | use smallvec::SmallVec; 9 | use std::collections::BTreeSet; 10 | use std::fmt::{self, Display, Formatter}; 11 | use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; 12 | use strum::{EnumDiscriminants, FromRepr}; 13 | use thiserror::Error; 14 | use tokio::io::{AsyncRead, AsyncReadExt}; 15 | 16 | /// Network Layer Reachability Information (NLRI). 17 | #[derive(Debug, Clone, PartialEq, Eq)] 18 | pub struct Nlri { 19 | pub afi: Afi, 20 | pub content: NlriContent, 21 | } 22 | 23 | /// NLRI contents, defined by (AFI, SAFI) tuple. 
#[derive(Debug, Clone, PartialEq, Eq, EnumDiscriminants)]
#[strum_discriminants(name(NlriKind), derive(PartialOrd, Ord, FromRepr, Serialize, Deserialize))]
#[repr(u8)]
pub enum NlriContent {
    // We can probably use LPM trie, but B-tree is fine for now
    Unicast { prefixes: BTreeSet<IpPrefix>, next_hop: NextHop } = 1,
    Flow { specs: SmallVec<[Flowspec; 4]> } = 133,
}

impl Nlri {
    /// Builds a unicast NLRI.
    ///
    /// A missing `next_hop` defaults to the unspecified address of `afi`.
    ///
    /// # Errors
    /// Returns [`NlriError::MultipleAddrFamilies`] if any prefix is not of family `afi`.
    pub fn new_route(
        afi: Afi,
        prefixes: BTreeSet<IpPrefix>,
        next_hop: Option<NextHop>,
    ) -> Result<Self, NlriError> {
        for prefix in &prefixes {
            if prefix.afi() != afi {
                return Err(NlriError::MultipleAddrFamilies(afi));
            }
        }
        let next_hop = match next_hop {
            Some(next_hop) => next_hop,
            _ if afi == Afi::Ipv6 => NextHop::V6(Ipv6Addr::UNSPECIFIED, None),
            _ => NextHop::V4(Ipv4Addr::UNSPECIFIED),
        };
        Ok(Self { afi, content: NlriContent::Unicast { prefixes, next_hop } })
    }

    /// Builds a flowspec NLRI.
    ///
    /// # Errors
    /// Returns [`NlriError::MultipleAddrFamilies`] if any spec is not of family `afi`.
    pub fn new_flow(afi: Afi, specs: SmallVec<[Flowspec; 4]>) -> Result<Self, NlriError> {
        for spec in &specs {
            if spec.afi() != afi {
                return Err(NlriError::MultipleAddrFamilies(afi));
            }
        }
        Ok(Self { afi, content: NlriContent::Flow { specs } })
    }

    /// Returns the prefixes and next hop if this is a unicast NLRI.
    pub fn into_unicast(self) -> Option<(BTreeSet<IpPrefix>, NextHop)> {
        match self.content {
            NlriContent::Unicast { prefixes, next_hop } => Some((prefixes, next_hop)),
            _ => None,
        }
    }

    /// Returns the flow specifications if this is a flowspec NLRI.
    pub fn into_flow(self) -> Option<SmallVec<[Flowspec; 4]>> {
        match self.content {
            NlriContent::Flow { specs } => Some(specs),
            _ => None,
        }
    }

    /// Writes this NLRI as an MP_REACH_NLRI path attribute.
    pub fn write_mp_reach(&self, buf: &mut Vec<u8>) {
        self.write_mp(buf, true);
    }
    /// Writes this NLRI as an MP_UNREACH_NLRI path attribute.
    pub fn write_mp_unreach(&self, buf: &mut Vec<u8>) {
        self.write_mp(buf, false);
    }
    /// Writes this NLRI as a multiprotocol path attribute: flags, type code, two-byte
    /// length, then the value (AFI, SAFI, next hop and reserved byte when `reach`, NLRI).
    ///
    /// The AFI is part of the attribute value, so it is written inside the
    /// length-prefixed region; this is also the layout `read_mp` parses.
    pub fn write_mp(&self, buf: &mut Vec<u8>, reach: bool) {
        buf.extend([
            PF_OPTIONAL | PF_EXT_LEN,
            if reach {
                PathAttr::MpReachNlri
            } else {
                PathAttr::MpUnreachNlri
            } as u8,
        ]);
        extend_with_u16_len(buf, |buf| {
            buf.extend(u16::to_be_bytes(self.afi as _));
            match &self.content {
                NlriContent::Unicast { prefixes, next_hop } => {
                    buf.push(NlriKind::Unicast as u8);
                    if reach {
                        next_hop.write_mp(buf);
                        buf.push(0); // reserved
                    }
                    prefixes.iter().for_each(|p| p.write(buf));
                }
                NlriContent::Flow { specs } => {
                    buf.push(NlriKind::Flow as u8);
                    if reach {
                        buf.extend([0; 2]); // null next hop, reserved
                    }
                    specs.iter().for_each(|s| s.write(buf));
                }
            }
        });
    }

    /// Parses the value of an MP_REACH_NLRI attribute.
    pub async fn read_mp_reach<R: AsyncRead + Unpin>(reader: &mut R) -> Result<Self> {
        Self::read_mp(reader, true).await
    }
    /// Parses the value of an MP_UNREACH_NLRI attribute.
    pub async fn read_mp_unreach<R: AsyncRead + Unpin>(reader: &mut R) -> Result<Self> {
        Self::read_mp(reader, false).await
    }
    /// Parses a multiprotocol attribute value starting at the AFI field.
    ///
    /// With `reach` set, a next hop and a reserved byte are read after the SAFI;
    /// otherwise a placeholder next hop is synthesized for every SAFI except flowspec.
    pub async fn read_mp<R: AsyncRead + Unpin>(reader: &mut R, reach: bool) -> Result<Self> {
        let afi = reader.read_u16().await?;
        let safi = reader.read_u8().await?;
        let next_hop;
        if reach {
            next_hop = NextHop::read_mp(reader).await?;
            let _reserved = reader.read_u8().await?;
        } else {
            next_hop = (safi != NlriKind::Flow as u8).then_some(NextHop::V6(Ipv6Addr::UNSPECIFIED, None));
        };
        match (Afi::from_repr(afi), NlriKind::from_repr(safi), next_hop) {
            (Some(afi @ Afi::Ipv4), Some(NlriKind::Unicast), Some(next_hop))
            | (Some(afi @ Afi::Ipv6), Some(NlriKind::Unicast), Some(next_hop @ NextHop::V6(..))) => {
                let mut prefixes = BTreeSet::new();
                while let Some((prefix, _)) = IpPrefix::read(reader, afi).await? {
                    prefixes.insert(prefix);
                }
                Ok(Self::new_route(afi, prefixes, Some(next_hop))?)
            }
            (Some(afi), Some(NlriKind::Flow), None) => {
                let mut specs = SmallVec::new_const();
                while let Some(spec) = Flowspec::read(reader, afi).await? {
                    specs.push(spec);
                }
                Ok(Self::new_flow(afi, specs)?)
141 | } 142 | (Some(afi), Some(kind @ NlriKind::Unicast), None) | (Some(afi), Some(kind @ NlriKind::Flow), Some(_)) => { 143 | Err(NlriError::InvalidNextHop { afi, kind, next_hop }.into()) 144 | } 145 | _ => Err(NlriError::UnknownTuple(afi, safi).into()), 146 | } 147 | } 148 | } 149 | 150 | /// Next hop address. 151 | /// 152 | /// IPv6 next hop address may include a link-local IPv6 address. 153 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] 154 | pub enum NextHop { 155 | V4(Ipv4Addr), 156 | V6(Ipv6Addr, Option), 157 | } 158 | 159 | impl NextHop { 160 | fn write_mp(&self, buf: &mut Vec) { 161 | match self { 162 | Self::V4(x) => { 163 | buf.push(4); 164 | buf.extend(x.octets()); 165 | } 166 | Self::V6(x, Some(y)) => { 167 | buf.push(32); 168 | buf.extend(x.octets()); 169 | buf.extend(y.octets()); 170 | } 171 | Self::V6(x, None) => { 172 | buf.push(16); 173 | buf.extend(x.octets()); 174 | } 175 | } 176 | } 177 | 178 | async fn read_mp(reader: &mut R) -> Result> { 179 | let len = reader.read_u8().await?; 180 | match len { 181 | 0 => Ok(None), 182 | 4 => Ok(Some(Self::V4(reader.read_u32().await?.into()))), 183 | 16 => Ok(Some(Self::V6(reader.read_u128().await?.into(), None))), 184 | 32 => Ok(Some(Self::V6( 185 | reader.read_u128().await?.into(), 186 | Some(reader.read_u128().await?.into()), 187 | ))), 188 | _ => Err(NlriError::InvalidNextHopLen(len).into()), 189 | } 190 | } 191 | } 192 | 193 | impl Display for NextHop { 194 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 195 | match self { 196 | NextHop::V4(ip) => Display::fmt(ip, f), 197 | NextHop::V6(ip, None) => Display::fmt(ip, f), 198 | NextHop::V6(ip, Some(ll)) => write!(f, "{ip} ({ll})"), 199 | } 200 | } 201 | } 202 | 203 | impl From for NextHop { 204 | fn from(ip: IpAddr) -> Self { 205 | match ip { 206 | IpAddr::V4(ip) => ip.into(), 207 | IpAddr::V6(ip) => ip.into(), 208 | } 209 | } 210 | } 211 | 212 | impl From for NextHop { 213 | fn from(ip: Ipv4Addr) -> Self { 214 | 
Self::V4(ip) 215 | } 216 | } 217 | 218 | impl From for NextHop { 219 | fn from(ip: Ipv6Addr) -> Self { 220 | if let [0xfe80, ..] = ip.segments() { 221 | Self::V6([0; 8].into(), Some(ip)) 222 | } else { 223 | Self::V6(ip, None) 224 | } 225 | } 226 | } 227 | 228 | impl From<[Ipv6Addr; 2]> for NextHop { 229 | fn from(ips: [Ipv6Addr; 2]) -> Self { 230 | Self::V6(ips[0], Some(ips[1])) 231 | } 232 | } 233 | 234 | #[derive(Debug, Clone, Error)] 235 | pub enum NlriError { 236 | #[error("{0} NLRI contains {n} information", n = if *.0 == Afi::Ipv6 { Afi::Ipv4 } else { Afi::Ipv6 })] 237 | MultipleAddrFamilies(Afi), 238 | 239 | #[error("NLRI ({afi}, {kind:?}) contains invalid next hop: {next_hop:?}")] 240 | InvalidNextHop { afi: Afi, kind: NlriKind, next_hop: Option }, 241 | 242 | #[error("invalid next hop length: {0}")] 243 | InvalidNextHopLen(u8), 244 | 245 | #[error("unknown (AFI, SAFI) tuple: ({0}, {1})")] 246 | UnknownTuple(u16, u8), 247 | } 248 | -------------------------------------------------------------------------------- /src/bgp/route.rs: -------------------------------------------------------------------------------- 1 | use super::flow::Flowspec; 2 | use super::nlri::{NextHop, Nlri, NlriContent}; 3 | use crate::kernel::{self, Kernel, KernelAdapter, KernelHandle}; 4 | use crate::net::IpPrefix; 5 | use crate::util::{BOLD, FG_BLUE_BOLD, FG_GREEN_BOLD, MaybeRc, RESET}; 6 | use either::Either; 7 | use itertools::Itertools; 8 | use log::{Level, LevelFilter, warn}; 9 | use serde::{Deserialize, Serialize}; 10 | use smallvec::SmallVec; 11 | use std::borrow::Cow; 12 | use std::collections::btree_map::Entry; 13 | use std::collections::{BTreeMap, BTreeSet}; 14 | use std::fmt::{self, Debug, Display, Formatter}; 15 | use std::mem::swap; 16 | use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; 17 | use std::rc::Rc; 18 | use strum::{EnumDiscriminants, FromRepr}; 19 | 20 | /// Route storage for a session. 
21 | #[derive(Debug, Serialize, Deserialize)] 22 | pub struct Routes { 23 | unicast: BTreeMap>)>, 24 | flow: BTreeMap>)>, 25 | kernel: KernelAdapter, 26 | } 27 | 28 | impl Routes { 29 | pub fn new(kernel: KernelAdapter) -> Self { 30 | Self { unicast: BTreeMap::new(), flow: BTreeMap::new(), kernel } 31 | } 32 | 33 | pub async fn commit(&mut self, nlri: Nlri, info: Rc>) { 34 | match nlri.content { 35 | NlriContent::Unicast { prefixes, next_hop } => self 36 | .unicast 37 | .extend(prefixes.into_iter().map(|p| (p, (next_hop, MaybeRc::Rc(info.clone()))))), 38 | NlriContent::Flow { specs } => { 39 | for spec in specs { 40 | let before = self.flow.range(&spec..).next().map(|(_, (handle, _))| handle); 41 | let id = match self.kernel.apply(&spec, before, &info).await { 42 | Ok(id) => id, 43 | Err(error) => { 44 | warn!("flowspec {spec} rejected: {error}"); 45 | self.withdraw_spec(spec).await; 46 | continue; 47 | } 48 | }; 49 | match self.flow.entry(spec) { 50 | Entry::Vacant(e) => { 51 | e.insert((id, MaybeRc::Rc(info.clone()))); 52 | } 53 | Entry::Occupied(mut e) => { 54 | let (id, _) = e.insert((id, MaybeRc::Rc(info.clone()))); 55 | self.kernel.remove(&id).await; 56 | } 57 | } 58 | } 59 | } 60 | } 61 | } 62 | 63 | pub async fn withdraw(&mut self, nlri: Nlri) { 64 | match nlri.content { 65 | NlriContent::Unicast { prefixes, .. 
} => prefixes 66 | .into_iter() 67 | .for_each(|p| self.unicast.remove(&p).map(|_| ()).unwrap_or(())), 68 | NlriContent::Flow { specs } => { 69 | for s in specs { 70 | self.withdraw_spec(s).await; 71 | } 72 | } 73 | } 74 | } 75 | 76 | pub async fn withdraw_all(&mut self) { 77 | self.unicast.clear(); 78 | let mut flow = BTreeMap::new(); 79 | swap(&mut flow, &mut self.flow); 80 | for (_, (handle, _)) in flow { 81 | self.kernel.remove(&handle).await; 82 | } 83 | } 84 | 85 | async fn withdraw_spec(&mut self, spec: Flowspec) { 86 | let Some((handle, _)) = self.flow.remove(&spec) else { 87 | return; 88 | }; 89 | self.kernel.remove(&handle).await; 90 | } 91 | 92 | pub async fn process(&mut self) -> kernel::Result<()> { 93 | self.kernel.process().await 94 | } 95 | 96 | pub fn print(&self, verbosity: LevelFilter) { 97 | fn print_info(info: &RouteInfo) { 98 | println!(" {BOLD}Origin:{RESET} {}", info.origin); 99 | if !info.as_path.is_empty() { 100 | println!(" {BOLD}AS Path:{RESET} {}", info.as_path.iter().format(", ")); 101 | } 102 | if !info.comm.is_empty() { 103 | println!(" {BOLD}Communities:{RESET} {}", info.comm.iter().format(", ")); 104 | } 105 | if !info.ext_comm.is_empty() { 106 | println!( 107 | " {BOLD}Extended Communities:{RESET} {}", 108 | info.ext_comm.iter().format(", "), 109 | ); 110 | } 111 | if !info.ipv6_ext_comm.is_empty() { 112 | println!( 113 | " {BOLD}IPv6 Specific Extended Communities:{RESET} {}", 114 | info.ipv6_ext_comm.iter().format(", "), 115 | ); 116 | } 117 | if !info.large_comm.is_empty() { 118 | println!( 119 | " {BOLD}Large Communities:{RESET} {}", 120 | info.large_comm.iter().format(", "), 121 | ); 122 | } 123 | } 124 | 125 | if verbosity >= Level::Debug { 126 | for (prefix, (next_hop, info)) in &self.unicast { 127 | println!("{FG_BLUE_BOLD}Unicast{RESET} {prefix}"); 128 | println!(" {BOLD}Next Hop:{RESET} {next_hop}"); 129 | print_info(info); 130 | println!(); 131 | } 132 | } 133 | 134 | for (spec, (index, info)) in &self.flow { 135 | 
println!("{FG_GREEN_BOLD}Flowspec{RESET} {spec}"); 136 | if verbosity >= Level::Debug { 137 | println!(" {BOLD}Kernel Rule ID:{RESET} {index}"); 138 | print!(" {BOLD}Hex Representation:{RESET} "); 139 | let mut buf = Vec::new(); 140 | spec.write(&mut buf); 141 | println!("{:02x}", buf.iter().format("")); 142 | } 143 | print_info(info); 144 | println!(); 145 | } 146 | } 147 | 148 | pub async fn terminate(&mut self) { 149 | let mut kernel = KernelAdapter::Noop; 150 | std::mem::swap(&mut kernel, &mut self.kernel); 151 | kernel.terminate().await; 152 | } 153 | } 154 | 155 | #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] 156 | pub struct RouteInfo<'a> { 157 | pub origin: Origin, 158 | 159 | /// AS path, stored in reverse for easy prepending. 160 | pub as_path: SmallVec<[AsSegment; 1]>, 161 | 162 | pub comm: BTreeSet, 163 | pub ext_comm: BTreeSet, 164 | pub ipv6_ext_comm: BTreeSet, 165 | pub large_comm: BTreeSet, 166 | 167 | pub med: Option, 168 | pub local_pref: Option, 169 | pub atomic_aggregate: bool, 170 | pub aggregator: Option<(u16, u32)>, 171 | 172 | /// Transitive but unrecognized path attributes. 
173 | pub other_attrs: BTreeMap>, 174 | } 175 | 176 | impl RouteInfo<'_> { 177 | pub fn is_empty(&self) -> bool { 178 | self.as_path.is_empty() 179 | && self.comm.is_empty() 180 | && self.ext_comm.is_empty() 181 | && self.ipv6_ext_comm.is_empty() 182 | && self.large_comm.is_empty() 183 | && self.other_attrs.is_empty() 184 | } 185 | } 186 | 187 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, FromRepr, Serialize, Deserialize)] 188 | #[repr(u8)] 189 | pub enum Origin { 190 | Igp = 0, 191 | Egp = 1, 192 | Incomplete = 2, 193 | } 194 | 195 | impl Display for Origin { 196 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 197 | match self { 198 | Self::Igp => f.write_str("IGP"), 199 | Self::Egp => f.write_str("EGP"), 200 | Self::Incomplete => f.write_str("incomplete"), 201 | } 202 | } 203 | } 204 | 205 | impl Default for Origin { 206 | fn default() -> Self { 207 | Self::Incomplete 208 | } 209 | } 210 | 211 | #[derive(Debug, Clone, EnumDiscriminants, PartialEq, Eq, Serialize, Deserialize)] 212 | #[strum_discriminants(name(AsSegmentKind), derive(FromRepr))] 213 | #[repr(u8)] 214 | pub enum AsSegment { 215 | Set(BTreeSet) = 1, 216 | Sequence(SmallVec<[u32; 4]>) = 2, 217 | ConfedSequence(SmallVec<[u32; 4]>) = 3, 218 | ConfedSet(BTreeSet) = 4, 219 | } 220 | 221 | impl AsSegment { 222 | pub fn kind(&self) -> AsSegmentKind { 223 | self.into() 224 | } 225 | 226 | pub fn write(&self, buf: &mut Vec) { 227 | buf.extend([self.kind() as u8, self.as_count()]); 228 | buf.extend(self.iter().flat_map(u32::to_be_bytes)); 229 | } 230 | 231 | pub fn as_count(&self) -> u8 { 232 | match self { 233 | Self::Set(set) | Self::ConfedSet(set) => set.len().try_into().unwrap(), 234 | Self::Sequence(seq) | Self::ConfedSequence(seq) => seq.len().try_into().unwrap(), 235 | } 236 | } 237 | 238 | pub fn iter(&self) -> impl Iterator + '_ { 239 | match self { 240 | Self::Set(set) | Self::ConfedSet(set) => Either::Left(set.iter().copied()), 241 | Self::Sequence(seq) | Self::ConfedSequence(seq) => 
Either::Right(seq.iter().copied()), 242 | } 243 | } 244 | 245 | pub fn bytes_len(&self) -> u16 { 246 | (self.as_count() * 4 + 2).into() 247 | } 248 | } 249 | 250 | impl Display for AsSegment { 251 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 252 | match self { 253 | Self::Set(set) => Debug::fmt(set, f), 254 | Self::Sequence(seq) => Display::fmt(&seq.iter().format(", "), f), 255 | Self::ConfedSequence(seq) => write!(f, "Confed {seq:?}"), 256 | Self::ConfedSet(set) => write!(f, "Confed {set:?}"), 257 | } 258 | } 259 | } 260 | 261 | /// RFC 1997 communities. 262 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] 263 | pub struct Community(pub [u16; 2]); 264 | 265 | impl Community { 266 | pub fn from_bytes(bytes: [u8; 4]) -> Self { 267 | Self([ 268 | u16::from_be_bytes([bytes[0], bytes[1]]), 269 | u16::from_be_bytes([bytes[2], bytes[3]]), 270 | ]) 271 | } 272 | } 273 | 274 | impl Debug for Community { 275 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 276 | Display::fmt(self, f) 277 | } 278 | } 279 | 280 | impl Display for Community { 281 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 282 | write!(f, "({}, {})", self.0[0], self.0[1]) 283 | } 284 | } 285 | 286 | /// RFC 4360/5668 extended communities. 
287 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] 288 | pub struct ExtCommunity(pub [u8; 8]); 289 | 290 | impl ExtCommunity { 291 | pub fn from_bytes(bytes: [u8; 8]) -> Self { 292 | Self(bytes) 293 | } 294 | 295 | pub fn new_as(transitive: bool, sub_kind: u8, asn: u32, local_admin: u32) -> Self { 296 | let mut bytes = [0; 8]; 297 | bytes[1] = sub_kind; 298 | if asn > u16::MAX.into() { 299 | bytes[0] = 2; 300 | let local_admin = local_admin 301 | .try_into() 302 | .expect("4b ASN extended community can only contain 16-bit local admin value"); 303 | bytes[2..6].copy_from_slice(&u32::to_be_bytes(asn)); 304 | bytes[6..8].copy_from_slice(&u16::to_be_bytes(local_admin)); 305 | } else { 306 | bytes[0] = 0; 307 | bytes[2..4].copy_from_slice(&u16::to_be_bytes(asn as _)); 308 | bytes[4..8].copy_from_slice(&u32::to_be_bytes(local_admin)); 309 | } 310 | bytes[0] |= 1 << 7; 311 | if !transitive { 312 | bytes[0] |= 1 << 6; 313 | } 314 | Self(bytes) 315 | } 316 | 317 | pub fn new_ipv4(transitive: bool, sub_kind: u8, ipv4: Ipv4Addr, local_admin: u16) -> Self { 318 | let mut bytes = [0; 8]; 319 | bytes[0] = 1 | (1 << 7); 320 | bytes[1] = sub_kind; 321 | bytes[2..6].copy_from_slice(&ipv4.octets()); 322 | bytes[6..8].copy_from_slice(&u16::to_be_bytes(local_admin)); 323 | if !transitive { 324 | bytes[0] |= 1 << 6; 325 | } 326 | Self(bytes) 327 | } 328 | 329 | pub fn iana_authority(self) -> bool { 330 | self.0[0] & (1 << 7) != 0 331 | } 332 | pub fn is_transitive(self) -> bool { 333 | self.0[0] & (1 << 6) == 0 334 | } 335 | pub fn kind(self) -> u8 { 336 | self.0[0] 337 | } 338 | pub fn kind_struct(self) -> u8 { 339 | self.0[0] & 0b111111 340 | } 341 | pub fn sub_kind(self) -> u8 { 342 | self.0[1] 343 | } 344 | 345 | pub fn admins(self) -> Option<(GlobalAdmin, u32)> { 346 | use GlobalAdmin::*; 347 | match self.kind_struct() { 348 | 0 => Some(( 349 | As(u16::from_be_bytes(self.0[2..4].try_into().unwrap()).into()), 350 | 
u32::from_be_bytes(self.0[4..8].try_into().unwrap()), 351 | )), 352 | 1 => Some(( 353 | Ipv4(u32::from_be_bytes(self.0[2..6].try_into().unwrap()).into()), 354 | u16::from_be_bytes(self.0[6..8].try_into().unwrap()).into(), 355 | )), 356 | 2 => Some(( 357 | As(u32::from_be_bytes(self.0[2..6].try_into().unwrap())), 358 | u16::from_be_bytes(self.0[6..8].try_into().unwrap()).into(), 359 | )), 360 | _ => None, 361 | } 362 | } 363 | pub fn global_admin(self) -> Option { 364 | self.admins().map(|(g, _)| g) 365 | } 366 | pub fn local_admin(self) -> Option { 367 | self.admins().map(|(_, l)| l) 368 | } 369 | 370 | pub fn opaque_value(self) -> Option { 371 | (self.kind_struct() == 3).then(|| { 372 | let mut bytes = self.0; 373 | bytes[0..2].copy_from_slice(&[0; 2]); 374 | u64::from_be_bytes(bytes) 375 | }) 376 | } 377 | 378 | pub fn action(self) -> Option { 379 | use GlobalAdmin::*; 380 | use TrafficFilterAction::*; 381 | if !self.is_transitive() { 382 | return None; 383 | } 384 | let (g, l) = self.admins()?; 385 | let result = match (self.iana_authority(), g, self.sub_kind()) { 386 | (true, As(desc), 0x06) => TrafficRateBytes { desc: desc as u16, rate: f32::from_bits(l) }, 387 | (true, As(desc), 0x0c) => TrafficRatePackets { desc: desc as u16, rate: f32::from_bits(l) }, 388 | (true, As(_), 0x07) => TrafficAction { terminal: l & 1 == 0, sample: l & (1 << 1) != 0 }, 389 | (true, _, 0x08) => RtRedirect { rt: g, value: l }, 390 | (true, As(_), 0x09) => TrafficMarking { dscp: (l as u8) & 0b111111 }, 391 | (false, Ipv4(ip), 0x0c) => RedirectToIp { ip: ip.into(), copy: l & 1 != 0 }, 392 | _ => return None, 393 | }; 394 | Some(result) 395 | } 396 | } 397 | 398 | impl Debug for ExtCommunity { 399 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 400 | Display::fmt(self, f) 401 | } 402 | } 403 | 404 | impl Display for ExtCommunity { 405 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 406 | if let Some(act) = self.action() { 407 | Display::fmt(&act, f) 408 | } else if let 
Some((g, l)) = self.admins() { 409 | match [self.kind(), self.sub_kind()] { 410 | [0x00..=0x02, 0x02] => f.write_str("(rt, ")?, 411 | [0x00..=0x02, 0x03] => f.write_str("(ro, ")?, 412 | bytes => write!(f, "({:#06x}, ", u16::from_be_bytes(bytes))?, 413 | } 414 | if l > u16::MAX.into() { 415 | write!(f, "{g}, {l:#010x})") 416 | } else { 417 | write!(f, "{g}, {l:#06x})") 418 | } 419 | } else if let Some(val) = self.opaque_value() { 420 | let kind = u16::from_be_bytes([self.kind(), self.sub_kind()]); 421 | write!(f, "({kind:#06x}, {val:#014x})") 422 | } else { 423 | write!(f, "({:#018x})", u64::from_be_bytes(self.0)) 424 | } 425 | } 426 | } 427 | 428 | /// RFC 5701 IPv6 address-specific extended communities. 429 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] 430 | pub struct Ipv6ExtCommunity { 431 | pub kind: u8, 432 | pub sub_kind: u8, 433 | pub global_admin: Ipv6Addr, 434 | pub local_admin: u16, 435 | } 436 | 437 | impl Ipv6ExtCommunity { 438 | pub fn new(transitive: bool, sub_kind: u8, global_admin: Ipv6Addr, local_admin: u16) -> Self { 439 | Self { kind: if transitive { 0 } else { 1 << 6 }, sub_kind, global_admin, local_admin } 440 | } 441 | 442 | pub fn from_bytes(bytes: [u8; 20]) -> Option { 443 | if bytes[0] == 0x00 || bytes[0] == 0x40 { 444 | Some(Self { 445 | kind: bytes[0], 446 | sub_kind: bytes[1], 447 | global_admin: Ipv6Addr::from(<[u8; 16]>::try_from(&bytes[2..18]).unwrap()), 448 | local_admin: u16::from_be_bytes(bytes[18..20].try_into().unwrap()), 449 | }) 450 | } else { 451 | None 452 | } 453 | } 454 | 455 | pub fn iana_authority(self) -> bool { 456 | self.kind & (1 << 7) != 0 457 | } 458 | pub fn is_transitive(self) -> bool { 459 | self.kind & (1 << 6) == 0 460 | } 461 | pub fn kind_struct(self) -> u8 { 462 | self.kind & 0b111111 463 | } 464 | 465 | pub fn action(self) -> Option { 466 | use TrafficFilterAction::*; 467 | let action = match [self.kind, self.sub_kind] { 468 | [0x00, 0x0c] => RedirectToIp { ip: 
self.global_admin.into(), copy: self.local_admin & 1 != 0 }, 469 | [0x00, 0x0d] => RtRedirectIpv6 { rt: self.global_admin, value: self.local_admin }, 470 | _ => return None, 471 | }; 472 | Some(action) 473 | } 474 | } 475 | 476 | impl Debug for Ipv6ExtCommunity { 477 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 478 | write!(f, "Ipv6ExtCommunity{self}") 479 | } 480 | } 481 | 482 | impl Display for Ipv6ExtCommunity { 483 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 484 | if let Some(action) = self.action() { 485 | Display::fmt(&action, f) 486 | } else { 487 | match [self.kind, self.sub_kind] { 488 | [0x00, 0x02] => f.write_str("(rt, ")?, 489 | [0x00, 0x03] => f.write_str("(ro, ")?, 490 | bytes => write!(f, "({:#06x}, ", u16::from_be_bytes(bytes))?, 491 | } 492 | write!(f, "{}, {:#06x})", self.global_admin, self.local_admin) 493 | } 494 | } 495 | } 496 | 497 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 498 | pub enum GlobalAdmin { 499 | As(u32), 500 | Ipv4(Ipv4Addr), 501 | } 502 | 503 | impl Display for GlobalAdmin { 504 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 505 | match self { 506 | Self::As(n) => Display::fmt(n, f), 507 | Self::Ipv4(ip) => Display::fmt(ip, f), 508 | } 509 | } 510 | } 511 | 512 | #[derive(Debug, Clone, Copy, PartialEq, EnumDiscriminants)] 513 | #[strum_discriminants(name(TrafficFilterActionKind), derive(PartialOrd, Ord))] 514 | pub enum TrafficFilterAction { 515 | TrafficRateBytes { desc: u16, rate: f32 }, 516 | TrafficRatePackets { desc: u16, rate: f32 }, 517 | TrafficAction { terminal: bool, sample: bool }, 518 | RtRedirect { rt: GlobalAdmin, value: u32 }, 519 | RtRedirectIpv6 { rt: Ipv6Addr, value: u16 }, 520 | TrafficMarking { dscp: u8 }, 521 | RedirectToIp { ip: IpAddr, copy: bool }, 522 | } 523 | 524 | impl TrafficFilterAction { 525 | pub fn kind(self) -> TrafficFilterActionKind { 526 | self.into() 527 | } 528 | 529 | pub fn into_ext_comm(self) -> Either { 530 | use Either::*; 531 | use 
TrafficFilterAction::*; 532 | match self { 533 | TrafficRateBytes { desc, rate } => Left(ExtCommunity::new_as(true, 0x06, desc.into(), rate.to_bits())), 534 | TrafficRatePackets { desc, rate } => Left(ExtCommunity::new_as(true, 0x0c, desc.into(), rate.to_bits())), 535 | TrafficAction { terminal: t, sample: s } => { 536 | Left(ExtCommunity::new_as(true, 0x07, 0, (!t as u32) | ((s as u32) << 1))) 537 | } 538 | RtRedirect { rt: GlobalAdmin::As(rt), value } => Left(ExtCommunity::new_as(true, 0x08, rt, value)), 539 | RtRedirect { rt: GlobalAdmin::Ipv4(rt), value } => { 540 | let value = value.try_into().expect("it should fit in u16"); 541 | Left(ExtCommunity::new_ipv4(true, 0x08, rt, value)) 542 | } 543 | RtRedirectIpv6 { rt, value } => Right(Ipv6ExtCommunity::new(true, 0x0d, rt, value)), 544 | TrafficMarking { dscp } => Left(ExtCommunity::new_as(true, 0x09, 0, (dscp & 0b111111).into())), 545 | RedirectToIp { ip: IpAddr::V4(ip), copy } => Left(ExtCommunity::new_ipv4(true, 0x0c, ip, copy.into())), 546 | RedirectToIp { ip: IpAddr::V6(ip), copy } => Right(Ipv6ExtCommunity::new(true, 0x0c, ip, copy.into())), 547 | } 548 | } 549 | } 550 | 551 | impl Display for TrafficFilterAction { 552 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 553 | use TrafficFilterAction::*; 554 | match self { 555 | TrafficRateBytes { desc, rate } => write!(f, "(traffic-rate-bytes, {desc}, {rate})"), 556 | TrafficRatePackets { desc, rate } => write!(f, "(traffic-rate-packets, {desc}, {rate})"), 557 | TrafficAction { terminal, sample } => write!( 558 | f, 559 | "(traffic-action{}{})", 560 | terminal.then_some(", terminal").unwrap_or(""), 561 | sample.then_some(", sample").unwrap_or(""), 562 | ), 563 | RtRedirect { rt, value } => { 564 | write!(f, "(rt-redirect, {rt}, ")?; 565 | if *value > u16::MAX.into() { 566 | write!(f, "{value:#010x})") 567 | } else { 568 | write!(f, "{value:#06x})") 569 | } 570 | } 571 | RtRedirectIpv6 { rt, value } => write!(f, "(rt-redirect-ipv6, {rt}, {value:#06x})"), 
572 | TrafficMarking { dscp } => write!(f, "(traffic-marking, {dscp})"), 573 | RedirectToIp { ip, copy } => write!(f, "(redirect-to-ip, {ip}{})", copy.then_some(", copy").unwrap_or("")), 574 | } 575 | } 576 | } 577 | 578 | /// RFC 8092 large communities. 579 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] 580 | pub struct LargeCommunity(pub [u32; 3]); 581 | 582 | impl LargeCommunity { 583 | pub fn from_bytes(bytes: [u8; 12]) -> Self { 584 | Self([ 585 | u32::from_be_bytes(bytes[0..4].try_into().unwrap()), 586 | u32::from_be_bytes(bytes[4..8].try_into().unwrap()), 587 | u32::from_be_bytes(bytes[8..12].try_into().unwrap()), 588 | ]) 589 | } 590 | } 591 | 592 | impl Debug for LargeCommunity { 593 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 594 | Display::fmt(self, f) 595 | } 596 | } 597 | 598 | impl Display for LargeCommunity { 599 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 600 | write!(f, "({}, {}, {})", self.0[0], self.0[1], self.0[2]) 601 | } 602 | } 603 | 604 | #[cfg(test)] 605 | mod tests { 606 | use super::*; 607 | 608 | #[test] 609 | fn test_ext_comm() { 610 | println!("{:?}", ExtCommunity::new_as(true, 3, 207268, 12345)); 611 | println!("{:?}", ExtCommunity::from_bytes((1145141919810000000u64).to_be_bytes())); 612 | println!( 613 | "{:?}", 614 | Ipv6ExtCommunity::new(true, 0x0d, "2a09::".parse().unwrap(), 11451) 615 | ); 616 | } 617 | } 618 | -------------------------------------------------------------------------------- /src/integration_tests/flowspec.rs: -------------------------------------------------------------------------------- 1 | use super::helpers::bird::{ensure_bird_2, ensure_bird_2_16}; 2 | use super::helpers::cli::run_cli_with_bird; 3 | use super::helpers::kernel::{ensure_loopback_up, pick_port}; 4 | use super::{BIRD_CONFIG_1, TestEvent, test_local}; 5 | use crate::args::Cli; 6 | use crate::bgp::flow::Component::*; 7 | use crate::bgp::flow::{Flowspec, Op}; 8 | use 
crate::bgp::route::{AsSegment, ExtCommunity, GlobalAdmin, Origin, RouteInfo, TrafficFilterAction}; 9 | use anyhow::Context; 10 | use clap::Parser; 11 | use macro_rules_attribute::apply; 12 | use smallvec::{smallvec, smallvec_inline}; 13 | use std::collections::{BTreeMap, BTreeSet}; 14 | use std::time::Duration; 15 | use tokio::select; 16 | use tokio::time::sleep; 17 | 18 | #[apply(test_local!)] 19 | async fn test_flow() -> anyhow::Result<()> { 20 | run_flowspec_route_test([ 21 | ( 22 | "flow4 { dst 10.0.0.0/8; length > 1024; }", 23 | Flowspec::new_v4() 24 | .with(DstPrefix("10.0.0.0/8".parse()?, 0))? 25 | .with(PacketLen(Op::gt(1024).into()))?, 26 | ), 27 | ( 28 | "flow4 { src 123.45.67.192/26; icmp type 3; icmp code >= 2 && < 13; }", 29 | Flowspec::new_v4() 30 | .with(SrcPrefix("123.45.67.192/26".parse()?, 0))? 31 | .with(IcmpType(Op::eq(3).into()))? 32 | .with(IcmpCode(Op::ge(2).and(Op::lt(13))))?, 33 | ), 34 | ("flow4 {}", Flowspec::new_v4()), 35 | ( 36 | "flow6 { dst fec0:1122:3344:5566:7788:99aa:bbcc:ddee/128; 37 | tcp flags 0x03/0x0f && !0/0xff || 0x33/0x33; 38 | dport = 6000; 39 | fragment !is_fragment || !first_fragment; }", 40 | Flowspec::new_v6() 41 | .with(DstPrefix("fec0:1122:3344:5566:7788:99aa:bbcc:ddee/128".parse()?, 0))? 42 | .with(TcpFlags( 43 | Op::all(0x3).and(Op::not_any(0xc)).and(Op::any(0xff)).or(Op::all(0x33)), 44 | ))? 45 | .with(DstPort(Op::eq(6000).into()))? 46 | .with(Fragment(Op::not_any(0b10).or(Op::not_any(0b100))))?, 47 | ), 48 | ( 49 | "flow6 { dst fd00::/8; label 0x8e5 || 0x8e6; }", 50 | Flowspec::new_v6() 51 | .with(DstPrefix("fd00::/8".parse()?, 0))? 
52 | .with(FlowLabel(Op::eq(0x8e5).or(Op::eq(0x8e6))))?, 53 | ), 54 | ("flow6 {}", Flowspec::new_v6()), 55 | ]) 56 | .await 57 | } 58 | 59 | #[apply(test_local!)] 60 | async fn test_flow_attr() -> anyhow::Result<()> { 61 | use TrafficFilterAction::*; 62 | 63 | fn tfa_to_ext_comm(iter: impl IntoIterator) -> BTreeSet { 64 | iter.into_iter().map(|x| x.into_ext_comm().left().unwrap()).collect() 65 | } 66 | 67 | let route_info_default = RouteInfo { origin: Origin::Igp, local_pref: Some(100), ..Default::default() }; 68 | run_flowspec_test([ 69 | ( 70 | "flow4 { dst 10.0.0.0/8; length > 1024; } { 71 | bgp_path.prepend(114514); 72 | bgp_path.prepend(1919810); 73 | bgp_ext_community.add((unknown 0x8108, 1.1.1.1, 1234)); 74 | bgp_ext_community.add((unknown 0x8006, 0, 0x453b8000)); 75 | bgp_ext_community.add((unknown 0x800c, 172.20.0.1, 0)); 76 | }", 77 | Flowspec::new_v4() 78 | .with(DstPrefix("10.0.0.0/8".parse()?, 0))? 79 | .with(PacketLen(Op::gt(1024).into()))?, 80 | RouteInfo { 81 | as_path: smallvec_inline![AsSegment::Sequence(smallvec![1919810, 114514])], 82 | ext_comm: tfa_to_ext_comm([ 83 | RtRedirect { rt: GlobalAdmin::Ipv4("1.1.1.1".parse()?), value: 1234 }, 84 | TrafficRateBytes { desc: 0, rate: 3e3 }, 85 | RedirectToIp { ip: "172.20.0.1".parse()?, copy: false }, 86 | ]), 87 | ..route_info_default.clone() 88 | }, 89 | ), 90 | ( 91 | "flow6 { dst ::1.1.1.1/128 offset 96; next header 17; } { 92 | bgp_ext_community.add((unknown 0x8007, 0, 3)); 93 | }", 94 | Flowspec::new_v6() 95 | .with(DstPrefix("::1.1.1.1/128".parse()?, 96))? 
96 | .with(Protocol(Op::eq(17).into()))?, 97 | RouteInfo { 98 | ext_comm: tfa_to_ext_comm([TrafficAction { terminal: false, sample: true }]), 99 | ..route_info_default 100 | }, 101 | ), 102 | ]) 103 | .await 104 | } 105 | 106 | #[apply(test_local!)] 107 | async fn test_flow6_offset_bird_2_16() -> anyhow::Result<()> { 108 | ensure_bird_2_16(); 109 | run_flowspec_route_test([ 110 | ( 111 | "flow6 { dst ::1:1234:5678:9800:0/104 offset 60; }", 112 | Flowspec::new_v6().with(DstPrefix("::1:1234:5678:9800:0/104".parse()?, 60))?, 113 | ), 114 | ( 115 | "flow6 { src ::1:1234:5678:9800:0/104 offset 63; }", 116 | Flowspec::new_v6().with(SrcPrefix("::1:1234:5678:9800:0/104".parse()?, 63))?, 117 | ), 118 | ]) 119 | .await 120 | } 121 | 122 | async fn run_flowspec_route_test(flows: impl IntoIterator) -> anyhow::Result<()> { 123 | let route_info_default = RouteInfo { origin: Origin::Igp, local_pref: Some(100), ..Default::default() }; 124 | let flows = flows.into_iter().map(|(v, k)| (k, (v, route_info_default.clone()))).collect(); 125 | run_flowspec_test_inner(flows).await 126 | } 127 | 128 | async fn run_flowspec_test(flows: impl IntoIterator)>) -> anyhow::Result<()> { 129 | let flows = flows.into_iter().map(|(u, k, v)| (k, (u, v))).collect(); 130 | run_flowspec_test_inner(flows).await 131 | } 132 | 133 | async fn run_flowspec_test_inner(mut flows: BTreeMap)>) -> anyhow::Result<()> { 134 | ensure_bird_2(); 135 | ensure_loopback_up().await?; 136 | 137 | let (flow4, flow6) = flows.iter().fold((String::new(), String::new()), |(v4, v6), (k, v)| { 138 | if k.is_ipv4() { 139 | (v4 + "route " + v.0 + ";", v6) 140 | } else { 141 | (v4, v6 + "route " + v.0 + ";") 142 | } 143 | }); 144 | 145 | let flow_port = pick_port().await?.to_string(); 146 | let cli = Cli::try_parse_from([ 147 | "flow", 148 | "run", 149 | "-v", 150 | "--dry-run", 151 | &format!("--bind=[::1]:{flow_port}"), 152 | "--local-as=65000", 153 | "--remote-as=65000", 154 | ])?; 155 | let bird = BIRD_CONFIG_1 156 | 
.replace("@@BIRD_PORT@@", &pick_port().await?.to_string()) 157 | .replace("@@FLOW_PORT@@", &flow_port) 158 | .replace("@@FLOW4@@", &flow4) 159 | .replace("@@FLOW6@@", &flow6); 160 | 161 | let (mut cli, mut bird, (mut events, close), _g) = run_cli_with_bird(cli, &bird).await?; 162 | let mut end_of_rib_count = 0; 163 | let mut visited = BTreeSet::new(); 164 | let _state = 'outer: loop { 165 | select! { 166 | Some(event) = events.recv(), if !events.is_closed() => match event { 167 | TestEvent::EndOfRib(..) => { 168 | end_of_rib_count += 1; 169 | if end_of_rib_count >= 2 { 170 | let _ = close.send(()); 171 | while let Some(event) = events.recv().await { 172 | if let TestEvent::Exit(state) = event { 173 | break 'outer state; 174 | } 175 | } 176 | panic!("no state received"); 177 | } 178 | } 179 | TestEvent::Update(msg) => { 180 | for nlri in msg.nlri.into_iter().chain(msg.old_nlri) { 181 | let specs = nlri.into_flow().context("received NLRI other than flowspec")?; 182 | for spec in specs { 183 | if let Some((spec1, (_, info))) = flows.remove_entry(&spec) { 184 | visited.insert(spec1); 185 | assert_eq!(info, msg.route_info, "route info does not match for {spec}" ); 186 | } else { 187 | assert!(visited.contains(&spec), "received duplicate flowspec: {spec}"); 188 | panic!("received unknown flowspec: {spec}"); 189 | } 190 | } 191 | } 192 | } 193 | TestEvent::Exit(_) => panic!("unexpected CLI exit event"), 194 | }, 195 | _ = sleep(Duration::from_secs(10)) => panic!("timed out"), 196 | code = &mut cli => panic!("CLI exited early with code {}", code??), 197 | status = bird.wait() => panic!("BIRD exited early with {}", status?), 198 | } 199 | }; 200 | 201 | assert!(flows.is_empty(), "some flowspecs not received: {flows:?}"); 202 | Ok(()) 203 | } 204 | -------------------------------------------------------------------------------- /src/integration_tests/helpers/bird.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 
| use std::ffi::OsStr; 3 | use std::path::Path; 4 | use std::process::Stdio; 5 | use std::sync::LazyLock; 6 | use std::{env, io}; 7 | use tokio::io::{AsyncBufReadExt, BufReader}; 8 | use tokio::process::{Child, Command}; 9 | use version_compare::compare_to; 10 | 11 | static BIRD_PATH: LazyLock> = LazyLock::new(|| { 12 | env::var_os("FLOW_BIRD_PATH") 13 | .map(Cow::Owned) 14 | .unwrap_or(Cow::Borrowed("bird".as_ref())) 15 | }); 16 | 17 | static BIRD_VERSION: LazyLock, String>> = LazyLock::new(|| { 18 | let output = std::process::Command::new(&*BIRD_PATH) 19 | .arg("--version") 20 | .stdin(Stdio::null()) 21 | .output(); 22 | let mut stderr = match output { 23 | Ok(output) => output.stderr, 24 | Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None), 25 | Err(e) => return Err(e.to_string()), 26 | }; 27 | const PREFIX: &[u8] = b"BIRD version "; 28 | if !stderr.starts_with(PREFIX) { 29 | return Err("invalid `bird --version` output".into()); 30 | } 31 | let prefix_len = PREFIX.len() + if stderr[PREFIX.len()] == b'v' { 1 } else { 0 }; 32 | let mut ver = stderr.split_off(prefix_len); 33 | ver.pop(); 34 | String::from_utf8(ver).map(Some).map_err(|e| e.to_string()) 35 | }); 36 | 37 | fn bird_ver_ge(min_ver: &str) -> anyhow::Result> { 38 | let Some(ver) = BIRD_VERSION.as_ref().map_err(anyhow::Error::msg)? 
else { 39 | return Ok(None); 40 | }; 41 | compare_to(ver, min_ver, version_compare::Cmp::Ge) 42 | .map(Some) 43 | .map_err(|()| anyhow::Error::msg("invalid version number")) 44 | } 45 | 46 |
/// Panics unless `bird_ver_ge(ver)` reports `Ok(Some(true))`.
///
/// `Ok(None)` panics with installation instructions, `Ok(Some(false))` panics
/// with the caller-supplied `msg`, and an error panics with the error wrapped
/// in a "failed to get BIRD version" context.
fn ensure_bird_ver(ver: &str, msg: &'static str) { 47 | match bird_ver_ge(ver) { 48 | Ok(None) => panic!( 49 | "BIRD not found; please install BIRD under PATH, or specify FLOW_BIRD_PATH to \ 50 | point to BIRD executable.", 51 | ), 52 | Ok(Some(false)) => panic!("{msg}"), 53 | Ok(Some(true)) => {} 54 | Err(e) => panic!("{}", e.context("failed to get BIRD version")), 55 | } 56 | } 57 | 58 |
/// Panics unless the BIRD version check against `"2"` succeeds.
pub fn ensure_bird_2() { 59 | ensure_bird_ver( 60 | "2", 61 | "The BIRD in your system is the outdated 1.x version. Please update to BIRD 2.x \ 62 | to run the tests.", 63 | ); 64 | } 65 | 66 |
/// Panics unless the BIRD version check against `"2.16"` succeeds; the panic
/// message explains why and how to skip the affected tests.
pub fn ensure_bird_2_16() { 67 | ensure_bird_ver( 68 | "2.16", 69 | "BIRD version below 2.16 incorrectly implements Flowspec's IPv6 offset. Upgrade \ 70 | to BIRD 2.16, 3.x, or above to allow respective tests to run, or skip them by \ 71 | passing `--skip bird_2_16` to the test binary.\n\ 72 | See https://gitlab.nic.cz/labs/bird/-/commit/072821e55e2a3bd0fb3ffee309937592 \ 73 | for more information.", 74 | ); 75 | } 76 | 77 |
/// Spawns BIRD from `BIRD_PATH` with `-d`, `-c <config_path>` and
/// `-s <sock_path>`, and returns the child handle.
///
/// stdin is null, stdout and stderr are piped, and the child is killed when
/// the handle is dropped. A background task copies the child's stderr line by
/// line to this process's stderr.
// NOTE(review): stdout is piped as well, but nothing in this function reads it.
pub async fn run_bird(config_path: impl AsRef, sock_path: impl AsRef) -> anyhow::Result { 78 | let mut bird = Command::new(&*BIRD_PATH) 79 | .arg("-d") 80 | .args(["-c".as_ref(), config_path.as_ref().as_os_str()]) 81 | .args(["-s".as_ref(), sock_path.as_ref().as_os_str()]) 82 | .stdin(Stdio::null()) 83 | .stdout(Stdio::piped()) 84 | .stderr(Stdio::piped()) 85 | .kill_on_drop(true) 86 | .spawn()?; 87 | 88 | let mut bird_stderr = BufReader::new(bird.stderr.take().unwrap()); 89 | tokio::spawn(async move { 90 | let mut buf = String::new(); 91 | while bird_stderr.read_line(&mut buf).await? 
!= 0 { 92 | eprint!("{buf}"); 93 | buf.clear(); 94 | } 95 | anyhow::Ok(()) 96 | }); 97 | 98 | Ok(bird) 99 | } 100 | -------------------------------------------------------------------------------- /src/integration_tests/helpers/cli.rs: -------------------------------------------------------------------------------- 1 | use super::bird::run_bird; 2 | use super::exabgp::run_exabgp; 3 | use super::str_to_file; 4 | use crate::args::Cli; 5 | use crate::cli_entry; 6 | use crate::integration_tests::TestEvent; 7 | use async_tempfile::{TempDir, TempFile}; 8 | use tokio::process::Child; 9 | use tokio::sync::{mpsc, oneshot}; 10 | use tokio::task::JoinHandle; 11 | 12 |
/// Join handle of the locally spawned task that runs `cli_entry`.
pub type CliChild = JoinHandle>; 13 |
/// Everything a test must keep alive while the CLI runs: the CLI task, the
/// routing-daemon child process, the (event receiver, close sender) pair, and
/// the temporary config file and run directory.
pub type CliGuard = ( 14 | CliChild, 15 | Child, 16 | (mpsc::Receiver, oneshot::Sender<()>), 17 | (TempFile, TempDir), 18 | ); 19 | 20 |
/// Writes `bird_conf` to a temporary file, starts BIRD with its control socket
/// at `bird.sock` inside a fresh temporary directory, points `cli_opt.run_dir`
/// at that directory, and starts the CLI.
pub async fn run_cli_with_bird(mut cli_opt: Cli, bird_conf: &str) -> anyhow::Result { 21 | let conf_file = str_to_file(bird_conf.as_bytes()).await?; 22 | 23 | let sock_dir = TempDir::new().await?; 24 | cli_opt.run_dir = sock_dir.as_ref().into(); 25 | let bird = run_bird(conf_file.file_path(), sock_dir.join("bird.sock")).await?; 26 | 27 | let (cli, event, close) = run_cli(cli_opt); 28 | Ok((cli, bird, (event, close), (conf_file, sock_dir))) 29 | } 30 | 31 |
/// Same as `run_cli_with_bird`, but starts ExaBGP with `exabgp_conf` and the
/// given TCP `port` instead of BIRD.
pub async fn run_cli_with_exabgp(mut cli_opt: Cli, exabgp_conf: &str, port: u16) -> anyhow::Result { 32 | let conf_file = str_to_file(exabgp_conf.as_bytes()).await?; 33 | let sock_dir = TempDir::new().await?; 34 | cli_opt.run_dir = sock_dir.as_ref().into(); 35 | let daemon = run_exabgp(conf_file.file_path(), port).await?; 36 | let (cli, event, close) = run_cli(cli_opt); 37 | Ok((cli, daemon, (event, close), (conf_file, sock_dir))) 38 | } 39 | 40 |
/// Spawns `cli_entry` as a local task and returns its join handle together
/// with the receiving end of the event channel (capacity 127) and the sending
/// end of the one-shot close channel.
fn run_cli(cli_opt: Cli) -> (CliChild, mpsc::Receiver, oneshot::Sender<()>) { 41 | let (event_tx, event_rx) = mpsc::channel(127); 42 | let (close_tx, close_rx) = oneshot::channel(); 43 | let cli = tokio::task::spawn_local(async { 44 | let 
exit_code = cli_entry(cli_opt, event_tx, close_rx).await; 45 | anyhow::Ok(exit_code) 46 | }); 47 | (cli, event_rx, close_tx) 48 | } 49 | 50 |
/// Signals the CLI to shut down and waits for it to finish.
///
/// Sends on the close channel (a failed send is ignored), then drains events
/// until a `TestEvent::Exit` arrives or the event channel is closed.
pub async fn close_cli(chans: (mpsc::Receiver, oneshot::Sender<()>)) { 51 | let (mut events, close) = chans; 52 | let _ = close.send(()); 53 | while let Some(event) = events.recv().await { 54 | if let TestEvent::Exit(_) = event { 55 | break; 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/integration_tests/helpers/exabgp.rs: -------------------------------------------------------------------------------- 1 | use nix::unistd::{Uid, User}; 2 | use std::borrow::Cow; 3 | use std::env; 4 | use std::ffi::OsStr; 5 | use std::path::Path; 6 | use std::process::Stdio; 7 | use std::sync::LazyLock; 8 | use tokio::io::{AsyncBufReadExt, BufReader}; 9 | use tokio::process::{Child, Command}; 10 | 11 |
/// Executable used to launch ExaBGP: the value of the `FLOW_EXABGP_PATH`
/// environment variable when it is set, otherwise plain `exabgp`.
static EXABGP_PATH: LazyLock> = LazyLock::new(|| { 12 | env::var_os("FLOW_EXABGP_PATH") 13 | .map(Cow::Owned) 14 | .unwrap_or(Cow::Borrowed("exabgp".as_ref())) 15 | }); 16 | 17 |
/// Spawns ExaBGP with `config_path` as its only argument and returns the child
/// handle.
///
/// The port and the name of the effective user are passed through the
/// `exabgp.tcp.port` and `exabgp.daemon.user` environment variables. stdin is
/// null, stdout and stderr are piped, and the child is killed on drop.
///
/// # Panics
/// Panics if no user entry exists for the effective UID.
// NOTE(review): the reader named `exabgp_stderr` below wraps the child's
// *stdout*; the piped stderr is not read in this function. Confirm this is
// intended and consider renaming the variable.
pub async fn run_exabgp(config_path: impl AsRef, port: u16) -> anyhow::Result { 18 | let mut exabgp = Command::new(&*EXABGP_PATH) 19 | .arg(config_path.as_ref()) 20 | .env("exabgp.tcp.port", port.to_string()) 21 | .env("exabgp.daemon.user", User::from_uid(Uid::effective())?.unwrap().name) 22 | .stdin(Stdio::null()) 23 | .stdout(Stdio::piped()) 24 | .stderr(Stdio::piped()) 25 | .kill_on_drop(true) 26 | .spawn()?; 27 | 28 | let mut exabgp_stderr = BufReader::new(exabgp.stdout.take().unwrap()); 29 | tokio::spawn(async move { 30 | let mut buf = String::new(); 31 | while exabgp_stderr.read_line(&mut buf).await? 
!= 0 { 32 | eprint!("{buf}"); 33 | buf.clear(); 34 | } 35 | anyhow::Ok(()) 36 | }); 37 | 38 | Ok(exabgp) 39 | } 40 | -------------------------------------------------------------------------------- /src/integration_tests/helpers/kernel/linux.rs: -------------------------------------------------------------------------------- 1 | use nftables::helper::DEFAULT_NFT; 2 | use nftables::schema::{NfListObject, NfObject}; 3 | use nftables::stmt::Statement; 4 | use nftables::types::NfFamily; 5 | 6 |
/// Runs `nft -an list chain inet <table> <chain>` and prints its stdout.
///
/// Fails if the command cannot be run or its stdout is not valid UTF-8; the
/// exit status is not inspected.
pub async fn print_nft_chain(table: &str, chain: &str) -> anyhow::Result<()> { 7 | let output = tokio::process::Command::new("nft") 8 | .args(["-an", "list", "chain", "inet", table, chain]) 9 | .output() 10 | .await?; 11 | println!("{}", String::from_utf8(output.stdout)?); 12 | Ok(()) 13 | } 14 | 15 |
/// Runs `ip -6 rule` (when `v6` is true) or `ip -4 rule` and prints its stdout.
pub async fn print_ip_rule(v6: bool) -> anyhow::Result<()> { 16 | let output = tokio::process::Command::new("ip") 17 | .arg(if v6 { "-6" } else { "-4" }) 18 | .arg("rule") 19 | .output() 20 | .await?; 21 | println!("{}", String::from_utf8(output.stdout)?); 22 | Ok(()) 23 | } 24 | 25 |
/// Runs `ip -6|-4 route show table <table>` and prints its stdout.
pub async fn print_ip_route(v6: bool, table: u32) -> anyhow::Result<()> { 26 | let output = tokio::process::Command::new("ip") 27 | .arg(if v6 { "-6" } else { "-4" }) 28 | .args(["route", "show", "table", &table.to_string()]) 29 | .output() 30 | .await?; 31 | println!("{}", String::from_utf8(output.stdout)?); 32 | Ok(()) 33 | } 34 | 35 |
/// Lists `chain` of inet table `table` through the nftables helper (arguments
/// `-ns list chain inet <table> <chain>`) and returns the statement list of
/// every rule found.
///
/// # Panics
/// Panics if a returned rule does not belong to the inet family, `table` and
/// `chain`.
pub async fn get_nft_stmts(table: &str, chain: &str) -> anyhow::Result>>> { 36 | let args = ["-ns", "list", "chain", "inet", table, chain]; 37 | let chain_obj = nftables::helper::get_current_ruleset_with_args_async(DEFAULT_NFT, args) 38 | .await? 
39 | .objects 40 | .into_owned(); 41 | 42 |
// Keep only rule objects and clear each rule's handle before returning it.
let rules = chain_obj.into_iter().filter_map(|x| { 43 | if let NfObject::ListObject(NfListObject::Rule(mut rule)) = x { 44 | rule.handle = None; 45 | Some(rule) 46 | } else { 47 | None 48 | } 49 | }); 50 | assert!( 51 | rules 52 | .clone() 53 | .all(|r| r.family == NfFamily::INet && r.table == table && r.chain == chain) 54 | ); 55 | 56 | Ok(rules.map(|r| r.expr.into_owned()).collect()) 57 | } 58 | -------------------------------------------------------------------------------- /src/integration_tests/helpers/kernel/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(linux)] 2 | pub mod linux; 3 | #[cfg(rtnetlink_supported)] 4 | pub mod rtnl; 5 | 6 | use nix::unistd::Uid; 7 | use tokio::net::TcpListener; 8 | 9 |
/// Panics with an explanatory message unless the effective UID is root.
pub fn ensure_root() { 10 | assert!( 11 | Uid::effective().is_root(), 12 | "This test needs root (or an isolated namespace acting like root) to access \ 13 | kernel network interface (rtnetlink, nftables, etc.). 
Please run the tests with \ 14 | root, unshare(1) or jail(8) to test them.", 15 | ); 16 | } 17 |
/// Brings the interface with index 1 up through rtnetlink.
///
/// Does nothing and returns `Ok(())` when the effective UID is not root.
// NOTE(review): index 1 is presumably the loopback device, as the function
// name suggests; the code does not look the interface up by name.
#[cfg(rtnetlink_supported)]
pub async fn ensure_loopback_up() -> anyhow::Result<()> {
    use rtnetlink::{LinkMessageBuilder, LinkUnspec};

    if !Uid::effective().is_root() {
        return Ok(());
    }
    let (conn, handle, _) = rtnetlink::new_connection()?;
    tokio::spawn(conn);
    handle
        .link()
        .set(LinkMessageBuilder::<LinkUnspec>::new().index(1).up().build())
        .execute()
        .await?;
    Ok(())
}

/// No-op fallback for targets without rtnetlink support.
#[cfg(not(rtnetlink_supported))]
pub async fn ensure_loopback_up() -> anyhow::Result<()> {
    // An empty body evaluates to `()`, which does not match the declared
    // `anyhow::Result<()>` return type; return success explicitly.
    Ok(())
}

/// Asks the OS for a free TCP port on 127.0.0.1 by binding to port 0 and
/// returns the port number that was assigned.
///
/// The listener is dropped when this function returns, so the port is no
/// longer held by the time the caller uses it.
pub async fn pick_port() -> anyhow::Result<u16> {
    let sock = TcpListener::bind("127.0.0.1:0").await?;
    Ok(sock.local_addr()?.port())
}
42 | -------------------------------------------------------------------------------- /src/integration_tests/helpers/kernel/rtnl.rs: -------------------------------------------------------------------------------- 1 | use crate::net::IpWithPrefix; 2 | use futures::TryStreamExt; 3 | use nix::net::if_::if_nametoindex; 4 | use rand::Rng; 5 | use rand::distr::Alphanumeric; 6 | use rtnetlink::packet_route::AddressFamily; 7 | use rtnetlink::packet_route::link::InfoKind; 8 | use rtnetlink::packet_route::route::{RouteAttribute, RouteMessage, RoutePreference, RouteProtocol}; 9 | use rtnetlink::packet_route::rule::{RuleAction, RuleAttribute, RuleHeader, RuleMessage}; 10 | use rtnetlink::packet_utils::Emitable; 11 | use rtnetlink::packet_utils::nla::Nla; 12 | use rtnetlink::{Handle, IpVersion, LinkMessageBuilder, LinkUnspec, RouteMessageBuilder}; 13 | use std::cmp::Ordering; 14 | use std::net::IpAddr; 15 | 16 | pub async fn create_dummy_link(handle: &Handle, addr: IpWithPrefix) -> anyhow::Result { 17 | let name: String = "dummy_" 18 | .chars() 19 | .chain(rand::rng().sample_iter(&Alphanumeric).take(8).map(char::from)) 20 | .collect(); 21 | let link_msg = 
LinkMessageBuilder::::new_with_info_kind(InfoKind::Dummy) 22 | .name(name.clone()) 23 | .up() 24 | .build(); 25 | handle.link().add(link_msg).execute().await?; 26 | let index = if_nametoindex(&*name)?; 27 | handle.address().add(index, addr.addr(), addr.prefix_len()).execute().await?; 28 | Ok(index) 29 | } 30 | 31 |
/// Deletes the link with interface index `index`.
pub async fn remove_link(handle: &Handle, index: u32) -> anyhow::Result<()> { 32 | handle.link().del(index).execute().await?; 33 | Ok(()) 34 | } 35 | 36 |
/// Fetches all policy-routing rules of `ip_version` and normalizes each one:
/// the header table is zeroed when a `Table` attribute is present, attributes
/// matched by the filter below are dropped, and the remaining attributes are
/// sorted with `rule_attr_sort`.
pub async fn get_ip_rule(handle: &Handle, ip_version: IpVersion) -> anyhow::Result> { 37 | use RuleAttribute::*; 38 | 39 | let mut buf = Vec::new(); 40 | let mut stream = handle.rule().get(ip_version).execute(); 41 | while let Some(mut msg) = stream.try_next().await? { 42 | // normalize first 43 | msg.attributes.retain(|attr| { 44 | if let RuleAttribute::Table(_) = attr { 45 | msg.header.table = 0; 46 | return true; 47 | } 48 | // The following attributes have no meaning (?) but are still present in 49 | // get_ip_rule's output. Probably subject to kernel's internal change. 
50 | !matches!( 51 | attr, 52 | SuppressPrefixLen(u32::MAX) | FwMask(u32::MAX) | Protocol(RouteProtocol::Unspec) 53 | ) 54 | }); 55 | msg.attributes.sort_by(rule_attr_sort); 56 | buf.push(msg); 57 | } 58 | Ok(buf) 59 | } 60 |
/// Builds the rule message expected for a "fwmark `mark` → lookup `table`"
/// rule with priority `prio`, with attributes in `rule_attr_sort` order.
///
/// Table IDs above `0xff` do not fit the 8-bit header field: the header table
/// is then left at 0 and the ID is carried by a `Table` attribute instead.
pub fn make_ip_rule_mark(ip_version: IpVersion, prio: u32, mark: u32, table: u32) -> RuleMessage {
    use RuleAttribute::*;

    let mut msg = RuleMessage::default();
    msg.header = RuleHeader {
        family: match ip_version {
            IpVersion::V4 => AddressFamily::Inet,
            IpVersion::V6 => AddressFamily::Inet6,
        },
        table: if table > 0xff { 0 } else { table as u8 },
        action: RuleAction::ToTable,
        ..Default::default()
    };
    msg.attributes.extend([Priority(prio), FwMark(mark)]);
    msg.attributes.extend((table > 0xff).then_some(Table(table)));
    msg.attributes.sort_by(rule_attr_sort);
    msg
}

/// Total order on rule attributes used to make attribute lists comparable:
/// by kind, then by value length, then by serialized bytes.
fn rule_attr_sort(a: &RuleAttribute, b: &RuleAttribute) -> Ordering {
    a.kind()
        .cmp(&b.kind())
        .then_with(|| a.value_len().cmp(&b.value_len()))
        .then_with(|| {
            // `Emitable::emit` writes the attribute header and padding as well
            // as the value, so the buffer must be `buffer_len()` bytes; one
            // sized `value_len()` is too short. Kind and value length are
            // equal here, so the headers match and the comparison is decided
            // by the value bytes.
            let (mut abuf, mut bbuf) = (vec![0; a.buffer_len()], vec![0; b.buffer_len()]);
            a.emit(&mut abuf);
            b.emit(&mut bbuf);
            abuf.cmp(&bbuf)
        })
}
95 | 96 | pub async fn get_ip_route(handle: &Handle, ip_version: IpVersion, table: u32) -> anyhow::Result> { 97 | let mut msg = RouteMessageBuilder::::new().table_id(table).build(); 98 | msg.header.address_family = match ip_version { 99 | IpVersion::V4 => AddressFamily::Inet, 100 | IpVersion::V6 => AddressFamily::Inet6, 101 | }; 102 | let mut buf = Vec::new(); 103 | let mut stream = handle.route().get(msg).execute(); 104 | while let Some(mut msg) = stream.try_next().await? 
{ 105 | if msg.header.table as u32 == table || msg.attributes.contains(&RouteAttribute::Table(table)) { 106 | route_msg_normalize(&mut msg); 107 | buf.push(msg); 108 | } 109 | } 110 | Ok(buf) 111 | } 112 |
/// Normalizes a route message so that messages read from the kernel and
/// messages built by the tests compare equal.
///
/// * When a `Table` attribute holds an ID above `0xff`, the header table is
///   set to 0.
/// * `CacheInfo`, `Priority(1024)` and `Preference(Medium)` attributes are
///   removed.
/// * The remaining attributes are sorted with `route_attr_sort`.
pub fn route_msg_normalize(msg: &mut RouteMessage) {
    use RouteAttribute::*;
    msg.attributes.retain(|attr| match attr {
        Table(table) => {
            if *table > 0xff {
                msg.header.table = 0;
            }
            true
        }
        CacheInfo(_) => false, // TODO: match all zero on non-exhaustive struct
        Priority(1024) | Preference(RoutePreference::Medium) => false,
        _ => true,
    });
    msg.attributes.sort_by(route_attr_sort);
}

/// Total order on route attributes used to make attribute lists comparable:
/// by kind, then by value length, then by serialized bytes.
fn route_attr_sort(a: &RouteAttribute, b: &RouteAttribute) -> Ordering {
    a.kind()
        .cmp(&b.kind())
        .then_with(|| a.value_len().cmp(&b.value_len()))
        .then_with(|| {
            // `Emitable::emit` writes the attribute header and padding as well
            // as the value, so the buffer must be `buffer_len()` bytes; one
            // sized `value_len()` is too short. Kind and value length are
            // equal here, so the headers match and the comparison is decided
            // by the value bytes.
            let (mut abuf, mut bbuf) = (vec![0; a.buffer_len()], vec![0; b.buffer_len()]);
            a.emit(&mut abuf);
            b.emit(&mut bbuf);
            abuf.cmp(&bbuf)
        })
}
144 | -------------------------------------------------------------------------------- /src/integration_tests/helpers/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod bird; 2 | pub mod cli; 3 | pub mod exabgp; 4 | pub mod kernel; 5 | 6 | use async_tempfile::TempFile; 7 | use tokio::io::AsyncWriteExt; 8 | 9 | pub async fn str_to_file(content: &[u8]) -> Result { 10 | let mut file = TempFile::new().await?; 11 | file.write_all(content).await?; 12 | file.flush().await?; 13 | Ok(file) 14 | } 15 | -------------------------------------------------------------------------------- /src/integration_tests/kernel_linux.rs: -------------------------------------------------------------------------------- 1 | use super::helpers::bird::ensure_bird_2; 2 | use super::helpers::cli::{CliGuard, close_cli, 
run_cli_with_bird, run_cli_with_exabgp}; 3 | use super::helpers::kernel::rtnl::{create_dummy_link, get_ip_route, get_ip_rule, remove_link, route_msg_normalize}; 4 | use super::helpers::kernel::{ensure_loopback_up, ensure_root, pick_port}; 5 | use super::{BIRD_CONFIG_1, EXABGP_CONFIG_1, TestEvent}; 6 | use crate::args::Cli; 7 | use crate::bgp::flow::Op; 8 | use crate::integration_tests::helpers::kernel::linux::{get_nft_stmts, print_ip_route, print_ip_rule, print_nft_chain}; 9 | use crate::integration_tests::helpers::kernel::rtnl::make_ip_rule_mark; 10 | use crate::kernel::nft::{ 11 | ACCEPT, DROP, make_limit, make_meta, make_payload_field, mangle_stmt, prefix_stmt, range_stmt, 12 | }; 13 | use crate::net::IpPrefix; 14 | use clap::Parser; 15 | use itertools::Itertools; 16 | use macro_rules_attribute::apply; 17 | use nftables::expr::Expression::Number; 18 | use nftables::expr::{self, MetaKey}; 19 | use rand::Rng; 20 | use rand::distr::Alphanumeric; 21 | use rtnetlink::packet_route::route::{RouteAttribute, RouteType}; 22 | use rtnetlink::{IpVersion, RouteMessageBuilder}; 23 | use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; 24 | use std::time::Duration; 25 | use tokio::select; 26 | use tokio::time::sleep; 27 | 28 |
/// Announces four flowspec routes through BIRD and checks the order and
/// content of the resulting nft rules: the expected chain holds five rules,
/// with the 10.0.0.0/10 rules ahead of the 10.0.0.0/9 rule and the IPv6
/// destination rule ahead of the IPv6 source rule.
#[apply(test_local!)] 29 | async fn test_order() -> anyhow::Result<()> { 30 | let (name, (_g1, _g2, chans, _g3)) = run_kernel_test_bird(0xffff0000, [ 31 | "flow4 { dst 10.0.0.0/9; length > 1024; } { bgp_ext_community.add((unknown 0x8006, 0, 0)); }", 32 | "flow4 { dst 10.0.0.0/10; length > 1024; } { bgp_ext_community.add((unknown 0x8006, 0, 0x4c97a25c)); }", 33 | "flow6 { src fdfd::/128; next header 17; } { bgp_ext_community.add((unknown 0x800c, 0, 0)); }", 34 | "flow6 { dst fdfd::/16; } { bgp_ext_community.add((unknown 0x800c, 0, 0)); }", 35 | ]) 36 | .await?; 37 | 38 | print_nft_chain(&name, &name).await?; 39 | 40 | let result = get_nft_stmts(&name, &name).await?; 41 | close_cli(chans).await; 42 | 43 | assert_eq!(result, [ 44 | vec![ 45 | prefix_stmt("daddr", 
"10.0.0.0/10".parse()?).unwrap(), 46 | range_stmt(make_payload_field("ip", "length"), &Op::gt(1024).into(), 0xffff)?.unwrap(), 47 | make_limit(true, 79500000., "bytes", "second"), 48 | DROP, 49 | ], 50 | vec![ 51 | prefix_stmt("daddr", "10.0.0.0/10".parse()?).unwrap(), 52 | range_stmt(make_payload_field("ip", "length"), &Op::gt(1024).into(), 0xffff)?.unwrap(), 53 | ACCEPT, 54 | ], 55 | vec![ 56 | prefix_stmt("daddr", "10.0.0.0/9".parse()?).unwrap(), 57 | range_stmt(make_payload_field("ip", "length"), &Op::gt(1024).into(), 0xffff)?.unwrap(), 58 | DROP, 59 | ], 60 | vec![prefix_stmt("daddr", "fdfd::/16".parse()?).unwrap(), DROP], 61 | vec![ 62 | prefix_stmt("saddr", "fdfd::/128".parse()?).unwrap(), 63 | range_stmt(make_meta(MetaKey::L4proto), &Op::eq(17).into(), 0xff)?.unwrap(), 64 | DROP, 65 | ], 66 | ]); 67 | Ok(()) 68 | } 69 | 70 |
/// Announces two IPv4 flowspec routes through BIRD, each carrying an extended
/// community that names 10.128.128.1, with a dummy link addressed
/// 10.128.128.254/24 in place. Checks that the nft rules set the packet mark
/// to `table_id` and accept, that the rule built by
/// `make_ip_rule_mark(V4, 100, table_id, table_id)` is installed, and that
/// table `table_id` holds a route per prefix via 10.128.128.1 on the dummy
/// link. Kernel state is captured and cleaned up before any assertion runs.
#[apply(test_local!)] 71 | async fn test_redirect_to_ip() -> anyhow::Result<()> { 72 | let (conn, handle, _) = rtnetlink::new_connection()?; 73 | tokio::spawn(conn); 74 | 75 | let table_id = 0xffff0001; 76 | let dummy_id = create_dummy_link(&handle, "10.128.128.254/24".parse()?).await?; 77 | let (name, (_g1, bird, chans, _g2)) = run_kernel_test_bird(table_id, [ 78 | "flow4 { dst 172.20.0.0/16; } { bgp_ext_community.add((unknown 0x000c, 10.128.128.1, 0)); }", 79 | "flow4 { dst 172.21.0.0/16; } { bgp_ext_community.add((unknown 0x000c, 10.128.128.1, 0)); }", 80 | ]) 81 | .await?; 82 | 83 | print_nft_chain(&name, &name).await?; 84 | print_ip_rule(false).await?; 85 | print_ip_route(false, table_id).await?; 86 | 87 | let nft_stmts = get_nft_stmts(&name, &name).await?; 88 | let ip_rules = get_ip_rule(&handle, IpVersion::V4).await?; 89 | let ip_routes = get_ip_route(&handle, IpVersion::V4, table_id).await?; 90 | close_cli(chans).await; 91 | drop(bird); 92 | remove_link(&handle, dummy_id).await?; 93 | 94 | assert_eq!(nft_stmts, [ 95 | vec![ 96 | prefix_stmt("daddr", "172.20.0.0/16".parse()?).unwrap(), 97 | 
mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 98 | ACCEPT, 99 | ], 100 | vec![ 101 | prefix_stmt("daddr", "172.21.0.0/16".parse()?).unwrap(), 102 | mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 103 | ACCEPT, 104 | ] 105 | ]); 106 | 107 | let ip_rule_exp = make_ip_rule_mark(IpVersion::V4, 100, table_id, table_id); 108 | println!("> ip rule = {ip_rules:?}"); 109 | println!("> exp = {ip_rule_exp:?}"); 110 | assert!(ip_rules.contains(&ip_rule_exp)); 111 | 112 | let mut ip_routes_exp = [ 113 | RouteMessageBuilder::::new() 114 | .table_id(table_id) 115 | .destination_prefix("172.20.0.0".parse()?, 16) 116 | .output_interface(dummy_id) 117 | .gateway("10.128.128.1".parse()?) 118 | .build(), 119 | RouteMessageBuilder::::new() 120 | .table_id(table_id) 121 | .destination_prefix("172.21.0.0".parse()?, 16) 122 | .output_interface(dummy_id) 123 | .gateway("10.128.128.1".parse()?) 124 | .build(), 125 | ]; 126 | ip_routes_exp.iter_mut().for_each(route_msg_normalize); 127 | assert_eq!(ip_routes, ip_routes_exp); 128 | 129 | Ok(()) 130 | } 131 | 132 |
/// IPv6 counterpart of the redirect test, driven by ExaBGP: two IPv6
/// destinations are redirected with `redirect-to-nexthop-ietf` to next hops
/// inside fc64::/64, which is configured on a dummy link. Checks the nft mark
/// rules (fc65:6565::/32 listed before fc00::/16), the IPv6 ip rule and the
/// routes in table `table_id`.
#[apply(test_local!)] 133 | async fn test_redirect_to_ipv6() -> anyhow::Result<()> { 134 | let (conn, handle, _) = rtnetlink::new_connection()?; 135 | tokio::spawn(conn); 136 | 137 | let table_id = 0xffff0002; 138 | let dummy_id = create_dummy_link(&handle, "fc64::1/64".parse()?).await?; 139 | let (name, (_g1, exabgp, chans, _g2)) = run_kernel_test_exabgp(table_id, [ 140 | "match { destination fc00::/16; } then { redirect-to-nexthop-ietf fc64::ffff; }", 141 | "match { destination fc65:6565::/32; } then { redirect-to-nexthop-ietf fc64::2333; }", 142 | ]) 143 | .await?; 144 | 145 | print_nft_chain(&name, &name).await?; 146 | print_ip_rule(true).await?; 147 | print_ip_route(true, table_id).await?; 148 | 149 | let nft_stmts = get_nft_stmts(&name, &name).await?; 150 | let ip_rules = get_ip_rule(&handle, IpVersion::V6).await?; 151 | let ip_routes = get_ip_route(&handle, IpVersion::V6, table_id).await?; 152 | 
close_cli(chans).await; 153 | drop(exabgp); 154 | remove_link(&handle, dummy_id).await?; 155 | 156 | assert_eq!(nft_stmts, [ 157 | vec![ 158 | prefix_stmt("daddr", "fc65:6565::/32".parse()?).unwrap(), 159 | mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 160 | ACCEPT, 161 | ], 162 | vec![ 163 | prefix_stmt("daddr", "fc00::/16".parse()?).unwrap(), 164 | mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 165 | ACCEPT, 166 | ], 167 | ]); 168 | 169 | let ip_rule_exp = make_ip_rule_mark(IpVersion::V6, 100, table_id, table_id); 170 | println!("> ip rule = {ip_rules:?}"); 171 | println!("> exp = {ip_rule_exp:?}"); 172 | assert!(ip_rules.contains(&ip_rule_exp)); 173 | 174 | let mut ip_routes_exp = [ 175 | RouteMessageBuilder::::new() 176 | .table_id(table_id) 177 | .destination_prefix("fc00::".parse()?, 16) 178 | .output_interface(dummy_id) 179 | .gateway("fc64::ffff".parse()?) 180 | .build(), 181 | RouteMessageBuilder::::new() 182 | .table_id(table_id) 183 | .destination_prefix("fc65:6565::".parse()?, 32) 184 | .output_interface(dummy_id) 185 | .gateway("fc64::2333".parse()?) 
186 | .build(), 187 | ]; 188 | ip_routes_exp.iter_mut().for_each(route_msg_normalize); 189 | assert_eq!(ip_routes, ip_routes_exp); 190 | 191 | Ok(()) 192 | } 193 | 194 |
/// Redirects IPv4 destinations to IPv6 next hops through ExaBGP: the next
/// hops lie inside fc65::/64, which is configured on a dummy link. Checks the
/// nft mark rules, the IPv4 ip rule, and that the IPv4 routes in table
/// `table_id` use the IPv6 gateways on the dummy link.
#[apply(test_local!)] 195 | async fn test_ipv4_redirect_to_ipv6() -> anyhow::Result<()> { 196 | let (conn, handle, _) = rtnetlink::new_connection()?; 197 | tokio::spawn(conn); 198 | 199 | let table_id = 0xffff0003; 200 | let dummy_id = create_dummy_link(&handle, "fc65::1/64".parse()?).await?; 201 | let (name, (_g1, exabgp, chans, _g2)) = run_kernel_test_exabgp(table_id, [ 202 | "match { destination 172.17.254.192/26; } then { redirect-to-nexthop-ietf fc65::ffff; }", 203 | "match { destination 192.0.2.0/27; } then { redirect-to-nexthop-ietf fc65::2333; }", 204 | ]) 205 | .await?; 206 | 207 | print_nft_chain(&name, &name).await?; 208 | print_ip_rule(false).await?; 209 | print_ip_route(false, table_id).await?; 210 | 211 | let nft_stmts = get_nft_stmts(&name, &name).await?; 212 | let ip_rules = get_ip_rule(&handle, IpVersion::V4).await?; 213 | let ip_routes = get_ip_route(&handle, IpVersion::V4, table_id).await?; 214 | close_cli(chans).await; 215 | drop(exabgp); 216 | remove_link(&handle, dummy_id).await?; 217 | 218 | assert_eq!(nft_stmts, [ 219 | vec![ 220 | prefix_stmt("daddr", "192.0.2.0/27".parse()?).unwrap(), 221 | mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 222 | ACCEPT, 223 | ], 224 | vec![ 225 | prefix_stmt("daddr", "172.17.254.192/26".parse()?).unwrap(), 226 | mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 227 | ACCEPT, 228 | ], 229 | ]); 230 | 231 | let ip_rule_exp = make_ip_rule_mark(IpVersion::V4, 100, table_id, table_id); 232 | println!("> ip rule = {ip_rules:?}"); 233 | println!("> exp = {ip_rule_exp:?}"); 234 | assert!(ip_rules.contains(&ip_rule_exp)); 235 | 236 | let mut ip_routes_exp = [ 237 | RouteMessageBuilder::::new() 238 | .table_id(table_id) 239 | .destination_prefix("172.17.254.192".parse()?, 26) 240 | .output_interface(dummy_id) 241 | 
.gateway("fc65::ffff".parse()?) 242 | .build(), 243 | RouteMessageBuilder::::new() 244 | .table_id(table_id) 245 | .destination_prefix("192.0.2.0".parse()?, 27) 246 | .output_interface(dummy_id) 247 | .gateway("fc65::2333".parse()?) 248 | .build(), 249 | ]; 250 | ip_routes_exp.iter_mut().for_each(route_msg_normalize); 251 | assert_eq!(ip_routes, ip_routes_exp); 252 | 253 | Ok(()) 254 | } 255 | 256 |
/// Installs unreachable routes for 192.0.2.0/24 and fc99::/64, then announces
/// three redirects through ExaBGP whose next hops fall inside those prefixes.
/// Checks the nft mark rules, that both the IPv4 and the IPv6 ip rule are
/// installed, and that table `table_id` holds an unreachable route for each
/// redirected prefix (the expected IPv6 entry additionally carries `Oif(1)`).
/// The unreachable routes added up front are deleted before the assertions.
#[apply(test_local!)] 257 | async fn test_unreachable_routes() -> anyhow::Result<()> { 258 | let (conn, handle, _) = rtnetlink::new_connection()?; 259 | tokio::spawn(conn); 260 | 261 | let table_id = 0xffff0004; 262 | let unreach_prefixes = ["192.0.2.0/24", "fc99::/64"]; 263 | let unreach_msgs: Vec<_> = unreach_prefixes 264 | .into_iter() 265 | .map(|p| { 266 | let p = p.parse::().unwrap(); 267 | RouteMessageBuilder::::new() 268 | .destination_prefix(p.prefix(), p.len()) 269 | .unwrap() 270 | .kind(RouteType::Unreachable) 271 | .build() 272 | }) 273 | .collect(); 274 | for msg in unreach_msgs.iter().cloned() { 275 | handle.route().add(msg).execute().await?; 276 | } 277 | 278 | let (name, (_g1, exabgp, chans, _g2)) = run_kernel_test_exabgp(table_id, [ 279 | "match { destination 172.17.254.192/26; } then { redirect-to-nexthop-ietf 192.0.2.128; }", 280 | "match { destination 192.0.2.0/27; } then { redirect-to-nexthop-ietf fc99::2333; }", 281 | "match { destination fc42::/32; } then { redirect-to-nexthop-ietf fc99::6666; }", 282 | ]) 283 | .await?; 284 | 285 | print_nft_chain(&name, &name).await?; 286 | print_ip_rule(false).await?; 287 | print_ip_rule(true).await?; 288 | print_ip_route(false, 254).await?; 289 | print_ip_route(true, 254).await?; 290 | print_ip_route(false, table_id).await?; 291 | print_ip_route(true, table_id).await?; 292 | 293 | let nft_stmts = get_nft_stmts(&name, &name).await?; 294 | let ip_rules = [ 295 | get_ip_rule(&handle, IpVersion::V4).await?, 296 | get_ip_rule(&handle, IpVersion::V6).await?, 297 | ]; 298 | let ip_routes = [ 299 | 
get_ip_route(&handle, IpVersion::V4, table_id).await?, 300 | get_ip_route(&handle, IpVersion::V6, table_id).await?, 301 | ]; 302 | let ip_rules: Vec<_> = ip_rules.into_iter().flatten().collect(); 303 | let ip_routes: Vec<_> = ip_routes.into_iter().flatten().collect(); 304 | close_cli(chans).await; 305 | drop(exabgp); 306 | for msg in unreach_msgs { 307 | handle.route().del(msg).execute().await?; 308 | } 309 | 310 | assert_eq!(nft_stmts, [ 311 | vec![ 312 | prefix_stmt("daddr", "192.0.2.0/27".parse()?).unwrap(), 313 | mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 314 | ACCEPT, 315 | ], 316 | vec![ 317 | prefix_stmt("daddr", "172.17.254.192/26".parse()?).unwrap(), 318 | mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 319 | ACCEPT, 320 | ], 321 | vec![ 322 | prefix_stmt("daddr", "fc42::/32".parse()?).unwrap(), 323 | mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id)), 324 | ACCEPT, 325 | ], 326 | ]); 327 | 328 | let ip_rules_exp: Vec<_> = [IpVersion::V4, IpVersion::V6] 329 | .into_iter() 330 | .map(|x| make_ip_rule_mark(x, 100, table_id, table_id)) 331 | .collect(); 332 | println!("> ip rule = {ip_rules:?}"); 333 | println!("> exp = {ip_rules_exp:?}"); 334 | assert!(ip_rules_exp.iter().all(|x| ip_rules.contains(x))); 335 | 336 | let ip_routes_exp = ["172.17.254.192/26", "192.0.2.0/27", "fc42::/32"]; 337 | let mut ip_routes_exp: Vec<_> = ip_routes_exp 338 | .into_iter() 339 | .map(|prefix| { 340 | let prefix = prefix.parse::().unwrap(); 341 | let mut msg = RouteMessageBuilder::::new() 342 | .kind(RouteType::Unreachable) 343 | .table_id(table_id) 344 | .destination_prefix(prefix.prefix(), prefix.len()) 345 | .unwrap() 346 | .build(); 347 | if prefix.is_ipv6() { 348 | msg.attributes.push(RouteAttribute::Oif(1)); 349 | } 350 | msg 351 | }) 352 | .collect(); 353 | ip_routes_exp.iter_mut().for_each(route_msg_normalize); 354 | assert_eq!(ip_routes, ip_routes_exp); 355 | 356 | Ok(()) 357 | } 358 | 359 | // TODO: test IPv6 offset 360 | 
// TODO: test prefix overlap 361 | // TODO: test rtnetlink listen to network changes 362 | 363 | async fn run_kernel_test_bird( 364 | init_table_id: u32, 365 | flows: impl IntoIterator, 366 | ) -> anyhow::Result<(String, CliGuard)> { 367 | ensure_bird_2(); 368 | ensure_root(); 369 | ensure_loopback_up().await?; 370 | 371 | let (flow4, flow6) = flows.into_iter().fold((String::new(), String::new()), |(v4, v6), str| { 372 | if str.starts_with("flow4") { 373 | (v4 + "route " + str + ";", v6) 374 | } else { 375 | (v4, v6 + "route " + str + ";") 376 | } 377 | }); 378 | 379 | let (table_name, flow_port, cli) = prepare_kernel_test(init_table_id).await?; 380 | let bird = BIRD_CONFIG_1 381 | .replace("@@BIRD_PORT@@", &pick_port().await?.to_string()) 382 | .replace("@@FLOW_PORT@@", &flow_port.to_string()) 383 | .replace("@@FLOW4@@", &flow4) 384 | .replace("@@FLOW6@@", &flow6); 385 | 386 | let guard = run_kernel_test_common(run_cli_with_bird(cli, &bird).await?).await?; 387 | Ok((table_name, guard)) 388 | } 389 | 390 | async fn run_kernel_test_exabgp( 391 | init_table_id: u32, 392 | flows: impl IntoIterator, 393 | ) -> anyhow::Result<(String, CliGuard)> { 394 | // ensure_exabgp(); 395 | ensure_root(); 396 | ensure_loopback_up().await?; 397 | 398 | let flows = flows.into_iter().map(|x| format!("route {{ {x} }}")).join("\n"); 399 | 400 | let (table_name, port, cli) = prepare_kernel_test(init_table_id).await?; 401 | let daemon = EXABGP_CONFIG_1.replace("@@FLOWS@@", &flows); 402 | 403 | let guard = run_kernel_test_common(run_cli_with_exabgp(cli, &daemon, port).await?).await?; 404 | Ok((table_name, guard)) 405 | } 406 | 407 | async fn prepare_kernel_test(init_table_id: u32) -> anyhow::Result<(String, u16, Cli)> { 408 | let table_name: String = "flow_test_" 409 | .chars() 410 | .chain(rand::rng().sample_iter(&Alphanumeric).take(8).map(char::from)) 411 | .collect(); 412 | let port = pick_port().await?; 413 | let cli = Cli::try_parse_from([ 414 | "flow", 415 | "run", 416 | "-v", 417 | 
&format!("--bind=[::1]:{port}"), 418 | "--local-as=65000", 419 | "--remote-as=65000", 420 | "--table", 421 | &table_name, 422 | "--chain", 423 | &table_name, 424 | "--init-table-id", 425 | &init_table_id.to_string(), 426 | ])?; 427 | Ok((table_name, port, cli)) 428 | } 429 | 430 | async fn run_kernel_test_common(g: CliGuard) -> anyhow::Result { 431 | let (mut cli, mut daemon, (mut events, close), g) = g; 432 | let mut end_of_rib_count = 0; 433 | loop { 434 | select! { 435 | Some(event) = events.recv(), if !events.is_closed() => match event { 436 | TestEvent::EndOfRib(..) => { 437 | end_of_rib_count += 1; 438 | if end_of_rib_count >= 2 { 439 | break; 440 | } 441 | } 442 | TestEvent::Update(_) => {}, 443 | TestEvent::Exit(_) => panic!("unexpected CLI exit event"), 444 | }, 445 | _ = sleep(Duration::from_secs(10)) => panic!("timed out"), 446 | code = &mut cli => panic!("CLI exited early with code {}", code??), 447 | status = daemon.wait() => panic!("BIRD exited early with {}", status?), 448 | } 449 | } 450 | Ok((cli, daemon, (events, close), g)) 451 | } 452 | -------------------------------------------------------------------------------- /src/integration_tests/mod.rs: -------------------------------------------------------------------------------- 1 | //! These are "integration" tests for the `flow` binary, in the form of unit 2 | //! tests. 3 | 4 | mod helpers; 5 | 6 | use crate::bgp::Session; 7 | use crate::bgp::msg::UpdateMessage; 8 | use crate::bgp::nlri::NlriKind; 9 | use crate::net::Afi; 10 | use tokio::io::BufReader; 11 | use tokio::net::TcpStream; 12 | 13 | #[derive(Debug)] 14 | pub enum TestEvent { 15 | EndOfRib(Afi, NlriKind), 16 | Update(UpdateMessage<'static>), 17 | Exit(Session>), 18 | } 19 | 20 | macro_rules! test_local { 21 | ( 22 | $(#[$post_attr:meta])* 23 | async fn $name:ident ($($pname:ident : $pty:ty),* $(,)?) 24 | $(-> $ret:ty)? $bl:block 25 | ) => { 26 | $(#[$post_attr])* 27 | #[tokio::test] 28 | async fn $name($($pname: $pty),*) $(-> $ret)? 
{ 29 | tokio::task::LocalSet::new().run_until(async move $bl).await 30 | } 31 | }; 32 | } 33 | 34 | pub(crate) use test_local; 35 | 36 | // Test files 37 | mod flowspec; 38 | #[cfg(linux)] 39 | mod kernel_linux; 40 | 41 | const BIRD_CONFIG_1: &str = "\ 42 | router id 10.234.56.78; 43 | 44 | flow4 table myflow4; 45 | flow6 table myflow6; 46 | 47 | protocol static f4 { 48 | flow4 { table myflow4; }; 49 | @@FLOW4@@ 50 | } 51 | 52 | protocol static f6 { 53 | flow6 { table myflow6; }; 54 | @@FLOW6@@ 55 | } 56 | 57 | protocol bgp flow_test { 58 | debug all; 59 | connect delay time 1; 60 | 61 | local ::1 port @@BIRD_PORT@@ as 65000; 62 | neighbor ::1 port @@FLOW_PORT@@ as 65000; 63 | multihop; 64 | 65 | flow4 { table myflow4; import none; export all; }; 66 | flow6 { table myflow6; import none; export all; }; 67 | }"; 68 | 69 | const EXABGP_CONFIG_1: &str = "\ 70 | neighbor ::1 { 71 | router-id 10.234.56.78; 72 | local-address ::1; 73 | local-as 65000; 74 | peer-as 65000; 75 | 76 | flow { 77 | @@FLOWS@@ 78 | } 79 | }"; 80 | -------------------------------------------------------------------------------- /src/ipc.rs: -------------------------------------------------------------------------------- 1 | use crate::args::RunArgs; 2 | use crate::bgp::route::Routes; 3 | use crate::bgp::{Session, StateView}; 4 | use std::io; 5 | use std::path::{Path, PathBuf}; 6 | use tokio::io::{AsyncReadExt, AsyncWrite, AsyncWriteExt, BufReader}; 7 | use tokio::net::{TcpStream, UnixListener, UnixStream}; 8 | 9 | pub struct IpcServer { 10 | path: PathBuf, 11 | listener: UnixListener, 12 | } 13 | 14 | impl IpcServer { 15 | pub fn new(path: impl Into) -> anyhow::Result { 16 | let path = path.into(); 17 | Ok(Self { listener: UnixListener::bind(&path)?, path }) 18 | } 19 | 20 | pub async fn accept(&mut self) -> anyhow::Result { 21 | let (stream, _addr) = self.listener.accept().await?; 22 | Ok(stream) 23 | } 24 | } 25 | 26 | impl Drop for IpcServer { 27 | fn drop(&mut self) { 28 | let _ = 
/// Network namespace-aware socket path.
///
/// Returns `<dir>/<inode>.sock`, where `<inode>` is the inode number of
/// `/proc/self/ns/net` formatted as lowercase hexadecimal, so processes in
/// different network namespaces get different socket paths inside `dir`.
///
/// # Errors
///
/// Returns the underlying I/O error if `/proc/self/ns/net` cannot be stat'ed.
#[cfg(linux)]
pub fn get_sock_path(dir: &Path) -> io::Result<PathBuf> {
  use std::os::unix::fs::MetadataExt;

  // `fs::metadata` follows symlinks exactly like stat(2), so this yields the
  // same inode as the previous `libc::stat` call without any `unsafe`.
  let netns = std::fs::metadata("/proc/self/ns/net")?;
  Ok(dir.join(format!("{:x}.sock", netns.ino())))
}

/// Socket path for platforms without the `linux` cfg: always `<dir>/flow.sock`.
#[cfg(not(linux))]
pub fn get_sock_path(dir: &Path) -> io::Result<PathBuf> {
  Ok(dir.join("flow.sock"))
}
nftables::schema::{NfCmd, NfListObject, NfObject, Nftables as NftablesReq}; 13 | use serde::{Deserialize, Serialize}; 14 | use std::borrow::Cow; 15 | use std::collections::BTreeSet; 16 | use std::future::pending; 17 | 18 | #[derive(Debug, Serialize, Deserialize)] 19 | pub struct Linux { 20 | nft: Nftables, 21 | #[serde(skip)] 22 | rtnl: Option>, 23 | rtnl_args: RtNetlinkArgs, 24 | } 25 | 26 | impl Linux { 27 | pub async fn new(args: KernelArgs) -> Result { 28 | let KernelArgs { table, chain, hooked, hook_priority, rtnl } = args; 29 | Ok(Self { 30 | nft: Nftables::new(table, chain, hooked, hook_priority).await?, 31 | rtnl: None, 32 | rtnl_args: rtnl, 33 | }) 34 | } 35 | } 36 | 37 | impl Kernel for Linux { 38 | type Handle = BTreeSet; 39 | 40 | async fn apply( 41 | &mut self, 42 | spec: &Flowspec, 43 | before: Option<&Self::Handle>, 44 | info: &RouteInfo<'_>, 45 | ) -> Result { 46 | let mut total = 1usize; 47 | let (info_stmts, rt_info) = info 48 | .to_nft_stmts(spec.afi(), spec.dst_prefix(), &mut self.rtnl, &self.rtnl_args) 49 | .map(|(a, b)| (Some(a), b)) 50 | .unwrap_or_default(); 51 | let base = spec 52 | .to_nft_stmts()? 
53 | .chain(info_stmts.map(Ok)) 54 | .map_ok(|branch| { 55 | let count = total; 56 | total *= branch.len(); 57 | (branch, count) 58 | }) 59 | .collect::, _>>()?; 60 | let rules = (0..total).map(move |i| { 61 | (base.iter()) 62 | .flat_map(|(x, v)| x[if x.len() == 1 { 0 } else { i / v % x.len() }].iter()) 63 | .cloned() 64 | .collect::>() 65 | }); 66 | 67 | let nftables = NftablesReq { 68 | objects: rules 69 | .into_iter() 70 | .map(|x| { 71 | let cmd = if let Some(before) = before { 72 | NfCmd::Insert(self.nft.make_new_rule_with_index(x.into(), *before.first().unwrap())) 73 | } else { 74 | NfCmd::Add(self.nft.make_new_rule(x.into())) 75 | }; 76 | NfObject::CmdObject(cmd) 77 | }) 78 | .collect(), 79 | }; 80 | let result = self.nft.apply_and_return_ruleset(&nftables).await?; 81 | 82 | let handle: Self::Handle = (result.objects.iter()) 83 | .filter_map(|x| { 84 | if let NfObject::CmdObject(NfCmd::Add(NfListObject::Rule(rule))) 85 | | NfObject::CmdObject(NfCmd::Insert(NfListObject::Rule(rule))) = x 86 | { 87 | Some(rule.handle.unwrap()) 88 | } else { 89 | None 90 | } 91 | }) 92 | .collect(); 93 | 94 | debug_assert_eq!(nftables.objects.len(), handle.len()); 95 | 96 | if let Some((next_hop, table_id)) = rt_info { 97 | let rtnl = self.rtnl.as_mut().expect("RtNetlink should be initialized"); 98 | let real_table_id = match rtnl.add(handle.clone(), spec, next_hop).await { 99 | Ok(table) => table, 100 | Err(error) => { 101 | self.nft.remove_rules(handle).await; 102 | return Err(error); 103 | } 104 | }; 105 | assert_eq!(table_id, real_table_id, "table ID mismatch"); 106 | } 107 | 108 | Ok(handle) 109 | } 110 | 111 | async fn remove(&mut self, handle: &Self::Handle) { 112 | self.nft.remove_rules(handle.iter().copied()).await; 113 | if let Some(rtnl) = &mut self.rtnl { 114 | rtnl.del(handle).await; 115 | if rtnl.is_empty() { 116 | self.rtnl = None; 117 | } 118 | } 119 | } 120 | 121 | async fn process(&mut self) -> Result<()> { 122 | if let Some(rtnl) = &mut self.rtnl { 123 | 
rtnl.process().await 124 | } else { 125 | // TODO: should wait for self.rtnl to fill and exec rtnl.process() 126 | pending().await 127 | } 128 | } 129 | 130 | async fn terminate(self) { 131 | join!( 132 | self.nft.terminate(), 133 | OptionFuture::from(self.rtnl.map(RtNetlink::terminate)), 134 | ); 135 | } 136 | } 137 | 138 | #[derive(Debug, Clone, Args, Serialize, Deserialize)] 139 | pub struct KernelArgs { 140 | /// nftables table name. 141 | /// 142 | /// The table WILL NOT be automatically deleted when the program exits. 143 | #[arg(long, default_value_t = Cow::Borrowed("flowspecs"))] 144 | pub table: Cow<'static, str>, 145 | 146 | /// nftables chain name. 147 | /// 148 | /// The chain WILL be automatically deleted when the program exits. 149 | #[arg(long, default_value_t = Cow::Borrowed("flowspecs"))] 150 | pub chain: Cow<'static, str>, 151 | 152 | /// Attach flowspec rules to nftables input hook. 153 | /// 154 | /// If not set, the nftables rule must be `jump`ed or `goto`ed from a base 155 | /// (hooked) chain in the same table to take effect. 156 | #[arg(long)] 157 | pub hooked: bool, 158 | 159 | /// Hook priority. 
160 | #[arg(long, value_name = "PRIO", default_value_t = 0)] 161 | pub hook_priority: i32, 162 | 163 | #[command(flatten)] 164 | pub rtnl: RtNetlinkArgs, 165 | } 166 | -------------------------------------------------------------------------------- /src/kernel/linux/nft.rs: -------------------------------------------------------------------------------- 1 | use super::Linux; 2 | use crate::bgp::flow::{Bitmask, BitmaskFlags, Component, ComponentKind, Flowspec, Numeric, NumericFlags, Op, Ops}; 3 | use crate::bgp::route::{ExtCommunity, Ipv6ExtCommunity, RouteInfo, TrafficFilterAction, TrafficFilterActionKind}; 4 | use crate::kernel::rtnl::{RtNetlink, RtNetlinkArgs}; 5 | use crate::kernel::{Error, Result}; 6 | use crate::net::{Afi, IpPrefix}; 7 | use crate::util::{Intersect, TruthTable, grace}; 8 | use nftables::batch::Batch; 9 | use nftables::expr::Expression::{Number, String as Str}; 10 | use nftables::helper::{ 11 | DEFAULT_NFT, apply_and_return_ruleset_async, apply_ruleset_async, get_current_ruleset_raw_async, 12 | }; 13 | use nftables::schema::Nftables as NftablesReq; 14 | use nftables::{expr, schema, stmt, types}; 15 | use num_integer::Integer; 16 | use serde::{Deserialize, Serialize}; 17 | use smallvec::{SmallVec, smallvec, smallvec_inline}; 18 | use std::borrow::Cow; 19 | use std::cmp::{Ordering, min}; 20 | use std::collections::{BTreeMap, BTreeSet}; 21 | use std::marker::PhantomData; 22 | use std::mem::replace; 23 | use std::net::IpAddr; 24 | use std::ops::{Not, RangeInclusive}; 25 | 26 | #[derive(Debug, Serialize, Deserialize)] 27 | pub struct Nftables { 28 | table: Cow<'static, str>, 29 | chain: Cow<'static, str>, 30 | } 31 | 32 | impl Nftables { 33 | pub async fn new( 34 | table: impl Into>, 35 | chain: impl Into>, 36 | hooked: bool, 37 | priority: i32, 38 | ) -> Result { 39 | let table = table.into(); 40 | let chain = chain.into(); 41 | let mut batch = Batch::new(); 42 | batch.add(schema::NfListObject::Table(schema::Table { 43 | family: 
types::NfFamily::INet, 44 | name: table.clone(), 45 | ..Default::default() 46 | })); 47 | batch.add(schema::NfListObject::Chain(schema::Chain { 48 | family: types::NfFamily::INet, 49 | table: table.clone(), 50 | name: chain.clone(), 51 | _type: hooked.then_some(types::NfChainType::Filter), 52 | hook: hooked.then_some(types::NfHook::Input), 53 | prio: hooked.then_some(priority), 54 | ..Default::default() 55 | })); 56 | apply_ruleset_async(&batch.to_nftables()).await?; 57 | Ok(Self { table, chain }) 58 | } 59 | 60 | pub fn make_new_rule(&self, stmts: Cow<'static, [stmt::Statement]>) -> schema::NfListObject<'static> { 61 | schema::NfListObject::Rule(schema::Rule { 62 | family: types::NfFamily::INet, 63 | table: self.table.clone(), 64 | chain: self.chain.clone(), 65 | expr: stmts, 66 | ..Default::default() 67 | }) 68 | } 69 | 70 | pub fn make_new_rule_with_index( 71 | &self, 72 | stmts: Cow<'static, [stmt::Statement]>, 73 | index: u32, 74 | ) -> schema::NfListObject<'static> { 75 | schema::NfListObject::Rule(schema::Rule { 76 | family: types::NfFamily::INet, 77 | table: self.table.clone(), 78 | chain: self.chain.clone(), 79 | expr: stmts, 80 | handle: Some(index), // `index` seems not working, and `handle` works fine 81 | ..Default::default() 82 | }) 83 | } 84 | 85 | #[expect(unused)] 86 | pub async fn get_current_ruleset_raw(&self) -> Result { 87 | let args = ["-n", "-s", "list", "chain", "inet", &self.table, &self.chain]; 88 | Ok(get_current_ruleset_raw_async(DEFAULT_NFT, args).await?) 89 | } 90 | 91 | pub async fn apply_ruleset(&self, n: &NftablesReq<'_>) -> Result<()> { 92 | Ok(apply_ruleset_async(n).await?) 93 | } 94 | 95 | pub async fn apply_and_return_ruleset(&self, n: &NftablesReq<'_>) -> Result> { 96 | Ok(apply_and_return_ruleset_async(n).await?) 
97 | } 98 | 99 | fn make_rule_handle(&self, handle: u32) -> schema::NfListObject { 100 | schema::NfListObject::Rule(schema::Rule { 101 | family: types::NfFamily::INet, 102 | table: self.table.clone(), 103 | chain: self.chain.clone(), 104 | handle: Some(handle), 105 | ..Default::default() 106 | }) 107 | } 108 | 109 | pub async fn remove_rules(&self, handle: impl IntoIterator) { 110 | let rm = NftablesReq { 111 | objects: handle 112 | .into_iter() 113 | .map(|x| schema::NfObject::CmdObject(schema::NfCmd::Delete(self.make_rule_handle(x)))) 114 | .collect(), 115 | }; 116 | grace(self.apply_ruleset(&rm).await, "failed to remove nftables rules"); 117 | } 118 | 119 | pub async fn terminate(self) { 120 | let mut batch = Batch::new(); 121 | batch.delete(schema::NfListObject::Chain(schema::Chain { 122 | family: types::NfFamily::INet, 123 | table: self.table.clone(), 124 | name: self.chain.clone(), 125 | ..Default::default() 126 | })); 127 | _ = apply_ruleset_async(&batch.to_nftables()).await; 128 | } 129 | } 130 | 131 | /// Makes sure transport protocol is consistent across components inside a 132 | /// flowspec. 
133 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 134 | enum Transport { 135 | Tcp, 136 | Icmp, 137 | Unknown, 138 | } 139 | 140 | type StatementBlock<'a> = SmallVec<[stmt::Statement<'a>; 1]>; 141 | type StatementBranch<'a> = SmallVec<[StatementBlock<'a>; 1]>; 142 | 143 | impl Flowspec { 144 | pub(super) fn to_nft_stmts(&self) -> Result>> + '_> { 145 | use ComponentKind as CK; 146 | 147 | let set = self.component_set(); 148 | let tcp = set.contains(&CK::TcpFlags); 149 | let icmp = set.contains(&CK::IcmpType) || set.contains(&CK::IcmpCode); 150 | let transport = match (tcp, icmp) { 151 | (false, false) => Transport::Unknown, 152 | (false, true) => Transport::Icmp, 153 | (true, false) => Transport::Tcp, 154 | _ => return Err(Error::MatchNothing), 155 | }; 156 | 157 | let first = make_match( 158 | stmt::Operator::EQ, 159 | make_meta(expr::MetaKey::Nfproto), 160 | Str(if self.afi() == Afi::Ipv4 { "ipv4" } else { "ipv6" }.into()), 161 | ); 162 | let result = Some(Ok(smallvec_inline![smallvec_inline![first]])) 163 | .into_iter() 164 | .chain(self.components().map(move |x| x.to_nft_stmts(self.afi(), transport))) 165 | .filter(|x| x.is_err() || x.as_ref().is_ok_and(|y| !y.is_empty())); 166 | Ok(result) 167 | } 168 | } 169 | 170 | impl Component { 171 | fn to_nft_stmts(&self, afi: Afi, tp: Transport) -> Result> { 172 | use Component::*; 173 | use Transport::*; 174 | 175 | let ip_ver = if afi == Afi::Ipv4 { "ip" } else { "ip6" }; 176 | let icmp = if afi == Afi::Ipv4 { "icmp" } else { "icmpv6" }; 177 | let (th, tp_code) = match tp { 178 | Tcp => (Ok("tcp"), Some(6)), 179 | Icmp => (Err(Error::MatchNothing), Some(if afi == Afi::Ipv4 { 1 } else { 58 })), 180 | Unknown => (Ok("th"), None), 181 | }; 182 | let result: StatementBranch = match self { 183 | &DstPrefix(prefix, 0) => prefix_stmt("daddr", prefix).into_iter().map(|x| smallvec_inline![x]).collect(), 184 | &SrcPrefix(prefix, 0) => prefix_stmt("saddr", prefix).into_iter().map(|x| smallvec_inline![x]).collect(), 185 | 
&DstPrefix(pattern, offset) => pattern_stmt(false, pattern, offset).into_iter().collect(), 186 | &SrcPrefix(pattern, offset) => pattern_stmt(true, pattern, offset).into_iter().collect(), 187 | 188 | Protocol(ops) => match tp_code { 189 | Some(code) => ops.op(code).then(SmallVec::new_const).ok_or(Error::MatchNothing)?, 190 | None => range_stmt_branch(make_meta(expr::MetaKey::L4proto), ops, 0xff)?, 191 | }, 192 | 193 | Port(ops) => { 194 | let th = th?; 195 | range_stmt(make_payload_field(th, "dport"), ops, 0xffff)? 196 | .into_iter() 197 | .chain(range_stmt(make_payload_field(th, "sport"), ops, 0xffff)?) 198 | .map(|x| smallvec_inline![x]) 199 | .collect() 200 | } 201 | DstPort(ops) => range_stmt_branch(make_payload_field(th?, "dport"), ops, 0xffff)?, 202 | SrcPort(ops) => range_stmt_branch(make_payload_field(th?, "sport"), ops, 0xffff)?, 203 | IcmpType(ops) if tp == Icmp => range_stmt_branch(make_payload_field(icmp, "type"), ops, 0xff)?, 204 | IcmpCode(ops) if tp == Icmp => range_stmt_branch(make_payload_field(icmp, "code"), ops, 0xff)?, 205 | IcmpType(_) | IcmpCode(_) => return Err(Error::MatchNothing), 206 | TcpFlags(ops) => { 207 | let tt = ops.to_truth_table(); 208 | let tt = tt.shrink(0b11111111); 209 | if tt.is_always_false() { 210 | return Err(Error::MatchNothing); 211 | } else if tt.is_always_true() { 212 | return Ok(SmallVec::new_const()); 213 | } 214 | smallvec_inline![smallvec_inline![make_match( 215 | if tt.inv { 216 | stmt::Operator::NEQ 217 | } else { 218 | stmt::Operator::EQ 219 | }, 220 | expr::Expression::BinaryOperation(Box::new(expr::BinaryOperation::AND( 221 | make_payload_field("tcp", "flags"), 222 | Number(tt.mask as u32), 223 | ))), 224 | expr::Expression::Named(expr::NamedExpression::Set( 225 | (tt.truth.iter().copied()) 226 | .map(|x| expr::SetItem::Element(Number(x as u32))) 227 | .collect(), 228 | )), 229 | )]] 230 | } 231 | PacketLen(ops) => { 232 | let ops = if afi == Afi::Ipv4 { 233 | Cow::Borrowed(ops) 234 | } else { 235 | 
Cow::Owned(ops.with_offset(-40)) 236 | }; 237 | range_stmt_branch(make_payload_field(ip_ver, "length"), &ops, 0xffff)? 238 | } 239 | Dscp(ops) => range_stmt_branch(make_payload_field(ip_ver, "dscp"), ops, 0x3f)?, 240 | Fragment(ops) => { 241 | // int frag_op_value = [LF,FF,IsF,DF] 242 | // possible: [DF], [IsF], [FF], [LF], [LF,IsF](=[LF]) 243 | let mask = if afi == Afi::Ipv4 { 0b1111 } else { 0b1110 }; 244 | let tt = ops.to_truth_table(); 245 | let tt = tt.shrink(mask); 246 | let valid_set = [0b0001, 0b0010, 0b1010, 0b0100, 0b1000].into_iter().collect(); 247 | let mut new_set: BTreeSet<_> = tt.possible_values_masked().intersection(&valid_set).copied().collect(); 248 | new_set.remove(&0b1010).then(|| new_set.insert(0b1000)); 249 | 250 | let mut iter = new_set.into_iter().peekable(); 251 | let mut branch = StatementBranch::new(); 252 | 253 | let frag_off = if afi == Afi::Ipv4 { 254 | make_payload_field("ip", "frag-off") 255 | } else { 256 | make_exthdr("frag", "frag-off", 0) 257 | }; 258 | let mf = if afi == Afi::Ipv4 { 259 | make_payload_raw(expr::PayloadBase::NH, 18, 1) 260 | } else { 261 | make_exthdr("frag", "more-fragments", 0) 262 | }; 263 | 264 | // DF (IPv4) 265 | if let Some(0b0001) = iter.peek() { 266 | iter.next(); 267 | branch.push(smallvec_inline![make_match( 268 | stmt::Operator::EQ, 269 | make_payload_raw(expr::PayloadBase::NH, 17, 1), 270 | Number(1), 271 | )]); 272 | } 273 | // IsF: {ip,frag} frag-off != 0 274 | if let Some(0b0010) = iter.peek() { 275 | iter.next(); 276 | branch.push(smallvec_inline![make_match( 277 | stmt::Operator::NEQ, 278 | frag_off.clone(), 279 | Number(0) 280 | )]); 281 | } 282 | // FF: {ip,frag} frag-off == 0 && MF == 1 283 | if let Some(0b0100) = iter.peek() { 284 | iter.next(); 285 | branch.push(smallvec![ 286 | make_match(stmt::Operator::EQ, frag_off.clone(), Number(0)), 287 | make_match(stmt::Operator::EQ, mf.clone(), Number(1)), 288 | ]); 289 | } 290 | // LF: {ip,frag} frag-off != 0 && MF == 0 291 | if let Some(0b1000) = 
iter.peek() { 292 | iter.next(); 293 | branch.push(smallvec![ 294 | make_match(stmt::Operator::NEQ, frag_off, Number(0)), 295 | make_match(stmt::Operator::EQ, mf, Number(0)), 296 | ]); 297 | } 298 | branch 299 | } 300 | FlowLabel(ops) => range_stmt_branch(make_payload_field("ip6", "flowlabel"), ops, 0x1fff)?, 301 | }; 302 | Ok(result) 303 | } 304 | } 305 | 306 | impl RouteInfo<'_> { 307 | pub(super) fn to_nft_stmts( 308 | &self, 309 | afi: Afi, 310 | prefix: IpPrefix, 311 | rtnl: &mut Option>, 312 | rtnl_args: &RtNetlinkArgs, 313 | ) -> Option<(StatementBranch<'static>, Option<(IpAddr, u32)>)> { 314 | let set = (self.ext_comm.iter().copied()) 315 | .filter_map(ExtCommunity::action) 316 | .chain(self.ipv6_ext_comm.iter().copied().filter_map(Ipv6ExtCommunity::action)) 317 | .map(|x| (x.kind(), x)) 318 | .collect::>(); 319 | let mut terminal = set 320 | .get(&TrafficFilterActionKind::TrafficAction) 321 | .map(|x| { 322 | let &TrafficFilterAction::TrafficAction { terminal, .. } = x else { 323 | unreachable!() 324 | }; 325 | terminal 326 | }) 327 | .unwrap_or(true); 328 | let mut last_term = false; 329 | let mut rt_info = None; 330 | let mut result = set 331 | .into_values() 332 | .map(move |x| x.to_nft_stmts(afi, prefix, rtnl, rtnl_args)) 333 | .map(|(x, r, term)| { 334 | term.then(|| terminal = false); 335 | rt_info = r; 336 | (x, replace(&mut last_term, term)) 337 | }) 338 | .map_while(|(x, term)| term.not().then_some(x)) 339 | .filter(|x| !x.is_empty()) 340 | .collect::(); 341 | if terminal { 342 | let ll = result.last().and_then(|x| x.last()); 343 | if ll.is_some_and(|x| *x == ACCEPT || *x == DROP) || ll.is_none() { 344 | result.push(smallvec_inline![ACCEPT]); 345 | } else { 346 | result.last_mut().unwrap().push(ACCEPT); 347 | } 348 | } 349 | result.is_empty().not().then_some((result, rt_info)) 350 | } 351 | } 352 | 353 | impl TrafficFilterAction { 354 | fn to_nft_stmts( 355 | self, 356 | afi: Afi, 357 | prefix: IpPrefix, 358 | rtnl: &mut Option>, 359 | rtnl_args: 
&RtNetlinkArgs, 360 | ) -> (StatementBlock<'static>, Option<(IpAddr, u32)>, bool) { 361 | use TrafficFilterAction::*; 362 | let action = match self { 363 | TrafficRateBytes { rate, .. } | TrafficRatePackets { rate, .. } if rate <= 0. || rate.is_nan() => { 364 | return (smallvec_inline![DROP], None, true); 365 | } 366 | TrafficRateBytes { rate, .. } => smallvec![make_limit(true, rate, "bytes", "second"), DROP], 367 | TrafficRatePackets { rate, .. } => smallvec![make_limit(true, rate, "packets", "second"), DROP], 368 | TrafficAction { sample: true, .. } => smallvec_inline![stmt::Statement::Log(Some(stmt::Log::new(None))),], 369 | TrafficAction { .. } => SmallVec::new_const(), 370 | RtRedirect { .. } | RtRedirectIpv6 { .. } => SmallVec::new_const(), // redirect is not supported at the moment 371 | TrafficMarking { dscp } => smallvec_inline![mangle_stmt( 372 | make_payload_field(if afi == Afi::Ipv4 { "ip" } else { "ip6" }, "dscp"), 373 | Number(dscp.into()) 374 | )], 375 | RedirectToIp { ip, copy: true } => smallvec_inline![stmt::Statement::Dup(stmt::Dup { 376 | addr: Str(ip.to_string().into()), 377 | dev: None, 378 | })], 379 | RedirectToIp { ip, copy: false } => { 380 | let rtnl = if let Some(rtnl) = rtnl { 381 | rtnl 382 | } else { 383 | let new = RtNetlink::new(rtnl_args.clone()).unwrap(); 384 | rtnl.get_or_insert(new) 385 | }; 386 | let table_id = rtnl.next_table_for(prefix); 387 | let result = smallvec_inline![mangle_stmt(make_meta(expr::MetaKey::Mark), Number(table_id))]; 388 | return (result, Some((ip, table_id)), false); 389 | } 390 | }; 391 | (action, None, false) 392 | } 393 | } 394 | 395 | pub(crate) const ACCEPT: stmt::Statement = stmt::Statement::Accept(None); 396 | pub(crate) const DROP: stmt::Statement = stmt::Statement::Drop(None); 397 | 398 | pub(crate) fn make_match<'a>( 399 | op: stmt::Operator, 400 | left: expr::Expression<'a>, 401 | right: expr::Expression<'a>, 402 | ) -> stmt::Statement<'a> { 403 | stmt::Statement::Match(stmt::Match { left, 
right, op }) 404 | } 405 | 406 | pub(crate) fn make_limit<'a>(over: bool, rate: f32, unit: &'a str, per: &'a str) -> stmt::Statement<'a> { 407 | stmt::Statement::Limit(stmt::Limit { 408 | rate: rate.round() as u32, 409 | rate_unit: Some(unit.into()), 410 | per: Some(per.into()), 411 | burst: Some(0), 412 | burst_unit: Some("bytes".into()), 413 | inv: Some(over), 414 | }) 415 | } 416 | 417 | pub(crate) fn make_payload_raw(base: expr::PayloadBase, offset: u32, len: u32) -> expr::Expression<'static> { 418 | expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload::PayloadRaw( 419 | expr::PayloadRaw { base, offset, len }, 420 | ))) 421 | } 422 | 423 | pub(crate) fn make_payload_field<'a>(protocol: &'a str, field: &'a str) -> expr::Expression<'a> { 424 | expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload::PayloadField( 425 | expr::PayloadField { protocol: protocol.into(), field: field.into() }, 426 | ))) 427 | } 428 | 429 | pub(crate) fn make_meta(key: expr::MetaKey) -> expr::Expression<'static> { 430 | expr::Expression::Named(expr::NamedExpression::Meta(expr::Meta { key })) 431 | } 432 | 433 | pub(crate) fn make_exthdr<'a>(name: &'a str, field: &'a str, offset: u32) -> expr::Expression<'a> { 434 | expr::Expression::Named(expr::NamedExpression::Exthdr(expr::Exthdr { 435 | name: name.into(), 436 | field: Some(field.into()), 437 | offset: Some(offset), 438 | })) 439 | } 440 | 441 | pub(crate) fn prefix_stmt(field: &'static str, prefix: IpPrefix) -> Option> { 442 | (prefix.len() != 0).then(|| { 443 | make_match( 444 | stmt::Operator::EQ, 445 | make_payload_field(if prefix.afi() == Afi::Ipv4 { "ip" } else { "ip6" }, field), 446 | if prefix.is_single() { 447 | Str(format!("{}", prefix.prefix()).into()) 448 | } else { 449 | expr::Expression::Named(expr::NamedExpression::Prefix(expr::Prefix { 450 | addr: Box::new(Str(format!("{}", prefix.prefix()).into())), 451 | len: prefix.len().into(), 452 | })) 453 | }, 454 | ) 455 | }) 456 | } 457 | 458 | 
pub(crate) fn pattern_stmt(src: bool, pattern: IpPrefix, offset: u8) -> Option> { 459 | if pattern.len() == 0 { 460 | return None; 461 | } 462 | 463 | let mut buf = SmallVec::new_const(); 464 | 465 | buf.push(make_match( 466 | stmt::Operator::EQ, 467 | make_meta(expr::MetaKey::Nfproto), 468 | Str("ipv6".into()), 469 | )); 470 | 471 | let addr_offset = if src { 64 } else { 192 }; 472 | let start_32bit = offset.next_multiple_of(32); 473 | let pre_rem = start_32bit - offset; 474 | let end_32bit = pattern.len().prev_multiple_of(&32); // this uses num::Integer, not std 475 | let post_rem = pattern.len() - end_32bit; 476 | 477 | let IpAddr::V6(ip) = pattern.prefix() else { 478 | unreachable!(); 479 | }; 480 | if start_32bit + 32 <= end_32bit { 481 | if pre_rem > 0 { 482 | let num = ip.to_bits() >> (128 - start_32bit); 483 | buf.push(make_match( 484 | stmt::Operator::EQ, 485 | make_payload_raw(expr::PayloadBase::NH, addr_offset + offset as u32, pre_rem.into()), 486 | Number(num.try_into().unwrap()), 487 | )); 488 | } 489 | for i in (start_32bit..end_32bit).step_by(32) { 490 | let num = (ip.to_bits() >> (pattern.len() - 8 - i)) as u32; 491 | buf.push(make_match( 492 | stmt::Operator::EQ, 493 | make_payload_raw(expr::PayloadBase::NH, addr_offset + i as u32, 32), 494 | Number(num), 495 | )); 496 | } 497 | if post_rem > 0 { 498 | let num = ((ip.to_bits() >> (128 - pattern.len())) as u32) & (u32::MAX >> (32 - post_rem)); 499 | buf.push(make_match( 500 | stmt::Operator::EQ, 501 | make_payload_raw(expr::PayloadBase::NH, addr_offset + end_32bit as u32, post_rem.into()), 502 | Number(num), 503 | )); 504 | } 505 | } else { 506 | let num = ip.to_bits() >> (128 - pattern.len()); 507 | buf.push(make_match( 508 | stmt::Operator::EQ, 509 | make_payload_raw( 510 | expr::PayloadBase::NH, 511 | addr_offset + u32::from(offset), 512 | u32::from(pattern.len() - offset), 513 | ), 514 | Number(num.try_into().unwrap()), 515 | )); 516 | } 517 | 518 | Some(buf) 519 | } 520 | 521 | pub(crate) fn 
range_stmt<'a>( 522 | left: expr::Expression<'a>, 523 | ops: &Ops, 524 | max: u64, 525 | ) -> Result>> { 526 | let ranges = ops.to_ranges(); 527 | if is_sorted_ranges_always_true(&ranges) { 528 | return Ok(None); 529 | } else if ranges.is_empty() { 530 | return Err(Error::MatchNothing); 531 | } 532 | let right = if ranges.len() == 1 { 533 | let (start, end) = ranges.into_iter().next().unwrap().into_inner(); 534 | if start == end { 535 | Number(start as u32) 536 | } else { 537 | expr::Expression::Range(Box::new(expr::Range { 538 | range: [Number(start as u32), Number(min(end, max) as u32)], 539 | })) 540 | } 541 | } else { 542 | let allowed = ranges 543 | .into_iter() 544 | .map(RangeInclusive::into_inner) 545 | .filter_map(|(a, b)| (a <= max).then_some(if b <= max { a..=b } else { a..=max })) 546 | .map(|x| { 547 | let (start, end) = x.into_inner(); 548 | // HACK: Does nftables itself support 64-bit integers? We shrink it for now. 549 | // But most of the matching expressions is smaller than 32 bits anyway. 
550 | let expr = if start == end { 551 | Number(start as u32) 552 | } else { 553 | expr::Expression::Range(Box::new(expr::Range { 554 | range: [Number(start as u32), Number(end as u32)], 555 | })) 556 | }; 557 | expr::SetItem::Element(expr) 558 | }) 559 | .collect(); 560 | expr::Expression::Named(expr::NamedExpression::Set(allowed)) 561 | }; 562 | Ok(Some(make_match(stmt::Operator::EQ, left, right))) 563 | } 564 | 565 | pub(crate) fn range_stmt_branch<'a>( 566 | left: expr::Expression<'a>, 567 | ops: &Ops, 568 | max: u64, 569 | ) -> Result> { 570 | range_stmt(left, ops, max).map(|x| x.into_iter().map(|x| smallvec_inline![x]).collect()) 571 | } 572 | 573 | pub(crate) fn mangle_stmt<'a>(key: expr::Expression<'a>, value: expr::Expression<'a>) -> stmt::Statement<'a> { 574 | stmt::Statement::Mangle(stmt::Mangle { key, value }) 575 | } 576 | 577 | impl Ops { 578 | fn to_ranges(&self) -> Vec> { 579 | let mut buf = Vec::new(); 580 | let mut cur = SmallVec::<[_; 4]>::new(); 581 | cur.extend(self.0[0].to_range_iter()); 582 | 583 | for op in &self.0[1..] 
{ 584 | if op.is_and() { 585 | if cur.is_empty() { 586 | continue; 587 | } 588 | let Some((r1, r2)) = op.to_ranges() else { 589 | cur.clear(); 590 | continue; 591 | }; 592 | 593 | let mut addition = SmallVec::<[_; 4]>::new(); 594 | cur.retain(|x| { 595 | if let Some(y) = x.clone().intersect(r1.clone()) { 596 | if let Some(r2) = &r2 { 597 | addition.extend(x.clone().intersect(r2.clone())); 598 | } 599 | *x = y; 600 | return true; 601 | } else if let Some(r2) = &r2 { 602 | if let Some(y) = x.clone().intersect(r2.clone()) { 603 | *x = y; 604 | return true; 605 | } 606 | } 607 | false 608 | }); 609 | cur.extend(addition); 610 | } else { 611 | buf.extend(cur.drain(..)); 612 | cur.extend(op.to_range_iter()); 613 | } 614 | } 615 | buf.extend(cur); 616 | buf.sort_unstable_by_key(|x| x.clone().into_inner()); 617 | buf 618 | } 619 | 620 | pub fn offset(&mut self, offset: i64) { 621 | self.0.iter_mut().for_each(|x| *x = x.offset(offset)); 622 | } 623 | 624 | pub fn with_offset(&self, offset: i64) -> Self { 625 | let mut ops = self.clone(); 626 | ops.offset(offset); 627 | ops 628 | } 629 | } 630 | 631 | impl Ops { 632 | fn to_truth_table(&self) -> TruthTable { 633 | let mut buf = TruthTable::always_false(); 634 | let mut cur = self.0[0].to_truth_table(); 635 | 636 | for op in &self.0[1..] 
{ 637 | if op.is_and() { 638 | if cur.is_always_false() { 639 | continue; 640 | } 641 | cur = cur.and(op.to_truth_table()); 642 | } else { 643 | buf = buf.or(cur); 644 | cur = op.to_truth_table(); 645 | } 646 | } 647 | buf = buf.or(cur); 648 | buf 649 | } 650 | } 651 | 652 | impl Op { 653 | fn to_ranges(self) -> Option<(RangeInclusive, Option>)> { 654 | use NumericFlags::*; 655 | match NumericFlags::from_repr(self.flags & 0b111).unwrap() { 656 | False => None, 657 | Lt if self.value == 0 => None, 658 | Gt if self.value == u64::MAX => None, 659 | Lt => Some((0..=self.value - 1, None)), 660 | Gt => Some((self.value + 1..=u64::MAX, None)), 661 | Eq => Some((self.value..=self.value, None)), 662 | Le => Some((0..=self.value, None)), 663 | Ge => Some((self.value..=u64::MAX, None)), 664 | Ne => Some((0..=self.value - 1, Some(self.value + 1..=u64::MAX))), 665 | True => Some((0..=u64::MAX, None)), 666 | } 667 | } 668 | 669 | fn to_range_iter(self) -> impl Iterator> + Clone { 670 | self 671 | .to_ranges() 672 | .map(|(a, b)| [Some(a), b].into_iter().flatten()) 673 | .into_iter() 674 | .flatten() 675 | } 676 | 677 | /// Offset the operator by adding n (no overflow) to every value compared. 
678 | fn offset(self, n: i64) -> Self { 679 | use NumericFlags::*; 680 | use Ordering::*; 681 | 682 | let diff = n.unsigned_abs(); 683 | let f = NumericFlags::from_repr(self.flags & 0b111).unwrap(); 684 | 685 | let (flags, value) = match (f, n.cmp(&0)) { 686 | (_, Equal) => return self, 687 | (False | True, _) => (f, 0), 688 | (Lt | Le | Eq | Ne, Less) => self.value.checked_sub(diff).map(|v| (f, v)).unwrap_or((False, 0)), 689 | (Lt | Le, Greater) => self.value.checked_add(diff).map(|v| (f, v)).unwrap_or((True, 0)), 690 | (Gt | Ge | Eq | Ne, Greater) => self.value.checked_add(diff).map(|v| (f, v)).unwrap_or((False, 0)), 691 | (Gt | Ge, Less) => self.value.checked_sub(diff).map(|v| (f, v)).unwrap_or((True, 0)), 692 | }; 693 | 694 | let flags = self.flags & Self::AND | flags as u8; 695 | Self { flags, value, _k: PhantomData } 696 | } 697 | } 698 | 699 | impl Op { 700 | pub fn to_truth_table(self) -> TruthTable { 701 | use BitmaskFlags::*; 702 | let (inv, init) = match (BitmaskFlags::from_repr(self.flags & 0b11).unwrap(), self.value) { 703 | (Any | NotAll, 0) => (false, None), // always false 704 | (NotAny | All, 0) => (true, None), // always true 705 | (Any, _) => (true, Some(0)), 706 | (NotAny, _) => (false, Some(0)), 707 | (All, _) => (false, Some(self.value)), 708 | (NotAll, _) => (true, Some(self.value)), 709 | }; 710 | let mut truth = BTreeSet::new(); 711 | truth.extend(init); 712 | TruthTable { mask: self.value, inv, truth } 713 | } 714 | } 715 | 716 | /// This assumes the ranges are sorted as (range.start, range.end). 
717 | fn is_sorted_ranges_always_true<'a>(ranges: impl IntoIterator>) -> bool { 718 | let mut iter = ranges.into_iter(); 719 | let Some(mut buf) = iter.next().cloned() else { 720 | return false; 721 | }; 722 | for r in iter { 723 | if buf.end() >= r.start() { 724 | buf = *buf.start()..=*r.end(); 725 | } else { 726 | return false; 727 | } 728 | } 729 | buf == (0..=u64::MAX) 730 | } 731 | 732 | #[cfg(test)] 733 | mod tests { 734 | use super::*; 735 | use test_case::test_case; 736 | 737 | #[test_case(&[0x03, 114, 0x54, 2, 2, 0x81, 1], &[1..=1, 114..=513])] 738 | #[test_case(&[0x06, 114, 0x56, 2, 2, 0xd6, 7, 127], &[0..=113, 115..=513, 515..=1918, 1920..=u64::MAX])] 739 | #[tokio::test] 740 | async fn test_ops_to_range(mut seq: &[u8], result: &[RangeInclusive]) -> anyhow::Result<()> { 741 | let ops = Ops::::read(&mut seq).await?; 742 | let ranges = ops.to_ranges(); 743 | println!("{ranges:?}"); 744 | assert_eq!(ranges, result); 745 | Ok(()) 746 | } 747 | } 748 | -------------------------------------------------------------------------------- /src/kernel/mod.rs: -------------------------------------------------------------------------------- 1 | //! Bridges flowspecs to OS kernel. 2 | //! 3 | //! Currently only Linux is supported. Future support may include *BSD using 4 | //! `pf` as backend. 5 | 6 | #[cfg(linux)] 7 | mod linux; 8 | #[cfg(linux)] 9 | pub use linux::*; 10 | 11 | #[cfg(rtnetlink_supported)] 12 | mod rtnl; 13 | 14 | use crate::bgp::flow::Flowspec; 15 | use crate::bgp::route::RouteInfo; 16 | use serde::{Deserialize, Serialize}; 17 | use std::future::{Future, pending, ready}; 18 | use strum::Display; 19 | use thiserror::Error; 20 | 21 | /// Interface between BGP flowspec and the OS. 22 | pub trait Kernel: Sized { 23 | /// Type representing a flowspec's counterpart in kernel. 24 | type Handle: Eq + Ord; 25 | 26 | /// Apply a flowspec to kernel. 
27 | fn apply( 28 | &mut self, 29 | spec: &Flowspec, 30 | before: Option<&Self::Handle>, 31 | info: &RouteInfo<'_>, 32 | ) -> impl Future>; 33 | 34 | /// Remove a flowspec from kernel using previously returned handle. 35 | fn remove(&mut self, handle: &Self::Handle) -> impl Future; 36 | 37 | /// Process notifications from kernel, timers, etc. 38 | fn process(&mut self) -> impl Future> { 39 | pending() 40 | } 41 | 42 | /// Drops the kernel interface and do asynchronous cleanups. 43 | fn terminate(self) -> impl Future { 44 | ready(()) 45 | } 46 | } 47 | 48 | /// Adapter of different `Kernel` implementations. 49 | #[derive(Debug, Serialize, Deserialize)] 50 | pub enum KernelAdapter { 51 | /// Do nothing. 52 | Noop, 53 | 54 | /// Linux implementation, using nftables and rtnetlink. 55 | #[cfg(linux)] 56 | Linux(Linux), 57 | } 58 | 59 | impl KernelAdapter { 60 | #[cfg(linux)] 61 | pub async fn linux(args: KernelArgs) -> Result { 62 | Ok(Self::Linux(Linux::new(args).await?)) 63 | } 64 | } 65 | 66 | impl Kernel for KernelAdapter { 67 | type Handle = KernelHandle; 68 | 69 | async fn apply( 70 | &mut self, 71 | _spec: &Flowspec, 72 | _before: Option<&Self::Handle>, 73 | _info: &RouteInfo<'_>, 74 | ) -> Result { 75 | match self { 76 | Self::Noop => Ok(KernelHandle::Noop), 77 | #[cfg(linux)] 78 | Self::Linux(linux) => Ok(KernelHandle::Linux( 79 | linux.apply(_spec, _before.map(|l| l.as_linux().unwrap()), _info).await?, 80 | )), 81 | } 82 | } 83 | 84 | async fn remove(&mut self, handle: &Self::Handle) { 85 | match (self, handle) { 86 | (Self::Noop, KernelHandle::Noop) => {} 87 | #[cfg(linux)] 88 | (Self::Linux(linux), KernelHandle::Linux(handle)) => linux.remove(handle).await, 89 | #[cfg(linux)] 90 | _ => panic!("handle mismatch"), 91 | } 92 | } 93 | 94 | async fn process(&mut self) -> Result<()> { 95 | match self { 96 | Self::Noop => pending().await, 97 | #[cfg(linux)] 98 | Self::Linux(linux) => linux.process().await, 99 | } 100 | } 101 | 102 | async fn terminate(self) { 103 | 
match self { 104 | Self::Noop => {} 105 | #[cfg(linux)] 106 | Self::Linux(linux) => linux.terminate().await, 107 | } 108 | } 109 | } 110 | 111 | #[derive(Debug, Display, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] 112 | pub enum KernelHandle { 113 | #[strum(to_string = "()")] 114 | Noop, 115 | 116 | #[cfg(linux)] 117 | #[strum(to_string = "{0:?}")] 118 | Linux(::Handle), 119 | } 120 | 121 | impl KernelHandle { 122 | #[cfg(linux)] 123 | pub fn as_linux(&self) -> Option<&::Handle> { 124 | match self { 125 | KernelHandle::Linux(linux) => Some(linux), 126 | _ => None, 127 | } 128 | } 129 | } 130 | 131 | #[derive(Debug, Error)] 132 | pub enum Error { 133 | #[cfg(linux)] 134 | #[error(transparent)] 135 | Nftables(#[from] nftables::helper::NftablesError), 136 | 137 | #[cfg(rtnetlink_supported)] 138 | #[error(transparent)] 139 | RtNetlink(#[from] rtnetlink::Error), 140 | 141 | #[error("flowspec matches nothing")] 142 | MatchNothing, 143 | } 144 | 145 | pub type Result = std::result::Result; 146 | -------------------------------------------------------------------------------- /src/kernel/rtnl.rs: -------------------------------------------------------------------------------- 1 | use super::{Kernel, Result}; 2 | use crate::bgp::flow::Flowspec; 3 | use crate::net::{Afi, IpPrefix}; 4 | use crate::util::grace; 5 | use clap::Args; 6 | use futures::channel::mpsc::UnboundedReceiver; 7 | use futures::{StreamExt, try_join}; 8 | use libc::{EHOSTUNREACH, ENETUNREACH}; 9 | use rtnetlink::Error::NetlinkError; 10 | use rtnetlink::packet_core::{NetlinkMessage, NetlinkPayload}; 11 | use rtnetlink::packet_route::address::{AddressAttribute, AddressMessage}; 12 | use rtnetlink::packet_route::route::{RouteAddress, RouteAttribute, RouteMessage, RouteType, RouteVia}; 13 | use rtnetlink::packet_route::rule::{RuleAction, RuleAttribute, RuleMessage}; 14 | use rtnetlink::packet_route::{AddressFamily, RouteNetlinkMessage}; 15 | use rtnetlink::{Handle, RouteMessageBuilder}; 16 | 
use serde::{Deserialize, Serialize}; 17 | use std::collections::{BTreeMap, BTreeSet}; 18 | use std::io; 19 | use std::net::IpAddr; 20 | use std::time::Duration; 21 | use tokio::select; 22 | use tokio::time::{Interval, interval}; 23 | 24 | // TODO: maintain device info similar to BIRD's "device" protocol, and use it to 25 | // ensure only direct traffic to neighbours 26 | // For now, we allow any address by `ip route get`. 27 | 28 | #[derive(Debug)] 29 | pub struct RtNetlink { 30 | args: RtNetlinkArgs, 31 | handle: Handle, 32 | msgs: UnboundedReceiver<(NetlinkMessage, rtnetlink::sys::SocketAddr)>, 33 | routes: BTreeMap, 34 | rules: BTreeMap>, 35 | timer: Interval, 36 | } 37 | 38 | impl RtNetlink { 39 | pub fn new(args: RtNetlinkArgs) -> io::Result { 40 | let (conn, handle, msgs) = rtnetlink::new_connection()?; 41 | let scan_time = args.route_scan_time; 42 | tokio::spawn(conn); 43 | Ok(Self { 44 | args, 45 | handle, 46 | msgs, 47 | routes: BTreeMap::new(), 48 | rules: BTreeMap::new(), 49 | timer: interval(Duration::from_secs(scan_time)), 50 | }) 51 | } 52 | 53 | pub async fn add(&mut self, id: K::Handle, spec: &Flowspec, next_hop: IpAddr) -> Result { 54 | let prefix = spec.dst_prefix(); 55 | let attrs = self.get_route(prefix.afi(), next_hop).await?; 56 | 57 | // Create table first... 
58 | let (table_id, table_created) = if let Some((table_id, prefixes)) = 59 | (self.rules).iter_mut().find(|(_, v)| v.iter().all(|p| !p.overlaps(prefix))) 60 | { 61 | // there's a table whose content doesn't overlap with our prefix, we reuse it 62 | prefixes.insert(prefix); 63 | (*table_id, false) 64 | } else { 65 | let table_id = self.next_table(); 66 | self.rules.insert(table_id, Some(prefix).into_iter().collect()); 67 | 68 | let rule_add = (self.handle.rule().add()) 69 | .fw_mark(table_id) 70 | .action(RuleAction::ToTable) 71 | .table_id(table_id) 72 | .priority(self.args.rt_rule_priority); 73 | 74 | // TODO: separate v4 and v6 tables 75 | try_join!(rule_add.clone().v4().execute(), rule_add.v6().execute())?; 76 | (table_id, true) 77 | }; 78 | 79 | // ...and then add route to the table... 80 | let mut msg = RouteMessageBuilder::::new() 81 | .destination_prefix(prefix.prefix(), prefix.len()) 82 | .expect("destination prefix should be valid") 83 | .table_id(table_id) 84 | .build(); 85 | if let Some(attrs) = &attrs { 86 | msg.attributes.extend(attrs.iter().cloned()); 87 | } else { 88 | msg.header.kind = RouteType::Unreachable; 89 | }; 90 | if let Err(error) = self.handle.route().add(msg).execute().await { 91 | if table_created { 92 | self.rules.remove(&table_id); 93 | self.del_rule(table_id).await; 94 | } 95 | return Err(error.into()); 96 | } 97 | 98 | // ...and finally insert to our own database 99 | self.routes.insert(id, RouteEntry { prefix, next_hop, table_id, attrs }); 100 | 101 | Ok(table_id) 102 | } 103 | 104 | pub fn next_table(&self) -> u32 { 105 | (self.rules) 106 | .last_key_value() 107 | .map(|(k, _)| *k + 1) 108 | .unwrap_or(self.args.init_table_id) 109 | } 110 | 111 | pub fn next_table_for(&self, prefix: IpPrefix) -> u32 { 112 | // TODO: room for optimization 113 | (self.rules.iter()) 114 | .find(|(_, v)| v.iter().all(|p| !p.overlaps(prefix))) 115 | .map(|(k, _)| *k) 116 | .unwrap_or_else(|| self.next_table()) 117 | } 118 | 119 | pub async fn del(&mut 
self, id: &K::Handle) { 120 | let Some(RouteEntry { prefix, table_id, .. }) = self.routes.remove(id) else { 121 | return; 122 | }; 123 | self.del_route(table_id, prefix).await; 124 | 125 | let prefixes = self.rules.get_mut(&table_id).expect("route contains non-existent table??"); 126 | prefixes.remove(&prefix); 127 | if prefixes.is_empty() { 128 | self.rules.remove(&table_id); 129 | self.del_rule(table_id).await; 130 | } 131 | } 132 | 133 | async fn del_route(&self, table_id: u32, prefix: IpPrefix) { 134 | let mut msg = RouteMessageBuilder::::new() 135 | .destination_prefix(prefix.prefix(), prefix.len()) 136 | .expect("destination prefix should be valid") 137 | .table_id(table_id) 138 | .build(); 139 | if self.handle.route().del(msg.clone()).execute().await.is_err() { 140 | msg.header.kind = RouteType::Unreachable; 141 | grace(self.handle.route().del(msg).execute().await, "failed to delete route"); 142 | } 143 | } 144 | 145 | /// Deletes kernel `ip rule`. `self.rules` remains unchanged. 
/// Both the IPv4 and the IPv6 rule are removed; a failed deletion is handed to `grace`.
  async fn del_rule(&self, table_id: u32) {
    // TODO: add RuleMessageBuilder to rtnetlink crate
    let mut msg = RuleMessage::default();
    msg.header.family = AddressFamily::Inet;
    msg.attributes.push(RuleAttribute::FwMark(table_id));
    msg.header.action = RuleAction::ToTable;
    // The header field holds only 8 bits; larger IDs go into an attribute.
    if table_id > 255 {
      msg.attributes.push(RuleAttribute::Table(table_id));
    } else {
      msg.header.table = table_id as u8;
    }
    grace(
      self.handle.rule().del(msg.clone()).execute().await,
      "failed to delete IPv4 rule",
    );
    msg.header.family = AddressFamily::Inet6;
    grace(
      self.handle.rule().del(msg).execute().await,
      "failed to delete IPv6 rule",
    );
  }

  /// Returns `true` when no route is currently installed.
  pub fn is_empty(&self) -> bool {
    self.routes.is_empty()
  }

  // route & addr change: check if next hop in changed prefix
  // rule change: full update for AF
  // link change: full update
  // scan timer triggered: full update
  /// Waits for the next scan-timer tick or netlink message and refreshes the
  /// affected routes.
  pub async fn process(&mut self) -> Result<()> {
    use NetlinkPayload::*;
    use RouteNetlinkMessage::*;

    // Prefix covering every address of the given family.
    fn af_to_wildcard(f: AddressFamily) -> IpPrefix {
      if f == AddressFamily::Inet {
        IpPrefix::V4_ALL
      } else {
        IpPrefix::V6_ALL
      }
    }

    // Destination prefix of a route message; the family wildcard when the
    // prefix length is zero or no destination attribute is present.
    fn route_msg_dst_prefix(msg: RouteMessage) -> IpPrefix {
      use RouteAddress::{Inet, Inet6};
      use RouteAttribute::Destination;
      let dst_len = msg.header.destination_prefix_length;
      (dst_len != 0)
        .then(|| {
          (msg.attributes.into_iter())
            .filter_map(|x| match x {
              Destination(Inet(ip)) => Some(IpPrefix::new(IpAddr::V4(ip), dst_len)),
              Destination(Inet6(ip)) => Some(IpPrefix::new(IpAddr::V6(ip), dst_len)),
              _ => None,
            })
            .next()
        })
        .flatten()
        .unwrap_or_else(|| af_to_wildcard(msg.header.address_family))
    }

    // Prefix of an address message, with the same wildcard fallback.
    fn addr_msg_dst_prefix(msg: AddressMessage) -> IpPrefix {
      use AddressAttribute::Address;
      let dst_len = msg.header.prefix_len;
      (dst_len != 0)
        .then(|| {
          (msg.attributes.into_iter())
            .filter_map(|x| if let Address(ip) = x { Some(ip) } else { None })
            .map(|x| IpPrefix::new(x, dst_len))
            .next()
        })
        .flatten()
        .unwrap_or_else(|| af_to_wildcard(msg.header.family))
    }

    select! {
      _ = self.timer.tick() => self.process_all().await,
      Some((msg, _)) = self.msgs.next() => match msg.payload {
        InnerMessage(msg) => match msg {
          NewRoute(msg) | DelRoute(msg) => self.process_prefix(route_msg_dst_prefix(msg)).await,
          NewAddress(msg) | DelAddress(msg) => self.process_prefix(addr_msg_dst_prefix(msg)).await,
          NewRule(msg) | DelRule(msg) => self.process_prefix(af_to_wildcard(msg.header.family)).await,
          NewLink(_) | DelLink(_) => self.process_all().await,
          _ => Ok(()),
        },
        _ => Ok(()),
      },
    }
  }

  /// Refreshes the routes whose next hop lies inside `prefix`.
  async fn process_prefix(&mut self, prefix: IpPrefix) -> Result<()> {
    Self::process_iter(
      &self.handle,
      self.routes.values_mut().filter(|x| prefix.contains(x.next_hop)),
    )
    .await
  }

  /// Refreshes every installed route.
  async fn process_all(&mut self) -> Result<()> {
    Self::process_iter(&self.handle, self.routes.values_mut()).await
  }

  /// Re-resolves the next hop of each entry and replaces the kernel route when
  /// the resolved attributes changed. Stops at the first error.
  async fn process_iter(handle: &Handle, iter: impl Iterator) -> Result<()> {
    // TODO: remove route if next hop becomes unreachable
    for RouteEntry { prefix, next_hop, table_id, attrs } in iter {
      let new_attrs = Self::get_route_from_handle(handle, prefix.afi(), *next_hop).await?;
      if *attrs == new_attrs {
        continue;
      }
      let mut msg = RouteMessageBuilder::::new()
        .destination_prefix(prefix.prefix(), prefix.len())
        .expect("destination prefix should be valid")
        .table_id(*table_id)
        .build();
      if let Some(new_attrs) = &new_attrs {
        msg.attributes.extend(new_attrs.iter().cloned());
      } else {
        msg.header.kind = RouteType::Unreachable;
      }
      handle.route().add(msg).replace().execute().await?;
      // Remember the new attributes only after the kernel accepted the
      // replacement, so that a failed update is retried on the next pass.
      *attrs = new_attrs;
    }
    Ok(())
  }

  /// Resolves the route towards `ip` using this instance's netlink handle.
  async fn get_route(&self, afi: Afi, ip: IpAddr) -> Result>> {
    Self::get_route_from_handle(&self.handle, afi, ip).await
  }

  /// Asks the kernel for the route towards `ip` and returns the attributes to
  /// install for a route of family `prefix_afi` via that next hop.
  ///
  /// Returns `Ok(None)` when the kernel reports the network or host as
  /// unreachable. Only the gateway/via and output-interface attributes are
  /// kept; when the kernel route has no gateway, `ip` itself becomes the
  /// gateway (as `Via` for an IPv4 prefix with an IPv6 next hop).
  ///
  /// Panics if the netlink response stream ends without yielding an item.
  async fn get_route_from_handle(handle: &Handle, prefix_afi: Afi, ip: IpAddr) -> Result>> {
    let msg = RouteMessageBuilder::::new()
      .destination_prefix(ip, if ip.is_ipv4() { 32 } else { 128 })
      .expect("destination prefix should be valid")
      .build();
    let rt = match handle.route().get(msg).execute().next().await.unwrap() {
      Ok(rt) => rt,
      Err(NetlinkError(e)) if [ENETUNREACH, EHOSTUNREACH].contains(&-e.raw_code()) => return Ok(None),
      Err(error) => return Err(error.into()),
    };

    let mut has_gateway = false;
    let mut attrs = rt
      .attributes
      .into_iter()
      .filter(|x| {
        if matches!(x, RouteAttribute::Gateway(_) | RouteAttribute::Via(_)) {
          has_gateway = true;
          true
        } else {
          matches!(x, RouteAttribute::Oif(_))
        }
      })
      .collect::>();
    if !has_gateway {
      if let (Afi::Ipv4, IpAddr::V6(v6)) = (prefix_afi, ip) {
        attrs.push(RouteAttribute::Via(RouteVia::Inet6(v6)));
      } else {
        attrs.push(RouteAttribute::Gateway(ip.into()));
      }
    }
    Ok(Some(attrs))
  }

  /// Removes every rule and route recorded in `self.rules` from the kernel.
  pub async fn terminate(self) {
    for (table_id, prefixes) in &self.rules {
      self.del_rule(*table_id).await;
      for prefix in prefixes {
        self.del_route(*table_id, *prefix).await;
      }
    }
  }
}

/// Bookkeeping for one installed route.
#[derive(Debug, Clone)]
pub struct RouteEntry {
  prefix: IpPrefix,
  next_hop: IpAddr,
  table_id: u32,
  // Attributes last installed in the kernel; `None` means the route was
  // installed as unreachable.
  attrs: Option>,
}

#[derive(Debug, Clone, Args, Serialize, Deserialize)]
pub struct RtNetlinkArgs {
  /// Time between each routing table scan.
///
  /// Netlink allows route change notifications and does not need to scan the
  /// entire routing table every time, so this value could be set higher.
  #[arg(long, value_name = "TIME", default_value_t = 60)]
  pub route_scan_time: u64,

  /// Initial routing table ID.
  ///
  /// Table IDs are also used as fwmarks.
  #[arg(long, value_name = "ID", default_value_t = 0xffff0000)]
  pub init_table_id: u32, // TODO: specify table range

  /// Route rule priority as shown in `ip rule`.
  #[arg(long, value_name = "PRIO", default_value_t = 100)]
  pub rt_rule_priority: u32,
}
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
pub mod bgp;
pub mod ipc;
pub mod kernel;
pub mod net;
pub mod util;

mod args;

#[cfg(test)]
pub mod integration_tests;

use anstyle::{Reset, Style};
use anyhow::Context;
use args::{Cli, Command, RunArgs, ShowArgs};
use bgp::{Session, StateView};
use clap::Parser;
use env_logger::fmt::Formatter;
use ipc::{IpcServer, get_sock_path};
use itertools::Itertools;
use log::{Level, LevelFilter, Record, error, info, warn};
use std::fs::{File, create_dir_all};
use std::io::{self, BufRead, Write};
use std::net::Ipv4Addr;
use std::path::Path;
use tokio::io::BufReader;
use tokio::net::TcpListener;
use tokio::select;
use util::{BOLD, FG_GREEN_BOLD, RESET};

#[cfg(test)]
use {
  integration_tests::TestEvent,
  std::sync::atomic::AtomicBool,
  std::sync::atomic::Ordering::SeqCst,
  tokio::sync::{mpsc, oneshot},
};

#[cfg(not(test))]
use {
  futures::future::{FutureExt, select},
  tokio::pin,
  tokio::signal::unix::{SignalKind, signal},
};

/// Runs the daemon until it is asked to stop and returns the process exit code.
///
/// When `args.file` is set, the arguments are re-parsed from that file: every
/// line that is neither empty nor starting with `#` is passed to the argument
/// parser with `--` prepended. The function then binds the BGP listener,
/// creates the session and the IPC socket at `sock_path`, and serves TCP
/// connections, BGP processing and IPC requests in a loop.
///
/// Outside of tests the loop ends on SIGINT or SIGTERM, and errors raised by
/// an iteration are logged without leaving the loop. In tests the loop ends
/// when `close_rx` resolves or on the first error, and the session is sent
/// back through `event_tx`.
async fn run(
  mut args: RunArgs,
  sock_path: &Path,
  #[cfg(test)] event_tx: mpsc::Sender,
  #[cfg(test)] mut close_rx: oneshot::Receiver<()>,
) -> anyhow::Result {
  if let Some(file) = args.file {
    let cmd = std::env::args().next().unwrap();
    args = RunArgs::parse_from(
      Some(Ok(format!("{cmd} run")))
        .into_iter()
        .chain(std::io::BufReader::new(File::open(file)?).lines())
        // Drop blank lines and comments; read errors are kept so that the
        // `collect` below reports them.
        .filter(|x| !x.as_ref().is_ok_and(|x| x.is_empty() || x.starts_with('#')))
        .map_ok(|x| "--".to_string() + &x)
        .collect::, _>>()?,
    );
    if args.file.is_some() {
      warn!("`file` option in configuration file ignored");
    }
  }

  let bind = args.bind;
  let listener = TcpListener::bind(&bind)
    .await
    .with_context(|| format!("failed to bind to {bind:?}"))?;

  // Copied out for the log line below; `args` is moved into the session.
  let local_as = args.local_as;
  let router_id = args.router_id;

  #[cfg(not(test))]
  let mut bgp = Session::new(args).await?;
  #[cfg(test)]
  let mut bgp = Session::new(args, event_tx.clone()).await?;

  create_dir_all(Path::new(sock_path).parent().unwrap_or(Path::new("/")))?;
  let mut ipc =
    IpcServer::new(sock_path).with_context(|| format!("failed to create socket at {}", sock_path.display()))?;

  info!("Flow listening to {bind:?} as AS{local_as}, router ID {router_id}");

  #[cfg(not(test))]
  let (mut sigint, mut sigterm) = (
    signal(SignalKind::interrupt()).context("failed to register signal handler")?,
    signal(SignalKind::terminate()).context("failed to register signal handler")?,
  );

  loop {
    // One iteration: `Ok(Some(code))` requests shutdown, `Ok(None)` continues.
    let select = async {
      #[cfg(not(test))]
      pin! {
        let sigint = sigint.recv().map(|_| "SIGINT");
        let sigterm = sigterm.recv().map(|_| "SIGTERM");
        let signal_select = select(sigint, sigterm);
      }
      #[cfg(test)]
      let signal_select = &mut close_rx;

      select! {
        // New TCP connections are accepted only while the session is in the `Active` state.
        result = listener.accept(), if matches!(bgp.state(), bgp::State::Active) => {
          let (stream, mut addr) = result.context("failed to accept TCP connection")?;
          addr.set_ip(addr.ip().to_canonical());
          bgp.accept(BufReader::new(stream), addr).await.context("failed to accept BGP connection")?;
        }
        result = bgp.process() => match result {
          Ok(()) => {}
          #[cfg(not(test))]
          Err(bgp::Error::Io(error)) if error.kind() == io::ErrorKind::UnexpectedEof => warn!("remote closed"),
          #[cfg(not(test))]
          Err(e @ (bgp::Error::Notification(_) | bgp::Error::Remote(_))) => error!("BGP error: {e}"),
          Err(_) => result.context("failed to process BGP")?,
        },
        result = ipc.accept() => {
          let mut stream = result.context("failed to accept IPC connection")?;
          bgp.write_states(&mut stream).await.context("failed to write to IPC channel")?;
        },

        _signal = signal_select => {
          #[cfg(not(test))]
          {
            let (signal, _) = _signal.factor_first();
            warn!("{signal} received, exiting");
          }
          return Ok(Some(0))
        }
      }
      anyhow::Ok(None)
    };
    match select.await {
      Ok(Some(x)) => {
        bgp.terminate().await;
        #[cfg(test)]
        let _ = event_tx.send(TestEvent::Exit(bgp)).await;
        return Ok(x);
      }
      Ok(None) => {}
      #[cfg(not(test))]
      Err(error) => error!("{error:?}"),
      #[cfg(test)]
      Err(error) => {
        bgp.terminate().await;
        let _ = event_tx.send(TestEvent::Exit(bgp)).await;
        return Err(error);
      }
    }
  }
}

/// Fetches configuration, session state and routes from the running daemon
/// through the IPC socket at `sock_path` and prints them to stdout. The hold
/// time is printed only when `verbosity` is at least `Debug`.
async fn show(_args: ShowArgs, verbosity: LevelFilter, sock_path: &Path) -> anyhow::Result<()> {
  use StateView::*;
  let mut buf = Vec::new();
  let (config, state, routes) = ipc::get_states(sock_path, &mut buf)
    .await
    .with_context(|| format!("failed to connect to {}", sock_path.display()))?;

  println!("{FG_GREEN_BOLD}Flow{RESET} listening to {:?}", config.bind);
  println!(" {BOLD}State:{RESET} {:?}", state.kind());
  println!(" {BOLD}Local AS:{RESET} {}", config.local_as);
  println!(" {BOLD}Local Router ID:{RESET} {}", config.router_id);
  match state {
    // No peer has been identified yet: show what the configuration allows.
    Idle | Connect | Active | OpenSent => {
      if let Some(remote_as) = config.remote_as {
        println!(" {BOLD}Remote AS:{RESET} {remote_as}");
      }
      println!(" {BOLD}Allowed IPs:{RESET} {}", config.allowed_ips.iter().format(", "));
    }
    // The peer's OPEN message is available: show the peer's values.
    OpenConfirm { remote_open, local_addr, remote_addr } | Established { remote_open, local_addr, remote_addr } => {
      if let Some(local_addr) = local_addr {
        println!(" {BOLD}Local Address:{RESET} {local_addr}");
      }
      println!(" {BOLD}Remote AS:{RESET} {}", remote_open.my_as);
      println!(
        " {BOLD}Remote Router ID:{RESET} {}",
        Ipv4Addr::from_bits(remote_open.bgp_id),
      );
      if let Some(remote_addr) = remote_addr {
        println!(" {BOLD}Remote Address:{RESET} {remote_addr}");
      }
      if verbosity >= Level::Debug {
        // The smaller of the local and the remote hold time is shown.
        println!(
          " {BOLD}Hold Time:{RESET} {}",
          config.hold_time.min(remote_open.hold_time),
        )
      }
    }
  }
  println!();

  routes.print(verbosity);
  Ok(())
}

/// Log line formatter: a bold, colored, five-character level label followed by
/// the message. `Trace` messages are additionally colored bright black.
fn format_log(f: &mut Formatter, record: &Record<'_>) -> io::Result<()> {
  use anstyle::AnsiColor::*;
  use log::Level::*;

  let (level_color, text_color, str) = match record.level() {
    Error => (Red, None, "Error"),
    Warn => (Yellow, None, " Warn"),
    Info => (Green, None, " Info"),
    Debug => (Blue, None, "Debug"),
    Trace => (BrightBlack, Some(BrightBlack), "Trace"),
  };

  let level_style = Style::new().bold().fg_color(Some(level_color.into()));
  write!(f, "{}{}{} ", level_style, str, Reset)?;

  if let Some(text_color) = text_color {
    let text_style = Style::new().fg_color(Some(text_color.into()));
    writeln!(f, "{}{}{}", text_style, record.args(), Reset)
  } else {
    writeln!(f, "{}", record.args())
  }
}

/// Sets up logging, dispatches the parsed command line and returns the process
/// exit code: the code returned by `run`, `0` for a successful `show`, and `1`
/// after an error (which is logged).
///
/// Panics if `get_sock_path` fails for `cli.run_dir`.
pub async fn cli_entry(
  cli: Cli,
  #[cfg(test)] event_tx: mpsc::Sender,
  #[cfg(test)] close_rx: oneshot::Receiver<()>,
) -> u8 {
  let mut builder = env_logger::builder();
  builder
    .filter_level(cli.verbosity.log_level_filter())
    .format(format_log)
    .filter_module("netlink", LevelFilter::Off);
  #[cfg(test)]
  builder.is_test(true);
  #[cfg(not(test))]
  builder.init();
  #[cfg(test)]
  {
    // Tests call this function repeatedly in one process; initialize the
    // logger only on the first call.
    static BUILDER_INITED: AtomicBool = AtomicBool::new(false);
    if !BUILDER_INITED.swap(true, SeqCst) {
      builder.init();
    }
  }

  let sock_path = get_sock_path(&cli.run_dir).unwrap();

  match cli.command {
    Command::Run(args) => {
      #[cfg(test)]
      let result = run(args, &sock_path, event_tx, close_rx).await;
      #[cfg(not(test))]
      let result = run(args, &sock_path).await;
      match result {
        Ok(x) => x,
        Err(error) => {
          error!("fatal error: {error:?}");
          1
        }
      }
    }
    Command::Show(args) => match show(args, cli.verbosity.log_level_filter(), &sock_path).await {
      Ok(()) => 0,
      Err(error) => {
        error!("{error:?}");
        1
      }
    },
  }
}

/// Program entry point: parses the command line and turns the code returned by
/// `cli_entry` into the process exit code.
#[cfg(not(any(test, feature = "__gen")))]
#[tokio::main(flavor = "current_thread")]
async fn main() -> std::process::ExitCode {
  let cli = Cli::parse();
  cli_entry(cli).await.into()
}

/// Manpage and autocompletion generator.
///
/// The `args` module links to all parts of the program and not possible to
/// include it only, so we can only generate manpage right inside `main.rs`,
/// gated by `__gen` feature.
279 | #[cfg(feature = "__gen")] 280 | fn main() { 281 | use clap::{CommandFactory, ValueEnum}; 282 | 283 | let target_dir = "target/assets"; 284 | std::fs::create_dir_all(target_dir).unwrap(); 285 | let mut cli = Cli::command(); 286 | 287 | // We generate manpages first since clap_complete will call `cli.build()`, and 288 | // the manpages generated after that will contain thing like "flow-help-help". 289 | clap_mangen::generate_to(cli.clone(), target_dir).unwrap(); 290 | 291 | for &shell in clap_complete::Shell::value_variants() { 292 | clap_complete::generate_to(shell, &mut cli, env!("CARGO_PKG_NAME"), target_dir).unwrap(); 293 | } 294 | 295 | eprintln!("Manpages and autocompletions successfully generated to {target_dir}."); 296 | } 297 | -------------------------------------------------------------------------------- /src/net.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use std::cmp::Ordering; 3 | use std::fmt::{self, Debug, Display, Formatter}; 4 | use std::io; 5 | use std::io::ErrorKind::UnexpectedEof; 6 | use std::net::{AddrParseError, IpAddr, Ipv4Addr, Ipv6Addr}; 7 | use std::num::ParseIntError; 8 | use std::str::FromStr; 9 | use strum::FromRepr; 10 | use thiserror::Error; 11 | use tokio::io::{AsyncRead, AsyncReadExt}; 12 | 13 | /// 14 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, FromRepr, Serialize, Deserialize)] 15 | #[repr(u16)] 16 | pub enum Afi { 17 | Ipv4 = 1, 18 | Ipv6 = 2, 19 | } 20 | 21 | impl Display for Afi { 22 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 23 | match self { 24 | Self::Ipv4 => f.write_str("IPv4"), 25 | Self::Ipv6 => f.write_str("IPv6"), 26 | } 27 | } 28 | } 29 | 30 | /// Max prefix length of a certain IP type. 31 | pub const fn prefix_max_len(prefix: IpAddr) -> u8 { 32 | match prefix { 33 | IpAddr::V4(_) => 32, 34 | IpAddr::V6(_) => 128, 35 | } 36 | } 37 | 38 | /// IP address with its prefix length attached. 
39 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] 40 | pub struct IpWithPrefix { 41 | addr: IpAddr, 42 | prefix_len: u8, 43 | } 44 | 45 | impl IpWithPrefix { 46 | pub const fn new(addr: IpAddr, prefix_len: u8) -> Self { 47 | assert!( 48 | prefix_len <= prefix_max_len(addr), 49 | "prefix length should not exceed its range" 50 | ); 51 | Self { addr, prefix_len } 52 | } 53 | 54 | pub const fn addr(self) -> IpAddr { 55 | self.addr 56 | } 57 | 58 | pub const fn prefix_len(self) -> u8 { 59 | self.prefix_len 60 | } 61 | 62 | const fn mask_raw(self) -> u128 { 63 | if self.prefix_len == 0 { 64 | 0 65 | } else { 66 | u128::MAX.wrapping_shl((prefix_max_len(self.addr) - self.prefix_len) as u32) 67 | } 68 | } 69 | 70 | pub fn mask(self) -> IpAddr { 71 | match self.addr { 72 | IpAddr::V4(_) => Ipv4Addr::from(self.mask_raw() as u32).into(), 73 | IpAddr::V6(_) => Ipv6Addr::from(self.mask_raw()).into(), 74 | } 75 | } 76 | 77 | pub fn prefix(self) -> IpPrefix { 78 | let mut inner = self; 79 | match (&mut inner.addr, self.mask()) { 80 | (IpAddr::V4(v4), IpAddr::V4(mask)) => *v4 &= mask, 81 | (IpAddr::V6(v6), IpAddr::V6(mask)) => *v6 &= mask, 82 | _ => unreachable!(), 83 | }; 84 | IpPrefix { inner } 85 | } 86 | 87 | pub const fn afi(&self) -> Afi { 88 | if self.is_ipv4() { Afi::Ipv4 } else { Afi::Ipv6 } 89 | } 90 | 91 | pub const fn is_ipv4(&self) -> bool { 92 | self.addr.is_ipv4() 93 | } 94 | 95 | pub const fn is_ipv6(&self) -> bool { 96 | self.addr.is_ipv6() 97 | } 98 | } 99 | 100 | impl Debug for IpWithPrefix { 101 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 102 | Display::fmt(self, f) 103 | } 104 | } 105 | 106 | impl Display for IpWithPrefix { 107 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 108 | write!(f, "{}/{}", self.addr, self.prefix_len) 109 | } 110 | } 111 | 112 | impl FromStr for IpWithPrefix { 113 | type Err = IpWithPrefixError; 114 | 115 | fn from_str(s: &str) -> Result { 116 | use IpWithPrefixErrorKind::*; 
117 | 118 | let Some((addr, prefix_len)) = s.split_once('/') else { 119 | return Err(IpWithPrefixError::new(PrefixLenNotFound, s)); 120 | }; 121 | let addr = addr.parse::().map_err(|e| IpWithPrefixError::new(e, s))?; 122 | let prefix_len = prefix_len.parse::().map_err(|e| IpWithPrefixError::new(e, s))?; 123 | let max_len = prefix_max_len(addr); 124 | if prefix_len > max_len { 125 | Err(IpWithPrefixError::new(PrefixLenTooLong(prefix_len, max_len), s)) 126 | } else { 127 | Ok(Self { addr, prefix_len }) 128 | } 129 | } 130 | } 131 | 132 | impl From for IpWithPrefix { 133 | fn from(addr: IpAddr) -> Self { 134 | Self { addr, prefix_len: prefix_max_len(addr) } 135 | } 136 | } 137 | 138 | #[derive(Debug, Clone, Error)] 139 | #[error("error parsing IP with prefix '{value}': {kind}")] 140 | pub struct IpWithPrefixError { 141 | kind: IpWithPrefixErrorKind, 142 | value: String, 143 | } 144 | 145 | impl IpWithPrefixError { 146 | fn new(kind: impl Into, value: impl Into) -> Self { 147 | Self { kind: kind.into(), value: value.into() } 148 | } 149 | } 150 | 151 | #[derive(Debug, Clone, Error)] 152 | pub enum IpWithPrefixErrorKind { 153 | #[error(transparent)] 154 | AddrParse(#[from] AddrParseError), 155 | #[error("prefix length not found")] 156 | PrefixLenNotFound, 157 | #[error("parsing prefix length failed: {0}")] 158 | PrefixLenParse(#[from] ParseIntError), 159 | #[error("prefix length too long ({0} > {1})")] 160 | PrefixLenTooLong(u8, u8), 161 | } 162 | 163 | /// IP prefix. 
164 | #[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] 165 | pub struct IpPrefix { 166 | inner: IpWithPrefix, 167 | } 168 | 169 | impl IpPrefix { 170 | pub const V4_ALL: Self = Self { inner: IpWithPrefix { addr: IpAddr::V4(Ipv4Addr::UNSPECIFIED), prefix_len: 0 } }; 171 | pub const V6_ALL: Self = Self { inner: IpWithPrefix { addr: IpAddr::V6(Ipv6Addr::UNSPECIFIED), prefix_len: 0 } }; 172 | 173 | pub fn new(prefix: IpAddr, len: u8) -> Self { 174 | let inner = IpWithPrefix::new(prefix, len); 175 | let result = inner.prefix(); 176 | assert_eq!(result.inner, inner, "bits out of range must be zeroes"); 177 | result 178 | } 179 | 180 | pub const fn prefix(self) -> IpAddr { 181 | self.inner.addr 182 | } 183 | 184 | #[allow(clippy::len_without_is_empty)] 185 | pub const fn len(self) -> u8 { 186 | self.inner.prefix_len 187 | } 188 | 189 | pub fn mask(self) -> IpAddr { 190 | self.inner.mask() 191 | } 192 | 193 | pub fn contains>(self, other: T) -> bool { 194 | use IpAddr::*; 195 | use std::cmp::Ordering::*; 196 | 197 | let other = other.into(); 198 | match self.len().cmp(&other.len()) { 199 | Less => match (self.prefix(), self.inner.mask(), other.prefix()) { 200 | (V4(p1), V4(mask), V4(p2)) => p2 & mask == p1, 201 | (V6(p1), V6(mask), V6(p2)) => p2 & mask == p1, 202 | _ => false, 203 | }, 204 | Equal => self == other, 205 | Greater => false, 206 | } 207 | } 208 | 209 | pub fn overlaps>(self, other: T) -> bool { 210 | use IpAddr::*; 211 | use std::cmp::Ordering::*; 212 | 213 | let other = other.into(); 214 | match self.len().cmp(&other.len()) { 215 | Less => match (self.prefix(), self.inner.mask(), other.prefix()) { 216 | (V4(p1), V4(mask), V4(p2)) => (p1.to_bits() ^ p2.to_bits()) & mask.to_bits() == 0, 217 | (V6(p1), V6(mask), V6(p2)) => (p1.to_bits() ^ p2.to_bits()) & mask.to_bits() == 0, 218 | _ => false, 219 | }, 220 | Equal => self == other, 221 | Greater => other.overlaps(self), 222 | } 223 | } 224 | 225 | pub const fn is_single(self) -> bool { 226 | 
prefix_max_len(self.prefix()) == self.len() 227 | } 228 | 229 | pub const fn afi(self) -> Afi { 230 | self.inner.afi() 231 | } 232 | 233 | pub const fn is_ipv4(self) -> bool { 234 | self.inner.is_ipv4() 235 | } 236 | 237 | pub const fn is_ipv6(self) -> bool { 238 | self.inner.is_ipv6() 239 | } 240 | 241 | pub fn write(self, buf: &mut Vec) { 242 | let prefix_bytes = self.len().div_ceil(8); 243 | buf.push(self.len()); 244 | match self.prefix() { 245 | IpAddr::V4(v4) => { 246 | assert!(prefix_bytes <= 4); 247 | buf.extend(v4.octets().into_iter().take(prefix_bytes.into())) 248 | } 249 | IpAddr::V6(v6) => { 250 | assert!(prefix_bytes <= 16); 251 | buf.extend(v6.octets().into_iter().take(prefix_bytes.into())) 252 | } 253 | } 254 | } 255 | 256 | pub async fn read(reader: &mut R, afi: Afi) -> Result, IpPrefixError> { 257 | match afi { 258 | Afi::Ipv4 => Self::read_v4(reader).await, 259 | Afi::Ipv6 => Self::read_v6(reader).await, 260 | } 261 | } 262 | 263 | pub async fn read_v4(reader: &mut R) -> Result, IpPrefixError> { 264 | Self::read_generic::<32, 4, _, _>(reader, IpAddr::V4).await 265 | } 266 | 267 | pub async fn read_v6(reader: &mut R) -> Result, IpPrefixError> { 268 | Self::read_generic::<128, 16, _, _>(reader, IpAddr::V6).await 269 | } 270 | 271 | async fn read_generic( 272 | reader: &mut R, 273 | ctor: fn(T) -> IpAddr, 274 | ) -> Result, IpPrefixError> 275 | where 276 | T: From<[u8; M]>, 277 | R: AsyncRead + Unpin, 278 | { 279 | let len = match reader.read_u8().await { 280 | Ok(len) => len, 281 | Err(error) if error.kind() == UnexpectedEof => return Ok(None), 282 | Err(error) => return Err(error.into()), 283 | }; 284 | if len > L { 285 | return Err(IpPrefixError { kind: IpWithPrefixErrorKind::PrefixLenTooLong(len, L).into(), value: None }); 286 | } 287 | let mut buf = [0; M]; 288 | let prefix_bytes = len.div_ceil(8); 289 | reader.read_exact(&mut buf[0..prefix_bytes.into()]).await?; 290 | let inner = IpWithPrefix { addr: ctor(buf.into()), prefix_len: len }; 291 | 
let result = inner.prefix(); 292 | if result.inner == inner { 293 | Ok(Some((result, prefix_bytes + 1))) 294 | } else { 295 | Err(IpPrefixError { kind: IpPrefixErrorKind::TrailingBitsNonZero, value: None }) 296 | } 297 | } 298 | } 299 | 300 | impl Debug for IpPrefix { 301 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 302 | Display::fmt(&self.inner, f) 303 | } 304 | } 305 | 306 | impl Display for IpPrefix { 307 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 308 | Display::fmt(&self.inner, f) 309 | } 310 | } 311 | 312 | impl FromStr for IpPrefix { 313 | type Err = IpPrefixError; 314 | 315 | fn from_str(s: &str) -> Result { 316 | let inner = s 317 | .parse::() 318 | .map_err(|e| IpPrefixError { kind: e.kind.into(), value: Some(e.value) })?; 319 | let result = inner.prefix(); 320 | if result.inner == inner { 321 | Ok(result) 322 | } else { 323 | Err(IpPrefixError { kind: IpPrefixErrorKind::TrailingBitsNonZero, value: Some(s.into()) }) 324 | } 325 | } 326 | } 327 | 328 | impl From for IpPrefix { 329 | fn from(addr: IpAddr) -> Self { 330 | Self::new(addr, prefix_max_len(addr)) 331 | } 332 | } 333 | 334 | impl PartialOrd for IpPrefix { 335 | fn partial_cmp(&self, other: &Self) -> Option { 336 | Some(self.cmp(other)) 337 | } 338 | } 339 | 340 | impl Ord for IpPrefix { 341 | fn cmp(&self, other: &Self) -> Ordering { 342 | match self.inner.prefix_len.cmp(&other.inner.prefix_len) { 343 | Ordering::Equal => self.inner.addr.cmp(&other.inner.addr), 344 | Ordering::Greater => Ordering::Less, 345 | Ordering::Less => Ordering::Greater, 346 | } 347 | } 348 | } 349 | 350 | #[derive(Debug, Error)] 351 | pub struct IpPrefixError { 352 | pub kind: IpPrefixErrorKind, 353 | pub value: Option, 354 | } 355 | 356 | impl Display for IpPrefixError { 357 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 358 | if let Some(value) = &self.value { 359 | write!(f, "error parsing IP prefix '{}': {}", value, self.kind) 360 | } else { 361 | write!(f, "error parsing IP prefix: 
{}", self.kind) 362 | } 363 | } 364 | } 365 | 366 | impl From for IpPrefixError { 367 | fn from(e: io::Error) -> Self { 368 | Self { kind: IpPrefixErrorKind::Io(e), value: None } 369 | } 370 | } 371 | 372 | #[derive(Debug, Error)] 373 | pub enum IpPrefixErrorKind { 374 | #[error(transparent)] 375 | Io(#[from] io::Error), 376 | #[error(transparent)] 377 | IpWithPrefixParse(#[from] IpWithPrefixErrorKind), 378 | #[error("trailing bits of a prefix are non-zero")] 379 | TrailingBitsNonZero, 380 | } 381 | 382 | #[cfg(test)] 383 | mod tests { 384 | use super::*; 385 | use IpPrefixErrorKind::*; 386 | use IpWithPrefixErrorKind::*; 387 | use test_case::test_case; 388 | 389 | #[test_case("192.0.2.66/27")] 390 | #[test_case("2001:db8::dead:cafe/32")] 391 | fn test_ip_with_prefix_valid(prefix: &str) { 392 | assert_eq!(prefix.parse::().unwrap().to_string(), prefix); 393 | } 394 | 395 | #[test_case("2a0c:32d7:da9:1ba0/64", |e| matches!(e.kind, AddrParse(_)))] 396 | #[test_case("128.42.65.125", |e| matches!(e.kind, PrefixLenNotFound))] 397 | #[test_case("83.34.123.31/-1", |e| matches!(e.kind, PrefixLenParse(_)))] 398 | #[test_case("83.34.123.31/56", |e| matches!(e.kind, PrefixLenTooLong(_, 32)))] 399 | #[test_case("::64:ff9b:1.2.3.4/897123", |e| matches!(e.kind, PrefixLenParse(_)))] 400 | fn test_ip_with_prefix_invalid(prefix: &str, matcher: impl Fn(IpWithPrefixError) -> bool) { 401 | let error = prefix.parse::().unwrap_err(); 402 | assert!(matcher(error)); 403 | } 404 | 405 | #[test_case("192.0.2.64/27")] 406 | #[test_case("2001:db8::/32")] 407 | #[test_case("2001:db8:dead:beef::/64")] 408 | fn test_ip_prefix_valid(prefix: &str) { 409 | assert_eq!(prefix.parse::().unwrap().to_string(), prefix); 410 | } 411 | 412 | #[test_case("192.0.2.65/27", |e| matches!(e.kind, TrailingBitsNonZero))] 413 | #[test_case("fe80::1/64", |e| matches!(e.kind, TrailingBitsNonZero))] 414 | fn test_ip_prefix_invalid(prefix: &str, matcher: impl Fn(IpPrefixError) -> bool) { 415 | let error = 
prefix.parse::().unwrap_err(); 416 | assert!(matcher(error)); 417 | } 418 | 419 | #[test_case("192.168.0.0/16", "192.168.233.0/24", true)] 420 | #[test_case("192.168.0.0/32", "192.168.233.0/24", false)] 421 | #[test_case("fdfd:abcc:deff::/48", "fdfd:abcc:deff:1233::/64", true)] 422 | #[test_case("fdfd:abcc:deff::/64", "fdfd:abcc:deff:1233::/64", false)] 423 | fn test_ip_prefix_contains(a: &str, b: &str, result: bool) { 424 | let (a, b) = (a.parse::().unwrap(), b.parse::().unwrap()); 425 | assert_eq!(a.contains(b), result); 426 | } 427 | } 428 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | use anstyle::{AnsiColor, Color, Reset, Style}; 2 | use log::warn; 3 | use serde::{Deserialize, Deserializer, Serialize, Serializer}; 4 | use smallvec::SmallVec; 5 | use std::borrow::Cow; 6 | use std::cmp::min; 7 | use std::collections::BTreeSet; 8 | use std::fmt::{self, Display, Formatter, Write}; 9 | use std::ops::{Add, Deref, RangeInclusive}; 10 | use std::rc::Rc; 11 | 12 | pub const FG_GREEN_BOLD: Style = Style::new().fg_color(Some(Color::Ansi(AnsiColor::Green))).bold(); 13 | pub const FG_BLUE_BOLD: Style = Style::new().fg_color(Some(Color::Ansi(AnsiColor::Blue))).bold(); 14 | pub const BOLD: Style = Style::new().bold(); 15 | pub const RESET: Reset = Reset; 16 | 17 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 18 | pub enum MaybeRc { 19 | Rc(Rc), 20 | Owned(T), 21 | } 22 | 23 | impl Display for MaybeRc { 24 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 25 | ::fmt(self, f) 26 | } 27 | } 28 | 29 | impl Default for MaybeRc { 30 | fn default() -> Self { 31 | Self::Owned(T::default()) 32 | } 33 | } 34 | 35 | impl Deref for MaybeRc { 36 | type Target = T; 37 | 38 | fn deref(&self) -> &Self::Target { 39 | match self { 40 | Self::Rc(t) => t, 41 | Self::Owned(t) => t, 42 | } 43 | } 44 | } 45 | 46 | impl Serialize for MaybeRc { 
47 | fn serialize(&self, ser: S) -> Result { 48 | T::serialize(self, ser) 49 | } 50 | } 51 | 52 | impl<'de, T: Deserialize<'de>> Deserialize<'de> for MaybeRc { 53 | fn deserialize>(de: D) -> Result { 54 | Ok(Self::Owned(T::deserialize(de)?)) 55 | } 56 | } 57 | 58 | pub trait Intersect: Sized { 59 | fn intersect(self, other: T) -> Option; 60 | } 61 | 62 | impl Intersect for RangeInclusive { 63 | fn intersect(self, other: Self) -> Option { 64 | if self.start() > other.start() { 65 | Self::intersect(other, self) 66 | } else if self.end() < other.start() { 67 | None 68 | } else { 69 | let (_, self_end) = self.into_inner(); 70 | let (other_start, other_end) = other.into_inner(); 71 | Some(other_start..=min(self_end, other_end)) 72 | } 73 | } 74 | } 75 | 76 | #[derive(Debug, Clone)] 77 | pub struct TruthTable { 78 | pub mask: u64, 79 | pub inv: bool, 80 | pub truth: BTreeSet, 81 | } 82 | 83 | impl TruthTable { 84 | pub fn new(mask: u64, inv: bool, truth: impl IntoIterator) -> Self { 85 | let truth = truth.into_iter().map(|x| x & mask).collect(); 86 | let mut result = Self { mask, inv, truth }; 87 | result.optimize(); 88 | result 89 | } 90 | /// Switches to the complementary representation when more than half of the values under `mask` are stored; an empty mask is skipped because `count_ones() - 1` would underflow. 91 | fn optimize(&mut self) { 92 | if self.mask != 0 && self.truth.len() > (1 << (self.mask.count_ones() - 1)) { 93 | if self.inv { 94 | self.truth = self.possible_values_masked().into_owned(); 95 | self.inv = false; 96 | } else { 97 | self.inv = true; 98 | self.truth = self.possible_values_masked().into_owned(); 99 | } 100 | } 101 | } 102 | 103 | pub const fn always_true() -> Self { 104 | Self { mask: 0, inv: true, truth: BTreeSet::new() } 105 | } 106 | 107 | pub const fn always_false() -> Self { 108 | Self { mask: 0, inv: false, truth: BTreeSet::new() } 109 | } 110 | 111 | pub fn is_always_true(&self) -> bool { 112 | self.inv && self.truth.is_empty() || !self.inv && self.truth.len() == 1 << self.mask.count_ones() 113 | } 114 | 115 | pub fn is_always_false(&self) -> bool { 116 | !self.inv && self.truth.is_empty() || self.inv && self.truth.len() == 1
<< self.mask.count_ones() 117 | } 118 | 119 | pub fn and(self, other: Self) -> Self { 120 | if self.is_always_false() || other.is_always_false() { 121 | Self::always_false() 122 | } else if self.is_always_true() { 123 | other 124 | } else if other.is_always_true() { 125 | self 126 | } else { 127 | match (self.inv, other.inv) { 128 | (false, false) => self.truth_intersection(&other, false), 129 | (true, true) => self.truth_union(&other, true), 130 | (false, true) => self.truth_difference(&other, false), 131 | (true, false) => other.and(self), 132 | } 133 | } 134 | } 135 | 136 | pub fn or(self, other: Self) -> Self { 137 | if self.is_always_true() || other.is_always_true() { 138 | Self::always_true() 139 | } else if self.is_always_false() { 140 | other 141 | } else if other.is_always_false() { 142 | self 143 | } else { 144 | match (self.inv, other.inv) { 145 | (false, false) => self.truth_union(&other, false), 146 | (true, true) => self.truth_intersection(&other, true), 147 | (false, true) => other.truth_difference(&self, true), 148 | (true, false) => other.or(self), 149 | } 150 | } 151 | } 152 | 153 | pub fn invert(mut self) -> Self { 154 | self.inv = !self.inv; 155 | self 156 | } 157 | 158 | pub fn possible_values_masked(&self) -> Cow> { 159 | if self.inv { 160 | Cow::Owned( 161 | iter_masked(self.mask) 162 | .collect::>() 163 | .difference(&self.truth) 164 | .copied() 165 | .collect(), 166 | ) 167 | } else { 168 | Cow::Borrowed(&self.truth) 169 | } 170 | } 171 | 172 | pub fn shrink(&self, other_mask: u64) -> Cow { 173 | let mask = self.mask & other_mask; 174 | if mask == self.mask { 175 | Cow::Borrowed(self) 176 | } else { 177 | Cow::Owned(Self { mask, inv: self.inv, truth: self.truth.iter().map(|v| v & mask).collect() }) 178 | } 179 | } 180 | 181 | pub fn expand(&self, other_mask: u64) -> Cow { 182 | let mask = self.mask | other_mask; 183 | if mask == self.mask { 184 | Cow::Borrowed(self) 185 | } else { 186 | Cow::Owned(Self { 187 | mask, 188 | inv: self.inv, 189 
| truth: iter_masked(other_mask & !self.mask) 190 | .flat_map(|a| self.truth.iter().map(move |b| a | b)) 191 | .collect(), 192 | }) 193 | } 194 | } 195 | 196 | fn expand_set(&self, other_mask: u64) -> Cow> { 197 | match self.expand(other_mask) { 198 | Cow::Borrowed(x) => Cow::Borrowed(&x.truth), 199 | Cow::Owned(x) => Cow::Owned(x.truth), 200 | } 201 | } 202 | 203 | fn truth_intersection(&self, other: &Self, inv: bool) -> Self { 204 | self.truth_op(other, inv, |a, b| a.intersection(b).copied().collect()) 205 | } 206 | fn truth_union(&self, other: &Self, inv: bool) -> Self { 207 | self.truth_op(other, inv, |a, b| a.union(b).copied().collect()) 208 | } 209 | fn truth_difference(&self, other: &Self, inv: bool) -> Self { 210 | self.truth_op(other, inv, |a, b| a.difference(b).copied().collect()) 211 | } 212 | fn truth_op(&self, other: &Self, inv: bool, f: F) -> Self 213 | where 214 | F: for<'a> FnOnce(&'a BTreeSet, &'a BTreeSet) -> BTreeSet, 215 | { 216 | Self { 217 | mask: self.mask | other.mask, 218 | inv, 219 | truth: f(&self.expand_set(other.mask), &other.expand_set(self.mask)), 220 | } 221 | } 222 | } 223 | 224 | impl PartialEq for TruthTable { 225 | fn eq(&self, other: &Self) -> bool { 226 | self.mask == other.mask && self.possible_values_masked() == other.possible_values_masked() 227 | } 228 | } 229 | 230 | impl Eq for TruthTable {} 231 | 232 | fn pos_of_set_bits(mut mask: u64) -> SmallVec<[u8; 6]> { 233 | let mut pos = SmallVec::with_capacity(mask.count_ones().try_into().unwrap()); 234 | while mask.trailing_zeros() < 64 { 235 | pos.push(mask.trailing_zeros().try_into().unwrap()); 236 | mask ^= 1 << mask.trailing_zeros(); 237 | } 238 | pos 239 | } 240 | 241 | /// Iterator over every possible value under the mask. 
242 | fn iter_masked(mask: u64) -> impl Iterator + Clone + 'static { 243 | let pos = pos_of_set_bits(mask); 244 | let empty_zero = pos.is_empty().then_some(0); 245 | (0u64..1 << mask.count_ones()) 246 | .map(move |x| pos.iter().enumerate().map(|(i, p)| ((x >> i) & 1) << p).fold(0, Add::add)) 247 | .chain(empty_zero) 248 | } 249 | 250 | impl Display for TruthTable { 251 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 252 | write!(f, "({}{:b}) {{", if f.alternate() { "0b" } else { "" }, self.mask)?; 253 | let possible_values = self.possible_values_masked(); 254 | let mut iter = possible_values.iter(); 255 | if let Some(first) = iter.next() { 256 | if f.alternate() { 257 | f.write_str("0b")?; 258 | } 259 | for _ in 0..first.leading_zeros() - self.mask.leading_zeros() { 260 | f.write_char('0')?; 261 | } 262 | if *first > 0 { 263 | write!(f, "{:b}", first)?; 264 | } 265 | for val in iter { 266 | f.write_str(", ")?; 267 | if f.alternate() { 268 | f.write_str("0b")?; 269 | } 270 | for _ in 0..val.leading_zeros() - self.mask.leading_zeros() { 271 | f.write_char('0')?; 272 | } 273 | if *val > 0 { 274 | write!(f, "{:b}", val)?; 275 | } 276 | } 277 | } 278 | f.write_char('}') 279 | } 280 | } 281 | 282 | // TODO: macro with format args 283 | pub fn grace(result: Result, msg: &str) { 284 | match result { 285 | Ok(_) => {} 286 | Err(error) => warn!("{msg}: {error}"), 287 | } 288 | } 289 | 290 | #[cfg(test)] 291 | mod tests { 292 | use super::*; 293 | use crate::bgp::flow::Op; 294 | use test_case::test_case; 295 | 296 | #[test_case(0..=5, 5..=10, Some(5..=5))] 297 | #[test_case(0..=114, 5..=10, Some(5..=10))] 298 | #[test_case(0..=5, 114..=514, None)] 299 | fn test_intersect(a: RangeInclusive, b: RangeInclusive, result: Option>) { 300 | assert_eq!(a.intersect(b), result); 301 | } 302 | 303 | #[test] 304 | fn test_truth_table() { 305 | assert!(TruthTable::always_true().is_always_true()); 306 | assert!(TruthTable::always_false().is_always_false()); 307 | 
assert!(Op::not_any(0b0000).to_truth_table().is_always_true()); 308 | 309 | let op1 = Op::all(0b0100); 310 | let op2 = Op::not_all(0b1010); 311 | let tt = op1.to_truth_table().or(op2.to_truth_table()); 312 | assert_eq!(tt, TruthTable::new(0b1110, true, [0b1010])); 313 | 314 | assert_eq!( 315 | tt, 316 | TruthTable::new(0b1110, false, [0b0000, 0b0010, 0b0100, 0b0110, 0b1000, 0b1100, 0b1110]), 317 | ); 318 | } 319 | } 320 | -------------------------------------------------------------------------------- /xtask/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "xtask" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [[bin]] 7 | name = "xtask" 8 | path = "main.rs" 9 | 10 | [dependencies] 11 | clap = { workspace = true } 12 | -------------------------------------------------------------------------------- /xtask/main.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use std::process::{Command, ExitCode}; 3 | 4 | #[derive(Debug, Parser)] 5 | enum Cli { 6 | /// Generate manpages and shell autocompletions into target/assets. 7 | Gen, 8 | /// Run command with `unshare -rn`. 9 | Unshare { 10 | #[arg(trailing_var_arg = true, allow_hyphen_values = true)] 11 | args: Vec, 12 | }, 13 | /// Run command with `sudo -E`. 14 | Sudo { 15 | #[arg(trailing_var_arg = true, allow_hyphen_values = true)] 16 | args: Vec, 17 | }, 18 | /// Run command with `ip netns exec `. 19 | Netns { 20 | netns: String, 21 | /// Prepend `sudo -E` to runner. 
22 | #[arg(long)] 23 | sudo: bool, 24 | #[arg(trailing_var_arg = true, allow_hyphen_values = true)] 25 | args: Vec, 26 | }, 27 | } 28 | 29 | fn cargo_with_runner(runner: &str) -> Command { 30 | let mut cmd = Command::new(env!("CARGO")); 31 | let os = std::env::consts::OS; 32 | cmd.args([ 33 | "--config", 34 | &format!("target.'cfg(target_os = \"{os}\")'.runner = '{runner}'"), 35 | ]); 36 | cmd 37 | } 38 | /// Runs the selected cargo invocation and forwards the child's exit status as this process's exit code. 39 | fn main() -> ExitCode { 40 | let status = match Cli::parse() { 41 | Cli::Gen => Command::new(env!("CARGO")).args(["run", "--features=__gen"]).status().unwrap(), 42 | 43 | #[cfg(target_os = "linux")] 44 | Cli::Unshare { args } => cargo_with_runner("unshare -rn").args(args).status().unwrap(), 45 | 46 | #[cfg(not(target_os = "linux"))] 47 | Cli::Unshare { args } => { 48 | eprintln!("Unshare not supported, running tests as current user"); 49 | Command::new(env!("CARGO")).args(args).status().unwrap() 50 | } 51 | 52 | Cli::Sudo { args } => cargo_with_runner("sudo -E").args(args).status().unwrap(), 53 | 54 | Cli::Netns { netns, sudo, args } => { 55 | let runner = if sudo { 56 | format!("sudo -E ip netns exec {netns}") 57 | } else { 58 | format!("ip netns exec {netns}") 59 | }; 60 | cargo_with_runner(&runner).args(args).status().unwrap() 61 | } 62 | }; 63 | // A code outside 0..=255 is reported as 255 rather than truncated: `256 as u8` would be 0 and look like success. 64 | match status.code() { 65 | Some(code) => ExitCode::from(u8::try_from(code).unwrap_or(u8::MAX)), 66 | None => ExitCode::from(255), 67 | } 68 | } 69 | --------------------------------------------------------------------------------